zoukankan      html  css  js  c++  java
  • 数据导入(二):MapReduce

    package test091201;
    
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.text.SimpleDateFormat;
    import java.util.Date;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
    import org.apache.hadoop.hbase.mapreduce.TableReducer;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    
    /**
     * MapReduce job that imports a tab-separated log file from HDFS into an HBase table.
     *
     * <p>The mapper keys every input line by {@code <field 1>:<formatted timestamp>} and the
     * reducer turns each key into one HBase row in column family {@code info}.
     *
     * <p>Usage: {@code Demo3 [inputPath [tableName]]}. Both arguments are optional and fall back
     * to the values that were previously hard-coded.
     */
    public class Demo3 {
        /** Table written to when no table name is given on the command line. */
        private static final String DEFAULT_TABLE = "waln2";
        /** Input location read when no path is given on the command line. */
        private static final String DEFAULT_INPUT_PATH = "hdfs://10.16.17.182:9000/test/wal_log";

        /** Field delimiter of the input lines (a single tab character). */
        private static final String FIELD_SEPARATOR = "\t";
        /** The reducer reads fields 0..9, so a usable record has at least this many fields. */
        private static final int MIN_FIELDS = 10;
        /** Counter group under which skipped records are reported. */
        private static final String COUNTER_GROUP = "Demo3";

        /**
         * Configures and runs the import job.
         *
         * @param args optional: {@code args[0]} input path, {@code args[1]} HBase table name
         * @throws Exception if the job cannot be configured or submitted
         */
        public static void main(String[] args) throws Exception {
            String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
            String tableName = args.length > 1 ? args[1] : DEFAULT_TABLE;

            Configuration conf = new Configuration();
            // Name of the HBase table the job writes to.
            conf.set(TableOutputFormat.OUTPUT_TABLE, tableName);
            // Raised so that HBase does not time out and abort during the import.
            conf.set("dfs.socket.timeout", "180000");
            // NOTE(review): the cluster is addressed as "ncst" here but by IP in the default input
            // path; presumably the same host - confirm.
            conf.set("hbase.rootdir", "hdfs://ncst:9000/hbase");
            conf.set("hbase.zookeeper.quorum", "ncst");

            Job job = Job.getInstance(conf);
            job.setJarByClass(Demo3.class);

            job.setMapperClass(Demo3Mapper.class);
            job.setReducerClass(Demo3Reduce.class);

            // Only the map output types are declared; the reduce output types are left unset.
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);

            job.setInputFormatClass(TextInputFormat.class);
            // No output path is set; the output format sends the reducer's Puts to HBase instead.
            job.setOutputFormatClass(TableOutputFormat.class);

            FileInputFormat.setInputPaths(job, new Path(inputPath));

            // Propagate job failure to the caller instead of always exiting with status 0.
            if (!job.waitForCompletion(true)) {
                System.exit(1);
            }
        }

        /**
         * Emits each well-formed input line under the key {@code <field 1>:<yyyyMMddHHmmss>},
         * where the timestamp is field 0 interpreted as epoch milliseconds.
         */
        public static class Demo3Mapper extends Mapper<LongWritable, Text, Text, Text> {
            // SimpleDateFormat is not thread-safe, so it is kept per mapper instance rather than
            // static; creating it once avoids one allocation per input record.
            private final SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyyMMddHHmmss");
            private final Text outKey = new Text();

            /**
             * Builds the row key for one input line and forwards the untouched line as the value.
             * Lines with too few fields or a non-numeric timestamp are skipped and counted.
             *
             * @param key     input key supplied by the input format (unused)
             * @param value   one tab-separated input line
             * @param context task context used to emit output and update counters
             */
            @Override
            protected void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
                String[] fields = value.toString().split(FIELD_SEPARATOR);
                if (fields.length < MIN_FIELDS) {
                    context.getCounter(COUNTER_GROUP, "MAP_TOO_FEW_FIELDS").increment(1);
                    return;
                }

                long epochMillis;
                try {
                    epochMillis = Long.parseLong(fields[0].trim());
                } catch (NumberFormatException e) {
                    context.getCounter(COUNTER_GROUP, "MAP_BAD_TIMESTAMP").increment(1);
                    return;
                }
                String formatted = timestampFormat.format(new Date(epochMillis));

                // Key => <field 1>:<formatted date>, Value => the whole line.
                outKey.set(fields[1] + ":" + formatted);
                context.write(outKey, value);
            }
        }

        /**
         * Converts all lines that share a row key into a single {@link Put} on column family
         * {@code info}. When several lines share a key they populate the same columns of the
         * same Put.
         */
        public static class Demo3Reduce extends TableReducer<Text, Text, NullWritable> {
            private static final byte[] FAMILY = "info".getBytes(StandardCharsets.UTF_8);
            private static final byte[] COL_DATE = "date".getBytes(StandardCharsets.UTF_8);
            private static final byte[] COL_TEL = "tel".getBytes(StandardCharsets.UTF_8);
            private static final byte[] COL_UP_PACK = "upPack".getBytes(StandardCharsets.UTF_8);
            private static final byte[] COL_DOWN_PACK = "downPack".getBytes(StandardCharsets.UTF_8);
            private static final byte[] COL_UP_PAY = "upPay".getBytes(StandardCharsets.UTF_8);
            private static final byte[] COL_DOWN_PAY = "downPay".getBytes(StandardCharsets.UTF_8);

            /**
             * Writes one HBase row for {@code key}, built from fields 0, 1, 6, 7, 8 and 9 of the
             * grouped lines. Lines with fewer than {@link #MIN_FIELDS} fields are skipped and
             * counted; nothing is written if no line was usable.
             *
             * @param key     row key produced by the mapper
             * @param v2s     the original input lines grouped under {@code key}
             * @param context task context used to emit the Put and update counters
             */
            @Override
            protected void reduce(Text key, Iterable<Text> v2s, Context context)
                    throws IOException, InterruptedException {
                // Text holds UTF-8 data, so encode explicitly instead of using the platform charset.
                Put put = new Put(key.toString().getBytes(StandardCharsets.UTF_8));
                boolean hasCells = false;

                for (Text line : v2s) {
                    String[] fields = line.toString().split(FIELD_SEPARATOR);
                    if (fields.length < MIN_FIELDS) {
                        context.getCounter(COUNTER_GROUP, "REDUCE_TOO_FEW_FIELDS").increment(1);
                        continue;
                    }
                    addCell(put, COL_DATE, fields[0]);
                    addCell(put, COL_TEL, fields[1]);
                    addCell(put, COL_UP_PACK, fields[6]);
                    addCell(put, COL_DOWN_PACK, fields[7]);
                    addCell(put, COL_UP_PAY, fields[8]);
                    addCell(put, COL_DOWN_PAY, fields[9]);
                    hasCells = true;
                }

                // Do not emit a Put that carries no columns.
                if (hasCells) {
                    context.write(NullWritable.get(), put);
                }
            }

            /** Adds {@code value}, UTF-8 encoded, to {@code put} under the info family. */
            private static void addCell(Put put, byte[] qualifier, String value) {
                put.add(FAMILY, qualifier, value.getBytes(StandardCharsets.UTF_8));
            }
        }
    }
  • 相关阅读:
    python之路---类
    python之路---走台阶(递归)
    python之路---递归函数
    python之路---filter、map、lambda函数
    python之路---封装
    python07--抽象数据类型和python类(P34)
    python06--计算机内存结构与存储管理(P27)
    匹配算法大纲
    并查集及其优化
    Hash技术初涉
  • 原文地址:https://www.cnblogs.com/skyl/p/4849222.html
Copyright © 2011-2022 走看看