zoukankan      html  css  js  c++  java
  • 数据导入(二):MapReduce

    package test091201;
    
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.text.SimpleDateFormat;
    import java.util.Date;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
    import org.apache.hadoop.hbase.mapreduce.TableReducer;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    
    /**
     * MapReduce job that reads tab-separated text lines from HDFS and writes them
     * into the HBase table {@code waln2} through {@link TableOutputFormat}.
     *
     * <p>The mapper keys every line by {@code <field 1>:<formatted field 0>}; the
     * reducer turns each key into one {@link Put} whose cells live in the
     * {@code info} column family.
     */
    public class Demo3 {
        /** Separator between the fields of an input line. */
        private static final String FIELD_SEPARATOR = "\t";

        /** The reducer reads up to field index 9, so a usable line has at least 10 fields. */
        private static final int MIN_FIELDS = 10;

        /** Counter group under which skipped input records are reported. */
        private static final String COUNTER_GROUP = "Demo3";

        /**
         * Configures and runs the job, then terminates the JVM with exit code 0 when
         * the job succeeded and 1 when it failed.
         *
         * @param args command-line arguments (unused)
         * @throws Exception if the job cannot be set up or submitted
         */
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            // Name of the HBase table the job writes to.
            conf.set(TableOutputFormat.OUTPUT_TABLE, "waln2");
            // Raise the socket timeout so that HBase does not give up and exit early.
            conf.set("dfs.socket.timeout", "180000");
            conf.set("hbase.rootdir", "hdfs://ncst:9000/hbase");
            conf.set("hbase.zookeeper.quorum", "ncst");

            Job job = Job.getInstance(conf);
            job.setJarByClass(Demo3.class);

            job.setMapperClass(Demo3Mapper.class);
            job.setReducerClass(Demo3Reduce.class);

            // Only the map output types are declared; the reduce output types are not set.
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);

            job.setInputFormatClass(TextInputFormat.class);
            // No output path is configured; the output format writes to the table instead.
            job.setOutputFormatClass(TableOutputFormat.class);

            FileInputFormat.setInputPaths(job, new Path("hdfs://10.16.17.182:9000/test/wal_log"));

            // Propagate the job outcome to the caller instead of always exiting with 0.
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }

        /**
         * Emits each well-formed input line unchanged, keyed by
         * {@code <field 1>:<field 0 formatted as yyyyMMddHHmmss>}.
         *
         * <p>Field 0 is parsed as a {@code long} and passed to {@link Date#Date(long)},
         * i.e. it is treated as milliseconds since the epoch. Lines with fewer than
         * {@link #MIN_FIELDS} fields or a non-numeric field 0 are skipped and counted
         * rather than failing the task.
         */
        public static class Demo3Mapper extends Mapper<LongWritable, Text, Text, Text> {
            // One formatter per mapper instance instead of one per record. It is kept as
            // an instance field (not static) because SimpleDateFormat is not thread-safe.
            private final SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyyMMddHHmmss");

            /** Reused output key; its content is replaced for every emitted record. */
            private final Text outKey = new Text();

            @Override
            protected void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
                String[] fields = value.toString().split(FIELD_SEPARATOR);
                if (fields.length < MIN_FIELDS) {
                    // Too short for the reducer to index; drop it here instead of crashing later.
                    context.getCounter(COUNTER_GROUP, "MALFORMED_LINES").increment(1);
                    return;
                }

                long timestamp;
                try {
                    timestamp = Long.parseLong(fields[0].trim());
                } catch (NumberFormatException e) {
                    context.getCounter(COUNTER_GROUP, "BAD_TIMESTAMPS").increment(1);
                    return;
                }

                // Key => <field 1>:<formatted date>, value => the whole line.
                outKey.set(fields[1] + ":" + timestampFormat.format(new Date(timestamp)));
                context.write(outKey, value);
            }
        }

        /**
         * Builds one {@link Put} per key, using the key as the row key and copying
         * fields 0, 1, 6, 7, 8 and 9 of each value line into the {@code info} family.
         *
         * <p>All values of a key add their cells to the same {@code Put}. Row key and
         * cell contents are encoded as UTF-8 so the stored bytes do not depend on the
         * JVM default charset.
         */
        public static class Demo3Reduce extends TableReducer<Text, Text, NullWritable> {
            private static final byte[] FAMILY = "info".getBytes(StandardCharsets.UTF_8);
            private static final byte[] Q_DATE = "date".getBytes(StandardCharsets.UTF_8);
            private static final byte[] Q_TEL = "tel".getBytes(StandardCharsets.UTF_8);
            private static final byte[] Q_UP_PACK = "upPack".getBytes(StandardCharsets.UTF_8);
            private static final byte[] Q_DOWN_PACK = "downPack".getBytes(StandardCharsets.UTF_8);
            private static final byte[] Q_UP_PAY = "upPay".getBytes(StandardCharsets.UTF_8);
            private static final byte[] Q_DOWN_PAY = "downPay".getBytes(StandardCharsets.UTF_8);

            @Override
            protected void reduce(Text key, Iterable<Text> v2s, Context context)
                    throws IOException, InterruptedException {
                Put put = new Put(key.toString().getBytes(StandardCharsets.UTF_8));
                for (Text line : v2s) {
                    String[] fields = line.toString().split(FIELD_SEPARATOR);
                    if (fields.length < MIN_FIELDS) {
                        // Defensive: the mapper already filters these, but never index blindly.
                        context.getCounter(COUNTER_GROUP, "MALFORMED_LINES").increment(1);
                        continue;
                    }
                    put.add(FAMILY, Q_DATE, fields[0].getBytes(StandardCharsets.UTF_8));
                    put.add(FAMILY, Q_TEL, fields[1].getBytes(StandardCharsets.UTF_8));
                    put.add(FAMILY, Q_UP_PACK, fields[6].getBytes(StandardCharsets.UTF_8));
                    put.add(FAMILY, Q_DOWN_PACK, fields[7].getBytes(StandardCharsets.UTF_8));
                    put.add(FAMILY, Q_UP_PAY, fields[8].getBytes(StandardCharsets.UTF_8));
                    put.add(FAMILY, Q_DOWN_PAY, fields[9].getBytes(StandardCharsets.UTF_8));
                }
                if (put.isEmpty()) {
                    // Every value was malformed; there is nothing to write for this key.
                    return;
                }
                context.write(NullWritable.get(), put);
            }
        }
    }
  • 相关阅读:
    KDD 2018 | 最佳论文:首个面向Facebook、arXiv网络图类的对抗攻击研究
    Distill详述「可微图像参数化」:神经网络可视化和风格迁移利器!
    T1330 最少步数(#Ⅱ- 8)(广度优先搜索)
    细胞个数题解(广度优先搜索)
    DRL前沿之:Benchmarking Deep Reinforcement Learning for Continuous Control
    DRL 教程 | 如何保持运动小车上的旗杆屹立不倒?TensorFlow利用A3C算法训练智能体玩CartPole游戏
    强化学习是如何解决问题的?
    深度强化学习泡沫及路在何方?
    ECCV 2018 | UBC&腾讯AI Lab提出首个模块化GAN架构,搞定任意图像PS组合
    纵览神经架构搜索方法
  • 原文地址:https://www.cnblogs.com/skyl/p/4849222.html
Copyright © 2011-2022 走看看