zoukankan      html  css  js  c++  java
  • MapReduce单表关联

    数据:
    找出孩子的爷爷奶奶姥姥姥爷

    child parent
    Tom Lucy
    Tom Jack
    Jone Lucy
    Jone Jack
    Lucy Marry
    Lucy Jesse
    Jack Alice
    Jack Jesse
    Terry Alice
    Terry Jesse
    Philip Terry
    Philip Alma
    Mark Terry
    Mark Alma

    结果:

    Jone    Alice
    Tom    Alice
    Jone    Jesse
    Tom    Jesse
    Jone    Marry
    Tom    Marry
    Jone    Jesse
    Tom    Jesse
    Mark    Alice
    Philip    Alice
    Mark    Jesse
    Philip    Jesse

    Mapper:

    一个坑:每次放入context.write()的时候都需要重新new 一个Text出来。不可以用原来的Text.set()方法

    package _SingleTable;
    
    
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    
    /**
     * @Author:Dapeng
     * @Discription:
     * @Date:Created in 上午 10:11 2018/11/8 0008
     */
    /**
     * Mapper for the single-table self-join: each input line is "child parent".
     * For every line it emits the relation from both sides so the reducer can
     * join children with grandparents on the shared middle person:
     *   key = child,  value = "1:" + parent   (tag 1: value is the key's parent)
     *   key = parent, value = "2:" + child    (tag 2: value is the key's child)
     * The header line starting with "child" is skipped.
     */
    public class SingleTableMap extends Mapper<LongWritable,Text,Text,Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            // Fix: the whitespace regex must be "\\s+". The original "\s+" is an
            // illegal escape before Java 15, and on 15+ it means a literal space,
            // so tab-separated input would not be split.
            String[] wordArr = line.split("\\s+");
            // Guard against blank or malformed lines.
            if (wordArr.length < 2) {
                return;
            }
            if (!"child".equals(wordArr[0])) {
                // Emit (child -> "1:parent"): records the key's parent.
                context.write(new Text(wordArr[0]), new Text("1:" + wordArr[1]));
                // Emit (parent -> "2:child"): records the key's child.
                context.write(new Text(wordArr[1]), new Text("2:" + wordArr[0]));
            }
        }
    }

    Reducer

    package _SingleTable;
    
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    
    /**
     * @Author:Dapeng
     * @Discription:
     * @Date:Created in 上午 10:11 2018/11/8 0008
     */
    /**
     * Reducer for the single-table self-join. For one person (the key) it
     * receives tagged values from the mapper: "1:name" marks a parent of the
     * key, "2:name" marks a child of the key. The cross product of the two
     * buckets yields every (grandchild, grandparent) pair through this key.
     */
    public class SingleTableReduce extends Reducer<Text,Text,Text,Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            List<String> grandparents = new ArrayList<String>();
            List<String> grandchildren = new ArrayList<String>();
            Text outKey = new Text();
            Text outValue = new Text();

            // Sort each tagged value into the matching bucket.
            for (Text value : values) {
                String[] tagged = value.toString().split(":");
                if ("1".equals(tagged[0])) {
                    grandparents.add(tagged[1]);
                } else if ("2".equals(tagged[0])) {
                    grandchildren.add(tagged[1]);
                }
            }

            // Every child of this key is a grandchild of every parent of this
            // key. Reusing the Text instances is safe here because write()
            // serializes the pair immediately.
            for (String grandparent : grandparents) {
                for (String grandchild : grandchildren) {
                    outValue.set(grandparent);
                    outKey.set(grandchild);
                    context.write(outKey, outValue);
                }
            }
        }
    }
    package _SingleTable;
    
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import java.io.IOException;
    
    /**
     * @Author:Dapeng
     * @Discription:
     * @Date:Created in 上午 10:11 2018/11/8 0008
     */
    /**
     * Job driver for the single-table join.
     *
     * Generalized: input and output paths may be supplied on the command line
     * (args[0] = input, args[1] = output); when omitted, the original
     * hard-coded local paths are used, so existing invocations are unchanged.
     */
    public class SingleTableMain {
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            // Paths are overridable via args, with the old defaults preserved.
            String inputPath = args.length > 0 ? args[0] : "file:/D:/hadoopFile/singleTable/data.txt";
            String outputPath = args.length > 1 ? args[1] : "file:/D:/hadoopFile/singleTable/out";

            // 0. Create the job.
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "single_table");
            job.setJarByClass(SingleTableMain.class);

            // 1. Input (TextInputFormat by default).
            FileInputFormat.addInputPath(job, new Path(inputPath));

            // 2. Mapper and its intermediate key/value types.
            job.setMapperClass(SingleTableMap.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);

            // 3. Shuffle: default partitioner and sort.

            // 4. Reducer and the final output key/value types.
            job.setReducerClass(SingleTableReduce.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            // 5. Output directory (must not already exist).
            FileOutputFormat.setOutputPath(job, new Path(outputPath));

            // 6. Run synchronously and report the completion status.
            boolean result = job.waitForCompletion(true);
            System.out.println(result);
        }
    }
  • 相关阅读:
    Python小白学习之路(五)—【类和对象】【列表】【列表相关功能】
    Python小白学习之路(四)——第一次练习题
    Python小白学习之路(三)—【数字功能】【字符串功能】
    Python小白学习之路(二)—【Pycharm安装与配置】【创建项目】【运算符】【数据类型】
    Python初体验(一)—【配置环境变量】【变量】【input】【条件语句】【循环语句】
    HTML下直接调用Less文件
    继承的几种方式
    sublime从官网纯净版到插件完整版
    bower工具的简单使用
    sublime修改代码字体颜色
  • 原文地址:https://www.cnblogs.com/da-peng/p/9930765.html
Copyright © 2011-2022 走看看