zoukankan      html  css  js  c++  java
  • MR案例:外连接代码实现

    【外连接】只需在【内连接】代码的基础上稍作修改即可实现。对应的HQL语句详见《Hive查询Join》一文。

    package join.map;
    
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.VLongWritable;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.FileSplit;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    public class JoinOn {
    
        public static void main(String[] args) throws Exception {
    
            //临时配置windows的环境变量
            System.setProperty("hadoop.home.dir", "D:\workspace\hadoop-2.2.0");
    
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf);
            job.setJarByClass(JoinOn.class);
    
            job.setMapperClass(JOMapper.class);
            job.setReducerClass(JOReducer.class);
    
            job.setMapOutputKeyClass(VLongWritable.class);
            job.setMapOutputValueClass(Text.class);
    
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
    
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
    
            System.exit(job.waitForCompletion(true)? 0:1);
    
        }
    
        public static class JOMapper extends Mapper<LongWritable, Text, VLongWritable, Text>{
            @Override
            protected void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
    
                //获取当前分片所对应的文件名
                String name = ((FileSplit)context.getInputSplit()).getPath().getName();
    
                String[] splited = value.toString().split("	");
    
                if(name.endsWith("sales")){
    
                    //sales表
                    //<key,value> --> <id, things+':'+name+'	'+id>
                    context.write(new VLongWritable(Long.parseLong(splited[1])), new Text(name+":"+value.toString()));
                }else if(name.endsWith("things")) {
    
    //<key,value> --> <id, sales+':'+id+' '+name> context.write(new VLongWritable(Long.parseLong(splited[0])), new Text(name+":"+value.toString())); } } } public static class JOReducer extends Reducer<VLongWritable, Text, Text, Text>{ @Override protected void reduce(VLongWritable key, Iterable<Text> v2s, Context context) throws IOException, InterruptedException { //分别存储sales和things两表的name List<String> sales=new ArrayList<String>(); List<String> things=new ArrayList<String>(); for(Text text : v2s){ String[] splited = text.toString().split(":"); //sales表中的数据 if(splited[0].endsWith("sales")){ //加入集合 sales.add(splited[1]); } //things表中数据 else if(splited[0].endsWith("things")){ things.add(splited[1]); } } //笛卡尔积 /** * 左外连接:只要求左表中有数据即可 */ if(sales.size()!=0 /*&& things.size()!=0*/){ for(String sale : sales){
    //如果右表中没有数据,则使用 NULL 代替 if(things.size()==0){ context.write(new Text(sale), new Text("NULL"+" "+"NILL"));
    }
    else {//如果右表中有数据,则直接输出 for(String thing : things){ context.write(new Text(sale), new Text(thing)); } } } } } } }

    总结:

     1).左外连接:左表全部显示,右表不匹配的部分以NULL替代。

     2).代码实现上只要求左表不为空:右表为空时以NULL输出,右表不为空时直接输出匹配的记录。

  • 相关阅读:
    struts2自定义拦截器之过滤不良言论---http500可能的问题所在
    bzoj4205[FJ2015集训] 卡牌配对
    bzoj1562[NOI2009] 变换序列
    bzoj1433[ZJOI2009] 假期的宿舍
    bzoj2150 部落战争
    从bzoj2463到bzoj1443和bzoj2437 博弈+二分图匹配
    bzoj4554[Tjoi2016&Heoi2016] 游戏
    bzoj1059[ZJOI2007] 矩阵游戏
    bzoj1143[CTSC2008] 祭祀river
    bzoj3175[Tjoi2013] 攻击装置
  • 原文地址:https://www.cnblogs.com/skyl/p/4749454.html
Copyright © 2011-2022 走看看