zoukankan      html  css  js  c++  java
  • MR案例:外连接代码实现

    【外连接】只需在【内连接】代码的基础上稍作修改即可实现。具体HQL语句详见《Hive查询Join》。

    package join.map;
    
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.VLongWritable;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.FileSplit;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    public class JoinOn {
    
        public static void main(String[] args) throws Exception {
    
            //临时配置windows的环境变量
            System.setProperty("hadoop.home.dir", "D:\workspace\hadoop-2.2.0");
    
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf);
            job.setJarByClass(JoinOn.class);
    
            job.setMapperClass(JOMapper.class);
            job.setReducerClass(JOReducer.class);
    
            job.setMapOutputKeyClass(VLongWritable.class);
            job.setMapOutputValueClass(Text.class);
    
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
    
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
    
            System.exit(job.waitForCompletion(true)? 0:1);
    
        }
    
        public static class JOMapper extends Mapper<LongWritable, Text, VLongWritable, Text>{
            @Override
            protected void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
    
                //获取当前分片所对应的文件名
                String name = ((FileSplit)context.getInputSplit()).getPath().getName();
    
                String[] splited = value.toString().split("	");
    
                if(name.endsWith("sales")){
    
                    //sales表
                    //<key,value> --> <id, things+':'+name+'	'+id>
                    context.write(new VLongWritable(Long.parseLong(splited[1])), new Text(name+":"+value.toString()));
                }else if(name.endsWith("things")) {
    
    //<key,value> --> <id, sales+':'+id+' '+name> context.write(new VLongWritable(Long.parseLong(splited[0])), new Text(name+":"+value.toString())); } } } public static class JOReducer extends Reducer<VLongWritable, Text, Text, Text>{ @Override protected void reduce(VLongWritable key, Iterable<Text> v2s, Context context) throws IOException, InterruptedException { //分别存储sales和things两表的name List<String> sales=new ArrayList<String>(); List<String> things=new ArrayList<String>(); for(Text text : v2s){ String[] splited = text.toString().split(":"); //sales表中的数据 if(splited[0].endsWith("sales")){ //加入集合 sales.add(splited[1]); } //things表中数据 else if(splited[0].endsWith("things")){ things.add(splited[1]); } } //笛卡尔积 /** * 左外连接:只要求左表中有数据即可 */ if(sales.size()!=0 /*&& things.size()!=0*/){ for(String sale : sales){
    //如果右表中没有数据,则使用 NULL 代替 if(things.size()==0){ context.write(new Text(sale), new Text("NULL"+" "+"NILL"));
    }
    else {//如果右表中有数据,则直接输出 for(String thing : things){ context.write(new Text(sale), new Text(thing)); } } } } } } }

    总结:

     1).左外连接:左表全部显示,右表不匹配的部分以NULL替代。

     2).代码实现上只要求左表不为空:右表为空时以NULL输出,右表不为空时直接输出匹配的记录。

  • 相关阅读:
    centos8 将SSSD配置为使用LDAP并要求TLS身份验证
    Centos8 搭建 kafka2.8 .net5 简单使用kafka
    .net core 3.1 ActionFilter 拦截器 偶然 OnActionExecuting 中HttpContext.Session.Id 为空字符串 的问题
    Springboot根据不同环境加载对应的配置
    VMware Workstation12 安装 Centos8.3
    .net core json配置文件小结
    springboot mybatisplus createtime和updatetime自动填充
    .net core autofac依赖注入简洁版
    .Net Core 使用 redis 存储 session
    .Net Core 接入 RocketMQ
  • 原文地址:https://www.cnblogs.com/skyl/p/4749454.html
Copyright © 2011-2022 走看看