zoukankan      html  css  js  c++  java
  • MR案例:内连接代码实现

    本文是对Hive中【内连接】的Java-API的实现,具体的HQL语句详见Hive查询Join

    package join.map;
    
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.VLongWritable;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.FileSplit;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    public class JoinOn {
    
        public static void main(String[] args) throws Exception {
    
            //临时配置windows的环境变量
            System.setProperty("hadoop.home.dir", "D:\workspace\hadoop-2.2.0");
    
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf);
            job.setJarByClass(JoinOn.class);
    
            job.setMapperClass(JOMapper.class);
            job.setReducerClass(JOReducer.class);
    
            job.setMapOutputKeyClass(VLongWritable.class);
            job.setMapOutputValueClass(Text.class);
    
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
    
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
    
            System.exit(job.waitForCompletion(true)? 0:1);
        }
    
        public static class JOMapper extends Mapper<LongWritable, Text, VLongWritable, Text>{
            @Override
            protected void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
    
                //获取当前分片所对应的文件名(优化:放在setup()方法中)
                String name = ((FileSplit)context.getInputSplit()).getPath().getName();
    
                String[] splited = value.toString().split("	");
    
                if(name.endsWith("sales")){
    
                    //sales表(注意:第二个参数中name和value之间的分隔符不能和value中个字段分隔符一样)
                    //<key,value> --> <id, things+':'+name+'	'+id>
                    context.write(new VLongWritable(Long.parseLong(splited[1])), new Text(name+":"+value.toString()));
                }else if(name.endsWith("things")) {
                    //<key,value> --> <id, sales+'	'+id+'	'+name>
                    context.write(new VLongWritable(Long.parseLong(splited[0])), new Text(name+":"+value.toString()));
                }    
            }
        }
    
        public static class JOReducer extends Reducer<VLongWritable, Text, Text, Text>{
            @Override
            protected void reduce(VLongWritable key, Iterable<Text> v2s, Context context)
                    throws IOException, InterruptedException {
    
                //分别存储sales和things两表的name
                List<String> sales=new ArrayList<String>();
                List<String> things=new ArrayList<String>();
    
                for(Text text : v2s){
                    String[] splited = text.toString().split(":");
    
                    //sales表中的数据
                    if(splited[0].endsWith("sales")){
    
                        //加入集合
                        sales.add(splited[1]);
                    }
                    //things表中数据
                    else if(splited[0].endsWith("things")){
                        things.add(splited[1]);
                    }
                }
                //笛卡尔积
                if(sales.size()!=0 && things.size()!=0){
                    for(String sale : sales){
                        for(String thing : things){
                            context.write(new Text(sale), new Text(thing));
                        }
                    }
                }
            }
        }
    }

    总结:

     1).程序中获取FileName的代码应放置在setup()方法中,因为每个Map任务(即每个输入分片)只需执行一次此方法,而不必对每条记录重复执行

     2).Map输出的value中,name和原始value之间的分隔符不能和value中各字段的分隔符相同

  • 相关阅读:
    ReactNative: 数据请求
    ReactNative: 使用Geolocation的API获取位置信息
    ReactNative: 使用第三方库图像选择器react-native-image-picker和react-native-image-crop-picker
    MDG_TR_DEST
    【VerySky原创】后台JOB运行-相关表
    【VerySky原创】RPR_ABAP_SOURCE_SCAN
    【VerySky原创】 ME9F
    【VerySky原创】如何查找SNRO编号范围的使用情况;
    【VerySky原创】怎样查找到CDHDR、CDPOS表中的OBJECTCLAS字段
    【由VerySky原创】由Number Range 导致凭证生成但无法保存的问题
  • 原文地址:https://www.cnblogs.com/skyl/p/4749449.html
Copyright © 2011-2022 走看看