zoukankan      html  css  js  c++  java
  • Map Join案例

    一、需求分析

    1、需求

    与Reduce join的需求一致

    2、分析

    a、在Mapper的setup()方法中加载缓存文件,把pid和pname保存为kv键值对

    b、在map()方法中,根据pid从上面缓存的kv键值对中获取pname

    c、编写Driver,将ReduceTask的数量设置为0,并添加缓存文件

    二、代码

    1、Driver

    package com.wt.mapjoin;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import java.io.IOException;
    import java.net.URI;
    import java.net.URISyntaxException;
    
    public class TableJoinDriver {
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
            // 0 根据自己电脑路径重新配置
            args = new String[]{"E:\a\input1\order.txt", "E:\a\output2"};
            // 1 获取job信息
            Configuration configuration = new Configuration();
            Job job = Job.getInstance(configuration);
            // 2 设置加载jar包路径
            job.setJarByClass(TableJoinDriver.class);
            // 3 关联map
            job.setMapperClass(TableJoinMapper.class);
            // 4 设置最终输出数据类型
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(NullWritable.class);
            // 5 设置输入输出路径
            FileInputFormat.setInputPaths(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            // 6 加载缓存数据
            job.addCacheFile(new URI("file:///E:/a/inputmap/pd.txt"));
            // 7 Map端Join的逻辑不需要Reduce阶段,设置reduceTask数量为0
            job.setNumReduceTasks(0);
            // 8 提交
            boolean result = job.waitForCompletion(true);
            System.exit(result ? 0 : 1);
        }
    }

    2、Mapper

    package com.wt.mapjoin;
    
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.commons.lang.StringUtils;
    import java.io.BufferedReader;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.net.URI;
    import java.util.HashMap;
    import java.util.Map;
    
    /**
     * Mapper that performs a map-side join.
     *
     * <p>{@link #setup} loads the first cache file into an in-memory pid -> pname table;
     * {@link #map} then rewrites every input record {@code id <TAB> pid <TAB> amount}
     * as {@code id <TAB> pname <TAB> amount}.
     */
    public class TableJoinMapper extends Mapper<LongWritable,Text,Text, NullWritable> {
        // pid -> pname lookup table, filled in setup()
        Map<String, String> pdMap = new HashMap<String, String>();
        // Output key object, reused for every record
        Text k = new Text();

        /**
         * Reads the first cache file line by line and stores column 0 (pid) and
         * column 1 (pname) of each tab-separated line in {@code pdMap}.
         *
         * @throws IOException if no cache file is registered or the file cannot be read
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            super.setup(context);
            // 1 Locate the cache file
            URI[] cacheFiles = context.getCacheFiles();
            if (cacheFiles == null || cacheFiles.length == 0) {
                throw new IOException("No cache file is registered for this job; expected the product table");
            }
            String path = cacheFiles[0].getPath();
            // try-with-resources closes the reader even when a line fails to parse
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"))) {
                String line;
                // Read to end of file; a blank line is skipped rather than treated as the end
                while ((line = reader.readLine()) != null) {
                    if (StringUtils.isEmpty(line)) {
                        continue;
                    }
                    // 2 Split into columns
                    String[] fields = line.split("\t");
                    // 3 Cache pid -> pname
                    pdMap.put(fields[0], fields[1]);
                }
            }
    //        pid    pname
    //        01    小米
        }

        /**
         * Replaces the pid column of one input record with the product name from
         * {@code pdMap} and writes the joined line as the output key.
         *
         * <p>If the pid has no entry in {@code pdMap}, the lookup returns {@code null}
         * and the string concatenation puts the text {@code null} in the name column.
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            String[] fields = line.split("\t");
    //        input:   id    pid    amount
    //        e.g.     1001    01    1
    //        output:  id    pname    amount
            String id = fields[0];
            String pId = fields[1];
            String amount = fields[2];
            String pName = pdMap.get(pId);
            String newLine = id + "\t" + pName + "\t" + amount;
            k.set(newLine);
            context.write(k, NullWritable.get());
        }
    }
  • 相关阅读:
    如何用C#在Excel中生成图表?
    SQL2000怎样可以让一个数据库用几个磁盘分区
    用C#快速往Excel写数据
    SQL语句导入导出大全
    js解密
    Word的常用操作
    网页javascript获得当前页面或窗口的各个宽度高度
    用C#动态创建Access数据库
    MSSQL一些精典语句
    寻找Vista下PC硬件驱动
  • 原文地址:https://www.cnblogs.com/wt7018/p/13643360.html
Copyright © 2011-2022 走看看