zoukankan      html  css  js  c++  java
  • MR 文件合并

     1 package com.euphe.filter;
     2 
     3 import com.euphe.util.HUtils;
     4 import com.euphe.util.Utils;
     5 import org.apache.hadoop.conf.Configuration;
     6 import org.apache.hadoop.conf.Configured;
     7 import org.apache.hadoop.fs.FileSystem;
     8 import org.apache.hadoop.fs.Path;
     9 import org.apache.hadoop.io.Text;
    10 import org.apache.hadoop.mapreduce.Job;
    11 import org.apache.hadoop.mapreduce.Mapper;
    12 import org.apache.hadoop.mapreduce.Reducer;
    13 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    14 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    15 import org.apache.hadoop.util.GenericOptionsParser;
    16 import org.apache.hadoop.util.Tool;
    17 
    18 import java.io.IOException;
    19 
    20 public class ReductionJob extends Configured implements Tool {
    21     public static class Map extends Mapper<Object, Text, Text, Text> {
    22         private static Text text = new Text();
    23 
    24         public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    25             text = value;
    26             context.write(text, new Text());
    27         }
    28     }
    29 
    30     public static class Reduce extends Reducer<Text, Text, Text, Text> {
    31         public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    32             context.write(key, new Text());
    33         }
    34     }
    35     @Override
    36     public int run(String[] args) throws Exception {
    37         Configuration conf = HUtils.getConf();
    38         conf.set("mapreduce.job.jar", Utils.getRootPathBasedPath("WEB-INF/jars/redu.jar"));
    39         String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();//解析命令行参数
    40         if (otherArgs.length !=2) {//要求必须有输入和输出路径两个参数
    41             System.err.println("Usage: com.euphe.filter.ReductionJob <in> <out>");
    42             System.exit(2);
    43         }
    44         Job job =  Job.getInstance(conf,"Reduction input  :"+otherArgs[0]+" to "+otherArgs[1]);
    45         job.setJarByClass(ReductionJob.class);
    46         job.setMapperClass(Map.class);
    47         job.setReducerClass(Reduce.class);
    48         job.setNumReduceTasks(1);
    49 
    50         job.setOutputKeyClass(Text.class);
    51         job.setOutputValueClass(Text.class);
    52 
    53         FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    54         FileOutputFormat.setOutputPath(job,new Path(otherArgs[1]));
    55         FileSystem.get(conf).delete(new Path(otherArgs[1]), true);//调用任务前先删除输出目录
    56         return job.waitForCompletion(true) ? 0 : 1;
    57     }
    58 }
  • 相关阅读:
    斯特林反演
    子集计数
    快速求斯特林数总结(洛谷模板题解)
    min-25筛总结
    数学笔记
    [WC2018]即时战略(LCT,splay上二分)
    [WC2018]通道(乱搞,迭代)
    Python requests 多线程抓取 出现HTTPConnectionPool Max retires exceeded异常
    Python监控服务器利器--psutil
    gevent
  • 原文地址:https://www.cnblogs.com/xym4869/p/8960931.html
Copyright © 2011-2022 走看看