zoukankan      html  css  js  c++  java
  • hadoop-job(mapReducer计算单词出现的个数)

    1.============map===============

    package com.it18zhang.hadoop.mr;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    import java.io.IOException;

    /**
    * Mapper
    */
    public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    /**
    * key : 行首偏移量,字节数,意义不大。
    * value : 一行文本
    */
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    //
    String line = value.toString() ;
    String[] arr = line.split(" ");

    Text keyOut = new Text() ;
    IntWritable valueOut = new IntWritable(1) ;
    for(String word : arr){
    keyOut.set(word);
    context.write(keyOut,valueOut);
    }
    }
    }

    2.============reducer===============

    package com.it18zhang.hadoop.mr;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    import java.io.IOException;

    /**
    * reducer
    */
    public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    /**
    * key : word
    * values : 该key下聚合的value
    */
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
    int count = 0 ;
    for(IntWritable iw : values){
    count = count + iw.get() ;
    }
    context.write(key , new IntWritable(count));
    }
    }

    3.============统计===============

    package com.it18zhang.hadoop.mr;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    import java.io.IOException;
    /**
     * Word-count driver: wires the mapper and reducer into a Hadoop job.
     *
     * Usage: App &lt;input path&gt; &lt;output path&gt;
     * The output directory is deleted (recursively) before the job runs.
     */
    public class App {
        public static void main(String[] args) throws Exception {
            if (args == null || args.length < 2) {
                throw new Exception("参数不足,需要2个参数");
            }
            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get(conf);
            // Recursively delete the output directory so a re-run does not
            // fail with "output directory already exists".
            fs.delete(new Path(args[1]), true);

            // Create and describe the job.
            Job job = Job.getInstance(conf);
            job.setJobName("word_count_add");
            // Locate the jar containing this driver class.
            job.setJarByClass(App.class);

            // Input to count and destination for the results.
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));

            // Map and reduce implementations.
            job.setMapperClass(WordCountMapper.class);
            job.setReducerClass(WordCountReducer.class);

            // Key/value types emitted by the reducer (and, since no
            // separate map output types are set, by the mapper too).
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            // Single reducer → single output file.
            job.setNumReduceTasks(1);

            // Run synchronously and propagate success/failure via the
            // process exit code — the original ignored the boolean and
            // always exited 0 even when the job failed.
            boolean success = job.waitForCompletion(true);
            System.exit(success ? 0 : 1);
        }
    }

  • 相关阅读:
    request.getParameter() 、 request.getInputStream()和request.getReader() 使用体会
    HTTP之Content-Length
    关于spring3中No Session found for current thread!and Transaction的配置和管理(转)
    Java数据类型和MySql数据类型对应一览
    Spring MVC 解读——View,ViewResolver(转)
    LeetCode 441. Arranging Coins
    LeetCode 415. Add Strings
    LeetCode 400. Nth Digit
    LeetCode 367. Valid Perfect Square
    LeetCode 326. Power of Three
  • 原文地址:https://www.cnblogs.com/nyfz/p/9041992.html
Copyright © 2011-2022 走看看