zoukankan      html  css  js  c++  java
  • hadoop-job(MapReduce计算单词出现的次数)

    1.============map===============

    package com.it18zhang.hadoop.mr;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    import java.io.IOException;

    /**
    * Mapper
    */
    public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    /**
    * key : 行首偏移量,字节数,意义不大。
    * value : 一行文本
    */
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    //
    String line = value.toString() ;
    String[] arr = line.split(" ");

    Text keyOut = new Text() ;
    IntWritable valueOut = new IntWritable(1) ;
    for(String word : arr){
    keyOut.set(word);
    context.write(keyOut,valueOut);
    }
    }
    }

    2.============reducer===============

    package com.it18zhang.hadoop.mr;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    import java.io.IOException;

    /**
    * reducer
    */
    public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    /**
    * key : word
    * values : 该key下聚合的value
    */
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
    int count = 0 ;
    for(IntWritable iw : values){
    count = count + iw.get() ;
    }
    context.write(key , new IntWritable(count));
    }
    }

    3.============统计===============

    package com.it18zhang.hadoop.mr;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    import java.io.IOException;
    /**
     * Driver for the word-count job: wires {@link WordCountMapper} and
     * {@link WordCountReducer} into a single job and runs it.
     *
     * <p>Usage: {@code App <input path> <output path>}. Any existing output
     * path is deleted recursively before the job starts.
     */
    public class App {

        /**
         * Configures and runs the job, terminating the JVM with status 1 when
         * the job does not complete successfully.
         *
         * @param args {@code args[0]} is the input path, {@code args[1]} the output path
         * @throws IllegalArgumentException if fewer than two arguments are given
         * @throws Exception                if setting up or running the job fails
         */
        public static void main(String[] args) throws Exception {
            if (args == null || args.length < 2) {
                // Message reads: "not enough arguments, 2 arguments required".
                throw new IllegalArgumentException("参数不足,需要2个参数");
            }
            Configuration conf = new Configuration();
            Path inputPath = new Path(args[0]);
            Path outputPath = new Path(args[1]);

            // Recursively delete the output directory. The filesystem is resolved
            // from the path itself so that an output path whose scheme differs
            // from the configured default filesystem (for example file:///...
            // for a local run) is handled by the matching filesystem.
            FileSystem fs = outputPath.getFileSystem(conf);
            fs.delete(outputPath, true);

            // Create the job; the job name is arbitrary.
            Job job = Job.getInstance(conf);
            job.setJobName("word_count_add");
            // Identify the job jar by the class it contains.
            job.setJarByClass(App.class);

            // Files to process and where to write the result.
            FileInputFormat.addInputPath(job, inputPath);
            FileOutputFormat.setOutputPath(job, outputPath);

            // Mapper and reducer implementations.
            job.setMapperClass(WordCountMapper.class);
            job.setReducerClass(WordCountReducer.class);

            // Output key/value types (the mapper and the reducer both emit these).
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            // Number of reduce tasks.
            job.setNumReduceTasks(1);

            // Run the job and report failure through the process exit status
            // instead of silently exiting with 0.
            if (!job.waitForCompletion(true)) {
                System.exit(1);
            }
        }
    }

  • 相关阅读:
    Neko's loop HDU-6444(网络赛1007)
    Parameters
    SETLOCAL
    RD / RMDIR Command
    devenv 命令用法
    Cannot determine the location of the VS Common Tools folder.
    'DEVENV' is not recognized as an internal or external command,
    How to change Visual Studio default environment setting
    error signing assembly unknown error
    What is the Xcopy Command?:
  • 原文地址:https://www.cnblogs.com/nyfz/p/9041992.html
Copyright © 2011-2022 走看看