zoukankan      html  css  js  c++  java
  • 强-大数据第八讲

    基于Hadoop的WordCount源码示例:

    一、WordCountMain.java

    package demo;

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    /**
     * Driver that configures and submits the WordCount MapReduce job.
     *
     * <p>The job wires {@code WordCountMapper} and {@code WordCountReducer} together,
     * reads its input from the path given as the first command-line argument and
     * writes its result to the path given as the second one.
     */
    public class WordCountMain {

        /**
         * Configures the job, submits it, and waits for it to finish.
         *
         * <p>The JVM exits with status 0 when the job succeeds, 1 when it fails,
         * and 2 when the required arguments are missing.
         *
         * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
         * @throws Exception if the job cannot be created, submitted, or monitored
         */
        public static void main(String[] args) throws Exception {
            // Fail with a readable usage message instead of an ArrayIndexOutOfBoundsException.
            if (args.length < 2) {
                System.err.println("Usage: WordCountMain <input path> <output path>");
                System.exit(2);
            }

            // A job = map + reduce.
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf);

            // Entry point of the job: identifies the jar that contains this class.
            job.setJarByClass(WordCountMain.class);

            // Mapper and the key/value types it emits.
            job.setMapperClass(WordCountMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(LongWritable.class);

            // Reducer and the key/value types of the final output.
            job.setReducerClass(WordCountReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);

            // Input and output locations of the job.
            FileInputFormat.setInputPaths(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));

            // Submit the job and propagate its outcome through the process exit status,
            // so that scripts invoking this driver can detect a failed job.
            boolean succeeded = job.waitForCompletion(true);
            System.exit(succeeded ? 0 : 1);
        }

    }

    二、WordCountMapper.java

    package demo;

    import java.io.IOException;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    /**
     * Mapper of the WordCount job: splits each input line into words and emits
     * a {@code (word, 1)} pair for every word found.
     */
    public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        /** Constant count emitted for every occurrence of a word. */
        private static final LongWritable ONE = new LongWritable(1);

        /** Output key, reused across calls to avoid allocating one object per word. */
        private final Text word = new Text();

        /**
         * Tokenizes one input record and writes {@code (word, 1)} for each token.
         *
         * @param key     the input key (not used by this mapper)
         * @param value   the input data, e.g. {@code I love Beijing}
         * @param context the map context that receives the emitted pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String data = value.toString();

            // Split on any run of whitespace so that tabs and repeated spaces
            // do not produce empty "words".
            String[] words = data.split("\\s+");

            for (String w : words) {
                // A blank line or leading whitespace still yields one empty token; skip it.
                if (w.isEmpty()) {
                    continue;
                }
                word.set(w);
                context.write(word, ONE);
            }
        }

    }

    三、WordCountReducer.java

    package demo;

    import java.io.IOException;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    /**
     * Reducer of the WordCount job: adds up all counts received for a word and
     * emits a single {@code (word, total)} pair.
     */
    public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

        /**
         * Sums the values grouped under one key and writes the total.
         *
         * @param k3      the word being reduced
         * @param v3      the collection of counts emitted for that word by the map phase
         * @param context the reduce context that receives the result
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text k3, Iterable<LongWritable> v3, Context context)
                throws IOException, InterruptedException {
            context.write(k3, new LongWritable(sumOf(v3)));
        }

        /** Returns the sum of all the given counts, or 0 when there are none. */
        private static long sumOf(Iterable<LongWritable> counts) {
            long sum = 0;
            for (LongWritable count : counts) {
                sum += count.get();
            }
            return sum;
        }

    }

     

  • 相关阅读:
    创业成功关键在于专注“核心竞争力”(外包有时候能大大提高开发周期)
    华为为什么不设事业部制?
    雷军三年花10亿造“芯” 营销还是“不服气”(外界对雷军做芯片的三种猜测,以及雷军本人的看法)
    Windows完成端口与猪肉佬
    分布式锁实现
    Quartz(GUI)图形界面程序----Quartz Web
    Quartz.net开源作业调度
    js 闭包
    Nancy
    MVC—WebAPI(调用、授权)
  • 原文地址:https://www.cnblogs.com/Zac1010/p/11175145.html
Copyright © 2011-2022 走看看