zoukankan      html  css  js  c++  java
  • 实验6:MapReduce实例——WordCount

    WordCount.java代码:

    package org.apache.hadoop.examples;
    import java.io.IOException;
    import java.util.StringTokenizer;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    /**
     * MapReduce job that counts, for every distinct first field, how many input
     * lines begin with it.
     *
     * <p>Each input line is split on single spaces; only the first token of the
     * line is emitted by the mapper, and the reducer sums the occurrences per
     * token. The input and output locations are fixed HDFS paths set in
     * {@link #main(String[])}.
     */
    public class WordCount {

        /**
         * Configures the job, submits it and blocks until it finishes.
         *
         * <p>The process exits with status 0 when the job succeeds and 1 otherwise.
         *
         * @param args command-line arguments; not used
         * @throws IOException            if the job cannot be created or submitted
         * @throws ClassNotFoundException if a job class cannot be resolved
         * @throws InterruptedException   if waiting for completion is interrupted
         */
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Job job = Job.getInstance();
            job.setJobName("WordCount");
            job.setJarByClass(WordCount.class);
            job.setMapperClass(doMapper.class);
            job.setReducerClass(doReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            Path in = new Path("hdfs://localhost:9000/user/hadoop/input");
            Path out = new Path("hdfs://localhost:9000/user/hadoop/output");
            FileInputFormat.addInputPath(job, in);
            FileOutputFormat.setOutputPath(job, out);
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }

        /**
         * Emits {@code (firstToken, 1)} for every input line that contains at least
         * one space-delimited token.
         */
        public static class doMapper extends Mapper<Object, Text, Text, IntWritable> {
            /** Constant count written for every emitted key. */
            public static final IntWritable one = new IntWritable(1);
            /** Reusable output key, overwritten on each call to {@code map}. */
            public static Text word = new Text();

            /**
             * Writes the first space-delimited token of {@code value} with a count
             * of one. Lines with no token (empty, or consisting only of spaces) are
             * skipped.
             *
             * @param key     input key supplied by the framework; not used
             * @param value   one line of input text
             * @param context sink for the emitted key/value pair
             * @throws IOException          if writing to the context fails
             * @throws InterruptedException if the write is interrupted
             */
            @Override
            protected void map(Object key, Text value, Context context)
                    throws IOException, InterruptedException {
                StringTokenizer tokenizer = new StringTokenizer(value.toString(), " ");
                // A blank line yields no tokens; nextToken() would then throw
                // NoSuchElementException and fail the whole task, so skip it.
                if (!tokenizer.hasMoreTokens()) {
                    return;
                }
                word.set(tokenizer.nextToken());
                context.write(word, one);
            }
        }

        /**
         * Sums all counts received for a key and emits {@code (key, total)}.
         */
        public static class doReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
            /** Reusable output value, overwritten on each call to {@code reduce}. */
            private final IntWritable result = new IntWritable();

            /**
             * Adds up every value for {@code key} and writes the total.
             *
             * @param key     the token being counted
             * @param values  the counts emitted for this token
             * @param context sink for the emitted key/total pair
             * @throws IOException          if writing to the context fails
             * @throws InterruptedException if the write is interrupted
             */
            @Override
            protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                    throws IOException, InterruptedException {
                int sum = 0;
                for (IntWritable value : values) {
                    sum += value.get();
                }
                result.set(sum);
                context.write(key, result);
            }
        }
    }

    input文件夹中test.txt文件的内容:

    10181 1000481 2010-04-04 16:54:31
    20001 1001597 2010-04-07 15:07:52
    20001 1001560 2010-04-07 15:08:27
    20042 1001368 2010-04-08 08:20:30
    20067 1002061 2010-04-08 16:45:33
    20056 1003289 2010-04-12 10:50:55
    20056 1003290 2010-04-12 11:57:35
    20056 1003292 2010-04-12 12:05:29
    20054 1002420 2010-04-14 15:24:12
    20055 1001679 2010-04-14 19:46:04
    20054 1010675 2010-04-14 15:23:53
    20054 1002429 2010-04-14 17:52:45
    20076 1002427 2010-04-14 19:35:39
    20054 1003326 2010-04-20 12:54:44
    20056 1002420 2010-04-15 11:24:49
    20064 1002422 2010-04-15 11:35:54
    20056 1003066 2010-04-15 11:43:01
    20056 1003055 2010-04-15 11:43:06
    20056 1010183 2010-04-15 11:45:24
    20056 1002422 2010-04-15 11:45:49
    20056 1003100 2010-04-15 11:45:54
    20056 1003094 2010-04-15 11:45:57
    20056 1003064 2010-04-15 11:46:04
    20056 1010178 2010-04-15 16:15:20
    20076 1003101 2010-04-15 16:37:27
    20076 1003103 2010-04-15 16:37:05
    20076 1003100 2010-04-15 16:37:18
    20076 1003066 2010-04-15 16:37:31
    20054 1003103 2010-04-15 16:40:14
    20054 1003100 2010-04-15 16:40:16

    运行结果(统计每行第一列的值出现的次数):

    10181 1
    20001 2
    20042 1
    20054 6
    20055 1
    20056 12
    20064 1
    20067 1
    20076 5

    学习于:https://blog.csdn.net/qq_41035588/article/details/90514824

  • 相关阅读:
    jQuery Deferred和Promise的使用介绍:
    asp.net客户端IP跟踪
    jquery常用的一些方法
    前端音频流播放
    c# Http请求下载二进制流文件
    iView表格行验证问题
    【已解决】Https请求—未能创建 SSL/TLS 安全通道
    安全开发规范
    数据库设计规范
    高性能开发规范
  • 原文地址:https://www.cnblogs.com/my---world/p/11768560.html
Copyright © 2011-2022 走看看