zoukankan      html  css  js  c++  java
  • MRWordCount

    一、map
    package com.pdd.mapreduce;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    import java.io.IOException;

    /**
     * Word-count mapper: breaks each input line into words and emits a
     * {@code (word, 1)} pair for every word found.
     *
     * <p>Created 2018/2/14 21:03.
     *
     * @author hfx
     */
    public class WCmap extends Mapper<LongWritable, Text, Text, IntWritable> {
        /** Matches one or more whitespace characters; compiled once rather than per record. */
        private static final java.util.regex.Pattern WHITESPACE = java.util.regex.Pattern.compile("\\s+");

        // Output key/value objects, created once and refilled for every emitted pair.
        Text k = new Text();
        IntWritable v = new IntWritable(1);

        /**
         * Emits {@code (word, 1)} for each whitespace-separated word in one input line.
         *
         * @param key     input key supplied by the framework; not used here
         * @param value   the text of one input line
         * @param context sink that receives the emitted pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if writing to the context is interrupted
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // 1. Read the line
            String line = value.toString();
            // 2. Split on runs of whitespace (spaces, tabs, ...) instead of a single space
            String[] words = WHITESPACE.split(line);
            // 3. Emit one pair per word
            for (String word : words) {
                // A blank line or leading whitespace still produces an empty first token;
                // it is not a word, so it must not be counted.
                if (word.isEmpty()) {
                    continue;
                }
                k.set(word);
                context.write(k, v);
            }
        }
    }
      二、reduce
      package com.pdd.mapreduce;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    import java.io.IOException;

    /**
     * Word-count reducer: adds up all the counts received for one word and
     * emits a single {@code (word, total)} pair.
     *
     * <p>Created 2018/2/14 21:05.
     *
     * @author hfx
     */
    public class WcReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

        // Output value object, created once and refilled for every key.
        IntWritable v = new IntWritable();

        /**
         * Sums the counts for {@code key} and writes the total.
         *
         * @param key     the word being reduced
         * @param values  the counts emitted for this word
         * @param context sink that receives the {@code (word, total)} pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if writing to the context is interrupted
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // 1. Accumulate the total; a local keeps the running sum out of instance state.
            int sum = 0;
            for (IntWritable count : values) {
                sum += count.get();
            }
            // 2. Emit
            v.set(sum);
            context.write(key, v);
        }
    }
      三、job测试
      package com.pdd.mapreduce;

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    /**
     * Driver for the word-count job: configures the mapper, the reducer and the
     * key/value types, then submits the job and waits for it to finish.
     *
     * <p>Created 2018/2/14 21:15.
     *
     * @author hfx
     */
    public class JobTest {
        /**
         * Configures and runs the job, then exits the JVM with status 0 on success
         * or 1 on failure (2 if the arguments are missing).
         *
         * @param args {@code args[0]} is the input path, {@code args[1]} the output path
         * @throws IOException            propagated from job setup or execution
         * @throws ClassNotFoundException propagated from job execution
         * @throws InterruptedException   propagated from job execution
         */
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            // For a local test, hard-code the paths, e.g.:
            //   args = new String[]{"d:/input", "d:/output"};
            // To run on a cluster, package a jar and launch it with:
            //   hadoop jar <jar file> <fully-qualified main class> <input path> <output path>
            // NOTE(review): the original comment says the output is "not specified" —
            // presumably the output directory must not already exist; confirm.

            // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
            if (args == null || args.length < 2) {
                System.err.println("Usage: JobTest <input path> <output path>");
                System.exit(2);
                return;
            }

            // 1. Load the configuration and create the job
            Configuration configuration = new Configuration();
            Job job = Job.getInstance(configuration);
            // 2. Locate the jar through the driver class
            job.setJarByClass(JobTest.class);
            // 3. Set the mapper and reducer classes
            job.setMapperClass(WCmap.class);
            job.setReducerClass(WcReduce.class);
            // 4. Set the map output key/value types
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            // 5. Set the final output key/value types
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            // 6. Set the input and output paths
            FileInputFormat.setInputPaths(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            // 7. Submit and wait for completion
            boolean result = job.waitForCompletion(true);
            System.exit(result ? 0 : 1);
        }
    }
  • 相关阅读:
    20145325张梓靖 《Java程序设计》第9周学习总结
    20145325张梓靖 实验四 "Andoid开发基础"
    20145325张梓靖 《Java程序设计》第8周学习总结
    20145307《信息安全系统设计基础》第7周学习总结
    20145307《信息安全系统设计基础》第六周学习总结
    Y86模拟器安装
    20145307《信息安全系统设计基础》第五周学习总结PT2
    git失败案例
    20145307陈俊达《信息安全系统设计基础》第5周学习总结PT1
    20145307陈俊达《信息安全系统设计基础》第3周学习总结
  • 原文地址:https://www.cnblogs.com/sgjk/p/javaWordCount.html
Copyright © 2011-2022 走看看