zoukankan      html  css  js  c++  java
  • Hadoop 求单词count数

    package com.yw.hadoop273;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    
    /**
     * Map stage of the word-count job.
     *
     * <p>Receives one line of text per call and emits a {@code (word, 1)} pair for
     * every whitespace-separated word found on that line.
     *
     * @author YW
     * @since 2019/9/18
     */
    public class WCWordCount extends Mapper<LongWritable, Text, Text, IntWritable> {

        /**
         * Splits the incoming line into words and writes {@code (word, 1)} for each one.
         *
         * @param key     input key supplied by the input format (not used here)
         * @param value   the line of text to tokenize
         * @param context sink that receives the emitted {@code (word, 1)} pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            Text keyOut = new Text();
            IntWritable valueOut = new IntWritable();
            // Every occurrence counts as one; set it once instead of on every iteration.
            valueOut.set(1);

            // Split on runs of whitespace. The previous split("") produced one token
            // per character, so the job counted characters rather than words.
            String[] words = value.toString().split("\\s+");
            for (String word : words) {
                // A blank line or leading whitespace yields an empty first token.
                if (word.isEmpty()) {
                    continue;
                }
                keyOut.set(word);
                context.write(keyOut, valueOut);
            }
        }
    }
    
    
    package com.yw.hadoop273;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    import java.io.IOException;

    /**
     * Reduce stage of the word-count job.
     *
     * <p>Adds up all the counts received for a key and emits a single
     * {@code (key, total)} pair.
     *
     * @author YW
     * @since 2019/9/18
     */
    public class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        /**
         * Aggregates the counts for one key.
         *
         * @param key     the key whose counts are being summed
         * @param values  the individual counts received for {@code key}
         * @param context sink that receives the {@code (key, total)} pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int total = 0;
            for (IntWritable partial : values) {
                total += partial.get();
            }

            IntWritable result = new IntWritable(total);
            context.write(key, result);
        }
    }
    package com.yw.hadoop273;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import java.io.IOException;
    
    /**
     * Driver for the word-count job: wires {@link WCWordCount} and {@link WCReducer}
     * into a single-reducer MapReduce job, submits it and waits for it to finish.
     *
     * <p>Usage: {@code WCApp <input path> <output path>}
     *
     * @author YW
     * @since 2019/9/16
     */
    public class WCApp {

        /**
         * Configures and runs the job.
         *
         * @param args {@code args[0]} is the input path, {@code args[1]} is the output path;
         *             an existing output path is deleted before the job starts
         * @throws IOException            if file-system access or job submission fails
         * @throws InterruptedException   if the thread is interrupted while waiting for the job
         * @throws ClassNotFoundException if a job class cannot be resolved during submission
         */
        public static void main(String[] args)
                throws IOException, InterruptedException, ClassNotFoundException {
            // Both paths are mandatory; fail with a usage message instead of an
            // ArrayIndexOutOfBoundsException further down.
            if (args.length < 2) {
                System.err.println("Usage: WCApp <input path> <output path>");
                System.exit(2);
            }

            Configuration conf = new Configuration();
            Path inputPath = new Path(args[0]);
            Path outputPath = new Path(args[1]);

            // Remove any existing output directory (recursively) before the job
            // writes to it. delete() simply returns false when the path does not exist.
            FileSystem.get(conf).delete(outputPath, true);

            Job job = Job.getInstance(conf);

            // Job identity and input format.
            job.setJobName("WCApp");
            job.setJarByClass(WCApp.class);
            job.setInputFormatClass(TextInputFormat.class);

            // Input and output locations.
            FileInputFormat.addInputPath(job, inputPath);
            FileOutputFormat.setOutputPath(job, outputPath);

            // Processing stages.
            job.setMapperClass(WCWordCount.class);
            job.setReducerClass(WCReducer.class);
            job.setNumReduceTasks(1);

            // Key/value types. The value classes must be set through the *Value*
            // setters; calling the key setter twice overwrites the key type with
            // IntWritable and leaves the value type unset.
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            // Submit the job and block until it finishes; report the result via exit code.
            boolean succeeded = job.waitForCompletion(true);
            System.exit(succeeded ? 0 : 1);
        }

    }
    
    
  • 相关阅读:
    最新java学习路线:含阶段性java视频教程完整版
    2019最新WEB前端开发小白必看的学习路线(附学习视频教程)
    区块链技术学习路线(全网最新)
    java学习路线之必会的java基础教程
    新手如何学习python(python学习路线图)
    python学习教程,史上最全面的python学习路线图
    机器学习中的误差 Where does error come from?
    主成分分析 Principle Component Analysis
    线性回归 Linear Regression
    MCtalk对话尚德机构:AI讲师,假套路还是真功夫?
  • 原文地址:https://www.cnblogs.com/YuanWeiBlogger/p/11547346.html
Copyright © 2011-2022 走看看