zoukankan      html  css  js  c++  java
  • Hadoop 单词计数（WordCount）示例

    package com.yw.hadoop273;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    
    /**
     * Mapper of the word-count job: splits each input line into words and
     * emits a {@code (word, 1)} pair for every word found.
     *
     * <p>Created 2019/9/18 20:58.
     *
     * @author YW
     */
    public class WCWordCount extends Mapper<LongWritable, Text, Text, IntWritable> {

        /** Count emitted with every word; always 1. */
        private static final IntWritable ONE = new IntWritable(1);

        /** Output key holder, reused for every word instead of allocating one per call. */
        private final Text keyOut = new Text();

        /**
         * Splits one line of input into words and writes {@code (word, 1)} for each.
         *
         * @param key     input key supplied by the input format (not used here)
         * @param value   one line of input text
         * @param context job context used to emit the output pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split on runs of whitespace. Splitting on the empty string "" would
            // break the line into single characters and count letters, not words.
            String[] words = value.toString().trim().split("\\s+");
            for (String word : words) {
                // A blank line yields one empty token; do not count it as a word.
                if (word.isEmpty()) {
                    continue;
                }
                keyOut.set(word);
                context.write(keyOut, ONE);
            }
        }
    }
    
    
    package com.yw.hadoop273;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    import java.io.IOException;

    /**
     * Reducer of the word-count job: aggregates the counts emitted for a word
     * and writes the word together with its total.
     *
     * <p>Created 2019/9/18 21:20.
     *
     * @author YW
     */
    public class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        /**
         * Sums all values received for {@code key} and emits {@code (key, sum)}.
         *
         * @param key     the word being aggregated
         * @param values  the individual counts emitted for this word
         * @param context job context used to emit the aggregated pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int total = 0;
            for (IntWritable partial : values) {
                total += partial.get();
            }

            IntWritable result = new IntWritable(total);
            context.write(key, result);
        }
    }
    package com.yw.hadoop273;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import java.io.IOException;
    
    /**
     * Driver of the word-count job: configures the mapper and reducer, submits
     * the job and waits for it to finish.
     *
     * <p>Usage: {@code WCApp <input path> <output path>}
     *
     * <p>Created 2019/9/16 21:20.
     *
     * @author YW
     */
    public class WCApp {

        /**
         * Configures and runs the word-count job, then exits with status 0 on
         * success, 1 if the job failed, or 2 if the arguments are missing.
         *
         * @param args {@code args[0]} is the input path, {@code args[1]} the output path
         * @throws IOException if the file system or the job submission fails, or if
         *                     waiting for the job is interrupted
         */
        public static void main(String[] args) throws IOException {
            // Both paths are required; fail with a usage message instead of an
            // ArrayIndexOutOfBoundsException further down.
            if (args.length < 2) {
                System.err.println("Usage: WCApp <input path> <output path>");
                System.exit(2);
            }

            Configuration conf = new Configuration();
            Path inputPath = new Path(args[0]);
            Path outputPath = new Path(args[1]);

            // Remove an existing output directory (recursively) before the job runs.
            FileSystem fs = outputPath.getFileSystem(conf);
            if (fs.exists(outputPath)) {
                fs.delete(outputPath, true);
            }

            Job job = Job.getInstance(conf);
            // Job properties
            job.setJobName("WCApp");                        // job name
            job.setJarByClass(WCApp.class);                 // class used to locate the job jar
            job.setInputFormatClass(TextInputFormat.class); // input format

            FileInputFormat.addInputPath(job, inputPath);    // input path
            FileOutputFormat.setOutputPath(job, outputPath); // output path

            job.setMapperClass(WCWordCount.class);          // mapper class
            job.setReducerClass(WCReducer.class);           // reducer class
            job.setNumReduceTasks(1);                       // number of reducers

            // Key AND value types must both be declared; calling the *Key* setter
            // twice would leave the value type unset and override the key type.
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            // Submit the job and block until it finishes; without this call the
            // job is only configured, never executed.
            boolean succeeded;
            try {
                succeeded = job.waitForCompletion(true);
            } catch (InterruptedException e) {
                // Restore the interrupt flag and report through the declared exception type.
                Thread.currentThread().interrupt();
                throw new IOException("Interrupted while waiting for job WCApp to complete", e);
            } catch (ClassNotFoundException e) {
                throw new IOException("A class required by job WCApp could not be loaded", e);
            }
            System.exit(succeeded ? 0 : 1);
        }

    }
    
    
  • 相关阅读:
    $_SERVER
    下面介绍mysql中模糊查询的四种用法:
    qq第三方登录
    远程连接数据库出错
    lnmp中的tp的pathinfo模式
    TP5配置所谓的url_moudel
    tp3.2.3中的xss攻击基本防护
    tp中的Csv文件读取(原创)
    mysql语句整理
    SVN的详细使用
  • 原文地址:https://www.cnblogs.com/YuanWeiBlogger/p/11547346.html
Copyright © 2011-2022 走看看