zoukankan      html  css  js  c++  java
  • Wordcount

    package com.ibifeng.hadoop.senior.mapreduce;

    import java.io.IOException;
    import java.util.StringTokenizer;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.lib.aggregate.ValueAggregator;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.jboss.netty.handler.codec.http.HttpHeaders.Values;

    public class WordCount {
     //step 1: Map calss
        public static class WordcountmMap extends Mapper<LongWritable, Text, Text, IntWritable>{
          private Text mapOutputKey = new Text();
          private final static IntWritable mapOutputValue = new IntWritable(1);
            @Override
            protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
                    throws IOException, InterruptedException {
                //line value
                String lineValue = value.toString();
                //split stringTokenizer
                StringTokenizer stringTokenizer = new StringTokenizer(lineValue);
                // iterator
                while(stringTokenizer.hasMoreTokens()){
                     //get value
                    String wordValu = stringTokenizer.nextToken();
                    //set value
                    mapOutputKey.set(wordValu);
                    //output
                    context.write(mapOutputKey, mapOutputValue);
                }
                
            }
            
        }
        //step 2: Reduce class
        public  static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable>{
            private IntWritable OutputValue = new IntWritable(1);
            @Override
            protected void reduce(Text key, Iterable<IntWritable> values,
                    Context context) throws IOException, InterruptedException {
                     //sum tmp
               int sum=0;
               //iterator
               for(IntWritable value: values){
                   //total
                   sum += value.get();
               }
                //set value
               OutputValue.set(sum);
                //output
               context.write(key, OutputValue);
            }
            
        }
        //step Driver , commponent job
        public int run(String[] args) throws IOException, Exception, InterruptedException{
            //1.get confifuration
            Configuration configuration = new Configuration();
            //2.create Job
            Job job = Job.getInstance(configuration,this.getClass().getSimpleName());
            //run jar
            job.setJarByClass(this.getClass());
            //3.set job
            Path inPath = new Path(args[0]);
            org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, inPath);
            job.setMapperClass(WordcountmMap.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            //3.reduce
            job.setReducerClass(WordCountReducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            //output
            Path outPath = new Path(args[1]);
            FileOutputFormat.setOutputPath(job, outPath);
            //submit job
            boolean isSuccess = job.waitForCompletion(true);
            return isSuccess ? 0 : 1;
        }
        //step rin program
        public static void main(String[] args) throws IOException, InterruptedException, Exception {
            int status = new WordCount().run(args);
            System.exit(status);
        }
    }

  • 相关阅读:
    Node自动重启工具 nodemon
    centos 集群
    kettle操作数据库增删改
    Dinic
    vim
    mermaid简介
    联赛模拟测试32
    检讨书模板
    博客园如何添加看板娘!
    手机浏览器如何调试
  • 原文地址:https://www.cnblogs.com/chenligeng/p/8589172.html
Copyright © 2011-2022 走看看