zoukankan      html  css  js  c++  java
  • MR案例:WordCount改写

    请参照wordcount实现一个自己的MapReduce,需求为:
        a. 输入文件格式:
           xxx,xxx,xxx,xxx,xxx,xxx,xxx
        b. 输出文件格式:
           xxx,20
           xxx,30
           xxx,40
        c. 功能:根据命令行参数统计输入文件中指定关键字出现的次数,并展示出来
           例如:hadoop jar xxxxx.jar keywordcount xxx,xxx,xxx,xxx(四个关键字)
    package demo0830;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
    
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.HashSet;
    import java.util.Locale;
    import java.util.Set;
    import java.util.regex.Pattern;
    
    public class Demo0902 {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
    
            if (args.length < 3) {
                System.out.println("Usage: wordcount <input_path> <output_path> <keyword_list>");
                return;
            }
    
            //Add to target(静态方法)
            String[] target_words = args[2].split(",");
            for (String word : target_words) {
                WCMap.addTargetWord(word.toLowerCase());
            }
    
            Job job = Job.getInstance(conf);
            job.setJarByClass(Demo0902.class);
    
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
    
            job.setMapperClass(WCMap.class);
            job.setReducerClass(WCReduce.class);
    
            job.setInputFormatClass(TextInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);
    
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
    
            job.waitForCompletion(true);
        }
        public static class WCMap extends Mapper<LongWritable, Text, Text, IntWritable> {
    
            private final static IntWritable one = new IntWritable(1);
            private Text word = new Text();
            private final static ArrayList<String> target_words = new ArrayList<String>();
    
            public static void addTargetWord(String word) {
                target_words.add(word);
            }
    
            public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
                String[] items = value.toString().toLowerCase().split(" ");
                for (String item : items) {
                    
                    //filter keyword
                    if (target_words.contains(item)) {
                        word.set(item);
                        context.write(word, one);
                    }
                }
            }
        }
    
        public static class WCReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
            public void reduce(Text key, Iterable<IntWritable> values, Context context)
                    throws IOException, InterruptedException {
                int sum = 0;
                for (IntWritable val : values) {
                    sum += val.get();
                }
                context.write(key, new IntWritable(sum));
            }
        }
    }
  • 相关阅读:
    UITextView 和 UITextField 的提示信息placeholder
    【转载】ios下的正则表达式,RegexKitLite
    Java集合(2)一 ArrayList 与 LinkList
    Java并发(2) 聊聊happensbefore
    Java并发(3) 聊聊Volatile
    Java并发(1) 聊聊Java内存模型
    Java集合(5)一 HashMap与HashSet
    Java集合(3)一 红黑树、TreeMap与TreeSet(上)
    Java集合(4)一 红黑树、TreeMap与TreeSet(下)
    Java集合(1)一 集合框架
  • 原文地址:https://www.cnblogs.com/skyl/p/4779512.html
Copyright © 2011-2022 走看看