zoukankan      html  css  js  c++  java
  • hadoop-mapreduce-(1)-统计单词数量

    编写map程序

    package com.cvicse.ump.hadoop.mapreduce.map;
    
    import java.io.IOException;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    /**
     * Mapper of the word-count job.
     *
     * <p>For each input line it emits one {@code (word, 1)} pair per
     * whitespace-separated token. The input key passed by the framework is
     * not used.
     */
    public class WordCountMap extends Mapper<LongWritable, Text, Text, IntWritable> {

        /** Count emitted for every single word occurrence. */
        private static final IntWritable ONE = new IntWritable(1);

        /**
         * Output key, reused across records instead of allocating a new Text
         * per token (same reuse pattern as the WordCount example in the
         * Hadoop MapReduce tutorial).
         */
        private final Text outKey = new Text();

        /**
         * Tokenizes one input line and writes {@code (word, 1)} for every
         * non-empty token.
         *
         * @param key     input key supplied by the framework; ignored here
         * @param value   one line of input text
         * @param context sink for the emitted {@code (word, 1)} pairs
         * @throws IOException          propagated from {@code context.write}
         * @throws InterruptedException propagated from {@code context.write}
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {

            // Split on any run of whitespace so that tabs and repeated spaces
            // act as a single separator.
            String[] words = value.toString().split("\\s+");
            for (String word : words) {
                // A blank line or leading whitespace still produces one empty
                // token; it is not a word and must not be counted.
                if (word.isEmpty()) {
                    continue;
                }
                outKey.set(word);
                context.write(outKey, ONE);
            }

        }

    }

    编写reduce程序

    package com.cvicse.ump.hadoop.mapreduce.reduce;
    
    import java.io.IOException;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    /**
     * Reducer of the word-count job: adds up all partial counts received for
     * one word and emits a single {@code (word, total)} pair.
     */
    public class WordCountReduce extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        /**
         * Sums the counts of one word.
         *
         * @param key     the word
         * @param values  the partial counts collected for {@code key}
         * @param context sink for the resulting {@code (word, total)} pair
         * @throws IOException          propagated from {@code context.write}
         * @throws InterruptedException propagated from {@code context.write}
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {

            // Primitive accumulator: a boxed Integer would be unboxed and
            // re-boxed on every addition.
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }

            context.write(key, new IntWritable(count));

        }

    }

    编写main函数

    package com.cvicse.ump.hadoop.mapreduce;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import com.cvicse.ump.hadoop.mapreduce.map.WordCountMap;
    import com.cvicse.ump.hadoop.mapreduce.reduce.WordCountReduce;
    
    /**
     * Driver of the word-count job: configures the mapper, reducer and
     * input/output paths, submits the job and reports the outcome.
     */
    public class WordCount {

        /**
         * Entry point.
         *
         * @param args {@code args[0]} is the input path, {@code args[1]} the
         *             output path; any further arguments are ignored
         * @throws Exception whatever job setup or {@code waitForCompletion} throws
         */
        public static void main(String[] args) throws Exception {

            // Fail with a readable message instead of an
            // ArrayIndexOutOfBoundsException when paths are missing.
            if (args.length < 2) {
                System.err.println("Usage: WordCount <input path> <output path>");
                System.exit(2);
            }

            Configuration conf = new Configuration();

            Job job = Job.getInstance(conf, "wordCount");
            job.setJarByClass(WordCount.class);
            job.setMapperClass(WordCountMap.class);
            // The reducer only sums its input values and its input and output
            // types are identical, so it can also serve as the combiner to
            // pre-aggregate counts on the map side.
            job.setCombinerClass(WordCountReduce.class);
            job.setReducerClass(WordCountReduce.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            FileInputFormat.setInputPaths(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));

            boolean succeeded = job.waitForCompletion(true);
            if (!succeeded) {
                System.out.println("wordcount task fail!");
                // Non-zero exit status so that calling scripts can detect the failure.
                System.exit(1);
            } else {
                System.out.println("wordcount task success!");
            }

        }

    }

    把 wordcount.txt 上传到 HDFS 的 /dyh/data/input/ 目录下

    执行(输出目录 /dyh/data/output/1 在运行前必须不存在,否则作业会报错):hadoop jar hdfs.jar com.cvicse.ump.hadoop.mapreduce.WordCount /dyh/data/input/wordcount.txt /dyh/data/output/1

  • 相关阅读:
    C#快速开发平台(C/S架构+Winform+DevExpress+FastReport)
    C# Winform C/S系统快速开发框架企业版V4.5已发布,欢迎下载试用
    CSS3 Animation
    CSS3 Transition
    CSS3 Transform
    jQuery 屏蔽鼠标快速经过
    JQ插件jquery.fn.extend与jquery.extend
    font-size单位换算
    js获取客户端操作系统
    js数组的操作
  • 原文地址:https://www.cnblogs.com/dyh004/p/7878406.html
Copyright © 2011-2022 走看看