zoukankan      html  css  js  c++  java
  • hadoop-mapreduce-(1)-统计单词数量

    编写map程序

    package com.cvicse.ump.hadoop.mapreduce.map;
    
    import java.io.IOException;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    /**
     * Mapper of the word-count job: for every input line it emits one
     * {@code (word, 1)} pair per whitespace-separated token.
     */
    public class WordCountMap extends Mapper<LongWritable, Text, Text, IntWritable> {

        /**
         * Matches one or more whitespace characters. Compiled once instead of on
         * every {@code map} call; the fully qualified name avoids a new import.
         */
        private static final java.util.regex.Pattern WHITESPACE =
                java.util.regex.Pattern.compile("\\s+");

        /** The constant count written with every word. */
        private static final IntWritable ONE = new IntWritable(1);

        /**
         * Output key reused across writes to avoid allocating one object per
         * token (the same pattern the official Hadoop WordCount example uses).
         */
        private final Text word = new Text();

        /**
         * Splits one input line into words and writes {@code (word, 1)} for each.
         *
         * @param key     the input key of the record; not used here
         * @param value   one line of input text
         * @param context the task context that receives the output pairs
         * @throws IOException          if writing an output pair fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {

            String line = value.toString();
            for (String token : WHITESPACE.split(line)) {
                // Leading whitespace (or an empty line) yields an empty first
                // token; it is not a word and must not be counted.
                if (token.isEmpty()) {
                    continue;
                }
                word.set(token);
                context.write(word, ONE);
            }

        }

    }

    编写reduce程序

    package com.cvicse.ump.hadoop.mapreduce.reduce;
    
    import java.io.IOException;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    /**
     * Reducer of the word-count job: sums all counts received for a word and
     * writes a single {@code (word, total)} pair.
     */
    public class WordCountReduce extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        /** Output value reused across calls to avoid allocating one object per key. */
        private final IntWritable total = new IntWritable();

        /**
         * Adds up every value associated with {@code key} and writes the sum.
         *
         * @param key     the word being counted
         * @param values  the partial counts for that word
         * @param context the task context that receives the output pair
         * @throws IOException          if writing the output pair fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {

            // Primitive accumulator: a boxed Integer would be unboxed and
            // re-boxed on every addition.
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }

            total.set(count);
            context.write(key, total);

        }

    }

    编写main函数

    package com.cvicse.ump.hadoop.mapreduce;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import com.cvicse.ump.hadoop.mapreduce.map.WordCountMap;
    import com.cvicse.ump.hadoop.mapreduce.reduce.WordCountReduce;
    
    /**
     * Driver of the word-count job: configures the mapper, reducer and the
     * input/output paths, submits the job and reports the outcome.
     */
    public class WordCount {

        /**
         * Configures and runs the word-count job.
         *
         * <p>The process exits with status 0 when the job succeeds, 1 when the
         * job fails and 2 when the required arguments are missing.
         *
         * @param args {@code args[0]} is the input path, {@code args[1]} the
         *             output path; any further arguments are ignored
         * @throws Exception if the job cannot be configured or submitted
         */
        public static void main(String[] args) throws Exception {

            // Fail with a readable message instead of an
            // ArrayIndexOutOfBoundsException when paths are missing.
            if (args.length < 2) {
                System.err.println("Usage: WordCount <input path> <output path>");
                System.exit(2);
            }

            Configuration conf = new Configuration();

            Job job = Job.getInstance(conf, "wordCount");
            job.setJarByClass(WordCount.class);
            job.setMapperClass(WordCountMap.class);
            // The reducer only sums counts and has identical input and output
            // types, so it can also pre-aggregate map output as a combiner.
            job.setCombinerClass(WordCountReduce.class);
            job.setReducerClass(WordCountReduce.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            FileInputFormat.setInputPaths(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));

            boolean succeeded = job.waitForCompletion(true);
            if (succeeded) {
                System.out.println("wordcount task success!");
            } else {
                System.out.println("wordcount task fail!");
            }

            // Propagate the outcome so calling scripts can detect a failed job.
            System.exit(succeeded ? 0 : 1);

        }

    }

    把wordcount.txt放在hdfs的/dyh/data/input/目录下

    执行:hadoop jar hdfs.jar com.cvicse.ump.hadoop.mapreduce.WordCount /dyh/data/input/wordcount.txt /dyh/data/output/1(注意:输出目录 /dyh/data/output/1 不能事先存在,否则任务会在提交时失败)

  • 相关阅读:
    NABCD(校园包车)
    作业5.2~5.3
    作业5.1
    作业
    JAVA EE社团管理升级版-数据库设计
    JAVA EE社团管理升级版-微信小程序端说明文档
    社团项目软件展示
    社团项目个人总结
    北京地铁规划项目总结
    地铁出行线路规划项目设计
  • 原文地址:https://www.cnblogs.com/dyh004/p/7878406.html
Copyright © 2011-2022 走看看