zoukankan      html  css  js  c++  java
  • Hadoop 第一个程序:WordCount

    Hadoop 第一个程序:WordCount

    package test;
    
    import org.apache.hadoop.mapreduce.Job;
    import java.io.IOException;
    import java.util.StringTokenizer;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    /**
     * Word-count style MapReduce job.
     *
     * <p>Author: 翟超科<br>
     * Date: 2019.9.3<br>
     * Task: implement word counting with MapReduce.
     *
     * <p>The mapper emits {@code (token, 1)} for the first tab-delimited field of each
     * input record, and the reducer sums the ones emitted for each distinct key.
     */
    public class WordCount {
        /** Input location used when no command-line argument is given. */
        private static final String DEFAULT_INPUT = "hdfs://192.168.13.101:9000/data";

        /** Output location used when fewer than two command-line arguments are given. */
        private static final String DEFAULT_OUTPUT = "hdfs://192.168.13.101:9000/output";

        /**
         * Map phase: turns each input record into a {@code (keyword, 1)} pair.
         */
        public static class doMapper extends Mapper<Object, Text, Text, IntWritable> {
            /** The constant count 1 emitted for every keyword occurrence. */
            public static final IntWritable one = new IntWritable(1);

            /**
             * Reusable output key. Kept per mapper instance (not static) so that mapper
             * instances living in the same JVM never share mutable state.
             */
            private final Text word = new Text();

            /**
             * Emits the first tab-delimited field of {@code value} with a count of one.
             *
             * <p>Records that contain no token (empty, or consisting only of tabs) are
             * skipped instead of failing the task with a {@code NoSuchElementException}.
             *
             * @param key     the input key; not used
             * @param value   the text of the input record
             * @param context the sink that receives the {@code (keyword, 1)} pair
             * @throws IOException          if writing to the context fails
             * @throws InterruptedException if the task is interrupted while writing
             */
            @Override
            protected void map(Object key, Text value, Mapper<Object, Text, Text, IntWritable>.Context context)
                    throws IOException, InterruptedException {
                // Split the record on tab characters.
                StringTokenizer tokenizer = new StringTokenizer(value.toString(), "\t");
                if (!tokenizer.hasMoreTokens()) {
                    // Nothing to count on this record.
                    return;
                }
                // NOTE(review): only the first tab-delimited field is counted, as in the
                // original code. If every word on the line should be counted, loop over
                // all tokens instead -- confirm the intended input format.
                word.set(tokenizer.nextToken());
                context.write(word, one);
            }
        }

        /**
         * Reduce phase: sums the counts collected for each key.
         */
        public static class doReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
            /** Reusable output value holding the total for the current key. */
            private final IntWritable result = new IntWritable();

            /**
             * Adds up all values received for {@code key} and writes {@code (key, total)}.
             *
             * @param key     the keyword being aggregated
             * @param values  the counts emitted for that keyword
             * @param context the sink that receives the {@code (keyword, total)} pair
             * @throws IOException          if writing to the context fails
             * @throws InterruptedException if the task is interrupted while writing
             */
            @Override
            protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context)
                    throws IOException, InterruptedException {
                int sum = 0; // running total for this key, starting from zero
                for (IntWritable value : values) {
                    sum += value.get();
                }
                result.set(sum);
                context.write(key, result);
            }
        }

        /**
         * Configures and runs the job, then exits with status 0 on success and 1 on failure.
         *
         * @param args optional overrides: {@code args[0]} is the input path and
         *             {@code args[1]} is the output path; each falls back to the
         *             built-in HDFS location when absent
         * @throws IOException            if the job cannot be set up or submitted
         * @throws ClassNotFoundException if a job class cannot be resolved while waiting for completion
         * @throws InterruptedException   if the wait for job completion is interrupted
         */
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Job job = Job.getInstance();
            job.setJobName("WordCount");
            job.setJarByClass(WordCount.class);
            job.setMapperClass(doMapper.class);
            // Summation is associative and commutative, so the reducer can also run as a
            // combiner to pre-aggregate map output without changing the final result.
            job.setCombinerClass(doReduce.class);
            job.setReducerClass(doReduce.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            Path in = new Path(args.length > 0 ? args[0] : DEFAULT_INPUT);   // where the input files live
            Path out = new Path(args.length > 1 ? args[1] : DEFAULT_OUTPUT); // where the results are written
            FileInputFormat.addInputPath(job, in);
            FileOutputFormat.setOutputPath(job, out);

            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }

    }

     

  • 相关阅读:
    【消息队列MQ】各类MQ比较
    MySql查询功能梳理
    头条日常实习生面经 2018.11.28
    排序算法 JavaScript
    浅谈二分查找 JavaScript
    LeetCode17.电话号码的字母组合 JavaScript
    LeetCode16.最接近的三数之和 JavaScript
    LeetCode15.三数之和 JavaScript
    LeetCode14.最长公共前缀 JavaScript
    LeetCode13.罗马数字转整数 JavaScript
  • 原文地址:https://www.cnblogs.com/2016-zck/p/11452487.html
Copyright © 2011-2022 走看看