zoukankan      html  css  js  c++  java
  • 第一个MapReduce程序

    计算文件中每个单词的频数

           wordcount 程序调用 wordmap 和 wordreduce 程序。

     1 import org.apache.hadoop.conf.Configuration;
     2 import org.apache.hadoop.fs.Path;
     3 import org.apache.hadoop.io.IntWritable;
     4 import org.apache.hadoop.io.Text;
     5 import org.apache.hadoop.mapreduce.Job;
     6 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
     7 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
     8 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
     9 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
    10 
    11 public class wordcount {
    12 
    13     /**
    14      * @param args
    15      */
    16     public static void main(String[] args) throws Exception {
    17         // TODO Auto-generated method stub
    18         
    19         Configuration conf = new Configuration();
    20         Job job = new Job(conf,"wordcount");
    21         job.setJarByClass(wordcount.class);
    22         
    23         job.setMapperClass(wordmap.class);
    24         job.setReducerClass(wordreduce.class);
    25         
    26         job.setInputFormatClass(TextInputFormat.class);
    27         job.setOutputFormatClass(TextOutputFormat.class);
    28         
    29         FileInputFormat.addInputPath(job,new Path(args[0]));
    30         FileOutputFormat.setOutputPath(job, new Path(args[1]));
    31         
    32         job.setOutputKeyClass(Text.class);
    33         job.setOutputValueClass(IntWritable.class);
    34         
    35         job.waitForCompletion(true);
    36         
    37 
    38     }
    39 
    40 }

          wordmap 程序的输入为<key,value>(key是当前行在输入文件中的字节偏移量,value是该行的内容),然后对此行的内容进行切词,每切下一个词就将其组织成<word,1>的形式输出,word表示切出的单词,1代表该单词出现了一次。

     1 import org.apache.hadoop.io.IntWritable;
     2 import org.apache.hadoop.io.LongWritable;
     3 import org.apache.hadoop.io.Text;
     4 import org.apache.hadoop.mapreduce.Mapper;
     5 
     6 public class wordmap extends Mapper<LongWritable, Text, Text, IntWritable> {
     7   
     8     private static final IntWritable one = new IntWritable(1);
     9     protected void map(
    10             LongWritable key,
    11             Text value,
    12             org.apache.hadoop.mapreduce.Mapper<LongWritable, Text, Text, IntWritable>.Context context)
    13             throws java.io.IOException, InterruptedException {
    14         
    15         String line = value.toString();
    16         String[] words = line.split(" ");
    17         for(String word : words){
    18             context.write(new Text(word), one);
    19             
    20         }
    21         
    22     };
    23 
    24 }


          wordreduce 程序会接收到<word,{1,1,1,1……}>形式的数据,也就是特定单词及其每次出现对应的计数列表,其中每个 "1" 表示 word 出现了一次。reduce 将列表中的所有值累加得到 sum(即 word 的频数),最后组织成<word,sum>的形式直接输出。

     1 import org.apache.hadoop.io.IntWritable;
     2 import org.apache.hadoop.io.Text;
     3 import org.apache.hadoop.mapreduce.Reducer;
     4 
     5 public class wordreduce extends Reducer<Text, IntWritable, Text, IntWritable> {
     6 
     7     protected void reduce(
     8             Text key,
     9             java.lang.Iterable<IntWritable> values,
    10             org.apache.hadoop.mapreduce.Reducer<Text, IntWritable, Text, IntWritable>.Context context)
    11             throws java.io.IOException, InterruptedException {
    12         
    13         int sum = 0;
    14         for(IntWritable count : values){
    15             sum+= count.get();
    16             
    17             
    18         }
    19         context.write(key, new IntWritable(sum));
    20     };
    21 
    22 }

  • 相关阅读:
    Javascript Property Names
    Java泛型
    Activity 与 Task
    使用ddns搭建免费服务器
    DDNS
    SimpleAdapter用法
    Java KeyNote
    Android无法访问本地服务器(localhost/127.0.0.1)的解决方案
    Android 添加网络权限
    Java 匿名内部类
  • 原文地址:https://www.cnblogs.com/k-yang/p/5595334.html
Copyright © 2011-2022 走看看