zoukankan      html  css  js  c++  java
  • Hadoop--map/reduce实现单词计数

     1 import org.apache.hadoop.fs.Path;
     2 import org.apache.hadoop.io.*;
     3 import org.apache.hadoop.mapred.*;
     4 
     5 import java.io.IOException;
     6 import java.util.*;
     7 
     8 public class WordCount {
     9 
    10     /*
    11      * 实现输入内容单词的计数功能
    12      * 一、mapper方法将输入内容处理为<key1,value1>形式
    13      * 二、reduce方法接收mapper的结果,将相同key1的value值相加得到单词的个数
    14      * 三、输出得到的结果到hdfs中
    15      * 
    16      * */
    17     
    18     //main函数
    19     public static void main(String[] args) throws Exception{
    20         JobConf conf=new JobConf(WordCount.class);
    21         conf.setJobName("WordCount");
    22         conf.setOutputKeyClass(Text.class);
    23         conf.setOutputValueClass(IntWritable.class);
    24         
    25         conf.setMapperClass(Map.class);
    26         conf.setReducerClass(Reduce.class);
    27         
    28         conf.setInputFormat(TextInputFormat.class);
    29         conf.setOutputFormat(TextOutputFormat.class);
    30         
    31         FileInputFormat.setInputPaths(conf,new Path(args[0]));
    32         FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    33         
    34         JobClient.runJob(conf);
    35         
    36     }
    37     
    38     //map函数
    39     public static class Map extends MapReduceBase implements Mapper<LongWritable,Text,
    40     Text,IntWritable>{
    41         private final static IntWritable one=new IntWritable(1);
    42         private Text word=new Text();
    43         
    44         public void map(LongWritable key,Text value, 
    45         OutputCollector<Text,IntWritable>output,Reporter reporter)throws IOException{
    46             String line=value.toString();
    47             StringTokenizer tokenizer=new StringTokenizer(line);
    48             while(tokenizer.hasMoreTokens()){
    49                 word.set(tokenizer.nextToken());
    50                 output.collect(word, one);
    51                 
    52             }
    53             
    54         }
    55         
    56     }
    57     
    58     //reduce函数
    59     public static class Reduce extends MapReduceBase implements Reducer<Text,IntWritable,
    60     Text,IntWritable>{
    61         public void reduce(Text key,Iterator<IntWritable>values,OutputCollector<Text,
    62         IntWritable>output,Reporter repoter) throws IOException{
    63             int sum=0;
    64             while(values.hasNext()){
    65                 sum+=values.next().get();
    66             }
    67             output.collect(key,new IntWritable(sum));
    68         }
    69     }
    70     
    71     
    72     
    73 }
  • 相关阅读:
    springboot文件上传: 单个文件上传 和 多个文件上传
    Eclipse:很不错的插件-devStyle,将你的eclipse变成idea风格
    springboot项目搭建:结构和入门程序
    POJ 3169 Layout 差分约束系统
    POJ 3723 Conscription 最小生成树
    POJ 3255 Roadblocks 次短路
    UVA 11367 Full Tank? 最短路
    UVA 10269 Adventure of Super Mario 最短路
    UVA 10603 Fill 最短路
    POJ 2431 Expedition 优先队列
  • 原文地址:https://www.cnblogs.com/6tian/p/3829188.html
Copyright © 2011-2022 走看看