zoukankan      html  css  js  c++  java
  • Hadoop--map/reduce实现单词计数

     1 import org.apache.hadoop.fs.Path;
     2 import org.apache.hadoop.io.*;
     3 import org.apache.hadoop.mapred.*;
     4 
     5 import java.io.IOException;
     6 import java.util.*;
     7 
     8 public class WordCount {
     9 
    10     /*
    11      * 实现输入内容单词的计数功能
    12      * 一、mapper方法将输入内容处理为<key1,value1>形式
    13      * 二、reduce方法接收mapper的结果,将相同key1的value值相加得到单词的个数
    14      * 三、输出得到的结果到hdfs中
    15      * 
    16      * */
    17     
    18     //main函数
    19     public static void main(String[] args) throws Exception{
    20         JobConf conf=new JobConf(WordCount.class);
    21         conf.setJobName("WordCount");
    22         conf.setOutputKeyClass(Text.class);
    23         conf.setOutputValueClass(IntWritable.class);
    24         
    25         conf.setMapperClass(Map.class);
    26         conf.setReducerClass(Reduce.class);
    27         
    28         conf.setInputFormat(TextInputFormat.class);
    29         conf.setOutputFormat(TextOutputFormat.class);
    30         
    31         FileInputFormat.setInputPaths(conf,new Path(args[0]));
    32         FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    33         
    34         JobClient.runJob(conf);
    35         
    36     }
    37     
    38     //map函数
    39     public static class Map extends MapReduceBase implements Mapper<LongWritable,Text,
    40     Text,IntWritable>{
    41         private final static IntWritable one=new IntWritable(1);
    42         private Text word=new Text();
    43         
    44         public void map(LongWritable key,Text value, 
    45         OutputCollector<Text,IntWritable>output,Reporter reporter)throws IOException{
    46             String line=value.toString();
    47             StringTokenizer tokenizer=new StringTokenizer(line);
    48             while(tokenizer.hasMoreTokens()){
    49                 word.set(tokenizer.nextToken());
    50                 output.collect(word, one);
    51                 
    52             }
    53             
    54         }
    55         
    56     }
    57     
    58     //reduce函数
    59     public static class Reduce extends MapReduceBase implements Reducer<Text,IntWritable,
    60     Text,IntWritable>{
    61         public void reduce(Text key,Iterator<IntWritable>values,OutputCollector<Text,
    62         IntWritable>output,Reporter repoter) throws IOException{
    63             int sum=0;
    64             while(values.hasNext()){
    65                 sum+=values.next().get();
    66             }
    67             output.collect(key,new IntWritable(sum));
    68         }
    69     }
    70     
    71     
    72     
    73 }
  • 相关阅读:
    python全栈开发_day41_数据库5
    python全栈开发_day40_数据库4
    python全栈开发_day39_数据库3
    python全栈开发_day38_数据库2
    python全栈开发_day37_数据库1
    python全栈开发_day36_高并发,协程
    python全栈开发_day35_锁,进程池,线程池
    python全栈开发_day34_线程了解知识点
    python全栈开发_day33_关于进程的了解知识
    python全栈开发_day32_粘包分析,socketserver和多道技术
  • 原文地址:https://www.cnblogs.com/6tian/p/3829188.html
Copyright © 2011-2022 走看看