  • Hadoop WordCount example
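    A complete WordCount written against the classic org.apache.hadoop.mapred API: the mapper emits a (word, 1) pair for every token, the reducer (also reused as the combiner) sums the counts per word, and main() configures and submits the job.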

    package cn.lmj.mapreduce;

    import java.io.IOException;
    import java.util.Iterator;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.FileOutputFormat;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.MapReduceBase;
    import org.apache.hadoop.mapred.Mapper;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reducer;
    import org.apache.hadoop.mapred.Reporter;
    import org.apache.hadoop.mapred.TextInputFormat;
    import org.apache.hadoop.mapred.TextOutputFormat;

    public class WordCount
    {
        // Mapper: emit a (word, 1) pair for every token in the input line
        public static class WordCountMapper extends MapReduceBase
                implements Mapper<LongWritable, Text, Text, LongWritable>
        {
            private final LongWritable count = new LongWritable(1);
            private final Text content = new Text();

            @Override
            public void map(LongWritable key, Text value,
                            OutputCollector<Text, LongWritable> output, Reporter report)
                    throws IOException
            {
                // Split the line into words on single spaces
                String str = value.toString();
                String[] arr = str.split(" ");
                for (String s : arr)
                {
                    content.set(s);
                    output.collect(content, count);
                }
            }
        }

        // Reducer: sum the values collected for the same key (word)
        public static class WordCountReduce extends MapReduceBase
                implements Reducer<Text, LongWritable, Text, LongWritable>
        {
            @Override
            public void reduce(Text key, Iterator<LongWritable> values,
                               OutputCollector<Text, LongWritable> output, Reporter rep)
                    throws IOException
            {
                // Accumulate the values that share the same key
                long sum = 0;
                while (values.hasNext())
                {
                    sum += values.next().get();
                }
                output.collect(key, new LongWritable(sum));
            }
        }


        public static void main(String[] args) throws Exception
        {
            // Create a JobConf for this driver class
            JobConf conf = new JobConf(WordCount.class);
            conf.setJobName("lmj");

            // Set the output key/value types
            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(LongWritable.class);

            // Set the Mapper, Combiner and Reducer classes; the reducer can double
            // as the combiner because summing counts is associative and commutative
            conf.setMapperClass(WordCountMapper.class);
            conf.setCombinerClass(WordCountReduce.class);
            conf.setReducerClass(WordCountReduce.class);

            // Set the input and output formats
            conf.setInputFormat(TextInputFormat.class);
            conf.setOutputFormat(TextOutputFormat.class);

            // Set the input file and output directory
            FileInputFormat.setInputPaths(conf, new Path("/aaa/hadoop.txt"));
            FileOutputFormat.setOutputPath(conf, new Path("/aaa/output"));
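            // Note: /aaa/output must not already exist in HDFS; FileOutputFormat
            // rejects the job at submission time if the output directory is present.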

            // Submit the job and wait for it to complete
            JobClient.runJob(conf);
        }
    }
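
    As a quick sanity check, here is the kind of result the job produces on a tiny, made-up input (the data below is illustrative, not from the original post). If /aaa/hadoop.txt contained:

        hello hadoop
        hello world

    then the single reduce output file (tab-separated, as written by TextOutputFormat) would be:

        hadoop  1
        hello   2
        world   1

    Once packaged into a jar, the driver can be submitted with the standard hadoop jar command against a running cluster.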
  • Original post: https://www.cnblogs.com/mengfanrong/p/3860304.html