zoukankan      html  css  js  c++  java
  • 编译hadoop版的hello,world

    # Work in the directory that contains WordCount.java.
    cd ~/src
    # -p keeps this step from failing when the tutorial is re-run and classes/ already exists.
    mkdir -p classes
    # Compile against the Hadoop core jar; -d places the generated .class files under classes/.
    javac -classpath ~/hadoop-0.20.2/hadoop-0.20.2-core.jar WordCount.java -d classes
    # Package everything under classes/ into WordCount.jar.
    jar -cvf WordCount.jar -C classes/ .
    # Run the job with the fully qualified main class; "input" and "output" are HDFS paths.
    # The job refuses to start if "output" already exists, so remove it before re-running.
    hadoop jar WordCount.jar com.codestyle.hadoop.WordCount input output
    # List the job's output files, then print the result file.
    hadoop fs -ls output
    hadoop fs -cat output/part-00000

    要点:

    编译WordCount.java时必须通过classpath指定hadoop的库文件,并通过 -d 参数指定编译生成的class文件输出到classes目录

    打包class文件成为jar文件

    通过hadoop调用jar文件执行MapReduce,内容输出到output目录(如果该目录已存在,则要先删掉这个目录)。在命令参数中必须指定包名+类名


    WordCount.java

    package com.codestyle.hadoop;
    
    import java.io.IOException;
    import java.util.*;
    
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.conf.*;
    import org.apache.hadoop.io.*;
    import org.apache.hadoop.mapred.*;
    import org.apache.hadoop.util.*;
    
    public class WordCount {
    
       public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
         private final static IntWritable one = new IntWritable(1);
         private Text word = new Text();
    
         public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
           String line = value.toString();
           StringTokenizer tokenizer = new StringTokenizer(line);
           while (tokenizer.hasMoreTokens()) {
             word.set(tokenizer.nextToken());
             output.collect(word, one);
           }
         }
       }
    
       public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
         public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
           int sum = 0;
           while (values.hasNext()) {
             sum += values.next().get();
           }
           output.collect(key, new IntWritable(sum));
         }
       }
    
       public static void main(String[] args) throws Exception {
         JobConf conf = new JobConf(WordCount.class);
         conf.setJobName("wordcount");
    
         conf.setOutputKeyClass(Text.class);
         conf.setOutputValueClass(IntWritable.class);
    
         conf.setMapperClass(Map.class);
         conf.setReducerClass(Reduce.class);
    
         conf.setInputFormat(TextInputFormat.class);
         conf.setOutputFormat(TextOutputFormat.class);
    
         FileInputFormat.setInputPaths(conf, new Path(args[0]));
         FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    
         JobClient.runJob(conf);
       }
    }

    查看执行结果

    lishujun@lishujun-virtual-machine:~/src$ hadoop fs -cat output/part-00000
    Hadoop    1
    Hello    2
    World    1

    参考资料:

    http://www.cnblogs.com/xia520pi/archive/2012/05/16/2504205.html

    http://blog.csdn.net/xw13106209/article/details/6862480

    http://blog.csdn.net/turkeyzhou/article/details/8121601

  • 相关阅读:
    POJ 3630 Phone List | Trie 树
    POJ 3974 Palindrome | 马拉车模板
    POJ 3422 Kaka's Matrix Travels | 最小费用最大流
    POJ 2195 Going Home | 带权二分图匹配
    POJ 3068 "Shortest" pair of paths | 最小费用最大流
    POJ 3686 The Windy's | 最小费用最大流
    洛谷 最小费用最大流 模板 P3381
    POJ 2987 Firing | 最大权闭合团
    POJ 3469 Dual Core CPU | 最小割
    POJ 3281 Dining | 最大流
  • 原文地址:https://www.cnblogs.com/code-style/p/3737035.html
Copyright © 2011-2022 走看看