zoukankan      html  css  js  c++  java
  • 编译Hadoop版的Hello, World

    cd ~/src
    mkdir classes
    javac -classpath ~/hadoop-0.20.2/hadoop-0.20.2-core.jar WordCount.java -d classes
    jar -cvf WordCount.jar -C classes/ .
    hadoop jar WordCount.jar com.codestyle.hadoop.WordCount input output
    hadoop fs -ls output
    hadoop fs -cat output/part-00000

    要点:

    编译WordCount.java时必须通过classpath指定hadoop的库文件。通过-d参数指定编译生成的class文件输出到classes目录

    打包class文件成为jar文件

    通过hadoop调用jar文件执行MapReduce, 内容输出到output目录 (如果该目录已存在,则要先删掉这个目录)。在命令参数中必须指定包名+类名


    WordCount.java

    package com.codestyle.hadoop;
    
    import java.io.IOException;
    import java.util.*;
    
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.conf.*;
    import org.apache.hadoop.io.*;
    import org.apache.hadoop.mapred.*;
    import org.apache.hadoop.util.*;
    
    /**
     * Classic Hadoop WordCount example built on the legacy {@code org.apache.hadoop.mapred} API.
     * Counts occurrences of each whitespace-delimited token across the input files.
     */
    public class WordCount {

        /** Mapper: emits a (token, 1) pair for every whitespace-separated token of each input line. */
        public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
            // Reused across calls to avoid allocating a new writable per token.
            private final static IntWritable one = new IntWritable(1);
            private Text word = new Text();

            public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
                // StringTokenizer splits on whitespace by default.
                for (StringTokenizer tokens = new StringTokenizer(value.toString()); tokens.hasMoreTokens(); ) {
                    word.set(tokens.nextToken());
                    output.collect(word, one);
                }
            }
        }

        /** Reducer: sums all partial counts emitted for a given token and emits the total. */
        public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
            public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
                int total = 0;
                while (values.hasNext()) {
                    total += values.next().get();
                }
                output.collect(key, new IntWritable(total));
            }
        }

        /**
         * Configures and submits the word-count job, then blocks until it completes.
         *
         * @param args args[0] = input path, args[1] = output path (must not already exist)
         * @throws Exception if job submission or execution fails
         */
        public static void main(String[] args) throws Exception {
            JobConf job = new JobConf(WordCount.class);
            job.setJobName("wordcount");

            // Key/value types of the final (reducer) output.
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            job.setMapperClass(Map.class);
            job.setReducerClass(Reduce.class);

            // Plain-text input lines in, tab-separated "word<TAB>count" lines out.
            job.setInputFormat(TextInputFormat.class);
            job.setOutputFormat(TextOutputFormat.class);

            FileInputFormat.setInputPaths(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));

            JobClient.runJob(job);
        }
    }

    查看执行结果

    lishujun@lishujun-virtual-machine:~/src$ hadoop fs -cat output/part-00000
    Hadoop    1
    Hello    2
    World    1

    参考资料:

    http://www.cnblogs.com/xia520pi/archive/2012/05/16/2504205.html

    http://blog.csdn.net/xw13106209/article/details/6862480

    http://blog.csdn.net/turkeyzhou/article/details/8121601

  • 相关阅读:
    LeetCode 461. Hamming Distance
    LeetCode 442. Find All Duplicates in an Array
    LeetCode 448. Find All Numbers Disappeared in an Array
    LeetCode Find the Difference
    LeetCode 415. Add Strings
    LeetCode 445. Add Two Numbers II
    LeetCode 438. Find All Anagrams in a String
    LeetCode 463. Island Perimeter
    LeetCode 362. Design Hit Counter
    LeetCode 359. Logger Rate Limiter
  • 原文地址:https://www.cnblogs.com/code-style/p/3737035.html
Copyright © 2011-2022 走看看