  Hadoop AWS Word Count Example

    Launch a cluster with Elastic MapReduce (EMR) in AWS.
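
    If you prefer the AWS CLI to the EMR web console, a cluster can be created and reached roughly as follows. This is only a sketch: the release label, instance type, and key-pair name are placeholders to adjust for your own account.

    aws emr create-cluster --name "WordCount" --applications Name=Hadoop \
        --release-label emr-5.36.0 --instance-type m5.xlarge --instance-count 3 \
        --use-default-roles --ec2-attributes KeyName=my-key-pair

    ssh -i my-key-pair.pem hadoop@<master-public-DNS>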

    Then log in to the master node and compile the program below:


    import java.io.IOException;
    import java.util.StringTokenizer;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    
    public class WordCount {
    
    	public static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    
    		private final IntWritable one = new IntWritable(1);
    		private Text word = new Text();
    		
    		@Override
    		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    			String line = value.toString();
    			StringTokenizer tokenizer = new StringTokenizer(line);
    			while(tokenizer.hasMoreTokens()) {
    				word.set(tokenizer.nextToken());
    				context.write(word, one);
    			}
    		}
    		
    	}
    	
    	public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    
    		@Override
    		protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
    			int sum = 0;
    			for(IntWritable value : values) {
    				sum += value.get();
    			}
    			context.write(key, new IntWritable(sum));
    		}
    
    	}
    	
    	
    	
    	public static void main(String[] args) throws Exception {
    		Configuration conf = new Configuration();
    		Job job = new Job(conf, "Word Count hadoop-0.20");
    	      
            //setting the class names
            job.setJarByClass(WordCount.class);
            job.setMapperClass(WordCountMapper.class);
            job.setReducerClass(WordCountReducer.class);
    
            //setting the output data type classes
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
    
            //accept the input and output directories at run time
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
    		
            System.exit(job.waitForCompletion(true) ? 0 : 1);
    	}
    }
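
    Note: new Job(conf, ...) matches the Hadoop 0.20 API this post targets, but it is deprecated in later releases. If you compile against a newer Hadoop, the driver can use Job.getInstance instead, and the reducer can double as a combiner to cut shuffle traffic. A minimal sketch under that assumption, reusing the classes above:

            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "Word Count");

            job.setJarByClass(WordCount.class);
            job.setMapperClass(WordCountMapper.class);
            // Summing counts is associative and commutative, so the reducer also works as a combiner.
            job.setCombinerClass(WordCountReducer.class);
            job.setReducerClass(WordCountReducer.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);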



    Set up the classpath, then compile, package, and run the job:

    export CLASSPATH=$CLASSPATH:/home/hadoop/*:/home/hadoop/lib/*:'.'

    javac WordCount.java

    jar cvf WordCount.jar *.class

    hadoop jar WordCount.jar WordCount s3://15-319-s13/book-dataset/pg_00 /output
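
    The last two arguments are the input path (the course dataset on S3) and the output directory, which is created on the cluster's HDFS. On EMR the output can also be written straight back to S3 by passing an S3 URI instead; the bucket name below is a placeholder:

    hadoop jar WordCount.jar WordCount s3://15-319-s13/book-dataset/pg_00 s3://your-bucket/wordcount-output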

    Once the job finishes successfully, the output directory lives on the Hadoop file system (HDFS), so you can view the results like this:

    hadoop fs -cat /output/part-r-00000  | less
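
    Each line of part-r-00000 is a word and its count separated by a tab (Hadoop's default TextOutputFormat), i.e. lines of the form:

    <word>	<count>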



    Main references:

    http://kickstarthadoop.blogspot.com/2011/04/word-count-hadoop-map-reduce-example.html

    http://kickstarthadoop.blogspot.com/2011/05/word-count-example-with-hadoop-020.html
