  • A simple Hadoop example

    I came across a nice Hadoop example online and am sharing it here for anyone who wants to learn.

    package com.run.ayena.distributed.test;

    import java.io.IOException;
    import java.util.StringTokenizer;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.GenericOptionsParser;

    // Count the occurrences of one specified word in a text file
    public class SingleWordCount {

        public static class SingleWordCountMapper extends
                Mapper<Object, Text, Text, IntWritable> {

            private final static IntWritable one = new IntWritable(1);
            private Text val = new Text();

            public void map(Object key, Text value, Context context)
                    throws IOException, InterruptedException {
                StringTokenizer itr = new StringTokenizer(value.toString());
                String keyword = context.getConfiguration().get("word");
                while (itr.hasMoreTokens()) {
                    String nextkey = itr.nextToken();
                    // Emit (word, 1) only for tokens matching the target word
                    if (nextkey.trim().equals(keyword)) {
                        val.set(nextkey);
                        context.write(val, one);
                    }
                }
            }
        }

        public static class SingleWordCountReducer extends
                Reducer<Text, IntWritable, Text, IntWritable> {

            private IntWritable result = new IntWritable();

            public void reduce(Text key, Iterable<IntWritable> values,
                    Context context) throws IOException, InterruptedException {
                int sum = 0;
                for (IntWritable val : values) {
                    sum += val.get();
                }
                result.set(sum);
                context.write(key, result);
            }
        }

        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            String[] otherArgs = new GenericOptionsParser(conf, args)
                    .getRemainingArgs();
            if (otherArgs.length != 3) {
                System.err.println("Usage: wordcount <in> <out> <word>");
                System.exit(2);
            }

            // Pass the target word to the mappers via the configuration
            conf.set("word", otherArgs[2]);

            // Specify the system directory (a local Cygwin path in this example)
            conf.set("mapred.system.dir", "/cygdrive/e/workspace_hadoop/SingleWordCount/");

            // Set the job name
            Job job = new Job(conf, "word count");

            // Set the job's jar via the driver class
            job.setJarByClass(SingleWordCount.class);

            // Set the Mapper class
            job.setMapperClass(SingleWordCountMapper.class);

            // Set the local combiner; in this example the Reducer class doubles as the combiner
            job.setCombinerClass(SingleWordCountReducer.class);

            // Set the Reducer class
            job.setReducerClass(SingleWordCountReducer.class);

            // Set the Mapper output key/value types
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);

            // Set the Reducer output key type
            job.setOutputKeyClass(Text.class);
            // Set the Reducer output value type
            job.setOutputValueClass(IntWritable.class);

            // Set the input and output directories
            FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
            FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

            // Run the job and exit when it completes
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
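
    To run the example, package the class into a jar and submit it with an input directory, an output directory, and the word to count. The jar name and paths below are placeholders, not part of the original post:

    hadoop jar singlewordcount.jar com.run.ayena.distributed.test.SingleWordCount /user/hadoop/input /user/hadoop/output Hello

    If the input files contain "Hello" three times, the output directory will hold a part file with a single line, the word and its count separated by a tab: "Hello 3".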
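
    A side note: the new Job(conf, "word count") constructor used above is deprecated from Hadoop 2.x onward. A minimal sketch of the equivalent setup on newer versions, with the rest of the driver unchanged:

    // Hadoop 2.x+: create the job through the factory method instead of the constructor
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(SingleWordCount.class);
    // ...the remaining job setup is identical...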
  • Original source: https://www.cnblogs.com/xiehaoyu/p/3419801.html