zoukankan      html  css  js  c++  java
  • 用eclipse编写Hadoop程序

    前提:
    eclipse与hadoop的配置成功
    总结:
    1.创建一个hadoop项目
      导入hadoop包: hadoop-0.20.2-core.jar hadoop-0.20.2-ant.jar hadoop-0.20.2-tools.jar
    2.创建一个WordCount.java
    源码
    import java.io.IOException;
    import java.util.StringTokenizer;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    public class WordCount {
     public static class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable>
     {
      private final static IntWritable one = new IntWritable(1);
      private Text word = new Text();
      public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
         StringTokenizer itr = new StringTokenizer(value.toString());
         while (itr.hasMoreTokens()) {
          word.set(itr.nextToken());
          context.write(word, one);
          }
         }
      }
     public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable>
     {
      private IntWritable result = new IntWritable();
      public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
       int sum = 0;
       for (IntWritable val : values) {
        sum += val.get();
        }
       result.set(sum);
       context.write(key, result);
       }
      }
     public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      if (args.length != 2) {
       System.err.println("Usage: wordcount  ");
       System.exit(2);
       }
      conf.set("hadoop.job.ugi", "root,chenbo");
      conf.set("mapred.system.dir", "/hadoopdata/mapred/system");
      Job job = new Job(conf, "word count");
      job.setJarByClass(WordCount.class);
      job.setMapperClass(TokenizerMapper.class);
      job.setReducerClass(IntSumReducer.class);
      job.setMapOutputKeyClass(Text.class);
      job.setMapOutputValueClass(IntWritable.class);
      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(IntWritable.class);
      FileInputFormat.addInputPath(job, new Path(args[0]));
      FileOutputFormat.setOutputPath(job, new Path(args[1]));
      System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
     }

    3.编译WordCount.java
      javac -classpath /jz/hadoop-0.20.2/hadoop-0.20.2-core.jar WordCount.java -d /Home/chenbo/code/WordCount
      生成三个class文件 WordCount.class,WordCount$TokenizerMapper.class,WordCount$IntSumReducer.class
    4.生成WordCount.jar
      进入/Home/chenbo/code/WordCount目录
      jar cvf WordCount.jar *.class
    5.运行WordCount
       hadoop jar WordCount.jar WordCount in out
     
    今天有收获,GO ON!
  • 相关阅读:
    【Uvalive4960】 Sensor network (苗条树,进化版)
    【UVA 1151】 Buy or Build (有某些特别的东东的最小生成树)
    【UVA 1395】 Slim Span (苗条树)
    【UVA 10600】 ACM Contest and Blackout(最小生成树和次小生成树)
    【UVA 10369】 Arctic Network (最小生成树)
    【UVA 10816】 Travel in Desert (最小瓶颈树+最短路)
    【UVA 11183】 Teen Girl Squad (定根MDST)
    【UVA 11865】 Stream My Contest (二分+MDST最小树形图)
    【UVA 11354】 Bond (最小瓶颈生成树、树上倍增)
    【LA 5713 】 Qin Shi Huang's National Road System (MST)
  • 原文地址:https://www.cnblogs.com/bobsoft/p/2714492.html
Copyright © 2011-2022 走看看