zoukankan      html  css  js  c++  java
  • 第一个WordCount类运行

    import java.io.IOException;
    import java.util.*;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.conf.*;
    import org.apache.hadoop.io.*;
    import org.apache.hadoop.mapreduce.*;
    import org.apache.hadoop.mapreduce.lib.input.*;
    import org.apache.hadoop.mapreduce.lib.output.*;
    import org.apache.hadoop.util.*;

    public class WordCount extends Configured implements Tool {
     public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
          private final static IntWritable one = new IntWritable(1);
          private Text word = new Text();
          public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
              word.set(tokenizer.nextToken());
              context.write(word, one);
            }
          }
        }
     
     public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
         int sum = 0;
          for (IntWritable val : values) {
            sum += val.get();
          }
          context.write(key, new IntWritable(sum));
     }
    }
     
    public int run(String [] args) throws Exception {
         Job job = new Job(getConf());
         job.setJarByClass(WordCount.class);
         job.setJobName("wordcount");

         job.setOutputKeyClass(Text.class);
         job.setOutputValueClass(IntWritable.class);
     
         job.setMapperClass(Map.class);
         job.setReducerClass(Reduce.class);

         job.setInputFormatClass(TextInputFormat.class);
         job.setOutputFormatClass(TextOutputFormat.class);

         FileInputFormat.setInputPaths(job, new Path(args[0]));
         FileOutputFormat.setOutputPath(job, new Path(args[1]));

         boolean success = job.waitForCompletion(true);
         return success ? 0 : 1;
    }
     
       public static void main(String[] args) throws Exception {
          int ret = ToolRunner.run(new WordCount(), args);
          System.exit(ret);
       }
    }

    新建成WordCount.java ,  把上面代码拷贝进去。

    然后编译,打成jar包。

    然后新建  touch file01  ,里面写入hello world bye world

    touch file02  ,  里面写入hello hadoop bye hadoop

    然后   hadoop fs -put  file0*  input  ,放进HDFS文件系统中(旧写法 hadoop dfs 已弃用,新版本请用 hadoop fs 或 hdfs dfs)

    然后运行hadoop  jar   WordCount.jar   WordCount  input output

    输出结果:

    bye 2

    hadoop 2

    hello 2

    world 2

  • 相关阅读:
    关于VS2010出现“此方法显式使用的 CAS 策略已被 .NET Framework 弃用... ...请使用 NetFx40_LegacySecurityPolicy 配置开关”解决办法
    数据挖掘---Pandas的学习
    数据挖掘---Numpy的学习
    数据挖掘---Matplotib的学习
    AI学习---数据IO操作&神经网络基础
    AI学习---基于TensorFlow的案例[实现线性回归的训练]
    AI学习---卷积神经网络
    AI学习---数据读取&神经网络
    AI学习---TensorFlow框架介绍[图+会话+张量+变量OP+API]
    AI学习---深度学习&TensorFlow安装
  • 原文地址:https://www.cnblogs.com/baoendemao/p/3804821.html
Copyright © 2011-2022 走看看