zoukankan      html  css  js  c++  java
  • 第一个WordCount类运行

    import java.io.IOException;
    import java.util.*;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.conf.*;
    import org.apache.hadoop.io.*;
    import org.apache.hadoop.mapreduce.*;
    import org.apache.hadoop.mapreduce.lib.input.*;
    import org.apache.hadoop.mapreduce.lib.output.*;
    import org.apache.hadoop.util.*;

    public class WordCount extends Configured implements Tool {
     public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
          private final static IntWritable one = new IntWritable(1);
          private Text word = new Text();
          public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
              word.set(tokenizer.nextToken());
              context.write(word, one);
            }
          }
        }
     
     public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
         int sum = 0;
          for (IntWritable val : values) {
            sum += val.get();
          }
          context.write(key, new IntWritable(sum));
     }
    }
     
    public int run(String [] args) throws Exception {
         Job job = new Job(getConf());
         job.setJarByClass(WordCount.class);
         job.setJobName("wordcount");

         job.setOutputKeyClass(Text.class);
         job.setOutputValueClass(IntWritable.class);
     
         job.setMapperClass(Map.class);
         job.setReducerClass(Reduce.class);

         job.setInputFormatClass(TextInputFormat.class);
         job.setOutputFormatClass(TextOutputFormat.class);

         FileInputFormat.setInputPaths(job, new Path(args[0]));
         FileOutputFormat.setOutputPath(job, new Path(args[1]));

         boolean success = job.waitForCompletion(true);
         return success ? 0 : 1;
    }
     
       public static void main(String[] args) throws Exception {
          int ret = ToolRunner.run(new WordCount(), args);
          System.exit(ret);
       }
    }

    新建 WordCount.java，把上面代码拷贝进去。

    然后编译,打成jar包。

    然后新建  touch file01  ,里面写入hello world bye world

    touch file02  ,  里面写入hello hadoop bye hadoop

    然后   hadoop dfs -put  file0*  input  ,放进HDFS文件系统中

    然后运行hadoop  jar   WordCount.jar   WordCount  input output

    输出结果:

    bye 2

    hadoop 2

    hello 2

    world 2

  • 相关阅读:
    Redhat7.x静默安装19C客户端
    利用增量备份修复DG备库中的gap>>>>>>>>>>>有新增数据文件
    利用增量备份修复DG备库中的gap>>>>>>>>>>>无新增数据文件
    ORA-01665 control file is not a standby control file
    ORA-01110 ORA-01122 ORA-01110 ORA-01200解决办法
    Zabbix5.0+Grafana可视化部署教程
    RedHat 7.5配置bonding双网卡绑定(转)
    11.2.0.1 RAC环境部分磁盘组无法自动挂载,导致数据库实例无法启动(转)
    11.2.0.1 RAC环境经典bug CRS-4124: Oracle High Availability Services startup failed.
    Git配置SSH及常用命令
  • 原文地址:https://www.cnblogs.com/baoendemao/p/3804821.html
Copyright © 2011-2022 走看看