zoukankan      html  css  js  c++  java
  • hadoop-mapreduce-wordcount-api

     

    
    
    /**
     * 
     */
    package com.lxl.hadoop.mr;
    
    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    /**
     * Driver for the word-count MapReduce job.
     *
     * <p>Builds a {@link Job} that runs {@link MyMapper} and {@link MyReducer},
     * deletes any pre-existing output directory, submits the job and blocks
     * until it finishes. The process exit status reflects the job outcome.
     *
     * @author LXL
     * @date 2019-05-23
     */
    public class MyWC {

        /** Input path used when no command-line argument is supplied. */
        private static final String DEFAULT_INPUT = "/user/root/test.txt";

        /** Output directory used when fewer than two command-line arguments are supplied. */
        private static final String DEFAULT_OUTPUT = "/data/wc/output";

        /**
         * Configures and runs the job.
         *
         * @param args optional; {@code args[0]} overrides the input path and
         *             {@code args[1]} overrides the output directory. When
         *             omitted, the original hard-coded paths are used.
         * @throws Exception if the job cannot be configured or submitted, or if
         *                   the file system cannot be accessed
         */
        public static void main(String[] args) throws Exception {

            Configuration conf = new Configuration(true);

            // Create a new Job
            Job job = Job.getInstance(conf);
            job.setJarByClass(MyWC.class);

            // Specify various job-specific parameters
            job.setJobName("ooxx");

            // Input
            Path input = new Path(args.length > 0 ? args[0] : DEFAULT_INPUT);
            FileInputFormat.addInputPath(job, input);

            // Output: remove a directory left over from a previous run
            // (recursively) before registering it as the job's output path.
            Path output = new Path(args.length > 1 ? args[1] : DEFAULT_OUTPUT);
            if (output.getFileSystem(conf).exists(output)) {
                output.getFileSystem(conf).delete(output, true);
            }
            FileOutputFormat.setOutputPath(job, output);

            job.setMapperClass(MyMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);

            job.setReducerClass(MyReducer.class);
            // Declare the final output types explicitly so they match what
            // MyReducer emits instead of relying on the framework defaults.
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            // Submit the job, then poll for progress until the job is complete.
            // Propagate failure to the caller through the exit status.
            boolean succeeded = job.waitForCompletion(true);
            System.exit(succeeded ? 0 : 1);
        }

    }
    
    
    


    /**
     *
     */
    package com.lxl.hadoop.mr;

    import java.io.IOException;
    import java.util.StringTokenizer;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    /**
     * Mapper for the word-count job: splits each input value into
     * whitespace-delimited tokens and emits {@code (token, 1)} for every token.
     *
     * @author LXL
     * @date 2019-05-24
     */
    public class MyMapper extends Mapper<Object, Text, Text, IntWritable> {

        /** Constant count of one, written for every token. */
        private static final IntWritable ONE = new IntWritable(1);

        /** Reused output key; overwritten with each token before it is written. */
        private final Text word = new Text();

        /**
         * Tokenizes one input record and writes a {@code (token, 1)} pair per token.
         *
         * @param key     input key; not used by this mapper
         * @param value   the text to tokenize
         * @param context sink for the emitted pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // Example input line: hello sxt 102
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    /**
     * 
     */
    package com.lxl.hadoop.mr;
    
    import java.io.IOException;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    /**
     * Reducer for the word-count job: adds up the counts received for a key
     * and writes a single {@code (key, total)} pair.
     *
     * @author LXL
     * @date 2019-05-24
     */
    public class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        // Records that share a key form one group. reduce is called once per
        // group and iterates over that group's values to compute an aggregate
        // (sum, count, max, min, ...).

        /** Reused output value; overwritten with the total before each write. */
        private IntWritable outputCount = new IntWritable();

        /**
         * Sums the values of one key group and emits the total.
         *
         * <p>Example: the records {@code hello 1}, {@code hello 1}, ... arrive
         * as key {@code hello} with values {@code (1, 1, ...)}.
         *
         * @param key     the key shared by every value in the group
         * @param values  the counts to add together
         * @param context sink for the emitted pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int total = 0;
            for (IntWritable count : values) {
                total = total + count.get();
            }

            outputCount.set(total);
            context.write(key, outputCount);
        }

    }
    
    
    
     

    导出JAR包:

     

    需要计算文件的目录位置:

    执行jar包:

    修改自己写的代码。重新上传jar包

    重新执行:

     

  • 相关阅读:
    YII2操作mongodb笔记(转)
    MongoDB创建数据库和集合命令db.createCollection详解(转)
    MongoDB设置访问权限、设置用户(转)
    使用Robomongo 连接MongoDB 3.x 报 Authorization failed 解决办法(转)
    Yii2框架与MongoDB拓展、Redis拓展的安装流程
    YII2 Model 类切换数据库连接
    Centos6.5搭建java开发环境
    YII切换开发和生产环境(命令)
    YII2 实现后台操作记录日志(转)
    phpstorm2018激活方法--使用激活码
  • 原文地址:https://www.cnblogs.com/LXL616/p/10915797.html
Copyright © 2011-2022 走看看