  • A Hadoop MapReduce Program (WordCount)

    package com.gylhaut.hadoop.senior.mapreduce;
    
    import java.io.IOException;
    import java.util.StringTokenizer;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    /**
     * Classic WordCount MapReduce job.
     * (Shift+Alt+S is the IDE shortcut used to generate the overrides below.)
     */
    public class WordCount {
    	// step 1: the Mapper, which splits each line into words and emits (word, 1)
    	public static class WordCountMapper extends
    			Mapper<LongWritable, Text, Text, IntWritable> {
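    		// generic parameters: input key = byte offset of the line (LongWritable),
    		// input value = the line itself (Text); output key = a word (Text),
    		// output value = the count 1 (IntWritable)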
    		private final static IntWritable one = new IntWritable(1);
    		private Text word = new Text();
    
    		@Override
    		public void map(LongWritable key, Text value, Context context)
    				throws IOException, InterruptedException {
    			StringTokenizer itr = new StringTokenizer(value.toString());
    			while (itr.hasMoreTokens()) {
    				word.set(itr.nextToken());
    				context.write(word, one);
    			}
    		}
    	}
    
    	// step 2: the Reducer, which sums the counts for each word
    	public static class WordCountReducer extends
    			Reducer<Text, IntWritable, Text, IntWritable> {
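    		// the framework sorts and groups map output by key, so values
    		// carries every 1 emitted for this word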
    		private IntWritable result = new IntWritable();
    
    		@Override
    		public void reduce(Text key, Iterable<IntWritable> values,
    				Context context) throws IOException, InterruptedException {
    
    			int sum = 0;
    			for (IntWritable val : values) {
    				sum += val.get();
    			}
    			result.set(sum);
    			context.write(key, result);
    		}
    	}
    
    	// step 3: the driver, which assembles and submits the job
    	public int run(String[] args) throws Exception {
    		// 1. get the configuration
    		Configuration configuration = new Configuration();
    		// 2. create the job, named after this class
    		Job job = Job.getInstance(configuration, this.getClass()
    				.getSimpleName());
    		// make the job jar discoverable from this class
    		job.setJarByClass(this.getClass());
    		// 3. set up the job:
    		// input -> map -> reduce -> output
    		// 3.1 input
    		Path inPath = new Path(args[0]);
    		FileInputFormat.addInputPath(job, inPath);
    		// 3.2 map
    		job.setMapperClass(WordCountMapper.class);
    		// set the map output key/value types
    		job.setMapOutputKeyClass(Text.class);
    		job.setMapOutputValueClass(IntWritable.class);
    		// 3.3 reduce
    		job.setReducerClass(WordCountReducer.class);
    		// set the reduce (final) output key/value types
    		job.setOutputKeyClass(Text.class);
    		job.setOutputValueClass(IntWritable.class);
    		// 3.4 output
    		Path outPath = new Path(args[1]);
    		FileOutputFormat.setOutputPath(job, outPath);
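    		// note: submission fails if the output directory already exists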
    		// 4. submit the job and wait for it to finish
    		boolean isSuccess = job.waitForCompletion(true);
    
    		return isSuccess ? 0 : 1;
    
    	}
    
    	public static void main(String[] args) throws Exception {
    		int status = new WordCount().run(args);
    		System.exit(status);
    	}
    }
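
    Usage notes: the driver reads the input path from args[0] and the output
    path from args[1]. Assuming the class is packaged into a jar (the jar
    name below is hypothetical), the job would typically be launched with:

        hadoop jar wordcount.jar com.gylhaut.hadoop.senior.mapreduce.WordCount <input-path> <output-path>

    A common refinement of the driver above is to extend Configured and
    implement Tool, so that ToolRunner parses generic options (for example,
    -D settings) from the command line. The sketch below is one way to wire
    that up; the class name WordCountTool is illustrative and not from the
    original post, it reuses the mapper and reducer defined above, and it
    adds an optional combiner, a standard word-count optimization:

    package com.gylhaut.hadoop.senior.mapreduce;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;

    public class WordCountTool extends Configured implements Tool {

    	@Override
    	public int run(String[] args) throws Exception {
    		// getConf() holds any -D options that ToolRunner parsed for us
    		Job job = Job.getInstance(getConf(), this.getClass().getSimpleName());
    		job.setJarByClass(this.getClass());

    		FileInputFormat.addInputPath(job, new Path(args[0]));
    		FileOutputFormat.setOutputPath(job, new Path(args[1]));

    		job.setMapperClass(WordCount.WordCountMapper.class);
    		// optional: combine partial counts on the map side to cut
    		// shuffle traffic; safe here because summing is associative
    		job.setCombinerClass(WordCount.WordCountReducer.class);
    		job.setReducerClass(WordCount.WordCountReducer.class);

    		job.setOutputKeyClass(Text.class);
    		job.setOutputValueClass(IntWritable.class);

    		return job.waitForCompletion(true) ? 0 : 1;
    	}

    	public static void main(String[] args) throws Exception {
    		System.exit(ToolRunner.run(new Configuration(), new WordCountTool(), args));
    	}
    }

    Either driver will refuse to start if the output directory already
    exists, so clear it between runs (for example, with hadoop fs -rm -r
    on the output path).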
    
  • Original post: https://www.cnblogs.com/gylhaut/p/10106059.html