zoukankan      html  css  js  c++  java
  • MapReduce的自定义分组

      1 package com.mengyao.hadoop.mapreduce;
      2 
      3 import java.io.IOException;
      4 import java.text.SimpleDateFormat;
      5 import java.util.Date;
      6 
      7 import org.apache.hadoop.conf.Configuration;
      8 import org.apache.hadoop.conf.Configured;
      9 import org.apache.hadoop.fs.Path;
     10 import org.apache.hadoop.io.LongWritable;
     11 import org.apache.hadoop.io.Text;
     12 import org.apache.hadoop.mapreduce.Job;
     13 import org.apache.hadoop.mapreduce.Mapper;
     14 import org.apache.hadoop.mapreduce.Reducer;
     15 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
     16 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
     17 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
     18 import org.apache.hadoop.util.Tool;
     19 import org.apache.hadoop.util.ToolRunner;
     20 
     21 
     22 public class MyGroupApp extends Configured implements Tool {
     23 
     24     static class MyGroupMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
     25         
     26         private Text k = null;
     27         private LongWritable v = null;
     28         
     29         @Override
     30         protected void setup(
     31                 Mapper<LongWritable, Text, Text, LongWritable>.Context context)
     32                 throws IOException, InterruptedException {
     33             k = new Text();
     34             v = new LongWritable(1L);
     35         }
     36 
     37         @Override
     38         protected void map(LongWritable key, Text value, Context context)
     39                 throws IOException, InterruptedException {
     40             final String[] words = value.toString().split("\t");
     41             for (String word : words) {
     42                 k.set(word);
     43                 context.write(k, v);
     44             }
     45         }
     46     }
     47     
     48     static class MyGroupReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
     49         @Override
     50         protected void reduce(Text key, Iterable<LongWritable> value, Context context)
     51                 throws IOException, InterruptedException {
     52             long count = 0L;
     53             for (LongWritable item : value) {
     54                 count += item.get();
     55             }
     56             context.write(key, new LongWritable(count));
     57         }
     58     }
     59     
     60     @Override
     61     public int run(String[] arg0) throws Exception {
     62         Configuration conf = getConf();
     63         conf.set("mapreduce.job.jvm.numtasks", "-1");        
     64         conf.set("mapreduce.map.speculative", "false");        
     65         conf.set("mapreduce.reduce.speculative", "false");    
     66         conf.set("mapreduce.map.maxattempts", "4");            
     67         conf.set("mapreduce.reduce.maxattempts", "4");        
     68         conf.set("mapreduce.map.skip.maxrecords", "0");        
     69         Job job = Job.getInstance(conf, MyGroupApp.class.getSimpleName());
     70         job.setJarByClass(MyGroupApp.class);
     71         job.setInputFormatClass(TextInputFormat.class);
     72         
     73         FileInputFormat.addInputPath(job, new Path(arg0[0]));
     74         FileOutputFormat.setOutputPath(job, new Path(arg0[1]));
     75         
     76         job.setMapperClass(MyGroupMapper.class);
     77         job.setMapOutputKeyClass(Text.class);
     78         job.setMapOutputValueClass(LongWritable.class);
     79         
     80         job.setReducerClass(MyGroupReducer.class);
     81         job.setOutputKeyClass(Text.class);
     82         job.setOutputValueClass(LongWritable.class);
     83         
     84         return job.waitForCompletion(true)?0:1;
     85     }
     86     
     87     
     88     public static int createJob(String[] args) {
     89         Configuration conf = new Configuration();
     90         int status = 1;
     91         try {
     92             status = ToolRunner.run(conf, new MyGroupApp(), args);
     93         } catch (Exception e) {
     94             e.printStackTrace();
     95             throw new RuntimeException(e);
     96         }
     97         
     98         return status;
     99     }
    100     
    101     public static void main(String[] args) throws Exception {
    102         //此处用ant直接编译打包上传运行,先行赋值
    103         args = new String[]{"/testdata/words", "/job/mapreduce/"+WordCountApp.class.getSimpleName()+"_"+new SimpleDateFormat("yyyyMMddhhMMss").format(new Date())};
    104         if (args.length != 2) {
    105             System.out.println("Usage: "+WordCountApp.class.getSimpleName()+" <in> <out>");
    106             System.exit(2);
    107         } else {
    108             int status = createJob(args);
    109             System.exit(status);
    110         }
    111 
    112     }
    113 
    114 }
  • 相关阅读:
    python 3.6.2 使用VScode 安装requests包
    JExcel读取Excel,完成随机数对应学号,随机选取学生姓名
    HashMap和Map的道道
    腾讯云主机windows 2012 r2搭建ftp服务器
    关于网站域名访问出现welcome to nginx 的问题
    腾讯云主机windows 2012 r2标准版&&搭建java环境(jdk+tomcat)
    Anaconda 安装+使用+换源+更新
    已知三点求平面方程、平面法向量和点到平面的距离
    Open Cascade:计算体积
    VS2015安装QT插件
  • 原文地址:https://www.cnblogs.com/mengyao/p/4865584.html
Copyright © 2011-2022 走看看