zoukankan      html  css  js  c++  java
  • 使用eclipse的快捷键自动生成的map或者reduce函数的参数中:“org.apache.hadoop.mapreduce.Reducer.Context context”

    今天在测试mapreduce的程序时,就是简单的去重,对照课本上的程序和自己的程序,唯一不同的就是“org.apache.hadoop.mapreduce.Reducer.Context context”,我写的程序如下:

    package com.pro.bq;
    
    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.GenericOptionsParser;
    
    public class Dedup {
        public static class Map extends Mapper<Object,Text, Text, Text>{
            private Text line=new Text();
      
        @Override
        protected void map(Object key, Text value,
                Context context)
                throws IOException, InterruptedException {
            // TODO Auto-generated method stub
              line=value;
              context.write(line, new Text(""));
            
        }
            
        }
        public static class Reduce extends Reducer<Text, Text, Text, Text>
        {
    
            @SuppressWarnings("unchecked")
            protected void reduce(Text key, Iterable<Text> value,
                    org.apache.hadoop.mapreduce.Reducer.Context context)
                    throws IOException, InterruptedException {
                // TODO Auto-generated method stub
                context.write(key, new Text(""));
    
            }
        }
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Configuration conf=new Configuration();
    //        conf.set("mapred.job.tracker", "localhost:9001");
            String hdfs=new String("hdfs://localhost:9000/user/haduser/");
            String[] ioStr=new String[]{hdfs+"input",hdfs+"output/outDedup"};
            
            //自己在代码中定义路径,否则的话就要就要在程序的输入参数中设置了
            String[] otherStr=new GenericOptionsParser(conf, ioStr).getRemainingArgs();
    
            
            if(otherStr.length!=2)
            {
                System.err.println("Usage: Data deduplication <in> <out>");
                System.exit(2);
            }
            
            Job job=new Job(conf, "Data deduplication");
            job.setJarByClass(Dedup.class);
            
            job.setMapperClass(Map.class);
            job.setCombinerClass(Reduce.class);
            job.setReducerClass(Reduce.class);
            
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            
            FileInputFormat.addInputPath(job, new Path(otherStr[0]));
            FileOutputFormat.setOutputPath(job, new Path(otherStr[1]));
            System.exit(job.waitForCompletion(true) ? 0:1);
            
            
        }
    
    }

    课本上给出的程序如下:

    package com.pro.bq;
    
    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.GenericOptionsParser;
    
    public class Dedup {
        public static class Map extends Mapper<Object,Text, Text, Text>{
            private Text line=new Text();
    
        protected void map(Object key, Text value,
                Context context)
                throws IOException, InterruptedException {
            // TODO Auto-generated method stub
              line=value;
              context.write(line, new Text(""));
            
        }
            
        }
        public static class Reduce extends Reducer<Text, Text, Text, Text>
        {
    
            protected void reduce(Text key, Iterable<Text> value,
                    Context context)
                    throws IOException, InterruptedException {
                // TODO Auto-generated method stub
                context.write(key, new Text(""));
    
            }
        }
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Configuration conf=new Configuration();
    //        conf.set("mapred.job.tracker", "localhost:9001");
            String hdfs=new String("hdfs://localhost:9000/user/haduser/");
            String[] ioStr=new String[]{hdfs+"input",hdfs+"output/outDedup"};
            
            //自己在代码中定义路径,否则的话就要就要在程序的输入参数中设置了
            String[] otherStr=new GenericOptionsParser(conf, ioStr).getRemainingArgs();
    
            
            if(otherStr.length!=2)
            {
                System.err.println("Usage: Data deduplication <in> <out>");
                System.exit(2);
            }
            
            Job job=new Job(conf, "Data deduplication");
            job.setJarByClass(Dedup.class);
            
            job.setMapperClass(Map.class);
            job.setCombinerClass(Reduce.class);
            job.setReducerClass(Reduce.class);
            
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            
            FileInputFormat.addInputPath(job, new Path(otherStr[0]));
            FileOutputFormat.setOutputPath(job, new Path(otherStr[1]));
            System.exit(job.waitForCompletion(true) ? 0:1);
            
            
        }
    
    }

    测试的文件file1.txt是:

    2012-3-1 a
    2012-3-2 b
    2012-3-3 c
    2012-3-4 d
    2012-3-5 a
    2012-3-6 b
    2012-3-7 c
    2012-3-3 c

    file2.txt:

    2012-3-1 b
    2012-3-2 a
    2012-3-3 b
    2012-3-4 d
    2012-3-5 a
    2012-3-6 c
    2012-3-7 d
    2012-3-3 c

    按照我写的运行的结果是:

    2012-3-1 a    
    2012-3-1 b    
    2012-3-2 a    
    2012-3-2 b    
    2012-3-3 b    
    2012-3-3 c    
    2012-3-3 c    
    2012-3-3 c    
    2012-3-4 d    
    2012-3-4 d    
    2012-3-5 a    
    2012-3-5 a    
    2012-3-6 b    
    2012-3-6 c    
    2012-3-7 c    
    2012-3-7 d    

    想要的结果是:

    2012-3-1 a
    2012-3-1 b
    2012-3-2 a
    2012-3-2 b
    2012-3-3 b
    2012-3-3 c
    2012-3-4 d
    2012-3-5 a
    2012-3-6 b
    2012-3-6 c
    2012-3-7 c
    2012-3-7 d

    不知道为什么?暂且记下,有懂的希望不吝赐教,我是菜鸟...

  • 相关阅读:
    Linux内核网络协议栈优化总纲
    Java实现 蓝桥杯VIP 算法训练 连续正整数的和
    Java实现 蓝桥杯VIP 算法训练 连续正整数的和
    Java实现 蓝桥杯VIP 算法训练 寂寞的数
    Java实现 蓝桥杯VIP 算法训练 寂寞的数
    Java实现 蓝桥杯VIP 算法训练 学做菜
    Java实现 蓝桥杯VIP 算法训练 学做菜
    Java实现 蓝桥杯VIP 算法训练 判断字符位置
    Java实现 蓝桥杯VIP 算法训练 判断字符位置
    Java实现 蓝桥杯VIP 算法训练 链表数据求和操作
  • 原文地址:https://www.cnblogs.com/wzyj/p/3554577.html
Copyright © 2011-2022 走看看