zoukankan      html  css  js  c++  java
  • mapreduce排序

    1010037 100
    1010102 100
    1010152 97
    1010178 96
    1010280 104
    1010320 103
    1010510 104
    1010603 96
    1010637 97

    源代码:

    package mapreduce;
    
    import java.io.IOException;
    import java.util.StringTokenizer;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.Reducer.Context;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
    
    import mapreduce.WordCount.MyMapper;
    import mapreduce.WordCount.MyReducer;
    
    public class OneSort {
        public static class Map extends Mapper<Object, Text, IntWritable,Text > {
            private static Text goods= new Text();
            private static IntWritable num=new IntWritable();
            
            public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
                StringTokenizer itr = new StringTokenizer(value.toString());
                while (itr.hasMoreTokens()) {
                    String line = itr.nextToken();
                    String arr[]=line.split(" ");
                    num.set(Integer.parseInt(arr[1]));
                    goods.set(arr[0]);
                    context.write(num,goods);
    
                }
            }
    
        }
    
        public static class Reduce extends Reducer<IntWritable,Text,IntWritable, Text> {
            private static IntWritable result=new IntWritable();
            
            public void reduce(IntWritable key, Iterable<Text> values, Context context)
                    throws IOException, InterruptedException {
                for(Text val:values) {
                    context.write(key, val);
                }
            }
        }
    
        public static void main(String[] args) throws Exception {
    
            Configuration conf = new Configuration();
            System.out.println("start");
            Job job = new Job(conf, "OneSort");
            job.setJarByClass(OneSort.class);
            job.setMapperClass(Map.class);
            job.setReducerClass(Reduce.class);
            job.setOutputKeyClass(IntWritable.class);
            job.setOutputValueClass(Text.class);
            job.setInputFormatClass(TextInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            Path in = new Path("hdfs://localhost:9000/mymapreduce4/in/goods_visit1");
            Path out = new Path("hdfs://localhost:9000/mymapreduce4/out");
    
            FileInputFormat.addInputPath(job, in);
            FileOutputFormat.setOutputPath(job, out);
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }

    排序结果:

    96    1010603
    96    1010178
    97    1010637
    97    1010152
    100    1010102
    100    1010037
    103    1010320
    104    1010510
    104    1010280
    本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接,否则保留追究法律责任的权利.
  • 相关阅读:
    python之private variable
    python实例、类方法、静态方法
    python常用option
    access
    FD_CLOEXEC
    fork后父子进程文件描述问题
    split
    信号
    kill
    进程组&Session
  • 原文地址:https://www.cnblogs.com/wl2017/p/9978450.html
Copyright © 2011-2022 走看看