zoukankan      html  css  js  c++  java
  • Mapreduce 进阶

    场景描述

    需要把订单封装成一个bean,作为key传入reduce,然后通过排序取出每组的top1,或者进行分组求和

    首先,要实现排序,作为key的bean就要实现WritableComparable接口,并在compareTo方法中定义排序规则

    要实现分组top1,那么"相同的bean"要到同一个reduce中去,要实现自定义partitioner

    到了同一个分区之后,要让reduce程序把"相同的bean"视为同一组,就要实现自定义的GroupingComparator

    
    
    /**
     * Reduce-side grouping comparator: lets a run of {@link OrderBean} keys be
     * treated as one and the same key when they carry the same item id.
     */
    public class ItemidGroupingComparator extends WritableComparator {

        /**
         * Hands the key class ({@link OrderBean}) to the parent comparator. The
         * {@code true} flag asks the parent to create key instances itself
         * (by reflection, per the original author's note).
         */
        protected ItemidGroupingComparator() {
            super(OrderBean.class, true);
        }

        /**
         * Compares two keys by item id only; the amount plays no part in grouping.
         *
         * @param a first key, expected to be an {@link OrderBean}
         * @param b second key, expected to be an {@link OrderBean}
         * @return the result of comparing the item id of {@code a} with that of {@code b}
         */
        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            final OrderBean left = (OrderBean) a;
            final OrderBean right = (OrderBean) b;
            return left.getItemid().compareTo(right.getItemid());
        }

    }
    /**
     * Routes every {@link OrderBean} with the same item id to the same partition.
     */
    public class ItemIdPartitioner extends Partitioner<OrderBean, NullWritable>{

        /**
         * Derives the partition index from the hash of the bean's item id.
         *
         * @param bean           key whose item id decides the partition
         * @param value          unused placeholder value
         * @param numReduceTasks number of reduce tasks; the result is always below this
         * @return a partition index in the range {@code [0, numReduceTasks)}
         */
        @Override
        public int getPartition(OrderBean bean, NullWritable value, int numReduceTasks) {
            // Clear the sign bit so the remainder below can never be negative.
            int nonNegativeHash = bean.getItemid().hashCode() & Integer.MAX_VALUE;
            return nonNegativeHash % numReduceTasks;
        }

    }
    /**
     * Composite key made of an item id and an amount.
     *
     * <p>The natural order defined by {@link #compareTo(OrderBean)} is ascending
     * item id and, for equal item ids, descending amount. According to the
     * original author's note, the MapReduce framework calls {@code compareTo}
     * to sort keys.
     *
     * <p>{@link #equals(Object)} and {@link #hashCode()} are defined over the same
     * two fields so the bean behaves correctly wherever hashing or equality is
     * used on keys.
     */
    public class OrderBean implements WritableComparable<OrderBean> {

        private Text itemid;
        private DoubleWritable amount;

        /**
         * No-argument constructor. Both fields stay {@code null} until
         * {@link #set(Text, DoubleWritable)} or {@link #readFields(DataInput)} is called.
         */
        public OrderBean() {
        }

        /**
         * Creates a bean holding the given item id and amount.
         *
         * @param itemid item id of the order
         * @param amount amount of the order
         */
        public OrderBean(Text itemid, DoubleWritable amount) {
            set(itemid, amount);
        }

        /**
         * Replaces both fields. The given objects are stored by reference, not copied.
         *
         * @param itemid item id of the order
         * @param amount amount of the order
         */
        public void set(Text itemid, DoubleWritable amount) {
            this.itemid = itemid;
            this.amount = amount;
        }

        /** @return the item id, or {@code null} if it has not been set yet */
        public Text getItemid() {
            return itemid;
        }

        /** @return the amount, or {@code null} if it has not been set yet */
        public DoubleWritable getAmount() {
            return amount;
        }

        /**
         * Orders by item id ascending, then by amount descending.
         *
         * @param o the bean to compare against
         * @return negative, zero or positive as this bean sorts before, with or after {@code o}
         */
        @Override
        public int compareTo(OrderBean o) {
            int cmp = this.itemid.compareTo(o.getItemid());
            if (cmp == 0) {
                // Negated so that, within one item id, the largest amount comes first.
                cmp = -this.amount.compareTo(o.getAmount());
            }
            return cmp;
        }

        /**
         * Two beans are equal when both their item ids and their amounts are equal.
         *
         * @param obj the object to compare with
         * @return {@code true} if {@code obj} is an {@code OrderBean} with equal fields
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof OrderBean)) {
                return false;
            }
            OrderBean other = (OrderBean) obj;
            // Fully qualified so that no additional import is required in this file.
            return java.util.Objects.equals(itemid, other.itemid)
                    && java.util.Objects.equals(amount, other.amount);
        }

        /**
         * Hash code built from the same fields that {@link #equals(Object)} compares.
         *
         * @return hash of item id and amount
         */
        @Override
        public int hashCode() {
            return java.util.Objects.hash(itemid, amount);
        }

        /**
         * Serializes the bean as a UTF string (item id) followed by a double (amount).
         *
         * @param out sink to write to
         * @throws IOException if writing fails
         */
        @Override
        public void write(DataOutput out) throws IOException {
            out.writeUTF(itemid.toString());
            out.writeDouble(amount.get());
        }

        /**
         * Reads the fields back in the order {@link #write(DataOutput)} emitted them
         * and stores them in freshly created objects.
         *
         * @param in source to read from
         * @throws IOException if reading fails
         */
        @Override
        public void readFields(DataInput in) throws IOException {
            String readUTF = in.readUTF();
            double readDouble = in.readDouble();

            this.itemid = new Text(readUTF);
            this.amount = new DoubleWritable(readDouble);
        }

        /**
         * @return the item id and the amount separated by a tab character
         */
        @Override
        public String toString() {
            // Tab written as an escape so the separator survives editors and copy/paste.
            return itemid.toString() + "\t" + amount.get();
        }
    }
    /**
     * 求每笔订单中交易金额最大的一笔交易的交易金额
      */
    public class SecondarySort {
    
        static class SecondarySortMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable> {
    
            OrderBean bean = new OrderBean();
    
            @Override
            protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    
                String line = value.toString();
                String[] fields = StringUtils.split(line, ",");
    
                bean.set(new Text(fields[0]), new DoubleWritable(Double.parseDouble(fields[2])));
                //在shuffle时实现排序
                context.write(bean, NullWritable.get());
            }
        }
    
        static class SecondarySortReducer extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable> {
            //到达reduce时,相同id的所有bean已经被看成一组,且金额最大的那个一排在第一位
            @Override
            protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
                context.write(key, NullWritable.get());
            }
        }
    
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf);
    
            job.setJarByClass(SecondarySort.class);
    
            job.setMapperClass(SecondarySortMapper.class);
            job.setReducerClass(SecondarySortReducer.class);
    
    
            job.setOutputKeyClass(OrderBean.class);
            job.setOutputValueClass(NullWritable.class);
    
            FileInputFormat.setInputPaths(job, new Path("D:\test\hadoop\ordertest\input"));
            FileOutputFormat.setOutputPath(job, new Path("D:\test\hadoop\ordertest\output\01"));
    
            //在此设置自定义的Groupingcomparator类
            job.setGroupingComparatorClass(ItemidGroupingComparator.class);
            //在此设置自定义的partitioner类
            job.setPartitionerClass(ItemIdPartitioner.class);
    
            job.setNumReduceTasks(2);
    
            job.waitForCompletion(true);
    
        }
    
    }
  • 相关阅读:
    基于VIP的keepalived高可用架构
    高性能Nginx最佳实践
    Ubuntu安装Nginx
    Synchronized总结及底层原理分析#网易微专业# #Java#
    springboot Restful开发
    Volatile详解
    IOC知识点详细汇总
    python 一个函数让你决定你的二维码
    用python批量生成简单的xml文档
    基于tensorflow搭建一个神经网络
  • 原文地址:https://www.cnblogs.com/rocky-AGE-24/p/7473886.html
Copyright © 2011-2022 走看看