zoukankan html css js c++ java

Mapreduce 进阶

场景描述

订单需要封装成为一个bean 传入reduce,然后实现排序取出top1,或者分组求和

首先要实现排序就要实现comparable接口

要实现分组top1,那么"相同的bean"要到同一个reduce中去,要实现自定义partitioner

到了同一个分区之后 "相同的bean"要reduce程序认为是相同的要实现groupingComparator

/**
 * 利用reduce端的GroupingComparator来实现将一组bean看成相同的key
 */

public class ItemidGroupingComparator extends WritableComparator {

    //传入作为key的bean的class类型，以及制定需要让框架做反射获取实例对象
    protected ItemidGroupingComparator() {
        super(OrderBean.class, true);
    }
    
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        OrderBean abean = (OrderBean) a;
        OrderBean bbean = (OrderBean) b;
        
        //比较两个bean时，指定只比较bean中的orderid
        return abean.getItemid().compareTo(bbean.getItemid());
        
    }

}

public class ItemIdPartitioner extends Partitioner<OrderBean, NullWritable>{

    @Override
    public int getPartition(OrderBean bean, NullWritable value, int numReduceTasks) {
        //相同id的订单bean，会发往相同的partition
        //而且，产生的分区数，是会跟用户设置的reduce task数保持一致
        return (bean.getItemid().hashCode() & Integer.MAX_VALUE) % numReduceTasks;
        
    }

}

/**
 * mapreduce 框架会调用compareTo方法, 实现排序
 */
public class OrderBean implements WritableComparable<OrderBean> {

    private Text itemid;
    private DoubleWritable amount;

    public OrderBean() {
    }

    public OrderBean(Text itemid, DoubleWritable amount) {
        set(itemid, amount);

    }

    public void set(Text itemid, DoubleWritable amount) {

        this.itemid = itemid;
        this.amount = amount;

    }

    public Text getItemid() {
        return itemid;
    }

    public DoubleWritable getAmount() {
        return amount;
    }

    @Override
    public int compareTo(OrderBean o) {
        int cmp = this.itemid.compareTo(o.getItemid());
        if (cmp == 0) {
            cmp = -this.amount.compareTo(o.getAmount());
        }
        return cmp;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(itemid.toString());
        out.writeDouble(amount.get());

    }

    @Override
    public void readFields(DataInput in) throws IOException {
        String readUTF = in.readUTF();
        double readDouble = in.readDouble();

        this.itemid = new Text(readUTF);
        this.amount = new DoubleWritable(readDouble);
    }


    @Override
    public String toString() {

        return itemid.toString() + "	" + amount.get();
    }
}

/**
 * 求每笔订单中交易金额最大的一笔交易的交易金额
  */
public class SecondarySort {

    static class SecondarySortMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable> {

        OrderBean bean = new OrderBean();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

            String line = value.toString();
            String[] fields = StringUtils.split(line, ",");

            bean.set(new Text(fields[0]), new DoubleWritable(Double.parseDouble(fields[2])));
            //在shuffle时实现排序
            context.write(bean, NullWritable.get());
        }
    }

    static class SecondarySortReducer extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable> {
        //到达reduce时，相同id的所有bean已经被看成一组，且金额最大的那个一排在第一位
        @Override
        protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(SecondarySort.class);

        job.setMapperClass(SecondarySortMapper.class);
        job.setReducerClass(SecondarySortReducer.class);


        job.setOutputKeyClass(OrderBean.class);
        job.setOutputValueClass(NullWritable.class);

        FileInputFormat.setInputPaths(job, new Path("D:\test\hadoop\ordertest\input"));
        FileOutputFormat.setOutputPath(job, new Path("D:\test\hadoop\ordertest\output\01"));

        //在此设置自定义的Groupingcomparator类
        job.setGroupingComparatorClass(ItemidGroupingComparator.class);
        //在此设置自定义的partitioner类
        job.setPartitionerClass(ItemIdPartitioner.class);

        job.setNumReduceTasks(2);

        job.waitForCompletion(true);

    }

}

查看全文

相关阅读:
基于VIP的keepalived高可用架构
 高性能Nginx最佳实践
 Ubuntu安装Nginx
Synchronized总结及底层原理分析#网易微专业# #Java#
springboot Restful开发
 Volatile详解
 IOC知识点详细汇总
 python 一个函数让你决定你的二维码
 用python批量生成简单的xml文档
 基于tensorflow搭建一个神经网络

原文地址：https://www.cnblogs.com/rocky-AGE-24/p/7473886.html