场景描述
订单需要封装成为一个bean 传入reduce,然后实现排序取出top1,或者分组求和
首先要实现排序就要实现comparable接口
要实现分组top1,那么"相同的bean"要到同一个reduce中去,要实现自定义partitioner
到了同一个分区之后 "相同的bean"要reduce程序认为是相同的要实现groupingComparator
/**
* 利用reduce端的GroupingComparator来实现将一组bean看成相同的key
*/
public class ItemidGroupingComparator extends WritableComparator { //传入作为key的bean的class类型,以及制定需要让框架做反射获取实例对象 protected ItemidGroupingComparator() { super(OrderBean.class, true); } @Override public int compare(WritableComparable a, WritableComparable b) { OrderBean abean = (OrderBean) a; OrderBean bbean = (OrderBean) b; //比较两个bean时,指定只比较bean中的orderid return abean.getItemid().compareTo(bbean.getItemid()); } }
public class ItemIdPartitioner extends Partitioner<OrderBean, NullWritable>{ @Override public int getPartition(OrderBean bean, NullWritable value, int numReduceTasks) { //相同id的订单bean,会发往相同的partition //而且,产生的分区数,是会跟用户设置的reduce task数保持一致 return (bean.getItemid().hashCode() & Integer.MAX_VALUE) % numReduceTasks; } }
/** * mapreduce 框架会调用compareTo方法, 实现排序 */ public class OrderBean implements WritableComparable<OrderBean> { private Text itemid; private DoubleWritable amount; public OrderBean() { } public OrderBean(Text itemid, DoubleWritable amount) { set(itemid, amount); } public void set(Text itemid, DoubleWritable amount) { this.itemid = itemid; this.amount = amount; } public Text getItemid() { return itemid; } public DoubleWritable getAmount() { return amount; } @Override public int compareTo(OrderBean o) { int cmp = this.itemid.compareTo(o.getItemid()); if (cmp == 0) { cmp = -this.amount.compareTo(o.getAmount()); } return cmp; } @Override public void write(DataOutput out) throws IOException { out.writeUTF(itemid.toString()); out.writeDouble(amount.get()); } @Override public void readFields(DataInput in) throws IOException { String readUTF = in.readUTF(); double readDouble = in.readDouble(); this.itemid = new Text(readUTF); this.amount = new DoubleWritable(readDouble); } @Override public String toString() { return itemid.toString() + " " + amount.get(); } }
/** * 求每笔订单中交易金额最大的一笔交易的交易金额 */ public class SecondarySort { static class SecondarySortMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable> { OrderBean bean = new OrderBean(); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); String[] fields = StringUtils.split(line, ","); bean.set(new Text(fields[0]), new DoubleWritable(Double.parseDouble(fields[2]))); //在shuffle时实现排序 context.write(bean, NullWritable.get()); } } static class SecondarySortReducer extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable> { //到达reduce时,相同id的所有bean已经被看成一组,且金额最大的那个一排在第一位 @Override protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException { context.write(key, NullWritable.get()); } } public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = Job.getInstance(conf); job.setJarByClass(SecondarySort.class); job.setMapperClass(SecondarySortMapper.class); job.setReducerClass(SecondarySortReducer.class); job.setOutputKeyClass(OrderBean.class); job.setOutputValueClass(NullWritable.class); FileInputFormat.setInputPaths(job, new Path("D:\test\hadoop\ordertest\input")); FileOutputFormat.setOutputPath(job, new Path("D:\test\hadoop\ordertest\output\01")); //在此设置自定义的Groupingcomparator类 job.setGroupingComparatorClass(ItemidGroupingComparator.class); //在此设置自定义的partitioner类 job.setPartitionerClass(ItemIdPartitioner.class); job.setNumReduceTasks(2); job.waitForCompletion(true); } }