zoukankan      html  css  js  c++  java
  • hadoop中compare函数

    在看hadoop  的二次排序的时候,改写了下, 加了第三个参数,  本来以为是在

       public int compareTo(IntPair o) {
          System.out.println("-----------compareTo");
          if (first != o.first) {
            return first < o.first ? -1 : 1;
          } else if (second != o.second) {
            return second < o.second ? -1 : 1;
          }else if (third != o.third) {
        	        return third < o.third ? -1 : 1;}
          
            return 0;
          }
    

     本来以为排序在这里面进行, 后来发现不是,把比较第3个字段的代码去掉, 发现还是有序的。

    后来通过打印得知在compare函数中,稍微改写了下

          public int compare(byte[] b1, int s1, int l1,
                             byte[] b2, int s2, int l2) { 
           // 二进制数组读取
           int intvalue = readInt(b1, s1);    
           System.out.println("s1 = " +  b1.length);
           
        // 验证b1中存储的数据
    int third = 0; for(int i =s1 + 9; i<= s1+ 12; i++){ third += (b1[i]&0xff) << (24-8*i); } System.out.println("third = " + third); return compareBytes(b1, s1, l1, b2, s2, l2); } }

     有3个整形值, s1为开始位置, l1为长度12, 这样我们就可以读出我们的值

    return compareBytes(b1, s1, l1, b2, s2, l2);调用 return FastByteComparisons.compareTo(b1, s1, l1, b2, s2, l2);

        public int compareTo(byte[] buffer1, int offset1, int length1, byte[] buffer2, int offset2, int length2)
          {
            if ((buffer1 == buffer2) && (offset1 == offset2) && (length1 == length2))
            {
              return 0;
            }
    
            int end1 = offset1 + length1;
            int end2 = offset2 + length2;
            int i = offset1; for (int j = offset2; (i < end1) && (j < end2); ++j) {
              int a = buffer1[i] & 0xFF;
              int b = buffer2[j] & 0xFF;
              if (a != b)
                return (a - b);
              ++i;
            }
    
            return (length1 - length2);
          }
        }
      }
    

     从代码中就知道了,通过字节数组比较三个值, 这样就出来的结果就是有序的了

    结论, 理论上N个字段这样出来的结果的都是有序的,只是比较的长度有所变化

    测试又加了一个字段, 输出结果都是有序的。

    测试代码

      public static class IntPair 
                          implements WritableComparable<IntPair> {
        private int first = 0;
        private int second = 0;
        private int third = 0;
        private int fourth = 0;
        
        /**
         * Set the left and right values.
         */
        public void set(int left, int right, int third, int fourth) {
          first = left;
          second = right;
          this.third = third;
          this.fourth = fourth;
        }
        
        public int getFirst() {
          return first;
        }
        
        public int getSecond() {
          return second;
        }
        
        public int getThird() {
            return third;
          }
        
        public int getFourth() {
            return fourth;
          }
        
        @Override
    	public String toString() {
        	System.out.println("third = " + third);
        	return first + "	" + second + "	" + third + "	" + fourth;
    	}
    
    	/**
         * Read the two integers. 
         * Encoded as: MIN_VALUE -> 0, 0 -> -MIN_VALUE, MAX_VALUE-> -1
         */
        @Override
        public void readFields(DataInput in) throws IOException {
          first = in.readInt();// + Integer.MIN_VALUE;
          second = in.readInt();// + Integer.MIN_VALUE;
          third = in.readInt();// + Integer.MIN_VALUE;
          fourth = in.readInt();
        }
        @Override
        public void write(DataOutput out) throws IOException {
         /*
          out.writeInt(first - Integer.MIN_VALUE);
          out.writeInt(second - Integer.MIN_VALUE);
          out.writeInt(third - Integer.MIN_VALUE);
          */
            out.writeInt(first );
            out.writeInt(second );
            out.writeInt(third );
            out.writeInt(fourth);
        }
        @Override
        public int hashCode() {
          return first * 157 + second*10 + third;
        }
        
        @Override
        public boolean equals(Object right) {
          if (right instanceof IntPair) {
            IntPair r = (IntPair) right;
            return r.first == first && r.second == second && r.third == third && r.fourth == fourth;
          } else {
            return false;
          }
        }
        
        /** A Comparator that compares serialized IntPair. */ 
        public static class Comparator extends WritableComparator {
          public Comparator() {
            super(IntPair.class);
          }
          
          // 排序比较器,数据全部存在byte数组
          public int compare(byte[] b1, int s1, int l1,
                             byte[] b2, int s2, int l2) { 
           // 二进制数组读取
           int intvalue = readInt(b1, s1);	
           System.out.println("s1 = " +  b1.length);
           
           int third = 0;
           for(int i =s1 + 9; i<= s1+ 12; i++){
        	   third += (b1[i]&0xff) << (24-8*i);
        	}
           	System.out.println("third = " + third);
        	  
            return compareBytes(b1, s1, l1, b2, s2, l2);
          }
        }
    
        static {   // register this comparator
          WritableComparator.define(IntPair.class, new Comparator());
        }
    
        // 好像没用上    
        @Override
        public int compareTo(IntPair o) {
          System.out.println("-----------compareTo");
          if (first != o.first) {
            return first < o.first ? -1 : 1;
          } else if (second != o.second) {
            return second < o.second ? -1 : 1;
          }// else if (third != o.third) {
        	//        return third < o.third ? -1 : 1;}
          
            return 0;
          }
      }
      public static class StrPair 
                          implements WritableComparable<StrPair> {
        private Text first;
        private Text second ;
        private Text third ;
        private Text fourth;
        
        // 这句很重要, 要不读的时候会出错
        public StrPair(){
        	set(new Text(),new Text(),new Text(),new Text());
        }
        
        public void set(Text left, Text right, Text third, Text fourth) {
          this.first = left;
          this.second = right;
          this.third = third;
          this.fourth = fourth;
        }
        
        public Text getFirst() {
          return first;
        }
        
        public Text getSecond() {
          return second;
        }
        
        public Text getThird() {
            return third;
          }
        
        public Text getFourth() {
            return fourth;
          }
        
        @Override
    	public String toString() {
        	return first + "	" + second + "	" + third + "	" + fourth;
    	}
    
        @Override
        public void readFields(DataInput in) throws IOException {
        	first.readFields(in);
        	second.readFields(in);
        	third.readFields(in);
        	fourth.readFields(in);
        }
        @Override
        public void write(DataOutput out) throws IOException {
        	System.out.println(out);
        	first.write(out);
        	second.write(out);
        	third.write(out);
        	fourth.write(out);
        	System.out.println("First = " + second.toString());
        }
        @Override
        public int hashCode() {
          return first.hashCode()* 157 + second.hashCode()*10 + third.hashCode();
        }
        
        @Override
        public boolean equals(Object right) {
          if (right instanceof StrPair) {
        	  StrPair r = (StrPair) right;
            return first.equals(r.first) && second.equals(r.second) && third.equals(r.third) && fourth.equals(r.fourth);
          } else {
            return false;
          }
        }
        
        /** A Comparator that compares serialized StrPair. */ 
        public static class Comparator extends WritableComparator {
          public Comparator() {
            super(StrPair.class);
          }
          
          // 排序比较器,数据全部存在byte数组
          public int compare(byte[] b1, int s1, int l1,
                             byte[] b2, int s2, int l2) { 
           // 二进制数组读取
           int intvalue = readInt(b1, s1);	
           System.out.println("s1 = " +  b1.length);
           /*
           int third = 0;
           for(int i =s1 + 9; i<= s1+ 12; i++){
        	   third += (b1[i]&0xff) << (24-8*i);
        	}
           	System.out.println("third = " + third);
        	
        	*/  
            return compareBytes(b1, s1, l1, b2, s2, l2);
          }
        }
    
        static {   // register this comparator
          WritableComparator.define(StrPair.class, new Comparator());
        }
       
        @Override
        public int compareTo(StrPair o) {/*
          if (first != o.first) {
            return first < o.first ? -1 : 1;
          } else if (second != o.second) {
            return second < o.second ? -1 : 1;
          }// else if (third != o.third) {
        	//        return third < o.third ? -1 : 1;}
          
            return 0;
            */
           return 0;
        }
      }
    
     
      /**
       * Partition based on the first part of the pair.
       */
      public static class FirstPartitioner extends Partitioner<StrPair,Text>{
        @Override    // 
        public int getPartition(StrPair key, Text value, 
                                int numPartitions) {
          return Math.abs(key.getFirst().hashCode() * 127) % numPartitions;
        }
      }
    
      /**
       * Compare only the first part of the pair, so that reduce is called once
       * for each value of the first part.
       */
      public static class FirstGroupingComparator 
                    implements RawComparator<StrPair> {
        @Override
        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
          return WritableComparator.compareBytes(b1, s1, Integer.SIZE/8, 
                                                 b2, s2, Integer.SIZE/8);
        }
    
        @Override
        public int compare(StrPair o1, StrPair o2) {
          Text l = o1.getFirst();
          Text r = o2.getFirst();
          return l.equals(r)?0:1;
         // return l == r ? 0 : (l < r ? -1 : 1);
        }
      }
    
  • 相关阅读:
    系统吞吐量、TPS(QPS)、用户并发量、性能测试概念和公式[转]
    EF RepositoryBase 参考示例【转】
    Entity Framework 杂碎
    Oracle.ManagedDataAccessDTC.dll 使用
    c# http请求,获取非200时的响应体
    c# windows service(服务)
    git log
    解决冲突
    clone命令
    remote指令添加远程数据库
  • 原文地址:https://www.cnblogs.com/chengxin1982/p/3853725.html
Copyright © 2011-2022 走看看