zoukankan      html  css  js  c++  java
  • hadoop中compare函数

    在看 Hadoop 的二次排序(secondary sort)示例时,我把它改写了一下,加了第三个字段。本来以为排序是在下面这个 compareTo 方法里完成的:

       /**
        * Orders keys by first, then second, then third; the first field that differs decides.
        *
        * @param o the key to compare against
        * @return -1, 0 or 1 when this key sorts before, equal to, or after {@code o}
        */
       public int compareTo(IntPair o) {
         // Trace output: shows on stdout whether this method is ever invoked.
         System.out.println("-----------compareTo");

         if (first < o.first) {
           return -1;
         }
         if (first > o.first) {
           return 1;
         }

         if (second < o.second) {
           return -1;
         }
         if (second > o.second) {
           return 1;
         }

         if (third < o.third) {
           return -1;
         }
         if (third > o.third) {
           return 1;
         }

         // Every compared field matched.
         return 0;
       }
    

     本来以为排序是在这个方法里进行的,后来发现并不是:把比较第 3 个字段的代码去掉之后,输出结果依然是有序的。

    后来通过打印日志得知,排序实际上发生在 Comparator 的 compare 函数中。我把这个函数稍微改写了一下:

          /**
           * Compares two serialized keys byte by byte and prints diagnostics about the first one.
           *
           * <p>Each key here is three consecutive 4-byte ints, so the third field of the key
           * starting at {@code s1} lives at {@code s1 + 8 .. s1 + 11}.
           *
           * @param b1 buffer holding the first serialized key
           * @param s1 offset of the first key inside {@code b1}
           * @param l1 length in bytes of the first key
           * @param b2 buffer holding the second serialized key
           * @param s2 offset of the second key inside {@code b2}
           * @param l2 length in bytes of the second key
           * @return the result of {@code compareBytes} over the two serialized keys
           */
          public int compare(byte[] b1, int s1, int l1,
                             byte[] b2, int s2, int l2) {
            // Diagnostics: where this key starts and how large the backing buffer is.
            System.out.println("s1 = " + s1 + ", b1.length = " + b1.length);

            // Verify the data stored in b1 by decoding the third int field. Decode relative to
            // s1 (skipping the first two ints); indexing the shift by the absolute buffer
            // position or reading s1+9..s1+12 would run one byte past the field.
            int third = readInt(b1, s1 + 2 * (Integer.SIZE / 8));
            System.out.println("third = " + third);

            return compareBytes(b1, s1, l1, b2, s2, l2);
          }
        }

     每条记录有 3 个整型值(每个占 4 字节),s1 为记录在字节数组中的起始位置,l1 为记录长度 12,这样就可以从字节数组中读出各个字段的值。

    最后的 return compareBytes(b1, s1, l1, b2, s2, l2); 内部调用的是 FastByteComparisons.compareTo(b1, s1, l1, b2, s2, l2),其实现如下:

        /**
         * Lexicographically compares two byte ranges, treating each byte as an unsigned value.
         *
         * @param buffer1 array holding the first range
         * @param offset1 start index of the first range
         * @param length1 number of bytes in the first range
         * @param buffer2 array holding the second range
         * @param offset2 start index of the second range
         * @param length2 number of bytes in the second range
         * @return 0 for the identical range of the same array; otherwise the difference of the
         *         first pair of differing unsigned bytes, or {@code length1 - length2} when one
         *         range is a prefix of the other
         */
        public int compareTo(byte[] buffer1, int offset1, int length1, byte[] buffer2, int offset2, int length2) {
          // Same array, same window: nothing to scan.
          boolean identicalRange = buffer1 == buffer2 && offset1 == offset2 && length1 == length2;
          if (identicalRange) {
            return 0;
          }

          final int limit1 = offset1 + length1;
          final int limit2 = offset2 + length2;
          int cursor1 = offset1;
          int cursor2 = offset2;

          // Walk both ranges in lock-step until one of them is exhausted.
          while (cursor1 < limit1 && cursor2 < limit2) {
            int left = buffer1[cursor1] & 0xFF;
            int right = buffer2[cursor2] & 0xFF;
            if (left != right) {
              return left - right;
            }
            cursor1++;
            cursor2++;
          }

          // Common prefix is equal: the shorter range sorts first.
          return length1 - length2;
        }
        }
      }
    

     从这段代码可以看出,排序是直接按序列化后的字节数组逐字节比较三个值来完成的,所以输出的结果是有序的。

    结论:理论上有 N 个字段时,这样得到的结果也都是有序的,只是参与比较的字节长度有所变化。需要注意的前提是各字段均为非负整数:字节是按无符号值(& 0xFF)比较的,负数的最高位为 1,会排在所有非负数之后。

    测试又加了一个字段, 输出结果都是有序的。

    测试代码

      /**
       * Four-int composite key used to experiment with how Hadoop sorts map output keys.
       *
       * <p>The key is serialized as four consecutive {@code writeInt} values in the order
       * first, second, third, fourth (16 bytes in total). The raw {@link Comparator} below is
       * registered for this class in the static initializer.
       */
      public static class IntPair 
                          implements WritableComparable<IntPair> {
        /** Size in bytes of one serialized int field. */
        private static final int INT_BYTES = Integer.SIZE / 8;

        private int first = 0;
        private int second = 0;
        private int third = 0;
        private int fourth = 0;
        
        /**
         * Sets all four fields of the key.
         *
         * @param left   value for the first field
         * @param right  value for the second field
         * @param third  value for the third field
         * @param fourth value for the fourth field
         */
        public void set(int left, int right, int third, int fourth) {
          first = left;
          second = right;
          this.third = third;
          this.fourth = fourth;
        }
        
        public int getFirst() {
          return first;
        }
        
        public int getSecond() {
          return second;
        }
        
        public int getThird() {
          return third;
        }
        
        public int getFourth() {
          return fourth;
        }
        
        /** Returns the four fields separated by tabs; has no side effects. */
        @Override
        public String toString() {
          return first + "\t" + second + "\t" + third + "\t" + fourth;
        }
    
        /**
         * Reads the four fields in the order they were written by {@link #write}.
         * The values are stored as plain ints, without any offset encoding.
         */
        @Override
        public void readFields(DataInput in) throws IOException {
          first = in.readInt();
          second = in.readInt();
          third = in.readInt();
          fourth = in.readInt();
        }

        /** Writes the four fields as plain ints: first, second, third, fourth. */
        @Override
        public void write(DataOutput out) throws IOException {
          out.writeInt(first);
          out.writeInt(second);
          out.writeInt(third);
          out.writeInt(fourth);
        }

        @Override
        public int hashCode() {
          return first * 157 + second * 10 + third;
        }
        
        @Override
        public boolean equals(Object right) {
          if (right instanceof IntPair) {
            IntPair r = (IntPair) right;
            return r.first == first && r.second == second && r.third == third && r.fourth == fourth;
          } else {
            return false;
          }
        }
        
        /** A Comparator that compares serialized IntPair. */ 
        public static class Comparator extends WritableComparator {
          public Comparator() {
            super(IntPair.class);
          }
          
          /**
           * Sort comparator working directly on the serialized bytes of two keys.
           *
           * <p>Prints diagnostics about the first key, then compares the two records byte by
           * byte. NOTE(review): a byte-wise comparison treats bytes as unsigned, so the
           * resulting order agrees with {@link IntPair#compareTo} only while every field is
           * non-negative; a negative int has its high bit set and sorts after all
           * non-negative values.
           *
           * @param b1 buffer holding the first serialized key
           * @param s1 offset of the first key inside {@code b1}
           * @param l1 length in bytes of the first key
           * @param b2 buffer holding the second serialized key
           * @param s2 offset of the second key inside {@code b2}
           * @param l2 length in bytes of the second key
           * @return the result of {@code compareBytes} over the two serialized keys
           */
          @Override
          public int compare(byte[] b1, int s1, int l1,
                             byte[] b2, int s2, int l2) { 
            // Diagnostics: where this key starts and how large the backing buffer is.
            System.out.println("s1 = " + s1 + ", b1.length = " + b1.length);

            // The third field follows two ints, i.e. it occupies s1+8 .. s1+11. Decode it
            // relative to s1 rather than by absolute buffer index.
            int third = readInt(b1, s1 + 2 * INT_BYTES);
            System.out.println("third = " + third);

            return compareBytes(b1, s1, l1, b2, s2, l2);
          }
        }
    
        static {   // register this comparator
          WritableComparator.define(IntPair.class, new Comparator());
        }
    
        /**
         * Orders keys by first, second, third, then fourth, so that the result is 0 exactly
         * when {@link #equals} is true.
         *
         * <p>The author observed that the trace line below did not appear while sorting,
         * which suggests the sort goes through the raw comparator registered above.
         *
         * @param o the key to compare against
         * @return -1, 0 or 1 when this key sorts before, equal to, or after {@code o}
         */
        @Override
        public int compareTo(IntPair o) {
          System.out.println("-----------compareTo");
          int order = compareInts(first, o.first);
          if (order == 0) {
            order = compareInts(second, o.second);
          }
          if (order == 0) {
            order = compareInts(third, o.third);
          }
          if (order == 0) {
            order = compareInts(fourth, o.fourth);
          }
          return order;
        }

        /** Three-way comparison of two ints without the overflow risk of subtraction. */
        private static int compareInts(int a, int b) {
          return a < b ? -1 : (a == b ? 0 : 1);
        }
      }
      public static class StrPair 
                          implements WritableComparable<StrPair> {
        private Text first;
        private Text second ;
        private Text third ;
        private Text fourth;
        
        // 这句很重要, 要不读的时候会出错
        public StrPair(){
        	set(new Text(),new Text(),new Text(),new Text());
        }
        
        public void set(Text left, Text right, Text third, Text fourth) {
          this.first = left;
          this.second = right;
          this.third = third;
          this.fourth = fourth;
        }
        
        public Text getFirst() {
          return first;
        }
        
        public Text getSecond() {
          return second;
        }
        
        public Text getThird() {
            return third;
          }
        
        public Text getFourth() {
            return fourth;
          }
        
        @Override
    	public String toString() {
        	return first + "	" + second + "	" + third + "	" + fourth;
    	}
    
        @Override
        public void readFields(DataInput in) throws IOException {
        	first.readFields(in);
        	second.readFields(in);
        	third.readFields(in);
        	fourth.readFields(in);
        }
        @Override
        public void write(DataOutput out) throws IOException {
        	System.out.println(out);
        	first.write(out);
        	second.write(out);
        	third.write(out);
        	fourth.write(out);
        	System.out.println("First = " + second.toString());
        }
        @Override
        public int hashCode() {
          return first.hashCode()* 157 + second.hashCode()*10 + third.hashCode();
        }
        
        @Override
        public boolean equals(Object right) {
          if (right instanceof StrPair) {
        	  StrPair r = (StrPair) right;
            return first.equals(r.first) && second.equals(r.second) && third.equals(r.third) && fourth.equals(r.fourth);
          } else {
            return false;
          }
        }
        
        /** A Comparator that compares serialized StrPair. */ 
        public static class Comparator extends WritableComparator {
          public Comparator() {
            super(StrPair.class);
          }
          
          // 排序比较器,数据全部存在byte数组
          public int compare(byte[] b1, int s1, int l1,
                             byte[] b2, int s2, int l2) { 
           // 二进制数组读取
           int intvalue = readInt(b1, s1);	
           System.out.println("s1 = " +  b1.length);
           /*
           int third = 0;
           for(int i =s1 + 9; i<= s1+ 12; i++){
        	   third += (b1[i]&0xff) << (24-8*i);
        	}
           	System.out.println("third = " + third);
        	
        	*/  
            return compareBytes(b1, s1, l1, b2, s2, l2);
          }
        }
    
        static {   // register this comparator
          WritableComparator.define(StrPair.class, new Comparator());
        }
       
        @Override
        public int compareTo(StrPair o) {/*
          if (first != o.first) {
            return first < o.first ? -1 : 1;
          } else if (second != o.second) {
            return second < o.second ? -1 : 1;
          }// else if (third != o.third) {
        	//        return third < o.third ? -1 : 1;}
          
            return 0;
            */
           return 0;
        }
      }
    
     
      /**
       * Partitions on the first field of the key only, so every key with the same first
       * field is assigned the same partition.
       */
      public static class FirstPartitioner extends Partitioner<StrPair,Text>{
        /**
         * @param key           the composite key; only its first field is used
         * @param value         ignored
         * @param numPartitions number of partitions to choose from
         * @return a partition index in {@code [0, numPartitions)}
         */
        @Override
        public int getPartition(StrPair key, Text value, 
                                int numPartitions) {
          int scrambled = key.getFirst().hashCode() * 127;
          // Math.abs(Integer.MIN_VALUE) is still negative and would produce a negative
          // partition index. Taking the absolute value as a long avoids that while leaving
          // every other result unchanged.
          return (int) (Math.abs((long) scrambled) % numPartitions);
        }
      }
    
      /**
       * Compare only the first part of the pair, so that reduce is called once
       * for each value of the first part.
       */
      public static class FirstGroupingComparator 
                    implements RawComparator<StrPair> {
        @Override
        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
          return WritableComparator.compareBytes(b1, s1, Integer.SIZE/8, 
                                                 b2, s2, Integer.SIZE/8);
        }
    
        @Override
        public int compare(StrPair o1, StrPair o2) {
          Text l = o1.getFirst();
          Text r = o2.getFirst();
          return l.equals(r)?0:1;
         // return l == r ? 0 : (l < r ? -1 : 1);
        }
      }
    
  • 相关阅读:
    DRUPAL-PSA-CORE-2014-005 && CVE-2014-3704 Drupal 7.31 SQL Injection Vulnerability /includes/database/database.inc Analysis
    WDCP(WDlinux Control Panel) mysql/add_user.php、mysql/add_db.php Authentication Loss
    Penetration Testing、Security Testing、Automation Testing
    Tomcat Server Configuration Automation Reinforcement
    Xcon2014 && Geekpwn2014
    phpMyadmin /scripts/setup.php Remote Code Injection && Execution CVE-2009-1151
    Linux System Log Collection、Log Integration、Log Analysis System Building Learning
    The Linux Process Principle,NameSpace, PID、TID、PGID、PPID、SID、TID、TTY
    Windows Management Instrumentation WMI Security Technology Learning
    IIS FTP Server Anonymous Writeable Reinforcement, WEBDAV Anonymous Writeable Reinforcement(undone)
  • 原文地址:https://www.cnblogs.com/chengxin1982/p/3853725.html
Copyright © 2011-2022 走看看