zoukankan      html  css  js  c++  java
  • 第5章 散列

    一个好的散列函数

     1     public static int hash(String key, int tableSize)
     2     {
     3         int hashVal = 0;
     4         
     5         for (int i = 0; i < key.length(); i++)
     6             hashVal = 37 * hashVal + key.charAt(i);
     7         
     8         hashVal %= tableSize;
     9         if (hashVal < 0)
    10             hashVal += tableSize;
    11         
    12         return hashVal;
    13     }

    如果一个元素在插入时与某个已经插入的元素散列到相同的值,那么就产生一个冲突,这个冲突需要消除。解决这种冲突的方法有几种,其中最简单的两种是分离链接法和开放定址法。

    分离链接法

      1 import java.util.LinkedList;
      2 import java.util.List;
      3 
      /**
       * Hash set that resolves collisions by separate chaining: every table slot
       * holds a linked list of the elements whose hash maps to that slot.
       * The table length is always prime, and the table is rebuilt at roughly
       * twice the length once the element count exceeds the slot count.
       *
       * @param <AnyType> element type; lookups rely on its {@code equals} and
       *                  {@code hashCode}
       */
      public class SeparateChainingHashTable<AnyType>
      {
          private static final int DEFAULT_TABLE_SIZE = 101;

          /** One chain per slot; never contains a null chain. */
          private List<AnyType>[] theLists;
          /** Number of elements currently stored across all chains. */
          private int currentSize;

          /** Creates a table with the default number of slots. */
          public SeparateChainingHashTable()
          { this(DEFAULT_TABLE_SIZE); }

          /**
           * Creates a table whose slot count is the first prime at or above
           * {@code size}.
           *
           * @param size requested (approximate) number of slots
           */
          public SeparateChainingHashTable(int size)
          { theLists = allocateLists(nextPrime(size)); }

          /**
           * Reports whether an element equal to {@code x} is stored.
           *
           * @param x the element to look for
           * @return {@code true} if the chain for {@code x} contains it
           */
          public boolean contains(AnyType x)
          {
              return theLists[myhash(x)].contains(x);
          }

          /**
           * Adds {@code x} unless an equal element is already present, and grows
           * the table when the element count exceeds the slot count.
           *
           * @param x the element to add
           */
          public void insert(AnyType x)
          {
              List<AnyType> whichList = theLists[myhash(x)];
              if (whichList.contains(x))
                  return;

              whichList.add(x);
              if (++currentSize > theLists.length)
                  rehash();
          }

          /**
           * Removes the element equal to {@code x}, if there is one.
           *
           * @param x the element to remove
           */
          public void remove(AnyType x)
          {
              // List.remove(Object) reports whether it removed something, so a
              // separate contains() scan of the chain is unnecessary.
              if (theLists[myhash(x)].remove(x))
                  currentSize--;
          }

          /** Removes every element while keeping the current slot count. */
          public void makeEmpty()
          {
              for (List<AnyType> chain : theLists)
                  chain.clear();

              currentSize = 0;
          }

          /** Rebuilds the table at the first prime at or above twice its length. */
          private void rehash()
          {
              List<AnyType>[] oldLists = theLists;

              theLists = allocateLists(nextPrime(2 * oldLists.length));
              currentSize = 0;

              for (List<AnyType> chain : oldLists)
                  for (AnyType item : chain)
                      insert(item);
          }

          /** Creates an array of {@code length} empty chains. */
          @SuppressWarnings("unchecked") // generic arrays cannot be created directly; only List<T> values are stored
          private static <T> List<T>[] allocateLists(int length)
          {
              List<T>[] lists = new List[length];
              for (int i = 0; i < lists.length; i++)
                  lists[i] = new LinkedList<>();
              return lists;
          }

          /** Maps {@code x.hashCode()} to a slot index in {@code [0, theLists.length)}. */
          private int myhash(AnyType x)
          {
              int hashVal = x.hashCode() % theLists.length;
              // % keeps the sign of the dividend, so negative hash codes need shifting
              if (hashVal < 0)
                  hashVal += theLists.length;

              return hashVal;
          }

          /**
           * Trial-division primality test.
           *
           * @param n the number to test
           * @return {@code true} if {@code n} is prime; values below 2 are not prime
           */
          private static boolean isPrime(int n)
          {
              if (n < 2)
                  return false;
              if (n == 2 || n == 3)
                  return true;
              if ((n & 1) == 0)
                  return false;

              // The bound must be inclusive so that odd squares (9, 25, 49, ...)
              // are rejected; "i <= n / i" is "i * i <= n" without int overflow.
              for (int i = 3; i <= n / i; i += 2)
                  if (n % i == 0)
                      return false;

              return true;
          }

          /**
           * Finds the first odd prime at or above {@code n}.
           *
           * @param n lower bound for the search
           * @return the smallest odd prime that is {@code >= n}
           */
          private static int nextPrime(int n)
          {
              if (n % 2 == 0)
                  n++;
              while (!isPrime(n))
                  n += 2;
              return n;
          }
      }

    5.4 不用链表的散列表

    5.4.1 线性探测法

    只要表足够大,总能够找到一个自由单元,但是如此花费的时间是相当多的。更糟的是,即使表相对较空,这样占据的单元也会开始形成一些区块,其结果称为一次聚集,就是说,散列到区块中的任何关键字都需要多次试选单元才能够解决冲突,然后该关键字被添加到相应的区块中。

    5.4.2 平方探测法

    平方探测是消除线性探测中一次聚集问题的冲突解决方法。虽然平方探测排除了一次聚集,但是散列到同一位置上的那些元素将探测相同的备选单元。这叫作二次聚集。

      /**
       * Open-addressing hash set that resolves collisions with quadratic probing
       * and uses lazy deletion (removed entries stay in place, marked inactive).
       * The table length is always prime, and the table is rebuilt at roughly
       * twice the length once more than half of its cells are occupied.
       *
       * @param <AnyType> element type; lookups rely on its {@code equals} and
       *                  {@code hashCode}
       */
      public class QuadraticProbingHashTable<AnyType>
      {
          private static final int DEFAULT_TABLE_SIZE = 101;

          /** Table cells; {@code null} means the cell was never used. */
          private HashEntry<AnyType>[] array;
          /** Number of occupied cells, counting lazily-deleted entries. */
          private int currentSize;

          /** Creates a table with the default number of cells. */
          public QuadraticProbingHashTable()
          {
              this(DEFAULT_TABLE_SIZE);
          }

          /**
           * Creates a table whose length is the first prime at or above
           * {@code size}.
           *
           * @param size requested (approximate) number of cells
           */
          public QuadraticProbingHashTable(int size)
          {
              allocateArray(size);
              doClear();
          }

          /** Empties the table while keeping its current length. */
          public void makeEmpty()
          {
              doClear();
          }

          /**
           * Reports whether an element equal to {@code x} is stored and not deleted.
           *
           * @param x the element to look for
           * @return {@code true} if an active entry equal to {@code x} exists
           */
          public boolean contains(AnyType x)
          {
              return isActive(findPos(x));
          }

          /**
           * Adds {@code x} unless an equal active element is already present.
           * The table is rebuilt when more than half of its cells are occupied.
           *
           * @param x the element to add
           */
          public void insert(AnyType x)
          {
              int currentPos = findPos(x);
              if (isActive(currentPos))
                  return;

              // findPos may stop on a lazily-deleted entry equal to x; reviving
              // that cell does not occupy a new one, so it must not be counted.
              boolean claimsNewCell = array[currentPos] == null;
              array[currentPos] = new HashEntry<>(x, true);

              // The growth decision depends on how many cells are occupied,
              // not on which cell index happened to be used.
              if (claimsNewCell && ++currentSize > array.length / 2)
                  rehash();
          }

          /**
           * Marks the element equal to {@code x} as deleted, if it is present.
           * The cell stays occupied so that probe sequences passing through it
           * remain intact.
           *
           * @param x the element to remove
           */
          public void remove(AnyType x)
          {
              int currentPos = findPos(x);
              if (isActive(currentPos))
                  array[currentPos].isActive = false;
          }

          /** A table cell: the stored element plus its lazy-deletion flag. */
          private static class HashEntry<AnyType>
          {
              public AnyType element;
              public boolean isActive;

              public HashEntry(AnyType e)
              { this(e, true); }

              public HashEntry(AnyType e, boolean i)
              {
                  element = e;
                  isActive = i;
              }
          }

          /** Resets the occupancy count and nulls out every cell. */
          private void doClear()
          {
              currentSize = 0;
              for (int i = 0; i < array.length; i++)
                  array[i] = null;
          }

          /** Replaces the table with an empty one of the first prime length at or above {@code arraySize}. */
          @SuppressWarnings("unchecked") // generic arrays cannot be created directly; only HashEntry<AnyType> values are stored
          private void allocateArray(int arraySize)
          { array = new HashEntry[nextPrime(arraySize)]; }

          /**
           * Follows the quadratic probe sequence for {@code x}.
           *
           * @param x the element to locate
           * @return the index of the cell holding an element equal to {@code x}
           *         (active or deleted), or of the first never-used cell reached
           */
          private int findPos(AnyType x)
          {
              int offset = 1;
              int currentPos = myhash(x);

              while (array[currentPos] != null && !array[currentPos].element.equals(x))
              {
                  // Adding 1, 3, 5, ... visits hash + 1, hash + 4, hash + 9, ...
                  currentPos += offset;
                  offset += 2;
                  if (currentPos >= array.length)
                      currentPos -= array.length;
              }
              return currentPos;
          }

          /** Returns whether the cell at {@code currentPos} holds a non-deleted entry. */
          private boolean isActive(int currentPos)
          { return array[currentPos] != null && array[currentPos].isActive; }

          /** Rebuilds the table at roughly twice the length, dropping deleted entries. */
          private void rehash()
          {
              HashEntry<AnyType>[] oldArray = array;

              // allocateArray rounds the requested length up to a prime itself
              allocateArray(2 * oldArray.length);
              currentSize = 0;

              for (HashEntry<AnyType> entry : oldArray)
                  if (entry != null && entry.isActive)
                      insert(entry.element);
          }

          /** Maps {@code x.hashCode()} to a cell index in {@code [0, array.length)}. */
          private int myhash(AnyType x)
          {
              int hashVal = x.hashCode() % array.length;
              // % keeps the sign of the dividend, so negative hash codes need shifting
              if (hashVal < 0)
                  hashVal += array.length;

              return hashVal;
          }

          /**
           * Finds the first odd prime at or above {@code n}.
           *
           * @param n lower bound for the search
           * @return the smallest odd prime that is {@code >= n}
           */
          private static int nextPrime(int n)
          {
              if ((n & 1) == 0)
                  n++;

              while (!isPrime(n))
                  n += 2;

              return n;
          }

          /**
           * Trial-division primality test.
           *
           * @param n the number to test
           * @return {@code true} if {@code n} is prime; values below 2 are not prime
           */
          private static boolean isPrime(int n)
          {
              if (n < 2)
                  return false;
              if (n == 2 || n == 3)
                  return true;
              if ((n & 1) == 0)
                  return false;

              // "i <= n / i" is "i * i <= n" without the risk of int overflow
              for (int i = 3; i <= n / i; i += 2)
                  if (n % i == 0)
                      return false;

              return true;
          }
      }

    5.4.3 双散列 最后一个冲突解决方法

    5.5 再散列 对于使用平方探测的开放定址散列法,如果散列表填得太满,那么操作的运行时间将开始消耗过长,且插入操作可能失败。这可能发生在有太多的移除和插入混合的场合。此时,一个解决方法是建立另外一个大约两倍大的表(而且使用一个相关的新散列函数),扫描整个原始散列表,计算每个(未删除)元素的新散列值并将其插入到新表中。

    5.6 标准库中的散列表

    HashSet 和 HashMap 通常是用分离链接散列实现的。

    5.7.2 布谷鸟散列

      1 import java.util.Random;
      2 
      3 public class CuckooHashTable<AnyType>
      4 {
      5     public CuckooHashTable(HashFamily<? super AnyType>hf) { this(hf, DEFAULLT_TABLE_SIZE); }
      6 
      7     public CuckooHashTable(HashFamily<? super AnyType>hf, int size)
      8     {
      9         allocateArray(nextPrime(size));
     10         doClear();
     11         hashFunctions = hf;
     12         numHashFunctions = hf.getNumberOfFunctions();
     13     }
     14 
     15     private Random r = new Random();
     16 
     17     private static final double MAX_LOAD = 0.4;
     18     private static final int ALLOWED_REHASHES = 1;
     19 
     20     private int rehashes = 0;
     21 
     22    private boolean insertHelper1(AnyType x)
     23    {
     24        final int COUNT_LINIT = 100;
     25        
     26        while (true)
     27        {
     28            int lastPos = 1;
     29            int pos;
     30            for (int count = 0; count < COUNT_LINIT; count++)
     31            {
     32                for (int i = 0; i < numHashFunctions; i++)
     33                {
     34                    pos = myhash(x, i);
     35                    if (array[pos] == null)
     36                    {
     37                        array[pos] = x;
     38                        currentSize++;
     39                        return true;
     40                    }
     41                }
     42                int i = 0;
     43                do 
     44                {
     45                    pos = myhash(x, r.nextInt(numHashFunctions));
     46                }while (pos == lastPos && i++ < 5);
     47                
     48                AnyType tmp = array[lastPos = pos];
     49                array[pos] = x;
     50                x = tmp;
     51            }
     52            if (++rehashes > ALLOWED_REHASHES)
     53            {
     54                expand();
     55                rehashes = 0;
     56            }
     57            else
     58                rehash();
     59        }
     60    }
     61     
     62 
     63     public boolean insert(AnyType x)
     64     {
     65         if (contains(x))
     66             return false;
     67         if (currentSize >= array.length / MAX_LOAD)
     68             expand();
     69         return insertHelper1(x);
     70     }
     71 
     72     private int myhash(AnyType x, int which)
     73     {
     74         int hashVal = hashFunctions.hash(x, which);
     75 
     76         hashVal %= array.length;
     77         if (hashVal < 0)
     78             hashVal += array.length;
     79 
     80         return hashVal;
     81     }
     82 
     83     private void expand(){ rehash((int)(array.length / MAX_LOAD));}
     84 
     85     private void rehash()
     86     {
     87         hashFunctions.generateNewFunctions();
     88         rehash(array.length);
     89     }
     90 
     91     private void rehash(int newLength)
     92     {
     93         AnyType[] oldArray = array;
     94         allocateArray(nextPrime(newLength));
     95 
     96         currentSize = 0;
     97 
     98         for (AnyType str : oldArray)
     99             if (str != null)
    100                 insert(str);
    101     }
    102 
    103     public int size(){ return currentSize; }
    104 
    105     public int capacity(){ return array.length; }
    106 
    107     private int findPos(AnyType x)
    108     {
    109         for (int i = 0; i < numHashFunctions; i++)
    110         {
    111             int pos = myhash(x, i);
    112             if (array[pos] != null && array[pos].equals(x))
    113                 return pos;
    114         }
    115         return -1;
    116     }
    117 
    118     public boolean remove(AnyType x)
    119     {
    120         int pos = findPos(x);
    121         if (pos != -1)
    122         {
    123             array[pos] = null;
    124             currentSize--;
    125         }
    126         return pos != -1;
    127     }
    128 
    129     public boolean contains(AnyType x){ return findPos(x) != -1;}
    130 
    131     public void makeEmpty(){ doClear(); }
    132 
    133     private void doClear()
    134     {
    135         currentSize = 0;
    136         for (int i = 0; i < array.length; i++)
    137             array[i] = null;
    138     }
    139 
    140     private static final int DEFAULLT_TABLE_SIZE = 101;
    141 
    142     private final HashFamily<? super AnyType>hashFunctions;
    143     private final int numHashFunctions;
    144     private AnyType[] array;
    145     private int currentSize;
    146 
    147     private void allocateArray(int arraySize) { array = (AnyType[])new Object[arraySize]; }
    148 
    149     protected static int nextPrime(int n)
    150     {
    151         if ((n&1) == 0)
    152             n++;
    153         for (; !isPime(n); n += 2)
    154             ;
    155         return n;
    156     }
    157 
    158     private static boolean isPime(int n)
    159     {
    160         if (n == 2 || n == 3)
    161             return true;
    162         if (n == 1 || (n&1) == 0)
    163             return false;
    164         for (int i = 3; i * i <= n; i += 2)
    165             if (n % i == 0)
    166                 return false;
    167 
    168         return true;
    169     }
    170 }

    5.7.3 跳房子散列的思路是,用事先确定的、对计算机的底层体系结构而言是最优的一个常数,给探测序列的最大长度加个上界。这样做可以给出常数级的最坏查询时间,并且与布谷鸟散列一样,查询可以并行化,以同时检查可用位置的有限集。 

  • 相关阅读:
    saltstack
    python一个命令开启http服务器
    常用服务安装部署
    位置1
    linux中python3安装和使用
    Linux基础系统优化
    Shell基本命令
    linux系统目录结构
    远程连接linux服务器
    VMware和Centos安装使用
  • 原文地址:https://www.cnblogs.com/tjj-love-world/p/10561157.html
Copyright © 2011-2022 走看看