zoukankan      html  css  js  c++  java
  • 实现哈希表

    散列函数满足以下的条件
    1、对输入值运算,得到一个固定长度的摘要(Hash value);
    2、不同的输入值可能对应同样的输出值;
    3、散列函数的输出值尽量接近均匀分布,即输出值y的分布函数F(y)=y/m, m为散列函数的最大值;
    4、x的微小变化可以使f(x)发生非常大的变化,即所谓“雪崩效应”(Avalanche effect),即|df(x)/dx| >> 1;

    哈希冲突(Hash collision)是无法避免的。哈希冲突的处理方法
    1, 链地址法

    //HashTable.h
    // Shorthand for unsigned int; the HashTable methods below store the bucket index returned by hasher() in it.
    typedef unsigned int UINT;
    
    // One entry of a bucket's singly linked collision chain (separate chaining).
    class Node{
    public:
    // Stores the key and a copy of str; the definition initialises next to 0.
    Node(int key, const string & str);
        int key;        // lookup key of this entry
        string value;   // payload stored under key
        Node * next;    // following node in the same bucket, or 0 at the tail
    };
    
    class HashTable{
    public:
        HashTable();
        bool Insert(int key, const string & value);
        bool Find(int key);
        string & operator[](int key); 
    private:
        bool Insert(Node ** node, int key, const string & value);
        Node * HashTable::FindNode(int key);
        unsigned int hasher(int key);//哈希函数
        enum{SIZE = 100};
        Node * nodes[SIZE];
    };
    
    //HashTable.cpp
    // Builds a detached chain node: copies the key and the string, and leaves
    // the link to the following node null.
    Node::Node(int nodeKey, const string & str)
        : key(nodeKey)
        , value(str)
        , next(0)
    {
    }
    
    // Starts with every bucket empty (null head pointer).
    HashTable::HashTable(){
        for(int bucket = 0; bucket < SIZE; ++bucket){
            nodes[bucket] = 0;
        }
    }
    
    // Simplest possible hash: the magnitude of the key modulo the bucket count.
    // Returns a bucket index in [0, SIZE).
    unsigned int HashTable::hasher(int key){
        // Compute the magnitude in unsigned arithmetic. abs(INT_MIN) is not
        // representable as an int, and a negative remainder converted to
        // unsigned would index far outside the bucket array.
        const unsigned int bits = static_cast<unsigned int>(key);
        const unsigned int magnitude = (key < 0) ? (0u - bits) : bits;
        return magnitude % SIZE;
    }
    
    bool HashTable::Insert(int key, const std::string &value){
        UINT adr = hasher(key);
        Node *  node = nodes[adr];
        if(node == 0){
            nodes[adr] = new Node(key, value);
        }else{
            return Insert(&node->next, key, value);
        }
    }
    
    // Walks the chain starting at the link *next until it reaches a null link,
    // hangs a freshly allocated node there and reports success.
    bool HashTable::Insert(Node * * next, int key, const string & value){
        Node * * link = next;
        while(*link != 0){
            link = &(*link)->next;
        }
        *link = new Node(key, value);
        return true;
    }
    
    bool HashTable::Find(int key){
        UINT adr = hasher(key);
        Node *  node = nodes[adr];
        if(node == 0){
            return false;
        }else{
            do{
                if(node->key == key){
                    return true;
                }else{
                    node = node->next;
                }
            }while(node != 0);
            return false;
        }
    }
    
    // Returns the first node in key's bucket whose key matches, or 0 when the
    // bucket is empty or no node matches.
    Node * HashTable::FindNode(int key){
        Node * cursor = nodes[hasher(key)];
        // Stop either on a match or when the chain runs out (cursor == 0).
        while(cursor != 0 && cursor->key != key){
            cursor = cursor->next;
        }
        return cursor;
    }
    
    // Gives read/write access to the value stored under key. The key must
    // already be present: a missing key trips the assertion instead of
    // inserting a default entry.
    string & HashTable::operator[](int key){
        Node * node = FindNode(key);
        assert(node != 0);
        string & stored = (*node).value;
        return stored;
    }
    
    //main.cpp
    int main()
    {
        HashTable ht;
        ht.Insert(1, "you");
        string value = ht[1];
        cout << value << endl;
        ht.Insert(101, "girl");
        value = ht[101];
        cout << value << endl;
        ht.Insert(201, "boy");
        value = ht[201];
        cout << value << endl;
        ht[201] = "man";
        cout << ht[201] << endl;
        cin.get();
        return 0;
    }

     2, 开放地址法
    为每个Hash值,建立一个Hash桶(Bucket),哈希桶的个数是固定的,桶的容量也是固定的。
    好处是查表的最大开销是可以确定的,因为最多处理的冲突数是确定的,所以算法的时间复杂度为O(1)+O(m),其中m为Hash桶容量。
    坏处是新建的表项可能会由于冲突过多,而不能装入Hash表中。
    http://www.360doc.com/content/13/0108/16/8363527_258987810.shtml

    3,线性探测再散列

    //StringHash.h
    #define MAXTABLELEN 1024    // default size of the hash index table
    // One slot of the hash index table. StringHash stores two check hashes of
    // a string in a slot rather than the string itself.
    typedef struct  _HASHTABLE  {    // hash index table entry
        long nHashA;        // first check hash (initialised to -1 by the StringHash constructor)
        long nHashB;        // second check hash (initialised to -1 by the StringHash constructor)
        bool bExists;       // true once the slot has been claimed by StringHash::Hash
    }HASHTABLE, *PHASHTABLE ; 
    // Open-addressing (linear probing) set of strings. A string is represented
    // by two check hashes stored in the slot chosen by a third hash; the
    // string itself is not kept.
    class StringHash  {
    public: 
        // Allocates an index table with nTableLength slots, all marked unused.
        StringHash(const long nTableLength = MAXTABLELEN); 
        // Releases the index table.
        ~StringHash(void);   
    private: 
        // The object owns m_HashIndexTable and deletes it in the destructor,
        // so a member-wise copy would delete the same array twice. Copying is
        // therefore disabled (declared private and intentionally undefined).
        StringHash(const StringHash &);
        StringHash & operator=(const StringHash &);

        unsigned long cryptTable[0x500];  // lookup table filled by InitCryptTable, read by HashString
        unsigned long m_tablelength;    // number of slots in the hash index table
        HASHTABLE *m_HashIndexTable;    // the hash index table itself
        void InitCryptTable(); // fills cryptTable before any hashing takes place
        // Hash function; dwHashType selects which section of cryptTable is used.
        unsigned long HashString(const string &lpszString, unsigned long dwHashType);
    public:  
        // Records url in the table; returns false when no free slot is left.
        bool Hash(string url);  
        // Checks whether url has been recorded: returns its slot index if so,
        // otherwise -1 (converted to unsigned long).
        unsigned long Hashed(string url);
    };  
    
    //StringHash.cpp
    // Prepares the crypt table and allocates the hash index table with every
    // slot marked unused.
    // nTableLength: requested number of slots; a value that is zero or
    // negative falls back to MAXTABLELEN.
    StringHash::StringHash(const long nTableLength )  {   
        InitCryptTable();   
        // A non-positive length would give new[] an invalid size and make the
        // "% m_tablelength" in Hash/Hashed divide by zero, so use the default.
        const long slotCount = (nTableLength > 0) ? nTableLength : MAXTABLELEN;
        m_tablelength = slotCount;    
        m_HashIndexTable = new HASHTABLE[slotCount];    
        // Index with long so the counter has the same range as the length.
        for ( long i = 0; i < slotCount; i++ ) {      
            m_HashIndexTable[i].nHashA = -1;         
            m_HashIndexTable[i].nHashB = -1;     
            m_HashIndexTable[i].bExists = false;   
        } 
    }
    // Frees the hash index table, if one is held, and resets the bookkeeping.
    StringHash::~StringHash(void)  {
        if ( m_HashIndexTable == NULL ) {
            return;     // nothing was allocated
        }
        delete [] m_HashIndexTable;
        m_HashIndexTable = NULL;
        m_tablelength = 0;
    }  
    // Fills all 0x500 entries of cryptTable from a fixed-seed pseudo-random
    // sequence. For each of the 0x100 rows, five entries spaced 0x100 apart
    // are written, each combining two successive 16-bit samples of the seed.
    void StringHash::InitCryptTable(){
        unsigned long seed = 0x00100001;
        for ( unsigned long row = 0; row < 0x100; ++row ) {
            unsigned long slot = row;
            for ( unsigned long section = 0; section < 5; ++section ) {
                // First sample supplies the upper 16 bits of the entry.
                seed = (seed * 125 + 3) % 0x2AAAAB;
                const unsigned long upper = (seed & 0xFFFF) << 0x10;
                // Second sample supplies the lower 16 bits.
                seed = (seed * 125 + 3) % 0x2AAAAB;
                const unsigned long lower = (seed & 0xFFFF);
                cryptTable[slot] = ( upper | lower );
                slot += 0x100;
            }
        }
    }    
    // Hashes lpszString case-insensitively (each byte is passed through
    // toupper) using the crypt table.
    // dwHashType selects which 0x100-entry section of cryptTable is used, so
    // different values give different hash functions over the same string.
    // Hashing stops at the first NUL byte. Returns the final value of seed1.
    unsigned long StringHash::HashString(const string& lpszString, unsigned long dwHashType){
        // Read the characters as unsigned bytes so they can index the table and
        // be handed to toupper safely. The string is only read, so constness is
        // kept (the previous const_cast had no target type and did not compile).
        const unsigned char *key = reinterpret_cast<const unsigned char *>(lpszString.c_str());
        unsigned long seed1 = 0x7FED7FED, seed2 = 0xEEEEEEEE;  
        int ch;     
        while(*key != 0){        
            ch = toupper(*key++);       
            seed1 = cryptTable[(dwHashType << 8) + ch] ^ (seed1 + seed2);       
            seed2 = ch + seed1 + seed2 + (seed2 << 5) + 3;    
         }    
         return seed1;     
    }    
    // Looks lpszString up in the index table.
    // Returns the slot index whose two check hashes match, or -1 (converted to
    // unsigned long) when probing reaches an unused slot or wraps back to the
    // starting slot without a match.
    unsigned long StringHash::Hashed(string lpszString){
        // Three independent hashes: one picks the starting slot, two verify the
        // match. The original author notes that distinct strings colliding on
        // all three is close to impossible.
        const unsigned long HASH_OFFSET = 0, HASH_A = 1, HASH_B = 2;
        const unsigned long offsetHash = HashString(lpszString, HASH_OFFSET);
        const unsigned long checkA = HashString(lpszString, HASH_A);
        const unsigned long checkB = HashString(lpszString, HASH_B);

        const unsigned long firstSlot = offsetHash % m_tablelength;
        unsigned long slot = firstSlot;
        for (;;) {
            const HASHTABLE &entry = m_HashIndexTable[slot];
            if (!entry.bExists) {
                break;              // unused slot ends the probe sequence
            }
            if (entry.nHashA == checkA && entry.nHashB == checkB) {
                return slot;
            }
            slot = (slot + 1) % m_tablelength;
            if (slot == firstSlot) {
                break;              // probed every slot once
            }
        }
        return -1; // not found
    }    
    // Records lpszString in the index table: probes linearly from the slot
    // picked by the offset hash to the first unused slot and stores the two
    // check hashes there.
    // Returns false when every slot is already in use, true otherwise.
    bool StringHash::Hash(string lpszString)  {
        const unsigned long HASH_OFFSET = 0, HASH_A = 1, HASH_B = 2;
        const unsigned long offsetHash = HashString(lpszString, HASH_OFFSET);
        const unsigned long checkA = HashString(lpszString, HASH_A);
        const unsigned long checkB = HashString(lpszString, HASH_B);

        const unsigned long firstSlot = offsetHash % m_tablelength;
        unsigned long slot = firstSlot;
        for (;;) {
            if (!m_HashIndexTable[slot].bExists) {
                break;              // found a free slot
            }
            slot = (slot + 1) % m_tablelength;
            if (slot == firstSlot) {
                // Came full circle: the table has no free slot left.
                return false;
            }
        }

        HASHTABLE &entry = m_HashIndexTable[slot];
        entry.bExists = true;
        entry.nHashA = checkA;
        entry.nHashB = checkB;
        return true;
    }

    注解:字符串的哈希函数
    你可以把哈希表存储在字符串数组中:先计算待查字符串的哈希值,再与已经存储的各字符串的哈希值进行比较。如果有匹配的哈希值,再通过字符串比较进行匹配验证。这种方法叫索引,根据数组的大小以及字符串的平均长度,可以将查找速度提高约100倍。

    // Naive shift-and-add string hash: starting from a fixed seed, each
    // character shifts the running hash left by one bit and is then added in.
    // Hashing stops at the terminating NUL; an empty string yields the seed.
    unsigned long HashString(char *lpszString){
        unsigned long hash = 0xf1e2d3c4;
        for (const char *cursor = lpszString; *cursor != 0; ++cursor) {
            hash = (hash << 1) + *cursor;
        }
        return hash;
    }

    上面代码中的散列算法在遍历字符串过程中,将哈希值左移一位,然后加上字符值。它会在较低的数据范围内产生相对可预测的输出,从而可能会产生大量冲突。
    MPQ格式,使用了一种非常复杂的散列算法(如下所示),产生完全不可预测的哈希值,这个算法十分有效,这就是所谓的单向散列算法。

    // Table-driven string hash (the surrounding text attributes it to the MPQ
    // format). Each byte is upper-cased, so the hash is case-insensitive.
    // dwHashType selects which 0x100-entry section of cryptTable is used.
    // Hashing stops at the terminating NUL. Returns the final value of seed1.
    unsigned long HashString(char *lpszFileName, unsigned long dwHashType){
        unsigned long seed1 = 0x7FED7FED;
        unsigned long seed2 = 0xEEEEEEEE;
        const unsigned long sectionBase = dwHashType << 8;
        // Walk the string as unsigned bytes so each one is a valid table offset.
        for (unsigned char *cursor = reinterpret_cast<unsigned char *>(lpszFileName); *cursor != 0; ++cursor) {
            const int ch = toupper(*cursor);
            seed1 = cryptTable[sectionBase + ch] ^ (seed1 + seed2);
            seed2 = ch + seed1 + seed2 + (seed2 << 5) + 3;
        }
        return seed1;
    }
  • 相关阅读:
    Java学习小记 29
    Java学习小记 28
    Java学习小记 27
    Java学习小记 26
    Java学习小记 25
    Java学习小记 24
    Java学习小记 23
    Java学习小记 22
    Java学习小记 21
    Java学习小记 20
  • 原文地址:https://www.cnblogs.com/qionglouyuyu/p/4850704.html
Copyright © 2011-2022 走看看