zoukankan      html  css  js  c++  java
  • 记录几个经典的字符串hash算法

    记录几个经典的字符串hash算法,方便以后查看:

    推荐一篇文章:

    http://www.partow.net/programming/hashfunctions/#

    (1)暴雪字符串hash

      #include <ctype.h>
      #include <stdbool.h>
      #include <stdint.h>
      #include <stdio.h>
      #include <stdlib.h>
      #include <string.h>
      6 
      /* Number of slots allocated for the hash index table. */
      #define MAXTABLELEN 102400000

      /*
       * One slot of the open-addressed hash index table.
       *
       * The string itself is not stored; a slot only keeps the two
       * verification hashes produced by HashString() with hash types 1 and 2.
       * They are unsigned long because that is what HashString() returns;
       * keeping them in a signed long would make every value above LONG_MAX
       * an implementation-defined conversion.
       */
      typedef struct  _HASHTABLE
      {
          unsigned long nHashA;   /* verification hash, HashString(s, 1) */
          unsigned long nHashB;   /* verification hash, HashString(s, 2) */
          bool bExists;           /* true once the slot is occupied */
      }HASHTABLE, *PHASHTABLE;

      const unsigned long nTableLength = MAXTABLELEN;
      unsigned long m_tablelength;    /* length of the hash index table */
      HASHTABLE *m_HashIndexTable;    /* the table itself, allocated by InitHashTable() */
      unsigned long cryptTable[0x500]; /* lookup table filled by InitCryptTable() */

      int collc = 0;   /* incremented by Hash() when the table has no free slot */
      int errstr = 0;  /* incremented by Hashed() when a string is not found */
     23 
     24 void InitCryptTable()  
     25 {   
     26     unsigned long seed = 0x00100001, index1 = 0, index2 = 0, i;  
     27  
     28     for( index1 = 0; index1 < 0x100; index1++ )  
     29     {   
     30         for( index2 = index1, i = 0; i < 5; i++, index2 += 0x100 )  
     31         {   
     32             unsigned long temp1, temp2;  
     33             seed = (seed * 125 + 3) % 0x2AAAAB;  
     34             temp1 = (seed & 0xFFFF) << 0x10;  
     35             seed = (seed * 125 + 3) % 0x2AAAAB;  
     36             temp2 = (seed & 0xFFFF);  
     37             cryptTable[index2] = ( temp1 | temp2 );   
     38         }   
     39     }   
     40 }  
     41  
     42 /************************************************************************/
     43 /*函数名:HashString
     44  *功  能:求取哈希值   
     45  *返回值:返回hash值
     46  ************************************************************************/
     47 unsigned long HashString(char *lpszString, unsigned long dwHashType)
     48 {
     49     unsigned char *key = (unsigned char *)lpszString;
     50     unsigned long seed1 = 0x7FED7FED, seed2 = 0xEEEEEEEE;
     51     int ch; 
     52 
     53     while(*key != 0)
     54     {   
     55         ch = toupper(*key++);
     56 
     57         seed1 = cryptTable[(dwHashType << 8) + ch] ^ (seed1 + seed2);
     58         seed2 = ch + seed1 + seed2 + (seed2 << 5) + 3;
     59     }   
     60     return seed1;
     61 }
     62 /************************************************************************/
     63 /*函数名:Hashed
     64  *功  能:检测一个字符串是否被hash过
     65  *返回值:如果存在,返回位置;否则,返回-1
     66  ************************************************************************/
     67 unsigned long Hashed(char * lpszString)  
     68  
     69 {   
     70     const unsigned long HASH_OFFSET = 0, HASH_A = 1, HASH_B = 2;  
     71     //不同的字符串三次hash还会碰撞的率无限接近于不可能
     72     unsigned long nHash = HashString(lpszString, HASH_OFFSET);  
     73     unsigned long nHashA = HashString(lpszString, HASH_A);  
     74     unsigned long nHashB = HashString(lpszString, HASH_B);  
     75     unsigned long nHashStart = nHash % m_tablelength;  
     76     unsigned long nHashPos = nHashStart;  
     77  
     78     while (m_HashIndexTable[nHashPos].bExists)  
     79     {   
     80         if (m_HashIndexTable[nHashPos].nHashA == nHashA && m_HashIndexTable[nHashPos].nHashB == nHashB)   
     81             return nHashPos;   
     82         else  
     83             nHashPos = (nHashPos + 1) % m_tablelength;  
     84  
     85         if (nHashPos == nHashStart)   
     86             break;   
     87     }  
     88     errstr++;
     89  
     90     return -1; //没有找到  
     91 }  
     92  
     93 /************************************************************************/
     94 /*函数名:Hash
     95  *功  能:hash一个字符串 
     96  *返回值:成功,返回true;失败,返回false
     97  ************************************************************************/
     98 bool Hash(char * lpszString)
     99 {  
    100     const unsigned long HASH_OFFSET = 0, HASH_A = 1, HASH_B = 2;  
    101     unsigned long nHash = HashString(lpszString, HASH_OFFSET);  
    102     unsigned long nHashA = HashString(lpszString, HASH_A);  
    103     unsigned long nHashB = HashString(lpszString, HASH_B);  
    104     unsigned long nHashStart = nHash % m_tablelength, 
    105     nHashPos = nHashStart;  
    106  
    107     while (m_HashIndexTable[nHashPos].bExists)  
    108     {    
    109         nHashPos = (nHashPos + 1) % m_tablelength;  
    110         if (nHashPos == nHashStart) //一个轮回  
    111         { 
    112             collc ++; 
    113             //hash表中没有空余的位置了,无法完成hash
    114             return false;   
    115         }  
    116     }  
    117     m_HashIndexTable[nHashPos].bExists = true;  
    118     m_HashIndexTable[nHashPos].nHashA = nHashA;  
    119     m_HashIndexTable[nHashPos].nHashB = nHashB;  
    120  
    121     return true;  
    122 }
    123 
    124 int InitHashTable()
    125 {
    126     int i;
    127     
    128     InitCryptTable();  
    129     m_tablelength = nTableLength;
    130     
    131     m_HashIndexTable = (HASHTABLE *)malloc(nTableLength * sizeof(HASHTABLE));
    132     if (NULL == m_HashIndexTable) {
    133         printf("Init HashTable failure!!
    ");
    134         return -1;
    135     }
    136 
    137     for (i = 0; i < nTableLength; i++ )  
    138     {  
    139         m_HashIndexTable[i].nHashA = 0;  
    140         m_HashIndexTable[i].nHashB = 0;  
    141         m_HashIndexTable[i].bExists = false;  
    142     }
    143     
    144     return 0;    
    145 }
    146 
    /*
     * Read the file "urllist" line by line, insert every line into the hash
     * table with Hash(), and print how many lines were read.
     *
     * Returns silently when the file cannot be opened or the table cannot be
     * initialised.
     */
    void do_test()
    {
        int count = 0;
        FILE *fp;
        char url[2048] = {0};

        /* the file is only read, so it is not opened for update */
        fp = fopen("urllist", "rb");
        if (NULL == fp) {
            return;
        }

        if (InitHashTable()) {
            fclose(fp);     /* do not leak the stream on the error path */
            return;
        }

        /* Loop on the result of fgets itself: testing feof() before reading
         * hashes the stale buffer once more after the last line. */
        while (fgets(url, sizeof url, fp) != NULL) {
            /* drop the line terminator so the key is the URL alone */
            url[strcspn(url, "\r\n")] = '\0';
            Hash(url);
            count++;
        }

        printf("count: %d\n", count);

        fclose(fp);
    }
    172 
    173 /*test main*/
    174 int main()
    175 {
    176     do_test();
    177 
    178     printf("conflict: %d
    ", collc);
    179     printf("not find: %d
    ", errstr);
    180 
    181     return 0;
    182 }

     (2)字符串hash算法 ELFhash 

    #include <stdio.h>
    #include <string.h>
    #include <stdlib.h>
    
    /* Number of buckets the hash value is reduced to. */
    #define MOD 10

    /*
     * ELF hash of a NUL-terminated string, reduced modulo MOD.
     *
     * Returns a bucket index in the range 0..MOD-1.
     *
     * Bytes are read as unsigned char so that values above 0x7F are not
     * sign-extended where plain char is signed, and the running value is
     * truncated to 32 bits so that a carry out of bit 31 is discarded even
     * where unsigned long is wider than 32 bits.  Together these make the
     * result the same on every platform.
     */
    int ELFhash(const char *key)
    {
        const unsigned char *p = (const unsigned char *)key;
        unsigned long h = 0;

        while (*p)
        {
            unsigned long g;

            h = ((h << 4) + *p++) & 0xFFFFFFFFUL;
            g = h & 0xF0000000UL;
            if (g)
                h ^= g >> 24;   /* fold the top nibble back into the low bits */
            h &= ~g;            /* and clear it, keeping h below 2^28 */
        }
        return (int)(h % MOD);
    }
    
    /*
     * Print the ELFhash bucket of the first command-line argument.
     * Returns -1 after printing a usage line when no argument is given.
     */
    int main(int argc, char **argv)
    {
        if (argc < 2) {
            printf("using %s <string>\n", argv[0]);
            return -1;
        }

        int num = 0;
        num = ELFhash(argv[1]);

        printf("num is %d\n", num);
        return 0;
    }
  • 相关阅读:
    数据分析之可反复与独立样本的T-Test分析
    朗朗上口的两幅对联
    mysql编码、数据表编码查看和改动总结
    2014-04-19编程之美初赛题目及答案解析
    测试集群模式安装实施Hadoop
    笔记:常用排序算法
    笔记:常用排序算法
    安装Redis并测试
    常见架构风格举例总结
    转载:PostgreSQL SQL的性能提升
  • 原文地址:https://www.cnblogs.com/wenqiang/p/6762071.html
Copyright © 2011-2022 走看看