zoukankan      html  css  js  c++  java
  • Squid--hash代码分析

    #ifndef SQUID_HASH_H
    #define SQUID_HASH_H
    
    /* Callback signatures and short type names used by the hash table API. */
    /* HASHFREE: releases one item; hashFreeItems() calls it once per collected link. */
    typedef void HASHFREE(void *);
    /* HASHCMP: compares two keys; hash_lookup() treats a return value of 0 as a match. */
    typedef int HASHCMP(const void *, const void *);
    /* HASHHASH: maps (key, table size) to the bucket index used for that key. */
    typedef unsigned int HASHHASH(const void *, unsigned int);
    typedef struct _hash_link hash_link;
    typedef struct _hash_table hash_table;
    
    /* One node of the table; nodes that share a bucket are chained through 'next'. */
    struct _hash_link {
        void *key;          /* key passed to the table's hash and compare callbacks */
        hash_link *next;    /* next node in the same bucket chain, or NULL at the end */
    };
    /* The hash table: an array of bucket chains plus the state of the single built-in traversal. */
    struct _hash_table {
        hash_link **buckets;    /* array of 'size' chain heads (one hash_link pointer per bucket) */
        HASHCMP *cmp;           /* key comparison callback */
        HASHHASH *hash;         /* callback that turns a key into a bucket index */
        unsigned int size;      /* number of entries in 'buckets' */
        unsigned int current_slot;  /* index of the bucket the hash_first()/hash_next() walk is in */
        hash_link *next;        /* node the next hash_next() call will return; NULL when no walk is active */
        int count;      /* number of hash_link nodes currently linked into the table */
    };
    
    /* Creation, insertion and removal. Links are owned by the caller; the table only chains them. */
    SQUIDCEXTERN hash_table *hash_create(HASHCMP *, int, HASHHASH *);
    SQUIDCEXTERN void hash_join(hash_table *, hash_link *);
    SQUIDCEXTERN void hash_remove_link(hash_table *, hash_link *);
    /* Picks the entry of the built-in prime list that is closest to n. */
    SQUIDCEXTERN int hashPrime(int n);
    /* Lookup by key; returns NULL when no link matches. */
    SQUIDCEXTERN hash_link *hash_lookup(hash_table *, const void *);
    /* Traversal: hash_first() starts a walk, hash_next() yields links, hash_last() resets the state. */
    SQUIDCEXTERN void hash_first(hash_table *);
    SQUIDCEXTERN hash_link *hash_next(hash_table *);
    SQUIDCEXTERN void hash_last(hash_table *);
    /* Head of one bucket chain, or NULL when the index is out of range. */
    SQUIDCEXTERN hash_link *hash_get_bucket(hash_table *, unsigned int);
    /* Teardown: hashFreeItems() runs a callback on every link, hashFreeMemory() frees the table itself. */
    SQUIDCEXTERN void hashFreeMemory(hash_table *);
    SQUIDCEXTERN void hashFreeItems(hash_table *, HASHFREE *);
    /* Ready-made HASHHASH callbacks for NUL-terminated keys. */
    SQUIDCEXTERN HASHHASH hash_string;
    SQUIDCEXTERN HASHHASH hash4;
    /* Returns the link's key viewed as a C string. */
    SQUIDCEXTERN const char *hashKeyStr(hash_link *);
    
    /*  Prime table sizes suggested by Squid.
     *  Here are some good prime number choices.  It's important not to
     *  choose a prime number that is too close to exact powers of 2.
     *
     *  HASH_SIZE 103               // prime number < 128
     *  HASH_SIZE 229               // prime number < 256
     *  HASH_SIZE 467               // prime number < 512
     *  HASH_SIZE 977               // prime number < 1024
     *  HASH_SIZE 1979              // prime number < 2048
     *  HASH_SIZE 4019              // prime number < 4096
     *  HASH_SIZE 6037              // prime number < 6144
     *  HASH_SIZE 7951              // prime number < 8192
     *  HASH_SIZE 12149             // prime number < 12288
     *  HASH_SIZE 16231             // prime number < 16384
     *  HASH_SIZE 33493             // near 32768 (NOTE: this value is above 32768, not below it)
     *  HASH_SIZE 65357             // prime number < 65536
     */
    /* Bucket count used by hash_create() when the caller passes a size of 0. */
    #define  DEFAULT_HASH_SIZE 7951 /* prime number < 8192 */
    
    #endif /* SQUID_HASH_H */
    
    /*
     * DEBUG: section 00    Hash Tables
     * AUTHOR: Harvest Derived
     *
     * SQUID Web Proxy Cache          http://www.squid-cache.org/
     * ----------------------------------------------------------
     *
     *  Squid is the result of efforts by numerous individuals from
     *  the Internet community; see the CONTRIBUTORS file for full
     *  details.   Many organizations have provided support for Squid's
     *  development; see the SPONSORS file for full details.  Squid is
     *  Copyrighted (C) 2001 by the Regents of the University of
     *  California; see the COPYRIGHT file for full details.  Squid
     *  incorporates software developed and/or copyrighted by other
     *  sources; see the CREDITS file for full details.
     *
     *  This program is free software; you can redistribute it and/or modify
     *  it under the terms of the GNU General Public License as published by
     *  the Free Software Foundation; either version 2 of the License, or
     *  (at your option) any later version.
     *
     *  This program is distributed in the hope that it will be useful,
     *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     *  GNU General Public License for more details.
     *
     *  You should have received a copy of the GNU General Public License
     *  along with this program; if not, write to the Free Software
     *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
     *
     */
    
    #include "squid.h"
    #include "hash.h"
    #include "profiler/Profiler.h"
    
    #if HAVE_STDIO_H
    #include <stdio.h>
    #endif
    #if HAVE_STDLIB_H
    #include <stdlib.h>
    #endif
    #if HAVE_STRING_H
    #include <string.h>
    #endif
    #if HAVE_UNISTD_H
    #include <unistd.h>
    #endif
    #if HAVE_GNUMALLLOC_H
    #include <gnumalloc.h>
    #elif HAVE_MALLOC_H
    #include <malloc.h>
    #endif
    #if HAVE_ASSERT_H
    #include <assert.h>
    #endif
    #if HAVE_MATH_H
    #include <math.h>
    #endif
    
    /* Forward declaration: moves the traversal cursor on to the next non-empty bucket. */
    static void hash_next_bucket(hash_table * hid);
    
    /*
     * hash_string() and hash4() are ready-made HASHHASH callbacks that can be
     * handed to hash_create() as its hash_func argument.  Both read the key as
     * a NUL-terminated byte string and stop at the first zero byte.
     */
    /**
     *  hash_string - XORs together 271 * byte for every byte of the key,
     *  mixes in 271 * length, and reduces the result modulo 'size'.
     *
     *  'size' must be non-zero (it is used as the modulus).
     */
    unsigned int
    hash_string(const void *data, unsigned int size)
    {
        const unsigned char *bytes = static_cast<const unsigned char *>(data);
        unsigned int mixed = 0;
        unsigned int length = 0;

        for (; *bytes != 0; ++bytes) {
            mixed ^= 271 * *bytes;
            ++length;
        }
        return (mixed ^ (length * 271)) % size;
    }
    
    /* the following function(s) were adapted from
     *    usr/src/lib/libc/db/hash_func.c, 4.4 BSD lite */

    /**
     *  hash4 - Chris Torek's string hash: h = h * 33 + c for every character
     *  of the NUL-terminated key, reduced modulo 'size'.
     *
     *  The original spelled this out as a switch plus an 8-way unrolled loop
     *  built from function-local macros (HASH4, HASH4a, HASH4b) that stayed
     *  defined for the rest of the file.  The single loop below visits the
     *  same characters in the same order and yields the same value, without
     *  the macros and without a separate strlen() pass.
     *
     *  'size' must be non-zero (it is used as the modulus).
     */
    unsigned int
    hash4(const void *data, unsigned int size)
    {
        const char *key = static_cast<const char *>(data);
        unsigned int h = 0;

        /* *key is a plain char, exactly as before, so bytes >= 0x80 keep
         * whatever signedness the platform gives 'char'. */
        for (; *key != '\0'; ++key)
            h = (h << 5) + h + *key;

        return h % size;
    }
    
    /**
     *  hash_create - builds an empty hash table.
     *
     *  cmp_func   key comparison callback stored in the table
     *  hash_sz    number of buckets; 0 selects DEFAULT_HASH_SIZE
     *  hash_func  callback that maps a key to a bucket index
     *
     *  Returns the new table.  The results of xcalloc() are used without a
     *  NULL check, and 'count' is never assigned here: it relies on the
     *  table memory coming back zero-filled (presumably xcalloc() behaves
     *  like calloc() - confirm against its definition).
     */
    hash_table *
    hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func)
    {
        hash_table *table = (hash_table *)xcalloc(1, sizeof(hash_table));

        table->size = hash_sz ? (unsigned int) hash_sz : (unsigned int) DEFAULT_HASH_SIZE;
        /* one chain head per bucket */
        table->buckets = (hash_link **)xcalloc(table->size, sizeof(hash_link *));
        table->hash = hash_func;
        table->cmp = cmp_func;
        /* no traversal in progress yet */
        table->current_slot = 0;
        table->next = NULL;
        return table;
    }
    
    /**
     *  hash_join - links 'lnk' into table 'hid' under its key lnk->key.
     *
     *  Nothing is copied: the node is pushed onto the front of the chain of
     *  the bucket selected by the table's hash callback, and the node count
     *  is incremented.
     */
    void
    hash_join(hash_table * hid, hash_link * lnk)
    {
        const int slot = hid->hash(lnk->key, hid->size);
        hash_link **head = &hid->buckets[slot];

        lnk->next = *head;
        *head = lnk;
        ++hid->count;
    }
    
    /**
     *  hash_lookup - finds the node stored under key 'k' in table 'hid'.
     *
     *  Walks the chain of the bucket chosen by the hash callback and returns
     *  the first node for which the compare callback reports 0; returns NULL
     *  when the chain holds no such node.  'k' must not be NULL.
     */
    hash_link *
    hash_lookup(hash_table * hid, const void *k)
    {
        hash_link *found = NULL;
        int slot;

        PROF_start(hash_lookup);
        assert(k != NULL);
        slot = hid->hash(k, hid->size);
        hash_link *node = hid->buckets[slot];
        while (node != NULL) {
            if (hid->cmp(k, node->key) == 0) {
                found = node;
                break;
            }
            /* a node that points at itself would make this loop spin forever */
            assert(node != node->next);
            node = node->next;
        }
        PROF_stop(hash_lookup);
        return found;
    }
    
    /*
     * Moves the traversal cursor forward: while hid->next is NULL, step
     * current_slot to the following bucket and load that bucket's chain head,
     * stopping once a non-empty bucket is found or the slots run out.
     */
    static void
    hash_next_bucket(hash_table * hid)
    {
        while (hid->next == NULL) {
            ++hid->current_slot;
            if (hid->current_slot >= hid->size)
                break;
            hid->next = hid->buckets[hid->current_slot];
        }
    }
    
    /**
     *  hash_first - starts a traversal for hash_next().
     *
     *  Asserts that no traversal is already in progress, rewinds to bucket 0
     *  and positions the cursor on the first node of the first non-empty
     *  bucket (the cursor stays NULL when the table is empty).
     */
    void
    hash_first(hash_table * hid)
    {
        hash_link *head;

        assert(hid->next == NULL);
        hid->current_slot = 0;
        head = hid->buckets[0];
        hid->next = head;
        if (!head)
            hash_next_bucket(hid);
    }
    
    /**
     *  hash_next - returns the node under the traversal cursor and advances
     *  the cursor; returns NULL once the traversal is finished.
     *
     *  hash_first() MUST be called before the first hash_next().
     */
    hash_link *
    hash_next(hash_table * hid)
    {
        hash_link *current = hid->next;

        if (current != NULL) {
            hid->next = current->next;
            /* end of this chain: continue with the next non-empty bucket */
            if (hid->next == NULL)
                hash_next_bucket(hid);
        }
        return current;
    }
    
    /**
     *  hash_last - abandons any traversal: clears the cursor and rewinds the
     *  current bucket index to 0.
     */
    void
    hash_last(hash_table * hid)
    {
        assert(hid != NULL);
        hid->current_slot = 0;
        hid->next = NULL;
    }
    
    /**
     *  hash_remove_link - unlinks node 'hl' from table 'hid'.
     *
     *  The node is only detached from its bucket chain, never freed.  If the
     *  traversal cursor is sitting on 'hl' it is moved on first, so an
     *  ongoing hash_next() walk stays valid.  An assertion fires when 'hl'
     *  is not present in the bucket its key hashes to.
     */
    void
    hash_remove_link(hash_table * hid, hash_link * hl)
    {
        assert(hl != NULL);
        int slot = hid->hash(hl->key, hid->size);

        /* 'ref' is the pointer that currently points at the node under
         * inspection: the bucket head first, then each node's 'next'. */
        hash_link **ref = &hid->buckets[slot];
        while (*ref != NULL && *ref != hl)
            ref = &(*ref)->next;

        if (*ref == NULL) {
            assert(0);
            return;
        }

        *ref = hl->next;
        if (hid->next == hl) {
            hid->next = hl->next;
            if (hid->next == NULL)
                hash_next_bucket(hid);
        }
        --hid->count;
    }
    
    /**
     *  hash_get_bucket - returns the first node of bucket number 'bucket'
     *  in table 'hid', or NULL when the index is outside the table.
     *  An empty bucket also yields NULL.
     */
    hash_link *
    hash_get_bucket(hash_table * hid, unsigned int bucket)
    {
        return (bucket < hid->size) ? hid->buckets[bucket] : NULL;
    }
    /*
     * hashFreeItems - collects every node of 'hid' into a temporary array and
     * then calls free_func on each collected node.  Collecting first means the
     * traversal never touches a node that free_func has already been given.
     * The bucket array and hid->count are not modified by this function.
     */
    void
    hashFreeItems(hash_table * hid, HASHFREE * free_func)
    {
        int collected = 0;
        hash_link **pending = (hash_link **)xcalloc(hid->count, sizeof(hash_link *));
        hash_link *node;

        hash_first(hid);
        for (;;) {
            node = hash_next(hid);
            /* never store more pointers than 'pending' has room for */
            if (node == NULL || collected >= hid->count)
                break;
            pending[collected++] = node;
        }

        for (int k = 0; k < collected; ++k)
            free_func(pending[k]);
        xfree(pending);
    }
    /*
     * hashFreeMemory - releases the bucket array and the table structure.
     * Nodes are not touched; a NULL table is ignored.
     */
    void
    hashFreeMemory(hash_table * hid)
    {
        if (hid != NULL) {
            if (hid->buckets != NULL)
                xfree(hid->buckets);
            xfree(hid);
        }
    }
    
    /* Candidate table sizes, in ascending order (same list as the comment in hash.h). */
    static const int hash_primes[] = {
        103,
        229,
        467,
        977,
        1979,
        4019,
        6037,
        7951,
        12149,
        16231,
        33493,
        65357
    };

    /**
     *  hashPrime - returns the entry of hash_primes[] that is closest to 'n'
     *  on a logarithmic scale, i.e. the one minimising |log(n) - log(prime)|.
     *
     *  For n <= 0 the logarithm does not exist; the previous floating-point
     *  version then fell through to the LARGEST prime.  Such requests now get
     *  the smallest table instead.
     *
     *  The comparison is done in exact integer arithmetic: the log distance
     *  grows with the ratio max(n, p) / min(n, p), so two candidates can be
     *  compared by cross-multiplying their ratios.  No libm call is needed.
     */
    int
    hashPrime(int n)
    {
        const int nprimes = (int) (sizeof(hash_primes) / sizeof(hash_primes[0]));
        int best_prime = hash_primes[0];

        if (n <= 0)
            return best_prime;

        for (int i = 1; i < nprimes; ++i) {
            const int p = hash_primes[i];
            /* ratio(x) = hi / lo >= 1; long long keeps the products exact
             * (at most about 2^31 * 65357). */
            const long long p_hi = (p > n) ? p : n;
            const long long p_lo = (p > n) ? n : p;
            const long long b_hi = (best_prime > n) ? best_prime : n;
            const long long b_lo = (best_prime > n) ? n : best_prime;

            /* ratio(p) <= ratio(best)  <=>  p_hi * b_lo <= b_hi * p_lo */
            if (p_hi * b_lo <= b_hi * p_lo)
                best_prime = p;
        }
        return best_prime;
    }
    
    /**
     *  hashKeyStr - returns the key of node 'hl' viewed as a constant C
     *  string.  The key pointer is only cast, never copied; it is the key
     *  itself, not a hash value.
     */
    const char *
    hashKeyStr(hash_link * hl)
    {
        const void *raw_key = hl->key;
        return static_cast<const char *>(raw_key);
    }
    
    #if USE_HASH_DRIVER
    /**
     *  hash-driver - Run with a big file as stdin to insert each line into the
     *  hash table, then prints the whole hash table, then deletes a random item,
     *  and prints the table again...
     */
    int
    main(void)
    {
        hash_table *hid;
        LOCAL_ARRAY(char, buf, BUFSIZ);
        LOCAL_ARRAY(char, todelete, BUFSIZ);
        hash_link *walker = NULL;
    
        todelete[0] = '';
        printf("init
    ");
    
        printf("creating hash table
    ");
        if ((hid = hash_create((HASHCMP *) strcmp, 229, hash4)) < 0) {
            printf("hash_create error.
    ");
            exit(1);
        }
        printf("done creating hash table: %d
    ", hid);
    
        while (fgets(buf, BUFSIZ, stdin)) {
            buf[strlen(buf) - 1] = '';
            printf("Inserting '%s' for item %p to hash table: %d
    ",
                   buf, buf, hid);
            hash_insert(hid, xstrdup(buf), (void *) 0x12345678);
            if (random() % 17 == 0)
                strcpy(todelete, buf);
        }
    
        printf("walking hash table...
    ");
        for (int i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) {
            printf("item %5d: key: '%s' item: %p
    ", i++, walker->key,
                   walker->item);
        }
        printf("done walking hash table...
    ");
    
        if (todelete[0]) {
            printf("deleting %s from %d
    ", todelete, hid);
            if (hash_delete(hid, todelete))
                printf("hash_delete error
    ");
        }
        printf("walking hash table...
    ");
        for (int i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) {
            printf("item %5d: key: '%s' item: %p
    ", i++, walker->key,
                   walker->item);
        }
        printf("done walking hash table...
    ");
    
        printf("driver finished.
    ");
        exit(0);
    }
    #endif
    

    下面具体分析:

    hash表整体结构:


    1、hash_create

    /**
     *  hash_create - builds an empty hash table.
     *
     *  cmp_func   key comparison callback stored in the table
     *  hash_sz    number of buckets; 0 selects DEFAULT_HASH_SIZE
     *  hash_func  callback that maps a key to a bucket index
     *
     *  Returns the new table.  The results of xcalloc() are used without a
     *  NULL check, and 'count' is never assigned here: it relies on the
     *  table memory coming back zero-filled (presumably xcalloc() behaves
     *  like calloc() - confirm against its definition).
     */
    hash_table *
    hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func)
    {
        hash_table *table = (hash_table *)xcalloc(1, sizeof(hash_table));

        table->size = hash_sz ? (unsigned int) hash_sz : (unsigned int) DEFAULT_HASH_SIZE;
        /* one chain head per bucket */
        table->buckets = (hash_link **)xcalloc(table->size, sizeof(hash_link *));
        table->hash = hash_func;
        table->cmp = cmp_func;
        /* no traversal in progress yet */
        table->current_slot = 0;
        table->next = NULL;
        return table;
    }

    创建hash表。需要三个参数:cmp_func、hash_sz、hash_func,其中hash_sz用来表示创建的hash表的桶链表的大小,如果为0,则使用默认的大小DEFAULT_HASH_SIZE.

    桶链表储存的数据类型为:hash_link * ,即它只存储hash_link节点的地址。初始化后的桶链表没有存储任何地址,全部为0。

    current_slot = 0,即遍历位置初始为buckets[0];hid->next是遍历游标(下一次调用hash_next()时将返回的hash_link节点),而不是“下一个桶”。它初始为NULL,表示当前没有正在进行的遍历。

    2、hash_join

    /**
     *  hash_join - links 'lnk' into table 'hid' under its key lnk->key.
     *
     *  Nothing is copied: the node is pushed onto the front of the chain of
     *  the bucket selected by the table's hash callback, and the node count
     *  is incremented.
     */
    void
    hash_join(hash_table * hid, hash_link * lnk)
    {
        const int slot = hid->hash(lnk->key, hid->size);
        hash_link **head = &hid->buckets[slot];

        lnk->next = *head;
        *head = lnk;
        ++hid->count;
    }

    首先调用hid->hash计算节点lnk应该插入的桶号i,将lnk的next指针指向桶i中链表的首节点,再将lnk的地址存入桶i,于是lnk成为桶i中链表的新首节点(头插法);最后hid->count加1。

    3、hash_lookup

    /**
     *  hash_lookup - finds the node stored under key 'k' in table 'hid'.
     *
     *  Walks the chain of the bucket chosen by the hash callback and returns
     *  the first node for which the compare callback reports 0; returns NULL
     *  when the chain holds no such node.  'k' must not be NULL.
     */
    hash_link *
    hash_lookup(hash_table * hid, const void *k)
    {
        hash_link *found = NULL;
        int slot;

        PROF_start(hash_lookup);
        assert(k != NULL);
        slot = hid->hash(k, hid->size);
        hash_link *node = hid->buckets[slot];
        while (node != NULL) {
            if (hid->cmp(k, node->key) == 0) {
                found = node;
                break;
            }
            /* a node that points at itself would make this loop spin forever */
            assert(node != node->next);
            node = node->next;
        }
        PROF_stop(hash_lookup);
        return found;
    }

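    首先用hid->hash对键k(k是键,不是哈希值)求出桶号b,walker指向桶b中链表的首节点;然后沿链表逐个用hid->cmp比较k与walker->key,返回0表示匹配,此时返回该节点;遍历完仍未匹配则返回NULL。循环中的assert用于检测节点的next指向自身而形成的死循环。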


    4、hash_remove_link

    /**
     *  hash_remove_link - unlinks node 'hl' from table 'hid'.
     *
     *  The node is only detached from its bucket chain, never freed.  If the
     *  traversal cursor is sitting on 'hl' it is moved on first, so an
     *  ongoing hash_next() walk stays valid.  An assertion fires when 'hl'
     *  is not present in the bucket its key hashes to.
     */
    void
    hash_remove_link(hash_table * hid, hash_link * hl)
    {
        assert(hl != NULL);
        int slot = hid->hash(hl->key, hid->size);

        /* 'ref' is the pointer that currently points at the node under
         * inspection: the bucket head first, then each node's 'next'. */
        hash_link **ref = &hid->buckets[slot];
        while (*ref != NULL && *ref != hl)
            ref = &(*ref)->next;

        if (*ref == NULL) {
            assert(0);
            return;
        }

        *ref = hl->next;
        if (hid->next == hl) {
            hid->next = hl->next;
            if (hid->next == NULL)
                hash_next_bucket(hid);
        }
        --hid->count;
    }

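    移除时P是一个二级指针,指向“指向当前节点的那个指针”(初始为&hid->buckets[i],之后为前一节点的&next),因此hl无论是首节点还是中间节点,处理方式完全相同。移除分两步:

    1、找到*P == hl后,执行*P = hl->next,把hl从链表中摘除(hl本身不会被释放)

    2、如果遍历游标hid->next恰好指向hl,则令hid->next = hl->next;若结果为NULL,再调用hash_next_bucket转到下一个非空桶。最后hid->count减1;如果在对应的桶中找不到hl,则触发assert(0)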


    5、hashFreeItems

    /*
     * hashFreeItems - collects every node of 'hid' into a temporary array and
     * then calls free_func on each collected node.  Collecting first means the
     * traversal never touches a node that free_func has already been given.
     * The bucket array and hid->count are not modified by this function.
     */
    void
    hashFreeItems(hash_table * hid, HASHFREE * free_func)
    {
        int collected = 0;
        hash_link **pending = (hash_link **)xcalloc(hid->count, sizeof(hash_link *));
        hash_link *node;

        hash_first(hid);
        for (;;) {
            node = hash_next(hid);
            /* never store more pointers than 'pending' has room for */
            if (node == NULL || collected >= hid->count)
                break;
            pending[collected++] = node;
        }

        for (int k = 0; k < collected; ++k)
            free_func(pending[k]);
        xfree(pending);
    }

    根据hid->count的大小分配数组list,用来暂存hash_link节点的地址。调用hash_first将hid->current_slot置为0,并让hid->next指向第一个非空桶中的首节点;然后反复调用hash_next取得表中的每一个hash_link节点,把地址依次存入list;最后对list中的每个地址调用free_func统一释放,再释放list本身。注意:该函数自身不会清空buckets,也不会修改hid->count。

    本文为Eliot原创,转载请注明出处:http://blog.csdn.net/xyw_blog/article/details/9791221

  • 相关阅读:
    批处理+7zip解压用纯数字加密的压缩包zip
    golang 读取 chrome保存的网站账号信息
    c++实现"扫描检测硬件改动"
    c++获取磁盘句柄
    golang设置title并获取窗口句柄
    golang获取文件的md5
    golang获取u盘序列号(通过读取注册表实现)
    golang从文件按行读取并输出
    golang cgo注意事项
    python调用远程chromedriver.exe、selenium抓包方法
  • 原文地址:https://www.cnblogs.com/riskyer/p/3241326.html
Copyright © 2011-2022 走看看