zoukankan      html  css  js  c++  java
  • 动手实践PHP7的HashTable

    参照PHP7的HashTable实现思路写一个精简版,加深对hashtable数据结构的理解。

    PHP7 HashTable数据结构:

    /*
     * HashTable Data Layout
     * =====================
     *
     *                 +=============================+
     *                 | HT_HASH(ht, ht->nTableMask) |
     *                 | ...                         |
     *                 | HT_HASH(ht, -1)             |
     *                 +-----------------------------+
     * ht->arData ---> | Bucket[0]                   |
     *                 | ...                         |
     *                 | Bucket[ht->nTableSize-1]    |
     *                 +=============================+
     */

    详细分析参见:PHP7 HashTable源码分析

    代码实现:

    1.基本数据结构

    /* Tagged value held by a bucket; `type` tells which member of `v` is live. */
    typedef struct bucket_val_t {
        /* One of the HASH_VAL_TYPE_* tags. */
        short type;
        union {
            /* Integer payload (presumably used with HASH_VAL_TYPE_LONG). */
            long d;
            /* Heap string, freed for HASH_VAL_TYPE_STR values. */
            char *str;
            /* Nested table, released with hash_free() for HASH_VAL_TYPE_ARR values. */
            struct hashtable_t *arr;
        } v;
    } bucket_val;
    
    /* One entry of the table; entries sharing an index slot are chained via `next`. */
    typedef struct bucket_t {
        /* Hash of `key`; the all-ones value (-1) marks an unused bucket. */
        unsigned long h;
        /* Heap copy of the key string. */
        char *key;
        /* Heap-allocated value. */
        bucket_val *val;
        /* Index of the next bucket in the same chain, or -1 at the end. */
        size_t next;
    } bucket;
    
    /* Table header; the index slots and the buckets live in one separate allocation. */
    typedef struct hashtable_t {
        /* Number of buckets, and also the number of index slots. */
        size_t cap;
        /* -(cap); OR-ed with a hash to get a slot offset relative to arrData. */
        size_t sizemask;
        /* Number of live entries. */
        size_t used;
        /* Index of the next bucket that an insert will fill. */
        size_t next;
        /* First bucket; the `cap` index slots sit immediately before it. */
        bucket *arrData;
    } hashtable;

    2.功能清单

    /* Create a table; `cap` is rounded by hash_recap(). Returns NULL if allocation fails. */
    hashtable *new_hash_table(size_t cap);
    /* Mark `cap` index slots and `cap` buckets in `data` as empty. */
    void init_hash_data(size_t *data,size_t cap);
    /* Hash a NUL-terminated key. */
    unsigned long hash(char *key);
    
    /* Return the value stored under `key`, or NULL when absent. */
    bucket_val *hash_get(hashtable *ht,char *key);
    /* Store a copy of string `val` under `key`. Returns 0 on success, negative on failure. */
    int hash_set(hashtable *ht,char *key,char *val);
    /* Return 1 when `key` is present, 0 otherwise. */
    unsigned short hash_exists(hashtable *ht,char *key);
    /* Remove `key` if present. Returns 0. */
    int hash_remove(hashtable *ht,char *key);
    
    /* Double the capacity. Returns 0 on success, -1 if allocation fails. */
    int hash_resize(hashtable *ht);
    /* Rebuild the index slots and chains from the buckets, closing unused gaps. */
    void hash_rehash(hashtable *ht);
    /* Round `cap` up to a power of two, never below HASH_TABLE_INIT_CAP. */
    size_t hash_recap(size_t cap);
    /* Copy the used buckets among the first `count` of `src` to the same positions in `dest`. */
    void hash_copy_bucket(bucket* dest,bucket* src,size_t count);
    
    /* Reset a bucket to the unused state; `freeval` == '1' also releases its value. */
    void hash_free_bucket(bucket *pb,char freeval);
    /* Release a value according to its type tag. */
    void hash_free_bucket_val(bucket_val *pval);
    /* Release every bucket, the data block and the table itself. */
    void hash_free(hashtable *ht);

    3.源码

    #include <stdio.h>
    #include <string.h>
    #include <stdlib.h>
    //HashTable
    /* Smallest capacity a table is created with. */
    #define HASH_TABLE_INIT_CAP (8)
    /* Type tags stored in bucket_val.type. */
    #define HASH_VAL_TYPE_STR (1<<1)
    #define HASH_VAL_TYPE_LONG (1<<2)
    #define HASH_VAL_TYPE_ARR (1<<3)
    
    /*
     * Layout helpers. Every argument is parenthesized so that an expression
     * such as `p + 1` or `c ? a : b` expands with the intended precedence.
     *
     * HASH_DATA_START: start of the allocation, i.e. the first index slot,
     *                  which lies `cap` size_t cells before arrData.
     * HASH_SIZEMASK:   -(cap) in unsigned arithmetic.
     * HASH_OFFSET:     hash OR-ed with the mask; read as a signed number it is
     *                  the slot offset relative to arrData, in [-cap, -1].
     */
    #define HASH_DATA_START(ht) ((size_t*)(ht)->arrData - (ht)->cap)
    #define HASH_SIZEMASK(ht) (-((ht)->cap))
    #define HASH_OFFSET(ht,h) ((h) | (ht)->sizemask)
    
    /* Tagged value held by a bucket; `type` tells which member of `v` is live. */
    typedef struct bucket_val_t {
        /* One of the HASH_VAL_TYPE_* tags. */
        short type;
        union {
            /* Integer payload (presumably used with HASH_VAL_TYPE_LONG). */
            long d;
            /* Heap string, freed for HASH_VAL_TYPE_STR values. */
            char *str;
            /* Nested table, released with hash_free() for HASH_VAL_TYPE_ARR values. */
            struct hashtable_t *arr;
        } v;
    } bucket_val;
    
    /* One entry of the table; entries sharing an index slot are chained via `next`. */
    typedef struct bucket_t {
        /* Hash of `key`; the all-ones value (-1) marks an unused bucket. */
        unsigned long h;
        /* Heap copy of the key string. */
        char *key;
        /* Heap-allocated value. */
        bucket_val *val;
        /* Index of the next bucket in the same chain, or -1 at the end. */
        size_t next;
    } bucket;
    
    /* Table header; the index slots and the buckets live in one separate allocation. */
    typedef struct hashtable_t {
        /* Number of buckets, and also the number of index slots. */
        size_t cap;
        /* -(cap); OR-ed with a hash to get a slot offset relative to arrData. */
        size_t sizemask;
        /* Number of live entries. */
        size_t used;
        /* Index of the next bucket that an insert will fill. */
        size_t next;
        /* First bucket; the `cap` index slots sit immediately before it. */
        bucket *arrData;
    } hashtable;
    
    
    /* Create a table; `cap` is rounded by hash_recap(). Returns NULL if allocation fails. */
    hashtable *new_hash_table(size_t cap);
    /* Mark `cap` index slots and `cap` buckets in `data` as empty. */
    void init_hash_data(size_t *data,size_t cap);
    /* Hash a NUL-terminated key. */
    unsigned long hash(char *key);
    
    /* Return the value stored under `key`, or NULL when absent. */
    bucket_val *hash_get(hashtable *ht,char *key);
    /* Store a copy of string `val` under `key`. Returns 0 on success, negative on failure. */
    int hash_set(hashtable *ht,char *key,char *val);
    /* Return 1 when `key` is present, 0 otherwise. */
    unsigned short hash_exists(hashtable *ht,char *key);
    /* Remove `key` if present. Returns 0. */
    int hash_remove(hashtable *ht,char *key);
    
    /* Double the capacity. Returns 0 on success, -1 if allocation fails. */
    int hash_resize(hashtable *ht);
    /* Rebuild the index slots and chains from the buckets, closing unused gaps. */
    void hash_rehash(hashtable *ht);
    /* Round `cap` up to a power of two, never below HASH_TABLE_INIT_CAP. */
    size_t hash_recap(size_t cap);
    /* Copy the used buckets among the first `count` of `src` to the same positions in `dest`. */
    void hash_copy_bucket(bucket* dest,bucket* src,size_t count);
    
    /* Reset a bucket to the unused state; `freeval` == '1' also releases its value. */
    void hash_free_bucket(bucket *pb,char freeval);
    /* Release a value according to its type tag. */
    void hash_free_bucket_val(bucket_val *pval);
    /* Release every bucket, the data block and the table itself. */
    void hash_free(hashtable *ht);
    hashtable.h
    #include "hashtable.h"
    
    hashtable *new_hash_table(size_t cap){
        cap = hash_recap(cap);
        hashtable *ht = (hashtable *)malloc(sizeof(hashtable));
        if(ht == NULL){
            return NULL;
        }
        size_t *data = (size_t *)malloc(sizeof(size_t) * cap + sizeof(bucket) * cap);
        if(data == NULL){
            free(ht);
            return NULL;
        }
        init_hash_data(data,cap);
        ht->cap = cap;
        ht->used = 0;
        ht->next = 0;
        ht->sizemask = HASH_SIZEMASK(ht);
        ht->arrData = (bucket *)(data + cap);
        return ht;
    }
    
    /*
     * Put a freshly allocated data block into the "everything empty" state.
     *
     * data: start of the block; it holds `cap` index slots followed by `cap` buckets.
     * cap:  number of index slots and of buckets.
     *
     * Every index slot becomes -1 (no chain) and every bucket gets h = -1,
     * NULL key/val and next = -1. A NULL `data` is ignored.
     */
    void init_hash_data(size_t *data,size_t cap){
        if(data == NULL){
            return;
        }
        bucket *arrData = (bucket *)(data + cap);
        // size_t index: an int would be compared against an unsigned cap.
        for(size_t i = 0; i < cap; ++i){
            data[i] = (size_t)-1;
            arrData[i].h = (unsigned long)-1;
            arrData[i].key = NULL;
            arrData[i].val = NULL;
            arrData[i].next = (size_t)-1;
        }
    }
    /*
     * Hash a NUL-terminated string.
     *
     * key: string to hash; must not be NULL.
     * Returns 1234 plus the sum of the string's characters, so strings made
     * of the same characters in a different order hash to the same value.
     */
    unsigned long hash(char *key){
        unsigned long sum = 1234;
        for(const char *p = key; *p != '\0'; ++p){
            sum += *p;
        }
        return sum;
    }
    
    /*
     * Store a copy of string `val` under `key`, replacing any previous value.
     *
     * ht:  table to modify.
     * key: NUL-terminated key; copied when a new entry is created.
     * val: NUL-terminated value; always copied.
     * Returns 0 on success, -1 on a NULL argument, allocation failure or
     * failed resize. On failure nothing allocated here is left behind.
     */
    int hash_set(hashtable *ht,char *key,char *val){
        if(ht == NULL || key == NULL || val == NULL){
            return -1;
        }

        char *k = NULL;
        bucket_val *fresh = NULL;
        size_t vlen = strlen(val) + 1;
        char *v = malloc(vlen);
        if(v == NULL){
            return -1;
        }
        memcpy(v,val,vlen);

        // key already exists: swap the payload in place
        bucket_val *bval = hash_get(ht,key);
        if(bval != NULL){
            // Release the old payload according to its tag before retagging.
            if(bval->type == HASH_VAL_TYPE_STR){
                free(bval->v.str);
            }else if(bval->type == HASH_VAL_TYPE_ARR){
                hash_free(bval->v.arr);
            }
            bval->type = HASH_VAL_TYPE_STR;
            bval->v.str = v;
            return 0;
        }

        // key not exists: get everything that can fail out of the way first
        size_t klen = strlen(key) + 1;
        k = malloc(klen);
        fresh = malloc(sizeof *fresh);
        if(k == NULL || fresh == NULL){
            goto fail;
        }
        memcpy(k,key,klen);

        if(ht->next >= ht->cap - 1){
            printf("prepare to resize:%zu\n",ht->next);
            if(hash_resize(ht) < 0){
                goto fail;
            }
        }

        fresh->type = HASH_VAL_TYPE_STR;
        fresh->v.str = v;

        unsigned long h = hash(key);
        // OFFSET is -(cap) + (h mod cap) in unsigned arithmetic; adding cap
        // gives the slot number counted from the start of the allocation.
        size_t *pidx = HASH_DATA_START(ht) + (HASH_OFFSET(ht,h) + ht->cap);
        bucket *pb = ht->arrData + ht->next;
        pb->h = h;
        pb->key = k;
        pb->val = fresh;
        // Push onto the front of the chain; an empty slot holds -1, which is
        // also the end-of-chain marker, so one path covers both cases.
        pb->next = *pidx;
        *pidx = ht->next;
        ht->next++;
        ht->used++;
        // next is cast so the end marker prints as -1
        printf("hash_set(%s)idx:%zu,h:%lu,next:%ld\n",pb->key,*pidx,pb->h,(long)pb->next);
        return 0;

    fail:
        free(fresh);
        free(k);
        free(v);
        return -1;
    }
    
    /*
     * Look up the value stored under `key`.
     *
     * ht:  table to search.
     * key: NUL-terminated key.
     * Returns the stored value, or NULL when the key is absent or either
     * argument is NULL. The table keeps ownership of the returned value.
     */
    bucket_val *hash_get(hashtable *ht,char *key){
        if(ht == NULL || key == NULL){
            return NULL;
        }
        unsigned long h = hash(key);
        // OFFSET + cap is the slot number counted from the start of the
        // allocation, which keeps the pointer arithmetic inside the block.
        size_t index = *(HASH_DATA_START(ht) + (HASH_OFFSET(ht,h) + ht->cap));

        while(index != (size_t)-1){
            bucket *pb = ht->arrData + index;
            if(pb->h == (unsigned long)-1){
                // an unused bucket has no key to compare; stop here
                break;
            }
            if(pb->h == h && strcmp(pb->key,key) == 0){
                return pb->val;
            }
            index = pb->next;
        }
        return NULL;
    }
    
    /*
     * Report whether `key` is present.
     *
     * ht:  table to search.
     * key: NUL-terminated key.
     * Returns 1 when an entry with this key exists, 0 otherwise (including
     * NULL arguments).
     */
    unsigned short hash_exists(hashtable *ht,char *key){
        if(ht == NULL || key == NULL){
            return 0;
        }
        unsigned long h = hash(key);
        // OFFSET + cap is the slot number counted from the start of the allocation.
        size_t index = *(HASH_DATA_START(ht) + (HASH_OFFSET(ht,h) + ht->cap));

        // The chain ends at index -1. Following it any further would read
        // memory in front of the first bucket.
        while(index != (size_t)-1){
            bucket *pb = ht->arrData + index;
            if(pb->h == (unsigned long)-1){
                // an unused bucket has a NULL key; stop here
                break;
            }
            if(strcmp(pb->key,key) == 0){
                return 1;
            }
            index = pb->next;
        }
        return 0;
    }
    
    /*
     * Remove the entry stored under `key`, if there is one.
     *
     * ht:  table to modify.
     * key: NUL-terminated key.
     * Returns 0 whether or not an entry was removed.
     *
     * The bucket is unlinked from its chain and reset; its position stays
     * unused until hash_rehash() closes the gap.
     */
    int hash_remove(hashtable *ht,char *key){
        if(ht == NULL || key == NULL){
            return 0;
        }
        unsigned long h = hash(key);
        // `link` is the cell that points at the current bucket: first the
        // index slot, then the `next` field of the previous bucket.
        size_t *link = HASH_DATA_START(ht) + (HASH_OFFSET(ht,h) + ht->cap);

        while(*link != (size_t)-1){
            bucket *pb = ht->arrData + *link;
            if(pb->h == (unsigned long)-1){
                // an unused bucket has a NULL key; stop here
                break;
            }
            if(strcmp(pb->key,key) == 0){
                // unlink before the bucket is reset, which clears pb->next
                *link = pb->next;
                hash_free_bucket(pb,'1');
                ht->used--;
                return 0;
            }
            link = &pb->next;
        }
        return 0;
    }
    
    /*
     * Reset a bucket to the unused state.
     *
     * pb:      bucket to reset; NULL is ignored.
     * freeval: '1' releases the key and the value owned by the bucket; any
     *          other value only clears the fields, which is what a caller
     *          needs after copying the bucket somewhere else.
     */
    void hash_free_bucket(bucket *pb,char freeval){
        if(pb == NULL){
            return;
        }
        if(freeval == '1'){
            // free the key while the pointer is still available
            free(pb->key);
            hash_free_bucket_val(pb->val);
        }
        pb->h = (unsigned long)-1;
        pb->key = NULL;
        pb->val = NULL;
        pb->next = (size_t)-1;
    }
    
    /*
     * Release a value and whatever it owns.
     *
     * pval: value to release; NULL is ignored.
     *
     * A string payload is freed, a nested table goes through hash_free(),
     * and a long owns nothing. An unknown tag is reported and the value
     * itself is still freed.
     */
    void hash_free_bucket_val(bucket_val *pval){
        if(pval == NULL){
            return;
        }
        switch(pval->type){
            case HASH_VAL_TYPE_STR:
                free(pval->v.str);
                break;
            case HASH_VAL_TYPE_LONG:
                break;
            case HASH_VAL_TYPE_ARR:
                hash_free(pval->v.arr);
                break;
            default:
                printf("unknown bucket value type.\n");
                break;
        }
        free(pval);
    }
    
    /*
     * Destroy a table: every bucket below ht->next, then the shared
     * index/bucket block, then the table header.
     *
     * ht: table to destroy; NULL is ignored.
     */
    void hash_free(hashtable *ht){
        if(ht == NULL){
            return;
        }
        for(size_t idx = 0; idx < ht->next; ++idx){
            hash_free_bucket(ht->arrData + idx,'1');
        }
        // arrData points into the middle of the block; free from its start
        free(HASH_DATA_START(ht));
        free(ht);
    }
    
    /*
     * Round a requested capacity up to a power of two.
     *
     * cap: requested number of buckets.
     * Returns HASH_TABLE_INIT_CAP when cap is at or below it, otherwise the
     * smallest power of two that is >= cap.
     */
    size_t hash_recap(size_t cap){
        if(cap <= HASH_TABLE_INIT_CAP){
            return HASH_TABLE_INIT_CAP;
        }

        // Clear the lowest set bit until nothing is left: `top` ends up as
        // the highest set bit and `bits` as the number of set bits. `top` is
        // a size_t so that large capacities are not truncated.
        size_t top = 0;
        int bits = 0;
        while(cap > 0){
            top = cap;
            cap &= cap - 1;
            bits++;
        }

        // A single set bit means cap was already a power of two.
        return bits == 1 ? top : top << 1;
    }
    
    /*
     * Double the capacity of a table.
     *
     * ht: table to grow.
     * Returns 0 on success. Returns -1, leaving the table untouched, when
     * ht is NULL, the doubled size would overflow, or allocation fails.
     *
     * The buckets are copied to the same positions in a new block, then
     * hash_rehash() rebuilds the index slots and chains for the new mask.
     */
    int hash_resize(hashtable *ht){
        if(ht == NULL){
            return -1;
        }
        // refuse a size whose byte count would wrap around
        if(ht->cap > ((size_t)-1) / 2 / (sizeof(size_t) + sizeof(bucket))){
            return -1;
        }
        size_t cap = ht->cap << 1;
        printf("new cap:%zu\n",cap);
        size_t *data = malloc(cap * (sizeof(size_t) + sizeof(bucket)));
        if(data == NULL){
            return -1;
        }
        // take the old block's start while ht->cap still has the old value
        size_t *origdata = HASH_DATA_START(ht);
        init_hash_data(data,cap);
        hash_copy_bucket((bucket *)(data + cap),ht->arrData,ht->next);
        ht->cap = cap;
        ht->sizemask = HASH_SIZEMASK(ht);
        ht->arrData = (bucket *)(data + cap);
        hash_rehash(ht);
        free(origdata);
        return 0;
    }
    
    /*
     * Copy used buckets from one array to the same positions in another.
     *
     * dest:  destination array with room for at least `count` buckets.
     * src:   source array.
     * count: number of leading buckets to consider.
     *
     * Buckets whose h is -1 (unused) are skipped, so the matching
     * destination buckets keep whatever they held before the call.
     */
    void hash_copy_bucket(bucket* dest,bucket* src,size_t count){
        if(dest == NULL || src == NULL){
            return;
        }
        // size_t index: an int would be compared against an unsigned count
        for(size_t idx = 0; idx < count; ++idx){
            if(src[idx].h == (unsigned long)-1){
                continue;
            }
            dest[idx] = src[idx];
        }
    }
    
    /*
     * Rebuild the index slots and collision chains from the bucket array.
     *
     * ht: table whose index slots are all -1 and whose cap/sizemask already
     *     describe the new layout; NULL is ignored.
     *
     * Buckets are visited in order. An unused bucket is filled with the next
     * used one, which closes the gaps left by removals; when no used bucket
     * follows, ht->next is lowered to the first unused position.
     */
    void hash_rehash(hashtable *ht){
        if(ht == NULL){
            return;
        }
        for(size_t idx = 0; idx < ht->next; ++idx){
            bucket *pb = ht->arrData + idx;
            if(pb->h == (unsigned long)-1){
                // find next used bucket to fill the hole
                size_t src = idx + 1;
                while(src < ht->next && ht->arrData[src].h == (unsigned long)-1){
                    ++src;
                }
                if(src >= ht->next){
                    ht->next = idx;
                    break;
                }
                *pb = ht->arrData[src];
                // the contents moved, so only clear the old position
                hash_free_bucket(ht->arrData + src,'0');
            }
            // OFFSET + cap is the slot number counted from the start of the allocation.
            size_t *pidx = HASH_DATA_START(ht) + (HASH_OFFSET(ht,pb->h) + ht->cap);
            // Always overwrite next: the copied bucket still carries a link
            // from the old layout, and an empty slot holds -1, the end marker.
            pb->next = *pidx;
            *pidx = idx;
        }
    }
    hashtable.c

    4.测试

    #include "hashtable.h"
    #include <stdio.h>
    
    /*
     * Check that new_hash_table() rounds the requested capacity correctly.
     *
     * Returns 0 when every case matches, -1 on the first mismatch or when a
     * table cannot be created. Each table is freed before the result is
     * checked, so a failing case leaks nothing.
     */
    int test_recap(){
        // {requested capacity, expected ht->cap}
        const size_t cases[][2] = {
            {15, 16},
            {0, HASH_TABLE_INIT_CAP},
            {32, 32},
            {33, 64},
        };
        for(size_t i = 0; i < sizeof cases / sizeof cases[0]; ++i){
            hashtable *ht = new_hash_table(cases[i][0]);
            if(ht == NULL){
                printf("new_hash_table(%zu) fail\n",cases[i][0]);
                return -1;
            }
            size_t got = ht->cap;
            hash_free(ht);
            if(got != cases[i][1]){
                printf("%zu != ht->cap=%zu\n",cases[i][1],got);
                return -1;
            }
        }
        return 0;
    }
    
    /*
     * Exercise hash_set(): two inserts, one overwrite, two more inserts.
     *
     * ht: empty table to fill.
     * Returns 0 on success, a negative value on the first failed step.
     * ht->used must grow on an insert and stay the same on an overwrite.
     */
    int test_set(hashtable *ht){
        int res = hash_set(ht,"a","aaa");
        if(res < 0){
            printf("hash_set(a) fail\n");
            return res;
        }
        if(ht->used != 1){
            printf("ht->used:%zu != 1\n",ht->used);
            return -1;
        }

        res = hash_set(ht,"ab","ababab");
        if(res < 0){
            printf("hash_set(ab) fail\n");
            return res;
        }
        if(ht->used != 2){
            printf("ht->used:%zu != 2\n",ht->used);
            return -1;
        }

        // overwrite of an existing key: the count must not change
        res = hash_set(ht,"a","aaaaaaaaaa");
        if(res < 0){
            printf("hash_set(a) fail\n");
            return res;
        }
        if(ht->used != 2){
            printf("ht->used:%zu != 2\n",ht->used);
            return -1;
        }

        res = hash_set(ht,"ccccc","cccccc");
        if(res < 0){
            printf("hash_set fail %d\n",res);
            return res;
        }

        res = hash_set(ht,"ddddddd","ddddddd");
        if(res < 0){
            printf("hash_set fail %d\n",res);
            return res;
        }

        return 0;
    }
    
    /*
     * Check that key "a" holds the string written by the overwrite in test_set().
     *
     * ht: table filled by test_set().
     * Returns 0 on success, -1 when the key is missing, the value is not
     * tagged as a string, or the string differs.
     */
    int test_get(hashtable *ht){
        bucket_val * v = hash_get(ht,"a");
        if(v == NULL){
            return -1;
        }
        if(!(v->type & HASH_VAL_TYPE_STR)){
            printf("type:%d != %d\n",v->type,HASH_VAL_TYPE_STR);
            return -1;
        }

        if(strcmp(v->v.str,"aaaaaaaaaa") != 0){
            printf("value:%s != %s\n",v->v.str,"aaaaaaaaaa");
            return -1;
        }

        return 0;
    }
    
    /*
     * Remove key "a" and confirm it can no longer be found.
     *
     * ht: table containing key "a".
     * Returns 0 on success, a negative value when the removal reports an
     * error or the key is still present afterwards.
     */
    int test_remove(hashtable *ht){
        int res = hash_remove(ht,"a");
        if(res < 0){
            printf("hash_remove(a) fail\n");
            return res;
        }
        unsigned short e = hash_exists(ht,"a");
        if(e){
            printf("hash_exists(a)=%d\n",e);
            return -1;
        }
        return 0;
    }
    
    /*
     * Insert five more keys and check that the capacity doubled exactly once.
     *
     * ht: table left behind by the earlier tests.
     * Returns 0 on success, a negative value when an insert fails (the
     * message names the 1-based step) or the capacity is not twice the
     * starting capacity.
     */
    int test_resize(hashtable *ht){
        static char *keys[] = {"1111","2222","3333","44444","55555"};
        static char *vals[] = {"11111","22222","33333","44444","55555"};
        size_t origcap = ht->cap;

        for(size_t i = 0; i < sizeof keys / sizeof keys[0]; ++i){
            int res = hash_set(ht,keys[i],vals[i]);
            if(res < 0){
                printf("hash_set fail %zu\n",i + 1);
                return res;
            }
        }

        if(ht->cap != 2*origcap){
            printf("ht->cap:%zu != %zu\n",ht->cap,2*origcap);
            return -1;
        }

        return 0;
    }
    
    /*
     * Run the test functions in order on one shared table.
     *
     * The order matters: test_get, test_remove and test_resize rely on the
     * contents left by the test before them. Prints one PASS line per
     * passing test. Returns 0 when all pass, 1 otherwise.
     */
    int main(int argc,char* argv[]){
        (void)argc;
        (void)argv;
        int failed = 0;

        if(test_recap() == 0){
            printf("PASS hash_recap\n");
        }else{
            failed++;
        }

        hashtable *ht = new_hash_table(0);
        if(ht == NULL){
            printf("new_hash_table fail\n");
            return 1;
        }

        if(test_set(ht) == 0){
            printf("PASS hash_set\n");
        }else{
            failed++;
        }

        if(test_get(ht) == 0){
            printf("PASS hash_get\n");
        }else{
            failed++;
        }

        if(test_remove(ht) == 0){
            printf("PASS hash_remove\n");
        }else{
            failed++;
        }

        if(test_resize(ht) == 0){
            printf("PASS hash_resize\n");
        }else{
            failed++;
        }

        hash_free(ht);
        return failed == 0 ? 0 : 1;
    }
    test.c

  • 相关阅读:
    250 浅拷贝Object.assign(target, ...sources),深拷贝
    249 递归:概念,利用递归求1~n的阶乘,利用递归求斐波那契数列,利用递归遍历数据
    248 闭包:概念,作用,案例,思考题案例,chrome 中调试闭包
    247 高阶函数 之 函数可以作为参数传递
    246 JavaScript严格模式
    245 改变函数内部 this 指向:call,apply,bind,call、apply、bind 三者的异同
    244 函数内部的this指向:6种
    243 函数:函数的3种定义方式,函数的6种调用方式
    242 Object.defineProperty
    241 获取对象的属性名:Object.keys(对象)
  • 原文地址:https://www.cnblogs.com/ling-diary/p/10676109.html
Copyright © 2011-2022 走看看