zoukankan      html  css  js  c++  java
  • Python 2.7的字典实现简化版(C语言)

    这是一个能自动调整大小的哈希字典,外部接口实现了下列功能.

    1.字典级别:

    创建字典 dict_new

    归零字典 dict_clear

    2.键值级别:

    查找 dict_search

    强制查找 dict_force_search

    更新 dict_update

    添加 dict_add

    删除 dict_del

    所谓强制查找就是假如key不存在,那么它将先在字典中添加这个key,值设置为默认值,再返回这个值的指针.

    由于键和值都是以void指针(void *)定义的,所以在处理一些简单的值类型时(如int)显得繁琐了些(比如_valcmp),但好处是更加灵活,比如稍作修改(valuedup和get_default_value)就可以处理值为字符串的情况.

    C确实很快,但繁重的内存管理果然名不虚传.这个简单的字典要求:

    1.键(me_key)和值(me_value)的指针所指向的堆内存区域能够直接用free释放,如果这些区域还包含另一些堆指针,那么可能会出问题.

    2.只需传递缓冲数据(main中的keybuf和valbuf)给键值函数,函数内部会根据情况申请或释放内存,或不做任何处理.

    为方便处理,words文本格式要求每行一个词语.

    /* Pure C simple version of python 2.7.8 hash table */
    /* Sample usage: see main() */
    #include <stdio.h>
    #include <stdlib.h>
    #include <assert.h>
    #include <string.h>
    /* Smallest table size. Table sizes are produced by shifting this value
       left, and ma_mask is stored as (size - 1). */
    #define PyDict_MINSIZE 8
    /* Number of bits the perturb value is shifted right on each probe step. */
    #define PERTURB_SHIFT 5
    /* True once the filled slots (active + dummy) reach 2/3 of the table size. */
    #define NEED_RESIZE(mp) ((mp)->ma_fill * 3 >= ((mp)->ma_mask + 1) * 2)
    
    /* Keys and values are passed around as untyped pointers. */
    typedef void PyObject;
    
    /* One slot of the hash table. */
    typedef struct {
        size_t me_hash;      /* hash of me_key, stored when the slot is filled */
        PyObject *me_key;    /* NULL: never used; dummy: deleted; otherwise the stored key */
        PyObject *me_value;  /* non-NULL only while the slot is active */
    } PyDictEntry;
    
    /* The dictionary: slot counters, the slot table, and the callbacks that
       define how keys and values are hashed, compared and copied. */
    typedef struct _dictobject PyDictObject;
    struct _dictobject {
        size_t ma_fill;  /* # Active + # Dummy */
        size_t ma_used;  /* # Active */
        size_t ma_mask;  /* table size - 1; ANDed with a hash/index to pick a slot */
        PyDictEntry *ma_table;  /* heap array of ma_mask + 1 slots */
        /* Computes the hash of a key. */
        size_t(*ma_keyhash)(PyObject *key);
        /* Compares two keys; the lookups treat a result of 0 as "equal". */
        int(*ma_keycmp)(PyObject *key1, PyObject *key2);
        /* Returns a copy of a key for storage in the table; NULL is treated as failure. */
        PyObject *(*ma_keydup)(PyObject *key);
        /* Returns a copy of a value for storage in the table; NULL is treated as failure. */
        PyObject *(*ma_valuedup)(PyObject *value);
        /* Produces the value stored by dict_force_search for a missing key. */
        PyObject *(*ma_default)(void);
    };
    
    /* Sentinel whose address is stored in me_key to mark a deleted slot.
       Only the address is compared; it must never be passed to free(). */
    static PyDictEntry _dummy_struct;
    #define dummy (&_dummy_struct)
    
    /*
     * Default key hash: treats the key as a NUL-terminated string and folds
     * each character into the accumulator as acc = acc * 33 + c, starting
     * from 5381.
     */
    static size_t
    keyhash(PyObject *_key)
    {
        const char *cursor = (const char *)_key;
        size_t acc = 5381;

        while (*cursor != '\0') {
            acc = acc * 33 + *cursor;
            cursor++;
        }
        return acc;
    }
    
    /*
     * Default key comparison: compares two NUL-terminated strings character by
     * character.
     *
     * Returns 0 when both strings are identical, otherwise the difference of
     * the first pair of differing characters.
     */
    static int
    keycmp(PyObject *_key1, PyObject *_key2)
    {
        const char *key1 = (const char *)_key1;
        const char *key2 = (const char *)_key2;
        for (; *key1 == *key2; key1++, key2++)
            if (*key1 == '\0')   /* both strings ended together: equal */
                return 0;
        return *key1 - *key2;
    }
    
    /*
     * Default key duplicator: returns a heap copy of the NUL-terminated string
     * `key`, or NULL if memory cannot be allocated. The copy is released with
     * free().
     *
     * Implemented with malloc/memcpy instead of strdup(), which is not part of
     * ISO C before C23 and is left undeclared by strict -std=c99/-std=c11 builds.
     */
    static PyObject *
    keydup(PyObject *key)
    {
        const char *src = (const char *)key;
        size_t len = strlen(src) + 1;   /* include the terminator */
        char *copy = (char *)malloc(len);
        if (copy != NULL)
            memcpy(copy, src, len);
        return (PyObject *)copy;
    }
    
    /*
     * Default value duplicator: treats `_value` as a pointer to size_t and
     * returns a heap copy of it, or NULL if memory cannot be allocated.
     * The copy is released with free().
     */
    static PyObject *
    valuedup(PyObject *_value)
    {
        size_t *value = (size_t *)malloc(sizeof(size_t));
        if (value == NULL)      /* callers test for NULL; do not dereference */
            return NULL;
        *value = *(size_t *)_value;
        return (PyObject *)value;
    }
    
    /*
     * Default-value factory: returns a heap-allocated size_t initialised to 0,
     * or NULL if memory cannot be allocated. The result is released with free().
     */
    static PyObject *
    get_default_value(void)
    {
        size_t *value = (size_t *)malloc(sizeof(size_t));
        if (value == NULL)      /* callers test for NULL; do not dereference */
            return NULL;
        *value = 0;
        return (PyObject *)value;
    }
    
    /*
     * Create an empty dictionary.
     *
     * ma_size      requested minimum number of slots; the table gets the
     *              smallest size of the form PyDict_MINSIZE << k that is
     *              >= ma_size.
     * ma_keyhash, ma_keycmp, ma_keydup, ma_valuedup, ma_default
     *              callbacks; passing 0/NULL for any of them selects the
     *              built-in string-key / size_t-value implementation.
     *
     * Returns the new dictionary, or NULL if the requested size cannot be
     * represented or memory cannot be allocated. The caller owns the result.
     */
    PyDictObject *
    dict_new_custom(size_t ma_size,
                    size_t(*ma_keyhash)(PyObject *key),
                    int(*ma_keycmp)(PyObject *key1, PyObject *key2),
                    PyObject * (*ma_keydup)(PyObject *key),
                    PyObject * (*ma_valuedup)(PyObject *value),
                    PyObject * (*ma_default)(void))
    {
        PyDictObject *mp;
        PyDictEntry *newtable;
        size_t newsize;

        for (newsize = PyDict_MINSIZE;
                newsize < ma_size && newsize > 0;
                newsize <<= 1)
            ;
        /* The shift wrapped around to 0: the request is too large. */
        if (newsize == 0)
            return NULL;

        mp = (PyDictObject *)malloc(sizeof(PyDictObject));
        if (mp == NULL)
            return NULL;

        /* calloc zero-fills the slots and rejects an overflowing element count. */
        newtable = (PyDictEntry *)calloc(newsize, sizeof(PyDictEntry));
        if (newtable == NULL) {
            free(mp);           /* do not leak the header on failure */
            return NULL;
        }

        mp->ma_table = newtable;
        mp->ma_mask = newsize - 1;
        mp->ma_fill = mp->ma_used = 0;
        mp->ma_keyhash = ma_keyhash ? ma_keyhash : keyhash;
        mp->ma_keycmp = ma_keycmp ? ma_keycmp : keycmp;
        mp->ma_keydup = ma_keydup ? ma_keydup : keydup;
        mp->ma_valuedup = ma_valuedup ? ma_valuedup : valuedup;
        mp->ma_default = ma_default ? ma_default : get_default_value;
        return mp;
    }
    
    /*
     * Create an empty dictionary with the minimum table size and the built-in
     * callbacks. Returns whatever dict_new_custom returns.
     */
    PyDictObject *
    dict_new(void)
    {
        PyDictObject *fresh = dict_new_custom(0, NULL, NULL, NULL, NULL, NULL);
        return fresh;
    }
    
    /*
     * Internal basic search method, used by the other functions.
     *
     * Probes the table for `key` (whose hash is `hash`) and returns:
     *   - the slot whose key is the same pointer as `key`, or whose stored
     *     hash equals `hash` and for which ma_keycmp reports 0; otherwise
     *   - the first dummy (deleted) slot met during the probe, if any;
     *     otherwise
     *   - the never-used slot (me_key == NULL) that ended the probe.
     *
     * The probe loop only exits through a return, so it relies on the table
     * containing at least one never-used slot or a matching key.
     */
    static PyDictEntry *
    lookdict(PyDictObject *mp, PyObject *key, size_t hash)
    {
        size_t i;
        size_t perturb;
        PyDictEntry *freeslot;
        size_t mask = mp->ma_mask;
        PyDictEntry *ep0 = mp->ma_table;
        PyDictEntry *ep;
        /* First probe: the slot selected by the low bits of the hash. */
        i = (size_t)hash & mask;
        ep = &ep0[i];
        if (ep->me_key == NULL || ep->me_key == key)
            return ep;
        if (ep->me_key == dummy)
            freeslot = ep;      /* remember the first reusable deleted slot */
        else if (ep->me_hash == hash
                 && mp->ma_keycmp(ep->me_key, key) == 0)
            return ep;
        else
            freeslot = NULL;
        /* Later probes: i = 5*i + perturb + 1, with perturb shifted right by
           PERTURB_SHIFT after every step. */
        for (perturb = hash;; perturb >>= PERTURB_SHIFT) {
            i = (i << 2) + i + perturb + 1;
            ep = &ep0[i & mask];
            /* A never-used slot ends the probe: the key is absent. */
            if (ep->me_key == NULL)
                return freeslot == NULL ? ep : freeslot;
            /* The dummy test comes before ma_keycmp so the sentinel is never
               handed to the comparison callback. */
            if (ep->me_key == key
                    || (ep->me_hash == hash
                        && ep->me_key != dummy
                        && mp->ma_keycmp(ep->me_key, key) == 0))
                return ep;
            if (ep->me_key == dummy && freeslot == NULL)
                freeslot = ep;
        }
        assert(0);          /* NOT REACHED */
        return 0;
    }
    
    /*
     * Faster lookup for a table that holds no dummy keys.
     *
     * Walks the probe sequence for `hash` and returns the first slot that is
     * never-used (me_key == NULL), holds the very same key pointer, or holds a
     * key with the same stored hash for which ma_keycmp reports 0.
     */
    static PyDictEntry *
    lookdict_nodummy(PyDictObject *mp, PyObject *key, size_t hash)
    {
        const size_t mask = mp->ma_mask;
        PyDictEntry *const slots = mp->ma_table;
        size_t idx = hash & mask;
        size_t perturb = hash;

        for (;;) {
            PyDictEntry *slot = &slots[idx & mask];

            if (slot->me_key == NULL || slot->me_key == key)
                return slot;
            if (slot->me_hash == hash
                    && mp->ma_keycmp(slot->me_key, key) == 0)
                return slot;

            /* Advance to the next probe position. */
            idx = idx * 5 + perturb + 1;
            perturb >>= PERTURB_SHIFT;
        }
    }
    
    /*
     * Internal fast insert for a table that holds no dummy keys.
     *
     * Stores the given key/hash/value in the first never-used slot of the
     * probe sequence for `hash`, without comparing keys or copying anything,
     * and bumps both ma_fill and ma_used.
     */
    static void
    insertdict_clean(PyDictObject *mp, PyObject *key, size_t hash, PyObject *value)
    {
        const size_t mask = mp->ma_mask;
        PyDictEntry *const slots = mp->ma_table;
        size_t idx = hash & mask;
        size_t perturb = hash;
        PyDictEntry *slot = &slots[idx];

        while (slot->me_key != NULL) {
            idx = idx * 5 + perturb + 1;
            slot = &slots[idx & mask];
            perturb >>= PERTURB_SHIFT;
        }

        slot->me_hash = hash;
        slot->me_key = key;
        slot->me_value = value;
        mp->ma_used += 1;
        mp->ma_fill += 1;
    }
    
    /*
     * Restructure the table by allocating a new table and reinserting all
     * active items. When entries have been deleted, the new table may
     * actually be smaller than the old one.
     *
     * minused  the new table gets the smallest size of the form
     *          PyDict_MINSIZE << k that is strictly greater than minused.
     *
     * Returns 0 on success. Returns -1 and leaves the dictionary untouched if
     * the size cannot be represented or memory cannot be allocated.
     */
    static int
    dict_resize(PyDictObject *mp, size_t minused)
    {
        size_t newsize;
        size_t used;
        PyDictEntry *oldtable, *newtable, *ep;

        oldtable = mp->ma_table;
        /* Find the smallest table size > minused. */
        for (newsize = PyDict_MINSIZE;
                newsize <= minused && newsize > 0;
                newsize <<= 1)
            ;
        /* The shift wrapped around to 0: the request is too large. */
        if (newsize == 0)
            return -1;
        /* Get zero-filled space for a new table; calloc also rejects an
           overflowing element count. */
        newtable = (PyDictEntry *)calloc(newsize, sizeof(PyDictEntry));
        if (newtable == NULL)
            return -1;
        mp->ma_table = newtable;
        mp->ma_mask = newsize - 1;
        used = mp->ma_used;
        /* insertdict_clean re-counts every entry it moves. */
        mp->ma_used = 0;
        mp->ma_fill = 0;
        for (ep = oldtable; used > 0; ep++) {
            /* only active entries are carried over; dummies are dropped */
            if (ep->me_value != NULL) {
                used--;
                insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value);
            }
        }
        free(oldtable);
        return 0;
    }
    
    /*
     * Look up `key` and return the me_value of the slot that lookdict selects:
     * the stored value pointer for an existing key, NULL for a missing one.
     * `key` must not be NULL (checked with assert).
     */
    PyObject *
    dict_search(PyDictObject *mp, PyObject *key)
    {
        PyDictEntry *slot;

        assert(key);
        slot = lookdict(mp, key, mp->ma_keyhash(key));
        return slot->me_value;
    }
    
    /* Return 1 if dict_search finds a value for `key`, 0 otherwise. */
    int
    dict_contain(PyDictObject *mp, PyObject *key)
    {
        if (dict_search(mp, key) != NULL)
            return 1;
        return 0;
    }
    
    /*
     * Add a key that is not yet in the dictionary.
     *
     * Copies of `key` and `value` (made with ma_keydup / ma_valuedup) are
     * stored; the caller keeps ownership of its own buffers.
     *
     * Returns 0 on success and -1 on failure (a copy could not be made, the
     * key already exists in a build without assertions, or the resize that
     * follows the insert failed). A failure before the insert leaves the
     * dictionary unchanged.
     */
    int
    dict_add(PyDictObject *mp, PyObject *key, PyObject *value)
    {
        assert(key);
        assert(value);
        size_t hash = mp->ma_keyhash(key);
        PyDictEntry *ep = lookdict(mp, key, hash);
        /*only for non-existing keys*/
        assert(ep->me_value == NULL);
        if (ep->me_value != NULL)
            return -1;      /* NDEBUG builds: refuse rather than leak the old pair */

        /* Build both copies first so a failure never touches the slot: the
           slot may hold the dummy marker, which must survive for probing. */
        PyObject *new_key = mp->ma_keydup(key);
        if (new_key == NULL)
            return -1;
        PyObject *new_value = mp->ma_valuedup(value);
        if (new_value == NULL) {
            free(new_key);
            return -1;
        }

        /* A reused dummy slot is already counted in ma_fill. */
        if (ep->me_key == NULL)
            mp->ma_fill++;
        mp->ma_used++;
        ep->me_key = new_key;
        ep->me_value = new_value;
        ep->me_hash = hash;
        if (NEED_RESIZE(mp))
            return dict_resize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
        return 0;
    }
    
    /*
     * Replace the value of a key that is already in the dictionary.
     *
     * A copy of `value` (made with ma_valuedup) is stored and the previous
     * value is freed.
     *
     * Returns 0 on success and -1 on failure (the copy could not be made, or
     * the key is missing in a build without assertions). On failure the
     * stored value is left untouched.
     */
    int
    dict_update(PyDictObject *mp, PyObject *key, PyObject *value)
    {
        assert(key);
        assert(value);
        size_t hash = mp->ma_keyhash(key);
        PyDictEntry *ep = lookdict(mp, key, hash);
        /*only for existing keys*/
        assert(ep->me_value != NULL);
        if (ep->me_value == NULL)
            return -1;      /* NDEBUG builds: no entry to update */

        /* Copy first: overwriting me_value with NULL on failure would leak the
           old value and leave ma_used counting a slot that looks inactive. */
        PyObject *new_value = mp->ma_valuedup(value);
        if (new_value == NULL)
            return -1;
        free(ep->me_value);
        ep->me_value = new_value;
        return 0;
    }
    
    /*
     * Remove an existing key: frees the stored key and value, marks the slot
     * with the dummy sentinel and decrements ma_used. The key must exist
     * (checked with assert). Always returns 0.
     */
    int
    dict_del(PyDictObject *mp, PyObject *key)
    {
        PyDictEntry *slot;

        assert(key);
        slot = lookdict(mp, key, mp->ma_keyhash(key));
        /* deleting is only defined for keys that are present */
        assert(slot->me_value != NULL);

        free(slot->me_value);
        free(slot->me_key);
        slot->me_value = NULL;
        slot->me_key = dummy;
        mp->ma_used -= 1;
        return 0;
    }
    
    /*
     * Look up `key`, inserting it with a default value if it is missing.
     *
     * For a missing key, a copy of `key` (ma_keydup) and a fresh default value
     * (ma_default) are stored, and the table is resized when it becomes too
     * full.
     *
     * Returns the stored value pointer (still owned by the dictionary), or
     * NULL if the key had to be inserted and a copy could not be allocated;
     * in that case the dictionary is left unchanged.
     */
    PyObject *
    dict_force_search(PyDictObject *mp, PyObject *key)
    {
        assert(key);
        size_t hash = mp->ma_keyhash(key);
        PyDictEntry *ep = lookdict(mp, key, hash);
        if (ep->me_value == NULL) {
            /* Build both objects first so a failure never touches the slot:
               the slot may hold the dummy marker, which must survive. */
            PyObject *new_key = mp->ma_keydup(key);
            if (new_key == NULL)
                return NULL;
            PyObject *new_value = mp->ma_default();
            if (new_value == NULL) {
                free(new_key);
                return NULL;
            }
            /* A reused dummy slot is already counted in ma_fill. */
            if (ep->me_key == NULL)
                mp->ma_fill++;
            mp->ma_used++;
            ep->me_key = new_key;
            ep->me_value = new_value;
            ep->me_hash = hash;
            if (NEED_RESIZE(mp)) {
                /* A successful resize moves every entry into a dummy-free
                   table, so the slot must be located again. A failed resize
                   leaves the old table as it was and ep stays valid. */
                if (dict_resize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used) == 0)
                    ep = lookdict_nodummy(mp, key, hash);
            }
        }
        return ep->me_value;
    }
    
    /*
     * Remove every entry: frees all stored keys and values, zeroes the slot
     * table and resets the counters. The table itself keeps its current size
     * and stays allocated, so the dictionary can be reused afterwards.
     */
    void
    dict_clear(PyDictObject *mp)
    {
        PyDictEntry *table = mp->ma_table;
        assert(table != NULL);
        size_t used = mp->ma_used;
        if (mp->ma_fill == 0)
            return;
        PyDictEntry *ep;
        for (ep = table; used > 0; ep++) {
            /*only free active entry, this is different from Python 2.7*/
            if (ep->me_value != NULL) {
                used--;
                free(ep->me_key);
                free(ep->me_value);
            }
        }
        memset(table, 0, sizeof(PyDictEntry) * (mp->ma_mask + 1));
        /* The table is empty now; stale counters would make later scans
           (which loop until `used` active entries are seen) run past the
           end of the table. */
        mp->ma_used = 0;
        mp->ma_fill = 0;
    }
    
    /* Return the number of active entries (ma_used). */
    size_t
    dict_len(PyDictObject *mp)
    {
        const size_t active = mp->ma_used;
        return active;
    }
    
    /*helper function for sorting a PyDictEntry by its value*/
    static int
    _valcmp(const void *a, const void *b)
    {
        return *(size_t *)(*(PyDictEntry *)a).me_value > *(size_t *)(*
                (PyDictEntry *)b).me_value ? -1 : 1;
    }
    
    /*
     * Print every key/value pair to stdout as "key<TAB>value", one per line,
     * in descending order of value. Keys are printed as C strings and values
     * as size_t.
     *
     * The active entries are copied into a temporary array for sorting; the
     * dictionary itself is not modified. Prints nothing if the dictionary is
     * empty or the temporary array cannot be allocated.
     */
    static void
    print_all_by_value_desc(PyDictObject *mp)
    {
        PyDictEntry *ep;
        PyDictEntry *temp_table;
        size_t i = 0, used = mp->ma_used;

        if (used == 0)
            return;
        temp_table = (PyDictEntry *)malloc(sizeof(PyDictEntry) * used);
        if (temp_table == NULL)
            return;
        /* Collect the active slots; stop once all of them have been seen. */
        for (ep = mp->ma_table; used > 0; ep++) {
            if (ep->me_value != NULL) {
                used--;
                temp_table[i++] = *ep;
            }
        }
        used = mp->ma_used;
        qsort(temp_table, used, sizeof(temp_table[0]), _valcmp);
        for (i = 0; i < used; i++)
            fprintf(stdout, "%s\t%zu\n", (char *)temp_table[i].me_key,
                    *(size_t *)temp_table[i].me_value);
        free(temp_table);
    }
    
    /*
     * Debug dump: walks the table in slot order and prints
     * "key<TAB>value<TAB>hash" for every active entry, plus a note for each
     * dummy (deleted) slot met before the last active entry.
     * Keys are printed as C strings and values as size_t.
     */
    void printd(PyDictObject *mp)
    {
        PyDictEntry *ep;
        size_t used = mp->ma_used;
        for (ep = mp->ma_table; used > 0; ep++) {
            if (ep->me_value) {
                used--;
                fprintf(stdout, "%s\t%zu\t%zu\n", (char *)ep->me_key,
                        *(size_t *)ep->me_value, ep->me_hash);
            } else if (ep->me_key == dummy) {
                fprintf(stdout, "it is a dummy key! it's hash is %zu\n",
                        ep->me_hash);
            }
        }
    }
    
    /*scan words from stdin, print total amount for each word by DESC order*/
    int main(void)
    {
        //PyDictObject *mp = dict_new_custom(32, 0, 0, 0, 0, 0);
        PyDictObject *mp = dict_new();
        FILE *fp;
        fp = fopen("words", "r");
        char keybuf[100];
        size_t valuebuf[] = { 1 };
        size_t *vp;
        /*    while (fscanf(stdin, "%s", keybuf) == 1) {
        if (dict_contain(mp, keybuf)) {
        vp = dict_search(mp, keybuf);
        *vp += 1;
        } else
        dict_add(mp, keybuf, valuebuf);
        }*/
        while (fscanf(fp, "%s", keybuf) == 1) {
            vp = dict_force_search(mp, keybuf);
            *vp += 1;
        }
    
        print_all_by_value_desc(mp);
        //printd(mp);
        dict_clear(mp);
        fclose(fp);
        free(mp);
        return 0;
    }
  • 相关阅读:
    Linux学习笔记(二):实战-根据微服务端口号关闭进程
    Linux学习笔记(一):文件操作命令
    算法-Java组合
    Springboot学习笔记(六)-配置化注入
    Springboot学习笔记(五)-条件化注入
    Springboot学习笔记(四)-配置相关
    Springboot学习笔记(三)-常用注入组件方式
    CentOS7安装MySQL
    扫二维码下载apk并统计被扫描次数(及微信屏蔽下载解决方案)
    Java jacob调用打印机打印word文档
  • 原文地址:https://www.cnblogs.com/xiangnan/p/3859578.html
Copyright © 2011-2022 走看看