zoukankan      html  css  js  c++  java
  • 支持快速查询的字符串数组

      大致思路就是从string中得到一个hash码,然后比较hash码。

      这个思路是从Python的string源码中发现的;查找方式是线性搜索,时间复杂度大概是O(n)。

      这是Python的部分源码

    /*
     * Excerpt from the Python string implementation: returns the hash of the
     * string object `a`, storing the result in a->ob_shash so that later calls
     * can return it directly.
     *
     * A stored value of -1 is treated as "no hash stored yet"; for that reason
     * a computed hash of -1 is replaced by -2 before it is stored.
     */
    static long string_hash(PyStringObject *a) {
        register int len;
        register unsigned char *p;
        register long x;
    
        /* Any stored value other than -1 is returned without recomputing. */
        if (a->ob_shash != -1) {
            return a->ob_shash;
        }
        len = a->ob_size;
        p = (unsigned char *)a->ob_sval;
        /* Seed with the first byte shifted left by 7 bits. */
        x = *p << 7;
        /* Fold in every byte: multiply by 1000003, then XOR with the byte. */
        while (--len >= 0) x = (1000003*x) ^ *p++;
        /* Mix in the size field as the final step. */
        x ^= a->ob_size;
        /* Keep -1 free for the "not stored" check at the top. */
        if (x == -1) x = -2;
        a->ob_shash = x;
        return x;
    }

      hash码多次打乱可以理解,但是具体为什么就不知道了。如果有dalao求告知|・ω・`) 

      下面是我的代码,仅做记录用。

      

    #include <stdio.h>
    #include <string.h>
    #include <stdlib.h>
    
    /*
     * Fixed-capacity table of strings with one hash value per stored string.
     * Note that `fstrs` is a POINTER to the struct, not the struct itself.
     */
    typedef struct FastStrings {
        int len;    // size in bytes of one string slot; fstrs_insert rejects strings whose length is >= len
        int num;    // max number of strings
        int cur_num;    // number of strings stored so far
        char *string;   // slot storage: string i starts at offset i*len
        long *hash;     // hash[i] is the hash stored for string i
    }* fstrs;
    
    /*
     * Allocate and initialise an empty string table.
     *
     * ff: out-parameter; on success *ff receives the new table, on any
     *     failure *ff is set to NULL.
     *
     * Returns 0 on success, -1 if ff is NULL or the table object cannot be
     * allocated, -2 if the slot storage or the hash array cannot be allocated.
     * Nothing is leaked on failure.
     */
    int fstrs_init(fstrs *ff) {
        fstrs f;

        if (!ff) return -1;
        *ff = NULL;

        /* sizeof *f names the struct through the pointer, so this compiles
           both as C (where FastStrings is only a struct tag) and as C++. */
        f = (fstrs)malloc(sizeof *f);
        if (!f) return -1;

        f->cur_num = 0;
        f->len = 100;    /* slot size, includes the terminating NUL */
        f->num = 10;

        /* calloc zero-fills and checks the count*size multiplication. */
        f->string = (char *)calloc((size_t)f->num * (size_t)f->len, sizeof(char));
        f->hash = (long *)calloc((size_t)f->num, sizeof(long));
        if (!f->string || !f->hash) {
            /* free(NULL) is a no-op, so both pointers can be released
               regardless of which allocation failed. */
            free(f->string);
            free(f->hash);
            free(f);
            return -2;
        }

        *ff = f;
        return 0;
    }
    /*
     * Hash a NUL-terminated string: start from 1 and, for every byte, multiply
     * the running value by 1000003 and XOR the byte in.
     *
     * The arithmetic is done in unsigned long so that it wraps around instead
     * of overflowing a signed long (undefined behaviour), and bytes are read
     * as unsigned char so the result does not depend on whether plain char is
     * signed.
     *
     * str: string to hash; must not be NULL.
     * Returns the hash converted to long.
     */
    long _get_hash(char *str) {
        const unsigned char *p = (const unsigned char *)str;
        unsigned long x = 1;

        while (*p != '\0') {
            x = (1000003UL * x) ^ *p++;
        }
        return (long)x;
    }
    /*
     * Append a copy of `str` to the table, together with its hash.
     *
     * Returns 0 on success, or -1 when the table already holds f->num strings
     * or when `str` (plus its terminating NUL) does not fit into one slot.
     */
    int fstrs_insert(fstrs f, char *str) {
        long n = strlen(str);
        char *slot;

        if (f->cur_num >= f->num) return -1;    /* table is full */
        if (n >= f->len) return -1;             /* no room for the NUL */

        f->hash[f->cur_num] = _get_hash(str);

        /* Clear the whole slot first so the copied bytes end up NUL-terminated. */
        slot = f->string + f->cur_num * f->len;
        memset(slot, 0, f->len);
        memcpy(slot, str, (size_t)n);

        f->cur_num++;
        return 0;
    }
    /*
     * Linear search for `str` among the stored strings, starting at index
     * `begin` (negative values are treated as 0).
     *
     * The stored hash is compared first; strcmp runs only on a hash match, to
     * rule out two different strings that share a hash value.
     *
     * Returns the index of the first match at or after `begin`, or -1 if
     * there is none.
     */
    int fstrs_find(fstrs f, char *str, int begin) {
        long hash = _get_hash(str), i;

        if (begin < 0) begin = 0;
        for (i = begin; i < f->cur_num; i++) {
            /* Compare against slot i; strcmp returns 0 when strings are equal. */
            if (f->hash[i] == hash && strcmp(f->string + i * f->len, str) == 0)
                return (int)i;
        }
        return -1;
    }
    int main() {
        int res;
        fstrs f = NULL;
        res = fstrs_init(&f);
        if (res < 0) {printf("f error:%d
    ", res);return -1;}
        res = fstrs_insert(f, "asfsa");
        res = fstrs_insert(f, "aassf");
        res = fstrs_insert(f, "assdff");
        res = fstrs_insert(f, "asf");
        res = fstrs_find(f, "asf", 0);
        printf("res = %d
    ", res);
        return 0;
    }
    View Code

      有时间再和字典树方法做一下比较。

  • 相关阅读:
    ehcache memcache redis 三大缓存男高音
    tomcat启用压缩的方式
    Linux rpm 命令参数使用详解[介绍和应用]
    rpm常用命令及rpm参数介绍
    RPM 命令大全
    BZOJ2298: [HAOI2011]problem a(带权区间覆盖DP)
    BZOJ2037: [Sdoi2008]Sue的小球(区间DP)
    HDU3507 Print Article(斜率优化DP)
    线性代数学习笔记(几何版)
    HDU 2065 "红色病毒"问题(生成函数)
  • 原文地址:https://www.cnblogs.com/backinfile/p/8698930.html
Copyright © 2011-2022 走看看