zoukankan      html  css  js  c++  java
  • [itint5]字符串匹配

    http://www.itint5.com/oj/#15

    用hash来做,目前为止做到最好也是case16超时(20w的规模),即使分桶也超时。注意计算hashcode时,'a'要算成1而不是0:如果'a'为0,它就相当于26进制里的前导零,"a"和"aa"的hashcode都是0,无法区分。下面是超时的代码:

    // Number of hash buckets; a substring hash is reduced modulo this value to
    // pick the bucket it is stored in.
    #define BUCKET 65535
    // NOTE(review): despite the name this expands to the *signed* type
    // `long long`, not an unsigned one.
    #define ulong long long
    
    // uset[b] holds the substring hash codes whose value modulo BUCKET is b.
    vector<unordered_set<ulong> > uset(BUCKET);
    // pow26[k] caches 26^k for k in [0, 10]; it is filled in by initWithString.
    vector<ulong> pow26(11);
    
    // Encodes the first n characters of str as a base-26 number in which
    // 'a' is digit 1, 'b' is digit 2, and so on. Using 1 (not 0) for 'a' keeps
    // "a" and "aa" from hashing to the same value.
    // Returns 0 when n <= 0.
    // NOTE(review): assumes the characters are lowercase 'a'..'z' and that n is
    // small enough for the result to fit in a long long - confirm with callers.
    ulong hashcode(char *str, int n) {
        ulong acc = 0;
        int pos = 0;
        while (pos < n) {
            const int digit = str[pos] - 'a' + 1;
            acc = acc * 26 + digit;
            ++pos;
        }
        return acc;
    }
    
    // 预处理初始化
    void initWithString(char *str) {
        int len = 0;
        while (str[len] != '') {
            len++;
        }
        ulong num = 1;
        pow26[0] = 1;
        for (int i = 1; i <= 10; i++) {
            num *= 26;
            pow26[i] = num;
        }
        for (int l = 1; l <= 10; l++) {
            vector<ulong> codes(len);
            for (int i = 0; i < len; i++) {
                if (i + l <= len) {
                    ulong code = 0l;
                    if (i == 0) {
                        code = hashcode(str+i, l);
                        codes[i] = code;
                    } else {
                        ulong diff = pow26[l-1];
                        diff *= (str[i-1] - 'a' + 1);
                        code = (codes[i-1] - diff) * 26 + str[i+l-1] - 'a' + 1;
                        codes[i] = code;
                    }
    				
                    int buck = code % BUCKET;
                    uset[buck].insert(code);
                }
            }
        }
    }
    // 如果query是str的字串,返回true,否则返回false
    bool existSubString(char *query) {
        int len = strlen(query);
        ulong code = hashcode(query, len);
        int buck = code % BUCKET;
        if (uset[buck].find(code) != uset[buck].end()) {
            return true;
        } else {
            return false;
        }
    }
    

    如果对源字符串的每个起始位置只取长度为10的子串(末尾不足10个字符时取到结尾),去重排序后存到vector里,然后用二分查找以query为前缀的项,是能过的。注意有的源字符串长度本身就小于10。其他的备选方法还有trie以及后缀数组。

    // Sorted, duplicate-free list of the substrings collected by initWithString;
    // existSubString binary-searches it.
    vector<string> vec;
    
    // Preprocessing: for every start position in str, stores the substring that
    // begins there and is at most 10 characters long (shorter only near the end
    // of str). The substrings are de-duplicated and kept in ascending order in
    // `vec`, ready for binary search.
    //
    // str must be a NUL-terminated string. Calling this again replaces the
    // previously stored contents instead of appending to them, so `vec` always
    // stays sorted.
    void initWithString(char *str) {
        const int kWindow = 10;
        const int len = static_cast<int>(strlen(str));

        // std::set both removes duplicates and yields the strings in sorted order.
        std::set<std::string> unique_subs;
        for (int i = 0; i < len; i++) {
            const int remaining = len - i;
            const int take = remaining < kWindow ? remaining : kWindow;
            unique_subs.insert(std::string(str + i, str + i + take));
        }

        vec.assign(unique_subs.begin(), unique_subs.end());
    }
    // Returns true if query is a substring of the string that was passed to
    // initWithString, false otherwise.
    //
    // `vec` holds the sorted substrings starting at every position, so query is
    // a substring exactly when some entry starts with it. The entries that start
    // with query are contiguous in sorted order, so a binary search on the first
    // query-length characters of each entry finds one if it exists.
    //
    // Entries are at most 10 characters long, so a longer query never matches.
    bool existSubString(char *query) {
        const std::string needle(query);
        int low = 0;
        int high = static_cast<int>(vec.size()) - 1;

        while (low <= high) {
            const int mid = low + (high - low) / 2;
            // Compare only the leading needle.size() characters of the entry.
            // std::string::compare uses the same ordering that sorted `vec` and
            // never reads past the end of a shorter entry.
            const int order = vec[mid].compare(0, needle.size(), needle);
            if (order == 0) {
                return true;
            }
            if (order < 0) {
                low = mid + 1;
            } else {
                high = mid - 1;
            }
        }
        return false;
    }
    

      

  • 相关阅读:
    Nginx 集群 反向代理多个服务器
    Nginx 反向代理
    Nginx 图片服务器
    网鼎杯 pwn 记录
    demo.testfire.net 靶场测试流程记录
    靶场测试系列(已办清单)
    Burp Suite插件推荐
    ida不错的插件记录
    0ctf2017-babyheap
    0ctf2018 pwn
  • 原文地址:https://www.cnblogs.com/lautsie/p/3527386.html
Copyright © 2011-2022 走看看