zoukankan      html  css  js  c++  java
  • trie树查找和hash查找比较(大量数据)

    trie树代码

    #include<stdio.h>
    #include<stdlib.h>
    #include<string.h>
    #include<time.h>
    #include<fstream>
    #include<iostream>
    #include<sstream>
    #include<string>
    #include<vector>
    using namespace std;
    // One node of a trie keyed on the lowercase letters 'a'..'z'.
    class trienode
    {
    public:
        char *word;            // copy of the word that ends at this node; NULL if no word ends here
        int count;             // word frequency: how many times that word has been inserted
        trienode *branch[26];  // one child slot per letter; NULL means "no such child"
    public:
        // Creates an empty node: no word, zero count, no children.
        trienode()
        {
            word = NULL;
            count = 0;
            // Clear each slot explicitly. This avoids memset (whose fill argument
            // is an int, not a pointer) and needs no extra header.
            for (int i = 0; i < 26; i++)
            {
                branch[i] = NULL;
            }
        }
    };
    // Word-frequency trie over the lowercase letters 'a'..'z'.
    class trie
    {
    public:
        // Top of the tree; allocated by the constructor.
        trienode *root;

        trie();
        ~trie();

        // Records one occurrence of str.
        void Insert(char *str);

        // Exact-match lookup. On a hit, stores the word's frequency in count
        // and returns true; otherwise returns false.
        bool Search(char*str, int &count);

        // Prints every word stored at or below root, one per line, in
        // alphabetical order.
        void printall(trienode *root);

        // Prefix match: prints every stored word that begins with str.
        void printpre(char *str);
    };
    // Builds an empty trie that consists of a single freshly allocated root node.
    trie::trie() : root(new trienode())
    {
    }
    // Releases every node reachable from root together with each node's word
    // buffer (allocated with new[] by Insert).
    // An explicit stack is used instead of recursion so that very long words
    // cannot exhaust the call stack.
    // NOTE: trie still has the implicit copy operations; copying a trie and
    // destroying both copies would free the nodes twice, so do not copy it.
    trie::~trie()
    {
        vector<trienode*> pending;
        pending.push_back(root);
        while (!pending.empty())
        {
            trienode *node = pending.back();
            pending.pop_back();
            if (!node)
            {
                continue;
            }
            for (int i = 0; i < 26; i++)
            {
                pending.push_back(node->branch[i]);
            }
            delete[] node->word;  // deleting NULL is a no-op
            delete node;
        }
        root = NULL;
    }
    // Records one occurrence of str in the trie.
    //
    // str must be a NUL-terminated string; a NULL pointer is ignored.
    // Only the letters 'a'..'z' are accepted. As soon as any other character is
    // met the call returns without recording the word (nodes already created
    // for the valid prefix are kept, but no word is marked on them).
    // The first insertion of a word stores a private copy of it on the final
    // node; every insertion increments that node's count.
    void trie::Insert(char *str)
    {
        if (!str)
        {
            return;
        }
        trienode *tt = root;
        for (int i = 0; str[i]; i++)
        {
            int index = str[i] - 'a';
            // Valid slots are 0..25. Anything else (for example '{', which maps
            // to 26) must be rejected before it can index past branch[].
            if (index < 0 || index >= 26)
            {
                return;
            }
            if (tt->branch[index] == NULL)
            {
                tt->branch[index] = new trienode();
            }
            tt = tt->branch[index];
        }
        tt->count++;
        if (!tt->word)
        {
            // Portable copy (length includes the terminating NUL); strcpy_s is
            // not available on every toolchain.
            size_t len = strlen(str) + 1;
            tt->word = new char[len];
            memcpy(tt->word, str, len);
        }
    }
    // Exact-match lookup of str.
    //
    // Returns true and stores the word's frequency in count when str was
    // previously inserted. Returns false (count untouched) when str is NULL,
    // contains a character outside 'a'..'z', or is not a stored word.
    bool trie::Search(char *str, int &count)
    {
        if (!str)
        {
            return false;
        }
        trienode *tt = root;
        while (tt && *str)
        {
            int index = *str - 'a';
            // Valid slots are 0..25; index 26 would read past branch[].
            if (index < 0 || index >= 26) return false;
            tt = tt->branch[index];
            str++;
        }
        // A node existing is not enough: a word must actually end on it.
        if (tt && tt->word)
        {
            count = tt->count;
            return true;
        }
        return false;
    }
    // Prints every word stored at or below the given node, one per line.
    // A node's own word is printed before its children and the children are
    // visited from 'a' to 'z', so the output is in alphabetical order.
    // A NULL node prints nothing.
    void trie::printall(trienode *root)
    {
        if (root == NULL)
        {
            return;
        }
        if (root->word != NULL)
        {
            cout << root->word << endl;
        }
        int slot = 0;
        while (slot < 26)
        {
            printall(root->branch[slot]);
            ++slot;
        }
    }
    // Prefix match: prints every stored word that begins with str, one per
    // line. Prints nothing when str is NULL, contains a character outside
    // 'a'..'z', or no stored word has that prefix.
    void trie::printpre(char *str)
    {
        if (!str)
        {
            return;
        }
        trienode *t = root;
        while (t && *str)
        {
            int index = *str - 'a';
            // Valid slots are 0..25; index 26 would read past branch[].
            if (index < 0 || index >= 26) return;
            t = t->branch[index];
            str++;
        }
        if (t)
        {
            printall(t);
        }
    }
    int main()
    {
        clock_t startTime, endTime;
        startTime = clock();
        trie *t = new trie();
        ifstream it("C:/Users/ww/Desktop/string.txt");
        string sline;
        string str = "";
        while (it&&getline(it, sline))
        {
            str += sline + " ";
        }
        it.close();
        for (int i = 0; i < str.length(); i++)
        {
            if (str[i] == '.' || str[i] == ',' || str[i] == '(' || str[i] == '(')
            {
                str.erase(i, 1);
            }
        }
        string word;
        stringstream ss(str);
        vector<string> vec;
        while (ss >> word)
        {
            vec.push_back(word);
        }
        vector<string>::iterator iter;
        for (iter = vec.begin(); iter != vec.end(); iter++)
        {
            t->Insert((char*)(*iter).data());
        }
        int val = -1;
        if (t->Search("the", val))
        {
            cout << val << endl;
        }
        else
        {
            cout << "empty" << endl;
        }
        endTime = clock();
        cout << "the running time is " << (double)(endTime - startTime) << endl;
        return 0;
    }

    hash代码

    #include<iostream>
    #include<fstream>
    #include<sstream>
    #include<string>
    #include<vector>
    #include<stdlib.h>
    #include<time.h>
    using namespace std;
    // One link of a bucket chain in the hash table.
    struct hashnode
    {
        // Stored key string.
        char *p;
        // Following link in the same bucket; NULL at the end of the chain.
        hashnode *next;
    };
    // Fixed-size (1000 buckets) hash table of C strings with separate chaining.
    class hashmap
    {
    public:
        // Bucket heads. Each head is a placeholder node allocated by the
        // constructor; the stored entries hang off its next pointer.
        hashnode *hashps[1000];

        hashmap();
        ~hashmap();

        // Maps a string to its bucket index.
        int String2Int(char *p);

        // Adds a copy of p to the front of its bucket's chain.
        void Insert(char *p);

        // Tells whether a string equal to p has been inserted.
        bool Find(char *p);
    };
    // Gives each of the 1000 buckets its own placeholder head node with an
    // empty chain behind it.
    hashmap::hashmap()
    {
        for (int bucket = 0; bucket < 1000; ++bucket)
        {
            hashnode *head = new hashnode();
            head->next = NULL;
            hashps[bucket] = head;
        }
    }
    // Frees every bucket: the placeholder head, each chained entry, and each
    // entry's string (allocated with new[] by Insert).
    // NOTE: hashmap still has the implicit copy operations; copying a hashmap
    // and destroying both copies would free the nodes twice, so do not copy it.
    hashmap::~hashmap()
    {
        for (int i = 0; i < 1000; i++)
        {
            hashnode *node = hashps[i];
            while (node)
            {
                hashnode *following = node->next;
                delete[] node->p;  // NULL for the placeholder head: a no-op
                delete node;
                node = following;
            }
            hashps[i] = NULL;
        }
    }
    // Hashes a NUL-terminated string to a bucket index in [0, 999] by summing
    // its bytes modulo 1000.
    //
    // Bytes are summed as unsigned values: where plain char is signed, bytes
    // >= 0x80 would otherwise make the sum (and therefore the index) negative.
    // Unsigned arithmetic also keeps the sum well defined for very long input.
    int hashmap::String2Int(char *p)
    {
        unsigned int num = 0;
        while (*p)
        {
            num += static_cast<unsigned char>(*p);
            p++;
        }
        return static_cast<int>(num % 1000);
    }
    void hashmap::Insert(char *p)
    {
        int index = String2Int(p);
        hashnode *hash = hashps[index];
        hashnode *newr = new hashnode();
        newr->p = new char[strlen(p) + 1];
        strcpy_s(newr->p, strlen(p) + 1, p);
        newr->next = hash->next;
        hash->next = newr;
    }
    // Returns true when a string equal to p has been inserted, false otherwise.
    bool hashmap::Find(char *p)
    {
        int index = String2Int(p);
        // Skip the placeholder head; real entries start at its next pointer.
        for (hashnode *w = hashps[index]->next; w; w = w->next)
        {
            if (strcmp(p, w->p) == 0)
            {
                return true;
            }
        }
        // Chain exhausted (or empty) without a match. Every path must return a
        // value: falling off the end of a non-void function is undefined.
        return false;
    }
    // Reads and returns the int that p points to. p must not be NULL.
    int re(int *p)
    {
        const int value = *p;
        return value;
    }
    int main()
    {
        clock_t startTime, endTime;
        startTime = clock();
        hashmap *t = new hashmap();
        ifstream it("C:/Users/ww/Desktop/string.txt");
        string sline;
        string str = "";
        while (it&&getline(it, sline))
        {
            str += sline + " ";
        }
        it.close();
        for (int i = 0; i < str.length(); i++)
        {
            if (str[i] == '.' || str[i] == ',' || str[i] == '(' || str[i] == '(')
            {
                str.erase(i, 1);
            }
        }
        stringstream ss(str);
        string word;
        vector<string> vec;
        while (ss >> word)
        {
            vec.push_back(word);
        }
        vector<string>::iterator iter;
        for (iter = vec.begin(); iter != vec.end(); iter++)
        {
            t->Insert((char*)(*iter).data());
        }
        cout << "the result is: " << t->Find("the") << endl;
        endTime = clock();
        cout << "the running time is " << (double)(endTime - startTime) << endl;
        return 0;
    }

    trie树的查找时间是O(L),其中L是待查字符串的长度;hash的查找时间是O(LL),其中LL是关键字对应哈希地址上的链表长度(计算哈希值本身还需要O(L))。在哈希冲突较少的情况下,两者都和数据的大小基本无关,查找都很高效。

  • 相关阅读:
    idea websitehttp://www.youyur.com/
    chromium project相关页面
    WebKit Remote Debugging
    天兰尾货
    GitCookbook from google chromium
    ocr识别
    Google发布Chrome官方扩展DOM Snitch 可发现网页代码漏洞
    WebKit2 High Level Document ¶
    Phantom JS: an alternative to Selenium
    Python Extension
  • 原文地址:https://www.cnblogs.com/semen/p/7196575.html
Copyright © 2011-2022 走看看