zoukankan      html  css  js  c++  java
  • 基于trie树做一个ac自动机

    基于trie树做一个ac自动机


    #!/usr/bin/python
    # -*- coding: utf-8 -*-
    
    
    
    class Node:
        """One vertex of the keyword trie.

        Besides the usual tree links, every vertex has a slot for the
        failure link used when the trie is turned into an Aho-Corasick
        automaton.
        """

        def __init__(self):
            # Pointer-like fields start out unset.
            #   value  - character on the edge leading into this node
            #   father - parent node
            #   fail   - failure link
            self.value = self.father = self.fail = None
            # Outgoing edges, keyed by character: {char: Node}.
            self.children = dict()
            # Counter bumped on the node where an inserted key ends.
            self.fre = 0
    
    
    def CMP(a, b):
        """cmp-style comparator on the ``fre`` attribute, largest first.

        The result is negative when ``a.fre`` is greater than ``b.fre``,
        zero when both are equal and positive otherwise.
        """
        first, second = a.fre, b.fre
        return second - first
    
    
    class Trie:
        """Keyword trie that doubles as an Aho-Corasick automaton.

        Usage: ``insert`` every keyword, then scan texts with ``acfind``.
        ``buildac3`` computes the failure links; ``acfind`` calls it on
        demand whenever keywords were inserted since the last build.

        The code sticks to constructs that are valid on both Python 2 and
        Python 3 (``in`` instead of ``dict.has_key``, single-argument
        parenthesised ``print``).
        """

        def __init__(self):
            self.root = Node()
            # ``choose`` and ``__OpenCorrect__`` are not read by any method of
            # this class; they are kept because outside code may set them.
            self.choose = []
            self.__OpenCorrect__ = 0
            # True while the failure links do not reflect the current keywords.
            self._ac_dirty = True

        def insert(self, key):  # key is of type string
            """Add ``key`` to the trie, creating missing nodes on the way.

            The node where the key ends gets its ``fre`` counter incremented,
            so inserting the same key twice is recorded as a count of 2.

            NOTE: no case normalisation happens here. Callers that want
            case-insensitive matching must lower-case keys and scanned text
            themselves.
            """
            node = self.root
            for char in key:
                child = node.children.get(char)
                if child is None:
                    child = Node()
                    child.value = char
                    child.father = node
                    node.children[char] = child
                node = child
            node.fre += 1
            # New nodes have no failure link yet, so a rebuild is required.
            self._ac_dirty = True

        def find_node(self, string):
            """Return the node reached by spelling ``string`` from the root.

            Returns ``None`` when the path does not exist. Input that cannot
            be walked at all (not iterable, or made of unhashable items) also
            yields ``None``, as it always did; other exceptions propagate.
            """
            res_node = self.root
            try:
                for char in string:
                    res_node = res_node.children[char]
            except (KeyError, TypeError):
                return None
            return res_node

        def buildac3(self):
            """Compute the failure link of every node.

            Nodes are visited breadth-first. That order matters: resolving a
            node's link follows the links of strictly shallower nodes, so
            those must already be final. The root keeps ``fail = None``.
            """
            from collections import deque

            root = self.root
            queue = deque([root])
            while queue:
                parent = queue.popleft()
                for char, child in parent.children.items():
                    # Walk up the parent's failure chain until some node has
                    # an outgoing edge for ``char`` (or the chain runs out).
                    link = parent.fail
                    while link is not None and char not in link.children:
                        link = link.fail
                    child.fail = root if link is None else link.children[char]
                    queue.append(child)
            self._ac_dirty = False

        @staticmethod
        def _depth(node):
            """Return the number of edges between ``node`` and the root."""
            depth = 0
            while node.father is not None:
                node = node.father
                depth += 1
            return depth

        def acfind(self, content):
            """Find every occurrence of every inserted keyword in ``content``.

            The text is scanned once. Each match is printed, followed by the
            list of all matches, and that list is also returned.

            Returns:
                A list of ``(start, end)`` index pairs, both inclusive, so
                that ``content[start:end + 1]`` is the matched keyword.
                Matches are ordered by ``end``; matches sharing an ``end``
                are listed longest first. The empty keyword is never
                reported.
            """
            if self._ac_dirty:
                self.buildac3()

            root = self.root
            result = []
            node = root
            for end, char in enumerate(content):
                # On a mismatch fall back along failure links until a node
                # with a matching edge is found, or the root is reached.
                while node is not root and char not in node.children:
                    node = node.fail
                node = node.children.get(char, root)

                # Every node on the failure chain spells a suffix of the text
                # read so far, so each keyword node on it is a match ending
                # at ``end``.
                hit = node
                while hit is not root:
                    if hit.fre > 0:
                        result.append((end - self._depth(hit) + 1, end))
                    hit = hit.fail

            for start, end in result:
                print(content[start:end + 1])
            print(result)
            return result
    
    if __name__ == '__main__':
        # Small demo: index four overlapping keywords, build the failure
        # links and scan a short text.
        trie = Trie()
        trie.__OpenCorrect__ = 1  # flag is not read by any code visible here
        for keyword in ("she", "he", "her", "hers"):
            trie.insert(keyword)
        trie.buildac3()
        # A parenthesised single-argument print behaves the same on Python 2
        # and Python 3, whereas the bare print statement is a SyntaxError on 3.
        print(trie.acfind('shers'))
    
    
  • 相关阅读:
    js 复杂研究
    js 页面 保持状态 的方法
    C# 向上取整数
    js 获取dom 为null 测试
    net core 下 接受文件 测试
    layui 源码解读(部分)
    js 定时器
    js addEventListener
    C# 获得对象的命名空间 ?.
    修改maven的默认jdk版本
  • 原文地址:https://www.cnblogs.com/chuxiuhong/p/5939830.html
Copyright © 2011-2022 走看看