zoukankan      html  css  js  c++  java
  • 字符串模式匹配算法系列(三):Trie树及AC改进算法

    Trie树的python实现(leetcode 208)

      1 #!/usr/bin/env python
      2 #-*- coding: utf-8 -*-
      3 import sys
      4 import pdb
      5 
      6 reload(sys)
      7 sys.setdefaultencoding('utf-8')
      8 
      9 
     10 class TrieNode(object):
     11     """Trie节点
     12 
     13     Attributes:
     14         _val: 本节点的值(非None即作为结束判断条件)
     15         _next: 后继节点
     16     """
     17     def __init__(self, value=None):
     18         self._val = value
     19         self._next = {}
     20 
     21     def set_value(self, value=None):
     22         """为当前节点设置值
     23         """
     24         self._val = value
     25 
     26     def get_value(self):
     27         """获取当前节点的值
     28         """
     29         return self._val
     30 
     31     def set_next(self, key, value=None):
     32         """为当前节点添加一个后继节点
     33         """
     34         if key not in self._next:
     35             self._next[key] = TrieNode(value)
     36         return self._next[key]
     37 
     38     def get_next(self, key):
     39         """从当前节点获取指定的后继节点
     40         """
     41         if key not in self._next:
     42             return None
     43         return self._next[key]
     44 
     45 
     46 class Trie(object):
     47     """Trie树
     48     Attribures:
     49         _root: 根节点
     50     """
     51     def __init__(self):
     52         # 生成root节点
     53         self._root = TrieNode()
     54 
     55     def insert(self, word):
     56         """将一个单词插入trie树
     57         """
     58         curr = self._root
     59 
     60         for char in word:
     61             curr = curr.set_next(char)
     62         curr.set_value(True)
     63 
     64     def search(self, word):
     65         """检索一个单词是否trie树中存在
     66         """
     67         curr = self._root
     68         ret = False
     69 
     70         for i, c in enumerate(word):
     71             curr = curr.get_next(c)
     72             if curr is None:
     73                 break
     74             if i + 1 == len(word) and curr.get_value() is True:
     75                 ret = True
     76                 break
     77         return ret
     78 
     79     def startsWith(self, prefix):
     80         """检索trie树中是否有prefix开头的单词
     81         """
     82         curr = self._root
     83         ret = True
     84 
     85         for c in prefix:
     86             curr = curr.get_next(c)
     87             if curr is None:
     88                 ret = False
     89                 break
     90         return ret
     91 
     92 
     93 def main():
     94     trie = Trie()
     95     trie.insert("app")
     96     trie.insert("apple")
     97     print trie.search("app")
     98 
     99 
    100 if __name__ == '__main__':
    101     main()

    AC改进算法python实现

      1 #!/usr/bin/env python
      2 #-*- coding: utf-8 -*-
      3 import sys
      4 import pdb
      5 
      6 reload(sys)
      7 sys.setdefaultencoding('utf-8')
      8 
      9 
     10 class ACTrieNode(object):
     11     """ACTrie节点
     12 
     13     Attributes:
     14         val: 本节点的值(非None即作为结束判断条件)
     15         children: 孩子节点
     16         fail: 失配跳转指针
     17     """
     18     def __init__(self, value=None):
     19         self.val = value
     20         self.children = {}
     21         self.fail = None
     22 
     23     def get_next(self, key):
     24         """从本节点开始,找到children中包含key的节点,如果找不到就返回根节点
     25         """
     26         if key in self.children.keys():
     27             return self.children[key]
     28         if self.fail is None:
     29             # fail为None就是根节点
     30             return self
     31         return self.fail.get_next(key)
     32 
     33 
     34 class ACTrie(object):
     35     """ACTrie树
     36 
     37     Attribures:
     38         _root: 根节点
     39     """
     40     def __init__(self):
     41         self._root = ACTrieNode() # 生成root节点
     42 
     43     def insert(self, word):
     44         """将一个单词插入trie树
     45         """
     46         curr = self._root
     47         for char in word:
     48             if char not in curr.children:
     49                 curr.children[char] = ACTrieNode()
     50             curr = curr.children[char]
     51         curr.val = word
     52 
     53     def update_failure(self):
     54         """更新failure跳转
     55         """
     56         bfs_queue = [self._root] # 利用list作为bfs缓存队列
     57 
     58         while len(bfs_queue) > 0:
     59             father = bfs_queue.pop(0) # 取出队列头部元素
     60 
     61             # BFS遍历父节点的所有子节点,为他们设置failure
     62             for key, child in father.children.items():
     63                 bfs_queue.append(child) # 将当前元素放入队列尾部
     64 
     65                 if father == self._root:
     66                     # 当前父节点是root时,其子节点的failure也指向root
     67                     child.fail = self._root
     68                 else:
     69                     # 当前父节点不是root时,其子节点的failure尝试指向"(迭代)父节点的failure的同名子节点"
     70                     child.fail = father.fail.get_next(key)
     71 
     72     def search(self, text):
     73         """从源字符串中寻找目标字符串
     74         """
     75         match_set = set()
     76         curr = self._root
     77 
     78         for char in text:
     79             curr = curr.get_next(char)
     80             # 搜集匹配上的单词
     81             tmp_node = curr
     82             while tmp_node:
     83                 if tmp_node.val:
     84                     match_set.add(tmp_node.val)
     85                 tmp_node = tmp_node.fail
     86         return match_set
     87 
     88 
     89 def main():
     90     trie = ACTrie()
     91     trie.insert("abcd")
     92     trie.insert("ab")
     93     trie.insert("bc")
     94     trie.insert("cf")
     95     trie.insert("cde")
     96 
     97     trie.update_failure()
     98 
     99     text = 'abcdefg'
    100     ret = trie.search(text)
    101     print ret
    102 
    103 if __name__ == '__main__':
    104     main()
  • 相关阅读:
    Hive优化
    RDD
    从Hadoop MapReduce到Spark
    Spark on yarn模式
    Hive的web端配置——HWI
    Spark环境搭建
    java身份证号校验
    java手机号码、电子邮箱校验
    服务器运维的日常维护工作
    JavaSSM框架简介
  • 原文地址:https://www.cnblogs.com/zhongmiaozhimen/p/11258165.html
Copyright © 2011-2022 走看看