Trie树的python实现(leetcode 208)
1 #!/usr/bin/env python 2 #-*- coding: utf-8 -*- 3 import sys 4 import pdb 5 6 reload(sys) 7 sys.setdefaultencoding('utf-8') 8 9 10 class TrieNode(object): 11 """Trie节点 12 13 Attributes: 14 _val: 本节点的值(非None即作为结束判断条件) 15 _next: 后继节点 16 """ 17 def __init__(self, value=None): 18 self._val = value 19 self._next = {} 20 21 def set_value(self, value=None): 22 """为当前节点设置值 23 """ 24 self._val = value 25 26 def get_value(self): 27 """获取当前节点的值 28 """ 29 return self._val 30 31 def set_next(self, key, value=None): 32 """为当前节点添加一个后继节点 33 """ 34 if key not in self._next: 35 self._next[key] = TrieNode(value) 36 return self._next[key] 37 38 def get_next(self, key): 39 """从当前节点获取指定的后继节点 40 """ 41 if key not in self._next: 42 return None 43 return self._next[key] 44 45 46 class Trie(object): 47 """Trie树 48 Attribures: 49 _root: 根节点 50 """ 51 def __init__(self): 52 # 生成root节点 53 self._root = TrieNode() 54 55 def insert(self, word): 56 """将一个单词插入trie树 57 """ 58 curr = self._root 59 60 for char in word: 61 curr = curr.set_next(char) 62 curr.set_value(True) 63 64 def search(self, word): 65 """检索一个单词是否trie树中存在 66 """ 67 curr = self._root 68 ret = False 69 70 for i, c in enumerate(word): 71 curr = curr.get_next(c) 72 if curr is None: 73 break 74 if i + 1 == len(word) and curr.get_value() is True: 75 ret = True 76 break 77 return ret 78 79 def startsWith(self, prefix): 80 """检索trie树中是否有prefix开头的单词 81 """ 82 curr = self._root 83 ret = True 84 85 for c in prefix: 86 curr = curr.get_next(c) 87 if curr is None: 88 ret = False 89 break 90 return ret 91 92 93 def main(): 94 trie = Trie() 95 trie.insert("app") 96 trie.insert("apple") 97 print trie.search("app") 98 99 100 if __name__ == '__main__': 101 main()
AC改进算法python实现
1 #!/usr/bin/env python 2 #-*- coding: utf-8 -*- 3 import sys 4 import pdb 5 6 reload(sys) 7 sys.setdefaultencoding('utf-8') 8 9 10 class ACTrieNode(object): 11 """ACTrie节点 12 13 Attributes: 14 val: 本节点的值(非None即作为结束判断条件) 15 children: 孩子节点 16 fail: 失配跳转指针 17 """ 18 def __init__(self, value=None): 19 self.val = value 20 self.children = {} 21 self.fail = None 22 23 def get_next(self, key): 24 """从本节点开始,找到children中包含key的节点,如果找不到就返回根节点 25 """ 26 if key in self.children.keys(): 27 return self.children[key] 28 if self.fail is None: 29 # fail为None就是根节点 30 return self 31 return self.fail.get_next(key) 32 33 34 class ACTrie(object): 35 """ACTrie树 36 37 Attribures: 38 _root: 根节点 39 """ 40 def __init__(self): 41 self._root = ACTrieNode() # 生成root节点 42 43 def insert(self, word): 44 """将一个单词插入trie树 45 """ 46 curr = self._root 47 for char in word: 48 if char not in curr.children: 49 curr.children[char] = ACTrieNode() 50 curr = curr.children[char] 51 curr.val = word 52 53 def update_failure(self): 54 """更新failure跳转 55 """ 56 bfs_queue = [self._root] # 利用list作为bfs缓存队列 57 58 while len(bfs_queue) > 0: 59 father = bfs_queue.pop(0) # 取出队列头部元素 60 61 # BFS遍历父节点的所有子节点,为他们设置failure 62 for key, child in father.children.items(): 63 bfs_queue.append(child) # 将当前元素放入队列尾部 64 65 if father == self._root: 66 # 当前父节点是root时,其子节点的failure也指向root 67 child.fail = self._root 68 else: 69 # 当前父节点不是root时,其子节点的failure尝试指向"(迭代)父节点的failure的同名子节点" 70 child.fail = father.fail.get_next(key) 71 72 def search(self, text): 73 """从源字符串中寻找目标字符串 74 """ 75 match_set = set() 76 curr = self._root 77 78 for char in text: 79 curr = curr.get_next(char) 80 # 搜集匹配上的单词 81 tmp_node = curr 82 while tmp_node: 83 if tmp_node.val: 84 match_set.add(tmp_node.val) 85 tmp_node = tmp_node.fail 86 return match_set 87 88 89 def main(): 90 trie = ACTrie() 91 trie.insert("abcd") 92 trie.insert("ab") 93 trie.insert("bc") 94 trie.insert("cf") 95 trie.insert("cde") 96 97 trie.update_failure() 98 99 text = 'abcdefg' 100 ret = trie.search(text) 101 print ret 102 103 if __name__ == '__main__': 104 main()