Trie 树的原理这里不再解析,本园已有很多博文提到。
直接上代码:
# coding: utf-8
"""
Trie (prefix tree) for words made of the letters a-z and '-'.

Created on 2013-07-30
@author: HuangYanQiang
"""
import io

# Characters a word may contain; a character's position in this string is
# its slot in Node.children ('a'-'z' -> 0-25, '-' -> 26).
_ALPHABET = "abcdefghijklmnopqrstuvwxyz-"
_LETTER_INDEX = {letter: index for index, letter in enumerate(_ALPHABET)}

# Number of distinct characters a word may be built from (26 letters + '-').
LETTER_NUM = len(_ALPHABET)


class Node(object):
    """One trie node; the path from the root to it spells a prefix."""

    def __init__(self, is_word=False):
        # True when the prefix spelled by this node is itself a stored word.
        self.is_word = is_word
        # Number of distinct stored words that start with this prefix.
        self.prefix_count = 0
        # One slot per character of the alphabet; None means "no such child".
        self.children = [None] * LETTER_NUM


class Trie(object):
    """Case-insensitive trie over a-z and '-'.

    Words and prefixes are lower-cased before use.  ``head`` is the root
    node; its ``prefix_count`` is the total number of stored words.

    >>> t = Trie()
    >>> for w in ["apple", "abc", "abandon", "bride", "bridegroom", "good"]:
    ...     _ = t.insert(w)
    >>> t.search("apple"), t.search("app")
    (True, False)
    >>> t.words_with_prefix("b")
    ['bride', 'bridegroom']
    >>> t.count_prefix("a")
    3
    >>> t.output("go")
    good; 
    """

    def __init__(self):
        self.head = Node()

    @staticmethod
    def _indices(text):
        """Return the child-slot index of every character of *text*.

        Returns None when *text* contains a character outside the alphabet.
        A lookup table is used instead of ``ord(c) - ord('a')`` because that
        arithmetic yields negative or out-of-range list indices for other
        characters, which either alias a different letter or raise.
        """
        try:
            return [_LETTER_INDEX[letter] for letter in text.lower()]
        except KeyError:
            return None

    def _find(self, prefix):
        """Return the node reached by spelling *prefix*, or None if absent."""
        indices = self._indices(prefix)
        if indices is None:
            return None
        node = self.head
        for index in indices:
            node = node.children[index]
            if node is None:
                return None
        return node

    def insert(self, word):
        """Insert *word* into the trie.

        Returns True if the word was added, False if it was empty or already
        present (in which case no counter changes).

        Raises:
            ValueError: *word* contains a character other than a-z or '-'.
                Nothing is modified in that case.
        """
        indices = self._indices(word)
        if indices is None:
            raise ValueError("unsupported character in word: %r" % (word,))
        if not indices:
            return False

        node = self.head
        path = [node]
        for index in indices:
            child = node.children[index]
            if child is None:
                child = node.children[index] = Node()
            path.append(child)
            node = child

        if node.is_word:
            # Duplicate insert: counting it again would inflate prefix counts.
            return False
        node.is_word = True
        for visited in path:
            visited.prefix_count += 1
        return True

    def search(self, word):
        """Return True if *word* is stored as a complete word."""
        node = self._find(word)
        return node is not None and node.is_word

    def count_prefix(self, prefix):
        """Return how many stored words start with *prefix* (0 if none)."""
        node = self._find(prefix)
        return 0 if node is None else node.prefix_count

    def words_with_prefix(self, prefix):
        """Return all stored words starting with *prefix*.

        Words come back in alphabet order ('a'..'z', then '-'), with a word
        listed before any longer word it is a prefix of.
        """
        start = self._find(prefix)
        if start is None:
            return []
        words = []
        stack = [(start, prefix.lower())]
        while stack:
            node, text = stack.pop()
            if node.is_word:
                words.append(text)
            # Push children in reverse so they are popped in alphabet order.
            for index in range(LETTER_NUM - 1, -1, -1):
                child = node.children[index]
                if child is not None:
                    stack.append((child, text + _ALPHABET[index]))
        return words

    def output(self, strPrefix):
        """Print every stored word that starts with *strPrefix*, one per line.

        An empty or None prefix only prints a hint and returns.
        """
        if not strPrefix:
            print("please tell me prefix letter.")
            return
        for word in self.words_with_prefix(strPrefix):
            print(word + "; ")


class BuildTrie(object):
    """Build a Trie from a text file holding one word per line.

    Args:
        path: word-list file; defaults to "EnglishDict.txt" in the current
            directory.
        encoding: text encoding of the file; undecodable bytes are replaced
            rather than aborting the load.

    The resulting trie is available as ``self.trie``.
    """

    def __init__(self, path="EnglishDict.txt", encoding="utf-8"):
        self.trie = Trie()
        with io.open(path, encoding=encoding, errors="replace") as word_file:
            for raw_line in word_file:
                # Drop the line terminator and surrounding blanks, then
                # normalise to lower case.
                word = raw_line.strip().lower()
                if not word:
                    continue
                try:
                    self.trie.insert(word)
                except ValueError:
                    print(word + ", it is not a word")


if __name__ == "__main__":
    import doctest
    doctest.testmod()

    bt = BuildTrie()
    t = bt.trie
    t.output("z")
    print(t.search("apple"))
    print(t.search("fff"))
    print(t.search("good"))
    print("a num:" + str(t.count_prefix("a")))
    print("ab num:" + str(t.count_prefix("ab")))
    print("b num:" + str(t.count_prefix("b")))