zoukankan      html  css  js  c++  java
  • sss

     #!/usr/bin/env python
     2 # -*-coding: UTF-8-*-
     3 
     4 #构建节点
     5 class Node:
     6   def __init__(self):
     7     self.map = {}
     8   def contain(self,key):
     9     return self.map.__contains__(key)
    10   def __getitem__(self,key):
    11     return self.map[key]
    12   def __setitem__(self,key,value):
    13     self.map[key]=value
    14     
    15 class Item:
    16   def __init__(self,key):
    17     self.key=key
    18     self.subNum=0
    19     self.subNode=Node()
    20   def add(self,key,item):
    21     self.subNum += 1
    22     self.subNode[key] = item
    23 
    24 #建树
    25 def build_tree(input):
    26   """build a tree"""
    27   sock = open(input,"r")
    28   buf = sock.read()
    29   buf = chinese(buf)
    30   tree = Item(" ")
    31   for word in buf:
    32     current = tree
    33     for x in word:
    34       if current.subNode.contain(x):
    35         current = current.subNode[x]
    36       else:
    37         item = Item(x)
    38         current.add(x,item)
    39         current = item
    40   print tree
    41   return tree
    42 
    43 def search(buf,thefile):
    44   "search."
    45   buf = chinese(buf)
    46   tree = build_tree(thefile)
    47   havefind = ""
    48   for word in buf:
    49     current = tree
    50     for x in word:
    51       if current.subNode.contain(x):
    52         current = current.subNode[x]
    53         if  current.subNum == 0:
    54           havefind += "".join(word)
    55       else:
    56         break
    57   return havefind
    58 
    59 #判断读入的是否为汉字
    60 def chinese(input):
    61   "remove the not chinese"
    62   input = unicode(input,"utf8")
    63   buf = []
    64   for word in input:
    65     if word >= u'u4e00' and word <= u'u9fa5':
    66       buf = buf+[word.encode('utf-8')]
    67   return buf
    68 
    69 
    70 def run():
    71   fileName = "test"
    72   textChar = ""
    73   buf = search(textChar,fileName)
    74   print buf
    75 
    76 if __name__ == "__main__":
    77   run()

    有关字典树的理论和概念可以参见http://blog.csdn.net/v_july_v/article/details/6897097
    有关python的建树过程http://bytes.com/topic/python/answers/827476-dictionary-tree-format-hopefully-simple

    http://www.cnblogs.com/wangfupeng1988/archive/2011/04/12/2013860.html

  • 相关阅读:
    oracle_case when
    sql记录
    修改tomcat默认编码
    sql server_TED(透明数据加密)
    web请求过程
    freemarker 标签循环list 以及获取下标值
    鼠标操作
    窗口切换
    日历控件
    xpath定位
  • 原文地址:https://www.cnblogs.com/dengyigod/p/3179059.html
Copyright © 2011-2022 走看看