zoukankan      html  css  js  c++  java
  • Python 统计文本中单词的个数

    1.读文件,通过正则匹配

     def statisticWord(input_path=r'D:\test\test.txt',
                       output_path=r'D:\test\output.txt',
                       pattern=r'&#\d+;|&\w+;'):
         r"""Count regex matches in a text file and write a frequency report.

         Every line of ``input_path`` is scanned with ``pattern``; each match
         is tallied.  The tallies are written to ``output_path`` one per line,
         most frequent first (ties keep first-seen order), formatted as
         ``"%-15s:%15s"``.

         Args:
             input_path: UTF-8 text file to read.
             output_path: UTF-8 report file to create or overwrite.
             pattern: Regular expression whose matches are counted.  The
                 default matches HTML character references such as ``&#123;``
                 or ``&amp;``.  NOTE(review): the published listing had lost
                 its backslashes, so this default is a reconstruction; pass
                 ``r'\w+'`` to count plain words instead.

         Returns:
             None.  The result is the file written at ``output_path``.
         """
         # Local imports keep the snippet runnable when pasted on its own.
         import re
         from collections import Counter

         matcher = re.compile(pattern)  # compile once, outside the line loop
         counts = Counter()
         with open(input_path, encoding='utf-8') as a_file:
             for line in a_file:
                 counts.update(matcher.findall(line))

         # most_common() sorts by count, descending, with a stable sort, so
         # equally frequent tokens stay in the order they were first seen.
         with open(output_path, encoding='utf-8', mode='w') as b_file:
             b_file.writelines(
                 "%-15s:%15s\n" % (token, count)
                 for token, count in counts.most_common()
             )

    2. 通过命令行参数

    def statisticWord2(argv=None):
        """Print how often each word occurs in the files named on the command line.

        Each file is split on whitespace; surrounding whitespace, punctuation
        and digits are stripped from every token, and tokens shorter than two
        characters are ignored.  Results are printed in alphabetical order as
        ``'<word>' occurs <n> times``.

        Args:
            argv: Optional list of arguments (file names), without the program
                name.  Defaults to ``sys.argv[1:]``.

        Raises:
            SystemExit: After printing the usage line, when no argument is
                given or the first argument is ``-h`` / ``--help``.
        """
        # Local imports keep the snippet runnable when pasted on its own.
        import string
        import sys

        args = sys.argv[1:] if argv is None else list(argv)
        if not args or args[0] in {"-h", "--help"}:
            print("usage: filename_1 filename_2 ... filename_n")
            sys.exit()

        words = {}
        # Characters trimmed from both ends of a token.  string.punctuation
        # already contains both quote characters; they are kept explicitly to
        # mirror the original listing.
        strip = string.whitespace + string.punctuation + string.digits + "\"'"
        for filename in args:
            # The context manager closes each file even if reading fails.
            with open(filename) as text_file:
                for line in text_file:
                    for word in line.split():
                        # Only leading/trailing characters are removed;
                        # inner ones (e.g. the apostrophe in "it's") stay.
                        word = word.strip(strip)
                        if len(word) >= 2:
                            words[word] = words.get(word, 0) + 1
        for word in sorted(words):
            print("'{0}' occurs {1} times".format(word, words[word]))
  • 相关阅读:
    Servlet监听器及在线用户
    数据分页jdbc+mysql实现
    使用ajax验证用户名重复
    Mysql中的事务
    用户登录注册案例分析
    Java连接mysql数据库
    Java连接sqlite数据库
    虚拟主机TOMCAT配置
    用jquery控制表格奇偶行及活动行颜色
    JDK安装后 没有tools.jar 和dt.jar包的解决办法
  • 原文地址:https://www.cnblogs.com/zyf7630/p/3209976.html
Copyright © 2011-2022 走看看