词频统计

# Word-frequency statistics for an English speech stored in a text file.
from collections import Counter

# Common function words that are excluded from the frequency ranking.
s_words = {'own', 'the', 'and', 'that', 'this', 'it', 'my', 'when', 'but', 'so', 'where', 'an', 'a'}
# Punctuation, bracket, quote and space characters treated as word separators.
sep = ''',.?!:”'“;][ ’'''


def count_words(text, stop_words=None, separators=sep):
    """Return a Counter of the lower-cased words in *text*.

    Every character in *separators* is replaced by a space before the text
    is split on whitespace, so punctuation never sticks to a word.  Words
    found in *stop_words* (default: the module-level ``s_words``) are not
    counted.
    """
    if stop_words is None:
        stop_words = s_words
    # One translate() pass replaces the chain of per-character replace() calls.
    table = str.maketrans({ch: ' ' for ch in separators})
    tokens = text.lower().translate(table).split()
    return Counter(tok for tok in tokens if tok not in stop_words)


def main(path="dream.txt", top_n=10):
    """Print the *top_n* most frequent words of the file at *path*.

    Each result is printed as a ``(word, count)`` tuple, most frequent
    first; ties keep the order in which the words first appear.
    """
    # Read the raw text rather than str(list_of_lines): the list repr would
    # inject literal backslash-n sequences that end up counted as words.
    # NOTE(review): UTF-8 is assumed because the separator set contains
    # curly quotes - confirm against the actual input file.
    with open(path, encoding="utf-8") as fd:
        text = fd.read()
    for entry in count_words(text).most_common(top_n):
        print(entry)


if __name__ == "__main__":
    main()

查看全文

相关阅读:
Building Performant Expand & Collapse Animations
选取图片上对应区域
 css绝对对齐
 如何在node.js中使用neo4j
io.js的六大新特性
 npm-install once
C# EF & linq &重定向等常用操作
 js 数组
 jquery/js iframe 元素操作
 js on 和 bind 绑定click的区别事件的冒泡捕获委托

原文地址：https://www.cnblogs.com/miranda-76/p/8653947.html