完整的中英文词频统计

def getTxt(path="zmy.txt"):
    """Return the text of *path* lower-cased with punctuation blanked out.

    Every punctuation character in the list below is replaced by a single
    space, so a later ``str.split()`` yields bare words.

    Args:
        path: File to read. Defaults to ``"zmy.txt"``, so existing
            ``getTxt()`` calls keep reading the same file.

    Returns:
        The normalised text as one string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Raw string: the backslash is meant literally, and in a plain string
    # "\]" would be an invalid escape sequence.
    punctuation = r'!"@#$%^&*()+,-./:;<=>?@[\]_`~{|}'
    to_space = str.maketrans(dict.fromkeys(punctuation, " "))

    # The context manager closes the file even if reading fails.
    with open(path) as source:
        txt = source.read()

    # Strings are immutable: the transformed value must be returned, not
    # discarded as a bare ``txt.replace(...)`` statement would do.
    return txt.lower().translate(to_space)

 
from collections import Counter

# Split the normalised English text into words on whitespace.
zmy = getTxt().split()
# Distinct words; retained as a module-level name in case later code reads it.
sunstrset = set(zmy)

# Count every word in one pass. Calling zmy.count() once per distinct word
# rescans the whole list each time, which is quadratic on large texts.
# Keys end up in first-occurrence order, so ties sort deterministically.
dic = dict(Counter(zmy))

# (word, count) pairs, ready to be ordered by frequency.
wcList = list(dic.items())
 
def tskeSecond(elem):
    """Sort key: return the item at index 1 of *elem* (the count in a pair)."""
    second = elem[1]
    return second
 
# Reorder the (word, count) pairs in place, most frequent first, then show them.
wcList[:] = sorted(wcList, key=tskeSecond, reverse=True)
print(wcList)

import jieba
from collections import Counter

# Read the Chinese text; the context manager closes the file handle promptly
# instead of leaving it open until garbage collection.
with open("zhong.txt", "r", encoding="utf-8") as zhong_file:
    word = zhong_file.read()

# Remove the full-width comma, full stop and curly double quotes before
# segmenting, so they do not show up as tokens.
for ch in "，。“”":
    word = word.replace(ch, "")

# Segment the text and materialise the result as a list so it can be both
# printed and counted.
word = list(jieba.cut(word))
print(word)

# Distinct tokens; retained as a module-level name in case later code reads it.
wordset = set(word)

# Count every token in one pass rather than rescanning the list with
# word.count() once per distinct token.
worddic = dict(Counter(word))
print(worddic)

# (token, count) pairs ordered from most to least frequent.
wcList = list(worddic.items())
wcList.sort(key=lambda x: x[1], reverse=True)
print(wcList)

查看全文

相关阅读:
渗透测试-实用版
 微观传输原理：协议、端口、套接字接口
 如何暂停和继续运行Linux程序
 QUIC：基于udp的传输新技术
 ops运维工具
 日志监控工具安装：windows上安装elk
python实现图片隐藏信息技术
 让网站不去请求favicon.ico图标
 nginx 内核优化
 nginx 模块介绍

原文地址：https://www.cnblogs.com/ZHONGmy/p/9819177.html