zoukankan      html  css  js  c++  java
  • 完整的中英文词频统计

    def getTxt():
        """Return the text of ``zmy.txt`` lower-cased, with punctuation blanked.

        Every character in the punctuation set below is replaced by a single
        space, so a following ``str.split()`` yields bare words.

        Returns:
            str: the normalised file contents.

        Raises:
            OSError: if ``zmy.txt`` cannot be opened or read.
        """
        # The context manager guarantees the file handle is closed.
        with open("zmy.txt") as src:
            txt = src.read().lower()
        # Strings are immutable, so the substitution result must be kept;
        # translate() maps every punctuation character to a space in one pass.
        # The backslash is written as "\\" so it is part of the set without
        # relying on an invalid escape sequence.
        punctuation = '!"@#$%^&*()+,-./:;<=>?@[\\]_`~{|}'
        return txt.translate(str.maketrans(punctuation, " " * len(punctuation)))
    
     
    from collections import Counter

    # Tokenise the normalised English text on whitespace.
    zmy = getTxt().split()
    # Distinct words, kept as a module-level name next to the counts.
    sunstrset = set(zmy)

    # Count every word in a single pass; calling zmy.count() once per distinct
    # word would rescan the whole list each time.  dict(...) keeps ``dic`` a
    # plain dict, keyed in order of first occurrence rather than set order.
    dic = dict(Counter(zmy))
    
     
     
    def tskeSecond(elem):
        """Return the item at index 1 of ``elem``; used below as the sort key."""
        return elem[1]

    # Order the (key, value) pairs of ``dic`` by value, largest first, then
    # show the resulting list.
    wcList = sorted(dic.items(), key=tskeSecond, reverse=True)
    print(wcList)
    
    
    
    
    import jieba
    # Read the whole UTF-8 source text; the context manager closes the file
    # handle as soon as the read finishes.
    with open("zhong.txt", "r", encoding="utf-8") as zhong_file:
        word = zhong_file.read()

    # Drop the listed punctuation marks before segmentation.
    for ch in ",。“”":
        word = word.replace(ch, "")

    # jieba.cut returns an iterable of tokens; materialise it as a list so it
    # can be printed here and reused afterwards.
    word = list(jieba.cut(word))
    print(word)
      
      
    from collections import Counter

    # Distinct tokens, kept as a module-level name next to the counts.
    wordset = set(word)

    # Count every token in a single pass; calling word.count() once per
    # distinct token would rescan the whole list each time.  Converting back
    # to a plain dict keeps the printed form a dict literal, keyed in order
    # of first occurrence.
    worddic = dict(Counter(word))
    print(worddic)

    # (token, count) pairs ordered from most to least frequent.
    wcList = sorted(worddic.items(), key=lambda pair: pair[1], reverse=True)
    print(wcList)
    
    
    
     
  • 相关阅读:
    PowerDesigner导出word表结构
    Java
    Java
    Java
    Java
    Java
    Java
    Java
    Java
    Java
  • 原文地址:https://www.cnblogs.com/ZHONGmy/p/9819177.html
Copyright © 2011-2022 走看看