zoukankan      html  css  js  c++  java
  • 词频统计+词云(傲慢与偏见)

    #4.8.py
    # Word-frequency count for the novel in "傲慢与偏见.txt": segment the text
    # with jieba, drop single-character tokens and the words in `excludes`,
    # then print the five most frequent remaining words with their counts.
    from collections import Counter

    import jieba

    # Frequent words that should not appear in the ranking.
    excludes = {"先生","没有","太太","一个","自己","小姐","我们","可是","她们","他们","知道","事情","时候"}

    # Use a context manager so the file handle is closed once the text is read.
    with open("傲慢与偏见.txt", "r", encoding='utf-8') as f:
        txt = f.read()

    words = jieba.lcut(txt)

    # Filter while counting instead of deleting afterwards: deleting a key
    # that never occurred in the text would raise KeyError.
    counts = Counter(
        word for word in words
        if len(word) != 1 and word not in excludes
    )

    # (word, count) pairs ordered from most to least frequent.
    items = counts.most_common()

    # Slice rather than index 0..4 so a text with fewer than five distinct
    # words prints what it has instead of raising IndexError.
    for word, count in items[:5]:
        print ("{0:<10}{1:>5}".format(word, count))

    #4.8.py
    # Word cloud for the novel in "傲慢与偏见.txt": segment the text with jieba,
    # drop single-character tokens and the words in `excludes`, build the
    # cloud from the remaining word frequencies, save it to "wordcloud.png"
    # and display it with matplotlib.
    from collections import Counter

    import matplotlib.pyplot as plt
    import jieba
    from wordcloud import WordCloud

    # Use a context manager so the file handle is closed once the text is read.
    with open("傲慢与偏见.txt", "r", encoding='utf-8') as f:
        txt = f.read()

    # Frequent words that should not appear in the cloud.
    excludes = {"先生","没有","太太","一个","自己","小姐","我们","可是","她们","他们","知道","事情","时候"}

    words = jieba.lcut(txt)

    # Filter while counting instead of deleting afterwards: deleting a key
    # that never occurred in the text would raise KeyError.
    counts = Counter(
        word for word in words
        if len(word) != 1 and word not in excludes
    )

    # NOTE(review): the font path '.simhei.ttf' may have lost a path separator
    # (e.g. './simhei.ttf') — confirm against the actual font file location.
    wc = WordCloud(font_path = r'.simhei.ttf',background_color = 'white',width = 500,height = 350,max_font_size=50,min_font_size=10)

    # Feed the filtered frequencies to the cloud. The previous version passed
    # the raw, unsegmented text, so the jieba segmentation and the exclude
    # list computed above had no effect on the rendered image.
    wc.generate_from_frequencies(counts)
    wc.to_file("wordcloud.png")

    plt.figure('wordcloud.png')
    plt.imshow(wc)
    plt.axis('off')
    plt.show()

     

  • 相关阅读:
    微信小程序开发工具初始化代码仓库
    微信小程序开发如何初始化远程仓库并 PUSH
    Git 提示用户密码错误如何修改
    MariaDB 在创建外键的时候提示 1005 错误
    Fiddler实现移动端手机抓包
    Windows 安装 Go语言开发环境以及使用
    laravel队列,事件简单使用方法
    视图
    laravel Passport
    多台服务器 同时部署一套代码
  • 原文地址:https://www.cnblogs.com/Adaran/p/12659857.html
Copyright © 2011-2022 走看看