zoukankan      html  css  js  c++  java
  • Python统计excel表格中文本的词频,生成词云图片

    import xlrd
    import jieba
    import pymysql
    import matplotlib.pylab as plt
    from wordcloud import WordCloud
    from collections import Counter
    import numpy as np
    
    def getExcelData(excel, txt, sheet_index=2, column_index=2):
        """Count word frequencies in one column of an Excel sheet.

        Every text cell of the chosen column is appended to ``txt``, the
        combined text is segmented with ``jieba.cut`` and the tokens are
        counted.

        Args:
            excel: Path of the workbook, passed to ``xlrd.open_workbook``.
            txt: Text that is placed in front of the cell contents before
                segmentation (pass ``''`` to count the sheet only).
            sheet_index: Zero-based index of the sheet to read. The default
                of 2 selects the THIRD sheet.
                NOTE(review): the original comment said "second sheet" while
                the code used index 2 -- confirm which one is intended.
            column_index: Zero-based index of the column to read; the
                default of 2 is the third column.

        Returns:
            A dict mapping each token longer than one character to its
            count, ordered from most to least frequent.

        NOTE(review): xlrd 2.0 and later read only ``.xls`` workbooks;
        confirm the installed version can open the file being passed in.
        """
        readbook = xlrd.open_workbook(excel)
        sheet = readbook.sheet_by_index(sheet_index)

        # Gather the pieces and join once instead of growing a string with
        # ``+=`` on every row. Non-text cells (numbers, dates, booleans) are
        # skipped: they carry no words and would make the join fail.
        pieces = [txt]
        for row in range(sheet.nrows):
            value = sheet.cell(row, column_index).value
            if isinstance(value, str):
                pieces.append(value)
        text = ''.join(pieces)

        # Keep tokens longer than one character and drop Windows line
        # breaks, which jieba may emit as a single two-character token.
        counts = Counter(
            word for word in jieba.cut(text)
            if len(word) > 1 and word != '\r\n'
        )

        # most_common() yields (word, count) pairs in descending order, so
        # the resulting dict keeps that order as the word-cloud source data.
        return dict(counts.most_common())
    
    def makeWordCloud(txt, output_path='abc.png',
                      font_path=r"C:\Windows\Fonts\STXINGKA.TTF"):
        """Render a circular word cloud, save it to disk and display it.

        Args:
            txt: Mapping of word -> frequency, as returned by
                ``getExcelData`` (despite the name, this is not raw text).
            output_path: File the image is written to. Defaults to
                ``'abc.png'`` in the current working directory.
            font_path: Font file used to draw the words. A font with CJK
                glyphs is needed for Chinese text; the default is the
                STXINGKA font in the Windows fonts directory.
        """
        # Build a 300x500 mask whose value is 255 everywhere outside a
        # circle of radius 150 centred at (150, 150) and 0 inside it.
        # WordCloud treats white (255) mask entries as "masked out", so the
        # words end up inside the circle.
        x, y = np.ogrid[:300, :500]
        mask = (x - 150) ** 2 + (y - 150) ** 2 > 150 ** 2
        mask = 255 * mask.astype(int)

        wc = WordCloud(background_color="white",
                       max_words=500,
                       mask=mask,
                       repeat=True,
                       # NOTE(review): per the wordcloud docs, width/height
                       # are ignored when a mask is given; kept as in the
                       # original call.
                       width=1000,
                       height=1000,
                       # A larger scale yields a higher-resolution image
                       # with crisper glyphs.
                       scale=4,
                       font_path=font_path)
        wc.generate_from_frequencies(txt)
        wc.to_file(output_path)

        plt.axis("off")
        plt.imshow(wc, interpolation="bilinear")
        plt.show()
    
    
    if __name__ == '__main__':
        # Script entry point: count the words found in getdata.xlsx
        # (starting from an empty seed text) and draw them as a word cloud.
        frequencies = getExcelData('getdata.xlsx', '')
        makeWordCloud(frequencies)
    

      

  • 相关阅读:
    log4js 2.X版本配置详解
    ping -c 3 localhost
    children_process.exec 执行 ping命令报错
    淘宝双十一为什么会出现通道拥挤?
    'ascii' codec can't decode byte 0xd6 in position 0
    window 安装grunt
    display:none与visible:hidden的区别 slideDown与
    ztree-demo 2
    ztree-demo
    技术总结PHP+微信
  • 原文地址:https://www.cnblogs.com/ning-blogs/p/10491361.html
Copyright © 2011-2022 走看看