zoukankan      html  css  js  c++  java
  • jieba Word frequency statistics

    #!/usr/bin/env python
    # _*_ coding: utf-8 _*_
    # @Time     : 2017/4/18 15:22
    # @Author   : otfsenter
    # @File     : strip_extr.py
    import pprint
    
    import jieba
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import PIL
    from wordcloud import WordCloud
    
    
    def wordcloudplot(txt):
        """Draw *txt* as a word cloud masked by ``she.jpg``.

        The rendered cloud is written to ``she2.jpg`` and then displayed in a
        matplotlib window with the axes hidden.

        :param txt: text whose words are drawn in the cloud.
        """
        # A bare ``import PIL`` does not load the ``Image`` submodule; import
        # it explicitly instead of relying on another package having done so.
        from PIL import Image

        # The path separators were missing, leaving a drive-relative name
        # that cannot resolve to the Windows font directory.
        # NOTE(review): Verdana ships no CJK glyphs; point this at a CJK font
        # (e.g. simhei.ttf) if Chinese words must be legible.
        font_path = r'C:\Windows\Fonts\verdana.ttf'
        alice_mask = np.array(Image.open('she.jpg'))
        # width/height are kept from the original call; wordcloud documents
        # that they are ignored whenever a mask is supplied.
        wordcloud = WordCloud(font_path=font_path,
                              background_color='white',
                              margin=1, width=10, height=2, mask=alice_mask,
                              max_words=200, max_font_size=1000, random_state=42)
        wordcloud = wordcloud.generate(txt)
        wordcloud.to_file('she2.jpg')
        plt.imshow(wordcloud)
        plt.axis('off')
        plt.show()
    
    
    def main():
        """Segment ``nms.txt`` with jieba and plot its multi-character words.

        The file is read as UTF-8 text, tokenised with ``jieba.cut``, and every
        token longer than one character is passed, space-separated, to
        ``wordcloudplot``.
        """
        # io.open accepts ``encoding`` on both Python 2 and 3, so the text is
        # decoded once here rather than re-encoded word by word later.
        import io

        with io.open('nms.txt', 'r', encoding='utf-8') as source:
            text = source.read()
        # Single-character tokens (punctuation, whitespace, particles) are
        # dropped, as in the original filter.
        words = [word for word in jieba.cut(text) if len(word) > 1]
        wordcloudplot(u' '.join(words))
    
    
    # Run main() only when this file is executed as a script, not on import.
    if __name__ == '__main__':
        main()
    
    
  • 相关阅读:
    python2.7 print 中文乱码
    caffe模型转pytorchLSTM
    不同框架下验证精度
    opencv 曲线拟合
    gFlags 测试与使用
    glog 测试与使用
    CBAMConvolutional Block Attention Module
    caffe lstm
    聚合查询和原生数据库操作(18)
    关系映射1(20)
  • 原文地址:https://www.cnblogs.com/otfsenter/p/6743443.html
Copyright © 2011-2022 走看看