zoukankan      html  css  js  c++  java
  • 第一次python词云尝试

    1 分析英文文本

     1 from wordcloud import WordCloud
     2 import os
     3 
     4 cur_path = os.path.dirname(__file__)
     5 
     6 with open(os.path.join(cur_path, 'love_en.txt')) as fp:
     7     txt = fp.read()
     8     # print(txt)
     9     wordcloud = WordCloud().generate(txt)
    10     image = wordcloud.to_image()
    11     image.show()

    发生错误,错误类型:OSError: cannot open resource

    解决方案:添加字体

    改正后代码:

     1 from wordcloud import WordCloud
     2 import os
     3 
     4 cur_path = os.path.dirname(__file__)
     5 
     6 with open(os.path.join(cur_path, 'love_en.txt')) as fp:
     7     txt = fp.read()
     8     # print(txt)
     9     wordcloud = WordCloud(font_path = 'FZLTXIHK.TTF').generate(txt)
    10     image = wordcloud.to_image()
    11     image.show()

    进一步优化代码:

     1 from wordcloud import WordCloud
     2 import os
     3 
     4 cur_path = os.path.dirname(__file__)
     5 
     6 with open(os.path.join(cur_path, 'love_en.txt')) as fp:
     7     txt = fp.read()
     8     # print(txt)
     9     wordcloud = WordCloud(font_path = 'FZLTXIHK.TTF', # 字体
    10                           background_color = 'black', # 背景色
    11                           max_words = 30, # 最大显示单词数
    12                           max_font_size = 60 # 频率最大单词字体大小
    13                         ).generate(txt)
    14     image = wordcloud.to_image()
    15     image.show()

    效果图:

    2 分析中文文本

     1 import jieba
     2 from wordcloud import WordCloud
     3 import os
     4 
     5 cur_path = os.path.dirname(__file__)
     6 
     7 def chinese_jieba(txt):
     8     wordlist_jieba = jieba.cut(txt) # 将文本分割,返回列表
     9     txt_jieba = " ".join(wordlist_jieba) # 将列表拼接为以空格为间断的字符串
    10     return txt_jieba
    11 
    12 stopwords = {'这些':0, '那些':0, '因为':0, '所以':0} # 噪声词
    13 
    14 with open(os.path.join(cur_path, '择天记.txt')) as fp:
    15     txt = fp.read()
    16     txt = chinese_jieba(txt)
    17     # print(txt)
    18     wordcloud = WordCloud(font_path = 'FZLTXIHK.TTF', # 字体
    19                           background_color = 'black', # 背景色
    20                           max_words = 30, # 最大显示单词数
    21                           max_font_size = 60, # 频率最大单词字体大小
    22                           stopwords = stopwords # 过滤噪声词
    23                         ).generate(txt)
    24     image = wordcloud.to_image()
    25     image.show()

    效果图:

    3 进一步优化显示效果

     1 import jieba
     2 from wordcloud import WordCloud
     3 import os
     4 import numpy
     5 import PIL.Image as Image
     6 
     7 cur_path = os.path.dirname(__file__)
     8 
     9 def chinese_jieba(txt):
    10     wordlist_jieba = jieba.cut(txt) # 将文本分割,返回列表
    11     txt_jieba = " ".join(wordlist_jieba) # 将列表拼接为以空格为间断的字符串
    12     return txt_jieba
    13 
    14 stopwords = {'这些':0, '那些':0, '因为':0, '所以':0} # 噪声词
    15 mask_pic = numpy.array(Image.open(os.path.join(cur_path, 'love.jpg')))
    16 
    17 with open(os.path.join(cur_path, '择天记.txt')) as fp:
    18     txt = fp.read()
    19     txt = chinese_jieba(txt)
    20     # print(txt)
    21     wordcloud = WordCloud(font_path = 'FZLTXIHK.TTF', # 字体
    22                           background_color = 'white', # 背景色
    23                           max_words = 100, # 最大显示单词数
    24                           max_font_size = 60, # 频率最大单词字体大小
    25                           stopwords = stopwords, # 过滤噪声词
    26                           mask = mask_pic # 自定义显示的效果图
    27                         ).generate(txt)
    28     image = wordcloud.to_image()
    29     image.show()

    效果图:

  • 相关阅读:
    关于mybatis中的#{},和${} 的区别
    免费的手机号码归属地查询API接口文档
    小程序即将上线,现在就可以开发啦
    (二)cordova+framework7入门——笑笑APP
    (一)半小时开发一个APP
    图灵机器人(问答机器人)API调用示例
    免费股票数据API接口
    CTO和技术副总裁应该如何分工?谁才是技术领导者?
    基于JAVA的全国天气预报接口调用示例
    PhpSms 稳定可靠的php短信发送库
  • 原文地址:https://www.cnblogs.com/jiangchengzi93812/p/9450455.html
Copyright © 2011-2022 走看看