zoukankan      html  css  js  c++  java
  • jieba+wordcloud+imageio—自定义词云

    import jieba
    import wordcloud
    from imageio import imread
    # Load 'fivestar.png'; the resulting image data is passed to WordCloud
    # below as its `mask` argument.
    mask=imread('fivestar.png')
    def shuchu():
        """Print the ten most frequent multi-character words of threekingdoms.txt.

        The file is read with the gb18030 codec and segmented with
        ``jieba.lcut``. Single-character tokens are skipped, several aliases
        are folded into one canonical name, and a fixed set of uninteresting
        words is removed before ranking. Each result line is the word
        left-aligned in 10 columns followed by its count right-aligned in 5.

        Returns:
            None. Output goes to stdout.
        """
        # Alias -> canonical name; occurrences of an alias are counted under
        # the canonical name.
        aliases = {
            '诸葛亮': '孔明',
            '孔明曰': '孔明',
            '关公': '关羽',
            '云长': '关羽',
            '玄德': '刘备',
            '玄德曰': '刘备',
            '孟德': '曹操',
            '丞相': '曹操',
        }
        excludes = {'将军', '却说', '荆州', '二人', '不可', '不能', '如此'}

        # The context manager closes the file even if reading or decoding fails.
        with open('threekingdoms.txt', 'r', encoding='gb18030') as source:
            txt = source.read()

        counts = {}
        for word in jieba.lcut(txt):
            if len(word) == 1:
                continue
            rword = aliases.get(word, word)
            counts[rword] = counts.get(rword, 0) + 1

        # pop() with a default tolerates excluded words that never occur in
        # the text; a plain `del` would raise KeyError for them.
        for word in excludes:
            counts.pop(word, None)

        items = sorted(counts.items(), key=lambda item: item[1], reverse=True)
        # Slicing (rather than indexing 0..9) avoids IndexError when fewer
        # than ten distinct words remain.
        for word, count in items[:10]:
            print('{0:<10}{1:>5}'.format(word, count))
    # Print the top-10 word frequency table.
    shuchu()

    # Read the text again for the word cloud; `with` guarantees the handle is
    # closed even if read() raises.
    with open('threekingdoms.txt','r',encoding='gb18030') as f:
        t=f.read()

    # Segment with jieba and join the tokens with spaces so the text handed
    # to generate() is space-delimited.
    ls=jieba.lcut(t)
    txt=' '.join(ls)

    # Render the cloud using the image loaded into `mask`, then save it as a PNG.
    w=wordcloud.WordCloud(font_path='msyh.ttc',width=1000,height=700,background_color='white',mask=mask)
    w.generate(txt)
    w.to_file('grwordcloud.png')

  • 相关阅读:
    [ZJOI2010]count 数字计数
    小雄数
    简单筛法函数
    [Noip模拟题]lucky
    欧拉线筛
    Intern Day78
    CodeForces1360C
    CodeForces1373B
    Intern Day78
    Intern Day78
  • 原文地址:https://www.cnblogs.com/Aluosen/p/11509480.html
Copyright © 2011-2022 走看看