from collections import Counter

import jieba

# Word-frequency report: segment the input text with jieba and print the ten
# most frequent words, ignoring single-character tokens.

# Read the whole text up front; the context manager closes the file handle
# instead of leaving it to the garbage collector.
with open("C:/Users/25764/Desktop/jieba1.txt", "r", encoding="utf-8") as source_file:
    txt = source_file.read()

words = jieba.lcut(txt)

# Tokens of length 1 are excluded from the tally.
counts = Counter(word for word in words if len(word) != 1)

# (word, count) pairs sorted by descending count; ties keep first-seen order.
items = counts.most_common()

# Slicing instead of indexing 0..9 avoids an IndexError when the text
# contains fewer than ten distinct multi-character words.
for word, count in items[:10]:
    print("{0:<10}{1:>5}".format(word, count))
import jieba
from matplotlib import pyplot as plt
from wordcloud import WordCloud
from PIL import Image
import numpy as np

# Word-cloud rendering: segment the input text with jieba, draw a word cloud
# shaped by a mask image, display it, and save it as a PNG.

# Font file handed to WordCloud as font_path.
# NOTE(review): the path separators were missing in the original literal
# ('C:WindowsFontsFZSTK.TTF'); restored on the assumption that the standard
# Windows font directory was intended - confirm the font exists there.
font = r'C:\Windows\Fonts\FZSTK.TTF'

# The context manager closes the file handle once the text has been read.
with open('C:/Users/25764/Desktop/jieba1.txt', 'r', encoding='utf-8') as text_file:
    text = text_file.read()

cut = jieba.cut(text)  # word segmentation
string = ' '.join(cut)  # space-separated tokens, the input WordCloud is given
print(len(string))

# Open the mask image and convert it to an array; the image file is closed
# as soon as the pixel data has been copied out.
with Image.open('C:/Users/25764/Pictures/Saved Pictures/timg.jpg') as img:
    img_array = np.array(img)

# Stop words, i.e. words that should not be shown in the cloud. Per the
# original author's note, this entry is an artifact of imperfect
# preprocessing; remove it to see what effect it has.
stopword = ['xa0']

wc = WordCloud(
    background_color='white',
    width=1000,
    height=800,
    mask=img_array,
    font_path=font,
    stopwords=stopword,
)
wc.generate_from_text(string)  # build the word cloud

# Create the figure before drawing: calling plt.figure() after imshow()
# would open a second, empty figure alongside the word cloud.
plt.figure()
plt.imshow(wc)
plt.axis('off')
plt.show()  # display the image

wc.to_file('C:/Users/25764/OneDrive/图片/ss.png')  # save the image