"""Scrape news titles from voice.hupu.com and render them as a word cloud.

Running this file as a script:

1. downloads pages 1-9 of the ``soccer/tag/496`` listing and appends every
   title found to ``yingchao.txt``;
2. segments the accumulated text with jieba and counts each token;
3. draws a word cloud shaped by ``1.jpg`` and coloured from ``2.jpg``, shows
   it in a matplotlib window, then saves it as ``3.jpg``.
"""
from collections import Counter

import requests
from bs4 import BeautifulSoup
import jieba
from PIL import Image, ImageSequence
import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud, ImageColorGenerator

# Text file that accumulates the scraped titles (opened in append mode, so
# repeated runs keep adding to it).
TITLES_FILE = 'yingchao.txt'

# Listing URL; ``%s`` is replaced by the page number.
LIST_URL = 'https://voice.hupu.com/soccer/tag/496-%s.html'

# Seconds to wait for the server before giving up, so a stalled connection
# cannot hang the script forever.
REQUEST_TIMEOUT = 10

# Tokens that are dropped from the frequency table.
SYMBOLS = frozenset({"/", "(", ")", " ", ";", "!", "、", ":"})

# Font used to draw the words (SimHei, in the Windows font directory).
FONT_PATH = r'C:\Windows\Fonts\simhei.ttf'


def changeTitleToDict(path=TITLES_FILE):
    """Count how often each jieba token occurs in the titles file.

    Args:
        path: UTF-8 text file to read. Defaults to ``yingchao.txt``.

    Returns:
        dict mapping each token produced by ``jieba.cut`` (except those in
        ``SYMBOLS``) to its number of occurrences. The dict is also printed.

    Raises:
        OSError: if ``path`` cannot be opened.
    """
    # The context manager guarantees the handle is closed even if read() fails.
    with open(path, 'r', encoding='utf-8') as f:
        text = f.read()

    # Single pass over the tokens instead of one list.count() per unique token.
    counts = Counter(tok for tok in jieba.cut(text) if tok not in SYMBOLS)
    title_dict = dict(counts)
    print(title_dict)
    return title_dict


def _scrape_titles(first_page=1, last_page=9, path=TITLES_FILE):
    """Append every ``<span class="n1">`` text on the listing pages to *path*."""
    # Open the output once for the whole crawl rather than once per title.
    with open(path, 'a', encoding='utf-8') as out:
        for page in range(first_page, last_page + 1):
            response = requests.get(LIST_URL % page, timeout=REQUEST_TIMEOUT)
            response.encoding = 'utf-8'
            soup = BeautifulSoup(response.text, 'html.parser')
            for news in soup.find_all('span', class_='n1'):
                print(news.text)
                # Titles are written back to back with no separator.
                out.write(news.text)


def _render_word_cloud(title_dict):
    """Draw, display and save a word cloud built from the keys of *title_dict*."""
    # NOTE: only the distinct tokens are passed on; the counts stored in
    # title_dict are not used to size the words.
    content = ' '.join(title_dict.keys())

    # Build the word cloud using 1.jpg as the mask. Per the wordcloud docs,
    # width/height are ignored when a mask is given; kept as in the original.
    mask = np.array(Image.open('1.jpg'))
    cloud = WordCloud(
        background_color='white',
        font_path=FONT_PATH,
        mask=mask,
        width=1000,
        height=860,
        margin=2,
    ).generate(content)

    # Word colours are sampled from 2.jpg.
    color_source = np.array(Image.open('2.jpg'))
    image_colors = ImageColorGenerator(color_source)
    cloud.recolor(color_func=image_colors)

    # Show the generated word cloud, then write it to disk.
    plt.imshow(cloud)
    plt.axis("off")
    plt.show()
    cloud.to_file('3.jpg')


def main():
    """Run the scrape -> count -> render pipeline."""
    _scrape_titles()
    title_dict = changeTitleToDict()
    _render_word_cloud(title_dict)


if __name__ == "__main__":
    main()
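# 背景图 — background / mask image (1.jpg)
# 字体颜色图 — font-colour source image (2.jpg)
# 词云图 — generated word cloud (3.jpg)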