"""Segment a text with jieba, print its word frequencies and store them in MySQL."""

import codecs
import os
from collections import Counter

import jieba
import matplotlib.font_manager as fm  # font handling, needed for Chinese text
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image  # word cloud
from pymysql import *
from wordcloud import WordCloud, ImageColorGenerator  # word-cloud generator


def _count_words(txt):
    """Return ``(word, count)`` pairs for *txt*, most frequent first.

    Tokens produced by ``jieba.cut`` are kept only when they are longer than
    one character and are not a bare ``'\\r\\n'`` line break.
    """
    counts = Counter(
        token
        for token in jieba.cut(txt)
        if len(token) > 1 and token != '\r\n'
    )
    return counts.most_common()


def _save_counts(rows):
    """Insert every ``(name, number)`` pair of *rows* into the ``allnews`` table."""
    # NOTE(review): credentials are hard-coded in the source; consider loading
    # them from configuration or the environment instead.
    conn = connect(host='localhost', port=3306, database='test',
                   user='root', password='123asd..00', charset='utf8')
    try:
        cursor = conn.cursor()
        try:
            query = 'insert into allnews(name, number) values(%s, %s)'
            # Parameterized batch insert: one call instead of one execute per row.
            cursor.executemany(query, rows)
        finally:
            cursor.close()
        # Commit once, after all rows have been sent.
        conn.commit()
    finally:
        # Always release the connection, even when the insert or commit fails.
        conn.close()


def get_words(txt):
    """Count the words of *txt*, print the ranking and persist it to MySQL.

    Args:
        txt: The text to segment with jieba.

    Returns:
        None. The ranking is written to stdout and to the ``allnews`` table
        (columns ``name`` and ``number``).

    Raises:
        Whatever pymysql raises when connecting, inserting or committing
        fails; the cursor and connection are closed before it propagates.
    """
    # Segment and count before touching the database so the connection is
    # not held open during the CPU-bound work.
    ranking = _count_words(txt)

    print('常用词频度统计结果')
    for (word, count) in ranking:
        print('%s %s %d' % (word, "***************", count))

    _save_counts(ranking)


if __name__ == '__main__':
    # Raw string: '\s' is not a valid escape sequence and triggers a warning
    # on recent Python versions; the resulting path is unchanged.
    # codecs.open reads without newline translation, so '\r\n' sequences
    # reach the tokenizer unchanged (get_words filters them out).
    with codecs.open(r'D:\softWareProject/txt/news_all.txt', 'r', 'utf8') as f:
        txt = f.read()
    get_words(txt)