def word_frequency(stopwords_path=r'E:\PythonFile\tingyongci.txt',
                   comments_path=r'E:\PythonFile\jd\phone\3133927.txt',
                   output_path=r'E:\PythonFile\jd\phone\test.txt',
                   encoding=None):
    """Count word frequencies in a whitespace-separated text file.

    Tokens found in the stop-word file are skipped. The remaining tokens
    are counted, ordered by ascending count (ties keep first-seen order),
    printed to stdout and appended to the output file as "<word> <count>"
    lines.

    Args:
        stopwords_path: Text file with whitespace-separated stop words.
        comments_path: Text file with the whitespace-separated tokens to
            count.
        output_path: Text file the results are appended to; it is created
            if it does not exist.
        encoding: Encoding used for all three files. None keeps the
            platform default, as a plain open() call would.

    Raises:
        OSError: If any of the files cannot be opened.
    """
    # Stop-word list (original note: combined list based on the HIT
    # stop-word table). Split into whole words and keep them in a set, so
    # the membership test matches words rather than single characters and
    # costs O(1) per token.
    with open(stopwords_path, encoding=encoding) as stop_file:
        stop_words = set(stop_file.read().split())

    with open(comments_path, encoding=encoding) as comments_file:
        words = comments_file.read().split()

    word_dict = {}
    for word in words:
        if word in stop_words:
            continue
        word_dict[word] = word_dict.get(word, 0) + 1

    # sorted() returns a new list, so the result has to be kept and
    # iterated; sorting is by count, lowest first.
    ranked = sorted(word_dict.items(), key=lambda item: item[1])

    # Append mode keeps the results of earlier runs; the with-block closes
    # the file even if printing or writing fails.
    with open(output_path, mode='a', encoding=encoding) as out_file:
        for word, count in ranked:
            print(word, count)
            out_file.write(word + ' ' + str(count) + '\n')
用jieba分词处理字符串,将分词结果存到TXT文件中
去停用词