"""Print the most frequent words of a Chinese text file, segmented with jieba."""
from collections import Counter


def load_stopwords(path, encoding="utf-8-sig"):
    """Return the set of stop words stored one per line in *path*.

    Trailing whitespace (including the newline) is stripped from every line.
    The default ``utf-8-sig`` codec reads UTF-8 with or without a byte-order
    mark, so a BOM written by some editors does not end up glued to the first
    stop word. Undecodable bytes are ignored.

    Raises:
        OSError: if the file cannot be opened.
    """
    with open(path, "r", encoding=encoding, errors="ignore") as handle:
        return {line.rstrip() for line in handle}


def count_words(tokens, stopwords=frozenset()):
    """Count the tokens that are longer than one character and not stop words.

    Args:
        tokens: iterable of strings (for example the generator returned by
            ``jieba.cut``); it is consumed exactly once.
        stopwords: container of words to skip; a set gives O(1) lookups.

    Returns:
        A ``collections.Counter`` mapping each kept token to its frequency.
    """
    counts = Counter()
    for token in tokens:
        # Single characters (punctuation, blanks, particles) carry no signal,
        # so the length check also covers the lone-space case.
        if len(token) > 1 and token not in stopwords:
            counts[token] += 1
    return counts


def main(text_path="boke.txt", stopwords_path="stop.txt", top_n=50):
    """Segment *text_path* and print its *top_n* most frequent words.

    Args:
        text_path: UTF-8 text file to analyse.
        stopwords_path: UTF-8 file with one stop word per line.
        top_n: number of most frequent words to print.

    Raises:
        OSError: if either input file cannot be opened.
    """
    # Imported here rather than at module level so the helpers above can be
    # imported without the third-party segmenter being installed.
    import jieba

    # The context manager closes the file even if reading raises.
    with open(text_path, "r", encoding="utf-8-sig", errors="ignore") as handle:
        text = handle.read()

    stopwords = load_stopwords(stopwords_path)
    # jieba.cut runs in precise mode by default and yields tokens lazily.
    counts = count_words(jieba.cut(text), stopwords)

    print(' 词频统计结果:')
    for word, freq in counts.most_common(top_n):
        print("%s:%d" % (word, freq))


if __name__ == '__main__':
    main()
参考了龙哥的代码。自己的代码总是出现转码问题。