https://blog.csdn.net/amao1998/article/details/80366286
#encoding=utf-8
import jieba
import jieba.posseg as pseg  # imported in the original but not used below
import re                    # imported in the original but not used below

filename = 'result.txt'
fileneedCut = './in_the_name_of_people.txt'
fn = open(fileneedCut, "r", encoding='UTF-8')
f = open(filename, "w+", encoding='UTF-8')
for line in fn.readlines():
    # Segment each line with jieba and join the tokens with spaces,
    # producing the one-sentence-per-line format LineSentence expects.
    words = jieba.cut(line)
    f.write(' '.join(words))
f.close()
fn.close()
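
For reference, a minimal sketch of what jieba.cut produces on a single sentence (the sample string is a hypothetical example, not taken from the novel); joining the tokens with spaces gives exactly the format written to result.txt above:

import jieba

sample = "我爱北京天安门"  # hypothetical test sentence
# jieba.cut returns a generator of tokens in precise (default) mode.
print(" ".join(jieba.cut(sample)))
# Typical output: 我 爱 北京 天安门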
import multiprocessing

from gensim.models import Word2Vec
from gensim.models.word2vec import LineSentence

vocab = 'result.txt'
# Train 32-dimensional word vectors on the space-separated corpus.
# Note: in gensim >= 4.0 the `size` parameter is named `vector_size`.
model = Word2Vec(LineSentence(vocab), size=32, window=5, min_count=5,
                 workers=multiprocessing.cpu_count())
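
Once training finishes, the vectors live on model.wv. A minimal sketch of querying the model (the query word is a hypothetical token and must occur at least min_count=5 times in result.txt to be in the vocabulary; the save path is arbitrary):

# Look up the 32-dimensional vector of a word.
vec = model.wv['人民']            # hypothetical query token
# Find the 10 nearest words in the embedding space.
print(model.wv.most_similar('人民', topn=10))
# Save the model so it can be reloaded later with Word2Vec.load(...).
model.save('in_the_name_of_people.model')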


https://blog.csdn.net/zl_best/article/details/53433072
Principle of jieba word segmentation: https://blog.csdn.net/baidu_33718858/article/details/81073093 (relatively hard to follow)
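
To get a feel for what that article covers (a prefix-dictionary DAG for known words, an HMM for unseen words), a minimal sketch comparing jieba's cut modes on a hypothetical sentence:

import jieba

sample = "小明硕士毕业于中国科学院计算所"  # hypothetical test sentence
# Precise mode (default): picks the most probable path through the DAG.
print("/".join(jieba.cut(sample)))
# Full mode: emits every dictionary word it finds, overlaps included.
print("/".join(jieba.cut(sample, cut_all=True)))
# Disabling the HMM shows out-of-vocabulary words breaking into single characters.
print("/".join(jieba.cut(sample, HMM=False)))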

