Python包网站: https://pypi.org/
1. 繁体与简体转换(https://github.com/berniey/hanziconv.git)
pip install hanziconv
示例:
from hanziconv import HanziConv


def switch_hanzi(keyword):
    """Convert Chinese text to the opposite script (Simplified <-> Traditional).

    :param keyword: the text to convert
    :return: the Simplified form if simplifying changes the text; otherwise
        the Traditional form; ``None`` when the text is identical in both
        scripts, i.e. there is nothing to convert
    """
    simplified = HanziConv.toSimplified(keyword)
    if simplified != keyword:
        # Simplifying changed the text, so the input was not Simplified.
        return simplified

    # The input is already Simplified: try the other direction.
    traditional = HanziConv.toTraditional(keyword)
    # Same text in both scripts -> no conversion to offer.
    return None if traditional == simplified else traditional


for keyword in ('生活', '中國'):
    print(switch_hanzi(keyword))
2. 汉字转拼音(https://github.com/mozillazg/python-pinyin)
pip install pypinyin
>>> from pypinyin import pinyin, lazy_pinyin, Style
>>> pinyin('中心')
[['zhōng'], ['xīn']]
>>> pinyin('中心', heteronym=True)  # 启用多音字模式
[['zhōng', 'zhòng'], ['xīn']]
>>> pinyin('中心', style=Style.FIRST_LETTER)  # 设置拼音风格
[['z'], ['x']]
>>> pinyin('中心', style=Style.TONE2, heteronym=True)
[['zho1ng', 'zho4ng'], ['xi1n']]
>>> pinyin('中心', style=Style.TONE3, heteronym=True)
[['zhong1', 'zhong4'], ['xin1']]
>>> pinyin('中心', style=Style.BOPOMOFO)  # 注音风格
[['ㄓㄨㄥ'], ['ㄒㄧㄣ']]
>>> lazy_pinyin('中心')  # 不考虑多音字的情况
['zhong', 'xin']
3. 拼音转汉字(https://github.com/someus/Pinyin2Hanzi)
pip install Pinyin2Hanzi
示例:
from Pinyin2Hanzi import DefaultHmmParams, viterbi

hmmparams = DefaultHmmParams()

# Ask the HMM decoder for the 2 best candidate paths.
result = viterbi(
    hmm_params=hmmparams,
    observations=('ni', 'zhi', 'bu', 'zhi', 'dao'),
    path_num=2,
)
for item in result:
    print(item.score, item.path)

# Output:
# 1.3155294593897203e-08 ['你', '知', '不', '知', '道']
# 3.6677865125992192e-09 ['你', '只', '不', '知', '道']
def get_similar_words(word, num=3):
    """Return words that are similar to ``word`` by sharing its pinyin.

    The word is converted to pinyin with ``lazy_pinyin`` and the syllables
    are passed to Pinyin2Hanzi's ``dag`` decoder. The pieces of each
    candidate path are joined into a single word.

    :param word: the source word
    :param num: number of candidate paths requested from ``dag``
        (forwarded as ``path_num``)
    :return: list of distinct candidate words, excluding ``word`` itself,
        in the order ``dag`` returned them; it may hold fewer than ``num``
        items once duplicates and ``word`` are dropped
    """
    if not word:
        # Nothing to look up; also skips importing the pinyin libraries.
        return []

    # Function-scope imports keep this snippet self-contained: the
    # surrounding notes never import DefaultDagParams or dag.
    from Pinyin2Hanzi import DefaultDagParams, dag
    from pypinyin import lazy_pinyin

    word_pinyin = lazy_pinyin(word)
    dagparams = DefaultDagParams()
    result = dag(dagparams, tuple(word_pinyin), path_num=num)

    # Deduplicate while keeping dag's ordering (a set would scramble it).
    similar_words = []
    for item in result:
        print(item.path)
        # Joining also covers the single-element path case.
        candidate = ''.join(item.path)
        if candidate != word and candidate not in similar_words:
            similar_words.append(candidate)
    return similar_words