"""Join, de-duplicate and re-order files of '||||'-separated records.

Each record occupies one line and has the form ``left||||right``.
"""
import re
import codecs

# Separator placed between the two fields of every record.
_FIELD_SEP = '||||'


def _read_stripped_lines(path):
    """Return every line of the UTF-8 file *path*, stripped of surrounding whitespace."""
    with codecs.open(path, 'r', 'utf8') as f:
        return [line.strip() for line in f]


def _write_lines(path, lines):
    """Overwrite the UTF-8 file *path* with *lines*, one record per line."""
    with codecs.open(path, 'w', 'utf-8') as w:
        for line in lines:
            # Records must be newline-terminated: every reader in this module
            # iterates the file line by line.
            w.write(line + '\n')


def joint_list(xianbingshi: str, xianbingshi_sub: str, list_write: str) -> None:
    """Pair up the lines of two files and write them as ``x||||y`` records.

    Line *i* of ``xianbingshi`` is joined with line *i* of
    ``xianbingshi_sub``; each joined record is printed and then written to
    ``list_write`` (which is overwritten).

    Args:
        xianbingshi: Path of the UTF-8 file supplying the left-hand field.
        xianbingshi_sub: Path of the UTF-8 file supplying the right-hand field.
        list_write: Path of the UTF-8 output file.

    Raises:
        IndexError: If ``xianbingshi_sub`` has fewer lines than
            ``xianbingshi``. Surplus lines in ``xianbingshi_sub`` are ignored.
    """
    left_lines = _read_stripped_lines(xianbingshi)
    right_lines = _read_stripped_lines(xianbingshi_sub)

    # Fail before producing any output instead of part-way through the loop.
    if len(right_lines) < len(left_lines):
        raise IndexError(
            '%s has %d lines but %s has only %d'
            % (xianbingshi, len(left_lines), xianbingshi_sub, len(right_lines))
        )

    joined = []
    for left, right in zip(left_lines, right_lines):
        record = left + _FIELD_SEP + right
        print(record)
        joined.append(record)

    _write_lines(list_write, joined)


def xianbingshi_write(list_write: str) -> None:
    """Swap the two fields of every record in ``list_write`` and sort the file.

    The file is read, each ``a||||b`` record is split on the separator, the
    records are sorted by their second field, and the file is rewritten in
    place as ``b||||a`` records. Only the first two fields are written back;
    any further ``||||``-separated fields on a line are dropped.

    Args:
        list_write: Path of the UTF-8 file to rewrite in place.

    Raises:
        IndexError: If a non-blank line contains no ``||||`` separator.
    """
    records = []
    with codecs.open(list_write, 'r', 'utf8') as f:
        for line_no, raw in enumerate(f, 1):
            text = raw.strip()
            if not text:
                # Blank lines carry no record; skip them rather than crash.
                continue
            fields = text.split(_FIELD_SEP)
            if len(fields) < 2:
                raise IndexError(
                    'line %d of %s has no %r separator: %r'
                    % (line_no, list_write, _FIELD_SEP, text)
                )
            records.append(fields)

    records.sort(key=lambda fields: fields[1])
    _write_lines(
        list_write,
        [fields[1] + _FIELD_SEP + fields[0] for fields in records],
    )


def word_replace(word: str) -> None:
    """Rewrite the replacement word list ``word`` without duplicates, longest first.

    Lines are stripped, duplicates are removed keeping the first occurrence,
    and the survivors are sorted by descending length (ties keep their
    original relative order). Every kept line is printed once when first
    seen and once more in its final order.

    Args:
        word: Path of the UTF-8 file to rewrite in place.
    """
    # De-duplicate while preserving first-seen order; the set makes each
    # membership test O(1).
    seen = set()
    unique_lines = []
    for line in _read_stripped_lines(word):
        if line in seen:
            continue
        seen.add(line)
        unique_lines.append(line)
        print(line)

    unique_lines.sort(key=len, reverse=True)
    for line in unique_lines:
        print(line)
    _write_lines(word, unique_lines)


if __name__ == '__main__':
    # NOTE(review): these paths contain no directory separators - presumably
    # backslashes were lost somewhere upstream; confirm before running.
    xianbingshi = r'C:UsersAdministrator.SC-201812211013PycharmProjectsuntitled29yiwoqucodexianbingshi_write.txt'
    xianbingshi_sub = r'C:UsersAdministrator.SC-201812211013PycharmProjectsuntitled29yiwoqucodexianbingshi_write_sub.txt'
    list_write = r'C:UsersAdministrator.SC-201812211013PycharmProjectsuntitled29yiwoqucode est.txt'
    joint_list(xianbingshi, xianbingshi_sub, list_write)
    word_replace(list_write)
    xianbingshi_write(list_write)