完善上一篇,使用列表制作数据集
# encoding:utf-8
# Single characters that are tagged 'B1' wherever they appear in a line.
_SUFFIX_CHARS = frozenset('镇乡园区郊场道')


def _tag_line(s):
    """Return a list of ``[char, tag]`` pairs for one input line.

    A character is tagged ``'B1'`` when it is part of the two-character
    sequence "大学" or when it is one of ``_SUFFIX_CHARS``; every other
    character is tagged ``'o'``.

    Args:
        s: One stripped, non-empty line of text.

    Returns:
        A list with one ``[char, tag]`` list per character of ``s``,
        in order.
    """
    last = len(s) - 1
    tagged = []
    # enumerate() yields the real position of each character. Looking the
    # position up with s.index(ch) would always return the FIRST occurrence
    # and test the wrong neighbours when a character repeats.
    for pos, ch in enumerate(s):
        starts_daxue = ch == '大' and pos < last and s[pos + 1] == '学'
        ends_daxue = ch == '学' and pos > 0 and s[pos - 1] == '大'
        if starts_daxue or ends_daxue or ch in _SUFFIX_CHARS:
            tag = 'B1'
        else:
            tag = 'o'
        tagged.append([ch, tag])
    return tagged


def main():
    """Tag every character of ``./result.txt`` and append to ``./d1.txt``.

    Reads ``./result.txt`` (UTF-8), strips each line and drops lines that
    are empty after stripping. Each remaining line is tagged with
    ``_tag_line``, the tagged list is printed, and the pairs are appended
    to ``./d1.txt`` (UTF-8, append mode) as ``"char tag "`` items followed
    by one extra separator per line.

    Raises:
        OSError: If ``./result.txt`` cannot be read or ``./d1.txt`` cannot
            be opened for appending.
    """
    with open('./result.txt', 'r', encoding='utf8') as f:
        lines = [stripped for stripped in (raw.strip() for raw in f) if stripped]

    # Nothing to tag: return before opening the output so that no empty
    # './d1.txt' is created.
    if not lines:
        return

    # NOTE(review): the ' ' separators below are reproduced exactly as they
    # appear in the source. If the original script wrote newlines that were
    # lost when the code was pasted, change them here - confirm.
    with open('./d1.txt', 'a', encoding='utf8') as fp:
        for s in lines:
            tagged = _tag_line(s)
            print(tagged)
            for item in tagged:
                fp.write(' '.join(item) + ' ')
            fp.write(' ')


if __name__ == '__main__':
    main()
解析:
result.txt(输入文件)格式:
d1.txt(输出文件)最后保存格式:
data1 = [x.strip() for x in data1 if x.strip()!=''] 这一行是为了解决以下问题:
每行首尾可能带有空白字符或换行符,或者文件中有空行(空的地址);先用 strip() 去掉首尾空白,再过滤掉空字符串。
问题:判断相邻字符(下标 + 1 或下标 - 1)时,可能会出现下标越界的问题,访问之前先对下标范围做限制即可!
向列表中添加列表(嵌套列表):list_1.append(l1) 会把 l1 整个列表作为一个元素追加到 list_1 中。