实现的效果如上所示:
(1)总共几千个文件,遍历文件的关键代码:
# Key loop: visit every entry of src_path and open a same-named output file
# under target_path for it.
files = os.listdir(src_path)  # names of all entries in the source folder
for file in files:  # walk the folder
    source_file = os.path.join(src_path, file)
    # os.listdir returns bare names, so the directory test has to be made on
    # the joined path; testing the bare name would consult the current
    # working directory instead of src_path.
    if not os.path.isdir(source_file):  # only open entries that are not folders
        # The with-statement closes both handles even if processing fails.
        with open(source_file) as f, \
                open(os.path.join(target_path, file), 'w') as target_file:
            for line in f:  # a file object already iterates line by line
                # Drop the line terminator so the text can be compared and
                # split without a stray newline attached to the last token.
                line = line.rstrip('\n')
(2)正则表达式:
# Delete every (...), {...} and [...] group (non-greedy, so each group is
# matched separately) and write what is left of the line to the output file.
# The pattern is a raw string so the backslashes reach the regex engine
# unchanged instead of being treated as (invalid) string escapes.
target_file.write(re.sub(r'\(.*?\)|\{.*?}|\[.*?]', '', line))
(3) 拆分,排序的问题
import itertools
import os


def split_str(count):
    """Expand ``left-right`` tokens into individual ``l-r`` pair strings.

    Each non-empty token is expected to look like ``'7,8-4,5'``; it is
    expanded into the Cartesian product of its two comma-separated sides
    (``'7-4'``, ``'7-5'``, ``'8-4'``, ``'8-5'``).

    Args:
        count: list of token strings. Tokens that lack a counterpart (no
            ``-`` at all, or an empty first/second side) are removed from
            this list in place; empty strings are left where they are.

    Returns:
        A new list of ``'l-r'`` strings. Tokens are visited from the last
        one to the first, so pairs of later tokens come first.

    Raises:
        ValueError: if a side of a well-formed token contains something
            that ``int()`` cannot parse.
    """
    str_param = []
    unmatched = set()
    for token in reversed(count):
        if token == '':
            continue
        sides = token.split('-')
        # Drop tokens with no counterpart. A token without any '-' is
        # treated the same way instead of failing on the missing side.
        if len(sides) < 2 or sides[0] == '' or sides[1] == '':
            unmatched.add(token)
            continue
        left, right = deal_Str(token)
        str_param.extend(
            '{}-{}'.format(l, r) for l, r in itertools.product(left, right)
        )
    if unmatched:
        # Slice assignment keeps the caller's list object, so the removal is
        # still visible to the caller.
        count[:] = [token for token in count if token not in unmatched]
    return str_param


def deal_Str(Str):
    """Split one token into its two sides as lists of ints.

    Example: ``'7,8,9-4,5,6,7'`` becomes ``([7, 8, 9], [4, 5, 6, 7])``.

    Args:
        Str: token of the form ``'a,b-c,d'``. Only the first two
            ``-``-separated fields are used.

    Returns:
        A ``(left, right)`` tuple of integer lists.

    Raises:
        IndexError: if ``Str`` contains no ``-``.
        ValueError: if a field is not a valid integer.
    """
    fields = Str.split('-')
    left = [int(x) for x in fields[0].split(',')]
    right = [int(x) for x in fields[1].split(',')]
    return left, right


def sort_str(str_list):
    """Sort ``'l-r'`` strings in place by the integer before the ``-``.

    The sort is stable: strings with the same leading integer keep their
    relative order.

    Args:
        str_list: list of ``'l-r'`` strings; it is reordered in place.

    Returns:
        The same list object, now sorted.
    """
    str_list.sort(key=lambda pair: int(pair.split('-')[0]))
    return str_list


# Backslashes are escaped so the literals are valid; the trailing separator
# of target_path is kept.
src_path = 'D:\\wordalign\\WA\\ctb_aligned_chuli'
target_path = 'D:\\wordalign\\WA\\ctb_aligned_last_chuli\\'


def _convert_line(line):
    """Return the output text (newline-terminated) for one input line."""
    if line == 'rejected':
        return line + '\n'
    pairs = sort_str(split_str(line.split(' ')))
    # Every pair is followed by a single space, including the last one.
    return ''.join(pair + ' ' for pair in pairs) + '\n'


def _convert_directory(source_dir, target_dir):
    """Convert every non-directory entry of source_dir into target_dir."""
    for name in os.listdir(source_dir):  # names of all entries in the folder
        source_file = os.path.join(source_dir, name)
        # os.listdir yields bare names; test the joined path so the check is
        # made inside source_dir rather than the current working directory.
        if os.path.isdir(source_file):
            continue
        with open(source_file) as f, \
                open(os.path.join(target_dir, name), 'w') as target_file:
            for line in f:
                # Strip the line terminator first; otherwise the comparison
                # with 'rejected' can never succeed for a terminated line.
                target_file.write(_convert_line(line.rstrip('\n')))


if __name__ == '__main__':
    _convert_directory(src_path, target_path)