zoukankan      html  css  js  c++  java
  • python文件操作,读取,修改,合并

      1 # -*- coding:utf-8 -*-
      2 '''
      3 从11c开始提取
      4 '''
      5 import re
      6 import numpy as np
      7 import os
      8 year = '17A'
      9 ss="./data/edmd/"
     10 # filename=ss+"/EDMDI1.17A"
     11 try:
     12     os.rename(ss+"/EDMDI1.17A",ss+"/EDMDI1.txt")
     13 except:
     14     pass
     15 f1=open(ss+"/EDMDI1.txt")
     16 p1=re.compile(r"^(?:s{3}|Xs{2}|Ws{2})([A-Z]{6})s.+
    ")
     17 list_tag=list()
     18 for line in f1.readlines():
     19     # print(line)
     20     match1=re.findall(p1,line)
     21     # print(match1)
     22     if match1:
     23         for j in match1:
     24             list_tag.append(j)
     25 # filename_w1= ss+'%s'%list_tag[i]
     26 print(list_tag)
     27 for i in range(len(list_tag)):
     28     try:
     29         os.rename(ss+'%s_D.17A'%list_tag[i],ss+'%s.txt'%list_tag[i])
     30     except:
     31         break
     32 
     33     filename_w= ss+'/new/%s_w.txt'%list_tag[i]
     34     if os.path.exists(filename_w):
     35         os.remove(filename_w)
     36     # import os
     37 
     38     # os.rename('./data/CODECO_D.02A','./data/CODECO_D.txt')
     39     filename_r = ss+'%s.txt'%list_tag[i]  # txt文件和当前脚本在同一目录下,所以不用写具体路径
     40     #00010   UNH Message header      M   1
     41     pattern1   =  re.compile(r"(^d{5})s{3}[A-Z]{3}.+[CM]s{3}d*s{1,}|{0,}
    ")#00010
     42     pattern1_2 =  re.compile(r"^d{5}s{3}([A-Z]{3}).+[CM]s{3}d*s{1,}|{0,}
    ")#UNH
     43     pattern1_3 =  re.compile(r"^d{5}s{3}[A-Z]{3}(.+)[CM]s{3}d*s{1,}|{0,}
    ")#Message header
     44     pattern1_4 =  re.compile(r"^d{5}s{3}[A-Z]{3}.+([CM])s{3}d*s{1,}|{0,}
    ")#C
     45     pattern1_5 =  re.compile(r"^d{5}s{3}[A-Z]{3}.+[CM]s{3}(d*)s{1,}|{0,}
    ")#1
     46     #pattern2 = re.compile(r"^d{5}.+Segmentsgroups(d)*.+[CM]s{3}d*-++
    " )#+结尾
     47     #00050       ---- Segment group 1  ------------------ C   9----------------+
     48     pattern4_1 = re.compile(r"(^d{5}).+Segmentsgroupsd*.+[CM]s{3}d*.+
    ")
     49     pattern4_2 = re.compile(r"^d{5}.+Segmentsgroups(d*).+[CM]s{3}d*.+
    ")
     50     pattern4_3 = re.compile(r"^d{5}.+Segmentsgroupsd*.+([CM])s{3}d*.+
    ")
     51     pattern4_4 = re.compile(r"^d{5}.+Segmentsgroupsd*.+[CM]s{3}(d*).+
    ")
     52     #匹配每组的单独结尾的一行即没有Segment group的以+、+|、+||、+|||……结尾的的每个字段
     53     #如00280   RNG Range details                            C   1---------------+|
     54     pattern5_1 = re.compile(r"(^d{5})s{3}[A-Z]{3}.+[CM]s{3}d*-++{1,10}|{0,20}
    " )
     55     pattern5_2 = re.compile(r"^d{5}s{3}([A-Z]{3}).+[CM]s{3}d*-++{1,10}|{0,20}
    " )
     56     pattern5_3 = re.compile(r"^d{5}s{3}[A-Z]{3}.+([CM])s{3}d*-++{1,10}|{0,20}
    " )
     57     pattern5_4 = re.compile(r"^d{5}s{3}[A-Z]{3}.+[CM]s{3}(d*)-++{1,10}|{0,20}
    " )
     58     #以下是确定层级关系
     59     #匹配每组的单独结尾的一行即没有Segment group的以+、+|、+||、+|||……结尾的
     60     pattern5 = re.compile(r"^d{5}s{3}[A-Z]{3}.+[CM]s{3}d*-++|{0,10}
    " )
     61     #匹配每组的开头一行即有Segment group的以+、+|、+||、+|||……结尾的
     62     pattern2_1 = re.compile(r"^d{5}.+Segmentsgroups(d*).+[CM]s{3}d*-++
    " )#+结尾
     63     pattern2_2 = re.compile(r"^d{5}.+Segmentsgroups(d*).+[CM]s{3}d*-++|
    " )#+|结尾
     64     pattern2_3 = re.compile(r"^d{5}.+Segmentsgroups(d*).+[CM]s{3}d*-++||
    " )#+||结尾
     65     pattern2_4 = re.compile(r"^d{5}.+Segmentsgroups(d*).+[CM]s{3}d*-++|||
    " )
     66     pattern2_5 = re.compile(r"^d{5}.+Segmentsgroups(d*).+[CM]s{3}d*-++||||
    " )
     67     pattern2_6 = re.compile(r"^d{5}.+Segmentsgroups(d*).+[CM]s{3}d*-++|||||
    " )
     68     pattern2_7 = re.compile(r"^d{5}.+Segmentsgroups(d*).+[CM]s{3}d*-++||||||
    " )
     69     #匹配有同时多个组同时结束的情况,即以++、++|、++||……++、++|、++||……等结尾的
     70     pattern3_1 = re.compile(r"^d{5}.+[CM]s{3}d*-++{2}|{0,20}
    ")# 匹配++、++|、++||……等结尾
     71     pattern3_2 = re.compile(r"^d{5}.+[CM]s{3}d*-++{3}|{0,20}
    ")# 匹配+++、+++|、+++||……等结尾
     72     pattern3_3 = re.compile(r"^d{5}.+[CM]s{3}d*-++{4}|{0,20}
    ")
     73     pattern3_4 = re.compile(r"^d{5}.+[CM]s{3}d*-++{5}|{0,20}
    ")
     74     pattern3_5 = re.compile(r"^d{5}.+[CM]s{3}d*-++{6}|{0,20}
    ")
     75     pattern3_6 = re.compile(r"^d{5}.+[CM]s{3}d*-++{7}|{0,20}
    ")
     76 
     77 
     78     flag = 0
     79     #listgr中第一个不为0的点
     80     pos = -1
     81     listgr =[0,0,0,0,0,0,0,0,0,0]
     82 
     83     fr = open(filename_r)
     84     w2 = open(filename_w,'a')#a代表追加 w代表重写
     85     for line in fr.readlines():
     86         matcher1 = re.findall(pattern1,line)
     87         matcher1_2 = re.findall(pattern1_2,line)
     88         matcher1_3 = re.findall(pattern1_3,line)
     89         matcher1_4 = re.findall(pattern1_4,line)
     90         matcher1_5 = re.findall(pattern1_5,line)
     91         matcher2_1 = re.findall(pattern2_1,line)
     92         matcher2_2 = re.findall(pattern2_2,line)
     93         matcher2_3 = re.findall(pattern2_3,line)
     94         matcher2_4 = re.findall(pattern2_4,line)
     95         matcher2_5 = re.findall(pattern2_5,line)
     96         matcher2_6 = re.findall(pattern2_6,line)
     97         matcher2_7 = re.findall(pattern2_7,line)
     98         matcher3_1 = re.findall(pattern3_1,line)
     99         matcher3_2 = re.findall(pattern3_2,line)
    100         matcher3_3 = re.findall(pattern3_3,line)
    101         matcher3_4 = re.findall(pattern3_4,line)
    102         matcher3_5 = re.findall(pattern3_5,line)
    103         matcher3_6 = re.findall(pattern3_6,line)
    104         matcher4_1 = re.findall(pattern4_1,line)
    105         matcher4_2 = re.findall(pattern4_2,line)
    106         matcher4_3 = re.findall(pattern4_3,line)
    107         matcher4_4 = re.findall(pattern4_4,line)
    108         matcher5   = re.findall(pattern5,line)
    109         matcher5_1 = re.findall(pattern5_1,line)
    110         matcher5_2 = re.findall(pattern5_2,line)
    111         matcher5_3 = re.findall(pattern5_3,line)
    112         matcher5_4 = re.findall(pattern5_4,line)
    113 
    114         if matcher4_1!=[]:
    115             w2.write("
    ")
    116             for j in matcher4_1:
    117                 for k in j:
    118                     w2.write(k)
    119         if matcher4_2!=[]:
    120             w2.write(",")
    121             #写入parent列
    122             if pos!= -1:
    123                 numgr =listgr[pos]
    124             else:
    125                 numgr = 0
    126             w2.write("SG"+str(numgr)+",")
    127             for j in matcher4_2:
    128                 for k in j:
    129                     w2.write(k)
    130         if matcher4_3!=[]:
    131             flag = 3
    132             w2.write(",")
    133             for j in matcher4_3:
    134                 for k in j:
    135                     w2.write(k)
    136         if matcher4_4!=[]:
    137             w2.write(",")
    138             for j in matcher4_4:
    139                 for k in j:
    140                     w2.write(k)
    141         if matcher5_1!=[]:
    142             w2.write("
    ")
    143             for j in matcher5_1:
    144                 for k in j:
    145                     w2.write(k)
    146         if matcher5_2!=[]:
    147             w2.write(",")
    148             #写入parent列
    149             if pos!= -1:
    150                 numgr =listgr[pos]
    151             else:
    152                 numgr = 0
    153             w2.write("SG"+str(numgr)+",")
    154             for j in matcher5_2:
    155                 for k in j:
    156                     w2.write(k)
    157         if matcher5_3!=[]:
    158             flag = 3
    159             w2.write(",")
    160             for j in matcher5_3:
    161                 for k in j:
    162                     w2.write(k)
    163         if matcher5_4!=[]:
    164             w2.write(",")
    165             for j in matcher5_4:
    166                 for k in j:
    167                     w2.write(k)
    168         #确定层级关系,也就是确定listgr
    169         if(matcher5!=[]):
    170             for i in listgr:
    171                 if i==0:
    172                     pos = listgr.index(i)-1
    173                     break
    174             listgr[pos]=0
    175         if (matcher2_1!=[]):
    176             # print "2_1"
    177             for j in matcher2_1:
    178                 # print j
    179                 if(listgr[0]==0):
    180                     listgr[0]=j
    181                 else:
    182                     listgr[0]=0
    183             # print listgr
    184         if (matcher2_2!=[]):
    185             for j in matcher2_2:
    186                 #numgr_d = j
    187                 if(listgr[1]==0):
    188                     listgr[1]=j
    189                 else:
    190                     listgr[1]=0
    191         if (matcher2_3!=[]):
    192             for j in matcher2_3:
    193                 if(listgr[2]==0):
    194                     listgr[2]=j
    195                 else:
    196                     listgr[2]=0
    197         if (matcher2_4!=[]):
    198             for j in matcher2_4:
    199                 if(listgr[3]==0):
    200                     listgr[3]=j
    201                 else:
    202                     listgr[3]=0
    203         if (matcher2_5!=[]):
    204             for j in matcher2_5:
    205                 if(listgr[4]==0):
    206                     listgr[4]=j
    207                 else:
    208                     listgr[4]=0
    209         if (matcher2_6!=[]):
    210             for j in matcher2_6:
    211                 if(listgr[5]==0):
    212                     listgr[5]=j
    213                 else:
    214                     listgr[5]=0
    215         if (matcher2_7!=[]):
    216             for j in matcher2_7:
    217                 if(listgr[6]==0):
    218                     listgr[6]=j
    219                 else:
    220                     listgr[6]=0
    221         if (matcher3_1!=[]):
    222             for i in listgr:
    223                 if i==0:
    224                     pos = listgr.index(i)-1
    225                     break
    226             listgr[pos]=0
    227             listgr[pos-1]=0
    228         if (matcher3_2!=[]):
    229             for i in listgr:
    230                 if i==0:
    231                     pos = listgr.index(i)-1
    232                     break
    233             for k in range((pos-2),(pos+1)):
    234                 listgr[k]=0
    235         if (matcher3_3!=[]):
    236             for i in listgr:
    237                 if i==0:
    238                     pos = listgr.index(i)-1
    239                     break
    240             for k in range((pos-3),(pos+1)):
    241                 listgr[k]=0
    242         if (matcher3_4!=[]):
    243             for i in listgr:
    244                 if i==0:
    245                     pos = listgr.index(i)-1
    246                     break
    247             for k in range(pos-4,pos+1):
    248                 listgr[k]=0
    249         if (matcher3_5!=[]):
    250             for i in listgr:
    251                 if i==0:
    252                     pos = listgr.index(i)-1
    253                     break
    254             for k in range(pos-5,pos+1):
    255                 listgr[k]=0
    256         if (matcher3_6!=[]):
    257             for i in listgr:
    258                 if i==0:
    259                     pos = listgr.index(i)-1
    260                     break
    261             for k in range(pos-6,pos+1):
    262                 listgr[k]=0
    263          #确定层级关系结束
    264         if (matcher1!=[]):
    265             flag = 1
    266             w2.write("
    ")
    267             for j in matcher1:
    268                 for k in j:
    269                     w2.write(k)
    270         #print listgr
    271         #判断当前lit不为0的位置
    272         for i in listgr:
    273             if i==0:
    274                 pos = listgr.index(i)-1
    275                 break
    276         if matcher1_2!=[]:
    277             flag = 2
    278             w2.write(",")
    279             #写入parent列
    280             if pos!= -1:
    281                 numgr =listgr[pos]
    282             else:
    283                 numgr = 0
    284             w2.write("SG"+str(numgr)+",")
    285             for j in matcher1_2:
    286                 for k in j:
    287                     w2.write(k)
    288         if matcher1_3!=[]:
    289             flag = 3
    290             w2.write(",")
    291             for j in matcher1_3:
    292                 for k in j:
    293                     w2.write(k)
    294         if matcher1_4!=[]:
    295             flag = 4
    296             w2.write(",")
    297             for j in matcher1_4:
    298                 for k in j:
    299                     w2.write(k)
    300         if ((matcher1_5!=[])and(flag ==4)):
    301             flag = 5
    302             w2.write(",")
    303             for j in matcher1_5:
    304                 for k in j:
    305                     w2.write(k)
    306 
    307     w2.close( )
    308     fr.close()
    309 
    310 f2_w= open(ss+'/new/%s.txt'%year,'a')
    311 
    312 for i in range(len(list_tag)):
    313     f2_r = open(ss+'/new/%s_w.txt'%list_tag[i])
    314     for line in f2_r:
    315         # for j in line:
    316         f2_w.write(year+','+line)
    317     f2_r.close() 
    318     print("--%i--is ok"%i)
    319 f2_w.close()   
    320 
    321 # if __name__ == '__main__':
    322     
    323 
    324 """
    325     特殊情况
    326 
    327 
    328 
    329     """
  • 相关阅读:
    SourceTree用法
    @Valid注解的使用
    mysql命令导入导出sql文件
    eclipse集成svn及使用
    eclipse设置
    @Component
    购物车单选全选,计算总价,出现个小问题,没找到.....
    十三、迭代器,分部类,文件流,内存流,缓存,枚举(IEnumerator接口),线程(单线程,多线程)
    十二、事件,委托,泛型委托,集合(泛型和非泛型),Lambda表达式(声明委托,使用委托,简单的委托示例,action<T>和func<t>委托,多播委托,匿名方法,Lambda表达式,参数,事件)
    十一、接口(接口的概念,实现,继承,实现)、抽象类与抽象方法(抽象类,抽象方法概念,使用)
  • 原文地址:https://www.cnblogs.com/smuxiaolei/p/7405291.html
Copyright © 2011-2022 走看看