zoukankan      html  css  js  c++  java
  • trmd_b1_ok

      1 # -*- coding:utf-8 -*-
      2 '''
      3 从11c开始提取
      4 '''
      5 import re
      6 import numpy as np
      7 import os
      8 year = '17A'
      9 ss="./data/edmd/"
     10 # filename=ss+"/EDMDI1.17A"
     11 def get_tag():
     12     try:
     13         os.rename(ss+"/EDMDI1.17A",ss+"/EDMDI1.txt")
     14     except:
     15         pass
     16     f1=open(ss+"/EDMDI1.txt")
     17     p1=re.compile(r"^(?:s{3}|Xs{2}|Ws{2})([A-Z]{6})s.+
    ")
     18     list_tag=list()
     19     for line in f1.readlines():
     20         # print(line)
     21         match1=re.findall(p1,line)
     22         # print(match1)
     23         if match1:
     24             for j in match1:
     25                 list_tag.append(j)
     26     # filename_w1= ss+'%s'%list_tag[MM]
     27     print(list_tag)
     28     return list_tag
     29 def trmd_b1_nonote(list_tag):
     30     if not os.path.exists('./data/edmd/new/'):
     31         os.makedirs('./data/edmd/new/')
     32 
     33     for MM in range(len(list_tag)):
     34         try:
     35             os.rename(ss+'%s_D.17A'%list_tag[MM],ss+'%s.txt'%list_tag[MM])
     36         except:
     37             break
     38 
     39         filename_w= ss+'new/%s_w.txt'%list_tag[MM]
     40         if os.path.exists(filename_w):
     41             os.remove(filename_w)
     42         # import os
     43 
     44         # os.rename('./data/CODECO_D.02A','./data/CODECO_D.txt')
     45         filename_r = ss+'%s.txt'%list_tag[MM]  # txt文件和当前脚本在同一目录下,所以不用写具体路径
     46         #00010   UNH Message header      M   1
     47         pattern1   =  re.compile(r"(^d{4,5})s{3}[A-Z]{3}.+[CM]s{3}d*s{1,}|{0,}
    ")#00010
     48         pattern1_2 =  re.compile(r"^d{4,5}s{3}([A-Z]{3}).+[CM]s{3}d*s{1,}|{0,}
    ")#UNH
     49         #pattern1_3 =  re.compile(r"^d{5}s{3}[A-Z]{3}(.+)[CM]s{3}d*s{1,}|{0,}
    ")#Message header
     50         pattern1_4 =  re.compile(r"^d{4,5}s{3}[A-Z]{3}.+([CM])s{3}d*s{1,}|{0,}
    ")#C
     51         pattern1_5 =  re.compile(r"^d{4,5}s{3}[A-Z]{3}.+[CM]s{3}(d*)s{1,}|{0,}
    ")#1
     52         #pattern2 = re.compile(r"^d{5}.+Segmentsgroups(d)*.+[CM]s{3}d*-++
    " )#+结尾
     53         #00050       ---- Segment group 1  ------------------ C   9----------------+
     54         pattern4_1 = re.compile(r"(^d{4,5}).+Segmentsgroupsd*.+[CM]s{3}d*.+
    ")
     55         pattern4_2 = re.compile(r"^d{4,5}.+Segmentsgroups(d*).+[CM]s{3}d*.+
    ")
     56         pattern4_3 = re.compile(r"^d{4,5}.+Segmentsgroupsd*.+([CM])s{3}d*.+
    ")
     57         pattern4_4 = re.compile(r"^d{4,5}.+Segmentsgroupsd*.+[CM]s{3}(d*).+
    ")
     58         #匹配每组的单独结尾的一行即没有Segment group的以+、+|、+||、+|||……结尾的的每个字段
     59         #如00280   RNG Range details                            C   1---------------+|
     60         pattern5_1 = re.compile(r"(^d{4,5})s{3}[A-Z]{3}.+[CM]s{3}d*-++{1,10}|{0,20}
    " )
     61         pattern5_2 = re.compile(r"^d{4,5}s{3}([A-Z]{3}).+[CM]s{3}d*-++{1,10}|{0,20}
    " )
     62         pattern5_3 = re.compile(r"^d{4,5}s{3}[A-Z]{3}.+([CM])s{3}d*-++{1,10}|{0,20}
    " )
     63         pattern5_4 = re.compile(r"^d{4,5}s{3}[A-Z]{3}.+[CM]s{3}(d*)-++{1,10}|{0,20}
    " )
     64         #以下是确定层级关系
     65         #匹配每组的单独结尾的一行即没有Segment group的以+、+|、+||、+|||……结尾的
     66         pattern5 = re.compile(r"^d{5}s{3}[A-Z]{3}.+[CM]s{3}d*-++|{0,10}
    " )
     67         #匹配每组的开头一行即有Segment group的以+、+|、+||、+|||……结尾的
     68         pattern2_1 = re.compile(r"^d{5}.+Segmentsgroups(d*).+[CM]s{3}d*-++
    " )#+结尾
     69         pattern2_2 = re.compile(r"^d{5}.+Segmentsgroups(d*).+[CM]s{3}d*-++|
    " )#+|结尾
     70         pattern2_3 = re.compile(r"^d{5}.+Segmentsgroups(d*).+[CM]s{3}d*-++||
    " )#+||结尾
     71         pattern2_4 = re.compile(r"^d{5}.+Segmentsgroups(d*).+[CM]s{3}d*-++|||
    " )
     72         pattern2_5 = re.compile(r"^d{5}.+Segmentsgroups(d*).+[CM]s{3}d*-++||||
    " )
     73         pattern2_6 = re.compile(r"^d{5}.+Segmentsgroups(d*).+[CM]s{3}d*-++|||||
    " )
     74         pattern2_7 = re.compile(r"^d{5}.+Segmentsgroups(d*).+[CM]s{3}d*-++||||||
    " )
     75         #匹配有同时多个组同时结束的情况,即以++、++|、++||……++、++|、++||……等结尾的
     76         pattern3_1 = re.compile(r"^d{5}.+[CM]s{3}d*-++{2}|{0,20}
    ")# 匹配++、++|、++||……等结尾
     77         pattern3_2 = re.compile(r"^d{5}.+[CM]s{3}d*-++{3}|{0,20}
    ")# 匹配+++、+++|、+++||……等结尾
     78         pattern3_3 = re.compile(r"^d{5}.+[CM]s{3}d*-++{4}|{0,20}
    ")
     79         pattern3_4 = re.compile(r"^d{5}.+[CM]s{3}d*-++{5}|{0,20}
    ")
     80         pattern3_5 = re.compile(r"^d{5}.+[CM]s{3}d*-++{6}|{0,20}
    ")
     81         pattern3_6 = re.compile(r"^d{5}.+[CM]s{3}d*-++{7}|{0,20}
    ")
     82 
     83 
     84         flag = 0
     85         #listgr中第一个不为0的点
     86         pos = -1
     87         listgr =[0,0,0,0,0,0,0,0,0,0]
     88 
     89         fr = open(filename_r)
     90         w2 = open(filename_w,'a')#a代表追加 w代表重写
     91         # w2.write("code_pos,parent,TRSD_tag,year,list_tag[MM],S,R")
     92         for line in fr.readlines():
     93             matcher1 = re.findall(pattern1,line)
     94             matcher1_2 = re.findall(pattern1_2,line)
     95             #matcher1_3 = re.findall(pattern1_3,line)
     96             matcher1_4 = re.findall(pattern1_4,line)
     97             matcher1_5 = re.findall(pattern1_5,line)
     98             matcher2_1 = re.findall(pattern2_1,line)
     99             matcher2_2 = re.findall(pattern2_2,line)
    100             matcher2_3 = re.findall(pattern2_3,line)
    101             matcher2_4 = re.findall(pattern2_4,line)
    102             matcher2_5 = re.findall(pattern2_5,line)
    103             matcher2_6 = re.findall(pattern2_6,line)
    104             matcher2_7 = re.findall(pattern2_7,line)
    105             matcher3_1 = re.findall(pattern3_1,line)
    106             matcher3_2 = re.findall(pattern3_2,line)
    107             matcher3_3 = re.findall(pattern3_3,line)
    108             matcher3_4 = re.findall(pattern3_4,line)
    109             matcher3_5 = re.findall(pattern3_5,line)
    110             matcher3_6 = re.findall(pattern3_6,line)
    111             matcher4_1 = re.findall(pattern4_1,line)
    112             matcher4_2 = re.findall(pattern4_2,line)
    113             matcher4_3 = re.findall(pattern4_3,line)
    114             matcher4_4 = re.findall(pattern4_4,line)
    115             matcher5   = re.findall(pattern5,line)
    116             matcher5_1 = re.findall(pattern5_1,line)
    117             matcher5_2 = re.findall(pattern5_2,line)
    118             matcher5_3 = re.findall(pattern5_3,line)
    119             matcher5_4 = re.findall(pattern5_4,line)
    120 
    121             if matcher4_1!=[]:
    122                 w2.write("
    ")
    123                 for j in matcher4_1:
    124                     for k in j:
    125                         w2.write(k)
    126             if matcher4_2!=[]:
    127                 w2.write(",")
    128                 #写入parent列
    129                 if pos!= -1:
    130                     numgr =listgr[pos]
    131                 else:
    132                     numgr = 0
    133                 if numgr ==0:
    134                     w2.write("SG0,")
    135                 else:
    136                     w2.write("SG"+str(numgr)+",")
    137                 for j in matcher4_2:
    138                     for k in j:
    139                         w2.write("SG"+str(k))
    140             if matcher4_3!=[]:
    141                 flag = 3
    142                 w2.write(",")
    143                 #默认写入year,list_tag[MM]两列
    144                 w2.write(year+","+list_tag[MM]+",")
    145                 for j in matcher4_3:
    146                     for k in j:
    147                         w2.write(k)
    148             if matcher4_4!=[]:
    149                 w2.write(",")
    150                 for j in matcher4_4:
    151                     for k in j:
    152                         w2.write(k)
    153             if matcher5_1!=[]:
    154                 w2.write("
    ")
    155                 for j in matcher5_1:
    156                     for k in j:
    157                         w2.write(k)
    158             if matcher5_2!=[]:
    159                 w2.write(",")
    160                 #写入parent列
    161                 if pos!= -1:
    162                     numgr =listgr[pos]
    163                 else:
    164                     numgr = 0
    165                 if numgr ==0:
    166                     w2.write("SG0,")
    167                 else:
    168                     w2.write("SG"+str(numgr)+",")
    169                 for j in matcher5_2:
    170                     for k in j:
    171                         w2.write(k)
    172             if matcher5_3!=[]:
    173                 flag = 3
    174                 w2.write(",")
    175                 #默认写入year,list_tag[MM]两列
    176                 w2.write(year+","+list_tag[MM]+",")
    177                 for j in matcher5_3:
    178                     for k in j:
    179                         w2.write(k)
    180             if matcher5_4!=[]:
    181                 w2.write(",")
    182                 for j in matcher5_4:
    183                     for k in j:
    184                         w2.write(k)
    185             #确定层级关系,也就是确定listgr
    186             if(matcher5!=[]):
    187                 for i in listgr:
    188                     if i==0:
    189                         pos = listgr.index(i)-1
    190                         break
    191                 listgr[pos]=0
    192             if (matcher2_1!=[]):
    193                 # print "2_1"
    194                 for j in matcher2_1:
    195                     # print j
    196                     if(listgr[0]==0):
    197                         listgr[0]=j
    198                     else:
    199                         listgr[0]=0
    200                 # print listgr
    201             if (matcher2_2!=[]):
    202                 for j in matcher2_2:
    203                     #numgr_d = j
    204                     if(listgr[1]==0):
    205                         listgr[1]=j
    206                     else:
    207                         listgr[1]=0
    208             if (matcher2_3!=[]):
    209                 for j in matcher2_3:
    210                     if(listgr[2]==0):
    211                         listgr[2]=j
    212                     else:
    213                         listgr[2]=0
    214             if (matcher2_4!=[]):
    215                 for j in matcher2_4:
    216                     if(listgr[3]==0):
    217                         listgr[3]=j
    218                     else:
    219                         listgr[3]=0
    220             if (matcher2_5!=[]):
    221                 for j in matcher2_5:
    222                     if(listgr[4]==0):
    223                         listgr[4]=j
    224                     else:
    225                         listgr[4]=0
    226             if (matcher2_6!=[]):
    227                 for j in matcher2_6:
    228                     if(listgr[5]==0):
    229                         listgr[5]=j
    230                     else:
    231                         listgr[5]=0
    232             if (matcher2_7!=[]):
    233                 for j in matcher2_7:
    234                     if(listgr[6]==0):
    235                         listgr[6]=j
    236                     else:
    237                         listgr[6]=0
    238             if (matcher3_1!=[]):
    239                 for i in listgr:
    240                     if i==0:
    241                         pos = listgr.index(i)-1
    242                         break
    243                 listgr[pos]=0
    244                 listgr[pos-1]=0
    245             if (matcher3_2!=[]):
    246                 for i in listgr:
    247                     if i==0:
    248                         pos = listgr.index(i)-1
    249                         break
    250                 for k in range((pos-2),(pos+1)):
    251                     listgr[k]=0
    252             if (matcher3_3!=[]):
    253                 for i in listgr:
    254                     if i==0:
    255                         pos = listgr.index(i)-1
    256                         break
    257                 for k in range((pos-3),(pos+1)):
    258                     listgr[k]=0
    259             if (matcher3_4!=[]):
    260                 for i in listgr:
    261                     if i==0:
    262                         pos = listgr.index(i)-1
    263                         break
    264                 for k in range(pos-4,pos+1):
    265                     listgr[k]=0
    266             if (matcher3_5!=[]):
    267                 for i in listgr:
    268                     if i==0:
    269                         pos = listgr.index(i)-1
    270                         break
    271                 for k in range(pos-5,pos+1):
    272                     listgr[k]=0
    273             if (matcher3_6!=[]):
    274                 for i in listgr:
    275                     if i==0:
    276                         pos = listgr.index(i)-1
    277                         break
    278                 for k in range(pos-6,pos+1):
    279                     listgr[k]=0
    280              #确定层级关系结束
    281             if (matcher1!=[]):
    282                 flag = 1
    283                 w2.write("
    ")
    284                 for j in matcher1:
    285                     for k in j:
    286                         w2.write(k)
    287             #print listgr
    288             #判断当前lit不为0的位置
    289             for i in listgr:
    290                 if i==0:
    291                     pos = listgr.index(i)-1
    292                     break
    293             if matcher1_2!=[]:
    294                 flag = 2
    295                 w2.write(",")
    296                 #写入parent列
    297                 if pos!= -1:
    298                     numgr =listgr[pos]
    299                 else:
    300                     numgr = 0
    301                 if numgr ==0:
    302                     w2.write("SG0,")
    303                 else:
    304                     w2.write("SG"+str(numgr)+",")
    305                 for j in matcher1_2:
    306                     for k in j:
    307                         w2.write(k)
    308         #    if matcher1_3!=[]:
    309         #        flag = 3
    310         #        w2.write(",")
    311         #        for j in matcher1_3:
    312         #            for k in j:
    313         #                w2.write(k)
    314             if matcher1_4!=[]:
    315                 flag = 4
    316                 w2.write(",")
    317                 #默认写入year,list_tag[MM]两列
    318                 w2.write(year+","+list_tag[MM]+",")
    319                 for j in matcher1_4:
    320                     for k in j:
    321                         w2.write(k)
    322             if ((matcher1_5!=[])and(flag ==4)):
    323                 flag = 5
    324                 w2.write(",")
    325                 for j in matcher1_5:
    326                     for k in j:
    327                         w2.write(k)
    328         w2.close()
    329         fr.close()
    330 def trmd_b1_note(list_tag):
    331     for MM in range(len(list_tag)):
    332         filename_r = ss+'%s.txt'%list_tag[MM]
    333         filename_w =  ss+'new/%s_wnote.txt'%list_tag[MM]
    334         if os.path.exists(filename_w):
    335             os.remove(filename_w)
    336 
    337         fr = open(filename_r)
    338         w2 = open(filename_w,'a')
    339         m=0
    340         for line in fr.readlines():
    341             list1 = [3,6,9,12,15,18,21,24,27,30]
    342             for i in range(10):
    343                 k = list1[i]
    344                 # print k
    345                 pattern1 = re.compile(r"^(d{4,5})s{"+str(k)+"}[^ ].+
    ")
    346                 matcher1 = re.findall(pattern1,line)
    347                 if matcher1!=[]:
    348                     flag = 1
    349                     m = k
    350                     # print m
    351                     w2.write(""
    ")
    352                     # for j in matcher1:
    353                     #     w2.write(j)
    354                     flag = 1
    355                     w2.write(""")
    356                     break
    357             v = m+5
    358             #print v
    359             pattern2 = re.compile(r"^s{"+str(v)+"}([^ ].+)
    ")
    360             matcher2 = re.findall(pattern2,line)
    361             if (matcher2!=[]):
    362                 for j in matcher2:
    363                     w2.write(j)
    364                 w2.write(" ")
    365                 #防止匹配到下面结构中的行
    366             pattern3 = re.compile(r"(:?4.3s{4}Messagesstructure)|(:?Poss+TagsNames+Ss+R)")
    367             matcher3 = re.findall(pattern3,line)
    368             if (matcher3!=[]):
    369                 break
    370         w2.write(""")
    371         w2.close( )
    372         #把第一行的“修改为note
    373         old_file=filename_w
    374         fopen=open(old_file,'r')
    375         w_str=""
    376         i =0
    377         for line in fopen:
    378             i =i+1
    379             if ((re.search(""",line)) and (i ==1)):
    380                     line=re.sub('"','code_pos,note',line)
    381                     w_str+=line
    382             else:
    383                     w_str+=line
    384         # print w_str
    385         wopen=open(old_file,'w')
    386         wopen.write(w_str)
    387         fopen.close()
    388         wopen.close()
    389 def join(list_tag):
    390     for MM in range(len(list_tag)):
    391         f1 = open(ss+'new/%s_w.txt'%list_tag[MM])
    392         f2 = open(ss+'new/%s_wnote.txt'%list_tag[MM])
    393 
    394 
    395         list_note=[]
    396         for line1 in f1:
    397             # print(line1)
    398 
    399             list_note.append(line1)
    400              
    401         f1.close()
    402 
    403         # print(list_note)
    404         f2_w= open(ss+'new/b1%s.csv'%year,'a')  
    405         # for i in range(len(list_note)):
    406         j=0
    407             # f2_r = open(ss+'/new/%s_w.txt'%list_tag[MM])
    408         for line2 in f2:
    409 
    410             str11="%s,%s
    "%(list_note[j].strip('
    '),line2.strip('
    '))
    411             j=j+1
    412             # print(i)
    413             # print(str11)
    414             f2_w.write(str11)
    415 
    416 
    417          
    418         f2.close()
    419     f2_w.close()
    420 
    421 
    422   
    423 if __name__ == '__main__':
    424     list_tag=get_tag()
    425     trmd_b1_nonote(list_tag)
    426     trmd_b1_note(list_tag)
    427     join(list_tag) 
    428 
    429 """
    430     特殊情况
    431 
    432 
    433 
    434     """
  • 相关阅读:
    遗传算法在自动组卷中的应用
    MYSQL添加远程用户或允许远程访问
    Access restriction: The method createJPEGEncoder(OutputStream) from the type JPEGCodec is not access
    Linux iptables开启80端口
    struts2
    struts2 action之间参数的传递
    struts2
    struts2 -result 中的 redirect 和 redirectAction 的 区别
    struts2
    servlet 基础
  • 原文地址:https://www.cnblogs.com/smuxiaolei/p/7427674.html
Copyright © 2011-2022 走看看