zoukankan      html  css  js  c++  java
  • trsd_extract_EDSD_new

      1 # -*- coding:utf-8 -*-
      2 import re
      3 
      4 
      5 '''
      6 适应新版本
      7 '''
      8 
      9 
     10 year='17A'#用户自定义
     11 ss='./data/'#根目录
     12 filename = ss+'EDSD%s.txt'%year#输入文件名
     13 
     14 
     15 
     16 
     17 def trsd_nonote():
     18 
     19 
     20     p1 = r"^s{4}(?:X|W)s{2}([A-Z]{3})ss.+
    "#TCC
     21     p2 = r"s{4}(?:X|W)s{2}[A-Z]{3}ss(.+)
    "
     22     """
     23            Function: To specify information regarding the transport
     24                      such as mode of transport, means of transport,
     25                      its conveyance reference number and the
     26                      identification of the means of transport.
     27     """
     28     p3 = r"^s{7}Function:s(.+ww.)
    "
     29     p4 = r"^s{7}Function:s(.+.g.|.+[^.])
    "
     30     # p4 = r"^s{7}Function:s(.+[.g.|[^.]])
    "
     31     p5 = r"^s{17}(w.+[^.])
    "
     32     p6 = r"^s{17}(.+.)
    "
     33 
     34     #Note
     35     # p7 = r"^s{7}Note:s
    "#Note
     36     # p8= r"^s{12}([A-Z].+.)
    "#Note内容只有1行
     37     # p9 = r"^s{12}(.+[^.]|)
    "#Note内容只多行的非最后行
     38     # p10 = r"^s{12}(.+.)
    "#Note内容只多行的最后行
     39 
     40     pattern1 = re.compile(p1)
     41     pattern2 = re.compile(p2)
     42     pattern3 = re.compile(p3)
     43     pattern4 = re.compile(p4)
     44     pattern5 = re.compile(p5)
     45     pattern6 = re.compile(p6)
     46     fr = open(filename)
     47     # temp = "";
     48     flag = 0
     49     for line in fr.readlines():
     50         matcher1 = re.findall(pattern1,line)
     51         matcher2 = re.findall(pattern2,line)
     52         matcher3 = re.findall(pattern3,line)
     53         matcher4 = re.findall(pattern4,line)
     54         matcher5 = re.findall(pattern5,line)
     55         matcher6 = re.findall(pattern6,line)
     56         #print matcher
     57         w2 = open(ss+'trsd_nonote%s.txt'%year,'a')#a代表追加 w代表重写
     58         if matcher1:
     59             flag = 1
     60             w2.write("
    ")
     61             for j in matcher1:
     62                 # for k in j:
     63                     w2.write(j)
     64         if ((matcher2!=[])and(flag ==1)):
     65             flag = 2
     66             w2.write(",")
     67             for j in matcher2:
     68                 # for k in j:
     69                     w2.write(j)
     70         if ((matcher3!=[])and(flag ==2)):
     71             flag = 3
     72             #防止有逗号,用双引号括起
     73             w2.write(","")
     74             for j in matcher3:
     75                 # for k in j:
     76                     w2.write(j)
     77             w2.write(""")
     78         if ((matcher4!=[])and(flag ==2)):
     79             flag = 4
     80             w2.write(","")
     81             for j in matcher4:
     82                 # for k in j:
     83                     w2.write(j)
     84         if ((matcher5!=[])and(flag ==4 or 5)):
     85             flag = 5
     86             w2.write(" ")
     87             for j in matcher5:
     88                 # for k in j:
     89                     w2.write(j)
     90             # w2.write(""")
     91         if ((matcher6!=[])and(flag ==4 or flag==5)):
     92             flag = 6
     93             w2.write(" ")
     94             for j in matcher6:
     95                 # for k in j:
     96                     w2.write(j)
     97             w2.write(""")
     98     w2.close( )
     99 
    100 def trsd_note():
    101 
    102     p1 = r"^(?:s{7}|Xs{6}|Ws{6})([A-Z]{3})ss[A-Z].+$"#匹配1001
    103     p2 = r"^s{7}Note:s
    "#Note
    104     p3= r"^s{12}([^ ].+)
    "#Note内容
    105     p4= r"^(?:-|컴)+
    "
    106     pattern1 = re.compile(p1)
    107     pattern2 = re.compile(p2)
    108     pattern3 = re.compile(p3)
    109     pattern4 = re.compile(p4)
    110 
    111 
    112     fr = open(filename)
    113     w2 = open(ss+'trsd_note%s.txt'%year,'a')#a代表追加 w代表重写
    114     # temp = ();
    115     flag = 0
    116     flag1=0
    117     for line in fr.readlines():
    118         matcher1 = re.findall(pattern1,line)
    119         matcher2 = re.findall(pattern2,line)
    120         matcher3 = re.findall(pattern3,line)
    121         matcher4 = re.findall(pattern4,line)
    122 
    123        
    124         #print matcher
    125 
    126         if matcher1!=[]:
    127             flag = 1
    128             w2.write("
    ")
    129             # for j in matcher1:
    130                 
    131             #     w2.write(j)
    132 
    133         if ((matcher2!=[])and(flag == 1)):
    134             flag = 2
    135             flag1=1
    136             # w2.write(",")
    137         if flag1==1:
    138             if ((matcher3!=[])and(flag ==2 or 3)):
    139                 flag = 3
    140                 w2.write(" ")
    141                 for j in matcher3:
    142                     
    143                     w2.write(j)
    144             # w2.write(")
    145             if ((matcher4!=[])and(flag == 3)):
    146                 flag=0
    147                 flag1=0
    148     w2.write("
    ")
    149     w2.close( )
    150     fr.close()
    151 
    152 def join():
    153 
    154 
    155 
    156     f1= open(ss+'trsd_note%s.txt'%year)
    157     f2 =open(ss+'trsd_nonote%s.txt'%year) 
    158 
    159     list_note=[]
    160     for line1 in f1:
    161         # print(line1)
    162         if line1.isspace():
    163             list_note.append('')
    164         else:
    165             list_note.append(line1)
    166          
    167     f1.close()
    168 
    169     # print(list_note)
    170     f2_w= open(ss+'trsd%s.csv'%year,'a')  
    171     # for i in range(len(list_note)):
    172     i=0
    173         # f2_r = open(ss+'/new/%s_w.txt'%list_tag[i])
    174     for line2 in f2:
    175 
    176         str11="%s,"%s"
    "%(line2.strip('
    '),list_note[i].strip('
    '))
    177         i=i+1
    178         # print(i)
    179         # print(str11)
    180         f2_w.write(str11)
    181 
    182 
    183     f2_w.close() 
    184     f2.close()
    185 if __name__ == '__main__':
    186     trsd_nonote()
    187     trsd_note()
    188     join()
  • 相关阅读:
    剑指 Offer 47. 礼物的最大价值
    剑指 Offer 46. 把数字翻译成字符串
    剑指 Offer 50. 第一个只出现一次的字符
    剑指 Offer 42. 连续子数组的最大和
    剑指 Offer 45. 把数组排成最小的数
    剑指 Offer 44. 数字序列中某一位的数字
    Express全系列教程之(七):cookie的加密
    Express全系列教程之(六):cookie的使用
    Express全系列教程之(八):session的基本使用
    在express项目中使用redis
  • 原文地址:https://www.cnblogs.com/smuxiaolei/p/7427676.html
Copyright © 2011-2022 走看看