zoukankan      html  css  js  c++  java
  • tred_extract_EDED_new

      1 # -*- coding:utf-8 -*-
      2 import re
      3 
      4 
      5 '''
      6 适应新版本
      7 '''
      8 
      9 
     10 year='17a'#用户自定义
     11 ss='./data/'#根目录
     12 filename = ss+'EDED%s.txt'%year#输入文件名
     13 
     14 
     15 
     16 
     17 def tred_nonote():
     18 
     19     p1 = r"^(?:s{5}|Xs{4}|Ws{4})(dddd)ss[A-Z].+]$"#匹配1001
     20     p2 = r"^(?:s{5}|Xs{4}|Ws{4})ddddss([A-Z].+)s+[[A-Z]]$"
     21     p3 = r"^(?:s{5}|Xs{4}|Ws{4})ddddss[A-Z].+s+[([A-Z])]$"
     22     p4 = r"^s{5}Desc:s(.+ww.)
    "
     23 
     24     p5 = r"^s{5}Desc:s(.+[^.]|.+.g.)
    "#非以.结尾的Desc
     25     p6 = r"^s{11}(.+.)
    "#非以.结尾的Desc的第二行
     26     p7 = r"^s{5}Repr:s(.+)
    "#Repr
     27 
     28     pattern1 = re.compile(p1)
     29     pattern2 = re.compile(p2)
     30     pattern3 = re.compile(p3)
     31     pattern4 = re.compile(p4)
     32     pattern5 = re.compile(p5)
     33     pattern6 = re.compile(p6)
     34     pattern7 = re.compile(p7)
     35 
     36     fr = open(filename)
     37     temp = ();
     38     flag = 0
     39     for line in fr.readlines():
     40         matcher1 = re.findall(pattern1,line)
     41         matcher2 = re.findall(pattern2,line)
     42         matcher3 = re.findall(pattern3,line)
     43         matcher4 = re.findall(pattern4,line)
     44         matcher5 = re.findall(pattern5,line)
     45         matcher6 = re.findall(pattern6,line)
     46         matcher7 = re.findall(pattern7,line)
     47 
     48         w2 = open(ss+'tred_nonote%s.txt'%year,'a')#a代表追加 w代表重写
     49         if matcher1:
     50             flag = 1
     51             w2.write("
    ")
     52             for j in matcher1:
     53                 for k in j:
     54                     w2.write(k)
     55 
     56         if ((matcher2!=[])and(flag ==1)):
     57             flag = 2
     58 
     59             w2.write(",")
     60             for j in matcher2:
     61                 for k in j:
     62                     w2.write(k)
     63         if ((matcher3!=[])and(flag ==2)):
     64             flag = 3
     65             # w2.write(",")
     66             for j in matcher3:
     67                 for k in j:
     68                     w2.write(k)
     69         if ((matcher4!=[])and(flag ==3)):
     70             flag = 4
     71             w2.write(","")
     72             for j in matcher4:
     73                 for k in j:
     74                     w2.write(k)
     75             w2.write(""")
     76         if ((matcher5!=[])and(flag ==3 or 5)):
     77             flag = 5
     78             w2.write(","")
     79             for j in matcher5:
     80                 for k in j:
     81                     w2.write(k)
     82         if ((matcher6!=[])and(flag ==5)):
     83             flag = 6
     84             w2.write(" ")
     85             for j in matcher6:
     86                 for k in j:
     87                     w2.write(k)
     88             w2.write(""")
     89         if ((matcher7!=[])and(flag ==4 or 6)):
     90             flag = 7
     91             w2.write(",")
     92             for j in matcher7:
     93                 for k in j:
     94                     w2.write(k)
     95 
     96         w2.close( )
     97 
     98 
     99 def tred_note():
    100 
    101     p1 = r"^(?:s{5}|Xs{4}|Ws{4})(dddd)ss[A-Z].+]$"#匹配1001
    102     p2 = r"^s{5}Note:s
    "#Note
    103     p3= r"^s{11}([^ ].+)
    "#Note内容
    104     p4= r"^(?:-|컴)+
    "
    105     pattern1 = re.compile(p1)
    106     pattern2 = re.compile(p2)
    107     pattern3 = re.compile(p3)
    108     pattern4 = re.compile(p4)
    109 
    110 
    111     fr = open(filename)
    112     w2 = open(ss+'tred_note%s.txt'%year,'a')#a代表追加 w代表重写
    113     # temp = ();
    114     flag = 0
    115     flag1=0
    116     for line in fr.readlines():
    117         matcher1 = re.findall(pattern1,line)
    118         matcher2 = re.findall(pattern2,line)
    119         matcher3 = re.findall(pattern3,line)
    120         matcher4 = re.findall(pattern4,line)
    121 
    122        
    123         #print matcher
    124 
    125         if matcher1!=[]:
    126             flag = 1
    127             w2.write("
    ")
    128             # for j in matcher1:
    129                 
    130             #     w2.write(j)
    131 
    132         if ((matcher2!=[])and(flag == 1)):
    133             flag = 2
    134             flag1=1
    135             # w2.write(",")
    136         if flag1==1:
    137             if ((matcher3!=[])and(flag ==2 or 3)):
    138                 flag = 3
    139                 w2.write(" ")
    140                 for j in matcher3:
    141                     
    142                     w2.write(j)
    143             # w2.write(")
    144             if ((matcher4!=[])and(flag == 3)):
    145                 flag=0
    146                 flag1=0
    147     w2.write("
    ")
    148     w2.close( )
    149     fr.close()
    150 
    151 def join():
    152 
    153 
    154 
    155     f1= open(ss+'tred_note%s.txt'%year)
    156     f2 =open(ss+'tred_nonote%s.txt'%year) 
    157 
    158     list_note=[]
    159     for line1 in f1:
    160         # print(line1)
    161         if line1.isspace():
    162             list_note.append('')
    163         else:
    164             list_note.append(line1)
    165          
    166     f1.close()
    167 
    168     # print(list_note)
    169     f2_w= open(ss+'tred%s.csv'%year,'a')  
    170     # for i in range(len(list_note)):
    171     i=0
    172         # f2_r = open(ss+'/new/%s_w.txt'%list_tag[i])
    173     for line2 in f2:
    174 
    175         str11="%s,"%s"
    "%(line2.strip('
    '),list_note[i].strip('
    '))
    176         i=i+1
    177         # print(i)
    178         # print(str11)
    179         f2_w.write(str11)
    180 
    181 
    182     f2_w.close() 
    183     f2.close()
    184 if __name__ == '__main__':
    185     tred_nonote()
    186     tred_note()
    187     join()
  • 相关阅读:
    线段树
    坐标离散化
    超大背包问题
    折半枚举 双向搜索
    弹性碰撞
    反转 开关问题
    尺取法
    二分法
    最小生成树
    区间素数筛法
  • 原文地址:https://www.cnblogs.com/smuxiaolei/p/7427670.html
Copyright © 2011-2022 走看看