zoukankan      html  css  js  c++  java
  • python 批处理excel文件实现数据的提取

    import re
    import xlrd
    # Output files, one per identifier group written by message().
    # They are opened in "w" mode, so any existing content is truncated.
    # NOTE(review): none of these handles is closed in the visible code.
    # f1: identifiers shared by all four inputs.
    f1 = open("v9_c8_a3_a16.txt","w")
    # f2: identifiers found only in the v9 input.
    # NOTE(review): the "a9_" prefix looks like a typo for "v9_" - confirm
    # before renaming, since other tooling may rely on this file name.
    f2 = open("a9_not_c8a3a16.txt","w")
    # f3: identifiers found only in the c8 input.
    f3 = open("c8_not_v9a3a16.txt","w")
    # f4: identifiers found only in the a3 input.
    f4 = open("a3_not_v9c8a16.txt","w")
    # f5: identifiers found only in the a16 input.
    f5 = open("a16_not_v9c8a3.txt","w")
    def read(file, sheet_index=0):
        """Load every row of one worksheet from an Excel workbook.

        Args:
            file: Path of the workbook, passed to ``xlrd.open_workbook``.
            sheet_index: Index of the worksheet to read (defaults to 0).

        Returns:
            A list with one entry per row, each entry being the result of
            ``sheet.row_values(i)`` for that row.
        """
        book = xlrd.open_workbook(file)
        ws = book.sheet_by_index(sheet_index)
        # Report the sheet name and its dimensions before loading the rows.
        print("工作表名称:", ws.name, "行数:", ws.nrows, "列数:", ws.ncols)
        return [ws.row_values(row_no) for row_no in range(ws.nrows)]
    
    def red(text):
        """Collect the unique ``MGG_`` identifiers found in a text file.

        Args:
            text: Path of the file to scan; it is opened in text mode and
                read in full.

        Returns:
            A set of the distinct substrings that consist of ``MGG_``
            followed by exactly five digits.
        """
        # The pattern used to be 'MGG_d{5}', which only matches five literal
        # 'd' characters; '\d' restores the intended digit class.
        pat = re.compile(r'MGG_\d{5}')
        with open(text, 'r') as f:
            return set(pat.findall(f.read()))
    # Load the four input data sets at module level.
    # v9 and c8 hold the row lists returned by read() for the first sheet
    # of each workbook.
    v9 = read(r'zhu.xlsx')
    c8 = read(r'liu.xlsx')
    # a3 and a16 hold the sets of identifiers that red() extracts from the
    # CSV files.
    a3 = red(r'ATG3.csv')
    a16 = red(r'ATG16.csv')
    def reg(data):
        """Extract the unique ``MGG_`` identifiers from any Python object.

        Args:
            data: Any object; it is converted with ``str()`` before the
                search, so nested lists or sets of cell values are accepted.

        Returns:
            A set of the distinct substrings that consist of ``MGG_``
            followed by exactly five digits.
        """
        # The pattern used to be 'MGG_d{5}', which only matches five literal
        # 'd' characters; '\d' restores the intended digit class.
        pat = re.compile(r'MGG_\d{5}')
        # findall needs a string, hence the explicit str() conversion.
        return set(pat.findall(str(data)))
    def vps9():
        """Return the set of identifiers that reg() finds in the global ``v9``."""
        unique_ids = reg(v9)
        return unique_ids
    def cdk8():
        """Return the set of identifiers that reg() finds in the global ``c8``."""
        unique_ids = reg(c8)
        return unique_ids
    def Atg3():
        """Return the set of identifiers that reg() finds in the global ``a3``."""
        unique_ids = reg(a3)
        return unique_ids
    def Atg16():
        """Return the set of identifiers that reg() finds in the global ``a16``."""
        unique_ids = reg(a16)
        return unique_ids
    def Mgg1_Mgg2():
        """Partition the four identifier sets into shared and exclusive groups.

        Returns:
            A 5-tuple of sets, in this order: identifiers present in all four
            inputs, then those present only in the vps9, cdk8, Atg3 and Atg16
            results respectively.
        """
        vps9_ids, cdk8_ids, atg3_ids, atg16_ids = vps9(), cdk8(), Atg3(), Atg16()

        in_all = vps9_ids.intersection(cdk8_ids, atg3_ids, atg16_ids)
        # Each "only" group removes everything seen in the other three sets.
        only_vps9 = vps9_ids.difference(cdk8_ids, atg3_ids, atg16_ids)
        only_cdk8 = cdk8_ids.difference(vps9_ids, atg3_ids, atg16_ids)
        only_atg3 = atg3_ids.difference(vps9_ids, cdk8_ids, atg16_ids)
        only_atg16 = atg16_ids.difference(vps9_ids, atg3_ids, cdk8_ids)

        return in_all, only_vps9, only_cdk8, only_atg3, only_atg16
    def message():
        """Write the records of magnaporthe.txt that mention a grouped identifier.

        The file is read in full and split on '>'. For every resulting chunk
        and every identifier group returned by Mgg1_Mgg2(), each identifier
        that occurs as a substring of the chunk causes one line-group
        "<identifier> <chunk>" to be written to that group's output handle
        (f1 .. f5, in the order of the returned tuple). The handles are not
        closed here.
        """
        in_all, only_v9, only_c8, only_a3, only_a16 = Mgg1_Mgg2()

        with open('magnaporthe.txt', 'r') as source:
            records = source.read().split('>')

        # Pair each identifier group with the handle it is written to.
        targets = (
            (in_all, f1),
            (only_v9, f2),
            (only_c8, f3),
            (only_a3, f4),
            (only_a16, f5),
        )
        for record in records:
            for ids, sink in targets:
                for ident in ids:
                    if ident in record:
                        sink.write(ident + ' ' + record)
    # Run the extraction; this call executes as soon as the module is loaded.
    message()
     
  • 相关阅读:
    flask笔记(引用)
    三元表达式(列表解析+if)
    生成器
    闭包、装饰器、高阶函数
    列表的解压和取值
    模块
    时间操作
    day16 css, dom
    day 15 html(2) css,javascript,dom
    day 14 html
  • 原文地址:https://www.cnblogs.com/Zhu-Xueming/p/7732770.html
Copyright © 2011-2022 走看看