zoukankan      html  css  js  c++  java
  • 采用pandas读取文件,进行自动化统计小程序

    自己完成的第二个自动化统计小程序,完成之后感觉:命名不够规范,造成可读性比较没那么好,幸好给自己很多地方都加了注释
    #coding:utf-8
    import os,sys
    import re
    import xlwt
    import xlrd
    import xlutils
    import xlutils.copy
    from xlutils.copy import copy
    import pandas as pd
    import numpy as np

    import os.path
    filedir = "E:/内部项目文档/2G SCAN/0617trc/二维码一维码同框.trc" #文件路径
    f = open(filedir,'r',errors='ignore')
    decodes = []
    time = []
    Filtertime = []
    decoderesult = []
    sheethead = ['解码次数','解码时间十六进制','解码时间十进制']
    decode = open('decode.txt', 'w')
    decodetime = open('decodetime.txt','w')
    postFilter = open("postFilter.txt","w")
    date = f.readlines() # 一次性读取所以行,并按行返回
    f.close()
    def find (keyword,file,x,y,list): #文件查找,传入参数:keyword:查找关键字,file;查找结果生成的文件名,list:查找结果生成的列表
    for line in date:
    if keyword in line:
    print(line)
    file.write(line)
    list.append(line[x:y])
    print(list)
    file.close()
    return file
    return list
    def getexcel(mylist,header):
    book = xlwt.Workbook()
    sheet = book.add_sheet("Sheet1",cell_overwrite_ok=True)
    for k in range(len(header)) : # 根据列数遍历
    sheet.write(0,k,header[k])
    i =1 # 丛第一行开始,上面一行标题已经占了
    for list in mylist: # 列表中的元组 mylist[0]
    j = 0 #J 表示列
    for data in list: #元素中的第一个索引开始 mylist[0][0]
    sheet.write(i,j,data)
    j += 1
    i += 1
    book.save("解码统计.xls")
    #if __name__ == '__main__':
    finddecode = find("number",decode,-2,-1,decodes)
    findetime = find("80800",decodetime,-4,-1,time)
    findetime = find("0x7654000",postFilter,-2,-1,Filtertime)
    time = list(filter(lambda e:e!='000'and e!='001',time))
    print(len(Filtertime))
    print(len([e for e in Filtertime if int(e,16)>8] ))
    print(time)
    print(type(time[0]))
    ''' 用pandas制作filtertime表格'''
    book2 = xlwt.Workbook()
    sh = book2.add_sheet("sheet1",cell_overwrite_ok=True)
    sh_head = ['Filtertime十六进制','Filtertime十进制']
    for k in range(len(sh_head)):
    sh.write(0,k,sh_head[k])#写表头
    for m in range(len(Filtertime)):
    j = 0
    sh.write(m+1,j,Filtertime[m])
    sh.write(m+1,j+1,int(Filtertime[m],16))
    j += 1
    m += 1
    book2.save("Filtertime统计.xls")
    df = pd.read_excel("Filtertime统计.xls")
    result1 = df['Filtertime十进制'].describe().round(0) #Filtertime 的数据统计
    print(result1)
    pd.concat([df, result1]).to_excel("Filtertime统计.xls") #将统计结果和原表格内容重新保存到 表格中
    print(len(decodes),len(time))
    if len(decodes) == len(time): # 胖次解码次数和解码时间个数一样
    decoderesult = list(zip(decodes,time))
    getexcel(decoderesult,sheethead)
    decodestatistical = xlrd.open_workbook("解码统计.xls") #打开表格
    newdecodestatistical = copy(decodestatistical) #拷贝book生成一个新的表格作为填写基础
    tabel1 = decodestatistical.sheet_by_name(u"Sheet1")#通过sheet_by_index()获取的sheet没有write()方法
    newtabel1 = newdecodestatistical.get_sheet(0) #用get_sheet()获取的sheet有write()方法
    #获取表格的行数和列数
    nclos = tabel1.ncols
    nrows = tabel1.nrows
    for i in range(1,nrows):
    newtabel1.write(i,nclos-1,int(tabel1.cell(i,1).value,16))

    newdecodestatistical.save("解码统计.xls")

    ''' 利用pandas 自动数据统计'''
    df2 = pd.read_excel("解码统计.xls")
    result2 =df2['解码时间十进制'].describe().round(0) # 解码时间的数据统计,保留1个小数点
    print(result2)
    failcount = len(df2[df2['解码次数']==0])
    print(failcount)
    result2.loc['失败次数']=failcount # 统计的结果添加一行 失败的次数
    pd.concat([df2, result2]).to_excel("解码统计.xls")
    else:
    print('解码次数和时间不一致,结果查看:解码.xls')
    write = pd.ExcelWriter(r'D:/untitled3/解码.xls') #定义一个表格作为写入,类似文件open
    date1 =pd.read_table('decode.txt',sep = ' ',header=None,engine = 'python',encoding = 'utf8') # log提取的decodenumber解码结果pandas读取
    print(date1.columns)
    x = [1,2,3,4,5,6,7,8,9] # 要删除的列索引,只保留第一列;系统时间,最后一列:解码结果
    date1.drop(date1.columns[x],axis=1,inplace=True)
    print(date1.columns)
    date1.columns=['time','decodemuner']
    print(date1)
    date1.to_excel(write,sheet_name='sheet1') # 保存到表的sheet1
    date2 =pd.read_table('decodetime.txt',sep = ' ',header=None,engine = 'python',encoding = 'utf8')#log提取的event 时间pandas读取
    print(date2.columns)
    x = [1,2,3,4,5,6,7,8,9,10,11,12] # 要删除的列索引,只保留第一列;系统时间,最后一列:0x0800时间
    date2.drop(date2.columns[x],axis=1,inplace=True)
    print(date2.columns)
    date2.columns=['time','hexevent']
    print(date2)
    date2.to_excel(write,sheet_name='sheet2') # 保存到表的sheet2
    write.save()
    write.close()
    df1 =pd.DataFrame(pd.read_excel('解码.xls',sheet_name='sheet1')) #表格转为dateFrame表达
    df2 =pd.DataFrame(pd.read_excel('解码.xls',sheet_name='sheet2'))
    print(df1)
    print(df2)
    result = pd.merge(df1,df2.loc[:,['time','hexevent']],how='left',on='time') #提取time相同的是内容,实现表格的vlookup功能
    result=result.dropna(axis=0,how='any') # 删除表中包含NAN的行
    print (result)
    evnet = result['hexevent']
    print(evnet)
    hexevent=np.array(evnet)
    print(hexevent)
    print(type(hexevent))
    decevent=[] #evet 对应十进制
    for i in range(len(hexevent)):
    hexevent[i] =hexevent[i][-3:] #取最后3位
    decevent.append(int(hexevent[i],16))
    print(decevent)
    col_name=result.columns.tolist()# 查询当前列的索
    print(col_name)
    col_name.insert(4,'解码十进制时间') #添加新的列名
    print(col_name)
    result['解码十进制时间'] = decevent
    print(result)
    tongji = result['解码十进制时间'].describe().round(0)
    print(tongji)
    failcount = len(result[result['decodemuner']==0]) # 统计界面失败的个数
    print(len)
    tongji.loc['失败次数']=failcount
    pd.concat([result,tongji]).to_excel("解码.xls")
  • 相关阅读:
    (字典树)Revenge of Fibonacci -- HDU -- 4099
    (字符串 KMP)Blue Jeans -- POJ -- 3080:
    (广搜)聪明的打字员 -- POJ --1184
    (线段树 点更新 区间求和)lightoj1112
    Jquery弹窗插件Lhgdialog的用法
    SQL Server数据库大型应用解决方案总结
    C# 使用XmlDocument类对XML文档进行操作
    反射实例【转】
    如何使用dynamic
    [C#]DataTable常用操作总结
  • 原文地址:https://www.cnblogs.com/wellons/p/13224775.html
Copyright © 2011-2022 走看看