zoukankan      html  css  js  c++  java
  • python分析日志脚本

      1 #!/usr/bin/env python
      2 # coding:utf-8
      3 
      4 import sys,time
      5 
      6 class DisplayFormat(object):
      7 
      8     def format_size(self,size):
      9         KB = 1024                   # KB -> B  1024
     10         MB = 1048576                # MB -> B  1024 * 1024
     11         GB = 1073741824             # GB -> B  1024 * 1024 * 1024
     12         TB = 1099511627776          # TB -> B  1024 * 1024 * 1024
     13 
     14         if size >= TB:
     15             size = str(size >> 40) + 'T'
     16         elif size < KB:
     17             size = str(size) + 'B'
     18         elif size >= GB and size < TB:
     19             size = str(size >> 30) + 'G'
     20         elif size >= MB and size < GB:
     21             size = str(size >> 20) + 'M'
     22         else:
     23             size = str(size >> 10) + 'K'
     24 
     25         return size
     26 
     27     formatstring = '%-18s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s'
     28 
     29     def echo_line(self):
     30         '''输出头部横线'''
     31         print self.formatstring % ('-'*15,'-'*10,'-'*12,'-'*12,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,)
     32 
     33     def echo_head(self):
     34         '''输出头部信息'''
     35         print self.formatstring % ('IP','Traffic','Time','Time%',200,404,403,503,500,302,304)
     36 
     37     def echo_error(self):
     38         '''输出错误信息'''
     39         print 'Usage: ' + sys.argv[0] + 'filepath [number]'
     40 
     41     def echo_time(self):
     42         '''输出脚本执行时间'''
     43         print 'The script is running %s second' % time.clock()
     44 
     45 
     46 class HostInfo(object):
     47 
     48     # 定义一个主机ip 的所有状态列表
     49     host_info = ['200','404','403','503','500','302','304','size','time']
     50 
     51     def __init__(self,host):
     52         '''初始化一个主机信息字典'''
     53         self.host = host = {}.fromkeys(self.host_info,0)
     54 
     55     def add_1(self,status_size,is_size):
     56         '''对访问次数,http返回的状态码,ip流量进行加1操作'''
     57         if status_size == 'time':
     58             self.host['time'] += 1
     59         elif is_size:
     60             self.host['size'] = self.host['size'] + status_size
     61         else:
     62             self.host[status_size] += 1
     63 
     64     def get_value(self,value):
     65         '''取出字典的值'''
     66         return self.host[value]
     67 
     68 
     69 class AnalysisFile(object):
     70 
     71     def __init__(self):
     72         '''初始化一个空字典'''
     73         self.empty = {}
     74         self.total_request_time,self.total_traffic,self.total_200,
     75         self.total_404,self.total_403,self.total_503,self.total_500,
     76         self.total_302,self.total_304 = 0,0,0,0,0,0,0,0,0
     77 
     78     def split_line_todict(self,line):
     79         '''传入文件的每一行取出0、8、9字段 生成字典 并返回这个字典'''
     80         line_split = line.split()
     81         line_dict = {'remote_host':line_split[0],'status':line_split[8],'bytes_sent':line_split[9]}
     82         return line_dict
     83 
     84     def read_log(self,logs):
     85         for line in logs:
     86             try:
     87                 dict_line = self.split_line_todict(line)
     88                 host = dict_line['remote_host']
     89                 status = dict_line['status']
     90             except ValueError:
     91                 continue
     92             except IndexError:
     93                 continue
     94 
     95             if host not in self.empty:
     96                 host_info_obj = HostInfo(host)
     97                 self.empty[host] = host_info_obj
     98             else:
     99                 host_info_obj = self.empty[host]
    100 
    101             host_info_obj.add_1('time',False)
    102 
    103             if status in host_info_obj.host_info:
    104                 host_info_obj.add_1(status,False)
    105 
    106             try:
    107                 bytes_sent = int(dict_line['bytes_sent'])
    108             except ValueError:
    109                 bytes_sent = 0
    110 
    111             host_info_obj.add_1(bytes_sent,True)
    112 
    113         return self.empty
    114 
    115     def return_sorted_list(self,true_dict):
    116         '''循环读取字典,计算总的流量、总的访问次数以及总的http返回码'''
    117         for host_key in true_dict:
    118             host_value = true_dict[host_key]
    119             time = host_value.get_value('time')
    120             self.total_request_time = self.total_request_time + time
    121             size = host_value.get_value('size')
    122             self.total_traffic = self.total_traffic + size
    123 
    124             # 获取http返回状态码的次数
    125             v_200 = host_value.get_value('200')
    126             v_404 = host_value.get_value('404')
    127             v_403 = host_value.get_value('403')
    128             v_503 = host_value.get_value('503')
    129             v_500 = host_value.get_value('500')
    130             v_302 = host_value.get_value('302')
    131             v_304 = host_value.get_value('304')
    132 
    133             # 重新规划字典
    134             true_dict[host_key] = {'200':v_200,'404':v_404,'403':v_403,
    135                                    '503':v_503,'500':v_500,'302':v_302,
    136                                    '304':v_304,'size':size,'time':time}
    137 
    138 
    139             # 计算http返回状态码的总量
    140             self.total_200 = self.total_200 + v_200
    141             self.total_404 = self.total_404 + v_404
    142             self.total_403 = self.total_403 + v_403
    143             self.total_503 = self.total_503 + v_503
    144             self.total_500 = self.total_500 + v_500
    145             self.total_302 = self.total_302 + v_302
    146             self.total_304 = self.total_304 + v_304
    147 
    148                 # 对总的访问次数和访问流量进行降序排序,并生成一个有序的列表
    149         sorted_list = sorted(true_dict.items(),key=lambda i:(i[1]['size'],
    150                                                                  i[1]['time']),reverse=True)
    151 
    152         return sorted_list
    153 
    154 
    155 class Main(object):
    156 
    157     def main(self):
    158         '''主调函数'''
    159         # 初始化DisplayFormat类的实例
    160         displayformat = DisplayFormat()
    161 
    162         args = len(sys.argv)
    163         if args == 1:
    164             displayformat.echo_error()
    165         elif args == 2 or args == 3:
    166             log_file = sys.argv[1]
    167             try:
    168                 files = open(log_file,'r')
    169                 if args == 3:
    170                     lines = int(sys.argv[2])
    171                 else:
    172                     lines = 0
    173             except IOError,e:
    174                 print
    175                 print e
    176                 displayformat.echo_error()
    177             except VaueError,e:
    178                 print
    179                 print e
    180                 displayformat.echo_error()
    181 
    182         else:
    183             displayformat.echo_error()
    184 
    185 
    186         #AnalysisFile类的实例化
    187         fileanalysis = AnalysisFile()
    188 
    189         # 调用read_log方法
    190         news_dict = fileanalysis.read_log(files)
    191 
    192         # 调用return_sorted_list方法
    193         new_list = fileanalysis.return_sorted_list(news_dict)
    194 
    195         # 计算所有ip的总量
    196         total_ip = len(new_list)
    197 
    198         if lines:
    199             new_list = new_list[0:lines]
    200         files.close()
    201 
    202         # 打印出总的ip数,总访问流量,总的访问次数
    203         print
    204         total_request_time = fileanalysis.total_request_time
    205         total_traffic = displayformat.format_size(fileanalysis.total_traffic)
    206         print '总IP数量: %s    总的访问流量: %s    总的请求次数: %d' % (total_ip,
    207                                                                    total_traffic,
    208                                                                    total_request_time)
    209         
    210         # 打印头部信息,和横线                                                                      
    211         print
    212         displayformat.echo_head()
    213         displayformat.echo_line()
    214 
    215         # 循环读取news_list列表取出time项目 计算time百分比 通过displayformat格式化输出主机信息
    216         for i in new_list:
    217             time = i[1]['time']
    218             time_percentage = (float(time) / float(fileanalysis.total_request_time)) * 100
    219             print displayformat.formatstring % (i[0],
    220                                                 displayformat.format_size(i[1]['size']),
    221                                                 time,str(time_percentage)[0:5],
    222                                                 i[1]['200'],i[1]['404'],i[1]['403'],
    223                                                 i[1]['503'],i[1]['500'],i[1]['302'],i[1]['304'])
    224 
    225         if not lines or total_ip == lines:
    226             displayformat.echo_line()
    227             print displayformat.formatstring % (total_ip,total_traffic,total_request_time,'100%',
    228                                                 fileanalysis.total_200,fileanalysis.total_404,
    229                                                 fileanalysis.total_403,fileanalysis.total_503,
    230                                                 fileanalysis.total_500,fileanalysis.total_302,
    231                                                 fileanalysis.total_304)
    232 
    233         # 显示执行脚本的时间
    234         print
    235         displayformat.echo_time()
    236 
    237 if __name__ == '__main__':
    238     main = Main()
    239     main.main()
  • 相关阅读:
    时间日期date/cal
    chown命令
    su命令
    which命令和bin目录
    python基础之文件操作
    python之模块之shutil模块
    python基础之面向对象01
    python基础之面向对象02
    python基础之map/reduce/filter/sorted
    python基础之模块之序列化
  • 原文地址:https://www.cnblogs.com/zenghui940/p/4228844.html
Copyright © 2011-2022 走看看