zoukankan      html  css  js  c++  java
  • 统计访问日志（nginx/Tomcat）里最近五分钟的访问量

    #!/usr/bin/env python
    # -*- coding:utf-8 -*-
    # Author:Random_lee
    import time
    import os
    import re
    
    
    class StatusPV(object):
        """Count the requests logged during the most recent observation window.

        The log scanned is today's Tomcat access log,
        ``localhost_access_log.<YYYY-MM-DD>.txt`` under ``log_path``. Each line
        is expected to look like::

            10.116.201.71 - - [02/Sep/2018:09:44:13 +0800] "POST /servlet/UpdateJavaFXServlet HTTP/1.0" 200 268

        Attributes set by ``__init__``: ``log_path``, ``log_time``, ``log_name``,
        ``logfile`` (full path) and ``seek`` (offset at which scanning starts).
        """

        # Length of the observation window, in seconds.
        WINDOW_SECONDS = 300
        # Files larger than this many bytes are not scanned from the beginning.
        LARGE_FILE_BYTES = 1000000000
        # Compiled once for the class instead of once per log line. The size
        # field may be "-" (written when no body bytes were sent), and the two
        # fields after the client address are not required to be "-".
        LINE_PATTERN = re.compile(
            r'^(?P<RemoteIP>\S+) \S+ \S+ \[(?P<datatime>[^\]]+)\] '
            r'(?P<request>".+") (?P<status>\d{3}) (?P<web_size>\d{1,10}|-)'
        )

        def __init__(self):
            self.log_path = '/opt/apache-tomcat-7.0.69/logs/'
            self.log_time = time.strftime("%Y-%m-%d")
            self.log_name = 'localhost_access_log.%s.txt' % (self.log_time)
            self.logfile = os.path.join(self.log_path, self.log_name)
            # Start-of-scan offset; refreshed by get_filesize().
            self.seek = 0

        def get_filesize(self):
            """Set ``self.seek`` to the offset at which scanning should start.

            For files above ``LARGE_FILE_BYTES`` the first fifth of the file is
            skipped; smaller files are read from the beginning.

            Raises:
                OSError: if ``self.logfile`` does not exist or cannot be stat'ed.
            """
            file_size = os.path.getsize(self.logfile)
            if file_size > self.LARGE_FILE_BYTES:
                # Floor division: seek() rejects a float offset on Python 3.
                self.seek = file_size // 5
            else:
                self.seek = 0

        def count_pv(self):
            """Print and return the number of requests logged in the window.

            A line is counted when it matches ``LINE_PATTERN``, its UTC offset
            equals the machine's local offset, and its timestamp is at most
            ``WINDOW_SECONDS`` older than the time this method started.

            Returns:
                The number of matching requests, or 0 (after printing an error
                message) when the log file does not exist.
            """
            # Check existence before get_filesize(), which would raise otherwise.
            if not os.path.exists(self.logfile):
                print('error:' + self.logfile + ' not existed.')
                return 0
            self.get_filesize()

            now = time.time()
            window = self.WINDOW_SECONDS

            # Cheap substring pre-filter: every "dd/Mon/YYYY:HH:" prefix that the
            # window touches. Using only the current hour would drop requests from
            # the previous hour when the window straddles an hour boundary.
            hour_format = '%d/%b/%Y:%H:'
            look_back = list(range(0, window, 3600)) + [window]
            hour_prefixes = set(
                time.strftime(hour_format, time.localtime(now - seconds))
                for seconds in look_back
            )

            # time.mktime() interprets a struct_time as local time, so only
            # entries logged with the local UTC offset can be converted correctly.
            local_zone = time.strftime('%z')

            num = 0
            with open(self.logfile, 'r') as f:
                # NOTE(review): on Python 3 this is a raw byte offset into a
                # text-mode file; it may land in the middle of a line, which then
                # simply fails to parse or is too old to count.
                f.seek(self.seek, 0)
                for line in f:
                    if not any(prefix in line for prefix in hour_prefixes):
                        continue
                    parsed = self.LINE_PATTERN.search(line)
                    if parsed is None:
                        # Not an access-log entry; skip it (best effort).
                        continue
                    stamp, _, zone = parsed.group('datatime').partition(' ')
                    if zone != local_zone:
                        print("log format error")
                        continue
                    try:
                        # e.g. 24/Aug/2018:15:42:08 -> seconds since the epoch
                        logged_at = time.mktime(
                            time.strptime(stamp, '%d/%b/%Y:%H:%M:%S'))
                    except (ValueError, OverflowError):
                        # Malformed or unrepresentable timestamp; skip the line.
                        continue
                    if now - logged_at <= window:
                        num += 1
            print(num)
            return num
    
    
    # Script entry point: report the request count for today's access log.
    if __name__ == '__main__':
        StatusPV().count_pv()
    

      

  • 相关阅读:
    JS判断年月
    斐波那契数列
    webkit 控件供vb 6 调用,不错~
    webkit com wrapper 推荐!
    firefox usercontrol for donet
    [z] How can we render CSS3 in a WebBrowser Control ?
    A simple way to crack VBA password in Excel file
    putty教程
    Putty建立隧道的方法[z]
    Step By Step Hibernate Tutorial Using eclipse WTP[z]
  • 原文地址:https://www.cnblogs.com/randomlee/p/9490466.html
Copyright © 2011-2022 走看看