zoukankan      html  css  js  c++  java
  • 统计nginx日志里每五分钟的访问量

    #!/usr/bin/env python
    # -*- coding:utf-8 -*-
    # Author:Random_lee
    import calendar
    import os
    import re
    import time
    
    
    class StatusPV(object):
        """Count the requests logged in today's access log during the last few minutes.

        The log is expected to use the common access-log layout, e.g.::

            10.116.201.71 - - [02/Sep/2018:09:44:13 +0800] "POST /servlet/UpdateJavaFXServlet HTTP/1.0" 200 268

        Only the file named after today's local date is inspected, so a run made
        within ``INTERVAL_SECONDS`` after midnight does not see the requests that
        were written to the previous day's file.
        """

        # Files bigger than this many bytes are not scanned from the beginning.
        LARGE_FILE_BYTES = 1000000000
        # Width of the observation window, in seconds.
        INTERVAL_SECONDS = 300
        # One access-log record.  Compiled once instead of once per line.  The
        # response size may be "-" (written when no body bytes were sent).
        _LINE_RE = re.compile(
            r'(?P<RemoteIP>\S+) - - \[(?P<datatime>[^\]]+)\] '
            r'(?P<request>".+") (?P<status>\d{3}) (?P<web_size>\d{1,10}|-)'
        )

        def __init__(self, log_path='/opt/apache-tomcat-7.0.69/logs/'):
            """Locate today's access log.

            :param log_path: directory containing the
                ``localhost_access_log.<YYYY-MM-DD>.txt`` files.
            """
            self.log_path = log_path
            self.log_time = time.strftime("%Y-%m-%d")
            self.log_name = 'localhost_access_log.%s.txt' % (self.log_time)
            self.logfile = os.path.join(self.log_path, self.log_name)
            # Byte offset at which count_pv() starts reading; see get_filesize().
            self.seek = 0

        def get_filesize(self):
            """Set ``self.seek`` to the byte offset where scanning should start.

            For files larger than ``LARGE_FILE_BYTES`` the first fifth is skipped
            (recent entries are at the end of the file); smaller files are read
            from the beginning.

            :raises OSError: if ``self.logfile`` cannot be stat'ed.
            """
            file_size = os.path.getsize(self.logfile)
            if file_size > self.LARGE_FILE_BYTES:
                # Floor division: seek() needs an integer offset on Python 3.
                self.seek = file_size // 5
            else:
                self.seek = 0

        @staticmethod
        def _to_epoch(datatime):
            """Convert ``'02/Sep/2018:09:44:13 +0800'`` to seconds since the epoch.

            The UTC offset recorded in the log is honoured, so the result does
            not depend on the time zone of the machine running the script.

            :raises ValueError: if the text is not ``<date:time> <+|-HHMM>``.
            """
            data_time, time_zone = datatime.split(' ')
            if (len(time_zone) != 5 or time_zone[0] not in '+-'
                    or not time_zone[1:].isdigit()):
                raise ValueError('bad UTC offset: %r' % (time_zone,))
            offset = int(time_zone[1:3]) * 3600 + int(time_zone[3:5]) * 60
            if time_zone[0] == '-':
                offset = -offset
            wall_clock = time.strptime(data_time, '%d/%b/%Y:%H:%M:%S')
            # timegm() reads the fields as UTC; subtracting the offset turns the
            # logged wall-clock time into a real UTC timestamp.
            return calendar.timegm(wall_clock) - offset

        def count_pv(self):
            """Print and return the number of requests seen in the window.

            A record is counted when its timestamp is at most
            ``INTERVAL_SECONDS`` older than the moment this method was called.
            Lines that are not access-log records are skipped.

            :return: the request count, or 0 when the log file does not exist.
            """
            # Check for the file first: get_filesize() raises on a missing file.
            if not os.path.exists(self.logfile):
                print('error:' + self.logfile + ' not existed.')
                return 0
            self.get_filesize()

            # One reference instant for the whole scan, so the window does not
            # drift while a large file is being read.
            now = time.time()
            num = 0
            # Binary mode: seeking to an arbitrary byte offset is only well
            # defined for binary files, and undecodable bytes in a request line
            # must not abort the scan.
            with open(self.logfile, 'rb') as f:
                f.seek(self.seek, 0)
                for raw_line in f:
                    line = raw_line.decode('utf-8', 'replace')
                    log_info = self._LINE_RE.match(line)
                    if log_info is None:
                        continue
                    try:
                        time_stamp = self._to_epoch(log_info.group('datatime'))
                    except ValueError:
                        print("log format error")
                        continue
                    if now - time_stamp <= self.INTERVAL_SECONDS:
                        num += 1
            print(num)
            return num
    
    
    if __name__ == '__main__':
        # Script entry point: report the page-view count for today's log.
        StatusPV().count_pv()
    

      

  • 相关阅读:
    如何将SQLServer2005中的数据同步到Oracle中【转】
    八大排序算法总结[转]
    Web报表相关汇集(个人收藏)
    分析:城域网技术PBT交锋TMPLS
    Ubuntu下通过SSH远程登录服务器的方法
    关于VC代码的编写和调试(三)
    Start Starts a separate window to run a specified program or command.
    罗技劲雕鼠标移动速度过慢问题
    Microsoft XML Core Services 4.0 SP2 KB936181老是提示安装(转,不过我也遇到这个问题了)
    四川人逆境中的幽默
  • 原文地址:https://www.cnblogs.com/randomlee/p/9490466.html
Copyright © 2011-2022 走看看