#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Random_lee
import time
import os
import re
class StatusPV(object):
    """Count recent requests (page views) in today's Tomcat access log.

    The log file is ``localhost_access_log.<YYYY-MM-DD>.txt`` inside
    ``log_path``; the date is taken when the instance is created.
    """

    # Files larger than this many bytes are not scanned from the beginning.
    LARGE_FILE_BYTES = 1000000000
    # For such large files the first 1/SKIP_DIVISOR of the bytes is skipped.
    SKIP_DIVISOR = 5
    # A request is counted when it is at most this many seconds old.
    WINDOW_SECONDS = 300
    # Only lines carrying this UTC offset are accepted; the timestamp itself
    # is converted with time.mktime, i.e. in the machine's local time zone.
    TIME_ZONE = '+0800'
    # Sample line:
    # 10.116.201.71 - - [02/Sep/2018:09:44:13 +0800] "POST /servlet/UpdateJavaFXServlet HTTP/1.0" 200 268
    # The size field may also be "-" (written when no body bytes were sent).
    LINE_PATTERN = re.compile(
        r'^(?P<RemoteIP>.*) - - (?P<datatime>.*) (?P<request>".+") '
        r'(?P<status>\d{3}) (?P<web_size>\d{1,10}|-)'
    )

    def __init__(self):
        self.log_path = '/opt/apache-tomcat-7.0.69/logs/'
        self.log_time = time.strftime("%Y-%m-%d")
        self.log_name = 'localhost_access_log.%s.txt' % (self.log_time)
        self.logfile = os.path.join(self.log_path, self.log_name)
        # Byte offset at which count_pv starts reading; set by get_filesize.
        self.seek = 0

    def get_filesize(self):
        """Choose the byte offset to start scanning from and store it in ``self.seek``.

        Files above LARGE_FILE_BYTES start at one SKIP_DIVISOR-th of their
        size; smaller files start at offset 0.

        Raises:
            OSError: if ``self.logfile`` does not exist or cannot be stat'ed.
        """
        file_size = os.path.getsize(self.logfile)
        if file_size > self.LARGE_FILE_BYTES:
            # Integer division: file.seek() rejects a float offset.
            self.seek = file_size // self.SKIP_DIVISOR
        else:
            self.seek = 0

    def _parse_timestamp(self, line):
        """Return the request time of ``line`` as epoch seconds, or None to skip it."""
        match = self.LINE_PATTERN.search(line)
        if match is None:
            return None
        # Strip the surrounding brackets, then split "date:time" from the offset.
        stamp = match.group('datatime').replace('[', '').replace(']', '')
        parts = stamp.split(' ')
        if len(parts) < 2:
            return None
        if parts[1] != self.TIME_ZONE:
            print("log format error")
            return None
        try:
            # e.g. 24/Aug/2018:15:42:08
            parsed = time.strptime(parts[0], '%d/%b/%Y:%H:%M:%S')
            return time.mktime(parsed)
        except (ValueError, OverflowError):
            # Unparseable or out-of-range timestamp: skip this line.
            return None

    def count_pv(self, now=None):
        """Print and return the number of requests logged in the last WINDOW_SECONDS.

        Args:
            now: reference time in epoch seconds; defaults to ``time.time()``.

        Returns:
            The number of counted lines, or 0 (after printing an error
            message) when the log file does not exist.
        """
        # Check existence first so a missing file is reported, not raised.
        if not os.path.exists(self.logfile):
            print('error:' + self.logfile + ' not existed.')
            return 0
        self.get_filesize()
        if now is None:
            now = time.time()

        # Cheap substring prefilter before running the regex. The window may
        # reach back into the previous clock hour, so both hours are accepted.
        hour_format = '%d/%b/%Y:%H:'
        hour_prefixes = set([
            time.strftime(hour_format, time.localtime(now)),
            time.strftime(hour_format,
                          time.localtime(now - self.WINDOW_SECONDS)),
        ])

        num = 0
        # Binary mode: seeking to an arbitrary byte offset is only well
        # defined for binary files; each line is decoded leniently afterwards.
        with open(self.logfile, 'rb') as f:
            f.seek(self.seek, 0)
            for raw_line in f:
                line = raw_line.decode('utf-8', 'replace')
                if not any(prefix in line for prefix in hour_prefixes):
                    continue
                time_stamp = self._parse_timestamp(line)
                if time_stamp is None:
                    continue
                if now - time_stamp <= self.WINDOW_SECONDS:
                    num += 1
        print(num)
        return num
if __name__ == '__main__':
    # Script entry point: build the counter for today's access log, then
    # run a single count, which prints its result.
    obj_StatusPV = StatusPV()
    StatusPV.count_pv(obj_StatusPV)