zoukankan      html  css  js  c++  java
  • python解析Nginx访问日志

    环境说明

    python3+

    pip install geoip2==2.9.0

    nginx日志配置成json格式,配置如下:

    # escape=json (nginx 1.11.8+) makes nginx escape quotes, backslashes and
    # control characters in variable values as valid JSON string escapes.
    # Without it the default escaping emits "\xXX" sequences, which are not
    # valid JSON, so such lines cannot be parsed by a JSON decoder.
    # Each key appears exactly once ("body_bytes_sent" was previously listed twice).
    log_format json_log escape=json '{ "time": "$time_local", '
                             '"remote_addr": "$remote_addr", '
                             '"remote_user": "$remote_user", '
                             '"body_bytes_sent": "$body_bytes_sent", '
                             '"request_time": "$request_time", '
                             '"status": "$status", '
                             '"request": "$request", '
                             '"request_method": "$request_method", '
                             '"http_referrer": "$http_referer", '
                             '"http_x_forwarded_for": "$http_x_forwarded_for", '
                             '"http_user_agent": "$http_user_agent"}';
    配置日志成json格式

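    生成的日志如下(示例,每行一条JSON记录):

    { "time": "29/Nov/2018:10:15:32 +0800", "remote_addr": "203.0.113.10", "remote_user": "-", "body_bytes_sent": "612", "request_time": "0.005", "status": "200", "request": "GET /index.html HTTP/1.1", "request_method": "GET", "http_referrer": "-", "http_x_forwarded_for": "-", "http_user_agent": "curl/7.61.1"}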

    配置脚本

    #encoding: utf-8
    import os
    import sys
    import json
    from datetime import datetime
    from geoip2.database import Reader
    
    # Path of the access log to analyse, taken from the first command-line argument.
    # This runs at module load time, so loading the module without an argument
    # raises IndexError.
    logfile = sys.argv[1]
    # Directory containing this script; stat_region() resolves the GeoIP
    # database path relative to it.
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    
    
    def stat_days(infile):
        """
        Aggregate per-day statistics from a JSON-lines nginx access log.

        Each line of ``infile`` is expected to be one JSON object with at least
        the keys ``time``, ``remote_addr``, ``status`` and ``body_bytes_sent``.
        Lines that are blank, are not valid JSON, lack one of those keys, or
        carry an unparseable timestamp are skipped.

        The day is the date part of the ``time`` field exactly as logged; the
        trailing UTC offset (e.g. ``+0800``) is dropped, not applied.

        :param infile: path of the log file (read as UTF-8).
        :return: list of ``(day, stats)`` tuples sorted by day (``YYYY-MM-DD``),
                 where ``stats`` is a dict with ``hits`` (int), ``vistors``
                 (ip -> count), ``status`` (status code -> count) and
                 ``bytes`` (int, sum of ``body_bytes_sent``).
        """
        day_data = {}
        with open(infile, 'r', encoding="utf-8") as fhandler:
            # Iterate the file lazily instead of loading every line with readlines().
            for line in fhandler:
                line = line.strip()
                if not line:
                    continue
                try:
                    dict_line = json.loads(line)
                    # The time looks like "29/Nov/2018:10:15:32 +0800"; keep only
                    # the part before the offset so strptime can parse it.
                    _time = dict_line['time'].split()[0]
                    _day = datetime.strptime(_time, '%d/%b/%Y:%H:%M:%S').strftime('%Y-%m-%d')
                    _ip = dict_line['remote_addr']
                    _status = dict_line['status']
                    _bytes = dict_line['body_bytes_sent']
                except (ValueError, KeyError, TypeError, AttributeError, IndexError):
                    # Malformed record: skip it before any counter is touched,
                    # so a bad line can never leave partial counts behind.
                    continue

                # Create the per-day bucket on first sight of the day.
                _stat = day_data.setdefault(_day, {'hits': 0, 'vistors': {}, 'status': {}, 'bytes': 0})
                _stat['hits'] += 1
                _stat['vistors'][_ip] = _stat['vistors'].get(_ip, 0) + 1
                _stat['status'][_status] = _stat['status'].get(_status, 0) + 1
                # Non-numeric sizes (e.g. "-") count as zero bytes.
                _stat['bytes'] += int(_bytes) if str(_bytes).isdigit() else 0
        return sorted(day_data.items(), key=lambda x: x[0])
    
    
    def stat_total(days):
        """
        Merge per-day statistics into one overall summary.

        :param days: iterable of ``(day, stats)`` pairs, where each ``stats``
                     dict has the keys ``hits``, ``bytes``, ``vistors``
                     (ip -> count) and ``status`` (status code -> count).
        :return: dict with the same four keys, holding the sums over all days.
        """
        hit_total = 0
        byte_total = 0
        visitor_totals = {}
        status_totals = {}

        for _, per_day in days:
            hit_total += per_day['hits']
            byte_total += per_day['bytes']

            # Fold this day's per-IP counts into the running totals.
            for address, count in per_day['vistors'].items():
                visitor_totals[address] = visitor_totals.get(address, 0) + count

            # Same for the per-status-code counts.
            for code, count in per_day['status'].items():
                status_totals[code] = status_totals.get(code, 0) + count

        return {
            'hits': hit_total,
            'vistors': visitor_totals,
            'status': status_totals,
            'bytes': byte_total,
        }
                
    
    
    def stat_region(total_data):
        """
        Count hits per region by looking each visitor IP up in a GeoIP database.

        The database is read from ``db/GeoLite2-City.mmdb`` under ``BASE_DIR``.
        Region keys have the form ``"<country>/<city>"`` using the ``zh-CN``
        names from the lookup result; a missing name becomes an empty string.
        An IP whose lookup raises is reported with ``print`` and skipped.

        :param total_data: dict whose ``vistors`` entry maps ip -> hit count.
        :return: dict mapping region name -> summed hit count.
        """
        region_data = {}

        # Open the MaxMind mmdb file.
        geoip2_reader = Reader(os.path.join(BASE_DIR, 'db', 'GeoLite2-City.mmdb'))
        try:
            for _ip, _cnt in total_data['vistors'].items():
                try:
                    _city = geoip2_reader.city(_ip)

                    # Build the "country/city" key from the zh-CN names.
                    # (To report only mainland-China addresses, skip entries
                    # whose zh-CN country name is not '中国' here.)
                    _city_name = '{}/{}'.format(_city.country.names.get('zh-CN', ''), _city.city.names.get('zh-CN', ''))

                    # Accumulate this IP's hits into its region.
                    region_data[_city_name] = region_data.get(_city_name, 0) + _cnt
                except Exception as err:
                    # Best effort: report the failed lookup and carry on.
                    print(err)
        finally:
            # Always release the database file, even if the loop is interrupted.
            geoip2_reader.close()
        return region_data
    
    
    def formatSize(bytes):
        """
        Render a byte count as a string in K, M or G (1024-based units).

        The value is converted with ``float()`` and is not rounded. Anything
        below 1024 K is reported in K (including values under one kilobyte),
        and G is the largest unit used.

        :param bytes: number of bytes, as anything ``float()`` accepts.
                      (The name shadows the builtin; it is kept because it is
                      part of the function's signature.)
        :return: string such as ``"1.5 K"``, ``"2.0 M"`` or ``"3.25 G"``.
        """
        templates = ("{} K", "{} M", "{} G")
        value = float(bytes) / 1024
        unit = 0
        # Step up one unit at a time while the value still needs a larger one.
        while value >= 1024 and unit < len(templates) - 1:
            value = value / 1024
            unit += 1
        return templates[unit].format(value)
    
    
    def main(infile):
        """
        Analyse one access log and print a summary report to stdout.

        The report contains the total hit count, the number of distinct IPs,
        the total traffic, the 15 regions and the 15 IPs with the most hits,
        and the hit count per status code.

        :param infile: path of the JSON-formatted nginx access log.
        """
        # Collect the statistics.
        day_data = stat_days(infile)  # per-day statistics
        total_data = stat_total(day_data)  # statistics over all days
        region_data = sorted(stat_region(total_data).items(), key=lambda x: x[1], reverse=True)
        status_data = total_data['status']

        access_num = total_data['hits']
        ip_num = len(total_data['vistors'])
        ip_detail = sorted(total_data['vistors'].items(), key=lambda x: x[1], reverse=True)
        traffic = formatSize(total_data['bytes'])


        print("""
        总访问量: {}
        总IP数: {}
        总流量: {}
    
        """.format(access_num, ip_num, traffic))
        print('\n-------Top 15 地区访问分布-------')
        for region in region_data[0:15]:
            print("{}:{}".format(region[0], region[1]))


        print('\n-------Top 15 ip访问-------')
        for ip in ip_detail[0:15]:
            print("{}         {}".format(ip[0], ip[1]))


        print('\n-------状态码情况-------')
        for code, cnt in status_data.items():
            print("{}   {}".format(code, cnt))
    
    
    # Run the report only when executed as a script, using the log path that
    # was read from sys.argv near the top of the file.
    if __name__ == "__main__":
        main(logfile)
    logganalysis.py

  • 相关阅读:
    【读书笔记】:MIT线性代数(1):Linear Combinations
    Adam Optimization Algorithm
    CSS 对齐方式
    CSS Position
    设置Table边框的CSS
    p_CreateAuditEntry
    Entity FramWork Code first 使用心得
    CRM 配置 ADFS后,使用自定义STS遇到的问题总结
    Sql Server Always On主库与附库遇到的问题
    ADFS 登录页面自定义
  • 原文地址:https://www.cnblogs.com/sellsa/p/10058790.html
Copyright © 2011-2022 走看看