zoukankan      html  css  js  c++  java
  • 使用python找出nginx访问日志中访问次数最多的10个ip排序生成网页

    使用python找出nginx访问日志中访问次数最多的10个ip排序生成网页

    方法1:
    linux下使用awk命令

    # cat access1.log | awk '{print $1"  "$7"  "$9}'|sort -n|uniq -c |sort -n -r|head -10

    方法2:
    通过python处理日志

    #encoding=utf-8
    
    # 找到日志中的top 10,日志格式如下
    #txt = '''100.116.167.9 - - [22/Oct/2017:03:55:53 +0800] "HEAD /check HTTP/1.0" 200 0 "-" "-" "-" ut = 0.001'''
    
    #nodes = txt.split()
    #print 'ip:%s, url:%s, code:%s' % (nodes[0],nodes[6],nodes[8])
    
    # Count (ip, url, status code) occurrences in an nginx access log and
    # render the most frequent combinations as an HTML table.
    def _count_requests(log_file):
        """Return a Counter mapping (ip, url, code) tuples to their hit counts."""
        from collections import Counter

        hits = Counter()
        # errors='replace' keeps a stray non-UTF-8 byte in a request line from
        # aborting the whole run.
        with open(log_file, 'r', encoding='utf-8', errors='replace') as shandle:
            for line in shandle:
                nodes = line.split()
                # Blank or truncated lines do not have the fields indexed below.
                if len(nodes) < 9:
                    continue
                # After whitespace splitting: field 0 is the client ip, field 6
                # the request url and field 8 the response status code.
                hits[(nodes[0], nodes[6], nodes[8])] += 1
        return hits


    def _render_rows(entries):
        """Return the <tr> markup for an iterable of ((ip, url, code), count)."""
        from xml.sax.saxutils import escape

        row_tpl = '<tr>\n<td>%s</td><td>%s</td><td>%s</td><td>%s</td>\n</tr>\n'
        # The values come straight from the log (the url is client-supplied),
        # so they are escaped before being embedded in the page.
        return ''.join(
            row_tpl % (count, escape(ip), escape(url), escape(code))
            for (ip, url, code), count in entries
        )


    def log_analysis(log_file, dpath, topn = 10):
        """Write an HTML report of the most frequent requests in an access log.

        Each log line is split on whitespace and grouped by the tuple
        (ip, url, status code). The ``topn`` groups with the highest counts
        are written, most frequent first, as a table to ``dpath``.

        Args:
            log_file: Path of the nginx access log to read.
            dpath: Path of the HTML file to create or overwrite.
            topn: Maximum number of rows in the report (default 10).

        Returns:
            None. The result is the file written at ``dpath``.

        Raises:
            IOError/OSError: If the log cannot be read or the report cannot
                be written.
        """
        # most_common sorts by count, so any topn works (the previous fixed
        # number of bubble-sort passes was only correct for topn <= 10).
        top_entries = _count_requests(log_file).most_common(topn)

        title = 'nginx访问日志'
        tbody = _render_rows(top_entries)

        html_tpl = '''
        <!DOCTYPE html>
        <html>
            <head>
                <meta charset="utf-8">
                <title>{title}</title>
            </head>
            <body>
                <table border="1" cellspacing="0" cellpadding="0" color='pink'>
                    <thead>
                        <tr cellspacing="0" cellpadding="0">
                            <th>访问次数</th>
                            <th>ip</th>
                            <th>url</th>
                            <th>http_code</th>
                        </tr>
                    </thead>
                    {tbody}
                </table>
            </body>
        </html>
        '''
        # The page declares charset utf-8, so write it as utf-8 regardless of
        # the platform's default encoding.
        with open(dpath, 'w', encoding='utf-8') as html_handle:
            html_handle.write(html_tpl.format(title = title, tbody = tbody))
    
    # Script entry point.
    if __name__ == '__main__':
        # Input nginx access log and the HTML report to generate.
        log_file, dpath = 'access1.log', 'top10.html'
        # Number of top entries to report; log_analysis falls back to 10
        # when this argument is omitted.
        topn = 10
        log_analysis(log_file, dpath, topn)

    方法3:

    # 统计nginx日志中的前十名
    
    def static_file(file_name):
        """Count access-log lines per (ip, status code) pair.

        Each line is split on whitespace; field 0 is used as the ip and
        field 8 as the status code. Blank lines and lines with fewer than
        nine fields are skipped.

        Args:
            file_name: Path of the access log to read.

        Returns:
            A dict mapping (ip, status) tuples to the number of matching lines.
        """
        res_dict = {}
        with open(file_name) as f:
            for line in f:
                # Example of a split line:
                # ['100.116.x.x', '-', '-', '[08/Feb/2018:14:37:13', '+0800]', '"HEAD',
                #  '/check', 'HTTP/1.0"', '200', '0', '"-"', '"-"', '"-"', 'ut', '=', '0.002']
                tmp = line.split()
                # split() gives [] for blank/whitespace-only lines, and a
                # truncated line has no field 8; skip both instead of crashing.
                if len(tmp) < 9:
                    continue
                tup = (tmp[0], tmp[8])
                res_dict[tup] = res_dict.get(tup, 0) + 1
        return res_dict
    
    def generate_html(rst_list):
        """Render ((ip, status), count) entries as an HTML table string.

        Only the last 20 entries of ``rst_list`` are rendered, in the order
        given. The ip and status values are HTML-escaped.

        Args:
            rst_list: Sequence of ((ip, status), count) items.

        Returns:
            The complete ``<table>...</table>`` markup as a single string.
        """
        from xml.sax.saxutils import escape

        html_tmpl = '<tr><td>%s</td><td>%s</td><td>%s</td></tr>'
        parts = [
            # "cellpadding" was previously misspelt, so browsers ignored it.
            '<table border="1" cellpadding=0 cellspacing=0>',
            "<tr><th>ip地址</th><th>状态码</th><th>次数</th></tr>",
        ]
        for (ip, status), count in rst_list[-20:]:
            # Values originate from log lines; escape them before embedding.
            parts.append(html_tmpl % (escape(str(ip)), escape(str(status)), count))
        parts.append('</table>')
        return ''.join(parts)
    
    def write_to_html(html_list, path='res.html'):
        """Write the HTML string ``html_list`` to ``path`` as UTF-8.

        Args:
            html_list: The HTML markup to write (a single string).
            path: Destination file; defaults to 'res.html' in the current
                working directory. An existing file is overwritten.
        """
        # Explicit utf-8 so non-ASCII markup is written the same way
        # regardless of the platform's default encoding.
        with open(path, 'w', encoding='utf-8') as f:
            f.write(html_list)
    
    
    def main():
        """Build the HTML report for 'voice20180208.log'.

        Counts the log entries with static_file, orders them from most to
        least frequent, renders the leading entries with generate_html and
        saves the markup through write_to_html.
        """
        counts = static_file('voice20180208.log')
        # Ascending sort by count, then flipped so the busiest entries lead.
        ranked = sorted(counts.items(), key = lambda pair: pair[1])
        ranked.reverse()
        # NOTE(review): this keeps at most 19 entries, exactly as the original
        # slice [-1:-20:-1] did; confirm whether 20 (or 10) was intended.
        html_content = generate_html(ranked[:19])
        write_to_html(html_content)
    
    # Run the report only when this file is executed as a script,
    # not when it is imported as a module.
    if __name__ == "__main__":
        main()
  • 相关阅读:
    插入排序(二)
    选择排序(一)
    (转)示例化讲解RIP路由更新机制
    Css元素居中设置
    (转)盒子概念和DiV布局
    (转)浅析CSS——元素重叠及position定位的z-index顺序
    (转)Java中的static关键字解析
    (转)字符串循环移位
    linux把某个文件拷贝到不同的目录下面
    linux中查找文件并合并文件
  • 原文地址:https://www.cnblogs.com/reblue520/p/7884810.html
Copyright © 2011-2022 走看看