zoukankan      html  css  js  c++  java
  • python -- 做题时先不看别人的答案,自己写完再比较

    题目一

    '''
    编写Python脚本,分析xx.log文件,按域名统计访问次数倒序输出
    
    xx.log文件内容如下:
    https://www.sogo.com/ale.html
    https://www.qq.com/3asd.html
    https://www.sogo.com/teoans.html
    https://www.bilibili.com/2
    https://www.sogo.com/asd_sa.html
    https://y.qq.com/
    https://www.bilibili.com/1
    https://dig.chouti.com/
    https://www.bilibili.com/imd.html
    https://www.bilibili.com/
    
    输出:
    www.bilibili.com
    www.sogo.com
    www.qq.com
    y.qq.com
    dig.chouti.com
    
    '''
    import re
    
    # Count how often each host name appears in the log, then list the hosts
    # from most to least visited.
    #
    # The pattern is compiled once, outside the loop. It captures only the
    # host part of an http(s) URL, so the keys carry no scheme (as in the
    # expected output of the exercise) and hosts that do not end in ".com"
    # are counted as well.
    host_pattern = re.compile(r'https?://([^/\s]+)')

    domain_dict = {}
    with open('./visit.log', 'r') as fr:
        # Iterate the file object directly instead of loading every line
        # into a list with readlines().
        for line in fr:
            found = host_pattern.match(line.strip())
            if found is None:
                # Blank or malformed line: skip it instead of failing on
                # None.group().
                continue
            domain = found.group(1)
            domain_dict[domain] = domain_dict.get(domain, 0) + 1
    print(domain_dict)

    # sorted() returns a new list; keep it and print it, otherwise the
    # ranking is computed and immediately thrown away.
    ranked = sorted(domain_dict.items(), key=lambda item: item[1], reverse=True)
    for domain, _count in ranked:
        print(domain)
    

    改进版,优化内存

    import re
    def buffered_read(file_opened,block_size=4096):
        """Yield successive chunks read from ``file_opened``.

        Each chunk is whatever ``file_opened.read(block_size)`` returns.
        Iteration stops at the first empty (falsy) read.
        """
        chunk = file_opened.read(block_size)
        while chunk:
            yield chunk
            chunk = file_opened.read(block_size)
            
    # Count host names while reading the log in fixed-size blocks, so the
    # whole file is never held in memory at once.
    #
    # Compiled once, outside the loop; captures only the host part of an
    # http(s) URL (no scheme, not limited to ".com").
    host_pattern = re.compile(r'https?://([^/\s]+)')

    domain_dict = {}
    with open('./visit.log') as f:
        # Text after the last newline of the previous block. A block boundary
        # can fall in the middle of a URL, so an unfinished line is held back
        # and prepended to the next block instead of being matched in pieces.
        leftover = ''
        for block in buffered_read(f):
            # rpartition gives ('', '', text) when there is no newline, which
            # simply keeps accumulating into leftover.
            complete, _, leftover = (leftover + block).rpartition('\n')
            for key in host_pattern.findall(complete):
                domain_dict[key] = domain_dict.get(key, 0) + 1
        # The file may end without a trailing newline; count that last line too.
        for key in host_pattern.findall(leftover):
            domain_dict[key] = domain_dict.get(key, 0) + 1

    # sorted() returns a new list; keep it and print it, otherwise the
    # ranking is computed and immediately thrown away.
    ranked = sorted(domain_dict.items(), key=lambda d: d[1], reverse=True)
    for key, _count in ranked:
        print(key)
    
    # Other people's solutions
    # First approach: read the whole file, extract every host with one regex
    # call and let Counter do the tallying.
    import re
    from collections import Counter
    with open("xx.log","r",encoding="utf-8") as f:
        text = f.read()
        hosts = re.findall(r"https://(.*?)/.*?", text)
        dic = Counter(hosts)

    # most_common() without an argument returns every (host, count) pair,
    # highest count first; equal counts stay in first-seen order.
    ret = dic.most_common()

    # Print "count host", one pair per line.
    for host, hits in ret:
        print(hits, host)
    
    # Second approach: split each URL on "/" and take the host component.
    dic = {}
    with open("xx.log", "r", encoding="utf-8") as f:
        for line in f:
            # "https://host/path".split("/") -> ["https:", "", "host", "path"]
            # strip() first so a URL without a trailing "/" does not keep its
            # newline inside the host name.
            parts = line.strip().split("/")
            if len(parts) < 3 or not parts[2]:
                # Blank or malformed line: there is no host component, and
                # indexing [2] would raise IndexError.
                continue
            host = parts[2]
            dic[host] = dic.get(host, 0) + 1
    # Highest count first; print "count host", one pair per line.
    ret = sorted(dic.items(), key=lambda x: x[1], reverse=True)
    for k, v in ret:
        print(v, k)
    
    
    如果有来生,一个人去远行,看不同的风景,感受生命的活力。。。
  • 相关阅读:
    JS 时间格式化函数
    jQuery 输入框 在光标位置插入内容, 并选中
    js Html结构转字符串形式显示
    .aspx 页面引用命名空间
    sql随机实现,sql GUID
    一个清华女大学生与一个普通二本男大学生的QQ聊天记录
    asp.net inc 的使用
    JS编码,解码. asp.net(C#)对应解码,编码
    SQL的小常识, 备忘之用, 慢慢补充.
    Js 时间间隔计算(间隔天数)
  • 原文地址:https://www.cnblogs.com/Frank99/p/9328260.html
Copyright © 2011-2022 走看看