zoukankan      html  css  js  c++  java
  • python 爬虫 user-agent 生成

    有些网站采用了反爬技术,例如比较初级的做法是:检查请求头中的 User-Agent 字段,以判断请求是否来自浏览器。

    爬取这类网站时,需要在请求头中模拟浏览器的 User-Agent。

    import random
    import re
    from typing import Dict, List, Union
    
    
    class UserAgent:
        """Singleton pool of User-Agent strings loaded from a text file.

        The file is read once, one User-Agent string per line. Every
        non-blank line is kept in a flat list; lines that mention
        ``firefox``, ``chrome``, ``msie`` or ``opera`` (case-insensitive)
        are additionally grouped under the first of those names found in
        the line.

        The instance can be iterated and indexed over the flat list.
        """

        # Path of the User-Agent list file (relative paths resolve against
        # the current working directory).
        __filepath = 'user-agent.txt'

        # The single shared instance handed out by __new__.
        __instance = None

        # Set to True once the file has been parsed successfully.
        __loaded = False

        # Browser name ('chrome', 'firefox', 'msie', 'opera') -> UA strings.
        __dict: Dict[str, list] = {}

        # Every User-Agent string read from the file, in file order.
        __list: List[str] = []

        def __new__(cls):
            """Return the shared instance, creating it on first use."""
            if cls.__instance is None:
                cls.__instance = super().__new__(cls)
            return cls.__instance

        def __init__(self):
            """Load the User-Agent file the first time the singleton is built.

            Raises:
                OSError: if the file cannot be opened.
            """
            # Python calls __init__ on every UserAgent() even though __new__
            # returns the same object; without this guard each call would
            # re-read the file and append duplicate entries.
            if self.__loaded:
                return

            pattern = re.compile(r'firefox|chrome|msie|opera', re.I)
            by_browser: Dict[str, list] = {}
            agents: List[str] = []
            with open(self.__filepath, 'r', encoding='utf_8_sig') as f:
                for line in f:
                    agent = line.strip()
                    if not agent:
                        # Blank lines are not usable User-Agent values.
                        continue
                    agents.append(agent)
                    match = pattern.search(agent)
                    if match:
                        browser = match.group().lower()
                        by_browser.setdefault(browser, []).append(agent)

            # Publish only after the whole file was read, so a failed load
            # leaves the previous (empty) state untouched and can be retried.
            cls = type(self)
            cls.__dict = by_browser
            cls.__list = agents
            cls.__loaded = True

        @property
        def chrome(self) -> str:
            """A random Chrome User-Agent (KeyError if none were loaded)."""
            return random.choice(self.__dict['chrome'])

        @property
        def firefox(self) -> str:
            """A random Firefox User-Agent (KeyError if none were loaded)."""
            return random.choice(self.__dict['firefox'])

        @property
        def ie(self) -> str:
            """A random Internet Explorer (MSIE) User-Agent (KeyError if none were loaded)."""
            return random.choice(self.__dict['msie'])

        @property
        def opera(self) -> str:
            """A random Opera User-Agent (KeyError if none were loaded)."""
            return random.choice(self.__dict['opera'])

        def random(self) -> str:
            """Return a random User-Agent from the full list.

            Raises:
                IndexError: if the list is empty.
            """
            return random.choice(self.__list)

        def __iter__(self):
            """Restart iteration over the full list and return ``self``.

            The cursor is stored on the instance, so starting a new
            iteration resets any iteration already in progress.
            """
            self.__iter = iter(self.__list)
            return self

        def __next__(self):
            """Return the next User-Agent of the iteration started by ``__iter__``."""
            return next(self.__iter)

        def __getitem__(self, index) -> Union[str, List[str]]:
            """Return one User-Agent for an int index, or a list for a slice."""
            return self.__list[index]
    
    
    # Demo: build the shared pool (reads user-agent.txt from the current
    # working directory) and print one randomly chosen User-Agent.
    useragent = UserAgent()
    print(useragent.random())

    # To print every loaded User-Agent instead, iterate over the pool:
    # for n in useragent:
    #     print(n)

    user-agent.txt

    Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36
    Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36
    Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36
    Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36
    Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36
    Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36
    Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36
    Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36
    Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36
    Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36
    Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36
    Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36
    Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36
    Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36
    Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36
    Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36
    Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36
    Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2309.372 Safari/537.36
    Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2117.157 Safari/537.36
    Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36
    Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1866.237 Safari/537.36
    Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/4E423F
    Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36
    Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10
    Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.517 Safari/537.36
    Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36
    Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36
    Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36
    Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36
    Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1623.0 Safari/537.36
    ...... Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; zh-cn) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5

    百度网盘

    链接:https://pan.baidu.com/s/1ramkIyjVSI2_GXbxypj1Dg
    提取码:hak8

  • 相关阅读:
    C语言关键字
    C语言返回值
    五、Vue:使用axios库进行get和post、用拦截器对请求和响应进行预处理、Mock(数据模拟)
    四、Vue过渡与动画、过渡css类名、自定义指定、过滤器
    三、深入Vue组件——Vue插槽slot、动态组件
    二、Vue组件(component):组件的相互引用、通过props实现父子组件互传值
    一、Vue环境搭建及基础用法
    Django(十三)状态保持 —— cookie与session+ajax异步请求+session记住登录状态+cookie记住登录名密码
    Django(十二)视图--利用jquery从后台发送ajax请求并处理、ajax登录案例
    Django(十一)视图详解:基本使用、登录实例、HttpReqeust对象、HttpResponse对象
  • 原文地址:https://www.cnblogs.com/whnba/p/11618438.html
Copyright © 2011-2022 走看看