zoukankan      html  css  js  c++  java
  • 批量获取代理ip

    获取站大爷免费代理 IP，然后打印出来，也可以把它们存放在其他容器中

    # coding:utf-8
    import requests, re
    
    # Silence urllib3 warnings; IpPool.getip issues its request with
    # verify=False, which would otherwise emit a warning on every call.
    requests.packages.urllib3.disable_warnings()
    
    
    class IpPool:
        """Scrape free proxy entries from zdaye.com and keep the working ones.

        ``getip`` downloads one listing page, extracts the proxy entries and
        hands them to ``checkip``, which probes each proxy and appends the
        responsive ones to the ``tbip.txt`` pool file.
        """

        def getip(self):
            """Fetch the proxy listing page, print the scraped entries and check them.

            The entries are whatever text sits between ``<br>`` and ``@HTTP`` in
            the page body. Each one is then validated against
            ``https://www.baidu.com`` via ``checkip``.
            """
            # Browser-like headers copied from a real session.
            # NOTE(review): the Cookie value is a hard-coded session snapshot and
            # has presumably expired; confirm whether the site still needs it.
            header = {
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
                "Accept-Encoding": "gzip, deflate, br",
                "Accept-Language": "zh-CN,zh;q=0.9",
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                "Cookie": "_qddac=3-4-1.2mthib.7w4yxc.kd1f5iav; __root_domain_v=.zdaye.com; _qddaz=QD.o7ezzx.wk5j9m.kc4mszqh; acw_tc=76b20f6315956673679925039e2bf4f4df78e0869342e377e87dc13e269d33; __51cke__=; _qdda=3-1.2mthib; _qddab=3-7w4yxc.kd1f5iav; _qddamta_2355087264=3-0; acw_sc__v2=5f1bf3aca8364235011db0c32cc514d94a772abe; Hm_lvt_80f407a85cf0bc32ab5f9cc91c15f88b=1593684839,1595667369,1595667382,1595667466; ASPSESSIONIDAEDCAQCD=HIDIHNOBBBHJIMGFNDDABBOF; __tins__16949115=%7B%22sid%22%3A%201595667368811%2C%20%22vd%22%3A%208%2C%20%22expires%22%3A%201595669740633%7D; __51laig__=8; Hm_lpvt_80f407a85cf0bc32ab5f9cc91c15f88b=1595667941",
                "Host": "www.zdaye.com",
                "Pragma": "no-cache",
                "Referer": "https://www.zdaye.com/dayProxy.html",
                "Sec-Fetch-Dest": "document",
                'Sec-Fetch-Mode': "navigate",
                "Sec-Fetch-Site": 'same-origin',
                "Sec-Fetch-User": "?1",
                "Upgrade-Insecure-Requests": "1",
                "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Mobile Safari/537.36",
            }
            url = 'https://www.zdaye.com/dayProxy/ip/322896.html'
            response = requests.get(url=url, headers=header, verify=False)
            # Let requests guess the charset from the body instead of trusting
            # the declared one, so the page text decodes correctly.
            response.encoding = response.apparent_encoding
            result = re.findall(r'<br>(.*?)@HTTP', response.text)
            print(result)
            turl = 'https://www.baidu.com'
            # Validate every scraped entry and persist the ones that work.
            self.checkip(turl, result)

        def _save_ip(self, ip, path='tbip.txt'):
            """Append ``ip`` to the pool file at ``path`` unless it is already listed.

            Returns True when the entry was written and False when it was
            already present in the file.
            """
            # 'a+' creates the file on first use (the former 'r+' mode raised
            # FileNotFoundError on a fresh run, so nothing was ever saved) and
            # always writes at the end of the file; the context manager closes
            # the handle even if reading or writing fails.
            with open(path, 'a+', encoding='utf-8') as f:
                f.seek(0)  # append mode starts at EOF; rewind to read the pool
                known = f.read().split('\n')
                if ip in known:
                    return False
                f.write(ip + '\n')
                return True

        def checkip(self, url, ips):
            """Probe each proxy in ``ips`` against ``url`` and save the working ones.

            A proxy counts as working when a GET through it returns status 200
            within 3 seconds. Failures are printed and skipped so one bad proxy
            does not abort the whole run.
            """
            # The request headers do not depend on the proxy, so build them once.
            headers = {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
            }
            for ip in ips:
                # NOTE(review): an 'https://' proxy URL tells requests to speak
                # TLS to the proxy itself; for plain HTTP proxies the requests
                # docs use an 'http://' URL under the 'https' key. Confirm what
                # the listed proxies support before changing this.
                proxies = {
                    'http': 'http://%s' % ip,
                    'https': 'https://%s' % ip,
                }
                try:
                    r = requests.get(url=url, headers=headers, proxies=proxies, timeout=3)
                    if r.status_code == 200:
                        print('成功:', ip)
                        # Add the proxy to the pool only if it is not there yet.
                        self._save_ip(ip)
                except Exception as e:
                    # Deliberately best-effort: report the failure and move on
                    # to the next proxy.
                    print(e)
    
    
    if __name__ == '__main__':
        # Script entry point: scrape the listing page and validate its proxies.
        IpPool().getip()
  • 相关阅读:
    解决Too many connections问题
    TPCC-MySQL安装、使用及结果解读
    Spring事务配置
    【转】Spring事务介绍
    Spring AOP的实现原理
    Spring AOP的使用
    Spring整合Junit4进行单元测试
    Spring Bean定义的三种方式
    spring集成Redis(单机、集群)
    Redis搭建(五):Cluster集群搭建
  • 原文地址:https://www.cnblogs.com/1314h/p/14072668.html
Copyright © 2011-2022 走看看