zoukankan      html  css  js  c++  java
  • python爬虫

    python爬虫 - 代理ip的使用

    import sys
    import time
    import hashlib
    import requests
    import urllib3
    from lxml import etree
    
    # The requests further down are sent with verify=False; urllib3 reports
    # unverified HTTPS requests via InsecureRequestWarning, so silence that
    # one warning category up front.
    urllib3.disable_warnings(category=urllib3.exceptions.InsecureRequestWarning)
    # hashlib.md5() needs bytes on Python 3, so remember which major version
    # is running in order to encode the signature plaintext when required.
    _version = sys.version_info
    is_python3 = _version[0] == 3

    # Order number and secret used to sign the request (the values here look
    # like masked placeholders -- substitute real ones before running).
    orderno = "ZF20179xxxxxxxxx"
    secret = "3f9c2ecac7xxxxxxxxxxxxxxxx"

    # Proxy host and port, combined into "host:port".
    ip = "forward.xdaili.cn"
    port = "80"
    ip_port = ":".join((ip, port))

    # Signature plaintext: "orderno=<..>,secret=<..>,timestamp=<..>", where
    # the timestamp is the current Unix time in whole seconds.
    timestamp = str(int(time.time()))
    string = ",".join((
        "orderno=" + orderno,
        "secret=" + secret,
        "timestamp=" + timestamp,
    ))
    if is_python3:
        string = string.encode()

    # The signature is the upper-cased hex MD5 digest of the plaintext.
    md5_string = hashlib.md5(string).hexdigest()
    sign = md5_string.upper()

    # Authorization value: "sign=<..>&orderno=<..>&timestamp=<..>".
    auth = "&".join((
        "sign=" + sign,
        "orderno=" + orderno,
        "timestamp=" + timestamp,
    ))
    
    # Send both http:// and https:// targets through the forwarding proxy.
    # The proxy listens on a plain-HTTP port, so the proxy URL itself uses the
    # http:// scheme for both entries; an https:// proxy URL makes current
    # urllib3 versions attempt a TLS handshake with the proxy.
    proxy = {"http": "http://" + ip_port, "https": "http://" + ip_port}
    headers = {"Proxy-Authorization": auth, "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36"}

    # requests never times out unless told to; cap the wait (in seconds) so
    # an unresponsive proxy cannot hang the script forever.
    _REQUEST_TIMEOUT = 10
    # Status codes that carry a Location header to follow.
    _REDIRECT_CODES = (301, 302, 303, 307, 308)


    def _fetch(url):
        """GET *url* through the proxy, print status code and body, return the response.

        Redirects are not followed automatically (allow_redirects=False); the
        caller follows them by hand so every hop is issued with the same
        headers and proxy settings.
        """
        resp = requests.get(url, headers=headers, proxies=proxy, verify=False,
                            allow_redirects=False, timeout=_REQUEST_TIMEOUT)
        resp.encoding = 'utf8'
        print(resp.status_code)
        print(resp.text)
        return resp


    r = _fetch("http://2000019.ip138.com")
    if r.status_code in _REDIRECT_CODES:
        # A redirect response may lack Location, or carry a relative one;
        # skip the former and resolve the latter against the request URL.
        loc = r.headers.get('Location')
        if loc:
            loc = requests.compat.urljoin(r.url, loc)
            print(loc)
            r = _fetch(loc)
    

      

  • 相关阅读:
    [算法] Tarjan算法求割点
    [题解] POJ2689 [质数距离]
    [题解] 「NOIP2009」最优贸易
    pyCharm更换主题、字体大小和编辑器字体颜色
    python之冒泡排序简单算法
    修改pip阿里镜像
    Postman工具简介
    接口文档解析
    HTTP接口请求
    接口(API)测试学习
  • 原文地址:https://www.cnblogs.com/iupoint/p/14079093.html
Copyright © 2011-2022 走看看