zoukankan      html  css  js  c++  java
  • 爬虫代理的设置

    1. requests 中的设置

    import requests
    
    import random
    
    # Test URLs
    url = 'https://www.baidu.com'
    url2 = 'https://httpbin.org/get'
    # Proxy pool: each entry is a "host:port" string
    proxy_pool = [
        '138.201.223.250:31288',
        '196.13.208.23:8080',
        '91.197.132.99:53281',
    ]
    
    def get_content(url, proxy_pool, timeout=10):
        """Fetch ``url`` through a randomly chosen proxy and print the outcome.

        On success the HTTP status code and the response body are printed.
        On a connection failure or a timeout the exception arguments are
        printed instead; nothing is returned in either case.

        Args:
            url: Address to request.
            proxy_pool: Non-empty sequence of ``"host:port"`` proxy strings.
            timeout: Seconds to wait for the proxy/server before giving up.
                Without a timeout ``requests`` waits indefinitely, so a dead
                proxy would hang the call.

        Raises:
            IndexError: If ``proxy_pool`` is empty.
        """
        proxy = random.choice(proxy_pool)
        print('本次使用的代理为:%s' % proxy)

        # The same proxy endpoint is used for both plain and TLS traffic.
        proxy_url = 'http://' + proxy
        proxies = {
            'https': proxy_url,
            'http': proxy_url,
        }

        try:
            res = requests.get(url=url, proxies=proxies, timeout=timeout)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout) as e:
            # Read timeouts are not ConnectionError subclasses, so they are
            # listed explicitly now that a timeout is in effect.
            print('Error:', e.args)
        else:
            print(res.status_code)
            print(res.text)
    
    # Run the demo request against the httpbin test URL.
    get_content(url=url2, proxy_pool=proxy_pool)
    # Alternative target: get_content(url, proxy_pool)

    2. 在selenium 中的设置

    import random
    import requests
    from selenium import webdriver
    # Test URLs
    url = 'https://www.baidu.com'
    url2 = 'http://httpbin.org/get'
    # Proxy pool: each entry is a "host:port" string
    proxy_pool = ['138.201.223.250:31288', '196.13.208.23:8080', '91.197.132.99:53281']

    # Pick one proxy at random for this browser session.
    proxy = random.choice(proxy_pool)

    chrome_options = webdriver.ChromeOptions()
    # Chrome's switch is spelled with a hyphen ("--proxy-server"); the
    # underscore spelling is not a recognised switch.
    # NOTE(review): the scheme names the protocol spoken to the proxy itself;
    # this assumes the pool entries are plain HTTP proxies -- confirm.
    chrome_options.add_argument('--proxy-server=http://%s' % proxy)

    # "options=" replaces the deprecated "chrome_options=" keyword.
    bro = webdriver.Chrome(options=chrome_options)
    bro.get(url)

    3. PhantomJS中的设置

    from selenium import webdriver
    
    
    # Test URLs
    url = 'http://www.baidu.com'
    url2 = 'http://httpbin.org/get'

    # PhantomJS command-line options are hyphenated, and --proxy-type accepts
    # http, socks5 or none.
    service_args = [
        '--proxy=196.13.208.23:8080',
        '--proxy-type=http',
    ]

    # NOTE(review): the path separators were missing in the original text;
    # this is the presumed install location -- adjust to the local machine.
    bro = webdriver.PhantomJS(
        executable_path=r'D:\phantomjs\bin\phantomjs.exe',
        service_args=service_args,
    )
    try:
        bro.get(url)
        print(bro.page_source)
    finally:
        # Always shut the driver down so the phantomjs process is not left running.
        bro.quit()

    待续!

  • 相关阅读:

    转:在自己的工具条中使用ArcGIS Engine提供的命令和工具
    配置Subversion Apache TortoiseSVN
    vc++2005移除自定义向导目录
    NetAdvantage2006 For ASP.NET2.0印象
    三天了!今天终于成功安装ArcIMS9.2!
    vss 去除源代码管理
    转:设计模式趣谈
    转一篇:有关项目报告
    演练:使用 Visual Studio Team Test 进行单元测试
  • 原文地址:https://www.cnblogs.com/knighterrant/p/10798366.html
Copyright © 2011-2022 走看看