zoukankan      html  css  js  c++  java
  • 多个spider用不同的代理(指定城市的ip)

    class proxyMiddleware(object):
        """Assign a verified HTTP proxy to each request, per spider group.

        Two independent proxy pools are kept (``cc`` and ``cq``), each filled
        from its own provider URL. ``process_request`` picks the pool whose tag
        occurs in ``spider.name`` and stores a working proxy in
        ``request.meta['proxy']``.

        NOTE(review): the ``process_request(request, spider)`` signature looks
        like a Scrapy downloader middleware hook -- confirm against the
        project's settings.
        """

        # Text found in the provider's response body when it is rate-limiting
        # us (the message asks the client to retry in 2 seconds).
        RATE_LIMIT_MARKER = "请2秒后再试"
        # Seconds to wait before each successive retry of a rate-limited fetch.
        RATE_LIMIT_DELAYS = (2, 4, 6)
        # A pool is topped up from the provider whenever it holds fewer entries.
        MIN_POOL_SIZE = 5

        def __init__(self):
            self.ip_pool_cc = []
            self.get_ip_url_cc = 'http://xxxxx'
            self.ip_pool_cq = []
            self.get_ip_url_cq = 'http://xxxxx'

        def process_request(self, request, spider):
            """Attach a working proxy to ``request`` for known spider groups.

            The first tag that occurs in ``spider.name`` decides which pool is
            used. Requests from spiders matching no tag are left untouched.
            Always returns ``None``.
            """
            # (tag in spider name, proxy pool, provider URL, probe URL)
            routes = (
                ("zlwmw_cc", self.ip_pool_cc, self.get_ip_url_cc,
                 "http://xxxxxxxxx"),
                ("zlwmw_cq", self.ip_pool_cq, self.get_ip_url_cq,
                 "http://xxxxxxxxx"),
            )
            for tag, ip_pool, get_ip_url, url_test in routes:
                if tag in spider.name:
                    pro_addr = self.base_proxy(ip_pool, get_ip_url, url_test)
                    request.meta['proxy'] = "http://" + pro_addr
                    return None
            return None

        def base_proxy(self, ip_pool, get_ip_url_0, url_test):
            """Return a proxy address from ``ip_pool`` that can reach ``url_test``.

            ``ip_pool`` is mutated in place: it is topped up from
            ``get_ip_url_0`` whenever it holds fewer than ``MIN_POOL_SIZE``
            entries, and every proxy that fails the probe is removed from it.
            The method loops until a proxy answers the probe with HTTP 200, so
            it blocks for as long as no proxy works.

            NOTE(review): the provider fetch has no timeout, as in the original
            code; exceptions raised by that fetch propagate to the caller.
            """
            while True:
                self._refill_pool(ip_pool, get_ip_url_0)
                pro_addr = random.choice(ip_pool)
                if self._proxy_works(pro_addr, url_test):
                    return pro_addr
                ip_pool.remove(pro_addr)

        def _refill_pool(self, ip_pool, get_ip_url):
            """Fetch proxies into ``ip_pool`` until it has ``MIN_POOL_SIZE`` entries."""
            while len(ip_pool) < self.MIN_POOL_SIZE:
                ip_pool.extend(self._parse_ip_list(self._fetch_ip_text(get_ip_url)))

        def _fetch_ip_text(self, get_ip_url):
            """Download the provider response, backing off while rate-limited.

            After the last delay the response is returned as-is, even if it
            still carries the rate-limit marker.
            """
            text = requests.get(get_ip_url).text
            for delay in self.RATE_LIMIT_DELAYS:
                if self.RATE_LIMIT_MARKER not in text:
                    break
                time.sleep(delay)
                text = requests.get(get_ip_url).text
            return text

        @staticmethod
        def _parse_ip_list(text):
            """Split a provider response into proxy addresses.

            Entries are separated by single spaces and the final piece is
            dropped, as the original parsing did (presumably a trailing
            separator leaves an empty last piece -- confirm against the
            provider's format). Blank entries are skipped so that an empty
            string can never be picked as a proxy.
            """
            pieces = [piece.strip() for piece in text.split(' ')[:-1]]
            return [piece for piece in pieces if piece]

        @staticmethod
        def _proxy_works(pro_addr, url_test):
            """Return True if ``url_test`` answers HTTP 200 through ``pro_addr``."""
            proxies = {
                "http": pro_addr,
            }
            try:
                # The context manager closes the session, releasing its
                # connections instead of leaking one session per probe.
                with requests.Session() as session:
                    response = session.get(url=url_test, proxies=proxies,
                                           timeout=2, verify=False)
            except Exception as e:
                # Deliberately broad: any failure simply marks this proxy bad.
                print(e)
                return False
            return response.status_code == 200
  • 相关阅读:
    PAT 甲级 1040 Longest Symmetric String
    POJ 1276 Cash Machine
    #Leetcode# 349. Intersection of Two Arrays
    #Leetcode# 922. Sort Array By Parity II
    【USACO题库】3.1.2 Score Inflation总分
    【USACO题库】3.4.4 Raucous Rockers“破锣摇滚”乐队
    【USACO题库】3.4.2 American Heritage美国血统
    【USACO题库】3.3.5 A Game游戏
    【USACO题库】3.3.4 Home on the Range家的范围
    【USACO题库】3.3.1 Riding the Fences骑马修栅栏
  • 原文地址:https://www.cnblogs.com/qiaoer1993/p/11639666.html
Copyright © 2011-2022 走看看