zoukankan      html  css  js  c++  java
  • scrapy框架中间件配置代理

    scrapy框架中间件配置代理
    import random
    #代理池
    PROXY_http = [
    '106.240.254.138:80',
    '211.24.102.168:80',
    ]
    PROXY_https =[
    '218.57.146.212:8888',
    '139.217.24.50:3128',
    ]
    class XiaohuaproDownloaderMiddleware(object):
    def process_request(self, request, spider):

    # 代理访问,配置代理池random 随机选取
    h =request.url.split(':')[0]
    if h == 'http':
    ip = random.choice(PROXY_http)
    request.meta['proxy'] = 'http://'+ip
    else:
    ip = random.choice(PROXY_https)
    request.meta['proxy'] = 'https://'+ip
    # print(request)
    return None

    #使用UA伪装配置爬取数据
    首先配置一个UA池
    user_agent_list = [
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    # ... more User-Agent strings here (多个User-Agent)
    ]
    user_agent_list = [
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    ]
    class XiaohuaproDownloaderMiddleware(object):
    63
    def process_request(self, request, spider):

    # 代理访问,配置代理池random 随机选取
    h =request.url.split(':')[0]
    if h == 'http':
    ip = random.choice(PROXY_http)
    request.meta['proxy'] = 'http://'+ip
    else:
    ip = random.choice(PROXY_https)
    request.meta['proxy'] = 'https://'+ip

    request.headers['User-Agent'] = random.choice(user_agent_list)
    # print(request)
    return None

  • 相关阅读:
    Leetcode 15
    setjmp和longjmp重复使用的问题
    linux的下两种定时器实现
    linux中的信号处理
    【推荐软件】ack
    《代码大全》阅读心得二
    更换svn diff为vimdiff
    unpack的一点使用问题
    【lua】table是否为空的判断
    vi复制部分字符
  • 原文地址:https://www.cnblogs.com/michael2018/p/10505745.html
Copyright © 2011-2022 走看看