zoukankan      html  css  js  c++  java
  • scrapy框架中间件配置代理

    scrapy框架中间件配置代理
    import random
    # Proxy pools: 'host:port' entries. PROXY_http serves URLs whose scheme is
    # 'http'; PROXY_https serves every other URL.
    PROXY_http = [
        '106.240.254.138:80',
        '211.24.102.168:80',
    ]
    PROXY_https = [
        '218.57.146.212:8888',
        '139.217.24.50:3128',
    ]
    class XiaohuaproDownloaderMiddleware(object):
        """Downloader middleware that sends each request through a random proxy."""

        def process_request(self, request, spider):
            """Attach a randomly chosen proxy to ``request.meta['proxy']``.

            URLs whose scheme is exactly ``http`` draw an address from
            ``PROXY_http`` and get an ``http://`` prefix; every other URL draws
            from ``PROXY_https`` and gets an ``https://`` prefix.

            Args:
                request: The outgoing request; its ``url`` is read and its
                    ``meta`` mapping is updated in place.
                spider: The spider that issued the request (unused).

            Returns:
                Always ``None``.
            """
            # Everything before the first ':' is the URL scheme.
            scheme = request.url.partition(':')[0]
            if scheme == 'http':
                pool, prefix = PROXY_http, 'http://'
            else:
                pool, prefix = PROXY_https, 'https://'
            request.meta['proxy'] = prefix + random.choice(pool)
            return None

    #使用UA伪装配置爬取数据
    首先配置一个UA池
    # Pool of User-Agent strings used for UA spoofing.
    user_agent_list = [
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
        # ... add more User-Agent strings here
    ]
    # User-Agent pool; process_request picks one entry at random per request.
    user_agent_list = [
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    ]
    class XiaohuaproDownloaderMiddleware(object):
        """Downloader middleware that assigns a random proxy and User-Agent."""

        def process_request(self, request, spider):
            """Set a random proxy and a random User-Agent on ``request``.

            URLs whose scheme is exactly ``http`` draw an address from
            ``PROXY_http`` and get an ``http://`` prefix; every other URL draws
            from ``PROXY_https`` and gets an ``https://`` prefix. The
            ``User-Agent`` header is then overwritten with a random entry of
            ``user_agent_list``.

            Args:
                request: The outgoing request; its ``url`` is read, and its
                    ``meta`` and ``headers`` mappings are updated in place.
                spider: The spider that issued the request (unused).

            Returns:
                Always ``None``.
            """
            # Proxy rotation: choose the pool from the URL scheme, i.e. the
            # text before the first ':'.
            scheme = request.url.split(':')[0]
            if scheme == 'http':
                request.meta['proxy'] = 'http://' + random.choice(PROXY_http)
            else:
                request.meta['proxy'] = 'https://' + random.choice(PROXY_https)

            # UA spoofing: present a randomly selected browser identity.
            request.headers['User-Agent'] = random.choice(user_agent_list)
            return None

  • 相关阅读:
    deb包的2种安装安装方法
    苹果全系产品信息查询
    水货的运作流程
    关于手机字库损坏的真相
    关闭IOS更新功能(ios4/5/6)
    c++ builder xe2 字符串转日期
    《windows核心编程》 18章 堆
    《windows核心编程》 17章 内存映射文件
    使用内存映射文件来共享数据
    <转>C++位运算详解
  • 原文地址:https://www.cnblogs.com/michael2018/p/10505745.html
Copyright © 2011-2022 走看看