zoukankan      html  css  js  c++  java
  • scrapy框架中间件配置代理

    scrapy框架中间件配置代理
    import random
    # Proxy pools of 'host:port' entries: one list is used for http requests,
    # the other for https requests.
    PROXY_http = [
        '106.240.254.138:80',
        '211.24.102.168:80',
    ]
    PROXY_https = [
        '218.57.146.212:8888',
        '139.217.24.50:3128',
    ]
    class XiaohuaproDownloaderMiddleware(object):
        """Downloader middleware that assigns a random proxy to each request."""

        def process_request(self, request, spider):
            """Store a randomly chosen proxy URL in ``request.meta['proxy']``.

            The text before the first ':' of ``request.url`` is taken as the
            scheme. When it is exactly ``'http'`` the proxy comes from
            ``PROXY_http`` and is prefixed with ``'http://'``; for any other
            scheme it comes from ``PROXY_https`` and is prefixed with
            ``'https://'``.

            Args:
                request: outgoing request; ``url`` is read, ``meta`` is written.
                spider: not used by this method.

            Returns:
                None, always.
            """
            scheme = request.url.split(':')[0]
            # Only the selected (pool, prefix) pair is evaluated.
            pool, prefix = (
                (PROXY_http, 'http://') if scheme == 'http'
                else (PROXY_https, 'https://')
            )
            request.meta['proxy'] = prefix + random.choice(pool)
            return None

    # 使用UA伪装配置爬取数据
    # 首先配置一个UA池:
    # Pool of User-Agent strings; one entry is picked at random per request.
    user_agent_list = [
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
        # ... add more User-Agent strings here
    ]
    class XiaohuaproDownloaderMiddleware(object):
        """Downloader middleware that assigns a random proxy and User-Agent."""

        def process_request(self, request, spider):
            """Set ``request.meta['proxy']`` and the ``User-Agent`` header.

            The text before the first ':' of ``request.url`` is taken as the
            scheme. When it is exactly ``'http'`` the proxy comes from
            ``PROXY_http`` and is prefixed with ``'http://'``; for any other
            scheme it comes from ``PROXY_https`` and is prefixed with
            ``'https://'``. The ``User-Agent`` header is then replaced with a
            random entry of ``user_agent_list``.

            Args:
                request: outgoing request; ``url`` is read, ``meta`` and
                    ``headers`` are written.
                spider: not used by this method.

            Returns:
                None, always.
            """
            scheme = request.url.split(':')[0]
            # Only the selected (pool, prefix) pair is evaluated.
            pool, prefix = (
                (PROXY_http, 'http://') if scheme == 'http'
                else (PROXY_https, 'https://')
            )
            request.meta['proxy'] = prefix + random.choice(pool)

            # The proxy is chosen before the User-Agent, keeping the original
            # order of the two random draws.
            request.headers['User-Agent'] = random.choice(user_agent_list)
            return None

  • 相关阅读:
    计算机网络
    一行代码实现字符串逆序输出
    移动前端开发初涉篇【2014/03/25】
    小识闭包【2013/07/18】
    [转载]IE6Bug之躲猫猫【2013/10/29】
    关于maven仓库镜像
    关于spring resttemplate超时设置
    关于springboot访问多个mysql库
    关于Java基础类型自动装箱(autoboxing)
    关于Java(JDBC连接数据库)
  • 原文地址:https://www.cnblogs.com/michael2018/p/10505745.html
Copyright © 2011-2022 走看看