zoukankan      html  css  js  c++  java
  • scrapy框架中间件配置代理

    scrapy框架中间件配置代理
    import random
    # Proxy pools ("host:port"), one per URL scheme of the outgoing request.
    PROXY_http = [
        '106.240.254.138:80',
        '211.24.102.168:80',
    ]
    PROXY_https = [
        '218.57.146.212:8888',
        '139.217.24.50:3128',
    ]


    class XiaohuaproDownloaderMiddleware(object):
        """Downloader middleware that assigns a random proxy to each request."""

        def process_request(self, request, spider):
            """Pick a random proxy matching the request's URL scheme.

            Args:
                request: Object exposing ``url`` (str) and a mutable ``meta``
                    mapping; ``meta['proxy']`` is written for http/https URLs.
                spider: Unused; accepted to match the middleware hook signature.

            Returns:
                Always ``None`` (in Scrapy this lets the request continue
                through the remaining middlewares and the downloader).
            """
            # URL schemes are case-insensitive, so normalise before comparing.
            scheme = request.url.split(':', 1)[0].lower()
            if scheme == 'http':
                request.meta['proxy'] = 'http://' + random.choice(PROXY_http)
            elif scheme == 'https':
                request.meta['proxy'] = 'https://' + random.choice(PROXY_https)
            # Any other scheme (file:, ftp:, data:, ...) is left without a proxy
            # instead of being silently routed to the HTTPS pool.
            return None

    # 使用 UA 伪装爬取数据
    首先配置一个 UA(User-Agent)池:
    user_agent_list = [
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
        # ... add more User-Agent strings here
    ]
    # Pool of User-Agent strings; one is picked at random for every request.
    user_agent_list = [
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    ]


    class XiaohuaproDownloaderMiddleware(object):
        """Downloader middleware that rotates both the proxy and the User-Agent.

        Relies on module-level ``random``, ``PROXY_http`` and ``PROXY_https``
        being defined in the same module.
        """

        def process_request(self, request, spider):
            """Assign a random proxy (http/https only) and a random User-Agent.

            Args:
                request: Object exposing ``url`` (str) plus mutable ``meta``
                    and ``headers`` mappings; ``meta['proxy']`` is written for
                    http/https URLs and ``headers['User-Agent']`` is always
                    written.
                spider: Unused; accepted to match the middleware hook signature.

            Returns:
                Always ``None`` (in Scrapy this lets the request continue
                through the remaining middlewares and the downloader).
            """
            # URL schemes are case-insensitive, so normalise before comparing.
            scheme = request.url.split(':', 1)[0].lower()
            if scheme == 'http':
                request.meta['proxy'] = 'http://' + random.choice(PROXY_http)
            elif scheme == 'https':
                request.meta['proxy'] = 'https://' + random.choice(PROXY_https)
            # Other schemes (file:, ftp:, data:, ...) are left without a proxy
            # instead of being silently routed to the HTTPS pool.

            request.headers['User-Agent'] = random.choice(user_agent_list)
            return None

  • 相关阅读:
    Zookeeper安装-单机版
    Centos 7 安装 Redis 5
    java利用dom4j将xml字符串转换为json
    计算机科学导论笔记-计算机组成
    计算机科学导论笔记-数据运算
    计算机科学导论笔记-数据存储
    计算机科学导论笔记-数字系统
    计算机科学导论笔记
    springmvc03
    springmvc02
  • 原文地址:https://www.cnblogs.com/michael2018/p/10505745.html
Copyright © 2011-2022 走看看