zoukankan      html  css  js  c++  java
  • 在Scrapy中使用IP池或用户代理更新版(python3)

    middlewares.py

     1 # -*- coding: utf-8 -*-
     2 # 导入随机模块
     3 import random
     4 # 导入有关IP池有关的模块
     5 from scrapy.downloadermiddlewares.httpproxy import HttpProxyMiddleware
     6 # 导入有关用户代理有关的模块
     7 from scrapy.downloadermiddlewares.useragent import UserAgentMiddleware
     8 
     9 # IP池
    class HTTPPROXY(HttpProxyMiddleware):
        """Downloader middleware that routes each request through a random proxy.

        The proxy is drawn from the module-level ``IPPOOL`` list, whose entries
        are dicts with an ``"ipaddr"`` key holding a ``host:port`` string.
        """

        def __init__(self, ip=''):
            # The original author notes that this parameter must be declared
            # exactly as ``ip=''``.
            # NOTE(review): presumably because Scrapy instantiates the
            # middleware with one positional argument -- confirm against the
            # Scrapy version in use.
            self.ip = ip

        def process_request(self, request, spider):
            """Set ``request.meta["proxy"]`` to a randomly chosen pool entry.

            Args:
                request: The outgoing request; its ``meta`` dict is updated.
                spider: The running spider; only its ``logger`` is used.

            Returns:
                None in every case. When the pool is empty or the chosen entry
                is malformed, the request is left without a proxy override.
            """
            # random.choice raises IndexError on an empty sequence, so guard
            # the pool before drawing from it.
            if not IPPOOL:
                spider.logger.warning("IPPOOL is empty; request sent without a proxy")
                return None

            entry = random.choice(IPPOOL)
            try:
                address = entry["ipaddr"]
                proxy_url = "http://" + address
            except (KeyError, TypeError) as exc:
                # Best effort: a bad pool entry must not abort the request.
                spider.logger.warning("Skipping malformed IPPOOL entry %r: %s", entry, exc)
                return None

            spider.logger.debug("当前的IP是:" + address)
            request.meta["proxy"] = proxy_url
            return None
    23 
    24 
    25 # 设置IP池
    # Proxy pool: each entry maps "ipaddr" to a "host:port" string.
    # A former third entry, 1222.94.128.49:8118, was removed because 1222 is
    # not a valid IPv4 octet, so every request that drew it got an unusable proxy.
    # NOTE(review): restore it once the intended address is known.
    IPPOOL = [
        {"ipaddr": "182.117.102.10:8118"},
        {"ipaddr": "121.31.102.215:8123"},
    ]
    31 
    32 
    33 # 用户代理
    class USERAGENT(UserAgentMiddleware):
        """Downloader middleware that gives each request a random User-Agent.

        The value is drawn from the module-level ``UPPOOL`` list of User-Agent
        strings.
        """

        def __init__(self, user_agent=''):
            # The original author notes that this parameter must be declared
            # exactly as ``user_agent=''``.
            # NOTE(review): presumably because Scrapy instantiates the
            # middleware with one positional argument -- confirm against the
            # Scrapy version in use.
            self.user_agent = user_agent

        def process_request(self, request, spider):
            """Apply a randomly chosen User-Agent header to ``request``.

            Args:
                request: The outgoing request; its ``headers`` are updated.
                spider: The running spider; only its ``logger`` is used.

            Returns:
                None in every case. When the pool is empty or the chosen entry
                is not a string, the request headers are left untouched.
            """
            # random.choice raises IndexError on an empty sequence, so guard
            # the pool before drawing from it.
            if not UPPOOL:
                spider.logger.warning("UPPOOL is empty; User-Agent left unchanged")
                return None

            agent = random.choice(UPPOOL)
            try:
                message = "当前的User-Agent是:" + agent
            except TypeError as exc:
                # Best effort: a non-string pool entry must not abort the request.
                spider.logger.warning("Skipping malformed UPPOOL entry %r: %s", agent, exc)
                return None

            spider.logger.debug(message)
            # setdefault only fills the header in when the request does not
            # already carry a User-Agent.
            request.headers.setdefault('User-Agent', agent)
            return None
    47 
    48 
    49 # 设置用户代理池
    # User-Agent pool sampled by the USERAGENT middleware, one browser per line.
    UPPOOL = [
        # Firefox 52 on Windows 10
        "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0",
        # Chrome 59 on Windows 10
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
        # Edge 14 on Windows 10
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393",
    ]

    settings.py中添加以下代码(注意根据项目名修改指向,如这里的工程名是“demo3”)

    # Downloader middleware configuration for the "demo3" project.
    DOWNLOADER_MIDDLEWARES = {
        # To route requests through the proxy pool too, uncomment both lines:
        # 'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': None,
        # 'demo3.middlewares.HTTPPROXY': 125,
        # None switches the built-in middleware off so that only the custom
        # replacement below handles the User-Agent header.
        'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
        'demo3.middlewares.USERAGENT': 1,
    }
  • 相关阅读:
    webstorm编辑器使用
    css深入理解z-index
    vue-cli安装失败问题
    html5 离线存储
    ESXI安装
    文档相似性匹配
    Hibernate基础
    云存储技术
    Signs of a poorly written jQuery plugin 翻译 (Jquery插件开发注意事项,Jquey官方推荐)
    Jquery类级别与对象级别插件开发
  • 原文地址:https://www.cnblogs.com/xiaomingzaixian/p/7125796.html
Copyright © 2011-2022 走看看