zoukankan      html  css  js  c++  java
  • Scrapy源码 Request对象

    Scrapy源码 Request对象

    """
    This module implements the Request class which is used to represent HTTP
    requests in Scrapy.

    See documentation in docs/topics/request-response.rst
    """
    import six
    from w3lib.url import safe_url_string

    from scrapy.http.headers import Headers
    from scrapy.utils.python import to_bytes
    from scrapy.utils.trackref import object_ref
    from scrapy.utils.url import escape_ajax
    from scrapy.http.common import obsolete_setter
    from scrapy.utils.curl import curl_to_request_kwargs


    class Request(object_ref):
        """Represent a single HTTP request.

        A request bundles the target URL, HTTP method, headers, body and
        cookies with the crawl bookkeeping stored alongside them: the
        ``callback`` / ``errback`` callables, ``priority``, ``dont_filter``,
        ``flags`` and the lazily created ``meta`` and ``cb_kwargs`` dicts.

        ``url`` and ``body`` are properties whose setters are wrapped with
        ``obsolete_setter`` (defined in ``scrapy.http.common``; presumably it
        rejects direct assignment -- confirm there). Use :meth:`replace` to
        build a request with different values.
        """

        def __init__(self, url, callback=None, method='GET', headers=None, body=None,
                     cookies=None, meta=None, encoding='utf-8', priority=0,
                     dont_filter=False, errback=None, flags=None, cb_kwargs=None):
            """Initialise the request.

            :param url: request URL as a string; it must contain a ``:``.
            :param callback: optional callable stored as ``self.callback``.
            :param method: HTTP method; stored upper-cased as a ``str``.
            :param headers: initial headers, wrapped in a ``Headers`` object.
            :param body: request body; ``None`` becomes ``b''``.
            :param cookies: cookies for the request; falsy values become ``{}``.
            :param meta: optional mapping, shallow-copied into a new dict.
            :param encoding: encoding used for the URL, body and headers.
            :param priority: integer priority of the request.
            :param dont_filter: flag stored as ``self.dont_filter``.
            :param errback: optional callable; requires ``callback`` to be set.
            :param flags: optional iterable, copied into a new list.
            :param cb_kwargs: optional mapping, shallow-copied into a new dict.
            :raises TypeError: if ``url`` is not a string, or if ``callback`` /
                ``errback`` is neither ``None`` nor callable.
            :raises ValueError: if the processed URL contains no ``:``.
            :raises AssertionError: if ``priority`` is not an ``int``, or an
                ``errback`` is given without a ``callback``.
            """
            # _set_url() and _set_body() read self.encoding, so this one has to
            # be set first.
            self._encoding = encoding
            self.method = str(method).upper()
            self._set_url(url)
            self._set_body(body)

            # Raised explicitly instead of via ``assert`` so the validation is
            # not stripped under ``python -O``. AssertionError is kept so that
            # existing callers see the same exception type and message.
            if not isinstance(priority, int):
                # The one-element tuple keeps a tuple ``priority`` from being
                # unpacked as several format arguments.
                raise AssertionError("Request priority not an integer: %r" % (priority,))
            self.priority = priority

            if callback is not None and not callable(callback):
                raise TypeError('callback must be a callable, got %s' % type(callback).__name__)
            if errback is not None and not callable(errback):
                raise TypeError('errback must be a callable, got %s' % type(errback).__name__)
            if errback and not callback:
                raise AssertionError("Cannot use errback without a callback")
            self.callback = callback
            self.errback = errback

            self.cookies = cookies or {}
            self.headers = Headers(headers or {}, encoding=encoding)
            self.dont_filter = dont_filter

            # meta and cb_kwargs stay None until first accessed through their
            # properties; non-empty inputs are shallow-copied so the caller's
            # mapping is not shared with the request.
            self._meta = dict(meta) if meta else None
            self._cb_kwargs = dict(cb_kwargs) if cb_kwargs else None
            self.flags = [] if flags is None else list(flags)

        @property
        def cb_kwargs(self):
            """Return the ``cb_kwargs`` dict, creating it on first access."""
            if self._cb_kwargs is None:
                self._cb_kwargs = {}
            return self._cb_kwargs

        @property
        def meta(self):
            """Return the ``meta`` dict, creating it on first access."""
            if self._meta is None:
                self._meta = {}
            return self._meta

        def _get_url(self):
            """Return the stored, already processed URL."""
            return self._url

        def _set_url(self, url):
            """Validate ``url``, process it and store it as ``self._url``.

            :raises TypeError: if ``url`` is not a string.
            :raises ValueError: if the processed URL contains no ``:``.
            """
            if not isinstance(url, six.string_types):
                raise TypeError('Request url must be str or unicode, got %s' % type(url).__name__)

            # The URL is passed through safe_url_string() with the request
            # encoding and then through escape_ajax() before being stored.
            s = safe_url_string(url, self.encoding)
            self._url = escape_ajax(s)

            # A URL without any ':' cannot carry a scheme.
            if ':' not in self._url:
                raise ValueError('Missing scheme in request url: %s' % self._url)

        url = property(_get_url, obsolete_setter(_set_url, 'url'))

        def _get_body(self):
            """Return the stored request body."""
            return self._body

        def _set_body(self, body):
            """Store ``body``; ``None`` is stored as ``b''``.

            Any other value is converted with ``to_bytes`` using the request
            encoding.
            """
            if body is None:
                self._body = b''
            else:
                self._body = to_bytes(body, self.encoding)

        body = property(_get_body, obsolete_setter(_set_body, 'body'))

        @property
        def encoding(self):
            """Return the encoding given at construction time."""
            return self._encoding

        def __str__(self):
            """Return ``<METHOD url>``; also used as ``__repr__``."""
            return "<%s %s>" % (self.method, self.url)

        __repr__ = __str__

        def copy(self):
            """Return a copy of this Request"""
            return self.replace()

        def replace(self, *args, **kwargs):
            """Create a new Request with the same attributes except for those
            given new values.

            Pass ``cls`` as a keyword argument to build the new request with a
            different class; it defaults to the class of this request.
            """
            # Fill in every constructor argument the caller did not override
            # with this request's current value.
            for x in ['url', 'method', 'headers', 'body', 'cookies', 'meta', 'flags',
                      'encoding', 'priority', 'dont_filter', 'callback', 'errback', 'cb_kwargs']:
                kwargs.setdefault(x, getattr(self, x))
            cls = kwargs.pop('cls', self.__class__)
            return cls(*args, **kwargs)

        @classmethod
        def from_curl(cls, curl_command, ignore_unknown_options=True, **kwargs):
            """Create a Request object from a string containing a `cURL
            <https://curl.haxx.se/>`_ command. It populates the HTTP method, the
            URL, the headers, the cookies and the body. It accepts the same
            arguments as the :class:`Request` class, taking preference and
            overriding the values of the same arguments contained in the cURL
            command.

            Unrecognized options are ignored by default. To raise an error when
            finding unknown options call this method by passing
            ``ignore_unknown_options=False``.

            .. caution:: Using :meth:`from_curl` from :class:`~scrapy.http.Request`
                         subclasses, such as :class:`~scrapy.http.JSONRequest`, or
                         :class:`~scrapy.http.XmlRpcRequest`, as well as having
                         :ref:`downloader middlewares <topics-downloader-middleware>`
                         and
                         :ref:`spider middlewares <topics-spider-middleware>`
                         enabled, such as
                         :class:`~scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware`,
                         :class:`~scrapy.downloadermiddlewares.useragent.UserAgentMiddleware`,
                         or
                         :class:`~scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware`,
                         may modify the :class:`~scrapy.http.Request` object.

            """
            request_kwargs = curl_to_request_kwargs(curl_command, ignore_unknown_options)
            # Explicit keyword arguments win over values parsed from the command.
            request_kwargs.update(kwargs)
            return cls(**request_kwargs)
  • 相关阅读:
    zookeeper 集群搭建 转
    zookeeper单机版安装
    zookeeper单机安装
    linux安装jdk
    Zookeeper入门看这篇就够了
    帮你解读什么是Redis缓存穿透和缓存雪崩(包含解决方案)
    goland 2020 去除形参提醒
    redis 开启AOF 持久化
    replace into 影响行数
    一个hql 关键字member(非mysql)引起的 vo 数据 保存数据库错误
  • 原文地址:https://www.cnblogs.com/yinminbo/p/12159232.html
Copyright © 2011-2022 走看看