| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137 |
"""
This module implements the Request class which is used to represent HTTP
requests in Scrapy.

See documentation in docs/topics/request-response.rst
"""
- import six
- from w3lib.url import safe_url_string
- from scrapy.http.headers import Headers
- from scrapy.utils.python import to_bytes
- from scrapy.utils.trackref import object_ref
- from scrapy.utils.url import escape_ajax
- from scrapy.http.common import obsolete_setter
- from scrapy.utils.curl import curl_to_request_kwargs
class Request(object_ref):
    """Represent a single HTTP request.

    The constructor normalises its arguments and stores them on the instance:

    * ``url`` must be a string containing a ``:``; it is passed through
      ``safe_url_string`` and ``escape_ajax`` before being stored.
    * ``method`` is converted to ``str`` and upper-cased.
    * ``body`` is converted to bytes using ``encoding`` (``None`` becomes
      ``b''``).
    * ``headers`` is wrapped in a ``Headers`` object built with ``encoding``.
    * ``meta`` and ``cb_kwargs`` are shallow-copied into new dicts when
      non-empty; otherwise an empty dict is created lazily on first access.
    * ``flags`` is copied into a new list.
    * ``cookies`` is stored as given (or ``{}`` when falsy) without copying.

    :raises TypeError: if ``url`` is not a string, or if ``callback`` /
        ``errback`` is given but is not callable.
    :raises ValueError: if the processed URL contains no ``:``.
    :raises AssertionError: if ``priority`` is not an ``int``, or if an
        ``errback`` is given without a ``callback``.
    """

    def __init__(self, url, callback=None, method='GET', headers=None, body=None,
                 cookies=None, meta=None, encoding='utf-8', priority=0,
                 dont_filter=False, errback=None, flags=None, cb_kwargs=None):
        self._encoding = encoding  # this one has to be set first
        self.method = str(method).upper()
        # Both setters read self.encoding, hence the ordering above.
        self._set_url(url)
        self._set_body(body)

        # Raised explicitly rather than with ``assert`` so the check is not
        # stripped when Python runs with -O. AssertionError is kept so that
        # callers catching the historical exception type keep working.
        if not isinstance(priority, int):
            raise AssertionError("Request priority not an integer: %r" % priority)
        self.priority = priority

        if callback is not None and not callable(callback):
            raise TypeError('callback must be a callable, got %s' % type(callback).__name__)
        if errback is not None and not callable(errback):
            raise TypeError('errback must be a callable, got %s' % type(errback).__name__)
        # Same reasoning as for the priority check: explicit raise, same type.
        if not callback and errback:
            raise AssertionError("Cannot use errback without a callback")
        self.callback = callback
        self.errback = errback

        self.cookies = cookies or {}
        self.headers = Headers(headers or {}, encoding=encoding)
        self.dont_filter = dont_filter

        # Stored as None when empty; the matching properties create the dict
        # on first access.
        self._meta = dict(meta) if meta else None
        self._cb_kwargs = dict(cb_kwargs) if cb_kwargs else None
        self.flags = [] if flags is None else list(flags)

    @property
    def cb_kwargs(self):
        """Return the ``cb_kwargs`` dict, creating an empty one on first access."""
        if self._cb_kwargs is None:
            self._cb_kwargs = {}
        return self._cb_kwargs

    @property
    def meta(self):
        """Return the ``meta`` dict, creating an empty one on first access."""
        if self._meta is None:
            self._meta = {}
        return self._meta

    def _get_url(self):
        """Return the processed URL stored by :meth:`_set_url`."""
        return self._url

    def _set_url(self, url):
        """Validate ``url``, process it and store it in ``self._url``.

        :raises TypeError: if ``url`` is not a string type.
        :raises ValueError: if the processed URL contains no ``:``.
        """
        if not isinstance(url, six.string_types):
            raise TypeError('Request url must be str or unicode, got %s' % type(url).__name__)

        s = safe_url_string(url, self.encoding)
        self._url = escape_ajax(s)

        # Note: the URL is stored before this check runs, as in the original.
        if ':' not in self._url:
            raise ValueError('Missing scheme in request url: %s' % self._url)

    # NOTE(review): obsolete_setter is defined in scrapy.http.common; it
    # presumably rejects direct assignment in favour of replace() -- confirm.
    url = property(_get_url, obsolete_setter(_set_url, 'url'))

    def _get_body(self):
        """Return the request body as stored by :meth:`_set_body`."""
        return self._body

    def _set_body(self, body):
        """Store ``body`` as bytes; ``None`` is stored as ``b''``."""
        if body is None:
            self._body = b''
        else:
            self._body = to_bytes(body, self.encoding)

    body = property(_get_body, obsolete_setter(_set_body, 'body'))

    @property
    def encoding(self):
        """Return the encoding given to the constructor."""
        return self._encoding

    def __str__(self):
        return "<%s %s>" % (self.method, self.url)

    __repr__ = __str__

    def copy(self):
        """Return a copy of this Request"""
        return self.replace()

    def replace(self, *args, **kwargs):
        """Create a new Request with the same attributes except for those
        given new values.

        A ``cls`` keyword argument, if present, selects the class to
        instantiate instead of ``self.__class__``.
        """
        for x in ['url', 'method', 'headers', 'body', 'cookies', 'meta', 'flags',
                  'encoding', 'priority', 'dont_filter', 'callback', 'errback', 'cb_kwargs']:
            # Only fill in attributes the caller did not override.
            kwargs.setdefault(x, getattr(self, x))
        cls = kwargs.pop('cls', self.__class__)
        return cls(*args, **kwargs)

    @classmethod
    def from_curl(cls, curl_command, ignore_unknown_options=True, **kwargs):
        """Create a Request object from a string containing a `cURL
        <https://curl.haxx.se/>`_ command. It populates the HTTP method, the
        URL, the headers, the cookies and the body. It accepts the same
        arguments as the :class:`Request` class, taking preference and
        overriding the values of the same arguments contained in the cURL
        command.

        Unrecognized options are ignored by default. To raise an error when
        finding unknown options call this method by passing
        ``ignore_unknown_options=False``.

        .. caution:: Using :meth:`from_curl` from :class:`~scrapy.http.Request`
                     subclasses, such as :class:`~scrapy.http.JSONRequest`, or
                     :class:`~scrapy.http.XmlRpcRequest`, as well as having
                     :ref:`downloader middlewares <topics-downloader-middleware>`
                     and
                     :ref:`spider middlewares <topics-spider-middleware>`
                     enabled, such as
                     :class:`~scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware`,
                     :class:`~scrapy.downloadermiddlewares.useragent.UserAgentMiddleware`,
                     or
                     :class:`~scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware`,
                     may modify the :class:`~scrapy.http.Request` object.
        """
        request_kwargs = curl_to_request_kwargs(curl_command, ignore_unknown_options)
        # Explicit keyword arguments take precedence over values parsed from cURL.
        request_kwargs.update(kwargs)
        return cls(**request_kwargs)
|