__init__.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. """
  2. This module implements the Request class which is used to represent HTTP
  3. requests in Scrapy.
  4. See documentation in docs/topics/request-response.rst
  5. """
  6. import six
  7. from w3lib.url import safe_url_string
  8. from scrapy.http.headers import Headers
  9. from scrapy.utils.python import to_bytes
  10. from scrapy.utils.trackref import object_ref
  11. from scrapy.utils.url import escape_ajax
  12. from scrapy.http.common import obsolete_setter
  13. from scrapy.utils.curl import curl_to_request_kwargs
  14. class Request(object_ref):
  15. def __init__(self, url, callback=None, method='GET', headers=None, body=None,
  16. cookies=None, meta=None, encoding='utf-8', priority=0,
  17. dont_filter=False, errback=None, flags=None, cb_kwargs=None):
  18. self._encoding = encoding # this one has to be set first
  19. self.method = str(method).upper()
  20. self._set_url(url)
  21. self._set_body(body)
  22. assert isinstance(priority, int), "Request priority not an integer: %r" % priority
  23. self.priority = priority
  24. if callback is not None and not callable(callback):
  25. raise TypeError('callback must be a callable, got %s' % type(callback).__name__)
  26. if errback is not None and not callable(errback):
  27. raise TypeError('errback must be a callable, got %s' % type(errback).__name__)
  28. assert callback or not errback, "Cannot use errback without a callback"
  29. self.callback = callback
  30. self.errback = errback
  31. self.cookies = cookies or {}
  32. self.headers = Headers(headers or {}, encoding=encoding)
  33. self.dont_filter = dont_filter
  34. self._meta = dict(meta) if meta else None
  35. self._cb_kwargs = dict(cb_kwargs) if cb_kwargs else None
  36. self.flags = [] if flags is None else list(flags)
  37. @property
  38. def cb_kwargs(self):
  39. if self._cb_kwargs is None:
  40. self._cb_kwargs = {}
  41. return self._cb_kwargs
  42. @property
  43. def meta(self):
  44. if self._meta is None:
  45. self._meta = {}
  46. return self._meta
  47. def _get_url(self):
  48. return self._url
  49. def _set_url(self, url):
  50. if not isinstance(url, six.string_types):
  51. raise TypeError('Request url must be str or unicode, got %s:' % type(url).__name__)
  52. s = safe_url_string(url, self.encoding)
  53. self._url = escape_ajax(s)
  54. if ':' not in self._url:
  55. raise ValueError('Missing scheme in request url: %s' % self._url)
  56. url = property(_get_url, obsolete_setter(_set_url, 'url'))
  57. def _get_body(self):
  58. return self._body
  59. def _set_body(self, body):
  60. if body is None:
  61. self._body = b''
  62. else:
  63. self._body = to_bytes(body, self.encoding)
  64. body = property(_get_body, obsolete_setter(_set_body, 'body'))
  65. @property
  66. def encoding(self):
  67. return self._encoding
  68. def __str__(self):
  69. return "<%s %s>" % (self.method, self.url)
  70. __repr__ = __str__
  71. def copy(self):
  72. """Return a copy of this Request"""
  73. return self.replace()
  74. def replace(self, *args, **kwargs):
  75. """Create a new Request with the same attributes except for those
  76. given new values.
  77. """
  78. for x in ['url', 'method', 'headers', 'body', 'cookies', 'meta', 'flags',
  79. 'encoding', 'priority', 'dont_filter', 'callback', 'errback', 'cb_kwargs']:
  80. kwargs.setdefault(x, getattr(self, x))
  81. cls = kwargs.pop('cls', self.__class__)
  82. return cls(*args, **kwargs)
  83. @classmethod
  84. def from_curl(cls, curl_command, ignore_unknown_options=True, **kwargs):
  85. """Create a Request object from a string containing a `cURL
  86. <https://curl.haxx.se/>`_ command. It populates the HTTP method, the
  87. URL, the headers, the cookies and the body. It accepts the same
  88. arguments as the :class:`Request` class, taking preference and
  89. overriding the values of the same arguments contained in the cURL
  90. command.
  91. Unrecognized options are ignored by default. To raise an error when
  92. finding unknown options call this method by passing
  93. ``ignore_unknown_options=False``.
  94. .. caution:: Using :meth:`from_curl` from :class:`~scrapy.http.Request`
  95. subclasses, such as :class:`~scrapy.http.JSONRequest`, or
  96. :class:`~scrapy.http.XmlRpcRequest`, as well as having
  97. :ref:`downloader middlewares <topics-downloader-middleware>`
  98. and
  99. :ref:`spider middlewares <topics-spider-middleware>`
  100. enabled, such as
  101. :class:`~scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware`,
  102. :class:`~scrapy.downloadermiddlewares.useragent.UserAgentMiddleware`,
  103. or
  104. :class:`~scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware`,
  105. may modify the :class:`~scrapy.http.Request` object.
  106. """
  107. request_kwargs = curl_to_request_kwargs(curl_command, ignore_unknown_options)
  108. request_kwargs.update(kwargs)
  109. return cls(**request_kwargs)