__init__.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. """
  2. This module implements the Response class which is used to represent HTTP
  3. responses in Scrapy.
  4. See documentation in docs/topics/request-response.rst
  5. """
  6. from six.moves.urllib.parse import urljoin
  7. from scrapy.http.request import Request
  8. from scrapy.http.headers import Headers
  9. from scrapy.link import Link
  10. from scrapy.utils.trackref import object_ref
  11. from scrapy.http.common import obsolete_setter
  12. from scrapy.exceptions import NotSupported
  13. class Response(object_ref):
  14. def __init__(self, url, status=200, headers=None, body=b'', flags=None, request=None):
  15. self.headers = Headers(headers or {})
  16. self.status = int(status)
  17. self._set_body(body)
  18. self._set_url(url)
  19. self.request = request
  20. self.flags = [] if flags is None else list(flags)
  21. @property
  22. def meta(self):
  23. try:
  24. return self.request.meta
  25. except AttributeError:
  26. raise AttributeError(
  27. "Response.meta not available, this response "
  28. "is not tied to any request"
  29. )
  30. def _get_url(self):
  31. return self._url
  32. def _set_url(self, url):
  33. if isinstance(url, str):
  34. self._url = url
  35. else:
  36. raise TypeError('%s url must be str, got %s:' % (type(self).__name__,
  37. type(url).__name__))
  38. url = property(_get_url, obsolete_setter(_set_url, 'url'))
  39. def _get_body(self):
  40. return self._body
  41. def _set_body(self, body):
  42. if body is None:
  43. self._body = b''
  44. elif not isinstance(body, bytes):
  45. raise TypeError(
  46. "Response body must be bytes. "
  47. "If you want to pass unicode body use TextResponse "
  48. "or HtmlResponse.")
  49. else:
  50. self._body = body
  51. body = property(_get_body, obsolete_setter(_set_body, 'body'))
  52. def __str__(self):
  53. return "<%d %s>" % (self.status, self.url)
  54. __repr__ = __str__
  55. def copy(self):
  56. """Return a copy of this Response"""
  57. return self.replace()
  58. def replace(self, *args, **kwargs):
  59. """Create a new Response with the same attributes except for those
  60. given new values.
  61. """
  62. for x in ['url', 'status', 'headers', 'body', 'request', 'flags']:
  63. kwargs.setdefault(x, getattr(self, x))
  64. cls = kwargs.pop('cls', self.__class__)
  65. return cls(*args, **kwargs)
  66. def urljoin(self, url):
  67. """Join this Response's url with a possible relative url to form an
  68. absolute interpretation of the latter."""
  69. return urljoin(self.url, url)
  70. @property
  71. def text(self):
  72. """For subclasses of TextResponse, this will return the body
  73. as text (unicode object in Python 2 and str in Python 3)
  74. """
  75. raise AttributeError("Response content isn't text")
  76. def css(self, *a, **kw):
  77. """Shortcut method implemented only by responses whose content
  78. is text (subclasses of TextResponse).
  79. """
  80. raise NotSupported("Response content isn't text")
  81. def xpath(self, *a, **kw):
  82. """Shortcut method implemented only by responses whose content
  83. is text (subclasses of TextResponse).
  84. """
  85. raise NotSupported("Response content isn't text")
  86. def follow(self, url, callback=None, method='GET', headers=None, body=None,
  87. cookies=None, meta=None, encoding='utf-8', priority=0,
  88. dont_filter=False, errback=None, cb_kwargs=None):
  89. # type: (...) -> Request
  90. """
  91. Return a :class:`~.Request` instance to follow a link ``url``.
  92. It accepts the same arguments as ``Request.__init__`` method,
  93. but ``url`` can be a relative URL or a ``scrapy.link.Link`` object,
  94. not only an absolute URL.
  95. :class:`~.TextResponse` provides a :meth:`~.TextResponse.follow`
  96. method which supports selectors in addition to absolute/relative URLs
  97. and Link objects.
  98. """
  99. if isinstance(url, Link):
  100. url = url.url
  101. elif url is None:
  102. raise ValueError("url can't be None")
  103. url = self.urljoin(url)
  104. return Request(url, callback,
  105. method=method,
  106. headers=headers,
  107. body=body,
  108. cookies=cookies,
  109. meta=meta,
  110. encoding=encoding,
  111. priority=priority,
  112. dont_filter=dont_filter,
  113. errback=errback,
  114. cb_kwargs=cb_kwargs)