123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136 |
- """
- This module implements the Response class which is used to represent HTTP
- responses in Scrapy.
- See documentation in docs/topics/request-response.rst
- """
- from six.moves.urllib.parse import urljoin
- from scrapy.http.request import Request
- from scrapy.http.headers import Headers
- from scrapy.link import Link
- from scrapy.utils.trackref import object_ref
- from scrapy.http.common import obsolete_setter
- from scrapy.exceptions import NotSupported
class Response(object_ref):
    """Represent an HTTP response.

    A response holds a URL, an integer status code, headers, a ``bytes``
    body, a list of flags and, optionally, the request it is tied to.

    ``url`` and ``body`` are exposed as properties whose setters are wrapped
    with ``obsolete_setter`` (imported from ``scrapy.http.common``); use
    :meth:`replace` to build a response with different values.
    """

    def __init__(self, url, status=200, headers=None, body=b'', flags=None, request=None):
        """Initialize the response.

        :param url: URL of the response; must be a ``str``.
        :param status: status code; coerced with ``int()``.
        :param headers: value handed to ``Headers``; any falsy value
            (including ``None``) is replaced by an empty dict.
        :param body: response body as ``bytes``; ``None`` is stored as ``b''``.
        :param flags: iterable of flags, copied into a new list; ``None``
            yields an empty list.
        :param request: the request this response is tied to, if any.
        :raises TypeError: if ``url`` is not a ``str``, or if ``body`` is
            neither ``bytes`` nor ``None``.
        """
        self.headers = Headers(headers or {})
        self.status = int(status)
        # Go through the private setters so the type validation runs; the
        # public property setters are wrapped with ``obsolete_setter``.
        self._set_body(body)
        self._set_url(url)
        self.request = request
        # Copy the iterable so the caller's object is not shared.
        self.flags = [] if flags is None else list(flags)

    @property
    def meta(self):
        """Return ``self.request.meta``.

        :raises AttributeError: if accessing ``self.request.meta`` fails
            with ``AttributeError`` (for instance when ``request`` is
            ``None``).
        """
        try:
            return self.request.meta
        except AttributeError:
            # Replace the low-level error with a message that explains the
            # actual problem to the user.
            raise AttributeError(
                "Response.meta not available, this response "
                "is not tied to any request"
            )

    def _get_url(self):
        """Return the stored URL."""
        return self._url

    def _set_url(self, url):
        """Store ``url`` after checking that it is a ``str``.

        :raises TypeError: if ``url`` is not a ``str``.
        """
        if isinstance(url, str):
            self._url = url
        else:
            # Name the concrete class so subclasses report themselves.
            raise TypeError('%s url must be str, got %s' % (
                type(self).__name__, type(url).__name__))

    url = property(_get_url, obsolete_setter(_set_url, 'url'))

    def _get_body(self):
        """Return the stored body."""
        return self._body

    def _set_body(self, body):
        """Store ``body``, normalizing ``None`` to ``b''``.

        :raises TypeError: if ``body`` is neither ``None`` nor ``bytes``.
        """
        if body is None:
            self._body = b''
        elif not isinstance(body, bytes):
            raise TypeError(
                "Response body must be bytes. "
                "If you want to pass unicode body use TextResponse "
                "or HtmlResponse.")
        else:
            self._body = body

    body = property(_get_body, obsolete_setter(_set_body, 'body'))

    def __str__(self):
        """Return ``<status url>``, e.g. ``<200 http://example.com>``."""
        return "<%d %s>" % (self.status, self.url)

    __repr__ = __str__

    def copy(self):
        """Return a copy of this Response"""
        return self.replace()

    def replace(self, *args, **kwargs):
        """Create a new Response with the same attributes except for those
        given new values.

        The optional ``cls`` keyword argument selects the class to
        instantiate; it defaults to the class of this response.
        """
        # Fill in every constructor attribute the caller did not override.
        for x in ['url', 'status', 'headers', 'body', 'request', 'flags']:
            kwargs.setdefault(x, getattr(self, x))
        cls = kwargs.pop('cls', self.__class__)
        return cls(*args, **kwargs)

    def urljoin(self, url):
        """Join this Response's url with a possible relative url to form an
        absolute interpretation of the latter."""
        return urljoin(self.url, url)

    @property
    def text(self):
        """For subclasses of TextResponse, this will return the body
        as text (unicode object in Python 2 and str in Python 3)

        :raises AttributeError: always, on this base class.
        """
        raise AttributeError("Response content isn't text")

    def css(self, *a, **kw):
        """Shortcut method implemented only by responses whose content
        is text (subclasses of TextResponse).

        :raises NotSupported: always, on this base class.
        """
        raise NotSupported("Response content isn't text")

    def xpath(self, *a, **kw):
        """Shortcut method implemented only by responses whose content
        is text (subclasses of TextResponse).

        :raises NotSupported: always, on this base class.
        """
        raise NotSupported("Response content isn't text")

    def follow(self, url, callback=None, method='GET', headers=None, body=None,
               cookies=None, meta=None, encoding='utf-8', priority=0,
               dont_filter=False, errback=None, cb_kwargs=None):
        # type: (...) -> Request
        """
        Return a :class:`~.Request` instance to follow a link ``url``.
        It accepts the same arguments as ``Request.__init__`` method,
        but ``url`` can be a relative URL or a ``scrapy.link.Link`` object,
        not only an absolute URL.

        :class:`~.TextResponse` provides a :meth:`~.TextResponse.follow`
        method which supports selectors in addition to absolute/relative URLs
        and Link objects.

        :raises ValueError: if ``url`` is ``None``.
        """
        if isinstance(url, Link):
            # Unwrap the Link; only its URL is needed from here on.
            url = url.url
        elif url is None:
            raise ValueError("url can't be None")
        # Resolve relative URLs against this response's own URL.
        url = self.urljoin(url)
        return Request(url, callback,
                       method=method,
                       headers=headers,
                       body=body,
                       cookies=cookies,
                       meta=meta,
                       encoding=encoding,
                       priority=priority,
                       dont_filter=dont_filter,
                       errback=errback,
                       cb_kwargs=cb_kwargs)
|