middleware.py 3.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. """
  2. Downloader Middleware manager
  3. See documentation in docs/topics/downloader-middleware.rst
  4. """
  5. import six
  6. from twisted.internet import defer
  7. from scrapy.exceptions import _InvalidOutput
  8. from scrapy.http import Request, Response
  9. from scrapy.middleware import MiddlewareManager
  10. from scrapy.utils.defer import mustbe_deferred
  11. from scrapy.utils.conf import build_component_list
  12. class DownloaderMiddlewareManager(MiddlewareManager):
  13. component_name = 'downloader middleware'
  14. @classmethod
  15. def _get_mwlist_from_settings(cls, settings):
  16. return build_component_list(
  17. settings.getwithbase('DOWNLOADER_MIDDLEWARES'))
  18. def _add_middleware(self, mw):
  19. if hasattr(mw, 'process_request'):
  20. self.methods['process_request'].append(mw.process_request)
  21. if hasattr(mw, 'process_response'):
  22. self.methods['process_response'].appendleft(mw.process_response)
  23. if hasattr(mw, 'process_exception'):
  24. self.methods['process_exception'].appendleft(mw.process_exception)
  25. def download(self, download_func, request, spider):
  26. @defer.inlineCallbacks
  27. def process_request(request):
  28. for method in self.methods['process_request']:
  29. response = yield method(request=request, spider=spider)
  30. if response is not None and not isinstance(response, (Response, Request)):
  31. raise _InvalidOutput('Middleware %s.process_request must return None, Response or Request, got %s' % \
  32. (six.get_method_self(method).__class__.__name__, response.__class__.__name__))
  33. if response:
  34. defer.returnValue(response)
  35. defer.returnValue((yield download_func(request=request, spider=spider)))
  36. @defer.inlineCallbacks
  37. def process_response(response):
  38. assert response is not None, 'Received None in process_response'
  39. if isinstance(response, Request):
  40. defer.returnValue(response)
  41. for method in self.methods['process_response']:
  42. response = yield method(request=request, response=response, spider=spider)
  43. if not isinstance(response, (Response, Request)):
  44. raise _InvalidOutput('Middleware %s.process_response must return Response or Request, got %s' % \
  45. (six.get_method_self(method).__class__.__name__, type(response)))
  46. if isinstance(response, Request):
  47. defer.returnValue(response)
  48. defer.returnValue(response)
  49. @defer.inlineCallbacks
  50. def process_exception(_failure):
  51. exception = _failure.value
  52. for method in self.methods['process_exception']:
  53. response = yield method(request=request, exception=exception, spider=spider)
  54. if response is not None and not isinstance(response, (Response, Request)):
  55. raise _InvalidOutput('Middleware %s.process_exception must return None, Response or Request, got %s' % \
  56. (six.get_method_self(method).__class__.__name__, type(response)))
  57. if response:
  58. defer.returnValue(response)
  59. defer.returnValue(_failure)
  60. deferred = mustbe_deferred(process_request, request)
  61. deferred.addErrback(process_exception)
  62. deferred.addCallback(process_response)
  63. return deferred