default.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. import json
  2. from scrapy.item import BaseItem
  3. from scrapy.http import Request
  4. from scrapy.exceptions import ContractFail
  5. from . import Contract
  6. # contracts
  class UrlContract(Contract):
      """Mandatory contract that supplies the URL of the request.

      @url http://scrapy.org
      """

      name = 'url'

      def adjust_request_args(self, args):
          """Store the first contract argument under ``args['url']``.

          Returns the same ``args`` mapping it was given, after mutation.
          """
          target_url = self.args[0]
          args['url'] = target_url
          return args
  class CallbackKeywordArgumentsContract(Contract):
      """Contract that supplies the keyword arguments for the request.

      The directive value must be a JSON-encoded dictionary, e.g.:
      @cb_kwargs {"arg1": "some value"}
      """

      name = 'cb_kwargs'

      def adjust_request_args(self, args):
          """Decode the contract arguments as JSON into ``args['cb_kwargs']``.

          The individual arguments are glued back together with single
          spaces before decoding, so a JSON document that was split into
          several arguments is parsed as one string.
          """
          raw_json = ' '.join(self.args)
          args['cb_kwargs'] = json.loads(raw_json)
          return args
  class ReturnsContract(Contract):
      """Contract to check the output of a callback.

      General form:
      @returns request(s)/item(s) [min=1 [max]]

      e.g.:
      @returns request
      @returns request 2
      @returns request 2 10
      @returns request 0 10
      """

      name = 'returns'

      # Maps the object name used in the directive to the type(s) that
      # count as an occurrence of it in the callback output.
      objects = {
          'request': Request,
          'requests': Request,
          'item': (BaseItem, dict),
          'items': (BaseItem, dict),
      }

      def __init__(self, *args, **kwargs):
          """Parse the object name and the optional min/max bounds.

          Raises:
              ValueError: if the directive does not carry 1, 2 or 3
                  arguments, or if a bound is not an integer literal.
              KeyError: if the object name is not a key of ``objects``.
          """
          # NOTE(review): self.args is presumably populated by the base
          # Contract initializer, which is not visible here.
          super(ReturnsContract, self).__init__(*args, **kwargs)

          # Explicit check instead of ``assert``: assertions are removed
          # when Python runs with -O, which would let a malformed directive
          # through and fail later with an unrelated IndexError/KeyError.
          if len(self.args) not in (1, 2, 3):
              raise ValueError(
                  "Incorrect argument quantity: expected 1, 2 or 3, got %i"
                  % len(self.args))

          self.obj_name = self.args[0] or None
          self.obj_type = self.objects[self.obj_name]

          # Lower bound defaults to 1 when omitted.
          try:
              self.min_bound = int(self.args[1])
          except IndexError:
              self.min_bound = 1

          # Upper bound defaults to "unbounded" when omitted.
          try:
              self.max_bound = int(self.args[2])
          except IndexError:
              self.max_bound = float('inf')

      def post_process(self, output):
          """Check how many objects of the expected type ``output`` holds.

          Raises:
              ContractFail: if the count is outside
                  ``[min_bound, max_bound]``.
          """
          occurrences = sum(1 for x in output if isinstance(x, self.obj_type))

          if self.min_bound <= occurrences <= self.max_bound:
              return

          # Report a single number for an exact bound, a range otherwise.
          if self.min_bound == self.max_bound:
              expected = self.min_bound
          else:
              expected = '%s..%s' % (self.min_bound, self.max_bound)

          raise ContractFail("Returned %s %s, expected %s" %
                             (occurrences, self.obj_name, expected))
  class ScrapesContract(Contract):
      """Contract verifying that scraped items contain the listed fields.

      @scrapes page_name page_body
      """

      name = 'scrapes'

      def post_process(self, output):
          """Fail on the first item that lacks any of the contract's fields.

          Only ``BaseItem`` and ``dict`` objects in ``output`` are
          inspected; everything else is skipped.

          Raises:
              ContractFail: listing the fields absent from the offending
                  item.
          """
          for result in output:
              if not isinstance(result, (BaseItem, dict)):
                  continue
              absent = [field for field in self.args if field not in result]
              if absent:
                  raise ContractFail("Missing fields: %s" % ", ".join(absent))