| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- import json
- from scrapy.item import BaseItem
- from scrapy.http import Request
- from scrapy.exceptions import ContractFail
- from . import Contract
- # contracts
class UrlContract(Contract):
    """Contract that sets the url of the request (mandatory).

    Example::

        @url http://scrapy.org
    """

    name = 'url'

    def adjust_request_args(self, args):
        """Put the contract's first argument into ``args['url']``.

        ``args`` is modified in place and also returned.
        """
        target_url = self.args[0]
        args['url'] = target_url
        return args
class CallbackKeywordArgumentsContract(Contract):
    """Contract that sets the keyword arguments for the request.

    The value must be a JSON-encoded dictionary, for example::

        @cb_kwargs {"arg1": "some value"}
    """

    name = 'cb_kwargs'

    def adjust_request_args(self, args):
        """Decode the contract arguments as JSON into ``args['cb_kwargs']``.

        ``args`` is modified in place and also returned.
        """
        # The contract arguments arrive as separate tokens; glue them back
        # together with single spaces before handing the text to the parser.
        raw_json = ' '.join(self.args)
        args['cb_kwargs'] = json.loads(raw_json)
        return args
class ReturnsContract(Contract):
    """Contract to check the output of a callback.

    General form::

        @returns request(s)/item(s) [min=1 [max]]

    When ``min`` is omitted it defaults to 1; when ``max`` is omitted there
    is no upper bound.

    Examples::

        @returns request
        @returns request 2
        @returns request 2 10
        @returns request 0 10
    """

    name = 'returns'

    # Maps the object name accepted by the contract to the type(s) that are
    # counted as an occurrence of that object in the callback output.
    objects = {
        'request': Request,
        'requests': Request,
        'item': (BaseItem, dict),
        'items': (BaseItem, dict),
    }

    def __init__(self, *args, **kwargs):
        """Parse the object name and the optional min/max bounds.

        Raises:
            ValueError: if the contract did not receive 1, 2 or 3 arguments,
                or if a bound is not an integer literal.
            KeyError: if the object name is not a key of ``objects``.
        """
        super(ReturnsContract, self).__init__(*args, **kwargs)

        # Validate explicitly instead of using ``assert``: assertions are
        # removed when Python runs with -O, which would let a malformed
        # contract through (or fail later with an unrelated IndexError).
        arg_count = len(self.args)
        if arg_count not in (1, 2, 3):
            raise ValueError(
                "Incorrect argument quantity: expected 1, 2 or 3, got %d"
                % arg_count)

        self.obj_name = self.args[0] or None
        self.obj_type = self.objects[self.obj_name]

        try:
            self.min_bound = int(self.args[1])
        except IndexError:
            # No lower bound given: require at least one occurrence.
            self.min_bound = 1

        try:
            self.max_bound = int(self.args[2])
        except IndexError:
            # No upper bound given: any number of occurrences is accepted.
            self.max_bound = float('inf')

    def post_process(self, output):
        """Count matching objects in ``output`` and enforce the bounds.

        Raises:
            ContractFail: if the number of objects of the expected type is
                outside ``[min_bound, max_bound]``.
        """
        occurrences = sum(1 for x in output if isinstance(x, self.obj_type))

        if self.min_bound <= occurrences <= self.max_bound:
            return

        if self.min_bound == self.max_bound:
            expected = self.min_bound
        else:
            expected = '%s..%s' % (self.min_bound, self.max_bound)

        raise ContractFail("Returned %s %s, expected %s"
                           % (occurrences, self.obj_name, expected))
class ScrapesContract(Contract):
    """Contract that checks for the presence of fields in scraped items.

    Example::

        @scrapes page_name page_body
    """

    name = 'scrapes'

    def post_process(self, output):
        """Fail on the first item in ``output`` that lacks a required field.

        Only ``BaseItem`` and ``dict`` results are inspected; anything else
        in ``output`` is skipped.

        Raises:
            ContractFail: listing the fields missing from the offending item.
        """
        for result in output:
            if not isinstance(result, (BaseItem, dict)):
                continue

            absent = [field for field in self.args if field not in result]
            if absent:
                raise ContractFail(
                    "Missing fields: %s" % ", ".join(absent))
|