| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107 |
- """
- This module provides some commonly used processors for Item Loaders.
- See documentation in docs/topics/loaders.rst
- """
- try:
- from collections import ChainMap
- except ImportError:
- from scrapy.utils.datatypes import MergeDict as ChainMap
- from scrapy.utils.misc import arg_to_iter
- from scrapy.loader.common import wrap_loader_context
class MapCompose(object):
    """Processor that pipes every input element through a chain of functions.

    The input is turned into an iterable with ``arg_to_iter``. Each function
    is then applied, in order, to every element of the current list, and the
    results (again passed through ``arg_to_iter``) are concatenated to form
    the list handed to the next function.

    :param functions: callables applied in order to each element
    :param default_loader_context: keyword arguments used as the context
        when no (or an empty) ``loader_context`` is given at call time
    """

    def __init__(self, *functions, **default_loader_context):
        self.functions = functions
        self.default_loader_context = default_loader_context

    def __call__(self, value, loader_context=None):
        """Run ``value`` through all functions and return the resulting list.

        :param value: input value(s); normalised with ``arg_to_iter``
        :param loader_context: optional mapping layered in front of the
            default context via ``ChainMap``
        :raises ValueError: if a function (or the normalisation of its
            result) raises any ``Exception``
        """
        current = arg_to_iter(value)
        context = (
            ChainMap(loader_context, self.default_loader_context)
            if loader_context
            else self.default_loader_context
        )
        # All functions are wrapped up front, before any of them is called.
        for func in [wrap_loader_context(f, context) for f in self.functions]:
            collected = []
            for item in current:
                try:
                    collected.extend(arg_to_iter(func(item)))
                except Exception as e:
                    # The message reports the original ``value`` argument,
                    # not the individual element that was being processed.
                    details = (str(func), value, type(e).__name__, str(e))
                    raise ValueError("Error in MapCompose with "
                                     "%s value=%r error='%s: %s'" % details)
            current = collected
        return current
class Compose(object):
    """Processor that feeds a value through a chain of functions.

    Unlike a per-element mapping, the whole value is passed to the first
    function, its result to the second, and so on.

    :param functions: callables applied in order
    :param default_loader_context: keyword arguments used as the context
        when no (or an empty) ``loader_context`` is given at call time; the
        ``stop_on_none`` key (default ``True``) is also read from here and
        controls whether processing stops once the value becomes ``None``
    """

    def __init__(self, *functions, **default_loader_context):
        self.functions = functions
        self.default_loader_context = default_loader_context
        self.stop_on_none = default_loader_context.get('stop_on_none', True)

    def __call__(self, value, loader_context=None):
        """Apply the functions in sequence and return the final value.

        :param value: initial value handed to the first function
        :param loader_context: optional mapping layered in front of the
            default context via ``ChainMap``
        :raises ValueError: if any function raises an ``Exception``
        """
        context = self.default_loader_context
        if loader_context:
            context = ChainMap(loader_context, context)
        # All functions are wrapped up front, before any of them is called.
        pipeline = [wrap_loader_context(f, context) for f in self.functions]
        for step in pipeline:
            if self.stop_on_none and value is None:
                return value
            try:
                value = step(value)
            except Exception as e:
                # ``value`` still holds the input of the failing step here,
                # because the assignment above did not complete.
                details = (str(step), value, type(e).__name__, str(e))
                raise ValueError("Error in Compose with "
                                 "%s value=%r error='%s: %s'" % details)
        return value
class TakeFirst(object):
    """Processor returning the first usable element of an iterable."""

    def __call__(self, values):
        """Return the first element that is neither ``None`` nor ``''``.

        :param values: iterable of candidate values
        :return: the first accepted element, or ``None`` if there is none
        """
        for candidate in values:
            if candidate is None:
                continue
            if candidate != '':
                return candidate
        return None
class Identity(object):
    """Processor that performs no processing at all."""

    def __call__(self, values):
        """Return ``values`` unchanged (the very same object)."""
        return values
class SelectJmes(object):
    """
    Run a jmespath query (fixed at instantiation) against the input
    and return the result.

    Requires : jmespath(https://github.com/jmespath/jmespath)
    Note: SelectJmes accepts only one input element at a time.
    """

    def __init__(self, json_path):
        # Imported at instantiation time rather than at module import.
        import jmespath
        self.json_path = json_path
        # Compile once here so each call only has to run the search.
        self.compiled_path = jmespath.compile(json_path)

    def __call__(self, value):
        """Search ``value`` with the compiled jmespath expression

        :param value: a data structure (dict, list) to extract from
        :return: Element extracted according to jmespath query
        """
        found = self.compiled_path.search(value)
        return found
class Join(object):
    """Processor that concatenates values using a separator.

    :param separator: string placed between elements (a single space
        by default)
    """

    def __init__(self, separator=u' '):
        self.separator = separator

    def __call__(self, values):
        """Return the elements of ``values`` joined with the separator.

        :param values: iterable of strings to join
        """
        glue = self.separator
        return glue.join(values)
|