item.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. """
  2. Scrapy Item
  3. See documentation in docs/topics/item.rst
  4. """
  5. import collections
  6. from abc import ABCMeta
  7. from copy import deepcopy
  8. from pprint import pformat
  9. from warnings import warn
  10. import six
  11. from scrapy.utils.deprecate import ScrapyDeprecationWarning
  12. from scrapy.utils.trackref import object_ref
  13. if six.PY2:
  14. MutableMapping = collections.MutableMapping
  15. else:
  16. MutableMapping = collections.abc.MutableMapping
  17. class BaseItem(object_ref):
  18. """Base class for all scraped items.
  19. In Scrapy, an object is considered an *item* if it is an instance of either
  20. :class:`BaseItem` or :class:`dict`. For example, when the output of a
  21. spider callback is evaluated, only instances of :class:`BaseItem` or
  22. :class:`dict` are passed to :ref:`item pipelines <topics-item-pipeline>`.
  23. If you need instances of a custom class to be considered items by Scrapy,
  24. you must inherit from either :class:`BaseItem` or :class:`dict`.
  25. Unlike instances of :class:`dict`, instances of :class:`BaseItem` may be
  26. :ref:`tracked <topics-leaks-trackrefs>` to debug memory leaks.
  27. """
  28. pass
  29. class Field(dict):
  30. """Container of field metadata"""
  31. class ItemMeta(ABCMeta):
  32. """Metaclass_ of :class:`Item` that handles field definitions.
  33. .. _metaclass: https://realpython.com/python-metaclasses
  34. """
  35. def __new__(mcs, class_name, bases, attrs):
  36. classcell = attrs.pop('__classcell__', None)
  37. new_bases = tuple(base._class for base in bases if hasattr(base, '_class'))
  38. _class = super(ItemMeta, mcs).__new__(mcs, 'x_' + class_name, new_bases, attrs)
  39. fields = getattr(_class, 'fields', {})
  40. new_attrs = {}
  41. for n in dir(_class):
  42. v = getattr(_class, n)
  43. if isinstance(v, Field):
  44. fields[n] = v
  45. elif n in attrs:
  46. new_attrs[n] = attrs[n]
  47. new_attrs['fields'] = fields
  48. new_attrs['_class'] = _class
  49. if classcell is not None:
  50. new_attrs['__classcell__'] = classcell
  51. return super(ItemMeta, mcs).__new__(mcs, class_name, bases, new_attrs)
  52. class DictItem(MutableMapping, BaseItem):
  53. fields = {}
  54. def __new__(cls, *args, **kwargs):
  55. if issubclass(cls, DictItem) and not issubclass(cls, Item):
  56. warn('scrapy.item.DictItem is deprecated, please use '
  57. 'scrapy.item.Item instead',
  58. ScrapyDeprecationWarning, stacklevel=2)
  59. return super(DictItem, cls).__new__(cls, *args, **kwargs)
  60. def __init__(self, *args, **kwargs):
  61. self._values = {}
  62. if args or kwargs: # avoid creating dict for most common case
  63. for k, v in six.iteritems(dict(*args, **kwargs)):
  64. self[k] = v
  65. def __getitem__(self, key):
  66. return self._values[key]
  67. def __setitem__(self, key, value):
  68. if key in self.fields:
  69. self._values[key] = value
  70. else:
  71. raise KeyError("%s does not support field: %s" %
  72. (self.__class__.__name__, key))
  73. def __delitem__(self, key):
  74. del self._values[key]
  75. def __getattr__(self, name):
  76. if name in self.fields:
  77. raise AttributeError("Use item[%r] to get field value" % name)
  78. raise AttributeError(name)
  79. def __setattr__(self, name, value):
  80. if not name.startswith('_'):
  81. raise AttributeError("Use item[%r] = %r to set field value" %
  82. (name, value))
  83. super(DictItem, self).__setattr__(name, value)
  84. def __len__(self):
  85. return len(self._values)
  86. def __iter__(self):
  87. return iter(self._values)
  88. __hash__ = BaseItem.__hash__
  89. def keys(self):
  90. return self._values.keys()
  91. def __repr__(self):
  92. return pformat(dict(self))
  93. def copy(self):
  94. return self.__class__(self)
  95. def deepcopy(self):
  96. """Return a `deep copy`_ of this item.
  97. .. _deep copy: https://docs.python.org/library/copy.html#copy.deepcopy
  98. """
  99. return deepcopy(self)
  100. @six.add_metaclass(ItemMeta)
  101. class Item(DictItem):
  102. pass