__init__.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. import six
  2. import json
  3. import copy
  4. import collections
  5. from importlib import import_module
  6. from pprint import pformat
  7. from scrapy.settings import default_settings
  # Resolve the MutableMapping ABC for both Python 3 and Python 2.
  # The submodule is imported explicitly: ``import collections`` alone does not
  # guarantee that ``collections.abc`` is available as an attribute, so reading
  # ``collections.abc.MutableMapping`` could depend on some other module having
  # imported the submodule first.
  try:
      from collections.abc import MutableMapping
  except ImportError:
      # Python 2 has no ``collections.abc``; the ABC lives in ``collections``.
      from collections import MutableMapping
  # Named priority levels mapped to their numerical values. A setting stored
  # with a higher number takes precedence over one stored with a lower number.
  SETTINGS_PRIORITIES = dict(
      default=0,
      command=10,
      project=20,
      spider=30,
      cmdline=40,
  )
  def get_settings_priority(priority):
      """
      Resolve ``priority`` to its numerical value.

      A string is looked up in the
      :attr:`~scrapy.settings.SETTINGS_PRIORITIES` dictionary (an unknown name
      raises ``KeyError``); any other value is handed back unchanged.
      """
      if not isinstance(priority, six.string_types):
          # Already numerical (or at least not a name): nothing to look up.
          return priority
      return SETTINGS_PRIORITIES[priority]
  class SettingsAttribute(object):
      """Pair a setting value with the priority it was stored at.

      This class is meant for internal use; configure settings through the
      Settings class rather than through this one.
      """

      def __init__(self, value, priority):
          self.value = value
          # A nested BaseSettings may already contain entries stored at a
          # higher priority than the one given; the attribute takes the larger
          # of the two.
          holds_settings = isinstance(self.value, BaseSettings)
          self.priority = (
              max(self.value.maxpriority(), priority)
              if holds_settings else priority
          )

      def set(self, value, priority):
          """Replace the value unless ``priority`` is below the current one."""
          if not priority >= self.priority:
              return
          if isinstance(self.value, BaseSettings):
              # Keep settings-valued attributes settings-valued by wrapping
              # the replacement at the new priority.
              value = BaseSettings(value, priority=priority)
          self.value = value
          self.priority = priority

      def __str__(self):
          template = ("<SettingsAttribute value={self.value!r} "
                      "priority={self.priority}>")
          return template.format(self=self)

      __repr__ = __str__
  class BaseSettings(MutableMapping):
      """
      Instances of this class behave like dictionaries, but store priorities
      along with their ``(key, value)`` pairs, and can be frozen (i.e. marked
      immutable).

      Key-value entries can be passed on initialization with the ``values``
      argument, and they would take the ``priority`` level (unless ``values`` is
      already an instance of :class:`~scrapy.settings.BaseSettings`, in which
      case the existing priority levels will be kept). If the ``priority``
      argument is a string, the priority name will be looked up in
      :attr:`~scrapy.settings.SETTINGS_PRIORITIES`. Otherwise, a specific integer
      should be provided.

      Once the object is created, new settings can be loaded or updated with the
      :meth:`~scrapy.settings.BaseSettings.set` method, and can be accessed with
      the square bracket notation of dictionaries, or with the
      :meth:`~scrapy.settings.BaseSettings.get` method of the instance and its
      value conversion variants. When requesting a stored key, the value with the
      highest priority will be retrieved.
      """

      def __init__(self, values=None, priority='project'):
          self.frozen = False
          # Maps setting name -> SettingsAttribute (value plus priority).
          self.attributes = {}
          self.update(values, priority)

      def __getitem__(self, opt_name):
          # Unlike a plain dict, a missing setting yields None instead of
          # raising KeyError.
          if opt_name not in self:
              return None
          return self.attributes[opt_name].value

      def __contains__(self, name):
          return name in self.attributes

      def get(self, name, default=None):
          """
          Get a setting value without affecting its original type.

          ``default`` is returned both when the setting is missing and when
          its stored value is ``None``.

          :param name: the setting name
          :type name: string

          :param default: the value to return if no setting is found
          :type default: any
          """
          value = self[name]
          return value if value is not None else default

      def getbool(self, name, default=False):
          """
          Get a setting value as a boolean.

          ``1``, ``'1'``, ``True``, ``'True'`` and ``'true'`` return ``True``,
          while ``0``, ``'0'``, ``False``, ``'False'`` and ``'false'`` return
          ``False``. A setting that is missing or ``None`` falls back to
          ``default``.

          For example, settings populated through environment variables set to
          ``'0'`` will return ``False`` when using this method.

          :param name: the setting name
          :type name: string

          :param default: the value to return if no setting is found
          :type default: any

          :raises ValueError: if the value is a string that is neither an
              integer literal nor one of the accepted spellings
          """
          got = self.get(name, default)
          try:
              return bool(int(got))
          except ValueError:
              if got in ("True", "true"):
                  return True
              if got in ("False", "false"):
                  return False
              raise ValueError("Supported values for boolean settings "
                               "are 0/1, True/False, '0'/'1', "
                               "'True'/'False' and 'true'/'false'")

      def getint(self, name, default=0):
          """
          Get a setting value as an int.

          :param name: the setting name
          :type name: string

          :param default: the value to return if no setting is found
          :type default: any
          """
          return int(self.get(name, default))

      def getfloat(self, name, default=0.0):
          """
          Get a setting value as a float.

          :param name: the setting name
          :type name: string

          :param default: the value to return if no setting is found
          :type default: any
          """
          return float(self.get(name, default))

      def getlist(self, name, default=None):
          """
          Get a setting value as a list. If the setting original type is a list, a
          copy of it will be returned. If it's a string it will be split by ",".

          For example, settings populated through environment variables set to
          ``'one,two'`` will return a list ['one', 'two'] when using this method.

          :param name: the setting name
          :type name: string

          :param default: the value to return if no setting is found
          :type default: any
          """
          value = self.get(name, default or [])
          if isinstance(value, six.string_types):
              value = value.split(',')
          return list(value)

      def getdict(self, name, default=None):
          """
          Get a setting value as a dictionary. If the setting original type is a
          dictionary, a copy of it will be returned. If it is a string it will be
          evaluated as a JSON dictionary. In the case that it is a
          :class:`~scrapy.settings.BaseSettings` instance itself, it will be
          converted to a dictionary, containing all its current settings values
          as they would be returned by :meth:`~scrapy.settings.BaseSettings.get`,
          and losing all information about priority and mutability.

          :param name: the setting name
          :type name: string

          :param default: the value to return if no setting is found
          :type default: any
          """
          value = self.get(name, default or {})
          if isinstance(value, six.string_types):
              value = json.loads(value)
          return dict(value)

      def getwithbase(self, name):
          """Get a composition of a dictionary-like setting and its `_BASE`
          counterpart.

          Entries of the ``_BASE`` setting are loaded first, then those of the
          setting itself, both through
          :meth:`~scrapy.settings.BaseSettings.update`.

          :param name: name of the dictionary-like setting
          :type name: string
          """
          compbs = BaseSettings()
          compbs.update(self[name + '_BASE'])
          compbs.update(self[name])
          return compbs

      def getpriority(self, name):
          """
          Return the current numerical priority value of a setting, or ``None`` if
          the given ``name`` does not exist.

          :param name: the setting name
          :type name: string
          """
          if name not in self:
              return None
          return self.attributes[name].priority

      def maxpriority(self):
          """
          Return the numerical value of the highest priority present throughout
          all settings, or the numerical value for ``default`` from
          :attr:`~scrapy.settings.SETTINGS_PRIORITIES` if there are no settings
          stored.
          """
          if len(self) > 0:
              return max(self.getpriority(name) for name in self)
          return get_settings_priority('default')

      def __setitem__(self, name, value):
          # Item assignment always uses the default priority of set().
          self.set(name, value)

      def set(self, name, value, priority='project'):
          """
          Store a key/value attribute with a given priority.

          Settings should be populated *before* configuring the Crawler object
          (through the :meth:`~scrapy.crawler.Crawler.configure` method),
          otherwise they won't have any effect.

          If ``name`` is already stored, the new value only replaces the old
          one when ``priority`` is not lower than the stored priority. If
          ``name`` is new and ``value`` is already a ``SettingsAttribute``, it
          is stored as-is, keeping its own priority.

          :param name: the setting name
          :type name: string

          :param value: the value to associate with the setting
          :type value: any

          :param priority: the priority of the setting. Should be a key of
              :attr:`~scrapy.settings.SETTINGS_PRIORITIES` or an integer
          :type priority: string or int

          :raises TypeError: if the settings object is frozen
          """
          self._assert_mutability()
          priority = get_settings_priority(priority)
          if name not in self:
              if isinstance(value, SettingsAttribute):
                  self.attributes[name] = value
              else:
                  self.attributes[name] = SettingsAttribute(value, priority)
          else:
              self.attributes[name].set(value, priority)

      def setdict(self, values, priority='project'):
          """
          Store key/value pairs with a given priority.

          Delegates directly to :meth:`~scrapy.settings.BaseSettings.update`.
          """
          self.update(values, priority)

      def setmodule(self, module, priority='project'):
          """
          Store settings from a module with a given priority.

          This is a helper function that calls
          :meth:`~scrapy.settings.BaseSettings.set` for every globally declared
          uppercase variable of ``module`` with the provided ``priority``.

          :param module: the module or the path of the module
          :type module: module object or string

          :param priority: the priority of the settings. Should be a key of
              :attr:`~scrapy.settings.SETTINGS_PRIORITIES` or an integer
          :type priority: string or int
          """
          self._assert_mutability()
          if isinstance(module, six.string_types):
              module = import_module(module)
          for key in dir(module):
              if key.isupper():
                  self.set(key, getattr(module, key), priority)

      def update(self, values, priority='project'):
          """
          Store key/value pairs with a given priority.

          This is a helper function that calls
          :meth:`~scrapy.settings.BaseSettings.set` for every item of ``values``
          with the provided ``priority``.

          If ``values`` is a string, it is assumed to be JSON-encoded and parsed
          into a dict with ``json.loads()`` first. If it is a
          :class:`~scrapy.settings.BaseSettings` instance, the per-key priorities
          will be used and the ``priority`` parameter ignored. This allows
          inserting/updating settings with different priorities with a single
          command. If it is ``None``, nothing is stored.

          :param values: the settings names and values
          :type values: dict or string or :class:`~scrapy.settings.BaseSettings`

          :param priority: the priority of the settings. Should be a key of
              :attr:`~scrapy.settings.SETTINGS_PRIORITIES` or an integer
          :type priority: string or int
          """
          self._assert_mutability()
          if isinstance(values, six.string_types):
              values = json.loads(values)
          if values is not None:
              if isinstance(values, BaseSettings):
                  for name, value in six.iteritems(values):
                      self.set(name, value, values.getpriority(name))
              else:
                  for name, value in six.iteritems(values):
                      self.set(name, value, priority)

      def delete(self, name, priority='project'):
          """
          Remove a setting, provided ``priority`` is not lower than the
          priority it is currently stored at; otherwise leave it untouched.

          :param name: the setting name
          :type name: string

          :param priority: the priority of the deletion. Should be a key of
              :attr:`~scrapy.settings.SETTINGS_PRIORITIES` or an integer
          :type priority: string or int

          :raises KeyError: if ``name`` is not stored
          :raises TypeError: if the settings object is frozen
          """
          self._assert_mutability()
          priority = get_settings_priority(priority)
          if name not in self:
              # getpriority() would return None here, and comparing an int
              # with None raises TypeError on Python 3. Raise the same
              # KeyError that __delitem__ gives for a missing name instead.
              raise KeyError(name)
          if priority >= self.getpriority(name):
              del self.attributes[name]

      def __delitem__(self, name):
          # Unconditional removal: no priority check, unlike delete().
          self._assert_mutability()
          del self.attributes[name]

      def _assert_mutability(self):
          """Raise ``TypeError`` if this object has been frozen."""
          if self.frozen:
              raise TypeError("Trying to modify an immutable Settings object")

      def copy(self):
          """
          Make a deep copy of current settings.

          This method returns a new object produced by ``copy.deepcopy``,
          populated with the same values and their priorities.

          Modifications to the new object won't be reflected on the original
          settings.
          """
          return copy.deepcopy(self)

      def freeze(self):
          """
          Disable further changes to the current settings.

          After calling this method, the present state of the settings will become
          immutable. Trying to change values through the :meth:`~set` method and
          its variants will raise ``TypeError``.
          """
          self.frozen = True

      def frozencopy(self):
          """
          Return an immutable copy of the current settings.

          Alias for a :meth:`~freeze` call in the object returned by :meth:`copy`.
          """
          # Named so that it does not shadow the ``copy`` module.
          frozen = self.copy()
          frozen.freeze()
          return frozen

      def __iter__(self):
          return iter(self.attributes)

      def __len__(self):
          return len(self.attributes)

      def _to_dict(self):
          """Return a plain dict of the values, converting nested
          ``BaseSettings`` values recursively."""
          return {k: (v._to_dict() if isinstance(v, BaseSettings) else v)
                  for k, v in six.iteritems(self)}

      def copy_to_dict(self):
          """
          Make a copy of current settings and convert to a dict.

          This method returns a new dict populated with the same values as the
          current settings; nested ``BaseSettings`` values are converted to
          dicts as well. Priorities are not part of the result.

          Modifications to the returned dict won't be reflected on the original
          settings.

          This method can be useful for example for printing settings
          in Scrapy shell.
          """
          settings = self.copy()
          return settings._to_dict()

      def _repr_pretty_(self, p, cycle):
          # Pretty-printing hook; the (p, cycle) signature matches IPython's
          # pretty-printer protocol. On a reference cycle fall back to the
          # plain repr, otherwise print the dict form.
          if cycle:
              p.text(repr(self))
          else:
              p.text(pformat(self.copy_to_dict()))
  327. class _DictProxy(MutableMapping):
  328. def __init__(self, settings, priority):
  329. self.o = {}
  330. self.settings = settings
  331. self.priority = priority
  332. def __len__(self):
  333. return len(self.o)
  334. def __getitem__(self, k):
  335. return self.o[k]
  336. def __setitem__(self, k, v):
  337. self.settings.set(k, v, priority=self.priority)
  338. self.o[k] = v
  339. def __delitem__(self, k):
  340. del self.o[k]
  341. def __iter__(self, k, v):
  342. return iter(self.o)
  class Settings(BaseSettings):
      """
      This object stores Scrapy settings for the configuration of internal
      components, and can be used for any further customization.

      It is a direct subclass and supports all methods of
      :class:`~scrapy.settings.BaseSettings`. Additionally, after instantiation
      of this class, the new object will have the global default settings
      described on :ref:`topics-settings-ref` already populated.
      """

      def __init__(self, values=None, priority='project'):
          # The user-supplied values are deliberately NOT handed to the parent
          # constructor: user dicts must not be promoted, and they have to
          # update the default dicts rather than replace them. They are
          # applied last, once the defaults are in place.
          super(Settings, self).__init__()
          self.setmodule(default_settings, 'default')

          # Wrap every default dict in a BaseSettings so that each of its
          # keys carries its own priority.
          for name, val in six.iteritems(self):
              if not isinstance(val, dict):
                  continue
              self.set(name, BaseSettings(val, 'default'), 'default')

          self.update(values, priority)
  def iter_default_settings():
      """Yield ``(name, value)`` for every uppercase name in the default settings module."""
      for attr_name in dir(default_settings):
          if not attr_name.isupper():
              # Only uppercase names are treated as settings.
              continue
          yield attr_name, getattr(default_settings, attr_name)
  def overridden_settings(settings):
      """Yield ``(name, value)`` for each setting that differs from its default.

      Defaults whose value is a ``dict`` are skipped entirely; for the rest,
      the current value in ``settings`` is yielded when it is not equal to
      the default.
      """
      for name, defvalue in iter_default_settings():
          value = settings[name]
          if isinstance(defvalue, dict):
              continue
          if value != defvalue:
              yield name, value