123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472 |
- import six
- import json
- import copy
- import collections
- from importlib import import_module
- from pprint import pformat
- from scrapy.settings import default_settings
# Pick up the MutableMapping ABC from wherever this interpreter provides it.
# Importing the name directly (instead of reading the ``collections.abc``
# attribute after a plain ``import collections``) does not rely on the
# submodule having already been loaded by some other import.
try:
    from collections.abc import MutableMapping
except ImportError:  # Python 2: the ABCs live in ``collections`` itself
    from collections import MutableMapping


# Named priority levels and their numerical values. When a setting is stored
# more than once, a new value only replaces the current one if its priority is
# greater than or equal to the priority already stored.
SETTINGS_PRIORITIES = {
    'default': 0,
    'command': 10,
    'project': 20,
    'spider': 30,
    'cmdline': 40,
}
def get_settings_priority(priority):
    """
    Resolve a priority given either by name or by number.

    Strings are treated as keys of
    :attr:`~scrapy.settings.SETTINGS_PRIORITIES` and translated to the
    matching numerical value; any other object is handed back untouched.
    """
    if not isinstance(priority, six.string_types):
        return priority
    return SETTINGS_PRIORITIES[priority]
class SettingsAttribute(object):
    """Container pairing a setting's value with the priority it was set at.

    This is an internal helper; use the Settings class to configure settings
    rather than working with this class directly.
    """

    def __init__(self, value, priority):
        self.value = value
        # A nested BaseSettings carries its own per-key priorities, so this
        # attribute never ranks below the highest of them.
        holds_settings = isinstance(self.value, BaseSettings)
        self.priority = (
            max(self.value.maxpriority(), priority)
            if holds_settings else priority
        )

    def set(self, value, priority):
        """Replace value and priority unless ``priority`` is too low."""
        if priority < self.priority:
            return
        if isinstance(self.value, BaseSettings):
            # The current value is a BaseSettings: wrap the replacement in
            # one as well.
            value = BaseSettings(value, priority=priority)
        self.value = value
        self.priority = priority

    def __str__(self):
        template = ("<SettingsAttribute value={self.value!r} "
                    "priority={self.priority}>")
        return template.format(self=self)

    __repr__ = __str__
class BaseSettings(MutableMapping):
    """
    Instances of this class behave like dictionaries, but store priorities
    along with their ``(key, value)`` pairs, and can be frozen (i.e. marked
    immutable).

    Key-value entries can be passed on initialization with the ``values``
    argument, and they would take the ``priority`` level (unless ``values`` is
    already an instance of :class:`~scrapy.settings.BaseSettings`, in which
    case the existing priority levels will be kept). If the ``priority``
    argument is a string, the priority name will be looked up in
    :attr:`~scrapy.settings.SETTINGS_PRIORITIES`. Otherwise, a specific integer
    should be provided.

    Once the object is created, new settings can be loaded or updated with the
    :meth:`~scrapy.settings.BaseSettings.set` method, and can be accessed with
    the square bracket notation of dictionaries, or with the
    :meth:`~scrapy.settings.BaseSettings.get` method of the instance and its
    value conversion variants. When requesting a stored key, the value with the
    highest priority will be retrieved.
    """

    def __init__(self, values=None, priority='project'):
        self.frozen = False
        # Maps each setting name to the object holding its value and priority.
        self.attributes = {}
        self.update(values, priority)

    def __getitem__(self, opt_name):
        # Unlike a plain dict, a missing name yields None instead of raising
        # KeyError.
        if opt_name not in self:
            return None
        return self.attributes[opt_name].value

    def __contains__(self, name):
        return name in self.attributes

    def get(self, name, default=None):
        """
        Get a setting value without affecting its original type.

        A setting that is missing, or whose stored value is ``None``, yields
        ``default``.

        :param name: the setting name
        :type name: string

        :param default: the value to return if no setting is found
        :type default: any
        """
        value = self[name]
        return value if value is not None else default

    def getbool(self, name, default=False):
        """
        Get a setting value as a boolean.

        ``1``, ``'1'``, ``True``, ``'True'`` and ``'true'`` return ``True``,
        while ``0``, ``'0'``, ``False``, ``'False'`` and ``'false'`` return
        ``False``. A setting that is missing or ``None`` falls back to
        ``default``.

        For example, settings populated through environment variables set to
        ``'0'`` will return ``False`` when using this method.

        :param name: the setting name
        :type name: string

        :param default: the value to return if no setting is found
        :type default: any

        :raises ValueError: if the value is none of the supported spellings
        """
        got = self.get(name, default)
        try:
            return bool(int(got))
        except ValueError:
            # Not a number: accept the textual spellings of the booleans.
            if got in ("True", "true"):
                return True
            if got in ("False", "false"):
                return False
            raise ValueError("Supported values for boolean settings "
                             "are 0/1, True/False, '0'/'1', "
                             "'True'/'False' and 'true'/'false'")

    def getint(self, name, default=0):
        """
        Get a setting value as an int.

        :param name: the setting name
        :type name: string

        :param default: the value to return if no setting is found
        :type default: any
        """
        return int(self.get(name, default))

    def getfloat(self, name, default=0.0):
        """
        Get a setting value as a float.

        :param name: the setting name
        :type name: string

        :param default: the value to return if no setting is found
        :type default: any
        """
        return float(self.get(name, default))

    def getlist(self, name, default=None):
        """
        Get a setting value as a list. If the setting original type is a list,
        a copy of it will be returned. If it's a string it will be split by
        ",".

        For example, settings populated through environment variables set to
        ``'one,two'`` will return a list ['one', 'two'] when using this method.

        :param name: the setting name
        :type name: string

        :param default: the value to return if no setting is found
        :type default: any
        """
        value = self.get(name, default or [])
        if isinstance(value, six.string_types):
            value = value.split(',')
        return list(value)

    def getdict(self, name, default=None):
        """
        Get a setting value as a dictionary. If the setting original type is a
        dictionary, a copy of it will be returned. If it is a string it will be
        evaluated as a JSON dictionary. In the case that it is a
        :class:`~scrapy.settings.BaseSettings` instance itself, it will be
        converted to a dictionary, containing all its current settings values
        as they would be returned by :meth:`~scrapy.settings.BaseSettings.get`,
        and losing all information about priority and mutability.

        :param name: the setting name
        :type name: string

        :param default: the value to return if no setting is found
        :type default: any
        """
        value = self.get(name, default or {})
        if isinstance(value, six.string_types):
            value = json.loads(value)
        return dict(value)

    def getwithbase(self, name):
        """Get a composition of a dictionary-like setting and its `_BASE`
        counterpart.

        The ``_BASE`` values are loaded first and the values of ``name``
        second, into a new :class:`~scrapy.settings.BaseSettings`.

        :param name: name of the dictionary-like setting
        :type name: string
        """
        compbs = BaseSettings()
        compbs.update(self[name + '_BASE'])
        compbs.update(self[name])
        return compbs

    def getpriority(self, name):
        """
        Return the current numerical priority value of a setting, or ``None``
        if the given ``name`` does not exist.

        :param name: the setting name
        :type name: string
        """
        if name not in self:
            return None
        return self.attributes[name].priority

    def maxpriority(self):
        """
        Return the numerical value of the highest priority present throughout
        all settings, or the numerical value for ``default`` from
        :attr:`~scrapy.settings.SETTINGS_PRIORITIES` if there are no settings
        stored.
        """
        if len(self) > 0:
            return max(self.getpriority(name) for name in self)
        else:
            return get_settings_priority('default')

    def __setitem__(self, name, value):
        # Item assignment always uses the default priority of set().
        self.set(name, value)

    def set(self, name, value, priority='project'):
        """
        Store a key/value attribute with a given priority.

        Settings should be populated *before* configuring the Crawler object
        (through the :meth:`~scrapy.crawler.Crawler.configure` method),
        otherwise they won't have any effect.

        :param name: the setting name
        :type name: string

        :param value: the value to associate with the setting
        :type value: any

        :param priority: the priority of the setting. Should be a key of
            :attr:`~scrapy.settings.SETTINGS_PRIORITIES` or an integer
        :type priority: string or int

        :raises TypeError: if the settings object is frozen
        """
        self._assert_mutability()
        priority = get_settings_priority(priority)
        if name not in self:
            if isinstance(value, SettingsAttribute):
                # Already carries its own priority: store it unchanged.
                self.attributes[name] = value
            else:
                self.attributes[name] = SettingsAttribute(value, priority)
        else:
            # The stored attribute decides whether the priority is high enough.
            self.attributes[name].set(value, priority)

    def setdict(self, values, priority='project'):
        """Alias of :meth:`update`."""
        self.update(values, priority)

    def setmodule(self, module, priority='project'):
        """
        Store settings from a module with a given priority.

        This is a helper function that calls
        :meth:`~scrapy.settings.BaseSettings.set` for every globally declared
        uppercase variable of ``module`` with the provided ``priority``.

        :param module: the module or the path of the module
        :type module: module object or string

        :param priority: the priority of the settings. Should be a key of
            :attr:`~scrapy.settings.SETTINGS_PRIORITIES` or an integer
        :type priority: string or int

        :raises TypeError: if the settings object is frozen
        """
        self._assert_mutability()
        if isinstance(module, six.string_types):
            module = import_module(module)
        for key in dir(module):
            if key.isupper():
                self.set(key, getattr(module, key), priority)

    def update(self, values, priority='project'):
        """
        Store key/value pairs with a given priority.

        This is a helper function that calls
        :meth:`~scrapy.settings.BaseSettings.set` for every item of ``values``
        with the provided ``priority``.

        If ``values`` is a string, it is assumed to be JSON-encoded and parsed
        into a dict with ``json.loads()`` first. If it is a
        :class:`~scrapy.settings.BaseSettings` instance, the per-key priorities
        will be used and the ``priority`` parameter ignored. This allows
        inserting/updating settings with different priorities with a single
        command. ``None`` is accepted and stores nothing.

        :param values: the settings names and values
        :type values: dict or string or :class:`~scrapy.settings.BaseSettings`

        :param priority: the priority of the settings. Should be a key of
            :attr:`~scrapy.settings.SETTINGS_PRIORITIES` or an integer
        :type priority: string or int

        :raises TypeError: if the settings object is frozen
        """
        self._assert_mutability()
        if isinstance(values, six.string_types):
            values = json.loads(values)
        if values is not None:
            if isinstance(values, BaseSettings):
                for name, value in six.iteritems(values):
                    self.set(name, value, values.getpriority(name))
            else:
                for name, value in six.iteritems(values):
                    self.set(name, value, priority)

    def delete(self, name, priority='project'):
        """
        Remove a setting, provided ``priority`` is high enough.

        The setting is removed only if ``priority`` is greater than or equal
        to the priority it is currently stored with; otherwise nothing
        happens.

        :param name: the setting name
        :type name: string

        :param priority: the priority to delete with. Should be a key of
            :attr:`~scrapy.settings.SETTINGS_PRIORITIES` or an integer
        :type priority: string or int

        :raises TypeError: if the settings object is frozen
        :raises KeyError: if ``name`` is not stored
        """
        self._assert_mutability()
        if name not in self:
            # getpriority() returns None for unknown names, and ordering a
            # number against None fails with TypeError on Python 3. Report the
            # missing key the way mappings do instead.
            raise KeyError(name)
        priority = get_settings_priority(priority)
        if priority >= self.getpriority(name):
            del self.attributes[name]

    def __delitem__(self, name):
        # Unconditional removal: no priority check, unlike delete().
        self._assert_mutability()
        del self.attributes[name]

    def _assert_mutability(self):
        """Raise ``TypeError`` if this object has been frozen."""
        if self.frozen:
            raise TypeError("Trying to modify an immutable Settings object")

    def copy(self):
        """
        Make a deep copy of current settings.

        This method returns a new instance of the same class, populated with
        the same values and their priorities.

        Modifications to the new object won't be reflected on the original
        settings.
        """
        return copy.deepcopy(self)

    def freeze(self):
        """
        Disable further changes to the current settings.

        After calling this method, the present state of the settings will
        become immutable. Trying to change values through the :meth:`~set`
        method and its variants won't be possible and will be alerted.
        """
        self.frozen = True

    def frozencopy(self):
        """
        Return an immutable copy of the current settings.

        Alias for a :meth:`~freeze` call in the object returned by
        :meth:`copy`.
        """
        # Named so that the local does not shadow the ``copy`` module.
        frozen = self.copy()
        frozen.freeze()
        return frozen

    def __iter__(self):
        return iter(self.attributes)

    def __len__(self):
        return len(self.attributes)

    def _to_dict(self):
        """Return a plain dict of the values, converting nested settings."""
        return {k: (v._to_dict() if isinstance(v, BaseSettings) else v)
                for k, v in six.iteritems(self)}

    def copy_to_dict(self):
        """
        Make a copy of current settings and convert to a dict.

        This method returns a new dict populated with the same values as the
        current settings; priorities are not included.

        Modifications to the returned dict won't be reflected on the original
        settings.

        This method can be useful for example for printing settings
        in Scrapy shell.
        """
        settings = self.copy()
        return settings._to_dict()

    def _repr_pretty_(self, p, cycle):
        # NOTE(review): the signature matches IPython's pretty-printing hook;
        # nothing in this module calls it.
        if cycle:
            p.text(repr(self))
        else:
            p.text(pformat(self.copy_to_dict()))
class _DictProxy(MutableMapping):
    """
    Dict-like recorder that forwards every assignment to a settings object.

    ``proxy[k] = v`` calls ``settings.set(k, v, priority=priority)`` and also
    remembers the pair locally. Lookups, deletion, length and iteration only
    consult the local record; in particular, deleting a key here does not
    remove it from ``settings``.

    :param settings: object whose ``set(name, value, priority=...)`` method
        receives every assignment
    :param priority: the priority passed along with each forwarded assignment
    """

    def __init__(self, settings, priority):
        self.o = {}  # pairs assigned through this proxy
        self.settings = settings
        self.priority = priority

    def __len__(self):
        return len(self.o)

    def __getitem__(self, k):
        return self.o[k]

    def __setitem__(self, k, v):
        # Forward first: if the settings object rejects the assignment, the
        # local record stays untouched.
        self.settings.set(k, v, priority=self.priority)
        self.o[k] = v

    def __delitem__(self, k):
        del self.o[k]

    def __iter__(self):
        # Must take no extra arguments: iter(), dict() and the MutableMapping
        # mixin methods (keys, items, ...) all call __iter__() with none.
        return iter(self.o)
class Settings(BaseSettings):
    """
    Settings store used to configure internal components, and available for
    any further customization.

    It is a direct subclass of :class:`~scrapy.settings.BaseSettings` and
    supports all of its methods. In addition, a freshly created instance
    already holds the global default settings described on
    :ref:`topics-settings-ref`.
    """

    def __init__(self, values=None, priority='project'):
        # The parent constructor is deliberately called without arguments:
        # user-defined dicts must not be promoted, and user values should
        # update the default dicts rather than replace them.
        super(Settings, self).__init__()
        self.setmodule(default_settings, 'default')
        # Wrap every default dict in a BaseSettings so that each of its keys
        # can carry its own priority.
        for key, current in six.iteritems(self):
            if not isinstance(current, dict):
                continue
            promoted = BaseSettings(current, 'default')
            self.set(key, promoted, 'default')
        self.update(values, priority)
def iter_default_settings():
    """Yield a ``(name, value)`` tuple for every default setting.

    A default setting is any attribute of ``default_settings`` whose name is
    uppercase.
    """
    uppercase_names = (
        attr for attr in dir(default_settings) if attr.isupper()
    )
    for attr in uppercase_names:
        yield attr, getattr(default_settings, attr)
def overridden_settings(settings):
    """Yield ``(name, value)`` for every setting that differs from its default.

    Settings whose default value is a dict are never reported.

    :param settings: object supporting ``settings[name]`` lookups
    """
    for name, default_value in iter_default_settings():
        current = settings[name]
        if isinstance(default_value, dict):
            continue
        if current != default_value:
            yield name, current
|