# coding=utf-8
import datetime
from decimal import Decimal
import re
import six
import unicodedata

# Characters removed by slugify(): anything that is not a word character,
# whitespace or a hyphen (the second variant also keeps dots).
_re_pattern = re.compile(r'[^\w\s-]', flags=re.U)
_re_pattern_allow_dots = re.compile(r'[^\.\w\s-]', flags=re.U)
# Runs of hyphens and/or whitespace, collapsed to a single hyphen by slugify().
_re_spaces = re.compile(r'[-\s]+', flags=re.U)

_PROTECTED_TYPES = six.integer_types + (
    type(None), float, Decimal, datetime.datetime, datetime.date,
    datetime.time,
)


def is_protected_type(obj):
    """Determine if the object instance is of a protected type.

    Objects of protected types are preserved as-is when passed to
    force_text(strings_only=True).
    """
    return isinstance(obj, _PROTECTED_TYPES)


def force_text(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
    Coerce ``s`` to a text (unicode) string.

    Text instances are returned unchanged. Byte strings are decoded using
    ``encoding`` and ``errors``; any other object is converted with
    ``six.text_type``.

    If strings_only is True, don't convert (some) non-string-like objects:
    instances of the protected types (see ``is_protected_type``) are
    returned as-is.

    Raises ValueError (carrying the original object followed by the
    UnicodeDecodeError arguments) when a non-exception object cannot be
    decoded.
    """
    # Handle the common case first for performance reasons.
    if issubclass(type(s), six.text_type):
        return s
    if strings_only and is_protected_type(s):
        return s
    try:
        if not issubclass(type(s), six.string_types):
            if six.PY3:
                if isinstance(s, bytes):
                    s = six.text_type(s, encoding, errors)
                else:
                    s = six.text_type(s)
            elif hasattr(s, '__unicode__'):
                s = six.text_type(s)
            else:
                s = six.text_type(bytes(s), encoding, errors)
        else:
            # Note: We use .decode() here, instead of six.text_type(s, encoding,
            # errors), so that if s is a SafeBytes, it ends up being a
            # SafeText at the end.
            s = s.decode(encoding, errors)
    except UnicodeDecodeError as e:
        if not isinstance(s, Exception):
            raise ValueError(s, *e.args)
        else:
            # If we get to here, the caller has passed in an Exception
            # subclass populated with non-ASCII bytestring data without a
            # working unicode method. Try to handle this without raising a
            # further exception by individually forcing the exception args
            # to unicode.
            # Iterate over ``s.args`` rather than ``s`` itself: exception
            # instances are not iterable on Python 3, so iterating ``s``
            # would raise TypeError there.
            s = ' '.join(force_text(arg, encoding, strings_only, errors)
                         for arg in s.args)
    return s


def slugify(value, allow_dots=False, allow_unicode=False):
    """
    Converts to lowercase, removes non-word characters (alphanumerics and
    underscores) and converts spaces to hyphens. Also strips leading and
    trailing whitespace.

    ``value`` is first coerced to text with ``force_text``. When
    ``allow_unicode`` is False (the default) the text is NFKD-normalized and
    every character that cannot be encoded as ASCII is dropped; when True it
    is NFKC-normalized and non-ASCII word characters are kept.

    Modified to optionally allow dots (``allow_dots=True`` keeps ``.``).

    Adapted from Django 1.9
    """
    pattern = _re_pattern_allow_dots if allow_dots else _re_pattern

    value = force_text(value)
    if allow_unicode:
        value = unicodedata.normalize('NFKC', value)
    else:
        # Decompose accented characters, then discard whatever is not ASCII.
        value = unicodedata.normalize('NFKD', value).encode(
            'ascii', 'ignore').decode('ascii')

    # Both modes share the same clean-up: drop disallowed characters, trim,
    # lowercase, then collapse whitespace/hyphen runs into single hyphens.
    value = pattern.sub('', value).strip().lower()
    return _re_spaces.sub('-', value)