1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192 |
- # coding=utf-8
- import datetime
- from decimal import Decimal
- import re
- import six
- import unicodedata
# Characters stripped by slugify(): anything that is not a word character,
# whitespace or a hyphen.
_re_pattern = re.compile(r'[^\w\s-]', flags=re.UNICODE)
# Same as _re_pattern, except that literal dots are kept as well.
_re_pattern_allow_dots = re.compile(r'[^\.\w\s-]', flags=re.UNICODE)
# Runs of hyphens and/or whitespace; slugify() collapses each run to one '-'.
_re_spaces = re.compile(r'[-\s]+', flags=re.UNICODE)

# Types that force_text(strings_only=True) hands back unconverted.
_PROTECTED_TYPES = six.integer_types + (
    type(None),
    float,
    Decimal,
    datetime.datetime,
    datetime.date,
    datetime.time,
)
def is_protected_type(obj):
    """Return True if ``obj`` is an instance of one of ``_PROTECTED_TYPES``.

    force_text() returns such objects unchanged, instead of converting
    them to text, when it is called with ``strings_only=True``.
    """
    return isinstance(obj, _PROTECTED_TYPES)
def force_text(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
    Coerce ``s`` to a text (unicode) string and return it.

    Text strings are returned unchanged. Byte strings are decoded using
    ``encoding`` and ``errors``. Any other object is converted through its
    own text representation.

    If strings_only is True, objects of a protected type (see
    is_protected_type()) are returned as-is instead of being converted.

    Raises ValueError when decoding fails with a UnicodeDecodeError and
    ``s`` is not an Exception instance. The arguments of the ValueError
    are ``s`` followed by the arguments of the original error.
    """
    # Handle the common case first for performance reasons.
    if issubclass(type(s), six.text_type):
        return s
    if strings_only and is_protected_type(s):
        return s
    try:
        if not issubclass(type(s), six.string_types):
            if six.PY3:
                if isinstance(s, bytes):
                    s = six.text_type(s, encoding, errors)
                else:
                    s = six.text_type(s)
            elif hasattr(s, '__unicode__'):
                s = six.text_type(s)
            else:
                s = six.text_type(bytes(s), encoding, errors)
        else:
            # Note: We use .decode() here, instead of six.text_type(s, encoding,
            # errors), so that if s is a SafeBytes, it ends up being a
            # SafeText at the end.
            s = s.decode(encoding, errors)
    except UnicodeDecodeError as e:
        if not isinstance(s, Exception):
            raise ValueError(s, *e.args)
        else:
            # If we get to here, the caller has passed in an Exception
            # subclass populated with non-ASCII bytestring data without a
            # working unicode method. Try to handle this without raising a
            # further exception by individually forcing the exception args
            # to unicode.
            # Iterate over ``s.args`` rather than ``s`` itself: exception
            # objects are not iterable on Python 3, so looping over the
            # exception would raise TypeError there.
            s = ' '.join(force_text(arg, encoding, strings_only, errors)
                         for arg in s.args)
    return s
def slugify(value, allow_dots=False, allow_unicode=False):
    """
    Build a slug from ``value``.

    The value is forced to text and Unicode-normalized. Every character
    that is not a word character (alphanumerics and underscores),
    whitespace or a hyphen is removed; dots are kept too when allow_dots
    is True. Leading and trailing whitespace is then stripped, the text
    is lowercased and each run of whitespace and/or hyphens is replaced
    by a single hyphen.

    When allow_unicode is False (the default) the text is NFKD-normalized
    and characters that cannot be encoded as ASCII are dropped; when True
    it is NFKC-normalized and non-ASCII word characters are kept.

    Adapted from Django 1.9
    """
    text = force_text(value)
    if allow_unicode:
        text = unicodedata.normalize('NFKC', text)
    else:
        # Decompose, then discard whatever has no ASCII representation.
        decomposed = unicodedata.normalize('NFKD', text)
        text = decomposed.encode('ascii', 'ignore').decode('ascii')

    strip_re = _re_pattern_allow_dots if allow_dots else _re_pattern
    cleaned = strip_re.sub('', text).strip().lower()
    return _re_spaces.sub('-', cleaned)
|