util.py 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. # -*- coding: utf-8 -*-
  2. """
  3. babel.util
  4. ~~~~~~~~~~
  5. Various utility classes and functions.
  6. :copyright: (c) 2013 by the Babel Team.
  7. :license: BSD, see LICENSE for more details.
  8. """
  9. import codecs
  10. from datetime import timedelta, tzinfo
  11. import os
  12. import re
  13. import textwrap
  14. from babel._compat import izip, imap
  15. import pytz as _pytz
  16. from babel import localtime
  17. missing = object()
  18. def distinct(iterable):
  19. """Yield all items in an iterable collection that are distinct.
  20. Unlike when using sets for a similar effect, the original ordering of the
  21. items in the collection is preserved by this function.
  22. >>> print(list(distinct([1, 2, 1, 3, 4, 4])))
  23. [1, 2, 3, 4]
  24. >>> print(list(distinct('foobar')))
  25. ['f', 'o', 'b', 'a', 'r']
  26. :param iterable: the iterable collection providing the data
  27. """
  28. seen = set()
  29. for item in iter(iterable):
  30. if item not in seen:
  31. yield item
  32. seen.add(item)
  33. # Regexp to match python magic encoding line
  34. PYTHON_MAGIC_COMMENT_re = re.compile(
  35. br'[ \t\f]* \# .* coding[=:][ \t]*([-\w.]+)', re.VERBOSE)
  36. def parse_encoding(fp):
  37. """Deduce the encoding of a source file from magic comment.
  38. It does this in the same way as the `Python interpreter`__
  39. .. __: https://docs.python.org/3.4/reference/lexical_analysis.html#encoding-declarations
  40. The ``fp`` argument should be a seekable file object.
  41. (From Jeff Dairiki)
  42. """
  43. pos = fp.tell()
  44. fp.seek(0)
  45. try:
  46. line1 = fp.readline()
  47. has_bom = line1.startswith(codecs.BOM_UTF8)
  48. if has_bom:
  49. line1 = line1[len(codecs.BOM_UTF8):]
  50. m = PYTHON_MAGIC_COMMENT_re.match(line1)
  51. if not m:
  52. try:
  53. import parser
  54. parser.suite(line1.decode('latin-1'))
  55. except (ImportError, SyntaxError, UnicodeEncodeError):
  56. # Either it's a real syntax error, in which case the source is
  57. # not valid python source, or line2 is a continuation of line1,
  58. # in which case we don't want to scan line2 for a magic
  59. # comment.
  60. pass
  61. else:
  62. line2 = fp.readline()
  63. m = PYTHON_MAGIC_COMMENT_re.match(line2)
  64. if has_bom:
  65. if m:
  66. magic_comment_encoding = m.group(1).decode('latin-1')
  67. if magic_comment_encoding != 'utf-8':
  68. raise SyntaxError(
  69. 'encoding problem: {0} with BOM'.format(
  70. magic_comment_encoding))
  71. return 'utf-8'
  72. elif m:
  73. return m.group(1).decode('latin-1')
  74. else:
  75. return None
  76. finally:
  77. fp.seek(pos)
  78. PYTHON_FUTURE_IMPORT_re = re.compile(
  79. r'from\s+__future__\s+import\s+\(*(.+)\)*')
  80. def parse_future_flags(fp, encoding='latin-1'):
  81. """Parse the compiler flags by :mod:`__future__` from the given Python
  82. code.
  83. """
  84. import __future__
  85. pos = fp.tell()
  86. fp.seek(0)
  87. flags = 0
  88. try:
  89. body = fp.read().decode(encoding)
  90. for m in PYTHON_FUTURE_IMPORT_re.finditer(body):
  91. names = [x.strip() for x in m.group(1).split(',')]
  92. for name in names:
  93. flags |= getattr(__future__, name).compiler_flag
  94. finally:
  95. fp.seek(pos)
  96. return flags
  97. def pathmatch(pattern, filename):
  98. """Extended pathname pattern matching.
  99. This function is similar to what is provided by the ``fnmatch`` module in
  100. the Python standard library, but:
  101. * can match complete (relative or absolute) path names, and not just file
  102. names, and
  103. * also supports a convenience pattern ("**") to match files at any
  104. directory level.
  105. Examples:
  106. >>> pathmatch('**.py', 'bar.py')
  107. True
  108. >>> pathmatch('**.py', 'foo/bar/baz.py')
  109. True
  110. >>> pathmatch('**.py', 'templates/index.html')
  111. False
  112. >>> pathmatch('**/templates/*.html', 'templates/index.html')
  113. True
  114. >>> pathmatch('**/templates/*.html', 'templates/foo/bar.html')
  115. False
  116. :param pattern: the glob pattern
  117. :param filename: the path name of the file to match against
  118. """
  119. symbols = {
  120. '?': '[^/]',
  121. '?/': '[^/]/',
  122. '*': '[^/]+',
  123. '*/': '[^/]+/',
  124. '**/': '(?:.+/)*?',
  125. '**': '(?:.+/)*?[^/]+',
  126. }
  127. buf = []
  128. for idx, part in enumerate(re.split('([?*]+/?)', pattern)):
  129. if idx % 2:
  130. buf.append(symbols[part])
  131. elif part:
  132. buf.append(re.escape(part))
  133. match = re.match(''.join(buf) + '$', filename.replace(os.sep, '/'))
  134. return match is not None
  135. class TextWrapper(textwrap.TextWrapper):
  136. wordsep_re = re.compile(
  137. r'(\s+|' # any whitespace
  138. r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))' # em-dash
  139. )
  140. def wraptext(text, width=70, initial_indent='', subsequent_indent=''):
  141. """Simple wrapper around the ``textwrap.wrap`` function in the standard
  142. library. This version does not wrap lines on hyphens in words.
  143. :param text: the text to wrap
  144. :param width: the maximum line width
  145. :param initial_indent: string that will be prepended to the first line of
  146. wrapped output
  147. :param subsequent_indent: string that will be prepended to all lines save
  148. the first of wrapped output
  149. """
  150. wrapper = TextWrapper(width=width, initial_indent=initial_indent,
  151. subsequent_indent=subsequent_indent,
  152. break_long_words=False)
  153. return wrapper.wrap(text)
  154. class odict(dict):
  155. """Ordered dict implementation.
  156. :see: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/107747
  157. """
  158. def __init__(self, data=None):
  159. dict.__init__(self, data or {})
  160. self._keys = list(dict.keys(self))
  161. def __delitem__(self, key):
  162. dict.__delitem__(self, key)
  163. self._keys.remove(key)
  164. def __setitem__(self, key, item):
  165. new_key = key not in self
  166. dict.__setitem__(self, key, item)
  167. if new_key:
  168. self._keys.append(key)
  169. def __iter__(self):
  170. return iter(self._keys)
  171. iterkeys = __iter__
  172. def clear(self):
  173. dict.clear(self)
  174. self._keys = []
  175. def copy(self):
  176. d = odict()
  177. d.update(self)
  178. return d
  179. def items(self):
  180. return zip(self._keys, self.values())
  181. def iteritems(self):
  182. return izip(self._keys, self.itervalues())
  183. def keys(self):
  184. return self._keys[:]
  185. def pop(self, key, default=missing):
  186. try:
  187. value = dict.pop(self, key)
  188. self._keys.remove(key)
  189. return value
  190. except KeyError as e:
  191. if default == missing:
  192. raise e
  193. else:
  194. return default
  195. def popitem(self, key):
  196. self._keys.remove(key)
  197. return dict.popitem(key)
  198. def setdefault(self, key, failobj=None):
  199. dict.setdefault(self, key, failobj)
  200. if key not in self._keys:
  201. self._keys.append(key)
  202. def update(self, dict):
  203. for (key, val) in dict.items():
  204. self[key] = val
  205. def values(self):
  206. return map(self.get, self._keys)
  207. def itervalues(self):
  208. return imap(self.get, self._keys)
  209. class FixedOffsetTimezone(tzinfo):
  210. """Fixed offset in minutes east from UTC."""
  211. def __init__(self, offset, name=None):
  212. self._offset = timedelta(minutes=offset)
  213. if name is None:
  214. name = 'Etc/GMT%+d' % offset
  215. self.zone = name
  216. def __str__(self):
  217. return self.zone
  218. def __repr__(self):
  219. return '<FixedOffset "%s" %s>' % (self.zone, self._offset)
  220. def utcoffset(self, dt):
  221. return self._offset
  222. def tzname(self, dt):
  223. return self.zone
  224. def dst(self, dt):
  225. return ZERO
  226. # Export the localtime functionality here because that's
  227. # where it was in the past.
  228. UTC = _pytz.utc
  229. LOCALTZ = localtime.LOCALTZ
  230. get_localzone = localtime.get_localzone
  231. STDOFFSET = localtime.STDOFFSET
  232. DSTOFFSET = localtime.DSTOFFSET
  233. DSTDIFF = localtime.DSTDIFF
  234. ZERO = localtime.ZERO