utils.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591
  1. # -*- coding: utf-8 -*-
  2. # !/usr/bin/env python
  3. import itertools
  4. import math
  5. import re
  6. import time
  7. import codecs
  8. import pickle
  9. from functools import partial
  10. import addressparser
  11. import chardet
  12. import platform
  13. import collections
  14. from collections import OrderedDict
  15. from itertools import islice, takewhile
  16. from typing import NamedTuple, Tuple, Optional
  17. from Crypto.PublicKey import RSA
  18. is_windows = any(platform.win32_ver())
  19. def xor_encrypt_buffer(buffer, offset, key):
  20. import struct
  21. lkey = len(key)
  22. step = 0
  23. num = offset
  24. for _ in buffer[offset:]:
  25. struct.pack_into('<B', buffer, num, ord(_) ^ ord(key[step % lkey]))
  26. num += 1
  27. step += 1
  28. return buffer
  29. def xor_decrypt_buffer(buffer, offset, key):
  30. return xor_encrypt_buffer(buffer, offset, key)
  31. clock = time.clock if is_windows else time.time
  32. class Timer(object):
  33. def __init__(self, func = clock):
  34. self.elapsed = 0.0
  35. self._func = func
  36. self._start = None
  37. def start(self):
  38. if self._start is not None:
  39. raise RuntimeError('Already started')
  40. self._start = self._func()
  41. def stop(self):
  42. if self._start is None:
  43. raise RuntimeError('Not started')
  44. end = self._func()
  45. self.elapsed += end - self._start
  46. self._start = None
  47. def reset(self):
  48. self.elapsed = 0.0
  49. @property
  50. def running(self):
  51. return self._start is not None
  52. def __enter__(self):
  53. self.start()
  54. return self
  55. def __exit__(self, *args):
  56. self.stop()
  57. def write_file(name, content):
  58. # type: (str, bytes)->None
  59. with open(name, 'w') as f:
  60. f.write(content)
  61. KeyPair = NamedTuple('KeyPair', [('public', str), ('private', str)])
  62. def generate_RSA_key_pairs(bits = 2048):
  63. # type: (int)->KeyPair
  64. """
  65. :param bits:
  66. :return:
  67. """
  68. key = RSA.generate(bits)
  69. return KeyPair(public = key.publickey().exportKey('PEM').decode('ascii'),
  70. private = key.exportKey('PEM').decode('ascii'))
  71. def write_RSA_key_pairs(path, pair = None):
  72. # type: (str, bool, KeyPair)->Tuple[str, str]
  73. if pair is None:
  74. pair = generate_RSA_key_pairs() # type: KeyPair
  75. def key_name(name, kind): return '{name}_{kind}.pem'.format(name = name, kind = kind)
  76. public_key_file_name = key_name(path, 'public')
  77. private_key_file_name = key_name(path, 'private')
  78. write_file(public_key_file_name, pair.public)
  79. write_file(private_key_file_name, pair.private)
  80. return public_key_file_name, private_key_file_name
  81. def head(iterable, default = None):
  82. return next(iter(iterable), default)
  83. def first_true(iterable, pred = None, default = None):
  84. """Returns the first true value in the iterable.
  85. If no true value is found, returns *default*
  86. If *pred* is not None, returns the first item
  87. for which pred(item) is true."""
  88. # first_true([a,b,c], default=x) --> a or b or c or x
  89. # first_true([a,b], fn, x) --> a if fn(a) else b if fn(b) else x
  90. return next(filter(pred, iterable), default)
  91. def nth(iterable, n, default = None):
  92. """Returns the nth item of iterable, or a default value"""
  93. return next(islice(iterable, n, None), default)
  94. def upto(iterable, max_val):
  95. """From a monotonically increasing iterable, generate all the values <= max_val."""
  96. # Why <= max_val rather than < max_val? In part because that's how Ruby's upto does it.
  97. return takewhile(lambda x: x <= max_val, iterable)
  98. def ilen(iterable):
  99. """Length of any iterable (consumes generators)."""
  100. return sum(1 for _ in iterable)
  101. count_iterable = ilen
  102. def recursive_repr(fill_value = '...'):
  103. """
  104. back-ported from Python3
  105. Decorator to make a repr function return fill_value for a recursive call"""
  106. from thread import get_ident
  107. def decorating_function(user_function):
  108. repr_running = set()
  109. def wrapper(self):
  110. key = id(self), get_ident()
  111. if key in repr_running:
  112. return fill_value
  113. repr_running.add(key)
  114. try:
  115. result = user_function(self)
  116. finally:
  117. repr_running.discard(key)
  118. return result
  119. # Can't use functools.wraps() here because of bootstrap issues
  120. wrapper.__module__ = getattr(user_function, '__module__')
  121. wrapper.__doc__ = getattr(user_function, '__doc__')
  122. wrapper.__name__ = getattr(user_function, '__name__')
  123. wrapper.__annotations__ = getattr(user_function, '__annotations__', {})
  124. return wrapper
  125. return decorating_function
  126. class ChainMap(collections.MutableMapping):
  127. """
  128. back-ported from Python3
  129. A ChainMap groups multiple dicts (or other mappings) together
  130. to create a single, updateable view.
  131. The underlying mappings are stored in a list. That list is public and can
  132. be accessed or updated using the *maps* attribute. There is no other
  133. state.
  134. Lookups search the underlying mappings successively until a key is found.
  135. In contrast, writes, updates, and deletions only operate on the first
  136. mapping.
  137. """
  138. def __init__(self, *maps):
  139. """Initialize a ChainMap by setting *maps* to the given mappings.
  140. If no mappings are provided, a single empty dictionary is used.
  141. """
  142. self.maps = list(maps) or [{}] # always at least one map
  143. def __missing__(self, key):
  144. raise KeyError(key)
  145. def __getitem__(self, key):
  146. for mapping in self.maps:
  147. try:
  148. return mapping[key] # can't use 'key in mapping' with defaultdict
  149. except KeyError:
  150. pass
  151. return self.__missing__(key) # support subclasses that define __missing__
  152. def get(self, key, default = None):
  153. return self[key] if key in self else default
  154. def __len__(self):
  155. return len(set().union(*self.maps)) # reuses stored hash values if possible
  156. def __iter__(self):
  157. d = {}
  158. for mapping in reversed(self.maps):
  159. d.update(mapping) # reuses stored hash values if possible
  160. return iter(d)
  161. def __contains__(self, key):
  162. return any(key in m for m in self.maps)
  163. def __bool__(self):
  164. return any(self.maps)
  165. @recursive_repr()
  166. def __repr__(self):
  167. return '{name}({maps})'.format(name = self.__class__.__name__, maps = ", ".join(map(repr, self.maps)))
  168. @classmethod
  169. def fromkeys(cls, iterable, *args):
  170. """Create a ChainMap with a single dict created from the iterable."""
  171. return cls(dict.fromkeys(iterable, *args))
  172. def copy(self):
  173. """New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]"""
  174. return self.__class__(self.maps[0].copy(), *self.maps[1:])
  175. __copy__ = copy
  176. def new_child(self, m = None): # like Django's Context.push()
  177. """New ChainMap with a new map followed by all previous maps.
  178. If no map is provided, an empty dict is used.
  179. """
  180. if m is None:
  181. m = {}
  182. return self.__class__(m, *self.maps)
  183. @property
  184. def parents(self): # like Django's Context.pop()
  185. """New ChainMap from maps[1:]."""
  186. return self.__class__(*self.maps[1:])
  187. def __setitem__(self, key, value):
  188. self.maps[0][key] = value
  189. def __delitem__(self, key):
  190. try:
  191. del self.maps[0][key]
  192. except KeyError:
  193. raise KeyError('Key not found in the first mapping: {!r}'.format(key))
  194. def popitem(self):
  195. """Remove and return an item pair from maps[0]. Raise KeyError is maps[0] is empty."""
  196. try:
  197. return self.maps[0].popitem()
  198. except KeyError:
  199. raise KeyError('No keys found in the first mapping.')
  200. def pop(self, key, *args):
  201. """Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0]."""
  202. try:
  203. return self.maps[0].pop(key, *args)
  204. except KeyError:
  205. raise KeyError('Key not found in the first mapping: {!r}'.format(key))
  206. def clear(self):
  207. """Clear maps[0], leaving maps[1:] intact."""
  208. self.maps[0].clear()
  209. class Immutable(object):
  210. """
  211. Immutable object which adheres to the Mapping and the Sequence protocols.
  212. * Attributes are kept in `self._ordered_dict`. NEVER MUTATE THIS!
  213. * Instantiate with kwargs or args. Instantiating with args preserves order.
  214. * Mapping methods get(), items(), keys(), and values() are also included.
  215. * Sequence methods index() and count() are also included.
  216. """
  217. class ImmutableError(Exception):
  218. pass
  219. def __init__(self, *args, **kwargs):
  220. """
  221. Instantiate an Immutable instance.
  222. >>> # tuple instantiation --> Note that this method preserves order!
  223. >>> obj = Immutable((('val_0', 0), ('val_1', 1)))
  224. >>> # key=value pairs
  225. >>> obj1 = Immutable(val_0=0, val_1=1)
  226. >>> # same as above, but by upacking a dict
  227. >>> attribute_dict = {'val_0': 0, 'val_1': 1}
  228. >>> obj2 = Immutable(**attribute_dict)
  229. >>> # access via '.' or '[]'
  230. >>> obj.val_0
  231. >>> obj['val_0']
  232. :param args: (<attr>, <val>,) pairs to add *in order*
  233. :param kwargs: Allows you to unpack a dict to create this.
  234. """
  235. reserved_keys = ('get', 'keys', 'values', 'items', 'count', 'index',
  236. '_ordered_dict', '_tuple')
  237. ordered_dict = OrderedDict()
  238. for key, val in args:
  239. if key in kwargs:
  240. raise self.ImmutableError('Key in args duplicated in kwargs.')
  241. ordered_dict[key] = val
  242. ordered_dict.update(kwargs)
  243. for key, val in ordered_dict.items():
  244. try:
  245. hash(key)
  246. hash(val)
  247. except TypeError:
  248. raise self.ImmutableError('Keys and vals must be hashable.')
  249. if isinstance(key, int):
  250. raise self.ImmutableError('Keys cannot be integers.')
  251. if key in reserved_keys:
  252. raise self.ImmutableError('Keys cannot be any of these: {}.'
  253. .format(reserved_keys))
  254. self.__dict__['_ordered_dict'] = ordered_dict
  255. self.__dict__['_tuple'] = tuple(ordered_dict.values())
  256. def __contains__(self, item):
  257. raise self.ImmutableError('Containment not implemented. Try with '
  258. 'keys(), values(), or items().')
  259. def __reversed__(self):
  260. raise self.ImmutableError('Reversal not implemented. Try with '
  261. 'keys(), values(), or items().')
  262. def __getitem__(self, key):
  263. if isinstance(key, int):
  264. return self.__dict__['_tuple'][key]
  265. else:
  266. return self.__dict__['_ordered_dict'][key]
  267. def __setitem__(self, key, value):
  268. raise self.ImmutableError('Cannot set items on Immutable.')
  269. def __getattr__(self, key):
  270. if key == 'items':
  271. return self.__dict__['_ordered_dict'].items
  272. elif key == 'keys':
  273. return self.__dict__['_ordered_dict'].keys
  274. elif key == 'values':
  275. return self.__dict__['_ordered_dict'].values
  276. elif key == 'index':
  277. return self.__dict__['_tuple'].index
  278. elif key == 'count':
  279. return self.__dict__['_tuple'].count
  280. else:
  281. return self.__getitem__(key)
  282. def __setattr__(self, key, value):
  283. raise self.ImmutableError('Cannot set attributes on Immutable.')
  284. def __cmp__(self, other):
  285. raise self.ImmutableError('Only equality comparisons implemented.')
  286. def __eq__(self, other):
  287. if not isinstance(other, Immutable):
  288. return False
  289. return hash(self) == hash(other)
  290. def __ne__(self, other):
  291. if not isinstance(other, Immutable):
  292. return True
  293. return hash(self) != hash(other)
  294. def __len__(self):
  295. return len(self.__dict__['_tuple'])
  296. def __iter__(self):
  297. raise self.ImmutableError('Iteration not implemented. Try with '
  298. 'keys(), values(), or items().')
  299. def __hash__(self):
  300. return hash(tuple(self._ordered_dict.items()))
  301. def __str__(self):
  302. return bytes('{}'.format(self.__repr__()))
  303. def __unicode__(self):
  304. return '{}'.format(self.__repr__())
  305. def __repr__(self):
  306. keys_repr = ', '.join('{}={}'.format(key, repr(val))
  307. for key, val in self.items())
  308. return 'Immutable({})'.format(keys_repr)
  309. def __dir__(self):
  310. return list(self.keys())
  311. def guess_encoding(file_path):
  312. # type: (str, int)->str
  313. """Predict a file's encoding using chardet"""
  314. # Open the file as binary data
  315. with open(file_path, 'rb') as f:
  316. # Join binary lines for specified number of lines
  317. raw_data = b''.join(f.readlines())
  318. return chardet.detect(raw_data)['encoding']
  319. def convert_encoding(source_file, target_file = None, source_encoding = None, target_encoding = "utf-8"):
  320. # type: (str, Optional[str], Optional[str], Optional[str])->str
  321. """
  322. :param source_file:
  323. :param target_file:
  324. :param source_encoding:
  325. :param target_encoding:
  326. :return:
  327. """
  328. source_encoding = source_encoding if source_encoding is not None else guess_encoding(source_file)
  329. if source_encoding in ('gb2312', 'GB2312'):
  330. #: gb18030 is a superset of gb2312, it can cover more corner cases
  331. source_encoding = 'gb18030'
  332. if not target_file:
  333. filename, suffix = source_file.rsplit('.')
  334. target_file = 'converted-{filename}-{source_encoding}-{target_encoding}.{suffix}' \
  335. .format(filename = filename,
  336. source_encoding = source_encoding,
  337. target_encoding = target_encoding,
  338. suffix = suffix)
  339. BLOCK_SIZE = 1048576
  340. with codecs.open(source_file, "r", source_encoding) as source:
  341. with codecs.open(target_file, "w", target_encoding) as target:
  342. while True:
  343. contents = source.read(BLOCK_SIZE)
  344. if not contents:
  345. break
  346. target.write(contents)
  347. return target_file
  348. def rec_update_dict(d, update_dict, firstLevelOverwrite = False):
  349. """
  350. 递归地更新字典
  351. :param firstLevelOverwrite: 特指是否覆盖第一层dict
  352. :param d:
  353. :param update_dict:
  354. :return:
  355. """
  356. for k, v in update_dict.iteritems():
  357. if firstLevelOverwrite:
  358. d[k] = v
  359. else:
  360. if isinstance(v, collections.Mapping):
  361. d[k] = rec_update_dict(d.get(k, {}), v)
  362. else:
  363. d[k] = v
  364. return d
  365. flatten = itertools.chain.from_iterable
  366. def convert(text):
  367. return int(text) if text.isdigit() else text
  368. def alphanum_key(key):
  369. return [convert(c) for c in re.split('([0-9]+)', key)]
  370. def natural_sort(array, key, reverse):
  371. return sorted(array, key = lambda d: alphanum_key(d[key]), reverse = reverse)
  372. def paginated(dataList, pageIndex, pageSize):
  373. return dataList[(pageIndex - 1) * pageSize:pageIndex * pageSize]
  374. def is_number(s):
  375. try:
  376. float(s)
  377. return True
  378. except ValueError:
  379. pass
  380. try:
  381. import unicodedata
  382. unicodedata.numeric(s)
  383. return True
  384. except (TypeError, ValueError, ImportError):
  385. pass
  386. return False
  387. def ceil_floor(x): return math.ceil(x) if x < 0 else math.floor(x)
  388. def round_n_digits(x, n): return ceil_floor(x * math.pow(10, n)) / math.pow(10, n)
  389. round_2_digits = partial(round_n_digits, n = 2)
def with_metaclass(meta, *bases):
    """
    Build a temporary base class that applies *meta* as the metaclass.

    Function from jinja2/_compat.py. License: BSD.
    Use it like this::

        class BaseForm(object):
            pass

        class FormType(type):
            pass

        class Form(with_metaclass(FormType, BaseForm)):
            pass

    This requires a bit of explanation: the basic idea is to make a
    dummy metaclass for one level of class instantiation that replaces
    itself with the actual metaclass. Because of internal type checks
    we also need to make sure that we downgrade the custom metaclass
    for one level to something closer to type (that's why __call__ and
    __init__ comes back from type etc.).
    This has the advantage over six.with_metaclass of not introducing
    dummy classes into the final MRO.

    :param meta: the metaclass to apply.
    :param bases: base classes the final class should have.
    :return: the temporary class to list as the sole base in a class statement.
    """
    class Metaclass(meta):
        # Borrow type's behaviour so creating the temporary class does not run
        # any custom __call__/__init__ defined by *meta*.
        __call__ = type.__call__
        __init__ = type.__init__
        def __new__(cls, name, this_bases, d):
            # this_bases is None only for the 'temporary_class' created below.
            if this_bases is None:
                return type.__new__(cls, name, (), d)
            # Otherwise a real class statement is subclassing the temporary
            # class: build it with the real metaclass and the real bases.
            return meta(name, bases, d)
    return Metaclass('temporary_class', None, {})
  417. def load_pickle(filepath):
  418. with open(filepath) as f:
  419. return pickle.load(f)
  420. def dump_pickle(obj, filepath):
  421. with open(filepath, 'w') as f:
  422. pickle.dump(obj, f)
  423. def split_addr(addr): # type:(unicode) -> tuple
  424. addr = [addr]
  425. df = addressparser.transform(addr)
  426. df.fillna("", inplace=True)
  427. return df.values.tolist()[0]
  428. def fix_dict(mydict, check_empty_string = []):
  429. # type: (dict, list)->dict
  430. result = {}
  431. if not mydict:
  432. return result
  433. for key, value in mydict.iteritems():
  434. if key in check_empty_string:
  435. if value is None or value == '':
  436. continue
  437. else:
  438. result[key] = value
  439. else:
  440. if value is not None:
  441. result[key] = value
  442. return result