_pmap.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. from ._compat import Mapping, Hashable
  2. from itertools import chain
  3. import six
  4. from pyrsistent._pvector import pvector
  5. from pyrsistent._transformations import transform
  6. class PMap(object):
  7. """
  8. Persistent map/dict. Tries to follow the same naming conventions as the built in dict where feasible.
  9. Do not instantiate directly, instead use the factory functions :py:func:`m` or :py:func:`pmap` to
  10. create an instance.
  11. Was originally written as a very close copy of the Clojure equivalent but was later rewritten to closer
  12. re-assemble the python dict. This means that a sparse vector (a PVector) of buckets is used. The keys are
  13. hashed and the elements inserted at position hash % len(bucket_vector). Whenever the map size exceeds 2/3 of
  14. the containing vectors size the map is reallocated to a vector of double the size. This is done to avoid
  15. excessive hash collisions.
  16. This structure corresponds most closely to the built in dict type and is intended as a replacement. Where the
  17. semantics are the same (more or less) the same function names have been used but for some cases it is not possible,
  18. for example assignments and deletion of values.
  19. PMap implements the Mapping protocol and is Hashable. It also supports dot-notation for
  20. element access.
  21. Random access and insert is log32(n) where n is the size of the map.
  22. The following are examples of some common operations on persistent maps
  23. >>> m1 = m(a=1, b=3)
  24. >>> m2 = m1.set('c', 3)
  25. >>> m3 = m2.remove('a')
  26. >>> m1
  27. pmap({'b': 3, 'a': 1})
  28. >>> m2
  29. pmap({'c': 3, 'b': 3, 'a': 1})
  30. >>> m3
  31. pmap({'c': 3, 'b': 3})
  32. >>> m3['c']
  33. 3
  34. >>> m3.c
  35. 3
  36. """
  37. __slots__ = ('_size', '_buckets', '__weakref__', '_cached_hash')
  38. def __new__(cls, size, buckets):
  39. self = super(PMap, cls).__new__(cls)
  40. self._size = size
  41. self._buckets = buckets
  42. return self
  43. @staticmethod
  44. def _get_bucket(buckets, key):
  45. index = hash(key) % len(buckets)
  46. bucket = buckets[index]
  47. return index, bucket
  48. @staticmethod
  49. def _getitem(buckets, key):
  50. _, bucket = PMap._get_bucket(buckets, key)
  51. if bucket:
  52. for k, v in bucket:
  53. if k == key:
  54. return v
  55. raise KeyError(key)
  56. def __getitem__(self, key):
  57. return PMap._getitem(self._buckets, key)
  58. @staticmethod
  59. def _contains(buckets, key):
  60. _, bucket = PMap._get_bucket(buckets, key)
  61. if bucket:
  62. for k, _ in bucket:
  63. if k == key:
  64. return True
  65. return False
  66. return False
  67. def __contains__(self, key):
  68. return self._contains(self._buckets, key)
  69. get = Mapping.get
  70. def __iter__(self):
  71. return self.iterkeys()
  72. def __getattr__(self, key):
  73. try:
  74. return self[key]
  75. except KeyError:
  76. raise AttributeError(
  77. "{0} has no attribute '{1}'".format(type(self).__name__, key)
  78. )
  79. def iterkeys(self):
  80. for k, _ in self.iteritems():
  81. yield k
  82. # These are more efficient implementations compared to the original
  83. # methods that are based on the keys iterator and then calls the
  84. # accessor functions to access the value for the corresponding key
  85. def itervalues(self):
  86. for _, v in self.iteritems():
  87. yield v
  88. def iteritems(self):
  89. for bucket in self._buckets:
  90. if bucket:
  91. for k, v in bucket:
  92. yield k, v
  93. def values(self):
  94. return pvector(self.itervalues())
  95. def keys(self):
  96. return pvector(self.iterkeys())
  97. def items(self):
  98. return pvector(self.iteritems())
  99. def __len__(self):
  100. return self._size
  101. def __repr__(self):
  102. return 'pmap({0})'.format(str(dict(self)))
  103. def __eq__(self, other):
  104. if self is other:
  105. return True
  106. if not isinstance(other, Mapping):
  107. return NotImplemented
  108. if len(self) != len(other):
  109. return False
  110. if isinstance(other, PMap):
  111. if (hasattr(self, '_cached_hash') and hasattr(other, '_cached_hash')
  112. and self._cached_hash != other._cached_hash):
  113. return False
  114. if self._buckets == other._buckets:
  115. return True
  116. return dict(self.iteritems()) == dict(other.iteritems())
  117. elif isinstance(other, dict):
  118. return dict(self.iteritems()) == other
  119. return dict(self.iteritems()) == dict(six.iteritems(other))
  120. __ne__ = Mapping.__ne__
  121. def __lt__(self, other):
  122. raise TypeError('PMaps are not orderable')
  123. __le__ = __lt__
  124. __gt__ = __lt__
  125. __ge__ = __lt__
  126. def __str__(self):
  127. return self.__repr__()
  128. def __hash__(self):
  129. if not hasattr(self, '_cached_hash'):
  130. self._cached_hash = hash(frozenset(self.iteritems()))
  131. return self._cached_hash
  132. def set(self, key, val):
  133. """
  134. Return a new PMap with key and val inserted.
  135. >>> m1 = m(a=1, b=2)
  136. >>> m2 = m1.set('a', 3)
  137. >>> m3 = m1.set('c' ,4)
  138. >>> m1
  139. pmap({'b': 2, 'a': 1})
  140. >>> m2
  141. pmap({'b': 2, 'a': 3})
  142. >>> m3
  143. pmap({'c': 4, 'b': 2, 'a': 1})
  144. """
  145. return self.evolver().set(key, val).persistent()
  146. def remove(self, key):
  147. """
  148. Return a new PMap without the element specified by key. Raises KeyError if the element
  149. is not present.
  150. >>> m1 = m(a=1, b=2)
  151. >>> m1.remove('a')
  152. pmap({'b': 2})
  153. """
  154. return self.evolver().remove(key).persistent()
  155. def discard(self, key):
  156. """
  157. Return a new PMap without the element specified by key. Returns reference to itself
  158. if element is not present.
  159. >>> m1 = m(a=1, b=2)
  160. >>> m1.discard('a')
  161. pmap({'b': 2})
  162. >>> m1 is m1.discard('c')
  163. True
  164. """
  165. try:
  166. return self.remove(key)
  167. except KeyError:
  168. return self
  169. def update(self, *maps):
  170. """
  171. Return a new PMap with the items in Mappings inserted. If the same key is present in multiple
  172. maps the rightmost (last) value is inserted.
  173. >>> m1 = m(a=1, b=2)
  174. >>> m1.update(m(a=2, c=3), {'a': 17, 'd': 35})
  175. pmap({'c': 3, 'b': 2, 'a': 17, 'd': 35})
  176. """
  177. return self.update_with(lambda l, r: r, *maps)
  178. def update_with(self, update_fn, *maps):
  179. """
  180. Return a new PMap with the items in Mappings maps inserted. If the same key is present in multiple
  181. maps the values will be merged using merge_fn going from left to right.
  182. >>> from operator import add
  183. >>> m1 = m(a=1, b=2)
  184. >>> m1.update_with(add, m(a=2))
  185. pmap({'b': 2, 'a': 3})
  186. The reverse behaviour of the regular merge. Keep the leftmost element instead of the rightmost.
  187. >>> m1 = m(a=1)
  188. >>> m1.update_with(lambda l, r: l, m(a=2), {'a':3})
  189. pmap({'a': 1})
  190. """
  191. evolver = self.evolver()
  192. for map in maps:
  193. for key, value in map.items():
  194. evolver.set(key, update_fn(evolver[key], value) if key in evolver else value)
  195. return evolver.persistent()
  196. def __add__(self, other):
  197. return self.update(other)
  198. def __reduce__(self):
  199. # Pickling support
  200. return pmap, (dict(self),)
  201. def transform(self, *transformations):
  202. """
  203. Transform arbitrarily complex combinations of PVectors and PMaps. A transformation
  204. consists of two parts. One match expression that specifies which elements to transform
  205. and one transformation function that performs the actual transformation.
  206. >>> from pyrsistent import freeze, ny
  207. >>> news_paper = freeze({'articles': [{'author': 'Sara', 'content': 'A short article'},
  208. ... {'author': 'Steve', 'content': 'A slightly longer article'}],
  209. ... 'weather': {'temperature': '11C', 'wind': '5m/s'}})
  210. >>> short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:25] + '...' if len(c) > 25 else c)
  211. >>> very_short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:15] + '...' if len(c) > 15 else c)
  212. >>> very_short_news.articles[0].content
  213. 'A short article'
  214. >>> very_short_news.articles[1].content
  215. 'A slightly long...'
  216. When nothing has been transformed the original data structure is kept
  217. >>> short_news is news_paper
  218. True
  219. >>> very_short_news is news_paper
  220. False
  221. >>> very_short_news.articles[0] is news_paper.articles[0]
  222. True
  223. """
  224. return transform(self, transformations)
  225. def copy(self):
  226. return self
  227. class _Evolver(object):
  228. __slots__ = ('_buckets_evolver', '_size', '_original_pmap')
  229. def __init__(self, original_pmap):
  230. self._original_pmap = original_pmap
  231. self._buckets_evolver = original_pmap._buckets.evolver()
  232. self._size = original_pmap._size
  233. def __getitem__(self, key):
  234. return PMap._getitem(self._buckets_evolver, key)
  235. def __setitem__(self, key, val):
  236. self.set(key, val)
  237. def set(self, key, val):
  238. if len(self._buckets_evolver) < 0.67 * self._size:
  239. self._reallocate(2 * len(self._buckets_evolver))
  240. kv = (key, val)
  241. index, bucket = PMap._get_bucket(self._buckets_evolver, key)
  242. if bucket:
  243. for k, v in bucket:
  244. if k == key:
  245. if v is not val:
  246. new_bucket = [(k2, v2) if k2 != k else (k2, val) for k2, v2 in bucket]
  247. self._buckets_evolver[index] = new_bucket
  248. return self
  249. new_bucket = [kv]
  250. new_bucket.extend(bucket)
  251. self._buckets_evolver[index] = new_bucket
  252. self._size += 1
  253. else:
  254. self._buckets_evolver[index] = [kv]
  255. self._size += 1
  256. return self
  257. def _reallocate(self, new_size):
  258. new_list = new_size * [None]
  259. buckets = self._buckets_evolver.persistent()
  260. for k, v in chain.from_iterable(x for x in buckets if x):
  261. index = hash(k) % new_size
  262. if new_list[index]:
  263. new_list[index].append((k, v))
  264. else:
  265. new_list[index] = [(k, v)]
  266. # A reallocation should always result in a dirty buckets evolver to avoid
  267. # possible loss of elements when doing the reallocation.
  268. self._buckets_evolver = pvector().evolver()
  269. self._buckets_evolver.extend(new_list)
  270. def is_dirty(self):
  271. return self._buckets_evolver.is_dirty()
  272. def persistent(self):
  273. if self.is_dirty():
  274. self._original_pmap = PMap(self._size, self._buckets_evolver.persistent())
  275. return self._original_pmap
  276. def __len__(self):
  277. return self._size
  278. def __contains__(self, key):
  279. return PMap._contains(self._buckets_evolver, key)
  280. def __delitem__(self, key):
  281. self.remove(key)
  282. def remove(self, key):
  283. index, bucket = PMap._get_bucket(self._buckets_evolver, key)
  284. if bucket:
  285. new_bucket = [(k, v) for (k, v) in bucket if k != key]
  286. if len(bucket) > len(new_bucket):
  287. self._buckets_evolver[index] = new_bucket if new_bucket else None
  288. self._size -= 1
  289. return self
  290. raise KeyError('{0}'.format(key))
  291. def evolver(self):
  292. """
  293. Create a new evolver for this pmap. For a discussion on evolvers in general see the
  294. documentation for the pvector evolver.
  295. Create the evolver and perform various mutating updates to it:
  296. >>> m1 = m(a=1, b=2)
  297. >>> e = m1.evolver()
  298. >>> e['c'] = 3
  299. >>> len(e)
  300. 3
  301. >>> del e['a']
  302. The underlying pmap remains the same:
  303. >>> m1
  304. pmap({'b': 2, 'a': 1})
  305. The changes are kept in the evolver. An updated pmap can be created using the
  306. persistent() function on the evolver.
  307. >>> m2 = e.persistent()
  308. >>> m2
  309. pmap({'c': 3, 'b': 2})
  310. The new pmap will share data with the original pmap in the same way that would have
  311. been done if only using operations on the pmap.
  312. """
  313. return self._Evolver(self)
  314. Mapping.register(PMap)
  315. Hashable.register(PMap)
  316. def _turbo_mapping(initial, pre_size):
  317. if pre_size:
  318. size = pre_size
  319. else:
  320. try:
  321. size = 2 * len(initial) or 8
  322. except Exception:
  323. # Guess we can't figure out the length. Give up on length hinting,
  324. # we can always reallocate later.
  325. size = 8
  326. buckets = size * [None]
  327. if not isinstance(initial, Mapping):
  328. # Make a dictionary of the initial data if it isn't already,
  329. # that will save us some job further down since we can assume no
  330. # key collisions
  331. initial = dict(initial)
  332. for k, v in six.iteritems(initial):
  333. h = hash(k)
  334. index = h % size
  335. bucket = buckets[index]
  336. if bucket:
  337. bucket.append((k, v))
  338. else:
  339. buckets[index] = [(k, v)]
  340. return PMap(len(initial), pvector().extend(buckets))
  341. _EMPTY_PMAP = _turbo_mapping({}, 0)
  342. def pmap(initial={}, pre_size=0):
  343. """
  344. Create new persistent map, inserts all elements in initial into the newly created map.
  345. The optional argument pre_size may be used to specify an initial size of the underlying bucket vector. This
  346. may have a positive performance impact in the cases where you know beforehand that a large number of elements
  347. will be inserted into the map eventually since it will reduce the number of reallocations required.
  348. >>> pmap({'a': 13, 'b': 14})
  349. pmap({'b': 14, 'a': 13})
  350. """
  351. if not initial:
  352. return _EMPTY_PMAP
  353. return _turbo_mapping(initial, pre_size)
  354. def m(**kwargs):
  355. """
  356. Creates a new persitent map. Inserts all key value arguments into the newly created map.
  357. >>> m(a=13, b=14)
  358. pmap({'b': 14, 'a': 13})
  359. """
  360. return pmap(kwargs)