_pvector.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713
  1. from abc import abstractmethod, ABCMeta
  2. from ._compat import Sequence, Hashable
  3. from numbers import Integral
  4. import operator
  5. import six
  6. from pyrsistent._transformations import transform
  7. def _bitcount(val):
  8. return bin(val).count("1")
  9. BRANCH_FACTOR = 32
  10. BIT_MASK = BRANCH_FACTOR - 1
  11. SHIFT = _bitcount(BIT_MASK)
  12. def compare_pvector(v, other, operator):
  13. return operator(v.tolist(), other.tolist() if isinstance(other, PVector) else other)
  14. def _index_or_slice(index, stop):
  15. if stop is None:
  16. return index
  17. return slice(index, stop)
  18. class PythonPVector(object):
  19. """
  20. Support structure for PVector that implements structural sharing for vectors using a trie.
  21. """
  22. __slots__ = ('_count', '_shift', '_root', '_tail', '_tail_offset', '__weakref__')
  23. def __new__(cls, count, shift, root, tail):
  24. self = super(PythonPVector, cls).__new__(cls)
  25. self._count = count
  26. self._shift = shift
  27. self._root = root
  28. self._tail = tail
  29. # Derived attribute stored for performance
  30. self._tail_offset = self._count - len(self._tail)
  31. return self
  32. def __len__(self):
  33. return self._count
  34. def __getitem__(self, index):
  35. if isinstance(index, slice):
  36. # There are more conditions than the below where it would be OK to
  37. # return ourselves, implement those...
  38. if index.start is None and index.stop is None and index.step is None:
  39. return self
  40. # This is a bit nasty realizing the whole structure as a list before
  41. # slicing it but it is the fastest way I've found to date, and it's easy :-)
  42. return _EMPTY_PVECTOR.extend(self.tolist()[index])
  43. if index < 0:
  44. index += self._count
  45. return PythonPVector._node_for(self, index)[index & BIT_MASK]
  46. def __add__(self, other):
  47. return self.extend(other)
  48. def __repr__(self):
  49. return 'pvector({0})'.format(str(self.tolist()))
  50. def __str__(self):
  51. return self.__repr__()
  52. def __iter__(self):
  53. # This is kind of lazy and will produce some memory overhead but it is the fasted method
  54. # by far of those tried since it uses the speed of the built in python list directly.
  55. return iter(self.tolist())
  56. def __ne__(self, other):
  57. return not self.__eq__(other)
  58. def __eq__(self, other):
  59. return self is other or (hasattr(other, '__len__') and self._count == len(other)) and compare_pvector(self, other, operator.eq)
  60. def __gt__(self, other):
  61. return compare_pvector(self, other, operator.gt)
  62. def __lt__(self, other):
  63. return compare_pvector(self, other, operator.lt)
  64. def __ge__(self, other):
  65. return compare_pvector(self, other, operator.ge)
  66. def __le__(self, other):
  67. return compare_pvector(self, other, operator.le)
  68. def __mul__(self, times):
  69. if times <= 0 or self is _EMPTY_PVECTOR:
  70. return _EMPTY_PVECTOR
  71. if times == 1:
  72. return self
  73. return _EMPTY_PVECTOR.extend(times * self.tolist())
  74. __rmul__ = __mul__
  75. def _fill_list(self, node, shift, the_list):
  76. if shift:
  77. shift -= SHIFT
  78. for n in node:
  79. self._fill_list(n, shift, the_list)
  80. else:
  81. the_list.extend(node)
  82. def tolist(self):
  83. """
  84. The fastest way to convert the vector into a python list.
  85. """
  86. the_list = []
  87. self._fill_list(self._root, self._shift, the_list)
  88. the_list.extend(self._tail)
  89. return the_list
  90. def _totuple(self):
  91. """
  92. Returns the content as a python tuple.
  93. """
  94. return tuple(self.tolist())
  95. def __hash__(self):
  96. # Taking the easy way out again...
  97. return hash(self._totuple())
  98. def transform(self, *transformations):
  99. return transform(self, transformations)
  100. def __reduce__(self):
  101. # Pickling support
  102. return pvector, (self.tolist(),)
  103. def mset(self, *args):
  104. if len(args) % 2:
  105. raise TypeError("mset expected an even number of arguments")
  106. evolver = self.evolver()
  107. for i in range(0, len(args), 2):
  108. evolver[args[i]] = args[i+1]
  109. return evolver.persistent()
  110. class Evolver(object):
  111. __slots__ = ('_count', '_shift', '_root', '_tail', '_tail_offset', '_dirty_nodes',
  112. '_extra_tail', '_cached_leafs', '_orig_pvector')
  113. def __init__(self, v):
  114. self._reset(v)
  115. def __getitem__(self, index):
  116. if not isinstance(index, Integral):
  117. raise TypeError("'%s' object cannot be interpreted as an index" % type(index).__name__)
  118. if index < 0:
  119. index += self._count + len(self._extra_tail)
  120. if self._count <= index < self._count + len(self._extra_tail):
  121. return self._extra_tail[index - self._count]
  122. return PythonPVector._node_for(self, index)[index & BIT_MASK]
  123. def _reset(self, v):
  124. self._count = v._count
  125. self._shift = v._shift
  126. self._root = v._root
  127. self._tail = v._tail
  128. self._tail_offset = v._tail_offset
  129. self._dirty_nodes = {}
  130. self._cached_leafs = {}
  131. self._extra_tail = []
  132. self._orig_pvector = v
  133. def append(self, element):
  134. self._extra_tail.append(element)
  135. return self
  136. def extend(self, iterable):
  137. self._extra_tail.extend(iterable)
  138. return self
  139. def set(self, index, val):
  140. self[index] = val
  141. return self
  142. def __setitem__(self, index, val):
  143. if not isinstance(index, Integral):
  144. raise TypeError("'%s' object cannot be interpreted as an index" % type(index).__name__)
  145. if index < 0:
  146. index += self._count + len(self._extra_tail)
  147. if 0 <= index < self._count:
  148. node = self._cached_leafs.get(index >> SHIFT)
  149. if node:
  150. node[index & BIT_MASK] = val
  151. elif index >= self._tail_offset:
  152. if id(self._tail) not in self._dirty_nodes:
  153. self._tail = list(self._tail)
  154. self._dirty_nodes[id(self._tail)] = True
  155. self._cached_leafs[index >> SHIFT] = self._tail
  156. self._tail[index & BIT_MASK] = val
  157. else:
  158. self._root = self._do_set(self._shift, self._root, index, val)
  159. elif self._count <= index < self._count + len(self._extra_tail):
  160. self._extra_tail[index - self._count] = val
  161. elif index == self._count + len(self._extra_tail):
  162. self._extra_tail.append(val)
  163. else:
  164. raise IndexError("Index out of range: %s" % (index,))
  165. def _do_set(self, level, node, i, val):
  166. if id(node) in self._dirty_nodes:
  167. ret = node
  168. else:
  169. ret = list(node)
  170. self._dirty_nodes[id(ret)] = True
  171. if level == 0:
  172. ret[i & BIT_MASK] = val
  173. self._cached_leafs[i >> SHIFT] = ret
  174. else:
  175. sub_index = (i >> level) & BIT_MASK # >>>
  176. ret[sub_index] = self._do_set(level - SHIFT, node[sub_index], i, val)
  177. return ret
  178. def delete(self, index):
  179. del self[index]
  180. return self
  181. def __delitem__(self, key):
  182. if self._orig_pvector:
  183. # All structural sharing bets are off, base evolver on _extra_tail only
  184. l = PythonPVector(self._count, self._shift, self._root, self._tail).tolist()
  185. l.extend(self._extra_tail)
  186. self._reset(_EMPTY_PVECTOR)
  187. self._extra_tail = l
  188. del self._extra_tail[key]
  189. def persistent(self):
  190. result = self._orig_pvector
  191. if self.is_dirty():
  192. result = PythonPVector(self._count, self._shift, self._root, self._tail).extend(self._extra_tail)
  193. self._reset(result)
  194. return result
  195. def __len__(self):
  196. return self._count + len(self._extra_tail)
  197. def is_dirty(self):
  198. return bool(self._dirty_nodes or self._extra_tail)
  199. def evolver(self):
  200. return PythonPVector.Evolver(self)
  201. def set(self, i, val):
  202. # This method could be implemented by a call to mset() but doing so would cause
  203. # a ~5 X performance penalty on PyPy (considered the primary platform for this implementation
  204. # of PVector) so we're keeping this implementation for now.
  205. if not isinstance(i, Integral):
  206. raise TypeError("'%s' object cannot be interpreted as an index" % type(i).__name__)
  207. if i < 0:
  208. i += self._count
  209. if 0 <= i < self._count:
  210. if i >= self._tail_offset:
  211. new_tail = list(self._tail)
  212. new_tail[i & BIT_MASK] = val
  213. return PythonPVector(self._count, self._shift, self._root, new_tail)
  214. return PythonPVector(self._count, self._shift, self._do_set(self._shift, self._root, i, val), self._tail)
  215. if i == self._count:
  216. return self.append(val)
  217. raise IndexError("Index out of range: %s" % (i,))
  218. def _do_set(self, level, node, i, val):
  219. ret = list(node)
  220. if level == 0:
  221. ret[i & BIT_MASK] = val
  222. else:
  223. sub_index = (i >> level) & BIT_MASK # >>>
  224. ret[sub_index] = self._do_set(level - SHIFT, node[sub_index], i, val)
  225. return ret
  226. @staticmethod
  227. def _node_for(pvector_like, i):
  228. if 0 <= i < pvector_like._count:
  229. if i >= pvector_like._tail_offset:
  230. return pvector_like._tail
  231. node = pvector_like._root
  232. for level in range(pvector_like._shift, 0, -SHIFT):
  233. node = node[(i >> level) & BIT_MASK] # >>>
  234. return node
  235. raise IndexError("Index out of range: %s" % (i,))
  236. def _create_new_root(self):
  237. new_shift = self._shift
  238. # Overflow root?
  239. if (self._count >> SHIFT) > (1 << self._shift): # >>>
  240. new_root = [self._root, self._new_path(self._shift, self._tail)]
  241. new_shift += SHIFT
  242. else:
  243. new_root = self._push_tail(self._shift, self._root, self._tail)
  244. return new_root, new_shift
  245. def append(self, val):
  246. if len(self._tail) < BRANCH_FACTOR:
  247. new_tail = list(self._tail)
  248. new_tail.append(val)
  249. return PythonPVector(self._count + 1, self._shift, self._root, new_tail)
  250. # Full tail, push into tree
  251. new_root, new_shift = self._create_new_root()
  252. return PythonPVector(self._count + 1, new_shift, new_root, [val])
  253. def _new_path(self, level, node):
  254. if level == 0:
  255. return node
  256. return [self._new_path(level - SHIFT, node)]
  257. def _mutating_insert_tail(self):
  258. self._root, self._shift = self._create_new_root()
  259. self._tail = []
  260. def _mutating_fill_tail(self, offset, sequence):
  261. max_delta_len = BRANCH_FACTOR - len(self._tail)
  262. delta = sequence[offset:offset + max_delta_len]
  263. self._tail.extend(delta)
  264. delta_len = len(delta)
  265. self._count += delta_len
  266. return offset + delta_len
  267. def _mutating_extend(self, sequence):
  268. offset = 0
  269. sequence_len = len(sequence)
  270. while offset < sequence_len:
  271. offset = self._mutating_fill_tail(offset, sequence)
  272. if len(self._tail) == BRANCH_FACTOR:
  273. self._mutating_insert_tail()
  274. self._tail_offset = self._count - len(self._tail)
  275. def extend(self, obj):
  276. # Mutates the new vector directly for efficiency but that's only an
  277. # implementation detail, once it is returned it should be considered immutable
  278. l = obj.tolist() if isinstance(obj, PythonPVector) else list(obj)
  279. if l:
  280. new_vector = self.append(l[0])
  281. new_vector._mutating_extend(l[1:])
  282. return new_vector
  283. return self
  284. def _push_tail(self, level, parent, tail_node):
  285. """
  286. if parent is leaf, insert node,
  287. else does it map to an existing child? ->
  288. node_to_insert = push node one more level
  289. else alloc new path
  290. return node_to_insert placed in copy of parent
  291. """
  292. ret = list(parent)
  293. if level == SHIFT:
  294. ret.append(tail_node)
  295. return ret
  296. sub_index = ((self._count - 1) >> level) & BIT_MASK # >>>
  297. if len(parent) > sub_index:
  298. ret[sub_index] = self._push_tail(level - SHIFT, parent[sub_index], tail_node)
  299. return ret
  300. ret.append(self._new_path(level - SHIFT, tail_node))
  301. return ret
  302. def index(self, value, *args, **kwargs):
  303. return self.tolist().index(value, *args, **kwargs)
  304. def count(self, value):
  305. return self.tolist().count(value)
  306. def delete(self, index, stop=None):
  307. l = self.tolist()
  308. del l[_index_or_slice(index, stop)]
  309. return _EMPTY_PVECTOR.extend(l)
  310. def remove(self, value):
  311. l = self.tolist()
  312. l.remove(value)
  313. return _EMPTY_PVECTOR.extend(l)
  314. @six.add_metaclass(ABCMeta)
  315. class PVector(object):
  316. """
  317. Persistent vector implementation. Meant as a replacement for the cases where you would normally
  318. use a Python list.
  319. Do not instantiate directly, instead use the factory functions :py:func:`v` and :py:func:`pvector` to
  320. create an instance.
  321. Heavily influenced by the persistent vector available in Clojure. Initially this was more or
  322. less just a port of the Java code for the Clojure vector. It has since been modified and to
  323. some extent optimized for usage in Python.
  324. The vector is organized as a trie, any mutating method will return a new vector that contains the changes. No
  325. updates are done to the original vector. Structural sharing between vectors are applied where possible to save
  326. space and to avoid making complete copies.
  327. This structure corresponds most closely to the built in list type and is intended as a replacement. Where the
  328. semantics are the same (more or less) the same function names have been used but for some cases it is not possible,
  329. for example assignments.
  330. The PVector implements the Sequence protocol and is Hashable.
  331. Inserts are amortized O(1). Random access is log32(n) where n is the size of the vector.
  332. The following are examples of some common operations on persistent vectors:
  333. >>> p = v(1, 2, 3)
  334. >>> p2 = p.append(4)
  335. >>> p3 = p2.extend([5, 6, 7])
  336. >>> p
  337. pvector([1, 2, 3])
  338. >>> p2
  339. pvector([1, 2, 3, 4])
  340. >>> p3
  341. pvector([1, 2, 3, 4, 5, 6, 7])
  342. >>> p3[5]
  343. 6
  344. >>> p.set(1, 99)
  345. pvector([1, 99, 3])
  346. >>>
  347. """
  348. @abstractmethod
  349. def __len__(self):
  350. """
  351. >>> len(v(1, 2, 3))
  352. 3
  353. """
  354. @abstractmethod
  355. def __getitem__(self, index):
  356. """
  357. Get value at index. Full slicing support.
  358. >>> v1 = v(5, 6, 7, 8)
  359. >>> v1[2]
  360. 7
  361. >>> v1[1:3]
  362. pvector([6, 7])
  363. """
  364. @abstractmethod
  365. def __add__(self, other):
  366. """
  367. >>> v1 = v(1, 2)
  368. >>> v2 = v(3, 4)
  369. >>> v1 + v2
  370. pvector([1, 2, 3, 4])
  371. """
  372. @abstractmethod
  373. def __mul__(self, times):
  374. """
  375. >>> v1 = v(1, 2)
  376. >>> 3 * v1
  377. pvector([1, 2, 1, 2, 1, 2])
  378. """
  379. @abstractmethod
  380. def __hash__(self):
  381. """
  382. >>> v1 = v(1, 2, 3)
  383. >>> v2 = v(1, 2, 3)
  384. >>> hash(v1) == hash(v2)
  385. True
  386. """
  387. @abstractmethod
  388. def evolver(self):
  389. """
  390. Create a new evolver for this pvector. The evolver acts as a mutable view of the vector
  391. with "transaction like" semantics. No part of the underlying vector i updated, it is still
  392. fully immutable. Furthermore multiple evolvers created from the same pvector do not
  393. interfere with each other.
  394. You may want to use an evolver instead of working directly with the pvector in the
  395. following cases:
  396. * Multiple updates are done to the same vector and the intermediate results are of no
  397. interest. In this case using an evolver may be a more efficient and easier to work with.
  398. * You need to pass a vector into a legacy function or a function that you have no control
  399. over which performs in place mutations of lists. In this case pass an evolver instance
  400. instead and then create a new pvector from the evolver once the function returns.
  401. The following example illustrates a typical workflow when working with evolvers. It also
  402. displays most of the API (which i kept small by design, you should not be tempted to
  403. use evolvers in excess ;-)).
  404. Create the evolver and perform various mutating updates to it:
  405. >>> v1 = v(1, 2, 3, 4, 5)
  406. >>> e = v1.evolver()
  407. >>> e[1] = 22
  408. >>> _ = e.append(6)
  409. >>> _ = e.extend([7, 8, 9])
  410. >>> e[8] += 1
  411. >>> len(e)
  412. 9
  413. The underlying pvector remains the same:
  414. >>> v1
  415. pvector([1, 2, 3, 4, 5])
  416. The changes are kept in the evolver. An updated pvector can be created using the
  417. persistent() function on the evolver.
  418. >>> v2 = e.persistent()
  419. >>> v2
  420. pvector([1, 22, 3, 4, 5, 6, 7, 8, 10])
  421. The new pvector will share data with the original pvector in the same way that would have
  422. been done if only using operations on the pvector.
  423. """
  424. @abstractmethod
  425. def mset(self, *args):
  426. """
  427. Return a new vector with elements in specified positions replaced by values (multi set).
  428. Elements on even positions in the argument list are interpreted as indexes while
  429. elements on odd positions are considered values.
  430. >>> v1 = v(1, 2, 3)
  431. >>> v1.mset(0, 11, 2, 33)
  432. pvector([11, 2, 33])
  433. """
  434. @abstractmethod
  435. def set(self, i, val):
  436. """
  437. Return a new vector with element at position i replaced with val. The original vector remains unchanged.
  438. Setting a value one step beyond the end of the vector is equal to appending. Setting beyond that will
  439. result in an IndexError.
  440. >>> v1 = v(1, 2, 3)
  441. >>> v1.set(1, 4)
  442. pvector([1, 4, 3])
  443. >>> v1.set(3, 4)
  444. pvector([1, 2, 3, 4])
  445. >>> v1.set(-1, 4)
  446. pvector([1, 2, 4])
  447. """
  448. @abstractmethod
  449. def append(self, val):
  450. """
  451. Return a new vector with val appended.
  452. >>> v1 = v(1, 2)
  453. >>> v1.append(3)
  454. pvector([1, 2, 3])
  455. """
  456. @abstractmethod
  457. def extend(self, obj):
  458. """
  459. Return a new vector with all values in obj appended to it. Obj may be another
  460. PVector or any other Iterable.
  461. >>> v1 = v(1, 2, 3)
  462. >>> v1.extend([4, 5])
  463. pvector([1, 2, 3, 4, 5])
  464. """
  465. @abstractmethod
  466. def index(self, value, *args, **kwargs):
  467. """
  468. Return first index of value. Additional indexes may be supplied to limit the search to a
  469. sub range of the vector.
  470. >>> v1 = v(1, 2, 3, 4, 3)
  471. >>> v1.index(3)
  472. 2
  473. >>> v1.index(3, 3, 5)
  474. 4
  475. """
  476. @abstractmethod
  477. def count(self, value):
  478. """
  479. Return the number of times that value appears in the vector.
  480. >>> v1 = v(1, 4, 3, 4)
  481. >>> v1.count(4)
  482. 2
  483. """
  484. @abstractmethod
  485. def transform(self, *transformations):
  486. """
  487. Transform arbitrarily complex combinations of PVectors and PMaps. A transformation
  488. consists of two parts. One match expression that specifies which elements to transform
  489. and one transformation function that performs the actual transformation.
  490. >>> from pyrsistent import freeze, ny
  491. >>> news_paper = freeze({'articles': [{'author': 'Sara', 'content': 'A short article'},
  492. ... {'author': 'Steve', 'content': 'A slightly longer article'}],
  493. ... 'weather': {'temperature': '11C', 'wind': '5m/s'}})
  494. >>> short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:25] + '...' if len(c) > 25 else c)
  495. >>> very_short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:15] + '...' if len(c) > 15 else c)
  496. >>> very_short_news.articles[0].content
  497. 'A short article'
  498. >>> very_short_news.articles[1].content
  499. 'A slightly long...'
  500. When nothing has been transformed the original data structure is kept
  501. >>> short_news is news_paper
  502. True
  503. >>> very_short_news is news_paper
  504. False
  505. >>> very_short_news.articles[0] is news_paper.articles[0]
  506. True
  507. """
  508. @abstractmethod
  509. def delete(self, index, stop=None):
  510. """
  511. Delete a portion of the vector by index or range.
  512. >>> v1 = v(1, 2, 3, 4, 5)
  513. >>> v1.delete(1)
  514. pvector([1, 3, 4, 5])
  515. >>> v1.delete(1, 3)
  516. pvector([1, 4, 5])
  517. """
  518. @abstractmethod
  519. def remove(self, value):
  520. """
  521. Remove the first occurrence of a value from the vector.
  522. >>> v1 = v(1, 2, 3, 2, 1)
  523. >>> v2 = v1.remove(1)
  524. >>> v2
  525. pvector([2, 3, 2, 1])
  526. >>> v2.remove(1)
  527. pvector([2, 3, 2])
  528. """
  529. _EMPTY_PVECTOR = PythonPVector(0, SHIFT, [], [])
  530. PVector.register(PythonPVector)
  531. Sequence.register(PVector)
  532. Hashable.register(PVector)
  533. def python_pvector(iterable=()):
  534. """
  535. Create a new persistent vector containing the elements in iterable.
  536. >>> v1 = pvector([1, 2, 3])
  537. >>> v1
  538. pvector([1, 2, 3])
  539. """
  540. return _EMPTY_PVECTOR.extend(iterable)
  541. try:
  542. # Use the C extension as underlying trie implementation if it is available
  543. import os
  544. if os.environ.get('PYRSISTENT_NO_C_EXTENSION'):
  545. pvector = python_pvector
  546. else:
  547. from pvectorc import pvector
  548. PVector.register(type(pvector()))
  549. except ImportError:
  550. pvector = python_pvector
  551. def v(*elements):
  552. """
  553. Create a new persistent vector containing all parameters to this function.
  554. >>> v1 = v(1, 2, 3)
  555. >>> v1
  556. pvector([1, 2, 3])
  557. """
  558. return pvector(elements)