123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713 |
- from abc import abstractmethod, ABCMeta
- from ._compat import Sequence, Hashable
- from numbers import Integral
- import operator
- import six
- from pyrsistent._transformations import transform
- def _bitcount(val):
- return bin(val).count("1")
- BRANCH_FACTOR = 32
- BIT_MASK = BRANCH_FACTOR - 1
- SHIFT = _bitcount(BIT_MASK)
- def compare_pvector(v, other, operator):
- return operator(v.tolist(), other.tolist() if isinstance(other, PVector) else other)
- def _index_or_slice(index, stop):
- if stop is None:
- return index
- return slice(index, stop)
- class PythonPVector(object):
- """
- Support structure for PVector that implements structural sharing for vectors using a trie.
- """
- __slots__ = ('_count', '_shift', '_root', '_tail', '_tail_offset', '__weakref__')
- def __new__(cls, count, shift, root, tail):
- self = super(PythonPVector, cls).__new__(cls)
- self._count = count
- self._shift = shift
- self._root = root
- self._tail = tail
- # Derived attribute stored for performance
- self._tail_offset = self._count - len(self._tail)
- return self
- def __len__(self):
- return self._count
- def __getitem__(self, index):
- if isinstance(index, slice):
- # There are more conditions than the below where it would be OK to
- # return ourselves, implement those...
- if index.start is None and index.stop is None and index.step is None:
- return self
- # This is a bit nasty realizing the whole structure as a list before
- # slicing it but it is the fastest way I've found to date, and it's easy :-)
- return _EMPTY_PVECTOR.extend(self.tolist()[index])
- if index < 0:
- index += self._count
- return PythonPVector._node_for(self, index)[index & BIT_MASK]
- def __add__(self, other):
- return self.extend(other)
- def __repr__(self):
- return 'pvector({0})'.format(str(self.tolist()))
- def __str__(self):
- return self.__repr__()
- def __iter__(self):
- # This is kind of lazy and will produce some memory overhead but it is the fasted method
- # by far of those tried since it uses the speed of the built in python list directly.
- return iter(self.tolist())
- def __ne__(self, other):
- return not self.__eq__(other)
- def __eq__(self, other):
- return self is other or (hasattr(other, '__len__') and self._count == len(other)) and compare_pvector(self, other, operator.eq)
- def __gt__(self, other):
- return compare_pvector(self, other, operator.gt)
- def __lt__(self, other):
- return compare_pvector(self, other, operator.lt)
- def __ge__(self, other):
- return compare_pvector(self, other, operator.ge)
- def __le__(self, other):
- return compare_pvector(self, other, operator.le)
- def __mul__(self, times):
- if times <= 0 or self is _EMPTY_PVECTOR:
- return _EMPTY_PVECTOR
- if times == 1:
- return self
- return _EMPTY_PVECTOR.extend(times * self.tolist())
- __rmul__ = __mul__
- def _fill_list(self, node, shift, the_list):
- if shift:
- shift -= SHIFT
- for n in node:
- self._fill_list(n, shift, the_list)
- else:
- the_list.extend(node)
- def tolist(self):
- """
- The fastest way to convert the vector into a python list.
- """
- the_list = []
- self._fill_list(self._root, self._shift, the_list)
- the_list.extend(self._tail)
- return the_list
- def _totuple(self):
- """
- Returns the content as a python tuple.
- """
- return tuple(self.tolist())
- def __hash__(self):
- # Taking the easy way out again...
- return hash(self._totuple())
- def transform(self, *transformations):
- return transform(self, transformations)
- def __reduce__(self):
- # Pickling support
- return pvector, (self.tolist(),)
- def mset(self, *args):
- if len(args) % 2:
- raise TypeError("mset expected an even number of arguments")
- evolver = self.evolver()
- for i in range(0, len(args), 2):
- evolver[args[i]] = args[i+1]
- return evolver.persistent()
- class Evolver(object):
- __slots__ = ('_count', '_shift', '_root', '_tail', '_tail_offset', '_dirty_nodes',
- '_extra_tail', '_cached_leafs', '_orig_pvector')
- def __init__(self, v):
- self._reset(v)
- def __getitem__(self, index):
- if not isinstance(index, Integral):
- raise TypeError("'%s' object cannot be interpreted as an index" % type(index).__name__)
- if index < 0:
- index += self._count + len(self._extra_tail)
- if self._count <= index < self._count + len(self._extra_tail):
- return self._extra_tail[index - self._count]
- return PythonPVector._node_for(self, index)[index & BIT_MASK]
- def _reset(self, v):
- self._count = v._count
- self._shift = v._shift
- self._root = v._root
- self._tail = v._tail
- self._tail_offset = v._tail_offset
- self._dirty_nodes = {}
- self._cached_leafs = {}
- self._extra_tail = []
- self._orig_pvector = v
- def append(self, element):
- self._extra_tail.append(element)
- return self
- def extend(self, iterable):
- self._extra_tail.extend(iterable)
- return self
- def set(self, index, val):
- self[index] = val
- return self
- def __setitem__(self, index, val):
- if not isinstance(index, Integral):
- raise TypeError("'%s' object cannot be interpreted as an index" % type(index).__name__)
- if index < 0:
- index += self._count + len(self._extra_tail)
- if 0 <= index < self._count:
- node = self._cached_leafs.get(index >> SHIFT)
- if node:
- node[index & BIT_MASK] = val
- elif index >= self._tail_offset:
- if id(self._tail) not in self._dirty_nodes:
- self._tail = list(self._tail)
- self._dirty_nodes[id(self._tail)] = True
- self._cached_leafs[index >> SHIFT] = self._tail
- self._tail[index & BIT_MASK] = val
- else:
- self._root = self._do_set(self._shift, self._root, index, val)
- elif self._count <= index < self._count + len(self._extra_tail):
- self._extra_tail[index - self._count] = val
- elif index == self._count + len(self._extra_tail):
- self._extra_tail.append(val)
- else:
- raise IndexError("Index out of range: %s" % (index,))
- def _do_set(self, level, node, i, val):
- if id(node) in self._dirty_nodes:
- ret = node
- else:
- ret = list(node)
- self._dirty_nodes[id(ret)] = True
- if level == 0:
- ret[i & BIT_MASK] = val
- self._cached_leafs[i >> SHIFT] = ret
- else:
- sub_index = (i >> level) & BIT_MASK # >>>
- ret[sub_index] = self._do_set(level - SHIFT, node[sub_index], i, val)
- return ret
- def delete(self, index):
- del self[index]
- return self
- def __delitem__(self, key):
- if self._orig_pvector:
- # All structural sharing bets are off, base evolver on _extra_tail only
- l = PythonPVector(self._count, self._shift, self._root, self._tail).tolist()
- l.extend(self._extra_tail)
- self._reset(_EMPTY_PVECTOR)
- self._extra_tail = l
- del self._extra_tail[key]
- def persistent(self):
- result = self._orig_pvector
- if self.is_dirty():
- result = PythonPVector(self._count, self._shift, self._root, self._tail).extend(self._extra_tail)
- self._reset(result)
- return result
- def __len__(self):
- return self._count + len(self._extra_tail)
- def is_dirty(self):
- return bool(self._dirty_nodes or self._extra_tail)
- def evolver(self):
- return PythonPVector.Evolver(self)
- def set(self, i, val):
- # This method could be implemented by a call to mset() but doing so would cause
- # a ~5 X performance penalty on PyPy (considered the primary platform for this implementation
- # of PVector) so we're keeping this implementation for now.
- if not isinstance(i, Integral):
- raise TypeError("'%s' object cannot be interpreted as an index" % type(i).__name__)
- if i < 0:
- i += self._count
- if 0 <= i < self._count:
- if i >= self._tail_offset:
- new_tail = list(self._tail)
- new_tail[i & BIT_MASK] = val
- return PythonPVector(self._count, self._shift, self._root, new_tail)
- return PythonPVector(self._count, self._shift, self._do_set(self._shift, self._root, i, val), self._tail)
- if i == self._count:
- return self.append(val)
- raise IndexError("Index out of range: %s" % (i,))
- def _do_set(self, level, node, i, val):
- ret = list(node)
- if level == 0:
- ret[i & BIT_MASK] = val
- else:
- sub_index = (i >> level) & BIT_MASK # >>>
- ret[sub_index] = self._do_set(level - SHIFT, node[sub_index], i, val)
- return ret
- @staticmethod
- def _node_for(pvector_like, i):
- if 0 <= i < pvector_like._count:
- if i >= pvector_like._tail_offset:
- return pvector_like._tail
- node = pvector_like._root
- for level in range(pvector_like._shift, 0, -SHIFT):
- node = node[(i >> level) & BIT_MASK] # >>>
- return node
- raise IndexError("Index out of range: %s" % (i,))
- def _create_new_root(self):
- new_shift = self._shift
- # Overflow root?
- if (self._count >> SHIFT) > (1 << self._shift): # >>>
- new_root = [self._root, self._new_path(self._shift, self._tail)]
- new_shift += SHIFT
- else:
- new_root = self._push_tail(self._shift, self._root, self._tail)
- return new_root, new_shift
- def append(self, val):
- if len(self._tail) < BRANCH_FACTOR:
- new_tail = list(self._tail)
- new_tail.append(val)
- return PythonPVector(self._count + 1, self._shift, self._root, new_tail)
- # Full tail, push into tree
- new_root, new_shift = self._create_new_root()
- return PythonPVector(self._count + 1, new_shift, new_root, [val])
- def _new_path(self, level, node):
- if level == 0:
- return node
- return [self._new_path(level - SHIFT, node)]
- def _mutating_insert_tail(self):
- self._root, self._shift = self._create_new_root()
- self._tail = []
- def _mutating_fill_tail(self, offset, sequence):
- max_delta_len = BRANCH_FACTOR - len(self._tail)
- delta = sequence[offset:offset + max_delta_len]
- self._tail.extend(delta)
- delta_len = len(delta)
- self._count += delta_len
- return offset + delta_len
- def _mutating_extend(self, sequence):
- offset = 0
- sequence_len = len(sequence)
- while offset < sequence_len:
- offset = self._mutating_fill_tail(offset, sequence)
- if len(self._tail) == BRANCH_FACTOR:
- self._mutating_insert_tail()
- self._tail_offset = self._count - len(self._tail)
- def extend(self, obj):
- # Mutates the new vector directly for efficiency but that's only an
- # implementation detail, once it is returned it should be considered immutable
- l = obj.tolist() if isinstance(obj, PythonPVector) else list(obj)
- if l:
- new_vector = self.append(l[0])
- new_vector._mutating_extend(l[1:])
- return new_vector
- return self
- def _push_tail(self, level, parent, tail_node):
- """
- if parent is leaf, insert node,
- else does it map to an existing child? ->
- node_to_insert = push node one more level
- else alloc new path
- return node_to_insert placed in copy of parent
- """
- ret = list(parent)
- if level == SHIFT:
- ret.append(tail_node)
- return ret
- sub_index = ((self._count - 1) >> level) & BIT_MASK # >>>
- if len(parent) > sub_index:
- ret[sub_index] = self._push_tail(level - SHIFT, parent[sub_index], tail_node)
- return ret
- ret.append(self._new_path(level - SHIFT, tail_node))
- return ret
- def index(self, value, *args, **kwargs):
- return self.tolist().index(value, *args, **kwargs)
- def count(self, value):
- return self.tolist().count(value)
- def delete(self, index, stop=None):
- l = self.tolist()
- del l[_index_or_slice(index, stop)]
- return _EMPTY_PVECTOR.extend(l)
- def remove(self, value):
- l = self.tolist()
- l.remove(value)
- return _EMPTY_PVECTOR.extend(l)
- @six.add_metaclass(ABCMeta)
- class PVector(object):
- """
- Persistent vector implementation. Meant as a replacement for the cases where you would normally
- use a Python list.
- Do not instantiate directly, instead use the factory functions :py:func:`v` and :py:func:`pvector` to
- create an instance.
- Heavily influenced by the persistent vector available in Clojure. Initially this was more or
- less just a port of the Java code for the Clojure vector. It has since been modified and to
- some extent optimized for usage in Python.
- The vector is organized as a trie, any mutating method will return a new vector that contains the changes. No
- updates are done to the original vector. Structural sharing between vectors are applied where possible to save
- space and to avoid making complete copies.
- This structure corresponds most closely to the built in list type and is intended as a replacement. Where the
- semantics are the same (more or less) the same function names have been used but for some cases it is not possible,
- for example assignments.
- The PVector implements the Sequence protocol and is Hashable.
- Inserts are amortized O(1). Random access is log32(n) where n is the size of the vector.
- The following are examples of some common operations on persistent vectors:
- >>> p = v(1, 2, 3)
- >>> p2 = p.append(4)
- >>> p3 = p2.extend([5, 6, 7])
- >>> p
- pvector([1, 2, 3])
- >>> p2
- pvector([1, 2, 3, 4])
- >>> p3
- pvector([1, 2, 3, 4, 5, 6, 7])
- >>> p3[5]
- 6
- >>> p.set(1, 99)
- pvector([1, 99, 3])
- >>>
- """
- @abstractmethod
- def __len__(self):
- """
- >>> len(v(1, 2, 3))
- 3
- """
- @abstractmethod
- def __getitem__(self, index):
- """
- Get value at index. Full slicing support.
- >>> v1 = v(5, 6, 7, 8)
- >>> v1[2]
- 7
- >>> v1[1:3]
- pvector([6, 7])
- """
- @abstractmethod
- def __add__(self, other):
- """
- >>> v1 = v(1, 2)
- >>> v2 = v(3, 4)
- >>> v1 + v2
- pvector([1, 2, 3, 4])
- """
- @abstractmethod
- def __mul__(self, times):
- """
- >>> v1 = v(1, 2)
- >>> 3 * v1
- pvector([1, 2, 1, 2, 1, 2])
- """
- @abstractmethod
- def __hash__(self):
- """
- >>> v1 = v(1, 2, 3)
- >>> v2 = v(1, 2, 3)
- >>> hash(v1) == hash(v2)
- True
- """
- @abstractmethod
- def evolver(self):
- """
- Create a new evolver for this pvector. The evolver acts as a mutable view of the vector
- with "transaction like" semantics. No part of the underlying vector i updated, it is still
- fully immutable. Furthermore multiple evolvers created from the same pvector do not
- interfere with each other.
- You may want to use an evolver instead of working directly with the pvector in the
- following cases:
- * Multiple updates are done to the same vector and the intermediate results are of no
- interest. In this case using an evolver may be a more efficient and easier to work with.
- * You need to pass a vector into a legacy function or a function that you have no control
- over which performs in place mutations of lists. In this case pass an evolver instance
- instead and then create a new pvector from the evolver once the function returns.
- The following example illustrates a typical workflow when working with evolvers. It also
- displays most of the API (which i kept small by design, you should not be tempted to
- use evolvers in excess ;-)).
- Create the evolver and perform various mutating updates to it:
- >>> v1 = v(1, 2, 3, 4, 5)
- >>> e = v1.evolver()
- >>> e[1] = 22
- >>> _ = e.append(6)
- >>> _ = e.extend([7, 8, 9])
- >>> e[8] += 1
- >>> len(e)
- 9
- The underlying pvector remains the same:
- >>> v1
- pvector([1, 2, 3, 4, 5])
- The changes are kept in the evolver. An updated pvector can be created using the
- persistent() function on the evolver.
- >>> v2 = e.persistent()
- >>> v2
- pvector([1, 22, 3, 4, 5, 6, 7, 8, 10])
- The new pvector will share data with the original pvector in the same way that would have
- been done if only using operations on the pvector.
- """
- @abstractmethod
- def mset(self, *args):
- """
- Return a new vector with elements in specified positions replaced by values (multi set).
- Elements on even positions in the argument list are interpreted as indexes while
- elements on odd positions are considered values.
- >>> v1 = v(1, 2, 3)
- >>> v1.mset(0, 11, 2, 33)
- pvector([11, 2, 33])
- """
- @abstractmethod
- def set(self, i, val):
- """
- Return a new vector with element at position i replaced with val. The original vector remains unchanged.
- Setting a value one step beyond the end of the vector is equal to appending. Setting beyond that will
- result in an IndexError.
- >>> v1 = v(1, 2, 3)
- >>> v1.set(1, 4)
- pvector([1, 4, 3])
- >>> v1.set(3, 4)
- pvector([1, 2, 3, 4])
- >>> v1.set(-1, 4)
- pvector([1, 2, 4])
- """
- @abstractmethod
- def append(self, val):
- """
- Return a new vector with val appended.
- >>> v1 = v(1, 2)
- >>> v1.append(3)
- pvector([1, 2, 3])
- """
- @abstractmethod
- def extend(self, obj):
- """
- Return a new vector with all values in obj appended to it. Obj may be another
- PVector or any other Iterable.
- >>> v1 = v(1, 2, 3)
- >>> v1.extend([4, 5])
- pvector([1, 2, 3, 4, 5])
- """
- @abstractmethod
- def index(self, value, *args, **kwargs):
- """
- Return first index of value. Additional indexes may be supplied to limit the search to a
- sub range of the vector.
- >>> v1 = v(1, 2, 3, 4, 3)
- >>> v1.index(3)
- 2
- >>> v1.index(3, 3, 5)
- 4
- """
- @abstractmethod
- def count(self, value):
- """
- Return the number of times that value appears in the vector.
- >>> v1 = v(1, 4, 3, 4)
- >>> v1.count(4)
- 2
- """
- @abstractmethod
- def transform(self, *transformations):
- """
- Transform arbitrarily complex combinations of PVectors and PMaps. A transformation
- consists of two parts. One match expression that specifies which elements to transform
- and one transformation function that performs the actual transformation.
- >>> from pyrsistent import freeze, ny
- >>> news_paper = freeze({'articles': [{'author': 'Sara', 'content': 'A short article'},
- ... {'author': 'Steve', 'content': 'A slightly longer article'}],
- ... 'weather': {'temperature': '11C', 'wind': '5m/s'}})
- >>> short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:25] + '...' if len(c) > 25 else c)
- >>> very_short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:15] + '...' if len(c) > 15 else c)
- >>> very_short_news.articles[0].content
- 'A short article'
- >>> very_short_news.articles[1].content
- 'A slightly long...'
- When nothing has been transformed the original data structure is kept
- >>> short_news is news_paper
- True
- >>> very_short_news is news_paper
- False
- >>> very_short_news.articles[0] is news_paper.articles[0]
- True
- """
- @abstractmethod
- def delete(self, index, stop=None):
- """
- Delete a portion of the vector by index or range.
- >>> v1 = v(1, 2, 3, 4, 5)
- >>> v1.delete(1)
- pvector([1, 3, 4, 5])
- >>> v1.delete(1, 3)
- pvector([1, 4, 5])
- """
- @abstractmethod
- def remove(self, value):
- """
- Remove the first occurrence of a value from the vector.
- >>> v1 = v(1, 2, 3, 2, 1)
- >>> v2 = v1.remove(1)
- >>> v2
- pvector([2, 3, 2, 1])
- >>> v2.remove(1)
- pvector([2, 3, 2])
- """
- _EMPTY_PVECTOR = PythonPVector(0, SHIFT, [], [])
- PVector.register(PythonPVector)
- Sequence.register(PVector)
- Hashable.register(PVector)
- def python_pvector(iterable=()):
- """
- Create a new persistent vector containing the elements in iterable.
- >>> v1 = pvector([1, 2, 3])
- >>> v1
- pvector([1, 2, 3])
- """
- return _EMPTY_PVECTOR.extend(iterable)
- try:
- # Use the C extension as underlying trie implementation if it is available
- import os
- if os.environ.get('PYRSISTENT_NO_C_EXTENSION'):
- pvector = python_pvector
- else:
- from pvectorc import pvector
- PVector.register(type(pvector()))
- except ImportError:
- pvector = python_pvector
- def v(*elements):
- """
- Create a new persistent vector containing all parameters to this function.
- >>> v1 = v(1, 2, 3)
- >>> v1
- pvector([1, 2, 3])
- """
- return pvector(elements)
|