core.py

from toolz.itertoolz import getter, cons, pluck
from itertools import tee, starmap


# See #166: https://github.com/pytoolz/toolz/issues/166
# See #173: https://github.com/pytoolz/toolz/pull/173
class EqualityHashKey(object):
    """ Create a hash key that uses equality comparisons between items.

    This may be used to create hash keys for otherwise unhashable types:

    >>> from toolz import curry
    >>> EqualityHashDefault = curry(EqualityHashKey, None)
    >>> set(map(EqualityHashDefault, [[], (), [1], [1]]))  # doctest: +SKIP
    {=[]=, =()=, =[1]=}

    **Caution:** adding N ``EqualityHashKey`` items to a hash container
    may require O(N**2) operations, not O(N) as for typical hashable types.
    Therefore, a suitable key function such as ``tuple`` or ``frozenset``
    is usually preferred over using ``EqualityHashKey`` if possible.

    The ``key`` argument to ``EqualityHashKey`` should be a function or
    index that returns a hashable object that effectively distinguishes
    unequal items.  This helps avoid the poor scaling that occurs when
    using the default key.  For example, the above example can be improved
    by using a key function that distinguishes items by length or type:

    >>> EqualityHashLen = curry(EqualityHashKey, len)
    >>> EqualityHashType = curry(EqualityHashKey, type)  # this works too
    >>> set(map(EqualityHashLen, [[], (), [1], [1]]))  # doctest: +SKIP
    {=[]=, =()=, =[1]=}

    ``EqualityHashKey`` is convenient to use when a suitable key function
    is complicated or unavailable.  For example, the following returns all
    unique values based on equality:

    >>> from toolz import unique
    >>> vals = [[], [], (), [1], [1], [2], {}, {}, {}]
    >>> list(unique(vals, key=EqualityHashDefault))
    [[], (), [1], [2], {}]

    **Warning:** don't change the equality value of an item already in a hash
    container.  Unhashable types are unhashable for a reason.  For example:

    >>> L1 = [1] ; L2 = [2]
    >>> s = set(map(EqualityHashDefault, [L1, L2]))
    >>> s  # doctest: +SKIP
    {=[1]=, =[2]=}

    >>> L1[0] = 2  # Don't do this!  ``s`` now has duplicate items!
    >>> s  # doctest: +SKIP
    {=[2]=, =[2]=}

    Although this may appear problematic, using immutable data types is a
    common idiom in functional programming, and ``EqualityHashKey`` easily
    allows the same idiom to be used by convention rather than strict
    requirement.

    See Also:
        identity
    """
    __slots__ = ['item', 'key']
    _default_hashkey = '__default__hashkey__'

    def __init__(self, key, item):
        if key is None:
            self.key = self._default_hashkey
        elif not callable(key):
            self.key = getter(key)
        else:
            self.key = key
        self.item = item

    def __hash__(self):
        # With the default key, every wrapper hashes to the same constant, so
        # hash containers fall back to equality comparisons between items.
        if self.key == self._default_hashkey:
            val = self.key
        else:
            val = self.key(self.item)
        return hash(val)

    def __eq__(self, other):
        # ``other`` must look like an ``EqualityHashKey`` (duck-typed via the
        # ``_default_hashkey`` attribute); the wrapped items are then compared
        # by equality.
        try:
            return (self._default_hashkey == other._default_hashkey and
                    self.item == other.item)
        except AttributeError:
            return False

    def __ne__(self, other):
        return not self.__eq__(other)

    def __str__(self):
        return '=%s=' % str(self.item)

    def __repr__(self):
        return '=%s=' % repr(self.item)
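

# --- Illustrative usage sketch (not part of the toolz API) ----------------
# ``EqualityHashKey`` wrappers are hashable, so they can serve as dict or
# Counter keys for otherwise unhashable values.  The helper name
# ``_count_unhashable`` is made up for this demo.
def _count_unhashable(vals):
    """Count occurrences of (possibly unhashable) values by equality."""
    from collections import Counter
    return Counter(EqualityHashKey(None, v) for v in vals)

# Example:
#     _count_unhashable([[1], [1], [2], {}, {}])
#     Counter({=[1]=: 2, ={}=: 2, =[2]=: 1})    # ordering may vary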


# See issue #293: https://github.com/pytoolz/toolz/issues/239
def unzip(seq):
    """Inverse of ``zip``

    >>> a, b = unzip([('a', 1), ('b', 2)])
    >>> list(a)
    ['a', 'b']
    >>> list(b)
    [1, 2]

    Unlike the naive implementation ``def unzip(seq): zip(*seq)`` this
    implementation can handle an infinite sequence of finite sequences:
    the top-level sequence is consumed lazily (only its first element is
    materialized up front).

    Caveats:

    * The implementation uses ``tee``, and so can use a significant amount
      of auxiliary storage if the resulting iterators are consumed at
      different times.

    * The inner sequences cannot be infinite; the first one is fully
      materialized in order to determine how many iterators to return.

    """
    seq = iter(seq)

    # Check how many iterators we need
    try:
        first = tuple(next(seq))
    except StopIteration:
        return tuple()

    # and create them
    niters = len(first)
    seqs = tee(cons(first, seq), niters)

    return tuple(starmap(pluck, enumerate(seqs)))
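

# --- Illustrative usage sketch (not part of the toolz API) ----------------
# ``unzip`` materializes only the first element of the top-level sequence and
# consumes the rest lazily, so it can be applied to an infinite stream of
# pairs; only as many elements are pulled as the caller requests.  The helper
# name ``_take_from_infinite_pairs`` is made up for this demo.
def _take_from_infinite_pairs(n=3):
    """Pull the first ``n`` values from each side of an infinite pair stream."""
    from itertools import count

    pairs = ((i, i * i) for i in count())  # infinite sequence of finite pairs
    xs, squares = unzip(pairs)
    return ([next(xs) for _ in range(n)],
            [next(squares) for _ in range(n)])

# Example:
#     _take_from_infinite_pairs()
#     ([0, 1, 2], [0, 1, 4])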