more.py 60 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985
  1. from __future__ import print_function
  2. from collections import Counter, defaultdict, deque, Sequence
  3. from functools import partial, wraps
  4. from heapq import merge
  5. from itertools import (
  6. chain,
  7. compress,
  8. count,
  9. cycle,
  10. dropwhile,
  11. groupby,
  12. islice,
  13. repeat,
  14. takewhile,
  15. tee
  16. )
  17. from operator import itemgetter, lt, gt, sub
  18. from sys import maxsize, version_info
  19. from six import binary_type, string_types, text_type
  20. from six.moves import filter, map, range, zip, zip_longest
  21. from .recipes import consume, flatten, take
  22. __all__ = [
  23. 'adjacent',
  24. 'always_iterable',
  25. 'always_reversible',
  26. 'bucket',
  27. 'chunked',
  28. 'circular_shifts',
  29. 'collapse',
  30. 'collate',
  31. 'consecutive_groups',
  32. 'consumer',
  33. 'count_cycle',
  34. 'difference',
  35. 'distinct_permutations',
  36. 'distribute',
  37. 'divide',
  38. 'exactly_n',
  39. 'first',
  40. 'groupby_transform',
  41. 'ilen',
  42. 'interleave_longest',
  43. 'interleave',
  44. 'intersperse',
  45. 'islice_extended',
  46. 'iterate',
  47. 'locate',
  48. 'lstrip',
  49. 'make_decorator',
  50. 'numeric_range',
  51. 'one',
  52. 'padded',
  53. 'peekable',
  54. 'rstrip',
  55. 'run_length',
  56. 'seekable',
  57. 'SequenceView',
  58. 'side_effect',
  59. 'sliced',
  60. 'sort_together',
  61. 'split_at',
  62. 'split_after',
  63. 'split_before',
  64. 'spy',
  65. 'stagger',
  66. 'strip',
  67. 'unique_to_each',
  68. 'windowed',
  69. 'with_iter',
  70. 'zip_offset',
  71. ]
  72. _marker = object()
  73. def chunked(iterable, n):
  74. """Break *iterable* into lists of length *n*:
  75. >>> list(chunked([1, 2, 3, 4, 5, 6], 3))
  76. [[1, 2, 3], [4, 5, 6]]
  77. If the length of *iterable* is not evenly divisible by *n*, the last
  78. returned list will be shorter:
  79. >>> list(chunked([1, 2, 3, 4, 5, 6, 7, 8], 3))
  80. [[1, 2, 3], [4, 5, 6], [7, 8]]
  81. To use a fill-in value instead, see the :func:`grouper` recipe.
  82. :func:`chunked` is useful for splitting up a computation on a large number
  83. of keys into batches, to be pickled and sent off to worker processes. One
  84. example is operations on rows in MySQL, which does not implement
  85. server-side cursors properly and would otherwise load the entire dataset
  86. into RAM on the client.
  87. """
  88. return iter(partial(take, n, iter(iterable)), [])
  89. def first(iterable, default=_marker):
  90. """Return the first item of *iterable*, or *default* if *iterable* is
  91. empty.
  92. >>> first([0, 1, 2, 3])
  93. 0
  94. >>> first([], 'some default')
  95. 'some default'
  96. If *default* is not provided and there are no items in the iterable,
  97. raise ``ValueError``.
  98. :func:`first` is useful when you have a generator of expensive-to-retrieve
  99. values and want any arbitrary one. It is marginally shorter than
  100. ``next(iter(iterable), default)``.
  101. """
  102. try:
  103. return next(iter(iterable))
  104. except StopIteration:
  105. # I'm on the edge about raising ValueError instead of StopIteration. At
  106. # the moment, ValueError wins, because the caller could conceivably
  107. # want to do something different with flow control when I raise the
  108. # exception, and it's weird to explicitly catch StopIteration.
  109. if default is _marker:
  110. raise ValueError('first() was called on an empty iterable, and no '
  111. 'default value was provided.')
  112. return default
  113. class peekable(object):
  114. """Wrap an iterator to allow lookahead and prepending elements.
  115. Call :meth:`peek` on the result to get the value that will be returned
  116. by :func:`next`. This won't advance the iterator:
  117. >>> p = peekable(['a', 'b'])
  118. >>> p.peek()
  119. 'a'
  120. >>> next(p)
  121. 'a'
  122. Pass :meth:`peek` a default value to return that instead of raising
  123. ``StopIteration`` when the iterator is exhausted.
  124. >>> p = peekable([])
  125. >>> p.peek('hi')
  126. 'hi'
  127. peekables also offer a :meth:`prepend` method, which "inserts" items
  128. at the head of the iterable:
  129. >>> p = peekable([1, 2, 3])
  130. >>> p.prepend(10, 11, 12)
  131. >>> next(p)
  132. 10
  133. >>> p.peek()
  134. 11
  135. >>> list(p)
  136. [11, 12, 1, 2, 3]
  137. peekables can be indexed. Index 0 is the item that will be returned by
  138. :func:`next`, index 1 is the item after that, and so on:
  139. The values up to the given index will be cached.
  140. >>> p = peekable(['a', 'b', 'c', 'd'])
  141. >>> p[0]
  142. 'a'
  143. >>> p[1]
  144. 'b'
  145. >>> next(p)
  146. 'a'
  147. Negative indexes are supported, but be aware that they will cache the
  148. remaining items in the source iterator, which may require significant
  149. storage.
  150. To check whether a peekable is exhausted, check its truth value:
  151. >>> p = peekable(['a', 'b'])
  152. >>> if p: # peekable has items
  153. ... list(p)
  154. ['a', 'b']
  155. >>> if not p: # peekable is exhaused
  156. ... list(p)
  157. []
  158. """
  159. def __init__(self, iterable):
  160. self._it = iter(iterable)
  161. self._cache = deque()
  162. def __iter__(self):
  163. return self
  164. def __bool__(self):
  165. try:
  166. self.peek()
  167. except StopIteration:
  168. return False
  169. return True
  170. def __nonzero__(self):
  171. # For Python 2 compatibility
  172. return self.__bool__()
  173. def peek(self, default=_marker):
  174. """Return the item that will be next returned from ``next()``.
  175. Return ``default`` if there are no items left. If ``default`` is not
  176. provided, raise ``StopIteration``.
  177. """
  178. if not self._cache:
  179. try:
  180. self._cache.append(next(self._it))
  181. except StopIteration:
  182. if default is _marker:
  183. raise
  184. return default
  185. return self._cache[0]
  186. def prepend(self, *items):
  187. """Stack up items to be the next ones returned from ``next()`` or
  188. ``self.peek()``. The items will be returned in
  189. first in, first out order::
  190. >>> p = peekable([1, 2, 3])
  191. >>> p.prepend(10, 11, 12)
  192. >>> next(p)
  193. 10
  194. >>> list(p)
  195. [11, 12, 1, 2, 3]
  196. It is possible, by prepending items, to "resurrect" a peekable that
  197. previously raised ``StopIteration``.
  198. >>> p = peekable([])
  199. >>> next(p)
  200. Traceback (most recent call last):
  201. ...
  202. StopIteration
  203. >>> p.prepend(1)
  204. >>> next(p)
  205. 1
  206. >>> next(p)
  207. Traceback (most recent call last):
  208. ...
  209. StopIteration
  210. """
  211. self._cache.extendleft(reversed(items))
  212. def __next__(self):
  213. if self._cache:
  214. return self._cache.popleft()
  215. return next(self._it)
  216. next = __next__ # For Python 2 compatibility
  217. def _get_slice(self, index):
  218. # Normalize the slice's arguments
  219. step = 1 if (index.step is None) else index.step
  220. if step > 0:
  221. start = 0 if (index.start is None) else index.start
  222. stop = maxsize if (index.stop is None) else index.stop
  223. elif step < 0:
  224. start = -1 if (index.start is None) else index.start
  225. stop = (-maxsize - 1) if (index.stop is None) else index.stop
  226. else:
  227. raise ValueError('slice step cannot be zero')
  228. # If either the start or stop index is negative, we'll need to cache
  229. # the rest of the iterable in order to slice from the right side.
  230. if (start < 0) or (stop < 0):
  231. self._cache.extend(self._it)
  232. # Otherwise we'll need to find the rightmost index and cache to that
  233. # point.
  234. else:
  235. n = min(max(start, stop) + 1, maxsize)
  236. cache_len = len(self._cache)
  237. if n >= cache_len:
  238. self._cache.extend(islice(self._it, n - cache_len))
  239. return list(self._cache)[index]
  240. def __getitem__(self, index):
  241. if isinstance(index, slice):
  242. return self._get_slice(index)
  243. cache_len = len(self._cache)
  244. if index < 0:
  245. self._cache.extend(self._it)
  246. elif index >= cache_len:
  247. self._cache.extend(islice(self._it, index + 1 - cache_len))
  248. return self._cache[index]
  249. def _collate(*iterables, **kwargs):
  250. """Helper for ``collate()``, called when the user is using the ``reverse``
  251. or ``key`` keyword arguments on Python versions below 3.5.
  252. """
  253. key = kwargs.pop('key', lambda a: a)
  254. reverse = kwargs.pop('reverse', False)
  255. min_or_max = partial(max if reverse else min, key=itemgetter(0))
  256. peekables = [peekable(it) for it in iterables]
  257. peekables = [p for p in peekables if p] # Kill empties.
  258. while peekables:
  259. _, p = min_or_max((key(p.peek()), p) for p in peekables)
  260. yield next(p)
  261. peekables = [x for x in peekables if x]
  262. def collate(*iterables, **kwargs):
  263. """Return a sorted merge of the items from each of several already-sorted
  264. *iterables*.
  265. >>> list(collate('ACDZ', 'AZ', 'JKL'))
  266. ['A', 'A', 'C', 'D', 'J', 'K', 'L', 'Z', 'Z']
  267. Works lazily, keeping only the next value from each iterable in memory. Use
  268. :func:`collate` to, for example, perform a n-way mergesort of items that
  269. don't fit in memory.
  270. If a *key* function is specified, the iterables will be sorted according
  271. to its result:
  272. >>> key = lambda s: int(s) # Sort by numeric value, not by string
  273. >>> list(collate(['1', '10'], ['2', '11'], key=key))
  274. ['1', '2', '10', '11']
  275. If the *iterables* are sorted in descending order, set *reverse* to
  276. ``True``:
  277. >>> list(collate([5, 3, 1], [4, 2, 0], reverse=True))
  278. [5, 4, 3, 2, 1, 0]
  279. If the elements of the passed-in iterables are out of order, you might get
  280. unexpected results.
  281. On Python 2.7, this function delegates to :func:`heapq.merge` if neither
  282. of the keyword arguments are specified. On Python 3.5+, this function
  283. is an alias for :func:`heapq.merge`.
  284. """
  285. if not kwargs:
  286. return merge(*iterables)
  287. return _collate(*iterables, **kwargs)
  288. # If using Python version 3.5 or greater, heapq.merge() will be faster than
  289. # collate - use that instead.
  290. if version_info >= (3, 5, 0):
  291. _collate_docstring = collate.__doc__
  292. collate = partial(merge)
  293. collate.__doc__ = _collate_docstring
  294. def consumer(func):
  295. """Decorator that automatically advances a PEP-342-style "reverse iterator"
  296. to its first yield point so you don't have to call ``next()`` on it
  297. manually.
  298. >>> @consumer
  299. ... def tally():
  300. ... i = 0
  301. ... while True:
  302. ... print('Thing number %s is %s.' % (i, (yield)))
  303. ... i += 1
  304. ...
  305. >>> t = tally()
  306. >>> t.send('red')
  307. Thing number 0 is red.
  308. >>> t.send('fish')
  309. Thing number 1 is fish.
  310. Without the decorator, you would have to call ``next(t)`` before
  311. ``t.send()`` could be used.
  312. """
  313. @wraps(func)
  314. def wrapper(*args, **kwargs):
  315. gen = func(*args, **kwargs)
  316. next(gen)
  317. return gen
  318. return wrapper
  319. def ilen(iterable):
  320. """Return the number of items in *iterable*.
  321. >>> ilen(x for x in range(1000000) if x % 3 == 0)
  322. 333334
  323. This consumes the iterable, so handle with care.
  324. """
  325. d = deque(enumerate(iterable, 1), maxlen=1)
  326. return d[0][0] if d else 0
  327. def iterate(func, start):
  328. """Return ``start``, ``func(start)``, ``func(func(start))``, ...
  329. >>> from itertools import islice
  330. >>> list(islice(iterate(lambda x: 2*x, 1), 10))
  331. [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]
  332. """
  333. while True:
  334. yield start
  335. start = func(start)
  336. def with_iter(context_manager):
  337. """Wrap an iterable in a ``with`` statement, so it closes once exhausted.
  338. For example, this will close the file when the iterator is exhausted::
  339. upper_lines = (line.upper() for line in with_iter(open('foo')))
  340. Any context manager which returns an iterable is a candidate for
  341. ``with_iter``.
  342. """
  343. with context_manager as iterable:
  344. for item in iterable:
  345. yield item
  346. def one(iterable, too_short=None, too_long=None):
  347. """Return the first item from *iterable*, which is expected to contain only
  348. that item. Raise an exception if *iterable* is empty or has more than one
  349. item.
  350. :func:`one` is useful for ensuring that an iterable contains only one item.
  351. For example, it can be used to retrieve the result of a database query
  352. that is expected to return a single row.
  353. If *iterable* is empty, ``ValueError`` will be raised. You may specify a
  354. different exception with the *too_short* keyword:
  355. >>> it = []
  356. >>> one(it) # doctest: +IGNORE_EXCEPTION_DETAIL
  357. Traceback (most recent call last):
  358. ...
  359. ValueError: too many items in iterable (expected 1)'
  360. >>> too_short = IndexError('too few items')
  361. >>> one(it, too_short=too_short) # doctest: +IGNORE_EXCEPTION_DETAIL
  362. Traceback (most recent call last):
  363. ...
  364. IndexError: too few items
  365. Similarly, if *iterable* contains more than one item, ``ValueError`` will
  366. be raised. You may specify a different exception with the *too_long*
  367. keyword:
  368. >>> it = ['too', 'many']
  369. >>> one(it) # doctest: +IGNORE_EXCEPTION_DETAIL
  370. Traceback (most recent call last):
  371. ...
  372. ValueError: too many items in iterable (expected 1)'
  373. >>> too_long = RuntimeError
  374. >>> one(it, too_long=too_long) # doctest: +IGNORE_EXCEPTION_DETAIL
  375. Traceback (most recent call last):
  376. ...
  377. RuntimeError
  378. Note that :func:`one` attempts to advance *iterable* twice to ensure there
  379. is only one item. If there is more than one, both items will be discarded.
  380. See :func:`spy` or :func:`peekable` to check iterable contents less
  381. destructively.
  382. """
  383. it = iter(iterable)
  384. try:
  385. value = next(it)
  386. except StopIteration:
  387. raise too_short or ValueError('too few items in iterable (expected 1)')
  388. try:
  389. next(it)
  390. except StopIteration:
  391. pass
  392. else:
  393. raise too_long or ValueError('too many items in iterable (expected 1)')
  394. return value
  395. def distinct_permutations(iterable):
  396. """Yield successive distinct permutations of the elements in *iterable*.
  397. >>> sorted(distinct_permutations([1, 0, 1]))
  398. [(0, 1, 1), (1, 0, 1), (1, 1, 0)]
  399. Equivalent to ``set(permutations(iterable))``, except duplicates are not
  400. generated and thrown away. For larger input sequences this is much more
  401. efficient.
  402. Duplicate permutations arise when there are duplicated elements in the
  403. input iterable. The number of items returned is
  404. `n! / (x_1! * x_2! * ... * x_n!)`, where `n` is the total number of
  405. items input, and each `x_i` is the count of a distinct item in the input
  406. sequence.
  407. """
  408. def perm_unique_helper(item_counts, perm, i):
  409. """Internal helper function
  410. :arg item_counts: Stores the unique items in ``iterable`` and how many
  411. times they are repeated
  412. :arg perm: The permutation that is being built for output
  413. :arg i: The index of the permutation being modified
  414. The output permutations are built up recursively; the distinct items
  415. are placed until their repetitions are exhausted.
  416. """
  417. if i < 0:
  418. yield tuple(perm)
  419. else:
  420. for item in item_counts:
  421. if item_counts[item] <= 0:
  422. continue
  423. perm[i] = item
  424. item_counts[item] -= 1
  425. for x in perm_unique_helper(item_counts, perm, i - 1):
  426. yield x
  427. item_counts[item] += 1
  428. item_counts = Counter(iterable)
  429. return perm_unique_helper(item_counts, [None] * len(iterable),
  430. len(iterable) - 1)
  431. def intersperse(e, iterable, n=1):
  432. """Intersperse filler element *e* among the items in *iterable*, leaving
  433. *n* items between each filler element.
  434. >>> list(intersperse('!', [1, 2, 3, 4, 5]))
  435. [1, '!', 2, '!', 3, '!', 4, '!', 5]
  436. >>> list(intersperse(None, [1, 2, 3, 4, 5], n=2))
  437. [1, 2, None, 3, 4, None, 5]
  438. """
  439. if n == 0:
  440. raise ValueError('n must be > 0')
  441. elif n == 1:
  442. # interleave(repeat(e), iterable) -> e, x_0, e, e, x_1, e, x_2...
  443. # islice(..., 1, None) -> x_0, e, e, x_1, e, x_2...
  444. return islice(interleave(repeat(e), iterable), 1, None)
  445. else:
  446. # interleave(filler, chunks) -> [e], [x_0, x_1], [e], [x_2, x_3]...
  447. # islice(..., 1, None) -> [x_0, x_1], [e], [x_2, x_3]...
  448. # flatten(...) -> x_0, x_1, e, x_2, x_3...
  449. filler = repeat([e])
  450. chunks = chunked(iterable, n)
  451. return flatten(islice(interleave(filler, chunks), 1, None))
  452. def unique_to_each(*iterables):
  453. """Return the elements from each of the input iterables that aren't in the
  454. other input iterables.
  455. For example, suppose you have a set of packages, each with a set of
  456. dependencies::
  457. {'pkg_1': {'A', 'B'}, 'pkg_2': {'B', 'C'}, 'pkg_3': {'B', 'D'}}
  458. If you remove one package, which dependencies can also be removed?
  459. If ``pkg_1`` is removed, then ``A`` is no longer necessary - it is not
  460. associated with ``pkg_2`` or ``pkg_3``. Similarly, ``C`` is only needed for
  461. ``pkg_2``, and ``D`` is only needed for ``pkg_3``::
  462. >>> unique_to_each({'A', 'B'}, {'B', 'C'}, {'B', 'D'})
  463. [['A'], ['C'], ['D']]
  464. If there are duplicates in one input iterable that aren't in the others
  465. they will be duplicated in the output. Input order is preserved::
  466. >>> unique_to_each("mississippi", "missouri")
  467. [['p', 'p'], ['o', 'u', 'r']]
  468. It is assumed that the elements of each iterable are hashable.
  469. """
  470. pool = [list(it) for it in iterables]
  471. counts = Counter(chain.from_iterable(map(set, pool)))
  472. uniques = {element for element in counts if counts[element] == 1}
  473. return [list(filter(uniques.__contains__, it)) for it in pool]
  474. def windowed(seq, n, fillvalue=None, step=1):
  475. """Return a sliding window of width *n* over the given iterable.
  476. >>> all_windows = windowed([1, 2, 3, 4, 5], 3)
  477. >>> list(all_windows)
  478. [(1, 2, 3), (2, 3, 4), (3, 4, 5)]
  479. When the window is larger than the iterable, *fillvalue* is used in place
  480. of missing values::
  481. >>> list(windowed([1, 2, 3], 4))
  482. [(1, 2, 3, None)]
  483. Each window will advance in increments of *step*:
  484. >>> list(windowed([1, 2, 3, 4, 5, 6], 3, fillvalue='!', step=2))
  485. [(1, 2, 3), (3, 4, 5), (5, 6, '!')]
  486. """
  487. if n < 0:
  488. raise ValueError('n must be >= 0')
  489. if n == 0:
  490. yield tuple()
  491. return
  492. if step < 1:
  493. raise ValueError('step must be >= 1')
  494. it = iter(seq)
  495. window = deque([], n)
  496. append = window.append
  497. # Initial deque fill
  498. for _ in range(n):
  499. append(next(it, fillvalue))
  500. yield tuple(window)
  501. # Appending new items to the right causes old items to fall off the left
  502. i = 0
  503. for item in it:
  504. append(item)
  505. i = (i + 1) % step
  506. if i % step == 0:
  507. yield tuple(window)
  508. # If there are items from the iterable in the window, pad with the given
  509. # value and emit them.
  510. if (i % step) and (step - i < n):
  511. for _ in range(step - i):
  512. append(fillvalue)
  513. yield tuple(window)
  514. class bucket(object):
  515. """Wrap *iterable* and return an object that buckets it iterable into
  516. child iterables based on a *key* function.
  517. >>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3']
  518. >>> s = bucket(iterable, key=lambda x: x[0])
  519. >>> a_iterable = s['a']
  520. >>> next(a_iterable)
  521. 'a1'
  522. >>> next(a_iterable)
  523. 'a2'
  524. >>> list(s['b'])
  525. ['b1', 'b2', 'b3']
  526. The original iterable will be advanced and its items will be cached until
  527. they are used by the child iterables. This may require significant storage.
  528. By default, attempting to select a bucket to which no items belong will
  529. exhaust the iterable and cache all values.
  530. If you specify a *validator* function, selected buckets will instead be
  531. checked against it.
  532. >>> from itertools import count
  533. >>> it = count(1, 2) # Infinite sequence of odd numbers
  534. >>> key = lambda x: x % 10 # Bucket by last digit
  535. >>> validator = lambda x: x in {1, 3, 5, 7, 9} # Odd digits only
  536. >>> s = bucket(it, key=key, validator=validator)
  537. >>> 2 in s
  538. False
  539. >>> list(s[2])
  540. []
  541. """
  542. def __init__(self, iterable, key, validator=None):
  543. self._it = iter(iterable)
  544. self._key = key
  545. self._cache = defaultdict(deque)
  546. self._validator = validator or (lambda x: True)
  547. def __contains__(self, value):
  548. if not self._validator(value):
  549. return False
  550. try:
  551. item = next(self[value])
  552. except StopIteration:
  553. return False
  554. else:
  555. self._cache[value].appendleft(item)
  556. return True
  557. def _get_values(self, value):
  558. """
  559. Helper to yield items from the parent iterator that match *value*.
  560. Items that don't match are stored in the local cache as they
  561. are encountered.
  562. """
  563. while True:
  564. # If we've cached some items that match the target value, emit
  565. # the first one and evict it from the cache.
  566. if self._cache[value]:
  567. yield self._cache[value].popleft()
  568. # Otherwise we need to advance the parent iterator to search for
  569. # a matching item, caching the rest.
  570. else:
  571. while True:
  572. item = next(self._it)
  573. item_value = self._key(item)
  574. if item_value == value:
  575. yield item
  576. break
  577. elif self._validator(item_value):
  578. self._cache[item_value].append(item)
  579. def __getitem__(self, value):
  580. if not self._validator(value):
  581. return iter(())
  582. return self._get_values(value)
  583. def spy(iterable, n=1):
  584. """Return a 2-tuple with a list containing the first *n* elements of
  585. *iterable*, and an iterator with the same items as *iterable*.
  586. This allows you to "look ahead" at the items in the iterable without
  587. advancing it.
  588. There is one item in the list by default:
  589. >>> iterable = 'abcdefg'
  590. >>> head, iterable = spy(iterable)
  591. >>> head
  592. ['a']
  593. >>> list(iterable)
  594. ['a', 'b', 'c', 'd', 'e', 'f', 'g']
  595. You may use unpacking to retrieve items instead of lists:
  596. >>> (head,), iterable = spy('abcdefg')
  597. >>> head
  598. 'a'
  599. >>> (first, second), iterable = spy('abcdefg', 2)
  600. >>> first
  601. 'a'
  602. >>> second
  603. 'b'
  604. The number of items requested can be larger than the number of items in
  605. the iterable:
  606. >>> iterable = [1, 2, 3, 4, 5]
  607. >>> head, iterable = spy(iterable, 10)
  608. >>> head
  609. [1, 2, 3, 4, 5]
  610. >>> list(iterable)
  611. [1, 2, 3, 4, 5]
  612. """
  613. it = iter(iterable)
  614. head = take(n, it)
  615. return head, chain(head, it)
  616. def interleave(*iterables):
  617. """Return a new iterable yielding from each iterable in turn,
  618. until the shortest is exhausted.
  619. >>> list(interleave([1, 2, 3], [4, 5], [6, 7, 8]))
  620. [1, 4, 6, 2, 5, 7]
  621. For a version that doesn't terminate after the shortest iterable is
  622. exhausted, see :func:`interleave_longest`.
  623. """
  624. return chain.from_iterable(zip(*iterables))
  625. def interleave_longest(*iterables):
  626. """Return a new iterable yielding from each iterable in turn,
  627. skipping any that are exhausted.
  628. >>> list(interleave_longest([1, 2, 3], [4, 5], [6, 7, 8]))
  629. [1, 4, 6, 2, 5, 7, 3, 8]
  630. This function produces the same output as :func:`roundrobin`, but may
  631. perform better for some inputs (in particular when the number of iterables
  632. is large).
  633. """
  634. i = chain.from_iterable(zip_longest(*iterables, fillvalue=_marker))
  635. return (x for x in i if x is not _marker)
  636. def collapse(iterable, base_type=None, levels=None):
  637. """Flatten an iterable with multiple levels of nesting (e.g., a list of
  638. lists of tuples) into non-iterable types.
  639. >>> iterable = [(1, 2), ([3, 4], [[5], [6]])]
  640. >>> list(collapse(iterable))
  641. [1, 2, 3, 4, 5, 6]
  642. String types are not considered iterable and will not be collapsed.
  643. To avoid collapsing other types, specify *base_type*:
  644. >>> iterable = ['ab', ('cd', 'ef'), ['gh', 'ij']]
  645. >>> list(collapse(iterable, base_type=tuple))
  646. ['ab', ('cd', 'ef'), 'gh', 'ij']
  647. Specify *levels* to stop flattening after a certain level:
  648. >>> iterable = [('a', ['b']), ('c', ['d'])]
  649. >>> list(collapse(iterable)) # Fully flattened
  650. ['a', 'b', 'c', 'd']
  651. >>> list(collapse(iterable, levels=1)) # Only one level flattened
  652. ['a', ['b'], 'c', ['d']]
  653. """
  654. def walk(node, level):
  655. if (
  656. ((levels is not None) and (level > levels)) or
  657. isinstance(node, string_types) or
  658. ((base_type is not None) and isinstance(node, base_type))
  659. ):
  660. yield node
  661. return
  662. try:
  663. tree = iter(node)
  664. except TypeError:
  665. yield node
  666. return
  667. else:
  668. for child in tree:
  669. for x in walk(child, level + 1):
  670. yield x
  671. for x in walk(iterable, 0):
  672. yield x
  673. def side_effect(func, iterable, chunk_size=None, before=None, after=None):
  674. """Invoke *func* on each item in *iterable* (or on each *chunk_size* group
  675. of items) before yielding the item.
  676. `func` must be a function that takes a single argument. Its return value
  677. will be discarded.
  678. *before* and *after* are optional functions that take no arguments. They
  679. will be executed before iteration starts and after it ends, respectively.
  680. `side_effect` can be used for logging, updating progress bars, or anything
  681. that is not functionally "pure."
  682. Emitting a status message:
  683. >>> from more_itertools import consume
  684. >>> func = lambda item: print('Received {}'.format(item))
  685. >>> consume(side_effect(func, range(2)))
  686. Received 0
  687. Received 1
  688. Operating on chunks of items:
  689. >>> pair_sums = []
  690. >>> func = lambda chunk: pair_sums.append(sum(chunk))
  691. >>> list(side_effect(func, [0, 1, 2, 3, 4, 5], 2))
  692. [0, 1, 2, 3, 4, 5]
  693. >>> list(pair_sums)
  694. [1, 5, 9]
  695. Writing to a file-like object:
  696. >>> from io import StringIO
  697. >>> from more_itertools import consume
  698. >>> f = StringIO()
  699. >>> func = lambda x: print(x, file=f)
  700. >>> before = lambda: print(u'HEADER', file=f)
  701. >>> after = f.close
  702. >>> it = [u'a', u'b', u'c']
  703. >>> consume(side_effect(func, it, before=before, after=after))
  704. >>> f.closed
  705. True
  706. """
  707. try:
  708. if before is not None:
  709. before()
  710. if chunk_size is None:
  711. for item in iterable:
  712. func(item)
  713. yield item
  714. else:
  715. for chunk in chunked(iterable, chunk_size):
  716. func(chunk)
  717. for item in chunk:
  718. yield item
  719. finally:
  720. if after is not None:
  721. after()
  722. def sliced(seq, n):
  723. """Yield slices of length *n* from the sequence *seq*.
  724. >>> list(sliced((1, 2, 3, 4, 5, 6), 3))
  725. [(1, 2, 3), (4, 5, 6)]
  726. If the length of the sequence is not divisible by the requested slice
  727. length, the last slice will be shorter.
  728. >>> list(sliced((1, 2, 3, 4, 5, 6, 7, 8), 3))
  729. [(1, 2, 3), (4, 5, 6), (7, 8)]
  730. This function will only work for iterables that support slicing.
  731. For non-sliceable iterables, see :func:`chunked`.
  732. """
  733. return takewhile(bool, (seq[i: i + n] for i in count(0, n)))
  734. def split_at(iterable, pred):
  735. """Yield lists of items from *iterable*, where each list is delimited by
  736. an item where callable *pred* returns ``True``. The lists do not include
  737. the delimiting items.
  738. >>> list(split_at('abcdcba', lambda x: x == 'b'))
  739. [['a'], ['c', 'd', 'c'], ['a']]
  740. >>> list(split_at(range(10), lambda n: n % 2 == 1))
  741. [[0], [2], [4], [6], [8], []]
  742. """
  743. buf = []
  744. for item in iterable:
  745. if pred(item):
  746. yield buf
  747. buf = []
  748. else:
  749. buf.append(item)
  750. yield buf
  751. def split_before(iterable, pred):
  752. """Yield lists of items from *iterable*, where each list starts with an
  753. item where callable *pred* returns ``True``:
  754. >>> list(split_before('OneTwo', lambda s: s.isupper()))
  755. [['O', 'n', 'e'], ['T', 'w', 'o']]
  756. >>> list(split_before(range(10), lambda n: n % 3 == 0))
  757. [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
  758. """
  759. buf = []
  760. for item in iterable:
  761. if pred(item) and buf:
  762. yield buf
  763. buf = []
  764. buf.append(item)
  765. yield buf
  766. def split_after(iterable, pred):
  767. """Yield lists of items from *iterable*, where each list ends with an
  768. item where callable *pred* returns ``True``:
  769. >>> list(split_after('one1two2', lambda s: s.isdigit()))
  770. [['o', 'n', 'e', '1'], ['t', 'w', 'o', '2']]
  771. >>> list(split_after(range(10), lambda n: n % 3 == 0))
  772. [[0], [1, 2, 3], [4, 5, 6], [7, 8, 9]]
  773. """
  774. buf = []
  775. for item in iterable:
  776. buf.append(item)
  777. if pred(item) and buf:
  778. yield buf
  779. buf = []
  780. if buf:
  781. yield buf
  782. def padded(iterable, fillvalue=None, n=None, next_multiple=False):
  783. """Yield the elements from *iterable*, followed by *fillvalue*, such that
  784. at least *n* items are emitted.
  785. >>> list(padded([1, 2, 3], '?', 5))
  786. [1, 2, 3, '?', '?']
  787. If *next_multiple* is ``True``, *fillvalue* will be emitted until the
  788. number of items emitted is a multiple of *n*::
  789. >>> list(padded([1, 2, 3, 4], n=3, next_multiple=True))
  790. [1, 2, 3, 4, None, None]
  791. If *n* is ``None``, *fillvalue* will be emitted indefinitely.
  792. """
  793. it = iter(iterable)
  794. if n is None:
  795. for item in chain(it, repeat(fillvalue)):
  796. yield item
  797. elif n < 1:
  798. raise ValueError('n must be at least 1')
  799. else:
  800. item_count = 0
  801. for item in it:
  802. yield item
  803. item_count += 1
  804. remaining = (n - item_count) % n if next_multiple else n - item_count
  805. for _ in range(remaining):
  806. yield fillvalue
  807. def distribute(n, iterable):
  808. """Distribute the items from *iterable* among *n* smaller iterables.
  809. >>> group_1, group_2 = distribute(2, [1, 2, 3, 4, 5, 6])
  810. >>> list(group_1)
  811. [1, 3, 5]
  812. >>> list(group_2)
  813. [2, 4, 6]
  814. If the length of *iterable* is not evenly divisible by *n*, then the
  815. length of the returned iterables will not be identical:
  816. >>> children = distribute(3, [1, 2, 3, 4, 5, 6, 7])
  817. >>> [list(c) for c in children]
  818. [[1, 4, 7], [2, 5], [3, 6]]
  819. If the length of *iterable* is smaller than *n*, then the last returned
  820. iterables will be empty:
  821. >>> children = distribute(5, [1, 2, 3])
  822. >>> [list(c) for c in children]
  823. [[1], [2], [3], [], []]
  824. This function uses :func:`itertools.tee` and may require significant
  825. storage. If you need the order items in the smaller iterables to match the
  826. original iterable, see :func:`divide`.
  827. """
  828. if n < 1:
  829. raise ValueError('n must be at least 1')
  830. children = tee(iterable, n)
  831. return [islice(it, index, None, n) for index, it in enumerate(children)]
  832. def stagger(iterable, offsets=(-1, 0, 1), longest=False, fillvalue=None):
  833. """Yield tuples whose elements are offset from *iterable*.
  834. The amount by which the `i`-th item in each tuple is offset is given by
  835. the `i`-th item in *offsets*.
  836. >>> list(stagger([0, 1, 2, 3]))
  837. [(None, 0, 1), (0, 1, 2), (1, 2, 3)]
  838. >>> list(stagger(range(8), offsets=(0, 2, 4)))
  839. [(0, 2, 4), (1, 3, 5), (2, 4, 6), (3, 5, 7)]
  840. By default, the sequence will end when the final element of a tuple is the
  841. last item in the iterable. To continue until the first element of a tuple
  842. is the last item in the iterable, set *longest* to ``True``::
  843. >>> list(stagger([0, 1, 2, 3], longest=True))
  844. [(None, 0, 1), (0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)]
  845. By default, ``None`` will be used to replace offsets beyond the end of the
  846. sequence. Specify *fillvalue* to use some other value.
  847. """
  848. children = tee(iterable, len(offsets))
  849. return zip_offset(
  850. *children, offsets=offsets, longest=longest, fillvalue=fillvalue
  851. )
  852. def zip_offset(*iterables, **kwargs):
  853. """``zip`` the input *iterables* together, but offset the `i`-th iterable
  854. by the `i`-th item in *offsets*.
  855. >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1)))
  856. [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e')]
  857. This can be used as a lightweight alternative to SciPy or pandas to analyze
  858. data sets in which somes series have a lead or lag relationship.
  859. By default, the sequence will end when the shortest iterable is exhausted.
  860. To continue until the longest iterable is exhausted, set *longest* to
  861. ``True``.
  862. >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1), longest=True))
  863. [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e'), (None, 'f')]
  864. By default, ``None`` will be used to replace offsets beyond the end of the
  865. sequence. Specify *fillvalue* to use some other value.
  866. """
  867. offsets = kwargs['offsets']
  868. longest = kwargs.get('longest', False)
  869. fillvalue = kwargs.get('fillvalue', None)
  870. if len(iterables) != len(offsets):
  871. raise ValueError("Number of iterables and offsets didn't match")
  872. staggered = []
  873. for it, n in zip(iterables, offsets):
  874. if n < 0:
  875. staggered.append(chain(repeat(fillvalue, -n), it))
  876. elif n > 0:
  877. staggered.append(islice(it, n, None))
  878. else:
  879. staggered.append(it)
  880. if longest:
  881. return zip_longest(*staggered, fillvalue=fillvalue)
  882. return zip(*staggered)
  883. def sort_together(iterables, key_list=(0,), reverse=False):
  884. """Return the input iterables sorted together, with *key_list* as the
  885. priority for sorting. All iterables are trimmed to the length of the
  886. shortest one.
  887. This can be used like the sorting function in a spreadsheet. If each
  888. iterable represents a column of data, the key list determines which
  889. columns are used for sorting.
  890. By default, all iterables are sorted using the ``0``-th iterable::
  891. >>> iterables = [(4, 3, 2, 1), ('a', 'b', 'c', 'd')]
  892. >>> sort_together(iterables)
  893. [(1, 2, 3, 4), ('d', 'c', 'b', 'a')]
  894. Set a different key list to sort according to another iterable.
  895. Specifying mutliple keys dictates how ties are broken::
  896. >>> iterables = [(3, 1, 2), (0, 1, 0), ('c', 'b', 'a')]
  897. >>> sort_together(iterables, key_list=(1, 2))
  898. [(2, 3, 1), (0, 0, 1), ('a', 'c', 'b')]
  899. Set *reverse* to ``True`` to sort in descending order.
  900. >>> sort_together([(1, 2, 3), ('c', 'b', 'a')], reverse=True)
  901. [(3, 2, 1), ('a', 'b', 'c')]
  902. """
  903. return list(zip(*sorted(zip(*iterables),
  904. key=itemgetter(*key_list),
  905. reverse=reverse)))
  906. def divide(n, iterable):
  907. """Divide the elements from *iterable* into *n* parts, maintaining
  908. order.
  909. >>> group_1, group_2 = divide(2, [1, 2, 3, 4, 5, 6])
  910. >>> list(group_1)
  911. [1, 2, 3]
  912. >>> list(group_2)
  913. [4, 5, 6]
  914. If the length of *iterable* is not evenly divisible by *n*, then the
  915. length of the returned iterables will not be identical:
  916. >>> children = divide(3, [1, 2, 3, 4, 5, 6, 7])
  917. >>> [list(c) for c in children]
  918. [[1, 2, 3], [4, 5], [6, 7]]
  919. If the length of the iterable is smaller than n, then the last returned
  920. iterables will be empty:
  921. >>> children = divide(5, [1, 2, 3])
  922. >>> [list(c) for c in children]
  923. [[1], [2], [3], [], []]
  924. This function will exhaust the iterable before returning and may require
  925. significant storage. If order is not important, see :func:`distribute`,
  926. which does not first pull the iterable into memory.
  927. """
  928. if n < 1:
  929. raise ValueError('n must be at least 1')
  930. seq = tuple(iterable)
  931. q, r = divmod(len(seq), n)
  932. ret = []
  933. for i in range(n):
  934. start = (i * q) + (i if i < r else r)
  935. stop = ((i + 1) * q) + (i + 1 if i + 1 < r else r)
  936. ret.append(iter(seq[start:stop]))
  937. return ret
  938. def always_iterable(obj, base_type=(text_type, binary_type)):
  939. """If *obj* is iterable, return an iterator over its items::
  940. >>> obj = (1, 2, 3)
  941. >>> list(always_iterable(obj))
  942. [1, 2, 3]
  943. If *obj* is not iterable, return a one-item iterable containing *obj*::
  944. >>> obj = 1
  945. >>> list(always_iterable(obj))
  946. [1]
  947. If *obj* is ``None``, return an empty iterable:
  948. >>> obj = None
  949. >>> list(always_iterable(None))
  950. []
  951. By default, binary and text strings are not considered iterable::
  952. >>> obj = 'foo'
  953. >>> list(always_iterable(obj))
  954. ['foo']
  955. If *base_type* is set, objects for which ``isinstance(obj, base_type)``
  956. returns ``True`` won't be considered iterable.
  957. >>> obj = {'a': 1}
  958. >>> list(always_iterable(obj)) # Iterate over the dict's keys
  959. ['a']
  960. >>> list(always_iterable(obj, base_type=dict)) # Treat dicts as a unit
  961. [{'a': 1}]
  962. Set *base_type* to ``None`` to avoid any special handling and treat objects
  963. Python considers iterable as iterable:
  964. >>> obj = 'foo'
  965. >>> list(always_iterable(obj, base_type=None))
  966. ['f', 'o', 'o']
  967. """
  968. if obj is None:
  969. return iter(())
  970. if (base_type is not None) and isinstance(obj, base_type):
  971. return iter((obj,))
  972. try:
  973. return iter(obj)
  974. except TypeError:
  975. return iter((obj,))
  976. def adjacent(predicate, iterable, distance=1):
  977. """Return an iterable over `(bool, item)` tuples where the `item` is
  978. drawn from *iterable* and the `bool` indicates whether
  979. that item satisfies the *predicate* or is adjacent to an item that does.
  980. For example, to find whether items are adjacent to a ``3``::
  981. >>> list(adjacent(lambda x: x == 3, range(6)))
  982. [(False, 0), (False, 1), (True, 2), (True, 3), (True, 4), (False, 5)]
  983. Set *distance* to change what counts as adjacent. For example, to find
  984. whether items are two places away from a ``3``:
  985. >>> list(adjacent(lambda x: x == 3, range(6), distance=2))
  986. [(False, 0), (True, 1), (True, 2), (True, 3), (True, 4), (True, 5)]
  987. This is useful for contextualizing the results of a search function.
  988. For example, a code comparison tool might want to identify lines that
  989. have changed, but also surrounding lines to give the viewer of the diff
  990. context.
  991. The predicate function will only be called once for each item in the
  992. iterable.
  993. See also :func:`groupby_transform`, which can be used with this function
  994. to group ranges of items with the same `bool` value.
  995. """
  996. # Allow distance=0 mainly for testing that it reproduces results with map()
  997. if distance < 0:
  998. raise ValueError('distance must be at least 0')
  999. i1, i2 = tee(iterable)
  1000. padding = [False] * distance
  1001. selected = chain(padding, map(predicate, i1), padding)
  1002. adjacent_to_selected = map(any, windowed(selected, 2 * distance + 1))
  1003. return zip(adjacent_to_selected, i2)
  1004. def groupby_transform(iterable, keyfunc=None, valuefunc=None):
  1005. """An extension of :func:`itertools.groupby` that transforms the values of
  1006. *iterable* after grouping them.
  1007. *keyfunc* is a function used to compute a grouping key for each item.
  1008. *valuefunc* is a function for transforming the items after grouping.
  1009. >>> iterable = 'AaaABbBCcA'
  1010. >>> keyfunc = lambda x: x.upper()
  1011. >>> valuefunc = lambda x: x.lower()
  1012. >>> grouper = groupby_transform(iterable, keyfunc, valuefunc)
  1013. >>> [(k, ''.join(g)) for k, g in grouper]
  1014. [('A', 'aaaa'), ('B', 'bbb'), ('C', 'cc'), ('A', 'a')]
  1015. *keyfunc* and *valuefunc* default to identity functions if they are not
  1016. specified.
  1017. :func:`groupby_transform` is useful when grouping elements of an iterable
  1018. using a separate iterable as the key. To do this, :func:`zip` the iterables
  1019. and pass a *keyfunc* that extracts the first element and a *valuefunc*
  1020. that extracts the second element::
  1021. >>> from operator import itemgetter
  1022. >>> keys = [0, 0, 1, 1, 1, 2, 2, 2, 3]
  1023. >>> values = 'abcdefghi'
  1024. >>> iterable = zip(keys, values)
  1025. >>> grouper = groupby_transform(iterable, itemgetter(0), itemgetter(1))
  1026. >>> [(k, ''.join(g)) for k, g in grouper]
  1027. [(0, 'ab'), (1, 'cde'), (2, 'fgh'), (3, 'i')]
  1028. """
  1029. valuefunc = (lambda x: x) if valuefunc is None else valuefunc
  1030. return ((k, map(valuefunc, g)) for k, g in groupby(iterable, keyfunc))
  1031. def numeric_range(*args):
  1032. """An extension of the built-in ``range()`` function whose arguments can
  1033. be any orderable numeric type.
  1034. With only *stop* specified, *start* defaults to ``0`` and *step*
  1035. defaults to ``1``. The output items will match the type of *stop*:
  1036. >>> list(numeric_range(3.5))
  1037. [0.0, 1.0, 2.0, 3.0]
  1038. With only *start* and *stop* specified, *step* defaults to ``1``. The
  1039. output items will match the type of *start*:
  1040. >>> from decimal import Decimal
  1041. >>> start = Decimal('2.1')
  1042. >>> stop = Decimal('5.1')
  1043. >>> list(numeric_range(start, stop))
  1044. [Decimal('2.1'), Decimal('3.1'), Decimal('4.1')]
  1045. With *start*, *stop*, and *step* specified the output items will match
  1046. the type of ``start + step``:
  1047. >>> from fractions import Fraction
  1048. >>> start = Fraction(1, 2) # Start at 1/2
  1049. >>> stop = Fraction(5, 2) # End at 5/2
  1050. >>> step = Fraction(1, 2) # Count by 1/2
  1051. >>> list(numeric_range(start, stop, step))
  1052. [Fraction(1, 2), Fraction(1, 1), Fraction(3, 2), Fraction(2, 1)]
  1053. If *step* is zero, ``ValueError`` is raised. Negative steps are supported:
  1054. >>> list(numeric_range(3, -1, -1.0))
  1055. [3.0, 2.0, 1.0, 0.0]
  1056. Be aware of the limitations of floating point numbers; the representation
  1057. of the yielded numbers may be surprising.
  1058. """
  1059. argc = len(args)
  1060. if argc == 1:
  1061. stop, = args
  1062. start = type(stop)(0)
  1063. step = 1
  1064. elif argc == 2:
  1065. start, stop = args
  1066. step = 1
  1067. elif argc == 3:
  1068. start, stop, step = args
  1069. else:
  1070. err_msg = 'numeric_range takes at most 3 arguments, got {}'
  1071. raise TypeError(err_msg.format(argc))
  1072. values = (start + (step * n) for n in count())
  1073. if step > 0:
  1074. return takewhile(partial(gt, stop), values)
  1075. elif step < 0:
  1076. return takewhile(partial(lt, stop), values)
  1077. else:
  1078. raise ValueError('numeric_range arg 3 must not be zero')
  1079. def count_cycle(iterable, n=None):
  1080. """Cycle through the items from *iterable* up to *n* times, yielding
  1081. the number of completed cycles along with each item. If *n* is omitted the
  1082. process repeats indefinitely.
  1083. >>> list(count_cycle('AB', 3))
  1084. [(0, 'A'), (0, 'B'), (1, 'A'), (1, 'B'), (2, 'A'), (2, 'B')]
  1085. """
  1086. iterable = tuple(iterable)
  1087. if not iterable:
  1088. return iter(())
  1089. counter = count() if n is None else range(n)
  1090. return ((i, item) for i in counter for item in iterable)
  1091. def locate(iterable, pred=bool):
  1092. """Yield the index of each item in *iterable* for which *pred* returns
  1093. ``True``.
  1094. *pred* defaults to :func:`bool`, which will select truthy items:
  1095. >>> list(locate([0, 1, 1, 0, 1, 0, 0]))
  1096. [1, 2, 4]
  1097. Set *pred* to a custom function to, e.g., find the indexes for a particular
  1098. item:
  1099. >>> list(locate(['a', 'b', 'c', 'b'], lambda x: x == 'b'))
  1100. [1, 3]
  1101. Use with :func:`windowed` to find the indexes of a sub-sequence:
  1102. >>> from more_itertools import windowed
  1103. >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
  1104. >>> sub = [1, 2, 3]
  1105. >>> pred = lambda w: w == tuple(sub) # windowed() returns tuples
  1106. >>> list(locate(windowed(iterable, len(sub)), pred=pred))
  1107. [1, 5, 9]
  1108. Use with :func:`seekable` to find indexes and then retrieve the associated
  1109. items:
  1110. >>> from itertools import count
  1111. >>> from more_itertools import seekable
  1112. >>> source = (3 * n + 1 if (n % 2) else n // 2 for n in count())
  1113. >>> it = seekable(source)
  1114. >>> pred = lambda x: x > 100
  1115. >>> indexes = locate(it, pred=pred)
  1116. >>> i = next(indexes)
  1117. >>> it.seek(i)
  1118. >>> next(it)
  1119. 106
  1120. """
  1121. return compress(count(), map(pred, iterable))
  1122. def lstrip(iterable, pred):
  1123. """Yield the items from *iterable*, but strip any from the beginning
  1124. for which *pred* returns ``True``.
  1125. For example, to remove a set of items from the start of an iterable:
  1126. >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
  1127. >>> pred = lambda x: x in {None, False, ''}
  1128. >>> list(lstrip(iterable, pred))
  1129. [1, 2, None, 3, False, None]
  1130. This function is analogous to to :func:`str.lstrip`, and is essentially
  1131. an wrapper for :func:`itertools.dropwhile`.
  1132. """
  1133. return dropwhile(pred, iterable)
  1134. def rstrip(iterable, pred):
  1135. """Yield the items from *iterable*, but strip any from the end
  1136. for which *pred* returns ``True``.
  1137. For example, to remove a set of items from the end of an iterable:
  1138. >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
  1139. >>> pred = lambda x: x in {None, False, ''}
  1140. >>> list(rstrip(iterable, pred))
  1141. [None, False, None, 1, 2, None, 3]
  1142. This function is analogous to :func:`str.rstrip`.
  1143. """
  1144. cache = []
  1145. cache_append = cache.append
  1146. for x in iterable:
  1147. if pred(x):
  1148. cache_append(x)
  1149. else:
  1150. for y in cache:
  1151. yield y
  1152. del cache[:]
  1153. yield x
  1154. def strip(iterable, pred):
  1155. """Yield the items from *iterable*, but strip any from the
  1156. beginning and end for which *pred* returns ``True``.
  1157. For example, to remove a set of items from both ends of an iterable:
  1158. >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
  1159. >>> pred = lambda x: x in {None, False, ''}
  1160. >>> list(strip(iterable, pred))
  1161. [1, 2, None, 3]
  1162. This function is analogous to :func:`str.strip`.
  1163. """
  1164. return rstrip(lstrip(iterable, pred), pred)
  1165. def islice_extended(iterable, *args):
  1166. """An extension of :func:`itertools.islice` that supports negative values
  1167. for *stop*, *start*, and *step*.
  1168. >>> iterable = iter('abcdefgh')
  1169. >>> list(islice_extended(iterable, -4, -1))
  1170. ['e', 'f', 'g']
  1171. Slices with negative values require some caching of *iterable*, but this
  1172. function takes care to minimize the amount of memory required.
  1173. For example, you can use a negative step with an infinite iterator:
  1174. >>> from itertools import count
  1175. >>> list(islice_extended(count(), 110, 99, -2))
  1176. [110, 108, 106, 104, 102, 100]
  1177. """
  1178. s = slice(*args)
  1179. start = s.start
  1180. stop = s.stop
  1181. if s.step == 0:
  1182. raise ValueError('step argument must be a non-zero integer or None.')
  1183. step = s.step or 1
  1184. it = iter(iterable)
  1185. if step > 0:
  1186. start = 0 if (start is None) else start
  1187. if (start < 0):
  1188. # Consume all but the last -start items
  1189. cache = deque(enumerate(it, 1), maxlen=-start)
  1190. len_iter = cache[-1][0] if cache else 0
  1191. # Adjust start to be positive
  1192. i = max(len_iter + start, 0)
  1193. # Adjust stop to be positive
  1194. if stop is None:
  1195. j = len_iter
  1196. elif stop >= 0:
  1197. j = min(stop, len_iter)
  1198. else:
  1199. j = max(len_iter + stop, 0)
  1200. # Slice the cache
  1201. n = j - i
  1202. if n <= 0:
  1203. return
  1204. for index, item in islice(cache, 0, n, step):
  1205. yield item
  1206. elif (stop is not None) and (stop < 0):
  1207. # Advance to the start position
  1208. next(islice(it, start, start), None)
  1209. # When stop is negative, we have to carry -stop items while
  1210. # iterating
  1211. cache = deque(islice(it, -stop), maxlen=-stop)
  1212. for index, item in enumerate(it):
  1213. cached_item = cache.popleft()
  1214. if index % step == 0:
  1215. yield cached_item
  1216. cache.append(item)
  1217. else:
  1218. # When both start and stop are positive we have the normal case
  1219. for item in islice(it, start, stop, step):
  1220. yield item
  1221. else:
  1222. start = -1 if (start is None) else start
  1223. if (stop is not None) and (stop < 0):
  1224. # Consume all but the last items
  1225. n = -stop - 1
  1226. cache = deque(enumerate(it, 1), maxlen=n)
  1227. len_iter = cache[-1][0] if cache else 0
  1228. # If start and stop are both negative they are comparable and
  1229. # we can just slice. Otherwise we can adjust start to be negative
  1230. # and then slice.
  1231. if start < 0:
  1232. i, j = start, stop
  1233. else:
  1234. i, j = min(start - len_iter, -1), None
  1235. for index, item in list(cache)[i:j:step]:
  1236. yield item
  1237. else:
  1238. # Advance to the stop position
  1239. if stop is not None:
  1240. m = stop + 1
  1241. next(islice(it, m, m), None)
  1242. # stop is positive, so if start is negative they are not comparable
  1243. # and we need the rest of the items.
  1244. if start < 0:
  1245. i = start
  1246. n = None
  1247. # stop is None and start is positive, so we just need items up to
  1248. # the start index.
  1249. elif stop is None:
  1250. i = None
  1251. n = start + 1
  1252. # Both stop and start are positive, so they are comparable.
  1253. else:
  1254. i = None
  1255. n = start - stop
  1256. if n <= 0:
  1257. return
  1258. cache = list(islice(it, n))
  1259. for item in cache[i::step]:
  1260. yield item
  1261. def always_reversible(iterable):
  1262. """An extension of :func:`reversed` that supports all iterables, not
  1263. just those which implement the ``Reversible`` or ``Sequence`` protocols.
  1264. >>> print(*always_reversible(x for x in range(3)))
  1265. 2 1 0
  1266. If the iterable is already reversible, this function returns the
  1267. result of :func:`reversed()`. If the iterable is not reversible,
  1268. this function will cache the remaining items in the iterable and
  1269. yield them in reverse order, which may require significant storage.
  1270. """
  1271. try:
  1272. return reversed(iterable)
  1273. except TypeError:
  1274. return reversed(list(iterable))
  1275. def consecutive_groups(iterable, ordering=lambda x: x):
  1276. """Yield groups of consecutive items using :func:`itertools.groupby`.
  1277. The *ordering* function determines whether two items are adjacent by
  1278. returning their position.
  1279. By default, the ordering function is the identity function. This is
  1280. suitable for finding runs of numbers:
  1281. >>> iterable = [1, 10, 11, 12, 20, 30, 31, 32, 33, 40]
  1282. >>> for group in consecutive_groups(iterable):
  1283. ... print(list(group))
  1284. [1]
  1285. [10, 11, 12]
  1286. [20]
  1287. [30, 31, 32, 33]
  1288. [40]
  1289. For finding runs of adjacent letters, try using the :meth:`index` method
  1290. of a string of letters:
  1291. >>> from string import ascii_lowercase
  1292. >>> iterable = 'abcdfgilmnop'
  1293. >>> ordering = ascii_lowercase.index
  1294. >>> for group in consecutive_groups(iterable, ordering):
  1295. ... print(list(group))
  1296. ['a', 'b', 'c', 'd']
  1297. ['f', 'g']
  1298. ['i']
  1299. ['l', 'm', 'n', 'o', 'p']
  1300. """
  1301. for k, g in groupby(
  1302. enumerate(iterable), key=lambda x: x[0] - ordering(x[1])
  1303. ):
  1304. yield map(itemgetter(1), g)
  1305. def difference(iterable, func=sub):
  1306. """By default, compute the first difference of *iterable* using
  1307. :func:`operator.sub`.
  1308. >>> iterable = [0, 1, 3, 6, 10]
  1309. >>> list(difference(iterable))
  1310. [0, 1, 2, 3, 4]
  1311. This is the opposite of :func:`accumulate`'s default behavior:
  1312. >>> from more_itertools import accumulate
  1313. >>> iterable = [0, 1, 2, 3, 4]
  1314. >>> list(accumulate(iterable))
  1315. [0, 1, 3, 6, 10]
  1316. >>> list(difference(accumulate(iterable)))
  1317. [0, 1, 2, 3, 4]
  1318. By default *func* is :func:`operator.sub`, but other functions can be
  1319. specified. They will be applied as follows::
  1320. A, B, C, D, ... --> A, func(B, A), func(C, B), func(D, C), ...
  1321. For example, to do progressive division:
  1322. >>> iterable = [1, 2, 6, 24, 120] # Factorial sequence
  1323. >>> func = lambda x, y: x // y
  1324. >>> list(difference(iterable, func))
  1325. [1, 2, 3, 4, 5]
  1326. """
  1327. a, b = tee(iterable)
  1328. try:
  1329. item = next(b)
  1330. except StopIteration:
  1331. return iter([])
  1332. return chain([item], map(lambda x: func(x[1], x[0]), zip(a, b)))
  1333. class SequenceView(Sequence):
  1334. """Return a read-only view of the sequence object *target*.
  1335. :class:`SequenceView` objects are analagous to Python's built-in
  1336. "dictionary view" types. They provide a dynamic view of a sequence's items,
  1337. meaning that when the sequence updates, so does the view.
  1338. >>> seq = ['0', '1', '2']
  1339. >>> view = SequenceView(seq)
  1340. >>> view
  1341. SequenceView(['0', '1', '2'])
  1342. >>> seq.append('3')
  1343. >>> view
  1344. SequenceView(['0', '1', '2', '3'])
  1345. Sequence views support indexing, slicing, and length queries. They act
  1346. like the underlying sequence, except they don't allow assignment:
  1347. >>> view[1]
  1348. '1'
  1349. >>> view[1:-1]
  1350. ['1', '2']
  1351. >>> len(view)
  1352. 4
  1353. Sequence views are useful as an alternative to copying, as they don't
  1354. require (much) extra storage.
  1355. """
  1356. def __init__(self, target):
  1357. if not isinstance(target, Sequence):
  1358. raise TypeError
  1359. self._target = target
  1360. def __getitem__(self, index):
  1361. return self._target[index]
  1362. def __len__(self):
  1363. return len(self._target)
  1364. def __repr__(self):
  1365. return '{}({})'.format(self.__class__.__name__, repr(self._target))
  1366. class seekable(object):
  1367. """Wrap an iterator to allow for seeking backward and forward. This
  1368. progressively caches the items in the source iterable so they can be
  1369. re-visited.
  1370. Call :meth:`seek` with an index to seek to that position in the source
  1371. iterable.
  1372. To "reset" an iterator, seek to ``0``:
  1373. >>> from itertools import count
  1374. >>> it = seekable((str(n) for n in count()))
  1375. >>> next(it), next(it), next(it)
  1376. ('0', '1', '2')
  1377. >>> it.seek(0)
  1378. >>> next(it), next(it), next(it)
  1379. ('0', '1', '2')
  1380. >>> next(it)
  1381. '3'
  1382. You can also seek forward:
  1383. >>> it = seekable((str(n) for n in range(20)))
  1384. >>> it.seek(10)
  1385. >>> next(it)
  1386. '10'
  1387. >>> it.seek(20) # Seeking past the end of the source isn't a problem
  1388. >>> list(it)
  1389. []
  1390. >>> it.seek(0) # Resetting works even after hitting the end
  1391. >>> next(it), next(it), next(it)
  1392. ('0', '1', '2')
  1393. The cache grows as the source iterable progresses, so beware of wrapping
  1394. very large or infinite iterables.
  1395. You may view the contents of the cache with the :meth:`elements` method.
  1396. That returns a :class:`SequenceView`, a view that updates automatically:
  1397. >>> it = seekable((str(n) for n in range(10)))
  1398. >>> next(it), next(it), next(it)
  1399. ('0', '1', '2')
  1400. >>> elements = it.elements()
  1401. >>> elements
  1402. SequenceView(['0', '1', '2'])
  1403. >>> next(it)
  1404. '3'
  1405. >>> elements
  1406. SequenceView(['0', '1', '2', '3'])
  1407. """
  1408. def __init__(self, iterable):
  1409. self._source = iter(iterable)
  1410. self._cache = []
  1411. self._index = None
  1412. def __iter__(self):
  1413. return self
  1414. def __next__(self):
  1415. if self._index is not None:
  1416. try:
  1417. item = self._cache[self._index]
  1418. except IndexError:
  1419. self._index = None
  1420. else:
  1421. self._index += 1
  1422. return item
  1423. item = next(self._source)
  1424. self._cache.append(item)
  1425. return item
  1426. next = __next__
  1427. def elements(self):
  1428. return SequenceView(self._cache)
  1429. def seek(self, index):
  1430. self._index = index
  1431. remainder = index - len(self._cache)
  1432. if remainder > 0:
  1433. consume(self, remainder)
  1434. class run_length(object):
  1435. """
  1436. :func:`run_length.encode` compresses an iterable with run-length encoding.
  1437. It yields groups of repeated items with the count of how many times they
  1438. were repeated:
  1439. >>> uncompressed = 'abbcccdddd'
  1440. >>> list(run_length.encode(uncompressed))
  1441. [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
  1442. :func:`run_length.decode` decompresses an iterable that was previously
  1443. compressed with run-length encoding. It yields the items of the
  1444. decompressed iterable:
  1445. >>> compressed = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
  1446. >>> list(run_length.decode(compressed))
  1447. ['a', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd', 'd']
  1448. """
  1449. @staticmethod
  1450. def encode(iterable):
  1451. return ((k, ilen(g)) for k, g in groupby(iterable))
  1452. @staticmethod
  1453. def decode(iterable):
  1454. return chain.from_iterable(repeat(k, n) for k, n in iterable)
  1455. def exactly_n(iterable, n, predicate=bool):
  1456. """Return ``True`` if exactly ``n`` items in the iterable are ``True``
  1457. according to the *predicate* function.
  1458. >>> exactly_n([True, True, False], 2)
  1459. True
  1460. >>> exactly_n([True, True, False], 1)
  1461. False
  1462. >>> exactly_n([0, 1, 2, 3, 4, 5], 3, lambda x: x < 3)
  1463. True
  1464. The iterable will be advanced until ``n + 1`` truthy items are encountered,
  1465. so avoid calling it on infinite iterables.
  1466. """
  1467. return len(take(n + 1, filter(predicate, iterable))) == n
  1468. def circular_shifts(iterable):
  1469. """Return a list of circular shifts of *iterable*.
  1470. >>> circular_shifts(range(4))
  1471. [(0, 1, 2, 3), (1, 2, 3, 0), (2, 3, 0, 1), (3, 0, 1, 2)]
  1472. """
  1473. lst = list(iterable)
  1474. return take(len(lst), windowed(cycle(lst), len(lst)))
  1475. def make_decorator(wrapping_func, result_index=0):
  1476. """Return a decorator version of *wrapping_func*, which is a function that
  1477. modifies an iterable. *result_index* is the position in that function's
  1478. signature where the iterable goes.
  1479. This lets you use itertools on the "production end," i.e. at function
  1480. definition. This can augment what the function returns without changing the
  1481. function's code.
  1482. For example, to produce a decorator version of :func:`chunked`:
  1483. >>> from more_itertools import chunked
  1484. >>> chunker = make_decorator(chunked, result_index=0)
  1485. >>> @chunker(3)
  1486. ... def iter_range(n):
  1487. ... return iter(range(n))
  1488. ...
  1489. >>> list(iter_range(9))
  1490. [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
  1491. To only allow truthy items to be returned:
  1492. >>> truth_serum = make_decorator(filter, result_index=1)
  1493. >>> @truth_serum(bool)
  1494. ... def boolean_test():
  1495. ... return [0, 1, '', ' ', False, True]
  1496. ...
  1497. >>> list(boolean_test())
  1498. [1, ' ', True]
  1499. The :func:`peekable` and :func:`seekable` wrappers make for practical
  1500. decorators:
  1501. >>> from more_itertools import peekable
  1502. >>> peekable_function = make_decorator(peekable)
  1503. >>> @peekable_function()
  1504. ... def str_range(*args):
  1505. ... return (str(x) for x in range(*args))
  1506. ...
  1507. >>> it = str_range(1, 20, 2)
  1508. >>> next(it), next(it), next(it)
  1509. ('1', '3', '5')
  1510. >>> it.peek()
  1511. '7'
  1512. >>> next(it)
  1513. '7'
  1514. """
  1515. # See https://sites.google.com/site/bbayles/index/decorator_factory for
  1516. # notes on how this works.
  1517. def decorator(*wrapping_args, **wrapping_kwargs):
  1518. def outer_wrapper(f):
  1519. def inner_wrapper(*args, **kwargs):
  1520. result = f(*args, **kwargs)
  1521. wrapping_args_ = list(wrapping_args)
  1522. wrapping_args_.insert(result_index, result)
  1523. return wrapping_func(*wrapping_args_, **wrapping_kwargs)
  1524. return inner_wrapper
  1525. return outer_wrapper
  1526. return decorator