dicttoolz.pyx 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577
  1. from cpython.dict cimport (PyDict_Check, PyDict_CheckExact, PyDict_GetItem,
  2. PyDict_Merge, PyDict_New, PyDict_Next,
  3. PyDict_SetItem, PyDict_Update, PyDict_DelItem)
  4. from cpython.list cimport PyList_Append, PyList_New
  5. from cpython.object cimport PyObject_SetItem
  6. from cpython.ref cimport PyObject, Py_DECREF, Py_INCREF, Py_XDECREF
  7. # Locally defined bindings that differ from `cython.cpython` bindings
  8. from cytoolz.cpython cimport PyDict_Next_Compat, PtrIter_Next
  9. from copy import copy
  10. __all__ = ['merge', 'merge_with', 'valmap', 'keymap', 'itemmap', 'valfilter',
  11. 'keyfilter', 'itemfilter', 'assoc', 'dissoc', 'assoc_in', 'get_in',
  12. 'update_in']
  13. cdef class _iter_mapping:
  14. """ Keep a handle on the current item to prevent memory clean up too early"""
  15. def __cinit__(self, object it):
  16. self.it = it
  17. self.cur = None
  18. def __iter__(self):
  19. return self
  20. def __next__(self):
  21. self.cur = next(self.it)
  22. return self.cur
  23. cdef int PyMapping_Next(object p, Py_ssize_t *ppos, PyObject* *pkey, PyObject* *pval) except -1:
  24. """Mimic "PyDict_Next" interface, but for any mapping"""
  25. cdef PyObject *obj
  26. obj = PtrIter_Next(p)
  27. if obj is NULL:
  28. return 0
  29. pkey[0] = <PyObject*>(<object>obj)[0]
  30. pval[0] = <PyObject*>(<object>obj)[1]
  31. Py_XDECREF(obj) # removing this results in memory leak
  32. return 1
  33. cdef f_map_next get_map_iter(object d, PyObject* *ptr) except NULL:
  34. """Return function pointer to perform iteration over object returned in ptr.
  35. The returned function signature matches "PyDict_Next". If ``d`` is a dict,
  36. then the returned function *is* PyDict_Next, so iteration wil be very fast.
  37. The object returned through ``ptr`` needs to have its reference count
  38. reduced by one once the caller "owns" the object.
  39. This function lets us control exactly how iteration should be performed
  40. over a given mapping. The current rules are:
  41. 1) If ``d`` is exactly a dict, use PyDict_Next
  42. 2) If ``d`` is subtype of dict, use PyMapping_Next. This lets the user
  43. control the order iteration, such as for ordereddict.
  44. 3) If using PyMapping_Next, iterate using ``iteritems`` if possible,
  45. otherwise iterate using ``items``.
  46. """
  47. cdef object val
  48. cdef f_map_next rv
  49. if PyDict_CheckExact(d):
  50. val = d
  51. rv = &PyDict_Next_Compat
  52. elif hasattr(d, 'iteritems'):
  53. val = _iter_mapping(iter(d.iteritems()))
  54. rv = &PyMapping_Next
  55. else:
  56. val = _iter_mapping(iter(d.items()))
  57. rv = &PyMapping_Next
  58. Py_INCREF(val)
  59. ptr[0] = <PyObject*>val
  60. return rv
  61. cdef get_factory(name, kwargs):
  62. factory = kwargs.pop('factory', dict)
  63. if kwargs:
  64. raise TypeError("{0}() got an unexpected keyword argument "
  65. "'{1}'".format(name, kwargs.popitem()[0]))
  66. return factory
  67. cdef object c_merge(object dicts, object factory=dict):
  68. cdef object rv
  69. rv = factory()
  70. if PyDict_CheckExact(rv):
  71. for d in dicts:
  72. PyDict_Update(rv, d)
  73. else:
  74. for d in dicts:
  75. rv.update(d)
  76. return rv
  77. def merge(*dicts, **kwargs):
  78. """
  79. Merge a collection of dictionaries
  80. >>> merge({1: 'one'}, {2: 'two'})
  81. {1: 'one', 2: 'two'}
  82. Later dictionaries have precedence
  83. >>> merge({1: 2, 3: 4}, {3: 3, 4: 4})
  84. {1: 2, 3: 3, 4: 4}
  85. See Also:
  86. merge_with
  87. """
  88. if len(dicts) == 1 and not PyDict_Check(dicts[0]):
  89. dicts = dicts[0]
  90. factory = get_factory('merge', kwargs)
  91. return c_merge(dicts, factory)
  92. cdef object c_merge_with(object func, object dicts, object factory=dict):
  93. cdef:
  94. dict result
  95. object rv, d
  96. list seq
  97. f_map_next f
  98. PyObject *obj
  99. PyObject *pkey
  100. PyObject *pval
  101. Py_ssize_t pos
  102. result = PyDict_New()
  103. rv = factory()
  104. for d in dicts:
  105. f = get_map_iter(d, &obj)
  106. d = <object>obj
  107. Py_DECREF(d)
  108. pos = 0
  109. while f(d, &pos, &pkey, &pval):
  110. obj = PyDict_GetItem(result, <object>pkey)
  111. if obj is NULL:
  112. seq = PyList_New(0)
  113. PyList_Append(seq, <object>pval)
  114. PyDict_SetItem(result, <object>pkey, seq)
  115. else:
  116. PyList_Append(<object>obj, <object>pval)
  117. f = get_map_iter(result, &obj)
  118. d = <object>obj
  119. Py_DECREF(d)
  120. pos = 0
  121. while f(d, &pos, &pkey, &pval):
  122. PyObject_SetItem(rv, <object>pkey, func(<object>pval))
  123. return rv
  124. def merge_with(func, *dicts, **kwargs):
  125. """
  126. Merge dictionaries and apply function to combined values
  127. A key may occur in more than one dict, and all values mapped from the key
  128. will be passed to the function as a list, such as func([val1, val2, ...]).
  129. >>> merge_with(sum, {1: 1, 2: 2}, {1: 10, 2: 20})
  130. {1: 11, 2: 22}
  131. >>> merge_with(first, {1: 1, 2: 2}, {2: 20, 3: 30}) # doctest: +SKIP
  132. {1: 1, 2: 2, 3: 30}
  133. See Also:
  134. merge
  135. """
  136. if len(dicts) == 1 and not PyDict_Check(dicts[0]):
  137. dicts = dicts[0]
  138. factory = get_factory('merge_with', kwargs)
  139. return c_merge_with(func, dicts, factory)
  140. cpdef object valmap(object func, object d, object factory=dict):
  141. """
  142. Apply function to values of dictionary
  143. >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
  144. >>> valmap(sum, bills) # doctest: +SKIP
  145. {'Alice': 65, 'Bob': 45}
  146. See Also:
  147. keymap
  148. itemmap
  149. """
  150. cdef:
  151. object rv
  152. f_map_next f
  153. PyObject *obj
  154. PyObject *pkey
  155. PyObject *pval
  156. Py_ssize_t pos = 0
  157. rv = factory()
  158. f = get_map_iter(d, &obj)
  159. d = <object>obj
  160. Py_DECREF(d)
  161. while f(d, &pos, &pkey, &pval):
  162. rv[<object>pkey] = func(<object>pval)
  163. return rv
  164. cpdef object keymap(object func, object d, object factory=dict):
  165. """
  166. Apply function to keys of dictionary
  167. >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
  168. >>> keymap(str.lower, bills) # doctest: +SKIP
  169. {'alice': [20, 15, 30], 'bob': [10, 35]}
  170. See Also:
  171. valmap
  172. itemmap
  173. """
  174. cdef:
  175. object rv
  176. f_map_next f
  177. PyObject *obj
  178. PyObject *pkey
  179. PyObject *pval
  180. Py_ssize_t pos = 0
  181. rv = factory()
  182. f = get_map_iter(d, &obj)
  183. d = <object>obj
  184. Py_DECREF(d)
  185. while f(d, &pos, &pkey, &pval):
  186. rv[func(<object>pkey)] = <object>pval
  187. return rv
  188. cpdef object itemmap(object func, object d, object factory=dict):
  189. """
  190. Apply function to items of dictionary
  191. >>> accountids = {"Alice": 10, "Bob": 20}
  192. >>> itemmap(reversed, accountids) # doctest: +SKIP
  193. {10: "Alice", 20: "Bob"}
  194. See Also:
  195. keymap
  196. valmap
  197. """
  198. cdef:
  199. object rv, k, v
  200. f_map_next f
  201. PyObject *obj
  202. PyObject *pkey
  203. PyObject *pval
  204. Py_ssize_t pos = 0
  205. rv = factory()
  206. f = get_map_iter(d, &obj)
  207. d = <object>obj
  208. Py_DECREF(d)
  209. while f(d, &pos, &pkey, &pval):
  210. k, v = func((<object>pkey, <object>pval))
  211. rv[k] = v
  212. return rv
  213. cpdef object valfilter(object predicate, object d, object factory=dict):
  214. """
  215. Filter items in dictionary by value
  216. >>> iseven = lambda x: x % 2 == 0
  217. >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
  218. >>> valfilter(iseven, d)
  219. {1: 2, 3: 4}
  220. See Also:
  221. keyfilter
  222. itemfilter
  223. valmap
  224. """
  225. cdef:
  226. object rv
  227. f_map_next f
  228. PyObject *obj
  229. PyObject *pkey
  230. PyObject *pval
  231. Py_ssize_t pos = 0
  232. rv = factory()
  233. f = get_map_iter(d, &obj)
  234. d = <object>obj
  235. Py_DECREF(d)
  236. while f(d, &pos, &pkey, &pval):
  237. if predicate(<object>pval):
  238. rv[<object>pkey] = <object>pval
  239. return rv
  240. cpdef object keyfilter(object predicate, object d, object factory=dict):
  241. """
  242. Filter items in dictionary by key
  243. >>> iseven = lambda x: x % 2 == 0
  244. >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
  245. >>> keyfilter(iseven, d)
  246. {2: 3, 4: 5}
  247. See Also:
  248. valfilter
  249. itemfilter
  250. keymap
  251. """
  252. cdef:
  253. object rv
  254. f_map_next f
  255. PyObject *obj
  256. PyObject *pkey
  257. PyObject *pval
  258. Py_ssize_t pos = 0
  259. rv = factory()
  260. f = get_map_iter(d, &obj)
  261. d = <object>obj
  262. Py_DECREF(d)
  263. while f(d, &pos, &pkey, &pval):
  264. if predicate(<object>pkey):
  265. rv[<object>pkey] = <object>pval
  266. return rv
  267. cpdef object itemfilter(object predicate, object d, object factory=dict):
  268. """
  269. Filter items in dictionary by item
  270. >>> def isvalid(item):
  271. ... k, v = item
  272. ... return k % 2 == 0 and v < 4
  273. >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
  274. >>> itemfilter(isvalid, d)
  275. {2: 3}
  276. See Also:
  277. keyfilter
  278. valfilter
  279. itemmap
  280. """
  281. cdef:
  282. object rv, k, v
  283. f_map_next f
  284. PyObject *obj
  285. PyObject *pkey
  286. PyObject *pval
  287. Py_ssize_t pos = 0
  288. rv = factory()
  289. f = get_map_iter(d, &obj)
  290. d = <object>obj
  291. Py_DECREF(d)
  292. while f(d, &pos, &pkey, &pval):
  293. k = <object>pkey
  294. v = <object>pval
  295. if predicate((k, v)):
  296. rv[k] = v
  297. return rv
  298. cpdef object assoc(object d, object key, object value, object factory=dict):
  299. """
  300. Return a new dict with new key value pair
  301. New dict has d[key] set to value. Does not modify the initial dictionary.
  302. >>> assoc({'x': 1}, 'x', 2)
  303. {'x': 2}
  304. >>> assoc({'x': 1}, 'y', 3) # doctest: +SKIP
  305. {'x': 1, 'y': 3}
  306. """
  307. cdef object rv
  308. rv = factory()
  309. if PyDict_CheckExact(rv):
  310. PyDict_Update(rv, d)
  311. else:
  312. rv.update(d)
  313. rv[key] = value
  314. return rv
  315. cpdef object assoc_in(object d, object keys, object value, object factory=dict):
  316. """
  317. Return a new dict with new, potentially nested, key value pair
  318. >>> purchase = {'name': 'Alice',
  319. ... 'order': {'items': ['Apple', 'Orange'],
  320. ... 'costs': [0.50, 1.25]},
  321. ... 'credit card': '5555-1234-1234-1234'}
  322. >>> assoc_in(purchase, ['order', 'costs'], [0.25, 1.00]) # doctest: +SKIP
  323. {'credit card': '5555-1234-1234-1234',
  324. 'name': 'Alice',
  325. 'order': {'costs': [0.25, 1.00], 'items': ['Apple', 'Orange']}}
  326. """
  327. cdef object prevkey, key
  328. cdef object rv, inner, dtemp
  329. prevkey, keys = keys[0], keys[1:]
  330. rv = factory()
  331. if PyDict_CheckExact(rv):
  332. PyDict_Update(rv, d)
  333. else:
  334. rv.update(d)
  335. inner = rv
  336. for key in keys:
  337. if prevkey in d:
  338. d = d[prevkey]
  339. dtemp = factory()
  340. if PyDict_CheckExact(dtemp):
  341. PyDict_Update(dtemp, d)
  342. else:
  343. dtemp.update(d)
  344. else:
  345. d = factory()
  346. dtemp = d
  347. inner[prevkey] = dtemp
  348. prevkey = key
  349. inner = dtemp
  350. inner[prevkey] = value
  351. return rv
  352. cdef object c_dissoc(object d, object keys, object factory=dict):
  353. # implementation copied from toolz. Not benchmarked.
  354. cdef object rv
  355. rv = factory()
  356. if len(keys) < len(d) * 0.6:
  357. rv.update(d)
  358. for key in keys:
  359. if key in rv:
  360. del rv[key]
  361. else:
  362. remaining = set(d)
  363. remaining.difference_update(keys)
  364. for k in remaining:
  365. rv[k] = d[k]
  366. return rv
  367. def dissoc(d, *keys, **kwargs):
  368. """
  369. Return a new dict with the given key(s) removed.
  370. New dict has d[key] deleted for each supplied key.
  371. Does not modify the initial dictionary.
  372. >>> dissoc({'x': 1, 'y': 2}, 'y')
  373. {'x': 1}
  374. >>> dissoc({'x': 1, 'y': 2}, 'y', 'x')
  375. {}
  376. >>> dissoc({'x': 1}, 'y') # Ignores missing keys
  377. {'x': 1}
  378. """
  379. return c_dissoc(d, keys, get_factory('dissoc', kwargs))
  380. cpdef object update_in(object d, object keys, object func, object default=None, object factory=dict):
  381. """
  382. Update value in a (potentially) nested dictionary
  383. inputs:
  384. d - dictionary on which to operate
  385. keys - list or tuple giving the location of the value to be changed in d
  386. func - function to operate on that value
  387. If keys == [k0,..,kX] and d[k0]..[kX] == v, update_in returns a copy of the
  388. original dictionary with v replaced by func(v), but does not mutate the
  389. original dictionary.
  390. If k0 is not a key in d, update_in creates nested dictionaries to the depth
  391. specified by the keys, with the innermost value set to func(default).
  392. >>> inc = lambda x: x + 1
  393. >>> update_in({'a': 0}, ['a'], inc)
  394. {'a': 1}
  395. >>> transaction = {'name': 'Alice',
  396. ... 'purchase': {'items': ['Apple', 'Orange'],
  397. ... 'costs': [0.50, 1.25]},
  398. ... 'credit card': '5555-1234-1234-1234'}
  399. >>> update_in(transaction, ['purchase', 'costs'], sum) # doctest: +SKIP
  400. {'credit card': '5555-1234-1234-1234',
  401. 'name': 'Alice',
  402. 'purchase': {'costs': 1.75, 'items': ['Apple', 'Orange']}}
  403. >>> # updating a value when k0 is not in d
  404. >>> update_in({}, [1, 2, 3], str, default="bar")
  405. {1: {2: {3: 'bar'}}}
  406. >>> update_in({1: 'foo'}, [2, 3, 4], inc, 0)
  407. {1: 'foo', 2: {3: {4: 1}}}
  408. """
  409. cdef object prevkey, key
  410. cdef object rv, inner, dtemp
  411. prevkey, keys = keys[0], keys[1:]
  412. rv = factory()
  413. if PyDict_CheckExact(rv):
  414. PyDict_Update(rv, d)
  415. else:
  416. rv.update(d)
  417. inner = rv
  418. for key in keys:
  419. if prevkey in d:
  420. d = d[prevkey]
  421. dtemp = factory()
  422. if PyDict_CheckExact(dtemp):
  423. PyDict_Update(dtemp, d)
  424. else:
  425. dtemp.update(d)
  426. else:
  427. d = factory()
  428. dtemp = d
  429. inner[prevkey] = dtemp
  430. prevkey = key
  431. inner = dtemp
  432. if prevkey in d:
  433. key = func(d[prevkey])
  434. else:
  435. key = func(default)
  436. inner[prevkey] = key
  437. return rv
  438. cdef tuple _get_in_exceptions = (KeyError, IndexError, TypeError)
  439. cpdef object get_in(object keys, object coll, object default=None, object no_default=False):
  440. """
  441. Returns coll[i0][i1]...[iX] where [i0, i1, ..., iX]==keys.
  442. If coll[i0][i1]...[iX] cannot be found, returns ``default``, unless
  443. ``no_default`` is specified, then it raises KeyError or IndexError.
  444. ``get_in`` is a generalization of ``operator.getitem`` for nested data
  445. structures such as dictionaries and lists.
  446. >>> transaction = {'name': 'Alice',
  447. ... 'purchase': {'items': ['Apple', 'Orange'],
  448. ... 'costs': [0.50, 1.25]},
  449. ... 'credit card': '5555-1234-1234-1234'}
  450. >>> get_in(['purchase', 'items', 0], transaction)
  451. 'Apple'
  452. >>> get_in(['name'], transaction)
  453. 'Alice'
  454. >>> get_in(['purchase', 'total'], transaction)
  455. >>> get_in(['purchase', 'items', 'apple'], transaction)
  456. >>> get_in(['purchase', 'items', 10], transaction)
  457. >>> get_in(['purchase', 'total'], transaction, 0)
  458. 0
  459. >>> get_in(['y'], {}, no_default=True)
  460. Traceback (most recent call last):
  461. ...
  462. KeyError: 'y'
  463. See Also:
  464. itertoolz.get
  465. operator.getitem
  466. """
  467. cdef object item
  468. try:
  469. for item in keys:
  470. coll = coll[item]
  471. return coll
  472. except _get_in_exceptions:
  473. if no_default:
  474. raise
  475. return default