cache.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. # Copyright 2007 Matt Chaput. All rights reserved.
  2. #
  3. # Redistribution and use in source and binary forms, with or without
  4. # modification, are permitted provided that the following conditions are met:
  5. #
  6. # 1. Redistributions of source code must retain the above copyright notice,
  7. # this list of conditions and the following disclaimer.
  8. #
  9. # 2. Redistributions in binary form must reproduce the above copyright
  10. # notice, this list of conditions and the following disclaimer in the
  11. # documentation and/or other materials provided with the distribution.
  12. #
  13. # THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
  14. # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  15. # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
  16. # EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  17. # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  18. # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
  19. # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  20. # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  21. # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  22. # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  23. #
  24. # The views and conclusions contained in the software and documentation are
  25. # those of the authors and should not be interpreted as representing official
  26. # policies, either expressed or implied, of Matt Chaput.
  27. from __future__ import with_statement
  28. import functools, random
  29. from array import array
  30. from heapq import nsmallest
  31. from operator import itemgetter
  32. from threading import Lock
  33. from time import time
  34. from whoosh.compat import iteritems, xrange
  35. try:
  36. from collections import Counter
  37. except ImportError:
  38. class Counter(dict):
  39. def __missing__(self, key):
  40. return 0
  41. def unbound_cache(func):
  42. """Caching decorator with an unbounded cache size.
  43. """
  44. cache = {}
  45. @functools.wraps(func)
  46. def caching_wrapper(*args):
  47. try:
  48. return cache[args]
  49. except KeyError:
  50. result = func(*args)
  51. cache[args] = result
  52. return result
  53. return caching_wrapper
  54. def lru_cache(maxsize=100):
  55. """A simple cache that, when the cache is full, deletes the least recently
  56. used 10% of the cached values.
  57. This function duplicates (more-or-less) the protocol of the
  58. ``functools.lru_cache`` decorator in the Python 3.2 standard library.
  59. Arguments to the cached function must be hashable.
  60. View the cache statistics tuple ``(hits, misses, maxsize, currsize)``
  61. with f.cache_info(). Clear the cache and statistics with f.cache_clear().
  62. Access the underlying function with f.__wrapped__.
  63. """
  64. def decorating_function(user_function):
  65. stats = [0, 0] # Hits, misses
  66. data = {}
  67. lastused = {}
  68. @functools.wraps(user_function)
  69. def wrapper(*args):
  70. try:
  71. result = data[args]
  72. stats[0] += 1 # Hit
  73. except KeyError:
  74. stats[1] += 1 # Miss
  75. if len(data) == maxsize:
  76. for k, _ in nsmallest(maxsize // 10 or 1,
  77. iteritems(lastused),
  78. key=itemgetter(1)):
  79. del data[k]
  80. del lastused[k]
  81. data[args] = user_function(*args)
  82. result = data[args]
  83. finally:
  84. lastused[args] = time()
  85. return result
  86. def cache_info():
  87. return stats[0], stats[1], maxsize, len(data)
  88. def cache_clear():
  89. data.clear()
  90. lastused.clear()
  91. stats[0] = stats[1] = 0
  92. wrapper.cache_info = cache_info
  93. wrapper.cache_clear = cache_clear
  94. return wrapper
  95. return decorating_function
  96. def lfu_cache(maxsize=100):
  97. """A simple cache that, when the cache is full, deletes the least frequently
  98. used 10% of the cached values.
  99. This function duplicates (more-or-less) the protocol of the
  100. ``functools.lru_cache`` decorator in the Python 3.2 standard library.
  101. Arguments to the cached function must be hashable.
  102. View the cache statistics tuple ``(hits, misses, maxsize, currsize)``
  103. with f.cache_info(). Clear the cache and statistics with f.cache_clear().
  104. Access the underlying function with f.__wrapped__.
  105. """
  106. def decorating_function(user_function):
  107. stats = [0, 0] # Hits, misses
  108. data = {}
  109. usecount = Counter()
  110. @functools.wraps(user_function)
  111. def wrapper(*args):
  112. try:
  113. result = data[args]
  114. stats[0] += 1 # Hit
  115. except KeyError:
  116. stats[1] += 1 # Miss
  117. if len(data) == maxsize:
  118. for k, _ in nsmallest(maxsize // 10 or 1,
  119. iteritems(usecount),
  120. key=itemgetter(1)):
  121. del data[k]
  122. del usecount[k]
  123. data[args] = user_function(*args)
  124. result = data[args]
  125. finally:
  126. usecount[args] += 1
  127. return result
  128. def cache_info():
  129. return stats[0], stats[1], maxsize, len(data)
  130. def cache_clear():
  131. data.clear()
  132. usecount.clear()
  133. wrapper.cache_info = cache_info
  134. wrapper.cache_clear = cache_clear
  135. return wrapper
  136. return decorating_function
  137. def random_cache(maxsize=100):
  138. """A very simple cache that, when the cache is filled, deletes 10% of the
  139. cached values AT RANDOM.
  140. This function duplicates (more-or-less) the protocol of the
  141. ``functools.lru_cache`` decorator in the Python 3.2 standard library.
  142. Arguments to the cached function must be hashable.
  143. View the cache statistics tuple ``(hits, misses, maxsize, currsize)``
  144. with f.cache_info(). Clear the cache and statistics with f.cache_clear().
  145. Access the underlying function with f.__wrapped__.
  146. """
  147. def decorating_function(user_function):
  148. stats = [0, 0] # hits, misses
  149. data = {}
  150. @functools.wraps(user_function)
  151. def wrapper(*args):
  152. try:
  153. result = data[args]
  154. stats[0] += 1 # Hit
  155. except KeyError:
  156. stats[1] += 1 # Miss
  157. if len(data) == maxsize:
  158. keys = data.keys()
  159. for i in xrange(maxsize // 10 or 1):
  160. n = random.randint(0, len(keys) - 1)
  161. k = keys.pop(n)
  162. del data[k]
  163. data[args] = user_function(*args)
  164. result = data[args]
  165. return result
  166. def cache_info():
  167. return stats[0], stats[1], maxsize, len(data)
  168. def cache_clear():
  169. data.clear()
  170. wrapper.cache_info = cache_info
  171. wrapper.cache_clear = cache_clear
  172. return wrapper
  173. return decorating_function
  174. def db_lru_cache(maxsize=100):
  175. """Double-barrel least-recently-used cache decorator. This is a simple
  176. LRU algorithm that keeps a primary and secondary dict. Keys are checked
  177. in the primary dict, and then the secondary. Once the primary dict fills
  178. up, the secondary dict is cleared and the two dicts are swapped.
  179. This function duplicates (more-or-less) the protocol of the
  180. ``functools.lru_cache`` decorator in the Python 3.2 standard library.
  181. Arguments to the cached function must be hashable.
  182. View the cache statistics tuple ``(hits, misses, maxsize, currsize)``
  183. with f.cache_info(). Clear the cache and statistics with f.cache_clear().
  184. Access the underlying function with f.__wrapped__.
  185. """
  186. def decorating_function(user_function):
  187. # Cache1, Cache2, Pointer, Hits, Misses
  188. stats = [{}, {}, 0, 0, 0]
  189. @functools.wraps(user_function)
  190. def wrapper(*args):
  191. ptr = stats[2]
  192. a = stats[ptr]
  193. b = stats[not ptr]
  194. key = args
  195. if key in a:
  196. stats[3] += 1 # Hit
  197. return a[key]
  198. elif key in b:
  199. stats[3] += 1 # Hit
  200. return b[key]
  201. else:
  202. stats[4] += 1 # Miss
  203. result = user_function(*args)
  204. a[key] = result
  205. if len(a) >= maxsize:
  206. stats[2] = not ptr
  207. b.clear()
  208. return result
  209. def cache_info():
  210. return stats[3], stats[4], maxsize, len(stats[0]) + len(stats[1])
  211. def cache_clear():
  212. """Clear the cache and cache statistics"""
  213. stats[0].clear()
  214. stats[1].clear()
  215. stats[3] = stats[4] = 0
  216. wrapper.cache_info = cache_info
  217. wrapper.cache_clear = cache_clear
  218. return wrapper
  219. return decorating_function
  220. def clockface_lru_cache(maxsize=100):
  221. """Least-recently-used cache decorator.
  222. This function duplicates (more-or-less) the protocol of the
  223. ``functools.lru_cache`` decorator in the Python 3.2 standard library, but
  224. uses the clock face LRU algorithm instead of an ordered dictionary.
  225. If *maxsize* is set to None, the LRU features are disabled and the cache
  226. can grow without bound.
  227. Arguments to the cached function must be hashable.
  228. View the cache statistics named tuple (hits, misses, maxsize, currsize)
  229. with f.cache_info(). Clear the cache and statistics with f.cache_clear().
  230. Access the underlying function with f.__wrapped__.
  231. """
  232. def decorating_function(user_function):
  233. stats = [0, 0, 0] # hits, misses, hand
  234. data = {}
  235. if maxsize:
  236. # The keys at each point on the clock face
  237. clock_keys = [None] * maxsize
  238. # The "referenced" bits at each point on the clock face
  239. clock_refs = array("B", (0 for _ in xrange(maxsize)))
  240. lock = Lock()
  241. @functools.wraps(user_function)
  242. def wrapper(*args):
  243. key = args
  244. try:
  245. with lock:
  246. pos, result = data[key]
  247. # The key is in the cache. Set the key's reference bit
  248. clock_refs[pos] = 1
  249. # Record a cache hit
  250. stats[0] += 1
  251. except KeyError:
  252. # Compute the value
  253. result = user_function(*args)
  254. with lock:
  255. # Current position of the clock hand
  256. hand = stats[2]
  257. # Remember to stop here after a full revolution
  258. end = hand
  259. # Sweep around the clock looking for a position with
  260. # the reference bit off
  261. while True:
  262. hand = (hand + 1) % maxsize
  263. current_ref = clock_refs[hand]
  264. if current_ref:
  265. # This position's "referenced" bit is set. Turn
  266. # the bit off and move on.
  267. clock_refs[hand] = 0
  268. elif not current_ref or hand == end:
  269. # We've either found a position with the
  270. # "reference" bit off or reached the end of the
  271. # circular cache. So we'll replace this
  272. # position with the new key
  273. current_key = clock_keys[hand]
  274. if current_key in data:
  275. del data[current_key]
  276. clock_keys[hand] = key
  277. clock_refs[hand] = 1
  278. break
  279. # Put the key and result in the cache
  280. data[key] = (hand, result)
  281. # Save the new hand position
  282. stats[2] = hand
  283. # Record a cache miss
  284. stats[1] += 1
  285. return result
  286. else:
  287. @functools.wraps(user_function)
  288. def wrapper(*args):
  289. key = args
  290. try:
  291. result = data[key]
  292. stats[0] += 1
  293. except KeyError:
  294. result = user_function(*args)
  295. data[key] = result
  296. stats[1] += 1
  297. return result
  298. def cache_info():
  299. return stats[0], stats[1], maxsize, len(data)
  300. def cache_clear():
  301. """Clear the cache and cache statistics"""
  302. data.clear()
  303. stats[0] = stats[1] = stats[2] = 0
  304. for i in xrange(maxsize):
  305. clock_keys[i] = None
  306. clock_refs[i] = 0
  307. wrapper.cache_info = cache_info
  308. wrapper.cache_clear = cache_clear
  309. return wrapper
  310. return decorating_function