# coding=utf-8
#
# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis-python
#
# Most of this work is copyright (C) 2013-2018 David R. MacIver
# (david@drmaciver.com), but it contains contributions by others. See
# CONTRIBUTING.rst for a full list of people who may hold copyright, and
# consult the git log if you need to determine who owns an individual
# contribution.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at http://mozilla.org/MPL/2.0/.
#
# END HEADER
from __future__ import division, print_function, absolute_import

import attr


@attr.s(slots=True)
class Entry(object):
    key = attr.ib()
    value = attr.ib()
    score = attr.ib()


class GenericCache(object):
    """Generic supertype for cache implementations.

    Defines a dict-like mapping with a maximum size, where as well as mapping
    to a value, each key also maps to a score. When a write would cause the
    dict to exceed its maximum size, it first evicts the existing key with
    the smallest score, then adds the new key to the map.

    A key has the following lifecycle:

    1. When a key is written for the first time, it is given the score
       self.new_entry(key, value).
    2. Whenever an existing key is read or written, self.on_access(key,
       value, score) is called. This returns the new score for the key.
    3. When a key is evicted, self.on_evict(key, value, score) is called.

    The cache will be in a valid state in all of these cases.

    Implementations are expected to implement new_entry and optionally
    on_access and on_evict to implement a specific scoring strategy. (An
    illustrative subclass sketch follows this class definition.)
    """

    __slots__ = ('keys_to_indices', 'data', 'max_size')

    def __init__(self, max_size):
        self.max_size = max_size

        # Implementation: We store a binary heap of Entry objects in
        # self.data, with the heap property requiring that a parent's score
        # is <= that of its children. keys_to_indices then maps keys to
        # their index in the heap. We keep these two in sync automatically -
        # the heap is never reordered without updating the index.
        self.keys_to_indices = {}
        self.data = []

    def __len__(self):
        assert len(self.keys_to_indices) == len(self.data)
        return len(self.data)

    def __getitem__(self, key):
        i = self.keys_to_indices[key]
        result = self.data[i]
        # Reading counts as an access: recompute the score and restore the
        # heap property around this entry's position. (on_access may mutate
        # the score in place, but assigning its return value also supports
        # strategies that build a fresh score object.)
        result.score = self.on_access(result.key, result.value, result.score)
        self.__balance(i)
        return result.value

    def __setitem__(self, key, value):
        if self.max_size == 0:
            return
        evicted = None
        try:
            i = self.keys_to_indices[key]
        except KeyError:
            entry = Entry(key, value, self.new_entry(key, value))
            if len(self.data) >= self.max_size:
                # The cache is full, so overwrite the root of the heap,
                # which is the entry with the smallest score.
                evicted = self.data[0]
                del self.keys_to_indices[evicted.key]
                i = 0
                self.data[0] = entry
            else:
                i = len(self.data)
                self.data.append(entry)
            self.keys_to_indices[key] = i
        else:
            # The key is already present, so update its value and score in
            # place.
            entry = self.data[i]
            assert entry.key == key
            entry.value = value
            entry.score = self.on_access(entry.key, entry.value, entry.score)

        self.__balance(i)

        if evicted is not None:
            if self.data[0] is not entry:
                assert evicted.score <= self.data[0].score
            self.on_evict(evicted.key, evicted.value, evicted.score)

    def clear(self):
        del self.data[:]
        self.keys_to_indices.clear()

    def __repr__(self):
        return '{%s}' % (', '.join(
            '%r: %r' % (e.key, e.value) for e in self.data),)

    def new_entry(self, key, value):
        """Called when a key is written that does not currently appear in
        the map.

        Returns the score to associate with the key.
        """
        raise NotImplementedError()

    def on_access(self, key, value, score):
        """Called every time a key that is already in the map is read or
        written.

        Returns the new score for the key.
        """
        return score

    def on_evict(self, key, value, score):
        """Called after a key has been evicted, with the score it had at the
        point of eviction."""
        pass

    def check_valid(self):
        """Debugging method for use in tests.

        Asserts that all of the cache's invariants hold. When everything is
        working correctly this should be an expensive no-op.
        """
        for i, e in enumerate(self.data):
            assert self.keys_to_indices[e.key] == i
            for j in [i * 2 + 1, i * 2 + 2]:
                if j < len(self.data):
                    assert e.score <= self.data[j].score, self.data

    def __swap(self, i, j):
        assert i < j
        assert self.data[j].score < self.data[i].score
        self.data[i], self.data[j] = self.data[j], self.data[i]
        self.keys_to_indices[self.data[i].key] = i
        self.keys_to_indices[self.data[j].key] = j

    def __balance(self, i):
        """When we have made a modification to the heap that may have
        violated the heap property locally around i, but it previously held
        for all other indexes (and no other values have been modified), this
        fixes the heap so that the heap property holds everywhere."""
        # Sift the entry up towards the root while its score is smaller than
        # its parent's.
        while i > 0:
            parent = (i - 1) // 2
            if self.__out_of_order(parent, i):
                self.__swap(parent, i)
                i = parent
            else:
                break
        # Then sift it down towards the leaves, always swapping with the
        # smaller-scored child, until the heap property holds below it.
        while True:
            children = [
                j for j in (2 * i + 1, 2 * i + 2)
                if j < len(self.data)
            ]
            if len(children) == 2:
                children.sort(key=lambda j: self.data[j].score)
            for j in children:
                if self.__out_of_order(i, j):
                    self.__swap(i, j)
                    i = j
                    break
            else:
                break

    def __out_of_order(self, i, j):
        """Returns True if the indices i, j are in the wrong order.

        i must be the parent of j.
        """
        assert i == (j - 1) // 2
        return self.data[j].score < self.data[i].score
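

# An illustrative sketch, not part of the library: a minimal GenericCache
# subclass showing how new_entry/on_access define a scoring strategy. The
# score here is just a monotonically increasing counter, which gives a plain
# least-recently-used cache - the entry with the oldest counter sits at the
# root of the heap and is evicted first. The class name is hypothetical.
class ExampleLRUCache(GenericCache):
    """Hypothetical LRU cache used only to illustrate the hook API."""

    __slots__ = ('__counter',)

    def __init__(self, max_size):
        super(ExampleLRUCache, self).__init__(max_size)
        self.__counter = 0

    def new_entry(self, key, value):
        self.__counter += 1
        return self.__counter

    def on_access(self, key, value, score):
        # Any read or write refreshes the entry, pushing it to the back of
        # the eviction order.
        self.__counter += 1
        return self.__counter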


class LRUReusedCache(GenericCache):
    """The only concrete implementation of GenericCache we use outside of
    tests currently.

    Adopts a modified least-recently used eviction policy: It evicts the key
    that has been used least recently, but it will always preferentially
    evict keys that have only ever been accessed once. Among keys that have
    been accessed more than once, it ignores the number of accesses.

    This retains most of the benefits of an LRU cache, but adds an element of
    scan-resistance to the process: If we end up scanning through a large
    number of keys without reusing them, this does not evict the existing
    entries in preference for the new ones.
    """

    __slots__ = ('__tick',)

    def __init__(self, max_size):
        super(LRUReusedCache, self).__init__(max_size)
        self.__tick = 0

    def tick(self):
        self.__tick += 1
        return self.__tick

    def new_entry(self, key, value):
        return [1, self.tick()]

    def on_access(self, key, value, score):
        score[0] = 2
        score[1] = self.tick()
        return score
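

# A brief usage sketch (illustrative, not part of the original module). It
# demonstrates the scan-resistance described above: 'a' has been accessed
# twice, while 'b' has only ever been accessed once, so when the cache
# overflows it is 'b' that gets evicted, even though 'b' was touched more
# recently than 'a'.
if __name__ == '__main__':
    cache = LRUReusedCache(max_size=2)
    cache['a'] = 1           # 'a' written: score [1, tick]
    assert cache['a'] == 1   # 'a' reused: score becomes [2, tick]
    cache['b'] = 2           # 'b' written: score [1, tick]
    cache['c'] = 3           # cache full: evicts 'b', the single-use entry
    assert 'b' not in cache.keys_to_indices
    assert 'a' in cache.keys_to_indices and 'c' in cache.keys_to_indices
    cache.check_valid()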