123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144 |
- import json
- import mmap
- import os
- import struct
- _INITIAL_MMAP_SIZE = 1 << 20
- _pack_integer_func = struct.Struct(b'i').pack
- _pack_double_func = struct.Struct(b'd').pack
- _unpack_integer = struct.Struct(b'i').unpack_from
- _unpack_double = struct.Struct(b'd').unpack_from
- # struct.pack_into has atomicity issues because it will temporarily write 0 into
- # the mmap, resulting in false reads to 0 when experiencing a lot of writes.
- # Using direct assignment solves this issue.
- def _pack_double(data, pos, value):
- data[pos:pos + 8] = _pack_double_func(value)
- def _pack_integer(data, pos, value):
- data[pos:pos + 4] = _pack_integer_func(value)
- def _read_all_values(data, used=0):
- """Yield (key, value, pos). No locking is performed."""
- if used <= 0:
- # If not valid `used` value is passed in, read it from the file.
- used = _unpack_integer(data, 0)[0]
- pos = 8
- while pos < used:
- encoded_len = _unpack_integer(data, pos)[0]
- # check we are not reading beyond bounds
- if encoded_len + pos > used:
- raise RuntimeError('Read beyond file size detected, file is corrupted.')
- pos += 4
- encoded_key = data[pos : pos + encoded_len]
- padded_len = encoded_len + (8 - (encoded_len + 4) % 8)
- pos += padded_len
- value = _unpack_double(data, pos)[0]
- yield encoded_key.decode('utf-8'), value, pos
- pos += 8
- class MmapedDict(object):
- """A dict of doubles, backed by an mmapped file.
- The file starts with a 4 byte int, indicating how much of it is used.
- Then 4 bytes of padding.
- There's then a number of entries, consisting of a 4 byte int which is the
- size of the next field, a utf-8 encoded string key, padding to a 8 byte
- alignment, and then a 8 byte float which is the value.
- Not thread safe.
- """
- def __init__(self, filename, read_mode=False):
- self._f = open(filename, 'rb' if read_mode else 'a+b')
- self._fname = filename
- capacity = os.fstat(self._f.fileno()).st_size
- if capacity == 0:
- self._f.truncate(_INITIAL_MMAP_SIZE)
- capacity = _INITIAL_MMAP_SIZE
- self._capacity = capacity
- self._m = mmap.mmap(self._f.fileno(), self._capacity,
- access=mmap.ACCESS_READ if read_mode else mmap.ACCESS_WRITE)
- self._positions = {}
- self._used = _unpack_integer(self._m, 0)[0]
- if self._used == 0:
- self._used = 8
- _pack_integer(self._m, 0, self._used)
- else:
- if not read_mode:
- for key, _, pos in self._read_all_values():
- self._positions[key] = pos
- @staticmethod
- def read_all_values_from_file(filename):
- with open(filename, 'rb') as infp:
- # Read the first block of data, including the first 4 bytes which tell us
- # how much of the file (which is preallocated to _INITIAL_MMAP_SIZE bytes) is occupied.
- data = infp.read(mmap.PAGESIZE)
- used = _unpack_integer(data, 0)[0]
- if used > len(data): # Then read in the rest, if needed.
- data += infp.read(used - len(data))
- return _read_all_values(data, used)
- def _init_value(self, key):
- """Initialize a value. Lock must be held by caller."""
- encoded = key.encode('utf-8')
- # Pad to be 8-byte aligned.
- padded = encoded + (b' ' * (8 - (len(encoded) + 4) % 8))
- value = struct.pack('i{0}sd'.format(len(padded)).encode(), len(encoded), padded, 0.0)
- while self._used + len(value) > self._capacity:
- self._capacity *= 2
- self._f.truncate(self._capacity)
- self._m = mmap.mmap(self._f.fileno(), self._capacity)
- self._m[self._used:self._used + len(value)] = value
- # Update how much space we've used.
- self._used += len(value)
- _pack_integer(self._m, 0, self._used)
- self._positions[key] = self._used - 8
- def _read_all_values(self):
- """Yield (key, value, pos). No locking is performed."""
- return _read_all_values(data=self._m, used=self._used)
- def read_all_values(self):
- """Yield (key, value). No locking is performed."""
- for k, v, _ in self._read_all_values():
- yield k, v
- def read_value(self, key):
- if key not in self._positions:
- self._init_value(key)
- pos = self._positions[key]
- # We assume that reading from an 8 byte aligned value is atomic
- return _unpack_double(self._m, pos)[0]
- def write_value(self, key, value):
- if key not in self._positions:
- self._init_value(key)
- pos = self._positions[key]
- # We assume that writing to an 8 byte aligned value is atomic
- _pack_double(self._m, pos, value)
- def close(self):
- if self._f:
- self._m.close()
- self._m = None
- self._f.close()
- self._f = None
- def mmap_key(metric_name, name, labelnames, labelvalues):
- """Format a key for use in the mmap file."""
- # ensure labels are in consistent order for identity
- labels = dict(zip(labelnames, labelvalues))
- return json.dumps([metric_name, name, labels], sort_keys=True)
|