histogram.py 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. from __future__ import absolute_import
  2. import math
  3. class Histogram(object):
  4. def __init__(self, bin_scheme):
  5. self._hist = [0.0] * bin_scheme.bins
  6. self._count = 0.0
  7. self._bin_scheme = bin_scheme
  8. def record(self, value):
  9. self._hist[self._bin_scheme.to_bin(value)] += 1.0
  10. self._count += 1.0
  11. def value(self, quantile):
  12. if self._count == 0.0:
  13. return float('NaN')
  14. _sum = 0.0
  15. quant = float(quantile)
  16. for i, value in enumerate(self._hist[:-1]):
  17. _sum += value
  18. if _sum / self._count > quant:
  19. return self._bin_scheme.from_bin(i)
  20. return float('inf')
  21. @property
  22. def counts(self):
  23. return self._hist
  24. def clear(self):
  25. for i in range(self._hist):
  26. self._hist[i] = 0.0
  27. self._count = 0
  28. def __str__(self):
  29. values = ['%.10f:%.0f' % (self._bin_scheme.from_bin(i), value) for
  30. i, value in enumerate(self._hist[:-1])]
  31. values.append('%s:%s' % (float('inf'), self._hist[-1]))
  32. return '{%s}' % ','.join(values)
  33. class ConstantBinScheme(object):
  34. def __init__(self, bins, min_val, max_val):
  35. if bins < 2:
  36. raise ValueError('Must have at least 2 bins.')
  37. self._min = float(min_val)
  38. self._max = float(max_val)
  39. self._bins = int(bins)
  40. self._bucket_width = (max_val - min_val) / (bins - 2)
  41. @property
  42. def bins(self):
  43. return self._bins
  44. def from_bin(self, b):
  45. if b == 0:
  46. return float('-inf')
  47. elif b == self._bins - 1:
  48. return float('inf')
  49. else:
  50. return self._min + (b - 1) * self._bucket_width
  51. def to_bin(self, x):
  52. if x < self._min:
  53. return 0
  54. elif x > self._max:
  55. return self._bins - 1
  56. else:
  57. return int(((x - self._min) / self._bucket_width) + 1)
  58. class LinearBinScheme(object):
  59. def __init__(self, num_bins, max_val):
  60. self._bins = num_bins
  61. self._max = max_val
  62. self._scale = max_val / (num_bins * (num_bins - 1) / 2)
  63. @property
  64. def bins(self):
  65. return self._bins
  66. def from_bin(self, b):
  67. if b == self._bins - 1:
  68. return float('inf')
  69. else:
  70. unscaled = (b * (b + 1.0)) / 2.0
  71. return unscaled * self._scale
  72. def to_bin(self, x):
  73. if x < 0.0:
  74. raise ValueError('Values less than 0.0 not accepted.')
  75. elif x > self._max:
  76. return self._bins - 1
  77. else:
  78. scaled = x / self._scale
  79. return int(-0.5 + math.sqrt(2.0 * scaled + 0.25))