metrics.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669
  1. import sys
  2. from threading import Lock
  3. import time
  4. import types
  5. from . import values # retain this import style for testability
  6. from .context_managers import ExceptionCounter, InprogressTracker, Timer
  7. from .metrics_core import (
  8. Metric, METRIC_LABEL_NAME_RE, METRIC_NAME_RE,
  9. RESERVED_METRIC_LABEL_NAME_RE,
  10. )
  11. from .registry import REGISTRY
  12. from .utils import floatToGoString, INF
  13. if sys.version_info > (3,):
  14. unicode = str
  15. create_bound_method = types.MethodType
  16. else:
  17. def create_bound_method(func, obj):
  18. return types.MethodType(func, obj, obj.__class__)
  19. def _build_full_name(metric_type, name, namespace, subsystem, unit):
  20. full_name = ''
  21. if namespace:
  22. full_name += namespace + '_'
  23. if subsystem:
  24. full_name += subsystem + '_'
  25. full_name += name
  26. if metric_type == 'counter' and full_name.endswith('_total'):
  27. full_name = full_name[:-6] # Munge to OpenMetrics.
  28. if unit and not full_name.endswith("_" + unit):
  29. full_name += "_" + unit
  30. if unit and metric_type in ('info', 'stateset'):
  31. raise ValueError('Metric name is of a type that cannot have a unit: ' + full_name)
  32. return full_name
  33. def _validate_labelnames(cls, labelnames):
  34. labelnames = tuple(labelnames)
  35. for l in labelnames:
  36. if not METRIC_LABEL_NAME_RE.match(l):
  37. raise ValueError('Invalid label metric name: ' + l)
  38. if RESERVED_METRIC_LABEL_NAME_RE.match(l):
  39. raise ValueError('Reserved label metric name: ' + l)
  40. if l in cls._reserved_labelnames:
  41. raise ValueError('Reserved label metric name: ' + l)
  42. return labelnames
  43. class MetricWrapperBase(object):
  44. _type = None
  45. _reserved_labelnames = ()
  46. def _is_observable(self):
  47. # Whether this metric is observable, i.e.
  48. # * a metric without label names and values, or
  49. # * the child of a labelled metric.
  50. return not self._labelnames or (self._labelnames and self._labelvalues)
  51. def _raise_if_not_observable(self):
  52. # Functions that mutate the state of the metric, for example incrementing
  53. # a counter, will fail if the metric is not observable, because only if a
  54. # metric is observable will the value be initialized.
  55. if not self._is_observable():
  56. raise ValueError('%s metric is missing label values' % str(self._type))
  57. def _is_parent(self):
  58. return self._labelnames and not self._labelvalues
  59. def _get_metric(self):
  60. return Metric(self._name, self._documentation, self._type, self._unit)
  61. def describe(self):
  62. return [self._get_metric()]
  63. def collect(self):
  64. metric = self._get_metric()
  65. for suffix, labels, value in self._samples():
  66. metric.add_sample(self._name + suffix, labels, value)
  67. return [metric]
  68. def __str__(self):
  69. return "{0}:{1}".format(self._type, self._name)
  70. def __repr__(self):
  71. metric_type = type(self)
  72. return "{0}.{1}({2})".format(metric_type.__module__, metric_type.__name__, self._name)
  73. def __init__(self,
  74. name,
  75. documentation,
  76. labelnames=(),
  77. namespace='',
  78. subsystem='',
  79. unit='',
  80. registry=REGISTRY,
  81. labelvalues=None,
  82. ):
  83. self._name = _build_full_name(self._type, name, namespace, subsystem, unit)
  84. self._labelnames = _validate_labelnames(self, labelnames)
  85. self._labelvalues = tuple(labelvalues or ())
  86. self._kwargs = {}
  87. self._documentation = documentation
  88. self._unit = unit
  89. if not METRIC_NAME_RE.match(self._name):
  90. raise ValueError('Invalid metric name: ' + self._name)
  91. if self._is_parent():
  92. # Prepare the fields needed for child metrics.
  93. self._lock = Lock()
  94. self._metrics = {}
  95. if self._is_observable():
  96. self._metric_init()
  97. if not self._labelvalues:
  98. # Register the multi-wrapper parent metric, or if a label-less metric, the whole shebang.
  99. if registry:
  100. registry.register(self)
  101. def labels(self, *labelvalues, **labelkwargs):
  102. """Return the child for the given labelset.
  103. All metrics can have labels, allowing grouping of related time series.
  104. Taking a counter as an example:
  105. from prometheus_client import Counter
  106. c = Counter('my_requests_total', 'HTTP Failures', ['method', 'endpoint'])
  107. c.labels('get', '/').inc()
  108. c.labels('post', '/submit').inc()
  109. Labels can also be provided as keyword arguments:
  110. from prometheus_client import Counter
  111. c = Counter('my_requests_total', 'HTTP Failures', ['method', 'endpoint'])
  112. c.labels(method='get', endpoint='/').inc()
  113. c.labels(method='post', endpoint='/submit').inc()
  114. See the best practices on [naming](http://prometheus.io/docs/practices/naming/)
  115. and [labels](http://prometheus.io/docs/practices/instrumentation/#use-labels).
  116. """
  117. if not self._labelnames:
  118. raise ValueError('No label names were set when constructing %s' % self)
  119. if self._labelvalues:
  120. raise ValueError('%s already has labels set (%s); can not chain calls to .labels()' % (
  121. self,
  122. dict(zip(self._labelnames, self._labelvalues))
  123. ))
  124. if labelvalues and labelkwargs:
  125. raise ValueError("Can't pass both *args and **kwargs")
  126. if labelkwargs:
  127. if sorted(labelkwargs) != sorted(self._labelnames):
  128. raise ValueError('Incorrect label names')
  129. labelvalues = tuple(unicode(labelkwargs[l]) for l in self._labelnames)
  130. else:
  131. if len(labelvalues) != len(self._labelnames):
  132. raise ValueError('Incorrect label count')
  133. labelvalues = tuple(unicode(l) for l in labelvalues)
  134. with self._lock:
  135. if labelvalues not in self._metrics:
  136. self._metrics[labelvalues] = self.__class__(
  137. self._name,
  138. documentation=self._documentation,
  139. labelnames=self._labelnames,
  140. unit=self._unit,
  141. labelvalues=labelvalues,
  142. **self._kwargs
  143. )
  144. return self._metrics[labelvalues]
  145. def remove(self, *labelvalues):
  146. if not self._labelnames:
  147. raise ValueError('No label names were set when constructing %s' % self)
  148. """Remove the given labelset from the metric."""
  149. if len(labelvalues) != len(self._labelnames):
  150. raise ValueError('Incorrect label count (expected %d, got %s)' % (len(self._labelnames), labelvalues))
  151. labelvalues = tuple(unicode(l) for l in labelvalues)
  152. with self._lock:
  153. del self._metrics[labelvalues]
  154. def _samples(self):
  155. if self._is_parent():
  156. return self._multi_samples()
  157. else:
  158. return self._child_samples()
  159. def _multi_samples(self):
  160. with self._lock:
  161. metrics = self._metrics.copy()
  162. for labels, metric in metrics.items():
  163. series_labels = list(zip(self._labelnames, labels))
  164. for suffix, sample_labels, value in metric._samples():
  165. yield (suffix, dict(series_labels + list(sample_labels.items())), value)
  166. def _child_samples(self): # pragma: no cover
  167. raise NotImplementedError('_child_samples() must be implemented by %r' % self)
  168. def _metric_init(self): # pragma: no cover
  169. """
  170. Initialize the metric object as a child, i.e. when it has labels (if any) set.
  171. This is factored as a separate function to allow for deferred initialization.
  172. """
  173. raise NotImplementedError('_metric_init() must be implemented by %r' % self)
  174. class Counter(MetricWrapperBase):
  175. """A Counter tracks counts of events or running totals.
  176. Example use cases for Counters:
  177. - Number of requests processed
  178. - Number of items that were inserted into a queue
  179. - Total amount of data that a system has processed
  180. Counters can only go up (and be reset when the process restarts). If your use case can go down,
  181. you should use a Gauge instead.
  182. An example for a Counter:
  183. from prometheus_client import Counter
  184. c = Counter('my_failures_total', 'Description of counter')
  185. c.inc() # Increment by 1
  186. c.inc(1.6) # Increment by given value
  187. There are utilities to count exceptions raised:
  188. @c.count_exceptions()
  189. def f():
  190. pass
  191. with c.count_exceptions():
  192. pass
  193. # Count only one type of exception
  194. with c.count_exceptions(ValueError):
  195. pass
  196. """
  197. _type = 'counter'
  198. def _metric_init(self):
  199. self._value = values.ValueClass(self._type, self._name, self._name + '_total', self._labelnames,
  200. self._labelvalues)
  201. self._created = time.time()
  202. def inc(self, amount=1):
  203. """Increment counter by the given amount."""
  204. if amount < 0:
  205. raise ValueError('Counters can only be incremented by non-negative amounts.')
  206. self._value.inc(amount)
  207. def count_exceptions(self, exception=Exception):
  208. """Count exceptions in a block of code or function.
  209. Can be used as a function decorator or context manager.
  210. Increments the counter when an exception of the given
  211. type is raised up out of the code.
  212. """
  213. self._raise_if_not_observable()
  214. return ExceptionCounter(self, exception)
  215. def _child_samples(self):
  216. return (
  217. ('_total', {}, self._value.get()),
  218. ('_created', {}, self._created),
  219. )
  220. class Gauge(MetricWrapperBase):
  221. """Gauge metric, to report instantaneous values.
  222. Examples of Gauges include:
  223. - Inprogress requests
  224. - Number of items in a queue
  225. - Free memory
  226. - Total memory
  227. - Temperature
  228. Gauges can go both up and down.
  229. from prometheus_client import Gauge
  230. g = Gauge('my_inprogress_requests', 'Description of gauge')
  231. g.inc() # Increment by 1
  232. g.dec(10) # Decrement by given value
  233. g.set(4.2) # Set to a given value
  234. There are utilities for common use cases:
  235. g.set_to_current_time() # Set to current unixtime
  236. # Increment when entered, decrement when exited.
  237. @g.track_inprogress()
  238. def f():
  239. pass
  240. with g.track_inprogress():
  241. pass
  242. A Gauge can also take its value from a callback:
  243. d = Gauge('data_objects', 'Number of objects')
  244. my_dict = {}
  245. d.set_function(lambda: len(my_dict))
  246. """
  247. _type = 'gauge'
  248. _MULTIPROC_MODES = frozenset(('min', 'max', 'livesum', 'liveall', 'all'))
  249. def __init__(self,
  250. name,
  251. documentation,
  252. labelnames=(),
  253. namespace='',
  254. subsystem='',
  255. unit='',
  256. registry=REGISTRY,
  257. labelvalues=None,
  258. multiprocess_mode='all',
  259. ):
  260. self._multiprocess_mode = multiprocess_mode
  261. if multiprocess_mode not in self._MULTIPROC_MODES:
  262. raise ValueError('Invalid multiprocess mode: ' + multiprocess_mode)
  263. super(Gauge, self).__init__(
  264. name=name,
  265. documentation=documentation,
  266. labelnames=labelnames,
  267. namespace=namespace,
  268. subsystem=subsystem,
  269. unit=unit,
  270. registry=registry,
  271. labelvalues=labelvalues,
  272. )
  273. self._kwargs['multiprocess_mode'] = self._multiprocess_mode
  274. def _metric_init(self):
  275. self._value = values.ValueClass(
  276. self._type, self._name, self._name, self._labelnames, self._labelvalues,
  277. multiprocess_mode=self._multiprocess_mode
  278. )
  279. def inc(self, amount=1):
  280. """Increment gauge by the given amount."""
  281. self._value.inc(amount)
  282. def dec(self, amount=1):
  283. """Decrement gauge by the given amount."""
  284. self._value.inc(-amount)
  285. def set(self, value):
  286. """Set gauge to the given value."""
  287. self._value.set(float(value))
  288. def set_to_current_time(self):
  289. """Set gauge to the current unixtime."""
  290. self.set(time.time())
  291. def track_inprogress(self):
  292. """Track inprogress blocks of code or functions.
  293. Can be used as a function decorator or context manager.
  294. Increments the gauge when the code is entered,
  295. and decrements when it is exited.
  296. """
  297. self._raise_if_not_observable()
  298. return InprogressTracker(self)
  299. def time(self):
  300. """Time a block of code or function, and set the duration in seconds.
  301. Can be used as a function decorator or context manager.
  302. """
  303. self._raise_if_not_observable()
  304. return Timer(self.set)
  305. def set_function(self, f):
  306. """Call the provided function to return the Gauge value.
  307. The function must return a float, and may be called from
  308. multiple threads. All other methods of the Gauge become NOOPs.
  309. """
  310. def samples(self):
  311. return (('', {}, float(f())),)
  312. self._child_samples = create_bound_method(samples, self)
  313. def _child_samples(self):
  314. return (('', {}, self._value.get()),)
  315. class Summary(MetricWrapperBase):
  316. """A Summary tracks the size and number of events.
  317. Example use cases for Summaries:
  318. - Response latency
  319. - Request size
  320. Example for a Summary:
  321. from prometheus_client import Summary
  322. s = Summary('request_size_bytes', 'Request size (bytes)')
  323. s.observe(512) # Observe 512 (bytes)
  324. Example for a Summary using time:
  325. from prometheus_client import Summary
  326. REQUEST_TIME = Summary('response_latency_seconds', 'Response latency (seconds)')
  327. @REQUEST_TIME.time()
  328. def create_response(request):
  329. '''A dummy function'''
  330. time.sleep(1)
  331. Example for using the same Summary object as a context manager:
  332. with REQUEST_TIME.time():
  333. pass # Logic to be timed
  334. """
  335. _type = 'summary'
  336. _reserved_labelnames = ['quantile']
  337. def _metric_init(self):
  338. self._count = values.ValueClass(self._type, self._name, self._name + '_count', self._labelnames,
  339. self._labelvalues)
  340. self._sum = values.ValueClass(self._type, self._name, self._name + '_sum', self._labelnames, self._labelvalues)
  341. self._created = time.time()
  342. def observe(self, amount):
  343. """Observe the given amount."""
  344. self._count.inc(1)
  345. self._sum.inc(amount)
  346. def time(self):
  347. """Time a block of code or function, and observe the duration in seconds.
  348. Can be used as a function decorator or context manager.
  349. """
  350. self._raise_if_not_observable()
  351. return Timer(self.observe)
  352. def _child_samples(self):
  353. return (
  354. ('_count', {}, self._count.get()),
  355. ('_sum', {}, self._sum.get()),
  356. ('_created', {}, self._created))
  357. class Histogram(MetricWrapperBase):
  358. """A Histogram tracks the size and number of events in buckets.
  359. You can use Histograms for aggregatable calculation of quantiles.
  360. Example use cases:
  361. - Response latency
  362. - Request size
  363. Example for a Histogram:
  364. from prometheus_client import Histogram
  365. h = Histogram('request_size_bytes', 'Request size (bytes)')
  366. h.observe(512) # Observe 512 (bytes)
  367. Example for a Histogram using time:
  368. from prometheus_client import Histogram
  369. REQUEST_TIME = Histogram('response_latency_seconds', 'Response latency (seconds)')
  370. @REQUEST_TIME.time()
  371. def create_response(request):
  372. '''A dummy function'''
  373. time.sleep(1)
  374. Example of using the same Histogram object as a context manager:
  375. with REQUEST_TIME.time():
  376. pass # Logic to be timed
  377. The default buckets are intended to cover a typical web/rpc request from milliseconds to seconds.
  378. They can be overridden by passing `buckets` keyword argument to `Histogram`.
  379. """
  380. _type = 'histogram'
  381. _reserved_labelnames = ['le']
  382. DEFAULT_BUCKETS = (.005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5, 5.0, 7.5, 10.0, INF)
  383. def __init__(self,
  384. name,
  385. documentation,
  386. labelnames=(),
  387. namespace='',
  388. subsystem='',
  389. unit='',
  390. registry=REGISTRY,
  391. labelvalues=None,
  392. buckets=DEFAULT_BUCKETS,
  393. ):
  394. self._prepare_buckets(buckets)
  395. super(Histogram, self).__init__(
  396. name=name,
  397. documentation=documentation,
  398. labelnames=labelnames,
  399. namespace=namespace,
  400. subsystem=subsystem,
  401. unit=unit,
  402. registry=registry,
  403. labelvalues=labelvalues,
  404. )
  405. self._kwargs['buckets'] = buckets
  406. def _prepare_buckets(self, buckets):
  407. buckets = [float(b) for b in buckets]
  408. if buckets != sorted(buckets):
  409. # This is probably an error on the part of the user,
  410. # so raise rather than sorting for them.
  411. raise ValueError('Buckets not in sorted order')
  412. if buckets and buckets[-1] != INF:
  413. buckets.append(INF)
  414. if len(buckets) < 2:
  415. raise ValueError('Must have at least two buckets')
  416. self._upper_bounds = buckets
  417. def _metric_init(self):
  418. self._buckets = []
  419. self._created = time.time()
  420. bucket_labelnames = self._labelnames + ('le',)
  421. self._sum = values.ValueClass(self._type, self._name, self._name + '_sum', self._labelnames, self._labelvalues)
  422. for b in self._upper_bounds:
  423. self._buckets.append(values.ValueClass(
  424. self._type,
  425. self._name,
  426. self._name + '_bucket',
  427. bucket_labelnames,
  428. self._labelvalues + (floatToGoString(b),))
  429. )
  430. def observe(self, amount):
  431. """Observe the given amount."""
  432. self._sum.inc(amount)
  433. for i, bound in enumerate(self._upper_bounds):
  434. if amount <= bound:
  435. self._buckets[i].inc(1)
  436. break
  437. def time(self):
  438. """Time a block of code or function, and observe the duration in seconds.
  439. Can be used as a function decorator or context manager.
  440. """
  441. return Timer(self.observe)
  442. def _child_samples(self):
  443. samples = []
  444. acc = 0
  445. for i, bound in enumerate(self._upper_bounds):
  446. acc += self._buckets[i].get()
  447. samples.append(('_bucket', {'le': floatToGoString(bound)}, acc))
  448. samples.append(('_count', {}, acc))
  449. if self._upper_bounds[0] >= 0:
  450. samples.append(('_sum', {}, self._sum.get()))
  451. samples.append(('_created', {}, self._created))
  452. return tuple(samples)
  453. class Info(MetricWrapperBase):
  454. """Info metric, key-value pairs.
  455. Examples of Info include:
  456. - Build information
  457. - Version information
  458. - Potential target metadata
  459. Example usage:
  460. from prometheus_client import Info
  461. i = Info('my_build', 'Description of info')
  462. i.info({'version': '1.2.3', 'buildhost': 'foo@bar'})
  463. Info metrics do not work in multiprocess mode.
  464. """
  465. _type = 'info'
  466. def _metric_init(self):
  467. self._labelname_set = set(self._labelnames)
  468. self._lock = Lock()
  469. self._value = {}
  470. def info(self, val):
  471. """Set info metric."""
  472. if self._labelname_set.intersection(val.keys()):
  473. raise ValueError('Overlapping labels for Info metric, metric: %s child: %s' % (
  474. self._labelnames, val))
  475. with self._lock:
  476. self._value = dict(val)
  477. def _child_samples(self):
  478. with self._lock:
  479. return (('_info', self._value, 1.0,),)
  480. class Enum(MetricWrapperBase):
  481. """Enum metric, which of a set of states is true.
  482. Example usage:
  483. from prometheus_client import Enum
  484. e = Enum('task_state', 'Description of enum',
  485. states=['starting', 'running', 'stopped'])
  486. e.state('running')
  487. The first listed state will be the default.
  488. Enum metrics do not work in multiprocess mode.
  489. """
  490. _type = 'stateset'
  491. def __init__(self,
  492. name,
  493. documentation,
  494. labelnames=(),
  495. namespace='',
  496. subsystem='',
  497. unit='',
  498. registry=REGISTRY,
  499. labelvalues=None,
  500. states=None,
  501. ):
  502. super(Enum, self).__init__(
  503. name=name,
  504. documentation=documentation,
  505. labelnames=labelnames,
  506. namespace=namespace,
  507. subsystem=subsystem,
  508. unit=unit,
  509. registry=registry,
  510. labelvalues=labelvalues,
  511. )
  512. if name in labelnames:
  513. raise ValueError('Overlapping labels for Enum metric: %s' % (name,))
  514. if not states:
  515. raise ValueError('No states provided for Enum metric: %s' % (name,))
  516. self._kwargs['states'] = self._states = states
  517. def _metric_init(self):
  518. self._value = 0
  519. self._lock = Lock()
  520. def state(self, state):
  521. """Set enum metric state."""
  522. self._raise_if_not_observable()
  523. with self._lock:
  524. self._value = self._states.index(state)
  525. def _child_samples(self):
  526. with self._lock:
  527. return [
  528. ('', {self._name: s}, 1 if i == self._value else 0,)
  529. for i, s
  530. in enumerate(self._states)
  531. ]