metrics.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. from __future__ import absolute_import
  2. import logging
  3. import sys
  4. import time
  5. import threading
  6. from kafka.metrics import AnonMeasurable, KafkaMetric, MetricConfig, MetricName
  7. from kafka.metrics.stats import Sensor
  8. logger = logging.getLogger(__name__)
  9. class Metrics(object):
  10. """
  11. A registry of sensors and metrics.
  12. A metric is a named, numerical measurement. A sensor is a handle to
  13. record numerical measurements as they occur. Each Sensor has zero or
  14. more associated metrics. For example a Sensor might represent message
  15. sizes and we might associate with this sensor a metric for the average,
  16. maximum, or other statistics computed off the sequence of message sizes
  17. that are recorded by the sensor.
  18. Usage looks something like this:
  19. # set up metrics:
  20. metrics = Metrics() # the global repository of metrics and sensors
  21. sensor = metrics.sensor('message-sizes')
  22. metric_name = MetricName('message-size-avg', 'producer-metrics')
  23. sensor.add(metric_name, Avg())
  24. metric_name = MetricName('message-size-max', 'producer-metrics')
  25. sensor.add(metric_name, Max())
  26. # as messages are sent we record the sizes
  27. sensor.record(message_size);
  28. """
  29. def __init__(self, default_config=None, reporters=None,
  30. enable_expiration=False):
  31. """
  32. Create a metrics repository with a default config, given metric
  33. reporters and the ability to expire eligible sensors
  34. Arguments:
  35. default_config (MetricConfig, optional): The default config
  36. reporters (list of AbstractMetricsReporter, optional):
  37. The metrics reporters
  38. enable_expiration (bool, optional): true if the metrics instance
  39. can garbage collect inactive sensors, false otherwise
  40. """
  41. self._lock = threading.RLock()
  42. self._config = default_config or MetricConfig()
  43. self._sensors = {}
  44. self._metrics = {}
  45. self._children_sensors = {}
  46. self._reporters = reporters or []
  47. for reporter in self._reporters:
  48. reporter.init([])
  49. if enable_expiration:
  50. def expire_loop():
  51. while True:
  52. # delay 30 seconds
  53. time.sleep(30)
  54. self.ExpireSensorTask.run(self)
  55. metrics_scheduler = threading.Thread(target=expire_loop)
  56. # Creating a daemon thread to not block shutdown
  57. metrics_scheduler.daemon = True
  58. metrics_scheduler.start()
  59. self.add_metric(self.metric_name('count', 'kafka-metrics-count',
  60. 'total number of registered metrics'),
  61. AnonMeasurable(lambda config, now: len(self._metrics)))
  62. @property
  63. def config(self):
  64. return self._config
  65. @property
  66. def metrics(self):
  67. """
  68. Get all the metrics currently maintained and indexed by metricName
  69. """
  70. return self._metrics
  71. def metric_name(self, name, group, description='', tags=None):
  72. """
  73. Create a MetricName with the given name, group, description and tags,
  74. plus default tags specified in the metric configuration.
  75. Tag in tags takes precedence if the same tag key is specified in
  76. the default metric configuration.
  77. Arguments:
  78. name (str): The name of the metric
  79. group (str): logical group name of the metrics to which this
  80. metric belongs
  81. description (str, optional): A human-readable description to
  82. include in the metric
  83. tags (dict, optionals): additional key/value attributes of
  84. the metric
  85. """
  86. combined_tags = dict(self.config.tags)
  87. combined_tags.update(tags or {})
  88. return MetricName(name, group, description, combined_tags)
  89. def get_sensor(self, name):
  90. """
  91. Get the sensor with the given name if it exists
  92. Arguments:
  93. name (str): The name of the sensor
  94. Returns:
  95. Sensor: The sensor or None if no such sensor exists
  96. """
  97. if not name:
  98. raise ValueError('name must be non-empty')
  99. return self._sensors.get(name, None)
  100. def sensor(self, name, config=None,
  101. inactive_sensor_expiration_time_seconds=sys.maxsize,
  102. parents=None):
  103. """
  104. Get or create a sensor with the given unique name and zero or
  105. more parent sensors. All parent sensors will receive every value
  106. recorded with this sensor.
  107. Arguments:
  108. name (str): The name of the sensor
  109. config (MetricConfig, optional): A default configuration to use
  110. for this sensor for metrics that don't have their own config
  111. inactive_sensor_expiration_time_seconds (int, optional):
  112. If no value if recorded on the Sensor for this duration of
  113. time, it is eligible for removal
  114. parents (list of Sensor): The parent sensors
  115. Returns:
  116. Sensor: The sensor that is created
  117. """
  118. sensor = self.get_sensor(name)
  119. if sensor:
  120. return sensor
  121. with self._lock:
  122. sensor = self.get_sensor(name)
  123. if not sensor:
  124. sensor = Sensor(self, name, parents, config or self.config,
  125. inactive_sensor_expiration_time_seconds)
  126. self._sensors[name] = sensor
  127. if parents:
  128. for parent in parents:
  129. children = self._children_sensors.get(parent)
  130. if not children:
  131. children = []
  132. self._children_sensors[parent] = children
  133. children.append(sensor)
  134. logger.debug('Added sensor with name %s', name)
  135. return sensor
  136. def remove_sensor(self, name):
  137. """
  138. Remove a sensor (if it exists), associated metrics and its children.
  139. Arguments:
  140. name (str): The name of the sensor to be removed
  141. """
  142. sensor = self._sensors.get(name)
  143. if sensor:
  144. child_sensors = None
  145. with sensor._lock:
  146. with self._lock:
  147. val = self._sensors.pop(name, None)
  148. if val and val == sensor:
  149. for metric in sensor.metrics:
  150. self.remove_metric(metric.metric_name)
  151. logger.debug('Removed sensor with name %s', name)
  152. child_sensors = self._children_sensors.pop(sensor, None)
  153. if child_sensors:
  154. for child_sensor in child_sensors:
  155. self.remove_sensor(child_sensor.name)
  156. def add_metric(self, metric_name, measurable, config=None):
  157. """
  158. Add a metric to monitor an object that implements measurable.
  159. This metric won't be associated with any sensor.
  160. This is a way to expose existing values as metrics.
  161. Arguments:
  162. metricName (MetricName): The name of the metric
  163. measurable (AbstractMeasurable): The measurable that will be
  164. measured by this metric
  165. config (MetricConfig, optional): The configuration to use when
  166. measuring this measurable
  167. """
  168. # NOTE there was a lock here, but i don't think it's needed
  169. metric = KafkaMetric(metric_name, measurable, config or self.config)
  170. self.register_metric(metric)
  171. def remove_metric(self, metric_name):
  172. """
  173. Remove a metric if it exists and return it. Return None otherwise.
  174. If a metric is removed, `metric_removal` will be invoked
  175. for each reporter.
  176. Arguments:
  177. metric_name (MetricName): The name of the metric
  178. Returns:
  179. KafkaMetric: the removed `KafkaMetric` or None if no such
  180. metric exists
  181. """
  182. with self._lock:
  183. metric = self._metrics.pop(metric_name, None)
  184. if metric:
  185. for reporter in self._reporters:
  186. reporter.metric_removal(metric)
  187. return metric
  188. def add_reporter(self, reporter):
  189. """Add a MetricReporter"""
  190. with self._lock:
  191. reporter.init(list(self.metrics.values()))
  192. self._reporters.append(reporter)
  193. def register_metric(self, metric):
  194. with self._lock:
  195. if metric.metric_name in self.metrics:
  196. raise ValueError('A metric named "%s" already exists, cannot'
  197. ' register another one.' % metric.metric_name)
  198. self.metrics[metric.metric_name] = metric
  199. for reporter in self._reporters:
  200. reporter.metric_change(metric)
  201. class ExpireSensorTask(object):
  202. """
  203. This iterates over every Sensor and triggers a remove_sensor
  204. if it has expired. Package private for testing
  205. """
  206. @staticmethod
  207. def run(metrics):
  208. items = list(metrics._sensors.items())
  209. for name, sensor in items:
  210. # remove_sensor also locks the sensor object. This is fine
  211. # because synchronized is reentrant. There is however a minor
  212. # race condition here. Assume we have a parent sensor P and
  213. # child sensor C. Calling record on C would cause a record on
  214. # P as well. So expiration time for P == expiration time for C.
  215. # If the record on P happens via C just after P is removed,
  216. # that will cause C to also get removed. Since the expiration
  217. # time is typically high it is not expected to be a significant
  218. # concern and thus not necessary to optimize
  219. with sensor._lock:
  220. if sensor.has_expired():
  221. logger.debug('Removing expired sensor %s', name)
  222. metrics.remove_sensor(name)
  223. def close(self):
  224. """Close this metrics repository."""
  225. for reporter in self._reporters:
  226. reporter.close()