# parser.py
from __future__ import unicode_literals

import re

from .metrics_core import Metric
from .samples import Sample

try:
    import StringIO
except ImportError:
    # Python 3
    import io as StringIO
  10. def text_string_to_metric_families(text):
  11. """Parse Prometheus text format from a unicode string.
  12. See text_fd_to_metric_families.
  13. """
  14. for metric_family in text_fd_to_metric_families(StringIO.StringIO(text)):
  15. yield metric_family
  16. ESCAPE_SEQUENCES = {
  17. '\\\\': '\\',
  18. '\\n': '\n',
  19. '\\"': '"',
  20. }
  21. def replace_escape_sequence(match):
  22. return ESCAPE_SEQUENCES[match.group(0)]
  23. HELP_ESCAPING_RE = re.compile(r'\\[\\n]')
  24. ESCAPING_RE = re.compile(r'\\[\\n"]')
  25. def _replace_help_escaping(s):
  26. return HELP_ESCAPING_RE.sub(replace_escape_sequence, s)
  27. def _replace_escaping(s):
  28. return ESCAPING_RE.sub(replace_escape_sequence, s)
  29. def _is_character_escaped(s, charpos):
  30. num_bslashes = 0
  31. while (charpos > num_bslashes and
  32. s[charpos - 1 - num_bslashes] == '\\'):
  33. num_bslashes += 1
  34. return num_bslashes % 2 == 1
  35. def _parse_labels(labels_string):
  36. labels = {}
  37. # Return if we don't have valid labels
  38. if "=" not in labels_string:
  39. return labels
  40. escaping = False
  41. if "\\" in labels_string:
  42. escaping = True
  43. # Copy original labels
  44. sub_labels = labels_string
  45. try:
  46. # Process one label at a time
  47. while sub_labels:
  48. # The label name is before the equal
  49. value_start = sub_labels.index("=")
  50. label_name = sub_labels[:value_start]
  51. sub_labels = sub_labels[value_start + 1:].lstrip()
  52. # Find the first quote after the equal
  53. quote_start = sub_labels.index('"') + 1
  54. value_substr = sub_labels[quote_start:]
  55. # Find the last unescaped quote
  56. i = 0
  57. while i < len(value_substr):
  58. i = value_substr.index('"', i)
  59. if not _is_character_escaped(value_substr, i):
  60. break
  61. i += 1
  62. # The label value is between the first and last quote
  63. quote_end = i + 1
  64. label_value = sub_labels[quote_start:quote_end]
  65. # Replace escaping if needed
  66. if escaping:
  67. label_value = _replace_escaping(label_value)
  68. labels[label_name.strip()] = label_value
  69. # Remove the processed label from the sub-slice for next iteration
  70. sub_labels = sub_labels[quote_end + 1:]
  71. next_comma = sub_labels.find(",") + 1
  72. sub_labels = sub_labels[next_comma:].lstrip()
  73. return labels
  74. except ValueError:
  75. raise ValueError("Invalid labels: %s" % labels_string)
  76. # If we have multiple values only consider the first
  77. def _parse_value_and_timestamp(s):
  78. s = s.lstrip()
  79. separator = " "
  80. if separator not in s:
  81. separator = "\t"
  82. values = [value.strip() for value in s.split(separator) if value.strip()]
  83. if not values:
  84. return float(s), None
  85. value = float(values[0])
  86. timestamp = (float(values[-1])/1000) if len(values) > 1 else None
  87. return value, timestamp
  88. def _parse_sample(text):
  89. # Detect the labels in the text
  90. try:
  91. label_start, label_end = text.index("{"), text.rindex("}")
  92. # The name is before the labels
  93. name = text[:label_start].strip()
  94. # We ignore the starting curly brace
  95. label = text[label_start + 1:label_end]
  96. # The value is after the label end (ignoring curly brace and space)
  97. value, timestamp = _parse_value_and_timestamp(text[label_end + 2:])
  98. return Sample(name, _parse_labels(label), value, timestamp)
  99. # We don't have labels
  100. except ValueError:
  101. # Detect what separator is used
  102. separator = " "
  103. if separator not in text:
  104. separator = "\t"
  105. name_end = text.index(separator)
  106. name = text[:name_end]
  107. # The value is after the name
  108. value, timestamp = _parse_value_and_timestamp(text[name_end:])
  109. return Sample(name, {}, value, timestamp)
  110. def text_fd_to_metric_families(fd):
  111. """Parse Prometheus text format from a file descriptor.
  112. This is a laxer parser than the main Go parser,
  113. so successful parsing does not imply that the parsed
  114. text meets the specification.
  115. Yields Metric's.
  116. """
  117. name = ''
  118. documentation = ''
  119. typ = 'untyped'
  120. samples = []
  121. allowed_names = []
  122. def build_metric(name, documentation, typ, samples):
  123. # Munge counters into OpenMetrics representation
  124. # used internally.
  125. if typ == 'counter':
  126. if name.endswith('_total'):
  127. name = name[:-6]
  128. else:
  129. new_samples = []
  130. for s in samples:
  131. new_samples.append(Sample(s[0] + '_total', *s[1:]))
  132. samples = new_samples
  133. metric = Metric(name, documentation, typ)
  134. metric.samples = samples
  135. return metric
  136. for line in fd:
  137. line = line.strip()
  138. if line.startswith('#'):
  139. parts = line.split(None, 3)
  140. if len(parts) < 2:
  141. continue
  142. if parts[1] == 'HELP':
  143. if parts[2] != name:
  144. if name != '':
  145. yield build_metric(name, documentation, typ, samples)
  146. # New metric
  147. name = parts[2]
  148. typ = 'untyped'
  149. samples = []
  150. allowed_names = [parts[2]]
  151. if len(parts) == 4:
  152. documentation = _replace_help_escaping(parts[3])
  153. else:
  154. documentation = ''
  155. elif parts[1] == 'TYPE':
  156. if parts[2] != name:
  157. if name != '':
  158. yield build_metric(name, documentation, typ, samples)
  159. # New metric
  160. name = parts[2]
  161. documentation = ''
  162. samples = []
  163. typ = parts[3]
  164. allowed_names = {
  165. 'counter': [''],
  166. 'gauge': [''],
  167. 'summary': ['_count', '_sum', ''],
  168. 'histogram': ['_count', '_sum', '_bucket'],
  169. }.get(typ, [''])
  170. allowed_names = [name + n for n in allowed_names]
  171. else:
  172. # Ignore other comment tokens
  173. pass
  174. elif line == '':
  175. # Ignore blank lines
  176. pass
  177. else:
  178. sample = _parse_sample(line)
  179. if sample.name not in allowed_names:
  180. if name != '':
  181. yield build_metric(name, documentation, typ, samples)
  182. # New metric, yield immediately as untyped singleton
  183. name = ''
  184. documentation = ''
  185. typ = 'untyped'
  186. samples = []
  187. allowed_names = []
  188. yield build_metric(sample[0], documentation, typ, [sample])
  189. else:
  190. samples.append(sample)
  191. if name != '':
  192. yield build_metric(name, documentation, typ, samples)