data.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.data
  4. ~~~~~~~~~~~~~~~~~~~~
  5. Lexers for data file format.
  6. :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. import re
  10. from pygments.lexer import RegexLexer, ExtendedRegexLexer, LexerContext, \
  11. include, bygroups, inherit
  12. from pygments.token import Text, Comment, Keyword, Name, String, Number, \
  13. Punctuation, Literal, Error
  14. __all__ = ['YamlLexer', 'JsonLexer', 'JsonBareObjectLexer', 'JsonLdLexer']
  class YamlLexerContext(LexerContext):
      """Lexer context that additionally tracks YAML indentation state."""

      def __init__(self, *args, **kwds):
          super(YamlLexerContext, self).__init__(*args, **kwds)
          # indentation required for the lines of the block scalar being
          # read; None while it has not been determined
          self.block_scalar_indent = None
          # candidate indentation collected while scanning the current line
          self.next_indent = 0
          # indentation of the current block; -1 means none has been set
          self.indent = -1
          # outer indentation levels, pushed whenever a deeper one is entered
          self.indent_stack = []
  class YamlLexer(ExtendedRegexLexer):
      """
      Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization
      language.

      .. versionadded:: 0.11
      """

      name = 'YAML'
      aliases = ['yaml']
      filenames = ['*.yaml', '*.yml']
      mimetypes = ['text/x-yaml']

      # The functions below are callback factories.  They are called while the
      # class body executes (inside ``tokens``) and each returns a generator
      # callback with the ``(lexer, match, context)`` signature.

      def something(token_class):
          """Emit the match as `token_class`, but never an empty token."""
          def callback(lexer, match, context):
              matched = match.group()
              if matched:
                  yield match.start(), token_class, matched
                  context.pos = match.end()
          return callback

      def reset_indent(token_class):
          """Forget all indentation levels, then emit the match."""
          def callback(lexer, match, context):
              # back to the same values a fresh YamlLexerContext starts with
              context.block_scalar_indent = None
              context.next_indent = 0
              context.indent = -1
              context.indent_stack = []
              yield match.start(), token_class, match.group()
              context.pos = match.end()
          return callback

      def save_indent(token_class, start=False):
          """Record a candidate indentation level.

          With `start` set, the match is the leading whitespace of a line and
          replaces the candidate; otherwise its length is added to it.
          """
          def callback(lexer, match, context):
              spaces = match.group()
              surplus = ''
              if not start:
                  context.next_indent += len(spaces)
              else:
                  context.next_indent = len(spaces)
                  if context.next_indent < context.indent:
                      # dedent: unwind saved levels until one fits
                      while context.next_indent < context.indent:
                          context.indent = context.indent_stack.pop()
                      if context.next_indent > context.indent:
                          # the line sits between two known levels; the part
                          # beyond the outer level is flagged as an error
                          surplus = spaces[context.indent:]
                          spaces = spaces[:context.indent]
              offset = match.start()
              if spaces:
                  yield offset, token_class, spaces
              if surplus:
                  yield offset + len(spaces), token_class.Error, surplus
              context.pos = match.end()
          return callback

      def set_indent(token_class, implicit=False):
          """Promote the saved candidate to the current indentation level.

          Unless `implicit` is set, the matched indicator's length is added to
          the candidate for whatever follows on the line.
          """
          def callback(lexer, match, context):
              indicator = match.group()
              if context.next_indent > context.indent:
                  context.indent_stack.append(context.indent)
                  context.indent = context.next_indent
              if not implicit:
                  context.next_indent += len(indicator)
              yield match.start(), token_class, indicator
              context.pos = match.end()
          return callback

      def set_block_scalar_indent(token_class):
          """Apply an explicit indentation indicator of a block scalar header.

          Group 1 of the match, when present, is the indicator digit.
          """
          def callback(lexer, match, context):
              header = match.group()
              context.block_scalar_indent = None
              if not header:
                  return
              digit = match.group(1)
              if digit:
                  # the indicator is relative to the current block level
                  context.block_scalar_indent = (
                      max(context.indent, 0) + int(digit))
              yield match.start(), token_class, header
              context.pos = match.end()
          return callback

      def parse_block_scalar_empty_line(indent_token_class,
                                        content_token_class):
          """Handle a whitespace-only line inside a block scalar.

          Spaces beyond the block scalar indentation are emitted as content.
          """
          def callback(lexer, match, context):
              line = match.group()
              width = context.block_scalar_indent
              begin = match.start()
              if width is not None and len(line) > width:
                  yield begin, indent_token_class, line[:width]
                  yield begin + width, content_token_class, line[width:]
              elif line:
                  yield begin, indent_token_class, line
              context.pos = match.end()
          return callback

      def parse_block_scalar_indent(token_class):
          """Handle the leading spaces of a block scalar line.

          The first content line fixes the indentation when the header gave
          none.  An under-indented line pops two states and is not consumed.
          """
          def callback(lexer, match, context):
              spaces = match.group()
              expected = context.block_scalar_indent
              if expected is None:
                  too_shallow = len(spaces) <= max(context.indent, 0)
              else:
                  too_shallow = len(spaces) < expected
              if too_shallow:
                  # leave without advancing context.pos
                  context.stack.pop()
                  context.stack.pop()
                  return
              if expected is None:
                  context.block_scalar_indent = len(spaces)
              if spaces:
                  yield match.start(), token_class, spaces
                  context.pos = match.end()
          return callback

      def parse_plain_scalar_indent(token_class):
          """Handle the leading spaces of a plain scalar continuation line.

          A line indented no deeper than the current block pops two states
          and is not consumed.
          """
          def callback(lexer, match, context):
              spaces = match.group()
              if len(spaces) > context.indent:
                  if spaces:
                      yield match.start(), token_class, spaces
                      context.pos = match.end()
              else:
                  # leave without advancing context.pos
                  context.stack.pop()
                  context.stack.pop()
          return callback

      tokens = {
          # top level: the start of every line
          'root': [
              # whitespace before a comment or the line end
              (r'[ ]+(?=#|$)', Text),
              # line breaks
              (r'\n+', Text),
              # comment
              (r'#[^\n]*', Comment.Single),
              # '%YAML' directive
              (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
              # '%TAG' directive
              (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
              # document start ('---') and document end ('...') markers
              (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
               'block-line'),
              # leading indentation of a line that has content
              (r'[ ]*(?!\s|$)', save_indent(Text, start=True),
               ('block-line', 'indentation')),
          ],

          # remainder of a line after a directive or block scalar header
          'ignored-line': [
              # whitespace before a comment or the line end
              (r'[ ]+(?=#|$)', Text),
              # comment
              (r'#[^\n]*', Comment.Single),
              # line break
              (r'\n', Text, '#pop:2'),
          ],

          # arguments of the '%YAML' directive
          'yaml-directive': [
              # version number
              (r'([ ]+)([0-9]+\.[0-9]+)',
               bygroups(Text, Number), 'ignored-line'),
          ],

          # arguments of the '%TAG' directive
          'tag-directive': [
              # tag handle followed by its prefix
              (r'([ ]+)(!|![\w-]*!)'
               r'([ ]+)(!|!?[\w;/?:@&=+$,.!~*\'()\[\]%-]+)',
               bygroups(Text, Keyword.Type, Text, Keyword.Type),
               'ignored-line'),
          ],

          # block collection indicators and the spaces around them
          'indentation': [
              # nothing but trailing whitespace
              (r'[ ]*$', something(Text), '#pop:2'),
              # whitespace preceding a block collection indicator
              (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)),
              # block collection indicator
              (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
              # the beginning of a block line
              (r'[ ]*', save_indent(Text), '#pop'),
          ],

          # an indented line in the block context
          'block-line': [
              # end of the line
              (r'[ ]*(?=#|$)', something(Text), '#pop'),
              # whitespace between tokens
              (r'[ ]+', Text),
              # tags, anchors and aliases
              include('descriptors'),
              # block collections and scalars
              include('block-nodes'),
              # flow collections and quoted scalars
              include('flow-nodes'),
              # plain scalar
              (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`-]|[?:-]\S)',
               something(Name.Variable),
               'plain-scalar-in-block-context'),
          ],

          # tags, anchors and aliases
          'descriptors': [
              # full-form tag
              (r'!<[\w#;/?:@&=+$,.!~*\'()\[\]%-]+>', Keyword.Type),
              # tag written as '!', '!suffix' or '!handle!suffix'
              (r'!(?:[\w-]+!)?'
               r'[\w#;/?:@&=+$,.!~*\'()\[\]%-]+', Keyword.Type),
              # anchor
              (r'&[\w-]+', Name.Label),
              # alias
              (r'\*[\w-]+', Name.Variable),
          ],

          # block collections and scalars
          'block-nodes': [
              # implicit key
              (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
              # literal ('|') and folded ('>') scalars
              (r'[|>]', Punctuation.Indicator,
               ('block-scalar-content', 'block-scalar-header')),
          ],

          # flow collections and quoted scalars
          'flow-nodes': [
              # flow sequence
              (r'\[', Punctuation.Indicator, 'flow-sequence'),
              # flow mapping
              (r'\{', Punctuation.Indicator, 'flow-mapping'),
              # single-quoted scalar
              (r'\'', String, 'single-quoted-scalar'),
              # double-quoted scalar
              (r'\"', String, 'double-quoted-scalar'),
          ],

          # rules shared by flow sequences and flow mappings
          'flow-collection': [
              # whitespace
              (r'[ ]+', Text),
              # line breaks
              (r'\n+', Text),
              # comment
              (r'#[^\n]*', Comment.Single),
              # simple indicators
              (r'[?:,]', Punctuation.Indicator),
              # tags, anchors and aliases
              include('descriptors'),
              # nested collections and quoted scalars
              include('flow-nodes'),
              # plain scalar
              (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`])',
               something(Name.Variable),
               'plain-scalar-in-flow-context'),
          ],

          # flow sequence, delimited by '[' and ']'
          'flow-sequence': [
              # shared flow collection rules
              include('flow-collection'),
              # closing indicator
              (r'\]', Punctuation.Indicator, '#pop'),
          ],

          # flow mapping, delimited by '{' and '}'
          'flow-mapping': [
              # shared flow collection rules
              include('flow-collection'),
              # closing indicator
              (r'\}', Punctuation.Indicator, '#pop'),
          ],

          # the lines of a literal or folded scalar
          'block-scalar-content': [
              # line break
              (r'\n', Text),
              # whitespace-only line
              (r'^[ ]+$',
               parse_block_scalar_empty_line(Text, Name.Constant)),
              # indentation spaces (the callback may leave this state)
              (r'^[ ]*', parse_block_scalar_indent(Text)),
              # line content
              (r'[\S\t ]+', Name.Constant),
          ],

          # the header following a '|' or '>' indicator
          'block-scalar-header': [
              # indentation indicator followed by chomping flag
              (r'([1-9])?[+-]?(?=[ ]|$)',
               set_block_scalar_indent(Punctuation.Indicator),
               'ignored-line'),
              # chomping flag followed by indentation indicator
              (r'[+-]?([1-9])?(?=[ ]|$)',
               set_block_scalar_indent(Punctuation.Indicator),
               'ignored-line'),
          ],

          # ignored and significant whitespace in quoted scalars
          'quoted-scalar-whitespaces': [
              # leading and trailing whitespace is ignored
              (r'^[ ]+', Text),
              (r'[ ]+$', Text),
              # line breaks are ignored
              (r'\n+', Text),
              # any other whitespace belongs to the value
              (r'[ ]+', Name.Variable),
          ],

          # single-quoted scalars
          'single-quoted-scalar': [
              # whitespace and line break rules
              include('quoted-scalar-whitespaces'),
              # an escaped quote character
              (r'\'\'', String.Escape),
              # regular non-whitespace characters
              (r'[^\s\']+', String),
              # closing quote
              (r'\'', String, '#pop'),
          ],

          # double-quoted scalars
          'double-quoted-scalar': [
              # whitespace and line break rules
              include('quoted-scalar-whitespaces'),
              # escaped special characters
              (r'\\[0abt\tn\nvfre "\\N_LP]', String),
              # escape codes
              (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
               String.Escape),
              # regular non-whitespace characters
              (r'[^\s"\\]+', String),
              # closing quote
              (r'"', String, '#pop'),
          ],

          # the start of a new line while scanning a plain scalar
          'plain-scalar-in-block-context-new-line': [
              # whitespace-only lines
              (r'^[ ]+$', Text),
              # line breaks
              (r'\n+', Text),
              # document start and document end markers
              (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'),
              # indentation spaces (the callback may leave the block line)
              (r'^[ ]*', parse_plain_scalar_indent(Text), '#pop'),
          ],

          # a plain scalar in the block context
          'plain-scalar-in-block-context': [
              # the scalar ends at a ':' indicator
              (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'),
              # the scalar ends at whitespace followed by a comment
              (r'[ ]+(?=#)', Text, '#pop'),
              # trailing whitespace is ignored
              (r'[ ]+$', Text),
              # line breaks are ignored
              (r'\n+', Text, 'plain-scalar-in-block-context-new-line'),
              # any other whitespace belongs to the value
              (r'[ ]+', Literal.Scalar.Plain),
              # regular non-whitespace characters
              (r'(?::(?!\s)|[^\s:])+', Literal.Scalar.Plain),
          ],

          # a plain scalar in the flow context
          'plain-scalar-in-flow-context': [
              # the scalar ends at an indicator character
              (r'[ ]*(?=[,:?\[\]{}])', something(Text), '#pop'),
              # the scalar ends at a comment
              (r'[ ]+(?=#)', Text, '#pop'),
              # leading and trailing whitespace is ignored
              (r'^[ ]+', Text),
              (r'[ ]+$', Text),
              # line breaks are ignored
              (r'\n+', Text),
              # any other whitespace belongs to the value
              (r'[ ]+', Name.Variable),
              # regular non-whitespace characters
              (r'[^\s,:?\[\]{}]+', Name.Variable),
          ],
      }

      def get_tokens_unprocessed(self, text=None, context=None):
          """Tokenize `text`, creating a YamlLexerContext when none is given."""
          if context is None:
              # the callbacks above need the indentation-tracking attributes
              context = YamlLexerContext(text, 0)
          return super(YamlLexer, self).get_tokens_unprocessed(text, context)
  class JsonLexer(RegexLexer):
      """
      For JSON data structures.

      .. versionadded:: 1.5
      """

      name = 'JSON'
      aliases = ['json']
      filenames = ['*.json']
      mimetypes = ['application/json']

      # DOTALL is required by `string_part`: it lets '.' match a line break
      # that follows a backslash inside a string.
      flags = re.DOTALL

      # integer part of a number
      int_part = r'-?(0|[1-9]\d*)'

      # fractional part of a number
      frac_part = r'\.\d+'

      # exponential part of a number
      exp_part = r'[eE](\+|-)?\d+'

      # A double-quoted string.  A backslash always consumes the character
      # after it and the plain-character class excludes the backslash, so
      # every input can be matched in only one way.  The previous pattern,
      # "(\\\\|\\"|[^"])*", let [^"] match a backslash as well; on an
      # unterminated string containing a run of backslashes that caused
      # exponential backtracking, and it could end a string at an escaped
      # quote.  Properly terminated strings match exactly as before.
      string_part = r'"(\\.|[^"\\])*"'

      tokens = {
          'whitespace': [
              (r'\s+', Text),
          ],

          # represents a simple terminal value
          'simplevalue': [
              (r'(true|false|null)\b', Keyword.Constant),
              # floats must be tried before plain integers
              (('%(int_part)s(%(frac_part)s%(exp_part)s|'
                '%(exp_part)s|%(frac_part)s)') % vars(),
               Number.Float),
              (int_part, Number.Integer),
              (string_part, String.Double),
          ],

          # the right hand side of an object, after the attribute name
          'objectattribute': [
              include('value'),
              (r':', Punctuation),
              # comma terminates the attribute but expects more
              (r',', Punctuation, '#pop'),
              # a closing bracket terminates the entire object, so pop twice
              (r'\}', Punctuation, '#pop:2'),
          ],

          # a json object - { attr, attr, ... }
          'objectvalue': [
              include('whitespace'),
              # attribute name
              (string_part, Name.Tag, 'objectattribute'),
              (r'\}', Punctuation, '#pop'),
          ],

          # json array - [ value, value, ... ]
          'arrayvalue': [
              include('whitespace'),
              include('value'),
              (r',', Punctuation),
              (r'\]', Punctuation, '#pop'),
          ],

          # a json value - either a simple value or a complex value
          # (object or array)
          'value': [
              include('whitespace'),
              include('simplevalue'),
              (r'\{', Punctuation, 'objectvalue'),
              (r'\[', Punctuation, 'arrayvalue'),
          ],

          # the root of a json document would be a value
          'root': [
              include('value'),
          ],
      }
  class JsonBareObjectLexer(JsonLexer):
      """
      For JSON data structures (with missing object curly braces).

      .. versionadded:: 2.2
      """

      name = 'JSONBareObject'
      aliases = ['json-object']
      filenames = []
      mimetypes = ['application/json-object']

      tokens = {
          # the document body is lexed with the object rules directly
          'root': [
              # a closing brace is flagged as an error here
              (r'\}', Error),
              include('objectvalue'),
          ],
          # same as the parent state, except that a closing brace is an error
          'objectattribute': [
              (r'\}', Error),
              inherit,
          ],
      }
  class JsonLdLexer(JsonLexer):
      """
      For `JSON-LD <http://json-ld.org/>`_ linked data.

      .. versionadded:: 2.0
      """

      name = 'JSON-LD'
      aliases = ['jsonld', 'json-ld']
      filenames = ['*.jsonld']
      mimetypes = ['application/ld+json']

      tokens = {
          'objectvalue': [
              # the listed '@'-prefixed attribute names get a dedicated token
              # type; this rule is tried before the inherited ones
              (r'"@(context|id|value|language|type|container|list|set|reverse|'
               r'index|base|vocab|graph)"',
               Name.Decorator, 'objectattribute'),
              inherit,
          ],
      }
  487. }