dsls.py 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.dsls
  4. ~~~~~~~~~~~~~~~~~~~~
  5. Lexers for various domain-specific languages.
  6. :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. import re
  10. from pygments.lexer import ExtendedRegexLexer, RegexLexer, bygroups, words, \
  11. include, default, this, using, combined
  12. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  13. Number, Punctuation, Literal, Whitespace
# Public API of this module: the lexer classes Pygments discovers and exports.
__all__ = ['ProtoBufLexer', 'BroLexer', 'PuppetLexer', 'RslLexer',
           'MscgenLexer', 'VGLLexer', 'AlloyLexer', 'PanLexer',
           'CrmshLexer', 'ThriftLexer', 'FlatlineLexer', 'SnowballLexer']
  17. class ProtoBufLexer(RegexLexer):
  18. """
  19. Lexer for `Protocol Buffer <http://code.google.com/p/protobuf/>`_
  20. definition files.
  21. .. versionadded:: 1.4
  22. """
  23. name = 'Protocol Buffer'
  24. aliases = ['protobuf', 'proto']
  25. filenames = ['*.proto']
  26. tokens = {
  27. 'root': [
  28. (r'[ \t]+', Text),
  29. (r'[,;{}\[\]()<>]', Punctuation),
  30. (r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single),
  31. (r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment.Multiline),
  32. (words((
  33. 'import', 'option', 'optional', 'required', 'repeated', 'default',
  34. 'packed', 'ctype', 'extensions', 'to', 'max', 'rpc', 'returns',
  35. 'oneof'), prefix=r'\b', suffix=r'\b'),
  36. Keyword),
  37. (words((
  38. 'int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64',
  39. 'fixed32', 'fixed64', 'sfixed32', 'sfixed64',
  40. 'float', 'double', 'bool', 'string', 'bytes'), suffix=r'\b'),
  41. Keyword.Type),
  42. (r'(true|false)\b', Keyword.Constant),
  43. (r'(package)(\s+)', bygroups(Keyword.Namespace, Text), 'package'),
  44. (r'(message|extend)(\s+)',
  45. bygroups(Keyword.Declaration, Text), 'message'),
  46. (r'(enum|group|service)(\s+)',
  47. bygroups(Keyword.Declaration, Text), 'type'),
  48. (r'\".*?\"', String),
  49. (r'\'.*?\'', String),
  50. (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
  51. (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
  52. (r'(\-?(inf|nan))\b', Number.Float),
  53. (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
  54. (r'0[0-7]+[LlUu]*', Number.Oct),
  55. (r'\d+[LlUu]*', Number.Integer),
  56. (r'[+-=]', Operator),
  57. (r'([a-zA-Z_][\w.]*)([ \t]*)(=)',
  58. bygroups(Name.Attribute, Text, Operator)),
  59. ('[a-zA-Z_][\w.]*', Name),
  60. ],
  61. 'package': [
  62. (r'[a-zA-Z_]\w*', Name.Namespace, '#pop'),
  63. default('#pop'),
  64. ],
  65. 'message': [
  66. (r'[a-zA-Z_]\w*', Name.Class, '#pop'),
  67. default('#pop'),
  68. ],
  69. 'type': [
  70. (r'[a-zA-Z_]\w*', Name, '#pop'),
  71. default('#pop'),
  72. ],
  73. }
class ThriftLexer(RegexLexer):
    """
    For `Thrift <https://thrift.apache.org/>`__ interface definitions.

    .. versionadded:: 2.1
    """
    name = 'Thrift'
    aliases = ['thrift']
    filenames = ['*.thrift']
    mimetypes = ['application/x-thrift']

    tokens = {
        'root': [
            include('whitespace'),
            include('comments'),
            # Strings enter a combined state: escape handling first, then
            # the quote-specific rules.
            (r'"', String.Double, combined('stringescape', 'dqs')),
            (r'\'', String.Single, combined('stringescape', 'sqs')),
            (r'(namespace)(\s+)',
             bygroups(Keyword.Namespace, Text.Whitespace), 'namespace'),
            (r'(enum|union|struct|service|exception)(\s+)',
             bygroups(Keyword.Declaration, Text.Whitespace), 'class'),
            (r'((?:(?:[^\W\d]|\$)[\w.\[\]$<>]*\s+)+?)'  # return arguments
             r'((?:[^\W\d]|\$)[\w$]*)'                  # method name
             r'(\s*)(\()',                              # signature start
             bygroups(using(this), Name.Function, Text, Operator)),
            include('keywords'),
            include('numbers'),
            (r'[&=]', Operator),
            (r'[:;,{}()<>\[\]]', Punctuation),
            (r'[a-zA-Z_](\.\w|\w)*', Name),
        ],
        'whitespace': [
            (r'\n', Text.Whitespace),
            (r'\s+', Text.Whitespace),
        ],
        'comments': [
            # Thrift allows shell-style, C++-style and C-style comments.
            (r'#.*$', Comment),
            (r'//.*?\n', Comment),
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
        ],
        'stringescape': [
            (r'\\([\\nrt"\'])', String.Escape),
        ],
        'dqs': [
            (r'"', String.Double, '#pop'),
            (r'[^\\"\n]+', String.Double),
        ],
        'sqs': [
            (r"'", String.Single, '#pop'),
            (r'[^\\\'\n]+', String.Single),
        ],
        'namespace': [
            # Consume the namespace name (or scope specifier '*') and pop.
            (r'[a-z*](\.\w|\w)*', Name.Namespace, '#pop'),
            default('#pop'),
        ],
        'class': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop'),
            default('#pop'),
        ],
        'keywords': [
            (r'(async|oneway|extends|throws|required|optional)\b', Keyword),
            (r'(true|false)\b', Keyword.Constant),
            (r'(const|typedef)\b', Keyword.Declaration),
            (words((
                'cpp_namespace', 'cpp_include', 'cpp_type', 'java_package',
                'cocoa_prefix', 'csharp_namespace', 'delphi_namespace',
                'php_namespace', 'py_module', 'perl_package',
                'ruby_namespace', 'smalltalk_category', 'smalltalk_prefix',
                'xsd_all', 'xsd_optional', 'xsd_nillable', 'xsd_namespace',
                'xsd_attrs', 'include'), suffix=r'\b'),
             Keyword.Namespace),
            (words((
                'void', 'bool', 'byte', 'i16', 'i32', 'i64', 'double',
                'string', 'binary', 'map', 'list', 'set', 'slist',
                'senum'), suffix=r'\b'),
             Keyword.Type),
            # Words reserved by Thrift because they are keywords in one of
            # its target languages.
            (words((
                'BEGIN', 'END', '__CLASS__', '__DIR__', '__FILE__',
                '__FUNCTION__', '__LINE__', '__METHOD__', '__NAMESPACE__',
                'abstract', 'alias', 'and', 'args', 'as', 'assert', 'begin',
                'break', 'case', 'catch', 'class', 'clone', 'continue',
                'declare', 'def', 'default', 'del', 'delete', 'do', 'dynamic',
                'elif', 'else', 'elseif', 'elsif', 'end', 'enddeclare',
                'endfor', 'endforeach', 'endif', 'endswitch', 'endwhile',
                'ensure', 'except', 'exec', 'finally', 'float', 'for',
                'foreach', 'function', 'global', 'goto', 'if', 'implements',
                'import', 'in', 'inline', 'instanceof', 'interface', 'is',
                'lambda', 'module', 'native', 'new', 'next', 'nil', 'not',
                'or', 'pass', 'public', 'print', 'private', 'protected',
                'raise', 'redo', 'rescue', 'retry', 'register', 'return',
                'self', 'sizeof', 'static', 'super', 'switch', 'synchronized',
                'then', 'this', 'throw', 'transient', 'try', 'undef',
                'unless', 'unsigned', 'until', 'use', 'var', 'virtual',
                'volatile', 'when', 'while', 'with', 'xor', 'yield'),
                prefix=r'\b', suffix=r'\b'),
             Keyword.Reserved),
        ],
        'numbers': [
            (r'[+-]?(\d+\.\d+([eE][+-]?\d+)?|\.?\d+[eE][+-]?\d+)', Number.Float),
            (r'[+-]?0x[0-9A-Fa-f]+', Number.Hex),
            (r'[+-]?[0-9]+', Number.Integer),
        ],
    }
class BroLexer(RegexLexer):
    """
    For `Bro <http://bro-ids.org/>`_ scripts.

    .. versionadded:: 1.5
    """
    name = 'Bro'
    aliases = ['bro']
    filenames = ['*.bro']

    # Regex fragments reused below: hex digit (underscores allowed),
    # float literal, and a hostname label.
    _hex = r'[0-9a-fA-F_]'
    _float = r'((\d*\.?\d+)|(\d+\.?\d*))([eE][-+]?\d+)?'
    _h = r'[A-Za-z0-9][-A-Za-z0-9]*'

    tokens = {
        'root': [
            # Whitespace
            (r'^@.*?\n', Comment.Preproc),
            (r'#.*?\n', Comment.Single),
            (r'\n', Text),
            (r'\s+', Text),
            (r'\\\n', Text),
            # Keywords
            (r'(add|alarm|break|case|const|continue|delete|do|else|enum|event'
             r'|export|for|function|if|global|hook|local|module|next'
             r'|of|print|redef|return|schedule|switch|type|when|while)\b', Keyword),
            (r'(addr|any|bool|count|counter|double|file|int|interval|net'
             r'|pattern|port|record|set|string|subnet|table|time|timer'
             r'|vector)\b', Keyword.Type),
            (r'(T|F)\b', Keyword.Constant),
            # '&'-prefixed attributes, e.g. &redef, &default.
            (r'(&)((?:add|delete|expire)_func|attr|(?:create|read|write)_expire'
             r'|default|disable_print_hook|raw_output|encrypt|group|log'
             r'|mergeable|optional|persistent|priority|redef'
             r'|rotate_(?:interval|size)|synchronized)\b',
             bygroups(Punctuation, Keyword)),
            (r'\s+module\b', Keyword.Namespace),
            # Addresses, ports and networks
            (r'\d+/(tcp|udp|icmp|unknown)\b', Number),
            (r'(\d+\.){3}\d+', Number),
            (r'(' + _hex + r'){7}' + _hex, Number),
            (r'0x' + _hex + r'(' + _hex + r'|:)*::(' + _hex + r'|:)*', Number),
            (r'((\d+|:)(' + _hex + r'|:)*)?::(' + _hex + r'|:)*', Number),
            (r'(\d+\.\d+\.|(\d+\.){2}\d+)', Number),
            # Hostnames
            (_h + r'(\.' + _h + r')+', String),
            # Numeric
            (_float + r'\s+(day|hr|min|sec|msec|usec)s?\b', Literal.Date),
            (r'0[xX]' + _hex, Number.Hex),
            (_float, Number.Float),
            (r'\d+', Number.Integer),
            (r'/', String.Regex, 'regex'),
            (r'"', String, 'string'),
            # Operators
            (r'[!%*/+:<=>?~|-]', Operator),
            (r'([-+=&|]{2}|[+=!><-]=)', Operator),
            (r'(in|match)\b', Operator.Word),
            (r'[{}()\[\]$.,;]', Punctuation),
            # Identfier
            (r'([_a-zA-Z]\w*)(::)', bygroups(Name, Name.Namespace)),
            (r'[a-zA-Z_]\w*', Name)
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),
            (r'\\\n', String),
            (r'\\', String)
        ],
        'regex': [
            (r'/', String.Regex, '#pop'),
            (r'\\[\\nt/]', String.Regex),  # String.Escape is too intense here.
            (r'[^\\/\n]+', String.Regex),
            (r'\\\n', String.Regex),
            (r'\\', String.Regex)
        ]
    }
  248. class PuppetLexer(RegexLexer):
  249. """
  250. For `Puppet <http://puppetlabs.com/>`__ configuration DSL.
  251. .. versionadded:: 1.6
  252. """
  253. name = 'Puppet'
  254. aliases = ['puppet']
  255. filenames = ['*.pp']
  256. tokens = {
  257. 'root': [
  258. include('comments'),
  259. include('keywords'),
  260. include('names'),
  261. include('numbers'),
  262. include('operators'),
  263. include('strings'),
  264. (r'[]{}:(),;[]', Punctuation),
  265. (r'[^\S\n]+', Text),
  266. ],
  267. 'comments': [
  268. (r'\s*#.*$', Comment),
  269. (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
  270. ],
  271. 'operators': [
  272. (r'(=>|\?|<|>|=|\+|-|/|\*|~|!|\|)', Operator),
  273. (r'(in|and|or|not)\b', Operator.Word),
  274. ],
  275. 'names': [
  276. ('[a-zA-Z_]\w*', Name.Attribute),
  277. (r'(\$\S+)(\[)(\S+)(\])', bygroups(Name.Variable, Punctuation,
  278. String, Punctuation)),
  279. (r'\$\S+', Name.Variable),
  280. ],
  281. 'numbers': [
  282. # Copypasta from the Python lexer
  283. (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float),
  284. (r'\d+[eE][+-]?[0-9]+j?', Number.Float),
  285. (r'0[0-7]+j?', Number.Oct),
  286. (r'0[xX][a-fA-F0-9]+', Number.Hex),
  287. (r'\d+L', Number.Integer.Long),
  288. (r'\d+j?', Number.Integer)
  289. ],
  290. 'keywords': [
  291. # Left out 'group' and 'require'
  292. # Since they're often used as attributes
  293. (words((
  294. 'absent', 'alert', 'alias', 'audit', 'augeas', 'before', 'case',
  295. 'check', 'class', 'computer', 'configured', 'contained',
  296. 'create_resources', 'crit', 'cron', 'debug', 'default',
  297. 'define', 'defined', 'directory', 'else', 'elsif', 'emerg',
  298. 'err', 'exec', 'extlookup', 'fail', 'false', 'file',
  299. 'filebucket', 'fqdn_rand', 'generate', 'host', 'if', 'import',
  300. 'include', 'info', 'inherits', 'inline_template', 'installed',
  301. 'interface', 'k5login', 'latest', 'link', 'loglevel',
  302. 'macauthorization', 'mailalias', 'maillist', 'mcx', 'md5',
  303. 'mount', 'mounted', 'nagios_command', 'nagios_contact',
  304. 'nagios_contactgroup', 'nagios_host', 'nagios_hostdependency',
  305. 'nagios_hostescalation', 'nagios_hostextinfo', 'nagios_hostgroup',
  306. 'nagios_service', 'nagios_servicedependency', 'nagios_serviceescalation',
  307. 'nagios_serviceextinfo', 'nagios_servicegroup', 'nagios_timeperiod',
  308. 'node', 'noop', 'notice', 'notify', 'package', 'present', 'purged',
  309. 'realize', 'regsubst', 'resources', 'role', 'router', 'running',
  310. 'schedule', 'scheduled_task', 'search', 'selboolean', 'selmodule',
  311. 'service', 'sha1', 'shellquote', 'split', 'sprintf',
  312. 'ssh_authorized_key', 'sshkey', 'stage', 'stopped', 'subscribe',
  313. 'tag', 'tagged', 'template', 'tidy', 'true', 'undef', 'unmounted',
  314. 'user', 'versioncmp', 'vlan', 'warning', 'yumrepo', 'zfs', 'zone',
  315. 'zpool'), prefix='(?i)', suffix=r'\b'),
  316. Keyword),
  317. ],
  318. 'strings': [
  319. (r'"([^"])*"', String),
  320. (r"'(\\'|[^'])*'", String),
  321. ],
  322. }
class RslLexer(RegexLexer):
    """
    `RSL <http://en.wikipedia.org/wiki/RAISE>`_ is the formal specification
    language used in RAISE (Rigorous Approach to Industrial Software Engineering)
    method.

    .. versionadded:: 2.0
    """
    name = 'RSL'
    aliases = ['rsl']
    filenames = ['*.rsl']
    mimetypes = ['text/rsl']

    flags = re.MULTILINE | re.DOTALL

    tokens = {
        'root': [
            (words((
                'Bool', 'Char', 'Int', 'Nat', 'Real', 'Text', 'Unit', 'abs',
                'all', 'always', 'any', 'as', 'axiom', 'card', 'case', 'channel',
                'chaos', 'class', 'devt_relation', 'dom', 'elems', 'else', 'elif',
                'end', 'exists', 'extend', 'false', 'for', 'hd', 'hide', 'if',
                'in', 'is', 'inds', 'initialise', 'int', 'inter', 'isin', 'len',
                'let', 'local', 'ltl_assertion', 'object', 'of', 'out', 'post',
                'pre', 'read', 'real', 'rng', 'scheme', 'skip', 'stop', 'swap',
                'then', 'theory', 'test_case', 'tl', 'transition_system', 'true',
                'type', 'union', 'until', 'use', 'value', 'variable', 'while',
                'with', 'write', '~isin', '-inflist', '-infset', '-list',
                '-set'), prefix=r'\b', suffix=r'\b'),
             Keyword),
            (r'(variable|value)\b', Keyword.Declaration),
            # The three RSL comment forms, plus C-style block comments.
            (r'--.*?\n', Comment),
            (r'<:.*?:>', Comment),
            (r'\{!.*?!\}', Comment),
            (r'/\*.*?\*/', Comment),
            # A name followed by ':' at the start of a line is a signature.
            (r'^[ \t]*([\w]+)[ \t]*:[^:]', Name.Function),
            (r'(^[ \t]*)([\w]+)([ \t]*\([\w\s,]*\)[ \t]*)(is|as)',
             bygroups(Text, Name.Function, Text, Keyword)),
            (r'\b[A-Z]\w*\b', Keyword.Type),
            (r'(true|false)\b', Keyword.Constant),
            (r'".*"', String),
            (r'\'.\'', String.Char),
            (r'(><|->|-m->|/\\|<=|<<=|<\.|\|\||\|\^\||-~->|-~m->|\\/|>=|>>|'
             r'\.>|\+\+|-\\|<->|=>|:-|~=|\*\*|<<|>>=|\+>|!!|\|=\||#)',
             Operator),
            (r'[0-9]+\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-f]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            # Catch-all: anything else is plain text.
            (r'.', Text),
        ],
    }

    def analyse_text(text):
        """
        Check for the most common text in the beginning of a RSL file.
        """
        if re.search(r'scheme\s*.*?=\s*class\s*type', text, re.I) is not None:
            return 1.0
class MscgenLexer(RegexLexer):
    """
    For `Mscgen <http://www.mcternan.me.uk/mscgen/>`_ files.

    .. versionadded:: 1.6
    """
    name = 'Mscgen'
    aliases = ['mscgen', 'msc']
    filenames = ['*.msc']

    # An entity/attribute name: a word or a double-quoted string.
    _var = r'(\w+|"(?:\\"|[^"])*")'

    tokens = {
        'root': [
            (r'msc\b', Keyword.Type),
            # Options
            (r'(hscale|HSCALE|width|WIDTH|wordwraparcs|WORDWRAPARCS'
             r'|arcgradient|ARCGRADIENT)\b', Name.Property),
            # Operators
            (r'(abox|ABOX|rbox|RBOX|box|BOX|note|NOTE)\b', Operator.Word),
            (r'(\.|-|\|){3}', Keyword),
            (r'(?:-|=|\.|:){2}'
             r'|<<=>>|<->|<=>|<<>>|<:>'
             r'|->|=>>|>>|=>|:>|-x|-X'
             r'|<-|<<=|<<|<=|<:|x-|X-|=', Operator),
            # Names
            (r'\*', Name.Builtin),
            (_var, Name.Variable),
            # Other
            (r'\[', Punctuation, 'attrs'),
            (r'\{|\}|,|;', Punctuation),
            include('comments')
        ],
        'attrs': [
            (r'\]', Punctuation, '#pop'),
            # name = value pairs inside [...].
            (_var + r'(\s*)(=)(\s*)' + _var,
             bygroups(Name.Attribute, Text.Whitespace, Operator, Text.Whitespace,
                      String)),
            (r',', Punctuation),
            include('comments')
        ],
        'comments': [
            (r'(?://|#).*?\n', Comment.Single),
            (r'/\*(?:.|\n)*?\*/', Comment.Multiline),
            (r'[ \t\r\n]+', Text.Whitespace)
        ]
    }
class VGLLexer(RegexLexer):
    """
    For `SampleManager VGL <http://www.thermoscientific.com/samplemanager>`_
    source code.

    .. versionadded:: 1.6
    """
    name = 'VGL'
    aliases = ['vgl']
    filenames = ['*.rpf']

    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE

    tokens = {
        'root': [
            # Comments are brace-delimited in VGL.
            (r'\{[^}]*\}', Comment.Multiline),
            (r'declare', Keyword.Constant),
            # Keywords -- the lookahead avoids matching identifiers that are
            # immediately used as attributes/calls (followed by =, <, >, etc.).
            (r'(if|then|else|endif|while|do|endwhile|and|or|prompt|object'
             r'|create|on|line|with|global|routine|value|endroutine|constant'
             r'|global|set|join|library|compile_option|file|exists|create|copy'
             r'|delete|enable|windows|name|notprotected)(?! *[=<>.,()])',
             Keyword),
            (r'(true|false|null|empty|error|locked)', Keyword.Constant),
            (r'[~^*#!%&\[\]()<>|+=:;,./?-]', Operator),
            (r'"[^"]*"', String),
            (r'(\.)([a-z_$][\w$]*)', bygroups(Operator, Name.Attribute)),
            (r'[0-9][0-9]*(\.[0-9]+(e[+\-]?[0-9]+)?)?', Number),
            (r'[a-z_$][\w$]*', Name),
            (r'[\r\n]+', Text),
            (r'\s+', Text)
        ]
    }
class AlloyLexer(RegexLexer):
    """
    For `Alloy <http://alloy.mit.edu>`_ source code.

    .. versionadded:: 2.0
    """

    name = 'Alloy'
    aliases = ['alloy']
    filenames = ['*.als']
    mimetypes = ['text/x-alloy']

    flags = re.MULTILINE | re.DOTALL

    # Identifier regex and a reusable (pattern, token) rule for
    # non-newline whitespace.
    iden_rex = r'[a-zA-Z_][\w\']*'
    text_tuple = (r'[^\S\n]+', Text)

    tokens = {
        'sig': [
            (r'(extends)\b', Keyword, '#pop'),
            (iden_rex, Name),
            text_tuple,
            (r',', Punctuation),
            (r'\{', Operator, '#pop'),
        ],
        'module': [
            text_tuple,
            (iden_rex, Name, '#pop'),
        ],
        'fun': [
            text_tuple,
            (r'\{', Operator, '#pop'),
            (iden_rex, Name, '#pop'),
        ],
        'root': [
            (r'--.*?$', Comment.Single),
            (r'//.*?$', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline),
            text_tuple,
            (r'(module|open)(\s+)', bygroups(Keyword.Namespace, Text),
             'module'),
            (r'(sig|enum)(\s+)', bygroups(Keyword.Declaration, Text), 'sig'),
            (r'(iden|univ|none)\b', Keyword.Constant),
            (r'(int|Int)\b', Keyword.Type),
            (r'(this|abstract|extends|set|seq|one|lone|let)\b', Keyword),
            (r'(all|some|no|sum|disj|when|else)\b', Keyword),
            (r'(run|check|for|but|exactly|expect|as)\b', Keyword),
            (r'(and|or|implies|iff|in)\b', Operator.Word),
            (r'(fun|pred|fact|assert)(\s+)', bygroups(Keyword, Text), 'fun'),
            (r'!|#|&&|\+\+|<<|>>|>=|<=>|<=|\.|->', Operator),
            (r'[-+/*%=<>&!^|~{}\[\]().]', Operator),
            (iden_rex, Name),
            (r'[:,]', Punctuation),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String),
            (r'\n', Text),
        ]
    }
class PanLexer(RegexLexer):
    """
    Lexer for `pan <http://github.com/quattor/pan/>`_ source files.

    Based on tcsh lexer.

    .. versionadded:: 2.0
    """

    name = 'Pan'
    aliases = ['pan']
    filenames = ['*.pan']

    tokens = {
        'root': [
            include('basic'),
            (r'\(', Keyword, 'paren'),
            (r'\{', Keyword, 'curly'),
            include('data'),
        ],
        'basic': [
            (words((
                'if', 'for', 'with', 'else', 'type', 'bind', 'while', 'valid', 'final',
                'prefix', 'unique', 'object', 'foreach', 'include', 'template',
                'function', 'variable', 'structure', 'extensible', 'declaration'),
                prefix=r'\b', suffix=r'\s*\b'),
             Keyword),
            (words((
                'file_contents', 'format', 'index', 'length', 'match', 'matches',
                'replace', 'splice', 'split', 'substr', 'to_lowercase', 'to_uppercase',
                'debug', 'error', 'traceback', 'deprecated', 'base64_decode',
                'base64_encode', 'digest', 'escape', 'unescape', 'append', 'create',
                'first', 'nlist', 'key', 'list', 'merge', 'next', 'prepend', 'is_boolean',
                'is_defined', 'is_double', 'is_list', 'is_long', 'is_nlist', 'is_null',
                'is_number', 'is_property', 'is_resource', 'is_string', 'to_boolean',
                'to_double', 'to_long', 'to_string', 'clone', 'delete', 'exists',
                'path_exists', 'if_exists', 'return', 'value'),
                prefix=r'\b', suffix=r'\s*\b'),
             Name.Builtin),
            (r'#.*', Comment),
            (r'\\[\w\W]', String.Escape),
            (r'(\b\w+)(\s*)(=)', bygroups(Name.Variable, Text, Operator)),
            (r'[\[\]{}()=]+', Operator),
            # Heredoc-style string: <<TAG ... TAG (tag back-referenced via \2).
            (r'<<\s*(\'?)\\?(\w+)[\w\W]+?\2', String),
            (r';', Punctuation),
        ],
        'data': [
            (r'(?s)"(\\\\|\\[0-7]+|\\.|[^"\\])*"', String.Double),
            (r"(?s)'(\\\\|\\[0-7]+|\\.|[^'\\])*'", String.Single),
            (r'\s+', Text),
            (r'[^=\s\[\]{}()$"\'`\\;#]+', Text),
            (r'\d+(?= |\Z)', Number),
        ],
        'curly': [
            (r'\}', Keyword, '#pop'),
            (r':-', Keyword),
            (r'\w+', Name.Variable),
            (r'[^}:"\'`$]+', Punctuation),
            (r':', Punctuation),
            include('root'),
        ],
        'paren': [
            (r'\)', Keyword, '#pop'),
            include('root'),
        ],
    }
class CrmshLexer(RegexLexer):
    """
    Lexer for `crmsh <http://crmsh.github.io/>`_ configuration files
    for Pacemaker clusters.

    .. versionadded:: 2.1
    """
    name = 'Crmsh'
    aliases = ['crmsh', 'pcmk']
    filenames = ['*.crmsh', '*.pcmk']
    mimetypes = []

    # Word lists / regex fragments shared by the rules below; the
    # (?![\w#$-]) lookahead stops partial matches inside identifiers.
    elem = words((
        'node', 'primitive', 'group', 'clone', 'ms', 'location',
        'colocation', 'order', 'fencing_topology', 'rsc_ticket',
        'rsc_template', 'property', 'rsc_defaults',
        'op_defaults', 'acl_target', 'acl_group', 'user', 'role',
        'tag'), suffix=r'(?![\w#$-])')
    sub = words((
        'params', 'meta', 'operations', 'op', 'rule',
        'attributes', 'utilization'), suffix=r'(?![\w#$-])')
    acl = words(('read', 'write', 'deny'), suffix=r'(?![\w#$-])')
    bin_rel = words(('and', 'or'), suffix=r'(?![\w#$-])')
    un_ops = words(('defined', 'not_defined'), suffix=r'(?![\w#$-])')
    date_exp = words(('in_range', 'date', 'spec', 'in'), suffix=r'(?![\w#$-])')
    acl_mod = (r'(?:tag|ref|reference|attribute|type|xpath)')
    bin_ops = (r'(?:lt|gt|lte|gte|eq|ne)')
    val_qual = (r'(?:string|version|number)')
    rsc_role_action = (r'(?:Master|Started|Slave|Stopped|'
                       r'start|promote|demote|stop)')

    tokens = {
        'root': [
            (r'^#.*\n?', Comment),
            # attr=value (nvpair)
            (r'([\w#$-]+)(=)("(?:""|[^"])*"|\S+)',
             bygroups(Name.Attribute, Punctuation, String)),
            # need this construct, otherwise numeric node ids
            # are matched as scores
            # elem id:
            (r'(node)(\s+)([\w#$-]+)(:)',
             bygroups(Keyword, Whitespace, Name, Punctuation)),
            # scores
            (r'([+-]?([0-9]+|inf)):', Number),
            # keywords (elements and other)
            (elem, Keyword),
            (sub, Keyword),
            (acl, Keyword),
            # binary operators
            (r'(?:%s:)?(%s)(?![\w#$-])' % (val_qual, bin_ops), Operator.Word),
            # other operators
            (bin_rel, Operator.Word),
            (un_ops, Operator.Word),
            (date_exp, Operator.Word),
            # builtin attributes (e.g. #uname)
            (r'#[a-z]+(?![\w#$-])', Name.Builtin),
            # acl_mod:blah
            (r'(%s)(:)("(?:""|[^"])*"|\S+)' % acl_mod,
             bygroups(Keyword, Punctuation, Name)),
            # rsc_id[:(role|action)]
            # NB: this matches all other identifiers
            (r'([\w#$-]+)(?:(:)(%s))?(?![\w#$-])' % rsc_role_action,
             bygroups(Name, Punctuation, Operator.Word)),
            # punctuation
            (r'(\\(?=\n)|[[\](){}/:@])', Punctuation),
            (r'\s+|\n', Whitespace),
        ],
    }
class FlatlineLexer(RegexLexer):
    """
    Lexer for `Flatline <https://github.com/bigmlcom/flatline>`_ expressions.

    .. versionadded:: 2.2
    """
    name = 'Flatline'
    aliases = ['flatline']
    filenames = []
    mimetypes = ['text/x-flatline']

    special_forms = ('let',)

    # Lisp-style builtin function/operator names.
    builtins = (
        "!=", "*", "+", "-", "<", "<=", "=", ">", ">=", "abs", "acos", "all",
        "all-but", "all-with-defaults", "all-with-numeric-default", "and",
        "asin", "atan", "avg", "avg-window", "bin-center", "bin-count", "call",
        "category-count", "ceil", "cond", "cond-window", "cons", "cos", "cosh",
        "count", "diff-window", "div", "ensure-value", "ensure-weighted-value",
        "epoch", "epoch-day", "epoch-fields", "epoch-hour", "epoch-millisecond",
        "epoch-minute", "epoch-month", "epoch-second", "epoch-weekday",
        "epoch-year", "exp", "f", "field", "field-prop", "fields", "filter",
        "first", "floor", "head", "if", "in", "integer", "language", "length",
        "levenshtein", "linear-regression", "list", "ln", "log", "log10", "map",
        "matches", "matches?", "max", "maximum", "md5", "mean", "median", "min",
        "minimum", "missing", "missing-count", "missing?", "missing_count",
        "mod", "mode", "normalize", "not", "nth", "occurrences", "or",
        "percentile", "percentile-label", "population", "population-fraction",
        "pow", "preferred", "preferred?", "quantile-label", "rand", "rand-int",
        "random-value", "re-quote", "real", "replace", "replace-first", "rest",
        "round", "row-number", "segment-label", "sha1", "sha256", "sin", "sinh",
        "sqrt", "square", "standard-deviation", "standard_deviation", "str",
        "subs", "sum", "sum-squares", "sum-window", "sum_squares", "summary",
        "summary-no", "summary-str", "tail", "tan", "tanh", "to-degrees",
        "to-radians", "variance", "vectorize", "weighted-random-value", "window",
        "winnow", "within-percentiles?", "z-score",
    )

    valid_name = r'(?!#)[\w!$%*+<=>?/.#-]+'

    tokens = {
        'root': [
            # whitespaces - usually not relevant
            (r'[,\s]+', Text),
            # numbers
            (r'-?\d+\.\d+', Number.Float),
            (r'-?\d+', Number.Integer),
            (r'0x-?[a-f\d]+', Number.Hex),
            # strings, symbols and characters
            (r'"(\\\\|\\"|[^"])*"', String),
            (r"\\(.|[a-z]+)", String.Char),
            # expression template placeholder
            (r'_', String.Symbol),
            # highlight the special forms
            (words(special_forms, suffix=' '), Keyword),
            # highlight the builtins
            (words(builtins, suffix=' '), Name.Builtin),
            # the remaining functions
            (r'(?<=\()' + valid_name, Name.Function),
            # find the remaining variables
            (valid_name, Name.Variable),
            # parentheses
            (r'(\(|\))', Punctuation),
        ],
    }
class SnowballLexer(ExtendedRegexLexer):
    """
    Lexer for `Snowball <http://snowballstem.org/>`_ source code.

    .. versionadded:: 2.2
    """

    name = 'Snowball'
    aliases = ['snowball']
    filenames = ['*.sbl']

    # Snowball's whitespace characters, used to build several regexes below.
    _ws = r'\n\r\t '

    def __init__(self, **options):
        # Snowball lets a program redefine its string-escape delimiters
        # (`stringescapes`), so the lexer carries mutable state.
        self._reset_stringescapes()
        ExtendedRegexLexer.__init__(self, **options)

    def _reset_stringescapes(self):
        # Default delimiters: a lone apostrophe opens and closes escapes.
        self._start = "'"
        self._end = "'"

    # NOTE: _string and _stringescapes are called during class-body
    # evaluation of `tokens` below; they act as plain functions, not methods.
    def _string(do_string_first):
        """Build a callback lexing string content with the current
        (dynamically configurable) escape delimiters."""
        def callback(lexer, match, ctx):
            s = match.start()
            text = match.group()
            # Matchers for "run of ordinary chars + one delimiter", built
            # from the instance's current escape start/end characters.
            string = re.compile(r'([^%s]*)(.)' % re.escape(lexer._start)).match
            escape = re.compile(r'([^%s]*)(.)' % re.escape(lexer._end)).match
            pos = 0
            do_string = do_string_first
            while pos < len(text):
                if do_string:
                    match = string(text, pos)
                    yield s + match.start(1), String.Single, match.group(1)
                    if match.group(2) == "'":
                        # Closing quote: string is done, leave the state.
                        yield s + match.start(2), String.Single, match.group(2)
                        ctx.stack.pop()
                        break
                    yield s + match.start(2), String.Escape, match.group(2)
                    pos = match.end()
                match = escape(text, pos)
                yield s + match.start(), String.Escape, match.group()
                if match.group(2) != lexer._end:
                    # Escape sequence continues past this chunk: switch to
                    # the 'escape' state and resume on the next match.
                    ctx.stack[-1] = 'escape'
                    break
                pos = match.end()
                do_string = True
            ctx.pos = s + match.end()
        return callback

    def _stringescapes(lexer, match, ctx):
        # Record the newly declared escape delimiters, then emit tokens.
        lexer._start = match.group(3)
        lexer._end = match.group(5)
        return bygroups(Keyword.Reserved, Text, String.Escape, Text,
                        String.Escape)(lexer, match, ctx)

    tokens = {
        'root': [
            (words(('len', 'lenof'), suffix=r'\b'), Operator.Word),
            include('root1'),
        ],
        'root1': [
            (r'[%s]+' % _ws, Text),
            (r'\d+', Number.Integer),
            (r"'", String.Single, 'string'),
            (r'[()]', Punctuation),
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            (r'//.*', Comment.Single),
            (r'[!*+\-/<=>]=|[-=]>|<[+-]|[$*+\-/<=>?\[\]]', Operator),
            (words(('as', 'get', 'hex', 'among', 'define', 'decimal',
                    'backwardmode'), suffix=r'\b'),
             Keyword.Reserved),
            (words(('strings', 'booleans', 'integers', 'routines', 'externals',
                    'groupings'), suffix=r'\b'),
             Keyword.Reserved, 'declaration'),
            (words(('do', 'or', 'and', 'for', 'hop', 'non', 'not', 'set', 'try',
                    'fail', 'goto', 'loop', 'next', 'test', 'true',
                    'false', 'unset', 'atmark', 'attach', 'delete', 'gopast',
                    'insert', 'repeat', 'sizeof', 'tomark', 'atleast',
                    'atlimit', 'reverse', 'setmark', 'tolimit', 'setlimit',
                    'backwards', 'substring'), suffix=r'\b'),
             Operator.Word),
            (words(('size', 'limit', 'cursor', 'maxint', 'minint'),
                   suffix=r'\b'),
             Name.Builtin),
            (r'(stringdef\b)([%s]*)([^%s]+)' % (_ws, _ws),
             bygroups(Keyword.Reserved, Text, String.Escape)),
            (r'(stringescapes\b)([%s]*)(.)([%s]*)(.)' % (_ws, _ws),
             _stringescapes),
            (r'[A-Za-z]\w*', Name),
        ],
        'declaration': [
            (r'\)', Punctuation, '#pop'),
            (words(('len', 'lenof'), suffix=r'\b'), Name,
             ('root1', 'declaration')),
            include('root1'),
        ],
        'string': [
            (r"[^']*'", _string(True)),
        ],
        'escape': [
            (r"[^']*'", _string(False)),
        ],
    }

    def get_tokens_unprocessed(self, text=None, context=None):
        # Reset the per-run escape delimiters so one document's
        # `stringescapes` declaration cannot leak into the next.
        self._reset_stringescapes()
        return ExtendedRegexLexer.get_tokens_unprocessed(self, text, context)