  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.haskell
  4. ~~~~~~~~~~~~~~~~~~~~~~~
  5. Lexers for Haskell and related languages.
  6. :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. import re
  10. from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
  11. default, include
  12. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  13. Number, Punctuation, Generic
  14. from pygments import unistring as uni
  15. __all__ = ['HaskellLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
  16. 'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
  17. 'LiterateCryptolLexer', 'KokaLexer']
  18. line_re = re.compile('.*?\n')
class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 0.8
    """
    name = 'Haskell'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']

    # MULTILINE so '^'/'$' anchor per line; UNICODE so the uni.Ll/uni.Lu
    # character classes below match non-ASCII identifier letters.
    flags = re.MULTILINE | re.UNICODE

    # Reserved words; 'infix[lr]?' is a regex alternative covering
    # infix/infixl/infixr in one entry.
    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'family', 'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    # Mnemonics of the ASCII control characters usable in escapes
    # (e.g. '\NUL'); some entries are regex alternatives ('[SE]TX').
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            # Line comment: '--' not followed by an operator symbol
            # (otherwise '-->' etc. would be an operator, not a comment).
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # this has to come before the TH quote
            # A lowercase identifier at column 0 is (heuristically) a
            # top-level function definition.
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')\[[^\]]*\]", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r"(')\([^)]*\)", Keyword.Type),  # ..
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            # module X (exports)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments (nestable via '#push')
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            # Control-character escapes like '\^X'.
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash, whitespace, backslash.
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }
class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.

    .. versionadded:: 2.0
    """
    name = 'Idris'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']

    # Reserved words; 'infix[lr]?' is a regex alternative covering
    # infix/infixl/infixr in one entry.
    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern', 'term', 'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    # Mnemonics of the ASCII control characters usable in escapes
    # (e.g. '\NUL'); some entries are regex alternatives ('[SE]TX').
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # Compiler directives that appear as '%name' at the start of a line.
    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    tokens = {
        'root': [
            # Comments
            (r'^(\s*)(%%%s)' % '|'.join(directives),
             bygroups(Text, Keyword.Reserved)),
            # Line comment: '--' not followed by an operator symbol.
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Text, Comment.Single)),
            # '|||' documentation comments.
            (r'(\s*)(\|{3}.*?)$', bygroups(Text, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Text, Comment.Multiline), 'comment'),
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'module': [
            (r'\s+', Text),
            # module X (exports)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments (nestable via '#push')
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            # Control-character escapes like '\^X'.
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash, whitespace, backslash.
            (r'\s+\\', String.Escape, '#pop')
        ],
    }
class AgdaLexer(RegexLexer):
    """
    For the `Agda <http://wiki.portal.chalmers.se/agda/pmwiki.php>`_
    dependently typed functional programming language and proof assistant.

    .. versionadded:: 2.0
    """
    name = 'Agda'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']

    # Agda reserved words (plain words; joined into one alternation below).
    reserved = ['abstract', 'codata', 'coinductive', 'constructor', 'data',
                'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
                'infixl', 'infixr', 'instance', 'let', 'mutual', 'open',
                'pattern', 'postulate', 'primitive', 'private',
                'quote', 'quoteGoal', 'quoteTerm',
                'record', 'renaming', 'rewrite', 'syntax', 'tactic',
                'unquote', 'unquoteDecl', 'using', 'where', 'with']

    tokens = {
        'root': [
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            # Comments: '--' not followed by an operator symbol.
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes (interactive goals, '{! ... !}')
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            (r'\b(Set|Prop)\b', Keyword.Type),
            #  Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            # Includes the Unicode lambda, forall and arrow symbols.
            (u'(\\.{1,3}|\\||\u039B|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'hole': [
            # Holes (nestable via '#push')
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.]*', Name, '#pop'),
            (r'[\W0-9_]+', Text)
        ],
        # These four states are shared with HaskellLexer; see the NOTE there
        # about keeping them compatible.
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }
class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 2.0
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']

    # Cryptol reserved words and builtin type/class names.
    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')

    # Mnemonics of the ASCII control characters usable in escapes
    # (e.g. '\NUL'); some entries are regex alternatives ('[SE]TX').
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            # A lowercase identifier at column 0 is (heuristically) a
            # top-level function definition.
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            # module X (exports)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            #(r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            #(r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments (nestable via '#push')
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            # Control-character escapes like '\^X'.
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash, whitespace, backslash.
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }

    # Builtin names recolored as Name.Builtin by get_tokens_unprocessed.
    # NOTE(review): the operator entries ('<<', '>>', ...) can never be
    # produced as a Name token by the rules above, so they appear to be
    # dead entries -- confirm intent before removing.
    EXTRA_KEYWORDS = set(('join', 'split', 'reverse', 'transpose', 'width',
                          'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                          'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                          'trace'))

    def get_tokens_unprocessed(self, text):
        """Post-process the regex lexer's output, promoting EXTRA_KEYWORDS
        from plain Name tokens to Name.Builtin."""
        stack = ['root']
        for index, token, value in \
           RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value
  437. class LiterateLexer(Lexer):
  438. """
  439. Base class for lexers of literate file formats based on LaTeX or Bird-style
  440. (prefixing each code line with ">").
  441. Additional options accepted:
  442. `litstyle`
  443. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  444. is autodetected: if the first non-whitespace character in the source
  445. is a backslash or percent character, LaTeX is assumed, else Bird.
  446. """
  447. bird_re = re.compile(r'(>[ \t]*)(.*\n)')
  448. def __init__(self, baselexer, **options):
  449. self.baselexer = baselexer
  450. Lexer.__init__(self, **options)
  451. def get_tokens_unprocessed(self, text):
  452. style = self.options.get('litstyle')
  453. if style is None:
  454. style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'
  455. code = ''
  456. insertions = []
  457. if style == 'bird':
  458. # bird-style
  459. for match in line_re.finditer(text):
  460. line = match.group()
  461. m = self.bird_re.match(line)
  462. if m:
  463. insertions.append((len(code),
  464. [(0, Comment.Special, m.group(1))]))
  465. code += m.group(2)
  466. else:
  467. insertions.append((len(code), [(0, Text, line)]))
  468. else:
  469. # latex-style
  470. from pygments.lexers.markup import TexLexer
  471. lxlexer = TexLexer(**self.options)
  472. codelines = 0
  473. latex = ''
  474. for match in line_re.finditer(text):
  475. line = match.group()
  476. if codelines:
  477. if line.lstrip().startswith('\\end{code}'):
  478. codelines = 0
  479. latex += line
  480. else:
  481. code += line
  482. elif line.lstrip().startswith('\\begin{code}'):
  483. codelines = 1
  484. latex += line
  485. insertions.append((len(code),
  486. list(lxlexer.get_tokens_unprocessed(latex))))
  487. latex = ''
  488. else:
  489. latex += line
  490. insertions.append((len(code),
  491. list(lxlexer.get_tokens_unprocessed(latex))))
  492. for item in do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code)):
  493. yield item
  494. class LiterateHaskellLexer(LiterateLexer):
  495. """
  496. For Literate Haskell (Bird-style or LaTeX) source.
  497. Additional options accepted:
  498. `litstyle`
  499. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  500. is autodetected: if the first non-whitespace character in the source
  501. is a backslash or percent character, LaTeX is assumed, else Bird.
  502. .. versionadded:: 0.9
  503. """
  504. name = 'Literate Haskell'
  505. aliases = ['lhs', 'literate-haskell', 'lhaskell']
  506. filenames = ['*.lhs']
  507. mimetypes = ['text/x-literate-haskell']
  508. def __init__(self, **options):
  509. hslexer = HaskellLexer(**options)
  510. LiterateLexer.__init__(self, hslexer, **options)
  511. class LiterateIdrisLexer(LiterateLexer):
  512. """
  513. For Literate Idris (Bird-style or LaTeX) source.
  514. Additional options accepted:
  515. `litstyle`
  516. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  517. is autodetected: if the first non-whitespace character in the source
  518. is a backslash or percent character, LaTeX is assumed, else Bird.
  519. .. versionadded:: 2.0
  520. """
  521. name = 'Literate Idris'
  522. aliases = ['lidr', 'literate-idris', 'lidris']
  523. filenames = ['*.lidr']
  524. mimetypes = ['text/x-literate-idris']
  525. def __init__(self, **options):
  526. hslexer = IdrisLexer(**options)
  527. LiterateLexer.__init__(self, hslexer, **options)
  528. class LiterateAgdaLexer(LiterateLexer):
  529. """
  530. For Literate Agda source.
  531. Additional options accepted:
  532. `litstyle`
  533. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  534. is autodetected: if the first non-whitespace character in the source
  535. is a backslash or percent character, LaTeX is assumed, else Bird.
  536. .. versionadded:: 2.0
  537. """
  538. name = 'Literate Agda'
  539. aliases = ['lagda', 'literate-agda']
  540. filenames = ['*.lagda']
  541. mimetypes = ['text/x-literate-agda']
  542. def __init__(self, **options):
  543. agdalexer = AgdaLexer(**options)
  544. LiterateLexer.__init__(self, agdalexer, litstyle='latex', **options)
  545. class LiterateCryptolLexer(LiterateLexer):
  546. """
  547. For Literate Cryptol (Bird-style or LaTeX) source.
  548. Additional options accepted:
  549. `litstyle`
  550. If given, must be ``"bird"`` or ``"latex"``. If not given, the style
  551. is autodetected: if the first non-whitespace character in the source
  552. is a backslash or percent character, LaTeX is assumed, else Bird.
  553. .. versionadded:: 2.0
  554. """
  555. name = 'Literate Cryptol'
  556. aliases = ['lcry', 'literate-cryptol', 'lcryptol']
  557. filenames = ['*.lcry']
  558. mimetypes = ['text/x-literate-cryptol']
  559. def __init__(self, **options):
  560. crylexer = CryptolLexer(**options)
  561. LiterateLexer.__init__(self, crylexer, **options)
class KokaLexer(RegexLexer):
    """
    Lexer for the `Koka <http://koka.codeplex.com>`_
    language.

    .. versionadded:: 1.6
    """
    name = 'Koka'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']

    # NOTE(review): 'private' is listed twice below -- harmless for the
    # generated alternation, but redundant.
    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public', 'private',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!'+symbols+')'

    # name boundary: a keyword should not be followed by any of these
    boundary = '(?![\w/])'

    # koka token abstractions
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'struct-type'),
            ((r'(%s)' % '|'.join(typeStartKeywords)) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Text, Keyword, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
                      Keyword, Name.Namespace)),

            # function/value definitions; the name may also be a
            # parenthesized operator like (+) or (/).
            (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Name.Function)),
            (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Keyword, Name.Function)),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
            (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
            (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Text, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
            (r'(?=((%s)' % '|'.join(keywords) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall: anything unrecognized ends the type.
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            (r'\n\s*#.*$', Comment.Preproc),
            (r'\s+', Text),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        'comment': [
            # Multiline comments (nestable via '#push').
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'litstring': [
            # Verbatim @"..." string; '""' is the escaped quote.
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }