  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.clean
  4. ~~~~~~~~~~~~~~~~~~~~~
  5. Lexer for the Clean language.
  6. :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. from pygments.lexer import ExtendedRegexLexer, LexerContext, \
  10. bygroups, words, include, default
  11. from pygments.token import Comment, Keyword, Literal, Name, Number, Operator, \
  12. Punctuation, String, Text, Whitespace
  13. __all__ = ['CleanLexer']
  14. class CleanLexer(ExtendedRegexLexer):
  15. """
  16. Lexer for the general purpose, state-of-the-art, pure and lazy functional
  17. programming language Clean (http://clean.cs.ru.nl/Clean).
  18. .. versionadded: 2.2
  19. """
  20. name = 'Clean'
  21. aliases = ['clean']
  22. filenames = ['*.icl', '*.dcl']
  23. def get_tokens_unprocessed(self, text=None, context=None):
  24. ctx = LexerContext(text, 0)
  25. ctx.indent = 0
  26. return ExtendedRegexLexer.get_tokens_unprocessed(self, text, context=ctx)
  27. def check_class_not_import(lexer, match, ctx):
  28. if match.group(0) == 'import':
  29. yield match.start(), Keyword.Namespace, match.group(0)
  30. ctx.stack = ctx.stack[:-1] + ['fromimportfunc']
  31. else:
  32. yield match.start(), Name.Class, match.group(0)
  33. ctx.pos = match.end()
  34. def check_instance_class(lexer, match, ctx):
  35. if match.group(0) == 'instance' or match.group(0) == 'class':
  36. yield match.start(), Keyword, match.group(0)
  37. else:
  38. yield match.start(), Name.Function, match.group(0)
  39. ctx.stack = ctx.stack + ['fromimportfunctype']
  40. ctx.pos = match.end()
  41. @staticmethod
  42. def indent_len(text):
  43. # Tabs are four spaces:
  44. # https://svn.cs.ru.nl/repos/clean-platform/trunk/doc/STANDARDS.txt
  45. text = text.replace('\n', '')
  46. return len(text.replace('\t', ' ')), len(text)
  47. def store_indent(lexer, match, ctx):
  48. ctx.indent, _ = CleanLexer.indent_len(match.group(0))
  49. ctx.pos = match.end()
  50. yield match.start(), Text, match.group(0)
  51. def check_indent1(lexer, match, ctx):
  52. indent, reallen = CleanLexer.indent_len(match.group(0))
  53. if indent > ctx.indent:
  54. yield match.start(), Whitespace, match.group(0)
  55. ctx.pos = match.start() + reallen + 1
  56. else:
  57. ctx.indent = 0
  58. ctx.pos = match.start()
  59. ctx.stack = ctx.stack[:-1]
  60. yield match.start(), Whitespace, match.group(0)[1:]
  61. def check_indent2(lexer, match, ctx):
  62. indent, reallen = CleanLexer.indent_len(match.group(0))
  63. if indent > ctx.indent:
  64. yield match.start(), Whitespace, match.group(0)
  65. ctx.pos = match.start() + reallen + 1
  66. else:
  67. ctx.indent = 0
  68. ctx.pos = match.start()
  69. ctx.stack = ctx.stack[:-2]
  70. def check_indent3(lexer, match, ctx):
  71. indent, reallen = CleanLexer.indent_len(match.group(0))
  72. if indent > ctx.indent:
  73. yield match.start(), Whitespace, match.group(0)
  74. ctx.pos = match.start() + reallen + 1
  75. else:
  76. ctx.indent = 0
  77. ctx.pos = match.start()
  78. ctx.stack = ctx.stack[:-3]
  79. yield match.start(), Whitespace, match.group(0)[1:]
  80. if match.group(0) == '\n\n':
  81. ctx.pos = ctx.pos + 1
  82. def skip(lexer, match, ctx):
  83. ctx.stack = ctx.stack[:-1]
  84. ctx.pos = match.end()
  85. yield match.start(), Comment, match.group(0)
  86. keywords = ('class', 'instance', 'where', 'with', 'let', 'let!',
  87. 'in', 'case', 'of', 'infix', 'infixr', 'infixl', 'generic',
  88. 'derive', 'otherwise', 'code', 'inline')
  89. tokens = {
  90. 'common': [
  91. (r';', Punctuation, '#pop'),
  92. (r'//', Comment, 'singlecomment'),
  93. ],
  94. 'root': [
  95. # Comments
  96. (r'//.*\n', Comment.Single),
  97. (r'(?s)/\*\*.*?\*/', Comment.Special),
  98. (r'(?s)/\*.*?\*/', Comment.Multi),
  99. # Modules, imports, etc.
  100. (r'\b((?:implementation|definition|system)\s+)?(module)(\s+)([\w`.]+)',
  101. bygroups(Keyword.Namespace, Keyword.Namespace, Text, Name.Class)),
  102. (r'(?<=\n)import(?=\s)', Keyword.Namespace, 'import'),
  103. (r'(?<=\n)from(?=\s)', Keyword.Namespace, 'fromimport'),
  104. # Keywords
  105. # We cannot use (?s)^|(?<=\s) as prefix, so need to repeat this
  106. (words(keywords, prefix=r'(?<=\s)', suffix=r'(?=\s)'), Keyword),
  107. (words(keywords, prefix=r'^', suffix=r'(?=\s)'), Keyword),
  108. # Function definitions
  109. (r'(?=\{\|)', Whitespace, 'genericfunction'),
  110. (r'(?<=\n)([ \t]*)([\w`$()=\-<>~*\^|+&%]+)((?:\s+\w)*)(\s*)(::)',
  111. bygroups(store_indent, Name.Function, Keyword.Type, Whitespace,
  112. Punctuation),
  113. 'functiondefargs'),
  114. # Type definitions
  115. (r'(?<=\n)([ \t]*)(::)', bygroups(store_indent, Punctuation), 'typedef'),
  116. (r'^([ \t]*)(::)', bygroups(store_indent, Punctuation), 'typedef'),
  117. # Literals
  118. (r'\'\\?.(?<!\\)\'', String.Char),
  119. (r'\'\\\d+\'', String.Char),
  120. (r'\'\\\\\'', String.Char), # (special case for '\\')
  121. (r'[+\-~]?\s*\d+\.\d+(E[+\-~]?\d+)?\b', Number.Float),
  122. (r'[+\-~]?\s*0[0-7]\b', Number.Oct),
  123. (r'[+\-~]?\s*0x[0-9a-fA-F]\b', Number.Hex),
  124. (r'[+\-~]?\s*\d+\b', Number.Integer),
  125. (r'"', String.Double, 'doubleqstring'),
  126. (words(('True', 'False'), prefix=r'(?<=\s)', suffix=r'(?=\s)'),
  127. Literal),
  128. # Qualified names
  129. (r'(\')([\w.]+)(\'\.)',
  130. bygroups(Punctuation, Name.Namespace, Punctuation)),
  131. # Everything else is some name
  132. (r'([\w`$%/?@]+\.?)*[\w`$%/?@]+', Name),
  133. # Punctuation
  134. (r'[{}()\[\],:;.#]', Punctuation),
  135. (r'[+\-=!<>|&~*\^/]', Operator),
  136. (r'\\\\', Operator),
  137. # Lambda expressions
  138. (r'\\.*?(->|\.|=)', Name.Function),
  139. # Whitespace
  140. (r'\s', Whitespace),
  141. include('common'),
  142. ],
  143. 'fromimport': [
  144. include('common'),
  145. (r'([\w`.]+)', check_class_not_import),
  146. (r'\n', Whitespace, '#pop'),
  147. (r'\s', Whitespace),
  148. ],
  149. 'fromimportfunc': [
  150. include('common'),
  151. (r'(::)(\s+)([^,\s]+)', bygroups(Punctuation, Text, Keyword.Type)),
  152. (r'([\w`$()=\-<>~*\^|+&%/]+)', check_instance_class),
  153. (r',', Punctuation),
  154. (r'\n', Whitespace, '#pop'),
  155. (r'\s', Whitespace),
  156. ],
  157. 'fromimportfunctype': [
  158. include('common'),
  159. (r'[{(\[]', Punctuation, 'combtype'),
  160. (r',', Punctuation, '#pop'),
  161. (r'[:;.#]', Punctuation),
  162. (r'\n', Whitespace, '#pop:2'),
  163. (r'[^\S\n]+', Whitespace),
  164. (r'\S+', Keyword.Type),
  165. ],
  166. 'combtype': [
  167. include('common'),
  168. (r'[})\]]', Punctuation, '#pop'),
  169. (r'[{(\[]', Punctuation, '#pop'),
  170. (r'[,:;.#]', Punctuation),
  171. (r'\s+', Whitespace),
  172. (r'\S+', Keyword.Type),
  173. ],
  174. 'import': [
  175. include('common'),
  176. (words(('from', 'import', 'as', 'qualified'),
  177. prefix='(?<=\s)', suffix='(?=\s)'), Keyword.Namespace),
  178. (r'[\w`.]+', Name.Class),
  179. (r'\n', Whitespace, '#pop'),
  180. (r',', Punctuation),
  181. (r'[^\S\n]+', Whitespace),
  182. ],
  183. 'singlecomment': [
  184. (r'(.)(?=\n)', skip),
  185. (r'.+(?!\n)', Comment),
  186. ],
  187. 'doubleqstring': [
  188. (r'[^\\"]+', String.Double),
  189. (r'"', String.Double, '#pop'),
  190. (r'\\.', String.Double),
  191. ],
  192. 'typedef': [
  193. include('common'),
  194. (r'[\w`]+', Keyword.Type),
  195. (r'[:=|(),\[\]{}!*]', Punctuation),
  196. (r'->', Punctuation),
  197. (r'\n(?=[^\s|])', Whitespace, '#pop'),
  198. (r'\s', Whitespace),
  199. (r'.', Keyword.Type),
  200. ],
  201. 'genericfunction': [
  202. include('common'),
  203. (r'\{\|', Punctuation),
  204. (r'\|\}', Punctuation, '#pop'),
  205. (r',', Punctuation),
  206. (r'->', Punctuation),
  207. (r'(\s+of\s+)(\{)', bygroups(Keyword, Punctuation), 'genericftypes'),
  208. (r'\s', Whitespace),
  209. (r'[\w`\[\]{}!]+', Keyword.Type),
  210. (r'[*()]', Punctuation),
  211. ],
  212. 'genericftypes': [
  213. include('common'),
  214. (r'[\w`]+', Keyword.Type),
  215. (r',', Punctuation),
  216. (r'\s', Whitespace),
  217. (r'\}', Punctuation, '#pop'),
  218. ],
  219. 'functiondefargs': [
  220. include('common'),
  221. (r'\n(\s*)', check_indent1),
  222. (r'[!{}()\[\],:;.#]', Punctuation),
  223. (r'->', Punctuation, 'functiondefres'),
  224. (r'^(?=\S)', Whitespace, '#pop'),
  225. (r'\S', Keyword.Type),
  226. (r'\s', Whitespace),
  227. ],
  228. 'functiondefres': [
  229. include('common'),
  230. (r'\n(\s*)', check_indent2),
  231. (r'^(?=\S)', Whitespace, '#pop:2'),
  232. (r'[!{}()\[\],:;.#]', Punctuation),
  233. (r'\|', Punctuation, 'functiondefclasses'),
  234. (r'\S', Keyword.Type),
  235. (r'\s', Whitespace),
  236. ],
  237. 'functiondefclasses': [
  238. include('common'),
  239. (r'\n(\s*)', check_indent3),
  240. (r'^(?=\S)', Whitespace, '#pop:3'),
  241. (r'[,&]', Punctuation),
  242. (r'\[', Punctuation, 'functiondefuniquneq'),
  243. (r'[\w`$()=\-<>~*\^|+&%/{}\[\]@]', Name.Function, 'functionname'),
  244. (r'\s+', Whitespace),
  245. ],
  246. 'functiondefuniquneq': [
  247. include('common'),
  248. (r'[a-z]+', Keyword.Type),
  249. (r'\s+', Whitespace),
  250. (r'<=|,', Punctuation),
  251. (r'\]', Punctuation, '#pop')
  252. ],
  253. 'functionname': [
  254. include('common'),
  255. (r'[\w`$()=\-<>~*\^|+&%/]+', Name.Function),
  256. (r'(?=\{\|)', Punctuation, 'genericfunction'),
  257. default('#pop'),
  258. ]
  259. }