lexer.py

  1. """
  2. `GrammarLexer` is compatible with Pygments lexers and can be used to highlight
  3. the input using a regular grammar with token annotations.
  4. """
  5. from __future__ import unicode_literals
  6. from prompt_toolkit.document import Document
  7. from prompt_toolkit.layout.lexers import Lexer
  8. from prompt_toolkit.layout.utils import split_lines
  9. from prompt_toolkit.token import Token
  10. from .compiler import _CompiledGrammar
  11. from six.moves import range
  12. __all__ = (
  13. 'GrammarLexer',
  14. )


class GrammarLexer(Lexer):
    """
    Lexer which can be used for highlighting of tokens according to variables
    in the grammar.

    (It does not do any actual lexing of the string itself, but it exposes an
    API that is compatible with the Pygments lexer class.)

    :param compiled_grammar: Grammar as returned by the `compile()` function.
    :param default_token: Token to use for every character that is not covered
        by one of the lexers below. Defaults to `Token`.
    :param lexers: Dictionary mapping variable names of the regular grammar to
        the lexers that should be used for this part. (This can call other
        lexers recursively.) If you wish a part of the grammar to just get one
        token, use a `prompt_toolkit.layout.lexers.SimpleLexer`.
    """
    def __init__(self, compiled_grammar, default_token=None, lexers=None):
        assert isinstance(compiled_grammar, _CompiledGrammar)
        assert default_token is None or isinstance(default_token, tuple)
        assert lexers is None or all(isinstance(v, Lexer) for k, v in lexers.items())
        assert lexers is None or isinstance(lexers, dict)

        self.compiled_grammar = compiled_grammar
        self.default_token = default_token or Token
        self.lexers = lexers or {}

    def _get_tokens(self, cli, text):
        m = self.compiled_grammar.match_prefix(text)

        if m:
            characters = [[self.default_token, c] for c in text]
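
            # For each variable that matched, let its lexer (if any) replace
            # the default tokens for that part of the input.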
            for v in m.variables():
                # If we have a `Lexer` instance for this part of the input,
                # tokenize it recursively and apply the resulting tokens.
                lexer = self.lexers.get(v.varname)
                if lexer:
                    document = Document(text[v.start:v.stop])
                    lexer_tokens_for_line = lexer.lex_document(cli, document)
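
                    # Concatenate the tokens of all lines of the variable's
                    # text, re-inserting the '\n' characters that were lost
                    # when the document was split into lines.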
                    lexer_tokens = []
                    for i in range(len(document.lines)):
                        lexer_tokens.extend(lexer_tokens_for_line(i))
                        lexer_tokens.append((Token, '\n'))
                    if lexer_tokens:
                        lexer_tokens.pop()
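
                    # Copy the tokens, character by character, onto the
                    # absolute positions in the original input. Characters
                    # that already received a non-default token are left
                    # untouched.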
                    i = v.start
                    for t, s in lexer_tokens:
                        for c in s:
                            if characters[i][0] == self.default_token:
                                characters[i][0] = t
                            i += 1

            # Highlight trailing input.
            trailing_input = m.trailing_input()
            if trailing_input:
                for i in range(trailing_input.start, trailing_input.stop):
                    characters[i][0] = Token.TrailingInput

            return characters
        else:
            return [(Token, text)]
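
    # `lex_document` is the part of the `Lexer` API that prompt_toolkit calls:
    # it tokenizes the whole document once and returns a callable that yields
    # the tokens for one line at a time.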
    def lex_document(self, cli, document):
        lines = list(split_lines(self._get_tokens(cli, document.text)))

        def get_line(lineno):
            try:
                return lines[lineno]
            except IndexError:
                return []

        return get_line
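

# A minimal usage sketch. It assumes that this module lives at
# `prompt_toolkit.contrib.regular_languages.lexer` and that `SimpleLexer`
# accepts the token to use as its first argument (as in prompt_toolkit 1.x).
# The grammar and the variable names are made up for illustration only.
if __name__ == '__main__':
    from prompt_toolkit.contrib.regular_languages.compiler import compile
    from prompt_toolkit.layout.lexers import SimpleLexer

    # A tiny grammar with two variables: a command word and an optional
    # argument.
    grammar = compile(r'(?P<command>[a-z]+)(\s+(?P<argument>[a-z0-9.]+))?')

    lexer = GrammarLexer(grammar, lexers={
        'command': SimpleLexer(Token.Keyword),
        'argument': SimpleLexer(Token.Name),
    })

    # `cli` may be None here, because `SimpleLexer` does not use it.
    get_line = lexer.lex_document(None, Document('open notes.txt'))
    print(get_line(0))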