parser.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. from __future__ import print_function, absolute_import, division, generators, nested_scopes
  2. import sys
  3. import os.path
  4. import logging
  5. import ply.yacc
  6. from jsonpath_rw.jsonpath import *
  7. from jsonpath_rw.lexer import JsonPathLexer
# Module-level logger; also handed to PLY as its `errorlog` when the parser
# is built in JsonPathParser.parse_token_stream.
logger = logging.getLogger(__name__)
  9. def parse(string):
  10. return JsonPathParser().parse(string)
  11. class JsonPathParser(object):
  12. '''
  13. An LALR-parser for JsonPath
  14. '''
  15. tokens = JsonPathLexer.tokens
  16. def __init__(self, debug=False, lexer_class=None):
  17. if self.__doc__ == None:
  18. raise Exception('Docstrings have been removed! By design of PLY, jsonpath-rw requires docstrings. You must not use PYTHONOPTIMIZE=2 or python -OO.')
  19. self.debug = debug
  20. self.lexer_class = lexer_class or JsonPathLexer # Crufty but works around statefulness in PLY
  21. def parse(self, string, lexer = None):
  22. lexer = lexer or self.lexer_class()
  23. return self.parse_token_stream(lexer.tokenize(string))
  24. def parse_token_stream(self, token_iterator, start_symbol='jsonpath'):
  25. # Since PLY has some crufty aspects and dumps files, we try to keep them local
  26. # However, we need to derive the name of the output Python file :-/
  27. output_directory = os.path.dirname(__file__)
  28. try:
  29. module_name = os.path.splitext(os.path.split(__file__)[1])[0]
  30. except:
  31. module_name = __name__
  32. parsing_table_module = '_'.join([module_name, start_symbol, 'parsetab'])
  33. # And we regenerate the parse table every time; it doesn't actually take that long!
  34. new_parser = ply.yacc.yacc(module=self,
  35. debug=self.debug,
  36. tabmodule = parsing_table_module,
  37. outputdir = output_directory,
  38. write_tables=0,
  39. start = start_symbol,
  40. errorlog = logger)
  41. return new_parser.parse(lexer = IteratorToTokenStream(token_iterator))
  42. # ===================== PLY Parser specification =====================
  43. precedence = [
  44. ('left', ','),
  45. ('left', 'DOUBLEDOT'),
  46. ('left', '.'),
  47. ('left', '|'),
  48. ('left', '&'),
  49. ('left', 'WHERE'),
  50. ]
  51. def p_error(self, t):
  52. raise Exception('Parse error at %s:%s near token %s (%s)' % (t.lineno, t.col, t.value, t.type))
  53. def p_jsonpath_binop(self, p):
  54. """jsonpath : jsonpath '.' jsonpath
  55. | jsonpath DOUBLEDOT jsonpath
  56. | jsonpath WHERE jsonpath
  57. | jsonpath '|' jsonpath
  58. | jsonpath '&' jsonpath"""
  59. op = p[2]
  60. if op == '.':
  61. p[0] = Child(p[1], p[3])
  62. elif op == '..':
  63. p[0] = Descendants(p[1], p[3])
  64. elif op == 'where':
  65. p[0] = Where(p[1], p[3])
  66. elif op == '|':
  67. p[0] = Union(p[1], p[3])
  68. elif op == '&':
  69. p[0] = Intersect(p[1], p[3])
  70. def p_jsonpath_fields(self, p):
  71. "jsonpath : fields_or_any"
  72. p[0] = Fields(*p[1])
  73. def p_jsonpath_named_operator(self, p):
  74. "jsonpath : NAMED_OPERATOR"
  75. if p[1] == 'this':
  76. p[0] = This()
  77. elif p[1] == 'parent':
  78. p[0] = Parent()
  79. else:
  80. raise Exception('Unknown named operator `%s` at %s:%s' % (p[1], p.lineno(1), p.lexpos(1)))
  81. def p_jsonpath_root(self, p):
  82. "jsonpath : '$'"
  83. p[0] = Root()
  84. def p_jsonpath_idx(self, p):
  85. "jsonpath : '[' idx ']'"
  86. p[0] = p[2]
  87. def p_jsonpath_slice(self, p):
  88. "jsonpath : '[' slice ']'"
  89. p[0] = p[2]
  90. def p_jsonpath_fieldbrackets(self, p):
  91. "jsonpath : '[' fields ']'"
  92. p[0] = Fields(*p[2])
  93. def p_jsonpath_child_fieldbrackets(self, p):
  94. "jsonpath : jsonpath '[' fields ']'"
  95. p[0] = Child(p[1], Fields(*p[3]))
  96. def p_jsonpath_child_idxbrackets(self, p):
  97. "jsonpath : jsonpath '[' idx ']'"
  98. p[0] = Child(p[1], p[3])
  99. def p_jsonpath_child_slicebrackets(self, p):
  100. "jsonpath : jsonpath '[' slice ']'"
  101. p[0] = Child(p[1], p[3])
  102. def p_jsonpath_parens(self, p):
  103. "jsonpath : '(' jsonpath ')'"
  104. p[0] = p[2]
  105. # Because fields in brackets cannot be '*' - that is reserved for array indices
  106. def p_fields_or_any(self, p):
  107. """fields_or_any : fields
  108. | '*' """
  109. if p[1] == '*':
  110. p[0] = ['*']
  111. else:
  112. p[0] = p[1]
  113. def p_fields_id(self, p):
  114. "fields : ID"
  115. p[0] = [p[1]]
  116. def p_fields_comma(self, p):
  117. "fields : fields ',' fields"
  118. p[0] = p[1] + p[3]
  119. def p_idx(self, p):
  120. "idx : NUMBER"
  121. p[0] = Index(p[1])
  122. def p_slice_any(self, p):
  123. "slice : '*'"
  124. p[0] = Slice()
  125. def p_slice(self, p): # Currently does not support `step`
  126. "slice : maybe_int ':' maybe_int"
  127. p[0] = Slice(start=p[1], end=p[3])
  128. def p_maybe_int(self, p):
  129. """maybe_int : NUMBER
  130. | empty"""
  131. p[0] = p[1]
  132. def p_empty(self, p):
  133. 'empty :'
  134. p[0] = None
  135. class IteratorToTokenStream(object):
  136. def __init__(self, iterator):
  137. self.iterator = iterator
  138. def token(self):
  139. try:
  140. return next(self.iterator)
  141. except StopIteration:
  142. return None
  143. if __name__ == '__main__':
  144. logging.basicConfig()
  145. parser = JsonPathParser(debug=True)
  146. print(parser.parse(sys.stdin.read()))