123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442 |
- """Macro to easily define recursive-descent PEG parsers"""
- import re
- from macropy.core.macros import *
- from macropy.core.hquotes import macros, hq, u
- from macropy.quick_lambda import macros, f
- from macropy.case_classes import macros, case
- from collections import defaultdict
- """
- PEG Parser Atoms
- ================
- Sequence: e1 e2 , Seq
- Ordered choice: e1 / e2 | Or
- Zero-or-more: e* .rep Rep
- One-or-more: e+ .rep1
- Optional: e? .opt
- And-predicate: &e & And
- Not-predicate: !e - Not
- """
- macros = Macros()
- @macros.block
- def peg(tree, gen_sym, **kw):
- """Macro to easily define recursive-descent PEG parsers"""
- potential_targets = [
- target.id for stmt in tree
- if type(stmt) is Assign
- for target in stmt.targets
- ]
- for statement in tree:
- if type(statement) is Assign:
- new_tree = process(statement.value, potential_targets, gen_sym)
- statement.value = hq[
- Parser.Named(lambda: ast[new_tree], [u[statement.targets[0].id]])
- ]
- return tree
- @macros.expr
- def peg(tree, gen_sym, **kw):
- """Macro to easily define recursive-descent PEG parsers"""
- return process(tree, [], gen_sym)
- def process(tree, potential_targets, gen_sym):
- @Walker
- def PegWalker(tree, stop, collect, **kw):
- if type(tree) is Str:
- stop()
- return hq[Parser.Raw(ast[tree])]
- if type(tree) is Name and tree.id in potential_targets:
- collect(tree.id)
- if type(tree) is BinOp and type(tree.op) is RShift:
- tree.left, b_left = PegWalker.recurse_collect(tree.left)
- tree.right = hq[lambda bindings: ast[tree.right]]
- names = distinct(flatten(b_left))
- tree.right.args.args = map(f[Name(id = _)], names)
- tree.right.args.defaults = [hq[[]]] * len(names)
- tree.right.args.kwarg = gen_sym("kw")
- stop()
- return tree
- if type(tree) is BinOp and type(tree.op) is FloorDiv:
- tree.left, b_left = PegWalker.recurse_collect(tree.left)
- stop()
- collect(b_left)
- return tree
- if type(tree) is Tuple:
- result = hq[Parser.Seq([])]
- result.args[0].elts = tree.elts
- all_bindings = []
- for i, elt in enumerate(tree.elts):
- result.args[0].elts[i], bindings = PegWalker.recurse_collect(tree.elts[i])
- all_bindings.append(bindings)
- stop()
- collect(all_bindings)
- return result
- if type(tree) is Compare and type(tree.ops[0]) is Is:
- left_tree, bindings = PegWalker.recurse_collect(tree.left)
- new_tree = hq[ast[left_tree].bind_to(u[tree.comparators[0].id])]
- stop()
- collect(bindings + [tree.comparators[0].id])
- return new_tree
- new_tree = PegWalker.recurse(tree)
- return new_tree
- def cut():
- """Used within a Seq parser (p1, p2, p3,...) to commit the Seq to that
- alternative.
- After the Seq passes the `cut`, any failure becomes fatal, and backtracking
- is not performed. This improves performance and improves the quality
- of the error messages."""
- @case
- class Input(string, index):
- pass
- @case
- class Success(output, bindings, remaining):
- """
- output: the final value that was parsed
- bindings: named value bindings, created by the `is` keyword
- remaining: an Input representing the unread portion of the input
- """
- pass
- @case
- class Failure(remaining, failed, fatal | False):
- """
- remaining: an Input representing the unread portion of the input
- failed: a List[Parser], containing the stack of parsers in
- effect at time of failure
- fatal: whether a parent parser which receives this result from a child
- should continue backtracking
- """
- @property
- def index(self):
- return self.remaining.index
- @property
- def trace(self):
- return [x for f in self.failed for x in f.trace_name]
- @property
- def msg(self):
- """A pretty human-readable error message representing this Failure"""
- line_length = 60
- string = self.remaining.string
- index = self.index
- line_start = string.rfind('\n', 0, index+1)
- line_end = string.find('\n', index+1, len(string))
- if line_end == -1:
- line_end = len(string)
- line_num = string.count('\n', 0, index)
- offset = min(index - line_start , 40)
- msg = "index: " + str(self.index) + ", line: " + str(line_num + 1) + ", col: " + str(index - line_start) + "\n" + \
- " / ".join(self.trace) + "\n" + \
- string[line_start+1:line_end][index - offset - line_start:index+line_length-offset - line_start] + "\n" + \
- (offset-1) * " " + "^" + "\n" +\
- "expected: " + self.failed[-1].short_str()
- return msg
- class ParseError(Exception):
- """An exception that wraps a Failure"""
- def __init__(self, failure):
- self.failure = failure
- Exception.__init__(self, failure.msg)
- @case
- class Parser:
- def parse(self, string):
- """String -> value; throws ParseError in case of failure"""
- res = Parser.Full(self).parse_input(Input(string, 0))
- if type(res) is Success:
- return res.output
- else:
- raise ParseError(res)
- def parse_partial(self, string):
- """String -> Success | Failure"""
- return self.parse_input(Input(string, 0))
- def parse_string(self, string):
- """String -> Success | Failure"""
- return Parser.Full(self).parse_input(Input(string, 0))
- def parse_input(self, input):
- """Input -> Success | Failure"""
- @property
- def trace_name(self):
- return []
- def bind_to(self, string):
- return Parser.Named(lambda: self, [string])
- def __and__(self, other): return Parser.And([self, other])
- def __or__(self, other): return Parser.Or([self, other])
- def __neg__(self): return Parser.Not(self)
- @property
- def join(self):
- return self // "".join
- @property
- def rep1(self):
- return Parser.And([Parser.Rep(self), self])
- @property
- def rep(self):
- return Parser.Rep(self)
- def rep1_with(self, other):
- return Parser.Seq([self, Parser.Seq([other, self]).rep]) // (lambda x: [x[0]] + [y[1] for y in x[1]])
- def rep_with(self, other):
- return self.rep1_with(other) | Parser.Succeed([])
- @property
- def opt(self):
- return Parser.Or([self, Parser.Raw("")])
- @property
- def r(self):
- """Creates a regex-matching parser from the given raw parser"""
- return Parser.Regex(self.string)
- def __mul__(self, n): return Parser.RepN(self, n)
- def __floordiv__(self, other): return Parser.Transform(self, other)
- def __pow__(self, other): return Parser.Transform(self, lambda x: other(*x))
- def __rshift__(self, other): return Parser.TransformBound(self, other)
- class Full(parser):
- def parse_input(self, input):
- res = self.parser.parse_input(input)
- if type(res) is Success and res.remaining.index < len(input.string):
- return Failure(res.remaining, [self])
- else:
- return res
- def short_str(self):
- return self.parser.short_str()
- class Raw(string):
- def parse_input(self, input):
- if input.string[input.index:].startswith(self.string):
- return Success(self.string, {}, input.copy(index = input.index + len(self.string)))
- else:
- return Failure(input, [self])
- def short_str(self):
- return repr(self.string)
- class Regex(regex_string):
- def parse_input(self, input):
- match = re.match(self.regex_string, input.string[input.index:])
- if match:
- group = match.group()
- return Success(group, {}, input.copy(index = input.index + len(group)))
- else:
- return Failure(input, [self])
- def short_str(self):
- return repr(self.regex_string) + ".r"
- class Seq(children):
- def parse_input(self, input):
- current_input = input
- results = []
- result_dict = defaultdict(lambda: [])
- committed = False
- for child in self.children:
- if child is cut:
- committed = True
- else:
- res = child.parse_input(current_input)
- if type(res) is Failure:
- if committed or res.fatal:
- return Failure(res.remaining, [self] + res.failed, True)
- else:
- return res
- current_input = res.remaining
- results.append(res.output)
- for k, v in res.bindings.items():
- result_dict[k] = v
- return Success(results, result_dict, current_input)
- def short_str(self):
- return "(" + ", ".join(map(lambda x: x.short_str(), self.children)) + ")"
- class Or(children):
- def parse_input(self, input):
- for child in self.children:
- res = child.parse_input(input)
- if type(res) is Success:
- return res
- elif res.fatal:
- res.failed = [self] + res.failed
- return res
- return Failure(input, [self])
- def __or__(self, other): return Parser.Or(self.children + [other])
- def short_str(self):
- return "(" + " | ".join(map(lambda x: x.short_str(), self.children)) + ")"
- class And(children):
- def parse_input(self, input):
- results = [child.parse_input(input) for child in self.children]
- failures = [res for res in results if type(res) is Failure]
- if failures == []:
- return results[0]
- else:
- failures[0].failed = [self] + failures[0].failed
- return failures[0]
- def __and__(self, other): return Parser.And(self.children + [other])
- def short_str(self):
- return "(" + " & ".join(map(lambda x: x.short_str(), self.children)) + ")"
- class Not(parser):
- def parse_input(self, input):
- if type(self.parser.parse_input(input)) is Success:
- return Failure(input, [self])
- else:
- return Success(None, {}, input)
- def short_str(self):
- return "-" + self.parser.short_str()
- class Rep(parser):
- def parse_input(self, input):
- current_input = input
- results = []
- result_dict = defaultdict(lambda: [])
- while True:
- res = self.parser.parse_input(current_input)
- if type(res) is Failure:
- if res.fatal:
- res.failed = [self] + res.failed
- return res
- else:
- return Success(results, result_dict, current_input)
- current_input = res.remaining
- for k, v in res.bindings.items():
- result_dict[k] = result_dict[k] + [v]
- results.append(res.output)
- class RepN(parser, n):
- def parse_input(self, input):
- current_input = input
- results = []
- result_dict = defaultdict(lambda: [])
- for i in range(self.n):
- res = self.parser.parse_input(current_input)
- if type(res) is Failure:
- res.failed = [self] + res.failed
- return res
- current_input = res.remaining
- for k, v in res.bindings.items():
- result_dict[k] = result_dict[k] + [v]
- results.append(res.output)
- return Success(results, result_dict, current_input)
- def short_str(self):
- return self.parser.short_str() + "*" + n
- class Transform(parser, func):
- def parse_input(self, input):
- res = self.parser.parse_input(input)
- if type(res) is Success:
- res.output = self.func(res.output)
- else:
- res.failed = [self] + res.failed
- return res
- def short_str(self):
- return self.parser.short_str()
- class TransformBound(parser, func):
- def parse_input(self, input):
- res = self.parser.parse_input(input)
- if type(res) is Success:
- res.output = self.func(**res.bindings)
- res.bindings = {}
- else:
- res.failed = [self] + res.failed
- return res
- def short_str(self):
- return self.parser.short_str()
- class Named(parser_thunk, trace_name):
- self.stored_parser = None
- @property
- def parser(self):
- if not self.stored_parser:
- self.stored_parser = self.parser_thunk()
- return self.stored_parser
- def parse_input(self, input):
- res = self.parser.parse_input(input)
- if type(res) is Success:
- res.bindings = {self.trace_name[0]: res.output}
- else:
- res.failed = [self] + res.failed
- return res
- def short_str(self):
- return self.trace_name[0]
- class Succeed(string):
- def parse_input(self, input):
- return Success(self.string, {}, input)
- class Fail():
- def parse_input(self, input):
- return Failure(input, [self])
- def short_str(self):
- return "fail"
|