123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135 |
- # Copyright 2014 Matt Chaput. All rights reserved.
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions are met:
- #
- # 1. Redistributions of source code must retain the above copyright notice,
- # this list of conditions and the following disclaimer.
- #
- # 2. Redistributions in binary form must reproduce the above copyright
- # notice, this list of conditions and the following disclaimer in the
- # documentation and/or other materials provided with the distribution.
- #
- # THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
- # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
- # EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
- # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
- # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- #
- # The views and conclusions contained in the software and documentation are
- # those of the authors and should not be interpreted as representing official
- # policies, either expressed or implied, of Matt Chaput.
- import re
- from whoosh.automata.fsa import ANY, EPSILON, NFA
- # Operator precedence
- CHOICE = ("|", )
- ops = ()
- def parse(pattern):
- stack = []
- ops = []
- class RegexBuilder(object):
- def __init__(self):
- self.statenum = 1
- def new_state(self):
- self.statenum += 1
- return self.statenum
- def epsilon(self):
- s = self.new_state()
- e = self.new_state()
- nfa = NFA(s)
- nfa.add_transition(s, EPSILON, e)
- nfa.add_final_state(e)
- return nfa
- def char(self, label):
- s = self.new_state()
- e = self.new_state()
- nfa = NFA(s)
- nfa.add_transition(s, label, e)
- nfa.add_final_state(e)
- return nfa
- def charset(self, chars):
- s = self.new_state()
- e = self.new_state()
- nfa = NFA(s)
- for char in chars:
- nfa.add_transition(s, char, e)
- nfa.add_final_state(e)
- return e
- def dot(self):
- s = self.new_state()
- e = self.new_state()
- nfa = NFA(s)
- nfa.add_transition(s, ANY, e)
- nfa.add_final_state(e)
- return nfa
- def choice(self, n1, n2):
- s = self.new_state()
- s1 = self.new_state()
- s2 = self.new_state()
- e1 = self.new_state()
- e2 = self.new_state()
- e = self.new_state()
- nfa = NFA(s)
- nfa.add_transition(s, EPSILON, s1)
- nfa.add_transition(s, EPSILON, s2)
- nfa.insert(s1, n1, e1)
- nfa.insert(s2, n2, e2)
- nfa.add_transition(e1, EPSILON, e)
- nfa.add_transition(e2, EPSILON, e)
- nfa.add_final_state(e)
- return nfa
- def concat(self, n1, n2):
- s = self.new_state()
- m = self.new_state()
- e = self.new_state()
- nfa = NFA(s)
- nfa.insert(s, n1, m)
- nfa.insert(m, n2, e)
- nfa.add_final_state(e)
- return nfa
- def star(self, n):
- s = self.new_state()
- m1 = self.new_state()
- m2 = self.new_state()
- e = self.new_state()
- nfa = NFA(s)
- nfa.add_transition(s, EPSILON, m1)
- nfa.add_transition(s, EPSILON, e)
- nfa.insert(m1, n, m2)
- nfa.add_transition(m2, EPSILON, m1)
- nfa.add_transition(m2, EPSILON, e)
- nfa.add_final_state(e)
- return nfa
- def plus(self, n):
- return self.concat(n, self.star(n))
- def question(self, n):
- return self.choice(n, self.epsilon())
|