reg.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. # Copyright 2014 Matt Chaput. All rights reserved.
  2. #
  3. # Redistribution and use in source and binary forms, with or without
  4. # modification, are permitted provided that the following conditions are met:
  5. #
  6. # 1. Redistributions of source code must retain the above copyright notice,
  7. # this list of conditions and the following disclaimer.
  8. #
  9. # 2. Redistributions in binary form must reproduce the above copyright
  10. # notice, this list of conditions and the following disclaimer in the
  11. # documentation and/or other materials provided with the distribution.
  12. #
  13. # THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
  14. # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  15. # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
  16. # EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  17. # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  18. # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
  19. # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  20. # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  21. # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  22. # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  23. #
  24. # The views and conclusions contained in the software and documentation are
  25. # those of the authors and should not be interpreted as representing official
  26. # policies, either expressed or implied, of Matt Chaput.
  27. import re
  28. from whoosh.automata.fsa import ANY, EPSILON, NFA
  29. # Operator precedence
  30. CHOICE = ("|", )
  31. ops = ()
  32. def parse(pattern):
  33. stack = []
  34. ops = []
  35. class RegexBuilder(object):
  36. def __init__(self):
  37. self.statenum = 1
  38. def new_state(self):
  39. self.statenum += 1
  40. return self.statenum
  41. def epsilon(self):
  42. s = self.new_state()
  43. e = self.new_state()
  44. nfa = NFA(s)
  45. nfa.add_transition(s, EPSILON, e)
  46. nfa.add_final_state(e)
  47. return nfa
  48. def char(self, label):
  49. s = self.new_state()
  50. e = self.new_state()
  51. nfa = NFA(s)
  52. nfa.add_transition(s, label, e)
  53. nfa.add_final_state(e)
  54. return nfa
  55. def charset(self, chars):
  56. s = self.new_state()
  57. e = self.new_state()
  58. nfa = NFA(s)
  59. for char in chars:
  60. nfa.add_transition(s, char, e)
  61. nfa.add_final_state(e)
  62. return e
  63. def dot(self):
  64. s = self.new_state()
  65. e = self.new_state()
  66. nfa = NFA(s)
  67. nfa.add_transition(s, ANY, e)
  68. nfa.add_final_state(e)
  69. return nfa
  70. def choice(self, n1, n2):
  71. s = self.new_state()
  72. s1 = self.new_state()
  73. s2 = self.new_state()
  74. e1 = self.new_state()
  75. e2 = self.new_state()
  76. e = self.new_state()
  77. nfa = NFA(s)
  78. nfa.add_transition(s, EPSILON, s1)
  79. nfa.add_transition(s, EPSILON, s2)
  80. nfa.insert(s1, n1, e1)
  81. nfa.insert(s2, n2, e2)
  82. nfa.add_transition(e1, EPSILON, e)
  83. nfa.add_transition(e2, EPSILON, e)
  84. nfa.add_final_state(e)
  85. return nfa
  86. def concat(self, n1, n2):
  87. s = self.new_state()
  88. m = self.new_state()
  89. e = self.new_state()
  90. nfa = NFA(s)
  91. nfa.insert(s, n1, m)
  92. nfa.insert(m, n2, e)
  93. nfa.add_final_state(e)
  94. return nfa
  95. def star(self, n):
  96. s = self.new_state()
  97. m1 = self.new_state()
  98. m2 = self.new_state()
  99. e = self.new_state()
  100. nfa = NFA(s)
  101. nfa.add_transition(s, EPSILON, m1)
  102. nfa.add_transition(s, EPSILON, e)
  103. nfa.insert(m1, n, m2)
  104. nfa.add_transition(m2, EPSILON, m1)
  105. nfa.add_transition(m2, EPSILON, e)
  106. nfa.add_final_state(e)
  107. return nfa
  108. def plus(self, n):
  109. return self.concat(n, self.star(n))
  110. def question(self, n):
  111. return self.choice(n, self.epsilon())