glob.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. # Copyright 2012 Matt Chaput. All rights reserved.
  2. #
  3. # Redistribution and use in source and binary forms, with or without
  4. # modification, are permitted provided that the following conditions are met:
  5. #
  6. # 1. Redistributions of source code must retain the above copyright notice,
  7. # this list of conditions and the following disclaimer.
  8. #
  9. # 2. Redistributions in binary form must reproduce the above copyright
  10. # notice, this list of conditions and the following disclaimer in the
  11. # documentation and/or other materials provided with the distribution.
  12. #
  13. # THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
  14. # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  15. # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
  16. # EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  17. # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  18. # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
  19. # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  20. # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  21. # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  22. # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  23. #
  24. # The views and conclusions contained in the software and documentation are
  25. # those of the authors and should not be interpreted as representing official
  26. # policies, either expressed or implied, of Matt Chaput.
  27. from whoosh.automata.fsa import ANY, EPSILON, NFA
  28. # Constants for glob
  29. _LIT = 0
  30. _STAR = 1
  31. _PLUS = 2
  32. _QUEST = 3
  33. _RANGE = 4
  34. def parse_glob(pattern, _glob_multi="*", _glob_single="?",
  35. _glob_range1="[", _glob_range2="]"):
  36. pos = 0
  37. last = None
  38. while pos < len(pattern):
  39. char = pattern[pos]
  40. pos += 1
  41. if char == _glob_multi: # *
  42. # (Ignore more than one star in a row)
  43. if last is not _STAR:
  44. yield _STAR, None
  45. last = _STAR
  46. elif char == _glob_single: # ?
  47. # (Ignore ? after a star)
  48. if last is not _STAR:
  49. yield _QUEST, None
  50. last = _QUEST
  51. elif char == _glob_range1: # [
  52. chars = set()
  53. negate = False
  54. # Take the char range specification until the ]
  55. while pos < len(pattern):
  56. char = pattern[pos]
  57. pos += 1
  58. if char == _glob_range2:
  59. break
  60. chars.add(char)
  61. if chars:
  62. yield _RANGE, (chars, negate)
  63. last = _RANGE
  64. else:
  65. yield _LIT, char
  66. last = _LIT
  67. def glob_automaton(pattern):
  68. nfa = NFA(0)
  69. i = -1
  70. for i, (op, arg) in enumerate(parse_glob(pattern)):
  71. if op is _LIT:
  72. nfa.add_transition(i, arg, i + 1)
  73. elif op is _STAR:
  74. nfa.add_transition(i, ANY, i + 1)
  75. nfa.add_transition(i, EPSILON, i + 1)
  76. nfa.add_transition(i + 1, EPSILON, i)
  77. elif op is _QUEST:
  78. nfa.add_transition(i, ANY, i + 1)
  79. elif op is _RANGE:
  80. for char in arg[0]:
  81. nfa.add_transition(i, char, i + 1)
  82. nfa.add_final_state(i + 1)
  83. return nfa