123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990 |
- # Copyright 2012 Matt Chaput. All rights reserved.
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions are met:
- #
- # 1. Redistributions of source code must retain the above copyright notice,
- # this list of conditions and the following disclaimer.
- #
- # 2. Redistributions in binary form must reproduce the above copyright
- # notice, this list of conditions and the following disclaimer in the
- # documentation and/or other materials provided with the distribution.
- #
- # THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
- # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
- # EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
- # OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
- # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- #
- # The views and conclusions contained in the software and documentation are
- # those of the authors and should not be interpreted as representing official
- # policies, either expressed or implied, of Matt Chaput.
- from whoosh.automata.fsa import ANY, EPSILON, NFA
- # Constants for glob
- _LIT = 0
- _STAR = 1
- _PLUS = 2
- _QUEST = 3
- _RANGE = 4
- def parse_glob(pattern, _glob_multi="*", _glob_single="?",
- _glob_range1="[", _glob_range2="]"):
- pos = 0
- last = None
- while pos < len(pattern):
- char = pattern[pos]
- pos += 1
- if char == _glob_multi: # *
- # (Ignore more than one star in a row)
- if last is not _STAR:
- yield _STAR, None
- last = _STAR
- elif char == _glob_single: # ?
- # (Ignore ? after a star)
- if last is not _STAR:
- yield _QUEST, None
- last = _QUEST
- elif char == _glob_range1: # [
- chars = set()
- negate = False
- # Take the char range specification until the ]
- while pos < len(pattern):
- char = pattern[pos]
- pos += 1
- if char == _glob_range2:
- break
- chars.add(char)
- if chars:
- yield _RANGE, (chars, negate)
- last = _RANGE
- else:
- yield _LIT, char
- last = _LIT
- def glob_automaton(pattern):
- nfa = NFA(0)
- i = -1
- for i, (op, arg) in enumerate(parse_glob(pattern)):
- if op is _LIT:
- nfa.add_transition(i, arg, i + 1)
- elif op is _STAR:
- nfa.add_transition(i, ANY, i + 1)
- nfa.add_transition(i, EPSILON, i + 1)
- nfa.add_transition(i + 1, EPSILON, i)
- elif op is _QUEST:
- nfa.add_transition(i, ANY, i + 1)
- elif op is _RANGE:
- for char in arg[0]:
- nfa.add_transition(i, char, i + 1)
- nfa.add_final_state(i + 1)
- return nfa
|