lev.py 989 B

123456789101112131415161718192021222324252627282930
  1. from __future__ import print_function
  2. from whoosh.compat import unichr, xrange
  3. from whoosh.automata.fsa import ANY, EPSILON, NFA, unull
  4. def levenshtein_automaton(term, k, prefix=0):
  5. nfa = NFA((0, 0))
  6. if prefix:
  7. for i in xrange(prefix):
  8. c = term[i]
  9. nfa.add_transition((i, 0), c, (i + 1, 0))
  10. for i in xrange(prefix, len(term)):
  11. c = term[i]
  12. for e in xrange(k + 1):
  13. # Correct character
  14. nfa.add_transition((i, e), c, (i + 1, e))
  15. if e < k:
  16. # Deletion
  17. nfa.add_transition((i, e), ANY, (i, e + 1))
  18. # Insertion
  19. nfa.add_transition((i, e), EPSILON, (i + 1, e + 1))
  20. # Substitution
  21. nfa.add_transition((i, e), ANY, (i + 1, e + 1))
  22. for e in xrange(k + 1):
  23. if e < k:
  24. nfa.add_transition((len(term), e), ANY, (len(term), e + 1))
  25. nfa.add_final_state((len(term), e))
  26. return nfa