__init__.py 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. # Copyright (C) 2001-2012 NLTK Project
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the 'License');
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an 'AS IS' BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # Natural Language Toolkit: Snowball Stemmer
  15. #
  16. # Copyright (C) 2001-2012 NLTK Project
  17. # Author: Peter Michael Stahl <pemistahl@gmail.com>
  18. # Peter Ljunglof <peter.ljunglof@heatherleaf.se> (revisions)
  19. # Algorithms: Dr Martin Porter <martin@tartarus.org>
  20. # URL: <http://www.nltk.org/>
  21. # For license information, see LICENSE.TXT
  22. # HJ 2012/07/19 adapted from https://github.com/kmike/nltk.git (branch 2and3)
  23. # 2.0.1rc4-256-g45768f8
  24. """
  25. This module provides a port of the Snowball stemmers developed by Martin
  26. Porter.
  27. At the moment, this port is able to stem words from fourteen languages: Danish,
  28. Dutch, English, Finnish, French, German, Hungarian, Italian, Norwegian,
  29. Portuguese, Romanian, Russian, Spanish and Swedish.
  30. The algorithms have been developed by Martin Porter. These stemmers are called
  31. Snowball, because he invented a programming language with this name for
  32. creating new stemming algorithms. There is more information available at
  33. http://snowball.tartarus.org/
  34. """
  35. from .danish import DanishStemmer
  36. from .dutch import DutchStemmer
  37. from .english import EnglishStemmer
  38. from .finnish import FinnishStemmer
  39. from .french import FrenchStemmer
  40. from .german import GermanStemmer
  41. from .hungarian import HungarianStemmer
  42. from .italian import ItalianStemmer
  43. from .norwegian import NorwegianStemmer
  44. from .portugese import PortugueseStemmer
  45. from .romanian import RomanianStemmer
  46. from .russian import RussianStemmer
  47. from .spanish import SpanishStemmer
  48. from .swedish import SwedishStemmer
  # Registry of the Snowball stemmer classes imported above, keyed by their
  # two-letter language code. It is assembled from (code, class) pairs, so the
  # resulting dict has the same keys, values and insertion order as an
  # equivalent dict literal.
  classes = dict([
      ("da", DanishStemmer),
      ("nl", DutchStemmer),
      ("en", EnglishStemmer),
      ("fi", FinnishStemmer),
      ("fr", FrenchStemmer),
      ("de", GermanStemmer),
      ("hu", HungarianStemmer),
      ("it", ItalianStemmer),
      ("no", NorwegianStemmer),
      ("pt", PortugueseStemmer),
      ("ro", RomanianStemmer),
      ("ru", RussianStemmer),
      ("es", SpanishStemmer),
      ("sv", SwedishStemmer),
  ])