collation.py 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. # Copyright 2016 MongoDB, Inc.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """Tools for working with `collations`_.
  15. .. _collations: http://userguide.icu-project.org/collation/concepts
  16. """
  17. from pymongo import common
  18. class CollationStrength(object):
  19. """
  20. An enum that defines values for `strength` on a
  21. :class:`~pymongo.collation.Collation`.
  22. """
  23. PRIMARY = 1
  24. """Differentiate base (unadorned) characters."""
  25. SECONDARY = 2
  26. """Differentiate character accents."""
  27. TERTIARY = 3
  28. """Differentiate character case."""
  29. QUATERNARY = 4
  30. """Differentiate words with and without punctuation."""
  31. IDENTICAL = 5
  32. """Differentiate unicode code point (characters are exactly identical)."""
  33. class CollationAlternate(object):
  34. """
  35. An enum that defines values for `alternate` on a
  36. :class:`~pymongo.collation.Collation`.
  37. """
  38. NON_IGNORABLE = 'non-ignorable'
  39. """Spaces and punctuation are treated as base characters."""
  40. SHIFTED = 'shifted'
  41. """Spaces and punctuation are *not* considered base characters.
  42. Spaces and punctuation are distinguished regardless when the
  43. :class:`~pymongo.collation.Collation` strength is at least
  44. :data:`~pymongo.collation.CollationStrength.QUATERNARY`.
  45. """
  46. class CollationMaxVariable(object):
  47. """
  48. An enum that defines values for `max_variable` on a
  49. :class:`~pymongo.collation.Collation`.
  50. """
  51. PUNCT = 'punct'
  52. """Both punctuation and spaces are ignored."""
  53. SPACE = 'space'
  54. """Spaces alone are ignored."""
  55. class CollationCaseFirst(object):
  56. """
  57. An enum that defines values for `case_first` on a
  58. :class:`~pymongo.collation.Collation`.
  59. """
  60. UPPER = 'upper'
  61. """Sort uppercase characters first."""
  62. LOWER = 'lower'
  63. """Sort lowercase characters first."""
  64. OFF = 'off'
  65. """Default for locale or collation strength."""
  66. class Collation(object):
  67. """Collation
  68. :Parameters:
  69. - `locale`: (string) The locale of the collation. This should be a string
  70. that identifies an `ICU locale ID` exactly. For example, ``en_US`` is
  71. valid, but ``en_us`` and ``en-US`` are not. Consult the MongoDB
  72. documentation for a list of supported locales.
  73. - `caseLevel`: (optional) If ``True``, turn on case sensitivity if
  74. `strength` is 1 or 2 (case sensitivity is implied if `strength` is
  75. greater than 2). Defaults to ``False``.
  76. - `caseFirst`: (optional) Specify that either uppercase or lowercase
  77. characters take precedence. Must be one of the following values:
  78. * :data:`~CollationCaseFirst.UPPER`
  79. * :data:`~CollationCaseFirst.LOWER`
  80. * :data:`~CollationCaseFirst.OFF` (the default)
  81. - `strength`: (optional) Specify the comparison strength. This is also
  82. known as the ICU comparison level. This must be one of the following
  83. values:
  84. * :data:`~CollationStrength.PRIMARY`
  85. * :data:`~CollationStrength.SECONDARY`
  86. * :data:`~CollationStrength.TERTIARY` (the default)
  87. * :data:`~CollationStrength.QUATERNARY`
  88. * :data:`~CollationStrength.IDENTICAL`
  89. Each successive level builds upon the previous. For example, a
  90. `strength` of :data:`~CollationStrength.SECONDARY` differentiates
  91. characters based both on the unadorned base character and its accents.
  92. - `numericOrdering`: (optional) If ``True``, order numbers numerically
  93. instead of in collation order (defaults to ``False``).
  94. - `alternate`: (optional) Specify whether spaces and punctuation are
  95. considered base characters. This must be one of the following values:
  96. * :data:`~CollationAlternate.NON_IGNORABLE` (the default)
  97. * :data:`~CollationAlternate.SHIFTED`
  98. - `maxVariable`: (optional) When `alternate` is
  99. :data:`~CollationAlternate.SHIFTED`, this option specifies what
  100. characters may be ignored. This must be one of the following values:
  101. * :data:`~CollationMaxVariable.PUNCT` (the default)
  102. * :data:`~CollationMaxVariable.SPACE`
  103. - `normalization`: (optional) If ``True``, normalizes text into Unicode
  104. NFD. Defaults to ``False``.
  105. - `backwards`: (optional) If ``True``, accents on characters are
  106. considered from the back of the word to the front, as it is done in some
  107. French dictionary ordering traditions. Defaults to ``False``.
  108. - `kwargs`: (optional) Keyword arguments supplying any additional options
  109. to be sent with this Collation object.
  110. .. versionadded: 3.4
  111. """
  112. __slots__ = ("__document",)
  113. def __init__(self, locale,
  114. caseLevel=None,
  115. caseFirst=None,
  116. strength=None,
  117. numericOrdering=None,
  118. alternate=None,
  119. maxVariable=None,
  120. normalization=None,
  121. backwards=None,
  122. **kwargs):
  123. locale = common.validate_string('locale', locale)
  124. self.__document = {'locale': locale}
  125. if caseLevel is not None:
  126. self.__document['caseLevel'] = common.validate_boolean(
  127. 'caseLevel', caseLevel)
  128. if caseFirst is not None:
  129. self.__document['caseFirst'] = common.validate_string(
  130. 'caseFirst', caseFirst)
  131. if strength is not None:
  132. self.__document['strength'] = common.validate_integer(
  133. 'strength', strength)
  134. if numericOrdering is not None:
  135. self.__document['numericOrdering'] = common.validate_boolean(
  136. 'numericOrdering', numericOrdering)
  137. if alternate is not None:
  138. self.__document['alternate'] = common.validate_string(
  139. 'alternate', alternate)
  140. if maxVariable is not None:
  141. self.__document['maxVariable'] = common.validate_string(
  142. 'maxVariable', maxVariable)
  143. if normalization is not None:
  144. self.__document['normalization'] = common.validate_boolean(
  145. 'normalization', normalization)
  146. if backwards is not None:
  147. self.__document['backwards'] = common.validate_boolean(
  148. 'backwards', backwards)
  149. self.__document.update(kwargs)
  150. @property
  151. def document(self):
  152. """The document representation of this collation.
  153. .. note::
  154. :class:`Collation` is immutable. Mutating the value of
  155. :attr:`document` does not mutate this :class:`Collation`.
  156. """
  157. return self.__document.copy()
  158. def __repr__(self):
  159. document = self.document
  160. return 'Collation(%s)' % (
  161. ', '.join('%s=%r' % (key, document[key]) for key in document),)
  162. def __eq__(self, other):
  163. if isinstance(other, Collation):
  164. return self.document == other.document
  165. return NotImplemented
  166. def __ne__(self, other):
  167. return not self == other
  168. def validate_collation_or_none(value):
  169. if value is None:
  170. return None
  171. if isinstance(value, Collation):
  172. return value.document
  173. if isinstance(value, dict):
  174. return value
  175. raise TypeError(
  176. 'collation must be a dict, an instance of collation.Collation, '
  177. 'or None.')