xmpp_stringprep.py 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. # -*- test-case-name: twisted.words.test.test_jabberxmppstringprep -*-
  2. #
  3. # Copyright (c) Twisted Matrix Laboratories.
  4. # See LICENSE for details.
  5. from encodings import idna
  6. from itertools import chain
  7. import stringprep
  8. # We require Unicode version 3.2.
  9. from unicodedata import ucd_3_2_0 as unicodedata
  10. from twisted.python.compat import unichr
  11. from twisted.python.deprecate import deprecatedModuleAttribute
  12. from incremental import Version
  13. from zope.interface import Interface, implementer
  14. crippled = False
  15. deprecatedModuleAttribute(
  16. Version("Twisted", 13, 1, 0),
  17. "crippled is always False",
  18. __name__,
  19. "crippled")
  20. class ILookupTable(Interface):
  21. """
  22. Interface for character lookup classes.
  23. """
  24. def lookup(c):
  25. """
  26. Return whether character is in this table.
  27. """
  28. class IMappingTable(Interface):
  29. """
  30. Interface for character mapping classes.
  31. """
  32. def map(c):
  33. """
  34. Return mapping for character.
  35. """
  36. @implementer(ILookupTable)
  37. class LookupTableFromFunction:
  38. def __init__(self, in_table_function):
  39. self.lookup = in_table_function
  40. @implementer(ILookupTable)
  41. class LookupTable:
  42. def __init__(self, table):
  43. self._table = table
  44. def lookup(self, c):
  45. return c in self._table
  46. @implementer(IMappingTable)
  47. class MappingTableFromFunction:
  48. def __init__(self, map_table_function):
  49. self.map = map_table_function
  50. @implementer(IMappingTable)
  51. class EmptyMappingTable:
  52. def __init__(self, in_table_function):
  53. self._in_table_function = in_table_function
  54. def map(self, c):
  55. if self._in_table_function(c):
  56. return None
  57. else:
  58. return c
  59. class Profile:
  60. def __init__(self, mappings=[], normalize=True, prohibiteds=[],
  61. check_unassigneds=True, check_bidi=True):
  62. self.mappings = mappings
  63. self.normalize = normalize
  64. self.prohibiteds = prohibiteds
  65. self.do_check_unassigneds = check_unassigneds
  66. self.do_check_bidi = check_bidi
  67. def prepare(self, string):
  68. result = self.map(string)
  69. if self.normalize:
  70. result = unicodedata.normalize("NFKC", result)
  71. self.check_prohibiteds(result)
  72. if self.do_check_unassigneds:
  73. self.check_unassigneds(result)
  74. if self.do_check_bidi:
  75. self.check_bidirectionals(result)
  76. return result
  77. def map(self, string):
  78. result = []
  79. for c in string:
  80. result_c = c
  81. for mapping in self.mappings:
  82. result_c = mapping.map(c)
  83. if result_c != c:
  84. break
  85. if result_c is not None:
  86. result.append(result_c)
  87. return u"".join(result)
  88. def check_prohibiteds(self, string):
  89. for c in string:
  90. for table in self.prohibiteds:
  91. if table.lookup(c):
  92. raise UnicodeError("Invalid character %s" % repr(c))
  93. def check_unassigneds(self, string):
  94. for c in string:
  95. if stringprep.in_table_a1(c):
  96. raise UnicodeError("Unassigned code point %s" % repr(c))
  97. def check_bidirectionals(self, string):
  98. found_LCat = False
  99. found_RandALCat = False
  100. for c in string:
  101. if stringprep.in_table_d1(c):
  102. found_RandALCat = True
  103. if stringprep.in_table_d2(c):
  104. found_LCat = True
  105. if found_LCat and found_RandALCat:
  106. raise UnicodeError("Violation of BIDI Requirement 2")
  107. if found_RandALCat and not (stringprep.in_table_d1(string[0]) and
  108. stringprep.in_table_d1(string[-1])):
  109. raise UnicodeError("Violation of BIDI Requirement 3")
  110. class NamePrep:
  111. """ Implements preparation of internationalized domain names.
  112. This class implements preparing internationalized domain names using the
  113. rules defined in RFC 3491, section 4 (Conversion operations).
  114. We do not perform step 4 since we deal with unicode representations of
  115. domain names and do not convert from or to ASCII representations using
  116. punycode encoding. When such a conversion is needed, the C{idna} standard
  117. library provides the C{ToUnicode()} and C{ToASCII()} functions. Note that
  118. C{idna} itself assumes UseSTD3ASCIIRules to be false.
  119. The following steps are performed by C{prepare()}:
  120. - Split the domain name in labels at the dots (RFC 3490, 3.1)
  121. - Apply nameprep proper on each label (RFC 3491)
  122. - Enforce the restrictions on ASCII characters in host names by
  123. assuming STD3ASCIIRules to be true. (STD 3)
  124. - Rejoin the labels using the label separator U+002E (full stop).
  125. """
  126. # Prohibited characters.
  127. prohibiteds = [unichr(n) for n in chain(range(0x00, 0x2c + 1),
  128. range(0x2e, 0x2f + 1),
  129. range(0x3a, 0x40 + 1),
  130. range(0x5b, 0x60 + 1),
  131. range(0x7b, 0x7f + 1))]
  132. def prepare(self, string):
  133. result = []
  134. labels = idna.dots.split(string)
  135. if labels and len(labels[-1]) == 0:
  136. trailing_dot = u'.'
  137. del labels[-1]
  138. else:
  139. trailing_dot = u''
  140. for label in labels:
  141. result.append(self.nameprep(label))
  142. return u".".join(result) + trailing_dot
  143. def check_prohibiteds(self, string):
  144. for c in string:
  145. if c in self.prohibiteds:
  146. raise UnicodeError("Invalid character %s" % repr(c))
  147. def nameprep(self, label):
  148. label = idna.nameprep(label)
  149. self.check_prohibiteds(label)
  150. if label[0] == u'-':
  151. raise UnicodeError("Invalid leading hyphen-minus")
  152. if label[-1] == u'-':
  153. raise UnicodeError("Invalid trailing hyphen-minus")
  154. return label
  155. C_11 = LookupTableFromFunction(stringprep.in_table_c11)
  156. C_12 = LookupTableFromFunction(stringprep.in_table_c12)
  157. C_21 = LookupTableFromFunction(stringprep.in_table_c21)
  158. C_22 = LookupTableFromFunction(stringprep.in_table_c22)
  159. C_3 = LookupTableFromFunction(stringprep.in_table_c3)
  160. C_4 = LookupTableFromFunction(stringprep.in_table_c4)
  161. C_5 = LookupTableFromFunction(stringprep.in_table_c5)
  162. C_6 = LookupTableFromFunction(stringprep.in_table_c6)
  163. C_7 = LookupTableFromFunction(stringprep.in_table_c7)
  164. C_8 = LookupTableFromFunction(stringprep.in_table_c8)
  165. C_9 = LookupTableFromFunction(stringprep.in_table_c9)
  166. B_1 = EmptyMappingTable(stringprep.in_table_b1)
  167. B_2 = MappingTableFromFunction(stringprep.map_table_b2)
  168. nodeprep = Profile(mappings=[B_1, B_2],
  169. prohibiteds=[C_11, C_12, C_21, C_22,
  170. C_3, C_4, C_5, C_6, C_7, C_8, C_9,
  171. LookupTable([u'"', u'&', u"'", u'/',
  172. u':', u'<', u'>', u'@'])])
  173. resourceprep = Profile(mappings=[B_1,],
  174. prohibiteds=[C_12, C_21, C_22,
  175. C_3, C_4, C_5, C_6, C_7, C_8, C_9])
  176. nameprep = NamePrep()