123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244 |
- # -*- test-case-name: twisted.words.test.test_jabberxmppstringprep -*-
- #
- # Copyright (c) Twisted Matrix Laboratories.
- # See LICENSE for details.
- from encodings import idna
- from itertools import chain
- import stringprep
- # We require Unicode version 3.2.
- from unicodedata import ucd_3_2_0 as unicodedata
- from twisted.python.compat import unichr
- from twisted.python.deprecate import deprecatedModuleAttribute
- from incremental import Version
- from zope.interface import Interface, implementer
# Legacy module flag. Its value is always False, and the attribute is
# registered as deprecated as of Twisted 13.1.0.
crippled = False
deprecatedModuleAttribute(
    Version("Twisted", 13, 1, 0), "crippled is always False", __name__, "crippled"
)
class ILookupTable(Interface):
    """
    Interface for objects that answer membership queries about characters.
    """

    def lookup(c):
        """
        Tell whether the character C{c} is contained in this table.
        """
class IMappingTable(Interface):
    """
    Interface for objects that map a character to its replacement.
    """

    def map(c):
        """
        Return the mapping for the character C{c}.
        """
@implementer(ILookupTable)
class LookupTableFromFunction:
    """
    Lookup table whose C{lookup} is the callable passed to the constructor.
    """

    def __init__(self, in_table_function):
        # The callable is stored as an instance attribute, so calling
        # C{table.lookup(c)} invokes C{in_table_function(c)} directly.
        self.lookup = in_table_function
@implementer(ILookupTable)
class LookupTable:
    """
    Lookup table backed by a container of characters.
    """

    def __init__(self, table):
        # Any object supporting the C{in} operator works here.
        self._table = table

    def lookup(self, c):
        """
        Return C{True} if C{c} is in the wrapped container, else C{False}.
        """
        is_member = c in self._table
        return is_member
@implementer(IMappingTable)
class MappingTableFromFunction:
    """
    Mapping table whose C{map} is the callable passed to the constructor.
    """

    def __init__(self, map_table_function):
        # The callable is stored as an instance attribute, so calling
        # C{table.map(c)} invokes C{map_table_function(c)} directly.
        self.map = map_table_function
@implementer(IMappingTable)
class EmptyMappingTable:
    """
    Mapping table that maps the characters selected by a predicate to
    nothing and leaves every other character unchanged.
    """

    def __init__(self, in_table_function):
        self._in_table_function = in_table_function

    def map(self, c):
        """
        Return C{None} when the predicate accepts C{c}, otherwise C{c}.
        """
        return None if self._in_table_function(c) else c
class Profile:
    """
    A configurable string preparation profile.

    Preparing a string consists of mapping each character through the
    configured mapping tables, optionally normalizing the result (NFKC),
    rejecting prohibited characters, and optionally rejecting unassigned
    code points and strings that violate the bidirectional requirements.
    """

    def __init__(self, mappings=None, normalize=True, prohibiteds=None,
                 check_unassigneds=True, check_bidi=True):
        """
        @param mappings: Objects with a C{map(c)} method, tried in order for
            each character. Defaults to no mappings.
        @param normalize: Whether to apply NFKC normalization after mapping.
        @param prohibiteds: Objects with a C{lookup(c)} method; a character
            found in any of them is rejected. Defaults to no tables.
        @param check_unassigneds: Whether to reject code points found in
            stringprep table A.1.
        @param check_bidi: Whether to enforce the bidirectional checks.
        """
        # Use None sentinels instead of mutable list defaults so that
        # profiles created without arguments never share list objects.
        self.mappings = [] if mappings is None else mappings
        self.normalize = normalize
        self.prohibiteds = [] if prohibiteds is None else prohibiteds
        self.do_check_unassigneds = check_unassigneds
        self.do_check_bidi = check_bidi

    def prepare(self, string):
        """
        Apply the profile to C{string} and return the prepared string.

        @raise UnicodeError: If any enabled check rejects the string.
        """
        result = self.map(string)
        if self.normalize:
            result = unicodedata.normalize("NFKC", result)
        self.check_prohibiteds(result)
        if self.do_check_unassigneds:
            self.check_unassigneds(result)
        if self.do_check_bidi:
            self.check_bidirectionals(result)
        return result

    def map(self, string):
        """
        Map every character of C{string} through the mapping tables.

        For each character the tables are consulted in order, and the first
        table returning something different from the input character wins.
        A mapping result of C{None} drops the character from the output.
        """
        result = []

        for c in string:
            result_c = c

            for mapping in self.mappings:
                result_c = mapping.map(c)
                if result_c != c:
                    # This table changed the character; skip the rest.
                    break

            if result_c is not None:
                result.append(result_c)

        return u"".join(result)

    def check_prohibiteds(self, string):
        """
        Raise C{UnicodeError} if C{string} contains a character that is
        present in any of the prohibited tables.
        """
        for c in string:
            for table in self.prohibiteds:
                if table.lookup(c):
                    raise UnicodeError("Invalid character %s" % repr(c))

    def check_unassigneds(self, string):
        """
        Raise C{UnicodeError} if C{string} contains a code point listed in
        stringprep table A.1.
        """
        for c in string:
            if stringprep.in_table_a1(c):
                raise UnicodeError("Unassigned code point %s" % repr(c))

    def check_bidirectionals(self, string):
        """
        Enforce the bidirectional requirements on C{string}.

        @raise UnicodeError: If the string mixes characters from table D.1
            (RandALCat) with characters from table D.2 (LCat), or if it
            contains a D.1 character but does not both start and end with
            one.
        """
        found_LCat = False
        found_RandALCat = False

        for c in string:
            if stringprep.in_table_d1(c):
                found_RandALCat = True
            if stringprep.in_table_d2(c):
                found_LCat = True

        if found_LCat and found_RandALCat:
            raise UnicodeError("Violation of BIDI Requirement 2")

        # found_RandALCat implies the string is non-empty, so indexing the
        # first and last characters is safe here.
        if found_RandALCat and not (stringprep.in_table_d1(string[0]) and
                                    stringprep.in_table_d1(string[-1])):
            raise UnicodeError("Violation of BIDI Requirement 3")
class NamePrep:
    """
    Implements preparation of internationalized domain names.

    This class implements preparing internationalized domain names using the
    rules defined in RFC 3491, section 4 (Conversion operations).

    We do not perform step 4 since we deal with unicode representations of
    domain names and do not convert from or to ASCII representations using
    punycode encoding. When such a conversion is needed, the C{idna} standard
    library provides the C{ToUnicode()} and C{ToASCII()} functions. Note that
    C{idna} itself assumes UseSTD3ASCIIRules to be false.

    The following steps are performed by C{prepare()}:

      - Split the domain name in labels at the dots (RFC 3490, 3.1)
      - Apply nameprep proper on each label (RFC 3491)
      - Enforce the restrictions on ASCII characters in host names by
        assuming STD3ASCIIRules to be true. (STD 3)
      - Rejoin the labels using the label separator U+002E (full stop).
    """

    # Prohibited characters: every ASCII code point other than the
    # hyphen-minus (0x2d), the digits and the letters.
    prohibiteds = [unichr(n) for n in chain(range(0x00, 0x2c + 1),
                                            range(0x2e, 0x2f + 1),
                                            range(0x3a, 0x40 + 1),
                                            range(0x5b, 0x60 + 1),
                                            range(0x7b, 0x7f + 1))]

    def prepare(self, string):
        """
        Prepare the domain name C{string} label by label.

        A single trailing dot is preserved in the result.

        @raise UnicodeError: If any label is rejected by L{nameprep}.
        """
        labels = idna.dots.split(string)

        # A trailing separator yields a final empty label; remember it and
        # put the dot back after the remaining labels have been prepared.
        if labels and not labels[-1]:
            trailing_dot = u'.'
            del labels[-1]
        else:
            trailing_dot = u''

        result = [self.nameprep(label) for label in labels]
        return u".".join(result) + trailing_dot

    def check_prohibiteds(self, string):
        """
        Raise C{UnicodeError} if C{string} contains a prohibited character.
        """
        for c in string:
            if c in self.prohibiteds:
                raise UnicodeError("Invalid character %s" % repr(c))

    def nameprep(self, label):
        """
        Prepare a single label and enforce the host name restrictions.

        @raise UnicodeError: If the label contains a prohibited character,
            is empty (possibly only after mapping), or starts or ends with a
            hyphen-minus.
        """
        label = idna.nameprep(label)
        self.check_prohibiteds(label)
        # Guard before looking at the first and last characters: an empty
        # label would otherwise surface as an IndexError rather than the
        # UnicodeError used for every other rejection.
        if not label:
            raise UnicodeError("Empty label")
        if label[0] == u'-':
            raise UnicodeError("Invalid leading hyphen-minus")
        if label[-1] == u'-':
            raise UnicodeError("Invalid trailing hyphen-minus")
        return label
# Lookup tables wrapping the membership tests of the stdlib C{stringprep}
# module for tables C.1.1 through C.9.
C_11 = LookupTableFromFunction(stringprep.in_table_c11)
C_12 = LookupTableFromFunction(stringprep.in_table_c12)
C_21 = LookupTableFromFunction(stringprep.in_table_c21)
C_22 = LookupTableFromFunction(stringprep.in_table_c22)
C_3 = LookupTableFromFunction(stringprep.in_table_c3)
C_4 = LookupTableFromFunction(stringprep.in_table_c4)
C_5 = LookupTableFromFunction(stringprep.in_table_c5)
C_6 = LookupTableFromFunction(stringprep.in_table_c6)
C_7 = LookupTableFromFunction(stringprep.in_table_c7)
C_8 = LookupTableFromFunction(stringprep.in_table_c8)
C_9 = LookupTableFromFunction(stringprep.in_table_c9)

# Mapping tables: B.1 characters are mapped to nothing, B.2 characters are
# mapped by C{stringprep.map_table_b2}.
B_1 = EmptyMappingTable(stringprep.in_table_b1)
B_2 = MappingTableFromFunction(stringprep.map_table_b2)
# Profile for the node part: B.1 and B.2 mappings, the C.* tables, plus an
# extra set of prohibited ASCII characters.
nodeprep = Profile(
    mappings=[B_1, B_2],
    prohibiteds=[
        C_11, C_12, C_21, C_22,
        C_3, C_4, C_5, C_6, C_7, C_8, C_9,
        LookupTable([u'"', u'&', u"'", u'/',
                     u':', u'<', u'>', u'@']),
    ],
)

# Profile for the resource part: only the B.1 mapping, and table C.1.1 is
# not prohibited.
resourceprep = Profile(
    mappings=[B_1],
    prohibiteds=[
        C_12, C_21, C_22,
        C_3, C_4, C_5, C_6, C_7, C_8, C_9,
    ],
)

# Domain name preparer shared at module level.
nameprep = NamePrep()
|