parseresult.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. # -*- coding: utf-8 -*-
  2. # Copyright (c) 2015 Ian Cordasco
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  12. # implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. from collections import namedtuple
  16. from . import compat
  17. from . import exceptions
  18. from . import normalizers
  19. from . import uri
  20. __all__ = ('ParseResult', 'ParseResultBytes')
  21. PARSED_COMPONENTS = ('scheme', 'userinfo', 'host', 'port', 'path', 'query',
  22. 'fragment')
  23. class ParseResultMixin(object):
  24. def _generate_authority(self, attributes):
  25. # I swear I did not align the comparisons below. That's just how they
  26. # happened to align based on pep8 and attribute lengths.
  27. userinfo, host, port = (attributes[p]
  28. for p in ('userinfo', 'host', 'port'))
  29. if (self.userinfo != userinfo or
  30. self.host != host or
  31. self.port != port):
  32. if port:
  33. port = '{0}'.format(port)
  34. return normalizers.normalize_authority(
  35. (compat.to_str(userinfo, self.encoding),
  36. compat.to_str(host, self.encoding),
  37. port)
  38. )
  39. return self.authority
  40. def geturl(self):
  41. """Standard library shim to the unsplit method."""
  42. return self.unsplit()
  43. @property
  44. def hostname(self):
  45. """Standard library shim for the host portion of the URI."""
  46. return self.host
  47. @property
  48. def netloc(self):
  49. """Standard library shim for the authority portion of the URI."""
  50. return self.authority
  51. @property
  52. def params(self):
  53. """Standard library shim for the query portion of the URI."""
  54. return self.query
  55. class ParseResult(namedtuple('ParseResult', PARSED_COMPONENTS),
  56. ParseResultMixin):
  57. slots = ()
  58. def __new__(cls, scheme, userinfo, host, port, path, query, fragment,
  59. uri_ref, encoding='utf-8'):
  60. parse_result = super(ParseResult, cls).__new__(
  61. cls,
  62. scheme or None,
  63. userinfo or None,
  64. host,
  65. port or None,
  66. path or None,
  67. query or None,
  68. fragment or None)
  69. parse_result.encoding = encoding
  70. parse_result.reference = uri_ref
  71. return parse_result
  72. @classmethod
  73. def from_string(cls, uri_string, encoding='utf-8', strict=True):
  74. """Parse a URI from the given unicode URI string.
  75. :param str uri_string: Unicode URI to be parsed into a reference.
  76. :param str encoding: The encoding of the string provided
  77. :param bool strict: Parse strictly according to :rfc:`3986` if True.
  78. If False, parse similarly to the standard library's urlparse
  79. function.
  80. :returns: :class:`ParseResult` or subclass thereof
  81. """
  82. reference = uri.URIReference.from_string(uri_string, encoding)
  83. try:
  84. subauthority = reference.authority_info()
  85. except exceptions.InvalidAuthority:
  86. if strict:
  87. raise
  88. userinfo, host, port = split_authority(reference.authority)
  89. else:
  90. # Thanks to Richard Barrell for this idea:
  91. # https://twitter.com/0x2ba22e11/status/617338811975139328
  92. userinfo, host, port = (subauthority.get(p)
  93. for p in ('userinfo', 'host', 'port'))
  94. if port:
  95. try:
  96. port = int(port)
  97. except ValueError:
  98. raise exceptions.InvalidPort(port)
  99. return cls(scheme=reference.scheme,
  100. userinfo=userinfo,
  101. host=host,
  102. port=port,
  103. path=reference.path,
  104. query=reference.query,
  105. fragment=reference.fragment,
  106. uri_ref=reference,
  107. encoding=encoding)
  108. @property
  109. def authority(self):
  110. """Normalized authority generated from the subauthority parts."""
  111. return self.reference.authority
  112. def copy_with(self, scheme=None, userinfo=None, host=None, port=None,
  113. path=None, query=None, fragment=None):
  114. attributes = zip(PARSED_COMPONENTS,
  115. (scheme, userinfo, host, port, path, query, fragment))
  116. attrs_dict = {}
  117. for name, value in attributes:
  118. if value is None:
  119. value = getattr(self, name)
  120. attrs_dict[name] = value
  121. authority = self._generate_authority(attrs_dict)
  122. ref = self.reference.copy_with(scheme=attrs_dict['scheme'],
  123. authority=authority,
  124. path=attrs_dict['path'],
  125. query=attrs_dict['query'],
  126. fragment=attrs_dict['fragment'])
  127. return ParseResult(uri_ref=ref, encoding=self.encoding, **attrs_dict)
  128. def encode(self, encoding=None):
  129. encoding = encoding or self.encoding
  130. attrs = dict(
  131. zip(PARSED_COMPONENTS,
  132. (attr.encode(encoding) if hasattr(attr, 'encode') else attr
  133. for attr in self)))
  134. return ParseResultBytes(
  135. uri_ref=self.reference,
  136. encoding=encoding,
  137. **attrs
  138. )
  139. def unsplit(self, use_idna=False):
  140. """Create a URI string from the components.
  141. :returns: The parsed URI reconstituted as a string.
  142. :rtype: str
  143. """
  144. parse_result = self
  145. if use_idna and self.host:
  146. hostbytes = self.host.encode('idna')
  147. host = hostbytes.decode(self.encoding)
  148. parse_result = self.copy_with(host=host)
  149. return parse_result.reference.unsplit()
  150. class ParseResultBytes(namedtuple('ParseResultBytes', PARSED_COMPONENTS),
  151. ParseResultMixin):
  152. def __new__(cls, scheme, userinfo, host, port, path, query, fragment,
  153. uri_ref, encoding='utf-8'):
  154. parse_result = super(ParseResultBytes, cls).__new__(
  155. cls,
  156. scheme or None,
  157. userinfo or None,
  158. host,
  159. port or None,
  160. path or None,
  161. query or None,
  162. fragment or None)
  163. parse_result.encoding = encoding
  164. parse_result.reference = uri_ref
  165. return parse_result
  166. @classmethod
  167. def from_string(cls, uri_string, encoding='utf-8', strict=True):
  168. """Parse a URI from the given unicode URI string.
  169. :param str uri_string: Unicode URI to be parsed into a reference.
  170. :param str encoding: The encoding of the string provided
  171. :param bool strict: Parse strictly according to :rfc:`3986` if True.
  172. If False, parse similarly to the standard library's urlparse
  173. function.
  174. :returns: :class:`ParseResultBytes` or subclass thereof
  175. """
  176. reference = uri.URIReference.from_string(uri_string, encoding)
  177. try:
  178. subauthority = reference.authority_info()
  179. except exceptions.InvalidAuthority:
  180. if strict:
  181. raise
  182. userinfo, host, port = split_authority(reference.authority)
  183. else:
  184. # Thanks to Richard Barrell for this idea:
  185. # https://twitter.com/0x2ba22e11/status/617338811975139328
  186. userinfo, host, port = (subauthority.get(p)
  187. for p in ('userinfo', 'host', 'port'))
  188. if port:
  189. try:
  190. port = int(port)
  191. except ValueError:
  192. raise exceptions.InvalidPort(port)
  193. to_bytes = compat.to_bytes
  194. return cls(scheme=to_bytes(reference.scheme, encoding),
  195. userinfo=to_bytes(userinfo, encoding),
  196. host=to_bytes(host, encoding),
  197. port=port,
  198. path=to_bytes(reference.path, encoding),
  199. query=to_bytes(reference.query, encoding),
  200. fragment=to_bytes(reference.fragment, encoding),
  201. uri_ref=reference,
  202. encoding=encoding)
  203. @property
  204. def authority(self):
  205. """Normalized authority generated from the subauthority parts."""
  206. return self.reference.authority.encode(self.encoding)
  207. def copy_with(self, scheme=None, userinfo=None, host=None, port=None,
  208. path=None, query=None, fragment=None):
  209. attributes = zip(PARSED_COMPONENTS,
  210. (scheme, userinfo, host, port, path, query, fragment))
  211. attrs_dict = {}
  212. for name, value in attributes:
  213. if value is None:
  214. value = getattr(self, name)
  215. if not isinstance(value, bytes) and hasattr(value, 'encode'):
  216. value = value.encode(self.encoding)
  217. attrs_dict[name] = value
  218. authority = self._generate_authority(attrs_dict)
  219. to_str = compat.to_str
  220. ref = self.reference.copy_with(
  221. scheme=to_str(attrs_dict['scheme'], self.encoding),
  222. authority=authority,
  223. path=to_str(attrs_dict['path'], self.encoding),
  224. query=to_str(attrs_dict['query'], self.encoding),
  225. fragment=to_str(attrs_dict['fragment'], self.encoding)
  226. )
  227. return ParseResultBytes(
  228. uri_ref=ref,
  229. encoding=self.encoding,
  230. **attrs_dict
  231. )
  232. def unsplit(self, use_idna=False):
  233. """Create a URI bytes object from the components.
  234. :returns: The parsed URI reconstituted as a string.
  235. :rtype: bytes
  236. """
  237. parse_result = self
  238. if use_idna and self.host:
  239. # self.host is bytes, to encode to idna, we need to decode it
  240. # first
  241. host = self.host.decode(self.encoding)
  242. hostbytes = host.encode('idna')
  243. parse_result = self.copy_with(host=hostbytes)
  244. uri = parse_result.reference.unsplit()
  245. return uri.encode(self.encoding)
  246. def split_authority(authority):
  247. # Initialize our expected return values
  248. userinfo = host = port = None
  249. # Initialize an extra var we may need to use
  250. extra_host = None
  251. # Set-up rest in case there is no userinfo portion
  252. rest = authority
  253. if '@' in authority:
  254. userinfo, rest = authority.rsplit('@', 1)
  255. # Handle IPv6 host addresses
  256. if rest.startswith('['):
  257. host, rest = rest.split(']', 1)
  258. host += ']'
  259. if ':' in rest:
  260. extra_host, port = rest.split(':', 1)
  261. elif not host and rest:
  262. host = rest
  263. if extra_host and not host:
  264. host = extra_host
  265. return userinfo, host, port