uri.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385
  1. # -*- coding: utf-8 -*-
  2. # Copyright (c) 2014 Rackspace
  3. # Copyright (c) 2015 Ian Cordasco
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  13. # implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. from collections import namedtuple
  17. from .compat import to_str
  18. from .exceptions import InvalidAuthority, ResolutionError
  19. from .misc import (
  20. ABSOLUTE_URI_MATCHER, FRAGMENT_MATCHER, IPv4_MATCHER, PATH_MATCHER,
  21. QUERY_MATCHER, SCHEME_MATCHER, SUBAUTHORITY_MATCHER, URI_MATCHER,
  22. URI_COMPONENTS, merge_paths
  23. )
  24. from .normalizers import (
  25. encode_component, normalize_scheme, normalize_authority, normalize_path,
  26. normalize_query, normalize_fragment
  27. )
  28. class URIReference(namedtuple('URIReference', URI_COMPONENTS)):
  29. slots = ()
  30. def __new__(cls, scheme, authority, path, query, fragment,
  31. encoding='utf-8'):
  32. ref = super(URIReference, cls).__new__(
  33. cls,
  34. scheme or None,
  35. authority or None,
  36. path or None,
  37. query or None,
  38. fragment or None)
  39. ref.encoding = encoding
  40. return ref
  41. def __eq__(self, other):
  42. other_ref = other
  43. if isinstance(other, tuple):
  44. other_ref = URIReference(*other)
  45. elif not isinstance(other, URIReference):
  46. try:
  47. other_ref = URIReference.from_string(other)
  48. except TypeError:
  49. raise TypeError(
  50. 'Unable to compare URIReference() to {0}()'.format(
  51. type(other).__name__))
  52. # See http://tools.ietf.org/html/rfc3986#section-6.2
  53. naive_equality = tuple(self) == tuple(other_ref)
  54. return naive_equality or self.normalized_equality(other_ref)
  55. @classmethod
  56. def from_string(cls, uri_string, encoding='utf-8'):
  57. """Parse a URI reference from the given unicode URI string.
  58. :param str uri_string: Unicode URI to be parsed into a reference.
  59. :param str encoding: The encoding of the string provided
  60. :returns: :class:`URIReference` or subclass thereof
  61. """
  62. uri_string = to_str(uri_string, encoding)
  63. split_uri = URI_MATCHER.match(uri_string).groupdict()
  64. return cls(split_uri['scheme'], split_uri['authority'],
  65. encode_component(split_uri['path'], encoding),
  66. encode_component(split_uri['query'], encoding),
  67. encode_component(split_uri['fragment'], encoding), encoding)
  68. def authority_info(self):
  69. """Returns a dictionary with the ``userinfo``, ``host``, and ``port``.
  70. If the authority is not valid, it will raise a ``InvalidAuthority``
  71. Exception.
  72. :returns:
  73. ``{'userinfo': 'username:password', 'host': 'www.example.com',
  74. 'port': '80'}``
  75. :rtype: dict
  76. :raises InvalidAuthority: If the authority is not ``None`` and can not
  77. be parsed.
  78. """
  79. if not self.authority:
  80. return {'userinfo': None, 'host': None, 'port': None}
  81. match = SUBAUTHORITY_MATCHER.match(self.authority)
  82. if match is None:
  83. # In this case, we have an authority that was parsed from the URI
  84. # Reference, but it cannot be further parsed by our
  85. # SUBAUTHORITY_MATCHER. In this case it must not be a valid
  86. # authority.
  87. raise InvalidAuthority(self.authority.encode(self.encoding))
  88. # We had a match, now let's ensure that it is actually a valid host
  89. # address if it is IPv4
  90. matches = match.groupdict()
  91. host = matches.get('host')
  92. if (host and IPv4_MATCHER.match(host) and not
  93. valid_ipv4_host_address(host)):
  94. # If we have a host, it appears to be IPv4 and it does not have
  95. # valid bytes, it is an InvalidAuthority.
  96. raise InvalidAuthority(self.authority.encode(self.encoding))
  97. return matches
  98. @property
  99. def host(self):
  100. """If present, a string representing the host."""
  101. try:
  102. authority = self.authority_info()
  103. except InvalidAuthority:
  104. return None
  105. return authority['host']
  106. @property
  107. def port(self):
  108. """If present, the port (as a string) extracted from the authority."""
  109. try:
  110. authority = self.authority_info()
  111. except InvalidAuthority:
  112. return None
  113. return authority['port']
  114. @property
  115. def userinfo(self):
  116. """If present, the userinfo extracted from the authority."""
  117. try:
  118. authority = self.authority_info()
  119. except InvalidAuthority:
  120. return None
  121. return authority['userinfo']
  122. def is_absolute(self):
  123. """Determine if this URI Reference is an absolute URI.
  124. See http://tools.ietf.org/html/rfc3986#section-4.3 for explanation.
  125. :returns: ``True`` if it is an absolute URI, ``False`` otherwise.
  126. :rtype: bool
  127. """
  128. return bool(ABSOLUTE_URI_MATCHER.match(self.unsplit()))
  129. def is_valid(self, **kwargs):
  130. """Determines if the URI is valid.
  131. :param bool require_scheme: Set to ``True`` if you wish to require the
  132. presence of the scheme component.
  133. :param bool require_authority: Set to ``True`` if you wish to require
  134. the presence of the authority component.
  135. :param bool require_path: Set to ``True`` if you wish to require the
  136. presence of the path component.
  137. :param bool require_query: Set to ``True`` if you wish to require the
  138. presence of the query component.
  139. :param bool require_fragment: Set to ``True`` if you wish to require
  140. the presence of the fragment component.
  141. :returns: ``True`` if the URI is valid. ``False`` otherwise.
  142. :rtype: bool
  143. """
  144. validators = [
  145. (self.scheme_is_valid, kwargs.get('require_scheme', False)),
  146. (self.authority_is_valid, kwargs.get('require_authority', False)),
  147. (self.path_is_valid, kwargs.get('require_path', False)),
  148. (self.query_is_valid, kwargs.get('require_query', False)),
  149. (self.fragment_is_valid, kwargs.get('require_fragment', False)),
  150. ]
  151. return all(v(r) for v, r in validators)
  152. def _is_valid(self, value, matcher, require):
  153. if require:
  154. return (value is not None
  155. and matcher.match(value))
  156. # require is False and value is not None
  157. return value is None or matcher.match(value)
  158. def authority_is_valid(self, require=False):
  159. """Determines if the authority component is valid.
  160. :param str require: Set to ``True`` to require the presence of this
  161. component.
  162. :returns: ``True`` if the authority is valid. ``False`` otherwise.
  163. :rtype: bool
  164. """
  165. try:
  166. self.authority_info()
  167. except InvalidAuthority:
  168. return False
  169. is_valid = self._is_valid(self.authority,
  170. SUBAUTHORITY_MATCHER,
  171. require)
  172. # Ensure that IPv4 addresses have valid bytes
  173. if is_valid and self.host and IPv4_MATCHER.match(self.host):
  174. return valid_ipv4_host_address(self.host)
  175. # Perhaps the host didn't exist or if it did, it wasn't an IPv4-like
  176. # address. In either case, we want to rely on the `_is_valid` check,
  177. # so let's return that.
  178. return is_valid
  179. def scheme_is_valid(self, require=False):
  180. """Determines if the scheme component is valid.
  181. :param str require: Set to ``True`` to require the presence of this
  182. component.
  183. :returns: ``True`` if the scheme is valid. ``False`` otherwise.
  184. :rtype: bool
  185. """
  186. return self._is_valid(self.scheme, SCHEME_MATCHER, require)
  187. def path_is_valid(self, require=False):
  188. """Determines if the path component is valid.
  189. :param str require: Set to ``True`` to require the presence of this
  190. component.
  191. :returns: ``True`` if the path is valid. ``False`` otherwise.
  192. :rtype: bool
  193. """
  194. return self._is_valid(self.path, PATH_MATCHER, require)
  195. def query_is_valid(self, require=False):
  196. """Determines if the query component is valid.
  197. :param str require: Set to ``True`` to require the presence of this
  198. component.
  199. :returns: ``True`` if the query is valid. ``False`` otherwise.
  200. :rtype: bool
  201. """
  202. return self._is_valid(self.query, QUERY_MATCHER, require)
  203. def fragment_is_valid(self, require=False):
  204. """Determines if the fragment component is valid.
  205. :param str require: Set to ``True`` to require the presence of this
  206. component.
  207. :returns: ``True`` if the fragment is valid. ``False`` otherwise.
  208. :rtype: bool
  209. """
  210. return self._is_valid(self.fragment, FRAGMENT_MATCHER, require)
  211. def normalize(self):
  212. """Normalize this reference as described in Section 6.2.2
  213. This is not an in-place normalization. Instead this creates a new
  214. URIReference.
  215. :returns: A new reference object with normalized components.
  216. :rtype: URIReference
  217. """
  218. # See http://tools.ietf.org/html/rfc3986#section-6.2.2 for logic in
  219. # this method.
  220. return URIReference(normalize_scheme(self.scheme or ''),
  221. normalize_authority(
  222. (self.userinfo, self.host, self.port)),
  223. normalize_path(self.path or ''),
  224. normalize_query(self.query or ''),
  225. normalize_fragment(self.fragment or ''))
  226. def normalized_equality(self, other_ref):
  227. """Compare this URIReference to another URIReference.
  228. :param URIReference other_ref: (required), The reference with which
  229. we're comparing.
  230. :returns: ``True`` if the references are equal, ``False`` otherwise.
  231. :rtype: bool
  232. """
  233. return tuple(self.normalize()) == tuple(other_ref.normalize())
  234. def resolve_with(self, base_uri, strict=False):
  235. """Use an absolute URI Reference to resolve this relative reference.
  236. Assuming this is a relative reference that you would like to resolve,
  237. use the provided base URI to resolve it.
  238. See http://tools.ietf.org/html/rfc3986#section-5 for more information.
  239. :param base_uri: Either a string or URIReference. It must be an
  240. absolute URI or it will raise an exception.
  241. :returns: A new URIReference which is the result of resolving this
  242. reference using ``base_uri``.
  243. :rtype: :class:`URIReference`
  244. :raises ResolutionError: If the ``base_uri`` is not an absolute URI.
  245. """
  246. if not isinstance(base_uri, URIReference):
  247. base_uri = URIReference.from_string(base_uri)
  248. if not base_uri.is_absolute():
  249. raise ResolutionError(base_uri)
  250. # This is optional per
  251. # http://tools.ietf.org/html/rfc3986#section-5.2.1
  252. base_uri = base_uri.normalize()
  253. # The reference we're resolving
  254. resolving = self
  255. if not strict and resolving.scheme == base_uri.scheme:
  256. resolving = resolving.copy_with(scheme=None)
  257. # http://tools.ietf.org/html/rfc3986#page-32
  258. if resolving.scheme is not None:
  259. target = resolving.copy_with(path=normalize_path(resolving.path))
  260. else:
  261. if resolving.authority is not None:
  262. target = resolving.copy_with(
  263. scheme=base_uri.scheme,
  264. path=normalize_path(resolving.path)
  265. )
  266. else:
  267. if resolving.path is None:
  268. if resolving.query is not None:
  269. query = resolving.query
  270. else:
  271. query = base_uri.query
  272. target = resolving.copy_with(
  273. scheme=base_uri.scheme,
  274. authority=base_uri.authority,
  275. path=base_uri.path,
  276. query=query
  277. )
  278. else:
  279. if resolving.path.startswith('/'):
  280. path = normalize_path(resolving.path)
  281. else:
  282. path = normalize_path(
  283. merge_paths(base_uri, resolving.path)
  284. )
  285. target = resolving.copy_with(
  286. scheme=base_uri.scheme,
  287. authority=base_uri.authority,
  288. path=path,
  289. query=resolving.query
  290. )
  291. return target
  292. def unsplit(self):
  293. """Create a URI string from the components.
  294. :returns: The URI Reference reconstituted as a string.
  295. :rtype: str
  296. """
  297. # See http://tools.ietf.org/html/rfc3986#section-5.3
  298. result_list = []
  299. if self.scheme:
  300. result_list.extend([self.scheme, ':'])
  301. if self.authority:
  302. result_list.extend(['//', self.authority])
  303. if self.path:
  304. result_list.append(self.path)
  305. if self.query:
  306. result_list.extend(['?', self.query])
  307. if self.fragment:
  308. result_list.extend(['#', self.fragment])
  309. return ''.join(result_list)
  310. def copy_with(self, scheme=None, authority=None, path=None, query=None,
  311. fragment=None):
  312. attributes = {
  313. 'scheme': scheme,
  314. 'authority': authority,
  315. 'path': path,
  316. 'query': query,
  317. 'fragment': fragment,
  318. }
  319. for key, value in list(attributes.items()):
  320. if value is None:
  321. del attributes[key]
  322. return self._replace(**attributes)
  323. def valid_ipv4_host_address(host):
  324. # If the host exists, and it might be IPv4, check each byte in the
  325. # address.
  326. return all([0 <= int(byte, base=10) <= 255 for byte in host.split('.')])