uri_parser.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561
  1. # Copyright 2011-present MongoDB, Inc.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License"); you
  4. # may not use this file except in compliance with the License. You
  5. # may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  12. # implied. See the License for the specific language governing
  13. # permissions and limitations under the License.
  14. """Tools to parse and validate a MongoDB URI."""
  15. import re
  16. import warnings
  17. import sys
  18. from bson.py3compat import string_type, PY3
  19. if PY3:
  20. from urllib.parse import unquote_plus
  21. else:
  22. from urllib import unquote_plus
  23. from pymongo.common import (
  24. get_validated_options, INTERNAL_URI_OPTION_NAME_MAP,
  25. URI_OPTIONS_DEPRECATION_MAP, _CaseInsensitiveDictionary)
  26. from pymongo.errors import ConfigurationError, InvalidURI
  27. from pymongo.srv_resolver import _HAVE_DNSPYTHON, _SrvResolver
# URI scheme prefix of a standard connection string, and its length (used to
# slice the scheme off the front of a URI).
SCHEME = 'mongodb://'
SCHEME_LEN = len(SCHEME)
# URI scheme prefix of a connection string whose hosts and options are built
# from DNS SRV and TXT lookups, and its length.
SRV_SCHEME = 'mongodb+srv://'
SRV_SCHEME_LEN = len(SRV_SCHEME)
# Port assigned to hosts that do not specify one.
DEFAULT_PORT = 27017
  33. def parse_userinfo(userinfo):
  34. """Validates the format of user information in a MongoDB URI.
  35. Reserved characters like ':', '/', '+' and '@' must be escaped
  36. following RFC 3986.
  37. Returns a 2-tuple containing the unescaped username followed
  38. by the unescaped password.
  39. :Paramaters:
  40. - `userinfo`: A string of the form <username>:<password>
  41. .. versionchanged:: 2.2
  42. Now uses `urllib.unquote_plus` so `+` characters must be escaped.
  43. """
  44. if '@' in userinfo or userinfo.count(':') > 1:
  45. if PY3:
  46. quote_fn = "urllib.parse.quote_plus"
  47. else:
  48. quote_fn = "urllib.quote_plus"
  49. raise InvalidURI("Username and password must be escaped according to "
  50. "RFC 3986, use %s()." % quote_fn)
  51. user, _, passwd = userinfo.partition(":")
  52. # No password is expected with GSSAPI authentication.
  53. if not user:
  54. raise InvalidURI("The empty string is not valid username.")
  55. return unquote_plus(user), unquote_plus(passwd)
  56. def parse_ipv6_literal_host(entity, default_port):
  57. """Validates an IPv6 literal host:port string.
  58. Returns a 2-tuple of IPv6 literal followed by port where
  59. port is default_port if it wasn't specified in entity.
  60. :Parameters:
  61. - `entity`: A string that represents an IPv6 literal enclosed
  62. in braces (e.g. '[::1]' or '[::1]:27017').
  63. - `default_port`: The port number to use when one wasn't
  64. specified in entity.
  65. """
  66. if entity.find(']') == -1:
  67. raise ValueError("an IPv6 address literal must be "
  68. "enclosed in '[' and ']' according "
  69. "to RFC 2732.")
  70. i = entity.find(']:')
  71. if i == -1:
  72. return entity[1:-1], default_port
  73. return entity[1: i], entity[i + 2:]
  74. def parse_host(entity, default_port=DEFAULT_PORT):
  75. """Validates a host string
  76. Returns a 2-tuple of host followed by port where port is default_port
  77. if it wasn't specified in the string.
  78. :Parameters:
  79. - `entity`: A host or host:port string where host could be a
  80. hostname or IP address.
  81. - `default_port`: The port number to use when one wasn't
  82. specified in entity.
  83. """
  84. host = entity
  85. port = default_port
  86. if entity[0] == '[':
  87. host, port = parse_ipv6_literal_host(entity, default_port)
  88. elif entity.endswith(".sock"):
  89. return entity, default_port
  90. elif entity.find(':') != -1:
  91. if entity.count(':') > 1:
  92. raise ValueError("Reserved characters such as ':' must be "
  93. "escaped according RFC 2396. An IPv6 "
  94. "address literal must be enclosed in '[' "
  95. "and ']' according to RFC 2732.")
  96. host, port = host.split(':', 1)
  97. if isinstance(port, string_type):
  98. if not port.isdigit() or int(port) > 65535 or int(port) <= 0:
  99. raise ValueError("Port must be an integer between 0 and 65535: %s"
  100. % (port,))
  101. port = int(port)
  102. # Normalize hostname to lowercase, since DNS is case-insensitive:
  103. # http://tools.ietf.org/html/rfc4343
  104. # This prevents useless rediscovery if "foo.com" is in the seed list but
  105. # "FOO.com" is in the hello response.
  106. return host.lower(), port
# Options whose values are implicitly determined by tlsInsecure.
# These are the public, lowercase option names.
_IMPLICIT_TLSINSECURE_OPTS = {
    "tlsallowinvalidcertificates",
    "tlsallowinvalidhostnames",
    "tlsdisableocspendpointcheck",}

# Options that cannot be specified when tlsInsecure is also specified.
# Union of the public names above and the names they map to in
# INTERNAL_URI_OPTION_NAME_MAP.
_TLSINSECURE_EXCLUDE_OPTS = (
    {k for k in _IMPLICIT_TLSINSECURE_OPTS} |
    {INTERNAL_URI_OPTION_NAME_MAP[k] for k in _IMPLICIT_TLSINSECURE_OPTS})
  116. def _parse_options(opts, delim):
  117. """Helper method for split_options which creates the options dict.
  118. Also handles the creation of a list for the URI tag_sets/
  119. readpreferencetags portion, and the use of a unicode options string."""
  120. options = _CaseInsensitiveDictionary()
  121. for uriopt in opts.split(delim):
  122. key, value = uriopt.split("=")
  123. if key.lower() == 'readpreferencetags':
  124. options.setdefault(key, []).append(value)
  125. else:
  126. if key in options:
  127. warnings.warn("Duplicate URI option '%s'." % (key,))
  128. if key.lower() == 'authmechanismproperties':
  129. val = value
  130. else:
  131. val = unquote_plus(value)
  132. options[key] = val
  133. return options
  134. def _handle_security_options(options):
  135. """Raise appropriate errors when conflicting TLS options are present in
  136. the options dictionary.
  137. :Parameters:
  138. - `options`: Instance of _CaseInsensitiveDictionary containing
  139. MongoDB URI options.
  140. """
  141. tlsinsecure = options.get('tlsinsecure')
  142. if tlsinsecure is not None:
  143. for opt in _TLSINSECURE_EXCLUDE_OPTS:
  144. if opt in options:
  145. err_msg = ("URI options %s and %s cannot be specified "
  146. "simultaneously.")
  147. raise InvalidURI(err_msg % (
  148. options.cased_key('tlsinsecure'), options.cased_key(opt)))
  149. # Convenience function to retrieve option values based on public or private names.
  150. def _getopt(opt):
  151. return (options.get(opt) or
  152. options.get(INTERNAL_URI_OPTION_NAME_MAP[opt]))
  153. # Handle co-occurence of OCSP & tlsAllowInvalidCertificates options.
  154. tlsallowinvalidcerts = _getopt('tlsallowinvalidcertificates')
  155. if tlsallowinvalidcerts is not None:
  156. if 'tlsdisableocspendpointcheck' in options:
  157. err_msg = ("URI options %s and %s cannot be specified "
  158. "simultaneously.")
  159. raise InvalidURI(err_msg % (
  160. 'tlsallowinvalidcertificates', options.cased_key(
  161. 'tlsdisableocspendpointcheck')))
  162. if tlsallowinvalidcerts is True:
  163. options['tlsdisableocspendpointcheck'] = True
  164. # Handle co-occurence of CRL and OCSP-related options.
  165. tlscrlfile = _getopt('tlscrlfile')
  166. if tlscrlfile is not None:
  167. for opt in ('tlsinsecure', 'tlsallowinvalidcertificates',
  168. 'tlsdisableocspendpointcheck'):
  169. if options.get(opt) is True:
  170. err_msg = ("URI option %s=True cannot be specified when "
  171. "CRL checking is enabled.")
  172. raise InvalidURI(err_msg % (opt,))
  173. if 'ssl' in options and 'tls' in options:
  174. def truth_value(val):
  175. if val in ('true', 'false'):
  176. return val == 'true'
  177. if isinstance(val, bool):
  178. return val
  179. return val
  180. if truth_value(options.get('ssl')) != truth_value(options.get('tls')):
  181. err_msg = ("Can not specify conflicting values for URI options %s "
  182. "and %s.")
  183. raise InvalidURI(err_msg % (
  184. options.cased_key('ssl'), options.cased_key('tls')))
  185. return options
  186. def _handle_option_deprecations(options):
  187. """Issue appropriate warnings when deprecated options are present in the
  188. options dictionary. Removes deprecated option key, value pairs if the
  189. options dictionary is found to also have the renamed option.
  190. :Parameters:
  191. - `options`: Instance of _CaseInsensitiveDictionary containing
  192. MongoDB URI options.
  193. """
  194. for optname in list(options):
  195. if optname in URI_OPTIONS_DEPRECATION_MAP:
  196. mode, message = URI_OPTIONS_DEPRECATION_MAP[optname]
  197. if mode == 'renamed':
  198. newoptname = message
  199. if newoptname in options:
  200. warn_msg = ("Deprecated option '%s' ignored in favor of "
  201. "'%s'.")
  202. warnings.warn(
  203. warn_msg % (options.cased_key(optname),
  204. options.cased_key(newoptname)),
  205. DeprecationWarning, stacklevel=2)
  206. options.pop(optname)
  207. continue
  208. warn_msg = "Option '%s' is deprecated, use '%s' instead."
  209. warnings.warn(
  210. warn_msg % (options.cased_key(optname), newoptname),
  211. DeprecationWarning, stacklevel=2)
  212. elif mode == 'removed':
  213. warn_msg = "Option '%s' is deprecated. %s."
  214. warnings.warn(
  215. warn_msg % (options.cased_key(optname), message),
  216. DeprecationWarning, stacklevel=2)
  217. return options
  218. def _normalize_options(options):
  219. """Normalizes option names in the options dictionary by converting them to
  220. their internally-used names. Also handles use of the tlsInsecure option.
  221. :Parameters:
  222. - `options`: Instance of _CaseInsensitiveDictionary containing
  223. MongoDB URI options.
  224. """
  225. tlsinsecure = options.get('tlsinsecure')
  226. if tlsinsecure is not None:
  227. for opt in _IMPLICIT_TLSINSECURE_OPTS:
  228. intname = INTERNAL_URI_OPTION_NAME_MAP[opt]
  229. # Internal options are logical inverse of public options.
  230. options[intname] = not tlsinsecure
  231. for optname in list(options):
  232. intname = INTERNAL_URI_OPTION_NAME_MAP.get(optname, None)
  233. if intname is not None:
  234. options[intname] = options.pop(optname)
  235. return options
def validate_options(opts, warn=False):
    """Validates and normalizes options passed in a MongoDB URI.

    Thin wrapper: the work is delegated to
    ``pymongo.common.get_validated_options``.

    Returns a new dictionary of validated and normalized options. If warn is
    False then errors will be thrown for invalid options, otherwise they will
    be ignored and a warning will be issued.

    :Parameters:
        - `opts`: A dict of MongoDB URI options.
        - `warn` (optional): If ``True`` then warnings will be logged and
          invalid options will be ignored. Otherwise invalid options will
          cause errors.
    """
    return get_validated_options(opts, warn)
  248. def split_options(opts, validate=True, warn=False, normalize=True):
  249. """Takes the options portion of a MongoDB URI, validates each option
  250. and returns the options in a dictionary.
  251. :Parameters:
  252. - `opt`: A string representing MongoDB URI options.
  253. - `validate`: If ``True`` (the default), validate and normalize all
  254. options.
  255. - `warn`: If ``False`` (the default), suppress all warnings raised
  256. during validation of options.
  257. - `normalize`: If ``True`` (the default), renames all options to their
  258. internally-used names.
  259. """
  260. and_idx = opts.find("&")
  261. semi_idx = opts.find(";")
  262. try:
  263. if and_idx >= 0 and semi_idx >= 0:
  264. raise InvalidURI("Can not mix '&' and ';' for option separators.")
  265. elif and_idx >= 0:
  266. options = _parse_options(opts, "&")
  267. elif semi_idx >= 0:
  268. options = _parse_options(opts, ";")
  269. elif opts.find("=") != -1:
  270. options = _parse_options(opts, None)
  271. else:
  272. raise ValueError
  273. except ValueError:
  274. raise InvalidURI("MongoDB URI options are key=value pairs.")
  275. options = _handle_security_options(options)
  276. options = _handle_option_deprecations(options)
  277. if validate:
  278. options = validate_options(options, warn)
  279. if options.get('authsource') == '':
  280. raise InvalidURI(
  281. "the authSource database cannot be an empty string")
  282. if normalize:
  283. options = _normalize_options(options)
  284. return options
  285. def split_hosts(hosts, default_port=DEFAULT_PORT):
  286. """Takes a string of the form host1[:port],host2[:port]... and
  287. splits it into (host, port) tuples. If [:port] isn't present the
  288. default_port is used.
  289. Returns a set of 2-tuples containing the host name (or IP) followed by
  290. port number.
  291. :Parameters:
  292. - `hosts`: A string of the form host1[:port],host2[:port],...
  293. - `default_port`: The port number to use when one wasn't specified
  294. for a host.
  295. """
  296. nodes = []
  297. for entity in hosts.split(','):
  298. if not entity:
  299. raise ConfigurationError("Empty host "
  300. "(or extra comma in host list).")
  301. port = default_port
  302. # Unix socket entities don't have ports
  303. if entity.endswith('.sock'):
  304. port = None
  305. nodes.append(parse_host(entity, port))
  306. return nodes
# Prohibited characters in database name. DB names also can't have ".", but for
# backward-compat we allow "db.collection" in URI.
# The character class matches any one of: '/', ' ', '"', '$'.
_BAD_DB_CHARS = re.compile('[' + re.escape(r'/ "$') + ']')

# Option names accepted from a DNS TXT record; parse_uri rejects a record
# that yields any other option. Each name is listed in both lowercase and
# camelCase spelling.
_ALLOWED_TXT_OPTS = frozenset(
    ['authsource', 'authSource', 'replicaset', 'replicaSet', 'loadbalanced',
     'loadBalanced'])
  313. def _check_options(nodes, options):
  314. # Ensure directConnection was not True if there are multiple seeds.
  315. if len(nodes) > 1 and options.get('directconnection'):
  316. raise ConfigurationError(
  317. 'Cannot specify multiple hosts with directConnection=true')
  318. if options.get('loadbalanced'):
  319. if len(nodes) > 1:
  320. raise ConfigurationError(
  321. 'Cannot specify multiple hosts with loadBalanced=true')
  322. if options.get('directconnection'):
  323. raise ConfigurationError(
  324. 'Cannot specify directConnection=true with loadBalanced=true')
  325. if options.get('replicaset'):
  326. raise ConfigurationError(
  327. 'Cannot specify replicaSet with loadBalanced=true')
  328. def parse_uri(uri, default_port=DEFAULT_PORT, validate=True, warn=False,
  329. normalize=True, connect_timeout=None):
  330. """Parse and validate a MongoDB URI.
  331. Returns a dict of the form::
  332. {
  333. 'nodelist': <list of (host, port) tuples>,
  334. 'username': <username> or None,
  335. 'password': <password> or None,
  336. 'database': <database name> or None,
  337. 'collection': <collection name> or None,
  338. 'options': <dict of MongoDB URI options>,
  339. 'fqdn': <fqdn of the MongoDB+SRV URI> or None
  340. }
  341. If the URI scheme is "mongodb+srv://" DNS SRV and TXT lookups will be done
  342. to build nodelist and options.
  343. :Parameters:
  344. - `uri`: The MongoDB URI to parse.
  345. - `default_port`: The port number to use when one wasn't specified
  346. for a host in the URI.
  347. - `validate` (optional): If ``True`` (the default), validate and
  348. normalize all options. Default: ``True``.
  349. - `warn` (optional): When validating, if ``True`` then will warn
  350. the user then ignore any invalid options or values. If ``False``,
  351. validation will error when options are unsupported or values are
  352. invalid. Default: ``False``.
  353. - `normalize` (optional): If ``True``, convert names of URI options
  354. to their internally-used names. Default: ``True``.
  355. - `connect_timeout` (optional): The maximum time in milliseconds to
  356. wait for a response from the DNS server.
  357. .. versionchanged:: 3.9
  358. Added the ``normalize`` parameter.
  359. .. versionchanged:: 3.6
  360. Added support for mongodb+srv:// URIs.
  361. .. versionchanged:: 3.5
  362. Return the original value of the ``readPreference`` MongoDB URI option
  363. instead of the validated read preference mode.
  364. .. versionchanged:: 3.1
  365. ``warn`` added so invalid options can be ignored.
  366. """
  367. if uri.startswith(SCHEME):
  368. is_srv = False
  369. scheme_free = uri[SCHEME_LEN:]
  370. elif uri.startswith(SRV_SCHEME):
  371. if not _HAVE_DNSPYTHON:
  372. python_path = sys.executable or "python"
  373. raise ConfigurationError(
  374. 'The "dnspython" module must be '
  375. 'installed to use mongodb+srv:// URIs. '
  376. 'To fix this error install pymongo with the srv extra:\n '
  377. '%s -m pip install "pymongo[srv]"' % (python_path))
  378. is_srv = True
  379. scheme_free = uri[SRV_SCHEME_LEN:]
  380. else:
  381. raise InvalidURI("Invalid URI scheme: URI must "
  382. "begin with '%s' or '%s'" % (SCHEME, SRV_SCHEME))
  383. if not scheme_free:
  384. raise InvalidURI("Must provide at least one hostname or IP.")
  385. user = None
  386. passwd = None
  387. dbase = None
  388. collection = None
  389. options = _CaseInsensitiveDictionary()
  390. host_part, _, path_part = scheme_free.partition('/')
  391. if not host_part:
  392. host_part = path_part
  393. path_part = ""
  394. if not path_part and '?' in host_part:
  395. raise InvalidURI("A '/' is required between "
  396. "the host list and any options.")
  397. if path_part:
  398. dbase, _, opts = path_part.partition('?')
  399. if dbase:
  400. dbase = unquote_plus(dbase)
  401. if '.' in dbase:
  402. dbase, collection = dbase.split('.', 1)
  403. if _BAD_DB_CHARS.search(dbase):
  404. raise InvalidURI('Bad database name "%s"' % dbase)
  405. else:
  406. dbase = None
  407. if opts:
  408. options.update(split_options(opts, validate, warn, normalize))
  409. if '@' in host_part:
  410. userinfo, _, hosts = host_part.rpartition('@')
  411. user, passwd = parse_userinfo(userinfo)
  412. else:
  413. hosts = host_part
  414. if '/' in hosts:
  415. raise InvalidURI("Any '/' in a unix domain socket must be"
  416. " percent-encoded: %s" % host_part)
  417. hosts = unquote_plus(hosts)
  418. fqdn = None
  419. if is_srv:
  420. if options.get('directConnection'):
  421. raise ConfigurationError(
  422. "Cannot specify directConnection=true with "
  423. "%s URIs" % (SRV_SCHEME,))
  424. nodes = split_hosts(hosts, default_port=None)
  425. if len(nodes) != 1:
  426. raise InvalidURI(
  427. "%s URIs must include one, "
  428. "and only one, hostname" % (SRV_SCHEME,))
  429. fqdn, port = nodes[0]
  430. if port is not None:
  431. raise InvalidURI(
  432. "%s URIs must not include a port number" % (SRV_SCHEME,))
  433. # Use the connection timeout. connectTimeoutMS passed as a keyword
  434. # argument overrides the same option passed in the connection string.
  435. connect_timeout = connect_timeout or options.get("connectTimeoutMS")
  436. dns_resolver = _SrvResolver(fqdn, connect_timeout=connect_timeout)
  437. nodes = dns_resolver.get_hosts()
  438. dns_options = dns_resolver.get_options()
  439. if dns_options:
  440. parsed_dns_options = split_options(
  441. dns_options, validate, warn, normalize)
  442. if set(parsed_dns_options) - _ALLOWED_TXT_OPTS:
  443. raise ConfigurationError(
  444. "Only authSource, replicaSet, and loadBalanced are "
  445. "supported from DNS")
  446. for opt, val in parsed_dns_options.items():
  447. if opt not in options:
  448. options[opt] = val
  449. if "ssl" not in options:
  450. options["ssl"] = True if validate else 'true'
  451. else:
  452. nodes = split_hosts(hosts, default_port=default_port)
  453. _check_options(nodes, options)
  454. return {
  455. 'nodelist': nodes,
  456. 'username': user,
  457. 'password': passwd,
  458. 'database': dbase,
  459. 'collection': collection,
  460. 'options': options,
  461. 'fqdn': fqdn
  462. }
  463. if __name__ == '__main__':
  464. import pprint
  465. import sys
  466. try:
  467. pprint.pprint(parse_uri(sys.argv[1]))
  468. except InvalidURI as exc:
  469. print(exc)
  470. sys.exit(0)