trustroot.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. # -*- test-case-name: openid.test.test_rpverify -*-
  2. """
  3. This module contains the C{L{TrustRoot}} class, which helps handle
  4. trust root checking. This module is used by the
  5. C{L{openid.server.server}} module, but it is also available to server
  6. implementers who wish to use it for additional trust root checking.
  7. It also implements relying party return_to URL verification, based on
  8. the realm.
  9. """
  10. __all__ = [
  11. 'TrustRoot',
  12. 'RP_RETURN_TO_URL_TYPE',
  13. 'extractReturnToURLs',
  14. 'returnToMatches',
  15. 'verifyReturnTo',
  16. ]
  17. from openid import oidutil
  18. from openid import urinorm
  19. from openid.yadis import services
  20. from urlparse import urlparse, urlunparse
  21. import re
  22. ############################################
  23. _protocols = ['http', 'https']
  24. _top_level_domains = [
  25. 'ac', 'ad', 'ae', 'aero', 'af', 'ag', 'ai', 'al', 'am', 'an',
  26. 'ao', 'aq', 'ar', 'arpa', 'as', 'asia', 'at', 'au', 'aw',
  27. 'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh', 'bi',
  28. 'biz', 'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt', 'bv', 'bw',
  29. 'by', 'bz', 'ca', 'cat', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci',
  30. 'ck', 'cl', 'cm', 'cn', 'co', 'com', 'coop', 'cr', 'cu', 'cv',
  31. 'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz', 'ec',
  32. 'edu', 'ee', 'eg', 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk',
  33. 'fm', 'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh',
  34. 'gi', 'gl', 'gm', 'gn', 'gov', 'gp', 'gq', 'gr', 'gs', 'gt',
  35. 'gu', 'gw', 'gy', 'hk', 'hm', 'hn', 'hr', 'ht', 'hu', 'id',
  36. 'ie', 'il', 'im', 'in', 'info', 'int', 'io', 'iq', 'ir', 'is',
  37. 'it', 'je', 'jm', 'jo', 'jobs', 'jp', 'ke', 'kg', 'kh', 'ki',
  38. 'km', 'kn', 'kp', 'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc',
  39. 'li', 'lk', 'lr', 'ls', 'lt', 'lu', 'lv', 'ly', 'ma', 'mc',
  40. 'md', 'me', 'mg', 'mh', 'mil', 'mk', 'ml', 'mm', 'mn', 'mo',
  41. 'mobi', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'museum', 'mv',
  42. 'mw', 'mx', 'my', 'mz', 'na', 'name', 'nc', 'ne', 'net', 'nf',
  43. 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu', 'nz', 'om', 'org',
  44. 'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr',
  45. 'pro', 'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru',
  46. 'rw', 'sa', 'sb', 'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj',
  47. 'sk', 'sl', 'sm', 'sn', 'so', 'sr', 'st', 'su', 'sv', 'sy',
  48. 'sz', 'tc', 'td', 'tel', 'tf', 'tg', 'th', 'tj', 'tk', 'tl',
  49. 'tm', 'tn', 'to', 'tp', 'tr', 'travel', 'tt', 'tv', 'tw',
  50. 'tz', 'ua', 'ug', 'uk', 'us', 'uy', 'uz', 'va', 'vc', 've',
  51. 'vg', 'vi', 'vn', 'vu', 'wf', 'ws', 'xn--0zwm56d',
  52. 'xn--11b5bs3a9aj6g', 'xn--80akhbyknj4f', 'xn--9t4b11yi5a',
  53. 'xn--deba0ad', 'xn--g6w251d', 'xn--hgbk6aj7f53bba',
  54. 'xn--hlcj6aya9esc7a', 'xn--jxalpdlp', 'xn--kgbechtv',
  55. 'xn--zckzah', 'ye', 'yt', 'yu', 'za', 'zm', 'zw']
  56. # Build from RFC3986, section 3.2.2. Used to reject hosts with invalid
  57. # characters.
  58. host_segment_re = re.compile(
  59. r"(?:[-a-zA-Z0-9!$&'\(\)\*+,;=._~]|%[a-zA-Z0-9]{2})+$")
  60. class RealmVerificationRedirected(Exception):
  61. """Attempting to verify this realm resulted in a redirect.
  62. @since: 2.1.0
  63. """
  64. def __init__(self, relying_party_url, rp_url_after_redirects):
  65. self.relying_party_url = relying_party_url
  66. self.rp_url_after_redirects = rp_url_after_redirects
  67. def __str__(self):
  68. return ("Attempting to verify %r resulted in "
  69. "redirect to %r" %
  70. (self.relying_party_url,
  71. self.rp_url_after_redirects))
  72. def _parseURL(url):
  73. try:
  74. url = urinorm.urinorm(url)
  75. except ValueError:
  76. return None
  77. proto, netloc, path, params, query, frag = urlparse(url)
  78. if not path:
  79. # Python <2.4 does not parse URLs with no path properly
  80. if not query and '?' in netloc:
  81. netloc, query = netloc.split('?', 1)
  82. path = '/'
  83. path = urlunparse(('', '', path, params, query, frag))
  84. if ':' in netloc:
  85. try:
  86. host, port = netloc.split(':')
  87. except ValueError:
  88. return None
  89. if not re.match(r'\d+$', port):
  90. return None
  91. else:
  92. host = netloc
  93. port = ''
  94. host = host.lower()
  95. if not host_segment_re.match(host):
  96. return None
  97. return proto, host, port, path
  98. class TrustRoot(object):
  99. """
  100. This class represents an OpenID trust root. The C{L{parse}}
  101. classmethod accepts a trust root string, producing a
  102. C{L{TrustRoot}} object. The method OpenID server implementers
  103. would be most likely to use is the C{L{isSane}} method, which
  104. checks the trust root for given patterns that indicate that the
  105. trust root is too broad or points to a local network resource.
  106. @sort: parse, isSane
  107. """
  108. def __init__(self, unparsed, proto, wildcard, host, port, path):
  109. self.unparsed = unparsed
  110. self.proto = proto
  111. self.wildcard = wildcard
  112. self.host = host
  113. self.port = port
  114. self.path = path
  115. def isSane(self):
  116. """
  117. This method checks the to see if a trust root represents a
  118. reasonable (sane) set of URLs. 'http://*.com/', for example
  119. is not a reasonable pattern, as it cannot meaningfully specify
  120. the site claiming it. This function attempts to find many
  121. related examples, but it can only work via heuristics.
  122. Negative responses from this method should be treated as
  123. advisory, used only to alert the user to examine the trust
  124. root carefully.
  125. @return: Whether the trust root is sane
  126. @rtype: C{bool}
  127. """
  128. if self.host == 'localhost':
  129. return True
  130. host_parts = self.host.split('.')
  131. if self.wildcard:
  132. assert host_parts[0] == '', host_parts
  133. del host_parts[0]
  134. # If it's an absolute domain name, remove the empty string
  135. # from the end.
  136. if host_parts and not host_parts[-1]:
  137. del host_parts[-1]
  138. if not host_parts:
  139. return False
  140. # Do not allow adjacent dots
  141. if '' in host_parts:
  142. return False
  143. tld = host_parts[-1]
  144. if tld not in _top_level_domains:
  145. return False
  146. if len(host_parts) == 1:
  147. return False
  148. if self.wildcard:
  149. if len(tld) == 2 and len(host_parts[-2]) <= 3:
  150. # It's a 2-letter tld with a short second to last segment
  151. # so there needs to be more than two segments specified
  152. # (e.g. *.co.uk is insane)
  153. return len(host_parts) > 2
  154. # Passed all tests for insanity.
  155. return True
  156. def validateURL(self, url):
  157. """
  158. Validates a URL against this trust root.
  159. @param url: The URL to check
  160. @type url: C{str}
  161. @return: Whether the given URL is within this trust root.
  162. @rtype: C{bool}
  163. """
  164. url_parts = _parseURL(url)
  165. if url_parts is None:
  166. return False
  167. proto, host, port, path = url_parts
  168. if proto != self.proto:
  169. return False
  170. if port != self.port:
  171. return False
  172. if '*' in host:
  173. return False
  174. if not self.wildcard:
  175. if host != self.host:
  176. return False
  177. elif ((not host.endswith(self.host)) and
  178. ('.' + host) != self.host):
  179. return False
  180. if path != self.path:
  181. path_len = len(self.path)
  182. trust_prefix = self.path[:path_len]
  183. url_prefix = path[:path_len]
  184. # must be equal up to the length of the path, at least
  185. if trust_prefix != url_prefix:
  186. return False
  187. # These characters must be on the boundary between the end
  188. # of the trust root's path and the start of the URL's
  189. # path.
  190. if '?' in self.path:
  191. allowed = '&'
  192. else:
  193. allowed = '?/'
  194. return (self.path[-1] in allowed or
  195. path[path_len] in allowed)
  196. return True
  197. def parse(cls, trust_root):
  198. """
  199. This method creates a C{L{TrustRoot}} instance from the given
  200. input, if possible.
  201. @param trust_root: This is the trust root to parse into a
  202. C{L{TrustRoot}} object.
  203. @type trust_root: C{str}
  204. @return: A C{L{TrustRoot}} instance if trust_root parses as a
  205. trust root, C{None} otherwise.
  206. @rtype: C{NoneType} or C{L{TrustRoot}}
  207. """
  208. url_parts = _parseURL(trust_root)
  209. if url_parts is None:
  210. return None
  211. proto, host, port, path = url_parts
  212. # check for valid prototype
  213. if proto not in _protocols:
  214. return None
  215. # check for URI fragment
  216. if path.find('#') != -1:
  217. return None
  218. # extract wildcard if it is there
  219. if host.find('*', 1) != -1:
  220. # wildcard must be at start of domain: *.foo.com, not foo.*.com
  221. return None
  222. if host.startswith('*'):
  223. # Starts with star, so must have a dot after it (if a
  224. # domain is specified)
  225. if len(host) > 1 and host[1] != '.':
  226. return None
  227. host = host[1:]
  228. wilcard = True
  229. else:
  230. wilcard = False
  231. # we have a valid trust root
  232. tr = cls(trust_root, proto, wilcard, host, port, path)
  233. return tr
  234. parse = classmethod(parse)
  235. def checkSanity(cls, trust_root_string):
  236. """str -> bool
  237. is this a sane trust root?
  238. """
  239. trust_root = cls.parse(trust_root_string)
  240. if trust_root is None:
  241. return False
  242. else:
  243. return trust_root.isSane()
  244. checkSanity = classmethod(checkSanity)
  245. def checkURL(cls, trust_root, url):
  246. """quick func for validating a url against a trust root. See the
  247. TrustRoot class if you need more control."""
  248. tr = cls.parse(trust_root)
  249. return tr is not None and tr.validateURL(url)
  250. checkURL = classmethod(checkURL)
  251. def buildDiscoveryURL(self):
  252. """Return a discovery URL for this realm.
  253. This function does not check to make sure that the realm is
  254. valid. Its behaviour on invalid inputs is undefined.
  255. @rtype: str
  256. @returns: The URL upon which relying party discovery should be run
  257. in order to verify the return_to URL
  258. @since: 2.1.0
  259. """
  260. if self.wildcard:
  261. # Use "www." in place of the star
  262. assert self.host.startswith('.'), self.host
  263. www_domain = 'www' + self.host
  264. return '%s://%s%s' % (self.proto, www_domain, self.path)
  265. else:
  266. return self.unparsed
  267. def __repr__(self):
  268. return "TrustRoot(%r, %r, %r, %r, %r, %r)" % (
  269. self.unparsed, self.proto, self.wildcard, self.host, self.port,
  270. self.path)
  271. def __str__(self):
  272. return repr(self)
# The URI for relying party discovery, used in realm verification.
# _extractReturnURL passes it to the endpoint's matchTypes() to pick
# out the endpoints that advertise a return_to URL.
#
# XXX: This should probably live somewhere else (like in
# openid.consumer or openid.yadis somewhere)
RP_RETURN_TO_URL_TYPE = 'http://specs.openid.net/auth/2.0/return_to'
  278. def _extractReturnURL(endpoint):
  279. """If the endpoint is a relying party OpenID return_to endpoint,
  280. return the endpoint URL. Otherwise, return None.
  281. This function is intended to be used as a filter for the Yadis
  282. filtering interface.
  283. @see: C{L{openid.yadis.services}}
  284. @see: C{L{openid.yadis.filters}}
  285. @param endpoint: An XRDS BasicServiceEndpoint, as returned by
  286. performing Yadis dicovery.
  287. @returns: The endpoint URL or None if the endpoint is not a
  288. relying party endpoint.
  289. @rtype: str or NoneType
  290. """
  291. if endpoint.matchTypes([RP_RETURN_TO_URL_TYPE]):
  292. return endpoint.uri
  293. else:
  294. return None
  295. def returnToMatches(allowed_return_to_urls, return_to):
  296. """Is the return_to URL under one of the supplied allowed
  297. return_to URLs?
  298. @since: 2.1.0
  299. """
  300. for allowed_return_to in allowed_return_to_urls:
  301. # A return_to pattern works the same as a realm, except that
  302. # it's not allowed to use a wildcard. We'll model this by
  303. # parsing it as a realm, and not trying to match it if it has
  304. # a wildcard.
  305. return_realm = TrustRoot.parse(allowed_return_to)
  306. if (# Parses as a trust root
  307. return_realm is not None and
  308. # Does not have a wildcard
  309. not return_realm.wildcard and
  310. # Matches the return_to that we passed in with it
  311. return_realm.validateURL(return_to)
  312. ):
  313. return True
  314. # No URL in the list matched
  315. return False
  316. def getAllowedReturnURLs(relying_party_url):
  317. """Given a relying party discovery URL return a list of return_to URLs.
  318. @since: 2.1.0
  319. """
  320. (rp_url_after_redirects, return_to_urls) = services.getServiceEndpoints(
  321. relying_party_url, _extractReturnURL)
  322. if rp_url_after_redirects != relying_party_url:
  323. # Verification caused a redirect
  324. raise RealmVerificationRedirected(
  325. relying_party_url, rp_url_after_redirects)
  326. return return_to_urls
  327. # _vrfy parameter is there to make testing easier
  328. def verifyReturnTo(realm_str, return_to, _vrfy=getAllowedReturnURLs):
  329. """Verify that a return_to URL is valid for the given realm.
  330. This function builds a discovery URL, performs Yadis discovery on
  331. it, makes sure that the URL does not redirect, parses out the
  332. return_to URLs, and finally checks to see if the current return_to
  333. URL matches the return_to.
  334. @raises DiscoveryFailure: When Yadis discovery fails
  335. @returns: True if the return_to URL is valid for the realm
  336. @since: 2.1.0
  337. """
  338. realm = TrustRoot.parse(realm_str)
  339. if realm is None:
  340. # The realm does not parse as a URL pattern
  341. return False
  342. try:
  343. allowable_urls = _vrfy(realm.buildDiscoveryURL())
  344. except RealmVerificationRedirected, err:
  345. oidutil.log(str(err))
  346. return False
  347. if returnToMatches(allowable_urls, return_to):
  348. return True
  349. else:
  350. oidutil.log("Failed to validate return_to %r for realm %r, was not "
  351. "in %s" % (return_to, realm_str, allowable_urls))
  352. return False