resolver_ares.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. # Copyright (c) 2011-2015 Denis Bilenko. See LICENSE for details.
  2. """
  3. c-ares based hostname resolver.
  4. """
  5. from __future__ import absolute_import
  6. import os
  7. import sys
  8. from _socket import getservbyname, getaddrinfo, gaierror, error
  9. from gevent.hub import Waiter, get_hub
  10. from gevent._compat import string_types, text_type, integer_types, reraise, PY3
  11. from gevent.socket import AF_UNSPEC, AF_INET, AF_INET6, SOCK_STREAM, SOCK_DGRAM, SOCK_RAW, AI_NUMERICHOST, EAI_SERVICE, AI_PASSIVE
  12. from gevent.ares import channel, InvalidIP # pylint:disable=import-error,no-name-in-module
  13. __all__ = ['Resolver']
  14. class Resolver(object):
  15. """
  16. Implementation of the resolver API using the `c-ares`_ library.
  17. This implementation uses the c-ares library to handle name
  18. resolution. c-ares is natively asynchronous at the socket level
  19. and so integrates well into gevent's event loop.
  20. In comparison to :class:`gevent.resolver_thread.Resolver` (which
  21. delegates to the native system resolver), the implementation is
  22. much more complex. In addition, there have been reports of it not
  23. properly honoring certain system configurations (for example, the
  24. order in which IPv4 and IPv6 results are returned may not match
  25. the threaded resolver). However, because it does not use threads,
  26. it may scale better for applications that make many lookups.
  27. There are some known differences from the system resolver:
  28. - ``gethostbyname_ex`` and ``gethostbyaddr`` may return different
  29. for the ``aliaslist`` tuple member. (Sometimes the same,
  30. sometimes in a different order, sometimes a different alias
  31. altogether.)
  32. - ``gethostbyname_ex`` may return the ``ipaddrlist`` in a different order.
  33. - ``getaddrinfo`` does not return ``SOCK_RAW`` results.
  34. - ``getaddrinfo`` may return results in a different order.
  35. - Handling of ``.local`` (mDNS) names may be different, even if they are listed in
  36. the hosts file.
  37. - c-ares will not resolve ``broadcasthost``, even if listed in the hosts file.
  38. - This implementation may raise ``gaierror(4)`` where the system implementation would raise
  39. ``herror(1)``.
  40. - The results for ``localhost`` may be different. In particular, some system
  41. resolvers will return more results from ``getaddrinfo`` than c-ares does,
  42. such as SOCK_DGRAM results, and c-ares may report more ips on a multi-homed
  43. host.
  44. .. caution:: This module is considered extremely experimental on PyPy, and
  45. due to its implementation in cython, it may be slower. It may also lead to
  46. interpreter crashes.
  47. .. _c-ares: http://c-ares.haxx.se
  48. """
  49. ares_class = channel
  50. def __init__(self, hub=None, use_environ=True, **kwargs):
  51. if hub is None:
  52. hub = get_hub()
  53. self.hub = hub
  54. if use_environ:
  55. for key in os.environ:
  56. if key.startswith('GEVENTARES_'):
  57. name = key[11:].lower()
  58. if name:
  59. value = os.environ[key]
  60. kwargs.setdefault(name, value)
  61. self.ares = self.ares_class(hub.loop, **kwargs)
  62. self.pid = os.getpid()
  63. self.params = kwargs
  64. self.fork_watcher = hub.loop.fork(ref=False)
  65. self.fork_watcher.start(self._on_fork)
  66. def __repr__(self):
  67. return '<gevent.resolver_ares.Resolver at 0x%x ares=%r>' % (id(self), self.ares)
  68. def _on_fork(self):
  69. # NOTE: See comment in gevent.hub.reinit.
  70. pid = os.getpid()
  71. if pid != self.pid:
  72. self.hub.loop.run_callback(self.ares.destroy)
  73. self.ares = self.ares_class(self.hub.loop, **self.params)
  74. self.pid = pid
  75. def close(self):
  76. if self.ares is not None:
  77. self.hub.loop.run_callback(self.ares.destroy)
  78. self.ares = None
  79. self.fork_watcher.stop()
  80. def gethostbyname(self, hostname, family=AF_INET):
  81. hostname = _resolve_special(hostname, family)
  82. return self.gethostbyname_ex(hostname, family)[-1][0]
  83. def gethostbyname_ex(self, hostname, family=AF_INET):
  84. if PY3:
  85. if isinstance(hostname, str):
  86. hostname = hostname.encode('idna')
  87. elif not isinstance(hostname, (bytes, bytearray)):
  88. raise TypeError('Expected es(idna), not %s' % type(hostname).__name__)
  89. else:
  90. if isinstance(hostname, text_type):
  91. hostname = hostname.encode('ascii')
  92. elif not isinstance(hostname, str):
  93. raise TypeError('Expected string, not %s' % type(hostname).__name__)
  94. while True:
  95. ares = self.ares
  96. try:
  97. waiter = Waiter(self.hub)
  98. ares.gethostbyname(waiter, hostname, family)
  99. result = waiter.get()
  100. if not result[-1]:
  101. raise gaierror(-5, 'No address associated with hostname')
  102. return result
  103. except gaierror:
  104. if ares is self.ares:
  105. if hostname == b'255.255.255.255':
  106. # The stdlib handles this case in 2.7 and 3.x, but ares does not.
  107. # It is tested by test_socket.py in 3.4.
  108. # HACK: So hardcode the expected return.
  109. return ('255.255.255.255', [], ['255.255.255.255'])
  110. raise
  111. # "self.ares is not ares" means channel was destroyed (because we were forked)
  112. def _lookup_port(self, port, socktype):
  113. # pylint:disable=too-many-branches
  114. socktypes = []
  115. if isinstance(port, string_types):
  116. try:
  117. port = int(port)
  118. except ValueError:
  119. try:
  120. if socktype == 0:
  121. origport = port
  122. try:
  123. port = getservbyname(port, 'tcp')
  124. socktypes.append(SOCK_STREAM)
  125. except error:
  126. port = getservbyname(port, 'udp')
  127. socktypes.append(SOCK_DGRAM)
  128. else:
  129. try:
  130. if port == getservbyname(origport, 'udp'):
  131. socktypes.append(SOCK_DGRAM)
  132. except error:
  133. pass
  134. elif socktype == SOCK_STREAM:
  135. port = getservbyname(port, 'tcp')
  136. elif socktype == SOCK_DGRAM:
  137. port = getservbyname(port, 'udp')
  138. else:
  139. raise gaierror(EAI_SERVICE, 'Servname not supported for ai_socktype')
  140. except error as ex:
  141. if 'not found' in str(ex):
  142. raise gaierror(EAI_SERVICE, 'Servname not supported for ai_socktype')
  143. else:
  144. raise gaierror(str(ex))
  145. except UnicodeEncodeError:
  146. raise error('Int or String expected')
  147. elif port is None:
  148. port = 0
  149. elif isinstance(port, integer_types):
  150. pass
  151. else:
  152. raise error('Int or String expected', port, type(port))
  153. port = int(port % 65536)
  154. if not socktypes and socktype:
  155. socktypes.append(socktype)
  156. return port, socktypes
  157. def _getaddrinfo(self, host, port, family=0, socktype=0, proto=0, flags=0):
  158. # pylint:disable=too-many-locals,too-many-branches
  159. if isinstance(host, text_type):
  160. host = host.encode('idna')
  161. elif not isinstance(host, str) or (flags & AI_NUMERICHOST):
  162. # this handles cases which do not require network access
  163. # 1) host is None
  164. # 2) host is of an invalid type
  165. # 3) AI_NUMERICHOST flag is set
  166. return getaddrinfo(host, port, family, socktype, proto, flags)
  167. # we also call _socket.getaddrinfo below if family is not one of AF_*
  168. port, socktypes = self._lookup_port(port, socktype)
  169. socktype_proto = [(SOCK_STREAM, 6), (SOCK_DGRAM, 17), (SOCK_RAW, 0)]
  170. if socktypes:
  171. socktype_proto = [(x, y) for (x, y) in socktype_proto if x in socktypes]
  172. if proto:
  173. socktype_proto = [(x, y) for (x, y) in socktype_proto if proto == y]
  174. ares = self.ares
  175. if family == AF_UNSPEC:
  176. ares_values = Values(self.hub, 2)
  177. ares.gethostbyname(ares_values, host, AF_INET)
  178. ares.gethostbyname(ares_values, host, AF_INET6)
  179. elif family == AF_INET:
  180. ares_values = Values(self.hub, 1)
  181. ares.gethostbyname(ares_values, host, AF_INET)
  182. elif family == AF_INET6:
  183. ares_values = Values(self.hub, 1)
  184. ares.gethostbyname(ares_values, host, AF_INET6)
  185. else:
  186. raise gaierror(5, 'ai_family not supported: %r' % (family, ))
  187. values = ares_values.get()
  188. if len(values) == 2 and values[0] == values[1]:
  189. values.pop()
  190. result = []
  191. result4 = []
  192. result6 = []
  193. for addrs in values:
  194. if addrs.family == AF_INET:
  195. for addr in addrs[-1]:
  196. sockaddr = (addr, port)
  197. for socktype4, proto4 in socktype_proto:
  198. result4.append((AF_INET, socktype4, proto4, '', sockaddr))
  199. elif addrs.family == AF_INET6:
  200. for addr in addrs[-1]:
  201. if addr == '::1':
  202. dest = result
  203. else:
  204. dest = result6
  205. sockaddr = (addr, port, 0, 0)
  206. for socktype6, proto6 in socktype_proto:
  207. dest.append((AF_INET6, socktype6, proto6, '', sockaddr))
  208. # As of 2016, some platforms return IPV6 first and some do IPV4 first,
  209. # and some might even allow configuration of which is which. For backwards
  210. # compatibility with earlier releases (but not necessarily resolver_thread!)
  211. # we return 4 first. See https://github.com/gevent/gevent/issues/815 for more.
  212. result += result4 + result6
  213. if not result:
  214. raise gaierror(-5, 'No address associated with hostname')
  215. return result
  216. def getaddrinfo(self, host, port, family=0, socktype=0, proto=0, flags=0):
  217. while True:
  218. ares = self.ares
  219. try:
  220. return self._getaddrinfo(host, port, family, socktype, proto, flags)
  221. except gaierror:
  222. if ares is self.ares:
  223. raise
  224. def _gethostbyaddr(self, ip_address):
  225. if PY3:
  226. if isinstance(ip_address, str):
  227. ip_address = ip_address.encode('idna')
  228. elif not isinstance(ip_address, (bytes, bytearray)):
  229. raise TypeError('Expected es(idna), not %s' % type(ip_address).__name__)
  230. else:
  231. if isinstance(ip_address, text_type):
  232. ip_address = ip_address.encode('ascii')
  233. elif not isinstance(ip_address, str):
  234. raise TypeError('Expected string, not %s' % type(ip_address).__name__)
  235. waiter = Waiter(self.hub)
  236. try:
  237. self.ares.gethostbyaddr(waiter, ip_address)
  238. return waiter.get()
  239. except InvalidIP:
  240. result = self._getaddrinfo(ip_address, None, family=AF_UNSPEC, socktype=SOCK_DGRAM)
  241. if not result:
  242. raise
  243. _ip_address = result[0][-1][0]
  244. if isinstance(_ip_address, text_type):
  245. _ip_address = _ip_address.encode('ascii')
  246. if _ip_address == ip_address:
  247. raise
  248. waiter.clear()
  249. self.ares.gethostbyaddr(waiter, _ip_address)
  250. return waiter.get()
  251. def gethostbyaddr(self, ip_address):
  252. ip_address = _resolve_special(ip_address, AF_UNSPEC)
  253. while True:
  254. ares = self.ares
  255. try:
  256. return self._gethostbyaddr(ip_address)
  257. except gaierror:
  258. if ares is self.ares:
  259. raise
  260. def _getnameinfo(self, sockaddr, flags):
  261. if not isinstance(flags, int):
  262. raise TypeError('an integer is required')
  263. if not isinstance(sockaddr, tuple):
  264. raise TypeError('getnameinfo() argument 1 must be a tuple')
  265. address = sockaddr[0]
  266. if not PY3 and isinstance(address, text_type):
  267. address = address.encode('ascii')
  268. if not isinstance(address, string_types):
  269. raise TypeError('sockaddr[0] must be a string, not %s' % type(address).__name__)
  270. port = sockaddr[1]
  271. if not isinstance(port, int):
  272. raise TypeError('port must be an integer, not %s' % type(port))
  273. waiter = Waiter(self.hub)
  274. result = self._getaddrinfo(address, str(sockaddr[1]), family=AF_UNSPEC, socktype=SOCK_DGRAM)
  275. if not result:
  276. reraise(*sys.exc_info())
  277. elif len(result) != 1:
  278. raise error('sockaddr resolved to multiple addresses')
  279. family, _socktype, _proto, _name, address = result[0]
  280. if family == AF_INET:
  281. if len(sockaddr) != 2:
  282. raise error("IPv4 sockaddr must be 2 tuple")
  283. elif family == AF_INET6:
  284. address = address[:2] + sockaddr[2:]
  285. self.ares.getnameinfo(waiter, address, flags)
  286. node, service = waiter.get()
  287. if service is None:
  288. if PY3:
  289. # ares docs: "If the query did not complete
  290. # successfully, or one of the values was not
  291. # requested, node or service will be NULL ". Python 2
  292. # allows that for the service, but Python 3 raises
  293. # an error. This is tested by test_socket in py 3.4
  294. err = gaierror('nodename nor servname provided, or not known')
  295. err.errno = 8
  296. raise err
  297. service = '0'
  298. return node, service
  299. def getnameinfo(self, sockaddr, flags):
  300. while True:
  301. ares = self.ares
  302. try:
  303. return self._getnameinfo(sockaddr, flags)
  304. except gaierror:
  305. if ares is self.ares:
  306. raise
  307. class Values(object):
  308. # helper to collect multiple values; ignore errors unless nothing has succeeded
  309. # QQQ could probably be moved somewhere - hub.py?
  310. __slots__ = ['count', 'values', 'error', 'waiter']
  311. def __init__(self, hub, count):
  312. self.count = count
  313. self.values = []
  314. self.error = None
  315. self.waiter = Waiter(hub)
  316. def __call__(self, source):
  317. self.count -= 1
  318. if source.exception is None:
  319. self.values.append(source.value)
  320. else:
  321. self.error = source.exception
  322. if self.count <= 0:
  323. self.waiter.switch()
  324. def get(self):
  325. self.waiter.get()
  326. if self.values:
  327. return self.values
  328. else:
  329. assert error is not None
  330. raise self.error # pylint:disable=raising-bad-type
  331. def _resolve_special(hostname, family):
  332. if hostname == '':
  333. result = getaddrinfo(None, 0, family, SOCK_DGRAM, 0, AI_PASSIVE)
  334. if len(result) != 1:
  335. raise error('wildcard resolved to multiple address')
  336. return result[0][4][0]
  337. return hostname