# -*- coding: utf-8 -*-
"""
hyper/http11/connection
~~~~~~~~~~~~~~~~~~~~~~~

Objects that build hyper's connection-level HTTP/1.1 abstraction.
"""
import base64
import collections
import logging
import os
import socket

# The container ABCs live in ``collections.abc`` on Python 3 and were removed
# from ``collections`` itself in Python 3.10; fall back for Python 2.
try:
    from collections.abc import Iterable, Mapping
except ImportError:  # pragma: no cover
    from collections import Iterable, Mapping

from hyperframe.frame import SettingsFrame

from .response import HTTP11Response
from ..tls import wrap_socket, H2C_PROTOCOL
from ..common.bufsocket import BufferedSocket
from ..common.exceptions import TLSUpgrade, HTTPUpgrade
from ..common.headers import HTTPHeaderMap
from ..common.util import to_bytestring, to_host_port_tuple
from ..compat import bytes

# We prefer pycohttpparser to the pure-Python interpretation
try:  # pragma: no cover
    from pycohttpparser.api import Parser
except ImportError:  # pragma: no cover
    from .parser import Parser
  26. log = logging.getLogger(__name__)
  27. BODY_CHUNKED = 1
  28. BODY_FLAT = 2
  29. class HTTP11Connection(object):
  30. """
  31. An object representing a single HTTP/1.1 connection to a server.
  32. :param host: The host to connect to. This may be an IP address or a
  33. hostname, and optionally may include a port: for example,
  34. ``'twitter.com'``, ``'twitter.com:443'`` or ``'127.0.0.1'``.
  35. :param port: (optional) The port to connect to. If not provided and one
  36. also isn't provided in the ``host`` parameter, defaults to 80.
  37. :param secure: (optional) Whether the request should use TLS. Defaults to
  38. ``False`` for most requests, but to ``True`` for any request issued to
  39. port 443.
  40. :param ssl_context: (optional) A class with custom certificate settings.
  41. If not provided then hyper's default ``SSLContext`` is used instead.
  42. :param proxy_host: (optional) The proxy to connect to. This can be an IP
  43. address or a host name and may include a port.
  44. :param proxy_port: (optional) The proxy port to connect to. If not provided
  45. and one also isn't provided in the ``proxy`` parameter,
  46. defaults to 8080.
  47. """
  48. def __init__(self, host, port=None, secure=None, ssl_context=None,
  49. proxy_host=None, proxy_port=None, **kwargs):
  50. if port is None:
  51. self.host, self.port = to_host_port_tuple(host, default_port=80)
  52. else:
  53. self.host, self.port = host, port
  54. # Record whether we plan to secure the request. In future this should
  55. # be extended to a security profile, but a bool will do for now.
  56. # TODO: Actually do something with this!
  57. if secure is not None:
  58. self.secure = secure
  59. elif self.port == 443:
  60. self.secure = True
  61. else:
  62. self.secure = False
  63. # only send http upgrade headers for non-secure connection
  64. self._send_http_upgrade = not self.secure
  65. self.ssl_context = ssl_context
  66. self._sock = None
  67. # Setup proxy details if applicable.
  68. if proxy_host:
  69. if proxy_port is None:
  70. self.proxy_host, self.proxy_port = to_host_port_tuple(
  71. proxy_host, default_port=8080
  72. )
  73. else:
  74. self.proxy_host, self.proxy_port = proxy_host, proxy_port
  75. else:
  76. self.proxy_host = None
  77. self.proxy_port = None
  78. #: The size of the in-memory buffer used to store data from the
  79. #: network. This is used as a performance optimisation. Increase buffer
  80. #: size to improve performance: decrease it to conserve memory.
  81. #: Defaults to 64kB.
  82. self.network_buffer_size = 65536
  83. #: The object used to perform HTTP/1.1 parsing. Needs to conform to
  84. #: the standard hyper parsing interface.
  85. self.parser = Parser()
  86. def connect(self):
  87. """
  88. Connect to the server specified when the object was created. This is a
  89. no-op if we're already connected.
  90. :returns: Nothing.
  91. """
  92. if self._sock is None:
  93. if not self.proxy_host:
  94. host = self.host
  95. port = self.port
  96. else:
  97. host = self.proxy_host
  98. port = self.proxy_port
  99. sock = socket.create_connection((host, port), 5)
  100. proto = None
  101. if self.secure:
  102. assert not self.proxy_host, "Proxy with HTTPS not supported."
  103. sock, proto = wrap_socket(sock, host, self.ssl_context)
  104. log.debug("Selected protocol: %s", proto)
  105. sock = BufferedSocket(sock, self.network_buffer_size)
  106. if proto not in ('http/1.1', None):
  107. raise TLSUpgrade(proto, sock)
  108. self._sock = sock
  109. return
  110. def request(self, method, url, body=None, headers=None):
  111. """
  112. This will send a request to the server using the HTTP request method
  113. ``method`` and the selector ``url``. If the ``body`` argument is
  114. present, it should be string or bytes object of data to send after the
  115. headers are finished. Strings are encoded as UTF-8. To use other
  116. encodings, pass a bytes object. The Content-Length header is set to the
  117. length of the body field.
  118. :param method: The request method, e.g. ``'GET'``.
  119. :param url: The URL to contact, e.g. ``'/path/segment'``.
  120. :param body: (optional) The request body to send. Must be a bytestring,
  121. an iterable of bytestring, or a file-like object.
  122. :param headers: (optional) The headers to send on the request.
  123. :returns: Nothing.
  124. """
  125. headers = headers or {}
  126. method = to_bytestring(method)
  127. url = to_bytestring(url)
  128. if not isinstance(headers, HTTPHeaderMap):
  129. if isinstance(headers, Mapping):
  130. headers = HTTPHeaderMap(headers.items())
  131. elif isinstance(headers, Iterable):
  132. headers = HTTPHeaderMap(headers)
  133. else:
  134. raise ValueError(
  135. 'Header argument must be a dictionary or an iterable'
  136. )
  137. if self._sock is None:
  138. self.connect()
  139. if self._send_http_upgrade:
  140. self._add_upgrade_headers(headers)
  141. self._send_http_upgrade = False
  142. # We may need extra headers.
  143. if body:
  144. body_type = self._add_body_headers(headers, body)
  145. if b'host' not in headers:
  146. headers[b'host'] = self.host
  147. # Begin by emitting the header block.
  148. self._send_headers(method, url, headers)
  149. # Next, send the request body.
  150. if body:
  151. self._send_body(body, body_type)
  152. return
  153. def get_response(self):
  154. """
  155. Returns a response object.
  156. This is an early beta, so the response object is pretty stupid. That's
  157. ok, we'll fix it later.
  158. """
  159. headers = HTTPHeaderMap()
  160. response = None
  161. while response is None:
  162. # 'encourage' the socket to receive data.
  163. self._sock.fill()
  164. response = self.parser.parse_response(self._sock.buffer)
  165. for n, v in response.headers:
  166. headers[n.tobytes()] = v.tobytes()
  167. self._sock.advance_buffer(response.consumed)
  168. if (response.status == 101 and
  169. b'upgrade' in headers['connection'] and
  170. H2C_PROTOCOL.encode('utf-8') in headers['upgrade']):
  171. raise HTTPUpgrade(H2C_PROTOCOL, self._sock)
  172. return HTTP11Response(
  173. response.status,
  174. response.msg.tobytes(),
  175. headers,
  176. self._sock,
  177. self
  178. )
  179. def _send_headers(self, method, url, headers):
  180. """
  181. Handles the logic of sending the header block.
  182. """
  183. self._sock.send(b' '.join([method, url, b'HTTP/1.1\r\n']))
  184. for name, value in headers.iter_raw():
  185. name, value = to_bytestring(name), to_bytestring(value)
  186. header = b''.join([name, b': ', value, b'\r\n'])
  187. self._sock.send(header)
  188. self._sock.send(b'\r\n')
  189. def _add_body_headers(self, headers, body):
  190. """
  191. Adds any headers needed for sending the request body. This will always
  192. defer to the user-supplied header content.
  193. :returns: One of (BODY_CHUNKED, BODY_FLAT), indicating what type of
  194. request body should be used.
  195. """
  196. if b'content-length' in headers:
  197. return BODY_FLAT
  198. if b'chunked' in headers.get(b'transfer-encoding', []):
  199. return BODY_CHUNKED
  200. # For bytestring bodies we upload the content with a fixed length.
  201. # For file objects, we use the length of the file object.
  202. if isinstance(body, bytes):
  203. length = str(len(body)).encode('utf-8')
  204. elif hasattr(body, 'fileno'):
  205. length = str(os.fstat(body.fileno()).st_size).encode('utf-8')
  206. else:
  207. length = None
  208. if length:
  209. headers[b'content-length'] = length
  210. return BODY_FLAT
  211. headers[b'transfer-encoding'] = b'chunked'
  212. return BODY_CHUNKED
  213. def _add_upgrade_headers(self, headers):
  214. # Add HTTP Upgrade headers.
  215. headers[b'connection'] = b'Upgrade, HTTP2-Settings'
  216. headers[b'upgrade'] = H2C_PROTOCOL
  217. # Encode SETTINGS frame payload in Base64 and put into the HTTP-2
  218. # Settings header.
  219. http2_settings = SettingsFrame(0)
  220. http2_settings.settings[SettingsFrame.INITIAL_WINDOW_SIZE] = 65535
  221. encoded_settings = base64.urlsafe_b64encode(
  222. http2_settings.serialize_body()
  223. )
  224. headers[b'HTTP2-Settings'] = encoded_settings.rstrip(b'=')
  225. def _send_body(self, body, body_type):
  226. """
  227. Handles the HTTP/1.1 logic for sending HTTP bodies. This does magical
  228. different things in different cases.
  229. """
  230. if body_type == BODY_FLAT:
  231. # Special case for files and other 'readable' objects.
  232. if hasattr(body, 'read'):
  233. return self._send_file_like_obj(body)
  234. # Case for bytestrings.
  235. elif isinstance(body, bytes):
  236. self._sock.send(body)
  237. return
  238. # Iterables that set a specific content length.
  239. elif isinstance(body, collections.Iterable):
  240. for item in body:
  241. try:
  242. self._sock.send(item)
  243. except TypeError:
  244. raise ValueError(
  245. "Elements in iterable body must be bytestrings. "
  246. "Illegal element: {}".format(item)
  247. )
  248. return
  249. else:
  250. raise ValueError(
  251. 'Request body must be a bytestring, a file-like object '
  252. 'returning bytestrings or an iterable of bytestrings. '
  253. 'Got: {}'.format(type(body))
  254. )
  255. # Chunked!
  256. return self._send_chunked(body)
  257. def _send_chunked(self, body):
  258. """
  259. Handles the HTTP/1.1 logic for sending a chunk-encoded body.
  260. """
  261. # Chunked! For chunked bodies we don't special-case, we just iterate
  262. # over what we have and send stuff out.
  263. for chunk in body:
  264. length = '{0:x}'.format(len(chunk)).encode('ascii')
  265. # For now write this as four 'send' calls. That's probably
  266. # inefficient, let's come back to it.
  267. try:
  268. self._sock.send(length)
  269. self._sock.send(b'\r\n')
  270. self._sock.send(chunk)
  271. self._sock.send(b'\r\n')
  272. except TypeError:
  273. raise ValueError(
  274. "Iterable bodies must always iterate in bytestrings"
  275. )
  276. self._sock.send(b'0\r\n\r\n')
  277. return
  278. def _send_file_like_obj(self, fobj):
  279. """
  280. Handles streaming a file-like object to the network.
  281. """
  282. while True:
  283. block = fobj.read(16*1024)
  284. if not block:
  285. break
  286. try:
  287. self._sock.send(block)
  288. except TypeError:
  289. raise ValueError(
  290. "File-like bodies must return bytestrings. Got: "
  291. "{}".format(type(block))
  292. )
  293. return
  294. def close(self):
  295. """
  296. Closes the connection. This closes the socket and then abandons the
  297. reference to it. After calling this method, any outstanding
  298. :class:`Response <hyper.http11.response.Response>` objects will throw
  299. exceptions if attempts are made to read their bodies.
  300. In some cases this method will automatically be called.
  301. .. warning:: This method should absolutely only be called when you are
  302. certain the connection object is no longer needed.
  303. """
  304. self._sock.close()
  305. self._sock = None
  306. # The following two methods are the implementation of the context manager
  307. # protocol.
  308. def __enter__(self):
  309. return self
  310. def __exit__(self, type, value, tb):
  311. self.close()
  312. return False # Never swallow exceptions.