httputil.py 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095
  1. #
  2. # Copyright 2009 Facebook
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License"); you may
  5. # not use this file except in compliance with the License. You may obtain
  6. # a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12. # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13. # License for the specific language governing permissions and limitations
  14. # under the License.
  15. """HTTP utility code shared by clients and servers.
  16. This module also defines the `HTTPServerRequest` class which is exposed
  17. via `tornado.web.RequestHandler.request`.
  18. """
  19. from __future__ import absolute_import, division, print_function
  20. import calendar
  21. import collections
  22. import copy
  23. import datetime
  24. import email.utils
  25. import numbers
  26. import re
  27. import time
  28. import unicodedata
  29. import warnings
  30. from tornado.escape import native_str, parse_qs_bytes, utf8
  31. from tornado.log import gen_log
  32. from tornado.util import ObjectDict, PY3, unicode_type
  33. if PY3:
  34. import http.cookies as Cookie
  35. from http.client import responses
  36. from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
  37. else:
  38. import Cookie
  39. from httplib import responses
  40. from urllib import urlencode
  41. from urlparse import urlparse, urlunparse, parse_qsl
  42. # responses is unused in this file, but we re-export it to other files.
  43. # Reference it so pyflakes doesn't complain.
  44. responses
  45. try:
  46. from ssl import SSLError
  47. except ImportError:
  48. # ssl is unavailable on app engine.
  49. class _SSLError(Exception):
  50. pass
  51. # Hack around a mypy limitation. We can't simply put "type: ignore"
  52. # on the class definition itself; must go through an assignment.
  53. SSLError = _SSLError # type: ignore
  54. try:
  55. import typing # noqa: F401
  56. except ImportError:
  57. pass
# RFC 7230 section 3.5: a recipient MAY recognize a single LF as a line
# terminator and ignore any preceding CR.
# This pattern therefore matches either "\r\n" or a bare "\n"; it is used
# by HTTPHeaders.parse to split a raw header block into individual lines.
_CRLF_RE = re.compile(r'\r?\n')
  61. class _NormalizedHeaderCache(dict):
  62. """Dynamic cached mapping of header names to Http-Header-Case.
  63. Implemented as a dict subclass so that cache hits are as fast as a
  64. normal dict lookup, without the overhead of a python function
  65. call.
  66. >>> normalized_headers = _NormalizedHeaderCache(10)
  67. >>> normalized_headers["coNtent-TYPE"]
  68. 'Content-Type'
  69. """
  70. def __init__(self, size):
  71. super(_NormalizedHeaderCache, self).__init__()
  72. self.size = size
  73. self.queue = collections.deque()
  74. def __missing__(self, key):
  75. normalized = "-".join([w.capitalize() for w in key.split("-")])
  76. self[key] = normalized
  77. self.queue.append(key)
  78. if len(self.queue) > self.size:
  79. # Limit the size of the cache. LRU would be better, but this
  80. # simpler approach should be fine. In Python 2.7+ we could
  81. # use OrderedDict (or in 3.2+, @functools.lru_cache).
  82. old_key = self.queue.popleft()
  83. del self[old_key]
  84. return normalized
  85. _normalized_headers = _NormalizedHeaderCache(1000)
  86. class HTTPHeaders(collections.MutableMapping):
  87. """A dictionary that maintains ``Http-Header-Case`` for all keys.
  88. Supports multiple values per key via a pair of new methods,
  89. `add()` and `get_list()`. The regular dictionary interface
  90. returns a single value per key, with multiple values joined by a
  91. comma.
  92. >>> h = HTTPHeaders({"content-type": "text/html"})
  93. >>> list(h.keys())
  94. ['Content-Type']
  95. >>> h["Content-Type"]
  96. 'text/html'
  97. >>> h.add("Set-Cookie", "A=B")
  98. >>> h.add("Set-Cookie", "C=D")
  99. >>> h["set-cookie"]
  100. 'A=B,C=D'
  101. >>> h.get_list("set-cookie")
  102. ['A=B', 'C=D']
  103. >>> for (k,v) in sorted(h.get_all()):
  104. ... print('%s: %s' % (k,v))
  105. ...
  106. Content-Type: text/html
  107. Set-Cookie: A=B
  108. Set-Cookie: C=D
  109. """
  110. def __init__(self, *args, **kwargs):
  111. self._dict = {} # type: typing.Dict[str, str]
  112. self._as_list = {} # type: typing.Dict[str, typing.List[str]]
  113. self._last_key = None
  114. if (len(args) == 1 and len(kwargs) == 0 and
  115. isinstance(args[0], HTTPHeaders)):
  116. # Copy constructor
  117. for k, v in args[0].get_all():
  118. self.add(k, v)
  119. else:
  120. # Dict-style initialization
  121. self.update(*args, **kwargs)
  122. # new public methods
  123. def add(self, name, value):
  124. # type: (str, str) -> None
  125. """Adds a new value for the given key."""
  126. norm_name = _normalized_headers[name]
  127. self._last_key = norm_name
  128. if norm_name in self:
  129. self._dict[norm_name] = (native_str(self[norm_name]) + ',' +
  130. native_str(value))
  131. self._as_list[norm_name].append(value)
  132. else:
  133. self[norm_name] = value
  134. def get_list(self, name):
  135. """Returns all values for the given header as a list."""
  136. norm_name = _normalized_headers[name]
  137. return self._as_list.get(norm_name, [])
  138. def get_all(self):
  139. # type: () -> typing.Iterable[typing.Tuple[str, str]]
  140. """Returns an iterable of all (name, value) pairs.
  141. If a header has multiple values, multiple pairs will be
  142. returned with the same name.
  143. """
  144. for name, values in self._as_list.items():
  145. for value in values:
  146. yield (name, value)
  147. def parse_line(self, line):
  148. """Updates the dictionary with a single header line.
  149. >>> h = HTTPHeaders()
  150. >>> h.parse_line("Content-Type: text/html")
  151. >>> h.get('content-type')
  152. 'text/html'
  153. """
  154. if line[0].isspace():
  155. # continuation of a multi-line header
  156. if self._last_key is None:
  157. raise HTTPInputError("first header line cannot start with whitespace")
  158. new_part = ' ' + line.lstrip()
  159. self._as_list[self._last_key][-1] += new_part
  160. self._dict[self._last_key] += new_part
  161. else:
  162. try:
  163. name, value = line.split(":", 1)
  164. except ValueError:
  165. raise HTTPInputError("no colon in header line")
  166. self.add(name, value.strip())
  167. @classmethod
  168. def parse(cls, headers):
  169. """Returns a dictionary from HTTP header text.
  170. >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
  171. >>> sorted(h.items())
  172. [('Content-Length', '42'), ('Content-Type', 'text/html')]
  173. .. versionchanged:: 5.1
  174. Raises `HTTPInputError` on malformed headers instead of a
  175. mix of `KeyError`, and `ValueError`.
  176. """
  177. h = cls()
  178. for line in _CRLF_RE.split(headers):
  179. if line:
  180. h.parse_line(line)
  181. return h
  182. # MutableMapping abstract method implementations.
  183. def __setitem__(self, name, value):
  184. norm_name = _normalized_headers[name]
  185. self._dict[norm_name] = value
  186. self._as_list[norm_name] = [value]
  187. def __getitem__(self, name):
  188. # type: (str) -> str
  189. return self._dict[_normalized_headers[name]]
  190. def __delitem__(self, name):
  191. norm_name = _normalized_headers[name]
  192. del self._dict[norm_name]
  193. del self._as_list[norm_name]
  194. def __len__(self):
  195. return len(self._dict)
  196. def __iter__(self):
  197. return iter(self._dict)
  198. def copy(self):
  199. # defined in dict but not in MutableMapping.
  200. return HTTPHeaders(self)
  201. # Use our overridden copy method for the copy.copy module.
  202. # This makes shallow copies one level deeper, but preserves
  203. # the appearance that HTTPHeaders is a single container.
  204. __copy__ = copy
  205. def __str__(self):
  206. lines = []
  207. for name, value in self.get_all():
  208. lines.append("%s: %s\n" % (name, value))
  209. return "".join(lines)
  210. __unicode__ = __str__
  211. class HTTPServerRequest(object):
  212. """A single HTTP request.
  213. All attributes are type `str` unless otherwise noted.
  214. .. attribute:: method
  215. HTTP request method, e.g. "GET" or "POST"
  216. .. attribute:: uri
  217. The requested uri.
  218. .. attribute:: path
  219. The path portion of `uri`
  220. .. attribute:: query
  221. The query portion of `uri`
  222. .. attribute:: version
  223. HTTP version specified in request, e.g. "HTTP/1.1"
  224. .. attribute:: headers
  225. `.HTTPHeaders` dictionary-like object for request headers. Acts like
  226. a case-insensitive dictionary with additional methods for repeated
  227. headers.
  228. .. attribute:: body
  229. Request body, if present, as a byte string.
  230. .. attribute:: remote_ip
  231. Client's IP address as a string. If ``HTTPServer.xheaders`` is set,
  232. will pass along the real IP address provided by a load balancer
  233. in the ``X-Real-Ip`` or ``X-Forwarded-For`` header.
  234. .. versionchanged:: 3.1
  235. The list format of ``X-Forwarded-For`` is now supported.
  236. .. attribute:: protocol
  237. The protocol used, either "http" or "https". If ``HTTPServer.xheaders``
  238. is set, will pass along the protocol used by a load balancer if
  239. reported via an ``X-Scheme`` header.
  240. .. attribute:: host
  241. The requested hostname, usually taken from the ``Host`` header.
  242. .. attribute:: arguments
  243. GET/POST arguments are available in the arguments property, which
  244. maps arguments names to lists of values (to support multiple values
  245. for individual names). Names are of type `str`, while arguments
  246. are byte strings. Note that this is different from
  247. `.RequestHandler.get_argument`, which returns argument values as
  248. unicode strings.
  249. .. attribute:: query_arguments
  250. Same format as ``arguments``, but contains only arguments extracted
  251. from the query string.
  252. .. versionadded:: 3.2
  253. .. attribute:: body_arguments
  254. Same format as ``arguments``, but contains only arguments extracted
  255. from the request body.
  256. .. versionadded:: 3.2
  257. .. attribute:: files
  258. File uploads are available in the files property, which maps file
  259. names to lists of `.HTTPFile`.
  260. .. attribute:: connection
  261. An HTTP request is attached to a single HTTP connection, which can
  262. be accessed through the "connection" attribute. Since connections
  263. are typically kept open in HTTP/1.1, multiple requests can be handled
  264. sequentially on a single connection.
  265. .. versionchanged:: 4.0
  266. Moved from ``tornado.httpserver.HTTPRequest``.
  267. """
  268. def __init__(self, method=None, uri=None, version="HTTP/1.0", headers=None,
  269. body=None, host=None, files=None, connection=None,
  270. start_line=None, server_connection=None):
  271. if start_line is not None:
  272. method, uri, version = start_line
  273. self.method = method
  274. self.uri = uri
  275. self.version = version
  276. self.headers = headers or HTTPHeaders()
  277. self.body = body or b""
  278. # set remote IP and protocol
  279. context = getattr(connection, 'context', None)
  280. self.remote_ip = getattr(context, 'remote_ip', None)
  281. self.protocol = getattr(context, 'protocol', "http")
  282. self.host = host or self.headers.get("Host") or "127.0.0.1"
  283. self.host_name = split_host_and_port(self.host.lower())[0]
  284. self.files = files or {}
  285. self.connection = connection
  286. self.server_connection = server_connection
  287. self._start_time = time.time()
  288. self._finish_time = None
  289. self.path, sep, self.query = uri.partition('?')
  290. self.arguments = parse_qs_bytes(self.query, keep_blank_values=True)
  291. self.query_arguments = copy.deepcopy(self.arguments)
  292. self.body_arguments = {}
  293. def supports_http_1_1(self):
  294. """Returns True if this request supports HTTP/1.1 semantics.
  295. .. deprecated:: 4.0
  296. Applications are less likely to need this information with
  297. the introduction of `.HTTPConnection`. If you still need
  298. it, access the ``version`` attribute directly. This method
  299. will be removed in Tornado 6.0.
  300. """
  301. warnings.warn("supports_http_1_1() is deprecated, use request.version instead",
  302. DeprecationWarning)
  303. return self.version == "HTTP/1.1"
  304. @property
  305. def cookies(self):
  306. """A dictionary of Cookie.Morsel objects."""
  307. if not hasattr(self, "_cookies"):
  308. self._cookies = Cookie.SimpleCookie()
  309. if "Cookie" in self.headers:
  310. try:
  311. parsed = parse_cookie(self.headers["Cookie"])
  312. except Exception:
  313. pass
  314. else:
  315. for k, v in parsed.items():
  316. try:
  317. self._cookies[k] = v
  318. except Exception:
  319. # SimpleCookie imposes some restrictions on keys;
  320. # parse_cookie does not. Discard any cookies
  321. # with disallowed keys.
  322. pass
  323. return self._cookies
  324. def write(self, chunk, callback=None):
  325. """Writes the given chunk to the response stream.
  326. .. deprecated:: 4.0
  327. Use ``request.connection`` and the `.HTTPConnection` methods
  328. to write the response. This method will be removed in Tornado 6.0.
  329. """
  330. warnings.warn("req.write deprecated, use req.connection.write and write_headers instead",
  331. DeprecationWarning)
  332. assert isinstance(chunk, bytes)
  333. assert self.version.startswith("HTTP/1."), \
  334. "deprecated interface only supported in HTTP/1.x"
  335. self.connection.write(chunk, callback=callback)
  336. def finish(self):
  337. """Finishes this HTTP request on the open connection.
  338. .. deprecated:: 4.0
  339. Use ``request.connection`` and the `.HTTPConnection` methods
  340. to write the response. This method will be removed in Tornado 6.0.
  341. """
  342. warnings.warn("req.finish deprecated, use req.connection.finish instead",
  343. DeprecationWarning)
  344. self.connection.finish()
  345. self._finish_time = time.time()
  346. def full_url(self):
  347. """Reconstructs the full URL for this request."""
  348. return self.protocol + "://" + self.host + self.uri
  349. def request_time(self):
  350. """Returns the amount of time it took for this request to execute."""
  351. if self._finish_time is None:
  352. return time.time() - self._start_time
  353. else:
  354. return self._finish_time - self._start_time
  355. def get_ssl_certificate(self, binary_form=False):
  356. """Returns the client's SSL certificate, if any.
  357. To use client certificates, the HTTPServer's
  358. `ssl.SSLContext.verify_mode` field must be set, e.g.::
  359. ssl_ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
  360. ssl_ctx.load_cert_chain("foo.crt", "foo.key")
  361. ssl_ctx.load_verify_locations("cacerts.pem")
  362. ssl_ctx.verify_mode = ssl.CERT_REQUIRED
  363. server = HTTPServer(app, ssl_options=ssl_ctx)
  364. By default, the return value is a dictionary (or None, if no
  365. client certificate is present). If ``binary_form`` is true, a
  366. DER-encoded form of the certificate is returned instead. See
  367. SSLSocket.getpeercert() in the standard library for more
  368. details.
  369. http://docs.python.org/library/ssl.html#sslsocket-objects
  370. """
  371. try:
  372. return self.connection.stream.socket.getpeercert(
  373. binary_form=binary_form)
  374. except SSLError:
  375. return None
  376. def _parse_body(self):
  377. parse_body_arguments(
  378. self.headers.get("Content-Type", ""), self.body,
  379. self.body_arguments, self.files,
  380. self.headers)
  381. for k, v in self.body_arguments.items():
  382. self.arguments.setdefault(k, []).extend(v)
  383. def __repr__(self):
  384. attrs = ("protocol", "host", "method", "uri", "version", "remote_ip")
  385. args = ", ".join(["%s=%r" % (n, getattr(self, n)) for n in attrs])
  386. return "%s(%s)" % (self.__class__.__name__, args)
  387. class HTTPInputError(Exception):
  388. """Exception class for malformed HTTP requests or responses
  389. from remote sources.
  390. .. versionadded:: 4.0
  391. """
  392. pass
  393. class HTTPOutputError(Exception):
  394. """Exception class for errors in HTTP output.
  395. .. versionadded:: 4.0
  396. """
  397. pass
  398. class HTTPServerConnectionDelegate(object):
  399. """Implement this interface to handle requests from `.HTTPServer`.
  400. .. versionadded:: 4.0
  401. """
  402. def start_request(self, server_conn, request_conn):
  403. """This method is called by the server when a new request has started.
  404. :arg server_conn: is an opaque object representing the long-lived
  405. (e.g. tcp-level) connection.
  406. :arg request_conn: is a `.HTTPConnection` object for a single
  407. request/response exchange.
  408. This method should return a `.HTTPMessageDelegate`.
  409. """
  410. raise NotImplementedError()
  411. def on_close(self, server_conn):
  412. """This method is called when a connection has been closed.
  413. :arg server_conn: is a server connection that has previously been
  414. passed to ``start_request``.
  415. """
  416. pass
  417. class HTTPMessageDelegate(object):
  418. """Implement this interface to handle an HTTP request or response.
  419. .. versionadded:: 4.0
  420. """
  421. def headers_received(self, start_line, headers):
  422. """Called when the HTTP headers have been received and parsed.
  423. :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`
  424. depending on whether this is a client or server message.
  425. :arg headers: a `.HTTPHeaders` instance.
  426. Some `.HTTPConnection` methods can only be called during
  427. ``headers_received``.
  428. May return a `.Future`; if it does the body will not be read
  429. until it is done.
  430. """
  431. pass
  432. def data_received(self, chunk):
  433. """Called when a chunk of data has been received.
  434. May return a `.Future` for flow control.
  435. """
  436. pass
  437. def finish(self):
  438. """Called after the last chunk of data has been received."""
  439. pass
  440. def on_connection_close(self):
  441. """Called if the connection is closed without finishing the request.
  442. If ``headers_received`` is called, either ``finish`` or
  443. ``on_connection_close`` will be called, but not both.
  444. """
  445. pass
  446. class HTTPConnection(object):
  447. """Applications use this interface to write their responses.
  448. .. versionadded:: 4.0
  449. """
  450. def write_headers(self, start_line, headers, chunk=None, callback=None):
  451. """Write an HTTP header block.
  452. :arg start_line: a `.RequestStartLine` or `.ResponseStartLine`.
  453. :arg headers: a `.HTTPHeaders` instance.
  454. :arg chunk: the first (optional) chunk of data. This is an optimization
  455. so that small responses can be written in the same call as their
  456. headers.
  457. :arg callback: a callback to be run when the write is complete.
  458. The ``version`` field of ``start_line`` is ignored.
  459. Returns a `.Future` if no callback is given.
  460. .. deprecated:: 5.1
  461. The ``callback`` argument is deprecated and will be removed
  462. in Tornado 6.0.
  463. """
  464. raise NotImplementedError()
  465. def write(self, chunk, callback=None):
  466. """Writes a chunk of body data.
  467. The callback will be run when the write is complete. If no callback
  468. is given, returns a Future.
  469. .. deprecated:: 5.1
  470. The ``callback`` argument is deprecated and will be removed
  471. in Tornado 6.0.
  472. """
  473. raise NotImplementedError()
  474. def finish(self):
  475. """Indicates that the last body data has been written.
  476. """
  477. raise NotImplementedError()
  478. def url_concat(url, args):
  479. """Concatenate url and arguments regardless of whether
  480. url has existing query parameters.
  481. ``args`` may be either a dictionary or a list of key-value pairs
  482. (the latter allows for multiple values with the same key.
  483. >>> url_concat("http://example.com/foo", dict(c="d"))
  484. 'http://example.com/foo?c=d'
  485. >>> url_concat("http://example.com/foo?a=b", dict(c="d"))
  486. 'http://example.com/foo?a=b&c=d'
  487. >>> url_concat("http://example.com/foo?a=b", [("c", "d"), ("c", "d2")])
  488. 'http://example.com/foo?a=b&c=d&c=d2'
  489. """
  490. if args is None:
  491. return url
  492. parsed_url = urlparse(url)
  493. if isinstance(args, dict):
  494. parsed_query = parse_qsl(parsed_url.query, keep_blank_values=True)
  495. parsed_query.extend(args.items())
  496. elif isinstance(args, list) or isinstance(args, tuple):
  497. parsed_query = parse_qsl(parsed_url.query, keep_blank_values=True)
  498. parsed_query.extend(args)
  499. else:
  500. err = "'args' parameter should be dict, list or tuple. Not {0}".format(
  501. type(args))
  502. raise TypeError(err)
  503. final_query = urlencode(parsed_query)
  504. url = urlunparse((
  505. parsed_url[0],
  506. parsed_url[1],
  507. parsed_url[2],
  508. parsed_url[3],
  509. final_query,
  510. parsed_url[5]))
  511. return url
  512. class HTTPFile(ObjectDict):
  513. """Represents a file uploaded via a form.
  514. For backwards compatibility, its instance attributes are also
  515. accessible as dictionary keys.
  516. * ``filename``
  517. * ``body``
  518. * ``content_type``
  519. """
  520. pass
  521. def _parse_request_range(range_header):
  522. """Parses a Range header.
  523. Returns either ``None`` or tuple ``(start, end)``.
  524. Note that while the HTTP headers use inclusive byte positions,
  525. this method returns indexes suitable for use in slices.
  526. >>> start, end = _parse_request_range("bytes=1-2")
  527. >>> start, end
  528. (1, 3)
  529. >>> [0, 1, 2, 3, 4][start:end]
  530. [1, 2]
  531. >>> _parse_request_range("bytes=6-")
  532. (6, None)
  533. >>> _parse_request_range("bytes=-6")
  534. (-6, None)
  535. >>> _parse_request_range("bytes=-0")
  536. (None, 0)
  537. >>> _parse_request_range("bytes=")
  538. (None, None)
  539. >>> _parse_request_range("foo=42")
  540. >>> _parse_request_range("bytes=1-2,6-10")
  541. Note: only supports one range (ex, ``bytes=1-2,6-10`` is not allowed).
  542. See [0] for the details of the range header.
  543. [0]: http://greenbytes.de/tech/webdav/draft-ietf-httpbis-p5-range-latest.html#byte.ranges
  544. """
  545. unit, _, value = range_header.partition("=")
  546. unit, value = unit.strip(), value.strip()
  547. if unit != "bytes":
  548. return None
  549. start_b, _, end_b = value.partition("-")
  550. try:
  551. start = _int_or_none(start_b)
  552. end = _int_or_none(end_b)
  553. except ValueError:
  554. return None
  555. if end is not None:
  556. if start is None:
  557. if end != 0:
  558. start = -end
  559. end = None
  560. else:
  561. end += 1
  562. return (start, end)
  563. def _get_content_range(start, end, total):
  564. """Returns a suitable Content-Range header:
  565. >>> print(_get_content_range(None, 1, 4))
  566. bytes 0-0/4
  567. >>> print(_get_content_range(1, 3, 4))
  568. bytes 1-2/4
  569. >>> print(_get_content_range(None, None, 4))
  570. bytes 0-3/4
  571. """
  572. start = start or 0
  573. end = (end or total) - 1
  574. return "bytes %s-%s/%s" % (start, end, total)
  575. def _int_or_none(val):
  576. val = val.strip()
  577. if val == "":
  578. return None
  579. return int(val)
  580. def parse_body_arguments(content_type, body, arguments, files, headers=None):
  581. """Parses a form request body.
  582. Supports ``application/x-www-form-urlencoded`` and
  583. ``multipart/form-data``. The ``content_type`` parameter should be
  584. a string and ``body`` should be a byte string. The ``arguments``
  585. and ``files`` parameters are dictionaries that will be updated
  586. with the parsed contents.
  587. """
  588. if headers and 'Content-Encoding' in headers:
  589. gen_log.warning("Unsupported Content-Encoding: %s",
  590. headers['Content-Encoding'])
  591. return
  592. if content_type.startswith("application/x-www-form-urlencoded"):
  593. try:
  594. uri_arguments = parse_qs_bytes(native_str(body), keep_blank_values=True)
  595. except Exception as e:
  596. gen_log.warning('Invalid x-www-form-urlencoded body: %s', e)
  597. uri_arguments = {}
  598. for name, values in uri_arguments.items():
  599. if values:
  600. arguments.setdefault(name, []).extend(values)
  601. elif content_type.startswith("multipart/form-data"):
  602. try:
  603. fields = content_type.split(";")
  604. for field in fields:
  605. k, sep, v = field.strip().partition("=")
  606. if k == "boundary" and v:
  607. parse_multipart_form_data(utf8(v), body, arguments, files)
  608. break
  609. else:
  610. raise ValueError("multipart boundary not found")
  611. except Exception as e:
  612. gen_log.warning("Invalid multipart/form-data: %s", e)
  613. def parse_multipart_form_data(boundary, data, arguments, files):
  614. """Parses a ``multipart/form-data`` body.
  615. The ``boundary`` and ``data`` parameters are both byte strings.
  616. The dictionaries given in the arguments and files parameters
  617. will be updated with the contents of the body.
  618. .. versionchanged:: 5.1
  619. Now recognizes non-ASCII filenames in RFC 2231/5987
  620. (``filename*=``) format.
  621. """
  622. # The standard allows for the boundary to be quoted in the header,
  623. # although it's rare (it happens at least for google app engine
  624. # xmpp). I think we're also supposed to handle backslash-escapes
  625. # here but I'll save that until we see a client that uses them
  626. # in the wild.
  627. if boundary.startswith(b'"') and boundary.endswith(b'"'):
  628. boundary = boundary[1:-1]
  629. final_boundary_index = data.rfind(b"--" + boundary + b"--")
  630. if final_boundary_index == -1:
  631. gen_log.warning("Invalid multipart/form-data: no final boundary")
  632. return
  633. parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
  634. for part in parts:
  635. if not part:
  636. continue
  637. eoh = part.find(b"\r\n\r\n")
  638. if eoh == -1:
  639. gen_log.warning("multipart/form-data missing headers")
  640. continue
  641. headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"))
  642. disp_header = headers.get("Content-Disposition", "")
  643. disposition, disp_params = _parse_header(disp_header)
  644. if disposition != "form-data" or not part.endswith(b"\r\n"):
  645. gen_log.warning("Invalid multipart/form-data")
  646. continue
  647. value = part[eoh + 4:-2]
  648. if not disp_params.get("name"):
  649. gen_log.warning("multipart/form-data value missing name")
  650. continue
  651. name = disp_params["name"]
  652. if disp_params.get("filename"):
  653. ctype = headers.get("Content-Type", "application/unknown")
  654. files.setdefault(name, []).append(HTTPFile( # type: ignore
  655. filename=disp_params["filename"], body=value,
  656. content_type=ctype))
  657. else:
  658. arguments.setdefault(name, []).append(value)
  659. def format_timestamp(ts):
  660. """Formats a timestamp in the format used by HTTP.
  661. The argument may be a numeric timestamp as returned by `time.time`,
  662. a time tuple as returned by `time.gmtime`, or a `datetime.datetime`
  663. object.
  664. >>> format_timestamp(1359312200)
  665. 'Sun, 27 Jan 2013 18:43:20 GMT'
  666. """
  667. if isinstance(ts, numbers.Real):
  668. pass
  669. elif isinstance(ts, (tuple, time.struct_time)):
  670. ts = calendar.timegm(ts)
  671. elif isinstance(ts, datetime.datetime):
  672. ts = calendar.timegm(ts.utctimetuple())
  673. else:
  674. raise TypeError("unknown timestamp type: %r" % ts)
  675. return email.utils.formatdate(ts, usegmt=True)
  676. RequestStartLine = collections.namedtuple(
  677. 'RequestStartLine', ['method', 'path', 'version'])
  678. def parse_request_start_line(line):
  679. """Returns a (method, path, version) tuple for an HTTP 1.x request line.
  680. The response is a `collections.namedtuple`.
  681. >>> parse_request_start_line("GET /foo HTTP/1.1")
  682. RequestStartLine(method='GET', path='/foo', version='HTTP/1.1')
  683. """
  684. try:
  685. method, path, version = line.split(" ")
  686. except ValueError:
  687. # https://tools.ietf.org/html/rfc7230#section-3.1.1
  688. # invalid request-line SHOULD respond with a 400 (Bad Request)
  689. raise HTTPInputError("Malformed HTTP request line")
  690. if not re.match(r"^HTTP/1\.[0-9]$", version):
  691. raise HTTPInputError(
  692. "Malformed HTTP version in HTTP Request-Line: %r" % version)
  693. return RequestStartLine(method, path, version)
  694. ResponseStartLine = collections.namedtuple(
  695. 'ResponseStartLine', ['version', 'code', 'reason'])
  696. def parse_response_start_line(line):
  697. """Returns a (version, code, reason) tuple for an HTTP 1.x response line.
  698. The response is a `collections.namedtuple`.
  699. >>> parse_response_start_line("HTTP/1.1 200 OK")
  700. ResponseStartLine(version='HTTP/1.1', code=200, reason='OK')
  701. """
  702. line = native_str(line)
  703. match = re.match("(HTTP/1.[0-9]) ([0-9]+) ([^\r]*)", line)
  704. if not match:
  705. raise HTTPInputError("Error parsing response start line")
  706. return ResponseStartLine(match.group(1), int(match.group(2)),
  707. match.group(3))
  708. # _parseparam and _parse_header are copied and modified from python2.7's cgi.py
  709. # The original 2.7 version of this code did not correctly support some
  710. # combinations of semicolons and double quotes.
  711. # It has also been modified to support valueless parameters as seen in
  712. # websocket extension negotiations, and to support non-ascii values in
  713. # RFC 2231/5987 format.
  714. def _parseparam(s):
  715. while s[:1] == ';':
  716. s = s[1:]
  717. end = s.find(';')
  718. while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
  719. end = s.find(';', end + 1)
  720. if end < 0:
  721. end = len(s)
  722. f = s[:end]
  723. yield f.strip()
  724. s = s[end:]
  725. def _parse_header(line):
  726. r"""Parse a Content-type like header.
  727. Return the main content-type and a dictionary of options.
  728. >>> d = "form-data; foo=\"b\\\\a\\\"r\"; file*=utf-8''T%C3%A4st"
  729. >>> ct, d = _parse_header(d)
  730. >>> ct
  731. 'form-data'
  732. >>> d['file'] == r'T\u00e4st'.encode('ascii').decode('unicode_escape')
  733. True
  734. >>> d['foo']
  735. 'b\\a"r'
  736. """
  737. parts = _parseparam(';' + line)
  738. key = next(parts)
  739. # decode_params treats first argument special, but we already stripped key
  740. params = [('Dummy', 'value')]
  741. for p in parts:
  742. i = p.find('=')
  743. if i >= 0:
  744. name = p[:i].strip().lower()
  745. value = p[i + 1:].strip()
  746. params.append((name, native_str(value)))
  747. params = email.utils.decode_params(params)
  748. params.pop(0) # get rid of the dummy again
  749. pdict = {}
  750. for name, value in params:
  751. value = email.utils.collapse_rfc2231_value(value)
  752. if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
  753. value = value[1:-1]
  754. pdict[name] = value
  755. return key, pdict
  756. def _encode_header(key, pdict):
  757. """Inverse of _parse_header.
  758. >>> _encode_header('permessage-deflate',
  759. ... {'client_max_window_bits': 15, 'client_no_context_takeover': None})
  760. 'permessage-deflate; client_max_window_bits=15; client_no_context_takeover'
  761. """
  762. if not pdict:
  763. return key
  764. out = [key]
  765. # Sort the parameters just to make it easy to test.
  766. for k, v in sorted(pdict.items()):
  767. if v is None:
  768. out.append(k)
  769. else:
  770. # TODO: quote if necessary.
  771. out.append('%s=%s' % (k, v))
  772. return '; '.join(out)
  773. def encode_username_password(username, password):
  774. """Encodes a username/password pair in the format used by HTTP auth.
  775. The return value is a byte string in the form ``username:password``.
  776. .. versionadded:: 5.1
  777. """
  778. if isinstance(username, unicode_type):
  779. username = unicodedata.normalize('NFC', username)
  780. if isinstance(password, unicode_type):
  781. password = unicodedata.normalize('NFC', password)
  782. return utf8(username) + b":" + utf8(password)
  783. def doctests():
  784. import doctest
  785. return doctest.DocTestSuite()
  786. def split_host_and_port(netloc):
  787. """Returns ``(host, port)`` tuple from ``netloc``.
  788. Returned ``port`` will be ``None`` if not present.
  789. .. versionadded:: 4.1
  790. """
  791. match = re.match(r'^(.+):(\d+)$', netloc)
  792. if match:
  793. host = match.group(1)
  794. port = int(match.group(2))
  795. else:
  796. host = netloc
  797. port = None
  798. return (host, port)
  799. def qs_to_qsl(qs):
  800. """Generator converting a result of ``parse_qs`` back to name-value pairs.
  801. .. versionadded:: 5.0
  802. """
  803. for k, vs in qs.items():
  804. for v in vs:
  805. yield (k, v)
  806. _OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
  807. _QuotePatt = re.compile(r"[\\].")
  808. _nulljoin = ''.join
  809. def _unquote_cookie(str):
  810. """Handle double quotes and escaping in cookie values.
  811. This method is copied verbatim from the Python 3.5 standard
  812. library (http.cookies._unquote) so we don't have to depend on
  813. non-public interfaces.
  814. """
  815. # If there aren't any doublequotes,
  816. # then there can't be any special characters. See RFC 2109.
  817. if str is None or len(str) < 2:
  818. return str
  819. if str[0] != '"' or str[-1] != '"':
  820. return str
  821. # We have to assume that we must decode this string.
  822. # Down to work.
  823. # Remove the "s
  824. str = str[1:-1]
  825. # Check for special sequences. Examples:
  826. # \012 --> \n
  827. # \" --> "
  828. #
  829. i = 0
  830. n = len(str)
  831. res = []
  832. while 0 <= i < n:
  833. o_match = _OctalPatt.search(str, i)
  834. q_match = _QuotePatt.search(str, i)
  835. if not o_match and not q_match: # Neither matched
  836. res.append(str[i:])
  837. break
  838. # else:
  839. j = k = -1
  840. if o_match:
  841. j = o_match.start(0)
  842. if q_match:
  843. k = q_match.start(0)
  844. if q_match and (not o_match or k < j): # QuotePatt matched
  845. res.append(str[i:k])
  846. res.append(str[k + 1])
  847. i = k + 2
  848. else: # OctalPatt matched
  849. res.append(str[i:j])
  850. res.append(chr(int(str[j + 1:j + 4], 8)))
  851. i = j + 4
  852. return _nulljoin(res)
  853. def parse_cookie(cookie):
  854. """Parse a ``Cookie`` HTTP header into a dict of name/value pairs.
  855. This function attempts to mimic browser cookie parsing behavior;
  856. it specifically does not follow any of the cookie-related RFCs
  857. (because browsers don't either).
  858. The algorithm used is identical to that used by Django version 1.9.10.
  859. .. versionadded:: 4.4.2
  860. """
  861. cookiedict = {}
  862. for chunk in cookie.split(str(';')):
  863. if str('=') in chunk:
  864. key, val = chunk.split(str('='), 1)
  865. else:
  866. # Assume an empty name per
  867. # https://bugzilla.mozilla.org/show_bug.cgi?id=169091
  868. key, val = str(''), chunk
  869. key, val = key.strip(), val.strip()
  870. if key or val:
  871. # unquote using Python's algorithm.
  872. cookiedict[key] = _unquote_cookie(val)
  873. return cookiedict