utilities.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585
  1. # -*- coding: utf-8 -*-
  2. """
  3. h2/utilities
  4. ~~~~~~~~~~~~
  5. Utility functions that do not belong in a separate module.
  6. """
  7. import collections
  8. import re
  9. from string import whitespace
  10. import sys
  11. from hpack import HeaderTuple, NeverIndexedHeaderTuple
  12. from .exceptions import ProtocolError, FlowControlError
  13. UPPER_RE = re.compile(b"[A-Z]")
  14. # A set of headers that are hop-by-hop or connection-specific and thus
  15. # forbidden in HTTP/2. This list comes from RFC 7540 § 8.1.2.2.
  16. CONNECTION_HEADERS = frozenset([
  17. b'connection', u'connection',
  18. b'proxy-connection', u'proxy-connection',
  19. b'keep-alive', u'keep-alive',
  20. b'transfer-encoding', u'transfer-encoding',
  21. b'upgrade', u'upgrade',
  22. ])
  23. _ALLOWED_PSEUDO_HEADER_FIELDS = frozenset([
  24. b':method', u':method',
  25. b':scheme', u':scheme',
  26. b':authority', u':authority',
  27. b':path', u':path',
  28. b':status', u':status',
  29. ])
  30. _SECURE_HEADERS = frozenset([
  31. # May have basic credentials which are vulnerable to dictionary attacks.
  32. b'authorization', u'authorization',
  33. b'proxy-authorization', u'proxy-authorization',
  34. ])
  35. _REQUEST_ONLY_HEADERS = frozenset([
  36. b':scheme', u':scheme',
  37. b':path', u':path',
  38. b':authority', u':authority',
  39. b':method', u':method'
  40. ])
  41. _RESPONSE_ONLY_HEADERS = frozenset([b':status', u':status'])
  42. if sys.version_info[0] == 2: # Python 2.X
  43. _WHITESPACE = frozenset(whitespace)
  44. else: # Python 3.3+
  45. _WHITESPACE = frozenset(map(ord, whitespace))
  46. def _secure_headers(headers, hdr_validation_flags):
  47. """
  48. Certain headers are at risk of being attacked during the header compression
  49. phase, and so need to be kept out of header compression contexts. This
  50. function automatically transforms certain specific headers into HPACK
  51. never-indexed fields to ensure they don't get added to header compression
  52. contexts.
  53. This function currently implements two rules:
  54. - 'authorization' and 'proxy-authorization' fields are automatically made
  55. never-indexed.
  56. - Any 'cookie' header field shorter than 20 bytes long is made
  57. never-indexed.
  58. These fields are the most at-risk. These rules are inspired by Firefox
  59. and nghttp2.
  60. """
  61. for header in headers:
  62. if header[0] in _SECURE_HEADERS:
  63. yield NeverIndexedHeaderTuple(*header)
  64. elif header[0] in (b'cookie', u'cookie') and len(header[1]) < 20:
  65. yield NeverIndexedHeaderTuple(*header)
  66. else:
  67. yield header
  68. def extract_method_header(headers):
  69. """
  70. Extracts the request method from the headers list.
  71. """
  72. for k, v in headers:
  73. if k in (b':method', u':method'):
  74. if not isinstance(v, bytes):
  75. return v.encode('utf-8')
  76. else:
  77. return v
  78. def is_informational_response(headers):
  79. """
  80. Searches a header block for a :status header to confirm that a given
  81. collection of headers are an informational response. Assumes the header
  82. block is well formed: that is, that the HTTP/2 special headers are first
  83. in the block, and so that it can stop looking when it finds the first
  84. header field whose name does not begin with a colon.
  85. :param headers: The HTTP/2 header block.
  86. :returns: A boolean indicating if this is an informational response.
  87. """
  88. for n, v in headers:
  89. if isinstance(n, bytes):
  90. sigil = b':'
  91. status = b':status'
  92. informational_start = b'1'
  93. else:
  94. sigil = u':'
  95. status = u':status'
  96. informational_start = u'1'
  97. # If we find a non-special header, we're done here: stop looping.
  98. if not n.startswith(sigil):
  99. return False
  100. # This isn't the status header, bail.
  101. if n != status:
  102. continue
  103. # If the first digit is a 1, we've got informational headers.
  104. return v.startswith(informational_start)
  105. def guard_increment_window(current, increment):
  106. """
  107. Increments a flow control window, guarding against that window becoming too
  108. large.
  109. :param current: The current value of the flow control window.
  110. :param increment: The increment to apply to that window.
  111. :returns: The new value of the window.
  112. :raises: ``FlowControlError``
  113. """
  114. # The largest value the flow control window may take.
  115. LARGEST_FLOW_CONTROL_WINDOW = 2**31 - 1
  116. new_size = current + increment
  117. if new_size > LARGEST_FLOW_CONTROL_WINDOW:
  118. raise FlowControlError(
  119. "May not increment flow control window past %d" %
  120. LARGEST_FLOW_CONTROL_WINDOW
  121. )
  122. return new_size
  123. def authority_from_headers(headers):
  124. """
  125. Given a header set, searches for the authority header and returns the
  126. value.
  127. Note that this doesn't terminate early, so should only be called if the
  128. headers are for a client request. Otherwise, will loop over the entire
  129. header set, which is potentially unwise.
  130. :param headers: The HTTP header set.
  131. :returns: The value of the authority header, or ``None``.
  132. :rtype: ``bytes`` or ``None``.
  133. """
  134. for n, v in headers:
  135. # This gets run against headers that come both from HPACK and from the
  136. # user, so we may have unicode floating around in here. We only want
  137. # bytes.
  138. if n in (b':authority', u':authority'):
  139. return v.encode('utf-8') if not isinstance(v, bytes) else v
  140. return None
  141. # Flags used by the validate_headers pipeline to determine which checks
  142. # should be applied to a given set of headers.
  143. HeaderValidationFlags = collections.namedtuple(
  144. 'HeaderValidationFlags',
  145. ['is_client', 'is_trailer', 'is_response_header', 'is_push_promise']
  146. )
  147. def validate_headers(headers, hdr_validation_flags):
  148. """
  149. Validates a header sequence against a set of constraints from RFC 7540.
  150. :param headers: The HTTP header set.
  151. :param hdr_validation_flags: An instance of HeaderValidationFlags.
  152. """
  153. # This validation logic is built on a sequence of generators that are
  154. # iterated over to provide the final header list. This reduces some of the
  155. # overhead of doing this checking. However, it's worth noting that this
  156. # checking remains somewhat expensive, and attempts should be made wherever
  157. # possible to reduce the time spent doing them.
  158. #
  159. # For example, we avoid tuple upacking in loops because it represents a
  160. # fixed cost that we don't want to spend, instead indexing into the header
  161. # tuples.
  162. headers = _reject_uppercase_header_fields(
  163. headers, hdr_validation_flags
  164. )
  165. headers = _reject_surrounding_whitespace(
  166. headers, hdr_validation_flags
  167. )
  168. headers = _reject_te(
  169. headers, hdr_validation_flags
  170. )
  171. headers = _reject_connection_header(
  172. headers, hdr_validation_flags
  173. )
  174. headers = _reject_pseudo_header_fields(
  175. headers, hdr_validation_flags
  176. )
  177. headers = _check_host_authority_header(
  178. headers, hdr_validation_flags
  179. )
  180. headers = _check_path_header(headers, hdr_validation_flags)
  181. return list(headers)
  182. def _reject_uppercase_header_fields(headers, hdr_validation_flags):
  183. """
  184. Raises a ProtocolError if any uppercase character is found in a header
  185. block.
  186. """
  187. for header in headers:
  188. if UPPER_RE.search(header[0]):
  189. raise ProtocolError(
  190. "Received uppercase header name %s." % header[0])
  191. yield header
  192. def _reject_surrounding_whitespace(headers, hdr_validation_flags):
  193. """
  194. Raises a ProtocolError if any header name or value is surrounded by
  195. whitespace characters.
  196. """
  197. # For compatibility with RFC 7230 header fields, we need to allow the field
  198. # value to be an empty string. This is ludicrous, but technically allowed.
  199. # The field name may not be empty, though, so we can safely assume that it
  200. # must have at least one character in it and throw exceptions if it
  201. # doesn't.
  202. for header in headers:
  203. if header[0][0] in _WHITESPACE or header[0][-1] in _WHITESPACE:
  204. raise ProtocolError(
  205. "Received header name surrounded by whitespace %r" % header[0])
  206. if header[1] and ((header[1][0] in _WHITESPACE) or
  207. (header[1][-1] in _WHITESPACE)):
  208. raise ProtocolError(
  209. "Received header value surrounded by whitespace %r" % header[1]
  210. )
  211. yield header
  212. def _reject_te(headers, hdr_validation_flags):
  213. """
  214. Raises a ProtocolError if the TE header is present in a header block and
  215. its value is anything other than "trailers".
  216. """
  217. for header in headers:
  218. if header[0] in (b'te', u'te'):
  219. if header[1].lower() not in (b'trailers', u'trailers'):
  220. raise ProtocolError(
  221. "Invalid value for Transfer-Encoding header: %s" %
  222. header[1]
  223. )
  224. yield header
  225. def _reject_connection_header(headers, hdr_validation_flags):
  226. """
  227. Raises a ProtocolError if the Connection header is present in a header
  228. block.
  229. """
  230. for header in headers:
  231. if header[0] in CONNECTION_HEADERS:
  232. raise ProtocolError(
  233. "Connection-specific header field present: %s." % header[0]
  234. )
  235. yield header
  236. def _custom_startswith(test_string, bytes_prefix, unicode_prefix):
  237. """
  238. Given a string that might be a bytestring or a Unicode string,
  239. return True if it starts with the appropriate prefix.
  240. """
  241. if isinstance(test_string, bytes):
  242. return test_string.startswith(bytes_prefix)
  243. else:
  244. return test_string.startswith(unicode_prefix)
  245. def _assert_header_in_set(string_header, bytes_header, header_set):
  246. """
  247. Given a set of header names, checks whether the string or byte version of
  248. the header name is present. Raises a Protocol error with the appropriate
  249. error if it's missing.
  250. """
  251. if not (string_header in header_set or bytes_header in header_set):
  252. raise ProtocolError(
  253. "Header block missing mandatory %s header" % string_header
  254. )
  255. def _reject_pseudo_header_fields(headers, hdr_validation_flags):
  256. """
  257. Raises a ProtocolError if duplicate pseudo-header fields are found in a
  258. header block or if a pseudo-header field appears in a block after an
  259. ordinary header field.
  260. Raises a ProtocolError if pseudo-header fields are found in trailers.
  261. """
  262. seen_pseudo_header_fields = set()
  263. seen_regular_header = False
  264. for header in headers:
  265. if _custom_startswith(header[0], b':', u':'):
  266. if header[0] in seen_pseudo_header_fields:
  267. raise ProtocolError(
  268. "Received duplicate pseudo-header field %s" % header[0]
  269. )
  270. seen_pseudo_header_fields.add(header[0])
  271. if seen_regular_header:
  272. raise ProtocolError(
  273. "Received pseudo-header field out of sequence: %s" %
  274. header[0]
  275. )
  276. if header[0] not in _ALLOWED_PSEUDO_HEADER_FIELDS:
  277. raise ProtocolError(
  278. "Received custom pseudo-header field %s" % header[0]
  279. )
  280. else:
  281. seen_regular_header = True
  282. yield header
  283. # Check the pseudo-headers we got to confirm they're acceptable.
  284. _check_pseudo_header_field_acceptability(
  285. seen_pseudo_header_fields, hdr_validation_flags
  286. )
  287. def _check_pseudo_header_field_acceptability(pseudo_headers,
  288. hdr_validation_flags):
  289. """
  290. Given the set of pseudo-headers present in a header block and the
  291. validation flags, confirms that RFC 7540 allows them.
  292. """
  293. # Pseudo-header fields MUST NOT appear in trailers - RFC 7540 § 8.1.2.1
  294. if hdr_validation_flags.is_trailer and pseudo_headers:
  295. raise ProtocolError(
  296. "Received pseudo-header in trailer %s" % pseudo_headers
  297. )
  298. # If ':status' pseudo-header is not there in a response header, reject it.
  299. # Similarly, if ':path', ':method', or ':scheme' are not there in a request
  300. # header, reject it. Additionally, if a response contains any request-only
  301. # headers or vice-versa, reject it.
  302. # Relevant RFC section: RFC 7540 § 8.1.2.4
  303. # https://tools.ietf.org/html/rfc7540#section-8.1.2.4
  304. if hdr_validation_flags.is_response_header:
  305. _assert_header_in_set(u':status', b':status', pseudo_headers)
  306. invalid_response_headers = pseudo_headers & _REQUEST_ONLY_HEADERS
  307. if invalid_response_headers:
  308. raise ProtocolError(
  309. "Encountered request-only headers %s" %
  310. invalid_response_headers
  311. )
  312. elif (not hdr_validation_flags.is_response_header and
  313. not hdr_validation_flags.is_trailer):
  314. # This is a request, so we need to have seen :path, :method, and
  315. # :scheme.
  316. _assert_header_in_set(u':path', b':path', pseudo_headers)
  317. _assert_header_in_set(u':method', b':method', pseudo_headers)
  318. _assert_header_in_set(u':scheme', b':scheme', pseudo_headers)
  319. invalid_request_headers = pseudo_headers & _RESPONSE_ONLY_HEADERS
  320. if invalid_request_headers:
  321. raise ProtocolError(
  322. "Encountered response-only headers %s" %
  323. invalid_request_headers
  324. )
  325. def _validate_host_authority_header(headers):
  326. """
  327. Given the :authority and Host headers from a request block that isn't
  328. a trailer, check that:
  329. 1. At least one of these headers is set.
  330. 2. If both headers are set, they match.
  331. :param headers: The HTTP header set.
  332. :raises: ``ProtocolError``
  333. """
  334. # We use None as a sentinel value. Iterate over the list of headers,
  335. # and record the value of these headers (if present). We don't need
  336. # to worry about receiving duplicate :authority headers, as this is
  337. # enforced by the _reject_pseudo_header_fields() pipeline.
  338. #
  339. # TODO: We should also guard against receiving duplicate Host headers,
  340. # and against sending duplicate headers.
  341. authority_header_val = None
  342. host_header_val = None
  343. for header in headers:
  344. if header[0] in (b':authority', u':authority'):
  345. authority_header_val = header[1]
  346. elif header[0] in (b'host', u'host'):
  347. host_header_val = header[1]
  348. yield header
  349. # If we have not-None values for these variables, then we know we saw
  350. # the corresponding header.
  351. authority_present = (authority_header_val is not None)
  352. host_present = (host_header_val is not None)
  353. # It is an error for a request header block to contain neither
  354. # an :authority header nor a Host header.
  355. if not authority_present and not host_present:
  356. raise ProtocolError(
  357. "Request header block does not have an :authority or Host header."
  358. )
  359. # If we receive both headers, they should definitely match.
  360. if authority_present and host_present:
  361. if authority_header_val != host_header_val:
  362. raise ProtocolError(
  363. "Request header block has mismatched :authority and "
  364. "Host headers: %r / %r"
  365. % (authority_header_val, host_header_val)
  366. )
  367. def _check_host_authority_header(headers, hdr_validation_flags):
  368. """
  369. Raises a ProtocolError if a header block arrives that does not contain an
  370. :authority or a Host header, or if a header block contains both fields,
  371. but their values do not match.
  372. """
  373. # We only expect to see :authority and Host headers on request header
  374. # blocks that aren't trailers, so skip this validation if this is a
  375. # response header or we're looking at trailer blocks.
  376. skip_validation = (
  377. hdr_validation_flags.is_response_header or
  378. hdr_validation_flags.is_trailer
  379. )
  380. if skip_validation:
  381. return headers
  382. return _validate_host_authority_header(headers)
  383. def _check_path_header(headers, hdr_validation_flags):
  384. """
  385. Raise a ProtocolError if a header block arrives or is sent that contains an
  386. empty :path header.
  387. """
  388. def inner():
  389. for header in headers:
  390. if header[0] in (b':path', u':path'):
  391. if not header[1]:
  392. raise ProtocolError("An empty :path header is forbidden")
  393. yield header
  394. # We only expect to see :authority and Host headers on request header
  395. # blocks that aren't trailers, so skip this validation if this is a
  396. # response header or we're looking at trailer blocks.
  397. skip_validation = (
  398. hdr_validation_flags.is_response_header or
  399. hdr_validation_flags.is_trailer
  400. )
  401. if skip_validation:
  402. return headers
  403. else:
  404. return inner()
  405. def _lowercase_header_names(headers, hdr_validation_flags):
  406. """
  407. Given an iterable of header two-tuples, rebuilds that iterable with the
  408. header names lowercased. This generator produces tuples that preserve the
  409. original type of the header tuple for tuple and any ``HeaderTuple``.
  410. """
  411. for header in headers:
  412. if isinstance(header, HeaderTuple):
  413. yield header.__class__(header[0].lower(), header[1])
  414. else:
  415. yield (header[0].lower(), header[1])
  416. def _strip_surrounding_whitespace(headers, hdr_validation_flags):
  417. """
  418. Given an iterable of header two-tuples, strip both leading and trailing
  419. whitespace from both header names and header values. This generator
  420. produces tuples that preserve the original type of the header tuple for
  421. tuple and any ``HeaderTuple``.
  422. """
  423. for header in headers:
  424. if isinstance(header, HeaderTuple):
  425. yield header.__class__(header[0].strip(), header[1].strip())
  426. else:
  427. yield (header[0].strip(), header[1].strip())
  428. def _strip_connection_headers(headers, hdr_validation_flags):
  429. """
  430. Strip any connection headers as per RFC7540 § 8.1.2.2.
  431. """
  432. for header in headers:
  433. if header[0] not in CONNECTION_HEADERS:
  434. yield header
  435. def _check_sent_host_authority_header(headers, hdr_validation_flags):
  436. """
  437. Raises an InvalidHeaderBlockError if we try to send a header block
  438. that does not contain an :authority or a Host header, or if
  439. the header block contains both fields, but their values do not match.
  440. """
  441. # We only expect to see :authority and Host headers on request header
  442. # blocks that aren't trailers, so skip this validation if this is a
  443. # response header or we're looking at trailer blocks.
  444. skip_validation = (
  445. hdr_validation_flags.is_response_header or
  446. hdr_validation_flags.is_trailer
  447. )
  448. if skip_validation:
  449. return headers
  450. return _validate_host_authority_header(headers)
  451. def normalize_outbound_headers(headers, hdr_validation_flags):
  452. """
  453. Normalizes a header sequence that we are about to send.
  454. :param headers: The HTTP header set.
  455. :param hdr_validation_flags: An instance of HeaderValidationFlags.
  456. """
  457. headers = _lowercase_header_names(headers, hdr_validation_flags)
  458. headers = _strip_surrounding_whitespace(headers, hdr_validation_flags)
  459. headers = _strip_connection_headers(headers, hdr_validation_flags)
  460. headers = _secure_headers(headers, hdr_validation_flags)
  461. return headers
  462. def validate_outbound_headers(headers, hdr_validation_flags):
  463. """
  464. Validates and normalizes a header sequence that we are about to send.
  465. :param headers: The HTTP header set.
  466. :param hdr_validation_flags: An instance of HeaderValidationFlags.
  467. """
  468. headers = _reject_te(
  469. headers, hdr_validation_flags
  470. )
  471. headers = _reject_connection_header(
  472. headers, hdr_validation_flags
  473. )
  474. headers = _reject_pseudo_header_fields(
  475. headers, hdr_validation_flags
  476. )
  477. headers = _check_sent_host_authority_header(
  478. headers, hdr_validation_flags
  479. )
  480. headers = _check_path_header(headers, hdr_validation_flags)
  481. return headers