# -*- coding: utf-8 -*-
"""
hpack/hpack
~~~~~~~~~~~

Implements the HPACK header compression algorithm as detailed by the IETF.
"""
import logging

from .table import HeaderTable, table_entry_size
from .compat import to_byte, to_bytes
from .exceptions import (
    HPACKDecodingError, OversizedHeaderListError, InvalidTableSizeError
)
from .huffman import HuffmanEncoder
from .huffman_constants import (
    REQUEST_CODES, REQUEST_CODES_LENGTH
)
from .huffman_table import decode_huffman
from .struct import HeaderTuple, NeverIndexedHeaderTuple

log = logging.getLogger(__name__)

INDEX_NONE = b'\x00'
INDEX_NEVER = b'\x10'
INDEX_INCREMENTAL = b'\x40'
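# The three constants above are the first-byte patterns of the literal
# header field representations defined in RFC 7541: without indexing
# (Section 6.2.2), never indexed (Section 6.2.3), and with incremental
# indexing (Section 6.2.1). The chosen pattern is OR'd onto the encoded
# prefix when a literal is emitted below.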

# Precompute 2^i for 1-8 for use in prefix calcs.
# Zero index is not used but is there to save a subtraction
# as prefix numbers are not zero indexed.
_PREFIX_BIT_MAX_NUMBERS = [(2 ** i) - 1 for i in range(9)]

try:  # pragma: no cover
    basestring = basestring
except NameError:  # pragma: no cover
    basestring = (str, bytes)

# We default the maximum header list we're willing to accept to 64kB. That's a
# lot of headers, but if applications want to raise it they can do so.
DEFAULT_MAX_HEADER_LIST_SIZE = 2 ** 16


def _unicode_if_needed(header, raw):
    """
    Provides a header as a unicode string if raw is False, otherwise returns
    it as a bytestring.
    """
    name = to_bytes(header[0])
    value = to_bytes(header[1])
    if not raw:
        name = name.decode('utf-8')
        value = value.decode('utf-8')
    return header.__class__(name, value)


def encode_integer(integer, prefix_bits):
    """
    This encodes an integer according to the wacky integer encoding rules
    defined in the HPACK spec.
    """
    log.debug("Encoding %d with %d bits", integer, prefix_bits)

    if integer < 0:
        raise ValueError(
            "Can only encode positive integers, got %s" % integer
        )

    if prefix_bits < 1 or prefix_bits > 8:
        raise ValueError(
            "Prefix bits must be between 1 and 8, got %s" % prefix_bits
        )

    max_number = _PREFIX_BIT_MAX_NUMBERS[prefix_bits]

    if integer < max_number:
        return bytearray([integer])  # Seriously?
    else:
        elements = [max_number]
        integer -= max_number

        while integer >= 128:
            elements.append((integer & 127) + 128)
            integer >>= 7

        elements.append(integer)

        return bytearray(elements)
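# Worked example (RFC 7541, Appendix C.1): with a 5-bit prefix the value 10
# fits inside the prefix, so encode_integer(10, 5) == bytearray([0x0a]);
# 1337 does not, so the prefix is filled and the remainder follows in 7-bit
# groups: encode_integer(1337, 5) == bytearray([0x1f, 0x9a, 0x0a]).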


def decode_integer(data, prefix_bits):
    """
    This decodes an integer according to the wacky integer encoding rules
    defined in the HPACK spec. Returns a tuple of the decoded integer and the
    number of bytes that were consumed from ``data`` in order to get that
    integer.
    """
    if prefix_bits < 1 or prefix_bits > 8:
        raise ValueError(
            "Prefix bits must be between 1 and 8, got %s" % prefix_bits
        )

    max_number = _PREFIX_BIT_MAX_NUMBERS[prefix_bits]
    index = 1
    shift = 0
    mask = (0xFF >> (8 - prefix_bits))

    try:
        number = to_byte(data[0]) & mask
        if number == max_number:

            while True:
                next_byte = to_byte(data[index])
                index += 1

                if next_byte >= 128:
                    number += (next_byte - 128) << shift
                else:
                    number += next_byte << shift
                    break
                shift += 7
    except IndexError:
        raise HPACKDecodingError(
            "Unable to decode HPACK integer representation from %r" % data
        )

    log.debug("Decoded %d, consumed %d bytes", number, index)

    return number, index
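# Worked example (RFC 7541, Appendix C.1.2): decoding the bytes produced
# above, decode_integer(b'\x1f\x9a\x0a', 5) == (1337, 3): the value 1337 is
# recovered and three bytes were consumed.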


def _dict_to_iterable(header_dict):
    """
    This converts a dictionary to an iterable of two-tuples. This is an
    HPACK-specific function because it pulls the "special headers" out first
    and then emits them.
    """
    assert isinstance(header_dict, dict)
    keys = sorted(
        header_dict.keys(),
        key=lambda k: not _to_bytes(k).startswith(b':')
    )
    for key in keys:
        yield key, header_dict[key]
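# For example, {'accept': '*/*', ':method': 'GET'} yields (':method', 'GET')
# before ('accept', '*/*'): keys starting with b':' sort first because their
# sort key (False) orders before True.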


def _to_bytes(string):
    """
    Convert string to bytes.
    """
    if not isinstance(string, basestring):  # pragma: no cover
        string = str(string)

    return string if isinstance(string, bytes) else string.encode('utf-8')


class Encoder(object):
    """
    An HPACK encoder object. This object takes HTTP headers and emits encoded
    HTTP/2 header blocks.
    """
    def __init__(self):
        self.header_table = HeaderTable()
        self.huffman_coder = HuffmanEncoder(
            REQUEST_CODES, REQUEST_CODES_LENGTH
        )
        self.table_size_changes = []

    @property
    def header_table_size(self):
        """
        Controls the size of the HPACK header table.
        """
        return self.header_table.maxsize

    @header_table_size.setter
    def header_table_size(self, value):
        self.header_table.maxsize = value
        if self.header_table.resized:
            self.table_size_changes.append(value)

    def encode(self, headers, huffman=True):
        """
        Takes a set of headers and encodes them into an HPACK-encoded header
        block.

        :param headers: The headers to encode. Must be either an iterable of
                        tuples, an iterable of :class:`HeaderTuple
                        <hpack.struct.HeaderTuple>`, or a ``dict``.

                        If an iterable of tuples, the tuples may be either
                        two-tuples or three-tuples. If they are two-tuples,
                        the tuples must be of the format ``(name, value)``. If
                        they are three-tuples, they must be of the format
                        ``(name, value, sensitive)``, where ``sensitive`` is a
                        boolean value indicating whether the header should be
                        added to header tables anywhere. If not present,
                        ``sensitive`` defaults to ``False``.

                        If an iterable of :class:`HeaderTuple
                        <hpack.struct.HeaderTuple>`, the tuples must always be
                        two-tuples. Instead of using ``sensitive`` as a third
                        tuple entry, use :class:`NeverIndexedHeaderTuple
                        <hpack.struct.NeverIndexedHeaderTuple>` to request
                        that the field never be indexed.

                        .. warning:: HTTP/2 requires that all special headers
                            (headers whose names begin with ``:`` characters)
                            appear at the *start* of the header block. While
                            this method will ensure that happens for ``dict``
                            subclasses, callers using any other iterable of
                            tuples **must** ensure they place their special
                            headers at the start of the iterable.

                            For efficiency reasons users should prefer to use
                            iterables of two-tuples: fixing the ordering of
                            dictionary headers is an expensive operation that
                            should be avoided if possible.

        :param huffman: (optional) Whether to Huffman-encode any header sent
                        as a literal value. Except for use when debugging, it
                        is recommended that this be left enabled.

        :returns: A bytestring containing the HPACK-encoded header block.
        """
        # Transforming the headers into a header block is a procedure that can
        # be modeled as a chain or pipe. First, the headers are encoded. This
        # encoding can be done a number of ways. If a header name-value pair
        # is already in the header table we can represent it using the
        # indexed representation: the same is true if it is in the static
        # table. Otherwise, a literal representation will be used.
        log.debug("HPACK encoding %s", headers)
        header_block = []

        # Turn the headers into a list of tuples if possible. This is the
        # natural way to interact with them in HPACK. Because dictionaries are
        # unordered, we need to make sure we grab the "special" headers first.
        if isinstance(headers, dict):
            headers = _dict_to_iterable(headers)

        # Before we begin, if the header table size has been changed we need
        # to signal all changes since the last emission appropriately.
        if self.header_table.resized:
            header_block.append(self._encode_table_size_change())
            self.header_table.resized = False

        # Add each header to the header block.
        for header in headers:
            sensitive = False
            if isinstance(header, HeaderTuple):
                sensitive = not header.indexable
            elif len(header) > 2:
                sensitive = header[2]

            header = (_to_bytes(header[0]), _to_bytes(header[1]))
            header_block.append(self.add(header, sensitive, huffman))

        header_block = b''.join(header_block)

        log.debug("Encoded header block to %s", header_block)

        return header_block
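    # Illustrative usage sketch (not itself part of the code below): encode a
    # small request block, marking the cookie as sensitive so it is emitted
    # as a never-indexed literal and kept out of the header tables.
    #
    #     encoder = Encoder()
    #     block = encoder.encode([
    #         (':method', 'GET'),
    #         (':path', '/'),
    #         ('cookie', 'session=secret', True),
    #     ])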

    def add(self, to_add, sensitive, huffman=False):
        """
        This function takes a header key-value tuple and serializes it.
        """
        log.debug("Adding %s to the header table", to_add)
        name, value = to_add

        # Set our indexing mode.
        indexbit = INDEX_INCREMENTAL if not sensitive else INDEX_NEVER

        # Search for a matching header in the header table.
        match = self.header_table.search(name, value)

        if match is None:
            # Not in the header table. Encode using the literal syntax,
            # and add it to the header table.
            encoded = self._encode_literal(name, value, indexbit, huffman)
            if not sensitive:
                self.header_table.add(name, value)
            return encoded

        # The header is in the table, break out the values. If we matched
        # perfectly, we can use the indexed representation: otherwise we
        # can use the indexed literal.
        index, name, perfect = match

        if perfect:
            # Indexed representation.
            encoded = self._encode_indexed(index)
        else:
            # Indexed literal. We are going to add the header to the
            # header table unconditionally. It is a future TODO to
            # filter out headers which are known to be ineffective for
            # indexing, since they just take space in the table and
            # push out other valuable headers.
            encoded = self._encode_indexed_literal(
                index, value, indexbit, huffman
            )
            if not sensitive:
                self.header_table.add(name, value)

        return encoded

    def _encode_indexed(self, index):
        """
        Encodes a header using the indexed representation.
        """
        field = encode_integer(index, 7)
        field[0] |= 0x80  # we set the top bit
        return bytes(field)
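    # For example, ':method: GET' is entry 2 in the HPACK static table
    # (RFC 7541, Appendix A), so a perfect match encodes as the single byte
    # 0x82: the 7-bit integer 2 with the top bit set.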

    def _encode_literal(self, name, value, indexbit, huffman=False):
        """
        Encodes a header with a literal name and literal value. The
        ``indexbit`` prefix signals whether the decoder should add the header
        to its header table.
        """
        if huffman:
            name = self.huffman_coder.encode(name)
            value = self.huffman_coder.encode(value)

        name_len = encode_integer(len(name), 7)
        value_len = encode_integer(len(value), 7)

        if huffman:
            name_len[0] |= 0x80
            value_len[0] |= 0x80

        return b''.join(
            [indexbit, bytes(name_len), name, bytes(value_len), value]
        )

    def _encode_indexed_literal(self, index, value, indexbit, huffman=False):
        """
        Encodes a header with an indexed name and a literal value, signalling
        incremental indexing when ``indexbit`` requests it.
        """
        if indexbit != INDEX_INCREMENTAL:
            prefix = encode_integer(index, 4)
        else:
            prefix = encode_integer(index, 6)

        prefix[0] |= ord(indexbit)

        if huffman:
            value = self.huffman_coder.encode(value)

        value_len = encode_integer(len(value), 7)

        if huffman:
            value_len[0] |= 0x80

        return b''.join([bytes(prefix), bytes(value_len), value])

    def _encode_table_size_change(self):
        """
        Produces the encoded form of all header table size change context
        updates.
        """
        block = b''
        for size_bytes in self.table_size_changes:
            size_bytes = encode_integer(size_bytes, 5)
            size_bytes[0] |= 0x20
            block += bytes(size_bytes)
        self.table_size_changes = []
        return block
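    # Each pending size is emitted as a dynamic table size update
    # (RFC 7541, Section 6.3): a 5-bit prefixed integer with the 0x20
    # pattern in the top bits. For example, a resize to 0 followed by a
    # resize to 4096 produces b'\x20' + b'\x3f\xe1\x1f'.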


class Decoder(object):
    """
    An HPACK decoder object.

    .. versionchanged:: 2.3.0
       Added ``max_header_list_size`` argument.

    :param max_header_list_size: The maximum decompressed size we will allow
        for any single header block. This is a protection against DoS attacks
        that attempt to force the application to expand a relatively small
        amount of data into a really large header list, allowing enormous
        amounts of memory to be allocated.

        If this amount of data is exceeded, an `OversizedHeaderListError
        <hpack.OversizedHeaderListError>` exception will be raised. At this
        point the connection should be shut down, as the HPACK state will no
        longer be usable.

        Defaults to 64kB.
    :type max_header_list_size: ``int``
    """
    def __init__(self, max_header_list_size=DEFAULT_MAX_HEADER_LIST_SIZE):
        self.header_table = HeaderTable()

        #: The maximum decompressed size we will allow for any single header
        #: block. This is a protection against DoS attacks that attempt to
        #: force the application to expand a relatively small amount of data
        #: into a really large header list, allowing enormous amounts of
        #: memory to be allocated.
        #:
        #: If this amount of data is exceeded, an `OversizedHeaderListError
        #: <hpack.OversizedHeaderListError>` exception will be raised. At this
        #: point the connection should be shut down, as the HPACK state will
        #: no longer be usable.
        #:
        #: Defaults to 64kB.
        #:
        #: .. versionadded:: 2.3.0
        self.max_header_list_size = max_header_list_size

        #: Maximum allowed header table size.
        #:
        #: An HTTP/2 implementation should set this to the most recent value
        #: of SETTINGS_HEADER_TABLE_SIZE that it sent *and has received an ACK
        #: for*. Once this setting is set, the actual header table size will
        #: be checked at the end of each decoding run and whenever it is
        #: changed, to confirm that it fits in this size.
        self.max_allowed_table_size = self.header_table.maxsize

    @property
    def header_table_size(self):
        """
        Controls the size of the HPACK header table.
        """
        return self.header_table.maxsize

    @header_table_size.setter
    def header_table_size(self, value):
        self.header_table.maxsize = value

    def decode(self, data, raw=False):
        """
        Takes an HPACK-encoded header block and decodes it into a header set.

        :param data: A bytestring representing a complete HPACK-encoded header
                     block.
        :param raw: (optional) Whether to return the headers as tuples of raw
                    byte strings or to decode them as UTF-8 before returning
                    them. The default value is False, which returns tuples of
                    Unicode strings.
        :returns: A list of two-tuples of ``(name, value)`` representing the
                  HPACK-encoded headers, in the order they were decoded.
        :raises HPACKDecodingError: If an error is encountered while decoding
                                    the header block.
        """
        log.debug("Decoding %s", data)

        data_mem = memoryview(data)
        headers = []
        data_len = len(data)
        inflated_size = 0
        current_index = 0

        while current_index < data_len:
            # Work out what kind of header we're decoding.
            # If the high bit is 1, it's an indexed field.
            current = to_byte(data[current_index])
            indexed = True if current & 0x80 else False

            # Otherwise, if the second-highest bit is 1 it's a field that does
            # alter the header table.
            literal_index = True if current & 0x40 else False

            # Otherwise, if the third-highest bit is 1 it's an encoding
            # context update.
            encoding_update = True if current & 0x20 else False

            if indexed:
                header, consumed = self._decode_indexed(
                    data_mem[current_index:]
                )
            elif literal_index:
                # It's a literal header that does affect the header table.
                header, consumed = self._decode_literal_index(
                    data_mem[current_index:]
                )
            elif encoding_update:
                # It's an update to the encoding context. These are forbidden
                # in a header block after any actual header.
                if headers:
                    raise HPACKDecodingError(
                        "Table size update not at the start of the block"
                    )
                consumed = self._update_encoding_context(
                    data_mem[current_index:]
                )
                header = None
            else:
                # It's a literal header that does not affect the header table.
                header, consumed = self._decode_literal_no_index(
                    data_mem[current_index:]
                )

            if header:
                headers.append(header)
                inflated_size += table_entry_size(*header)

                if inflated_size > self.max_header_list_size:
                    raise OversizedHeaderListError(
                        "A header list larger than %d has been received" %
                        self.max_header_list_size
                    )

            current_index += consumed

        # Confirm that the table size is lower than the maximum. We do this
        # here to catch the case where the maximum has been *shrunk*, but the
        # remote peer has not actually resized its table to match.
        self._assert_valid_table_size()

        try:
            return [_unicode_if_needed(h, raw) for h in headers]
        except UnicodeDecodeError:
            raise HPACKDecodingError("Unable to decode headers as UTF-8.")
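    # Illustrative usage sketch: the receiving side feeds each complete
    # header block through decode(). For example, the two-byte block
    # b'\x82\x84' indexes static table entries 2 and 4, so
    # Decoder().decode(b'\x82\x84') == [(':method', 'GET'), (':path', '/')].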

    def _assert_valid_table_size(self):
        """
        Check that the table size set by the encoder is lower than the maximum
        we expect to have.
        """
        if self.header_table_size > self.max_allowed_table_size:
            raise InvalidTableSizeError(
                "Encoder did not shrink table size to within the max"
            )

    def _update_encoding_context(self, data):
        """
        Handles a byte that updates the encoding context.
        """
        # We've been asked to resize the header table.
        new_size, consumed = decode_integer(data, 5)
        if new_size > self.max_allowed_table_size:
            raise InvalidTableSizeError(
                "Encoder exceeded max allowable table size"
            )
        self.header_table_size = new_size
        return consumed

    def _decode_indexed(self, data):
        """
        Decodes a header represented using the indexed representation.
        """
        index, consumed = decode_integer(data, 7)
        header = HeaderTuple(*self.header_table.get_by_index(index))
        log.debug("Decoded %s, consumed %d", header, consumed)
        return header, consumed

    def _decode_literal_no_index(self, data):
        return self._decode_literal(data, False)

    def _decode_literal_index(self, data):
        return self._decode_literal(data, True)

    def _decode_literal(self, data, should_index):
        """
        Decodes a header represented with a literal.
        """
        total_consumed = 0

        # When should_index is true, if the low six bits of the first byte are
        # nonzero, the header name is indexed.
        # When should_index is false, if the low four bits of the first byte
        # are nonzero the header name is indexed.
        if should_index:
            indexed_name = to_byte(data[0]) & 0x3F
            name_len = 6
            not_indexable = False
        else:
            high_byte = to_byte(data[0])
            indexed_name = high_byte & 0x0F
            name_len = 4
            not_indexable = high_byte & 0x10

        if indexed_name:
            # Indexed header name.
            index, consumed = decode_integer(data, name_len)
            name = self.header_table.get_by_index(index)[0]

            total_consumed = consumed
            length = 0
        else:
            # Literal header name. The first byte was consumed, so we need to
            # move forward.
            data = data[1:]

            length, consumed = decode_integer(data, 7)
            name = data[consumed:consumed + length]
            if len(name) != length:
                raise HPACKDecodingError("Truncated header block")

            if to_byte(data[0]) & 0x80:
                name = decode_huffman(name)
            total_consumed = consumed + length + 1  # Since we moved forward 1.

        data = data[consumed + length:]

        # The header value is definitely length-based.
        length, consumed = decode_integer(data, 7)
        value = data[consumed:consumed + length]
        if len(value) != length:
            raise HPACKDecodingError("Truncated header block")

        if to_byte(data[0]) & 0x80:
            value = decode_huffman(value)

        # Update the total consumed length.
        total_consumed += length + consumed

        # If we have been told never to index the header field, encode that in
        # the tuple we use.
        if not_indexable:
            header = NeverIndexedHeaderTuple(name, value)
        else:
            header = HeaderTuple(name, value)

        # If we've been asked to index this, add it to the header table.
        if should_index:
            self.header_table.add(name, value)

        log.debug(
            "Decoded %s, total consumed %d bytes, indexed %s",
            header,
            total_consumed,
            should_index
        )

        return header, total_consumed
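    # Worked example (RFC 7541, Appendix C.2.1): the 26-byte block
    # b'\x40\x0acustom-key\x0dcustom-header' is a literal field with
    # incremental indexing and a literal, non-Huffman name, so decoding it
    # yields ('custom-key', 'custom-header') and adds that pair to the
    # dynamic table.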