hpack.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662
  1. # -*- coding: utf-8 -*-
  2. """
  3. hpack/hpack
  4. ~~~~~~~~~~~
  5. Implements the HPACK header compression algorithm as detailed by the IETF.
  6. """
  7. import collections
  8. import logging
  9. from .compat import to_byte
  10. from .huffman import HuffmanDecoder, HuffmanEncoder
  11. from .huffman_constants import (
  12. REQUEST_CODES, REQUEST_CODES_LENGTH
  13. )
  14. log = logging.getLogger(__name__)
  15. def encode_integer(integer, prefix_bits):
  16. """
  17. This encodes an integer according to the wacky integer encoding rules
  18. defined in the HPACK spec.
  19. """
  20. log.debug("Encoding %d with %d bits", integer, prefix_bits)
  21. max_number = (2 ** prefix_bits) - 1
  22. if (integer < max_number):
  23. return bytearray([integer]) # Seriously?
  24. else:
  25. elements = [max_number]
  26. integer = integer - max_number
  27. while integer >= 128:
  28. elements.append((integer % 128) + 128)
  29. integer = integer // 128 # We need integer division
  30. elements.append(integer)
  31. return bytearray(elements)
  32. def decode_integer(data, prefix_bits):
  33. """
  34. This decodes an integer according to the wacky integer encoding rules
  35. defined in the HPACK spec. Returns a tuple of the decoded integer and the
  36. number of bytes that were consumed from ``data`` in order to get that
  37. integer.
  38. """
  39. multiple = lambda index: 128 ** (index - 1)
  40. max_number = (2 ** prefix_bits) - 1
  41. mask = 0xFF >> (8 - prefix_bits)
  42. index = 0
  43. number = to_byte(data[index]) & mask
  44. if (number == max_number):
  45. while True:
  46. index += 1
  47. next_byte = to_byte(data[index])
  48. if next_byte >= 128:
  49. number += (next_byte - 128) * multiple(index)
  50. else:
  51. number += next_byte * multiple(index)
  52. break
  53. log.debug("Decoded %d, consumed %d bytes", number, index + 1)
  54. return (number, index + 1)
  55. def _to_bytes(string):
  56. """
  57. Convert string to bytes.
  58. """
  59. if not isinstance(string, (str, bytes)): # pragma: no cover
  60. string = str(string)
  61. return string if isinstance(string, bytes) else string.encode('utf-8')
  62. def header_table_size(table):
  63. """
  64. Calculates the 'size' of the header table as defined by the HTTP/2
  65. specification.
  66. """
  67. # It's phenomenally frustrating that the specification feels it is able to
  68. # tell me how large the header table is, considering that its calculations
  69. # assume a very particular layout that most implementations will not have.
  70. # I appreciate it's an attempt to prevent DoS attacks by sending lots of
  71. # large headers in the header table, but it seems like a better approach
  72. # would be to limit the size of headers. Ah well.
  73. return sum(32 + len(name) + len(value) for name, value in table)
  74. class Encoder(object):
  75. """
  76. An HPACK encoder object. This object takes HTTP headers and emits encoded
  77. HTTP/2 header blocks.
  78. """
  79. # This is the static table of header fields.
  80. static_table = [
  81. (b':authority', b''),
  82. (b':method', b'GET'),
  83. (b':method', b'POST'),
  84. (b':path', b'/'),
  85. (b':path', b'/index.html'),
  86. (b':scheme', b'http'),
  87. (b':scheme', b'https'),
  88. (b':status', b'200'),
  89. (b':status', b'204'),
  90. (b':status', b'206'),
  91. (b':status', b'304'),
  92. (b':status', b'400'),
  93. (b':status', b'404'),
  94. (b':status', b'500'),
  95. (b'accept-charset', b''),
  96. (b'accept-encoding', b'gzip, deflate'),
  97. (b'accept-language', b''),
  98. (b'accept-ranges', b''),
  99. (b'accept', b''),
  100. (b'access-control-allow-origin', b''),
  101. (b'age', b''),
  102. (b'allow', b''),
  103. (b'authorization', b''),
  104. (b'cache-control', b''),
  105. (b'content-disposition', b''),
  106. (b'content-encoding', b''),
  107. (b'content-language', b''),
  108. (b'content-length', b''),
  109. (b'content-location', b''),
  110. (b'content-range', b''),
  111. (b'content-type', b''),
  112. (b'cookie', b''),
  113. (b'date', b''),
  114. (b'etag', b''),
  115. (b'expect', b''),
  116. (b'expires', b''),
  117. (b'from', b''),
  118. (b'host', b''),
  119. (b'if-match', b''),
  120. (b'if-modified-since', b''),
  121. (b'if-none-match', b''),
  122. (b'if-range', b''),
  123. (b'if-unmodified-since', b''),
  124. (b'last-modified', b''),
  125. (b'link', b''),
  126. (b'location', b''),
  127. (b'max-forwards', b''),
  128. (b'proxy-authenticate', b''),
  129. (b'proxy-authorization', b''),
  130. (b'range', b''),
  131. (b'referer', b''),
  132. (b'refresh', b''),
  133. (b'retry-after', b''),
  134. (b'server', b''),
  135. (b'set-cookie', b''),
  136. (b'strict-transport-security', b''),
  137. (b'transfer-encoding', b''),
  138. (b'user-agent', b''),
  139. (b'vary', b''),
  140. (b'via', b''),
  141. (b'www-authenticate', b''),
  142. ]
  143. def __init__(self):
  144. self.header_table = collections.deque()
  145. self._header_table_size = 4096 # This value set by the standard.
  146. self.huffman_coder = HuffmanEncoder(
  147. REQUEST_CODES, REQUEST_CODES_LENGTH
  148. )
  149. # We need to keep track of whether the header table size has been
  150. # changed since we last encoded anything. If it has, we need to signal
  151. # that change in the HPACK block.
  152. self._table_size_changed = False
  153. @property
  154. def header_table_size(self):
  155. return self._header_table_size
  156. @header_table_size.setter
  157. def header_table_size(self, value):
  158. log.debug(
  159. "Setting header table size to %d from %d",
  160. value,
  161. self._header_table_size
  162. )
  163. # If the new value is larger than the current one, no worries!
  164. # Otherwise, we may need to shrink the header table.
  165. if value < self._header_table_size:
  166. current_size = header_table_size(self.header_table)
  167. while value < current_size:
  168. header = self.header_table.pop()
  169. n, v = header
  170. current_size -= (
  171. 32 + len(n) + len(v)
  172. )
  173. log.debug(
  174. "Removed %s: %s from the encoder header table", n, v
  175. )
  176. if value != self._header_table_size:
  177. self._table_size_changed = True
  178. self._header_table_size = value
  179. def encode(self, headers, huffman=True):
  180. """
  181. Takes a set of headers and encodes them into a HPACK-encoded header
  182. block.
  183. Transforming the headers into a header block is a procedure that can
  184. be modeled as a chain or pipe. First, the headers are encoded. This
  185. encoding can be done a number of ways. If the header name-value pair
  186. are already in the header table we can represent them using the indexed
  187. representation: the same is true if they are in the static table.
  188. Otherwise, a literal representation will be used.
  189. """
  190. log.debug("HPACK encoding %s", headers)
  191. header_block = []
  192. # Turn the headers into a list of tuples if possible. This is the
  193. # natural way to interact with them in HPACK.
  194. if isinstance(headers, dict):
  195. headers = headers.items()
  196. # Next, walk across the headers and turn them all into bytestrings.
  197. headers = [(_to_bytes(n), _to_bytes(v)) for n, v in headers]
  198. # Before we begin, if the header table size has been changed we need
  199. # to signal that appropriately.
  200. if self._table_size_changed:
  201. header_block.append(self._encode_table_size_change())
  202. self._table_size_changed = False
  203. # We can now encode each header in the block.
  204. header_block.extend(
  205. (self.add(header, huffman) for header in headers)
  206. )
  207. header_block = b''.join(header_block)
  208. log.debug("Encoded header block to %s", header_block)
  209. return header_block
  210. def add(self, to_add, huffman=False):
  211. """
  212. This function takes a header key-value tuple and serializes it.
  213. """
  214. log.debug("Adding %s to the header table", to_add)
  215. name, value = to_add
  216. # Search for a matching header in the header table.
  217. match = self.matching_header(name, value)
  218. if match is None:
  219. # Not in the header table. Encode using the literal syntax,
  220. # and add it to the header table.
  221. encoded = self._encode_literal(name, value, True, huffman)
  222. self._add_to_header_table(to_add)
  223. return encoded
  224. # The header is in the table, break out the values. If we matched
  225. # perfectly, we can use the indexed representation: otherwise we
  226. # can use the indexed literal.
  227. index, perfect = match
  228. if perfect:
  229. # Indexed representation.
  230. encoded = self._encode_indexed(index)
  231. else:
  232. # Indexed literal. We are going to add header to the
  233. # header table unconditionally. It is a future todo to
  234. # filter out headers which are known to be ineffective for
  235. # indexing since they just take space in the table and
  236. # pushed out other valuable headers.
  237. encoded = self._encode_indexed_literal(index, value, huffman)
  238. self._add_to_header_table(to_add)
  239. return encoded
  240. def matching_header(self, name, value):
  241. """
  242. Scans the header table and the static table. Returns a tuple, where the
  243. first value is the index of the match, and the second is whether there
  244. was a full match or not. Prefers full matches to partial ones.
  245. Upsettingly, the header table is one-indexed, not zero-indexed.
  246. """
  247. partial_match = None
  248. static_table_len = len(Encoder.static_table)
  249. for (i, (n, v)) in enumerate(Encoder.static_table):
  250. if n == name:
  251. if v == value:
  252. return (i + 1, Encoder.static_table[i])
  253. elif partial_match is None:
  254. partial_match = (i + 1, None)
  255. for (i, (n, v)) in enumerate(self.header_table):
  256. if n == name:
  257. if v == value:
  258. return (i + static_table_len + 1, self.header_table[i])
  259. elif partial_match is None:
  260. partial_match = (i + static_table_len + 1, None)
  261. return partial_match
  262. def _add_to_header_table(self, header):
  263. """
  264. Adds a header to the header table, evicting old ones if necessary.
  265. """
  266. # Be optimistic: add the header straight away.
  267. self.header_table.appendleft(header)
  268. # Now, work out how big the header table is.
  269. actual_size = header_table_size(self.header_table)
  270. # Loop and remove whatever we need to.
  271. while actual_size > self.header_table_size:
  272. header = self.header_table.pop()
  273. n, v = header
  274. actual_size -= (
  275. 32 + len(n) + len(v)
  276. )
  277. log.debug("Evicted %s: %s from the header table", n, v)
  278. def _encode_indexed(self, index):
  279. """
  280. Encodes a header using the indexed representation.
  281. """
  282. field = encode_integer(index, 7)
  283. field[0] = field[0] | 0x80 # we set the top bit
  284. return bytes(field)
  285. def _encode_literal(self, name, value, indexing, huffman=False):
  286. """
  287. Encodes a header with a literal name and literal value. If ``indexing``
  288. is True, the header will be added to the header table: otherwise it
  289. will not.
  290. """
  291. prefix = b'\x40' if indexing else b'\x00'
  292. if huffman:
  293. name = self.huffman_coder.encode(name)
  294. value = self.huffman_coder.encode(value)
  295. name_len = encode_integer(len(name), 7)
  296. value_len = encode_integer(len(value), 7)
  297. if huffman:
  298. name_len[0] |= 0x80
  299. value_len[0] |= 0x80
  300. return b''.join([prefix, bytes(name_len), name, bytes(value_len), value])
  301. def _encode_indexed_literal(self, index, value, huffman=False):
  302. """
  303. Encodes a header with an indexed name and a literal value and performs
  304. incremental indexing.
  305. """
  306. prefix = encode_integer(index, 6)
  307. prefix[0] |= 0x40
  308. if huffman:
  309. value = self.huffman_coder.encode(value)
  310. value_len = encode_integer(len(value), 7)
  311. if huffman:
  312. value_len[0] |= 0x80
  313. return b''.join([bytes(prefix), bytes(value_len), value])
  314. def _encode_table_size_change(self):
  315. """
  316. Produces the encoded form of a header table size change context update.
  317. """
  318. size_bytes = encode_integer(self.header_table_size, 5)
  319. size_bytes[0] |= 0x20
  320. return bytes(size_bytes)
  321. class Decoder(object):
  322. """
  323. An HPACK decoder object.
  324. """
  325. static_table = [
  326. (b':authority', b''),
  327. (b':method', b'GET'),
  328. (b':method', b'POST'),
  329. (b':path', b'/'),
  330. (b':path', b'/index.html'),
  331. (b':scheme', b'http'),
  332. (b':scheme', b'https'),
  333. (b':status', b'200'),
  334. (b':status', b'204'),
  335. (b':status', b'206'),
  336. (b':status', b'304'),
  337. (b':status', b'400'),
  338. (b':status', b'404'),
  339. (b':status', b'500'),
  340. (b'accept-charset', b''),
  341. (b'accept-encoding', b'gzip, deflate'),
  342. (b'accept-language', b''),
  343. (b'accept-ranges', b''),
  344. (b'accept', b''),
  345. (b'access-control-allow-origin', b''),
  346. (b'age', b''),
  347. (b'allow', b''),
  348. (b'authorization', b''),
  349. (b'cache-control', b''),
  350. (b'content-disposition', b''),
  351. (b'content-encoding', b''),
  352. (b'content-language', b''),
  353. (b'content-length', b''),
  354. (b'content-location', b''),
  355. (b'content-range', b''),
  356. (b'content-type', b''),
  357. (b'cookie', b''),
  358. (b'date', b''),
  359. (b'etag', b''),
  360. (b'expect', b''),
  361. (b'expires', b''),
  362. (b'from', b''),
  363. (b'host', b''),
  364. (b'if-match', b''),
  365. (b'if-modified-since', b''),
  366. (b'if-none-match', b''),
  367. (b'if-range', b''),
  368. (b'if-unmodified-since', b''),
  369. (b'last-modified', b''),
  370. (b'link', b''),
  371. (b'location', b''),
  372. (b'max-forwards', b''),
  373. (b'proxy-authenticate', b''),
  374. (b'proxy-authorization', b''),
  375. (b'range', b''),
  376. (b'referer', b''),
  377. (b'refresh', b''),
  378. (b'retry-after', b''),
  379. (b'server', b''),
  380. (b'set-cookie', b''),
  381. (b'strict-transport-security', b''),
  382. (b'transfer-encoding', b''),
  383. (b'user-agent', b''),
  384. (b'vary', b''),
  385. (b'via', b''),
  386. (b'www-authenticate', b''),
  387. ]
  388. def __init__(self):
  389. self.header_table = collections.deque()
  390. self._header_table_size = 4096 # This value set by the standard.
  391. self.huffman_coder = HuffmanDecoder(
  392. REQUEST_CODES, REQUEST_CODES_LENGTH
  393. )
  394. @property
  395. def header_table_size(self):
  396. return self._header_table_size
  397. @header_table_size.setter
  398. def header_table_size(self, value):
  399. log.debug(
  400. "Resizing decoder header table to %d from %d",
  401. value,
  402. self._header_table_size
  403. )
  404. # If the new value is larger than the current one, no worries!
  405. # Otherwise, we may need to shrink the header table.
  406. if value < self._header_table_size:
  407. current_size = header_table_size(self.header_table)
  408. while value < current_size:
  409. header = self.header_table.pop()
  410. n, v = header
  411. current_size -= (
  412. 32 + len(n) + len(v)
  413. )
  414. log.debug("Evicting %s: %s from the header table", n, v)
  415. self._header_table_size = value
  416. def decode(self, data):
  417. """
  418. Takes an HPACK-encoded header block and decodes it into a header set.
  419. """
  420. log.debug("Decoding %s", data)
  421. headers = []
  422. data_len = len(data)
  423. current_index = 0
  424. while current_index < data_len:
  425. # Work out what kind of header we're decoding.
  426. # If the high bit is 1, it's an indexed field.
  427. current = to_byte(data[current_index])
  428. indexed = bool(current & 0x80)
  429. # Otherwise, if the second-highest bit is 1 it's a field that does
  430. # alter the header table.
  431. literal_index = bool(current & 0x40)
  432. # Otherwise, if the third-highest bit is 1 it's an encoding context
  433. # update.
  434. encoding_update = bool(current & 0x20)
  435. if indexed:
  436. header, consumed = self._decode_indexed(data[current_index:])
  437. elif literal_index:
  438. # It's a literal header that does affect the header table.
  439. header, consumed = self._decode_literal_index(
  440. data[current_index:]
  441. )
  442. elif encoding_update:
  443. # It's an update to the encoding context.
  444. consumed = self._update_encoding_context(data)
  445. header = None
  446. else:
  447. # It's a literal header that does not affect the header table.
  448. header, consumed = self._decode_literal_no_index(
  449. data[current_index:]
  450. )
  451. if header:
  452. headers.append(header)
  453. current_index += consumed
  454. return [(n.decode('utf-8'), v.decode('utf-8')) for n, v in headers]
  455. def _add_to_header_table(self, new_header):
  456. """
  457. Adds a header to the header table, evicting old ones if necessary.
  458. """
  459. # Be optimistic: add the header straight away.
  460. self.header_table.appendleft(new_header)
  461. # Now, work out how big the header table is.
  462. actual_size = header_table_size(self.header_table)
  463. # Loop and remove whatever we need to.
  464. while actual_size > self.header_table_size:
  465. header = self.header_table.pop()
  466. n, v = header
  467. actual_size -= (
  468. 32 + len(n) + len(v)
  469. )
  470. log.debug("Evicting %s: %s from the header table", n, v)
  471. def _update_encoding_context(self, data):
  472. """
  473. Handles a byte that updates the encoding context.
  474. """
  475. # We've been asked to resize the header table.
  476. new_size, consumed = decode_integer(data, 5)
  477. self.header_table_size = new_size
  478. return consumed
  479. def _decode_indexed(self, data):
  480. """
  481. Decodes a header represented using the indexed representation.
  482. """
  483. index, consumed = decode_integer(data, 7)
  484. index -= 1 # Because this idiot table is 1-indexed. Ugh.
  485. if index >= len(Decoder.static_table):
  486. index -= len(Decoder.static_table)
  487. header = self.header_table[index]
  488. else:
  489. header = Decoder.static_table[index]
  490. log.debug("Decoded %s, consumed %d", header, consumed)
  491. return header, consumed
  492. def _decode_literal_no_index(self, data):
  493. return self._decode_literal(data, False)
  494. def _decode_literal_index(self, data):
  495. return self._decode_literal(data, True)
  496. def _decode_literal(self, data, should_index):
  497. """
  498. Decodes a header represented with a literal.
  499. """
  500. total_consumed = 0
  501. # When should_index is true, if the low six bits of the first byte are
  502. # nonzero, the header name is indexed.
  503. # When should_index is false, if the low four bits of the first byte
  504. # are nonzero the header name is indexed.
  505. if should_index:
  506. indexed_name = to_byte(data[0]) & 0x3F
  507. name_len = 6
  508. else:
  509. indexed_name = to_byte(data[0]) & 0x0F
  510. name_len = 4
  511. if indexed_name:
  512. # Indexed header name.
  513. index, consumed = decode_integer(data, name_len)
  514. index -= 1
  515. if index >= len(Decoder.static_table):
  516. index -= len(Decoder.static_table)
  517. name = self.header_table[index][0]
  518. else:
  519. name = Decoder.static_table[index][0]
  520. total_consumed = consumed
  521. length = 0
  522. else:
  523. # Literal header name. The first byte was consumed, so we need to
  524. # move forward.
  525. data = data[1:]
  526. length, consumed = decode_integer(data, 7)
  527. name = data[consumed:consumed + length]
  528. if to_byte(data[0]) & 0x80:
  529. name = self.huffman_coder.decode(name)
  530. total_consumed = consumed + length + 1 # Since we moved forward 1.
  531. data = data[consumed + length:]
  532. # The header value is definitely length-based.
  533. length, consumed = decode_integer(data, 7)
  534. value = data[consumed:consumed + length]
  535. if to_byte(data[0]) & 0x80:
  536. value = self.huffman_coder.decode(value)
  537. # Updated the total consumed length.
  538. total_consumed += length + consumed
  539. # If we've been asked to index this, add it to the header table.
  540. header = (name, value)
  541. if should_index:
  542. self._add_to_header_table(header)
  543. log.debug(
  544. "Decoded %s, total consumed %d bytes, indexed %s",
  545. header,
  546. total_consumed,
  547. should_index
  548. )
  549. return header, total_consumed