huffman.py 2.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. # -*- coding: utf-8 -*-
  2. """
  3. hpack/huffman_decoder
  4. ~~~~~~~~~~~~~~~~~~~~~
  5. An implementation of a bitwise prefix tree specially built for decoding
  6. Huffman-coded content where we already know the Huffman table.
  7. """
  8. from .compat import to_byte, decode_hex
  9. class HuffmanEncoder(object):
  10. """
  11. Encodes a string according to the Huffman encoding table defined in the
  12. HPACK specification.
  13. """
  14. def __init__(self, huffman_code_list, huffman_code_list_lengths):
  15. self.huffman_code_list = huffman_code_list
  16. self.huffman_code_list_lengths = huffman_code_list_lengths
  17. def encode(self, bytes_to_encode):
  18. """
  19. Given a string of bytes, encodes them according to the HPACK Huffman
  20. specification.
  21. """
  22. # If handed the empty string, just immediately return.
  23. if not bytes_to_encode:
  24. return b''
  25. final_num = 0
  26. final_int_len = 0
  27. # Turn each byte into its huffman code. These codes aren't necessarily
  28. # octet aligned, so keep track of how far through an octet we are. To
  29. # handle this cleanly, just use a single giant integer.
  30. for char in bytes_to_encode:
  31. byte = to_byte(char)
  32. bin_int_len = self.huffman_code_list_lengths[byte]
  33. bin_int = self.huffman_code_list[byte] & (
  34. 2 ** (bin_int_len + 1) - 1
  35. )
  36. final_num <<= bin_int_len
  37. final_num |= bin_int
  38. final_int_len += bin_int_len
  39. # Pad out to an octet with ones.
  40. bits_to_be_padded = (8 - (final_int_len % 8)) % 8
  41. final_num <<= bits_to_be_padded
  42. final_num |= (1 << bits_to_be_padded) - 1
  43. # Convert the number to hex and strip off the leading '0x' and the
  44. # trailing 'L', if present.
  45. final_num = hex(final_num)[2:].rstrip('L')
  46. # If this is odd, prepend a zero.
  47. final_num = '0' + final_num if len(final_num) % 2 != 0 else final_num
  48. # This number should have twice as many digits as bytes. If not, we're
  49. # missing some leading zeroes. Work out how many bytes we want and how
  50. # many digits we have, then add the missing zero digits to the front.
  51. total_bytes = (final_int_len + bits_to_be_padded) // 8
  52. expected_digits = total_bytes * 2
  53. if len(final_num) != expected_digits:
  54. missing_digits = expected_digits - len(final_num)
  55. final_num = ('0' * missing_digits) + final_num
  56. return decode_hex(final_num)