headers.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. # -*- coding: utf-8 -*-
  2. """
  3. hyper/common/headers
  4. ~~~~~~~~~~~~~~~~~~~~~
  5. Contains hyper's structures for storing and working with HTTP headers.
  6. """
  7. import collections
  8. from hyper.common.util import to_bytestring, to_bytestring_tuple
  9. class HTTPHeaderMap(collections.MutableMapping):
  10. """
  11. A structure that contains HTTP headers.
  12. HTTP headers are a curious beast. At the surface level they look roughly
  13. like a name-value set, but in practice they have many variations that
  14. make them tricky:
  15. - duplicate keys are allowed
  16. - keys are compared case-insensitively
  17. - duplicate keys are isomorphic to comma-separated values, *except when
  18. they aren't*!
  19. - they logically contain a form of ordering
  20. This data structure is an attempt to preserve all of that information
  21. while being as user-friendly as possible. It retains all of the mapping
  22. convenience methods (allowing by-name indexing), while avoiding using a
  23. dictionary for storage.
  24. When iterated over, this structure returns headers in 'canonical form'.
  25. This form is a tuple, where the first entry is the header name (in
  26. lower-case), and the second entry is a list of header values (in original
  27. case).
  28. The mapping always emits both names and values in the form of bytestrings:
  29. never unicode strings. It can accept names and values in unicode form, and
  30. will automatically be encoded to bytestrings using UTF-8. The reason for
  31. what appears to be a user-unfriendly decision here is primarily to allow
  32. the broadest-possible compatibility (to make it possible to send headers in
  33. unusual encodings) while ensuring that users are never confused about what
  34. type of data they will receive.
  35. .. warning:: Note that this data structure makes none of the performance
  36. guarantees of a dictionary. Lookup and deletion is not an O(1)
  37. operation. Inserting a new value *is* O(1), all other
  38. operations are O(n), including *replacing* a header entirely.
  39. """
  40. def __init__(self, *args, **kwargs):
  41. # The meat of the structure. In practice, headers are an ordered list
  42. # of tuples. This early version of the data structure simply uses this
  43. # directly under the covers.
  44. #
  45. # An important curiosity here is that the headers are not stored in
  46. # 'canonical form', but are instead stored in the form they were
  47. # provided in. This is to ensure that it is always possible to
  48. # reproduce the original header structure if necessary. This leads to
  49. # some unfortunate performance costs on structure access where it is
  50. # often necessary to transform the data into canonical form on access.
  51. # This cost is judged acceptable in low-level code like `hyper`, but
  52. # higher-level abstractions should consider if they really require this
  53. # logic.
  54. self._items = []
  55. for arg in args:
  56. self._items.extend(map(lambda x: to_bytestring_tuple(*x), arg))
  57. for k, v in kwargs.items():
  58. self._items.append(to_bytestring_tuple(k, v))
  59. def __getitem__(self, key):
  60. """
  61. Unlike the dict __getitem__, this returns a list of items in the order
  62. they were added. These items are returned in 'canonical form', meaning
  63. that comma-separated values are split into multiple values.
  64. """
  65. key = to_bytestring(key)
  66. values = []
  67. for k, v in self._items:
  68. if _keys_equal(k, key):
  69. values.extend(x[1] for x in canonical_form(k, v))
  70. if not values:
  71. raise KeyError("Nonexistent header key: {}".format(key))
  72. return values
  73. def __setitem__(self, key, value):
  74. """
  75. Unlike the dict __setitem__, this appends to the list of items.
  76. """
  77. self._items.append(to_bytestring_tuple(key, value))
  78. def __delitem__(self, key):
  79. """
  80. Sadly, __delitem__ is kind of stupid here, but the best we can do is
  81. delete all headers with a given key. To correctly achieve the 'KeyError
  82. on missing key' logic from dictionaries, we need to do this slowly.
  83. """
  84. key = to_bytestring(key)
  85. indices = []
  86. for (i, (k, v)) in enumerate(self._items):
  87. if _keys_equal(k, key):
  88. indices.append(i)
  89. if not indices:
  90. raise KeyError("Nonexistent header key: {}".format(key))
  91. for i in indices[::-1]:
  92. self._items.pop(i)
  93. def __iter__(self):
  94. """
  95. This mapping iterates like the list of tuples it is. The headers are
  96. returned in canonical form.
  97. """
  98. for pair in self._items:
  99. for value in canonical_form(*pair):
  100. yield value
  101. def __len__(self):
  102. """
  103. The length of this mapping is the number of individual headers in
  104. canonical form. Sadly, this is a somewhat expensive operation.
  105. """
  106. size = 0
  107. for _ in self:
  108. size += 1
  109. return size
  110. def __contains__(self, key):
  111. """
  112. If any header is present with this key, returns True.
  113. """
  114. key = to_bytestring(key)
  115. return any(_keys_equal(key, k) for k, _ in self._items)
  116. def keys(self):
  117. """
  118. Returns an iterable of the header keys in the mapping. This explicitly
  119. does not filter duplicates, ensuring that it's the same length as
  120. len().
  121. """
  122. for n, _ in self:
  123. yield n
  124. def items(self):
  125. """
  126. This mapping iterates like the list of tuples it is.
  127. """
  128. return self.__iter__()
  129. def values(self):
  130. """
  131. This is an almost nonsensical query on a header dictionary, but we
  132. satisfy it in the exact same way we satisfy 'keys'.
  133. """
  134. for _, v in self:
  135. yield v
  136. def get(self, name, default=None):
  137. """
  138. Unlike the dict get, this returns a list of items in the order
  139. they were added.
  140. """
  141. try:
  142. return self[name]
  143. except KeyError:
  144. return default
  145. def iter_raw(self):
  146. """
  147. Allows iterating over the headers in 'raw' form: that is, the form in
  148. which they were added to the structure. This iteration is in order,
  149. and can be used to rebuild the original headers (e.g. to determine
  150. exactly what a server sent).
  151. """
  152. for item in self._items:
  153. yield item
  154. def replace(self, key, value):
  155. """
  156. Replace existing header with new value. If header doesn't exist this
  157. method work like ``__setitem__``. Replacing leads to deletion of all
  158. existing headers with the same name.
  159. """
  160. key, value = to_bytestring_tuple(key, value)
  161. indices = []
  162. for (i, (k, v)) in enumerate(self._items):
  163. if _keys_equal(k, key):
  164. indices.append(i)
  165. # If the key isn't present, this is easy: just append and abort early.
  166. if not indices:
  167. self._items.append((key, value))
  168. return
  169. # Delete all but the first. I swear, this is the correct slicing
  170. # syntax!
  171. base_index = indices[0]
  172. for i in indices[:0:-1]:
  173. self._items.pop(i)
  174. del self._items[base_index]
  175. self._items.insert(base_index, (key, value))
  176. def merge(self, other):
  177. """
  178. Merge another header set or any other dict-like into this one.
  179. """
  180. # Short circuit to avoid infinite loops in case we try to merge into
  181. # ourselves.
  182. if other is self:
  183. return
  184. if isinstance(other, HTTPHeaderMap):
  185. self._items.extend(other.iter_raw())
  186. return
  187. for k, v in other.items():
  188. self._items.append(to_bytestring_tuple(k, v))
  189. def __eq__(self, other):
  190. return self._items == other._items
  191. def __ne__(self, other):
  192. return self._items != other._items
  193. def __str__(self): # pragma: no cover
  194. return 'HTTPHeaderMap(%s)' % self._items
  195. def __repr__(self): # pragma: no cover
  196. return str(self)
  197. def canonical_form(k, v):
  198. """
  199. Returns an iterable of key-value-pairs corresponding to the header in
  200. canonical form. This means that the header is split on commas unless for
  201. any reason it's a super-special snowflake (I'm looking at you Set-Cookie).
  202. """
  203. SPECIAL_SNOWFLAKES = set([b'set-cookie', b'set-cookie2'])
  204. k = k.lower()
  205. if k in SPECIAL_SNOWFLAKES:
  206. yield k, v
  207. else:
  208. for sub_val in v.split(b','):
  209. yield k, sub_val.strip()
  210. def _keys_equal(x, y):
  211. """
  212. Returns 'True' if the two keys are equal by the laws of HTTP headers.
  213. """
  214. return x.lower() == y.lower()