streaming_iterator.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
# -*- coding: utf-8 -*-
"""

requests_toolbelt.streaming_iterator
====================================

This holds the implementation details for the :class:`StreamingIterator`. It
is designed for the case where you, the user, know the size of the upload but
need to provide the data as an iterator. This class will allow you to specify
the size and stream the data without using a chunked transfer-encoding.

"""
  10. from requests.utils import super_len
  11. from .multipart.encoder import CustomBytesIO, encode_with
  12. class StreamingIterator(object):
  13. """
  14. This class provides a way of allowing iterators with a known size to be
  15. streamed instead of chunked.
  16. In requests, if you pass in an iterator it assumes you want to use
  17. chunked transfer-encoding to upload the data, which not all servers
  18. support well. Additionally, you may want to set the content-length
  19. yourself to avoid this but that will not work. The only way to preempt
  20. requests using a chunked transfer-encoding and forcing it to stream the
  21. uploads is to mimic a very specific interace. Instead of having to know
  22. these details you can instead just use this class. You simply provide the
  23. size and iterator and pass the instance of StreamingIterator to requests
  24. via the data parameter like so:
  25. .. code-block:: python
  26. from requests_toolbelt import StreamingIterator
  27. import requests
  28. # Let iterator be some generator that you already have and size be
  29. # the size of the data produced by the iterator
  30. r = requests.post(url, data=StreamingIterator(size, iterator))
  31. You can also pass file-like objects to :py:class:`StreamingIterator` in
  32. case requests can't determize the filesize itself. This is the case with
  33. streaming file objects like ``stdin`` or any sockets. Wrapping e.g. files
  34. that are on disk with ``StreamingIterator`` is unnecessary, because
  35. requests can determine the filesize itself.
  36. Naturally, you should also set the `Content-Type` of your upload
  37. appropriately because the toolbelt will not attempt to guess that for you.
  38. """
  39. def __init__(self, size, iterator, encoding='utf-8'):
  40. #: The expected size of the upload
  41. self.size = int(size)
  42. if self.size < 0:
  43. raise ValueError(
  44. 'The size of the upload must be a positive integer'
  45. )
  46. #: Attribute that requests will check to determine the length of the
  47. #: body. See bug #80 for more details
  48. self.len = self.size
  49. #: Encoding the input data is using
  50. self.encoding = encoding
  51. #: The iterator used to generate the upload data
  52. self.iterator = iterator
  53. if hasattr(iterator, 'read'):
  54. self._file = iterator
  55. else:
  56. self._file = _IteratorAsBinaryFile(iterator, encoding)
  57. def read(self, size=-1):
  58. return encode_with(self._file.read(size), self.encoding)
  59. class _IteratorAsBinaryFile(object):
  60. def __init__(self, iterator, encoding='utf-8'):
  61. #: The iterator used to generate the upload data
  62. self.iterator = iterator
  63. #: Encoding the iterator is using
  64. self.encoding = encoding
  65. # The buffer we use to provide the correct number of bytes requested
  66. # during a read
  67. self._buffer = CustomBytesIO()
  68. def _get_bytes(self):
  69. try:
  70. return encode_with(next(self.iterator), self.encoding)
  71. except StopIteration:
  72. return b''
  73. def _load_bytes(self, size):
  74. self._buffer.smart_truncate()
  75. amount_to_load = size - super_len(self._buffer)
  76. bytes_to_append = True
  77. while amount_to_load > 0 and bytes_to_append:
  78. bytes_to_append = self._get_bytes()
  79. amount_to_load -= self._buffer.append(bytes_to_append)
  80. def read(self, size=-1):
  81. size = int(size)
  82. if size == -1:
  83. return b''.join(self.iterator)
  84. self._load_bytes(size)
  85. return self._buffer.read(size)