stream.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. # -*- coding: utf-8 -*-
  2. """Utilities for dealing with streamed requests."""
  3. import collections
  4. import os.path
  5. import re
  6. from .. import exceptions as exc
  # Regular expressions borrowed from werkzeug/http.py
  # cd2c97bb0a076da2322f11adce0b2731f9193396 L62-L64

  # Pattern text for one double-quoted string. Backslash-escaped characters
  # (including an escaped double quote) are allowed between the quotes.
  _QUOTED_STRING_RE = r'"[^"\\]*(?:\\.[^"\\]*)*"'

  # Matches a single ``; key`` or ``; key=value`` piece of a header value.
  # Group 1 is the key and group 2 is the value; group 2 is ``None`` when the
  # piece has no ``=value`` part. Either group may be a quoted string, in which
  # case the surrounding quotes are part of the captured text.
  _OPTION_HEADER_PIECE_RE = re.compile(
      r';\s*(%s|[^\s;=]+)\s*(?:=\s*(%s|[^;]+))?\s*' % (_QUOTED_STRING_RE,
                                                       _QUOTED_STRING_RE)
  )

  # Default value of the ``chunksize`` parameter of
  # ``stream_response_to_file``, which forwards it to
  # ``response.iter_content`` as ``chunk_size``.
  _DEFAULT_CHUNKSIZE = 512
  def _get_filename(content_disposition):
      """Extract the file name from a ``Content-Disposition`` header value.

      The header is scanned for its ``; key=value`` pieces and the first
      ``filename`` parameter that carries a value is used. The parameter
      name is matched case-insensitively. Surrounding whitespace is
      stripped, a quoted value has its double quotes removed and its
      ``\\\\`` / ``\\"`` escapes undone, and any directory components are
      discarded so that only the final path component is returned.

      :param str content_disposition: Raw value of the header; may be an
          empty string.
      :returns: The bare file name (possibly an empty string when the value
          ends with a path separator), or ``None`` when the header has no
          ``filename`` parameter with a value.
      :rtype: str
      """
      for match in _OPTION_HEADER_PIECE_RE.finditer(content_disposition):
          key, value = match.groups()
          # ``value`` is None for a bare ``; filename`` piece without ``=``;
          # skip it instead of handing None to os.path.split().
          if key.lower() != 'filename' or value is None:
              continue

          # The unquoted alternative of the pattern swallows trailing blanks.
          value = value.strip()
          if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
              # quoted-string: drop the quotes and undo the two escapes that
              # can legitimately appear inside them
              value = value[1:-1].replace('\\\\', '\\').replace('\\"', '"')

          # ignore any directory paths in the filename
          return os.path.split(value)[1]
      return None
  def get_download_file_path(response, path):
      """Work out where a streamed response should be saved.

      Three cases are handled:

      * ``path`` is non-empty and is not an existing directory: it is taken
        to be the target file path and returned unchanged.
      * ``path`` is an existing directory: the file name announced by the
        server in the ``Content-Disposition`` header is joined onto it.
      * ``path`` is empty or ``None``: the server-announced file name is
        returned on its own, i.e. relative to the process' current working
        directory.

      :param response: A Response object from requests
      :type response: requests.models.Response
      :param str path: Directory or file path.
      :returns: file path to download as
      :rtype: str
      :raises: :class:`requests_toolbelt.exceptions.StreamingError` when a
          file name is needed from the response but none is found
      """
      # An explicit, non-directory path wins outright.
      if path and not os.path.isdir(path):
          return path

      # From here on ``path`` is either falsy or an existing directory, so
      # the name has to come from the server.
      disposition = response.headers.get('content-disposition', '')
      server_name = _get_filename(disposition)
      if not server_name:
          raise exc.StreamingError('No filename given to stream response to')

      if path:
          # directory to download to
          return os.path.join(path, server_name)

      # fallback to downloading to current working directory
      return server_name
  def stream_response_to_file(response, path=None, chunksize=_DEFAULT_CHUNKSIZE):
      """Stream a response body to the specified file.

      Either use the ``path`` provided or use the name provided in the
      ``Content-Disposition`` header.

      .. warning::

          If you pass this function an open file-like object as the ``path``
          parameter, the function will not close that file for you.

      .. warning::

          This function will not automatically close the response object
          passed in as the ``response`` parameter.

      If a ``path`` parameter is a directory, this function will parse the
      ``Content-Disposition`` header on the response to determine the name of
      the file as reported by the server, and write the file into the
      specified directory. If no ``path`` parameter is supplied, this function
      will default to the process' current working directory.

      .. code-block:: python

          import requests
          from requests_toolbelt import exceptions
          from requests_toolbelt.downloadutils import stream

          r = requests.get(url, stream=True)
          try:
              filename = stream.stream_response_to_file(r)
          except exceptions.StreamingError as e:
              # The toolbelt could not find the filename in the
              # Content-Disposition
              print(e)

      You can also specify the filename as a string. This will be passed to
      the built-in :func:`open` and we will read the content into the file.

      .. code-block:: python

          import requests
          from requests_toolbelt.downloadutils import stream

          r = requests.get(url, stream=True)
          filename = stream.stream_response_to_file(r, path='myfile')

      If the calculated download file path already exists, this function will
      raise a StreamingError.

      Instead, if you want to manage the file object yourself, you need to
      provide either a :class:`io.BytesIO` object or a file opened with the
      `'b'` flag. See the two examples below for more details.

      .. code-block:: python

          import requests
          from requests_toolbelt.downloadutils import stream

          with open('myfile', 'wb') as fd:
              r = requests.get(url, stream=True)
              filename = stream.stream_response_to_file(r, path=fd)

          print('{0} saved to {1}'.format(url, filename))

      .. code-block:: python

          import io
          import requests
          from requests_toolbelt.downloadutils import stream

          b = io.BytesIO()
          r = requests.get(url, stream=True)
          filename = stream.stream_response_to_file(r, path=b)
          assert filename is None

      :param response: A Response object from requests
      :type response: requests.models.Response
      :param path: *(optional)*, Either a string with the path to the location
          to save the response content, or a file-like object expecting bytes.
      :type path: :class:`str`, or object with a :meth:`write`
      :param int chunksize: (optional), Size of chunk to attempt to stream
          (default 512B).
      :returns: The name of the file, if one can be determined, else None
      :rtype: str
      :raises: :class:`requests_toolbelt.exceptions.StreamingError`
      """
      # Anything exposing a callable ``write`` is treated as an already-open
      # file object. The builtin ``callable`` is used because the
      # ``collections.Callable`` alias no longer exists on Python 3.10+.
      # Strings and ``None`` have no ``write`` attribute, so no separate
      # truthiness check on ``path`` is needed.
      if callable(getattr(path, 'write', None)):
          # Caller-managed file: write into it but leave closing to the caller.
          for chunk in response.iter_content(chunk_size=chunksize):
              path.write(chunk)
          return getattr(path, 'name', None)

      filename = get_download_file_path(response, path)
      if os.path.exists(filename):
          raise exc.StreamingError("File already exists: %s" % filename)

      # The ``with`` block guarantees the file we opened is closed even when
      # reading from the response or writing to disk fails part-way through.
      with open(filename, 'wb') as fd:
          for chunk in response.iter_content(chunk_size=chunksize):
              fd.write(chunk)
      return filename