__init__.py 3.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. """
  2. This module provides the API for ``requests_toolbelt.threaded``.
  3. The module provides a clean and simple API for making requests via a thread
  4. pool. The thread pool will use sessions for increased performance.
  5. A simple use-case is:
  6. .. code-block:: python
  7. from requests_toolbelt import threaded
  8. urls_to_get = [{
  9. 'url': 'https://api.github.com/users/sigmavirus24',
  10. 'method': 'GET',
  11. }, {
  12. 'url': 'https://api.github.com/repos/requests/toolbelt',
  13. 'method': 'GET',
  14. }, {
  15. 'url': 'https://google.com',
  16. 'method': 'GET',
  17. }]
  18. responses, errors = threaded.map(urls_to_get)
  19. By default, the threaded submodule will detect the number of CPUs your
  20. computer has and use that if no other number of processes is selected. To
  21. change this, always use the keyword argument ``num_processes``. Using the
  22. above example, we would expand it like so:
  23. .. code-block:: python
  24. responses, errors = threaded.map(urls_to_get, num_processes=10)
  25. You can also customize how a :class:`requests.Session` is initialized by
  26. creating a callback function:
  27. .. code-block:: python
  28. from requests_toolbelt import user_agent
  29. def initialize_session(session):
  30. session.headers['User-Agent'] = user_agent('my-scraper', '0.1')
  31. session.headers['Accept'] = 'application/json'
  32. responses, errors = threaded.map(urls_to_get,
  33. initializer=initialize_session)
  34. .. autofunction:: requests_toolbelt.threaded.map
  35. Inspiration is blatantly drawn from the standard library's multiprocessing
  36. library. See the following references:
  37. - multiprocessing's `pool source`_
  38. - map and map_async `inspiration`_
  39. .. _pool source:
  40. https://hg.python.org/cpython/file/8ef4f75a8018/Lib/multiprocessing/pool.py
  41. .. _inspiration:
  42. https://hg.python.org/cpython/file/8ef4f75a8018/Lib/multiprocessing/pool.py#l340
  43. """
  44. from . import pool
  45. from .._compat import queue
  46. def map(requests, **kwargs):
  47. r"""Simple interface to the threaded Pool object.
  48. This function takes a list of dictionaries representing requests to make
  49. using Sessions in threads and returns a tuple where the first item is
  50. a generator of successful responses and the second is a generator of
  51. exceptions.
  52. :param list requests:
  53. Collection of dictionaries representing requests to make with the Pool
  54. object.
  55. :param \*\*kwargs:
  56. Keyword arguments that are passed to the
  57. :class:`~requests_toolbelt.threaded.pool.Pool` object.
  58. :returns: Tuple of responses and exceptions from the pool
  59. :rtype: (:class:`~requests_toolbelt.threaded.pool.ThreadResponse`,
  60. :class:`~requests_toolbelt.threaded.pool.ThreadException`)
  61. """
  62. if not (requests and all(isinstance(r, dict) for r in requests)):
  63. raise ValueError('map expects a list of dictionaries.')
  64. # Build our queue of requests
  65. job_queue = queue.Queue()
  66. for request in requests:
  67. job_queue.put(request)
  68. # Ensure the user doesn't try to pass their own job_queue
  69. kwargs['job_queue'] = job_queue
  70. threadpool = pool.Pool(**kwargs)
  71. threadpool.join_all()
  72. return threadpool.responses(), threadpool.exceptions()