deprecated.py 2.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. # -*- coding: utf-8 -*-
  2. """A collection of functions deprecated in requests.utils."""
  3. import re
  4. import sys
  5. from requests import utils
  6. find_charset = re.compile(
  7. br'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I
  8. ).findall
  9. find_pragma = re.compile(
  10. br'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I
  11. ).findall
  12. find_xml = re.compile(
  13. br'^<\?xml.*?encoding=["\']*(.+?)["\'>]'
  14. ).findall
  15. def get_encodings_from_content(content):
  16. """Return encodings from given content string.
  17. .. code-block:: python
  18. import requests
  19. from requests_toolbelt.utils import deprecated
  20. r = requests.get(url)
  21. encodings = deprecated.get_encodings_from_content(r)
  22. :param content: bytestring to extract encodings from
  23. :type content: bytes
  24. :return: encodings detected in the provided content
  25. :rtype: list(str)
  26. """
  27. encodings = (find_charset(content) + find_pragma(content)
  28. + find_xml(content))
  29. if (3, 0) <= sys.version_info < (4, 0):
  30. encodings = [encoding.decode('utf8') for encoding in encodings]
  31. return encodings
  32. def get_unicode_from_response(response):
  33. """Return the requested content back in unicode.
  34. This will first attempt to retrieve the encoding from the response
  35. headers. If that fails, it will use
  36. :func:`requests_toolbelt.utils.deprecated.get_encodings_from_content`
  37. to determine encodings from HTML elements.
  38. .. code-block:: python
  39. import requests
  40. from requests_toolbelt.utils import deprecated
  41. r = requests.get(url)
  42. text = deprecated.get_unicode_from_response(r)
  43. :param response: Response object to get unicode content from.
  44. :type response: requests.models.Response
  45. """
  46. tried_encodings = set()
  47. # Try charset from content-type
  48. encoding = utils.get_encoding_from_headers(response.headers)
  49. if encoding:
  50. try:
  51. return str(response.content, encoding)
  52. except UnicodeError:
  53. tried_encodings.add(encoding.lower())
  54. encodings = get_encodings_from_content(response.content)
  55. for _encoding in encodings:
  56. _encoding = _encoding.lower()
  57. if _encoding in tried_encodings:
  58. continue
  59. try:
  60. return str(response.content, _encoding)
  61. except UnicodeError:
  62. tried_encodings.add(_encoding)
  63. # Fall back:
  64. if encoding:
  65. try:
  66. return str(response.content, encoding, errors='replace')
  67. except TypeError:
  68. pass
  69. return response.text