openers.py 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. # -*- coding: utf-8 -*-
  2. import sys
  3. PY3k = sys.version_info >= (3,)
  4. if PY3k:
  5. from urllib.request import urlopen
  6. from urllib.parse import urlencode
  7. from urllib.error import HTTPError
  8. basestring = (str, bytes)
  9. else:
  10. from urllib2 import urlopen # NOQA
  11. from urllib import urlencode # NOQA
  12. from urllib2 import HTTPError
  13. try:
  14. import requests
  15. HAS_REQUEST = True
  16. except ImportError:
  17. HAS_REQUEST = False
  18. allowed_args = (
  19. 'auth', 'data', 'headers', 'verify', 'cert', 'config', 'hooks', 'proxies', 'cookies')
  20. def _query(url, method, kwargs):
  21. data = None
  22. if 'data' in kwargs:
  23. data = kwargs.pop('data')
  24. if type(data) in (dict, list, tuple):
  25. data = urlencode(data)
  26. if isinstance(method, basestring) and \
  27. method.lower() == 'get' and data:
  28. if '?' not in url:
  29. url += '?'
  30. elif url[-1] not in ('?', '&'):
  31. url += '&'
  32. url += data
  33. data = None
  34. if data and PY3k:
  35. data = data.encode('utf-8')
  36. return url, data
  37. def _requests(url, kwargs):
  38. encoding = kwargs.get('encoding')
  39. method = kwargs.get('method', 'get').lower()
  40. meth = getattr(requests, str(method))
  41. if method == 'get':
  42. url, data = _query(url, method, kwargs)
  43. kw = {}
  44. for k in allowed_args:
  45. if k in kwargs:
  46. kw[k] = kwargs[k]
  47. resp = meth(url=url, **kw)
  48. if not (200 <= resp.status_code < 300):
  49. raise HTTPError(resp.url, resp.status_code,
  50. resp.reason, resp.headers, None)
  51. if encoding:
  52. resp.encoding = encoding
  53. html = resp.text
  54. return html
  55. def _urllib(url, kwargs):
  56. method = kwargs.get('method')
  57. url, data = _query(url, method, kwargs)
  58. return urlopen(url, data)
  59. def url_opener(url, kwargs):
  60. if HAS_REQUEST:
  61. return _requests(url, kwargs)
  62. return _urllib(url, kwargs)