wsgi.py 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. from __future__ import unicode_literals
  2. import cgi
  3. import codecs
  4. import logging
  5. import sys
  6. from io import BytesIO
  7. from threading import Lock
  8. import warnings
  9. from django import http
  10. from django.conf import settings
  11. from django.core import signals
  12. from django.core.handlers import base
  13. from django.core.urlresolvers import set_script_prefix
  14. from django.utils import datastructures
  15. from django.utils.deprecation import RemovedInDjango19Warning
  16. from django.utils.encoding import force_str, force_text
  17. from django.utils.functional import cached_property
  18. from django.utils import six
  19. # For backwards compatibility -- lots of code uses this in the wild!
  20. from django.http.response import REASON_PHRASES as STATUS_CODE_TEXT # NOQA
  21. logger = logging.getLogger('django.request')
  22. # encode() and decode() expect the charset to be a native string.
  23. ISO_8859_1, UTF_8 = str('iso-8859-1'), str('utf-8')
  24. class LimitedStream(object):
  25. '''
  26. LimitedStream wraps another stream in order to not allow reading from it
  27. past specified amount of bytes.
  28. '''
  29. def __init__(self, stream, limit, buf_size=64 * 1024 * 1024):
  30. self.stream = stream
  31. self.remaining = limit
  32. self.buffer = b''
  33. self.buf_size = buf_size
  34. def _read_limited(self, size=None):
  35. if size is None or size > self.remaining:
  36. size = self.remaining
  37. if size == 0:
  38. return b''
  39. result = self.stream.read(size)
  40. self.remaining -= len(result)
  41. return result
  42. def read(self, size=None):
  43. if size is None:
  44. result = self.buffer + self._read_limited()
  45. self.buffer = b''
  46. elif size < len(self.buffer):
  47. result = self.buffer[:size]
  48. self.buffer = self.buffer[size:]
  49. else: # size >= len(self.buffer)
  50. result = self.buffer + self._read_limited(size - len(self.buffer))
  51. self.buffer = b''
  52. return result
  53. def readline(self, size=None):
  54. while b'\n' not in self.buffer and \
  55. (size is None or len(self.buffer) < size):
  56. if size:
  57. # since size is not None here, len(self.buffer) < size
  58. chunk = self._read_limited(size - len(self.buffer))
  59. else:
  60. chunk = self._read_limited()
  61. if not chunk:
  62. break
  63. self.buffer += chunk
  64. sio = BytesIO(self.buffer)
  65. if size:
  66. line = sio.readline(size)
  67. else:
  68. line = sio.readline()
  69. self.buffer = sio.read()
  70. return line
  71. class WSGIRequest(http.HttpRequest):
  72. def __init__(self, environ):
  73. script_name = get_script_name(environ)
  74. path_info = get_path_info(environ)
  75. if not path_info:
  76. # Sometimes PATH_INFO exists, but is empty (e.g. accessing
  77. # the SCRIPT_NAME URL without a trailing slash). We really need to
  78. # operate as if they'd requested '/'. Not amazingly nice to force
  79. # the path like this, but should be harmless.
  80. path_info = '/'
  81. self.environ = environ
  82. self.path_info = path_info
  83. self.path = '%s/%s' % (script_name.rstrip('/'), path_info.lstrip('/'))
  84. self.META = environ
  85. self.META['PATH_INFO'] = path_info
  86. self.META['SCRIPT_NAME'] = script_name
  87. self.method = environ['REQUEST_METHOD'].upper()
  88. _, content_params = cgi.parse_header(environ.get('CONTENT_TYPE', ''))
  89. if 'charset' in content_params:
  90. try:
  91. codecs.lookup(content_params['charset'])
  92. except LookupError:
  93. pass
  94. else:
  95. self.encoding = content_params['charset']
  96. self._post_parse_error = False
  97. try:
  98. content_length = int(environ.get('CONTENT_LENGTH'))
  99. except (ValueError, TypeError):
  100. content_length = 0
  101. self._stream = LimitedStream(self.environ['wsgi.input'], content_length)
  102. self._read_started = False
  103. self.resolver_match = None
  104. def _get_scheme(self):
  105. return self.environ.get('wsgi.url_scheme')
  106. def _get_request(self):
  107. warnings.warn('`request.REQUEST` is deprecated, use `request.GET` or '
  108. '`request.POST` instead.', RemovedInDjango19Warning, 2)
  109. if not hasattr(self, '_request'):
  110. self._request = datastructures.MergeDict(self.POST, self.GET)
  111. return self._request
  112. @cached_property
  113. def GET(self):
  114. # The WSGI spec says 'QUERY_STRING' may be absent.
  115. raw_query_string = get_bytes_from_wsgi(self.environ, 'QUERY_STRING', '')
  116. return http.QueryDict(raw_query_string, encoding=self._encoding)
  117. def _get_post(self):
  118. if not hasattr(self, '_post'):
  119. self._load_post_and_files()
  120. return self._post
  121. def _set_post(self, post):
  122. self._post = post
  123. @cached_property
  124. def COOKIES(self):
  125. raw_cookie = get_str_from_wsgi(self.environ, 'HTTP_COOKIE', '')
  126. return http.parse_cookie(raw_cookie)
  127. def _get_files(self):
  128. if not hasattr(self, '_files'):
  129. self._load_post_and_files()
  130. return self._files
  131. POST = property(_get_post, _set_post)
  132. FILES = property(_get_files)
  133. REQUEST = property(_get_request)
  134. class WSGIHandler(base.BaseHandler):
  135. initLock = Lock()
  136. request_class = WSGIRequest
  137. def __call__(self, environ, start_response):
  138. # Set up middleware if needed. We couldn't do this earlier, because
  139. # settings weren't available.
  140. if self._request_middleware is None:
  141. with self.initLock:
  142. try:
  143. # Check that middleware is still uninitialized.
  144. if self._request_middleware is None:
  145. self.load_middleware()
  146. except:
  147. # Unload whatever middleware we got
  148. self._request_middleware = None
  149. raise
  150. set_script_prefix(get_script_name(environ))
  151. signals.request_started.send(sender=self.__class__)
  152. try:
  153. request = self.request_class(environ)
  154. except UnicodeDecodeError:
  155. logger.warning('Bad Request (UnicodeDecodeError)',
  156. exc_info=sys.exc_info(),
  157. extra={
  158. 'status_code': 400,
  159. }
  160. )
  161. response = http.HttpResponseBadRequest()
  162. else:
  163. response = self.get_response(request)
  164. response._handler_class = self.__class__
  165. status = '%s %s' % (response.status_code, response.reason_phrase)
  166. response_headers = [(str(k), str(v)) for k, v in response.items()]
  167. for c in response.cookies.values():
  168. response_headers.append((str('Set-Cookie'), str(c.output(header=''))))
  169. start_response(force_str(status), response_headers)
  170. return response
  171. def get_path_info(environ):
  172. """
  173. Returns the HTTP request's PATH_INFO as a unicode string.
  174. """
  175. path_info = get_bytes_from_wsgi(environ, 'PATH_INFO', '/')
  176. # It'd be better to implement URI-to-IRI decoding, see #19508.
  177. return path_info.decode(UTF_8)
  178. def get_script_name(environ):
  179. """
  180. Returns the equivalent of the HTTP request's SCRIPT_NAME environment
  181. variable. If Apache mod_rewrite has been used, returns what would have been
  182. the script name prior to any rewriting (so it's the script name as seen
  183. from the client's perspective), unless the FORCE_SCRIPT_NAME setting is
  184. set (to anything).
  185. """
  186. if settings.FORCE_SCRIPT_NAME is not None:
  187. return force_text(settings.FORCE_SCRIPT_NAME)
  188. # If Apache's mod_rewrite had a whack at the URL, Apache set either
  189. # SCRIPT_URL or REDIRECT_URL to the full resource URL before applying any
  190. # rewrites. Unfortunately not every Web server (lighttpd!) passes this
  191. # information through all the time, so FORCE_SCRIPT_NAME, above, is still
  192. # needed.
  193. script_url = get_bytes_from_wsgi(environ, 'SCRIPT_URL', '')
  194. if not script_url:
  195. script_url = get_bytes_from_wsgi(environ, 'REDIRECT_URL', '')
  196. if script_url:
  197. path_info = get_bytes_from_wsgi(environ, 'PATH_INFO', '')
  198. script_name = script_url[:-len(path_info)]
  199. else:
  200. script_name = get_bytes_from_wsgi(environ, 'SCRIPT_NAME', '')
  201. # It'd be better to implement URI-to-IRI decoding, see #19508.
  202. return script_name.decode(UTF_8)
  203. def get_bytes_from_wsgi(environ, key, default):
  204. """
  205. Get a value from the WSGI environ dictionary as bytes.
  206. key and default should be str objects. Under Python 2 they may also be
  207. unicode objects provided they only contain ASCII characters.
  208. """
  209. value = environ.get(str(key), str(default))
  210. # Under Python 3, non-ASCII values in the WSGI environ are arbitrarily
  211. # decoded with ISO-8859-1. This is wrong for Django websites where UTF-8
  212. # is the default. Re-encode to recover the original bytestring.
  213. return value if six.PY2 else value.encode(ISO_8859_1)
  214. def get_str_from_wsgi(environ, key, default):
  215. """
  216. Get a value from the WSGI environ dictionary as bytes.
  217. key and default should be str objects. Under Python 2 they may also be
  218. unicode objects provided they only contain ASCII characters.
  219. """
  220. value = environ.get(str(key), str(default))
  221. # Same comment as above
  222. return value if six.PY2 else value.encode(ISO_8859_1).decode(UTF_8)