# common.py (7.2 KB)
  1. import hashlib
  2. import logging
  3. import re
  4. import warnings
  5. from django.conf import settings
  6. from django.core.mail import mail_managers
  7. from django.core import urlresolvers
  8. from django import http
  9. from django.utils.deprecation import RemovedInDjango18Warning
  10. from django.utils.encoding import force_text
  11. from django.utils.http import urlquote
  12. from django.utils import six
  13. logger = logging.getLogger('django.request')
  14. class CommonMiddleware(object):
  15. """
  16. "Common" middleware for taking care of some basic operations:
  17. - Forbids access to User-Agents in settings.DISALLOWED_USER_AGENTS
  18. - URL rewriting: Based on the APPEND_SLASH and PREPEND_WWW settings,
  19. this middleware appends missing slashes and/or prepends missing
  20. "www."s.
  21. - If APPEND_SLASH is set and the initial URL doesn't end with a
  22. slash, and it is not found in urlpatterns, a new URL is formed by
  23. appending a slash at the end. If this new URL is found in
  24. urlpatterns, then an HTTP-redirect is returned to this new URL;
  25. otherwise the initial URL is processed as usual.
  26. - ETags: If the USE_ETAGS setting is set, ETags will be calculated from
  27. the entire page content and Not Modified responses will be returned
  28. appropriately.
  29. """
  30. def process_request(self, request):
  31. """
  32. Check for denied User-Agents and rewrite the URL based on
  33. settings.APPEND_SLASH and settings.PREPEND_WWW
  34. """
  35. # Check for denied User-Agents
  36. if 'HTTP_USER_AGENT' in request.META:
  37. for user_agent_regex in settings.DISALLOWED_USER_AGENTS:
  38. if user_agent_regex.search(request.META['HTTP_USER_AGENT']):
  39. logger.warning('Forbidden (User agent): %s', request.path,
  40. extra={
  41. 'status_code': 403,
  42. 'request': request
  43. }
  44. )
  45. return http.HttpResponseForbidden('<h1>Forbidden</h1>')
  46. # Check for a redirect based on settings.APPEND_SLASH
  47. # and settings.PREPEND_WWW
  48. host = request.get_host()
  49. old_url = [host, request.path]
  50. new_url = old_url[:]
  51. if (settings.PREPEND_WWW and old_url[0] and
  52. not old_url[0].startswith('www.')):
  53. new_url[0] = 'www.' + old_url[0]
  54. # Append a slash if APPEND_SLASH is set and the URL doesn't have a
  55. # trailing slash and there is no pattern for the current path
  56. if settings.APPEND_SLASH and (not old_url[1].endswith('/')):
  57. urlconf = getattr(request, 'urlconf', None)
  58. if (not urlresolvers.is_valid_path(request.path_info, urlconf) and
  59. urlresolvers.is_valid_path("%s/" % request.path_info, urlconf)):
  60. new_url[1] = new_url[1] + '/'
  61. if settings.DEBUG and request.method == 'POST':
  62. raise RuntimeError((""
  63. "You called this URL via POST, but the URL doesn't end "
  64. "in a slash and you have APPEND_SLASH set. Django can't "
  65. "redirect to the slash URL while maintaining POST data. "
  66. "Change your form to point to %s%s (note the trailing "
  67. "slash), or set APPEND_SLASH=False in your Django "
  68. "settings.") % (new_url[0], new_url[1]))
  69. if new_url == old_url:
  70. # No redirects required.
  71. return
  72. if new_url[0]:
  73. newurl = "%s://%s%s" % (
  74. request.scheme,
  75. new_url[0], urlquote(new_url[1]))
  76. else:
  77. newurl = urlquote(new_url[1])
  78. if request.META.get('QUERY_STRING', ''):
  79. if six.PY3:
  80. newurl += '?' + request.META['QUERY_STRING']
  81. else:
  82. # `query_string` is a bytestring. Appending it to the unicode
  83. # string `newurl` will fail if it isn't ASCII-only. This isn't
  84. # allowed; only broken software generates such query strings.
  85. # Better drop the invalid query string than crash (#15152).
  86. try:
  87. newurl += '?' + request.META['QUERY_STRING'].decode()
  88. except UnicodeDecodeError:
  89. pass
  90. return http.HttpResponsePermanentRedirect(newurl)
  91. def process_response(self, request, response):
  92. """
  93. Calculate the ETag, if needed.
  94. """
  95. if settings.SEND_BROKEN_LINK_EMAILS:
  96. warnings.warn("SEND_BROKEN_LINK_EMAILS is deprecated. "
  97. "Use BrokenLinkEmailsMiddleware instead.",
  98. RemovedInDjango18Warning, stacklevel=2)
  99. BrokenLinkEmailsMiddleware().process_response(request, response)
  100. if settings.USE_ETAGS:
  101. if response.has_header('ETag'):
  102. etag = response['ETag']
  103. elif response.streaming:
  104. etag = None
  105. else:
  106. etag = '"%s"' % hashlib.md5(response.content).hexdigest()
  107. if etag is not None:
  108. if (200 <= response.status_code < 300
  109. and request.META.get('HTTP_IF_NONE_MATCH') == etag):
  110. cookies = response.cookies
  111. response = http.HttpResponseNotModified()
  112. response.cookies = cookies
  113. else:
  114. response['ETag'] = etag
  115. return response
  116. class BrokenLinkEmailsMiddleware(object):
  117. def process_response(self, request, response):
  118. """
  119. Send broken link emails for relevant 404 NOT FOUND responses.
  120. """
  121. if response.status_code == 404 and not settings.DEBUG:
  122. domain = request.get_host()
  123. path = request.get_full_path()
  124. referer = force_text(request.META.get('HTTP_REFERER', ''), errors='replace')
  125. if not self.is_ignorable_request(request, path, domain, referer):
  126. ua = request.META.get('HTTP_USER_AGENT', '<none>')
  127. ip = request.META.get('REMOTE_ADDR', '<none>')
  128. mail_managers(
  129. "Broken %slink on %s" % (
  130. ('INTERNAL ' if self.is_internal_request(domain, referer) else ''),
  131. domain
  132. ),
  133. "Referrer: %s\nRequested URL: %s\nUser agent: %s\n"
  134. "IP address: %s\n" % (referer, path, ua, ip),
  135. fail_silently=True)
  136. return response
  137. def is_internal_request(self, domain, referer):
  138. """
  139. Returns True if the referring URL is the same domain as the current request.
  140. """
  141. # Different subdomains are treated as different domains.
  142. return bool(re.match("^https?://%s/" % re.escape(domain), referer))
  143. def is_ignorable_request(self, request, uri, domain, referer):
  144. """
  145. Returns True if the given request *shouldn't* notify the site managers.
  146. """
  147. # '?' in referer is identified as search engine source
  148. if (not referer or
  149. (not self.is_internal_request(domain, referer) and '?' in referer)):
  150. return True
  151. return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)