scgi_base.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544
  1. # Copyright (c) 2005, 2006 Allan Saddi <allan@saddi.com>
  2. # All rights reserved.
  3. #
  4. # Redistribution and use in source and binary forms, with or without
  5. # modification, are permitted provided that the following conditions
  6. # are met:
  7. # 1. Redistributions of source code must retain the above copyright
  8. # notice, this list of conditions and the following disclaimer.
  9. # 2. Redistributions in binary form must reproduce the above copyright
  10. # notice, this list of conditions and the following disclaimer in the
  11. # documentation and/or other materials provided with the distribution.
  12. #
  13. # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  14. # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  15. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  16. # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  17. # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  18. # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  19. # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  20. # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  21. # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  22. # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  23. # SUCH DAMAGE.
  24. #
  25. # $Id$
  26. __author__ = 'Allan Saddi <allan@saddi.com>'
  27. __version__ = '$Revision$'
  28. import sys
  29. import logging
  30. import socket
  31. import select
  32. import errno
  33. import cStringIO as StringIO
  34. import signal
  35. import datetime
  36. import os
  37. import warnings
  38. # Threads are required. If you want a non-threaded (forking) version, look at
  39. # SWAP <http://www.idyll.org/~t/www-tools/wsgi/>.
  40. import thread
  41. import threading
  42. __all__ = ['BaseSCGIServer']
  43. class NoDefault(object):
  44. pass
  45. # The main classes use this name for logging.
  46. LoggerName = 'scgi-wsgi'
  47. # Set up module-level logger.
  48. console = logging.StreamHandler()
  49. console.setLevel(logging.DEBUG)
  50. console.setFormatter(logging.Formatter('%(asctime)s : %(message)s',
  51. '%Y-%m-%d %H:%M:%S'))
  52. logging.getLogger(LoggerName).addHandler(console)
  53. del console
  54. class ProtocolError(Exception):
  55. """
  56. Exception raised when the server does something unexpected or
  57. sends garbled data. Usually leads to a Connection closing.
  58. """
  59. pass
  60. def recvall(sock, length):
  61. """
  62. Attempts to receive length bytes from a socket, blocking if necessary.
  63. (Socket may be blocking or non-blocking.)
  64. """
  65. dataList = []
  66. recvLen = 0
  67. while length:
  68. try:
  69. data = sock.recv(length)
  70. except socket.error, e:
  71. if e[0] == errno.EAGAIN:
  72. select.select([sock], [], [])
  73. continue
  74. else:
  75. raise
  76. if not data: # EOF
  77. break
  78. dataList.append(data)
  79. dataLen = len(data)
  80. recvLen += dataLen
  81. length -= dataLen
  82. return ''.join(dataList), recvLen
  83. def readNetstring(sock):
  84. """
  85. Attempt to read a netstring from a socket.
  86. """
  87. # First attempt to read the length.
  88. size = ''
  89. while True:
  90. try:
  91. c = sock.recv(1)
  92. except socket.error, e:
  93. if e[0] == errno.EAGAIN:
  94. select.select([sock], [], [])
  95. continue
  96. else:
  97. raise
  98. if c == ':':
  99. break
  100. if not c:
  101. raise EOFError
  102. size += c
  103. # Try to decode the length.
  104. try:
  105. size = int(size)
  106. if size < 0:
  107. raise ValueError
  108. except ValueError:
  109. raise ProtocolError, 'invalid netstring length'
  110. # Now read the string.
  111. s, length = recvall(sock, size)
  112. if length < size:
  113. raise EOFError
  114. # Lastly, the trailer.
  115. trailer, length = recvall(sock, 1)
  116. if length < 1:
  117. raise EOFError
  118. if trailer != ',':
  119. raise ProtocolError, 'invalid netstring trailer'
  120. return s
  121. class StdoutWrapper(object):
  122. """
  123. Wrapper for sys.stdout so we know if data has actually been written.
  124. """
  125. def __init__(self, stdout):
  126. self._file = stdout
  127. self.dataWritten = False
  128. def write(self, data):
  129. if data:
  130. self.dataWritten = True
  131. self._file.write(data)
  132. def writelines(self, lines):
  133. for line in lines:
  134. self.write(line)
  135. def __getattr__(self, name):
  136. return getattr(self._file, name)
  137. class Request(object):
  138. """
  139. Encapsulates data related to a single request.
  140. Public attributes:
  141. environ - Environment variables from web server.
  142. stdin - File-like object representing the request body.
  143. stdout - File-like object for writing the response.
  144. """
  145. def __init__(self, conn, environ, input, output):
  146. self._conn = conn
  147. self.environ = environ
  148. self.stdin = input
  149. self.stdout = StdoutWrapper(output)
  150. self.logger = logging.getLogger(LoggerName)
  151. def run(self):
  152. self.logger.info('%s %s%s',
  153. self.environ['REQUEST_METHOD'],
  154. self.environ.get('SCRIPT_NAME', ''),
  155. self.environ.get('PATH_INFO', ''))
  156. start = datetime.datetime.now()
  157. try:
  158. self._conn.server.handler(self)
  159. except:
  160. self.logger.exception('Exception caught from handler')
  161. if not self.stdout.dataWritten:
  162. self._conn.server.error(self)
  163. end = datetime.datetime.now()
  164. handlerTime = end - start
  165. self.logger.debug('%s %s%s done (%.3f secs)',
  166. self.environ['REQUEST_METHOD'],
  167. self.environ.get('SCRIPT_NAME', ''),
  168. self.environ.get('PATH_INFO', ''),
  169. handlerTime.seconds +
  170. handlerTime.microseconds / 1000000.0)
  171. class Connection(object):
  172. """
  173. Represents a single client (web server) connection. A single request
  174. is handled, after which the socket is closed.
  175. """
  176. def __init__(self, sock, addr, server):
  177. self._sock = sock
  178. self._addr = addr
  179. self.server = server
  180. self.logger = logging.getLogger(LoggerName)
  181. def run(self):
  182. if len(self._addr) == 2:
  183. self.logger.debug('Connection starting up (%s:%d)',
  184. self._addr[0], self._addr[1])
  185. try:
  186. self.processInput()
  187. except (EOFError, KeyboardInterrupt):
  188. pass
  189. except ProtocolError, e:
  190. self.logger.error("Protocol error '%s'", str(e))
  191. except:
  192. self.logger.exception('Exception caught in Connection')
  193. if len(self._addr) == 2:
  194. self.logger.debug('Connection shutting down (%s:%d)',
  195. self._addr[0], self._addr[1])
  196. # All done!
  197. self._sock.close()
  198. def processInput(self):
  199. # Read headers
  200. headers = readNetstring(self._sock)
  201. headers = headers.split('\x00')[:-1]
  202. if len(headers) % 2 != 0:
  203. raise ProtocolError, 'invalid headers'
  204. environ = {}
  205. for i in range(len(headers) / 2):
  206. environ[headers[2*i]] = headers[2*i+1]
  207. clen = environ.get('CONTENT_LENGTH')
  208. if clen is None:
  209. raise ProtocolError, 'missing CONTENT_LENGTH'
  210. try:
  211. clen = int(clen)
  212. if clen < 0:
  213. raise ValueError
  214. except ValueError:
  215. raise ProtocolError, 'invalid CONTENT_LENGTH'
  216. self._sock.setblocking(1)
  217. if clen:
  218. input = self._sock.makefile('r')
  219. else:
  220. # Empty input.
  221. input = StringIO.StringIO()
  222. # stdout
  223. output = self._sock.makefile('w')
  224. # Allocate Request
  225. req = Request(self, environ, input, output)
  226. # Run it.
  227. req.run()
  228. output.close()
  229. input.close()
  230. class BaseSCGIServer(object):
  231. # What Request class to use.
  232. requestClass = Request
  233. def __init__(self, application, scriptName=NoDefault, environ=None,
  234. multithreaded=True, multiprocess=False,
  235. bindAddress=('localhost', 4000), umask=None,
  236. allowedServers=NoDefault,
  237. loggingLevel=logging.INFO, debug=True):
  238. """
  239. scriptName is the initial portion of the URL path that "belongs"
  240. to your application. It is used to determine PATH_INFO (which doesn't
  241. seem to be passed in). An empty scriptName means your application
  242. is mounted at the root of your virtual host.
  243. environ, which must be a dictionary, can contain any additional
  244. environment variables you want to pass to your application.
  245. Set multithreaded to False if your application is not thread-safe.
  246. Set multiprocess to True to explicitly set wsgi.multiprocess to
  247. True. (Only makes sense with threaded servers.)
  248. bindAddress is the address to bind to, which must be a string or
  249. a tuple of length 2. If a tuple, the first element must be a string,
  250. which is the host name or IPv4 address of a local interface. The
  251. 2nd element of the tuple is the port number. If a string, it will
  252. be interpreted as a filename and a UNIX socket will be opened.
  253. If binding to a UNIX socket, umask may be set to specify what
  254. the umask is to be changed to before the socket is created in the
  255. filesystem. After the socket is created, the previous umask is
  256. restored.
  257. allowedServers must be None or a list of strings representing the
  258. IPv4 addresses of servers allowed to connect. None means accept
  259. connections from anywhere. By default, it is a list containing
  260. the single item '127.0.0.1'.
  261. loggingLevel sets the logging level of the module-level logger.
  262. """
  263. if environ is None:
  264. environ = {}
  265. self.application = application
  266. self.scriptName = scriptName
  267. self.environ = environ
  268. self.multithreaded = multithreaded
  269. self.multiprocess = multiprocess
  270. self.debug = debug
  271. self._bindAddress = bindAddress
  272. self._umask = umask
  273. if allowedServers is NoDefault:
  274. allowedServers = ['127.0.0.1']
  275. self._allowedServers = allowedServers
  276. # Used to force single-threadedness.
  277. self._appLock = thread.allocate_lock()
  278. self.logger = logging.getLogger(LoggerName)
  279. self.logger.setLevel(loggingLevel)
  280. def _setupSocket(self):
  281. """Creates and binds the socket for communication with the server."""
  282. oldUmask = None
  283. if type(self._bindAddress) is str:
  284. # Unix socket
  285. sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  286. try:
  287. os.unlink(self._bindAddress)
  288. except OSError:
  289. pass
  290. if self._umask is not None:
  291. oldUmask = os.umask(self._umask)
  292. else:
  293. # INET socket
  294. assert type(self._bindAddress) is tuple
  295. assert len(self._bindAddress) == 2
  296. sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  297. sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
  298. sock.bind(self._bindAddress)
  299. sock.listen(socket.SOMAXCONN)
  300. if oldUmask is not None:
  301. os.umask(oldUmask)
  302. return sock
  303. def _cleanupSocket(self, sock):
  304. """Closes the main socket."""
  305. sock.close()
  306. def _isClientAllowed(self, addr):
  307. ret = self._allowedServers is None or \
  308. len(addr) != 2 or \
  309. (len(addr) == 2 and addr[0] in self._allowedServers)
  310. if not ret:
  311. self.logger.warning('Server connection from %s disallowed',
  312. addr[0])
  313. return ret
  314. def handler(self, request):
  315. """
  316. WSGI handler. Sets up WSGI environment, calls the application,
  317. and sends the application's response.
  318. """
  319. environ = request.environ
  320. environ.update(self.environ)
  321. environ['wsgi.version'] = (1,0)
  322. environ['wsgi.input'] = request.stdin
  323. environ['wsgi.errors'] = sys.stderr
  324. environ['wsgi.multithread'] = self.multithreaded
  325. environ['wsgi.multiprocess'] = self.multiprocess
  326. environ['wsgi.run_once'] = False
  327. if environ.get('HTTPS', 'off') in ('on', '1'):
  328. environ['wsgi.url_scheme'] = 'https'
  329. else:
  330. environ['wsgi.url_scheme'] = 'http'
  331. self._sanitizeEnv(environ)
  332. headers_set = []
  333. headers_sent = []
  334. result = None
  335. def write(data):
  336. assert type(data) is str, 'write() argument must be string'
  337. assert headers_set, 'write() before start_response()'
  338. if not headers_sent:
  339. status, responseHeaders = headers_sent[:] = headers_set
  340. found = False
  341. for header,value in responseHeaders:
  342. if header.lower() == 'content-length':
  343. found = True
  344. break
  345. if not found and result is not None:
  346. try:
  347. if len(result) == 1:
  348. responseHeaders.append(('Content-Length',
  349. str(len(data))))
  350. except:
  351. pass
  352. s = 'Status: %s\r\n' % status
  353. for header in responseHeaders:
  354. s += '%s: %s\r\n' % header
  355. s += '\r\n'
  356. request.stdout.write(s)
  357. request.stdout.write(data)
  358. request.stdout.flush()
  359. def start_response(status, response_headers, exc_info=None):
  360. if exc_info:
  361. try:
  362. if headers_sent:
  363. # Re-raise if too late
  364. raise exc_info[0], exc_info[1], exc_info[2]
  365. finally:
  366. exc_info = None # avoid dangling circular ref
  367. else:
  368. assert not headers_set, 'Headers already set!'
  369. assert type(status) is str, 'Status must be a string'
  370. assert len(status) >= 4, 'Status must be at least 4 characters'
  371. assert int(status[:3]), 'Status must begin with 3-digit code'
  372. assert status[3] == ' ', 'Status must have a space after code'
  373. assert type(response_headers) is list, 'Headers must be a list'
  374. if __debug__:
  375. for name,val in response_headers:
  376. assert type(name) is str, 'Header name "%s" must be a string' % name
  377. assert type(val) is str, 'Value of header "%s" must be a string' % name
  378. headers_set[:] = [status, response_headers]
  379. return write
  380. if not self.multithreaded:
  381. self._appLock.acquire()
  382. try:
  383. try:
  384. result = self.application(environ, start_response)
  385. try:
  386. for data in result:
  387. if data:
  388. write(data)
  389. if not headers_sent:
  390. write('') # in case body was empty
  391. finally:
  392. if hasattr(result, 'close'):
  393. result.close()
  394. except socket.error, e:
  395. if e[0] != errno.EPIPE:
  396. raise # Don't let EPIPE propagate beyond server
  397. finally:
  398. if not self.multithreaded:
  399. self._appLock.release()
  400. def _sanitizeEnv(self, environ):
  401. """Fill-in/deduce missing values in environ."""
  402. reqUri = None
  403. if environ.has_key('REQUEST_URI'):
  404. reqUri = environ['REQUEST_URI'].split('?', 1)
  405. # Ensure QUERY_STRING exists
  406. if not environ.has_key('QUERY_STRING') or not environ['QUERY_STRING']:
  407. if reqUri is not None and len(reqUri) > 1:
  408. environ['QUERY_STRING'] = reqUri[1]
  409. else:
  410. environ['QUERY_STRING'] = ''
  411. # Check WSGI_SCRIPT_NAME
  412. scriptName = environ.get('WSGI_SCRIPT_NAME')
  413. if scriptName is None:
  414. scriptName = self.scriptName
  415. else:
  416. warnings.warn('WSGI_SCRIPT_NAME environment variable for scgi '
  417. 'servers is deprecated',
  418. DeprecationWarning)
  419. if scriptName.lower() == 'none':
  420. scriptName = None
  421. if scriptName is None:
  422. # Do nothing (most likely coming from cgi2scgi)
  423. return
  424. if scriptName is NoDefault:
  425. # Pull SCRIPT_NAME/PATH_INFO from environment, with empty defaults
  426. if not environ.has_key('SCRIPT_NAME'):
  427. environ['SCRIPT_INFO'] = ''
  428. if not environ.has_key('PATH_INFO') or not environ['PATH_INFO']:
  429. if reqUri is not None:
  430. environ['PATH_INFO'] = reqUri[0]
  431. else:
  432. environ['PATH_INFO'] = ''
  433. else:
  434. # Configured scriptName
  435. warnings.warn('Configured SCRIPT_NAME is deprecated\n'
  436. 'Do not use WSGI_SCRIPT_NAME or the scriptName\n'
  437. 'keyword parameter -- they will be going away',
  438. DeprecationWarning)
  439. value = environ['SCRIPT_NAME']
  440. value += environ.get('PATH_INFO', '')
  441. if not value.startswith(scriptName):
  442. self.logger.warning('scriptName does not match request URI')
  443. environ['PATH_INFO'] = value[len(scriptName):]
  444. environ['SCRIPT_NAME'] = scriptName
  445. def error(self, request):
  446. """
  447. Override to provide custom error handling. Ideally, however,
  448. all errors should be caught at the application level.
  449. """
  450. if self.debug:
  451. import cgitb
  452. request.stdout.write('Content-Type: text/html\r\n\r\n' +
  453. cgitb.html(sys.exc_info()))
  454. else:
  455. errorpage = """<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
  456. <html><head>
  457. <title>Unhandled Exception</title>
  458. </head><body>
  459. <h1>Unhandled Exception</h1>
  460. <p>An unhandled exception was thrown by the application.</p>
  461. </body></html>
  462. """
  463. request.stdout.write('Content-Type: text/html\r\n\r\n' +
  464. errorpage)