Download Install Tutorial Docs FAQ Tools WikiLicense Team IRC Planet Involvement Shop Book

root/trunk/cherrypy/wsgiserver/__init__.py

Revision 2007 (checked in by fumanchu, 2 months ago)

Marking CP 3.1.0 final.

  • Property svn:eol-style set to native
Line 
1 """A high-speed, production ready, thread pooled, generic WSGI server.
2
3 Simplest example on how to use this module directly
4 (without using CherryPy's application machinery):
5
6     from cherrypy import wsgiserver
7     
8     def my_crazy_app(environ, start_response):
9         status = '200 OK'
10         response_headers = [('Content-type','text/plain')]
11         start_response(status, response_headers)
12         return ['Hello world!\n']
13     
14     server = wsgiserver.CherryPyWSGIServer(
15                 ('0.0.0.0', 8070), my_crazy_app,
16                 server_name='www.cherrypy.example')
17     
18 The CherryPy WSGI server can serve as many WSGI applications
19 as you want in one instance by using a WSGIPathInfoDispatcher:
20     
21     d = WSGIPathInfoDispatcher({'/': my_crazy_app, '/blog': my_blog_app})
22     server = wsgiserver.CherryPyWSGIServer(('0.0.0.0', 80), d)
23     
24 Want SSL support? Just set these attributes:
25     
26     server.ssl_certificate = <filename>
27     server.ssl_private_key = <filename>
28     
29     if __name__ == '__main__':
30         try:
31             server.start()
32         except KeyboardInterrupt:
33             server.stop()
34
35 This won't call the CherryPy engine (application side) at all, only the
36 WSGI server, which is independent of the rest of CherryPy. Don't
37 let the name "CherryPyWSGIServer" throw you; the name merely reflects
38 its origin, not its coupling.
39
40 For those of you wanting to understand internals of this module, here's the
41 basic call flow. The server's listening thread runs a very tight loop,
42 sticking incoming connections onto a Queue:
43
44     server = CherryPyWSGIServer(...)
45     server.start()
46     while True:
47         tick()
48         # This blocks until a request comes in:
49         child = socket.accept()
50         conn = HTTPConnection(child, ...)
51         server.requests.put(conn)
52
53 Worker threads are kept in a pool and poll the Queue, popping off and then
54 handling each connection in turn. Each connection can consist of an arbitrary
55 number of requests and their responses, so we run a nested loop:
56
57     while True:
58         conn = server.requests.get()
59         conn.communicate()
60         ->  while True:
61                 req = HTTPRequest(...)
62                 req.parse_request()
63                 ->  # Read the Request-Line, e.g. "GET /page HTTP/1.1"
64                     req.rfile.readline()
65                     req.read_headers()
66                 req.respond()
67                 ->  response = wsgi_app(...)
68                     try:
69                         for chunk in response:
70                             if chunk:
71                                 req.write(chunk)
72                     finally:
73                         if hasattr(response, "close"):
74                             response.close()
75                 if req.close_connection:
76                     return
77 """
78
79
80 import base64
81 import os
82 import Queue
83 import re
84 quoted_slash = re.compile("(?i)%2F")
85 import rfc822
86 import socket
87 try:
88     import cStringIO as StringIO
89 except ImportError:
90     import StringIO
91 import sys
92 import threading
93 import time
94 import traceback
95 from urllib import unquote
96 from urlparse import urlparse
97 import warnings
98
99 try:
100     from OpenSSL import SSL
101     from OpenSSL import crypto
102 except ImportError:
103     SSL = None
104
105 import errno
106
def plat_specific_errors(*errnames):
    """Return error numbers for all errors in errnames on this platform.

    The 'errno' module contains different global constants depending on
    the specific platform (OS). This function will return the list of
    numeric values for a given list of potential names.

    errnames: names of errno constants, e.g. "EINTR" or "WSAEINTR".
        Names the errno module does not define on this platform are
        silently skipped.

    Returns a new list holding each matching number once. It is always
    a real list (never a dict view), so callers may append to it.
    """
    nums = [getattr(errno, name) for name in errnames if hasattr(errno, name)]
    # De-dupe: more than one name may resolve to the same number.
    # list() guarantees a mutable list even where dict.keys() would
    # hand back a view object.
    return list(dict.fromkeys(nums))
118
# Each list below holds the numeric errno values that exist on the
# current platform for the given POSIX and Winsock ("WSA"-prefixed) names.

socket_error_eintr = plat_specific_errors(
    "EINTR",
    "WSAEINTR",
)

# The literal string "timed out" rides along with the numbers: a socket
# timeout surfaces as socket.error("timed out"), so there is no errno
# value to list for it.
socket_errors_to_ignore = plat_specific_errors(
    "EPIPE",
    "EBADF",
    "WSAEBADF",
    "ENOTSOCK",
    "WSAENOTSOCK",
    "ETIMEDOUT",
    "WSAETIMEDOUT",
    "ECONNREFUSED",
    "WSAECONNREFUSED",
    "ECONNRESET",
    "WSAECONNRESET",
    "ECONNABORTED",
    "WSAECONNABORTED",
    "ENETRESET",
    "WSAENETRESET",
    "EHOSTDOWN",
    "EHOSTUNREACH",
) + ["timed out"]

socket_errors_nonblocking = plat_specific_errors(
    'EAGAIN',
    'EWOULDBLOCK',
    'WSAEWOULDBLOCK',
)
136
# Upper-cased names of the request headers whose repeated occurrences
# are merged into one comma-separated value while headers are read.
comma_separated_headers = [
    'ACCEPT',
    'ACCEPT-CHARSET',
    'ACCEPT-ENCODING',
    'ACCEPT-LANGUAGE',
    'ACCEPT-RANGES',
    'ALLOW',
    'CACHE-CONTROL',
    'CONNECTION',
    'CONTENT-ENCODING',
    'CONTENT-LANGUAGE',
    'EXPECT',
    'IF-MATCH',
    'IF-NONE-MATCH',
    'PRAGMA',
    'PROXY-AUTHENTICATE',
    'TE',
    'TRAILER',
    'TRANSFER-ENCODING',
    'UPGRADE',
    'VARY',
    'VIA',
    'WARNING',
    'WWW-AUTHENTICATE',
]
143
144
class WSGIPathInfoDispatcher(object):
    """A WSGI dispatcher for dispatch based on the PATH_INFO.

    apps: a dict or list of (path_prefix, app) pairs.
    """

    def __init__(self, apps):
        """Store the (path_prefix, app) pairs, most specific prefix first.

        apps: a dict mapping path_prefix -> app, or an iterable of
            (path_prefix, app) pairs. The argument itself is never
            modified.
        """
        try:
            apps = apps.items()
        except AttributeError:
            pass

        # Sort on (path, position) so two entries are never ordered by
        # comparing the app callables themselves, and so the caller's
        # own list is left untouched. Reverse lexicographic order puts
        # a longer prefix ahead of every shorter prefix it starts with
        # ("/blog/admin" before "/blog" before "/"), which is what the
        # first-match loop in __call__ relies on.
        decorated = [(path, index, app)
                     for index, (path, app) in enumerate(apps)]
        decorated.sort()
        decorated.reverse()

        # The path_prefix strings must start, but not end, with a slash.
        # Use "" instead of "/".
        self.apps = [(path.rstrip("/"), app)
                     for path, index, app in decorated]

    def __call__(self, environ, start_response):
        """Dispatch to the first app whose prefix matches PATH_INFO.

        The chosen app receives a copy of environ in which the matched
        prefix has been moved from PATH_INFO onto the end of SCRIPT_NAME.
        If no prefix matches, an empty "404 Not Found" response is
        returned.
        """
        # PEP 333 allows SCRIPT_NAME and PATH_INFO to be omitted when
        # their value would be the empty string.
        path = environ.get("PATH_INFO") or "/"
        script_name = environ.get("SCRIPT_NAME", "")
        for p, app in self.apps:
            # Match whole path segments only: "/blog" must not claim
            # "/blogger".
            if path.startswith(p + "/") or path == p:
                environ = environ.copy()
                environ["SCRIPT_NAME"] = script_name + p
                environ["PATH_INFO"] = path[len(p):]
                return app(environ, start_response)

        start_response('404 Not Found', [('Content-Type', 'text/plain'),
                                         ('Content-Length', '0')])
        return ['']
178
179
class MaxSizeExceeded(Exception):
    """Raised when more bytes have been read than the configured limit."""
182
183 class SizeCheckWrapper(object):
184     """Wraps a file-like object, raising MaxSizeExceeded if too large."""
185    
186     def __init__(self, rfile, maxlen):
187         self.rfile = rfile
188         self.maxlen = maxlen
189         self.bytes_read = 0
190    
191     def _check_length(self):
192         if self.maxlen and self.bytes_read > self.maxlen:
193             raise MaxSizeExceeded()
194    
195     def read(self, size=None):
196         data = self.rfile.read(size)
197         self.bytes_read += len(data)
198         self._check_length()
199         return data
200    
201     def readline(self, size=None):
202         if size is not None:
203             data = self.rfile.readline(size)
204             self.bytes_read += len(data)
205             self._check_length()
206             return data
207        
208         # User didn't specify a size ...
209         # We read the line in chunks to make sure it's not a 100MB line !
210         res = []
211         while True:
212             data = self.rfile.readline(256)
213             self.bytes_read += len(data)
214             self._check_length()
215             res.append(data)
216             # See http://www.cherrypy.org/ticket/421
217             if len(data) < 256 or data[-1:] == "\n":
218                 return ''.join(res)
219    
220     def readlines(self, sizehint=0):
221         # Shamelessly stolen from StringIO
222         total = 0
223         lines = []
224         line = self.readline()
225         while line:
226             lines.append(line)
227             total += len(line)
228             if 0 < sizehint <= total:
229                 break
230             line = self.readline()
231         return lines
232    
233     def close(self):
234         self.rfile.close()
235    
236     def __iter__(self):
237         return self
238    
239     def next(self):
240         data = self.rfile.next()
241         self.bytes_read += len(data)
242         self._check_length()
243         return data
244
245
246 class HTTPRequest(object):
247     """An HTTP Request (and response).
248     
249     A single HTTP connection may consist of multiple request/response pairs.
250     
251     wfile: the writable file-like object passed to the constructor.
252     wsgi_app: the WSGI application to call.
253     environ: a partial WSGI environ (server and connection entries).
254         The caller MUST set the following entries:
255         * All wsgi.* entries, including .input
256         * SERVER_NAME and SERVER_PORT
257         * Any SSL_* entries
258         * Any custom entries like REMOTE_ADDR and REMOTE_PORT
259         * SERVER_SOFTWARE: the value to write in the "Server" response header.
260         * ACTUAL_SERVER_PROTOCOL: the value to write in the Status-Line of
261             the response. From RFC 2145: "An HTTP server SHOULD send a
262             response version equal to the highest version for which the
263             server is at least conditionally compliant, and whose major
264             version is less than or equal to the one received in the
265             request.  An HTTP server MUST NOT send a version for which
266             it is not at least conditionally compliant."
267     
268     outheaders: a list of header tuples to write in the response.
269     ready: when True, the request has been parsed and is ready to begin
270         generating the response. When False, signals the calling Connection
271         that the response should not be generated and the connection should
272         close.
273     close_connection: signals the calling Connection that the request
274         should close. This does not imply an error! The client and/or
275         server may each request that the connection be closed.
276     chunked_write: if True, output will be encoded with the "chunked"
277         transfer-coding. This value is set automatically inside
278         send_headers.
279     """
280    
281     max_request_header_size = 0
282     max_request_body_size = 0
283    
284     def __init__(self, wfile, environ, wsgi_app):
285         self.rfile = environ['wsgi.input']
286         self.wfile = wfile
287         self.environ = environ.copy()
288         self.wsgi_app = wsgi_app
289        
290         self.ready = False
291         self.started_response = False
292         self.status = ""
293         self.outheaders = []
294         self.sent_headers = False
295         self.close_connection = False
296         self.chunked_write = False
297    
298     def parse_request(self):
299         """Parse the next HTTP request start-line and message-headers."""
300         self.rfile.maxlen = self.max_request_header_size
301         self.rfile.bytes_read = 0
302        
303         try:
304             self._parse_request()
305         except MaxSizeExceeded:
306             self.simple_response("413 Request Entity Too Large")
307             return
308    
309     def _parse_request(self):
310         # HTTP/1.1 connections are persistent by default. If a client
311         # requests a page, then idles (leaves the connection open),
312         # then rfile.readline() will raise socket.error("timed out").
313         # Note that it does this based on the value given to settimeout(),
314         # and doesn't need the client to request or acknowledge the close
315         # (although your TCP stack might suffer for it: cf Apache's history
316         # with FIN_WAIT_2).
317         request_line = self.rfile.readline()
318         if not request_line:
319             # Force self.ready = False so the connection will close.
320             self.ready = False
321             return
322        
323         if request_line == "\r\n":
324             # RFC 2616 sec 4.1: "...if the server is reading the protocol
325             # stream at the beginning of a message and receives a CRLF
326             # first, it should ignore the CRLF."
327             # But only ignore one leading line! else we enable a DoS.
328             request_line = self.rfile.readline()
329             if not request_line:
330                 self.ready = False
331                 return
332        
333         environ = self.environ
334        
335         method, path, req_protocol = request_line.strip().split(" ", 2)
336         environ["REQUEST_METHOD"] = method
337        
338         # path may be an abs_path (including "http://host.domain.tld");
339         scheme, location, path, params, qs, frag = urlparse(path)
340        
341         if frag:
342             self.simple_response("400 Bad Request",
343                                  "Illegal #fragment in Request-URI.")
344             return
345        
346         if scheme:
347             environ["wsgi.url_scheme"] = scheme
348         if params:
349             path = path + ";" + params
350        
351         environ["SCRIPT_NAME"] = ""
352        
353         # Unquote the path+params (e.g. "/this%20path" -> "this path").
354         # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2
355         #
356         # But note that "...a URI must be separated into its components
357         # before the escaped characters within those components can be
358         # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2
359         atoms = [unquote(x) for x in quoted_slash.split(path)]
360         path = "%2F".join(atoms)
361         environ["PATH_INFO"] = path
362        
363         # Note that, like wsgiref and most other WSGI servers,
364         # we unquote the path but not the query string.
365         environ["QUERY_STRING"] = qs
366        
367         # Compare request and server HTTP protocol versions, in case our
368         # server does not support the requested protocol. Limit our output
369         # to min(req, server). We want the following output:
370         #     request    server     actual written   supported response
371         #     protocol   protocol  response protocol    feature set
372         # a     1.0        1.0           1.0                1.0
373         # b     1.0        1.1           1.1                1.0
374         # c     1.1        1.0           1.0                1.0
375         # d     1.1        1.1           1.1                1.1
376         # Notice that, in (b), the response will be "HTTP/1.1" even though
377         # the client only understands 1.0. RFC 2616 10.5.6 says we should
378         # only return 505 if the _major_ version is different.
379         rp = int(req_protocol[5]), int(req_protocol[7])
380         server_protocol = environ["ACTUAL_SERVER_PROTOCOL"]
381         sp = int(server_protocol[5]), int(server_protocol[7])
382         if sp[0] != rp[0]:
383             self.simple_response("505 HTTP Version Not Supported")
384             return
385         # Bah. "SERVER_PROTOCOL" is actually the REQUEST protocol.
386         environ["SERVER_PROTOCOL"] = req_protocol
387         self.response_protocol = "HTTP/%s.%s" % min(rp, sp)
388        
389         # If the Request-URI was an absoluteURI, use its location atom.
390         if location:
391             environ["SERVER_NAME"] = location
392        
393         # then all the http headers
394         try:
395             self.read_headers()
396         except ValueError, ex:
397             self.simple_response("400 Bad Request", repr(ex.args))
398             return
399        
400         mrbs = self.max_request_body_size
401         if mrbs and int(environ.get("CONTENT_LENGTH", 0)) > mrbs:
402             self.simple_response("413 Request Entity Too Large")
403             return
404        
405         # Set AUTH_TYPE, REMOTE_USER
406         creds = environ.get("HTTP_AUTHORIZATION", "").split(" ", 1)
407         environ["AUTH_TYPE"] = creds[0]
408         if creds[0].lower() == 'basic':
409             user, pw = base64.decodestring(creds[1]).split(":", 1)
410             environ["REMOTE_USER"] = user
411        
412         # Persistent connection support
413         if self.response_protocol == "HTTP/1.1":
414             # Both server and client are HTTP/1.1
415             if environ.get("HTTP_CONNECTION", "") == "close":
416                 self.close_connection = True
417         else:
418             # Either the server or client (or both) are HTTP/1.0
419             if environ.get("HTTP_CONNECTION", "") != "Keep-Alive":
420                 self.close_connection = True
421        
422         # Transfer-Encoding support
423         te = None
424         if self.response_protocol == "HTTP/1.1":
425             te = environ.get("HTTP_TRANSFER_ENCODING")
426             if te:
427                 te = [x.strip().lower() for x in te.split(",") if x.strip()]
428        
429         self.chunked_read = False
430        
431         if te:
432             for enc in te:
433                 if enc == "chunked":
434                     self.chunked_read = True
435                 else:
436                     # Note that, even if we see "chunked", we must reject
437                     # if there is an extension we don't recognize.
438                     self.simple_response("501 Unimplemented")
439                     self.close_connection = True
440                     return
441        
442         # From PEP 333:
443         # "Servers and gateways that implement HTTP 1.1 must provide
444         # transparent support for HTTP 1.1's "expect/continue" mechanism.
445         # This may be done in any of several ways:
446         #   1. Respond to requests containing an Expect: 100-continue request
447         #      with an immediate "100 Continue" response, and proceed normally.
448         #   2. Proceed with the request normally, but provide the application
449         #      with a wsgi.input stream that will send the "100 Continue"
450         #      response if/when the application first attempts to read from
451         #      the input stream. The read request must then remain blocked
452         #      until the client responds.
453         #   3. Wait until the client decides that the server does not support
454         #      expect/continue, and sends the request body on its own.
455         #      (This is suboptimal, and is not recommended.)
456         #
457         # We used to do 3, but are now doing 1. Maybe we'll do 2 someday,
458         # but it seems like it would be a big slowdown for such a rare case.
459         if environ.get("HTTP_EXPECT", "") == "100-continue":
460             self.simple_response(100)
461        
462         self.ready = True
463    
464     def read_headers(self):
465         """Read header lines from the incoming stream."""
466         environ = self.environ
467        
468         while True:
469             line = self.rfile.readline()
470             if not line:
471                 # No more data--illegal end of headers
472                 raise ValueError("Illegal end of headers.")
473            
474             if line == '\r\n':
475                 # Normal end of headers
476                 break
477            
478             if line[0] in ' \t':
479                 # It's a continuation line.
480                 v = line.strip()
481             else:
482                 k, v = line.split(":", 1)
483                 k, v = k.strip().upper(), v.strip()
484                 envname = "HTTP_" + k.replace("-", "_")
485            
486             if k in comma_separated_headers:
487                 existing = environ.get(envname)
488                 if existing:
489                     v = ", ".join((existing, v))
490             environ[envname] = v
491        
492         ct = environ.pop("HTTP_CONTENT_TYPE", None)
493         if ct is not None:
494             environ["CONTENT_TYPE"] = ct
495         cl = environ.pop("HTTP_CONTENT_LENGTH", None)
496         if cl is not None:
497             environ["CONTENT_LENGTH"] = cl
498    
499     def decode_chunked(self):
500         """Decode the 'chunked' transfer coding."""
501         cl = 0
502         data = StringIO.StringIO()
503         while True:
504             line = self.rfile.readline().strip().split(";", 1)
505             chunk_size = int(line.pop(0), 16)
506             if chunk_size <= 0:
507                 break
508 ##            if line: chunk_extension = line[0]
509             cl += chunk_size
510             data.write(self.rfile.read(chunk_size))
511             crlf = self.rfile.read(2)
512             if crlf != "\r\n":
513                 self.simple_response("400 Bad Request",
514                                      "Bad chunked transfer coding "
515                                      "(expected '\\r\\n', got %r)" % crlf)
516                 return
517        
518         # Grab any trailer headers
519         self.read_headers()
520        
521         data.seek(0)
522         self.environ["wsgi.input"] = data
523         self.environ["CONTENT_LENGTH"] = str(cl) or ""
524         return True
525    
526     def respond(self):
527         """Call the appropriate WSGI app and write its iterable output."""
528         # Set rfile.maxlen to ensure we don't read past Content-Length.
529         # This will also be used to read the entire request body if errors
530         # are raised before the app can read the body.
531         if self.chunked_read:
532             # If chunked, Content-Length will be 0.
533             self.rfile.maxlen = self.max_request_body_size
534         else:
535             cl = int(self.environ.get("CONTENT_LENGTH", 0))
536             if self.max_request_body_size:
537                 self.rfile.maxlen = min(cl, self.max_request_body_size)
538             else:
539                 self.rfile.maxlen = cl
540         self.rfile.bytes_read = 0
541        
542         try:
543             self._respond()
544         except MaxSizeExceeded:
545             if not self.sent_headers:
546                 self.simple_response("413 Request Entity Too Large")
547             return
548 &nbs