Download Install Tutorial Docs FAQ Tools Wiki License Team IRC Planet Involvement Shop Book

root/branches/cherrypy-3.0.x/cherrypy/_cprequest.py

Revision 1952 (checked in by fumanchu, 2 months ago)

Backport to 3.0.x of [1737] fix for #737.

  • Property svn:eol-style set to native
Line 
1
2 import Cookie
3 import os
4 import sys
5 import time
6 import types
7
8 import cherrypy
9 from cherrypy import _cpcgifs, _cpconfig
10 from cherrypy._cperror import format_exc, bare_error
11 from cherrypy.lib import http
12
13
class Hook(object):
    """A callable bundled with its failsafe flag, priority, and kwargs.
    
    Calling the Hook invokes the wrapped callback with the stored keyword
    arguments. Hooks compare by priority, so a list of them can be sorted
    into execution order.
    """
    
    __metaclass__ = cherrypy._AttributeDocstrings
    
    callback = None
    callback__doc = """
    The bare callable that this Hook object is wrapping, which will
    be called when the Hook is called."""
    
    failsafe = False
    failsafe__doc = """
    If True, the callback is guaranteed to run even if other callbacks
    from the same call point raise exceptions."""
    
    priority = 50
    priority__doc = """
    Defines the order of execution for a list of Hooks. Priority numbers
    should be limited to the closed interval [0, 100], but values outside
    this range are acceptable, as are fractional values."""
    
    kwargs = {}
    kwargs__doc = """
    A set of keyword arguments that will be passed to the
    callable on each call."""
    
    def __init__(self, callback, failsafe=None, priority=None, **kwargs):
        """Wrap callback, filling in unspecified metadata from the callback.
        
        When failsafe or priority is None, the value is read from the
        attribute of the same name on the callback itself, falling back
        to False and 50 respectively. Any extra keyword arguments are
        stored and passed to the callback on every call.
        """
        self.callback = callback
        
        if failsafe is not None:
            self.failsafe = failsafe
        else:
            self.failsafe = getattr(callback, "failsafe", False)
        
        if priority is not None:
            self.priority = priority
        else:
            self.priority = getattr(callback, "priority", 50)
        
        self.kwargs = kwargs
    
    def __cmp__(self, other):
        """Order Hooks by their priority values."""
        mine = self.priority
        theirs = other.priority
        return cmp(mine, theirs)
    
    def __call__(self):
        """Invoke the wrapped callback with the stored keyword arguments."""
        target = self.callback
        return target(**self.kwargs)
59
60
class HookMap(dict):
    """A map of call points to lists of callbacks (Hook objects)."""
    
    def __new__(cls, points=None):
        """Create the map with an empty hook list for each name in points."""
        d = dict.__new__(cls)
        for p in points or []:
            d[p] = []
        return d
    
    def __init__(self, *a, **kw):
        # Deliberately a no-op: __new__ has already populated the map.
        # Presumably this also keeps the 'points' argument from reaching
        # dict.__init__, which would try to read it as key/value pairs.
        pass
    
    def attach(self, point, callback, failsafe=None, priority=None, **kwargs):
        """Append a new Hook made from the supplied arguments.
        
        Raises KeyError if 'point' is not already a key of this map.
        """
        self[point].append(Hook(callback, failsafe, priority, **kwargs))
    
    def run(self, point):
        """Execute all registered Hooks (callbacks) for the given point.
        
        The hook list is sorted in place (by Hook priority) and then run in
        that order. Once any hook has raised, only hooks whose 'failsafe'
        attribute is true are still run. After the whole list has been
        visited, the most recently trapped exception (if any) is raised to
        the caller. KeyboardInterrupt and SystemExit are never trapped.
        """
        exc = None
        hooks = self[point]
        hooks.sort()
        for hook in hooks:
            # Some hooks are guaranteed to run even if others at
            # the same hookpoint fail. We will still log the failure,
            # but proceed on to the next hook. The only way
            # to stop all processing from one of these hooks is
            # to raise SystemExit and stop the whole server.
            if exc is None or hook.failsafe:
                try:
                    hook()
                except (KeyboardInterrupt, SystemExit):
                    raise
                except (cherrypy.HTTPError, cherrypy.HTTPRedirect,
                        cherrypy.InternalRedirect):
                    # Expected control-flow exceptions: remember, don't log.
                    exc = sys.exc_info()[1]
                except:
                    # Unexpected failure: remember it and log the traceback
                    # now, while it is still available.
                    exc = sys.exc_info()[1]
                    cherrypy.log(traceback=True)
        if exc is not None:
            # Re-raise the saved exception explicitly. A bare 'raise' here
            # would sit outside any 'except' block and so depend on the
            # interpreter still remembering the last handled exception.
            raise exc
    
    def __copy__(self):
        """Return a new map of the same class whose hook lists are copies."""
        newmap = self.__class__()
        # We can't just use 'update' because we want copies of the
        # mutable values (each is a list) as well.
        for k, v in self.items():
            newmap[k] = v[:]
        return newmap
    copy = __copy__
    
    def __repr__(self):
        cls = self.__class__
        return "%s.%s(points=%r)" % (cls.__module__, cls.__name__,
                                     list(self.keys()))
114
115
116 # Config namespace handlers
117
def hooks_namespace(k, v):
    """Attach a bare hook declared in config to cherrypy.request.hooks.
    
    k is the config key with the "hooks." prefix already removed; only the
    part before the first dot names the hook point. v may be a Hook, a
    callable, or a string, which is resolved via cherrypy.lib.attributes.
    """
    # Anything after the first dot is ignored, which lets a single path
    # declare several hooks for one hookpoint (e.g. "hooks.before_handler.1").
    point_name = k.split(".", 1)[0]
    
    hook = v
    if isinstance(hook, basestring):
        hook = cherrypy.lib.attributes(hook)
    if not isinstance(hook, Hook):
        hook = Hook(hook)
    
    cherrypy.request.hooks[point_name].append(hook)
129
def request_namespace(k, v):
    """Set attribute k of cherrypy.request to the config value v."""
    target = cherrypy.request
    setattr(target, k, v)
133
def response_namespace(k, v):
    """Set attribute k of cherrypy.response to the config value v."""
    target = cherrypy.response
    setattr(target, k, v)
137
def error_page_namespace(k, v):
    """Store v in cherrypy.request.error_page under the key int(k)."""
    status_code = int(k)
    cherrypy.request.error_page[status_code] = v
141
142
# Names of the call points used to seed the class-level Request.hooks map.
hookpoints = [
    'on_start_resource',
    'before_request_body',
    'before_handler',
    'before_finalize',
    'on_end_resource',
    'on_end_request',
    'before_error_response',
    'after_error_response',
]
147
148
149 class Request(object):
150     """An HTTP request.
151     
152     This object represents the metadata of an HTTP request message;
153     that is, it contains attributes which describe the environment
154     in which the request URL, headers, and body were sent (if you
155     want tools to interpret the headers and body, those are elsewhere,
156     mostly in Tools). This 'metadata' consists of socket data,
157     transport characteristics, and the Request-Line. This object
158     also contains data regarding the configuration in effect for
159     the given URL, and the execution plan for generating a response.
160     """
161    
162     __metaclass__ = cherrypy._AttributeDocstrings
163    
164     prev = None
165     prev__doc = """
166     The previous Request object (if any). This should be None
167     unless we are processing an InternalRedirect."""
168    
169     # Conversation/connection attributes
170     local = http.Host("localhost", 80)
171     local__doc = \
172         "An http.Host(ip, port, hostname) object for the server socket."
173    
174     remote = http.Host("localhost", 1111)
175     remote__doc = \
176         "An http.Host(ip, port, hostname) object for the client socket."
177    
178     scheme = "http"
179     scheme__doc = """
180     The protocol used between client and server. In most cases,
181     this will be either 'http' or 'https'."""
182    
183     server_protocol = "HTTP/1.1"
184     server_protocol__doc = """
185     The HTTP version for which the HTTP server is at least
186     conditionally compliant."""
187    
188     base = ""
189     base__doc = """The (scheme://host) portion of the requested URL."""
190    
191     # Request-Line attributes
192     request_line = ""
193     request_line__doc = """
194     The complete Request-Line received from the client. This is a
195     single string consisting of the request method, URI, and protocol
196     version (joined by spaces). Any final CRLF is removed."""
197    
198     method = "GET"
199     method__doc = """
200     Indicates the HTTP method to be performed on the resource identified
201     by the Request-URI. Common methods include GET, HEAD, POST, PUT, and
202     DELETE. CherryPy allows any extension method; however, various HTTP
203     servers and gateways may restrict the set of allowable methods.
204     CherryPy applications SHOULD restrict the set (on a per-URI basis)."""
205    
206     query_string = ""
207     query_string__doc = """
208     The query component of the Request-URI, a string of information to be
209     interpreted by the resource. The query portion of a URI follows the
210     path component, and is separated by a '?'. For example, the URI
211     'http://www.cherrypy.org/wiki?a=3&b=4' has the query component,
212     'a=3&b=4'."""
213    
214     protocol = (1, 1)
215     protocol__doc = """The HTTP protocol version corresponding to the set
216         of features which should be allowed in the response. If BOTH
217         the client's request message AND the server's level of HTTP
218         compliance is HTTP/1.1, this attribute will be the tuple (1, 1).
219         If either is 1.0, this attribute will be the tuple (1, 0).
220         Lower HTTP protocol versions are not explicitly supported."""
221    
222     params = {}
223     params__doc = """
224     A dict which combines query string (GET) and request entity (POST)
225     variables. This is populated in two stages: GET params are added
226     before the 'on_start_resource' hook, and POST params are added
227     between the 'before_request_body' and 'before_handler' hooks."""
228    
229     # Message attributes
230     header_list = []
231     header_list__doc = """
232     A list of the HTTP request headers as (name, value) tuples.
233     In general, you should use request.headers (a dict) instead."""
234    
235     headers = http.HeaderMap()
236     headers__doc = """
237     A dict-like object containing the request headers. Keys are header
238     names (in Title-Case format); however, you may get and set them in
239     a case-insensitive manner. That is, headers['Content-Type'] and
240     headers['content-type'] refer to the same value. Values are header
241     values (decoded according to RFC 2047 if necessary). See also:
242     http.HeaderMap, http.HeaderElement."""
243    
244     cookie = Cookie.SimpleCookie()
245     cookie__doc = """See help(Cookie)."""
246    
247     rfile = None
248     rfile__doc = """
249     If the request included an entity (body), it will be available
250     as a stream in this attribute. However, the rfile will normally
251     be read for you between the 'before_request_body' hook and the
252     'before_handler' hook, and the resulting string is placed into
253     either request.params or the request.body attribute.
254     
255     You may disable the automatic consumption of the rfile by setting
256     request.process_request_body to False, either in config for the desired
257     path, or in an 'on_start_resource' or 'before_request_body' hook.
258     
259     WARNING: In almost every case, you should not attempt to read from the
260     rfile stream after CherryPy's automatic mechanism has read it. If you
261     turn off the automatic parsing of rfile, you should read exactly the
262     number of bytes specified in request.headers['Content-Length'].
263     Ignoring either of these warnings may result in a hung request thread
264     or in corruption of the next (pipelined) request.
265     """
266    
267     process_request_body = True
268     process_request_body__doc = """
269     If True, the rfile (if any) is automatically read and parsed,
270     and the result placed into request.params or request.body."""
271    
272     methods_with_bodies = ("POST", "PUT")
273     methods_with_bodies__doc = """
274     A sequence of HTTP methods for which CherryPy will automatically
275     attempt to read a body from the rfile."""
276    
277     body = None
278     body__doc = """
279     If the request Content-Type is 'application/x-www-form-urlencoded'
280     or multipart, this will be None. Otherwise, this will contain the
281     request entity body as a string; this value is set between the
282     'before_request_body' and 'before_handler' hooks (assuming that
283     process_request_body is True)."""
284    
285     # Dispatch attributes
286     dispatch = cherrypy.dispatch.Dispatcher()
287     dispatch__doc = """
288     The object which looks up the 'page handler' callable and collects
289     config for the current request based on the path_info, other
290     request attributes, and the application architecture. The core
291     calls the dispatcher as early as possible, passing it a 'path_info'
292     argument.
293     
294     The default dispatcher discovers the page handler by matching path_info
295     to a hierarchical arrangement of objects, starting at request.app.root.
296     See help(cherrypy.dispatch) for more information."""
297    
298     script_name = ""
299     script_name__doc = """
300     The 'mount point' of the application which is handling this request."""
301    
302     path_info = "/"
303     path_info__doc = """
304     The 'relative path' portion of the Request-URI. This is relative
305     to the script_name ('mount point') of the application which is
306     handling this request."""
307
308     login = None
309     login__doc = """
310     When authentication is used during the request processing this is
311     set to 'False' if it failed and to the 'username' value if it succeeded.
312     The default 'None' implies that no authentication happened."""
313    
314     app = None
315     app__doc = \
316         """The cherrypy.Application object which is handling this request."""
317    
318     handler = None
319     handler__doc = """
320     The function, method, or other callable which CherryPy will call to
321     produce the response. The discovery of the handler and the arguments
322     it will receive are determined by the request.dispatch object.
323     By default, the handler is discovered by walking a tree of objects
324     starting at request.app.root, and is then passed all HTTP params
325     (from the query string and POST body) as keyword arguments."""
326    
327     toolmaps = {}
328     toolmaps__doc = """
329     A nested dict of all Toolboxes and Tools in effect for this request,
330     of the form: {Toolbox.namespace: {Tool.name: config dict}}."""
331    
332     config = None
333     config__doc = """
334     A flat dict of all configuration entries which apply to the
335     current request. These entries are collected from global config,
336     application config (based on request.path_info), and from handler
337     config (exactly how is governed by the request.dispatch object in
338     effect for this request; by default, handler config can be attached
339     anywhere in the tree between request.app.root and the final handler,
340     and inherits downward)."""
341    
342     is_index = None
343     is_index__doc = """
344     This will be True if the current request is mapped to an 'index'
345     resource handler (also, a 'default' handler if path_info ends with
346     a slash). The value may be used to automatically redirect the
347     user-agent to a 'more canonical' URL which either adds or removes
348     the trailing slash. See cherrypy.tools.trailing_slash."""
349    
350     hooks = HookMap(hookpoints)
351     hooks__doc = """
352     A HookMap (dict-like object) of the form: {hookpoint: [hook, ...]}.
353     Each key is a str naming the hook point, and each value is a list
354     of hooks which will be called at that hook point during this request.
355     The list of hooks is generally populated as early as possible (mostly
356     from Tools specified in config), but may be extended at any time.
357     See also: _cprequest.Hook, _cprequest.HookMap, and cherrypy.tools."""
358    
359     error_response = cherrypy.HTTPError(500).set_response
360     error_response__doc = """
361     The no-arg callable which will handle unexpected, untrapped errors
362     during request processing. This is not used for expected exceptions
363     (like NotFound, HTTPError, or HTTPRedirect) which are raised in
364     response to expected conditions (those should be customized either
365     via request.error_page or by overriding HTTPError.set_response).
366     By default, error_response uses HTTPError(500) to return a generic
367     error response to the user-agent."""
368    
369     error_page = {}
370     error_page__doc = """
371     A dict of {error code: response filename} pairs. The named response
372     files should be Python string-formatting templates, and can expect by
373     default to receive the format values with the mapping keys 'status',
374     'message', 'traceback', and 'version'. The set of format mappings
375     can be extended by overriding HTTPError.set_response."""
376    
377     show_tracebacks = True
378     show_tracebacks__doc = """
379     If True, unexpected errors encountered during request processing will
380     include a traceback in the response body."""
381    
382     throws = (KeyboardInterrupt, SystemExit, cherrypy.InternalRedirect)
383     throws__doc = \
384         """The sequence of exceptions which Request.run does not trap."""
385    
386     throw_errors = False
387     throw_errors__doc = """
388     If True, Request.run will not trap any errors (except HTTPRedirect and
389     HTTPError, which are more properly called 'exceptions', not errors)."""
390    
391     namespaces = _cpconfig.NamespaceSet(
392         **{"hooks": hooks_namespace,
393            "request": request_namespace,
394            "response": response_namespace,
395            "error_page": error_page_namespace,
396            # "tools": See _cptools.Toolbox
397            })
398    
399     def __init__(self, local_host, remote_host, scheme="http",
400                  server_protocol="HTTP/1.1"):
401         """Populate a new Request object.
402         
403         local_host should be an http.Host object with the server info.
404         remote_host should be an http.Host object with the client info.
405         scheme should be a string, either "http" or "https".
406         """
407         self.local = local_host
408         self.remote = remote_host
409         self.scheme = scheme
410         self.server_protocol = server_protocol
411        
412         self.closed = False
413        
414         # Put a *copy* of the class error_page into self.
415         self.error_page = self.error_page.copy()
416        
417         # Put a *copy* of the class namespaces into self.
418         self.namespaces = self.namespaces.copy()
419    
420     def close(self):
421         """Run cleanup code and remove self from globals. (Core)"""
422         if not self.closed:
423             self.closed = True
424             self.hooks.run('on_end_request')
425    
    def run(self, method, path, query_string, req_protocol, headers, rfile):
        """Process the Request. (Core)
        
        method, path, query_string, and req_protocol should be pulled directly
            from the Request-Line (e.g. "GET /path?key=val HTTP/1.0").
        path should be %XX-unquoted, but query_string should not be.
        headers should be a list of (name, value) tuples.
        rfile should be a file-like object containing the HTTP request entity.
        
        When run() is done, the returned object should have 3 attributes:
          status, e.g. "200 OK"
          header_list, a list of (name, value) tuples
          body, an iterable yielding strings
        
        Consumer code (HTTP servers) should then access these response
        attributes to build the outbound stream.
        
        The returned object is cherrypy.response. Exceptions listed in
        self.throws are re-raised to the caller; any other exception is
        re-raised only if self.throw_errors is true, and is otherwise
        logged and replaced by the result of bare_error(). Raises
        cherrypy.TimeoutError if cherrypy.response.timed_out is true
        once processing has finished.
        
        """
        
        try:
            # Start every run with the default handler for unexpected errors.
            self.error_response = cherrypy.HTTPError(500).set_response
            
            self.method = method
            # Empty or None values fall back to "/" and '' respectively.
            path = path or "/"
            self.query_string = query_string or ''
            
            # Compare request and server HTTP protocol versions, in case our
            # server does not support the requested protocol. Limit our output
            # to min(req, server). We want the following output:
            #     request    server     actual written   supported response
            #     protocol   protocol  response protocol    feature set
            # a     1.0        1.0           1.0                1.0
            # b     1.0        1.1           1.1                1.0
            # c     1.1        1.0           1.0                1.0
            # d     1.1        1.1           1.1                1.1
            # Notice that, in (b), the response will be "HTTP/1.1" even though
            # the client only understands 1.0. RFC 2616 10.5.6 says we should
            # only return 505 if the _major_ version is different.
            #
            # Characters 5 and 7 are the major and minor digits of a string
            # shaped like "HTTP/x.y"; this assumes single-digit version
            # numbers -- TODO confirm callers guarantee that.
            rp = int(req_protocol[5]), int(req_protocol[7])
            sp = int(self.server_protocol[5]), int(self.server_protocol[7])
            self.protocol = min(rp, sp)
            
            # Rebuild first line of the request (e.g. "GET /path HTTP/1.0").
            url = path
            if query_string:
                url += '?' + query_string
            self.request_line = '%s %s %s' % (method, url, req_protocol)
            
            # Replace the message attributes with fresh objects for this run.
            self.header_list = list(headers)
            self.rfile = rfile
            self.headers = http.HeaderMap()
            self.cookie = Cookie.SimpleCookie()
            self.handler = None
            
            # path_info should be the path from the
            # app root (script_name) to the handler.
            self.script_name = self.app.script_name
            # Slice off as many leading characters as script_name has once
            # its trailing slashes are removed.
            self.path_info = pi = path[len(self.script_name.rstrip("/")):]
            
            self.respond(pi)
            
        except self.throws:
            raise
        except:
            if self.throw_errors:
                raise
            else:
                # Failure in setup, error handler or finalize. Bypass them.
                # Can't use handle_error because we may not have hooks yet.
                cherrypy.log(traceback=True)
                # Only put the traceback in the body if configured to.
                if self.show_tracebacks:
                    body = format_exc()
                else:
                    body = ""
                r = bare_error(body)
                response = cherrypy.response
                response.status, response.header_list, response.body = r
        
        if self.method == "HEAD":
            # HEAD requests MUST NOT return a message-body in the response.
            cherrypy.response.body = []
        
        # The access log entry is written before the timeout check below,
        # so it is written even when TimeoutError is about to be raised.
        cherrypy.log.access()
        
        if cherrypy.response.timed_out:
            raise cherrypy.TimeoutError()
        
        return cherrypy.response
514    
    def respond(self, path_info):
        """Generate a response for the resource at self.path_info. (Core)
        
        path_info is passed on to self.get_resource(). The hook points
        are run in this order: 'on_start_resource', 'before_request_body',
        'before_handler', 'before_finalize', and finally 'on_end_resource'
        (which runs even when an earlier step raised).
        
        HTTPRedirect and HTTPError build their own response through their
        set_response() method. Exceptions in self.throws propagate to the
        caller; anything else goes to self.handle_error() unless
        self.throw_errors is true, in which case it is re-raised.
        """
        try:
            try:
                try:
                    if self.app is None:
                        raise cherrypy.NotFound()
                    
                    # Get the 'Host' header, so we can HTTPRedirect properly.
                    self.process_headers()
                    
                    # Make a copy of the class hooks
                    self.hooks = self.__class__.hooks.copy()
                    self.toolmaps = {}
                    self.get_resource(path_info)
                    # Hand self.config to the namespace handlers.
                    self.namespaces(self.config)
                    
                    self.hooks.run('on_start_resource')
                    
                    if self.process_request_body:
                        # Body processing is switched off for any method
                        # not listed in methods_with_bodies.
                        if self.method not in self.methods_with_bodies:
                            self.process_request_body = False
                        
                        if self.process_request_body:
                            # Prepare the SizeCheckWrapper for the req body
                            mbs = getattr(cherrypy.server,
                                          "max_request_body_size", 0)
                            # A missing or non-positive limit leaves the
                            # rfile unwrapped.
                            if mbs > 0:
                                self.rfile = http.SizeCheckWrapper(self.rfile, mbs)
                    
                    # process_request_body is re-read after these hooks run.
                    self.hooks.run('before_request_body')
                    if self.process_request_body:
                        self.process_body()
                    
                    self.hooks.run('before_handler')
                    # With no handler set, response.body is left as it is.
                    if self.handler:
                        cherrypy.response.body = self.handler()
                    self.hooks.run('before_finalize')
                    cherrypy.response.finalize()
                except (cherrypy.HTTPRedirect, cherrypy.HTTPError), inst:
                    # Let the exception fill in the response, then finalize
                    # it the same way as the normal path above.
                    inst.set_response()
                    self.hooks.run('before_finalize')
                    cherrypy.response.finalize()
            finally:
                # Runs whether or not the steps above succeeded.
                self.hooks.run('on_end_resource')
        except self.throws:
            raise
        except:
            if self.throw_errors:
                raise
            self.handle_error(sys.exc_info())
566    
567     def process_headers(self):
568         """Parse HTTP header data into Python structures. (Core)"""
569         self.params = http.parse_query_string(self.query_string)
570        
571         # Process the headers into self.headers
572         headers = self.headers
573         for name, value in self.header_list:
574             # Call title() now (and use dict.__method__(headers))
575             # so title doesn't have to be called twice.
576             name = name.title()
577             value = value.strip()
578            
579             # Warning: if there is more than one header entry for cookies (AFAIK,
580             # only Konqueror does that), only the last one will remain in headers