Download Install Tutorial Docs FAQ Tools Wiki License Team IRC Planet Involvement Shop Book

root/branches/cherrypy-2.x/cherrypy/lib/httptools.py

Revision 2020 (checked in by nick125, 3 months ago)

Fix a small issue in r2019 so we explicitly state we want to split by spaces (" ") rather than use split()'s default splitting algorithm (which splits on spaces, newlines, etc).

  • Property svn:eol-style set to native
Line 
1 """HTTP library functions and tools."""
2
3 # This module contains functions and tools for building an HTTP application
4 # framework: any one, not just one whose name starts with "Ch". ;) If you
5 # reference any modules from some popular framework inside *this* module,
6 # FuManChu will personally hang you up by your thumbs and submit you
7 # to a public caning.
8
9 from BaseHTTPServer import BaseHTTPRequestHandler
# Map of status code -> (reason-phrase, message). It starts as a private copy
# of the standard library's table so that the shared class attribute on
# BaseHTTPRequestHandler is never modified by the override below.
responseCodes = dict(BaseHTTPRequestHandler.responses)

# Override the stock entry for 500.
# From http://www.cherrypy.org/ticket/361
responseCodes.update({
    500: ('Internal error',
          'The server encountered an unexpected condition '
          'which prevented it from fulfilling the request.'),
})
16
17
18 import cgi
19 from email.Header import Header, decode_header
20 import re
21 import time
22 import urllib
23 from urlparse import urlparse
24
25
def urljoin(*atoms):
    """Join the given atoms with "/" and collapse repeated slashes.

    Every run of two or more consecutive "/" characters in the joined
    string is reduced to a single "/", so urljoin("/a/", "/b") gives "/a/b".
    Calling it with no atoms returns the empty string.
    """
    return re.sub(r"/{2,}", "/", "/".join(atoms))
31
32
# Lookup tables used by HTTPDate. weekdayname is indexed by the weekday field
# of a time tuple (Monday is 0); monthname is indexed by the 1-based month
# number, which is why index 0 holds a None placeholder.
weekdayname = 'Mon Tue Wed Thu Fri Sat Sun'.split()
monthname = [None] + 'Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()

def HTTPDate(dt=None):
    """Format a time.struct_time as an RFC 1123 date string.

    dt: a nine-item time tuple such as time.gmtime() returns. If it is
        omitted (or None), the current time from time.gmtime() is used.

    The result looks like "Thu, 01 Jan 1970 00:00:00 GMT".

    RFC 2616: "[Concerning RFC 1123, RFC 850, asctime date formats]...
    HTTP/1.1 clients and servers that parse the date value MUST
    accept all three formats (for compatibility with HTTP/1.0),
    though they MUST only generate the RFC 1123 format for
    representing HTTP-date values in header fields."

    RFC 1945 (HTTP/1.0) requires the same.
    """
    if dt is None:
        dt = time.gmtime()

    # All nine fields are unpacked (the last two are unused) so that a tuple
    # of any other length is rejected here.
    (year, month, day,
     hour, minute, second,
     weekday, _yearday, _isdst) = dt

    # The names come from the tables above rather than from strftime's
    # "%a"/"%b" directives.
    fields = (weekdayname[weekday], day, monthname[month], year,
              hour, minute, second)
    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % fields
60
61
class Version(object):

    """A version, such as "2.1 beta 3", which can be compared atom-by-atom.

    If a string is provided to the constructor, it will be split on runs of
    non-word characters; that is,
    "1.4.13 beta 9" -> ["1", "4", "13", "beta", "9"].
    Empty atoms (from doubled or trailing separators) are discarded.

    Comparisons are performed atom-by-atom, numerically if both atoms are
    numeric. Therefore, "2.12" is greater than "2.4", and "3.0 beta" is
    greater than "3.0 alpha" (only because "b" > "a"). If an atom is
    provided in one Version and not another, the longer Version is
    greater than the shorter, that is: "4.8 alpha" > "4.8".

    The other operand of a comparison may be anything the constructor
    accepts (for example a str or a float); it is coerced to a Version first.
    """

    def __init__(self, atoms):
        """A Version object.

        atoms: if a str, it will be split on non-word characters;
               if a float or int, it will be split at the decimal point;
               otherwise it is treated as an iterable whose items are each
               converted with str().
        """
        if isinstance(atoms, (int, float)):
            atoms = str(atoms)
        try:
            string_types = basestring
        except NameError:
            # This interpreter has no 'basestring' builtin; fall back to str.
            string_types = str
        if isinstance(atoms, string_types):
            # r'\W+' (not r'\W') plus the filter keeps "2.1  beta" or "1.1 "
            # from producing empty atoms that would skew comparisons.
            self.atoms = [a for a in re.split(r'\W+', atoms) if a]
        else:
            self.atoms = [str(x) for x in atoms]

    def from_http(cls, version_str):
        """Return a Version object from the given 'HTTP/x.y' string."""
        # Drop the five-character "HTTP/" prefix.
        return cls(version_str[5:])
    from_http = classmethod(from_http)

    def to_http(self):
        """Return a 'HTTP/x.y' string for this Version object.

        Only the first two atoms are used; missing ones are taken to be "0",
        so Version("1").to_http() == "HTTP/1.0".
        """
        major, minor = (self.atoms + ["0", "0"])[:2]
        return "HTTP/%s.%s" % (major, minor)

    def __str__(self):
        return ".".join([str(x) for x in self.atoms])

    def __cmp__(self, other):
        """Return -1, 0 or 1 as self is less than, equal to or greater than other."""
        cls = self.__class__
        if not isinstance(other, cls):
            # Try to coerce other to a Version instance.
            other = cls(other)

        index = 0
        while index < len(self.atoms) and index < len(other.atoms):
            mine, theirs = self.atoms[index], other.atoms[index]
            # Compare numerically only when both atoms are numeric; otherwise
            # both stay strings and are compared as text.
            if mine.isdigit() and theirs.isdigit():
                mine, theirs = int(mine), int(theirs)
            if mine < theirs:
                return -1
            if mine > theirs:
                return 1
            index += 1
        # All shared atoms are equal: the longer Version wins.
        if index < len(other.atoms):
            return -1
        if index < len(self.atoms):
            return 1
        return 0

    # Rich comparisons delegate to __cmp__ so that ordering does not depend
    # on the interpreter consulting __cmp__ itself.
    def __eq__(self, other):
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        return self.__cmp__(other) != 0

    def __lt__(self, other):
        return self.__cmp__(other) < 0

    def __le__(self, other):
        return self.__cmp__(other) <= 0

    def __gt__(self, other):
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        return self.__cmp__(other) >= 0
122
123
def getRanges(headervalue, content_length):
    """Return a list of (start, stop) indices from a Range header, or None.

    Each (start, stop) tuple will be composed of two ints, which are suitable
    for use in a slicing operation. That is, the header "Range: bytes=3-6",
    if applied against a Python string, is requesting resource[3:7]. This
    function will return the list [(3, 7)].

    headervalue: the raw Range header value, e.g. "bytes=0-99,-20".
    content_length: the length of the entity the ranges apply to.

    Returns None when the header is empty, uses a unit other than "bytes",
    or is syntactically invalid; per RFC 2616 sec 14.16 the request is then
    treated as if no Range header had been sent.

    Otherwise returns a list with one tuple per satisfiable range, each
    clamped to the entity. Unsatisfiable ranges are left out, so an empty
    list means nothing that was asked for can be served (RFC 2616 says the
    server SHOULD then answer 416).
    """

    if not headervalue:
        return None

    if "=" not in headervalue:
        # Not of the form "unit=ranges": syntactically invalid.
        return None
    bytesunit, byteranges = headervalue.split("=", 1)
    if bytesunit.strip().lower() != "bytes":
        # The arithmetic below is only meaningful for byte offsets.
        return None

    result = []
    for brange in byteranges.split(","):
        if not brange.strip():
            # Tolerate empty list elements such as a trailing comma.
            continue
        if "-" not in brange:
            return None
        start, stop = [x.strip() for x in brange.split("-", 1)]
        # isdigit() also rejects signed values such as "--5".
        if (start and not start.isdigit()) or (stop and not stop.isdigit()):
            return None

        if start:
            if not stop:
                stop = content_length - 1
            start, stop = int(start), int(stop)
            if start >= content_length:
                # From rfc 2616 sec 14.16:
                # "If the server receives a request (other than one
                # including an If-Range request-header field) with an
                # unsatisfiable Range request-header field (that is,
                # all of whose byte-range-spec values have a first-byte-pos
                # value greater than the current length of the selected
                # resource), it SHOULD return a response code of 416
                # (Requested range not satisfiable)."
                continue
            if stop < start:
                # From rfc 2616 sec 14.16:
                # "If the server ignores a byte-range-spec because it
                # is syntactically invalid, the server SHOULD treat
                # the request as if the invalid Range header field
                # did not exist. (Normally, this means return a 200
                # response containing the full entity)."
                return None
            # rfc 2616 sec 14.35.1: a last-byte-pos at or beyond the end of
            # the entity is taken to be the last byte of the entity.
            stop = min(stop, content_length - 1)
            result.append((start, stop + 1))
        else:
            if not stop:
                # Neither position given ("-"); see rfc quote above.
                return None
            # Suffix range (last N bytes). rfc 2616 sec 14.35.1: if the
            # entity is shorter than N, the whole entity is used; clamping
            # also keeps the start index from going negative.
            suffix = min(int(stop), content_length)
            if suffix == 0:
                # A zero-length suffix selects nothing: unsatisfiable.
                continue
            result.append((content_length - suffix, content_length))

    return result
171
172
class HeaderElement(object):
    """An element (with parameters) from an HTTP header's element list.

    value: the element's initial token, e.g. "text/html".
    params: a dict of the element's parameters, e.g. {"level": "1"}.
    """

    def __init__(self, value, params=None):
        self.value = value
        # Build a fresh dict per instance rather than sharing a default.
        if params is None:
            params = {}
        self.params = params

    def __str__(self):
        """Return the element in 'value;key=val;key=val' form."""
        p = [";%s=%s" % (k, v) for k, v in self.params.items()]
        return "%s%s" % (self.value, "".join(p))

    def parse(elementstr):
        """Transform 'token;key=val' to ('token', {'key': 'val'}).

        A parameter given without a value ("token;flag") maps to "".
        Empty parameters, as left by a trailing or doubled ";", are ignored.
        """
        # Split the element into a value and parameters. The 'value' may
        # be of the form, "token=token", but we don't split that here.
        atoms = [x.strip() for x in elementstr.split(";")]
        initial_value = atoms.pop(0)
        params = {}
        for atom in atoms:
            pieces = [x.strip() for x in atom.split("=", 1) if x.strip()]
            if not pieces:
                # Nothing but whitespace and/or "=": there is no key to
                # record, and indexing pieces would raise IndexError.
                continue
            key = pieces[0]
            if len(pieces) > 1:
                val = pieces[1]
            else:
                val = ""
            params[key] = val
        return initial_value, params
    parse = staticmethod(parse)

    def from_str(cls, elementstr):
        """Construct an instance from a string of the form 'token;key=val'."""
        ival, params = cls.parse(elementstr)
        return cls(ival, params)
    from_str = classmethod(from_str)
209
210
class AcceptElement(HeaderElement):
    """A HeaderElement taken from an Accept-* header's element list.

    On top of the plain element it understands the "q" parameter: the
    qvalue is exposed as a float, and instances sort by preference.
    """

    def from_str(cls, elementstr):
        """Construct an instance from a string like 'token;key=val;q=0.8'.

        Whatever follows the first ";q=" is itself parsed as a HeaderElement
        and stored under params["q"]; the text before it is parsed as usual.
        """
        # The first "q" parameter (if any) marks where the element's own
        # parameter(s) stop and the accept-params begin.
        pieces = re.split(r'; *q *=', elementstr, 1)
        head, tail = pieces[0], pieces[1:]

        q_element = None
        if tail:
            # Only the Accept header allows extensions after the qvalue, but
            # parsing every header as if it did keeps the code simple.
            q_element = HeaderElement.from_str(tail[0].strip())

        ival, params = cls.parse(head.strip())
        if q_element is not None:
            params["q"] = q_element
        return cls(ival, params)
    from_str = classmethod(from_str)

    def qvalue(self):
        raw = self.params.get("q", "1")
        # from_str stores the qvalue as a HeaderElement; unwrap it first.
        if isinstance(raw, HeaderElement):
            raw = raw.value
        return float(raw)
    qvalue = property(qvalue, doc="The qvalue, or priority, of this value.")

    def __cmp__(self, other):
        # Operands are deliberately swapped so that sorting a list of
        # AcceptElement objects puts the most preferred value first. When
        # the qvalues tie, the string forms (also swapped) break the tie.
        return (cmp(other.qvalue, self.qvalue) or
                cmp(str(other), str(self)))
245
246
def header_elements(fieldname, fieldvalue):
    """Return a HeaderElement list from a comma-separated header str.

    fieldname: the header's name; matched case-insensitively.
    fieldvalue: the header's raw value, e.g. "text/html;q=0.8, text/plain".

    Returns None if fieldvalue is empty or None.

    For "Accept*" and "TE" headers the elements are AcceptElement instances,
    sorted so that the most preferred one comes first. For every other
    header they are plain HeaderElement instances in the order in which
    they appear in fieldvalue.
    """

    if not fieldvalue:
        return None
    headername = fieldname.lower()

    # The kind of element depends only on the header name, so decide once.
    prioritized = headername.startswith("accept") or headername == 'te'
    if prioritized:
        element_class = AcceptElement
    else:
        element_class = HeaderElement

    result = [element_class.from_str(element)
              for element in fieldvalue.split(",")]

    # Only AcceptElement defines an ordering (by qvalue). HeaderElement does
    # not, so sorting those would reorder them arbitrarily instead of
    # keeping the sender's order.
    if prioritized:
        result.sort()
    return result
264
def decode_TEXT(value):
    """Decode RFC-2047 TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> u"f\xfcr").

    The value is split with email's decode_header(); each piece that carries
    a charset is decoded with it, pieces without one are used unchanged, and
    the pieces are concatenated in order.
    """
    pieces = []
    for raw, charset in decode_header(value):
        if charset is not None:
            raw = raw.decode(charset)
        pieces.append(raw)
    return "".join(pieces)
274
def validStatus(status):
    """Return a legal (code, reason-phrase, message) tuple for status.

    status: an int, or a str that begins with an int and is optionally
        followed by a space and a reason-phrase (e.g. "404 Not Found").
        Any false value (None, 0, "") is treated as 200.

    The reason-phrase is taken from status when one is supplied; otherwise
    the default for the code is looked up in responseCodes. The message
    always comes from responseCodes. Codes missing from that table get ""
    for both.

    Raises ValueError if the code is not an integer or lies outside 100-599.
    """

    if not status:
        status = 200

    # Everything before the first space is the code; the rest, if there is
    # any, is the caller's reason-phrase.
    pieces = str(status).split(" ", 1)
    code = pieces[0]
    reason = None
    if len(pieces) > 1:
        reason = pieces[1].strip()

    try:
        code = int(code)
    except ValueError:
        raise ValueError("Illegal response status from server "
                         "(%s is non-numeric)." % repr(code))

    if not (100 <= code <= 599):
        raise ValueError("Illegal response status from server "
                         "(%s is out of range)." % repr(code))

    # A code absent from the table is unknown but not illegal.
    defaultReason, message = responseCodes.get(code, ("", ""))

    if reason is None:
        reason = defaultReason

    return code, reason, message
317
def parseRequestLine(requestLine):
    """Return (method, path, querystring, protocol) from a requestLine.

    The line is split on single spaces: the first token is the method, the
    last is the protocol, and everything in between (rejoined with spaces)
    is the request URI. The returned path is unquoted except for encoded
    slashes, which are kept as "%2F"; the querystring is returned as-is.
    """
    tokens = requestLine.split(" ")
    method = tokens[0]
    protocol = tokens[-1]
    uri = " ".join(tokens[1:-1])

    # The URI may be absolute (including "http://host.domain.tld").
    # Scheme, location, and fragment are dropped so config lookups work.
    # [Therefore, this assumes all hosts are valid for this server.]
    scheme, location, path, params, qs, frag = urlparse(uri)

    # A path of "*" means "...the request does not apply to a particular
    # resource, but to the server itself", so it is passed through untouched.
    # See http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2
    if path != "*":
        # Put back the ";params" that urlparse split off the path.
        if params:
            path = path + ";" + params

        # Unquote the path (e.g. "/this%20path" -> "/this path").
        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5.1.2
        #
        # But note that "...a URI must be separated into its components
        # before the escaped characters within those components can be
        # safely decoded." http://www.ietf.org/rfc/rfc2396.txt, sec 2.4.2
        # Hence each piece between encoded slashes is unquoted on its own
        # and the pieces are glued back together with "%2F".
        #
        # Note also that cgi.parse_qs will decode the querystring for us.
        segments = re.split("(?i)%2F", path)
        path = "%2F".join([urllib.unquote(segment) for segment in segments])

    return method, path, qs, protocol
350
def parseQueryString(query_string, keep_blank_values=True):
    """Build a paramMap dictionary from a query_string.

    query_string: the raw (still encoded) query string.
    keep_blank_values: passed on to cgi.parse_qs; if true, parameters with
        an empty value are kept (as "") instead of being dropped.

    A query string that consists of nothing but "<digits>,<digits>" is
    taken to be server-side image map coordinates and yields
    {'x': int, 'y': int}. Anything else is parsed with cgi.parse_qs, after
    which every value list holding a single item is replaced by that item.
    """
    # The trailing "$" matters: without it "1,2&a=b" would be taken for
    # coordinates and int("2&a=b") would raise ValueError.
    if re.match(r"[0-9]+,[0-9]+$", query_string):
        # Server-side image map. Map the coords to 'x' and 'y'
        # (like CGI::Request does).
        x, y = query_string.split(",")
        return {'x': int(x), 'y': int(y)}

    pm = cgi.parse_qs(query_string, keep_blank_values)
    # Iterate over a snapshot so the dict can be updated inside the loop.
    for key, val in list(pm.items()):
        if len(val) == 1:
            pm[key] = val[0]
    return pm
364
def paramsFromCGIForm(form):
    """Build a paramMap dictionary from a parsed form.

    form: a mapping-like object offering keys() and item access whose
        values are field objects (or lists of them) with 'filename' and
        'value' attributes -- presumably a cgi.FieldStorage; confirm with
        the caller.

    Each field whose filename is not None is a file upload and is passed
    through as the field object itself; any other field is reduced to its
    'value'. A key that maps to a list of fields yields a list of results.
    """
    def extract(field):
        # Uploads keep the whole field object; regular fields give their
        # value only.
        if field.filename is not None:
            return field
        return field.value

    paramMap = {}
    for key in form.keys():
        entry = form[key]
        if isinstance(entry, list):
            paramMap[key] = [extract(item) for item in entry]
        else:
            paramMap[key] = extract(entry)
    return paramMap
384
385
class CaseInsensitiveDict(dict):
    """A case-insensitive dict subclass.

    Each key is changed on entry to str(key).title(), and every lookup
    normalizes the requested key the same way.

    Note that the constructor is inherited from dict unchanged, so keys
    passed to it are stored as given; use update() or item assignment to
    have keys normalized.
    """

    # Private marker meaning "no default supplied" for pop(); None cannot
    # serve because it is a legitimate default value.
    _MISSING = object()

    def __getitem__(self, key):
        return dict.__getitem__(self, str(key).title())

    def __setitem__(self, key, value):
        dict.__setitem__(self, str(key).title(), value)

    def __delitem__(self, key):
        dict.__delitem__(self, str(key).title())

    def __contains__(self, key):
        return dict.__contains__(self, str(key).title())

    def get(self, key, default=None):
        return dict.get(self, str(key).title(), default)

    def has_key(self, key):
        # Same test as "key in self", via __contains__ rather than
        # dict.has_key.
        return dict.__contains__(self, str(key).title())

    def update(self, E=None, **F):
        """Update self from E and F, normalizing every key.

        E: a mapping (anything with a keys() method) or an iterable of
           (key, value) pairs; optional.
        F: additional items given as keyword arguments.
        """
        if E is not None:
            if hasattr(E, "keys"):
                for k in E.keys():
                    self[k] = E[k]
            else:
                for k, v in E:
                    self[k] = v
        for k in F:
            self[k] = F[k]

    def fromkeys(cls, seq, value=None):
        """Return a new instance with normalized keys from seq, all set to value."""
        newdict = cls()
        for k in seq:
            newdict[k] = value
        return newdict
    fromkeys = classmethod(fromkeys)

    def setdefault(self, key, x=None):
        """Return self[key], first setting it to x if the key is absent."""
        key = str(key).title()
        try:
            return self[key]
        except KeyError:
            self[key] = x
            return x

    def pop(self, key, default=_MISSING):
        """Remove key and return its value.

        If the key is absent, return default when one was given; otherwise
        raise KeyError, as dict.pop does.
        """
        key = str(key).title()
        if default is self._MISSING:
            return dict.pop(self, key)
        return dict.pop(self, key, default)
431
432
433 class HeaderMap(CaseInsensitiveDict):
434     """A dict subclass for HTTP request and response headers.
435     
436     Each key is changed on entry to str(key).title(). This allows headers
437     to be case-insensitive and avoid duplicates.
438     """
439    
440     def elements(self, key):
441         """Return a list of HeaderElements for the given header (or None)."""
442         key = str(key).title()
443         h = self.get(key)
444         if h is None:
445             return []
446         return header_elements(key, h)
447    
448     general_fields = ["Cache-Control", "Connection", "Date", "Pragma",
449                       "Trailer", "Transfer-Encoding", "Upgrade", "Via",
450                       "Warning"]
451     response_fields = ["Accept-Ranges", "Age", "ETag", "Location",
452                        "Proxy-Authenticate", "Retry-After", "Server",
453                        "Vary", "WWW-Authenticate"]
454     entity_fields = ["Allow", "Content-Encoding", "Content-Language",
455                      "Content-Length", "Content-Location", "Content-MD5",
456                      "Content-Range", "Content-Type", "Expires",
457                      "Last-Modified"]
458    
459     order_map = {}
460     for _ in general_fields:
461         order_map[_] = 0
462     for _ in response_fields:
463         order_map[_] = 1
464     for _ in entity_fields:
465         order_map[_] = 2
466    
467     def sorted_list(self, protocol=(1, 0)):
468         """Transform self into a sorted list of (name, value) tuples.
469         
470         From http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2
471             '... it is "good practice" to send general-header fields first,
472             followed by request-header or response-header fields, and ending
473             with the entity-header fields.'
474         """
475        
476         header_list = []
477         for key, valueList in self.iteritems():
478             order = self.order_map.get(key, 3)
479             if not isinstance(valueList, list):
480                 valueList = [valueList]
481             for v in valueList:
482                
483                 if isinstance(v, unicode):
484                     # HTTP/1.0 says, "Words of *TEXT may contain octets
485                     # from character sets other than US-ASCII." and
486                     # "Recipients of header field TEXT containing octets
487                     # outside the US-ASCII character set may assume that
488                     # they represent ISO-8859-1 characters."
489                     try:
490                         v = v.encode("iso-8859-1")
491                     except UnicodeEncodeError:
492                         if protocol >= (1, 1):
493                             # Encode RFC-2047 TEXT
494                             # (e.g. u"\u8200" -> "=?utf-8?b?6IiA?=").
495                             v = Header(v, 'utf-8').encode()
496                         else:
497                             raise
498                 else:
499                     # This coercion should not take any time a