Download Install Tutorial Docs FAQ Tools Wiki License Team IRC Planet Involvement Shop Book

root/trunk/cherrypy/lib/encoding.py

Revision 1993 (checked in by fumanchu, 4 months ago)

Fix for #832 (Failure in test_encoding). I'm reasonably sure this is what we want.

  • Property svn:eol-style set to native
Line 
1 import struct
2 import time
3
4 import cherrypy
5
6
def decode(encoding=None, default_encoding='utf-8'):
    """Decode cherrypy.request.params from str to unicode objects.

    When 'encoding' is not given, the charset is picked in this order:
    the "charset" parameter of the request's Content-Type header, then
    "ISO-8859-1" if the media type starts with "text/", and finally
    'default_encoding'.

    If decoding with the chosen charset raises UnicodeDecodeError, every
    param is decoded again from scratch using ISO-8859-1.
    """
    charset = encoding
    if not charset:
        elements = cherrypy.request.headers.elements("Content-Type")
        if elements:
            content_type = elements[0]
            charset = content_type.params.get("charset", None)
            if not charset:
                if content_type.value.lower().startswith("text/"):
                    # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1
                    # Without an explicit charset parameter from the sender,
                    # subtypes of the "text" media type default to
                    # "ISO-8859-1" when received via HTTP.
                    charset = "ISO-8859-1"

        # Nothing usable in the headers: use the caller's default.
        charset = charset or default_encoding

    try:
        decode_params(charset)
    except UnicodeDecodeError:
        # IE and Firefox don't supply a charset when submitting form
        # params with a CT of application/x-www-form-urlencoded, so the
        # guess above can still be wrong. Retry everything with
        # ISO-8859-1, since that seems to be preferred.
        decode_params("ISO-8859-1")
33
def decode_params(encoding):
    """Replace cherrypy.request.params with a copy decoded via 'encoding'.

    Values that have a 'file' attribute (uploaded files) are passed
    through untouched. Lists have each element decoded, plain str values
    are decoded directly, and anything else is kept as-is.

    The request's params are only reassigned once every value has been
    decoded, so a decoding error leaves the original params in place and
    the caller can retry with another charset.
    """
    def _decoded(value):
        if hasattr(value, 'file'):
            # Uploaded file: leave it alone.
            return value
        if isinstance(value, list):
            return [item.decode(encoding) for item in value]
        if isinstance(value, str):
            return value.decode(encoding)
        return value

    new_params = dict([(name, _decoded(raw))
                       for name, raw in cherrypy.request.params.items()])

    # All or nothing: we only get here if no value failed to decode.
    cherrypy.request.params = new_params
49
50
51 # Encoding
52
def encode(encoding=None, errors='strict', text_only=True, add_charset=True):
    """Encode the response body and record the charset used.

    Runs at most once per request: a '_encoding_attempted' flag is set on
    cherrypy.request on the first call and later calls return immediately.

    Nothing happens when the response has no Content-Type header, or when
    'text_only' is true and the media type does not start with "text/".
    Otherwise find_acceptable_charset() is called with 'encoding' and
    'errors', its result is stored as the "charset" param of the parsed
    Content-Type element, and, if 'add_charset' is true, the response's
    Content-Type header is rewritten from that element.
    """
    request = cherrypy.request
    if getattr(request, "_encoding_attempted", False):
        # Guard against running twice.
        return
    request._encoding_attempted = True

    elements = cherrypy.response.headers.elements("Content-Type")
    if not elements:
        return

    content_type = elements[0]
    if text_only and not content_type.value.lower().startswith("text/"):
        return

    # Set "charset=..." param on response Content-Type header
    charset = find_acceptable_charset(encoding, errors=errors)
    content_type.params['charset'] = charset
    if add_charset:
        cherrypy.response.headers["Content-Type"] = str(content_type)
67
def encode_stream(encoding, errors='strict'):
    """Encode a streaming response body.

    The current cherrypy.response.body is wrapped in a generator that
    encodes each unicode chunk with 'encoding'/'errors' as it is consumed;
    non-unicode chunks pass through unchanged. Because encoding is lazy,
    failures can only surface while the stream is being written out, so
    this always returns True.
    """
    source = cherrypy.response.body

    def encoded_chunks():
        for piece in source:
            if isinstance(piece, unicode):
                yield piece.encode(encoding, errors)
            else:
                yield piece

    cherrypy.response.body = encoded_chunks()
    return True
81
def encode_string(encoding, errors='strict'):
    """Encode a buffered response body.

    Every unicode chunk of cherrypy.response.body is encoded with
    'encoding'/'errors'; other chunks are kept unchanged. Returns True
    and installs the encoded list as the new body on success. Returns
    False, leaving the body unassigned, if a LookupError or UnicodeError
    is raised along the way.
    """
    encoded = []
    try:
        for piece in cherrypy.response.body:
            if isinstance(piece, unicode):
                piece = piece.encode(encoding, errors)
            encoded.append(piece)
        cherrypy.response.body = encoded
    except (LookupError, UnicodeError):
        return False
    return True
95
def find_acceptable_charset(encoding=None, default_encoding='utf-8', errors='strict'):
    """Encode the response body with a charset the client accepts.

    Picks encode_stream() for streaming responses; otherwise collapses the
    body, picks encode_string() and drops any Content-Length header.
    Candidate charsets are then tried until the chosen encoder succeeds.

    encoding: if not None, the only charset that may be used. It is tried
        when the request has no Accept-Charset header, or that header
        contains "*" or the (lowercased) encoding.
    default_encoding: charset used when the client sent no Accept-Charset
        header, or when a "*" element is reached.
    errors: error-handling scheme handed to the encoder.

    Returns the name of the charset that was used.

    Raises cherrypy.HTTPError(500) if the client sent no Accept-Charset
    header and the body cannot be encoded with 'default_encoding'.
    Raises cherrypy.HTTPError(406) in every other case where no candidate
    charset worked.
    """
    # Template for the 500 raised when the client accepts anything but the
    # body still cannot be encoded with our own default.
    failmsg = "Response body could not be encoded with %r."

    response = cherrypy.response

    if response.stream:
        encoder = encode_stream
    else:
        response.collapse_body()
        encoder = encode_string
        if response.headers.has_key("Content-Length"):
            # Delete Content-Length header so finalize() recalcs it.
            # Encoded strings may be of different lengths from their
            # unicode equivalents, and even from each other. For example:
            # >>> t = u"\u7007\u3040"
            # >>> len(t)
            # 2
            # >>> len(t.encode("UTF-8"))
            # 6
            # >>> len(t.encode("utf7"))
            # 8
            del response.headers["Content-Length"]

    # Parse the Accept-Charset request header, and try to provide one
    # of the requested charsets (in order of user preference).
    encs = cherrypy.request.headers.elements('Accept-Charset')
    charsets = [enc.value.lower() for enc in encs]
    attempted_charsets = []

    if encoding is not None:
        # If specified, force this encoding to be used, or fail.
        encoding = encoding.lower()
        if (not charsets) or "*" in charsets or encoding in charsets:
            # Record the attempt so the 406 message below can report it.
            attempted_charsets.append(encoding)
            if encoder(encoding, errors):
                return encoding
    else:
        if not encs:
            # Any character-set is acceptable.
            if encoder(default_encoding, errors):
                return default_encoding
            else:
                raise cherrypy.HTTPError(500, failmsg % default_encoding)
        else:
            if "*" not in charsets:
                # If no "*" is present in an Accept-Charset field, then all
                # character sets not explicitly mentioned get a quality
                # value of 0, except for ISO-8859-1, which gets a quality
                # value of 1 if not explicitly mentioned.
                iso = 'iso-8859-1'
                if iso not in charsets:
                    attempted_charsets.append(iso)
                    if encoder(iso, errors):
                        return iso

            for element in encs:
                # Elements with a qvalue of 0 are explicitly refused.
                if element.qvalue > 0:
                    if element.value == "*":
                        # Matches any charset. Try our default.
                        if default_encoding not in attempted_charsets:
                            attempted_charsets.append(default_encoding)
                            if encoder(default_encoding, errors):
                                return default_encoding
                    else:
                        encoding = element.value
                        if encoding not in attempted_charsets:
                            attempted_charsets.append(encoding)
                            if encoder(encoding, errors):
                                return encoding

    # No suitable encoding found.
    ac = cherrypy.request.headers.get('Accept-Charset')
    if ac is None:
        msg = "Your client did not send an Accept-Charset header."
    else:
        msg = "Your client sent this Accept-Charset header: %s." % ac
    msg += " We tried these charsets: %s." % ", ".join(attempted_charsets)
    raise cherrypy.HTTPError(406, msg)
171
172
173 # GZIP
174
def compress(body, compress_level):
    """Compress 'body' at the given compress_level.

    body: an iterable of byte strings.
    compress_level: the zlib compression level passed to compressobj().

    Yields the successive pieces of a gzip stream (RFC 1952): the header
    fields, one deflate chunk per input chunk (possibly empty), the final
    flush, and the CRC-32 / size trailer.
    """
    import zlib

    # All fixed-size fields are built with struct.pack so that they are
    # byte strings regardless of how the interpreter treats str literals.
    mask32 = int("FFFFFFFF", 16)

    yield struct.pack("<BB", 0x1F, 0x8B)    # ID1, ID2: gzip magic header
    yield struct.pack("<B", 8)              # CM: compression method (deflate)
    yield struct.pack("<B", 0)              # FLG: no optional header fields
    yield struct.pack("<L", int(time.time()) & mask32)  # MTIME
    yield struct.pack("<B", 2)              # XFL: maximum compression
    yield struct.pack("<B", 0xFF)           # OS: unknown

    # 0 is zlib.crc32's default starting value, i.e. the CRC of no data.
    crc = 0
    size = 0
    # Negative wbits: emit a raw deflate stream, since the gzip header and
    # trailer are produced by hand here.
    zobj = zlib.compressobj(compress_level,
                            zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, 0)
    for line in body:
        size += len(line)
        crc = zlib.crc32(line, crc)
        yield zobj.compress(line)
    yield zobj.flush()

    # zlib.crc32 may return a signed or an unsigned value depending on the
    # Python version and platform; masking to 32 bits and packing as
    # unsigned produces the same four bytes in every case.
    yield struct.pack("<L", crc & mask32)
    # ISIZE: input size modulo 2**32.
    yield struct.pack("<L", size & mask32)
198
def decompress(body):
    """Return the decompressed contents of the gzip byte string 'body'."""
    # Imported locally: at module level the name 'gzip' refers to the
    # gzip() function defined in this module, not the standard library.
    import gzip
    try:
        from StringIO import StringIO as buffer_type
    except ImportError:
        # No StringIO module available: use the io module's byte buffer.
        from io import BytesIO as buffer_type

    zfile = gzip.GzipFile(mode='rb', fileobj=buffer_type(body))
    try:
        return zfile.read()
    finally:
        # Close the reader even if the data turns out to be corrupt.
        zfile.close()
209
210
def gzip(compress_level=9, mime_types=['text/html', 'text/plain']):
    """Try to gzip the response body if Content-Type in mime_types.

    cherrypy.response.headers['Content-Type'] must be set to one of the
    values in the mime_types arg before calling this function; a missing
    Content-Type header is treated as matching none of them.

    The response is left untouched if any of the following hold:
        * The response body is empty
        * The request is marked as served from cache
        * The client sends no Accept-Encoding request header
        * 'identity' is reached in Accept-Encoding with a qvalue != 0
        * 'gzip' or 'x-gzip' is reached with a qvalue of 0
        * 'gzip' or 'x-gzip' is reached but the Content-Type is not in
          mime_types

    If Accept-Encoding is present but none of its elements is 'identity'
    (with a qvalue != 0), 'gzip' or 'x-gzip', the response is turned into
    a 406 via cherrypy.HTTPError(...).set_response().

    On success the body is replaced by a compress() generator,
    Content-Encoding is set to 'gzip', "Accept-Encoding" is added to Vary
    and any Content-Length header is removed.

    The default mime_types list is only read here, never modified.
    """
    response = cherrypy.response
    if not response.body:
        # Response body is empty (might be a 304 for instance)
        return

    # If returning cached content (which should already have been gzipped),
    # don't re-zip.
    if getattr(cherrypy.request, "cached", False):
        return

    acceptable = cherrypy.request.headers.elements('Accept-Encoding')
    if not acceptable:
        # If no Accept-Encoding field is present in a request,
        # the server MAY assume that the client will accept any
        # content coding. In this case, if "identity" is one of
        # the available content-codings, then the server SHOULD use
        # the "identity" content-coding, unless it has additional
        # information that a different content-coding is meaningful
        # to the client.
        return

    # Keep only the media type: drop any ";param=..." suffix and the
    # whitespace around it. Default to '' so that a response without a
    # Content-Type header simply matches no mime type instead of failing.
    ct = response.headers.get('Content-Type', '').split(';')[0].strip()
    for coding in acceptable:
        if coding.value == 'identity' and coding.qvalue != 0:
            return
        if coding.value in ('gzip', 'x-gzip'):
            if coding.qvalue == 0:
                return
            if ct in mime_types:
                # The response now depends on the request's
                # Accept-Encoding header, so list it in Vary (once).
                varies = response.headers.get("Vary", "")
                varies = [x.strip() for x in varies.split(",") if x.strip()]
                if "Accept-Encoding" not in varies:
                    varies.append("Accept-Encoding")
                response.headers['Vary'] = ", ".join(varies)

                response.headers['Content-Encoding'] = 'gzip'
                # Install a generator that compresses the page
                response.body = compress(response.body, compress_level)
                if response.headers.has_key("Content-Length"):
                    # Delete Content-Length header so finalize() recalcs it.
                    del response.headers["Content-Length"]
            return
    # None of the codings the client listed is one we can produce.
    cherrypy.HTTPError(406, "identity, gzip").set_response()
Note: See TracBrowser for help on using the browser.

Hosted by WebFaction

Log in as guest/cpguest to create tickets