From 2e5c9bc2ff7bbb1b2496038860a120ecae15591b Mon Sep 17 00:00:00 2001
From: "R. Tyler Ballance" <tyler@monkeypox.org>
Date: Wed, 14 Oct 2009 23:04:37 -0700
Subject: Remove the encode/decode calls in DummyResponse.write()/getvalue()
 and take a more optimistic approach

This borrows some concepts from the "slide-compat" branch that I maintain
for Slide, Inc., for gracefully handling less-than-ideal string-encoding
situations (as is the case for Slide).

Make DummyResponse.getvalue() optimistic: first try to u''.join() the
list of arbitrary string objects (unicode, or str in various encodings),
and only on a UnicodeDecodeError run each chunk through the "safeConvert"
function (blech) to handle encoded str() objects.
---
 cheetah/DummyTransaction.py | 40 +++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/cheetah/DummyTransaction.py b/cheetah/DummyTransaction.py
index 6726a63..f84ade4 100644
--- a/cheetah/DummyTransaction.py
+++ b/cheetah/DummyTransaction.py
@@ -8,6 +8,7 @@ Warning: This may be deprecated in the future, please do not rely on any
 specific DummyTransaction or DummyResponse behavior
 '''
 
+import logging
 import types
 
 class DummyResponseFailure(Exception):
@@ -24,31 +25,40 @@ class DummyResponse(object):
 
     def flush(self):
         pass
-        
+
+    def safeConvert(self, chunk):
+        # Exceptionally gross, but the safest way
+        # I've found to ensure I get a legit unicode object
+        if not chunk:
+            return u''
+        if isinstance(chunk, unicode):
+            return chunk
+        try:
+            return chunk.decode('utf-8', 'strict')
+        except UnicodeDecodeError:
+            try:
+                return chunk.decode('latin-1', 'strict')
+            except UnicodeDecodeError:
+                return chunk.decode('ascii', 'ignore')
+        except AttributeError:
+            return unicode(chunk, errors='ignore')
+        return chunk
+
     def write(self, value):
-        if isinstance(value, unicode):
-            value = value.encode('utf-8')
         self._outputChunks.append(value)
 
-
     def writeln(self, txt):
         write(txt)
         write('\n')
 
     def getvalue(self, outputChunks=None):
         chunks = outputChunks or self._outputChunks
-        try: 
-            return ''.join(chunks).decode('utf-8')
+        try:
+            return u''.join(chunks)
         except UnicodeDecodeError, ex:
-            nonunicode = [c for c in chunks if not isinstance(c, unicode)]
-            raise DummyResponseFailure('''Looks like you're trying to mix encoded strings with Unicode strings
-            (most likely utf-8 encoded ones)
-
-            This can happen if you're using the `EncodeUnicode` filter, or if you're manually
-            encoding strings as utf-8 before passing them in on the searchList (possible offenders: 
-            %s) 
-            (%s)''' % (nonunicode, ex))
-
+            logging.debug('Trying to work around a UnicodeDecodeError in getvalue()')
+            logging.debug('...perhaps you could fix "%s" while you\'re debugging')
+            return ''.join((self.safeConvert(c) for c in chunks))
 
     def writelines(self, *lines):
         ## not used
-- 
cgit v1.2.1