summaryrefslogtreecommitdiff
path: root/buildscripts/utils.py
diff options
context:
space:
mode:
authorDan Crosta <dcrosta@10gen.com>2012-04-11 10:57:25 -0400
committerDan Crosta <dcrosta@10gen.com>2012-04-12 17:36:24 -0400
commit5c8053463a38a8c554e0d8a02bef8e5942df8aee (patch)
tree46a5b02e7ee2056ad13ba91e44319586ec247e63 /buildscripts/utils.py
parent7c024a1dd496da0e8975e7ab6b6d4cbfa47f5d51 (diff)
downloadmongo-5c8053463a38a8c554e0d8a02bef8e5942df8aee.tar.gz
buildlogger: make everything unicode
Diffstat (limited to 'buildscripts/utils.py')
-rw-r--r--buildscripts/utils.py19
1 files changed, 19 insertions, 0 deletions
diff --git a/buildscripts/utils.py b/buildscripts/utils.py
index 413f22681af..be16d0b9f82 100644
--- a/buildscripts/utils.py
+++ b/buildscripts/utils.py
@@ -1,4 +1,5 @@
+import codecs
import re
import socket
import time
@@ -191,3 +192,21 @@ def run_smoke_command(*args):
# otherwise SCons treats it as a list of dependencies.
return [smoke_command(*args)]
+# Unicode is a pain: some byte strings cannot be unicode()'d,
+# but we still want to preserve their bytes in a human-readable
+# fashion. This codec error handler substitutes the repr() of
+# the offending bytes into the decoded string at the position
+# where they occurred.
+def replace_with_repr(unicode_error):
+ offender = unicode_error.object[unicode_error.start:unicode_error.end]
+ return (unicode(repr(offender).strip("'").strip('"')), unicode_error.end)
+
+codecs.register_error('repr', replace_with_repr)
+
+def unicode_dammit(string, encoding='utf8'):
+ # convert a byte string to unicode, substituting the Python
+ # repr() of any bytes that cannot be decoded with `encoding`
+ #
+ # name inspired by BeautifulSoup's "UnicodeDammit"
+ return string.decode(encoding, 'repr')
+