summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Crosta <dcrosta@10gen.com>2012-04-11 10:57:25 -0400
committerDan Crosta <dcrosta@10gen.com>2012-04-12 17:36:24 -0400
commit5c8053463a38a8c554e0d8a02bef8e5942df8aee (patch)
tree46a5b02e7ee2056ad13ba91e44319586ec247e63
parent7c024a1dd496da0e8975e7ab6b6d4cbfa47f5d51 (diff)
downloadmongo-5c8053463a38a8c554e0d8a02bef8e5942df8aee.tar.gz
buildlogger: make everything unicode
-rw-r--r--buildscripts/buildlogger.py2
-rw-r--r--buildscripts/utils.py19
2 files changed, 21 insertions, 0 deletions
diff --git a/buildscripts/buildlogger.py b/buildscripts/buildlogger.py
index a6f91d840f8..02016317348 100644
--- a/buildscripts/buildlogger.py
+++ b/buildscripts/buildlogger.py
@@ -32,6 +32,7 @@ import sys
import time
import traceback
import urllib2
+import utils
try:
import json
@@ -322,6 +323,7 @@ def loop_and_callback(command, callback):
while proc.poll() is None:
try:
line = proc.stdout.readline().strip('\r\n')
+ line = utils.unicode_dammit(line)
callback(line)
except IOError:
# if the signal handler is called while
diff --git a/buildscripts/utils.py b/buildscripts/utils.py
index 413f22681af..be16d0b9f82 100644
--- a/buildscripts/utils.py
+++ b/buildscripts/utils.py
@@ -1,4 +1,5 @@
+import codecs
import re
import socket
import time
@@ -191,3 +192,21 @@ def run_smoke_command(*args):
# otherwise SCons treats it as a list of dependencies.
return [smoke_command(*args)]
+# unicode is a pain. some strings cannot be unicode()'d
+# but we want to just preserve the bytes in a human-readable
+# fashion. this codec error handler will substitute the
+# repr() of the offending bytes into the decoded string
+# at the position they occurred
+def replace_with_repr(unicode_error):
+ offender = unicode_error.object[unicode_error.start:unicode_error.end]
+ return (unicode(repr(offender).strip("'").strip('"')), unicode_error.end)
+
+codecs.register_error('repr', replace_with_repr)
+
+def unicode_dammit(string, encoding='utf8'):
+ # convert a string to a unicode, using the Python
+ # representation of non-ascii bytes when necessary
+ #
+ # name inspired by BeautifulSoup's "UnicodeDammit"
+ return string.decode(encoding, 'repr')
+