diff options
author | Dan Crosta <dcrosta@10gen.com> | 2012-04-11 10:57:25 -0400 |
---|---|---|
committer | Dan Crosta <dcrosta@10gen.com> | 2012-04-12 17:36:24 -0400 |
commit | 5c8053463a38a8c554e0d8a02bef8e5942df8aee (patch) | |
tree | 46a5b02e7ee2056ad13ba91e44319586ec247e63 | |
parent | 7c024a1dd496da0e8975e7ab6b6d4cbfa47f5d51 (diff) | |
download | mongo-5c8053463a38a8c554e0d8a02bef8e5942df8aee.tar.gz |
buildlogger: make everything unicode
-rw-r--r-- | buildscripts/buildlogger.py | 2 | ||||
-rw-r--r-- | buildscripts/utils.py | 19 |
2 files changed, 21 insertions, 0 deletions
diff --git a/buildscripts/buildlogger.py b/buildscripts/buildlogger.py index a6f91d840f8..02016317348 100644 --- a/buildscripts/buildlogger.py +++ b/buildscripts/buildlogger.py @@ -32,6 +32,7 @@ import sys import time import traceback import urllib2 +import utils try: import json @@ -322,6 +323,7 @@ def loop_and_callback(command, callback): while proc.poll() is None: try: line = proc.stdout.readline().strip('\r\n') + line = utils.unicode_dammit(line) callback(line) except IOError: # if the signal handler is called while diff --git a/buildscripts/utils.py b/buildscripts/utils.py index 413f22681af..be16d0b9f82 100644 --- a/buildscripts/utils.py +++ b/buildscripts/utils.py @@ -1,4 +1,5 @@ +import codecs import re import socket import time @@ -191,3 +192,21 @@ def run_smoke_command(*args): # otherwise SCons treats it as a list of dependencies. return [smoke_command(*args)] +# unicode is a pain. some strings cannot be unicode()'d +# but we want to just preserve the bytes in a human-readable +# fashion. this codec error handler will substitute the +# repr() of the offending bytes into the decoded string +# at the position they occurred +def replace_with_repr(unicode_error): + offender = unicode_error.object[unicode_error.start:unicode_error.end] + return (unicode(repr(offender).strip("'").strip('"')), unicode_error.end) + +codecs.register_error('repr', replace_with_repr) + +def unicode_dammit(string, encoding='utf8'): + # convert a string to a unicode, using the Python + # representation of non-ascii bytes when necessary + # + # name inspired by BeautifulSoup's "UnicodeDammit" + return string.decode(encoding, 'repr') + |