diff options
author | Dan Crosta <dcrosta@10gen.com> | 2012-04-11 10:57:25 -0400 |
---|---|---|
committer | Dan Crosta <dcrosta@10gen.com> | 2012-04-12 17:36:24 -0400 |
commit | 5c8053463a38a8c554e0d8a02bef8e5942df8aee (patch) | |
tree | 46a5b02e7ee2056ad13ba91e44319586ec247e63 /buildscripts/utils.py | |
parent | 7c024a1dd496da0e8975e7ab6b6d4cbfa47f5d51 (diff) | |
download | mongo-5c8053463a38a8c554e0d8a02bef8e5942df8aee.tar.gz |
buildlogger: make everything unicode
Diffstat (limited to 'buildscripts/utils.py')
-rw-r--r-- | buildscripts/utils.py | 19 |
1 files changed, 19 insertions, 0 deletions
```diff
diff --git a/buildscripts/utils.py b/buildscripts/utils.py
index 413f22681af..be16d0b9f82 100644
--- a/buildscripts/utils.py
+++ b/buildscripts/utils.py
@@ -1,4 +1,5 @@
 
+import codecs
 import re
 import socket
 import time
@@ -191,3 +192,21 @@ def run_smoke_command(*args):
     # otherwise SCons treats it as a list of dependencies.
     return [smoke_command(*args)]
 
+# unicode is a pain. some strings cannot be unicode()'d
+# but we want to just preserve the bytes in a human-readable
+# fashion. this codec error handler will substitute the
+# repr() of the offending bytes into the decoded string
+# at the position they occurred
+def replace_with_repr(unicode_error):
+    offender = unicode_error.object[unicode_error.start:unicode_error.end]
+    return (unicode(repr(offender).strip("'").strip('"')), unicode_error.end)
+
+codecs.register_error('repr', replace_with_repr)
+
+def unicode_dammit(string, encoding='utf8'):
+    # convert a string to a unicode, using the Python
+    # representation of non-ascii bytes when necessary
+    #
+    # name inpsired by BeautifulSoup's "UnicodeDammit"
+    return string.decode(encoding, 'repr')
+
```