summary | refs | log | tree | commit | diff
path: root/git/objects/fun.py
diff options
context:
space:
mode:
author: Sebastian Thiel <byronimo@gmail.com> 2016-10-16 14:34:03 +0200
committer: Sebastian Thiel <byronimo@gmail.com> 2016-10-16 14:34:03 +0200
commit: 93d530234a4f5533aa99c3b897bb56d375c2ae60 (patch)
tree: 3ac26de9dcd1cc918dbe12034616398de4338ff7 /git/objects/fun.py
parent: ff389af9374116c47e3dc4f8a5979784bf1babff (diff)
download: gitpython-93d530234a4f5533aa99c3b897bb56d375c2ae60.tar.gz
fix(unicode): use surrogateescape in bytes.decode
That way, we try to decode using the default encoding (usually UTF-8), but simply keep any bytes that don't match within the resulting unicode string (the 'surrogateescape' error handler maps each undecodable byte to a lone surrogate code point). This allows for lossless decode/encode cycles, provided the same error handler is used when encoding, while still ensuring that decoding never fails.

NOTE: I was too lazy to create a test that would verify it, but manually executed https://github.com/petertodd/gitpython-unicode-error.

Fixes #532
Diffstat (limited to 'git/objects/fun.py')
-rw-r--r-- git/objects/fun.py | 6
1 file changed, 1 insertion, 5 deletions
diff --git a/git/objects/fun.py b/git/objects/fun.py
index 5c0f4819..a144ba7e 100644
--- a/git/objects/fun.py
+++ b/git/objects/fun.py
@@ -76,11 +76,7 @@ def tree_entries_from_data(data):
# default encoding for strings in git is utf8
# Only use the respective unicode object if the byte stream was encoded
name = data[ns:i]
- try:
- name = name.decode(defenc)
- except UnicodeDecodeError:
- pass
- # END handle encoding
+ name = name.decode(defenc, 'surrogateescape')
# byte is NULL, get next 20
i += 1