From ae2ff0f9d704dc776a1934f72a339da206a9fff4 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 4 Jan 2015 19:50:28 +0100 Subject: Dumb brute force conversion of all types. However, StringIO really is BytesIO in most cases, and py2.7 should run but doesn't. This should be made to work first. --- git/objects/fun.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'git/objects/fun.py') diff --git a/git/objects/fun.py b/git/objects/fun.py index 416a52e6..db2ec7c2 100644 --- a/git/objects/fun.py +++ b/git/objects/fun.py @@ -1,5 +1,9 @@ """Module with functions which are supposed to be as fast as possible""" from stat import S_ISDIR +from git.compat import ( + xrange, + text_type +) __all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive', 'traverse_tree_recursive') @@ -28,7 +32,7 @@ def tree_to_stream(entries, write): # hence we must convert to an utf8 string for it to work properly. # According to my tests, this is exactly what git does, that is it just # takes the input literally, which appears to be utf8 on linux. 
- if isinstance(name, unicode): + if isinstance(name, text_type): name = name.encode("utf8") write("%s %s\0%s" % (mode_str, name, binsha)) # END for each item -- cgit v1.2.1 From 8a308613467a1510f8dac514624abae4e10c0779 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 5 Jan 2015 16:44:54 +0100 Subject: Fixes test_blob and improved commit writing/reading --- git/objects/fun.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'git/objects/fun.py') diff --git a/git/objects/fun.py b/git/objects/fun.py index db2ec7c2..f92a4c06 100644 --- a/git/objects/fun.py +++ b/git/objects/fun.py @@ -1,6 +1,9 @@ """Module with functions which are supposed to be as fast as possible""" from stat import S_ISDIR from git.compat import ( + byte_ord, + force_bytes, + defenc, xrange, text_type ) @@ -17,13 +20,13 @@ def tree_to_stream(entries, write): bit_mask = 7 # 3 bits set for binsha, mode, name in entries: - mode_str = '' + mode_str = b'' for i in xrange(6): mode_str = chr(((mode >> (i * 3)) & bit_mask) + ord_zero) + mode_str # END for each 8 octal value # git slices away the first octal if its zero - if mode_str[0] == '0': + if byte_ord(mode_str[0]) == ord_zero: mode_str = mode_str[1:] # END save a byte @@ -33,16 +36,16 @@ def tree_to_stream(entries, write): # According to my tests, this is exactly what git does, that is it just # takes the input literally, which appears to be utf8 on linux. 
if isinstance(name, text_type): - name = name.encode("utf8") - write("%s %s\0%s" % (mode_str, name, binsha)) + name = name.encode(defenc) + write(b''.join(mode_str, b' ', name, b'\0', binsha)) # END for each item - def tree_entries_from_data(data): """Reads the binary representation of a tree and returns tuples of Tree items - :param data: data block with tree data + :param data: data block with tree data (as bytes) :return: list(tuple(binsha, mode, tree_relative_path), ...)""" ord_zero = ord('0') + space_ord = ord(' ') len_data = len(data) i = 0 out = list() @@ -52,10 +55,10 @@ def tree_entries_from_data(data): # read mode # Some git versions truncate the leading 0, some don't # The type will be extracted from the mode later - while data[i] != ' ': + while byte_ord(data[i]) != space_ord: # move existing mode integer up one level being 3 bits # and add the actual ordinal value of the character - mode = (mode << 3) + (ord(data[i]) - ord_zero) + mode = (mode << 3) + (byte_ord(data[i]) - ord_zero) i += 1 # END while reading mode @@ -65,7 +68,7 @@ def tree_entries_from_data(data): # parse name, it is NULL separated ns = i - while data[i] != '\0': + while byte_ord(data[i]) != 0: i += 1 # END while not reached NULL @@ -73,12 +76,9 @@ def tree_entries_from_data(data): # Only use the respective unicode object if the byte stream was encoded name = data[ns:i] try: - name_enc = name.decode("utf-8") + name = name.decode(defenc) except UnicodeDecodeError: pass - else: - if len(name) > len(name_enc): - name = name_enc # END handle encoding # byte is NULL, get next 20 -- cgit v1.2.1 From e1060a2a8c90c0730c3541811df8f906dac510a7 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 5 Jan 2015 17:59:22 +0100 Subject: test_commit works once again --- git/objects/fun.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'git/objects/fun.py') diff --git a/git/objects/fun.py b/git/objects/fun.py index f92a4c06..610bdb5c 100644 --- a/git/objects/fun.py +++ 
b/git/objects/fun.py @@ -2,7 +2,6 @@ from stat import S_ISDIR from git.compat import ( byte_ord, - force_bytes, defenc, xrange, text_type @@ -37,7 +36,7 @@ def tree_to_stream(entries, write): # takes the input literally, which appears to be utf8 on linux. if isinstance(name, text_type): name = name.encode(defenc) - write(b''.join(mode_str, b' ', name, b'\0', binsha)) + write(b''.join((mode_str, b' ', name, b'\0', binsha))) # END for each item def tree_entries_from_data(data): -- cgit v1.2.1 From 4a67e4e49c4e7b82e416067df69c72656213e886 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 5 Jan 2015 18:21:49 +0100 Subject: test_fun works --- git/objects/fun.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'git/objects/fun.py') diff --git a/git/objects/fun.py b/git/objects/fun.py index 610bdb5c..ba8dbcf4 100644 --- a/git/objects/fun.py +++ b/git/objects/fun.py @@ -4,7 +4,8 @@ from git.compat import ( byte_ord, defenc, xrange, - text_type + text_type, + bchr ) __all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive', @@ -21,7 +22,7 @@ def tree_to_stream(entries, write): for binsha, mode, name in entries: mode_str = b'' for i in xrange(6): - mode_str = chr(((mode >> (i * 3)) & bit_mask) + ord_zero) + mode_str + mode_str = bchr(((mode >> (i * 3)) & bit_mask) + ord_zero) + mode_str # END for each 8 octal value # git slices away the first octal if its zero -- cgit v1.2.1 From e0c65d6638698f4e3a9e726efca8c0bcf466cd62 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 6 Jan 2015 15:38:20 +0100 Subject: Make flake8 happy --- git/objects/fun.py | 1 + 1 file changed, 1 insertion(+) (limited to 'git/objects/fun.py') diff --git a/git/objects/fun.py b/git/objects/fun.py index ba8dbcf4..c04f80b5 100644 --- a/git/objects/fun.py +++ b/git/objects/fun.py @@ -40,6 +40,7 @@ def tree_to_stream(entries, write): write(b''.join((mode_str, b' ', name, b'\0', binsha))) # END for each item + def tree_entries_from_data(data): 
"""Reads the binary representation of a tree and returns tuples of Tree items :param data: data block with tree data (as bytes) -- cgit v1.2.1