diff options
Diffstat (limited to 'lib/git/objects')
-rw-r--r-- | lib/git/objects/base.py | 418 | ||||
-rw-r--r-- | lib/git/objects/commit.py | 139 | ||||
-rw-r--r-- | lib/git/objects/tree.py | 2 | ||||
-rw-r--r-- | lib/git/objects/utils.py | 17 |
4 files changed, 301 insertions, 275 deletions
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index bb15192d..f7043199 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -6,223 +6,223 @@ import os from git.utils import LazyMixin, join_path_native import utils - + _assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" class Object(LazyMixin): - """ - Implements an Object which may be Blobs, Trees, Commits and Tags - - This Object also serves as a constructor for instances of the correct type:: - - inst = Object.new(repo,id) - inst.sha # objects sha in hex - inst.size # objects uncompressed data size - inst.data # byte string containing the whole data of the object - """ - NULL_HEX_SHA = '0'*40 - TYPES = ("blob", "tree", "commit", "tag") - __slots__ = ("repo", "sha", "size", "data" ) - type = None # to be set by subclass - - def __init__(self, repo, id): - """ - Initialize an object by identifying it by its id. All keyword arguments - will be set on demand if None. - - ``repo`` - repository this object is located in - - ``id`` - SHA1 or ref suitable for git-rev-parse - """ - super(Object,self).__init__() - self.repo = repo - self.sha = id + """ + Implements an Object which may be Blobs, Trees, Commits and Tags + + This Object also serves as a constructor for instances of the correct type:: + + inst = Object.new(repo,id) + inst.sha # objects sha in hex + inst.size # objects uncompressed data size + inst.data # byte string containing the whole data of the object + """ + NULL_HEX_SHA = '0'*40 + TYPES = ("blob", "tree", "commit", "tag") + __slots__ = ("repo", "sha", "size", "data" ) + type = None # to be set by subclass + + def __init__(self, repo, id): + """ + Initialize an object by identifying it by its id. All keyword arguments + will be set on demand if None. + + ``repo`` + repository this object is located in + + ``id`` + SHA1 or ref suitable for git-rev-parse + """ + super(Object,self).__init__() + self.repo = repo + self.sha = id - @classmethod - def new(cls, repo, id): - """ - Return - New Object instance of a type appropriate to the object type behind - id. The id of the newly created object will be a hexsha even though - the input id may have been a Reference or Rev-Spec - - Note - This cannot be a __new__ method as it would always call __init__ - with the input id which is not necessarily a hexsha. - """ - hexsha, typename, size = repo.git.get_object_header(id) - obj_type = utils.get_object_type_by_name(typename) - inst = obj_type(repo, hexsha) - inst.size = size - return inst - - def _set_self_from_args_(self, args_dict): - """ - Initialize attributes on self from the given dict that was retrieved - from locals() in the calling method. - - Will only set an attribute on self if the corresponding value in args_dict - is not None - """ - for attr, val in args_dict.items(): - if attr != "self" and val is not None: - setattr( self, attr, val ) - # END set all non-None attributes - - def _set_cache_(self, attr): - """ - Retrieve object information - """ - if attr == "size": - hexsha, typename, self.size = self.repo.git.get_object_header(self.sha) - assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) - elif attr == "data": - hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha) - assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) - else: - super(Object,self)._set_cache_(attr) - - def __eq__(self, other): - """ - Returns - True if the objects have the same SHA1 - """ - return self.sha == other.sha - - def __ne__(self, other): - """ - Returns - True if the objects do not have the same SHA1 - """ - return self.sha != other.sha - - def __hash__(self): - """ - Returns - Hash of our id allowing objects to be used in dicts and sets - """ - return hash(self.sha) - - def __str__(self): - """ - Returns - string of our SHA1 as understood by all git commands - """ - return self.sha - - def __repr__(self): - """ - Returns - string with pythonic representation of our object - """ - return '<git.%s "%s">' % (self.__class__.__name__, self.sha) + @classmethod + def new(cls, repo, id): + """ + Return + New Object instance of a type appropriate to the object type behind + id. The id of the newly created object will be a hexsha even though + the input id may have been a Reference or Rev-Spec + + Note + This cannot be a __new__ method as it would always call __init__ + with the input id which is not necessarily a hexsha. + """ + hexsha, typename, size = repo.git.get_object_header(id) + obj_type = utils.get_object_type_by_name(typename) + inst = obj_type(repo, hexsha) + inst.size = size + return inst + + def _set_self_from_args_(self, args_dict): + """ + Initialize attributes on self from the given dict that was retrieved + from locals() in the calling method. + + Will only set an attribute on self if the corresponding value in args_dict + is not None + """ + for attr, val in args_dict.items(): + if attr != "self" and val is not None: + setattr( self, attr, val ) + # END set all non-None attributes + + def _set_cache_(self, attr): + """ + Retrieve object information + """ + if attr == "size": + hexsha, typename, self.size = self.repo.git.get_object_header(self.sha) + assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) + elif attr == "data": + hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha) + assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) + else: + super(Object,self)._set_cache_(attr) + + def __eq__(self, other): + """ + Returns + True if the objects have the same SHA1 + """ + return self.sha == other.sha + + def __ne__(self, other): + """ + Returns + True if the objects do not have the same SHA1 + """ + return self.sha != other.sha + + def __hash__(self): + """ + Returns + Hash of our id allowing objects to be used in dicts and sets + """ + return hash(self.sha) + + def __str__(self): + """ + Returns + string of our SHA1 as understood by all git commands + """ + return self.sha + + def __repr__(self): + """ + Returns + string with pythonic representation of our object + """ + return '<git.%s "%s">' % (self.__class__.__name__, self.sha) - @property - def data_stream(self): - """ - Returns - File Object compatible stream to the uncompressed raw data of the object - """ - proc = self.repo.git.cat_file(self.type, self.sha, as_process=True) - return utils.ProcessStreamAdapter(proc, "stdout") - - def stream_data(self, ostream): - """ - Writes our data directly to the given output stream - - ``ostream`` - File object compatible stream object. - - Returns - self - """ - self.repo.git.cat_file(self.type, self.sha, output_stream=ostream) - return self + @property + def data_stream(self): + """ + Returns + File Object compatible stream to the uncompressed raw data of the object + """ + proc = self.repo.git.cat_file(self.type, self.sha, as_process=True) + return utils.ProcessStreamAdapter(proc, "stdout") + def stream_data(self, ostream): + """ + Writes our data directly to the given output stream + + ``ostream`` + File object compatible stream object. + + Returns + self + """ + self.repo.git.cat_file(self.type, self.sha, output_stream=ostream) + return self + class IndexObject(Object): - """ - Base for all objects that can be part of the index file , namely Tree, Blob and - SubModule objects - """ - __slots__ = ("path", "mode") - - def __init__(self, repo, sha, mode=None, path=None): - """ - Initialize a newly instanced IndexObject - ``repo`` - is the Repo we are located in + """ + Base for all objects that can be part of the index file , namely Tree, Blob and + SubModule objects + """ + __slots__ = ("path", "mode") + + def __init__(self, repo, sha, mode=None, path=None): + """ + Initialize a newly instanced IndexObject + ``repo`` + is the Repo we are located in - ``sha`` : string - is the git object id as hex sha + ``sha`` : string + is the git object id as hex sha - ``mode`` : int - is the file mode as int, use the stat module to evaluate the infomration + ``mode`` : int + is the file mode as int, use the stat module to evaluate the infomration - ``path`` : str - is the path to the file in the file system, relative to the git repository root, i.e. - file.ext or folder/other.ext - - NOTE - Path may not be set of the index object has been created directly as it cannot - be retrieved without knowing the parent tree. - """ - super(IndexObject, self).__init__(repo, sha) - self._set_self_from_args_(locals()) - if isinstance(mode, basestring): - self.mode = self._mode_str_to_int(mode) - - def __hash__(self): - """ - Returns - Hash of our path as index items are uniquely identifyable by path, not - by their data ! - """ - return hash(self.path) - - def _set_cache_(self, attr): - if attr in IndexObject.__slots__: - # they cannot be retrieved lateron ( not without searching for them ) - raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) - else: - super(IndexObject, self)._set_cache_(attr) - - @classmethod - def _mode_str_to_int(cls, modestr): - """ - ``modestr`` - string like 755 or 644 or 100644 - only the last 6 chars will be used - - Returns - String identifying a mode compatible to the mode methods ids of the - stat module regarding the rwx permissions for user, group and other, - special flags and file system flags, i.e. whether it is a symlink - for example. - """ - mode = 0 - for iteration,char in enumerate(reversed(modestr[-6:])): - mode += int(char) << iteration*3 - # END for each char - return mode - - @property - def name(self): - """ - Returns - Name portion of the path, effectively being the basename - """ - return os.path.basename(self.path) - - @property - def abspath(self): - """ - Returns - Absolute path to this index object in the file system ( as opposed to the - .path field which is a path relative to the git repository ). - - The returned path will be native to the system and contains '\' on windows. - """ - return join_path_native(self.repo.working_tree_dir, self.path) - + ``path`` : str + is the path to the file in the file system, relative to the git repository root, i.e. + file.ext or folder/other.ext + + NOTE + Path may not be set of the index object has been created directly as it cannot + be retrieved without knowing the parent tree. + """ + super(IndexObject, self).__init__(repo, sha) + self._set_self_from_args_(locals()) + if isinstance(mode, basestring): + self.mode = self._mode_str_to_int(mode) + + def __hash__(self): + """ + Returns + Hash of our path as index items are uniquely identifyable by path, not + by their data ! + """ + return hash(self.path) + + def _set_cache_(self, attr): + if attr in IndexObject.__slots__: + # they cannot be retrieved lateron ( not without searching for them ) + raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) + else: + super(IndexObject, self)._set_cache_(attr) + + @classmethod + def _mode_str_to_int(cls, modestr): + """ + ``modestr`` + string like 755 or 644 or 100644 - only the last 6 chars will be used + + Returns + String identifying a mode compatible to the mode methods ids of the + stat module regarding the rwx permissions for user, group and other, + special flags and file system flags, i.e. whether it is a symlink + for example. + """ + mode = 0 + for iteration,char in enumerate(reversed(modestr[-6:])): + mode += int(char) << iteration*3 + # END for each char + return mode + + @property + def name(self): + """ + Returns + Name portion of the path, effectively being the basename + """ + return os.path.basename(self.path) + + @property + def abspath(self): + """ + Returns + Absolute path to this index object in the file system ( as opposed to the + .path field which is a path relative to the git repository ). + + The returned path will be native to the system and contains '\' on windows. + """ + return join_path_native(self.repo.working_tree_dir, self.path) + diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 87eed49b..948e9a54 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -9,12 +9,14 @@ import git.diff as diff import git.stats as stats from git.actor import Actor from tree import Tree +from cStringIO import StringIO import base import utils import time import os -class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): + +class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Serializable): """ Wraps a git Commit object. @@ -91,7 +93,8 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): self._set_self_from_args_(locals()) if parents is not None: - self.parents = tuple( self.__class__(repo, p) for p in parents ) + cls = type(self) + self.parents = tuple(cls(repo, p) for p in parents if not isinstance(p, cls)) # END for each parent to convert if self.sha and tree is not None: @@ -109,20 +112,9 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): We set all values at once. """ if attr in Commit.__slots__: - # prepare our data lines to match rev-list - data_lines = self.data.splitlines() - data_lines.insert(0, "commit %s" % self.sha) - temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next() - self.parents = temp.parents - self.tree = temp.tree - self.author = temp.author - self.authored_date = temp.authored_date - self.author_tz_offset = temp.author_tz_offset - self.committer = temp.committer - self.committed_date = temp.committed_date - self.committer_tz_offset = temp.committer_tz_offset - self.message = temp.message - self.encoding = temp.encoding + # read the data in a chunk, its faster - then provide a file wrapper + hexsha, typename, size, data = self.repo.git.get_object_data(self) + self._deserialize(StringIO(data)) else: super(Commit, self)._set_cache_(attr) @@ -260,59 +252,18 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): iterator returning Commit objects """ stream = proc_or_stream - if not hasattr(stream,'next'): + if not hasattr(stream,'readline'): stream = proc_or_stream.stdout - for line in stream: - commit_tokens = line.split() + while True: + line = stream.readline() + if not line: + break + commit_tokens = line.split() id = commit_tokens[1] assert commit_tokens[0] == "commit" - tree = stream.next().split()[1] - - parents = [] - next_line = None - for parent_line in stream: - if not parent_line.startswith('parent'): - next_line = parent_line - break - # END abort reading parents - parents.append(parent_line.split()[-1]) - # END for each parent line - - author, authored_date, author_tz_offset = utils.parse_actor_and_date(next_line) - committer, committed_date, committer_tz_offset = utils.parse_actor_and_date(stream.next()) - - # empty line - encoding = stream.next() - encoding.strip() - if encoding: - encoding = encoding[encoding.find(' ')+1:] - # END parse encoding - - message_lines = list() - if from_rev_list: - for msg_line in stream: - if not msg_line.startswith(' '): - # and forget about this empty marker - break - # END abort message reading - # strip leading 4 spaces - message_lines.append(msg_line[4:]) - # END while there are message lines - else: - # a stream from our data simply gives us the plain message - for msg_line in stream: - message_lines.append(msg_line) - # END message parsing - message = '\n'.join(message_lines) - - - yield Commit(repo, id, tree, - author, authored_date, author_tz_offset, - committer, committed_date, committer_tz_offset, - message, tuple(parents), - encoding or cls.default_encoding) + yield Commit(repo, id)._deserialize(stream, from_rev_list) # END for each line in stream @@ -393,7 +344,7 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): # assume utf8 encoding enc_section, enc_option = cls.conf_encoding.split('.') - conf_encoding = cr.get_value(enc_section, enc_option, default_encoding) + conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding) author = Actor(author_name, author_email) committer = Actor(committer_name, committer_email) @@ -429,3 +380,61 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): def __repr__(self): return '<git.Commit "%s">' % self.sha + #{ Serializable Implementation + + def _serialize(self, stream): + # for now, this is very inefficient and in fact shouldn't be used like this + return super(Commit, self)._serialize(stream) + + def _deserialize(self, stream, from_rev_list=False): + """:param from_rev_list: if true, the stream format is coming from the rev-list command + Otherwise it is assumed to be a plain data stream from our object""" + self.tree = Tree(self.repo, stream.readline().split()[1], 0, '') + + self.parents = list() + next_line = None + while True: + parent_line = stream.readline() + if not parent_line.startswith('parent'): + next_line = parent_line + break + # END abort reading parents + self.parents.append(type(self)(self.repo, parent_line.split()[-1])) + # END for each parent line + self.parents = tuple(self.parents) + + self.author, self.authored_date, self.author_tz_offset = utils.parse_actor_and_date(next_line) + self.committer, self.committed_date, self.committer_tz_offset = utils.parse_actor_and_date(stream.readline()) + + + # empty line + self.encoding = self.default_encoding + enc = stream.readline() + enc.strip() + if enc: + self.encoding = enc[enc.find(' ')+1:] + # END parse encoding + + message_lines = list() + if from_rev_list: + while True: + msg_line = stream.readline() + if not msg_line.startswith(' '): + # and forget about this empty marker + # cut the last newline to get rid of the artificial newline added + # by rev-list command. Lets hope its just linux style \n + message_lines[-1] = message_lines[-1][:-1] + break + # END abort message reading + # strip leading 4 spaces + message_lines.append(msg_line[4:]) + # END while there are message lines + self.message = ''.join(message_lines) + else: + # a stream from our data simply gives us the plain message + # The end of our message stream is marked with a newline that we strip + self.message = stream.read()[:-1] + # END message parsing + return self + + #} END serializable implementation diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index a9e60981..285d3b5b 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -209,7 +209,7 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable): visit_once = False, ignore_self=1 ): """For documentation, see utils.Traversable.traverse - Trees are set to visist_once = False to gain more performance in the traversal""" + Trees are set to visit_once = False to gain more performance in the traversal""" return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) # List protocol diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py index 7060e293..6d378a72 100644 --- a/lib/git/objects/utils.py +++ b/lib/git/objects/utils.py @@ -280,3 +280,20 @@ class Traversable(object): addToStack( stack, item, branch_first, nd ) # END for each item on work stack + + +class Serializable(object): + """Defines methods to serialize and deserialize objects from and into a data stream""" + + def _serialize(self, stream): + """Serialize the data of this object into the given data stream + :note: a serialized object would ``_deserialize`` into the same objet + :param stream: a file-like object + :return: self""" + raise NotImplementedError("To be implemented in subclass") + + def _deserialize(self, stream): + """Deserialize all information regarding this object from the stream + :param stream: a file-like object + :return: self""" + raise NotImplementedError("To be implemented in subclass") |