diff options
Diffstat (limited to 'lib/git/objects')
-rw-r--r-- | lib/git/objects/base.py | 196 | ||||
-rw-r--r-- | lib/git/objects/blob.py | 48 | ||||
-rw-r--r-- | lib/git/objects/commit.py | 196 | ||||
-rw-r--r-- | lib/git/objects/fun.py | 7 | ||||
-rw-r--r-- | lib/git/objects/submodule.py | 1 | ||||
-rw-r--r-- | lib/git/objects/tag.py | 126 | ||||
-rw-r--r-- | lib/git/objects/tree.py | 78 | ||||
-rw-r--r-- | lib/git/objects/utils.py | 62 |
8 files changed, 299 insertions, 415 deletions
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 90aa8ca2..118bc3ca 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -3,179 +3,140 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -import os from git.utils import LazyMixin, join_path_native, stream_copy -import utils +from utils import get_object_type_by_name +from gitdb.util import ( + hex_to_bin, + bin_to_hex, + basename + ) + +import gitdb.typ as dbtyp _assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" +__all__ = ("Object", "IndexObject") + class Object(LazyMixin): - """ - Implements an Object which may be Blobs, Trees, Commits and Tags - - This Object also serves as a constructor for instances of the correct type:: - - inst = Object.new(repo,id) - inst.sha # objects sha in hex - inst.size # objects uncompressed data size - inst.data # byte string containing the whole data of the object - """ + """Implements an Object which may be Blobs, Trees, Commits and Tags""" NULL_HEX_SHA = '0'*40 NULL_BIN_SHA = '\0'*20 - TYPES = ("blob", "tree", "commit", "tag") - __slots__ = ("repo", "sha", "size", "data" ) + + TYPES = (dbtyp.str_blob_type, dbtyp.str_tree_type, dbtyp.str_commit_type, dbtyp.str_tag_type) + __slots__ = ("repo", "binsha", "size" ) type = None # to be set by subclass - def __init__(self, repo, id): - """ - Initialize an object by identifying it by its id. All keyword arguments - will be set on demand if None. + def __init__(self, repo, binsha): + """Initialize an object by identifying it by its binary sha. + All keyword arguments will be set on demand if None. - ``repo`` - repository this object is located in + :param repo: repository this object is located in - ``id`` - SHA1 or ref suitable for git-rev-parse - """ + :param binsha: 20 byte SHA1""" super(Object,self).__init__() self.repo = repo - self.sha = id + self.binsha = binsha @classmethod def new(cls, repo, id): """ - Return - New Object instance of a type appropriate to the object type behind - id. The id of the newly created object will be a hexsha even though + :return: New Object instance of a type appropriate to the object type behind + id. The id of the newly created object will be a binsha even though the input id may have been a Reference or Rev-Spec - Note - This cannot be a __new__ method as it would always call __init__ - with the input id which is not necessarily a hexsha. - """ + :param id: reference, rev-spec, or hexsha + + :note: This cannot be a __new__ method as it would always call __init__ + with the input id which is not necessarily a binsha.""" hexsha, typename, size = repo.git.get_object_header(id) - obj_type = utils.get_object_type_by_name(typename) - inst = obj_type(repo, hexsha) + inst = get_object_type_by_name(typename)(repo, hex_to_bin(hexsha)) inst.size = size return inst def _set_self_from_args_(self, args_dict): - """ - Initialize attributes on self from the given dict that was retrieved + """Initialize attributes on self from the given dict that was retrieved from locals() in the calling method. Will only set an attribute on self if the corresponding value in args_dict - is not None - """ + is not None""" for attr, val in args_dict.items(): if attr != "self" and val is not None: setattr( self, attr, val ) # END set all non-None attributes def _set_cache_(self, attr): - """ - Retrieve object information - """ + """Retrieve object information""" if attr == "size": - oinfo = self.repo.odb.info(self.sha) + oinfo = self.repo.odb.info(self.binsha) self.size = oinfo.size - assert oinfo.type == self.type, _assertion_msg_format % (self.sha, oinfo.type, self.type) - elif attr == "data": - ostream = self.repo.odb.stream(self.sha) - self.size = ostream.size - self.data = ostream.read() - assert ostream.type == self.type, _assertion_msg_format % (self.sha, ostream.type, self.type) + # assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type) else: super(Object,self)._set_cache_(attr) def __eq__(self, other): - """ - Returns - True if the objects have the same SHA1 - """ - return self.sha == other.sha + """:return: True if the objects have the same SHA1""" + return self.binsha == other.binsha def __ne__(self, other): - """ - Returns - True if the objects do not have the same SHA1 - """ - return self.sha != other.sha + """:return: True if the objects do not have the same SHA1 """ + return self.binsha != other.binsha def __hash__(self): - """ - Returns - Hash of our id allowing objects to be used in dicts and sets - """ - return hash(self.sha) + """:return: Hash of our id allowing objects to be used in dicts and sets""" + return hash(self.binsha) def __str__(self): - """ - Returns - string of our SHA1 as understood by all git commands - """ - return self.sha + """:return: string of our SHA1 as understood by all git commands""" + return bin_to_hex(self.binsha) def __repr__(self): - """ - Returns - string with pythonic representation of our object - """ - return '<git.%s "%s">' % (self.__class__.__name__, self.sha) + """:return: string with pythonic representation of our object""" + return '<git.%s "%s">' % (self.__class__.__name__, self.hexsha) + + @property + def hexsha(self): + """:return: 40 byte hex version of our 20 byte binary sha""" + return bin_to_hex(self.binsha) @property def data_stream(self): """ :return: File Object compatible stream to the uncompressed raw data of the object :note: returned streams must be read in order""" - return self.repo.odb.stream(self.sha) + return self.repo.odb.stream(self.binsha) def stream_data(self, ostream): """Writes our data directly to the given output stream :param ostream: File object compatible stream object. :return: self""" - istream = self.repo.odb.stream(self.sha) + istream = self.repo.odb.stream(self.binsha) stream_copy(istream, ostream) return self class IndexObject(Object): - """ - Base for all objects that can be part of the index file , namely Tree, Blob and - SubModule objects - """ + """Base for all objects that can be part of the index file , namely Tree, Blob and + SubModule objects""" __slots__ = ("path", "mode") - def __init__(self, repo, sha, mode=None, path=None): - """ - Initialize a newly instanced IndexObject - ``repo`` - is the Repo we are located in - - ``sha`` : string - is the git object id as hex sha - - ``mode`` : int - is the file mode as int, use the stat module to evaluate the infomration - - ``path`` : str + def __init__(self, repo, binsha, mode=None, path=None): + """Initialize a newly instanced IndexObject + :param repo: is the Repo we are located in + :param binsha: 20 byte sha1 + :param mode: is the stat compatible file mode as int, use the stat module + to evaluate the infomration + :param path: is the path to the file in the file system, relative to the git repository root, i.e. file.ext or folder/other.ext - - NOTE + :note: Path may not be set of the index object has been created directly as it cannot - be retrieved without knowing the parent tree. - """ - super(IndexObject, self).__init__(repo, sha) + be retrieved without knowing the parent tree.""" + super(IndexObject, self).__init__(repo, binsha) self._set_self_from_args_(locals()) - if isinstance(mode, basestring): - self.mode = self._mode_str_to_int(mode) def __hash__(self): - """ - Returns + """:return: Hash of our path as index items are uniquely identifyable by path, not - by their data ! - """ + by their data !""" return hash(self.path) def _set_cache_(self, attr): @@ -184,41 +145,20 @@ class IndexObject(Object): raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) else: super(IndexObject, self)._set_cache_(attr) + # END hanlde slot attribute - @classmethod - def _mode_str_to_int(cls, modestr): - """ - ``modestr`` - string like 755 or 644 or 100644 - only the last 6 chars will be used - - Returns - String identifying a mode compatible to the mode methods ids of the - stat module regarding the rwx permissions for user, group and other, - special flags and file system flags, i.e. whether it is a symlink - for example. - """ - mode = 0 - for iteration, char in enumerate(reversed(modestr[-6:])): - mode += int(char) << iteration*3 - # END for each char - return mode - @property def name(self): - """ - Returns - Name portion of the path, effectively being the basename - """ - return os.path.basename(self.path) + """:return: Name portion of the path, effectively being the basename""" + return basename(self.path) @property def abspath(self): """ - Returns + :return: Absolute path to this index object in the file system ( as opposed to the .path field which is a path relative to the git repository ). - The returned path will be native to the system and contains '\' on windows. - """ + The returned path will be native to the system and contains '\' on windows. """ return join_path_native(self.repo.working_tree_dir, self.path) diff --git a/lib/git/objects/blob.py b/lib/git/objects/blob.py index 3f91d078..8263e9a2 100644 --- a/lib/git/objects/blob.py +++ b/lib/git/objects/blob.py @@ -4,33 +4,33 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -import mimetypes +from mimetypes import guess_type import base -class Blob(base.IndexObject): - """A Blob encapsulates a git blob object""" - DEFAULT_MIME_TYPE = "text/plain" - type = "blob" - - __slots__ = tuple() +__all__ = ('Blob', ) - - @property - def mime_type(self): - """ - The mime type of this file (based on the filename) +class Blob(base.IndexObject): + """A Blob encapsulates a git blob object""" + DEFAULT_MIME_TYPE = "text/plain" + type = "blob" - Returns - str - - NOTE - Defaults to 'text/plain' in case the actual file type is unknown. - """ - guesses = None - if self.path: - guesses = mimetypes.guess_type(self.path) - return guesses and guesses[0] or self.DEFAULT_MIME_TYPE + __slots__ = "data" + def _set_cache_(self, attr): + if attr == "data": + ostream = self.repo.odb.stream(self.binsha) + self.size = ostream.size + self.data = ostream.read() + # assert ostream.type == self.type, _assertion_msg_format % (self.binsha, ostream.type, self.type) + else: + super(Blob, self)._set_cache_(attr) + # END handle data - def __repr__(self): - return '<git.Blob "%s">' % self.sha + @property + def mime_type(self): + """ :return:String describing the mime type of this file (based on the filename) + :note: Defaults to 'text/plain' in case the actual file type is unknown. """ + guesses = None + if self.path: + guesses = guess_type(self.path) + return guesses and guesses[0] or self.DEFAULT_MIME_TYPE diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index f30a6dea..f365c994 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -8,24 +8,37 @@ from git.utils import ( Iterable, Stats, ) - -import git.diff as diff +from git.diff import Diffable from tree import Tree from gitdb import IStream from cStringIO import StringIO + import base -import utils -import time +from gitdb.util import ( + hex_to_bin + ) +from utils import ( + Traversable, + Serializable, + get_user_id, + parse_date, + Actor, + altz_to_utctz_str, + parse_actor_and_date + ) +from time import ( + time, + altzone + ) import os +__all__ = ('Commit', ) -class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Serializable): - """ - Wraps a git Commit object. +class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): + """Wraps a git Commit object. This class will act lazily on some of its attributes and will query the - value on demand only if it involves calling the git binary. - """ + value on demand only if it involves calling the git binary.""" # ENVIRONMENT VARIABLES # read when creating new commits @@ -52,22 +65,19 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri "author", "authored_date", "author_tz_offset", "committer", "committed_date", "committer_tz_offset", "message", "parents", "encoding") - _id_attribute_ = "sha" + _id_attribute_ = "binsha" - def __init__(self, repo, sha, tree=None, author=None, authored_date=None, author_tz_offset=None, + def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None, committer=None, committed_date=None, committer_tz_offset=None, message=None, parents=None, encoding=None): - """ - Instantiate a new Commit. All keyword arguments taking None as default will - be implicitly set if id names a valid sha. + """Instantiate a new Commit. All keyword arguments taking None as default will + be implicitly set on first query. - The parameter documentation indicates the type of the argument after a colon ':'. - - :param sha: is the sha id of the commit or a ref + :param binsha: 20 byte sha1 :param parents: tuple( Commit, ... ) is a tuple of commit ids or actual Commits :param tree: Tree - is the corresponding tree id or an actual Tree + Tree object :param author: Actor is the author string ( will be implicitly converted into an Actor object ) :param authored_date: int_seconds_since_epoch @@ -86,13 +96,15 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri is the commit message :param encoding: string encoding of the message, defaults to UTF-8 + :param parents: + List or tuple of Commit objects which are our parent(s) in the commit + dependency graph :return: git.Commit :note: Timezone information is in the same format and in the same sign as what time.altzone returns. The sign is inverted compared to git's - UTC timezone. - """ - super(Commit,self).__init__(repo, sha) + UTC timezone.""" + super(Commit,self).__init__(repo, binsha) self._set_self_from_args_(locals()) @classmethod @@ -100,80 +112,61 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri return commit.parents def _set_cache_(self, attr): - """ Called by LazyMixin superclass when the given uninitialized member needs - to be set. - We set all values at once. """ if attr in Commit.__slots__: # read the data in a chunk, its faster - then provide a file wrapper - # Could use self.data, but lets try to get it with less calls - hexsha, typename, size, data = self.repo.git.get_object_data(self) - self._deserialize(StringIO(data)) + binsha, typename, self.size, stream = self.repo.odb.stream(self.binsha) + self._deserialize(StringIO(stream.read())) else: super(Commit, self)._set_cache_(attr) + # END handle attrs @property def summary(self): - """ - Returns - First line of the commit message. - """ + """:return: First line of the commit message""" return self.message.split('\n', 1)[0] def count(self, paths='', **kwargs): - """ - Count the number of commits reachable from this commit + """Count the number of commits reachable from this commit - ``paths`` + :param paths: is an optinal path or a list of paths restricting the return value to commits actually containing the paths - ``kwargs`` + :param kwargs: Additional options to be passed to git-rev-list. They must not alter the ouput style of the command, or parsing will yield incorrect results - Returns - int - """ + :return: int defining the number of reachable commits""" # yes, it makes a difference whether empty paths are given or not in our case # as the empty paths version will ignore merge commits for some reason. if paths: - return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).splitlines()) + return len(self.repo.git.rev_list(self.hexsha, '--', paths, **kwargs).splitlines()) else: - return len(self.repo.git.rev_list(self.sha, **kwargs).splitlines()) + return len(self.repo.git.rev_list(self.hexsha, **kwargs).splitlines()) @property def name_rev(self): """ - Returns + :return: String describing the commits hex sha based on the closest Reference. - Mostly useful for UI purposes - """ + Mostly useful for UI purposes""" return self.repo.git.name_rev(self) @classmethod def iter_items(cls, repo, rev, paths='', **kwargs): - """ - Find all commits matching the given criteria. - - ``repo`` - is the Repo - - ``rev`` - revision specifier, see git-rev-parse for viable options + """Find all commits matching the given criteria. - ``paths`` + :param repo: is the Repo + :param rev: revision specifier, see git-rev-parse for viable options + :param paths: is an optinal path or list of paths, if set only Commits that include the path or paths will be considered - - ``kwargs`` + :param kwargs: optional keyword arguments to git rev-list where ``max_count`` is the maximum number of commits to fetch ``skip`` is the number of commits to skip ``since`` all commits since i.e. '1970-01-01' - - Returns - iterator yielding Commit items - """ + :return: iterator yielding Commit items""" if 'pretty' in kwargs: raise ValueError("--pretty cannot be used as parsing expects single sha's only") # END handle pretty @@ -186,45 +179,36 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri return cls._iter_from_process_or_stream(repo, proc) def iter_parents(self, paths='', **kwargs): - """ - Iterate _all_ parents of this commit. - - ``paths`` + """Iterate _all_ parents of this commit. + :param paths: Optional path or list of paths limiting the Commits to those that contain at least one of the paths - - ``kwargs`` - All arguments allowed by git-rev-list + :param kwargs: All arguments allowed by git-rev-list - Return: - Iterator yielding Commit objects which are parents of self - """ + :return: Iterator yielding Commit objects which are parents of self """ # skip ourselves skip = kwargs.get("skip", 1) if skip == 0: # skip ourselves skip = 1 kwargs['skip'] = skip - return self.iter_items( self.repo, self, paths, **kwargs ) + return self.iter_items(self.repo, self, paths, **kwargs) @property def stats(self): - """ - Create a git stat from changes between this commit and its first parent + """Create a git stat from changes between this commit and its first parent or from all changes done if this is the very first commit. - Return - git.Stats - """ + :return: git.Stats""" if not self.parents: - text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True) + text = self.repo.git.diff_tree(self.hexsha, '--', numstat=True, root=True) text2 = "" for line in text.splitlines()[1:]: (insertions, deletions, filename) = line.split("\t") text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) text = text2 else: - text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True) + text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, '--', numstat=True) return Stats._list_from_string(self.repo, text) @classmethod @@ -244,14 +228,14 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri line = readline() if not line: break - sha = line.strip() - if len(sha) > 40: + hexsha = line.strip() + if len(hexsha) > 40: # split additional information, as returned by bisect for instance - sha, rest = line.split(None, 1) + hexsha, rest = line.split(None, 1) # END handle extra info - assert len(sha) == 40, "Invalid line: %s" % sha - yield Commit(repo, sha) + assert len(hexsha) == 40, "Invalid line: %s" % hexsha + yield Commit(repo, hex_to_bin(hexsha)) # END for each line in stream @@ -260,7 +244,8 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri """Commit the given tree, creating a commit object. :param repo: Repo object the commit should be part of - :param tree: Sha of a tree or a tree object to become the tree of the new commit + :param tree: Tree object or hex or bin sha + the tree of the new commit :param message: Commit message. It may be an empty string if no message is provided. It will be converted to a string in any case. :param parent_commits: @@ -279,8 +264,7 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri :note: Additional information about the committer and Author are taken from the environment or from the git configuration, see git-commit-tree for - more information - """ + more information""" parents = parent_commits if parent_commits is None: try: @@ -300,7 +284,7 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri # COMMITER AND AUTHOR INFO cr = repo.config_reader() env = os.environ - default_email = utils.get_user_id() + default_email = get_user_id() default_name = default_email.split('@')[0] conf_name = cr.get_value('user', cls.conf_name, default_name) @@ -313,19 +297,19 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri committer_email = env.get(cls.env_committer_email, conf_email) # PARSE THE DATES - unix_time = int(time.time()) - offset = time.altzone + unix_time = int(time()) + offset = altzone author_date_str = env.get(cls.env_author_date, '') if author_date_str: - author_time, author_offset = utils.parse_date(author_date_str) + author_time, author_offset = parse_date(author_date_str) else: author_time, author_offset = unix_time, offset # END set author time committer_date_str = env.get(cls.env_committer_date, '') if committer_date_str: - committer_time, committer_offset = utils.parse_date(committer_date_str) + committer_time, committer_offset = parse_date(committer_date_str) else: committer_time, committer_offset = unix_time, offset # END set committer time @@ -334,12 +318,18 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri enc_section, enc_option = cls.conf_encoding.split('.') conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding) - author = utils.Actor(author_name, author_email) - committer = utils.Actor(committer_name, committer_email) + author = Actor(author_name, author_email) + committer = Actor(committer_name, committer_email) + + # if the tree is no object, make sure we create one - otherwise + # the created commit object is invalid + if isinstance(tree, str): + tree = repo.tree(tree) + # END tree conversion # CREATE NEW COMMIT - new_commit = cls(repo, cls.NULL_HEX_SHA, tree, + new_commit = cls(repo, cls.NULL_BIN_SHA, tree, author, author_time, author_offset, committer, committer_time, committer_offset, message, parent_commits, conf_encoding) @@ -350,7 +340,7 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri stream.seek(0) istream = repo.odb.store(IStream(cls.type, streamlen, stream)) - new_commit.sha = istream.sha + new_commit.binsha = istream.binsha if head: try: @@ -366,14 +356,6 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri return new_commit - - def __str__(self): - """ Convert commit to string which is SHA1 """ - return self.sha - - def __repr__(self): - return '<git.Commit "%s">' % self.sha - #{ Serializable Implementation def _serialize(self, stream): @@ -387,11 +369,11 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri fmt = "%s %s <%s> %s %s\n" write(fmt % ("author", a.name, a.email, self.authored_date, - utils.altz_to_utctz_str(self.author_tz_offset))) + altz_to_utctz_str(self.author_tz_offset))) write(fmt % ("committer", c.name, c.email, self.committed_date, - utils.altz_to_utctz_str(self.committer_tz_offset))) + altz_to_utctz_str(self.committer_tz_offset))) if self.encoding != self.default_encoding: write("encoding %s\n" % self.encoding) @@ -404,7 +386,7 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri """:param from_rev_list: if true, the stream format is coming from the rev-list command Otherwise it is assumed to be a plain data stream from our object""" readline = stream.readline - self.tree = Tree(self.repo, readline().split()[1], Tree.tree_id<<12, '') + self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '') self.parents = list() next_line = None @@ -414,12 +396,12 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri next_line = parent_line break # END abort reading parents - self.parents.append(type(self)(self.repo, parent_line.split()[-1])) + self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1]))) # END for each parent line self.parents = tuple(self.parents) - self.author, self.authored_date, self.author_tz_offset = utils.parse_actor_and_date(next_line) - self.committer, self.committed_date, self.committer_tz_offset = utils.parse_actor_and_date(readline()) + self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line) + self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline()) # now we can have the encoding line, or an empty line followed by the optional diff --git a/lib/git/objects/fun.py b/lib/git/objects/fun.py index 5b39ab0c..2d0fd634 100644 --- a/lib/git/objects/fun.py +++ b/lib/git/objects/fun.py @@ -1,9 +1,10 @@ """Module with functions which are supposed to be as fast as possible""" +from stat import S_ISDIR __all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive', 'traverse_tree_recursive') -from stat import S_ISDIR + def tree_to_stream(entries, write): @@ -99,7 +100,7 @@ def _to_full_path(item, path_prefix): def traverse_trees_recursive(odb, tree_shas, path_prefix): """ - :return: list with entries according to the given tree-shas. + :return: list with entries according to the given binary tree-shas. The result is encoded in a list of n tuple|None per blob/commit, (n == len(tree_shas)), where * [0] == 20 byte sha @@ -165,7 +166,7 @@ def traverse_trees_recursive(odb, tree_shas, path_prefix): def traverse_tree_recursive(odb, tree_sha, path_prefix): """ - :return: list of entries of the tree pointed to by tree_sha. An entry + :return: list of entries of the tree pointed to by the binary tree_sha. An entry has the following format: * [0] 20 byte sha * [1] mode as int diff --git a/lib/git/objects/submodule.py b/lib/git/objects/submodule.py index 4742d448..1f571a48 100644 --- a/lib/git/objects/submodule.py +++ b/lib/git/objects/submodule.py @@ -1,5 +1,6 @@ import base +__all__ = ("Submodule", ) class Submodule(base.IndexObject): """Implements access to a git submodule. They are special in that their sha diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py index 96363db6..702eae35 100644 --- a/lib/git/objects/tag.py +++ b/lib/git/objects/tag.py @@ -3,77 +3,63 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -""" -Module containing all object based types. -""" +""" Module containing all object based types. """ import base -import utils +from gitdb.util import hex_to_bin +from utils import ( + get_object_type_by_name, + parse_actor_and_date + ) -class TagObject(base.Object): - """ - Non-Lightweight tag carrying additional information about an object we are pointing - to. - """ - type = "tag" - __slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" ) - - def __init__(self, repo, sha, object=None, tag=None, - tagger=None, tagged_date=None, tagger_tz_offset=None, message=None): - """ - Initialize a tag object with additional data - - ``repo`` - repository this object is located in - - ``sha`` - SHA1 or ref suitable for git-rev-parse - - ``object`` - Object instance of object we are pointing to - - ``tag`` - name of this tag - - ``tagger`` - Actor identifying the tagger - - ``tagged_date`` : int_seconds_since_epoch - is the DateTime of the tag creation - use time.gmtime to convert - it into a different format - - ``tagged_tz_offset``: int_seconds_west_of_utc - is the timezone that the authored_date is in +__all__ = ("TagObject", ) - """ - super(TagObject, self).__init__(repo, sha ) - self._set_self_from_args_(locals()) - - def _set_cache_(self, attr): - """ - Cache all our attributes at once - """ - if attr in TagObject.__slots__: - lines = self.data.splitlines() - - obj, hexsha = lines[0].split(" ") # object <hexsha> - type_token, type_name = lines[1].split(" ") # type <type_name> - self.object = utils.get_object_type_by_name(type_name)(self.repo, hexsha) - - self.tag = lines[2][4:] # tag <tag name> - - tagger_info = lines[3][7:]# tagger <actor> <date> - self.tagger, self.tagged_date, self.tagger_tz_offset = utils.parse_actor_and_date(tagger_info) - - # line 4 empty - it could mark the beginning of the next header - # in csse there really is no message, it would not exist. Otherwise - # a newline separates header from message - if len(lines) > 5: - self.message = "\n".join(lines[5:]) - else: - self.message = '' - # END check our attributes - else: - super(TagObject, self)._set_cache_(attr) - - +class TagObject(base.Object): + """Non-Lightweight tag carrying additional information about an object we are pointing to.""" + type = "tag" + __slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" ) + + def __init__(self, repo, binsha, object=None, tag=None, + tagger=None, tagged_date=None, tagger_tz_offset=None, message=None): + """Initialize a tag object with additional data + + :param repo: repository this object is located in + :param binsha: 20 byte SHA1 + :param object: Object instance of object we are pointing to + :param tag: name of this tag + :param tagger: Actor identifying the tagger + :param tagged_date: int_seconds_since_epoch + is the DateTime of the tag creation - use time.gmtime to convert + it into a different format + :param tagged_tz_offset: int_seconds_west_of_utc is the timezone that the + authored_date is in, in a format similar to time.altzone""" + super(TagObject, self).__init__(repo, binsha ) + self._set_self_from_args_(locals()) + + def _set_cache_(self, attr): + """Cache all our attributes at once""" + if attr in TagObject.__slots__: + ostream = self.repo.odb.stream(self.binsha) + lines = ostream.read().splitlines() + + obj, hexsha = lines[0].split(" ") # object <hexsha> + type_token, type_name = lines[1].split(" ") # type <type_name> + self.object = get_object_type_by_name(type_name)(self.repo, hex_to_bin(hexsha)) + + self.tag = lines[2][4:] # tag <tag name> + + tagger_info = lines[3][7:]# tagger <actor> <date> + self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info) + + # line 4 empty - it could mark the beginning of the next header + # in case there really is no message, it would not exist. Otherwise + # a newline separates header from message + if len(lines) > 5: + self.message = "\n".join(lines[5:]) + else: + self.message = '' + # END check our attributes + else: + super(TagObject, self)._set_cache_(attr) + + diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index 6b1d13c1..056d3da9 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -3,23 +3,24 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php - -import os import utils -import base +from base import IndexObject from blob import Blob from submodule import Submodule import git.diff as diff -join = os.path.join from fun import ( tree_entries_from_data, tree_to_stream ) -from gitdb.util import to_bin_sha -from binascii import b2a_hex +from gitdb.util import ( + to_bin_sha, + join + ) + +__all__ = ("TreeModifier", "Tree") class TreeModifier(object): """A utility class providing methods to alter the underlying cache in a list-like @@ -99,12 +100,8 @@ class TreeModifier(object): #} END mutators -class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializable): - """ - Tress represent a ordered list of Blobs and other Trees. Hence it can be - accessed like a list. - - Tree's will cache their contents after first retrieval to improve efficiency. +class Tree(IndexObject, diff.Diffable, utils.Traversable, utils.Serializable): + """Tree objects represent an ordered list of Blobs and other Trees. ``Tree as a list``:: @@ -113,8 +110,6 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl You may as well access by index blob = tree[0] - - """ type = "tree" @@ -134,8 +129,8 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl } - def __init__(self, repo, sha, mode=tree_id<<12, path=None): - super(Tree, self).__init__(repo, sha, mode, path) + def __init__(self, repo, binsha, mode=tree_id<<12, path=None): + super(Tree, self).__init__(repo, binsha, mode, path) @classmethod def _get_intermediate_items(cls, index_object): @@ -146,39 +141,28 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl def _set_cache_(self, attr): if attr == "_cache": # Set the data when we need it - self._cache = tree_entries_from_data(self.data) + ostream = self.repo.odb.stream(self.binsha) + self._cache = tree_entries_from_data(ostream.read()) else: super(Tree, self)._set_cache_(attr) + # END handle attribute def _iter_convert_to_object(self, iterable): - """Iterable yields tuples of (hexsha, mode, name), which will be converted + """Iterable yields tuples of (binsha, mode, name), which will be converted to the respective object representation""" for binsha, mode, name in iterable: path = join(self.path, name) - type_id = mode >> 12 try: - yield self._map_id_to_type[type_id](self.repo, b2a_hex(binsha), mode, path) + yield self._map_id_to_type[mode >> 12](self.repo, binsha, mode, path) except KeyError: - raise TypeError( "Unknown type %i found in tree data for path '%s'" % (type_id, path)) + raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path)) # END for each item def __div__(self, file): - """ - Find the named object in this tree's contents - - Examples:: - - >>> Repo('/path/to/python-git').tree/'lib' - <git.Tree "6cc23ee138be09ff8c28b07162720018b244e95e"> - >>> Repo('/path/to/python-git').tree/'README.txt' - <git.Blob "8b1e02c0fb554eed2ce2ef737a68bb369d7527df"> - - Returns - ``git.Blob`` or ``git.Tree`` + """Find the named object in this tree's contents + :return: ``git.Blob`` or ``git.Tree`` or ``git.Submodule`` - Raise - KeyError if given file or tree does not exist in tree - """ + :raise KeyError: if given file or tree does not exist in tree""" msg = "Blob or Tree named %r not found" if '/' in file: tree = self @@ -201,29 +185,20 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl else: for info in self._cache: if info[2] == file: # [2] == name - return self._map_id_to_type[info[1] >> 12](self.repo, b2a_hex(info[0]), info[1], join(self.path, info[2])) + return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join(self.path, info[2])) # END for each obj raise KeyError( msg % file ) # END handle long paths - def __repr__(self): - return '<git.Tree "%s">' % self.sha - @property def trees(self): - """ - Returns - list(Tree, ...) list of trees directly below this tree - """ + """:return: list(Tree, ...) list of trees directly below this tree""" return [ i for i in self if i.type == "tree" ] @property def blobs(self): - """ - Returns - list(Blob, ...) list of blobs directly below this tree - """ + """:return: list(Blob, ...) list of blobs directly below this tree""" return [ i for i in self if i.type == "blob" ] @property @@ -238,7 +213,6 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl prune = lambda i,d: False, depth = -1, branch_first=True, visit_once = False, ignore_self=1 ): """For documentation, see utils.Traversable.traverse - Trees are set to visit_once = False to gain more performance in the traversal""" return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) @@ -255,7 +229,7 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl def __getitem__(self, item): if isinstance(item, int): info = self._cache[item] - return self._map_id_to_type[info[1] >> 12](self.repo, b2a_hex(info[0]), info[1], join(self.path, info[2])) + return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join(self.path, info[2])) if isinstance(item, basestring): # compatability @@ -266,9 +240,9 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl def __contains__(self, item): - if isinstance(item, base.IndexObject): + if isinstance(item, IndexObject): for info in self._cache: - if item.sha == info[0]: + if item.binsha == info[0]: return True # END compare sha # END for each entry diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py index 072662ee..c0ddd6e6 100644 --- a/lib/git/objects/utils.py +++ b/lib/git/objects/utils.py @@ -3,9 +3,7 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -""" -Module for general utility functions -""" +"""Module for general utility functions""" import re from collections import deque as Deque import platform @@ -20,18 +18,28 @@ __all__ = ('get_object_type_by_name', 'get_user_id', 'parse_date', 'parse_actor_ #{ Functions +def mode_str_to_int(modestr): + """ + :param modestr: string like 755 or 644 or 100644 - only the last 6 chars will be used + :return: + String identifying a mode compatible to the mode methods ids of the + stat module regarding the rwx permissions for user, group and other, + special flags and file system flags, i.e. whether it is a symlink + for example.""" + mode = 0 + for iteration, char in enumerate(reversed(modestr[-6:])): + mode += int(char) << iteration*3 + # END for each char + return mode + def get_object_type_by_name(object_type_name): """ - Returns - type suitable to handle the given object type name. + :return: type suitable to handle the given object type name. Use the type to create new instances. - ``object_type_name`` - Member of TYPES + :param object_type_name: Member of TYPES - Raises - ValueError: In case object_type_name is unknown - """ + :raise ValueError: In case object_type_name is unknown""" if object_type_name == "commit": import commit return commit.Commit @@ -169,14 +177,11 @@ def parse_date(string_date): _re_actor_epoch = re.compile(r'^.+? (.*) (\d+) ([+-]\d+).*$') def parse_actor_and_date(line): - """ - Parse out the actor (author or committer) info from a line like:: + """Parse out the actor (author or committer) info from a line like:: - author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700 + author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700 - Returns - [Actor, int_seconds_since_epoch, int_timezone_offset] - """ + :return: [Actor, int_seconds_since_epoch, int_timezone_offset]""" m = _re_actor_epoch.search(line) actor, epoch, offset = m.groups() return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset)) @@ -238,13 +243,11 @@ class Actor(object): class ProcessStreamAdapter(object): - """ - Class wireing all calls to the contained Process instance. + """Class wireing all calls to the contained Process instance. Use this type to hide the underlying process to provide access only to a specified stream. The process is usually wrapped into an AutoInterrupt class to kill - it if the instance goes out of scope. - """ + it if the instance goes out of scope.""" __slots__ = ("_proc", "_stream") def __init__(self, process, stream_name): self._proc = process @@ -274,36 +277,33 @@ class Traversable(object): def traverse( self, predicate = lambda i,d: True, prune = lambda i,d: False, depth = -1, branch_first=True, visit_once = True, ignore_self=1, as_edge = False ): - """ - ``Returns`` - iterator yieling of items found when traversing self + """:return: iterator yieling of items found when traversing self - ``predicate`` - f(i,d) returns False if item i at depth d should not be included in the result + :param predicate: f(i,d) returns False if item i at depth d should not be included in the result - ``prune`` + :param prune: f(i,d) return True if the search should stop at item i at depth d. Item i will not be returned. - ``depth`` + :param depth: define at which level the iteration should not go deeper if -1, there is no limit if 0, you would effectively only get self, the root of the iteration i.e. if 1, you would only get the first level of predessessors/successors - ``branch_first`` + :param branch_first: if True, items will be returned branch first, otherwise depth first - ``visit_once`` + :param visit_once: if True, items will only be returned once, although they might be encountered several times. Loops are prevented that way. - ``ignore_self`` + :param ignore_self: if True, self will be ignored and automatically pruned from the result. Otherwise it will be the first item to be returned. If as_edge is True, the source of the first edge is None - ``as_edge`` + :param as_edge: if True, return a pair of items, first being the source, second the destinatination, i.e. tuple(src, dest) with the edge spanning from source to destination""" |