summary | refs | log | tree | commit | diff
path: root/git
diff options
context:
space:
mode:
author: Sebastian Thiel <byronimo@gmail.com> 2011-04-07 20:17:00 +0200
committer: Sebastian Thiel <byronimo@gmail.com> 2011-04-07 20:17:00 +0200
commit: a12a7618a1f6f61a4c97ddf4cc422158c3fa72ba (patch)
tree: 2b2e0e9991ddede152556f7954cd6e4c6121be97 /git
parent: e77d2d0ebb9487b696835f219e4a23a558462a55 (diff)
download: gitpython-a12a7618a1f6f61a4c97ddf4cc422158c3fa72ba.tar.gz
Updated objects to use the ones defined in gitdb as basis. Only the submodule implementation is left in git-python as it requires some advanced features. No tests were run yet.
Diffstat (limited to 'git')
m--------- git/ext/gitdb 0
-rw-r--r-- git/objects/base.py 166
-rw-r--r-- git/objects/blob.py 25
-rw-r--r-- git/objects/commit.py 267
-rw-r--r-- git/objects/fun.py 197
-rw-r--r-- git/objects/submodule/base.py 4
-rw-r--r-- git/objects/tag.py 71
-rw-r--r-- git/objects/tree.py 273
-rw-r--r-- git/test/objects/test_blob.py (renamed from git/test/test_blob.py) 0
-rw-r--r-- git/test/objects/test_commit.py (renamed from git/test/test_commit.py) 0
-rw-r--r-- git/test/objects/test_submodule.py (renamed from git/test/test_submodule.py) 0
-rw-r--r-- git/test/objects/test_tree.py (renamed from git/test/test_tree.py) 0
-rw-r--r-- git/test/test_actor.py 36
-rw-r--r-- git/util.py 66
14 files changed, 37 insertions, 1068 deletions
diff --git a/git/ext/gitdb b/git/ext/gitdb
-Subproject 7c4d3d6b000930134019515c83c10b140330d31
+Subproject dba71a0c727aba19319d3e868d0ca4b8009bcef
diff --git a/git/objects/base.py b/git/objects/base.py
index 5f2f7809..42d7b600 100644
--- a/git/objects/base.py
+++ b/git/objects/base.py
@@ -3,170 +3,6 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.util import LazyMixin, join_path_native, stream_copy
-from util import get_object_type_by_name
-from gitdb.util import (
- hex_to_bin,
- bin_to_hex,
- basename
- )
-
-import gitdb.typ as dbtyp
-
-_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r"
-
+from gitdb.object.base import Object, IndexObject
__all__ = ("Object", "IndexObject")
-class Object(LazyMixin):
- """Implements an Object which may be Blobs, Trees, Commits and Tags"""
- NULL_HEX_SHA = '0'*40
- NULL_BIN_SHA = '\0'*20
-
- TYPES = (dbtyp.str_blob_type, dbtyp.str_tree_type, dbtyp.str_commit_type, dbtyp.str_tag_type)
- __slots__ = ("repo", "binsha", "size" )
- type = None # to be set by subclass
-
- def __init__(self, repo, binsha):
- """Initialize an object by identifying it by its binary sha.
- All keyword arguments will be set on demand if None.
-
- :param repo: repository this object is located in
-
- :param binsha: 20 byte SHA1"""
- super(Object,self).__init__()
- self.repo = repo
- self.binsha = binsha
- assert len(binsha) == 20, "Require 20 byte binary sha, got %r, len = %i" % (binsha, len(binsha))
-
- @classmethod
- def new(cls, repo, id):
- """
- :return: New Object instance of a type appropriate to the object type behind
- id. The id of the newly created object will be a binsha even though
- the input id may have been a Reference or Rev-Spec
-
- :param id: reference, rev-spec, or hexsha
-
- :note: This cannot be a __new__ method as it would always call __init__
- with the input id which is not necessarily a binsha."""
- return repo.rev_parse(str(id))
-
- @classmethod
- def new_from_sha(cls, repo, sha1):
- """
- :return: new object instance of a type appropriate to represent the given
- binary sha1
- :param sha1: 20 byte binary sha1"""
- if sha1 == cls.NULL_BIN_SHA:
- # the NULL binsha is always the root commit
- return get_object_type_by_name('commit')(repo, sha1)
- #END handle special case
- oinfo = repo.odb.info(sha1)
- inst = get_object_type_by_name(oinfo.type)(repo, oinfo.binsha)
- inst.size = oinfo.size
- return inst
-
- def _set_cache_(self, attr):
- """Retrieve object information"""
- if attr == "size":
- oinfo = self.repo.odb.info(self.binsha)
- self.size = oinfo.size
- # assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type)
- else:
- super(Object,self)._set_cache_(attr)
-
- def __eq__(self, other):
- """:return: True if the objects have the same SHA1"""
- return self.binsha == other.binsha
-
- def __ne__(self, other):
- """:return: True if the objects do not have the same SHA1 """
- return self.binsha != other.binsha
-
- def __hash__(self):
- """:return: Hash of our id allowing objects to be used in dicts and sets"""
- return hash(self.binsha)
-
- def __str__(self):
- """:return: string of our SHA1 as understood by all git commands"""
- return bin_to_hex(self.binsha)
-
- def __repr__(self):
- """:return: string with pythonic representation of our object"""
- return '<git.%s "%s">' % (self.__class__.__name__, self.hexsha)
-
- @property
- def hexsha(self):
- """:return: 40 byte hex version of our 20 byte binary sha"""
- return bin_to_hex(self.binsha)
-
- @property
- def data_stream(self):
- """ :return: File Object compatible stream to the uncompressed raw data of the object
- :note: returned streams must be read in order"""
- return self.repo.odb.stream(self.binsha)
-
- def stream_data(self, ostream):
- """Writes our data directly to the given output stream
- :param ostream: File object compatible stream object.
- :return: self"""
- istream = self.repo.odb.stream(self.binsha)
- stream_copy(istream, ostream)
- return self
-
-
-class IndexObject(Object):
- """Base for all objects that can be part of the index file , namely Tree, Blob and
- SubModule objects"""
- __slots__ = ("path", "mode")
-
- # for compatability with iterable lists
- _id_attribute_ = 'path'
-
- def __init__(self, repo, binsha, mode=None, path=None):
- """Initialize a newly instanced IndexObject
- :param repo: is the Repo we are located in
- :param binsha: 20 byte sha1
- :param mode: is the stat compatible file mode as int, use the stat module
- to evaluate the infomration
- :param path:
- is the path to the file in the file system, relative to the git repository root, i.e.
- file.ext or folder/other.ext
- :note:
- Path may not be set of the index object has been created directly as it cannot
- be retrieved without knowing the parent tree."""
- super(IndexObject, self).__init__(repo, binsha)
- if mode is not None:
- self.mode = mode
- if path is not None:
- self.path = path
-
- def __hash__(self):
- """:return:
- Hash of our path as index items are uniquely identifyable by path, not
- by their data !"""
- return hash(self.path)
-
- def _set_cache_(self, attr):
- if attr in IndexObject.__slots__:
- # they cannot be retrieved lateron ( not without searching for them )
- raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ )
- else:
- super(IndexObject, self)._set_cache_(attr)
- # END hanlde slot attribute
-
- @property
- def name(self):
- """:return: Name portion of the path, effectively being the basename"""
- return basename(self.path)
-
- @property
- def abspath(self):
- """
- :return:
- Absolute path to this index object in the file system ( as opposed to the
- .path field which is a path relative to the git repository ).
-
- The returned path will be native to the system and contains '\' on windows. """
- return join_path_native(self.repo.working_tree_dir, self.path)
-
diff --git a/git/objects/blob.py b/git/objects/blob.py
index f52d1a53..38834436 100644
--- a/git/objects/blob.py
+++ b/git/objects/blob.py
@@ -4,29 +4,10 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from mimetypes import guess_type
-import base
+from git.util import RepoAliasMixin
+from gitdb.object.blob import Blob as GitDB_Blob
__all__ = ('Blob', )
-class Blob(base.IndexObject):
- """A Blob encapsulates a git blob object"""
- DEFAULT_MIME_TYPE = "text/plain"
- type = "blob"
-
- # valid blob modes
- executable_mode = 0100755
- file_mode = 0100644
- link_mode = 0120000
-
+class Blob(GitDB_Blob, RepoAliasMixin):
__slots__ = tuple()
-
- @property
- def mime_type(self):
- """
- :return: String describing the mime type of this file (based on the filename)
- :note: Defaults to 'text/plain' in case the actual file type is unknown. """
- guesses = None
- if self.path:
- guesses = guess_type(self.path)
- return guesses and guesses[0] or self.DEFAULT_MIME_TYPE
diff --git a/git/objects/commit.py b/git/objects/commit.py
index fd4187b0..d932ab1a 100644
--- a/git/objects/commit.py
+++ b/git/objects/commit.py
@@ -3,142 +3,28 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-from git.util import (
- Actor,
- Iterable,
- Stats,
- )
+from git.util import RepoAliasMixin
+from gitdb.object.commit import Commit as GitDB_Commit
from git.diff import Diffable
-from tree import Tree
+from gitdb.util import (
+ Iterable,
+ Actor
+ )
+
from gitdb import IStream
from cStringIO import StringIO
-import base
-from gitdb.util import (
- hex_to_bin
- )
-from util import (
- Traversable,
- Serializable,
- parse_date,
- altz_to_utctz_str,
- parse_actor_and_date
- )
-from time import (
- time,
- altzone
- )
+from util import parse_date
+from time import altzone
+
import os
-import sys
__all__ = ('Commit', )
-class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
- """Wraps a git Commit object.
-
- This class will act lazily on some of its attributes and will query the
- value on demand only if it involves calling the git binary."""
-
- # ENVIRONMENT VARIABLES
- # read when creating new commits
- env_author_date = "GIT_AUTHOR_DATE"
- env_committer_date = "GIT_COMMITTER_DATE"
-
- # CONFIGURATION KEYS
- conf_encoding = 'i18n.commitencoding'
-
- # INVARIANTS
- default_encoding = "UTF-8"
+class Commit(GitDB_Commit, Diffable, Iterable, RepoAliasMixin):
+ """Provides additional git-command based functionality to the default gitdb commit object"""
+ __slots__ = tuple()
-
- # object configuration
- type = "commit"
- __slots__ = ("tree",
- "author", "authored_date", "author_tz_offset",
- "committer", "committed_date", "committer_tz_offset",
- "message", "parents", "encoding")
- _id_attribute_ = "binsha"
-
- def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None,
- committer=None, committed_date=None, committer_tz_offset=None,
- message=None, parents=None, encoding=None):
- """Instantiate a new Commit. All keyword arguments taking None as default will
- be implicitly set on first query.
-
- :param binsha: 20 byte sha1
- :param parents: tuple( Commit, ... )
- is a tuple of commit ids or actual Commits
- :param tree: Tree
- Tree object
- :param author: Actor
- is the author string ( will be implicitly converted into an Actor object )
- :param authored_date: int_seconds_since_epoch
- is the authored DateTime - use time.gmtime() to convert it into a
- different format
- :param author_tz_offset: int_seconds_west_of_utc
- is the timezone that the authored_date is in
- :param committer: Actor
- is the committer string
- :param committed_date: int_seconds_since_epoch
- is the committed DateTime - use time.gmtime() to convert it into a
- different format
- :param committer_tz_offset: int_seconds_west_of_utc
- is the timezone that the authored_date is in
- :param message: string
- is the commit message
- :param encoding: string
- encoding of the message, defaults to UTF-8
- :param parents:
- List or tuple of Commit objects which are our parent(s) in the commit
- dependency graph
- :return: git.Commit
-
- :note: Timezone information is in the same format and in the same sign
- as what time.altzone returns. The sign is inverted compared to git's
- UTC timezone."""
- super(Commit,self).__init__(repo, binsha)
- if tree is not None:
- assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree)
- if tree is not None:
- self.tree = tree
- if author is not None:
- self.author = author
- if authored_date is not None:
- self.authored_date = authored_date
- if author_tz_offset is not None:
- self.author_tz_offset = author_tz_offset
- if committer is not None:
- self.committer = committer
- if committed_date is not None:
- self.committed_date = committed_date
- if committer_tz_offset is not None:
- self.committer_tz_offset = committer_tz_offset
- if message is not None:
- self.message = message
- if parents is not None:
- self.parents = parents
- if encoding is not None:
- self.encoding = encoding
-
- @classmethod
- def _get_intermediate_items(cls, commit):
- return commit.parents
-
- def _set_cache_(self, attr):
- if attr in Commit.__slots__:
- # read the data in a chunk, its faster - then provide a file wrapper
- binsha, typename, self.size, stream = self.repo.odb.stream(self.binsha)
- self._deserialize(StringIO(stream.read()))
- else:
- super(Commit, self)._set_cache_(attr)
- # END handle attrs
-
- @property
- def summary(self):
- """:return: First line of the commit message"""
- return self.message.split('\n', 1)[0]
-
def count(self, paths='', **kwargs):
"""Count the number of commits reachable from this commit
@@ -225,33 +111,6 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, '--', numstat=True)
return Stats._list_from_string(self.repo, text)
- @classmethod
- def _iter_from_process_or_stream(cls, repo, proc_or_stream):
- """Parse out commit information into a list of Commit objects
- We expect one-line per commit, and parse the actual commit information directly
- from our lighting fast object database
-
- :param proc: git-rev-list process instance - one sha per line
- :return: iterator returning Commit objects"""
- stream = proc_or_stream
- if not hasattr(stream,'readline'):
- stream = proc_or_stream.stdout
-
- readline = stream.readline
- while True:
- line = readline()
- if not line:
- break
- hexsha = line.strip()
- if len(hexsha) > 40:
- # split additional information, as returned by bisect for instance
- hexsha, rest = line.split(None, 1)
- # END handle extra info
-
- assert len(hexsha) == 40, "Invalid line: %s" % hexsha
- yield Commit(repo, hex_to_bin(hexsha))
- # END for each line in stream
-
@classmethod
def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False):
@@ -361,105 +220,5 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
# END advance head handling
return new_commit
-
- #{ Serializable Implementation
-
- def _serialize(self, stream):
- write = stream.write
- write("tree %s\n" % self.tree)
- for p in self.parents:
- write("parent %s\n" % p)
-
- a = self.author
- aname = a.name
- if isinstance(aname, unicode):
- aname = aname.encode(self.encoding)
- # END handle unicode in name
-
- c = self.committer
- fmt = "%s %s <%s> %s %s\n"
- write(fmt % ("author", aname, a.email,
- self.authored_date,
- altz_to_utctz_str(self.author_tz_offset)))
-
- # encode committer
- aname = c.name
- if isinstance(aname, unicode):
- aname = aname.encode(self.encoding)
- # END handle unicode in name
- write(fmt % ("committer", aname, c.email,
- self.committed_date,
- altz_to_utctz_str(self.committer_tz_offset)))
-
- if self.encoding != self.default_encoding:
- write("encoding %s\n" % self.encoding)
-
- write("\n")
-
- # write plain bytes, be sure its encoded according to our encoding
- if isinstance(self.message, unicode):
- write(self.message.encode(self.encoding))
- else:
- write(self.message)
- # END handle encoding
- return self
-
- def _deserialize(self, stream):
- """:param from_rev_list: if true, the stream format is coming from the rev-list command
- Otherwise it is assumed to be a plain data stream from our object"""
- readline = stream.readline
- self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '')
-
- self.parents = list()
- next_line = None
- while True:
- parent_line = readline()
- if not parent_line.startswith('parent'):
- next_line = parent_line
- break
- # END abort reading parents
- self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1])))
- # END for each parent line
- self.parents = tuple(self.parents)
-
- self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line)
- self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline())
-
-
- # now we can have the encoding line, or an empty line followed by the optional
- # message.
- self.encoding = self.default_encoding
- # read encoding or empty line to separate message
- enc = readline()
- enc = enc.strip()
- if enc:
- self.encoding = enc[enc.find(' ')+1:]
- # now comes the message separator
- readline()
- # END handle encoding
-
- # decode the authors name
- try:
- self.author.name = self.author.name.decode(self.encoding)
- except UnicodeDecodeError:
- print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding)
- # END handle author's encoding
-
- # decode committer name
- try:
- self.committer.name = self.committer.name.decode(self.encoding)
- except UnicodeDecodeError:
- print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding)
- # END handle author's encoding
-
- # a stream from our data simply gives us the plain message
- # The end of our message stream is marked with a newline that we strip
- self.message = stream.read()
- try:
- self.message = self.message.decode(self.encoding)
- except UnicodeDecodeError:
- print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding)
- # END exception handling
- return self
#} END serializable implementation
diff --git a/git/objects/fun.py b/git/objects/fun.py
index 9b0a377c..22016b27 100644
--- a/git/objects/fun.py
+++ b/git/objects/fun.py
@@ -1,199 +1,2 @@
"""Module with functions which are supposed to be as fast as possible"""
-from stat import S_ISDIR
-__all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive',
- 'traverse_tree_recursive')
-
-
-
-
-def tree_to_stream(entries, write):
- """Write the give list of entries into a stream using its write method
- :param entries: **sorted** list of tuples with (binsha, mode, name)
- :param write: write method which takes a data string"""
- ord_zero = ord('0')
- bit_mask = 7 # 3 bits set
-
- for binsha, mode, name in entries:
- mode_str = ''
- for i in xrange(6):
- mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str
- # END for each 8 octal value
-
- # git slices away the first octal if its zero
- if mode_str[0] == '0':
- mode_str = mode_str[1:]
- # END save a byte
-
- # here it comes: if the name is actually unicode, the replacement below
- # will not work as the binsha is not part of the ascii unicode encoding -
- # hence we must convert to an utf8 string for it to work properly.
- # According to my tests, this is exactly what git does, that is it just
- # takes the input literally, which appears to be utf8 on linux.
- if isinstance(name, unicode):
- name = name.encode("utf8")
- write("%s %s\0%s" % (mode_str, name, binsha))
- # END for each item
-
-
-def tree_entries_from_data(data):
- """Reads the binary representation of a tree and returns tuples of Tree items
- :param data: data block with tree data
- :return: list(tuple(binsha, mode, tree_relative_path), ...)"""
- ord_zero = ord('0')
- len_data = len(data)
- i = 0
- out = list()
- while i < len_data:
- mode = 0
-
- # read mode
- # Some git versions truncate the leading 0, some don't
- # The type will be extracted from the mode later
- while data[i] != ' ':
- # move existing mode integer up one level being 3 bits
- # and add the actual ordinal value of the character
- mode = (mode << 3) + (ord(data[i]) - ord_zero)
- i += 1
- # END while reading mode
-
- # byte is space now, skip it
- i += 1
-
- # parse name, it is NULL separated
-
- ns = i
- while data[i] != '\0':
- i += 1
- # END while not reached NULL
-
- # default encoding for strings in git is utf8
- # Only use the respective unicode object if the byte stream was encoded
- name = data[ns:i]
- name_enc = name.decode("utf-8")
- if len(name) > len(name_enc):
- name = name_enc
- # END handle encoding
-
- # byte is NULL, get next 20
- i += 1
- sha = data[i:i+20]
- i = i + 20
- out.append((sha, mode, name))
- # END for each byte in data stream
- return out
-
-
-def _find_by_name(tree_data, name, is_dir, start_at):
- """return data entry matching the given name and tree mode
- or None.
- Before the item is returned, the respective data item is set
- None in the tree_data list to mark it done"""
- try:
- item = tree_data[start_at]
- if item and item[2] == name and S_ISDIR(item[1]) == is_dir:
- tree_data[start_at] = None
- return item
- except IndexError:
- pass
- # END exception handling
- for index, item in enumerate(tree_data):
- if item and item[2] == name and S_ISDIR(item[1]) == is_dir:
- tree_data[index] = None
- return item
- # END if item matches
- # END for each item
- return None
-
-def _to_full_path(item, path_prefix):
- """Rebuild entry with given path prefix"""
- if not item:
- return item
- return (item[0], item[1], path_prefix+item[2])
-
-def traverse_trees_recursive(odb, tree_shas, path_prefix):
- """
- :return: list with entries according to the given binary tree-shas.
- The result is encoded in a list
- of n tuple|None per blob/commit, (n == len(tree_shas)), where
- * [0] == 20 byte sha
- * [1] == mode as int
- * [2] == path relative to working tree root
- The entry tuple is None if the respective blob/commit did not
- exist in the given tree.
- :param tree_shas: iterable of shas pointing to trees. All trees must
- be on the same level. A tree-sha may be None in which case None
- :param path_prefix: a prefix to be added to the returned paths on this level,
- set it '' for the first iteration
- :note: The ordering of the returned items will be partially lost"""
- trees_data = list()
- nt = len(tree_shas)
- for tree_sha in tree_shas:
- if tree_sha is None:
- data = list()
- else:
- data = tree_entries_from_data(odb.stream(tree_sha).read())
- # END handle muted trees
- trees_data.append(data)
- # END for each sha to get data for
-
- out = list()
- out_append = out.append
-
- # find all matching entries and recursively process them together if the match
- # is a tree. If the match is a non-tree item, put it into the result.
- # Processed items will be set None
- for ti, tree_data in enumerate(trees_data):
- for ii, item in enumerate(tree_data):
- if not item:
- continue
- # END skip already done items
- entries = [ None for n in range(nt) ]
- entries[ti] = item
- sha, mode, name = item # its faster to unpack
- is_dir = S_ISDIR(mode) # type mode bits
-
- # find this item in all other tree data items
- # wrap around, but stop one before our current index, hence
- # ti+nt, not ti+1+nt
- for tio in range(ti+1, ti+nt):
- tio = tio % nt
- entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii)
- # END for each other item data
-
- # if we are a directory, enter recursion
- if is_dir:
- out.extend(traverse_trees_recursive(odb, [((ei and ei[0]) or None) for ei in entries], path_prefix+name+'/'))
- else:
- out_append(tuple(_to_full_path(e, path_prefix) for e in entries))
- # END handle recursion
-
- # finally mark it done
- tree_data[ii] = None
- # END for each item
-
- # we are done with one tree, set all its data empty
- del(tree_data[:])
- # END for each tree_data chunk
- return out
-
-def traverse_tree_recursive(odb, tree_sha, path_prefix):
- """
- :return: list of entries of the tree pointed to by the binary tree_sha. An entry
- has the following format:
- * [0] 20 byte sha
- * [1] mode as int
- * [2] path relative to the repository
- :param path_prefix: prefix to prepend to the front of all returned paths"""
- entries = list()
- data = tree_entries_from_data(odb.stream(tree_sha).read())
-
- # unpacking/packing is faster than accessing individual items
- for sha, mode, name in data:
- if S_ISDIR(mode):
- entries.extend(traverse_tree_recursive(odb, sha, path_prefix+name+'/'))
- else:
- entries.append((sha, mode, path_prefix+name))
- # END for each item
-
- return entries
diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py
index 2160299b..7997e5e5 100644
--- a/git/objects/submodule/base.py
+++ b/git/objects/submodule/base.py
@@ -1,3 +1,5 @@
+from git.util import RepoAliasMixin
+from gitdb.object.submodule import Submodule as GitDB_Submodule
import util
from util import (
mkhead,
@@ -53,7 +55,7 @@ UPDWKTREE = UpdateProgress.UPDWKTREE
# IndexObject comes via util module, its a 'hacky' fix thanks to pythons import
# mechanism which cause plenty of trouble of the only reason for packages and
# modules is refactoring - subpackages shoudn't depend on parent packages
-class Submodule(util.IndexObject, Iterable, Traversable):
+class Submodule(GitDB_Submodule, Iterable, Traversable, RepoAliasMixin):
"""Implements access to a git submodule. They are special in that their sha
represents a commit in the submodule's repository which is to be checked out
at the path of this instance.
diff --git a/git/objects/tag.py b/git/objects/tag.py
index c7d02abe..a3a85eef 100644
--- a/git/objects/tag.py
+++ b/git/objects/tag.py
@@ -4,73 +4,10 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
""" Module containing all object based types. """
-import base
-from gitdb.util import hex_to_bin
-from util import (
- get_object_type_by_name,
- parse_actor_and_date
- )
-
+from git.util import RepoAliasMixin
+from gitdb.object.tag import GitDB_TagObject
__all__ = ("TagObject", )
-class TagObject(base.Object):
+class TagObject(GitDB_TagObject, RepoAliasMixin):
"""Non-Lightweight tag carrying additional information about an object we are pointing to."""
- type = "tag"
- __slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" )
-
- def __init__(self, repo, binsha, object=None, tag=None,
- tagger=None, tagged_date=None, tagger_tz_offset=None, message=None):
- """Initialize a tag object with additional data
-
- :param repo: repository this object is located in
- :param binsha: 20 byte SHA1
- :param object: Object instance of object we are pointing to
- :param tag: name of this tag
- :param tagger: Actor identifying the tagger
- :param tagged_date: int_seconds_since_epoch
- is the DateTime of the tag creation - use time.gmtime to convert
- it into a different format
- :param tagged_tz_offset: int_seconds_west_of_utc is the timezone that the
- authored_date is in, in a format similar to time.altzone"""
- super(TagObject, self).__init__(repo, binsha )
- if object is not None:
- self.object = object
- if tag is not None:
- self.tag = tag
- if tagger is not None:
- self.tagger = tagger
- if tagged_date is not None:
- self.tagged_date = tagged_date
- if tagger_tz_offset is not None:
- self.tagger_tz_offset = tagger_tz_offset
- if message is not None:
- self.message = message
-
- def _set_cache_(self, attr):
- """Cache all our attributes at once"""
- if attr in TagObject.__slots__:
- ostream = self.repo.odb.stream(self.binsha)
- lines = ostream.read().splitlines()
-
- obj, hexsha = lines[0].split(" ") # object <hexsha>
- type_token, type_name = lines[1].split(" ") # type <type_name>
- self.object = get_object_type_by_name(type_name)(self.repo, hex_to_bin(hexsha))
-
- self.tag = lines[2][4:] # tag <tag name>
-
- tagger_info = lines[3][7:]# tagger <actor> <date>
- self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info)
-
- # line 4 empty - it could mark the beginning of the next header
- # in case there really is no message, it would not exist. Otherwise
- # a newline separates header from message
- if len(lines) > 5:
- self.message = "\n".join(lines[5:])
- else:
- self.message = ''
- # END check our attributes
- else:
- super(TagObject, self)._set_cache_(attr)
-
-
-
+ __slots__ = tuple()
diff --git a/git/objects/tree.py b/git/objects/tree.py
index 67431686..23e1dfe4 100644
--- a/git/objects/tree.py
+++ b/git/objects/tree.py
@@ -3,278 +3,25 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-import util
-from base import IndexObject
-from git.util import join_path
-from blob import Blob
-from submodule.base import Submodule
+from git.util import RepoAliasMixin
+from gitdb.object.tree import GitDB_Tree, TreeModifier
import git.diff as diff
-from fun import (
- tree_entries_from_data,
- tree_to_stream
- )
-
-from gitdb.util import (
- to_bin_sha,
- )
+from submodule.base import Submodule
__all__ = ("TreeModifier", "Tree")
-class TreeModifier(object):
- """A utility class providing methods to alter the underlying cache in a list-like fashion.
-
- Once all adjustments are complete, the _cache, which really is a refernce to
- the cache of a tree, will be sorted. Assuring it will be in a serializable state"""
- __slots__ = '_cache'
-
- def __init__(self, cache):
- self._cache = cache
-
- def _index_by_name(self, name):
- """:return: index of an item with name, or -1 if not found"""
- for i, t in enumerate(self._cache):
- if t[2] == name:
- return i
- # END found item
- # END for each item in cache
- return -1
-
- #{ Interface
- def set_done(self):
- """Call this method once you are done modifying the tree information.
- It may be called several times, but be aware that each call will cause
- a sort operation
- :return self:"""
- self._cache.sort(key=lambda t: t[2]) # sort by name
- return self
- #} END interface
-
- #{ Mutators
- def add(self, sha, mode, name, force=False):
- """Add the given item to the tree. If an item with the given name already
- exists, nothing will be done, but a ValueError will be raised if the
- sha and mode of the existing item do not match the one you add, unless
- force is True
-
- :param sha: The 20 or 40 byte sha of the item to add
- :param mode: int representing the stat compatible mode of the item
- :param force: If True, an item with your name and information will overwrite
- any existing item with the same name, no matter which information it has
- :return: self"""
- if '/' in name:
- raise ValueError("Name must not contain '/' characters")
- if (mode >> 12) not in Tree._map_id_to_type:
- raise ValueError("Invalid object type according to mode %o" % mode)
-
- sha = to_bin_sha(sha)
- index = self._index_by_name(name)
- item = (sha, mode, name)
- if index == -1:
- self._cache.append(item)
- else:
- if force:
- self._cache[index] = item
- else:
- ex_item = self._cache[index]
- if ex_item[0] != sha or ex_item[1] != mode:
- raise ValueError("Item %r existed with different properties" % name)
- # END handle mismatch
- # END handle force
- # END handle name exists
- return self
-
- def add_unchecked(self, binsha, mode, name):
- """Add the given item to the tree, its correctness is assumed, which
- puts the caller into responsibility to assure the input is correct.
- For more information on the parameters, see ``add``
- :param binsha: 20 byte binary sha"""
- self._cache.append((binsha, mode, name))
-
- def __delitem__(self, name):
- """Deletes an item with the given name if it exists"""
- index = self._index_by_name(name)
- if index > -1:
- del(self._cache[index])
-
- #} END mutators
-
-
-class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable):
- """Tree objects represent an ordered list of Blobs and other Trees.
-
- ``Tree as a list``::
-
- Access a specific blob using the
- tree['filename'] notation.
-
- You may as well access by index
- blob = tree[0]
- """
-
- type = "tree"
- __slots__ = "_cache"
-
- # actual integer ids for comparison
- commit_id = 016 # equals stat.S_IFDIR | stat.S_IFLNK - a directory link
- blob_id = 010
- symlink_id = 012
- tree_id = 004
+class Tree(GitDB_Tree, diff.Diffable):
+ """As opposed to the default GitDB tree implementation, this one can be diffed
+ and returns our own types"""
+ __slots__ = tuple()
_map_id_to_type = {
- commit_id : Submodule,
- blob_id : Blob,
- symlink_id : Blob
+ GitDB_Tree.commit_id : Submodule,
+ GitDB_Tree.blob_id : Blob,
+ GitDB_Tree.symlink_id : Blob
# tree id added once Tree is defined
}
-
- def __init__(self, repo, binsha, mode=tree_id<<12, path=None):
- super(Tree, self).__init__(repo, binsha, mode, path)
-
- @classmethod
- def _get_intermediate_items(cls, index_object):
- if index_object.type == "tree":
- return tuple(index_object._iter_convert_to_object(index_object._cache))
- return tuple()
-
- def _set_cache_(self, attr):
- if attr == "_cache":
- # Set the data when we need it
- ostream = self.repo.odb.stream(self.binsha)
- self._cache = tree_entries_from_data(ostream.read())
- else:
- super(Tree, self)._set_cache_(attr)
- # END handle attribute
-
- def _iter_convert_to_object(self, iterable):
- """Iterable yields tuples of (binsha, mode, name), which will be converted
- to the respective object representation"""
- for binsha, mode, name in iterable:
- path = join_path(self.path, name)
- try:
- yield self._map_id_to_type[mode >> 12](self.repo, binsha, mode, path)
- except KeyError:
- raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path))
- # END for each item
-
- def __div__(self, file):
- """Find the named object in this tree's contents
- :return: ``git.Blob`` or ``git.Tree`` or ``git.Submodule``
-
- :raise KeyError: if given file or tree does not exist in tree"""
- msg = "Blob or Tree named %r not found"
- if '/' in file:
- tree = self
- item = self
- tokens = file.split('/')
- for i,token in enumerate(tokens):
- item = tree[token]
- if item.type == 'tree':
- tree = item
- else:
- # safety assertion - blobs are at the end of the path
- if i != len(tokens)-1:
- raise KeyError(msg % file)
- return item
- # END handle item type
- # END for each token of split path
- if item == self:
- raise KeyError(msg % file)
- return item
- else:
- for info in self._cache:
- if info[2] == file: # [2] == name
- return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2]))
- # END for each obj
- raise KeyError( msg % file )
- # END handle long paths
-
-
- @property
- def trees(self):
- """:return: list(Tree, ...) list of trees directly below this tree"""
- return [ i for i in self if i.type == "tree" ]
-
- @property
- def blobs(self):
- """:return: list(Blob, ...) list of blobs directly below this tree"""
- return [ i for i in self if i.type == "blob" ]
-
- @property
- def cache(self):
- """
- :return: An object allowing to modify the internal cache. This can be used
- to change the tree's contents. When done, make sure you call ``set_done``
- on the tree modifier, or serialization behaviour will be incorrect.
- See the ``TreeModifier`` for more information on how to alter the cache"""
- return TreeModifier(self._cache)
-
- def traverse( self, predicate = lambda i,d: True,
- prune = lambda i,d: False, depth = -1, branch_first=True,
- visit_once = False, ignore_self=1 ):
- """For documentation, see util.Traversable.traverse
- Trees are set to visit_once = False to gain more performance in the traversal"""
- return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self)
-
- # List protocol
- def __getslice__(self, i, j):
- return list(self._iter_convert_to_object(self._cache[i:j]))
-
- def __iter__(self):
- return self._iter_convert_to_object(self._cache)
-
- def __len__(self):
- return len(self._cache)
-
- def __getitem__(self, item):
- if isinstance(item, int):
- info = self._cache[item]
- return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2]))
-
- if isinstance(item, basestring):
- # compatability
- return self.__div__(item)
- # END index is basestring
-
- raise TypeError( "Invalid index type: %r" % item )
-
-
- def __contains__(self, item):
- if isinstance(item, IndexObject):
- for info in self._cache:
- if item.binsha == info[0]:
- return True
- # END compare sha
- # END for each entry
- # END handle item is index object
- # compatability
-
- # treat item as repo-relative path
- path = self.path
- for info in self._cache:
- if item == join_path(path, info[2]):
- return True
- # END for each item
- return False
-
- def __reversed__(self):
- return reversed(self._iter_convert_to_object(self._cache))
-
- def _serialize(self, stream):
- """Serialize this tree into the stream. Please note that we will assume
- our tree data to be in a sorted state. If this is not the case, serialization
- will not generate a correct tree representation as these are assumed to be sorted
- by algorithms"""
- tree_to_stream(self._cache, stream.write)
- return self
-
- def _deserialize(self, stream):
- self._cache = tree_entries_from_data(stream.read())
- return self
-
-
-# END tree
-
# finalize map definition
Tree._map_id_to_type[Tree.tree_id] = Tree
diff --git a/git/test/test_blob.py b/git/test/objects/test_blob.py
index 661c0501..661c0501 100644
--- a/git/test/test_blob.py
+++ b/git/test/objects/test_blob.py
diff --git a/git/test/test_commit.py b/git/test/objects/test_commit.py
index 4a8d8b87..4a8d8b87 100644
--- a/git/test/test_commit.py
+++ b/git/test/objects/test_commit.py
diff --git a/git/test/test_submodule.py b/git/test/objects/test_submodule.py
index adb4fb82..adb4fb82 100644
--- a/git/test/test_submodule.py
+++ b/git/test/objects/test_submodule.py
diff --git a/git/test/test_tree.py b/git/test/objects/test_tree.py
index ec10e962..ec10e962 100644
--- a/git/test/test_tree.py
+++ b/git/test/objects/test_tree.py
diff --git a/git/test/test_actor.py b/git/test/test_actor.py
deleted file mode 100644
index b8e5ba3b..00000000
--- a/git/test/test_actor.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# test_actor.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-import os
-from git.test.lib import *
-from git import *
-
-class TestActor(object):
- def test_from_string_should_separate_name_and_email(self):
- a = Actor._from_string("Michael Trier <mtrier@example.com>")
- assert_equal("Michael Trier", a.name)
- assert_equal("mtrier@example.com", a.email)
-
- # base type capabilities
- assert a == a
- assert not ( a != a )
- m = set()
- m.add(a)
- m.add(a)
- assert len(m) == 1
-
- def test_from_string_should_handle_just_name(self):
- a = Actor._from_string("Michael Trier")
- assert_equal("Michael Trier", a.name)
- assert_equal(None, a.email)
-
- def test_should_display_representation(self):
- a = Actor._from_string("Michael Trier <mtrier@example.com>")
- assert_equal('<git.Actor "Michael Trier <mtrier@example.com>">', repr(a))
-
- def test_str_should_alias_name(self):
- a = Actor._from_string("Michael Trier <mtrier@example.com>")
- assert_equal(a.name, str(a)) \ No newline at end of file
diff --git a/git/util.py b/git/util.py
index ec1ece1e..3d9fd7d5 100644
--- a/git/util.py
+++ b/git/util.py
@@ -26,7 +26,9 @@ from gitdb.util import (
LockFile,
BlockingLockFile,
Actor,
- Iterable
+ Iterable,
+ stream_copy,
+ IterableList
)
__all__ = ( "stream_copy", "join_path", "to_native_path_windows", "to_native_path_linux",
@@ -36,21 +38,6 @@ __all__ = ( "stream_copy", "join_path", "to_native_path_windows", "to_native_pat
#{ Utility Methods
-def stream_copy(source, destination, chunk_size=512*1024):
- """Copy all data from the source stream into the destination stream in chunks
- of size chunk_size
-
- :return: amount of bytes written"""
- br = 0
- while True:
- chunk = source.read(chunk_size)
- destination.write(chunk)
- br += len(chunk)
- if len(chunk) < chunk_size:
- break
- # END reading output stream
- return br
-
def get_user_id():
""":return: string identifying the currently active system user as name@node
@@ -286,51 +273,4 @@ class IndexFileSHA1Writer(object):
return self.f.tell()
-
-
-class IterableList(list):
- """
- List of iterable objects allowing to query an object by id or by named index::
-
- heads = repo.heads
- heads.master
- heads['master']
- heads[0]
-
- It requires an id_attribute name to be set which will be queried from its
- contained items to have a means for comparison.
-
- A prefix can be specified which is to be used in case the id returned by the
- items always contains a prefix that does not matter to the user, so it
- can be left out."""
- __slots__ = ('_id_attr', '_prefix')
-
- def __new__(cls, id_attr, prefix=''):
- return super(IterableList,cls).__new__(cls)
-
- def __init__(self, id_attr, prefix=''):
- self._id_attr = id_attr
- self._prefix = prefix
- if not isinstance(id_attr, basestring):
- raise ValueError("First parameter must be a string identifying the name-property. Extend the list after initialization")
- # END help debugging !
-
- def __getattr__(self, attr):
- attr = self._prefix + attr
- for item in self:
- if getattr(item, self._id_attr) == attr:
- return item
- # END for each item
- return list.__getattribute__(self, attr)
-
- def __getitem__(self, index):
- if isinstance(index, int):
- return list.__getitem__(self,index)
-
- try:
- return getattr(self, index)
- except AttributeError:
- raise IndexError( "No item found with id %r" % (self._prefix + index) )
-
-
#} END classes