Updated objects to use the ones defined in gitdb as basis. Only the submodule implementation is left in git-python as it requires some advanced features. No tests were run yet.

author: Sebastian Thiel <byronimo@gmail.com> 2011-04-07 20:17:00 +0200
committer: Sebastian Thiel <byronimo@gmail.com> 2011-04-07 20:17:00 +0200
commit: a12a7618a1f6f61a4c97ddf4cc422158c3fa72ba (patch)
tree: 2b2e0e9991ddede152556f7954cd6e4c6121be97 /git
parent: e77d2d0ebb9487b696835f219e4a23a558462a55 (diff)
download: gitpython-a12a7618a1f6f61a4c97ddf4cc422158c3fa72ba.tar.gz
14 files changed, 37 insertions, 1068 deletions
diff --git a/git/ext/gitdb b/git/ext/gitdb
-Subproject 7c4d3d6b000930134019515c83c10b140330d31
+Subproject dba71a0c727aba19319d3e868d0ca4b8009bcef
diff --git a/git/objects/base.py b/git/objects/base.py
index 5f2f7809..42d7b600 100644
--- a/git/objects/base.py
+++ b/git/objects/base.py
@@ -3,170 +3,6 @@
 #
 # This module is part of GitPython and is released under
 # the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.util import LazyMixin, join_path_native, stream_copy
-from util import get_object_type_by_name
-from gitdb.util import (
-							hex_to_bin,
-							bin_to_hex,
-							basename
-						)
-
-import gitdb.typ as dbtyp
-	
-_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r"
-
+from gitdb.object.base import Object, IndexObject 
 __all__ = ("Object", "IndexObject")
 
-class Object(LazyMixin):
-	"""Implements an Object which may be Blobs, Trees, Commits and Tags"""
-	NULL_HEX_SHA = '0'*40
-	NULL_BIN_SHA = '\0'*20
-	
-	TYPES = (dbtyp.str_blob_type, dbtyp.str_tree_type, dbtyp.str_commit_type, dbtyp.str_tag_type)
-	__slots__ = ("repo", "binsha", "size" )
-	type = None			# to be set by subclass
-	
-	def __init__(self, repo, binsha):
-		"""Initialize an object by identifying it by its binary sha. 
-		All keyword arguments will be set on demand if None.
-		
-		:param repo: repository this object is located in
-			
-		:param binsha: 20 byte SHA1"""
-		super(Object,self).__init__()
-		self.repo = repo
-		self.binsha = binsha
-		assert len(binsha) == 20, "Require 20 byte binary sha, got %r, len = %i" % (binsha, len(binsha))
-
-	@classmethod
-	def new(cls, repo, id):
-		"""
-		:return: New Object instance of a type appropriate to the object type behind 
-			id. The id of the newly created object will be a binsha even though 
-			the input id may have been a Reference or Rev-Spec
-			
-		:param id: reference, rev-spec, or hexsha
-			
-		:note: This cannot be a __new__ method as it would always call __init__
-			with the input id which is not necessarily a binsha."""
-		return repo.rev_parse(str(id))
-		
-	@classmethod
-	def new_from_sha(cls, repo, sha1):
-		"""
-		:return: new object instance of a type appropriate to represent the given 
-			binary sha1
-		:param sha1: 20 byte binary sha1"""
-		if sha1 == cls.NULL_BIN_SHA:
-			# the NULL binsha is always the root commit
-			return get_object_type_by_name('commit')(repo, sha1)
-		#END handle special case
-		oinfo = repo.odb.info(sha1)
-		inst = get_object_type_by_name(oinfo.type)(repo, oinfo.binsha)
-		inst.size = oinfo.size
-		return inst 
-	
-	def _set_cache_(self, attr):
-		"""Retrieve object information"""
-		if attr	 == "size":
-			oinfo = self.repo.odb.info(self.binsha)
-			self.size = oinfo.size
-			# assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type)
-		else:
-			super(Object,self)._set_cache_(attr)
-		
-	def __eq__(self, other):
-		""":return: True if the objects have the same SHA1"""
-		return self.binsha == other.binsha
-		
-	def __ne__(self, other):
-		""":return: True if the objects do not have the same SHA1 """
-		return self.binsha != other.binsha
-		
-	def __hash__(self):
-		""":return: Hash of our id allowing objects to be used in dicts and sets"""
-		return hash(self.binsha)
-		
-	def __str__(self):
-		""":return: string of our SHA1 as understood by all git commands"""
-		return bin_to_hex(self.binsha)
-		
-	def __repr__(self):
-		""":return: string with pythonic representation of our object"""
-		return '<git.%s "%s">' % (self.__class__.__name__, self.hexsha)
-
-	@property
-	def hexsha(self):
-		""":return: 40 byte hex version of our 20 byte binary sha"""
-		return bin_to_hex(self.binsha)
-
-	@property
-	def data_stream(self):
-		""" :return:  File Object compatible stream to the uncompressed raw data of the object
-		:note: returned streams must be read in order"""
-		return self.repo.odb.stream(self.binsha)
-
-	def stream_data(self, ostream):
-		"""Writes our data directly to the given output stream
-		:param ostream: File object compatible stream object.
-		:return: self"""
-		istream = self.repo.odb.stream(self.binsha)
-		stream_copy(istream, ostream)
-		return self
-		
-
-class IndexObject(Object):
-	"""Base for all objects that can be part of the index file , namely Tree, Blob and
-	SubModule objects"""
-	__slots__ = ("path", "mode")
-	
-	# for compatability with iterable lists
-	_id_attribute_ = 'path'
-	
-	def __init__(self, repo, binsha, mode=None, path=None):
-		"""Initialize a newly instanced IndexObject
-		:param repo: is the Repo we are located in
-		:param binsha: 20 byte sha1
-		:param mode: is the stat compatible file mode as int, use the stat module
-			to evaluate the infomration
-		:param path:
-			is the path to the file in the file system, relative to the git repository root, i.e.
-			file.ext or folder/other.ext
-		:note:
-			Path may not be set of the index object has been created directly as it cannot
-			be retrieved without knowing the parent tree."""
-		super(IndexObject, self).__init__(repo, binsha)
-		if mode is not None:
-			self.mode = mode
-		if path is not None:
-			self.path = path
-	
-	def __hash__(self):
-		""":return:
-			Hash of our path as index items are uniquely identifyable by path, not 
-			by their data !"""
-		return hash(self.path)
-	
-	def _set_cache_(self, attr):
-		if attr in IndexObject.__slots__:
-			# they cannot be retrieved lateron ( not without searching for them )
-			raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ )
-		else:
-			super(IndexObject, self)._set_cache_(attr)
-		# END hanlde slot attribute
-	
-	@property
-	def name(self):
-		""":return: Name portion of the path, effectively being the basename"""
-		return basename(self.path)
-		
-	@property
-	def abspath(self):
-		"""
-		:return:
-			Absolute path to this index object in the file system ( as opposed to the 
-			.path field which is a path relative to the git repository ).
-			
-			The returned path will be native to the system and contains '\' on windows. """
-		return join_path_native(self.repo.working_tree_dir, self.path)
-		
diff --git a/git/objects/blob.py b/git/objects/blob.py
index f52d1a53..38834436 100644
--- a/git/objects/blob.py
+++ b/git/objects/blob.py
@@ -4,29 +4,10 @@
 # This module is part of GitPython and is released under
 # the BSD License: http://www.opensource.org/licenses/bsd-license.php
 
-from mimetypes import guess_type
-import base
+from git.util import RepoAliasMixin
+from gitdb.object.blob import Blob as GitDB_Blob
 
 __all__ = ('Blob', )
 
-class Blob(base.IndexObject):
-	"""A Blob encapsulates a git blob object"""
-	DEFAULT_MIME_TYPE = "text/plain"
-	type = "blob"
-	
-	# valid blob modes
-	executable_mode = 0100755
-	file_mode = 0100644
-	link_mode = 0120000
-
+class Blob(GitDB_Blob, RepoAliasMixin):
 	__slots__ = tuple()
-
-	@property
-	def mime_type(self):
-		"""
-		:return: String describing the mime type of this file (based on the filename)
-		:note: Defaults to 'text/plain' in case the actual file type is unknown. """
-		guesses = None
-		if self.path:
-			guesses = guess_type(self.path)
-		return guesses and guesses[0] or self.DEFAULT_MIME_TYPE
diff --git a/git/objects/commit.py b/git/objects/commit.py
index fd4187b0..d932ab1a 100644
--- a/git/objects/commit.py
+++ b/git/objects/commit.py
@@ -3,142 +3,28 @@
 #
 # This module is part of GitPython and is released under
 # the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-from git.util import 		(
-							Actor,
-							Iterable,
-							Stats,
-						)
+from git.util import RepoAliasMixin
+from gitdb.object.commit import Commit as GitDB_Commit
 from git.diff import Diffable
-from tree import Tree
+from gitdb.util import (
+						Iterable,
+						Actor
+						)
+
 from gitdb import IStream
 from cStringIO import StringIO
 
-import base
-from gitdb.util import (
-						hex_to_bin
-						)
-from util import (
-						Traversable,
-						Serializable,
-						parse_date,
-						altz_to_utctz_str,
-						parse_actor_and_date
-					)
-from time import (
-					time, 
-					altzone
-				)
+from util import parse_date
+from time import altzone
+
 import os
-import sys
 
 __all__ = ('Commit', )
 
-class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
-	"""Wraps a git Commit object.
-	
-	This class will act lazily on some of its attributes and will query the 
-	value on demand only if it involves calling the git binary."""
-	
-	# ENVIRONMENT VARIABLES
-	# read when creating new commits
-	env_author_date = "GIT_AUTHOR_DATE"
-	env_committer_date = "GIT_COMMITTER_DATE"
-	
-	# CONFIGURATION KEYS
-	conf_encoding = 'i18n.commitencoding'
-	
-	# INVARIANTS
-	default_encoding = "UTF-8"
+class Commit(GitDB_Commit, Diffable, Iterable, RepoAliasMixin):
+	"""Provides additional git-command based functionality to the default gitdb commit object"""
+	__slots__ = tuple()
 	
-	
-	# object configuration 
-	type = "commit"
-	__slots__ = ("tree",
-				 "author", "authored_date", "author_tz_offset",
-				 "committer", "committed_date", "committer_tz_offset",
-				 "message", "parents", "encoding")
-	_id_attribute_ = "binsha"
-	
-	def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None,
-				 committer=None, committed_date=None, committer_tz_offset=None, 
-				 message=None,  parents=None, encoding=None):
-		"""Instantiate a new Commit. All keyword arguments taking None as default will 
-		be implicitly set on first query. 
-		
-		:param binsha: 20 byte sha1
-		:param parents: tuple( Commit, ... ) 
-			is a tuple of commit ids or actual Commits
-		:param tree: Tree
-			Tree object
-		:param author: Actor
-			is the author string ( will be implicitly converted into an Actor object )
-		:param authored_date: int_seconds_since_epoch
-			is the authored DateTime - use time.gmtime() to convert it into a 
-			different format
-		:param author_tz_offset: int_seconds_west_of_utc
-			is the timezone that the authored_date is in
-		:param committer: Actor
-			is the committer string
-		:param committed_date: int_seconds_since_epoch
-			is the committed DateTime - use time.gmtime() to convert it into a 
-			different format
-		:param committer_tz_offset: int_seconds_west_of_utc
-			is the timezone that the authored_date is in
-		:param message: string
-			is the commit message
-		:param encoding: string
-			encoding of the message, defaults to UTF-8
-		:param parents:
-			List or tuple of Commit objects which are our parent(s) in the commit 
-			dependency graph
-		:return: git.Commit
-		
-		:note: Timezone information is in the same format and in the same sign 
-			as what time.altzone returns. The sign is inverted compared to git's 
-			UTC timezone."""
-		super(Commit,self).__init__(repo, binsha)
-		if tree is not None:
-			assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree)
-		if tree is not None:
-			self.tree = tree
-		if author is not None:
-			self.author = author
-		if authored_date is not None:
-			self.authored_date = authored_date
-		if author_tz_offset is not None:
-			self.author_tz_offset = author_tz_offset
-		if committer is not None:
-			self.committer = committer
-		if committed_date is not None:
-			self.committed_date = committed_date
-		if committer_tz_offset is not None:
-			self.committer_tz_offset = committer_tz_offset
-		if message is not None:
-			self.message = message
-		if parents is not None:
-			self.parents = parents
-		if encoding is not None:
-			self.encoding = encoding
-		
-	@classmethod
-	def _get_intermediate_items(cls, commit):
-		return commit.parents
-
-	def _set_cache_(self, attr):
-		if attr in Commit.__slots__:
-			# read the data in a chunk, its faster - then provide a file wrapper
-			binsha, typename, self.size, stream = self.repo.odb.stream(self.binsha)
-			self._deserialize(StringIO(stream.read()))
-		else:
-			super(Commit, self)._set_cache_(attr)
-		# END handle attrs
-
-	@property
-	def summary(self):
-		""":return: First line of the commit message"""
-		return self.message.split('\n', 1)[0]
-		
 	def count(self, paths='', **kwargs):
 		"""Count the number of commits reachable from this commit
 
@@ -225,33 +111,6 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
 			text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, '--', numstat=True)
 		return Stats._list_from_string(self.repo, text)
 
-	@classmethod
-	def _iter_from_process_or_stream(cls, repo, proc_or_stream):
-		"""Parse out commit information into a list of Commit objects
-		We expect one-line per commit, and parse the actual commit information directly
-		from our lighting fast object database
-
-		:param proc: git-rev-list process instance - one sha per line
-		:return: iterator returning Commit objects"""
-		stream = proc_or_stream
-		if not hasattr(stream,'readline'):
-			stream = proc_or_stream.stdout
-			
-		readline = stream.readline
-		while True:
-			line = readline()
-			if not line:
-				break
-			hexsha = line.strip()
-			if len(hexsha) > 40:
-				# split additional information, as returned by bisect for instance
-				hexsha, rest = line.split(None, 1)
-			# END handle extra info
-			
-			assert len(hexsha) == 40, "Invalid line: %s" % hexsha
-			yield Commit(repo, hex_to_bin(hexsha))
-		# END for each line in stream
-		
 		
 	@classmethod
 	def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False):
@@ -361,105 +220,5 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
 		# END advance head handling 
 		
 		return new_commit
-	
-	#{ Serializable Implementation
-	
-	def _serialize(self, stream):
-		write = stream.write
-		write("tree %s\n" % self.tree)
-		for p in self.parents:
-			write("parent %s\n" % p)
-			
-		a = self.author
-		aname = a.name
-		if isinstance(aname, unicode):
-			aname = aname.encode(self.encoding)
-		# END handle unicode in name
-		
-		c = self.committer
-		fmt = "%s %s <%s> %s %s\n"
-		write(fmt % ("author", aname, a.email, 
-						self.authored_date, 
-						altz_to_utctz_str(self.author_tz_offset)))
-			
-		# encode committer
-		aname = c.name
-		if isinstance(aname, unicode):
-			aname = aname.encode(self.encoding)
-		# END handle unicode in name
-		write(fmt % ("committer", aname, c.email, 
-						self.committed_date,
-						altz_to_utctz_str(self.committer_tz_offset)))
-		
-		if self.encoding != self.default_encoding:
-			write("encoding %s\n" % self.encoding)
-		
-		write("\n")
-		
-		# write plain bytes, be sure its encoded according to our encoding
-		if isinstance(self.message, unicode):
-			write(self.message.encode(self.encoding))
-		else:
-			write(self.message)
-		# END handle encoding
-		return self
-	
-	def _deserialize(self, stream):
-		""":param from_rev_list: if true, the stream format is coming from the rev-list command
-		Otherwise it is assumed to be a plain data stream from our object"""
-		readline = stream.readline
-		self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '')
-
-		self.parents = list()
-		next_line = None
-		while True:
-			parent_line = readline()
-			if not parent_line.startswith('parent'):
-				next_line = parent_line
-				break
-			# END abort reading parents
-			self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1])))
-		# END for each parent line
-		self.parents = tuple(self.parents)
-		
-		self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line)
-		self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline())
-		
-		
-		# now we can have the encoding line, or an empty line followed by the optional
-		# message.
-		self.encoding = self.default_encoding
-		# read encoding or empty line to separate message
-		enc = readline()
-		enc = enc.strip()
-		if enc:
-			self.encoding = enc[enc.find(' ')+1:]
-			# now comes the message separator 
-			readline()
-		# END handle encoding
-		
-		# decode the authors name
-		try:
-			self.author.name = self.author.name.decode(self.encoding) 
-		except UnicodeDecodeError:
-			print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding)
-		# END handle author's encoding
-		
-		# decode committer name
-		try:
-			self.committer.name = self.committer.name.decode(self.encoding) 
-		except UnicodeDecodeError:
-			print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding)
-		# END handle author's encoding
-		
-		# a stream from our data simply gives us the plain message
-		# The end of our message stream is marked with a newline that we strip
-		self.message = stream.read()
-		try:
-			self.message = self.message.decode(self.encoding)
-		except UnicodeDecodeError:
-			print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding)
-		# END exception handling 
-		return self
 		
 	#} END serializable implementation
diff --git a/git/objects/fun.py b/git/objects/fun.py
index 9b0a377c..22016b27 100644
--- a/git/objects/fun.py
+++ b/git/objects/fun.py
@@ -1,199 +1,2 @@
 """Module with functions which are supposed to be as fast as possible"""
-from stat import S_ISDIR
 
-__all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive',
-			'traverse_tree_recursive')
-
-
-				
-
-def tree_to_stream(entries, write):
-	"""Write the give list of entries into a stream using its write method
-	:param entries: **sorted** list of tuples with (binsha, mode, name)
-	:param write: write method which takes a data string"""
-	ord_zero = ord('0')
-	bit_mask = 7			# 3 bits set
-	
-	for binsha, mode, name in entries:
-		mode_str = ''
-		for i in xrange(6):
-			mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str
-		# END for each 8 octal value
-		
-		# git slices away the first octal if its zero
-		if mode_str[0] == '0':
-			mode_str = mode_str[1:]
-		# END save a byte
-
-		# here it comes:  if the name is actually unicode, the replacement below
-		# will not work as the binsha is not part of the ascii unicode encoding - 
-		# hence we must convert to an utf8 string for it to work properly.
-		# According to my tests, this is exactly what git does, that is it just
-		# takes the input literally, which appears to be utf8 on linux.
-		if isinstance(name, unicode):
-			name = name.encode("utf8")
-		write("%s %s\0%s" % (mode_str, name, binsha)) 
-	# END for each item
-
-
-def tree_entries_from_data(data):
-	"""Reads the binary representation of a tree and returns tuples of Tree items
-	:param data: data block with tree data
-	:return: list(tuple(binsha, mode, tree_relative_path), ...)"""
-	ord_zero = ord('0')
-	len_data = len(data)
-	i = 0
-	out = list()
-	while i < len_data:
-		mode = 0
-		
-		# read mode
-		# Some git versions truncate the leading 0, some don't
-		# The type will be extracted from the mode later
-		while data[i] != ' ':
-			# move existing mode integer up one level being 3 bits
-			# and add the actual ordinal value of the character
-			mode = (mode << 3) + (ord(data[i]) - ord_zero)
-			i += 1
-		# END while reading mode
-		
-		# byte is space now, skip it
-		i += 1
-		
-		# parse name, it is NULL separated
-		
-		ns = i
-		while data[i] != '\0':
-			i += 1
-		# END while not reached NULL
-		
-		# default encoding for strings in git is utf8
-		# Only use the respective unicode object if the byte stream was encoded
-		name = data[ns:i]
-		name_enc = name.decode("utf-8")
-		if len(name) > len(name_enc):
-			name = name_enc
-		# END handle encoding
-		
-		# byte is NULL, get next 20
-		i += 1
-		sha = data[i:i+20]
-		i = i + 20
-		out.append((sha, mode, name))
-	# END for each byte in data stream
-	return out
-	
-	
-def _find_by_name(tree_data, name, is_dir, start_at):
-	"""return data entry matching the given name and tree mode
-	or None.
-	Before the item is returned, the respective data item is set 
-	None in the tree_data list to mark it done"""
-	try:
-		item = tree_data[start_at]
-		if item and  item[2] == name and S_ISDIR(item[1]) == is_dir:
-			tree_data[start_at] = None
-			return item
-	except IndexError:
-		pass
-	# END exception handling
-	for index, item in enumerate(tree_data):
-		if item and item[2] == name and S_ISDIR(item[1]) == is_dir:
-			tree_data[index] = None
-			return item
-		# END if item matches
-	# END for each item
-	return None
-
-def _to_full_path(item, path_prefix):
-	"""Rebuild entry with given path prefix"""
-	if not item:
-		return item
-	return (item[0], item[1], path_prefix+item[2])
-	
-def traverse_trees_recursive(odb, tree_shas, path_prefix):
-	"""
-	:return: list with entries according to the given binary tree-shas. 
-		The result is encoded in a list
-		of n tuple|None per blob/commit, (n == len(tree_shas)), where 
-		* [0] == 20 byte sha
-		* [1] == mode as int
-		* [2] == path relative to working tree root
-		The entry tuple is None if the respective blob/commit did not 
-		exist in the given tree.
-	:param tree_shas: iterable of shas pointing to trees. All trees must 
-		be on the same level. A tree-sha may be None in which case None
-	:param path_prefix: a prefix to be added to the returned paths on this level, 
-		set it '' for the first iteration
-	:note: The ordering of the returned items will be partially lost"""
-	trees_data = list()
-	nt = len(tree_shas)
-	for tree_sha in tree_shas:
-		if tree_sha is None:
-			data = list()
-		else:
-			data = tree_entries_from_data(odb.stream(tree_sha).read())
-		# END handle muted trees
-		trees_data.append(data)
-	# END for each sha to get data for
-	
-	out = list()
-	out_append = out.append
-	
-	# find all matching entries and recursively process them together if the match
-	# is a tree. If the match is a non-tree item, put it into the result.
-	# Processed items will be set None
-	for ti, tree_data in enumerate(trees_data):
-		for ii, item in enumerate(tree_data):
-			if not item:
-				continue
-			# END skip already done items
-			entries = [ None for n in range(nt) ]
-			entries[ti] = item
-			sha, mode, name = item							# its faster to unpack
-			is_dir = S_ISDIR(mode)							# type mode bits
-			
-			# find this item in all other tree data items
-			# wrap around, but stop one before our current index, hence 
-			# ti+nt, not ti+1+nt
-			for tio in range(ti+1, ti+nt):
-				tio = tio % nt
-				entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii)
-			# END for each other item data
-			
-			# if we are a directory, enter recursion
-			if is_dir:
-				out.extend(traverse_trees_recursive(odb, [((ei and ei[0]) or None) for ei in entries], path_prefix+name+'/'))
-			else:
-				out_append(tuple(_to_full_path(e, path_prefix) for e in entries))
-			# END handle recursion
-			
-			# finally mark it done
-			tree_data[ii] = None
-		# END for each item
-		
-		# we are done with one tree, set all its data empty
-		del(tree_data[:])
-	# END for each tree_data chunk
-	return out
-	
-def traverse_tree_recursive(odb, tree_sha, path_prefix):
-	"""
-	:return: list of entries of the tree pointed to by the binary tree_sha. An entry
-		has the following format:
-		* [0] 20 byte sha
-		* [1] mode as int
-		* [2] path relative to the repository
-	:param path_prefix: prefix to prepend to the front of all returned paths"""
-	entries = list()
-	data = tree_entries_from_data(odb.stream(tree_sha).read())
-	
-	# unpacking/packing is faster than accessing individual items
-	for sha, mode, name in data:
-		if S_ISDIR(mode):
-			entries.extend(traverse_tree_recursive(odb, sha, path_prefix+name+'/'))
-		else:
-			entries.append((sha, mode, path_prefix+name))
-	# END for each item
-	
-	return entries
diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py
index 2160299b..7997e5e5 100644
--- a/git/objects/submodule/base.py
+++ b/git/objects/submodule/base.py
@@ -1,3 +1,5 @@
+from git.util import RepoAliasMixin
+from gitdb.object.submodule import Submodule as GitDB_Submodule
 import util
 from util import (
 					mkhead,
@@ -53,7 +55,7 @@ UPDWKTREE = UpdateProgress.UPDWKTREE
 # IndexObject comes via util module, its a 'hacky' fix thanks to pythons import 
 # mechanism which cause plenty of trouble of the only reason for packages and
 # modules is refactoring - subpackages shoudn't depend on parent packages
-class Submodule(util.IndexObject, Iterable, Traversable):
+class Submodule(GitDB_Submodule, Iterable, Traversable, RepoAliasMixin):
 	"""Implements access to a git submodule. They are special in that their sha
 	represents a commit in the submodule's repository which is to be checked out
 	at the path of this instance. 
diff --git a/git/objects/tag.py b/git/objects/tag.py
index c7d02abe..a3a85eef 100644
--- a/git/objects/tag.py
+++ b/git/objects/tag.py
@@ -4,73 +4,10 @@
 # This module is part of GitPython and is released under
 # the BSD License: http://www.opensource.org/licenses/bsd-license.php
 """ Module containing all object based types. """
-import base
-from gitdb.util import hex_to_bin
-from util import (
-						get_object_type_by_name,
-						parse_actor_and_date
-					)
-
+from git.util import RepoAliasMixin
+from gitdb.object.tag import GitDB_TagObject
 __all__ = ("TagObject", )
 
-class TagObject(base.Object):
+class TagObject(GitDB_TagObject, RepoAliasMixin):
 	"""Non-Lightweight tag carrying additional information about an object we are pointing to."""
-	type = "tag"
-	__slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" )
-		
-	def __init__(self, repo, binsha, object=None, tag=None, 
-				tagger=None, tagged_date=None, tagger_tz_offset=None, message=None):
-		"""Initialize a tag object with additional data
-		
-		:param repo: repository this object is located in
-		:param binsha: 20 byte SHA1
-		:param object: Object instance of object we are pointing to
-		:param tag: name of this tag
-		:param tagger: Actor identifying the tagger
-		:param tagged_date: int_seconds_since_epoch
-			is the DateTime of the tag creation - use time.gmtime to convert 
-			it into a different format
-		:param tagged_tz_offset: int_seconds_west_of_utc is the timezone that the 
-			authored_date is in, in a format similar to time.altzone"""
-		super(TagObject, self).__init__(repo, binsha )
-		if object is not None:
-			self.object = object
-		if tag is not None:
-			self.tag = tag
-		if tagger is not None:
-			self.tagger = tagger
-		if tagged_date is not None:
-			self.tagged_date = tagged_date
-		if tagger_tz_offset is not None:
-			self.tagger_tz_offset = tagger_tz_offset
-		if message is not None:
-			self.message = message
-		
-	def _set_cache_(self, attr):
-		"""Cache all our attributes at once"""
-		if attr in TagObject.__slots__:
-			ostream = self.repo.odb.stream(self.binsha)
-			lines = ostream.read().splitlines()
-			
-			obj, hexsha = lines[0].split(" ")		# object <hexsha>
-			type_token, type_name = lines[1].split(" ") # type <type_name>
-			self.object = get_object_type_by_name(type_name)(self.repo, hex_to_bin(hexsha))
-			
-			self.tag = lines[2][4:]	 # tag <tag name>
-			
-			tagger_info = lines[3][7:]# tagger <actor> <date>
-			self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info)
-			
-			# line 4 empty - it could mark the beginning of the next header
-			# in case there really is no message, it would not exist. Otherwise 
-			# a newline separates header from message
-			if len(lines) > 5:
-				self.message = "\n".join(lines[5:])
-			else:
-				self.message = ''
-		# END check our attributes
-		else:
-			super(TagObject, self)._set_cache_(attr)
-		
-		
-
+	__slots__ = tuple()
diff --git a/git/objects/tree.py b/git/objects/tree.py
index 67431686..23e1dfe4 100644
--- a/git/objects/tree.py
+++ b/git/objects/tree.py
@@ -3,278 +3,26 @@
 #
 # This module is part of GitPython and is released under
 # the BSD License: http://www.opensource.org/licenses/bsd-license.php
-import util
-from base import IndexObject
-from git.util import join_path
 from blob import Blob
-from submodule.base import Submodule
+from git.util import RepoAliasMixin
+from gitdb.object.tree import GitDB_Tree, TreeModifier
 import git.diff as diff
 
-from fun import (
-					tree_entries_from_data, 
-					tree_to_stream
-				 )
-
-from gitdb.util import (
-						to_bin_sha, 
-						)
+from submodule.base import Submodule
 
 __all__ = ("TreeModifier", "Tree")
 
-class TreeModifier(object):
-	"""A utility class providing methods to alter the underlying cache in a list-like fashion.
-	
-	Once all adjustments are complete, the _cache, which really is a refernce to 
-	the cache of a tree, will be sorted. Assuring it will be in a serializable state"""
-	__slots__ = '_cache'
-	
-	def __init__(self, cache):
-		self._cache = cache
-	
-	def _index_by_name(self, name):
-		""":return: index of an item with name, or -1 if not found"""
-		for i, t in enumerate(self._cache):
-			if t[2] == name:
-				return i
-			# END found item
-		# END for each item in cache
-		return -1
-	
-	#{ Interface 
-	def set_done(self):
-		"""Call this method once you are done modifying the tree information.
-		It may be called several times, but be aware that each call will cause 
-		a sort operation
-		:return self:"""
-		self._cache.sort(key=lambda t: t[2])	# sort by name
-		return self
-	#} END interface
-	
-	#{ Mutators
-	def add(self, sha, mode, name, force=False):
-		"""Add the given item to the tree. If an item with the given name already
-		exists, nothing will be done, but a ValueError will be raised if the 
-		sha and mode of the existing item do not match the one you add, unless 
-		force is True
-		
-		:param sha: The 20 or 40 byte sha of the item to add
-		:param mode: int representing the stat compatible mode of the item
-		:param force: If True, an item with your name and information will overwrite
-			any existing item with the same name, no matter which information it has
-		:return: self"""
-		if '/' in name:
-			raise ValueError("Name must not contain '/' characters")
-		if (mode >> 12) not in Tree._map_id_to_type:
-			raise ValueError("Invalid object type according to mode %o" % mode)
-			
-		sha = to_bin_sha(sha)
-		index = self._index_by_name(name)
-		item = (sha, mode, name)
-		if index == -1:
-			self._cache.append(item)
-		else:
-			if force:
-				self._cache[index] = item
-			else:
-				ex_item = self._cache[index]
-				if ex_item[0] != sha or ex_item[1] != mode:
-					raise ValueError("Item %r existed with different properties" % name)
-				# END handle mismatch
-			# END handle force
-		# END handle name exists
-		return self
-		
-	def add_unchecked(self, binsha, mode, name):
-		"""Add the given item to the tree, its correctness is assumed, which 
-		puts the caller into responsibility to assure the input is correct. 
-		For more information on the parameters, see ``add``
-		:param binsha: 20 byte binary sha"""
-		self._cache.append((binsha, mode, name))
-		
-	def __delitem__(self, name):
-		"""Deletes an item with the given name if it exists"""
-		index = self._index_by_name(name)
-		if index > -1:
-			del(self._cache[index])
-		
-	#} END mutators
-
-
-class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable):
-	"""Tree objects represent an ordered list of Blobs and other Trees.
-	
-	``Tree as a list``::
-		
-		Access a specific blob using the  
-		tree['filename'] notation.
-		
-		You may as well access by index
-		blob = tree[0]
-	"""
-	
-	type = "tree"
-	__slots__ = "_cache"
-	
-	# actual integer ids for comparison 
-	commit_id = 016		# equals stat.S_IFDIR | stat.S_IFLNK - a directory link
-	blob_id = 010
-	symlink_id = 012
-	tree_id = 004
+class Tree(GitDB_Tree, diff.Diffable):
+	"""As opposed to the default GitDB tree implementation, this one can be diffed
+	and returns our own types"""
+	__slots__ = tuple()
 	
 	_map_id_to_type = {
-						commit_id : Submodule, 
-						blob_id : Blob, 
-						symlink_id : Blob
+						GitDB_Tree.commit_id : Submodule, 
+						GitDB_Tree.blob_id : Blob, 
+						GitDB_Tree.symlink_id : Blob
 						# tree id added once Tree is defined
 						}
 	
-	
-	def __init__(self, repo, binsha, mode=tree_id<<12, path=None):
-		super(Tree, self).__init__(repo, binsha, mode, path)
-
-	@classmethod
-	def _get_intermediate_items(cls, index_object):
-		if index_object.type == "tree":
-			return tuple(index_object._iter_convert_to_object(index_object._cache))
-		return tuple()
-
-	def _set_cache_(self, attr):
-		if attr == "_cache":
-			# Set the data when we need it
-			ostream = self.repo.odb.stream(self.binsha)
-			self._cache = tree_entries_from_data(ostream.read())
-		else:
-			super(Tree, self)._set_cache_(attr)
-		# END handle attribute 
-
-	def _iter_convert_to_object(self, iterable):
-		"""Iterable yields tuples of (binsha, mode, name), which will be converted
-		to the respective object representation"""
-		for binsha, mode, name in iterable:
-			path = join_path(self.path, name)
-			try:
-				yield self._map_id_to_type[mode >> 12](self.repo, binsha, mode, path)
-			except KeyError:
-				raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path))
-		# END for each item 
-
-	def __div__(self, file):
-		"""Find the named object in this tree's contents
-		:return: ``git.Blob`` or ``git.Tree`` or ``git.Submodule``
-		
-		:raise KeyError: if given file or tree does not exist in tree"""
-		msg = "Blob or Tree named %r not found"
-		if '/' in file:
-			tree = self
-			item = self
-			tokens = file.split('/')
-			for i,token in enumerate(tokens):
-				item = tree[token]
-				if item.type == 'tree':
-					tree = item
-				else:
-					# safety assertion - blobs are at the end of the path
-					if i != len(tokens)-1:
-						raise KeyError(msg % file)
-					return item
-				# END handle item type
-			# END for each token of split path
-			if item == self:
-				raise KeyError(msg % file)
-			return item
-		else:
-			for info in self._cache:
-				if info[2] == file:		# [2] == name
-					return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2]))
-			# END for each obj
-			raise KeyError( msg % file )
-		# END handle long paths
-
-
-	@property
-	def trees(self):
-		""":return: list(Tree, ...) list of trees directly below this tree"""
-		return [ i for i in self if i.type == "tree" ]
-		
-	@property
-	def blobs(self):
-		""":return: list(Blob, ...) list of blobs directly below this tree"""
-		return [ i for i in self if i.type == "blob" ]
-
-	@property
-	def cache(self):
-		"""
-		:return: An object allowing to modify the internal cache. This can be used
-			to change the tree's contents. When done, make sure you call ``set_done``
-			on the tree modifier, or serialization behaviour will be incorrect.
-			See the ``TreeModifier`` for more information on how to alter the cache"""
-		return TreeModifier(self._cache)
-
-	def traverse( self, predicate = lambda i,d: True,
-						   prune = lambda i,d: False, depth = -1, branch_first=True,
-						   visit_once = False, ignore_self=1 ):
-		"""For documentation, see util.Traversable.traverse
-		Trees are set to visit_once = False to gain more performance in the traversal"""
-		return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self)
-
-	# List protocol
-	def __getslice__(self, i, j):
-		return list(self._iter_convert_to_object(self._cache[i:j]))
-		
-	def __iter__(self):
-		return self._iter_convert_to_object(self._cache)
-		
-	def __len__(self):
-		return len(self._cache)
-		
-	def __getitem__(self, item):
-		if isinstance(item, int):
-			info = self._cache[item]
-			return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2]))
-		
-		if isinstance(item, basestring):
-			# compatability
-			return self.__div__(item)
-		# END index is basestring 
-		
-		raise TypeError( "Invalid index type: %r" % item )
-		
-		
-	def __contains__(self, item):
-		if isinstance(item, IndexObject):
-			for info in self._cache:
-				if item.binsha == info[0]:
-					return True
-				# END compare sha
-			# END for each entry
-		# END handle item is index object
-		# compatability
-		
-		# treat item as repo-relative path
-		path = self.path
-		for info in self._cache:
-			if item == join_path(path, info[2]):
-				return True
-		# END for each item
-		return False
-	
-	def __reversed__(self):
-		return reversed(self._iter_convert_to_object(self._cache))
-		
-	def _serialize(self, stream):
-		"""Serialize this tree into the stream. Please note that we will assume 
-		our tree data to be in a sorted state. If this is not the case, serialization
-		will not generate a correct tree representation as these are assumed to be sorted
-		by algorithms"""
-		tree_to_stream(self._cache, stream.write)
-		return self
-		
-	def _deserialize(self, stream):
-		self._cache = tree_entries_from_data(stream.read())
-		return self
-		
-		
-# END tree
-
 # finalize map definition
 Tree._map_id_to_type[Tree.tree_id] = Tree
diff --git a/git/test/test_blob.py b/git/test/objects/test_blob.py
index 661c0501..661c0501 100644
--- a/git/test/test_blob.py
+++ b/git/test/objects/test_blob.py
diff --git a/git/test/test_commit.py b/git/test/objects/test_commit.py
index 4a8d8b87..4a8d8b87 100644
--- a/git/test/test_commit.py
+++ b/git/test/objects/test_commit.py
diff --git a/git/test/test_submodule.py b/git/test/objects/test_submodule.py
index adb4fb82..adb4fb82 100644
--- a/git/test/test_submodule.py
+++ b/git/test/objects/test_submodule.py
diff --git a/git/test/test_tree.py b/git/test/objects/test_tree.py
index ec10e962..ec10e962 100644
--- a/git/test/test_tree.py
+++ b/git/test/objects/test_tree.py
diff --git a/git/test/test_actor.py b/git/test/test_actor.py
deleted file mode 100644
index b8e5ba3b..00000000
--- a/git/test/test_actor.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# test_actor.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-import os
-from git.test.lib import *
-from git import *
-
-class TestActor(object):
-    def test_from_string_should_separate_name_and_email(self):
-        a = Actor._from_string("Michael Trier <mtrier@example.com>")
-        assert_equal("Michael Trier", a.name)
-        assert_equal("mtrier@example.com", a.email)
-        
-        # base type capabilities
-        assert a == a
-        assert not ( a != a )
-        m = set()
-        m.add(a)
-        m.add(a)
-        assert len(m) == 1
-
-    def test_from_string_should_handle_just_name(self):
-        a = Actor._from_string("Michael Trier")
-        assert_equal("Michael Trier", a.name)
-        assert_equal(None, a.email)
-
-    def test_should_display_representation(self):
-        a = Actor._from_string("Michael Trier <mtrier@example.com>")
-        assert_equal('<git.Actor "Michael Trier <mtrier@example.com>">', repr(a))
-
-    def test_str_should_alias_name(self):
-        a = Actor._from_string("Michael Trier <mtrier@example.com>")
-        assert_equal(a.name, str(a))
-\ No newline at end of file
diff --git a/git/util.py b/git/util.py
index ec1ece1e..3d9fd7d5 100644
--- a/git/util.py
+++ b/git/util.py
@@ -26,7 +26,9 @@ from gitdb.util import (
 							LockFile,
 							BlockingLockFile,
 							Actor,
-							Iterable
+							Iterable,
+							stream_copy,
+							IterableList
 						)
 
 __all__ = ( "stream_copy", "join_path", "to_native_path_windows", "to_native_path_linux", 
@@ -36,21 +38,6 @@ __all__ = ( "stream_copy", "join_path", "to_native_path_windows", "to_native_pat
 
 #{ Utility Methods
 
-def stream_copy(source, destination, chunk_size=512*1024):
-	"""Copy all data from the source stream into the destination stream in chunks
-	of size chunk_size
-	
-	:return: amount of bytes written"""
-	br = 0
-	while True:
-		chunk = source.read(chunk_size)
-		destination.write(chunk)
-		br += len(chunk)
-		if len(chunk) < chunk_size:
-			break
-	# END reading output stream
-	return br
-	
 	
 def get_user_id():
 	""":return: string identifying the currently active system user as name@node
@@ -286,51 +273,4 @@ class IndexFileSHA1Writer(object):
 		return self.f.tell()
 
 
-	
-
-class IterableList(list):
-	"""
-	List of iterable objects allowing to query an object by id or by named index::
-	 
-	 heads = repo.heads
-	 heads.master
-	 heads['master']
-	 heads[0]
-	 
-	It requires an id_attribute name to be set which will be queried from its 
-	contained items to have a means for comparison.
-	
-	A prefix can be specified which is to be used in case the id returned by the 
-	items always contains a prefix that does not matter to the user, so it 
-	can be left out."""
-	__slots__ = ('_id_attr', '_prefix')
-	
-	def __new__(cls, id_attr, prefix=''):
-		return super(IterableList,cls).__new__(cls)
-		
-	def __init__(self, id_attr, prefix=''):
-		self._id_attr = id_attr
-		self._prefix = prefix
-		if not isinstance(id_attr, basestring):
-			raise ValueError("First parameter must be a string identifying the name-property. Extend the list after initialization")
-		# END help debugging !
-		
-	def __getattr__(self, attr):
-		attr = self._prefix + attr
-		for item in self:
-			if getattr(item, self._id_attr) == attr:
-				return item
-		# END for each item
-		return list.__getattribute__(self, attr)
-		
-	def __getitem__(self, index):
-		if isinstance(index, int):
-			return list.__getitem__(self,index)
-		
-		try:
-			return getattr(self, index)
-		except AttributeError:
-			raise IndexError( "No item found with id %r" % (self._prefix + index) )
-			
-
 #} END classes
author	Sebastian Thiel <byronimo@gmail.com>	2011-04-07 20:17:00 +0200
committer	Sebastian Thiel <byronimo@gmail.com>	2011-04-07 20:17:00 +0200
commit	a12a7618a1f6f61a4c97ddf4cc422158c3fa72ba (patch)
tree	2b2e0e9991ddede152556f7954cd6e4c6121be97 /git
parent	e77d2d0ebb9487b696835f219e4a23a558462a55 (diff)
download	gitpython-a12a7618a1f6f61a4c97ddf4cc422158c3fa72ba.tar.gz