author | Sebastian Thiel <byronimo@gmail.com> | 2011-05-05 19:43:22 +0200 |
committer | Sebastian Thiel <byronimo@gmail.com> | 2011-05-05 19:43:22 +0200 |
commit | 4177eefd7bdaea96a529b00ba9cf751924ede202 (patch) |
tree | 958614c21bd97267e0d06f71bb18d4215ddd87b5 /git/objects |
parent | f54546a9b857ae728033482f3c5c18c9ff3393c3 (diff) |
download | gitpython-4177eefd7bdaea96a529b00ba9cf751924ede202.tar.gz |
Added all code from gitdb to gitpython. Next is to make it generally work. Then the tests will need some work
Diffstat (limited to 'git/objects')
-rw-r--r-- | git/objects/base.py | 173 |
-rw-r--r-- | git/objects/blob.py | 27 |
-rw-r--r-- | git/objects/commit.py | 259 |
-rw-r--r-- | git/objects/fun.py | 199 |
-rw-r--r-- | git/objects/submodule/base.py | 3 |
-rw-r--r-- | git/objects/tag.py | 73 |
-rw-r--r-- | git/objects/tree.py | 282 |
-rw-r--r-- | git/objects/util.py | 1 |
8 files changed, 993 insertions, 24 deletions
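The diff below vendors gitdb's object model directly into `git/objects`, with `base.py` carrying the new `Object`/`IndexObject` base classes. As a rough orientation before reading it, here is a minimal sketch of how that API is meant to be consumed once the port is wired up. It is hedged deliberately: the commit message itself says the code does not generally work yet, the `describe` helper and its `odb`/`binsha` parameters are hypothetical names, and `odb` stands for a gitdb-style object database exposing `info(sha)` and `stream(sha)` as `base.Object` expects. Only `Object.new_from_sha`, `type`, `hexsha` and `size` come from the diff itself.

```python
# Hedged sketch, not runnable against this exact revision: how the new
# base.Object API (git/objects/base.py below) is intended to be used.
# `odb` is assumed to be a gitdb-style database with info(sha) and stream(sha).
from git.objects.base import Object

def describe(odb, binsha):
    """Print type, hex sha and size of any object given its 20-byte binary sha."""
    obj = Object.new_from_sha(odb, binsha)  # picks Blob/Tree/Commit/TagObject by type
    print obj.type, obj.hexsha, obj.size    # size is filled lazily via odb.info()
    return obj
```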
diff --git a/git/objects/base.py b/git/objects/base.py index 42d7b600..24967e7b 100644 --- a/git/objects/base.py +++ b/git/objects/base.py @@ -3,6 +3,177 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from gitdb.object.base import Object, IndexObject + +from util import get_object_type_by_name +from git.util import ( + hex_to_bin, + bin_to_hex, + dirname, + basename, + LazyMixin, + join_path_native, + stream_copy + ) + +from git.typ import ObjectType + +_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" + __all__ = ("Object", "IndexObject") +class Object(LazyMixin): + """Implements an Object which may be Blobs, Trees, Commits and Tags""" + NULL_HEX_SHA = '0'*40 + NULL_BIN_SHA = '\0'*20 + + TYPES = (ObjectType.blob, ObjectType.tree, ObjectType.commit, ObjectType.tag) + __slots__ = ("odb", "binsha", "size" ) + + type = None # to be set by subclass + type_id = None # to be set by subclass + + def __init__(self, odb, binsha): + """Initialize an object by identifying it by its binary sha. + All keyword arguments will be set on demand if None. + + :param odb: repository this object is located in + + :param binsha: 20 byte SHA1""" + super(Object,self).__init__() + self.odb = odb + self.binsha = binsha + assert len(binsha) == 20, "Require 20 byte binary sha, got %r, len = %i" % (binsha, len(binsha)) + + @classmethod + def new(cls, odb, id): + """ + :return: New Object instance of a type appropriate to the object type behind + id. The id of the newly created object will be a binsha even though + the input id may have been a Reference or Rev-Spec + + :param id: reference, rev-spec, or hexsha + + :note: This cannot be a __new__ method as it would always call __init__ + with the input id which is not necessarily a binsha.""" + return odb.rev_parse(str(id)) + + @classmethod + def new_from_sha(cls, odb, sha1): + """ + :return: new object instance of a type appropriate to represent the given + binary sha1 + :param sha1: 20 byte binary sha1""" + if sha1 == cls.NULL_BIN_SHA: + # the NULL binsha is always the root commit + return get_object_type_by_name('commit')(odb, sha1) + #END handle special case + oinfo = odb.info(sha1) + inst = get_object_type_by_name(oinfo.type)(odb, oinfo.binsha) + inst.size = oinfo.size + return inst + + def _set_cache_(self, attr): + """Retrieve object information""" + if attr == "size": + oinfo = self.odb.info(self.binsha) + self.size = oinfo.size + # assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type) + else: + super(Object,self)._set_cache_(attr) + + def __eq__(self, other): + """:return: True if the objects have the same SHA1""" + return self.binsha == other.binsha + + def __ne__(self, other): + """:return: True if the objects do not have the same SHA1 """ + return self.binsha != other.binsha + + def __hash__(self): + """:return: Hash of our id allowing objects to be used in dicts and sets""" + return hash(self.binsha) + + def __str__(self): + """:return: string of our SHA1 as understood by all git commands""" + return bin_to_hex(self.binsha) + + def __repr__(self): + """:return: string with pythonic representation of our object""" + return '<git.%s "%s">' % (self.__class__.__name__, self.hexsha) + + @property + def hexsha(self): + """:return: 40 byte hex version of our 20 byte binary sha""" + return bin_to_hex(self.binsha) + + @property + def data_stream(self): + """ :return: File Object 
compatible stream to the uncompressed raw data of the object + :note: returned streams must be read in order""" + return self.odb.stream(self.binsha) + + def stream_data(self, ostream): + """Writes our data directly to the given output stream + :param ostream: File object compatible stream object. + :return: self""" + istream = self.odb.stream(self.binsha) + stream_copy(istream, ostream) + return self + + +class IndexObject(Object): + """Base for all objects that can be part of the index file , namely Tree, Blob and + SubModule objects""" + __slots__ = ("path", "mode") + + # for compatability with iterable lists + _id_attribute_ = 'path' + + def __init__(self, odb, binsha, mode=None, path=None): + """Initialize a newly instanced IndexObject + :param odb: is the object database we are located in + :param binsha: 20 byte sha1 + :param mode: is the stat compatible file mode as int, use the stat module + to evaluate the infomration + :param path: + is the path to the file in the file system, relative to the git repository root, i.e. + file.ext or folder/other.ext + :note: + Path may not be set of the index object has been created directly as it cannot + be retrieved without knowing the parent tree.""" + super(IndexObject, self).__init__(odb, binsha) + if mode is not None: + self.mode = mode + if path is not None: + self.path = path + + def __hash__(self): + """:return: + Hash of our path as index items are uniquely identifyable by path, not + by their data !""" + return hash(self.path) + + def _set_cache_(self, attr): + if attr in IndexObject.__slots__: + # they cannot be retrieved lateron ( not without searching for them ) + raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) + else: + super(IndexObject, self)._set_cache_(attr) + # END hanlde slot attribute + + @property + def name(self): + """:return: Name portion of the path, effectively being the basename""" + return basename(self.path) + + @property + def abspath(self): + """ + :return: + Absolute path to this index object in the file system ( as opposed to the + .path field which is a path relative to the git repository ). + + The returned path will be native to the system and contains '\' on windows. """ + assert False, "Only works if repository is not bare - provide this check in an interface" + return join_path_native(dirname(self.odb.root_path()), self.path) + diff --git a/git/objects/blob.py b/git/objects/blob.py index 38834436..326c5459 100644 --- a/git/objects/blob.py +++ b/git/objects/blob.py @@ -5,9 +5,32 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php from git.util import RepoAliasMixin -from gitdb.object.blob import Blob as GitDB_Blob +from mimetypes import guess_type +from gitdb.typ import ObjectType + +import base __all__ = ('Blob', ) -class Blob(GitDB_Blob, RepoAliasMixin): +class Blob(base.IndexObject, RepoAliasMixin): + """A Blob encapsulates a git blob object""" + DEFAULT_MIME_TYPE = "text/plain" + type = ObjectType.blob + type_id = ObjectType.blob_id + + # valid blob modes + executable_mode = 0100755 + file_mode = 0100644 + link_mode = 0120000 + __slots__ = tuple() + + @property + def mime_type(self): + """ + :return: String describing the mime type of this file (based on the filename) + :note: Defaults to 'text/plain' in case the actual file type is unknown. 
""" + guesses = None + if self.path: + guesses = guess_type(self.path) + return guesses and guesses[0] or self.DEFAULT_MIME_TYPE diff --git a/git/objects/commit.py b/git/objects/commit.py index d932ab1a..30dcaa0a 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -3,28 +3,68 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from git.util import RepoAliasMixin -from gitdb.object.commit import Commit as GitDB_Commit -from git.diff import Diffable +import base + +from gitdb.typ import ObjectType +from tree import Tree +from cStringIO import StringIO + from gitdb.util import ( + hex_to_bin, + Actor, + RepoAliasMixin, Iterable, Actor ) -from gitdb import IStream +from util import ( + Traversable, + Serializable, + altz_to_utctz_str, + parse_actor_and_date + ) +from git.diff import Diffable +from gitdb.base import IStream from cStringIO import StringIO from util import parse_date from time import altzone import os +import sys __all__ = ('Commit', ) -class Commit(GitDB_Commit, Diffable, Iterable, RepoAliasMixin): - """Provides additional git-command based functionality to the default gitdb commit object""" +class Commit(GitDB_Commit, Diffable, Iterable, RepoAliasMixin, base.Object, Traversable, Serializable): + """Wraps a git Commit object. + + This class will act lazily on some of its attributes and will query the + value on demand only if it involves calling the git binary.""" __slots__ = tuple() + # ENVIRONMENT VARIABLES + # read when creating new commits + env_author_date = "GIT_AUTHOR_DATE" + env_committer_date = "GIT_COMMITTER_DATE" + + # CONFIGURATION KEYS + conf_encoding = 'i18n.commitencoding' + + # INVARIANTS + default_encoding = "UTF-8" + + + # object configuration + type = ObjectType.commit + type_id = ObjectType.commit_id + + __slots__ = ("tree", + "author", "authored_date", "author_tz_offset", + "committer", "committed_date", "committer_tz_offset", + "message", "parents", "encoding") + _id_attribute_ = "binsha" + + def count(self, paths='', **kwargs): """Count the number of commits reachable from this commit @@ -221,4 +261,211 @@ class Commit(GitDB_Commit, Diffable, Iterable, RepoAliasMixin): return new_commit + def __init__(self, odb, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None, + committer=None, committed_date=None, committer_tz_offset=None, + message=None, parents=None, encoding=None): + """Instantiate a new Commit. All keyword arguments taking None as default will + be implicitly set on first query. + + :param binsha: 20 byte sha1 + :param parents: tuple( Commit, ... 
) + is a tuple of commit ids or actual Commits + :param tree: Tree + Tree object + :param author: Actor + is the author string ( will be implicitly converted into an Actor object ) + :param authored_date: int_seconds_since_epoch + is the authored DateTime - use time.gmtime() to convert it into a + different format + :param author_tz_offset: int_seconds_west_of_utc + is the timezone that the authored_date is in + :param committer: Actor + is the committer string + :param committed_date: int_seconds_since_epoch + is the committed DateTime - use time.gmtime() to convert it into a + different format + :param committer_tz_offset: int_seconds_west_of_utc + is the timezone that the authored_date is in + :param message: string + is the commit message + :param encoding: string + encoding of the message, defaults to UTF-8 + :param parents: + List or tuple of Commit objects which are our parent(s) in the commit + dependency graph + :return: git.Commit + + :note: Timezone information is in the same format and in the same sign + as what time.altzone returns. The sign is inverted compared to git's + UTC timezone.""" + super(Commit,self).__init__(odb, binsha) + if tree is not None: + assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree) + if tree is not None: + self.tree = tree + if author is not None: + self.author = author + if authored_date is not None: + self.authored_date = authored_date + if author_tz_offset is not None: + self.author_tz_offset = author_tz_offset + if committer is not None: + self.committer = committer + if committed_date is not None: + self.committed_date = committed_date + if committer_tz_offset is not None: + self.committer_tz_offset = committer_tz_offset + if message is not None: + self.message = message + if parents is not None: + self.parents = parents + if encoding is not None: + self.encoding = encoding + + @classmethod + def _get_intermediate_items(cls, commit): + return commit.parents + + def _set_cache_(self, attr): + if attr in Commit.__slots__: + # read the data in a chunk, its faster - then provide a file wrapper + binsha, typename, self.size, stream = self.odb.stream(self.binsha) + self._deserialize(StringIO(stream.read())) + else: + super(Commit, self)._set_cache_(attr) + # END handle attrs + + @property + def summary(self): + """:return: First line of the commit message""" + return self.message.split('\n', 1)[0] + + @classmethod + def _iter_from_process_or_stream(cls, odb, proc_or_stream): + """Parse out commit information into a list of Commit objects + We expect one-line per commit, and parse the actual commit information directly + from our lighting fast object database + + :param proc: git-rev-list process instance - one sha per line + :return: iterator returning Commit objects""" + stream = proc_or_stream + if not hasattr(stream,'readline'): + stream = proc_or_stream.stdout + + readline = stream.readline + while True: + line = readline() + if not line: + break + hexsha = line.strip() + if len(hexsha) > 40: + # split additional information, as returned by bisect for instance + hexsha, rest = line.split(None, 1) + # END handle extra info + + assert len(hexsha) == 40, "Invalid line: %s" % hexsha + yield cls(odb, hex_to_bin(hexsha)) + # END for each line in stream + + #{ Serializable Implementation + + def _serialize(self, stream): + write = stream.write + write("tree %s\n" % self.tree) + for p in self.parents: + write("parent %s\n" % p) + + a = self.author + aname = a.name + if isinstance(aname, unicode): + aname = 
aname.encode(self.encoding) + # END handle unicode in name + + c = self.committer + fmt = "%s %s <%s> %s %s\n" + write(fmt % ("author", aname, a.email, + self.authored_date, + altz_to_utctz_str(self.author_tz_offset))) + + # encode committer + aname = c.name + if isinstance(aname, unicode): + aname = aname.encode(self.encoding) + # END handle unicode in name + write(fmt % ("committer", aname, c.email, + self.committed_date, + altz_to_utctz_str(self.committer_tz_offset))) + + if self.encoding != self.default_encoding: + write("encoding %s\n" % self.encoding) + + write("\n") + + # write plain bytes, be sure its encoded according to our encoding + if isinstance(self.message, unicode): + write(self.message.encode(self.encoding)) + else: + write(self.message) + # END handle encoding + return self + + def _deserialize(self, stream): + """:param from_rev_list: if true, the stream format is coming from the rev-list command + Otherwise it is assumed to be a plain data stream from our object""" + readline = stream.readline + self.tree = Tree(self.odb, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '') + + self.parents = list() + next_line = None + while True: + parent_line = readline() + if not parent_line.startswith('parent'): + next_line = parent_line + break + # END abort reading parents + self.parents.append(type(self)(self.odb, hex_to_bin(parent_line.split()[-1]))) + # END for each parent line + self.parents = tuple(self.parents) + + self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line) + self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline()) + + + # now we can have the encoding line, or an empty line followed by the optional + # message. + self.encoding = self.default_encoding + # read encoding or empty line to separate message + enc = readline() + enc = enc.strip() + if enc: + self.encoding = enc[enc.find(' ')+1:] + # now comes the message separator + readline() + # END handle encoding + + # decode the authors name + try: + self.author.name = self.author.name.decode(self.encoding) + except UnicodeDecodeError: + print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding) + # END handle author's encoding + + # decode committer name + try: + self.committer.name = self.committer.name.decode(self.encoding) + except UnicodeDecodeError: + print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding) + # END handle author's encoding + + # a stream from our data simply gives us the plain message + # The end of our message stream is marked with a newline that we strip + self.message = stream.read() + try: + self.message = self.message.decode(self.encoding) + except UnicodeDecodeError: + print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding) + # END exception handling + return self + #} END serializable implementation + diff --git a/git/objects/fun.py b/git/objects/fun.py index 2443bad7..6f2eaaad 100644 --- a/git/objects/fun.py +++ b/git/objects/fun.py @@ -1,4 +1,201 @@ """Module with functions which are supposed to be as fast as possible""" -from gitdb.object.fun import * +from stat import S_ISDIR + +__all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive', + 'traverse_tree_recursive') + + + + +def tree_to_stream(entries, write): + """Write the give list of entries into a stream using its write method + :param entries: **sorted** list of tuples 
with (binsha, mode, name) + :param write: write method which takes a data string""" + ord_zero = ord('0') + bit_mask = 7 # 3 bits set + + for binsha, mode, name in entries: + mode_str = '' + for i in xrange(6): + mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str + # END for each 8 octal value + + # git slices away the first octal if its zero + if mode_str[0] == '0': + mode_str = mode_str[1:] + # END save a byte + + # here it comes: if the name is actually unicode, the replacement below + # will not work as the binsha is not part of the ascii unicode encoding - + # hence we must convert to an utf8 string for it to work properly. + # According to my tests, this is exactly what git does, that is it just + # takes the input literally, which appears to be utf8 on linux. + if isinstance(name, unicode): + name = name.encode("utf8") + write("%s %s\0%s" % (mode_str, name, binsha)) + # END for each item + + +def tree_entries_from_data(data): + """Reads the binary representation of a tree and returns tuples of Tree items + :param data: data block with tree data + :return: list(tuple(binsha, mode, tree_relative_path), ...)""" + ord_zero = ord('0') + len_data = len(data) + i = 0 + out = list() + while i < len_data: + mode = 0 + + # read mode + # Some git versions truncate the leading 0, some don't + # The type will be extracted from the mode later + while data[i] != ' ': + # move existing mode integer up one level being 3 bits + # and add the actual ordinal value of the character + mode = (mode << 3) + (ord(data[i]) - ord_zero) + i += 1 + # END while reading mode + + # byte is space now, skip it + i += 1 + + # parse name, it is NULL separated + + ns = i + while data[i] != '\0': + i += 1 + # END while not reached NULL + + # default encoding for strings in git is utf8 + # Only use the respective unicode object if the byte stream was encoded + name = data[ns:i] + name_enc = name.decode("utf-8") + if len(name) > len(name_enc): + name = name_enc + # END handle encoding + + # byte is NULL, get next 20 + i += 1 + sha = data[i:i+20] + i = i + 20 + out.append((sha, mode, name)) + # END for each byte in data stream + return out + + +def _find_by_name(tree_data, name, is_dir, start_at): + """return data entry matching the given name and tree mode + or None. + Before the item is returned, the respective data item is set + None in the tree_data list to mark it done""" + try: + item = tree_data[start_at] + if item and item[2] == name and S_ISDIR(item[1]) == is_dir: + tree_data[start_at] = None + return item + except IndexError: + pass + # END exception handling + for index, item in enumerate(tree_data): + if item and item[2] == name and S_ISDIR(item[1]) == is_dir: + tree_data[index] = None + return item + # END if item matches + # END for each item + return None + +def _to_full_path(item, path_prefix): + """Rebuild entry with given path prefix""" + if not item: + return item + return (item[0], item[1], path_prefix+item[2]) + +def traverse_trees_recursive(odb, tree_shas, path_prefix): + """ + :return: list with entries according to the given binary tree-shas. + The result is encoded in a list + of n tuple|None per blob/commit, (n == len(tree_shas)), where + * [0] == 20 byte sha + * [1] == mode as int + * [2] == path relative to working tree root + The entry tuple is None if the respective blob/commit did not + exist in the given tree. + :param tree_shas: iterable of shas pointing to trees. All trees must + be on the same level. 
A tree-sha may be None in which case None + :param path_prefix: a prefix to be added to the returned paths on this level, + set it '' for the first iteration + :note: The ordering of the returned items will be partially lost""" + trees_data = list() + nt = len(tree_shas) + for tree_sha in tree_shas: + if tree_sha is None: + data = list() + else: + data = tree_entries_from_data(odb.stream(tree_sha).read()) + # END handle muted trees + trees_data.append(data) + # END for each sha to get data for + + out = list() + out_append = out.append + + # find all matching entries and recursively process them together if the match + # is a tree. If the match is a non-tree item, put it into the result. + # Processed items will be set None + for ti, tree_data in enumerate(trees_data): + for ii, item in enumerate(tree_data): + if not item: + continue + # END skip already done items + entries = [ None for n in range(nt) ] + entries[ti] = item + sha, mode, name = item # its faster to unpack + is_dir = S_ISDIR(mode) # type mode bits + + # find this item in all other tree data items + # wrap around, but stop one before our current index, hence + # ti+nt, not ti+1+nt + for tio in range(ti+1, ti+nt): + tio = tio % nt + entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii) + # END for each other item data + + # if we are a directory, enter recursion + if is_dir: + out.extend(traverse_trees_recursive(odb, [((ei and ei[0]) or None) for ei in entries], path_prefix+name+'/')) + else: + out_append(tuple(_to_full_path(e, path_prefix) for e in entries)) + # END handle recursion + + # finally mark it done + tree_data[ii] = None + # END for each item + + # we are done with one tree, set all its data empty + del(tree_data[:]) + # END for each tree_data chunk + return out + +def traverse_tree_recursive(odb, tree_sha, path_prefix): + """ + :return: list of entries of the tree pointed to by the binary tree_sha. An entry + has the following format: + * [0] 20 byte sha + * [1] mode as int + * [2] path relative to the repository + :param path_prefix: prefix to prepend to the front of all returned paths""" + entries = list() + data = tree_entries_from_data(odb.stream(tree_sha).read()) + + # unpacking/packing is faster than accessing individual items + for sha, mode, name in data: + if S_ISDIR(mode): + entries.extend(traverse_tree_recursive(odb, sha, path_prefix+name+'/')) + else: + entries.append((sha, mode, path_prefix+name)) + # END for each item + + return entries diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index 7997e5e5..9b45d9b6 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -73,6 +73,9 @@ class Submodule(GitDB_Submodule, Iterable, Traversable, RepoAliasMixin): # this is a bogus type for base class compatability type = 'submodule' + # this type doesn't really have a type id + type_id = 0 + __slots__ = ('_parent_commit', '_url', '_branch_path', '_name', '__weakref__') _cache_attrs = ('path', '_url', '_branch_path') diff --git a/git/objects/tag.py b/git/objects/tag.py index 59b2362e..0bd1d20c 100644 --- a/git/objects/tag.py +++ b/git/objects/tag.py @@ -4,10 +4,77 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php """ Module containing all object based types. 
""" +import base from git.util import RepoAliasMixin -from gitdb.object.tag import TagObject as GitDB_TagObject +from gitdb.util import hex_to_bin +from util import ( + get_object_type_by_name, + parse_actor_and_date + ) +from gitdb.typ import ObjectType + __all__ = ("TagObject", ) -class TagObject(GitDB_TagObject, RepoAliasMixin): +class TagObject(base.Object, RepoAliasMixin): """Non-Lightweight tag carrying additional information about an object we are pointing to.""" - __slots__ = tuple() + type = ObjectType.tag + type_id = ObjectType.tag_id + + __slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" ) + + def __init__(self, odb, binsha, object=None, tag=None, + tagger=None, tagged_date=None, tagger_tz_offset=None, message=None): + """Initialize a tag object with additional data + + :param odb: repository this object is located in + :param binsha: 20 byte SHA1 + :param object: Object instance of object we are pointing to + :param tag: name of this tag + :param tagger: Actor identifying the tagger + :param tagged_date: int_seconds_since_epoch + is the DateTime of the tag creation - use time.gmtime to convert + it into a different format + :param tagged_tz_offset: int_seconds_west_of_utc is the timezone that the + authored_date is in, in a format similar to time.altzone""" + super(TagObject, self).__init__(odb, binsha ) + if object is not None: + self.object = object + if tag is not None: + self.tag = tag + if tagger is not None: + self.tagger = tagger + if tagged_date is not None: + self.tagged_date = tagged_date + if tagger_tz_offset is not None: + self.tagger_tz_offset = tagger_tz_offset + if message is not None: + self.message = message + + def _set_cache_(self, attr): + """Cache all our attributes at once""" + if attr in TagObject.__slots__: + ostream = self.odb.stream(self.binsha) + lines = ostream.read().splitlines() + + obj, hexsha = lines[0].split(" ") # object <hexsha> + type_token, type_name = lines[1].split(" ") # type <type_name> + self.object = get_object_type_by_name(type_name)(self.odb, hex_to_bin(hexsha)) + + self.tag = lines[2][4:] # tag <tag name> + + tagger_info = lines[3][7:]# tagger <actor> <date> + self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info) + + # line 4 empty - it could mark the beginning of the next header + # in case there really is no message, it would not exist. 
Otherwise + # a newline separates header from message + if len(lines) > 5: + self.message = "\n".join(lines[5:]) + else: + self.message = '' + # END check our attributes + else: + super(TagObject, self)._set_cache_(attr) + + + diff --git a/git/objects/tree.py b/git/objects/tree.py index 00ef07fc..1b5f7561 100644 --- a/git/objects/tree.py +++ b/git/objects/tree.py @@ -4,26 +4,286 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php from git.util import RepoAliasMixin -from gitdb.object.tree import Tree as GitDB_Tree -from gitdb.object.tree import TreeModifier import git.diff as diff - +from gitdb.typ import ObjectType +from base import IndexObject from blob import Blob -from submodule.base import Submodule +from submodule import Submodule + +from fun import ( + tree_entries_from_data, + tree_to_stream + ) + +from gitdb.util import ( + to_bin_sha, + join_path + ) +import util __all__ = ("TreeModifier", "Tree") -class Tree(GitDB_Tree, diff.Diffable): - """As opposed to the default GitDB tree implementation, this one can be diffed - and returns our own types""" - __slots__ = tuple() +class TreeModifier(object): + """A utility class providing methods to alter the underlying cache in a list-like fashion. + + Once all adjustments are complete, the _cache, which really is a refernce to + the cache of a tree, will be sorted. Assuring it will be in a serializable state""" + __slots__ = '_cache' + + def __init__(self, cache): + self._cache = cache + + def _index_by_name(self, name): + """:return: index of an item with name, or -1 if not found""" + for i, t in enumerate(self._cache): + if t[2] == name: + return i + # END found item + # END for each item in cache + return -1 + + #{ Interface + def set_done(self): + """Call this method once you are done modifying the tree information. + It may be called several times, but be aware that each call will cause + a sort operation + :return self:""" + self._cache.sort(key=lambda t: t[2]) # sort by name + return self + #} END interface + + #{ Mutators + def add(self, sha, mode, name, force=False): + """Add the given item to the tree. If an item with the given name already + exists, nothing will be done, but a ValueError will be raised if the + sha and mode of the existing item do not match the one you add, unless + force is True + + :param sha: The 20 or 40 byte sha of the item to add + :param mode: int representing the stat compatible mode of the item + :param force: If True, an item with your name and information will overwrite + any existing item with the same name, no matter which information it has + :return: self""" + if '/' in name: + raise ValueError("Name must not contain '/' characters") + if (mode >> 12) not in Tree._map_id_to_type: + raise ValueError("Invalid object type according to mode %o" % mode) + + sha = to_bin_sha(sha) + index = self._index_by_name(name) + item = (sha, mode, name) + if index == -1: + self._cache.append(item) + else: + if force: + self._cache[index] = item + else: + ex_item = self._cache[index] + if ex_item[0] != sha or ex_item[1] != mode: + raise ValueError("Item %r existed with different properties" % name) + # END handle mismatch + # END handle force + # END handle name exists + return self + + def add_unchecked(self, binsha, mode, name): + """Add the given item to the tree, its correctness is assumed, which + puts the caller into responsibility to assure the input is correct. 
+ For more information on the parameters, see ``add`` + :param binsha: 20 byte binary sha""" + self._cache.append((binsha, mode, name)) + + def __delitem__(self, name): + """Deletes an item with the given name if it exists""" + index = self._index_by_name(name) + if index > -1: + del(self._cache[index]) + + #} END mutators + + +class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable, RepoAliasMixin): + """Tree objects represent an ordered list of Blobs and other Trees. + + ``Tree as a list``:: + + Access a specific blob using the + tree['filename'] notation. + + You may as well access by index + blob = tree[0] + """ + + type = ObjectType.tree + type_id = ObjectType.tree_id + + __slots__ = "_cache" + + # actual integer ids for comparison + commit_id = 016 # equals stat.S_IFDIR | stat.S_IFLNK - a directory link + blob_id = 010 + symlink_id = 012 + tree_id = 004 + #{ Configuration + + # override in subclass if you would like your own types to be instantiated instead _map_id_to_type = { - GitDB_Tree.commit_id : Submodule, - GitDB_Tree.blob_id : Blob, - GitDB_Tree.symlink_id : Blob + commit_id : Submodule, + blob_id : Blob, + symlink_id : Blob # tree id added once Tree is defined } + #} end configuration + + + def __init__(self, repo, binsha, mode=tree_id<<12, path=None): + super(Tree, self).__init__(repo, binsha, mode, path) + + @classmethod + def _get_intermediate_items(cls, index_object): + if index_object.type == "tree": + return tuple(index_object._iter_convert_to_object(index_object._cache)) + return tuple() + + def _set_cache_(self, attr): + if attr == "_cache": + # Set the data when we need it + ostream = self.odb.stream(self.binsha) + self._cache = tree_entries_from_data(ostream.read()) + else: + super(Tree, self)._set_cache_(attr) + # END handle attribute + + def _iter_convert_to_object(self, iterable): + """Iterable yields tuples of (binsha, mode, name), which will be converted + to the respective object representation""" + for binsha, mode, name in iterable: + path = join_path(self.path, name) + try: + yield self._map_id_to_type[mode >> 12](self.repo, binsha, mode, path) + except KeyError: + raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path)) + # END for each item + + def __div__(self, file): + """Find the named object in this tree's contents + :return: ``git.Blob`` or ``git.Tree`` or ``git.Submodule`` + + :raise KeyError: if given file or tree does not exist in tree""" + msg = "Blob or Tree named %r not found" + if '/' in file: + tree = self + item = self + tokens = file.split('/') + for i,token in enumerate(tokens): + item = tree[token] + if item.type == 'tree': + tree = item + else: + # safety assertion - blobs are at the end of the path + if i != len(tokens)-1: + raise KeyError(msg % file) + return item + # END handle item type + # END for each token of split path + if item == self: + raise KeyError(msg % file) + return item + else: + for info in self._cache: + if info[2] == file: # [2] == name + return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2])) + # END for each obj + raise KeyError( msg % file ) + # END handle long paths + + + @property + def trees(self): + """:return: list(Tree, ...) list of trees directly below this tree""" + return [ i for i in self if i.type == "tree" ] + + @property + def blobs(self): + """:return: list(Blob, ...) 
list of blobs directly below this tree""" + return [ i for i in self if i.type == "blob" ] + + @property + def cache(self): + """ + :return: An object allowing to modify the internal cache. This can be used + to change the tree's contents. When done, make sure you call ``set_done`` + on the tree modifier, or serialization behaviour will be incorrect. + See the ``TreeModifier`` for more information on how to alter the cache""" + return TreeModifier(self._cache) + + def traverse( self, predicate = lambda i,d: True, + prune = lambda i,d: False, depth = -1, branch_first=True, + visit_once = False, ignore_self=1 ): + """For documentation, see util.Traversable.traverse + Trees are set to visit_once = False to gain more performance in the traversal""" + return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) + + # List protocol + def __getslice__(self, i, j): + return list(self._iter_convert_to_object(self._cache[i:j])) + + def __iter__(self): + return self._iter_convert_to_object(self._cache) + + def __len__(self): + return len(self._cache) + + def __getitem__(self, item): + if isinstance(item, int): + info = self._cache[item] + return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2])) + + if isinstance(item, basestring): + # compatability + return self.__div__(item) + # END index is basestring + + raise TypeError( "Invalid index type: %r" % item ) + + + def __contains__(self, item): + if isinstance(item, IndexObject): + for info in self._cache: + if item.binsha == info[0]: + return True + # END compare sha + # END for each entry + # END handle item is index object + # compatability + + # treat item as repo-relative path + path = self.path + for info in self._cache: + if item == join_path(path, info[2]): + return True + # END for each item + return False + + def __reversed__(self): + return reversed(self._iter_convert_to_object(self._cache)) + + def _serialize(self, stream): + """Serialize this tree into the stream. Please note that we will assume + our tree data to be in a sorted state. If this is not the case, serialization + will not generate a correct tree representation as these are assumed to be sorted + by algorithms""" + tree_to_stream(self._cache, stream.write) + return self + + def _deserialize(self, stream): + self._cache = tree_entries_from_data(stream.read()) + return self + + +# END tree + # finalize map definition Tree._map_id_to_type[Tree.tree_id] = Tree diff --git a/git/objects/util.py b/git/objects/util.py index 4c9323b8..8ac590f2 100644 --- a/git/objects/util.py +++ b/git/objects/util.py @@ -20,6 +20,7 @@ __all__ = ('get_object_type_by_name', 'parse_date', 'parse_actor_and_date', 'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz', 'verify_utctz', 'Actor') + #{ Functions def mode_str_to_int(modestr): |
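The pure helpers added in `git/objects/fun.py` can be exercised without a repository at all. The sketch below round-trips a small entry list through `tree_to_stream` and `tree_entries_from_data`, both taken from the diff above; the 20-byte shas and the `README`/`docs` names are made-up placeholders, and the entry layout `(binsha, mode, name)` is the one those functions document.

```python
# Hedged sketch: round-trip git's raw binary tree format through the new helpers.
from cStringIO import StringIO
from git.objects.fun import tree_to_stream, tree_entries_from_data

entries = [
    ('\x01' * 20, 0100644, 'README'),   # regular file (blob), dummy sha
    ('\x02' * 20, 0040000, 'docs'),     # directory (sub-tree), dummy sha
]
buf = StringIO()
tree_to_stream(entries, buf.write)       # serialize to git's binary tree layout
assert tree_entries_from_data(buf.getvalue()) == entries
```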
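Finally, a sketch of the list-like `Tree` interface from `git/objects/tree.py`. This one assumes more than the commit delivers: it presumes the surrounding `Repo` wiring already hands out objects backed by the vendored classes (which this commit explicitly has not finished), and the `README` path is only an illustrative name. The `/` look-up, `traverse()` and `mime_type` are from the diff above.

```python
# Hedged sketch of the Tree access patterns added above; assumes `repo` already
# yields objects built on the vendored classes, which is still work in progress here.
from git import Repo

repo = Repo('.')                      # any existing, non-bare repository
tree = repo.head.commit.tree          # root Tree of the current HEAD commit
readme = tree / 'README'              # __div__ look-up, raises KeyError if missing
print readme.mime_type                # Blob property, guessed from the file name
for item in tree.traverse():          # recursive walk over blobs, trees, submodules
    print item.type, item.path
```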