diff options
-rw-r--r-- | doc/intro.rst | 1 | ||||
-rw-r--r-- | lib/git/index.py | 142 | ||||
-rw-r--r-- | test/git/test_index.py | 53 |
3 files changed, 168 insertions, 28 deletions
diff --git a/doc/intro.rst b/doc/intro.rst index f08a7ec5..42879155 100644 --- a/doc/intro.rst +++ b/doc/intro.rst @@ -15,6 +15,7 @@ Requirements ============ * Git_ tested with 1.5.3.7 +* Requires Git_ 1.6.5.4 or newer if index.add function is to be used * `Python Nose`_ - used for running the tests * `Mock by Michael Foord`_ used for tests. Requires 0.5 diff --git a/lib/git/index.py b/lib/git/index.py index bd9c73f1..546f4520 100644 --- a/lib/git/index.py +++ b/lib/git/index.py @@ -16,8 +16,10 @@ import os import sys import stat import subprocess +import glob import git.diff as diff +from errors import GitCommandError from git.objects import Blob, Tree, Object, Commit from git.utils import SHA1Writer, LazyMixin, ConcurrentWriteOperation, join_path_native @@ -662,7 +664,7 @@ class IndexFile(LazyMixin, diff.Diffable): @clear_cache @default_index - def add(self, items, force=True, **kwargs): + def add(self, items, force=True, fprogress=lambda *args: None): """ Add files from the working tree, specific blobs or BaseIndexEntries to the index. The underlying index file will be written immediately, hence @@ -717,43 +719,130 @@ class IndexFile(LazyMixin, diff.Diffable): as the API user usually wants the item to be added even though they might be excluded. - ``**kwargs`` - Additional keyword arguments to be passed to git-update-index, such - as index_only. - + ``fprogress`` + Function with signature f(path, done=False, item=item) called for each + path to be added, once once it is about to be added where done==False + and once after it was added where done=True. + item is set to the actual item we handle, either a Path or a BaseIndexEntry + Please note that the processed path is not guaranteed to be present + in the index already as the index is currently being processed. + Returns List(BaseIndexEntries) representing the entries just actually added. - """ + + Raises + GitCommandError if a supplied Path did not exist. Please note that BaseIndexEntry + Objects that do not have a null sha will be added even if their paths + do not exist. + """ + # UTILITIES + def raise_exc(e): + raise e + + def expand_paths(paths): + """Expand the directories in list of paths to the corresponding paths accordingly, + they will always be relative to the repository""" + out = list() + r = self.repo.git.git_dir + rs = r + '/' + for path in paths: + abs_path = path + if not os.path.isabs(abs_path): + abs_path = os.path.join(r, path) + # END make absolute path + + # resolve globs if possible + if '?' in path or '*' in path or '[' in path: + out.extend(f.replace(rs, '') for f in expand_paths(glob.glob(abs_path))) + continue + # END glob handling + try: + for root, dirs, files in os.walk(abs_path, onerror=raise_exc): + for rela_file in files: + # add relative paths only + out.append(os.path.join(root.replace(rs, ''), rela_file)) + # END for each file in subdir + # END for each subdirectory + except OSError: + # was a file or something that could not be iterated + out.append(path) + # END path exception handling + # END for each path + + # NOTE: git will add items multiple times even if a glob overlapped + # with manually specified paths or if paths where specified multiple + # times - we respect that and do not prune + return out + # END expand helper method + + def write_path_to_stdin(proc, filepath, item, fmakeexc): + """Write path to proc.stdin and make sure it processes the item, including progress. + @return: stdout string""" + fprogress(filepath, False, item) + try: + proc.stdin.write("%s\n" % filepath) + except IOError: + # pipe broke, usually because some error happend + raise fmakeexc() + # END write exception handling + proc.stdin.flush() + # NOTE: if this hangs, you need at lest git 1.6.5.4 as a git-bugfix + # is needed for this + # TODO: Rewrite this using hash-object and update index to get + # rid of the bug-fix dependency, updaet intro.rst requirements + rval = proc.stdout.readline().strip() # trigger operation + fprogress(filepath, True, item) + return rval + # END write_path_to_stdin + + # sort the entries into strings and Entries, Blobs are converted to entries # automatically # paths can be git-added, for everything else we use git-update-index entries_added = list() paths, entries = self._preprocess_add_items(items) + # HANDLE PATHS if paths: - git_add_output = self.repo.git.add(paths, v=True) - # force rereading our entries + # to get suitable progress information, pipe paths to stdin + args = ("--add", "--replace", "--verbose", "--stdin") + proc = self.repo.git.update_index(*args, **{'as_process':True, 'istream':subprocess.PIPE}) + make_exc = lambda : GitCommandError(("git-update-index",)+args, 128, proc.stderr.readline()) + filepaths=expand_paths(paths) + added_files = list() + + for filepath in filepaths: + write_path_to_stdin(proc, filepath, filepath, make_exc) + added_files.append(filepath) + # END for each filepath + self._flush_stdin_and_wait(proc) # ignore stdout + + # force rereading our entries once it is all done del(self.entries) - for line in git_add_output.splitlines(): - # line contains: - # add '<path>' - added_file = line[5:-1] - entries_added.append(self.entries[(added_file,0)]) - # END for each line + entries_added.extend(self.entries[(f,0)] for f in added_files) # END path handling + # HANDLE ENTRIES if entries: null_mode_entries = [ e for e in entries if e.mode == 0 ] if null_mode_entries: raise ValueError("At least one Entry has a null-mode - please use index.remove to remove files for clarity") # END null mode should be remove + # HANLDE ENTRY OBJECT CREATION # create objects if required, otherwise go with the existing shas null_entries_indices = [ i for i,e in enumerate(entries) if e.sha == Object.NULL_HEX_SHA ] if null_entries_indices: - hash_proc = self.repo.git.hash_object(w=True, stdin_paths=True, istream=subprocess.PIPE, as_process=True) - hash_proc.stdin.write('\n'.join(entries[i].path for i in null_entries_indices)) - obj_ids = self._flush_stdin_and_wait(hash_proc).splitlines() + # creating object ids is the time consuming part. Hence we will + # send progress for these now. + args = ("-w", "--stdin-paths") + proc = self.repo.git.hash_object(*args, **{'istream':subprocess.PIPE, 'as_process':True}) + make_exc = lambda : GitCommandError(("git-hash-object",)+args, 128, proc.stderr.readline()) + obj_ids = list() + for ei in null_entries_indices: + entry = entries[ei] + obj_ids.append(write_path_to_stdin(proc, entry.path, entry, make_exc)) + # END for each entry index assert len(obj_ids) == len(null_entries_indices), "git-hash-object did not produce all requested objects: want %i, got %i" % ( len(null_entries_indices), len(obj_ids) ) # update IndexEntries with new object id @@ -764,11 +853,22 @@ class IndexFile(LazyMixin, diff.Diffable): # END for each index # END null_entry handling - # feed all the data to stdin - update_index_proc = self.repo.git.update_index(index_info=True, istream=subprocess.PIPE, as_process=True, **kwargs) - update_index_proc.stdin.write('\n'.join(str(e) for e in entries)) + # feed pure entries to stdin + proc = self.repo.git.update_index(index_info=True, istream=subprocess.PIPE, as_process=True) + for i, entry in enumerate(entries): + progress_sent = i in null_entries_indices + if not progress_sent: + fprogress(entry.path, False, entry) + # it cannot handle too-many newlines in this mode + if i != 0: + proc.stdin.write('\n') + proc.stdin.write(str(entry)) + proc.stdin.flush() + if not progress_sent: + fprogress(entry.path, True, entry) + # END for each enty + self._flush_stdin_and_wait(proc) entries_added.extend(entries) - self._flush_stdin_and_wait(update_index_proc) # END if there are base entries return entries_added diff --git a/test/git/test_index.py b/test/git/test_index.py index a8ae4b8d..26bc44c2 100644 --- a/test/git/test_index.py +++ b/test/git/test_index.py @@ -13,8 +13,35 @@ import tempfile import glob from stat import * + class TestTree(TestBase): + def __init__(self, *args): + super(TestTree, self).__init__(*args) + self._reset_progress() + + def _assert_add_progress(self, entries): + assert len(entries) == len(self._add_progress_map) + for path, call_count in self._add_progress_map.iteritems(): + assert call_count == 2 + self._reset_progress() + + def _add_progress(self, path, done, item): + """Called as progress func - we keep track of the proper + call order""" + assert item is not None + self._add_progress_map.setdefault(path, 0) + curval = self._add_progress_map[path] + if curval == 0: + assert not done + if curval == 1: + assert done + self._add_progress_map[path] = curval + 1 + + def _reset_progress(self): + # maps paths to the count of calls + self._add_progress_map = dict() + def test_index_file_base(self): # read from file index = IndexFile(self.rorepo, fixture_path("index")) @@ -297,19 +324,29 @@ class TestTree(TestBase): assert os.path.isfile(os.path.join(rw_repo.git.git_dir, lib_file_path)) # directory - entries = index.add(['lib']) + entries = index.add(['lib'], fprogress=self._add_progress) + self._assert_add_progress(entries) assert len(entries)>1 # glob - entries = index.reset(new_commit).add(['lib/*.py']) + entries = index.reset(new_commit).add(['lib/git/*.py'], fprogress=self._add_progress) + self._assert_add_progress(entries) assert len(entries) == 14 + # same file + entries = index.reset(new_commit).add(['lib/git/head.py']*2, fprogress=self._add_progress) + # would fail, test is too primitive to handle this case + # self._assert_add_progress(entries) + self._reset_progress() + assert len(entries) == 2 + # missing path self.failUnlessRaises(GitCommandError, index.reset(new_commit).add, ['doesnt/exist/must/raise']) # blob from older revision overrides current index revision old_blob = new_commit.parents[0].tree.blobs[0] - entries = index.reset(new_commit).add([old_blob]) + entries = index.reset(new_commit).add([old_blob], fprogress=self._add_progress) + self._assert_add_progress(entries) assert index.entries[(old_blob.path,0)].sha == old_blob.sha and len(entries) == 1 # mode 0 not allowed @@ -319,14 +356,16 @@ class TestTree(TestBase): # add new file new_file_relapath = "my_new_file" new_file_path = self._make_file(new_file_relapath, "hello world", rw_repo) - entries = index.reset(new_commit).add([BaseIndexEntry((010644, null_sha, 0, new_file_relapath))]) + entries = index.reset(new_commit).add([BaseIndexEntry((010644, null_sha, 0, new_file_relapath))], fprogress=self._add_progress) + self._assert_add_progress(entries) assert len(entries) == 1 and entries[0].sha != null_sha # add symlink if sys.platform != "win32": link_file = os.path.join(rw_repo.git.git_dir, "my_real_symlink") os.symlink("/etc/that", link_file) - entries = index.reset(new_commit).add([link_file]) + entries = index.reset(new_commit).add([link_file], fprogress=self._add_progress) + self._assert_add_progress(entries) assert len(entries) == 1 and S_ISLNK(entries[0].mode) print "%o" % entries[0].mode # END real symlink test @@ -336,7 +375,8 @@ class TestTree(TestBase): link_target = "/etc/that" fake_symlink_path = self._make_file(fake_symlink_relapath, link_target, rw_repo) fake_entry = BaseIndexEntry((0120000, null_sha, 0, fake_symlink_relapath)) - entries = index.reset(new_commit).add([fake_entry]) + entries = index.reset(new_commit).add([fake_entry], fprogress=self._add_progress) + self._assert_add_progress(entries) assert entries[0].sha != null_sha assert len(entries) == 1 and S_ISLNK(entries[0].mode) @@ -363,4 +403,3 @@ class TestTree(TestBase): open(fake_symlink_path,'rb').read() == link_target else: assert S_ISLNK(os.lstat(fake_symlink_path)[ST_MODE]) - |