diff options
Diffstat (limited to 'morphlib/gitindex.py')
-rw-r--r-- | morphlib/gitindex.py | 161 |
1 files changed, 161 insertions, 0 deletions
diff --git a/morphlib/gitindex.py b/morphlib/gitindex.py new file mode 100644 index 00000000..e22f6225 --- /dev/null +++ b/morphlib/gitindex.py @@ -0,0 +1,161 @@ +# Copyright (C) 2013-2014 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# =*= License: GPL-2 =*= + + +import collections +import os + +import morphlib + + +STATUS_UNTRACKED = '??' +STATUS_IGNORED = '!!' + + +class GitIndex(object): + '''An object that represents operations on the working tree. + + Index objects can be constructed with a different path to the + index file, which can be used to construct commits without + altering the working tree, index or HEAD. + + The file must either be a previously initialised index, or a + non-existant file. + + Git creates a lock file and atomically alters the index by + renaming a temporary file into place, so `index_file` must be + in a writable directory. + + ''' + + def __init__(self, gd, index_file): + self._gd = gd + self._index_file = index_file + + def _run_git(self, *args, **kwargs): + if self._index_file is not None: + kwargs['env'] = kwargs.get('env', dict(os.environ)) + kwargs['env']['GIT_INDEX_FILE'] = self._index_file + return morphlib.git.gitcmd(self._gd._runcmd, *args, **kwargs) + + def _get_status(self): + '''Return git status output in a Python useful format + + This runs git status such that unusual filenames are preserved + and returns its output in a sequence of (status_code, to_path, + from_path). + + from_path is None unless the status_code says there was a + rename, in which case it is the path it was renamed from. + + Untracked and ignored changes are also included in the output, + their status codes are '??' and '!!' respectively. + + ''' + + # git status -z will NUL terminate paths, so we don't have to + # unescape the paths it outputs. Unfortunately each status entry + # can have 1 or 2 paths, so extra parsing is required. + # To handle this, we split it into NUL delimited tokens. + # The first token of an entry is the 2 character status code, + # a space, then the path. + # If our status code starts with R then it's a rename, hence + # has a second path, requiring us to pop an extra token. + status = self._run_git('status', '-z', '--ignored') + tokens = collections.deque(status.split('\0')) + while True: + tok = tokens.popleft() + # Status output is NUL terminated rather than delimited, + # and split is for delimited output. A side effect of this is + # that we get an empty token as the last output. This suits + # us fine, as it gives us a sentinel value to terminate with. + if not tok: + return + + # The first token of an entry is 2 character status, a space, + # then the path + code = tok[:2] + to_path = tok[3:] + + # If the code starts with R then it's a rename, and + # the next token says where the file was renamed from + from_path = tokens.popleft() if code[0] == 'R' else None + yield code, to_path, from_path + + def get_uncommitted_changes(self): + for code, to_path, from_path in self._get_status(): + if (code not in (STATUS_UNTRACKED, STATUS_IGNORED) + or code == (STATUS_UNTRACKED) and to_path.endswith('.morph')): + yield code, to_path, from_path + + def set_to_tree(self, treeish): + '''Modify the index to contain the contents of the treeish.''' + self._run_git('read-tree', treeish) + + def add_files_from_index_info(self, infos): + '''Add files without interacting with the working tree. + + `infos` is an iterable of (file mode string, object sha1, path) + There are no constraints on the size of the iterable + + ''' + + # update-index may take NUL terminated input lines of the entries + # to add so we generate a string for the input, rather than + # having many command line arguments, since for a large amount + # of entries, this can be too many arguments to process and the + # exec will fail. + # Generating the input as a string uses more memory than using + # subprocess.Popen directly and using .communicate, but is much + # less verbose. + feed_stdin = '\0'.join('%o %s\t%s' % (mode, sha1, path) + for mode, sha1, path in infos) + '\0' + self._run_git('update-index', '--add', '-z', '--index-info', + feed_stdin=feed_stdin) + + def add_files_from_working_tree(self, paths): + '''Add existing files to the index. + + Given an iterable of paths to files in the working tree, + relative to the git repository's top-level directory, + add the contents of the files to git's object store, + and the index. + + This is similar to the following: + + gd = GitDirectory(...) + idx = gd.get_index() + for path in paths: + fullpath = os.path.join(gd,dirname, path) + with open(fullpath, 'r') as f: + sha1 = gd.store_blob(f) + idx.add_files_from_index_info([(os.stat(fullpath).st_mode, + sha1, path)]) + + ''' + + if self._gd.is_bare(): + raise morphlib.gitdir.NoWorkingTreeError(self._gd) + # Handle paths in smaller chunks, so that the runcmd + # cannot fail from exceeding command line length + # 50 is an arbitrary limit + for paths in morphlib.util.iter_trickle(paths, 50): + self._run_git('add', *paths) + + def write_tree(self): + '''Transform the index into a tree in the object store.''' + return self._run_git('write-tree').strip() |