move GenericCommitHandler into its own module in prep for a delta-based one

author: Ian Clatworthy <ian.clatworthy@canonical.com> 2009-02-19 12:43:28 +1000
committer: Ian Clatworthy <ian.clatworthy@canonical.com> 2009-02-19 12:43:28 +1000
commit: 9fd81a6dde4a4b8fe9201b75a29e2405a375d6c2 (patch)
tree: 14f78561b770709345f2c9b9cc2142c8bdc34148 /processors
parent: d5f4c3cddd74467ebd352a33e3b212325b389e73 (diff)
download: bzr-fastimport-9fd81a6dde4a4b8fe9201b75a29e2405a375d6c2.tar.gz
1 files changed, 40 insertions, 379 deletions
diff --git a/processors/generic_processor.py b/processors/generic_processor.py
index 55ef262..36ab927 100644
--- a/processors/generic_processor.py
+++ b/processors/generic_processor.py
@@ -17,27 +17,20 @@
 """Import processor that supports all Bazaar repository formats."""
 
 
-import os
 import time
 from bzrlib import (
-    builtins,
     bzrdir,
     delta,
     errors,
-    generate_ids,
-    inventory,
-    lru_cache,
     osutils,
     progress,
-    revision,
-    revisiontree,
-    transport,
     )
 from bzrlib.repofmt import pack_repo
 from bzrlib.trace import note
 import bzrlib.util.configobj.configobj as configobj
 from bzrlib.plugins.fastimport import (
     branch_updater,
+    bzr_commit_handler,
     cache_manager,
     errors as plugin_errors,
     helpers,
@@ -140,28 +133,11 @@ class GenericProcessor(processor.ImportProcessor):
         # mapping of tag name to revision_id
         self.tags = {}
 
-        # Create the revision loader needed for committing
-        new_repo_api = hasattr(self.repo, 'revisions')
-        if new_repo_api:
-            self.loader = revisionloader.RevisionLoader2(self.repo)
-        elif not self._experimental:
-            self.loader = revisionloader.RevisionLoader1(self.repo)
+        # Create the revision loader to use for committing, if any
+        if self.use_deltas:
+            self.loader = None
         else:
-            def fulltext_when(count):
-                total = self.total_commits
-                if total is not None and count == total:
-                    fulltext = True
-                else:
-                    # Create an inventory fulltext every 200 revisions
-                    fulltext = count % 200 == 0
-                if fulltext:
-                    self.note("%d commits - storing inventory as full-text",
-                        count)
-                return fulltext
-
-            self.loader = revisionloader.ImportRevisionLoader1(
-                self.repo, self.inventory_cache_size,
-                fulltext_when=fulltext_when)
+            self.loader = self._loader_factory()
 
         # Disable autopacking if the repo format supports it.
         # THIS IS A HACK - there is no sanctioned way of doing this yet.
@@ -194,6 +170,9 @@ class GenericProcessor(processor.ImportProcessor):
         else:
             self.info = None
 
+        # Decide whether to use full inventories or inventory deltas
+        self.use_deltas = self._experimental
+
         # Decide how often to automatically report progress
         # (not a parameter yet)
         self.progress_every = _DEFAULT_AUTO_PROGRESS
@@ -227,6 +206,30 @@ class GenericProcessor(processor.ImportProcessor):
         else:
             self.total_commits = self.max_commits
 
+    def _loader_factory(self):
+        """Make a RevisionLoader based on what the repository supports."""
+        new_repo_api = hasattr(self.repo, 'revisions')
+        if new_repo_api:
+            return revisionloader.RevisionLoader2(self.repo)
+        elif not self._experimental:
+            return revisionloader.RevisionLoader1(self.repo)
+        else:
+            def fulltext_when(count):
+                total = self.total_commits
+                if total is not None and count == total:
+                    fulltext = True
+                else:
+                    # Create an inventory fulltext every 200 revisions
+                    fulltext = count % 200 == 0
+                if fulltext:
+                    self.note("%d commits - storing inventory as full-text",
+                        count)
+                return fulltext
+
+            return revisionloader.ImportRevisionLoader1(
+                self.repo, self.inventory_cache_size,
+                fulltext_when=fulltext_when)
+
     def _process(self, command_iter):
         # if anything goes wrong, abort the write group if any
         try:
@@ -376,7 +379,7 @@ class GenericProcessor(processor.ImportProcessor):
     def commit_handler(self, cmd):
         """Process a CommitCommand."""
         if self.skip_total and self._revision_count < self.skip_total:
-            _track_heads(cmd, self.cache_mgr)
+            self.cache_mgr.track_heads(cmd)
             # Check that we really do know about this commit-id
             if not self.cache_mgr.revision_ids.has_key(cmd.id):
                 raise plugin_errors.BadRestart(cmd.id)
@@ -394,12 +397,16 @@ class GenericProcessor(processor.ImportProcessor):
             return
         if self.first_incremental_commit:
             self.first_incremental_commit = None
-            parents = _track_heads(cmd, self.cache_mgr)
+            parents = self.cache_mgr.track_heads(cmd)
             self._gen_file_ids_cache(parents)
 
         # 'Commit' the revision and report progress
-        handler = GenericCommitHandler(cmd, self.repo, self.cache_mgr,
-            self.loader, self.verbose, self._experimental)
+        if self.use_deltas:
+            handler = bzr_commit_handler.DeltaCommitHandler(cmd,
+                self.cache_mgr, self.repo, verbose=self.verbose)
+        else:
+            handler = bzr_commit_handler.InventoryCommitHandler(cmd,
+                self.cache_mgr, self.repo, self.loader, verbose=self.verbose)
         handler.process()
         self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
         self._revision_count += 1
@@ -485,349 +492,3 @@ class GenericProcessor(processor.ImportProcessor):
         bzr_tag_name = name.decode('utf-8', 'replace')
         bzr_rev_id = self.cache_mgr.revision_ids[from_]
         self.tags[bzr_tag_name] = bzr_rev_id
-
-
-def _track_heads(cmd, cache_mgr):
-    """Track the repository heads given a CommitCommand.
-    
-    :return: the list of parents in terms of commit-ids
-    """
-    # Get the true set of parents
-    if cmd.from_ is not None:
-        parents = [cmd.from_]
-    else:
-        last_id = cache_mgr.last_ids.get(cmd.ref)
-        if last_id is not None:
-            parents = [last_id]
-        else:
-            parents = []
-    parents.extend(cmd.merges)
-
-    # Track the heads
-    cache_mgr.track_heads_for_ref(cmd.ref, cmd.id, parents)
-    return parents
-
-
-class GenericCommitHandler(processor.CommitHandler):
-
-    def __init__(self, command, repo, cache_mgr, loader, verbose=False,
-        _experimental=False):
-        processor.CommitHandler.__init__(self, command)
-        self.repo = repo
-        self.cache_mgr = cache_mgr
-        self.loader = loader
-        self.verbose = verbose
-        self._experimental = _experimental
-
-    def pre_process_files(self):
-        """Prepare for committing."""
-        self.revision_id = self.gen_revision_id()
-        # cache of texts for this commit, indexed by file-id
-        self.lines_for_commit = {}
-        if self.repo.supports_rich_root():
-            self.lines_for_commit[inventory.ROOT_ID] = []
-
-        # Track the heads and get the real parent list
-        parents = _track_heads(self.command, self.cache_mgr)
-
-        # Convert the parent commit-ids to bzr revision-ids
-        if parents:
-            self.parents = [self.cache_mgr.revision_ids[p]
-                for p in parents]
-        else:
-            self.parents = []
-        self.debug("%s id: %s, parents: %s", self.command.id,
-            self.revision_id, str(self.parents))
-
-        # Seed the inventory from the previous one
-        if len(self.parents) == 0:
-            self.inventory = self.gen_initial_inventory()
-        else:
-            # use the bzr_revision_id to lookup the inv cache
-            inv = self.get_inventory(self.parents[0])
-            # TODO: Shallow copy - deep inventory copying is expensive
-            self.inventory = inv.copy()
-        if self.repo.supports_rich_root():
-            self.inventory.revision_id = self.revision_id
-        else:
-            # In this repository, root entries have no knit or weave. When
-            # serializing out to disk and back in, root.revision is always
-            # the new revision_id.
-            self.inventory.root.revision = self.revision_id
-
-        # directory-path -> inventory-entry for current inventory
-        self.directory_entries = dict(self.inventory.directories())
-
-    def post_process_files(self):
-        """Save the revision."""
-        self.cache_mgr.inventories[self.revision_id] = self.inventory
-
-        # Load the revision into the repository
-        rev_props = {}
-        committer = self.command.committer
-        who = "%s <%s>" % (committer[0],committer[1])
-        author = self.command.author
-        if author is not None:
-            author_id = "%s <%s>" % (author[0],author[1])
-            if author_id != who:
-                rev_props['author'] = author_id
-        rev = revision.Revision(
-           timestamp=committer[2],
-           timezone=committer[3],
-           committer=who,
-           message=helpers.escape_commit_message(self.command.message),
-           revision_id=self.revision_id,
-           properties=rev_props,
-           parent_ids=self.parents)
-        self.loader.load(rev, self.inventory, None,
-            lambda file_id: self._get_lines(file_id),
-            lambda revision_ids: self._get_inventories(revision_ids))
-
-    def modify_handler(self, filecmd):
-        if filecmd.dataref is not None:
-            data = self.cache_mgr.fetch_blob(filecmd.dataref)
-        else:
-            data = filecmd.data
-        self.debug("modifying %s", filecmd.path)
-        self._modify_inventory(filecmd.path, filecmd.kind,
-            filecmd.is_executable, data)
-
-    def _delete_recursive(self, path):
-        self.debug("deleting %s", path)
-        fileid = self.bzr_file_id(path)
-        dirname, basename = osutils.split(path)
-        if (fileid in self.inventory and
-            isinstance(self.inventory[fileid], inventory.InventoryDirectory)):
-            for child_path in self.inventory[fileid].children.keys():
-                self._delete_recursive(os.utils.pathjoin(path, child_path))
-        try:
-            if self.inventory.id2path(fileid) == path:
-                del self.inventory[fileid]
-            else:
-                # already added by some other name?
-                if dirname in self.cache_mgr.file_ids:
-                    parent_id = self.cache_mgr.file_ids[dirname]
-                    del self.inventory[parent_id].children[basename]
-        except KeyError:
-            self._warn_unless_in_merges(fileid, path)
-        except errors.NoSuchId:
-            self._warn_unless_in_merges(fileid, path)
-        except AttributeError, ex:
-            if ex.args[0] == 'children':
-                # A directory has changed into a file and then one
-                # of it's children is being deleted!
-                self._warn_unless_in_merges(fileid, path)
-            else:
-                raise
-        try:
-            self.cache_mgr.delete_path(path)
-        except KeyError:
-            pass
-
-    def delete_handler(self, filecmd):
-        self._delete_recursive(filecmd.path)
-
-    def _warn_unless_in_merges(self, fileid, path):
-        if len(self.parents) <= 1:
-            return
-        for parent in self.parents[1:]:
-            if fileid in self.get_inventory(parent):
-                return
-        self.warning("ignoring delete of %s as not in parent inventories", path)
-
-    def copy_handler(self, filecmd):
-        src_path = filecmd.src_path
-        dest_path = filecmd.dest_path
-        self.debug("copying %s to %s", src_path, dest_path)
-        if not self.parents:
-            self.warning("ignoring copy of %s to %s - no parent revisions",
-                src_path, dest_path)
-            return
-        file_id = self.inventory.path2id(src_path)
-        if file_id is None:
-            self.warning("ignoring copy of %s to %s - source does not exist",
-                src_path, dest_path)
-            return
-        ie = self.inventory[file_id]
-        kind = ie.kind
-        if kind == 'file':
-            content = self._get_content_from_repo(self.parents[0], file_id)
-            self._modify_inventory(dest_path, kind, ie.executable, content)
-        elif kind == 'symlink':
-            self._modify_inventory(dest_path, kind, False, ie.symlink_target)
-        else:
-            self.warning("ignoring copy of %s %s - feature not yet supported",
-                kind, path)
-
-    def _get_content_from_repo(self, revision_id, file_id):
-        """Get the content of a file for a revision-id."""
-        revtree = self.repo.revision_tree(revision_id)
-        return revtree.get_file_text(file_id)
-
-    def rename_handler(self, filecmd):
-        old_path = filecmd.old_path
-        new_path = filecmd.new_path
-        self.debug("renaming %s to %s", old_path, new_path)
-        file_id = self.bzr_file_id(old_path)
-        basename, new_parent_ie = self._ensure_directory(new_path)
-        new_parent_id = new_parent_ie.file_id
-        existing_id = self.inventory.path2id(new_path)
-        if existing_id is not None:
-            self.inventory.remove_recursive_id(existing_id)
-        ie = self.inventory[file_id]
-        lines = self.loader._get_lines(file_id, ie.revision)
-        self.lines_for_commit[file_id] = lines
-        self.inventory.rename(file_id, new_parent_id, basename)
-        self.cache_mgr.rename_path(old_path, new_path)
-        self.inventory[file_id].revision = self.revision_id
-
-    def deleteall_handler(self, filecmd):
-        self.debug("deleting all files (and also all directories)")
-        # Would be nice to have an inventory.clear() method here
-        root_items = [ie for (name, ie) in
-            self.inventory.root.children.iteritems()]
-        for root_item in root_items:
-            self.inventory.remove_recursive_id(root_item.file_id)
-
-    def bzr_file_id_and_new(self, path):
-        """Get a Bazaar file identifier and new flag for a path.
-        
-        :return: file_id, is_new where
-          is_new = True if the file_id is newly created
-        """
-        try:
-            id = self.cache_mgr.file_ids[path]
-            return id, False
-        except KeyError:
-            id = generate_ids.gen_file_id(path)
-            self.cache_mgr.file_ids[path] = id
-            self.debug("Generated new file id %s for '%s'", id, path)
-            return id, True
-
-    def bzr_file_id(self, path):
-        """Get a Bazaar file identifier for a path."""
-        return self.bzr_file_id_and_new(path)[0]
-
-    def gen_initial_inventory(self):
-        """Generate an inventory for a parentless revision."""
-        inv = inventory.Inventory(revision_id=self.revision_id)
-        if self.repo.supports_rich_root():
-            # The very first root needs to have the right revision
-            inv.root.revision = self.revision_id
-        return inv
-
-    def gen_revision_id(self):
-        """Generate a revision id.
-
-        Subclasses may override this to produce deterministic ids say.
-        """
-        committer = self.command.committer
-        # Perhaps 'who' being the person running the import is ok? If so,
-        # it might be a bit quicker and give slightly better compression?
-        who = "%s <%s>" % (committer[0],committer[1])
-        timestamp = committer[2]
-        return generate_ids.gen_revision_id(who, timestamp)
-
-    def get_inventory(self, revision_id):
-        """Get the inventory for a revision id."""
-        try:
-            inv = self.cache_mgr.inventories[revision_id]
-        except KeyError:
-            if self.verbose:
-                self.note("get_inventory cache miss for %s", revision_id)
-            # Not cached so reconstruct from repository
-            inv = self.repo.revision_tree(revision_id).inventory
-            self.cache_mgr.inventories[revision_id] = inv
-        return inv
-
-    def _get_inventories(self, revision_ids):
-        """Get the inventories for revision-ids.
-        
-        This is a callback used by the RepositoryLoader to
-        speed up inventory reconstruction.
-        """
-        present = []
-        inventories = []
-        # If an inventory is in the cache, we assume it was
-        # successfully loaded into the repsoitory
-        for revision_id in revision_ids:
-            try:
-                inv = self.cache_mgr.inventories[revision_id]
-                present.append(revision_id)
-            except KeyError:
-                if self.verbose:
-                    self.note("get_inventories cache miss for %s", revision_id)
-                # Not cached so reconstruct from repository
-                if self.repo.has_revision(revision_id):
-                    rev_tree = self.repo.revision_tree(revision_id)
-                    present.append(revision_id)
-                else:
-                    rev_tree = self.repo.revision_tree(None)
-                inv = rev_tree.inventory
-                self.cache_mgr.inventories[revision_id] = inv
-            inventories.append(inv)
-        return present, inventories
-
-    def _get_lines(self, file_id):
-        """Get the lines for a file-id."""
-        return self.lines_for_commit[file_id]
-
-    def _modify_inventory(self, path, kind, is_executable, data):
-        """Add to or change an item in the inventory."""
-        # Create the new InventoryEntry
-        basename, parent_ie = self._ensure_directory(path)
-        file_id = self.bzr_file_id(path)
-        ie = inventory.make_entry(kind, basename, parent_ie.file_id, file_id)
-        ie.revision = self.revision_id
-        if isinstance(ie, inventory.InventoryFile):
-            ie.executable = is_executable
-            lines = osutils.split_lines(data)
-            ie.text_sha1 = osutils.sha_strings(lines)
-            ie.text_size = sum(map(len, lines))
-            self.lines_for_commit[file_id] = lines
-        elif isinstance(ie, inventory.InventoryLink):
-            ie.symlink_target = data.encode('utf8')
-            # There are no lines stored for a symlink so
-            # make sure the cache used by get_lines knows that
-            self.lines_for_commit[file_id] = []
-        else:
-            raise errors.BzrError("Cannot import items of kind '%s' yet" %
-                (kind,))
-
-        # Record this new inventory entry
-        if file_id in self.inventory:
-            # HACK: no API for this (del+add does more than it needs to)
-            self.inventory._byid[file_id] = ie
-            parent_ie.children[basename] = ie
-        else:
-            self.inventory.add(ie)
-
-    def _ensure_directory(self, path):
-        """Ensure that the containing directory exists for 'path'"""
-        dirname, basename = osutils.split(path)
-        if dirname == '':
-            # the root node doesn't get updated
-            return basename, self.inventory.root
-        try:
-            ie = self.directory_entries[dirname]
-        except KeyError:
-            # We will create this entry, since it doesn't exist
-            pass
-        else:
-            return basename, ie
-
-        # No directory existed, we will just create one, first, make sure
-        # the parent exists
-        dir_basename, parent_ie = self._ensure_directory(dirname)
-        dir_file_id = self.bzr_file_id(dirname)
-        ie = inventory.entry_factory['directory'](dir_file_id,
-                                                  dir_basename,
-                                                  parent_ie.file_id)
-        ie.revision = self.revision_id
-        self.directory_entries[dirname] = ie
-        # There are no lines stored for a directory so
-        # make sure the cache used by get_lines knows that
-        self.lines_for_commit[dir_file_id] = []
-        #print "adding dir for %s" % path
-        self.inventory.add(ie)
-        return basename, ie
author	Ian Clatworthy <ian.clatworthy@canonical.com>	2009-02-19 12:43:28 +1000
committer	Ian Clatworthy <ian.clatworthy@canonical.com>	2009-02-19 12:43:28 +1000
commit	9fd81a6dde4a4b8fe9201b75a29e2405a375d6c2 (patch)
tree	14f78561b770709345f2c9b9cc2142c8bdc34148 /processors
parent	d5f4c3cddd74467ebd352a33e3b212325b389e73 (diff)
download	bzr-fastimport-9fd81a6dde4a4b8fe9201b75a29e2405a375d6c2.tar.gz