From 1efe12dfdf0b360f89ec30ed1f30b749f826bcba Mon Sep 17 00:00:00 2001
From: termie
Date: Sun, 30 Jan 2011 14:31:03 -0800
Subject: Add a bunch of mark id normalization.

bzr-fastimport is getting marks from a variety of sources, all of which
use the format ':\d+', but internally it really wants just the raw
number. This patch cleans up the symptoms in the many places where this
normalization was not occurring.
---
 branch_updater.py               | 1 +
 bzr_commit_handler.py           | 2 +-
 marks_file.py                   | 5 ++---
 processors/generic_processor.py | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/branch_updater.py b/branch_updater.py
index 039171f..a908f1d 100644
--- a/branch_updater.py
+++ b/branch_updater.py
@@ -151,6 +151,7 @@ class BranchUpdater(object):
         :return: whether the branch was changed or not
         """
         from fastimport.helpers import single_plural
+        last_mark = last_mark.lstrip(':')
         last_rev_id = self.cache_mgr.revision_ids[last_mark]
         revs = list(self.repo.iter_reverse_revision_history(last_rev_id))
         revno = len(revs)
diff --git a/bzr_commit_handler.py b/bzr_commit_handler.py
index d81b271..9988f0a 100644
--- a/bzr_commit_handler.py
+++ b/bzr_commit_handler.py
@@ -113,7 +113,7 @@ class GenericCommitHandler(processor.CommitHandler):
 
         # Convert the parent commit-ids to bzr revision-ids
         if parents:
-            self.parents = [self.cache_mgr.revision_ids[p]
+            self.parents = [self.cache_mgr.revision_ids[p.lstrip(':')]
                 for p in parents]
         else:
             self.parents = []
diff --git a/marks_file.py b/marks_file.py
index c05f8c6..7066a1c 100644
--- a/marks_file.py
+++ b/marks_file.py
@@ -53,8 +53,7 @@ def import_marks(filename):
     while line:
         line = line.rstrip('\n')
         mark, revid = line.split(' ', 1)
-        if mark.startswith(':'):
-            mark = mark[1:]
+        mark = mark.lstrip(':')
         revision_ids[mark] = revid
         line = f.readline()
     f.close()
@@ -76,5 +75,5 @@ def export_marks(filename, revision_ids):
 
     # Write the revision info
     for mark, revid in revision_ids.iteritems():
-        f.write(':%s %s\n' % (mark, revid))
+        f.write(':%s %s\n' % (str(mark).lstrip(':'), revid))
     f.close()
diff --git a/processors/generic_processor.py b/processors/generic_processor.py
index 093a432..e17a2ff 100644
--- a/processors/generic_processor.py
+++ b/processors/generic_processor.py
@@ -533,9 +533,9 @@ class GenericProcessor(processor.ImportProcessor):
         except:
             print "ABORT: exception occurred processing commit %s" % (cmd.id)
             raise
-        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
+        self.cache_mgr.revision_ids[cmd.id.lstrip(':')] = handler.revision_id
         self._revision_count += 1
-        self.report_progress("(%s)" % cmd.id)
+        self.report_progress("(%s)" % cmd.id.lstrip(':'))
 
         if cmd.ref.startswith('refs/tags/'):
             tag_name = cmd.ref[len('refs/tags/'):]
--
cgit v1.2.1
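The normalization applied throughout the patch above boils down to stripping
the leading ':' from a fast-import mark before using it as a dictionary key.
A minimal sketch of that idea, assuming a hypothetical normalize_mark helper
(the patch itself simply inlines lstrip(':') at each call site):

    # Hypothetical helper for illustration only; the patch inlines lstrip(':').
    def normalize_mark(mark):
        """Strip the leading ':' so ':42' and '42' address the same entry."""
        return mark.lstrip(':')

    revision_ids = {}
    revision_ids[normalize_mark(':42')] = 'example-revision-id'
    assert revision_ids['42'] == 'example-revision-id'
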
From bc1a6e0f831ae24acfe98f5dbdc76b034e492e35 Mon Sep 17 00:00:00 2001
From: Jelmer Vernooij
Date: Tue, 1 Feb 2011 11:04:51 +0100
Subject: Use lookup functions for committish.

---
 branch_updater.py               |  7 +++----
 bzr_commit_handler.py           |  2 +-
 cache_manager.py                | 19 ++++++++++++++++---
 processors/generic_processor.py | 19 ++++++++++---------
 4 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/branch_updater.py b/branch_updater.py
index a908f1d..0f9b7be 100644
--- a/branch_updater.py
+++ b/branch_updater.py
@@ -121,7 +121,7 @@ class BranchUpdater(object):
                 except errors.BzrError, ex:
                     error("ERROR: failed to create branch %s: %s",
                         location, ex)
-            lost_head = self.cache_mgr.revision_ids[tip]
+            lost_head = self.cache_mgr.lookup_committish(tip)
             lost_info = (name, lost_head)
             lost_heads.append(lost_info)
         return branch_tips, lost_heads
@@ -147,12 +147,11 @@ class BranchUpdater(object):
 
     def _update_branch(self, br, last_mark):
         """Update a branch with last revision and tag information.
-        
+
         :return: whether the branch was changed or not
         """
         from fastimport.helpers import single_plural
-        last_mark = last_mark.lstrip(':')
-        last_rev_id = self.cache_mgr.revision_ids[last_mark]
+        last_rev_id = self.cache_mgr.lookup_committish(last_mark)
         revs = list(self.repo.iter_reverse_revision_history(last_rev_id))
         revno = len(revs)
         existing_revno, existing_last_rev_id = br.last_revision_info()
diff --git a/bzr_commit_handler.py b/bzr_commit_handler.py
index 9988f0a..4ae12f9 100644
--- a/bzr_commit_handler.py
+++ b/bzr_commit_handler.py
@@ -113,7 +113,7 @@ class GenericCommitHandler(processor.CommitHandler):
 
         # Convert the parent commit-ids to bzr revision-ids
         if parents:
-            self.parents = [self.cache_mgr.revision_ids[p.lstrip(':')]
+            self.parents = [self.cache_mgr.lookup_committish(p)
                 for p in parents]
         else:
             self.parents = []
diff --git a/cache_manager.py b/cache_manager.py
index 28d9f0d..4321fe6 100644
--- a/cache_manager.py
+++ b/cache_manager.py
@@ -96,7 +96,7 @@ class CacheManager(object):
 
         # import commmit-ids -> revision-id lookup table
         # we need to keep all of these but they are small
-        self.revision_ids = {}
+        self.marks = {}
 
         # (path, branch_ref) -> file-ids - as generated.
         # (Use store_file_id/fetch_fileid methods rather than direct access.)
@@ -121,12 +121,25 @@ class CacheManager(object):
 
         self.reftracker = RefTracker()
 
+    def add_mark(self, mark, commit_id):
+        assert mark[0] != ':'
+        self.marks[mark] = commit_id
+
+    def lookup_committish(self, committish):
+        """Resolve a 'committish' to a revision id.
+
+        :param committish: A "committish" string
+        :return: Bazaar revision id
+        """
+        assert committish[0] == ':'
+        return self.marks[committish.lstrip(':')]
+
     def dump_stats(self, note=trace.note):
         """Dump some statistics about what we cached."""
         # TODO: add in inventory stastistics
         note("Cache statistics:")
         self._show_stats_for(self._sticky_blobs, "sticky blobs", note=note)
-        self._show_stats_for(self.revision_ids, "revision-ids", note=note)
+        self._show_stats_for(self.marks, "revision-ids", note=note)
         # These aren't interesting so omit from the output, at least for now
         #self._show_stats_for(self._blobs, "other blobs", note=note)
         #self.reftracker.dump_stats(note=note)
@@ -157,7 +170,7 @@ class CacheManager(object):
         """Free up any memory used by the caches."""
         self._blobs.clear()
         self._sticky_blobs.clear()
-        self.revision_ids.clear()
+        self.marks.clear()
         self.reftracker.clear()
         self.inventories.clear()
 
diff --git a/processors/generic_processor.py b/processors/generic_processor.py
index e17a2ff..0fbdb6f 100644
--- a/processors/generic_processor.py
+++ b/processors/generic_processor.py
@@ -150,11 +150,11 @@ class GenericProcessor(processor.ImportProcessor):
         self.note("Starting import ...")
         self.cache_mgr = cache_manager.CacheManager(self.info, self.verbose,
             self.inventory_cache_size)
-        
+
         if self.params.get("import-marks") is not None:
             mark_info = marks_file.import_marks(self.params.get("import-marks"))
             if mark_info is not None:
-                self.cache_mgr.revision_ids = mark_info
+                self.cache_mgr.marks = mark_info
             self.skip_total = False
             self.first_incremental_commit = True
         else:
@@ -334,7 +334,7 @@ class GenericProcessor(processor.ImportProcessor):
 
         if self.params.get("export-marks") is not None:
             marks_file.export_marks(self.params.get("export-marks"),
-                self.cache_mgr.revision_ids)
+                self.cache_mgr.marks)
 
         if self.cache_mgr.reftracker.last_ref == None:
             """Nothing to refresh"""
@@ -474,7 +474,7 @@ class GenericProcessor(processor.ImportProcessor):
             # Currently, we just check the size. In the future, we might
             # decide to be more paranoid and check that the revision-ids
             # are identical as well.
-            self.cache_mgr.revision_ids, known = idmapfile.load_id_map(
+            self.cache_mgr.marks, known = idmapfile.load_id_map(
                 self.id_map_path)
             existing_count = len(self.repo.all_revision_ids())
             if existing_count < known:
@@ -485,7 +485,7 @@ class GenericProcessor(processor.ImportProcessor):
         """Save the id-map."""
         # Save the whole lot every time. If this proves a problem, we can
         # change to 'append just the new ones' at a later time.
-        idmapfile.save_id_map(self.id_map_path, self.cache_mgr.revision_ids)
+        idmapfile.save_id_map(self.id_map_path, self.cache_mgr.marks)
 
     def blob_handler(self, cmd):
         """Process a BlobCommand."""
@@ -509,11 +509,12 @@ class GenericProcessor(processor.ImportProcessor):
 
     def commit_handler(self, cmd):
         """Process a CommitCommand."""
+        mark = cmd.id.lstrip(':')
         if self.skip_total and self._revision_count < self.skip_total:
             self.cache_mgr.reftracker.track_heads(cmd)
             # Check that we really do know about this commit-id
-            if not self.cache_mgr.revision_ids.has_key(cmd.id):
-                raise plugin_errors.BadRestart(cmd.id)
+            if not self.cache_mgr.marks.has_key(mark):
+                raise plugin_errors.BadRestart(mark)
             self.cache_mgr._blobs = {}
             self._revision_count += 1
             if cmd.ref.startswith('refs/tags/'):
@@ -533,7 +534,7 @@ class GenericProcessor(processor.ImportProcessor):
         except:
             print "ABORT: exception occurred processing commit %s" % (cmd.id)
             raise
-        self.cache_mgr.revision_ids[cmd.id.lstrip(':')] = handler.revision_id
+        self.cache_mgr.add_mark(mark, handler.revision_id)
         self._revision_count += 1
         self.report_progress("(%s)" % cmd.id.lstrip(':'))
 
@@ -597,7 +598,7 @@ class GenericProcessor(processor.ImportProcessor):
     def _set_tag(self, name, from_):
         """Define a tag given a name and import 'from' reference."""
         bzr_tag_name = name.decode('utf-8', 'replace')
-        bzr_rev_id = self.cache_mgr.revision_ids[from_]
+        bzr_rev_id = self.cache_mgr.lookup_committish(from_)
         self.tags[bzr_tag_name] = bzr_rev_id
 
     def feature_handler(self, cmd):
--
cgit v1.2.1
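The second patch above centralises the normalization behind two CacheManager
helpers: marks are stored without the leading ':' via add_mark(), and resolved
from a fast-import style ':N' committish via lookup_committish(). A minimal
standalone sketch of that contract, using a hypothetical MarkStore class and
invented sample values rather than the plugin's real CacheManager:

    # Sketch only: mirrors the add_mark/lookup_committish contract added to
    # cache_manager.py; MarkStore and the example values are hypothetical.
    class MarkStore(object):
        def __init__(self):
            self.marks = {}  # raw mark number (no ':') -> revision id

        def add_mark(self, mark, commit_id):
            assert mark[0] != ':'  # marks are stored without the colon
            self.marks[mark] = commit_id

        def lookup_committish(self, committish):
            assert committish[0] == ':'  # callers pass ':N' style committishes
            return self.marks[committish.lstrip(':')]

    store = MarkStore()
    store.add_mark('7', 'example-revision-id-for-mark-7')
    assert store.lookup_committish(':7') == 'example-revision-id-for-mark-7'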