From 1efe12dfdf0b360f89ec30ed1f30b749f826bcba Mon Sep 17 00:00:00 2001
From: termie
Date: Sun, 30 Jan 2011 14:31:03 -0800
Subject: Add a bunch of mark id normalization.

bzr-fastimport is getting marks from a variety of sources, all of which
use the format ':\d+', but internally it really wants just the raw
number. This patch cleans up the symptoms in the many places where this
normalization was not occurring.
---
 branch_updater.py               | 1 +
 bzr_commit_handler.py           | 2 +-
 marks_file.py                   | 5 ++---
 processors/generic_processor.py | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/branch_updater.py b/branch_updater.py
index 039171f..a908f1d 100644
--- a/branch_updater.py
+++ b/branch_updater.py
@@ -151,6 +151,7 @@ class BranchUpdater(object):
         :return: whether the branch was changed or not
         """
         from fastimport.helpers import single_plural
+        last_mark = last_mark.lstrip(':')
         last_rev_id = self.cache_mgr.revision_ids[last_mark]
         revs = list(self.repo.iter_reverse_revision_history(last_rev_id))
         revno = len(revs)
diff --git a/bzr_commit_handler.py b/bzr_commit_handler.py
index d81b271..9988f0a 100644
--- a/bzr_commit_handler.py
+++ b/bzr_commit_handler.py
@@ -113,7 +113,7 @@ class GenericCommitHandler(processor.CommitHandler):
 
         # Convert the parent commit-ids to bzr revision-ids
         if parents:
-            self.parents = [self.cache_mgr.revision_ids[p]
+            self.parents = [self.cache_mgr.revision_ids[p.lstrip(':')]
                 for p in parents]
         else:
             self.parents = []
diff --git a/marks_file.py b/marks_file.py
index c05f8c6..7066a1c 100644
--- a/marks_file.py
+++ b/marks_file.py
@@ -53,8 +53,7 @@ def import_marks(filename):
     while line:
         line = line.rstrip('\n')
         mark, revid = line.split(' ', 1)
-        if mark.startswith(':'):
-            mark = mark[1:]
+        mark = mark.lstrip(':')
         revision_ids[mark] = revid
         line = f.readline()
     f.close()
@@ -76,5 +75,5 @@ def export_marks(filename, revision_ids):
 
     # Write the revision info
     for mark, revid in revision_ids.iteritems():
-        f.write(':%s %s\n' % (mark, revid))
+        f.write(':%s %s\n' % (str(mark).lstrip(':'), revid))
     f.close()
diff --git a/processors/generic_processor.py b/processors/generic_processor.py
index 093a432..e17a2ff 100644
--- a/processors/generic_processor.py
+++ b/processors/generic_processor.py
@@ -533,9 +533,9 @@ class GenericProcessor(processor.ImportProcessor):
         except:
             print "ABORT: exception occurred processing commit %s" % (cmd.id)
             raise
-        self.cache_mgr.revision_ids[cmd.id] = handler.revision_id
+        self.cache_mgr.revision_ids[cmd.id.lstrip(':')] = handler.revision_id
         self._revision_count += 1
-        self.report_progress("(%s)" % cmd.id)
+        self.report_progress("(%s)" % cmd.id.lstrip(':'))
 
         if cmd.ref.startswith('refs/tags/'):
             tag_name = cmd.ref[len('refs/tags/'):]
--
cgit v1.2.1
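The normalization applied throughout the patch above boils down to stripping
the leading ':' from a fast-import mark before using it as a dictionary key.
A minimal sketch of that idea, assuming a hypothetical normalize_mark helper
(the patch itself simply inlines lstrip(':') at each call site):

    # Hypothetical helper for illustration only; the patch inlines lstrip(':').
    def normalize_mark(mark):
        """Strip the leading ':' so ':42' and '42' address the same entry."""
        return mark.lstrip(':')

    revision_ids = {}
    revision_ids[normalize_mark(':42')] = 'example-revision-id'
    assert revision_ids['42'] == 'example-revision-id'
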
From bc1a6e0f831ae24acfe98f5dbdc76b034e492e35 Mon Sep 17 00:00:00 2001
From: Jelmer Vernooij
Date: Tue, 1 Feb 2011 11:04:51 +0100
Subject: Use lookup functions for committish.

---
 branch_updater.py               |  7 +++----
 bzr_commit_handler.py           |  2 +-
 cache_manager.py                | 19 ++++++++++++++++---
 processors/generic_processor.py | 19 ++++++++++---------
 4 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/branch_updater.py b/branch_updater.py
index a908f1d..0f9b7be 100644
--- a/branch_updater.py
+++ b/branch_updater.py
@@ -121,7 +121,7 @@ class BranchUpdater(object):
                 except errors.BzrError, ex:
                     error("ERROR: failed to create branch %s: %s",
                         location, ex)
-            lost_head = self.cache_mgr.revision_ids[tip]
+            lost_head = self.cache_mgr.lookup_committish(tip)
             lost_info = (name, lost_head)
             lost_heads.append(lost_info)
         return branch_tips, lost_heads
@@ -147,12 +147,11 @@ class BranchUpdater(object):
 
     def _update_branch(self, br, last_mark):
         """Update a branch with last revision and tag information.
-        
+
         :return: whether the branch was changed or not
         """
         from fastimport.helpers import single_plural
-        last_mark = last_mark.lstrip(':')
-        last_rev_id = self.cache_mgr.revision_ids[last_mark]
+        last_rev_id = self.cache_mgr.lookup_committish(last_mark)
         revs = list(self.repo.iter_reverse_revision_history(last_rev_id))
         revno = len(revs)
         existing_revno, existing_last_rev_id = br.last_revision_info()
diff --git a/bzr_commit_handler.py b/bzr_commit_handler.py
index 9988f0a..4ae12f9 100644
--- a/bzr_commit_handler.py
+++ b/bzr_commit_handler.py
@@ -113,7 +113,7 @@ class GenericCommitHandler(processor.CommitHandler):
 
         # Convert the parent commit-ids to bzr revision-ids
         if parents:
-            self.parents = [self.cache_mgr.revision_ids[p.lstrip(':')]
+            self.parents = [self.cache_mgr.lookup_committish(p)
                 for p in parents]
         else:
             self.parents = []
diff --git a/cache_manager.py b/cache_manager.py
index 28d9f0d..4321fe6 100644
--- a/cache_manager.py
+++ b/cache_manager.py
@@ -96,7 +96,7 @@ class CacheManager(object):
 
         # import commmit-ids -> revision-id lookup table
         # we need to keep all of these but they are small
-        self.revision_ids = {}
+        self.marks = {}
 
         # (path, branch_ref) -> file-ids - as generated.
         # (Use store_file_id/fetch_fileid methods rather than direct access.)
@@ -121,12 +121,25 @@ class CacheManager(object):
 
         self.reftracker = RefTracker()
 
+    def add_mark(self, mark, commit_id):
+        assert mark[0] != ':'
+        self.marks[mark] = commit_id
+
+    def lookup_committish(self, committish):
+        """Resolve a 'committish' to a revision id.
+
+        :param committish: A "committish" string
+        :return: Bazaar revision id
+        """
+        assert committish[0] == ':'
+        return self.marks[committish.lstrip(':')]
+
     def dump_stats(self, note=trace.note):
         """Dump some statistics about what we cached."""
         # TODO: add in inventory stastistics
         note("Cache statistics:")
         self._show_stats_for(self._sticky_blobs, "sticky blobs", note=note)
-        self._show_stats_for(self.revision_ids, "revision-ids", note=note)
+        self._show_stats_for(self.marks, "revision-ids", note=note)
         # These aren't interesting so omit from the output, at least for now
         #self._show_stats_for(self._blobs, "other blobs", note=note)
         #self.reftracker.dump_stats(note=note)
@@ -157,7 +170,7 @@ class CacheManager(object):
         """Free up any memory used by the caches."""
         self._blobs.clear()
         self._sticky_blobs.clear()
-        self.revision_ids.clear()
+        self.marks.clear()
         self.reftracker.clear()
         self.inventories.clear()
 
diff --git a/processors/generic_processor.py b/processors/generic_processor.py
index e17a2ff..0fbdb6f 100644
--- a/processors/generic_processor.py
+++ b/processors/generic_processor.py
@@ -150,11 +150,11 @@ class GenericProcessor(processor.ImportProcessor):
         self.note("Starting import ...")
         self.cache_mgr = cache_manager.CacheManager(self.info, self.verbose,
             self.inventory_cache_size)
-        
+
         if self.params.get("import-marks") is not None:
             mark_info = marks_file.import_marks(self.params.get("import-marks"))
             if mark_info is not None:
-                self.cache_mgr.revision_ids = mark_info
+                self.cache_mgr.marks = mark_info
             self.skip_total = False
             self.first_incremental_commit = True
         else:
@@ -334,7 +334,7 @@ class GenericProcessor(processor.ImportProcessor):
 
         if self.params.get("export-marks") is not None:
             marks_file.export_marks(self.params.get("export-marks"),
-                self.cache_mgr.revision_ids)
+                self.cache_mgr.marks)
 
         if self.cache_mgr.reftracker.last_ref == None:
             """Nothing to refresh"""
@@ -474,7 +474,7 @@ class GenericProcessor(processor.ImportProcessor):
             # Currently, we just check the size. In the future, we might
             # decide to be more paranoid and check that the revision-ids
             # are identical as well.
-            self.cache_mgr.revision_ids, known = idmapfile.load_id_map(
+            self.cache_mgr.marks, known = idmapfile.load_id_map(
                 self.id_map_path)
             existing_count = len(self.repo.all_revision_ids())
             if existing_count < known:
@@ -485,7 +485,7 @@ class GenericProcessor(processor.ImportProcessor):
         """Save the id-map."""
         # Save the whole lot every time. If this proves a problem, we can
         # change to 'append just the new ones' at a later time.
-        idmapfile.save_id_map(self.id_map_path, self.cache_mgr.revision_ids)
+        idmapfile.save_id_map(self.id_map_path, self.cache_mgr.marks)
 
     def blob_handler(self, cmd):
         """Process a BlobCommand."""
@@ -509,11 +509,12 @@ class GenericProcessor(processor.ImportProcessor):
 
     def commit_handler(self, cmd):
         """Process a CommitCommand."""
+        mark = cmd.id.lstrip(':')
         if self.skip_total and self._revision_count < self.skip_total:
             self.cache_mgr.reftracker.track_heads(cmd)
             # Check that we really do know about this commit-id
-            if not self.cache_mgr.revision_ids.has_key(cmd.id):
-                raise plugin_errors.BadRestart(cmd.id)
+            if not self.cache_mgr.marks.has_key(mark):
+                raise plugin_errors.BadRestart(mark)
             self.cache_mgr._blobs = {}
             self._revision_count += 1
             if cmd.ref.startswith('refs/tags/'):
@@ -533,7 +534,7 @@ class GenericProcessor(processor.ImportProcessor):
         except:
             print "ABORT: exception occurred processing commit %s" % (cmd.id)
             raise
-        self.cache_mgr.revision_ids[cmd.id.lstrip(':')] = handler.revision_id
+        self.cache_mgr.add_mark(mark, handler.revision_id)
         self._revision_count += 1
         self.report_progress("(%s)" % cmd.id.lstrip(':'))
 
@@ -597,7 +598,7 @@ class GenericProcessor(processor.ImportProcessor):
     def _set_tag(self, name, from_):
         """Define a tag given a name and import 'from' reference."""
         bzr_tag_name = name.decode('utf-8', 'replace')
-        bzr_rev_id = self.cache_mgr.revision_ids[from_]
+        bzr_rev_id = self.cache_mgr.lookup_committish(from_)
         self.tags[bzr_tag_name] = bzr_rev_id
 
     def feature_handler(self, cmd):
--
cgit v1.2.1
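The second patch above centralises the normalization behind two CacheManager
helpers: marks are stored without the leading ':' via add_mark(), and resolved
from a fast-import style ':N' committish via lookup_committish(). A minimal
standalone sketch of that contract, using a hypothetical MarkStore class and
invented sample values rather than the plugin's real CacheManager:

    # Sketch only: mirrors the add_mark/lookup_committish contract added to
    # cache_manager.py; MarkStore and the example values are hypothetical.
    class MarkStore(object):
        def __init__(self):
            self.marks = {}  # raw mark number (no ':') -> revision id

        def add_mark(self, mark, commit_id):
            assert mark[0] != ':'  # marks are stored without the colon
            self.marks[mark] = commit_id

        def lookup_committish(self, committish):
            assert committish[0] == ':'  # callers pass ':N' style committishes
            return self.marks[committish.lstrip(':')]

    store = MarkStore()
    store.add_mark('7', 'example-revision-id-for-mark-7')
    assert store.lookup_committish(':7') == 'example-revision-id-for-mark-7'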