diff options
author | Jelmer Vernooij <jelmer@samba.org> | 2010-09-06 01:16:15 +0200 |
---|---|---|
committer | Jelmer Vernooij <jelmer@samba.org> | 2010-09-06 01:16:15 +0200 |
commit | c88605b6d4b162f71f400d073bd0c4348142cf11 (patch) | |
tree | 197ebc4a3ba60419aa52c770a6793ed2734e5925 | |
parent | 0aac780e93ac746c5254879146fe289cf7dad122 (diff) | |
download | python-fastimport-git-c88605b6d4b162f71f400d073bd0c4348142cf11.tar.gz |
Split out reftracker.
-rw-r--r-- | bzr_commit_handler.py | 2 |
-rw-r--r-- | cache_manager.py | 53 |
-rw-r--r-- | processors/generic_processor.py | 13 |
-rw-r--r-- | processors/info_processor.py | 11 |
-rw-r--r-- | reftracker.py | 67 |
-rw-r--r-- | tests/test_head_tracking.py | 6 |
6 files changed, 97 insertions, 55 deletions
diff --git a/bzr_commit_handler.py b/bzr_commit_handler.py index 0d64616..351a832 100644 --- a/bzr_commit_handler.py +++ b/bzr_commit_handler.py @@ -85,7 +85,7 @@ class GenericCommitHandler(processor.CommitHandler): self.data_for_commit[inventory.ROOT_ID] = [] # Track the heads and get the real parent list - parents = self.cache_mgr.track_heads(self.command) + parents = self.cache_mgr.reftracker.track_heads(self.command) # Convert the parent commit-ids to bzr revision-ids if parents: diff --git a/cache_manager.py b/cache_manager.py index 4b0c369..6b0f1e6 100644 --- a/cache_manager.py +++ b/cache_manager.py @@ -23,9 +23,15 @@ import tempfile import weakref from bzrlib import lru_cache, trace +from bzrlib.plugins.fastimport import ( + branch_mapper, + ) from fastimport.helpers import ( single_plural, ) +from bzrlib.plugins.fastimport.reftracker import ( + RefTracker, + ) class _Cleanup(object): @@ -80,10 +86,10 @@ class _Cleanup(object): self.small_blobs = None if self.tempdir is not None: shutil.rmtree(self.tempdir) - + class CacheManager(object): - + _small_blob_threshold = 25*1024 _sticky_cache_size = 300*1024*1024 _sticky_flushed_size = 100*1024*1024 @@ -122,11 +128,6 @@ class CacheManager(object): # (path, branch_ref) -> file-ids - as generated. # (Use store_file_id/fetch_fileid methods rather than direct access.) 
- # Head tracking: last ref, last id per ref & map of commit ids to ref*s* - self.last_ref = None - self.last_ids = {} - self.heads = {} - # Work out the blobs to make sticky - None means all self._blob_ref_counts = {} if info is not None: @@ -145,6 +146,8 @@ class CacheManager(object): # than reinstantiate on every usage self.branch_mapper = branch_mapper.BranchMapper() + self.reftracker = RefTracker() + def dump_stats(self, note=trace.note): """Dump some statistics about what we cached.""" # TODO: add in inventory stastistics @@ -153,8 +156,7 @@ class CacheManager(object): self._show_stats_for(self.revision_ids, "revision-ids", note=note) # These aren't interesting so omit from the output, at least for now #self._show_stats_for(self._blobs, "other blobs", note=note) - #self._show_stats_for(self.last_ids, "last-ids", note=note) - #self._show_stats_for(self.heads, "heads", note=note) + #self.reftracker.dump_stats(note=note) def _show_stats_for(self, dict, label, note=trace.note, tuple_key=False): """Dump statistics about a given dictionary. @@ -183,8 +185,7 @@ class CacheManager(object): self._blobs.clear() self._sticky_blobs.clear() self.revision_ids.clear() - self.last_ids.clear() - self.heads.clear() + self.reftracker.clear() self.inventories.clear() def _flush_blobs_to_disk(self): @@ -288,32 +289,4 @@ class CacheManager(object): self._sticky_memory_bytes -= len(content) return content - def track_heads(self, cmd): - """Track the repository heads given a CommitCommand. 
- - :param cmd: the CommitCommand - :return: the list of parents in terms of commit-ids - """ - # Get the true set of parents - if cmd.from_ is not None: - parents = [cmd.from_] - else: - last_id = self.last_ids.get(cmd.ref) - if last_id is not None: - parents = [last_id] - else: - parents = [] - parents.extend(cmd.merges) - - # Track the heads - self.track_heads_for_ref(cmd.ref, cmd.id, parents) - return parents - - def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None): - if parents is not None: - for parent in parents: - if parent in self.heads: - del self.heads[parent] - self.heads.setdefault(cmd_id, set()).add(cmd_ref) - self.last_ids[cmd_ref] = cmd_id - self.last_ref = cmd_ref + diff --git a/processors/generic_processor.py b/processors/generic_processor.py index a2fe03c..279836b 100644 --- a/processors/generic_processor.py +++ b/processors/generic_processor.py @@ -331,15 +331,16 @@ class GenericProcessor(processor.ImportProcessor): marks_file.export_marks(self.params.get("export-marks"), self.cache_mgr.revision_ids) - if self.cache_mgr.last_ref == None: + if self.cache_mgr.reftracker.last_ref == None: """Nothing to refresh""" return # Update the branches self.note("Updating branch information ...") updater = branch_updater.BranchUpdater(self.repo, self.branch, - self.cache_mgr, helpers.invert_dictset(self.cache_mgr.heads), - self.cache_mgr.last_ref, self.tags) + self.cache_mgr, helpers.invert_dictset( + self.cache_mgr.reftracker.heads), + self.cache_mgr.reftracker.last_ref, self.tags) branches_updated, branches_lost = updater.update() self._branch_count = len(branches_updated) @@ -504,7 +505,7 @@ class GenericProcessor(processor.ImportProcessor): def commit_handler(self, cmd): """Process a CommitCommand.""" if self.skip_total and self._revision_count < self.skip_total: - self.cache_mgr.track_heads(cmd) + self.cache_mgr.reftracker.track_heads(cmd) # Check that we really do know about this commit-id if not self.cache_mgr.revision_ids.has_key(cmd.id): 
raise plugin_errors.BadRestart(cmd.id) @@ -519,7 +520,7 @@ class GenericProcessor(processor.ImportProcessor): return if self.first_incremental_commit: self.first_incremental_commit = None - parents = self.cache_mgr.track_heads(cmd) + parents = self.cache_mgr.reftracker.track_heads(cmd) # 'Commit' the revision and report progress handler = self.commit_handler_factory(cmd, self.cache_mgr, @@ -582,7 +583,7 @@ class GenericProcessor(processor.ImportProcessor): return if cmd.from_ is not None: - self.cache_mgr.track_heads_for_ref(cmd.ref, cmd.from_) + self.cache_mgr.reftracker.track_heads_for_ref(cmd.ref, cmd.from_) def tag_handler(self, cmd): """Process a TagCommand.""" diff --git a/processors/info_processor.py b/processors/info_processor.py index a765f91..e274683 100644 --- a/processors/info_processor.py +++ b/processors/info_processor.py @@ -68,8 +68,8 @@ class InfoProcessor(processor.ImportProcessor): for usage in ['new', 'used', 'unknown', 'unmarked']: self.blobs[usage] = set() self.blob_ref_counts = {} - # Head tracking - delegate to the cache manager - self.cache_mgr = cache_manager.CacheManager(inventory_cache_size=0) + # Head tracking + self.reftracker = cache_manager.RefTracker() # Stuff to cache: a map from mark to # of times that mark is merged self.merges = {} # Stuff to cache: these are maps from mark to sets @@ -102,7 +102,7 @@ class InfoProcessor(processor.ImportProcessor): } self._dump_stats_group("Parent counts", p_items, str) self._dump_stats_group("Commit analysis", flags.iteritems(), _found) - heads = invert_dictset(self.cache_mgr.heads) + heads = invert_dictset(self.reftracker.heads) self._dump_stats_group("Head analysis", heads.iteritems(), None, _iterable_as_config_list) # note("\t%d\t%s" % (len(self.committers), 'unique committers')) @@ -213,7 +213,7 @@ class InfoProcessor(processor.ImportProcessor): self.copy_source_paths.setdefault(cmd.id, set()).add(fc.src_path) # Track the heads - parents = self.cache_mgr.track_heads(cmd) + parents = 
self.reftracker.track_heads(cmd) # Track the parent counts parent_count = len(parents) @@ -240,7 +240,8 @@ class InfoProcessor(processor.ImportProcessor): self.lightweight_tags += 1 else: if cmd.from_ is not None: - self.cache_mgr.track_heads_for_ref(cmd.ref, cmd.from_) + self.reftracker.track_heads_for_ref( + cmd.ref, cmd.from_) def tag_handler(self, cmd): """Process a TagCommand.""" diff --git a/reftracker.py b/reftracker.py new file mode 100644 index 0000000..3862180 --- /dev/null +++ b/reftracker.py @@ -0,0 +1,67 @@ +# Copyright (C) 2009 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +"""Tracker of refs.""" + + +class RefTracker(object): + + def __init__(self): + # Head tracking: last ref, last id per ref & map of commit ids to ref*s* + self.last_ref = None + self.last_ids = {} + self.heads = {} + + def dump_stats(self, note): + self._show_stats_for(self.last_ids, "last-ids", note=note) + self._show_stats_for(self.heads, "heads", note=note) + + def clear(self): + self.last_ids.clear() + self.heads.clear() + + def track_heads(self, cmd): + """Track the repository heads given a CommitCommand. 
+ + :param cmd: the CommitCommand + :return: the list of parents in terms of commit-ids + """ + # Get the true set of parents + if cmd.from_ is not None: + parents = [cmd.from_] + else: + last_id = self.last_ids.get(cmd.ref) + if last_id is not None: + parents = [last_id] + else: + parents = [] + parents.extend(cmd.merges) + + # Track the heads + self.track_heads_for_ref(cmd.ref, cmd.id, parents) + return parents + + def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None): + if parents is not None: + for parent in parents: + if parent in self.heads: + del self.heads[parent] + self.heads.setdefault(cmd_id, set()).add(cmd_ref) + self.last_ids[cmd_ref] = cmd_id + self.last_ref = cmd_ref + + diff --git a/tests/test_head_tracking.py b/tests/test_head_tracking.py index 77bb422..3c44d50 100644 --- a/tests/test_head_tracking.py +++ b/tests/test_head_tracking.py @@ -222,13 +222,13 @@ class TestHeadTracking(tests.TestCase): cm = CacheManager() for cmd in p.iter_commands(): if isinstance(cmd, commands.CommitCommand): - cm.track_heads(cmd) + cm.reftracker.track_heads(cmd) # eat the file commands list(cmd.file_iter()) elif isinstance(cmd, commands.ResetCommand): if cmd.from_ is not None: - cm.track_heads_for_ref(cmd.ref, cmd.from_) - self.assertEqual(cm.heads, expected) + cm.reftracker.track_heads_for_ref(cmd.ref, cmd.from_) + self.assertEqual(cm.reftracker.heads, expected) def test_mainline(self): self.assertHeads(_SAMPLE_MAINLINE, { |