summary refs log tree commit diff
diff options
context:
space:
mode:
author Jelmer Vernooij <jelmer@samba.org> 2010-09-06 01:16:15 +0200
committer Jelmer Vernooij <jelmer@samba.org> 2010-09-06 01:16:15 +0200
commit c60068bd0035e829a1e11a55d9bd6fe2cde65a32 (patch)
tree 197ebc4a3ba60419aa52c770a6793ed2734e5925
parent 00998e68def7710ae12f926c0a1aaa0df861f13d (diff)
download python-fastimport-c60068bd0035e829a1e11a55d9bd6fe2cde65a32.tar.gz
Split out reftracker.
-rw-r--r-- bzr_commit_handler.py 2
-rw-r--r-- cache_manager.py 53
-rw-r--r-- processors/generic_processor.py 13
-rw-r--r-- processors/info_processor.py 11
-rw-r--r-- reftracker.py 67
-rw-r--r-- tests/test_head_tracking.py 6
6 files changed, 97 insertions, 55 deletions
diff --git a/bzr_commit_handler.py b/bzr_commit_handler.py
index 0d64616..351a832 100644
--- a/bzr_commit_handler.py
+++ b/bzr_commit_handler.py
@@ -85,7 +85,7 @@ class GenericCommitHandler(processor.CommitHandler):
self.data_for_commit[inventory.ROOT_ID] = []
# Track the heads and get the real parent list
- parents = self.cache_mgr.track_heads(self.command)
+ parents = self.cache_mgr.reftracker.track_heads(self.command)
# Convert the parent commit-ids to bzr revision-ids
if parents:
diff --git a/cache_manager.py b/cache_manager.py
index 4b0c369..6b0f1e6 100644
--- a/cache_manager.py
+++ b/cache_manager.py
@@ -23,9 +23,15 @@ import tempfile
import weakref
from bzrlib import lru_cache, trace
+from bzrlib.plugins.fastimport import (
+ branch_mapper,
+ )
from fastimport.helpers import (
single_plural,
)
+from bzrlib.plugins.fastimport.reftracker import (
+ RefTracker,
+ )
class _Cleanup(object):
@@ -80,10 +86,10 @@ class _Cleanup(object):
self.small_blobs = None
if self.tempdir is not None:
shutil.rmtree(self.tempdir)
-
+
class CacheManager(object):
-
+
_small_blob_threshold = 25*1024
_sticky_cache_size = 300*1024*1024
_sticky_flushed_size = 100*1024*1024
@@ -122,11 +128,6 @@ class CacheManager(object):
# (path, branch_ref) -> file-ids - as generated.
# (Use store_file_id/fetch_fileid methods rather than direct access.)
- # Head tracking: last ref, last id per ref & map of commit ids to ref*s*
- self.last_ref = None
- self.last_ids = {}
- self.heads = {}
-
# Work out the blobs to make sticky - None means all
self._blob_ref_counts = {}
if info is not None:
@@ -145,6 +146,8 @@ class CacheManager(object):
# than reinstantiate on every usage
self.branch_mapper = branch_mapper.BranchMapper()
+ self.reftracker = RefTracker()
+
def dump_stats(self, note=trace.note):
"""Dump some statistics about what we cached."""
# TODO: add in inventory stastistics
@@ -153,8 +156,7 @@ class CacheManager(object):
self._show_stats_for(self.revision_ids, "revision-ids", note=note)
# These aren't interesting so omit from the output, at least for now
#self._show_stats_for(self._blobs, "other blobs", note=note)
- #self._show_stats_for(self.last_ids, "last-ids", note=note)
- #self._show_stats_for(self.heads, "heads", note=note)
+ #self.reftracker.dump_stats(note=note)
def _show_stats_for(self, dict, label, note=trace.note, tuple_key=False):
"""Dump statistics about a given dictionary.
@@ -183,8 +185,7 @@ class CacheManager(object):
self._blobs.clear()
self._sticky_blobs.clear()
self.revision_ids.clear()
- self.last_ids.clear()
- self.heads.clear()
+ self.reftracker.clear()
self.inventories.clear()
def _flush_blobs_to_disk(self):
@@ -288,32 +289,4 @@ class CacheManager(object):
self._sticky_memory_bytes -= len(content)
return content
- def track_heads(self, cmd):
- """Track the repository heads given a CommitCommand.
-
- :param cmd: the CommitCommand
- :return: the list of parents in terms of commit-ids
- """
- # Get the true set of parents
- if cmd.from_ is not None:
- parents = [cmd.from_]
- else:
- last_id = self.last_ids.get(cmd.ref)
- if last_id is not None:
- parents = [last_id]
- else:
- parents = []
- parents.extend(cmd.merges)
-
- # Track the heads
- self.track_heads_for_ref(cmd.ref, cmd.id, parents)
- return parents
-
- def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None):
- if parents is not None:
- for parent in parents:
- if parent in self.heads:
- del self.heads[parent]
- self.heads.setdefault(cmd_id, set()).add(cmd_ref)
- self.last_ids[cmd_ref] = cmd_id
- self.last_ref = cmd_ref
+
diff --git a/processors/generic_processor.py b/processors/generic_processor.py
index a2fe03c..279836b 100644
--- a/processors/generic_processor.py
+++ b/processors/generic_processor.py
@@ -331,15 +331,16 @@ class GenericProcessor(processor.ImportProcessor):
marks_file.export_marks(self.params.get("export-marks"),
self.cache_mgr.revision_ids)
- if self.cache_mgr.last_ref == None:
+ if self.cache_mgr.reftracker.last_ref == None:
"""Nothing to refresh"""
return
# Update the branches
self.note("Updating branch information ...")
updater = branch_updater.BranchUpdater(self.repo, self.branch,
- self.cache_mgr, helpers.invert_dictset(self.cache_mgr.heads),
- self.cache_mgr.last_ref, self.tags)
+ self.cache_mgr, helpers.invert_dictset(
+ self.cache_mgr.reftracker.heads),
+ self.cache_mgr.reftracker.last_ref, self.tags)
branches_updated, branches_lost = updater.update()
self._branch_count = len(branches_updated)
@@ -504,7 +505,7 @@ class GenericProcessor(processor.ImportProcessor):
def commit_handler(self, cmd):
"""Process a CommitCommand."""
if self.skip_total and self._revision_count < self.skip_total:
- self.cache_mgr.track_heads(cmd)
+ self.cache_mgr.reftracker.track_heads(cmd)
# Check that we really do know about this commit-id
if not self.cache_mgr.revision_ids.has_key(cmd.id):
raise plugin_errors.BadRestart(cmd.id)
@@ -519,7 +520,7 @@ class GenericProcessor(processor.ImportProcessor):
return
if self.first_incremental_commit:
self.first_incremental_commit = None
- parents = self.cache_mgr.track_heads(cmd)
+ parents = self.cache_mgr.reftracker.track_heads(cmd)
# 'Commit' the revision and report progress
handler = self.commit_handler_factory(cmd, self.cache_mgr,
@@ -582,7 +583,7 @@ class GenericProcessor(processor.ImportProcessor):
return
if cmd.from_ is not None:
- self.cache_mgr.track_heads_for_ref(cmd.ref, cmd.from_)
+ self.cache_mgr.reftracker.track_heads_for_ref(cmd.ref, cmd.from_)
def tag_handler(self, cmd):
"""Process a TagCommand."""
diff --git a/processors/info_processor.py b/processors/info_processor.py
index a765f91..e274683 100644
--- a/processors/info_processor.py
+++ b/processors/info_processor.py
@@ -68,8 +68,8 @@ class InfoProcessor(processor.ImportProcessor):
for usage in ['new', 'used', 'unknown', 'unmarked']:
self.blobs[usage] = set()
self.blob_ref_counts = {}
- # Head tracking - delegate to the cache manager
- self.cache_mgr = cache_manager.CacheManager(inventory_cache_size=0)
+ # Head tracking
+ self.reftracker = cache_manager.RefTracker()
# Stuff to cache: a map from mark to # of times that mark is merged
self.merges = {}
# Stuff to cache: these are maps from mark to sets
@@ -102,7 +102,7 @@ class InfoProcessor(processor.ImportProcessor):
}
self._dump_stats_group("Parent counts", p_items, str)
self._dump_stats_group("Commit analysis", flags.iteritems(), _found)
- heads = invert_dictset(self.cache_mgr.heads)
+ heads = invert_dictset(self.reftracker.heads)
self._dump_stats_group("Head analysis", heads.iteritems(), None,
_iterable_as_config_list)
# note("\t%d\t%s" % (len(self.committers), 'unique committers'))
@@ -213,7 +213,7 @@ class InfoProcessor(processor.ImportProcessor):
self.copy_source_paths.setdefault(cmd.id, set()).add(fc.src_path)
# Track the heads
- parents = self.cache_mgr.track_heads(cmd)
+ parents = self.reftracker.track_heads(cmd)
# Track the parent counts
parent_count = len(parents)
@@ -240,7 +240,8 @@ class InfoProcessor(processor.ImportProcessor):
self.lightweight_tags += 1
else:
if cmd.from_ is not None:
- self.cache_mgr.track_heads_for_ref(cmd.ref, cmd.from_)
+ self.reftracker.track_heads_for_ref(
+ cmd.ref, cmd.from_)
def tag_handler(self, cmd):
"""Process a TagCommand."""
diff --git a/reftracker.py b/reftracker.py
new file mode 100644
index 0000000..3862180
--- /dev/null
+++ b/reftracker.py
@@ -0,0 +1,67 @@
+# Copyright (C) 2009 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+"""Tracker of refs."""
+
+
+class RefTracker(object):
+
+ def __init__(self):
+ # Head tracking: last ref, last id per ref & map of commit ids to ref*s*
+ self.last_ref = None
+ self.last_ids = {}
+ self.heads = {}
+
+ def dump_stats(self, note):
+ self._show_stats_for(self.last_ids, "last-ids", note=note)
+ self._show_stats_for(self.heads, "heads", note=note)
+
+ def clear(self):
+ self.last_ids.clear()
+ self.heads.clear()
+
+ def track_heads(self, cmd):
+ """Track the repository heads given a CommitCommand.
+
+ :param cmd: the CommitCommand
+ :return: the list of parents in terms of commit-ids
+ """
+ # Get the true set of parents
+ if cmd.from_ is not None:
+ parents = [cmd.from_]
+ else:
+ last_id = self.last_ids.get(cmd.ref)
+ if last_id is not None:
+ parents = [last_id]
+ else:
+ parents = []
+ parents.extend(cmd.merges)
+
+ # Track the heads
+ self.track_heads_for_ref(cmd.ref, cmd.id, parents)
+ return parents
+
+ def track_heads_for_ref(self, cmd_ref, cmd_id, parents=None):
+ if parents is not None:
+ for parent in parents:
+ if parent in self.heads:
+ del self.heads[parent]
+ self.heads.setdefault(cmd_id, set()).add(cmd_ref)
+ self.last_ids[cmd_ref] = cmd_id
+ self.last_ref = cmd_ref
+
+
diff --git a/tests/test_head_tracking.py b/tests/test_head_tracking.py
index 77bb422..3c44d50 100644
--- a/tests/test_head_tracking.py
+++ b/tests/test_head_tracking.py
@@ -222,13 +222,13 @@ class TestHeadTracking(tests.TestCase):
cm = CacheManager()
for cmd in p.iter_commands():
if isinstance(cmd, commands.CommitCommand):
- cm.track_heads(cmd)
+ cm.reftracker.track_heads(cmd)
# eat the file commands
list(cmd.file_iter())
elif isinstance(cmd, commands.ResetCommand):
if cmd.from_ is not None:
- cm.track_heads_for_ref(cmd.ref, cmd.from_)
- self.assertEqual(cm.heads, expected)
+ cm.reftracker.track_heads_for_ref(cmd.ref, cmd.from_)
+ self.assertEqual(cm.reftracker.heads, expected)
def test_mainline(self):
self.assertHeads(_SAMPLE_MAINLINE, {