summary | refs | log | tree | commit | diff
path: root/revision_store.py
diff options
context:
space:
mode:
author: Ian Clatworthy <ian.clatworthy@canonical.com> 2009-02-19 17:13:43 +1000
committer: Ian Clatworthy <ian.clatworthy@canonical.com> 2009-02-19 17:13:43 +1000
commit: 995d645a0dbc1d920fdc4b15c3c905ea69f3c84e (patch)
tree: 2e152be3d4d2aaa9f2e89b691a5356569ff75e79 /revision_store.py
parent: 69bd63cebbbf62db05b2b9874596d118c6db7b13 (diff)
download: bzr-fastimport-995d645a0dbc1d920fdc4b15c3c905ea69f3c84e.tar.gz
generalise RevisionLoader to RevisionStore as a repo abstraction
Diffstat (limited to 'revision_store.py')
-rw-r--r-- revision_store.py 360
1 files changed, 360 insertions, 0 deletions
diff --git a/revision_store.py b/revision_store.py
new file mode 100644
index 0000000..fd2b427
--- /dev/null
+++ b/revision_store.py
@@ -0,0 +1,360 @@
+# Copyright (C) 2008 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""An abstraction of a repository providing just the bits importing needs."""
+
+
+from bzrlib import errors, knit, lru_cache, osutils
+from bzrlib import revision as _mod_revision
+
+
+class AbstractRevisionStore(object):
+    """Base class for objects that load revisions into a repository.
+
+    Subclasses must implement _load_texts(). The other hooks
+    (_add_inventory and _add_revision) have default implementations
+    that delegate to the wrapped repository.
+
+    NOTE: Repository locking is not managed by this class. Clients
+    should take a write lock, call load() multiple times, then release
+    the lock.
+    """
+
+    def __init__(self, repo):
+        """Create a store wrapping a repository.
+
+        :param repo: the target repository
+        """
+        self.repo = repo
+        # When the repository format has a true _commit_inv_deltas
+        # attribute, _add_inventory() tries to add inventories as a delta
+        # against a parent rather than as a complete inventory.
+        self.try_inv_deltas = getattr(self.repo._format,
+            '_commit_inv_deltas', False)
+
+    def expects_rich_root(self):
+        """Does this store expect inventories with rich roots?"""
+        return self.repo.supports_rich_root()
+
+    def get_inventory(self, revision_id):
+        """Get a stored inventory."""
+        return self.repo.get_inventory(revision_id)
+
+    def get_file_text(self, revision_id, file_id):
+        """Get the text stored for a file in a given revision."""
+        revtree = self.repo.revision_tree(revision_id)
+        return revtree.get_file_text(file_id)
+
+    def load(self, rev, inv, signature, text_provider,
+        inventories_provider=None):
+        """Load a revision.
+
+        :param rev: the Revision
+        :param inv: the inventory
+        :param signature: signing information, or None if there is none
+        :param text_provider: a callable expecting a file_id parameter
+            that returns the text for that file-id
+        :param inventories_provider: a callable expecting a list of
+            revision-ids, that returns a 2-tuple of:
+            * the list of revision-ids present in the repository
+            * the list of inventories for the revision-id's,
+              including an empty inventory for the missing revisions
+            If None, a default implementation is provided.
+        """
+        # NOTE: This is bzrlib.repository._install_revision refactored
+        # to provide more flexibility in how previous revisions are cached,
+        # data is fed in, etc.
+        if inventories_provider is None:
+            inventories_provider = self._default_inventories_provider
+        present_parents, parent_invs = inventories_provider(rev.parent_ids)
+        self._load_texts(rev.revision_id, inv.iter_entries(), parent_invs,
+            text_provider)
+        try:
+            rev.inventory_sha1 = self._add_inventory(rev.revision_id,
+                inv, present_parents, parent_invs)
+        except errors.RevisionAlreadyPresent:
+            # Deliberate best-effort: the inventory is already stored, so
+            # carry on and record the signature and revision.
+            pass
+        if signature is not None:
+            self.repo.add_signature_text(rev.revision_id, signature)
+        self._add_revision(rev, inv)
+
+    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
+        """Load texts to a repository for inventory entries.
+
+        This method is provided for subclasses to use or override.
+
+        :param revision_id: the revision identifier
+        :param entries: iterator over the inventory entries
+        :param parent_invs: the parent inventories
+        :param text_provider: a callable expecting a file_id parameter
+            that returns the text for that file-id
+        """
+        raise NotImplementedError(self._load_texts)
+
+    def _add_inventory(self, revision_id, inv, parents, parent_invs):
+        """Add the inventory inv to the repository as revision_id.
+
+        :param parents: The revision ids of the parents that revision_id
+            is known to have and are in the repository already.
+        :param parent_invs: the parent inventories, one per parent of the
+            revision (including an empty inventory for each ghost)
+
+        :returns: The validator(which is a sha1 digest, though what is sha'd
+            is repository format specific) of the serialized inventory.
+        """
+        # The delta is computed against parent_invs[0] but recorded against
+        # parents[0]. parent_invs has an entry for every parent while
+        # parents only lists the present ones, so the two are only known to
+        # line up position-for-position when no parent is a ghost. When
+        # they do not, fall back to adding the complete inventory, which is
+        # always correct.
+        positions_match = len(parents) == len(parent_invs)
+        if self.try_inv_deltas and parents and positions_match:
+            basis_inv = parent_invs[0]
+            delta = inv._make_delta(basis_inv)
+            return self.repo.add_inventory_by_delta(parents[0], delta,
+                revision_id, parents)
+        return self.repo.add_inventory(revision_id, inv, parents)
+
+    def _add_revision(self, rev, inv):
+        """Add a revision and its inventory to a repository.
+
+        :param rev: the Revision
+        :param inv: the inventory
+        """
+        self.repo.add_revision(rev.revision_id, rev, inv)
+
+    def _default_inventories_provider(self, revision_ids):
+        """An inventories provider that queries the repository.
+
+        :param revision_ids: the revision-ids to look up
+        :returns: (present, inventories) where present lists the
+            revision-ids found in the repository and inventories holds one
+            inventory per requested revision-id, in order. Revisions that
+            are not in the repository get the inventory of
+            revision_tree(None).
+        """
+        present = []
+        inventories = []
+        for revision_id in revision_ids:
+            if self.repo.has_revision(revision_id):
+                present.append(revision_id)
+                rev_tree = self.repo.revision_tree(revision_id)
+            else:
+                rev_tree = self.repo.revision_tree(None)
+            inventories.append(rev_tree.inventory)
+        return present, inventories
+
+
+class RevisionStore1(AbstractRevisionStore):
+    """A RevisionStore that uses the old bzrlib Repository API.
+
+    The old API was present until bzr.dev rev 3510.
+    """
+
+    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
+        """See AbstractRevisionStore._load_texts()."""
+        # Backwards compatibility hack: skip the root id.
+        if not self.repo.supports_rich_root():
+            _root_path, root = entries.next()
+            if root.revision != revision_id:
+                raise errors.IncompatibleRevision(repr(self.repo))
+        # Add the texts that are not already present
+        tx = self.repo.get_transaction()
+        for _path, ie in entries:
+            # Asking the weave whether ie.revision is already stored
+            # (get_weave_or_empty() followed by a membership test) is
+            # *really* slow: over 50% of import time. Instead, only store
+            # entries introduced by this revision. This assumes that the
+            # version is not already there. In the general case, a shared
+            # repository might already have the revision but we arguably
+            # don't need that check when importing from a foreign system.
+            if ie.revision != revision_id:
+                continue
+            # Collect the distinct revisions of this file in the parents.
+            text_parents = []
+            for parent_inv in parent_invs:
+                if ie.file_id not in parent_inv:
+                    continue
+                parent_id = parent_inv[ie.file_id].revision
+                if parent_id in text_parents:
+                    continue
+                text_parents.append(parent_id)
+            lines = text_provider(ie.file_id)
+            vfile = self.repo.weave_store.get_weave_or_empty(ie.file_id, tx)
+            vfile.add_lines(revision_id, text_parents, lines)
+
+    def _get_lines(self, file_id, revision_id):
+        """Return the lines stored for file_id in revision_id.
+
+        :param file_id: the file identifier
+        :param revision_id: the revision identifier
+        """
+        tx = self.repo.get_transaction()
+        # Look the weave up by the file_id argument; no inventory entry is
+        # in scope in this method.
+        w = self.repo.weave_store.get_weave(file_id, tx)
+        return w.get_lines(revision_id)
+
+    def _add_revision(self, rev, inv):
+        """See AbstractRevisionStore._add_revision()."""
+        # There's no need to do everything repo.add_revision does and
+        # doing so (since bzr.dev 3392) can be pretty slow for long
+        # delta chains on inventories. Just do the essentials here ...
+        _mod_revision.check_not_reserved_id(rev.revision_id)
+        self.repo._revision_store.add_revision(rev,
+            self.repo.get_transaction())
+
+
+class RevisionStore2(AbstractRevisionStore):
+    """A RevisionStore that uses the new bzrlib Repository API."""
+
+    def _load_texts(self, revision_id, entries, parent_invs, text_provider):
+        """See AbstractRevisionStore._load_texts()."""
+        # Backwards compatibility hack: skip the root id.
+        if not self.repo.supports_rich_root():
+            _root_path, root = entries.next()
+            if root.revision != revision_id:
+                raise errors.IncompatibleRevision(repr(self.repo))
+        # Map each (file_id, revision) text key to its inventory entry.
+        text_keys = {}
+        for _path, ie in entries:
+            text_keys[(ie.file_id, ie.revision)] = ie
+        text_parent_map = self.repo.texts.get_parent_map(text_keys)
+        missing_texts = set(text_keys) - set(text_parent_map)
+        # Add the texts that are not already present
+        for text_key in missing_texts:
+            ie = text_keys[text_key]
+            text_parents = []
+            for parent_inv in parent_invs:
+                if ie.file_id not in parent_inv:
+                    continue
+                # text_parents holds (file_id, revision) keys, so the
+                # duplicate check has to compare the key, not the bare
+                # revision id.
+                parent_key = (ie.file_id, parent_inv[ie.file_id].revision)
+                if parent_key in text_parents:
+                    continue
+                text_parents.append(parent_key)
+            lines = text_provider(ie.file_id)
+            self.repo.texts.add_lines(text_key, text_parents, lines)
+
+    def _get_lines(self, file_id, revision_id):
+        """Return the lines stored for file_id in revision_id.
+
+        :param file_id: the file identifier
+        :param revision_id: the revision identifier
+        :raises errors.RevisionNotPresent: if the text is absent
+        """
+        record = self.repo.texts.get_record_stream([(file_id, revision_id)],
+            'unordered', True).next()
+        if record.storage_kind == 'absent':
+            raise errors.RevisionNotPresent(record.key, self.repo)
+        return osutils.split_lines(record.get_bytes_as('fulltext'))
+
+    # NOTE: _add_revision is deliberately not overridden here. A shortcut
+    # that skipped most of repo.add_revision (which, since bzr.dev 3392,
+    # can be pretty slow for long delta chains on inventories) and only
+    # called _mod_revision.check_not_reserved_id(rev.revision_id) followed
+    # by self.repo._add_revision(rev) is disabled because it is breaking
+    # imports into brisbane-core currently.
+
+
+class ImportRevisionStore1(RevisionStore1):
+    """A RevisionStore (old Repository API) optimised for importing.
+
+    This implementation caches serialised inventory texts and provides
+    fine-grained control over when inventories are stored as fulltexts.
+    """
+
+    def __init__(self, repo, parent_texts_to_cache=1, fulltext_when=None,
+        random_ids=True):
+        """See AbstractRevisionStore.__init__.
+
+        :param repo: the target repository
+        :param parent_texts_to_cache: the number of parent texts to cache
+        :param fulltext_when: if non None, a function to call to decide
+            whether to fulltext the inventory or not. The revision count
+            is passed as a parameter and the result is treated as a boolean.
+        :param random_ids: passed as the random_id argument when inventory
+            lines are checked and indexed in _inventory_add_lines()
+        """
+        RevisionStore1.__init__(self, repo)
+        self.inv_parent_texts = lru_cache.LRUCache(parent_texts_to_cache)
+        self.fulltext_when = fulltext_when
+        self.random_ids = random_ids
+        # Number of inventories added so far; fed to fulltext_when.
+        self.revision_count = 0
+
+    def _add_inventory(self, revision_id, inv, parents, parent_invs):
+        """See AbstractRevisionStore._add_inventory."""
+        # Code taken from bzrlib.repository.add_inventory. The sanity
+        # checks raise AssertionError explicitly so that they still run
+        # when Python is started with -O.
+        if not self.repo.is_in_write_group():
+            raise AssertionError("%r not in write group" % (self.repo,))
+        _mod_revision.check_not_reserved_id(revision_id)
+        if not (inv.revision_id is None or inv.revision_id == revision_id):
+            raise AssertionError(
+                "Mismatch between inventory revision"
+                " id and insertion revid (%r, %r)"
+                % (inv.revision_id, revision_id))
+        if inv.root is None:
+            raise AssertionError("inventory has no root")
+        inv_lines = self.repo._serialise_inventory_to_lines(inv)
+        inv_vf = self.repo.get_inventory_weave()
+        sha1, _num_bytes, parent_text = self._inventory_add_lines(inv_vf,
+            revision_id, parents, inv_lines, self.inv_parent_texts)
+        # Remember the content so it can be offered as a parent text the
+        # next time an inventory is added.
+        self.inv_parent_texts[revision_id] = parent_text
+        return sha1
+
+    def _inventory_add_lines(self, inv_vf, version_id, parents, lines,
+        parent_texts):
+        """See Repository._inventory_add_lines().
+
+        :param inv_vf: the inventory knit to add to
+        :param version_id: the version (revision) identifier, a str
+        :param parents: the parent version identifiers
+        :param lines: the serialised inventory as a list of lines
+        :param parent_texts: a mapping of version id to cached content,
+            or None
+        :returns: a (digest, text_length, content) tuple where digest is
+            the sha of the joined lines, text_length is their length (both
+            taken before any trailing newline is added) and content is the
+            object built by the knit's factory
+        """
+        # setup parameters used in original code but not this API
+        self.revision_count += 1
+        if self.fulltext_when is not None:
+            delta = not self.fulltext_when(self.revision_count)
+        else:
+            delta = inv_vf.delta
+        left_matching_blocks = None
+        random_id = self.random_ids
+        check_content = False
+
+        # bzrlib.knit.add_lines() but error checking optimised
+        inv_vf._check_add(version_id, lines, random_id, check_content)
+
+        ####################################################################
+        # bzrlib.knit._add() but skip checking if fulltext better than delta
+        ####################################################################
+
+        line_bytes = ''.join(lines)
+        digest = osutils.sha_string(line_bytes)
+        present_parents = [parent for parent in parents
+            if inv_vf.has_version(parent)]
+        if parent_texts is None:
+            parent_texts = {}
+
+        # can only compress against the left most present parent.
+        if delta and (not present_parents
+            or present_parents[0] != parents[0]):
+            delta = False
+
+        text_length = len(line_bytes)
+        options = []
+        if lines and lines[-1][-1] != '\n':
+            # copy the contents of lines so the caller's list is untouched.
+            lines = lines[:]
+            options.append('no-eol')
+            lines[-1] = lines[-1] + '\n'
+            line_bytes += '\n'
+
+        # NOTE: bzrlib.knit._add() would call
+        # inv_vf._check_should_delta(present_parents) here when delta is
+        # set, to limit the delta chain to a fixed number of deltas. That
+        # check is intentionally skipped; fulltext_when is the control.
+
+        if not isinstance(version_id, str):
+            raise AssertionError("version_id is not a str: %r"
+                % (version_id,))
+        content = inv_vf.factory.make(lines, version_id)
+        if delta or (inv_vf.factory.annotated and len(present_parents) > 0):
+            # Merge annotations from parent texts if needed.
+            delta_hunks = inv_vf._merge_annotations(content, present_parents,
+                parent_texts, delta, inv_vf.factory.annotated,
+                left_matching_blocks)
+
+        if delta:
+            options.append('line-delta')
+            store_lines = inv_vf.factory.lower_line_delta(delta_hunks)
+            size, raw_bytes = inv_vf._data._record_to_data(version_id,
+                digest, store_lines)
+        else:
+            options.append('fulltext')
+            # isinstance is slower and we have no hierarchy.
+            if inv_vf.factory.__class__ == knit.KnitPlainFactory:
+                # Use the already joined bytes saving iteration time in
+                # _record_to_data.
+                size, raw_bytes = inv_vf._data._record_to_data(version_id,
+                    digest, lines, [line_bytes])
+            else:
+                # get mixed annotation + content and feed it into the
+                # serialiser.
+                store_lines = inv_vf.factory.lower_fulltext(content)
+                size, raw_bytes = inv_vf._data._record_to_data(version_id,
+                    digest, store_lines)
+
+        access_memo = inv_vf._data.add_raw_records([size], raw_bytes)[0]
+        inv_vf._index.add_versions(
+            ((version_id, options, access_memo, parents),),
+            random_id=random_id)
+        return digest, text_length, content