Tarball conversion

author: Lorry <lorry@roadtrain.codethink.co.uk> 2012-08-22 15:47:16 +0100
committer: Lorry <lorry@roadtrain.codethink.co.uk> 2012-08-22 15:47:16 +0100
commit: 25335618bf8755ce6b116ee14f47f5a1f2c821e9 (patch)
tree: d889d7ab3f9f985d0c54c534cb8052bd2e6d7163 /bzrlib/versionedfile.py
download: bzr-tarball-25335618bf8755ce6b116ee14f47f5a1f2c821e9.tar.gz
1 files changed, 1963 insertions, 0 deletions
diff --git a/bzrlib/versionedfile.py b/bzrlib/versionedfile.py
new file mode 100644
index 0000000..84d91bb
--- /dev/null
+++ b/bzrlib/versionedfile.py
@@ -0,0 +1,1963 @@
+# Copyright (C) 2006-2011 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Versioned text file storage api."""
+
+from __future__ import absolute_import
+
+from copy import copy
+from cStringIO import StringIO
+import os
+import struct
+from zlib import adler32
+
+from bzrlib.lazy_import import lazy_import
+lazy_import(globals(), """
+from bzrlib import (
+    annotate,
+    bencode,
+    errors,
+    graph as _mod_graph,
+    groupcompress,
+    index,
+    knit,
+    osutils,
+    multiparent,
+    tsort,
+    revision,
+    urlutils,
+    )
+""")
+from bzrlib.registry import Registry
+from bzrlib.textmerge import TextMerge
+
+
+adapter_registry = Registry()
+adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'bzrlib.knit',
+    'DeltaPlainToFullText')
+adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'bzrlib.knit',
+    'FTPlainToFullText')
+adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'),
+    'bzrlib.knit', 'DeltaAnnotatedToUnannotated')
+adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'),
+    'bzrlib.knit', 'DeltaAnnotatedToFullText')
+adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'),
+    'bzrlib.knit', 'FTAnnotatedToUnannotated')
+adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'),
+    'bzrlib.knit', 'FTAnnotatedToFullText')
+# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'),
+#     'bzrlib.knit', 'FTAnnotatedToChunked')
+
+
+class ContentFactory(object):
+    """Abstract interface for insertion and retrieval from a VersionedFile.
+
+    :ivar sha1: None, or the sha1 of the content fulltext.
+    :ivar storage_kind: The native storage kind of this factory. One of
+        'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft',
+        'knit-delta', 'fulltext', 'knit-annotated-ft-gz',
+        'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'.
+    :ivar key: The key of this content. Each key is a tuple with a single
+        string in it.
+    :ivar parents: A tuple of parent keys for self.key. If the object has
+        no parent information, None (as opposed to () for an empty list of
+        parents).
+    """
+
+    def __init__(self):
+        """Create a ContentFactory."""
+        self.sha1 = None
+        self.storage_kind = None
+        self.key = None
+        self.parents = None
+
+
+class ChunkedContentFactory(ContentFactory):
+    """Static data content factory.
+
+    This takes a 'chunked' list of strings. The only requirement on 'chunked' is
+    that ''.join(lines) becomes a valid fulltext. A tuple of a single string
+    satisfies this, as does a list of lines.
+
+    :ivar sha1: None, or the sha1 of the content fulltext.
+    :ivar storage_kind: The native storage kind of this factory. Always
+        'chunked'
+    :ivar key: The key of this content. Each key is a tuple with a single
+        string in it.
+    :ivar parents: A tuple of parent keys for self.key. If the object has
+        no parent information, None (as opposed to () for an empty list of
+        parents).
+     """
+
+    def __init__(self, key, parents, sha1, chunks):
+        """Create a ContentFactory."""
+        self.sha1 = sha1
+        self.storage_kind = 'chunked'
+        self.key = key
+        self.parents = parents
+        self._chunks = chunks
+
+    def get_bytes_as(self, storage_kind):
+        if storage_kind == 'chunked':
+            return self._chunks
+        elif storage_kind == 'fulltext':
+            return ''.join(self._chunks)
+        raise errors.UnavailableRepresentation(self.key, storage_kind,
+            self.storage_kind)
+
+
+class FulltextContentFactory(ContentFactory):
+    """Static data content factory.
+
+    This takes a fulltext when created and just returns that during
+    get_bytes_as('fulltext').
+
+    :ivar sha1: None, or the sha1 of the content fulltext.
+    :ivar storage_kind: The native storage kind of this factory. Always
+        'fulltext'.
+    :ivar key: The key of this content. Each key is a tuple with a single
+        string in it.
+    :ivar parents: A tuple of parent keys for self.key. If the object has
+        no parent information, None (as opposed to () for an empty list of
+        parents).
+     """
+
+    def __init__(self, key, parents, sha1, text):
+        """Create a ContentFactory."""
+        self.sha1 = sha1
+        self.storage_kind = 'fulltext'
+        self.key = key
+        self.parents = parents
+        self._text = text
+
+    def get_bytes_as(self, storage_kind):
+        if storage_kind == self.storage_kind:
+            return self._text
+        elif storage_kind == 'chunked':
+            return [self._text]
+        raise errors.UnavailableRepresentation(self.key, storage_kind,
+            self.storage_kind)
+
+
+class AbsentContentFactory(ContentFactory):
+    """A placeholder content factory for unavailable texts.
+
+    :ivar sha1: None.
+    :ivar storage_kind: 'absent'.
+    :ivar key: The key of this content. Each key is a tuple with a single
+        string in it.
+    :ivar parents: None.
+    """
+
+    def __init__(self, key):
+        """Create a ContentFactory."""
+        self.sha1 = None
+        self.storage_kind = 'absent'
+        self.key = key
+        self.parents = None
+
+    def get_bytes_as(self, storage_kind):
+        raise ValueError('A request was made for key: %s, but that'
+                         ' content is not available, and the calling'
+                         ' code does not handle if it is missing.'
+                         % (self.key,))
+
+
+class AdapterFactory(ContentFactory):
+    """A content factory to adapt between key prefix's."""
+
+    def __init__(self, key, parents, adapted):
+        """Create an adapter factory instance."""
+        self.key = key
+        self.parents = parents
+        self._adapted = adapted
+
+    def __getattr__(self, attr):
+        """Return a member from the adapted object."""
+        if attr in ('key', 'parents'):
+            return self.__dict__[attr]
+        else:
+            return getattr(self._adapted, attr)
+
+
+def filter_absent(record_stream):
+    """Adapt a record stream to remove absent records."""
+    for record in record_stream:
+        if record.storage_kind != 'absent':
+            yield record
+
+
+class _MPDiffGenerator(object):
+    """Pull out the functionality for generating mp_diffs."""
+
+    def __init__(self, vf, keys):
+        self.vf = vf
+        # This is the order the keys were requested in
+        self.ordered_keys = tuple(keys)
+        # keys + their parents, what we need to compute the diffs
+        self.needed_keys = ()
+        # Map from key: mp_diff
+        self.diffs = {}
+        # Map from key: parents_needed (may have ghosts)
+        self.parent_map = {}
+        # Parents that aren't present
+        self.ghost_parents = ()
+        # Map from parent_key => number of children for this text
+        self.refcounts = {}
+        # Content chunks that are cached while we still need them
+        self.chunks = {}
+
+    def _find_needed_keys(self):
+        """Find the set of keys we need to request.
+
+        This includes all the original keys passed in, and the non-ghost
+        parents of those keys.
+
+        :return: (needed_keys, refcounts)
+            needed_keys is the set of all texts we need to extract
+            refcounts is a dict of {key: num_children} letting us know when we
+                no longer need to cache a given parent text
+        """
+        # All the keys and their parents
+        needed_keys = set(self.ordered_keys)
+        parent_map = self.vf.get_parent_map(needed_keys)
+        self.parent_map = parent_map
+        # TODO: Should we be using a different construct here? I think this
+        #       uses difference_update internally, and we expect the result to
+        #       be tiny
+        missing_keys = needed_keys.difference(parent_map)
+        if missing_keys:
+            raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
+        # Parents that might be missing. They are allowed to be ghosts, but we
+        # should check for them
+        refcounts = {}
+        setdefault = refcounts.setdefault
+        just_parents = set()
+        for child_key, parent_keys in parent_map.iteritems():
+            if not parent_keys:
+                # parent_keys may be None if a given VersionedFile claims to
+                # not support graph operations.
+                continue
+            just_parents.update(parent_keys)
+            needed_keys.update(parent_keys)
+            for p in parent_keys:
+                refcounts[p] = setdefault(p, 0) + 1
+        just_parents.difference_update(parent_map)
+        # Remove any parents that are actually ghosts from the needed set
+        self.present_parents = set(self.vf.get_parent_map(just_parents))
+        self.ghost_parents = just_parents.difference(self.present_parents)
+        needed_keys.difference_update(self.ghost_parents)
+        self.needed_keys = needed_keys
+        self.refcounts = refcounts
+        return needed_keys, refcounts
+
+    def _compute_diff(self, key, parent_lines, lines):
+        """Compute a single mp_diff, and store it in self._diffs"""
+        if len(parent_lines) > 0:
+            # XXX: _extract_blocks is not usefully defined anywhere...
+            #      It was meant to extract the left-parent diff without
+            #      having to recompute it for Knit content (pack-0.92,
+            #      etc). That seems to have regressed somewhere
+            left_parent_blocks = self.vf._extract_blocks(key,
+                parent_lines[0], lines)
+        else:
+            left_parent_blocks = None
+        diff = multiparent.MultiParent.from_lines(lines,
+                    parent_lines, left_parent_blocks)
+        self.diffs[key] = diff
+
+    def _process_one_record(self, key, this_chunks):
+        parent_keys = None
+        if key in self.parent_map:
+            # This record should be ready to diff, since we requested
+            # content in 'topological' order
+            parent_keys = self.parent_map.pop(key)
+            # If a VersionedFile claims 'no-graph' support, then it may return
+            # None for any parent request, so we replace it with an empty tuple
+            if parent_keys is None:
+                parent_keys = ()
+            parent_lines = []
+            for p in parent_keys:
+                # Alternatively we could check p not in self.needed_keys, but
+                # ghost_parents should be tiny versus huge
+                if p in self.ghost_parents:
+                    continue
+                refcount = self.refcounts[p]
+                if refcount == 1: # Last child reference
+                    self.refcounts.pop(p)
+                    parent_chunks = self.chunks.pop(p)
+                else:
+                    self.refcounts[p] = refcount - 1
+                    parent_chunks = self.chunks[p]
+                p_lines = osutils.chunks_to_lines(parent_chunks)
+                # TODO: Should we cache the line form? We did the
+                #       computation to get it, but storing it this way will
+                #       be less memory efficient...
+                parent_lines.append(p_lines)
+                del p_lines
+            lines = osutils.chunks_to_lines(this_chunks)
+            # Since we needed the lines, we'll go ahead and cache them this way
+            this_chunks = lines
+            self._compute_diff(key, parent_lines, lines)
+            del lines
+        # Is this content required for any more children?
+        if key in self.refcounts:
+            self.chunks[key] = this_chunks
+
+    def _extract_diffs(self):
+        needed_keys, refcounts = self._find_needed_keys()
+        for record in self.vf.get_record_stream(needed_keys,
+                                                'topological', True):
+            if record.storage_kind == 'absent':
+                raise errors.RevisionNotPresent(record.key, self.vf)
+            self._process_one_record(record.key,
+                                     record.get_bytes_as('chunked'))
+        
+    def compute_diffs(self):
+        self._extract_diffs()
+        dpop = self.diffs.pop
+        return [dpop(k) for k in self.ordered_keys]
+
+
+class VersionedFile(object):
+    """Versioned text file storage.
+
+    A versioned file manages versions of line-based text files,
+    keeping track of the originating version for each line.
+
+    To clients the "lines" of the file are represented as a list of
+    strings. These strings will typically have terminal newline
+    characters, but this is not required.  In particular files commonly
+    do not have a newline at the end of the file.
+
+    Texts are identified by a version-id string.
+    """
+
+    @staticmethod
+    def check_not_reserved_id(version_id):
+        revision.check_not_reserved_id(version_id)
+
+    def copy_to(self, name, transport):
+        """Copy this versioned file to name on transport."""
+        raise NotImplementedError(self.copy_to)
+
+    def get_record_stream(self, versions, ordering, include_delta_closure):
+        """Get a stream of records for versions.
+
+        :param versions: The versions to include. Each version is a tuple
+            (version,).
+        :param ordering: Either 'unordered' or 'topological'. A topologically
+            sorted stream has compression parents strictly before their
+            children.
+        :param include_delta_closure: If True then the closure across any
+            compression parents will be included (in the data content of the
+            stream, not in the emitted records). This guarantees that
+            'fulltext' can be used successfully on every record.
+        :return: An iterator of ContentFactory objects, each of which is only
+            valid until the iterator is advanced.
+        """
+        raise NotImplementedError(self.get_record_stream)
+
+    def has_version(self, version_id):
+        """Returns whether version is present."""
+        raise NotImplementedError(self.has_version)
+
+    def insert_record_stream(self, stream):
+        """Insert a record stream into this versioned file.
+
+        :param stream: A stream of records to insert.
+        :return: None
+        :seealso VersionedFile.get_record_stream:
+        """
+        raise NotImplementedError
+
+    def add_lines(self, version_id, parents, lines, parent_texts=None,
+        left_matching_blocks=None, nostore_sha=None, random_id=False,
+        check_content=True):
+        """Add a single text on top of the versioned file.
+
+        Must raise RevisionAlreadyPresent if the new version is
+        already present in file history.
+
+        Must raise RevisionNotPresent if any of the given parents are
+        not present in file history.
+
+        :param lines: A list of lines. Each line must be a bytestring. And all
+            of them except the last must be terminated with \n and contain no
+            other \n's. The last line may either contain no \n's or a single
+            terminated \n. If the lines list does meet this constraint the add
+            routine may error or may succeed - but you will be unable to read
+            the data back accurately. (Checking the lines have been split
+            correctly is expensive and extremely unlikely to catch bugs so it
+            is not done at runtime unless check_content is True.)
+        :param parent_texts: An optional dictionary containing the opaque
+            representations of some or all of the parents of version_id to
+            allow delta optimisations.  VERY IMPORTANT: the texts must be those
+            returned by add_lines or data corruption can be caused.
+        :param left_matching_blocks: a hint about which areas are common
+            between the text and its left-hand-parent.  The format is
+            the SequenceMatcher.get_matching_blocks format.
+        :param nostore_sha: Raise ExistingContent and do not add the lines to
+            the versioned file if the digest of the lines matches this.
+        :param random_id: If True a random id has been selected rather than
+            an id determined by some deterministic process such as a converter
+            from a foreign VCS. When True the backend may choose not to check
+            for uniqueness of the resulting key within the versioned file, so
+            this should only be done when the result is expected to be unique
+            anyway.
+        :param check_content: If True, the lines supplied are verified to be
+            bytestrings that are correctly formed lines.
+        :return: The text sha1, the number of bytes in the text, and an opaque
+                 representation of the inserted version which can be provided
+                 back to future add_lines calls in the parent_texts dictionary.
+        """
+        self._check_write_ok()
+        return self._add_lines(version_id, parents, lines, parent_texts,
+            left_matching_blocks, nostore_sha, random_id, check_content)
+
+    def _add_lines(self, version_id, parents, lines, parent_texts,
+        left_matching_blocks, nostore_sha, random_id, check_content):
+        """Helper to do the class specific add_lines."""
+        raise NotImplementedError(self.add_lines)
+
+    def add_lines_with_ghosts(self, version_id, parents, lines,
+        parent_texts=None, nostore_sha=None, random_id=False,
+        check_content=True, left_matching_blocks=None):
+        """Add lines to the versioned file, allowing ghosts to be present.
+
+        This takes the same parameters as add_lines and returns the same.
+        """
+        self._check_write_ok()
+        return self._add_lines_with_ghosts(version_id, parents, lines,
+            parent_texts, nostore_sha, random_id, check_content, left_matching_blocks)
+
+    def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts,
+        nostore_sha, random_id, check_content, left_matching_blocks):
+        """Helper to do class specific add_lines_with_ghosts."""
+        raise NotImplementedError(self.add_lines_with_ghosts)
+
+    def check(self, progress_bar=None):
+        """Check the versioned file for integrity."""
+        raise NotImplementedError(self.check)
+
+    def _check_lines_not_unicode(self, lines):
+        """Check that lines being added to a versioned file are not unicode."""
+        for line in lines:
+            if line.__class__ is not str:
+                raise errors.BzrBadParameterUnicode("lines")
+
+    def _check_lines_are_lines(self, lines):
+        """Check that the lines really are full lines without inline EOL."""
+        for line in lines:
+            if '\n' in line[:-1]:
+                raise errors.BzrBadParameterContainsNewline("lines")
+
+    def get_format_signature(self):
+        """Get a text description of the data encoding in this file.
+
+        :since: 0.90
+        """
+        raise NotImplementedError(self.get_format_signature)
+
+    def make_mpdiffs(self, version_ids):
+        """Create multiparent diffs for specified versions."""
+        # XXX: Can't use _MPDiffGenerator just yet. This is because version_ids
+        #      is a list of strings, not keys. And while self.get_record_stream
+        #      is supported, it takes *keys*, while self.get_parent_map() takes
+        #      strings... *sigh*
+        knit_versions = set()
+        knit_versions.update(version_ids)
+        parent_map = self.get_parent_map(version_ids)
+        for version_id in version_ids:
+            try:
+                knit_versions.update(parent_map[version_id])
+            except KeyError:
+                raise errors.RevisionNotPresent(version_id, self)
+        # We need to filter out ghosts, because we can't diff against them.
+        knit_versions = set(self.get_parent_map(knit_versions).keys())
+        lines = dict(zip(knit_versions,
+            self._get_lf_split_line_list(knit_versions)))
+        diffs = []
+        for version_id in version_ids:
+            target = lines[version_id]
+            try:
+                parents = [lines[p] for p in parent_map[version_id] if p in
+                    knit_versions]
+            except KeyError:
+                # I don't know how this could ever trigger.
+                # parent_map[version_id] was already triggered in the previous
+                # for loop, and lines[p] has the 'if p in knit_versions' check,
+                # so we again won't have a KeyError.
+                raise errors.RevisionNotPresent(version_id, self)
+            if len(parents) > 0:
+                left_parent_blocks = self._extract_blocks(version_id,
+                                                          parents[0], target)
+            else:
+                left_parent_blocks = None
+            diffs.append(multiparent.MultiParent.from_lines(target, parents,
+                         left_parent_blocks))
+        return diffs
+
+    def _extract_blocks(self, version_id, source, target):
+        return None
+
+    def add_mpdiffs(self, records):
+        """Add mpdiffs to this VersionedFile.
+
+        Records should be iterables of version, parents, expected_sha1,
+        mpdiff. mpdiff should be a MultiParent instance.
+        """
+        # Does this need to call self._check_write_ok()? (IanC 20070919)
+        vf_parents = {}
+        mpvf = multiparent.MultiMemoryVersionedFile()
+        versions = []
+        for version, parent_ids, expected_sha1, mpdiff in records:
+            versions.append(version)
+            mpvf.add_diff(mpdiff, version, parent_ids)
+        needed_parents = set()
+        for version, parent_ids, expected_sha1, mpdiff in records:
+            needed_parents.update(p for p in parent_ids
+                                  if not mpvf.has_version(p))
+        present_parents = set(self.get_parent_map(needed_parents).keys())
+        for parent_id, lines in zip(present_parents,
+                                 self._get_lf_split_line_list(present_parents)):
+            mpvf.add_version(lines, parent_id, [])
+        for (version, parent_ids, expected_sha1, mpdiff), lines in\
+            zip(records, mpvf.get_line_list(versions)):
+            if len(parent_ids) == 1:
+                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
+                    mpvf.get_diff(parent_ids[0]).num_lines()))
+            else:
+                left_matching_blocks = None
+            try:
+                _, _, version_text = self.add_lines_with_ghosts(version,
+                    parent_ids, lines, vf_parents,
+                    left_matching_blocks=left_matching_blocks)
+            except NotImplementedError:
+                # The vf can't handle ghosts, so add lines normally, which will
+                # (reasonably) fail if there are ghosts in the data.
+                _, _, version_text = self.add_lines(version,
+                    parent_ids, lines, vf_parents,
+                    left_matching_blocks=left_matching_blocks)
+            vf_parents[version] = version_text
+        sha1s = self.get_sha1s(versions)
+        for version, parent_ids, expected_sha1, mpdiff in records:
+            if expected_sha1 != sha1s[version]:
+                raise errors.VersionedFileInvalidChecksum(version)
+
+    def get_text(self, version_id):
+        """Return version contents as a text string.
+
+        Raises RevisionNotPresent if version is not present in
+        file history.
+        """
+        return ''.join(self.get_lines(version_id))
+    get_string = get_text
+
+    def get_texts(self, version_ids):
+        """Return the texts of listed versions as a list of strings.
+
+        Raises RevisionNotPresent if version is not present in
+        file history.
+        """
+        return [''.join(self.get_lines(v)) for v in version_ids]
+
+    def get_lines(self, version_id):
+        """Return version contents as a sequence of lines.
+
+        Raises RevisionNotPresent if version is not present in
+        file history.
+        """
+        raise NotImplementedError(self.get_lines)
+
+    def _get_lf_split_line_list(self, version_ids):
+        return [StringIO(t).readlines() for t in self.get_texts(version_ids)]
+
+    def get_ancestry(self, version_ids, topo_sorted=True):
+        """Return a list of all ancestors of given version(s). This
+        will not include the null revision.
+
+        This list will not be topologically sorted if topo_sorted=False is
+        passed.
+
+        Must raise RevisionNotPresent if any of the given versions are
+        not present in file history."""
+        if isinstance(version_ids, basestring):
+            version_ids = [version_ids]
+        raise NotImplementedError(self.get_ancestry)
+
+    def get_ancestry_with_ghosts(self, version_ids):
+        """Return a list of all ancestors of given version(s). This
+        will not include the null revision.
+
+        Must raise RevisionNotPresent if any of the given versions are
+        not present in file history.
+
+        Ghosts that are known about will be included in ancestry list,
+        but are not explicitly marked.
+        """
+        raise NotImplementedError(self.get_ancestry_with_ghosts)
+
+    def get_parent_map(self, version_ids):
+        """Get a map of the parents of version_ids.
+
+        :param version_ids: The version ids to look up parents for.
+        :return: A mapping from version id to parents.
+        """
+        raise NotImplementedError(self.get_parent_map)
+
+    def get_parents_with_ghosts(self, version_id):
+        """Return version names for parents of version_id.
+
+        Will raise RevisionNotPresent if version_id is not present
+        in the history.
+
+        Ghosts that are known about will be included in the parent list,
+        but are not explicitly marked.
+        """
+        try:
+            return list(self.get_parent_map([version_id])[version_id])
+        except KeyError:
+            raise errors.RevisionNotPresent(version_id, self)
+
+    def annotate(self, version_id):
+        """Return a list of (version-id, line) tuples for version_id.
+
+        :raise RevisionNotPresent: If the given version is
+        not present in file history.
+        """
+        raise NotImplementedError(self.annotate)
+
+    def iter_lines_added_or_present_in_versions(self, version_ids=None,
+                                                pb=None):
+        """Iterate over the lines in the versioned file from version_ids.
+
+        This may return lines from other versions. Each item the returned
+        iterator yields is a tuple of a line and a text version that that line
+        is present in (not introduced in).
+
+        Ordering of results is in whatever order is most suitable for the
+        underlying storage format.
+
+        If a progress bar is supplied, it may be used to indicate progress.
+        The caller is responsible for cleaning up progress bars (because this
+        is an iterator).
+
+        NOTES: Lines are normalised: they will all have \n terminators.
+               Lines are returned in arbitrary order.
+
+        :return: An iterator over (line, version_id).
+        """
+        raise NotImplementedError(self.iter_lines_added_or_present_in_versions)
+
+    def plan_merge(self, ver_a, ver_b):
+        """Return pseudo-annotation indicating how the two versions merge.
+
+        This is computed between versions a and b and their common
+        base.
+
+        Weave lines present in none of them are skipped entirely.
+
+        Legend:
+        killed-base Dead in base revision
+        killed-both Killed in each revision
+        killed-a    Killed in a
+        killed-b    Killed in b
+        unchanged   Alive in both a and b (possibly created in both)
+        new-a       Created in a
+        new-b       Created in b
+        ghost-a     Killed in a, unborn in b
+        ghost-b     Killed in b, unborn in a
+        irrelevant  Not in either revision
+        """
+        raise NotImplementedError(VersionedFile.plan_merge)
+
+    def weave_merge(self, plan, a_marker=TextMerge.A_MARKER,
+                    b_marker=TextMerge.B_MARKER):
+        return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0]
+
+
+class RecordingVersionedFilesDecorator(object):
+    """A minimal versioned files that records calls made on it.
+
+    Only enough methods have been added to support tests using it to date.
+
+    :ivar calls: A list of the calls made; can be reset at any time by
+        assigning [] to it.
+    """
+
+    def __init__(self, backing_vf):
+        """Create a RecordingVersionedFilesDecorator decorating backing_vf.
+
+        :param backing_vf: The versioned file to answer all methods.
+        """
+        self._backing_vf = backing_vf
+        self.calls = []
+
+    def add_lines(self, key, parents, lines, parent_texts=None,
+        left_matching_blocks=None, nostore_sha=None, random_id=False,
+        check_content=True):
+        self.calls.append(("add_lines", key, parents, lines, parent_texts,
+            left_matching_blocks, nostore_sha, random_id, check_content))
+        return self._backing_vf.add_lines(key, parents, lines, parent_texts,
+            left_matching_blocks, nostore_sha, random_id, check_content)
+
+    def check(self):
+        self._backing_vf.check()
+
+    def get_parent_map(self, keys):
+        self.calls.append(("get_parent_map", copy(keys)))
+        return self._backing_vf.get_parent_map(keys)
+
+    def get_record_stream(self, keys, sort_order, include_delta_closure):
+        self.calls.append(("get_record_stream", list(keys), sort_order,
+            include_delta_closure))
+        return self._backing_vf.get_record_stream(keys, sort_order,
+            include_delta_closure)
+
+    def get_sha1s(self, keys):
+        self.calls.append(("get_sha1s", copy(keys)))
+        return self._backing_vf.get_sha1s(keys)
+
+    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
+        self.calls.append(("iter_lines_added_or_present_in_keys", copy(keys)))
+        return self._backing_vf.iter_lines_added_or_present_in_keys(keys, pb=pb)
+
+    def keys(self):
+        self.calls.append(("keys",))
+        return self._backing_vf.keys()
+
+
+class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator):
+    """A VF that records calls, and returns keys in specific order.
+
+    :ivar calls: A list of the calls made; can be reset at any time by
+        assigning [] to it.
+    """
+
+    def __init__(self, backing_vf, key_priority):
+        """Create a RecordingVersionedFilesDecorator decorating backing_vf.
+
+        :param backing_vf: The versioned file to answer all methods.
+        :param key_priority: A dictionary defining what order keys should be
+            returned from an 'unordered' get_record_stream request.
+            Keys with lower priority are returned first, keys not present in
+            the map get an implicit priority of 0, and are returned in
+            lexicographical order.
+        """
+        RecordingVersionedFilesDecorator.__init__(self, backing_vf)
+        self._key_priority = key_priority
+
+    def get_record_stream(self, keys, sort_order, include_delta_closure):
+        self.calls.append(("get_record_stream", list(keys), sort_order,
+            include_delta_closure))
+        if sort_order == 'unordered':
+            def sort_key(key):
+                return (self._key_priority.get(key, 0), key)
+            # Use a defined order by asking for the keys one-by-one from the
+            # backing_vf
+            for key in sorted(keys, key=sort_key):
+                for record in self._backing_vf.get_record_stream([key],
+                                'unordered', include_delta_closure):
+                    yield record
+        else:
+            for record in self._backing_vf.get_record_stream(keys, sort_order,
+                            include_delta_closure):
+                yield record
+
+
+class KeyMapper(object):
+    """KeyMappers map between keys and underlying partitioned storage."""
+
+    def map(self, key):
+        """Map key to an underlying storage identifier.
+
+        :param key: A key tuple e.g. ('file-id', 'revision-id').
+        :return: An underlying storage identifier, specific to the partitioning
+            mechanism.
+        """
+        raise NotImplementedError(self.map)
+
+    def unmap(self, partition_id):
+        """Map a partitioned storage id back to a key prefix.
+
+        :param partition_id: The underlying partition id.
+        :return: As much of a key (or prefix) as is derivable from the partition
+            id.
+        """
+        raise NotImplementedError(self.unmap)
+
+
+class ConstantMapper(KeyMapper):
+    """A key mapper that maps to a constant result."""
+
+    def __init__(self, result):
+        """Create a ConstantMapper which will return result for all maps."""
+        self._result = result
+
+    def map(self, key):
+        """See KeyMapper.map()."""
+        return self._result
+
+
+class URLEscapeMapper(KeyMapper):
+    """Base class for use with transport backed storage.
+
+    This provides a map and unmap wrapper that respectively url escape and
+    unescape their outputs and inputs.
+    """
+
+    def map(self, key):
+        """See KeyMapper.map()."""
+        return urlutils.quote(self._map(key))
+
+    def unmap(self, partition_id):
+        """See KeyMapper.unmap()."""
+        return self._unmap(urlutils.unquote(partition_id))
+
+
+class PrefixMapper(URLEscapeMapper):
+    """A key mapper that extracts the first component of a key.
+
+    This mapper is for use with a transport based backend.
+    """
+
+    def _map(self, key):
+        """See KeyMapper.map()."""
+        return key[0]
+
+    def _unmap(self, partition_id):
+        """See KeyMapper.unmap()."""
+        return (partition_id,)
+
+
+class HashPrefixMapper(URLEscapeMapper):
+    """A key mapper that combines the first component of a key with a hash.
+
+    This mapper is for use with a transport based backend.
+    """
+
+    def _map(self, key):
+        """See KeyMapper.map()."""
+        prefix = self._escape(key[0])
+        return "%02x/%s" % (adler32(prefix) & 0xff, prefix)
+
+    def _escape(self, prefix):
+        """No escaping needed here."""
+        return prefix
+
+    def _unmap(self, partition_id):
+        """See KeyMapper.unmap()."""
+        return (self._unescape(osutils.basename(partition_id)),)
+
+    def _unescape(self, basename):
+        """No unescaping needed for HashPrefixMapper."""
+        return basename
+
+
+class HashEscapedPrefixMapper(HashPrefixMapper):
+    """Combines the escaped first component of a key with a hash.
+
+    This mapper is for use with a transport based backend.
+    """
+
+    _safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,."
+
+    def _escape(self, prefix):
+        """Turn a key element into a filesystem safe string.
+
+        This is similar to a plain urlutils.quote, except
+        it uses specific safe characters, so that it doesn't
+        have to translate a lot of valid file ids.
+        """
+        # @ does not get escaped. This is because it is a valid
+        # filesystem character we use all the time, and it looks
+        # a lot better than seeing %40 all the time.
+        r = [((c in self._safe) and c or ('%%%02x' % ord(c)))
+             for c in prefix]
+        return ''.join(r)
+
+    def _unescape(self, basename):
+        """Escaped names are easily unescaped by urlutils."""
+        return urlutils.unquote(basename)
+
+
+def make_versioned_files_factory(versioned_file_factory, mapper):
+    """Create a ThunkedVersionedFiles factory.
+
+    This will create a callable which when called creates a
+    ThunkedVersionedFiles on a transport, using mapper to access individual
+    versioned files, and versioned_file_factory to create each individual file.
+    """
+    def factory(transport):
+        return ThunkedVersionedFiles(transport, versioned_file_factory, mapper,
+            lambda:True)
+    return factory
+
+
+class VersionedFiles(object):
+    """Storage for many versioned files.
+
+    This object allows a single keyspace for accessing the history graph and
+    contents of named bytestrings.
+
+    Currently no implementation allows the graph of different key prefixes to
+    intersect, but the API does allow such implementations in the future.
+
+    The keyspace is expressed via simple tuples. Any instance of VersionedFiles
+    may have a different length key-size, but that size will be constant for
+    all texts added to or retrieved from it. For instance, bzrlib uses
+    instances with a key-size of 2 for storing user files in a repository, with
+    the first element the fileid, and the second the version of that file.
+
+    The use of tuples allows a single code base to support several different
+    uses with only the mapping logic changing from instance to instance.
+
+    :ivar _immediate_fallback_vfs: For subclasses that support stacking,
+        this is a list of other VersionedFiles immediately underneath this
+        one.  They may in turn each have further fallbacks.
+    """
+
+    def add_lines(self, key, parents, lines, parent_texts=None,
+        left_matching_blocks=None, nostore_sha=None, random_id=False,
+        check_content=True):
+        """Add a text to the store.
+
+        :param key: The key tuple of the text to add. If the last element is
+            None, a CHK string will be generated during the addition.
+        :param parents: The parents key tuples of the text to add.
+        :param lines: A list of lines. Each line must be a bytestring. And all
+            of them except the last must be terminated with \n and contain no
+            other \n's. The last line may either contain no \n's or a single
+            terminating \n. If the lines list does meet this constraint the add
+            routine may error or may succeed - but you will be unable to read
+            the data back accurately. (Checking the lines have been split
+            correctly is expensive and extremely unlikely to catch bugs so it
+            is not done at runtime unless check_content is True.)
+        :param parent_texts: An optional dictionary containing the opaque
+            representations of some or all of the parents of version_id to
+            allow delta optimisations.  VERY IMPORTANT: the texts must be those
+            returned by add_lines or data corruption can be caused.
+        :param left_matching_blocks: a hint about which areas are common
+            between the text and its left-hand-parent.  The format is
+            the SequenceMatcher.get_matching_blocks format.
+        :param nostore_sha: Raise ExistingContent and do not add the lines to
+            the versioned file if the digest of the lines matches this.
+        :param random_id: If True a random id has been selected rather than
+            an id determined by some deterministic process such as a converter
+            from a foreign VCS. When True the backend may choose not to check
+            for uniqueness of the resulting key within the versioned file, so
+            this should only be done when the result is expected to be unique
+            anyway.
+        :param check_content: If True, the lines supplied are verified to be
+            bytestrings that are correctly formed lines.
+        :return: The text sha1, the number of bytes in the text, and an opaque
+                 representation of the inserted version which can be provided
+                 back to future add_lines calls in the parent_texts dictionary.
+        """
+        raise NotImplementedError(self.add_lines)
+
+    def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
+        """Add a text to the store.
+
+        This is a private function for use by VersionedFileCommitBuilder.
+
+        :param key: The key tuple of the text to add. If the last element is
+            None, a CHK string will be generated during the addition.
+        :param parents: The parents key tuples of the text to add.
+        :param text: A string containing the text to be committed.
+        :param nostore_sha: Raise ExistingContent and do not add the lines to
+            the versioned file if the digest of the lines matches this.
+        :param random_id: If True a random id has been selected rather than
+            an id determined by some deterministic process such as a converter
+            from a foreign VCS. When True the backend may choose not to check
+            for uniqueness of the resulting key within the versioned file, so
+            this should only be done when the result is expected to be unique
+            anyway.
+        :param check_content: If True, the lines supplied are verified to be
+            bytestrings that are correctly formed lines.
+        :return: The text sha1, the number of bytes in the text, and an opaque
+                 representation of the inserted version which can be provided
+                 back to future _add_text calls in the parent_texts dictionary.
+        """
+        # The default implementation just thunks over to .add_lines(),
+        # inefficient, but it works.
+        return self.add_lines(key, parents, osutils.split_lines(text),
+                              nostore_sha=nostore_sha,
+                              random_id=random_id,
+                              check_content=True)
+
+    def add_mpdiffs(self, records):
+        """Add mpdiffs to this VersionedFile.
+
+        Records should be iterables of version, parents, expected_sha1,
+        mpdiff. mpdiff should be a MultiParent instance.
+        """
+        vf_parents = {}
+        mpvf = multiparent.MultiMemoryVersionedFile()
+        versions = []
+        for version, parent_ids, expected_sha1, mpdiff in records:
+            versions.append(version)
+            mpvf.add_diff(mpdiff, version, parent_ids)
+        needed_parents = set()
+        for version, parent_ids, expected_sha1, mpdiff in records:
+            needed_parents.update(p for p in parent_ids
+                                  if not mpvf.has_version(p))
+        # It seems likely that adding all the present parents as fulltexts can
+        # easily exhaust memory.
+        chunks_to_lines = osutils.chunks_to_lines
+        for record in self.get_record_stream(needed_parents, 'unordered',
+            True):
+            if record.storage_kind == 'absent':
+                continue
+            mpvf.add_version(chunks_to_lines(record.get_bytes_as('chunked')),
+                record.key, [])
+        for (key, parent_keys, expected_sha1, mpdiff), lines in\
+            zip(records, mpvf.get_line_list(versions)):
+            if len(parent_keys) == 1:
+                left_matching_blocks = list(mpdiff.get_matching_blocks(0,
+                    mpvf.get_diff(parent_keys[0]).num_lines()))
+            else:
+                left_matching_blocks = None
+            version_sha1, _, version_text = self.add_lines(key,
+                parent_keys, lines, vf_parents,
+                left_matching_blocks=left_matching_blocks)
+            if version_sha1 != expected_sha1:
+                raise errors.VersionedFileInvalidChecksum(version)
+            vf_parents[key] = version_text
+
+    def annotate(self, key):
+        """Return a list of (version-key, line) tuples for the text of key.
+
+        :raise RevisionNotPresent: If the key is not present.
+        """
+        raise NotImplementedError(self.annotate)
+
+    def check(self, progress_bar=None):
+        """Check this object for integrity.
+        
+        :param progress_bar: A progress bar to output as the check progresses.
+        :param keys: Specific keys within the VersionedFiles to check. When
+            this parameter is not None, check() becomes a generator as per
+            get_record_stream. The difference to get_record_stream is that
+            more or deeper checks will be performed.
+        :return: None, or if keys was supplied a generator as per
+            get_record_stream.
+        """
+        raise NotImplementedError(self.check)
+
+    @staticmethod
+    def check_not_reserved_id(version_id):
+        revision.check_not_reserved_id(version_id)
+
+    def clear_cache(self):
+        """Clear whatever caches this VersionedFile holds.
+
+        This is generally called after an operation has been performed, when we
+        don't expect to be using this versioned file again soon.
+        """
+
+    def _check_lines_not_unicode(self, lines):
+        """Check that lines being added to a versioned file are not unicode."""
+        for line in lines:
+            if line.__class__ is not str:
+                raise errors.BzrBadParameterUnicode("lines")
+
+    def _check_lines_are_lines(self, lines):
+        """Check that the lines really are full lines without inline EOL."""
+        for line in lines:
+            if '\n' in line[:-1]:
+                raise errors.BzrBadParameterContainsNewline("lines")
+
+    def get_known_graph_ancestry(self, keys):
+        """Get a KnownGraph instance with the ancestry of keys."""
+        # most basic implementation is a loop around get_parent_map
+        pending = set(keys)
+        parent_map = {}
+        while pending:
+            this_parent_map = self.get_parent_map(pending)
+            parent_map.update(this_parent_map)
+            pending = set()
+            map(pending.update, this_parent_map.itervalues())
+            pending = pending.difference(parent_map)
+        kg = _mod_graph.KnownGraph(parent_map)
+        return kg
+
+    def get_parent_map(self, keys):
+        """Get a map of the parents of keys.
+
+        :param keys: The keys to look up parents for.
+        :return: A mapping from keys to parents. Absent keys are absent from
+            the mapping.
+        """
+        raise NotImplementedError(self.get_parent_map)
+
+    def get_record_stream(self, keys, ordering, include_delta_closure):
+        """Get a stream of records for keys.
+
+        :param keys: The keys to include.
+        :param ordering: Either 'unordered' or 'topological'. A topologically
+            sorted stream has compression parents strictly before their
+            children.
+        :param include_delta_closure: If True then the closure across any
+            compression parents will be included (in the opaque data).
+        :return: An iterator of ContentFactory objects, each of which is only
+            valid until the iterator is advanced.
+        """
+        raise NotImplementedError(self.get_record_stream)
+
+    def get_sha1s(self, keys):
+        """Get the sha1's of the texts for the given keys.
+
+        :param keys: The names of the keys to lookup
+        :return: a dict from key to sha1 digest. Keys of texts which are not
+            present in the store are not present in the returned
+            dictionary.
+        """
+        raise NotImplementedError(self.get_sha1s)
+
+    has_key = index._has_key_from_parent_map
+
+    def get_missing_compression_parent_keys(self):
+        """Return an iterable of keys of missing compression parents.
+
+        Check this after calling insert_record_stream to find out if there are
+        any missing compression parents.  If there are, the records that
+        depend on them are not able to be inserted safely. The precise
+        behaviour depends on the concrete VersionedFiles class in use.
+
+        Classes that do not support this will raise NotImplementedError.
+        """
+        raise NotImplementedError(self.get_missing_compression_parent_keys)
+
+    def insert_record_stream(self, stream):
+        """Insert a record stream into this container.
+
+        :param stream: A stream of records to insert.
+        :return: None
+        :seealso VersionedFile.get_record_stream:
+        """
+        raise NotImplementedError
+
+    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
+        """Iterate over the lines in the versioned files from keys.
+
+        This may return lines from other keys. Each item the returned
+        iterator yields is a tuple of a line and a text version that that line
+        is present in (not introduced in).
+
+        Ordering of results is in whatever order is most suitable for the
+        underlying storage format.
+
+        If a progress bar is supplied, it may be used to indicate progress.
+        The caller is responsible for cleaning up progress bars (because this
+        is an iterator).
+
+        NOTES:
+         * Lines are normalised by the underlying store: they will all have \n
+           terminators.
+         * Lines are returned in arbitrary order.
+
+        :return: An iterator over (line, key).
+        """
+        raise NotImplementedError(self.iter_lines_added_or_present_in_keys)
+
+    def keys(self):
+        """Return a iterable of the keys for all the contained texts."""
+        raise NotImplementedError(self.keys)
+
+    def make_mpdiffs(self, keys):
+        """Create multiparent diffs for specified keys."""
+        generator = _MPDiffGenerator(self, keys)
+        return generator.compute_diffs()
+
+    def get_annotator(self):
+        return annotate.Annotator(self)
+
+    missing_keys = index._missing_keys_from_parent_map
+
+    def _extract_blocks(self, version_id, source, target):
+        return None
+
+    def _transitive_fallbacks(self):
+        """Return the whole stack of fallback versionedfiles.
+
+        This VersionedFiles may have a list of fallbacks, but it doesn't
+        necessarily know about the whole stack going down, and it can't know
+        at open time because they may change after the objects are opened.
+        """
+        all_fallbacks = []
+        for a_vfs in self._immediate_fallback_vfs:
+            all_fallbacks.append(a_vfs)
+            all_fallbacks.extend(a_vfs._transitive_fallbacks())
+        return all_fallbacks
+
+
+class ThunkedVersionedFiles(VersionedFiles):
+    """Storage for many versioned files thunked onto a 'VersionedFile' class.
+
+    This object allows a single keyspace for accessing the history graph and
+    contents of named bytestrings.
+
+    Currently no implementation allows the graph of different key prefixes to
+    intersect, but the API does allow such implementations in the future.
+    """
+
+    def __init__(self, transport, file_factory, mapper, is_locked):
+        """Create a ThunkedVersionedFiles."""
+        self._transport = transport
+        self._file_factory = file_factory
+        self._mapper = mapper
+        self._is_locked = is_locked
+
+    def add_lines(self, key, parents, lines, parent_texts=None,
+        left_matching_blocks=None, nostore_sha=None, random_id=False,
+        check_content=True):
+        """See VersionedFiles.add_lines()."""
+        path = self._mapper.map(key)
+        version_id = key[-1]
+        parents = [parent[-1] for parent in parents]
+        vf = self._get_vf(path)
+        try:
+            try:
+                return vf.add_lines_with_ghosts(version_id, parents, lines,
+                    parent_texts=parent_texts,
+                    left_matching_blocks=left_matching_blocks,
+                    nostore_sha=nostore_sha, random_id=random_id,
+                    check_content=check_content)
+            except NotImplementedError:
+                return vf.add_lines(version_id, parents, lines,
+                    parent_texts=parent_texts,
+                    left_matching_blocks=left_matching_blocks,
+                    nostore_sha=nostore_sha, random_id=random_id,
+                    check_content=check_content)
+        except errors.NoSuchFile:
+            # parent directory may be missing, try again.
+            self._transport.mkdir(osutils.dirname(path))
+            try:
+                return vf.add_lines_with_ghosts(version_id, parents, lines,
+                    parent_texts=parent_texts,
+                    left_matching_blocks=left_matching_blocks,
+                    nostore_sha=nostore_sha, random_id=random_id,
+                    check_content=check_content)
+            except NotImplementedError:
+                return vf.add_lines(version_id, parents, lines,
+                    parent_texts=parent_texts,
+                    left_matching_blocks=left_matching_blocks,
+                    nostore_sha=nostore_sha, random_id=random_id,
+                    check_content=check_content)
+
+    def annotate(self, key):
+        """Return a list of (version-key, line) tuples for the text of key.
+
+        :raise RevisionNotPresent: If the key is not present.
+        """
+        prefix = key[:-1]
+        path = self._mapper.map(prefix)
+        vf = self._get_vf(path)
+        origins = vf.annotate(key[-1])
+        result = []
+        for origin, line in origins:
+            result.append((prefix + (origin,), line))
+        return result
+
+    def check(self, progress_bar=None, keys=None):
+        """See VersionedFiles.check()."""
+        # XXX: This is over-enthusiastic but as we only thunk for Weaves today
+        # this is tolerable. Ideally we'd pass keys down to check() and 
+        # have the older VersiondFile interface updated too.
+        for prefix, vf in self._iter_all_components():
+            vf.check()
+        if keys is not None:
+            return self.get_record_stream(keys, 'unordered', True)
+
+    def get_parent_map(self, keys):
+        """Get a map of the parents of keys.
+
+        :param keys: The keys to look up parents for.
+        :return: A mapping from keys to parents. Absent keys are absent from
+            the mapping.
+        """
+        prefixes = self._partition_keys(keys)
+        result = {}
+        for prefix, suffixes in prefixes.items():
+            path = self._mapper.map(prefix)
+            vf = self._get_vf(path)
+            parent_map = vf.get_parent_map(suffixes)
+            for key, parents in parent_map.items():
+                result[prefix + (key,)] = tuple(
+                    prefix + (parent,) for parent in parents)
+        return result
+
+    def _get_vf(self, path):
+        if not self._is_locked():
+            raise errors.ObjectNotLocked(self)
+        return self._file_factory(path, self._transport, create=True,
+            get_scope=lambda:None)
+
+    def _partition_keys(self, keys):
+        """Turn keys into a dict of prefix:suffix_list."""
+        result = {}
+        for key in keys:
+            prefix_keys = result.setdefault(key[:-1], [])
+            prefix_keys.append(key[-1])
+        return result
+
+    def _get_all_prefixes(self):
+        # Identify all key prefixes.
+        # XXX: A bit hacky, needs polish.
+        if type(self._mapper) == ConstantMapper:
+            paths = [self._mapper.map(())]
+            prefixes = [()]
+        else:
+            relpaths = set()
+            for quoted_relpath in self._transport.iter_files_recursive():
+                path, ext = os.path.splitext(quoted_relpath)
+                relpaths.add(path)
+            paths = list(relpaths)
+            prefixes = [self._mapper.unmap(path) for path in paths]
+        return zip(paths, prefixes)
+
+    def get_record_stream(self, keys, ordering, include_delta_closure):
+        """See VersionedFiles.get_record_stream()."""
+        # Ordering will be taken care of by each partitioned store; group keys
+        # by partition.
+        keys = sorted(keys)
+        for prefix, suffixes, vf in self._iter_keys_vf(keys):
+            suffixes = [(suffix,) for suffix in suffixes]
+            for record in vf.get_record_stream(suffixes, ordering,
+                include_delta_closure):
+                if record.parents is not None:
+                    record.parents = tuple(
+                        prefix + parent for parent in record.parents)
+                record.key = prefix + record.key
+                yield record
+
+    def _iter_keys_vf(self, keys):
+        prefixes = self._partition_keys(keys)
+        sha1s = {}
+        for prefix, suffixes in prefixes.items():
+            path = self._mapper.map(prefix)
+            vf = self._get_vf(path)
+            yield prefix, suffixes, vf
+
+    def get_sha1s(self, keys):
+        """See VersionedFiles.get_sha1s()."""
+        sha1s = {}
+        for prefix,suffixes, vf in self._iter_keys_vf(keys):
+            vf_sha1s = vf.get_sha1s(suffixes)
+            for suffix, sha1 in vf_sha1s.iteritems():
+                sha1s[prefix + (suffix,)] = sha1
+        return sha1s
+
+    def insert_record_stream(self, stream):
+        """Insert a record stream into this container.
+
+        :param stream: A stream of records to insert.
+        :return: None
+        :seealso VersionedFile.get_record_stream:
+        """
+        for record in stream:
+            prefix = record.key[:-1]
+            key = record.key[-1:]
+            if record.parents is not None:
+                parents = [parent[-1:] for parent in record.parents]
+            else:
+                parents = None
+            thunk_record = AdapterFactory(key, parents, record)
+            path = self._mapper.map(prefix)
+            # Note that this parses the file many times; we can do better but
+            # as this only impacts weaves in terms of performance, it is
+            # tolerable.
+            vf = self._get_vf(path)
+            vf.insert_record_stream([thunk_record])
+
+    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
+        """Iterate over the lines in the versioned files from keys.
+
+        This may return lines from other keys. Each item the returned
+        iterator yields is a tuple of a line and a text version that that line
+        is present in (not introduced in).
+
+        Ordering of results is in whatever order is most suitable for the
+        underlying storage format.
+
+        If a progress bar is supplied, it may be used to indicate progress.
+        The caller is responsible for cleaning up progress bars (because this
+        is an iterator).
+
+        NOTES:
+         * Lines are normalised by the underlying store: they will all have \n
+           terminators.
+         * Lines are returned in arbitrary order.
+
+        :return: An iterator over (line, key).
+        """
+        for prefix, suffixes, vf in self._iter_keys_vf(keys):
+            for line, version in vf.iter_lines_added_or_present_in_versions(suffixes):
+                yield line, prefix + (version,)
+
+    def _iter_all_components(self):
+        for path, prefix in self._get_all_prefixes():
+            yield prefix, self._get_vf(path)
+
+    def keys(self):
+        """See VersionedFiles.keys()."""
+        result = set()
+        for prefix, vf in self._iter_all_components():
+            for suffix in vf.versions():
+                result.add(prefix + (suffix,))
+        return result
+
+
+class VersionedFilesWithFallbacks(VersionedFiles):
+
+    def without_fallbacks(self):
+        """Return a clone of this object without any fallbacks configured."""
+        raise NotImplementedError(self.without_fallbacks)
+
+    def add_fallback_versioned_files(self, a_versioned_files):
+        """Add a source of texts for texts not present in this knit.
+
+        :param a_versioned_files: A VersionedFiles object.
+        """
+        raise NotImplementedError(self.add_fallback_versioned_files)
+
+    def get_known_graph_ancestry(self, keys):
+        """Get a KnownGraph instance with the ancestry of keys."""
+        parent_map, missing_keys = self._index.find_ancestry(keys)
+        for fallback in self._transitive_fallbacks():
+            if not missing_keys:
+                break
+            (f_parent_map, f_missing_keys) = fallback._index.find_ancestry(
+                                                missing_keys)
+            parent_map.update(f_parent_map)
+            missing_keys = f_missing_keys
+        kg = _mod_graph.KnownGraph(parent_map)
+        return kg
+
+
+class _PlanMergeVersionedFile(VersionedFiles):
+    """A VersionedFile for uncommitted and committed texts.
+
+    It is intended to allow merges to be planned with working tree texts.
+    It implements only the small part of the VersionedFiles interface used by
+    PlanMerge.  It falls back to multiple versionedfiles for data not stored in
+    _PlanMergeVersionedFile itself.
+
+    :ivar: fallback_versionedfiles a list of VersionedFiles objects that can be
+        queried for missing texts.
+    """
+
+    def __init__(self, file_id):
+        """Create a _PlanMergeVersionedFile.
+
+        :param file_id: Used with _PlanMerge code which is not yet fully
+            tuple-keyspace aware.
+        """
+        self._file_id = file_id
+        # fallback locations
+        self.fallback_versionedfiles = []
+        # Parents for locally held keys.
+        self._parents = {}
+        # line data for locally held keys.
+        self._lines = {}
+        # key lookup providers
+        self._providers = [_mod_graph.DictParentsProvider(self._parents)]
+
+    def plan_merge(self, ver_a, ver_b, base=None):
+        """See VersionedFile.plan_merge"""
+        from bzrlib.merge import _PlanMerge
+        if base is None:
+            return _PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge()
+        old_plan = list(_PlanMerge(ver_a, base, self, (self._file_id,)).plan_merge())
+        new_plan = list(_PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge())
+        return _PlanMerge._subtract_plans(old_plan, new_plan)
+
+    def plan_lca_merge(self, ver_a, ver_b, base=None):
+        from bzrlib.merge import _PlanLCAMerge
+        graph = _mod_graph.Graph(self)
+        new_plan = _PlanLCAMerge(ver_a, ver_b, self, (self._file_id,), graph).plan_merge()
+        if base is None:
+            return new_plan
+        old_plan = _PlanLCAMerge(ver_a, base, self, (self._file_id,), graph).plan_merge()
+        return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan))
+
+    def add_lines(self, key, parents, lines):
+        """See VersionedFiles.add_lines
+
+        Lines are added locally, not to fallback versionedfiles.  Also, ghosts
+        are permitted.  Only reserved ids are permitted.
+        """
+        if type(key) is not tuple:
+            raise TypeError(key)
+        if not revision.is_reserved_id(key[-1]):
+            raise ValueError('Only reserved ids may be used')
+        if parents is None:
+            raise ValueError('Parents may not be None')
+        if lines is None:
+            raise ValueError('Lines may not be None')
+        self._parents[key] = tuple(parents)
+        self._lines[key] = lines
+
+    def get_record_stream(self, keys, ordering, include_delta_closure):
+        pending = set(keys)
+        for key in keys:
+            if key in self._lines:
+                lines = self._lines[key]
+                parents = self._parents[key]
+                pending.remove(key)
+                yield ChunkedContentFactory(key, parents, None, lines)
+        for versionedfile in self.fallback_versionedfiles:
+            for record in versionedfile.get_record_stream(
+                pending, 'unordered', True):
+                if record.storage_kind == 'absent':
+                    continue
+                else:
+                    pending.remove(record.key)
+                    yield record
+            if not pending:
+                return
+        # report absent entries
+        for key in pending:
+            yield AbsentContentFactory(key)
+
+    def get_parent_map(self, keys):
+        """See VersionedFiles.get_parent_map"""
+        # We create a new provider because a fallback may have been added.
+        # If we make fallbacks private we can update a stack list and avoid
+        # object creation thrashing.
+        keys = set(keys)
+        result = {}
+        if revision.NULL_REVISION in keys:
+            keys.remove(revision.NULL_REVISION)
+            result[revision.NULL_REVISION] = ()
+        self._providers = self._providers[:1] + self.fallback_versionedfiles
+        result.update(
+            _mod_graph.StackedParentsProvider(
+                self._providers).get_parent_map(keys))
+        for key, parents in result.iteritems():
+            if parents == ():
+                result[key] = (revision.NULL_REVISION,)
+        return result
+
+
+class PlanWeaveMerge(TextMerge):
+    """Weave merge that takes a plan as its input.
+
+    This exists so that VersionedFile.plan_merge is implementable.
+    Most callers will want to use WeaveMerge instead.
+    """
+
+    def __init__(self, plan, a_marker=TextMerge.A_MARKER,
+                 b_marker=TextMerge.B_MARKER):
+        TextMerge.__init__(self, a_marker, b_marker)
+        self.plan = list(plan)
+
+    def _merge_struct(self):
+        lines_a = []
+        lines_b = []
+        ch_a = ch_b = False
+
+        def outstanding_struct():
+            if not lines_a and not lines_b:
+                return
+            elif ch_a and not ch_b:
+                # one-sided change:
+                yield(lines_a,)
+            elif ch_b and not ch_a:
+                yield (lines_b,)
+            elif lines_a == lines_b:
+                yield(lines_a,)
+            else:
+                yield (lines_a, lines_b)
+
+        # We previously considered either 'unchanged' or 'killed-both' lines
+        # to be possible places to resynchronize.  However, assuming agreement
+        # on killed-both lines may be too aggressive. -- mbp 20060324
+        for state, line in self.plan:
+            if state == 'unchanged':
+                # resync and flush queued conflicts changes if any
+                for struct in outstanding_struct():
+                    yield struct
+                lines_a = []
+                lines_b = []
+                ch_a = ch_b = False
+
+            if state == 'unchanged':
+                if line:
+                    yield ([line],)
+            elif state == 'killed-a':
+                ch_a = True
+                lines_b.append(line)
+            elif state == 'killed-b':
+                ch_b = True
+                lines_a.append(line)
+            elif state == 'new-a':
+                ch_a = True
+                lines_a.append(line)
+            elif state == 'new-b':
+                ch_b = True
+                lines_b.append(line)
+            elif state == 'conflicted-a':
+                ch_b = ch_a = True
+                lines_a.append(line)
+            elif state == 'conflicted-b':
+                ch_b = ch_a = True
+                lines_b.append(line)
+            elif state == 'killed-both':
+                # This counts as a change, even though there is no associated
+                # line
+                ch_b = ch_a = True
+            else:
+                if state not in ('irrelevant', 'ghost-a', 'ghost-b',
+                        'killed-base'):
+                    raise AssertionError(state)
+        for struct in outstanding_struct():
+            yield struct
+
+    def base_from_plan(self):
+        """Construct a BASE file from the plan text."""
+        base_lines = []
+        for state, line in self.plan:
+            if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
+                # If unchanged, then this line is straight from base. If a or b
+                # or both killed the line, then it *used* to be in base.
+                base_lines.append(line)
+            else:
+                if state not in ('killed-base', 'irrelevant',
+                                 'ghost-a', 'ghost-b',
+                                 'new-a', 'new-b',
+                                 'conflicted-a', 'conflicted-b'):
+                    # killed-base, irrelevant means it doesn't apply
+                    # ghost-a/ghost-b are harder to say for sure, but they
+                    # aren't in the 'inc_c' which means they aren't in the
+                    # shared base of a & b. So we don't include them.  And
+                    # obviously if the line is newly inserted, it isn't in base
+
+                    # If 'conflicted-a' or b, then it is new vs one base, but
+                    # old versus another base. However, if we make it present
+                    # in the base, it will be deleted from the target, and it
+                    # seems better to get a line doubled in the merge result,
+                    # rather than have it deleted entirely.
+                    # Example, each node is the 'text' at that point:
+                    #           MN
+                    #          /   \
+                    #        MaN   MbN
+                    #         |  X  |
+                    #        MabN MbaN
+                    #          \   /
+                    #           ???
+                    # There was a criss-cross conflict merge. Both sides
+                    # include the other, but put themselves first.
+                    # Weave marks this as a 'clean' merge, picking OTHER over
+                    # THIS. (Though the details depend on order inserted into
+                    # weave, etc.)
+                    # LCA generates a plan:
+                    # [('unchanged', M),
+                    #  ('conflicted-b', b),
+                    #  ('unchanged', a),
+                    #  ('conflicted-a', b),
+                    #  ('unchanged', N)]
+                    # If you mark 'conflicted-*' as part of BASE, then a 3-way
+                    # merge tool will cleanly generate "MaN" (as BASE vs THIS
+                    # removes one 'b', and BASE vs OTHER removes the other)
+                    # If you include neither, 3-way creates a clean "MbabN" as
+                    # THIS adds one 'b', and OTHER does too.
+                    # It seems that having the line 2 times is better than
+                    # having it omitted. (Easier to manually delete than notice
+                    # it needs to be added.)
+                    raise AssertionError('Unknown state: %s' % (state,))
+        return base_lines
+
+
+class WeaveMerge(PlanWeaveMerge):
+    """Weave merge that takes a VersionedFile and two versions as its input."""
+
+    def __init__(self, versionedfile, ver_a, ver_b,
+        a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
+        plan = versionedfile.plan_merge(ver_a, ver_b)
+        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)
+
+
+class VirtualVersionedFiles(VersionedFiles):
+    """Dummy implementation for VersionedFiles that uses other functions for
+    obtaining fulltexts and parent maps.
+
+    This is always on the bottom of the stack and uses string keys
+    (rather than tuples) internally.
+    """
+
+    def __init__(self, get_parent_map, get_lines):
+        """Create a VirtualVersionedFiles.
+
+        :param get_parent_map: Same signature as Repository.get_parent_map.
+        :param get_lines: Should return lines for specified key or None if
+                          not available.
+        """
+        super(VirtualVersionedFiles, self).__init__()
+        self._get_parent_map = get_parent_map
+        self._get_lines = get_lines
+
+    def check(self, progressbar=None):
+        """See VersionedFiles.check.
+
+        :note: Always returns True for VirtualVersionedFiles.
+        """
+        return True
+
+    def add_mpdiffs(self, records):
+        """See VersionedFiles.mpdiffs.
+
+        :note: Not implemented for VirtualVersionedFiles.
+        """
+        raise NotImplementedError(self.add_mpdiffs)
+
+    def get_parent_map(self, keys):
+        """See VersionedFiles.get_parent_map."""
+        return dict([((k,), tuple([(p,) for p in v]))
+            for k,v in self._get_parent_map([k for (k,) in keys]).iteritems()])
+
+    def get_sha1s(self, keys):
+        """See VersionedFiles.get_sha1s."""
+        ret = {}
+        for (k,) in keys:
+            lines = self._get_lines(k)
+            if lines is not None:
+                if not isinstance(lines, list):
+                    raise AssertionError
+                ret[(k,)] = osutils.sha_strings(lines)
+        return ret
+
+    def get_record_stream(self, keys, ordering, include_delta_closure):
+        """See VersionedFiles.get_record_stream."""
+        for (k,) in list(keys):
+            lines = self._get_lines(k)
+            if lines is not None:
+                if not isinstance(lines, list):
+                    raise AssertionError
+                yield ChunkedContentFactory((k,), None,
+                        sha1=osutils.sha_strings(lines),
+                        chunks=lines)
+            else:
+                yield AbsentContentFactory((k,))
+
+    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
+        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
+        for i, (key,) in enumerate(keys):
+            if pb is not None:
+                pb.update("Finding changed lines", i, len(keys))
+            for l in self._get_lines(key):
+                yield (l, key)
+
+
+class NoDupeAddLinesDecorator(object):
+    """Decorator for a VersionedFiles that skips doing an add_lines if the key
+    is already present.
+    """
+
+    def __init__(self, store):
+        self._store = store
+
+    def add_lines(self, key, parents, lines, parent_texts=None,
+            left_matching_blocks=None, nostore_sha=None, random_id=False,
+            check_content=True):
+        """See VersionedFiles.add_lines.
+        
+        This implementation may return None as the third element of the return
+        value when the original store wouldn't.
+        """
+        if nostore_sha:
+            raise NotImplementedError(
+                "NoDupeAddLinesDecorator.add_lines does not implement the "
+                "nostore_sha behaviour.")
+        if key[-1] is None:
+            sha1 = osutils.sha_strings(lines)
+            key = ("sha1:" + sha1,)
+        else:
+            sha1 = None
+        if key in self._store.get_parent_map([key]):
+            # This key has already been inserted, so don't do it again.
+            if sha1 is None:
+                sha1 = osutils.sha_strings(lines)
+            return sha1, sum(map(len, lines)), None
+        return self._store.add_lines(key, parents, lines,
+                parent_texts=parent_texts,
+                left_matching_blocks=left_matching_blocks,
+                nostore_sha=nostore_sha, random_id=random_id,
+                check_content=check_content)
+
+    def __getattr__(self, name):
+        return getattr(self._store, name)
+
+
+def network_bytes_to_kind_and_offset(network_bytes):
+    """Strip of a record kind from the front of network_bytes.
+
+    :param network_bytes: The bytes of a record.
+    :return: A tuple (storage_kind, offset_of_remaining_bytes)
+    """
+    line_end = network_bytes.find('\n')
+    storage_kind = network_bytes[:line_end]
+    return storage_kind, line_end + 1
+
+
+class NetworkRecordStream(object):
+    """A record_stream which reconstitures a serialised stream."""
+
+    def __init__(self, bytes_iterator):
+        """Create a NetworkRecordStream.
+
+        :param bytes_iterator: An iterator of bytes. Each item in this
+            iterator should have been obtained from a record_streams'
+            record.get_bytes_as(record.storage_kind) call.
+        """
+        self._bytes_iterator = bytes_iterator
+        self._kind_factory = {
+            'fulltext': fulltext_network_to_record,
+            'groupcompress-block': groupcompress.network_block_to_records,
+            'knit-ft-gz': knit.knit_network_to_record,
+            'knit-delta-gz': knit.knit_network_to_record,
+            'knit-annotated-ft-gz': knit.knit_network_to_record,
+            'knit-annotated-delta-gz': knit.knit_network_to_record,
+            'knit-delta-closure': knit.knit_delta_closure_to_records,
+            }
+
+    def read(self):
+        """Read the stream.
+
+        :return: An iterator as per VersionedFiles.get_record_stream().
+        """
+        for bytes in self._bytes_iterator:
+            storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
+            for record in self._kind_factory[storage_kind](
+                storage_kind, bytes, line_end):
+                yield record
+
+
+def fulltext_network_to_record(kind, bytes, line_end):
+    """Convert a network fulltext record to record."""
+    meta_len, = struct.unpack('!L', bytes[line_end:line_end+4])
+    record_meta = bytes[line_end+4:line_end+4+meta_len]
+    key, parents = bencode.bdecode_as_tuple(record_meta)
+    if parents == 'nil':
+        parents = None
+    fulltext = bytes[line_end+4+meta_len:]
+    return [FulltextContentFactory(key, parents, None, fulltext)]
+
+
+def _length_prefix(bytes):
+    return struct.pack('!L', len(bytes))
+
+
+def record_to_fulltext_bytes(record):
+    if record.parents is None:
+        parents = 'nil'
+    else:
+        parents = record.parents
+    record_meta = bencode.bencode((record.key, parents))
+    record_content = record.get_bytes_as('fulltext')
+    return "fulltext\n%s%s%s" % (
+        _length_prefix(record_meta), record_meta, record_content)
+
+
+def sort_groupcompress(parent_map):
+    """Sort and group the keys in parent_map into groupcompress order.
+
+    groupcompress is defined (currently) as reverse-topological order, grouped
+    by the key prefix.
+
+    :return: A sorted-list of keys
+    """
+    # gc-optimal ordering is approximately reverse topological,
+    # properly grouped by file-id.
+    per_prefix_map = {}
+    for item in parent_map.iteritems():
+        key = item[0]
+        if isinstance(key, str) or len(key) == 1:
+            prefix = ''
+        else:
+            prefix = key[0]
+        try:
+            per_prefix_map[prefix].append(item)
+        except KeyError:
+            per_prefix_map[prefix] = [item]
+
+    present_keys = []
+    for prefix in sorted(per_prefix_map):
+        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
+    return present_keys
+
+
+class _KeyRefs(object):
+
+    def __init__(self, track_new_keys=False):
+        # dict mapping 'key' to 'set of keys referring to that key'
+        self.refs = {}
+        if track_new_keys:
+            # set remembering all new keys
+            self.new_keys = set()
+        else:
+            self.new_keys = None
+
+    def clear(self):
+        if self.refs:
+            self.refs.clear()
+        if self.new_keys:
+            self.new_keys.clear()
+
+    def add_references(self, key, refs):
+        # Record the new references
+        for referenced in refs:
+            try:
+                needed_by = self.refs[referenced]
+            except KeyError:
+                needed_by = self.refs[referenced] = set()
+            needed_by.add(key)
+        # Discard references satisfied by the new key
+        self.add_key(key)
+
+    def get_new_keys(self):
+        return self.new_keys
+    
+    def get_unsatisfied_refs(self):
+        return self.refs.iterkeys()
+
+    def _satisfy_refs_for_key(self, key):
+        try:
+            del self.refs[key]
+        except KeyError:
+            # No keys depended on this key.  That's ok.
+            pass
+
+    def add_key(self, key):
+        # satisfy refs for key, and remember that we've seen this key.
+        self._satisfy_refs_for_key(key)
+        if self.new_keys is not None:
+            self.new_keys.add(key)
+
+    def satisfy_refs_for_keys(self, keys):
+        for key in keys:
+            self._satisfy_refs_for_key(key)
+
+    def get_referrers(self):
+        result = set()
+        for referrers in self.refs.itervalues():
+            result.update(referrers)
+        return result
+
+
+
author	Lorry <lorry@roadtrain.codethink.co.uk>	2012-08-22 15:47:16 +0100
committer	Lorry <lorry@roadtrain.codethink.co.uk>	2012-08-22 15:47:16 +0100
commit	25335618bf8755ce6b116ee14f47f5a1f2c821e9 (patch)
tree	d889d7ab3f9f985d0c54c534cb8052bd2e6d7163 /bzrlib/versionedfile.py
download	bzr-tarball-25335618bf8755ce6b116ee14f47f5a1f2c821e9.tar.gz