diff options
author | Lorry <lorry@roadtrain.codethink.co.uk> | 2012-08-22 15:47:16 +0100 |
---|---|---|
committer | Lorry <lorry@roadtrain.codethink.co.uk> | 2012-08-22 15:47:16 +0100 |
commit | 25335618bf8755ce6b116ee14f47f5a1f2c821e9 (patch) | |
tree | d889d7ab3f9f985d0c54c534cb8052bd2e6d7163 /bzrlib/versionedfile.py | |
download | bzr-tarball-25335618bf8755ce6b116ee14f47f5a1f2c821e9.tar.gz |
Tarball conversion
Diffstat (limited to 'bzrlib/versionedfile.py')
-rw-r--r-- | bzrlib/versionedfile.py | 1963 |
1 files changed, 1963 insertions, 0 deletions
diff --git a/bzrlib/versionedfile.py b/bzrlib/versionedfile.py new file mode 100644 index 0000000..84d91bb --- /dev/null +++ b/bzrlib/versionedfile.py @@ -0,0 +1,1963 @@ +# Copyright (C) 2006-2011 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +"""Versioned text file storage api.""" + +from __future__ import absolute_import + +from copy import copy +from cStringIO import StringIO +import os +import struct +from zlib import adler32 + +from bzrlib.lazy_import import lazy_import +lazy_import(globals(), """ +from bzrlib import ( + annotate, + bencode, + errors, + graph as _mod_graph, + groupcompress, + index, + knit, + osutils, + multiparent, + tsort, + revision, + urlutils, + ) +""") +from bzrlib.registry import Registry +from bzrlib.textmerge import TextMerge + + +adapter_registry = Registry() +adapter_registry.register_lazy(('knit-delta-gz', 'fulltext'), 'bzrlib.knit', + 'DeltaPlainToFullText') +adapter_registry.register_lazy(('knit-ft-gz', 'fulltext'), 'bzrlib.knit', + 'FTPlainToFullText') +adapter_registry.register_lazy(('knit-annotated-delta-gz', 'knit-delta-gz'), + 'bzrlib.knit', 'DeltaAnnotatedToUnannotated') +adapter_registry.register_lazy(('knit-annotated-delta-gz', 'fulltext'), + 'bzrlib.knit', 'DeltaAnnotatedToFullText') +adapter_registry.register_lazy(('knit-annotated-ft-gz', 'knit-ft-gz'), + 'bzrlib.knit', 'FTAnnotatedToUnannotated') +adapter_registry.register_lazy(('knit-annotated-ft-gz', 'fulltext'), + 'bzrlib.knit', 'FTAnnotatedToFullText') +# adapter_registry.register_lazy(('knit-annotated-ft-gz', 'chunked'), +# 'bzrlib.knit', 'FTAnnotatedToChunked') + + +class ContentFactory(object): + """Abstract interface for insertion and retrieval from a VersionedFile. + + :ivar sha1: None, or the sha1 of the content fulltext. + :ivar storage_kind: The native storage kind of this factory. One of + 'mpdiff', 'knit-annotated-ft', 'knit-annotated-delta', 'knit-ft', + 'knit-delta', 'fulltext', 'knit-annotated-ft-gz', + 'knit-annotated-delta-gz', 'knit-ft-gz', 'knit-delta-gz'. + :ivar key: The key of this content. Each key is a tuple with a single + string in it. + :ivar parents: A tuple of parent keys for self.key. If the object has + no parent information, None (as opposed to () for an empty list of + parents). + """ + + def __init__(self): + """Create a ContentFactory.""" + self.sha1 = None + self.storage_kind = None + self.key = None + self.parents = None + + +class ChunkedContentFactory(ContentFactory): + """Static data content factory. + + This takes a 'chunked' list of strings. The only requirement on 'chunked' is + that ''.join(lines) becomes a valid fulltext. A tuple of a single string + satisfies this, as does a list of lines. + + :ivar sha1: None, or the sha1 of the content fulltext. + :ivar storage_kind: The native storage kind of this factory. Always + 'chunked' + :ivar key: The key of this content. Each key is a tuple with a single + string in it. + :ivar parents: A tuple of parent keys for self.key. If the object has + no parent information, None (as opposed to () for an empty list of + parents). + """ + + def __init__(self, key, parents, sha1, chunks): + """Create a ContentFactory.""" + self.sha1 = sha1 + self.storage_kind = 'chunked' + self.key = key + self.parents = parents + self._chunks = chunks + + def get_bytes_as(self, storage_kind): + if storage_kind == 'chunked': + return self._chunks + elif storage_kind == 'fulltext': + return ''.join(self._chunks) + raise errors.UnavailableRepresentation(self.key, storage_kind, + self.storage_kind) + + +class FulltextContentFactory(ContentFactory): + """Static data content factory. + + This takes a fulltext when created and just returns that during + get_bytes_as('fulltext'). + + :ivar sha1: None, or the sha1 of the content fulltext. + :ivar storage_kind: The native storage kind of this factory. Always + 'fulltext'. + :ivar key: The key of this content. Each key is a tuple with a single + string in it. + :ivar parents: A tuple of parent keys for self.key. If the object has + no parent information, None (as opposed to () for an empty list of + parents). + """ + + def __init__(self, key, parents, sha1, text): + """Create a ContentFactory.""" + self.sha1 = sha1 + self.storage_kind = 'fulltext' + self.key = key + self.parents = parents + self._text = text + + def get_bytes_as(self, storage_kind): + if storage_kind == self.storage_kind: + return self._text + elif storage_kind == 'chunked': + return [self._text] + raise errors.UnavailableRepresentation(self.key, storage_kind, + self.storage_kind) + + +class AbsentContentFactory(ContentFactory): + """A placeholder content factory for unavailable texts. + + :ivar sha1: None. + :ivar storage_kind: 'absent'. + :ivar key: The key of this content. Each key is a tuple with a single + string in it. + :ivar parents: None. + """ + + def __init__(self, key): + """Create a ContentFactory.""" + self.sha1 = None + self.storage_kind = 'absent' + self.key = key + self.parents = None + + def get_bytes_as(self, storage_kind): + raise ValueError('A request was made for key: %s, but that' + ' content is not available, and the calling' + ' code does not handle if it is missing.' + % (self.key,)) + + +class AdapterFactory(ContentFactory): + """A content factory to adapt between key prefix's.""" + + def __init__(self, key, parents, adapted): + """Create an adapter factory instance.""" + self.key = key + self.parents = parents + self._adapted = adapted + + def __getattr__(self, attr): + """Return a member from the adapted object.""" + if attr in ('key', 'parents'): + return self.__dict__[attr] + else: + return getattr(self._adapted, attr) + + +def filter_absent(record_stream): + """Adapt a record stream to remove absent records.""" + for record in record_stream: + if record.storage_kind != 'absent': + yield record + + +class _MPDiffGenerator(object): + """Pull out the functionality for generating mp_diffs.""" + + def __init__(self, vf, keys): + self.vf = vf + # This is the order the keys were requested in + self.ordered_keys = tuple(keys) + # keys + their parents, what we need to compute the diffs + self.needed_keys = () + # Map from key: mp_diff + self.diffs = {} + # Map from key: parents_needed (may have ghosts) + self.parent_map = {} + # Parents that aren't present + self.ghost_parents = () + # Map from parent_key => number of children for this text + self.refcounts = {} + # Content chunks that are cached while we still need them + self.chunks = {} + + def _find_needed_keys(self): + """Find the set of keys we need to request. + + This includes all the original keys passed in, and the non-ghost + parents of those keys. + + :return: (needed_keys, refcounts) + needed_keys is the set of all texts we need to extract + refcounts is a dict of {key: num_children} letting us know when we + no longer need to cache a given parent text + """ + # All the keys and their parents + needed_keys = set(self.ordered_keys) + parent_map = self.vf.get_parent_map(needed_keys) + self.parent_map = parent_map + # TODO: Should we be using a different construct here? I think this + # uses difference_update internally, and we expect the result to + # be tiny + missing_keys = needed_keys.difference(parent_map) + if missing_keys: + raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf) + # Parents that might be missing. They are allowed to be ghosts, but we + # should check for them + refcounts = {} + setdefault = refcounts.setdefault + just_parents = set() + for child_key, parent_keys in parent_map.iteritems(): + if not parent_keys: + # parent_keys may be None if a given VersionedFile claims to + # not support graph operations. + continue + just_parents.update(parent_keys) + needed_keys.update(parent_keys) + for p in parent_keys: + refcounts[p] = setdefault(p, 0) + 1 + just_parents.difference_update(parent_map) + # Remove any parents that are actually ghosts from the needed set + self.present_parents = set(self.vf.get_parent_map(just_parents)) + self.ghost_parents = just_parents.difference(self.present_parents) + needed_keys.difference_update(self.ghost_parents) + self.needed_keys = needed_keys + self.refcounts = refcounts + return needed_keys, refcounts + + def _compute_diff(self, key, parent_lines, lines): + """Compute a single mp_diff, and store it in self._diffs""" + if len(parent_lines) > 0: + # XXX: _extract_blocks is not usefully defined anywhere... + # It was meant to extract the left-parent diff without + # having to recompute it for Knit content (pack-0.92, + # etc). That seems to have regressed somewhere + left_parent_blocks = self.vf._extract_blocks(key, + parent_lines[0], lines) + else: + left_parent_blocks = None + diff = multiparent.MultiParent.from_lines(lines, + parent_lines, left_parent_blocks) + self.diffs[key] = diff + + def _process_one_record(self, key, this_chunks): + parent_keys = None + if key in self.parent_map: + # This record should be ready to diff, since we requested + # content in 'topological' order + parent_keys = self.parent_map.pop(key) + # If a VersionedFile claims 'no-graph' support, then it may return + # None for any parent request, so we replace it with an empty tuple + if parent_keys is None: + parent_keys = () + parent_lines = [] + for p in parent_keys: + # Alternatively we could check p not in self.needed_keys, but + # ghost_parents should be tiny versus huge + if p in self.ghost_parents: + continue + refcount = self.refcounts[p] + if refcount == 1: # Last child reference + self.refcounts.pop(p) + parent_chunks = self.chunks.pop(p) + else: + self.refcounts[p] = refcount - 1 + parent_chunks = self.chunks[p] + p_lines = osutils.chunks_to_lines(parent_chunks) + # TODO: Should we cache the line form? We did the + # computation to get it, but storing it this way will + # be less memory efficient... + parent_lines.append(p_lines) + del p_lines + lines = osutils.chunks_to_lines(this_chunks) + # Since we needed the lines, we'll go ahead and cache them this way + this_chunks = lines + self._compute_diff(key, parent_lines, lines) + del lines + # Is this content required for any more children? + if key in self.refcounts: + self.chunks[key] = this_chunks + + def _extract_diffs(self): + needed_keys, refcounts = self._find_needed_keys() + for record in self.vf.get_record_stream(needed_keys, + 'topological', True): + if record.storage_kind == 'absent': + raise errors.RevisionNotPresent(record.key, self.vf) + self._process_one_record(record.key, + record.get_bytes_as('chunked')) + + def compute_diffs(self): + self._extract_diffs() + dpop = self.diffs.pop + return [dpop(k) for k in self.ordered_keys] + + +class VersionedFile(object): + """Versioned text file storage. + + A versioned file manages versions of line-based text files, + keeping track of the originating version for each line. + + To clients the "lines" of the file are represented as a list of + strings. These strings will typically have terminal newline + characters, but this is not required. In particular files commonly + do not have a newline at the end of the file. + + Texts are identified by a version-id string. + """ + + @staticmethod + def check_not_reserved_id(version_id): + revision.check_not_reserved_id(version_id) + + def copy_to(self, name, transport): + """Copy this versioned file to name on transport.""" + raise NotImplementedError(self.copy_to) + + def get_record_stream(self, versions, ordering, include_delta_closure): + """Get a stream of records for versions. + + :param versions: The versions to include. Each version is a tuple + (version,). + :param ordering: Either 'unordered' or 'topological'. A topologically + sorted stream has compression parents strictly before their + children. + :param include_delta_closure: If True then the closure across any + compression parents will be included (in the data content of the + stream, not in the emitted records). This guarantees that + 'fulltext' can be used successfully on every record. + :return: An iterator of ContentFactory objects, each of which is only + valid until the iterator is advanced. + """ + raise NotImplementedError(self.get_record_stream) + + def has_version(self, version_id): + """Returns whether version is present.""" + raise NotImplementedError(self.has_version) + + def insert_record_stream(self, stream): + """Insert a record stream into this versioned file. + + :param stream: A stream of records to insert. + :return: None + :seealso VersionedFile.get_record_stream: + """ + raise NotImplementedError + + def add_lines(self, version_id, parents, lines, parent_texts=None, + left_matching_blocks=None, nostore_sha=None, random_id=False, + check_content=True): + """Add a single text on top of the versioned file. + + Must raise RevisionAlreadyPresent if the new version is + already present in file history. + + Must raise RevisionNotPresent if any of the given parents are + not present in file history. + + :param lines: A list of lines. Each line must be a bytestring. And all + of them except the last must be terminated with \n and contain no + other \n's. The last line may either contain no \n's or a single + terminated \n. If the lines list does meet this constraint the add + routine may error or may succeed - but you will be unable to read + the data back accurately. (Checking the lines have been split + correctly is expensive and extremely unlikely to catch bugs so it + is not done at runtime unless check_content is True.) + :param parent_texts: An optional dictionary containing the opaque + representations of some or all of the parents of version_id to + allow delta optimisations. VERY IMPORTANT: the texts must be those + returned by add_lines or data corruption can be caused. + :param left_matching_blocks: a hint about which areas are common + between the text and its left-hand-parent. The format is + the SequenceMatcher.get_matching_blocks format. + :param nostore_sha: Raise ExistingContent and do not add the lines to + the versioned file if the digest of the lines matches this. + :param random_id: If True a random id has been selected rather than + an id determined by some deterministic process such as a converter + from a foreign VCS. When True the backend may choose not to check + for uniqueness of the resulting key within the versioned file, so + this should only be done when the result is expected to be unique + anyway. + :param check_content: If True, the lines supplied are verified to be + bytestrings that are correctly formed lines. + :return: The text sha1, the number of bytes in the text, and an opaque + representation of the inserted version which can be provided + back to future add_lines calls in the parent_texts dictionary. + """ + self._check_write_ok() + return self._add_lines(version_id, parents, lines, parent_texts, + left_matching_blocks, nostore_sha, random_id, check_content) + + def _add_lines(self, version_id, parents, lines, parent_texts, + left_matching_blocks, nostore_sha, random_id, check_content): + """Helper to do the class specific add_lines.""" + raise NotImplementedError(self.add_lines) + + def add_lines_with_ghosts(self, version_id, parents, lines, + parent_texts=None, nostore_sha=None, random_id=False, + check_content=True, left_matching_blocks=None): + """Add lines to the versioned file, allowing ghosts to be present. + + This takes the same parameters as add_lines and returns the same. + """ + self._check_write_ok() + return self._add_lines_with_ghosts(version_id, parents, lines, + parent_texts, nostore_sha, random_id, check_content, left_matching_blocks) + + def _add_lines_with_ghosts(self, version_id, parents, lines, parent_texts, + nostore_sha, random_id, check_content, left_matching_blocks): + """Helper to do class specific add_lines_with_ghosts.""" + raise NotImplementedError(self.add_lines_with_ghosts) + + def check(self, progress_bar=None): + """Check the versioned file for integrity.""" + raise NotImplementedError(self.check) + + def _check_lines_not_unicode(self, lines): + """Check that lines being added to a versioned file are not unicode.""" + for line in lines: + if line.__class__ is not str: + raise errors.BzrBadParameterUnicode("lines") + + def _check_lines_are_lines(self, lines): + """Check that the lines really are full lines without inline EOL.""" + for line in lines: + if '\n' in line[:-1]: + raise errors.BzrBadParameterContainsNewline("lines") + + def get_format_signature(self): + """Get a text description of the data encoding in this file. + + :since: 0.90 + """ + raise NotImplementedError(self.get_format_signature) + + def make_mpdiffs(self, version_ids): + """Create multiparent diffs for specified versions.""" + # XXX: Can't use _MPDiffGenerator just yet. This is because version_ids + # is a list of strings, not keys. And while self.get_record_stream + # is supported, it takes *keys*, while self.get_parent_map() takes + # strings... *sigh* + knit_versions = set() + knit_versions.update(version_ids) + parent_map = self.get_parent_map(version_ids) + for version_id in version_ids: + try: + knit_versions.update(parent_map[version_id]) + except KeyError: + raise errors.RevisionNotPresent(version_id, self) + # We need to filter out ghosts, because we can't diff against them. + knit_versions = set(self.get_parent_map(knit_versions).keys()) + lines = dict(zip(knit_versions, + self._get_lf_split_line_list(knit_versions))) + diffs = [] + for version_id in version_ids: + target = lines[version_id] + try: + parents = [lines[p] for p in parent_map[version_id] if p in + knit_versions] + except KeyError: + # I don't know how this could ever trigger. + # parent_map[version_id] was already triggered in the previous + # for loop, and lines[p] has the 'if p in knit_versions' check, + # so we again won't have a KeyError. + raise errors.RevisionNotPresent(version_id, self) + if len(parents) > 0: + left_parent_blocks = self._extract_blocks(version_id, + parents[0], target) + else: + left_parent_blocks = None + diffs.append(multiparent.MultiParent.from_lines(target, parents, + left_parent_blocks)) + return diffs + + def _extract_blocks(self, version_id, source, target): + return None + + def add_mpdiffs(self, records): + """Add mpdiffs to this VersionedFile. + + Records should be iterables of version, parents, expected_sha1, + mpdiff. mpdiff should be a MultiParent instance. + """ + # Does this need to call self._check_write_ok()? (IanC 20070919) + vf_parents = {} + mpvf = multiparent.MultiMemoryVersionedFile() + versions = [] + for version, parent_ids, expected_sha1, mpdiff in records: + versions.append(version) + mpvf.add_diff(mpdiff, version, parent_ids) + needed_parents = set() + for version, parent_ids, expected_sha1, mpdiff in records: + needed_parents.update(p for p in parent_ids + if not mpvf.has_version(p)) + present_parents = set(self.get_parent_map(needed_parents).keys()) + for parent_id, lines in zip(present_parents, + self._get_lf_split_line_list(present_parents)): + mpvf.add_version(lines, parent_id, []) + for (version, parent_ids, expected_sha1, mpdiff), lines in\ + zip(records, mpvf.get_line_list(versions)): + if len(parent_ids) == 1: + left_matching_blocks = list(mpdiff.get_matching_blocks(0, + mpvf.get_diff(parent_ids[0]).num_lines())) + else: + left_matching_blocks = None + try: + _, _, version_text = self.add_lines_with_ghosts(version, + parent_ids, lines, vf_parents, + left_matching_blocks=left_matching_blocks) + except NotImplementedError: + # The vf can't handle ghosts, so add lines normally, which will + # (reasonably) fail if there are ghosts in the data. + _, _, version_text = self.add_lines(version, + parent_ids, lines, vf_parents, + left_matching_blocks=left_matching_blocks) + vf_parents[version] = version_text + sha1s = self.get_sha1s(versions) + for version, parent_ids, expected_sha1, mpdiff in records: + if expected_sha1 != sha1s[version]: + raise errors.VersionedFileInvalidChecksum(version) + + def get_text(self, version_id): + """Return version contents as a text string. + + Raises RevisionNotPresent if version is not present in + file history. + """ + return ''.join(self.get_lines(version_id)) + get_string = get_text + + def get_texts(self, version_ids): + """Return the texts of listed versions as a list of strings. + + Raises RevisionNotPresent if version is not present in + file history. + """ + return [''.join(self.get_lines(v)) for v in version_ids] + + def get_lines(self, version_id): + """Return version contents as a sequence of lines. + + Raises RevisionNotPresent if version is not present in + file history. + """ + raise NotImplementedError(self.get_lines) + + def _get_lf_split_line_list(self, version_ids): + return [StringIO(t).readlines() for t in self.get_texts(version_ids)] + + def get_ancestry(self, version_ids, topo_sorted=True): + """Return a list of all ancestors of given version(s). This + will not include the null revision. + + This list will not be topologically sorted if topo_sorted=False is + passed. + + Must raise RevisionNotPresent if any of the given versions are + not present in file history.""" + if isinstance(version_ids, basestring): + version_ids = [version_ids] + raise NotImplementedError(self.get_ancestry) + + def get_ancestry_with_ghosts(self, version_ids): + """Return a list of all ancestors of given version(s). This + will not include the null revision. + + Must raise RevisionNotPresent if any of the given versions are + not present in file history. + + Ghosts that are known about will be included in ancestry list, + but are not explicitly marked. + """ + raise NotImplementedError(self.get_ancestry_with_ghosts) + + def get_parent_map(self, version_ids): + """Get a map of the parents of version_ids. + + :param version_ids: The version ids to look up parents for. + :return: A mapping from version id to parents. + """ + raise NotImplementedError(self.get_parent_map) + + def get_parents_with_ghosts(self, version_id): + """Return version names for parents of version_id. + + Will raise RevisionNotPresent if version_id is not present + in the history. + + Ghosts that are known about will be included in the parent list, + but are not explicitly marked. + """ + try: + return list(self.get_parent_map([version_id])[version_id]) + except KeyError: + raise errors.RevisionNotPresent(version_id, self) + + def annotate(self, version_id): + """Return a list of (version-id, line) tuples for version_id. + + :raise RevisionNotPresent: If the given version is + not present in file history. + """ + raise NotImplementedError(self.annotate) + + def iter_lines_added_or_present_in_versions(self, version_ids=None, + pb=None): + """Iterate over the lines in the versioned file from version_ids. + + This may return lines from other versions. Each item the returned + iterator yields is a tuple of a line and a text version that that line + is present in (not introduced in). + + Ordering of results is in whatever order is most suitable for the + underlying storage format. + + If a progress bar is supplied, it may be used to indicate progress. + The caller is responsible for cleaning up progress bars (because this + is an iterator). + + NOTES: Lines are normalised: they will all have \n terminators. + Lines are returned in arbitrary order. + + :return: An iterator over (line, version_id). + """ + raise NotImplementedError(self.iter_lines_added_or_present_in_versions) + + def plan_merge(self, ver_a, ver_b): + """Return pseudo-annotation indicating how the two versions merge. + + This is computed between versions a and b and their common + base. + + Weave lines present in none of them are skipped entirely. + + Legend: + killed-base Dead in base revision + killed-both Killed in each revision + killed-a Killed in a + killed-b Killed in b + unchanged Alive in both a and b (possibly created in both) + new-a Created in a + new-b Created in b + ghost-a Killed in a, unborn in b + ghost-b Killed in b, unborn in a + irrelevant Not in either revision + """ + raise NotImplementedError(VersionedFile.plan_merge) + + def weave_merge(self, plan, a_marker=TextMerge.A_MARKER, + b_marker=TextMerge.B_MARKER): + return PlanWeaveMerge(plan, a_marker, b_marker).merge_lines()[0] + + +class RecordingVersionedFilesDecorator(object): + """A minimal versioned files that records calls made on it. + + Only enough methods have been added to support tests using it to date. + + :ivar calls: A list of the calls made; can be reset at any time by + assigning [] to it. + """ + + def __init__(self, backing_vf): + """Create a RecordingVersionedFilesDecorator decorating backing_vf. + + :param backing_vf: The versioned file to answer all methods. + """ + self._backing_vf = backing_vf + self.calls = [] + + def add_lines(self, key, parents, lines, parent_texts=None, + left_matching_blocks=None, nostore_sha=None, random_id=False, + check_content=True): + self.calls.append(("add_lines", key, parents, lines, parent_texts, + left_matching_blocks, nostore_sha, random_id, check_content)) + return self._backing_vf.add_lines(key, parents, lines, parent_texts, + left_matching_blocks, nostore_sha, random_id, check_content) + + def check(self): + self._backing_vf.check() + + def get_parent_map(self, keys): + self.calls.append(("get_parent_map", copy(keys))) + return self._backing_vf.get_parent_map(keys) + + def get_record_stream(self, keys, sort_order, include_delta_closure): + self.calls.append(("get_record_stream", list(keys), sort_order, + include_delta_closure)) + return self._backing_vf.get_record_stream(keys, sort_order, + include_delta_closure) + + def get_sha1s(self, keys): + self.calls.append(("get_sha1s", copy(keys))) + return self._backing_vf.get_sha1s(keys) + + def iter_lines_added_or_present_in_keys(self, keys, pb=None): + self.calls.append(("iter_lines_added_or_present_in_keys", copy(keys))) + return self._backing_vf.iter_lines_added_or_present_in_keys(keys, pb=pb) + + def keys(self): + self.calls.append(("keys",)) + return self._backing_vf.keys() + + +class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator): + """A VF that records calls, and returns keys in specific order. + + :ivar calls: A list of the calls made; can be reset at any time by + assigning [] to it. + """ + + def __init__(self, backing_vf, key_priority): + """Create a RecordingVersionedFilesDecorator decorating backing_vf. + + :param backing_vf: The versioned file to answer all methods. + :param key_priority: A dictionary defining what order keys should be + returned from an 'unordered' get_record_stream request. + Keys with lower priority are returned first, keys not present in + the map get an implicit priority of 0, and are returned in + lexicographical order. + """ + RecordingVersionedFilesDecorator.__init__(self, backing_vf) + self._key_priority = key_priority + + def get_record_stream(self, keys, sort_order, include_delta_closure): + self.calls.append(("get_record_stream", list(keys), sort_order, + include_delta_closure)) + if sort_order == 'unordered': + def sort_key(key): + return (self._key_priority.get(key, 0), key) + # Use a defined order by asking for the keys one-by-one from the + # backing_vf + for key in sorted(keys, key=sort_key): + for record in self._backing_vf.get_record_stream([key], + 'unordered', include_delta_closure): + yield record + else: + for record in self._backing_vf.get_record_stream(keys, sort_order, + include_delta_closure): + yield record + + +class KeyMapper(object): + """KeyMappers map between keys and underlying partitioned storage.""" + + def map(self, key): + """Map key to an underlying storage identifier. + + :param key: A key tuple e.g. ('file-id', 'revision-id'). + :return: An underlying storage identifier, specific to the partitioning + mechanism. + """ + raise NotImplementedError(self.map) + + def unmap(self, partition_id): + """Map a partitioned storage id back to a key prefix. + + :param partition_id: The underlying partition id. + :return: As much of a key (or prefix) as is derivable from the partition + id. + """ + raise NotImplementedError(self.unmap) + + +class ConstantMapper(KeyMapper): + """A key mapper that maps to a constant result.""" + + def __init__(self, result): + """Create a ConstantMapper which will return result for all maps.""" + self._result = result + + def map(self, key): + """See KeyMapper.map().""" + return self._result + + +class URLEscapeMapper(KeyMapper): + """Base class for use with transport backed storage. + + This provides a map and unmap wrapper that respectively url escape and + unescape their outputs and inputs. + """ + + def map(self, key): + """See KeyMapper.map().""" + return urlutils.quote(self._map(key)) + + def unmap(self, partition_id): + """See KeyMapper.unmap().""" + return self._unmap(urlutils.unquote(partition_id)) + + +class PrefixMapper(URLEscapeMapper): + """A key mapper that extracts the first component of a key. + + This mapper is for use with a transport based backend. + """ + + def _map(self, key): + """See KeyMapper.map().""" + return key[0] + + def _unmap(self, partition_id): + """See KeyMapper.unmap().""" + return (partition_id,) + + +class HashPrefixMapper(URLEscapeMapper): + """A key mapper that combines the first component of a key with a hash. + + This mapper is for use with a transport based backend. + """ + + def _map(self, key): + """See KeyMapper.map().""" + prefix = self._escape(key[0]) + return "%02x/%s" % (adler32(prefix) & 0xff, prefix) + + def _escape(self, prefix): + """No escaping needed here.""" + return prefix + + def _unmap(self, partition_id): + """See KeyMapper.unmap().""" + return (self._unescape(osutils.basename(partition_id)),) + + def _unescape(self, basename): + """No unescaping needed for HashPrefixMapper.""" + return basename + + +class HashEscapedPrefixMapper(HashPrefixMapper): + """Combines the escaped first component of a key with a hash. + + This mapper is for use with a transport based backend. + """ + + _safe = "abcdefghijklmnopqrstuvwxyz0123456789-_@,." + + def _escape(self, prefix): + """Turn a key element into a filesystem safe string. + + This is similar to a plain urlutils.quote, except + it uses specific safe characters, so that it doesn't + have to translate a lot of valid file ids. + """ + # @ does not get escaped. This is because it is a valid + # filesystem character we use all the time, and it looks + # a lot better than seeing %40 all the time. + r = [((c in self._safe) and c or ('%%%02x' % ord(c))) + for c in prefix] + return ''.join(r) + + def _unescape(self, basename): + """Escaped names are easily unescaped by urlutils.""" + return urlutils.unquote(basename) + + +def make_versioned_files_factory(versioned_file_factory, mapper): + """Create a ThunkedVersionedFiles factory. + + This will create a callable which when called creates a + ThunkedVersionedFiles on a transport, using mapper to access individual + versioned files, and versioned_file_factory to create each individual file. + """ + def factory(transport): + return ThunkedVersionedFiles(transport, versioned_file_factory, mapper, + lambda:True) + return factory + + +class VersionedFiles(object): + """Storage for many versioned files. + + This object allows a single keyspace for accessing the history graph and + contents of named bytestrings. + + Currently no implementation allows the graph of different key prefixes to + intersect, but the API does allow such implementations in the future. + + The keyspace is expressed via simple tuples. Any instance of VersionedFiles + may have a different length key-size, but that size will be constant for + all texts added to or retrieved from it. For instance, bzrlib uses + instances with a key-size of 2 for storing user files in a repository, with + the first element the fileid, and the second the version of that file. + + The use of tuples allows a single code base to support several different + uses with only the mapping logic changing from instance to instance. + + :ivar _immediate_fallback_vfs: For subclasses that support stacking, + this is a list of other VersionedFiles immediately underneath this + one. They may in turn each have further fallbacks. + """ + + def add_lines(self, key, parents, lines, parent_texts=None, + left_matching_blocks=None, nostore_sha=None, random_id=False, + check_content=True): + """Add a text to the store. + + :param key: The key tuple of the text to add. If the last element is + None, a CHK string will be generated during the addition. + :param parents: The parents key tuples of the text to add. + :param lines: A list of lines. Each line must be a bytestring. And all + of them except the last must be terminated with \n and contain no + other \n's. The last line may either contain no \n's or a single + terminating \n. If the lines list does meet this constraint the add + routine may error or may succeed - but you will be unable to read + the data back accurately. (Checking the lines have been split + correctly is expensive and extremely unlikely to catch bugs so it + is not done at runtime unless check_content is True.) + :param parent_texts: An optional dictionary containing the opaque + representations of some or all of the parents of version_id to + allow delta optimisations. VERY IMPORTANT: the texts must be those + returned by add_lines or data corruption can be caused. + :param left_matching_blocks: a hint about which areas are common + between the text and its left-hand-parent. The format is + the SequenceMatcher.get_matching_blocks format. + :param nostore_sha: Raise ExistingContent and do not add the lines to + the versioned file if the digest of the lines matches this. + :param random_id: If True a random id has been selected rather than + an id determined by some deterministic process such as a converter + from a foreign VCS. When True the backend may choose not to check + for uniqueness of the resulting key within the versioned file, so + this should only be done when the result is expected to be unique + anyway. + :param check_content: If True, the lines supplied are verified to be + bytestrings that are correctly formed lines. + :return: The text sha1, the number of bytes in the text, and an opaque + representation of the inserted version which can be provided + back to future add_lines calls in the parent_texts dictionary. + """ + raise NotImplementedError(self.add_lines) + + def _add_text(self, key, parents, text, nostore_sha=None, random_id=False): + """Add a text to the store. + + This is a private function for use by VersionedFileCommitBuilder. + + :param key: The key tuple of the text to add. If the last element is + None, a CHK string will be generated during the addition. + :param parents: The parents key tuples of the text to add. + :param text: A string containing the text to be committed. + :param nostore_sha: Raise ExistingContent and do not add the lines to + the versioned file if the digest of the lines matches this. + :param random_id: If True a random id has been selected rather than + an id determined by some deterministic process such as a converter + from a foreign VCS. When True the backend may choose not to check + for uniqueness of the resulting key within the versioned file, so + this should only be done when the result is expected to be unique + anyway. + :param check_content: If True, the lines supplied are verified to be + bytestrings that are correctly formed lines. + :return: The text sha1, the number of bytes in the text, and an opaque + representation of the inserted version which can be provided + back to future _add_text calls in the parent_texts dictionary. + """ + # The default implementation just thunks over to .add_lines(), + # inefficient, but it works. + return self.add_lines(key, parents, osutils.split_lines(text), + nostore_sha=nostore_sha, + random_id=random_id, + check_content=True) + + def add_mpdiffs(self, records): + """Add mpdiffs to this VersionedFile. + + Records should be iterables of version, parents, expected_sha1, + mpdiff. mpdiff should be a MultiParent instance. + """ + vf_parents = {} + mpvf = multiparent.MultiMemoryVersionedFile() + versions = [] + for version, parent_ids, expected_sha1, mpdiff in records: + versions.append(version) + mpvf.add_diff(mpdiff, version, parent_ids) + needed_parents = set() + for version, parent_ids, expected_sha1, mpdiff in records: + needed_parents.update(p for p in parent_ids + if not mpvf.has_version(p)) + # It seems likely that adding all the present parents as fulltexts can + # easily exhaust memory. + chunks_to_lines = osutils.chunks_to_lines + for record in self.get_record_stream(needed_parents, 'unordered', + True): + if record.storage_kind == 'absent': + continue + mpvf.add_version(chunks_to_lines(record.get_bytes_as('chunked')), + record.key, []) + for (key, parent_keys, expected_sha1, mpdiff), lines in\ + zip(records, mpvf.get_line_list(versions)): + if len(parent_keys) == 1: + left_matching_blocks = list(mpdiff.get_matching_blocks(0, + mpvf.get_diff(parent_keys[0]).num_lines())) + else: + left_matching_blocks = None + version_sha1, _, version_text = self.add_lines(key, + parent_keys, lines, vf_parents, + left_matching_blocks=left_matching_blocks) + if version_sha1 != expected_sha1: + raise errors.VersionedFileInvalidChecksum(version) + vf_parents[key] = version_text + + def annotate(self, key): + """Return a list of (version-key, line) tuples for the text of key. + + :raise RevisionNotPresent: If the key is not present. + """ + raise NotImplementedError(self.annotate) + + def check(self, progress_bar=None): + """Check this object for integrity. + + :param progress_bar: A progress bar to output as the check progresses. + :param keys: Specific keys within the VersionedFiles to check. When + this parameter is not None, check() becomes a generator as per + get_record_stream. The difference to get_record_stream is that + more or deeper checks will be performed. + :return: None, or if keys was supplied a generator as per + get_record_stream. + """ + raise NotImplementedError(self.check) + + @staticmethod + def check_not_reserved_id(version_id): + revision.check_not_reserved_id(version_id) + + def clear_cache(self): + """Clear whatever caches this VersionedFile holds. + + This is generally called after an operation has been performed, when we + don't expect to be using this versioned file again soon. + """ + + def _check_lines_not_unicode(self, lines): + """Check that lines being added to a versioned file are not unicode.""" + for line in lines: + if line.__class__ is not str: + raise errors.BzrBadParameterUnicode("lines") + + def _check_lines_are_lines(self, lines): + """Check that the lines really are full lines without inline EOL.""" + for line in lines: + if '\n' in line[:-1]: + raise errors.BzrBadParameterContainsNewline("lines") + + def get_known_graph_ancestry(self, keys): + """Get a KnownGraph instance with the ancestry of keys.""" + # most basic implementation is a loop around get_parent_map + pending = set(keys) + parent_map = {} + while pending: + this_parent_map = self.get_parent_map(pending) + parent_map.update(this_parent_map) + pending = set() + map(pending.update, this_parent_map.itervalues()) + pending = pending.difference(parent_map) + kg = _mod_graph.KnownGraph(parent_map) + return kg + + def get_parent_map(self, keys): + """Get a map of the parents of keys. + + :param keys: The keys to look up parents for. + :return: A mapping from keys to parents. Absent keys are absent from + the mapping. + """ + raise NotImplementedError(self.get_parent_map) + + def get_record_stream(self, keys, ordering, include_delta_closure): + """Get a stream of records for keys. + + :param keys: The keys to include. + :param ordering: Either 'unordered' or 'topological'. A topologically + sorted stream has compression parents strictly before their + children. + :param include_delta_closure: If True then the closure across any + compression parents will be included (in the opaque data). + :return: An iterator of ContentFactory objects, each of which is only + valid until the iterator is advanced. + """ + raise NotImplementedError(self.get_record_stream) + + def get_sha1s(self, keys): + """Get the sha1's of the texts for the given keys. + + :param keys: The names of the keys to lookup + :return: a dict from key to sha1 digest. Keys of texts which are not + present in the store are not present in the returned + dictionary. + """ + raise NotImplementedError(self.get_sha1s) + + has_key = index._has_key_from_parent_map + + def get_missing_compression_parent_keys(self): + """Return an iterable of keys of missing compression parents. + + Check this after calling insert_record_stream to find out if there are + any missing compression parents. If there are, the records that + depend on them are not able to be inserted safely. The precise + behaviour depends on the concrete VersionedFiles class in use. + + Classes that do not support this will raise NotImplementedError. + """ + raise NotImplementedError(self.get_missing_compression_parent_keys) + + def insert_record_stream(self, stream): + """Insert a record stream into this container. + + :param stream: A stream of records to insert. + :return: None + :seealso VersionedFile.get_record_stream: + """ + raise NotImplementedError + + def iter_lines_added_or_present_in_keys(self, keys, pb=None): + """Iterate over the lines in the versioned files from keys. + + This may return lines from other keys. Each item the returned + iterator yields is a tuple of a line and a text version that that line + is present in (not introduced in). + + Ordering of results is in whatever order is most suitable for the + underlying storage format. + + If a progress bar is supplied, it may be used to indicate progress. + The caller is responsible for cleaning up progress bars (because this + is an iterator). + + NOTES: + * Lines are normalised by the underlying store: they will all have \n + terminators. + * Lines are returned in arbitrary order. + + :return: An iterator over (line, key). + """ + raise NotImplementedError(self.iter_lines_added_or_present_in_keys) + + def keys(self): + """Return a iterable of the keys for all the contained texts.""" + raise NotImplementedError(self.keys) + + def make_mpdiffs(self, keys): + """Create multiparent diffs for specified keys.""" + generator = _MPDiffGenerator(self, keys) + return generator.compute_diffs() + + def get_annotator(self): + return annotate.Annotator(self) + + missing_keys = index._missing_keys_from_parent_map + + def _extract_blocks(self, version_id, source, target): + return None + + def _transitive_fallbacks(self): + """Return the whole stack of fallback versionedfiles. + + This VersionedFiles may have a list of fallbacks, but it doesn't + necessarily know about the whole stack going down, and it can't know + at open time because they may change after the objects are opened. + """ + all_fallbacks = [] + for a_vfs in self._immediate_fallback_vfs: + all_fallbacks.append(a_vfs) + all_fallbacks.extend(a_vfs._transitive_fallbacks()) + return all_fallbacks + + +class ThunkedVersionedFiles(VersionedFiles): + """Storage for many versioned files thunked onto a 'VersionedFile' class. + + This object allows a single keyspace for accessing the history graph and + contents of named bytestrings. + + Currently no implementation allows the graph of different key prefixes to + intersect, but the API does allow such implementations in the future. + """ + + def __init__(self, transport, file_factory, mapper, is_locked): + """Create a ThunkedVersionedFiles.""" + self._transport = transport + self._file_factory = file_factory + self._mapper = mapper + self._is_locked = is_locked + + def add_lines(self, key, parents, lines, parent_texts=None, + left_matching_blocks=None, nostore_sha=None, random_id=False, + check_content=True): + """See VersionedFiles.add_lines().""" + path = self._mapper.map(key) + version_id = key[-1] + parents = [parent[-1] for parent in parents] + vf = self._get_vf(path) + try: + try: + return vf.add_lines_with_ghosts(version_id, parents, lines, + parent_texts=parent_texts, + left_matching_blocks=left_matching_blocks, + nostore_sha=nostore_sha, random_id=random_id, + check_content=check_content) + except NotImplementedError: + return vf.add_lines(version_id, parents, lines, + parent_texts=parent_texts, + left_matching_blocks=left_matching_blocks, + nostore_sha=nostore_sha, random_id=random_id, + check_content=check_content) + except errors.NoSuchFile: + # parent directory may be missing, try again. + self._transport.mkdir(osutils.dirname(path)) + try: + return vf.add_lines_with_ghosts(version_id, parents, lines, + parent_texts=parent_texts, + left_matching_blocks=left_matching_blocks, + nostore_sha=nostore_sha, random_id=random_id, + check_content=check_content) + except NotImplementedError: + return vf.add_lines(version_id, parents, lines, + parent_texts=parent_texts, + left_matching_blocks=left_matching_blocks, + nostore_sha=nostore_sha, random_id=random_id, + check_content=check_content) + + def annotate(self, key): + """Return a list of (version-key, line) tuples for the text of key. + + :raise RevisionNotPresent: If the key is not present. + """ + prefix = key[:-1] + path = self._mapper.map(prefix) + vf = self._get_vf(path) + origins = vf.annotate(key[-1]) + result = [] + for origin, line in origins: + result.append((prefix + (origin,), line)) + return result + + def check(self, progress_bar=None, keys=None): + """See VersionedFiles.check().""" + # XXX: This is over-enthusiastic but as we only thunk for Weaves today + # this is tolerable. Ideally we'd pass keys down to check() and + # have the older VersiondFile interface updated too. + for prefix, vf in self._iter_all_components(): + vf.check() + if keys is not None: + return self.get_record_stream(keys, 'unordered', True) + + def get_parent_map(self, keys): + """Get a map of the parents of keys. + + :param keys: The keys to look up parents for. + :return: A mapping from keys to parents. Absent keys are absent from + the mapping. + """ + prefixes = self._partition_keys(keys) + result = {} + for prefix, suffixes in prefixes.items(): + path = self._mapper.map(prefix) + vf = self._get_vf(path) + parent_map = vf.get_parent_map(suffixes) + for key, parents in parent_map.items(): + result[prefix + (key,)] = tuple( + prefix + (parent,) for parent in parents) + return result + + def _get_vf(self, path): + if not self._is_locked(): + raise errors.ObjectNotLocked(self) + return self._file_factory(path, self._transport, create=True, + get_scope=lambda:None) + + def _partition_keys(self, keys): + """Turn keys into a dict of prefix:suffix_list.""" + result = {} + for key in keys: + prefix_keys = result.setdefault(key[:-1], []) + prefix_keys.append(key[-1]) + return result + + def _get_all_prefixes(self): + # Identify all key prefixes. + # XXX: A bit hacky, needs polish. + if type(self._mapper) == ConstantMapper: + paths = [self._mapper.map(())] + prefixes = [()] + else: + relpaths = set() + for quoted_relpath in self._transport.iter_files_recursive(): + path, ext = os.path.splitext(quoted_relpath) + relpaths.add(path) + paths = list(relpaths) + prefixes = [self._mapper.unmap(path) for path in paths] + return zip(paths, prefixes) + + def get_record_stream(self, keys, ordering, include_delta_closure): + """See VersionedFiles.get_record_stream().""" + # Ordering will be taken care of by each partitioned store; group keys + # by partition. + keys = sorted(keys) + for prefix, suffixes, vf in self._iter_keys_vf(keys): + suffixes = [(suffix,) for suffix in suffixes] + for record in vf.get_record_stream(suffixes, ordering, + include_delta_closure): + if record.parents is not None: + record.parents = tuple( + prefix + parent for parent in record.parents) + record.key = prefix + record.key + yield record + + def _iter_keys_vf(self, keys): + prefixes = self._partition_keys(keys) + sha1s = {} + for prefix, suffixes in prefixes.items(): + path = self._mapper.map(prefix) + vf = self._get_vf(path) + yield prefix, suffixes, vf + + def get_sha1s(self, keys): + """See VersionedFiles.get_sha1s().""" + sha1s = {} + for prefix,suffixes, vf in self._iter_keys_vf(keys): + vf_sha1s = vf.get_sha1s(suffixes) + for suffix, sha1 in vf_sha1s.iteritems(): + sha1s[prefix + (suffix,)] = sha1 + return sha1s + + def insert_record_stream(self, stream): + """Insert a record stream into this container. + + :param stream: A stream of records to insert. + :return: None + :seealso VersionedFile.get_record_stream: + """ + for record in stream: + prefix = record.key[:-1] + key = record.key[-1:] + if record.parents is not None: + parents = [parent[-1:] for parent in record.parents] + else: + parents = None + thunk_record = AdapterFactory(key, parents, record) + path = self._mapper.map(prefix) + # Note that this parses the file many times; we can do better but + # as this only impacts weaves in terms of performance, it is + # tolerable. + vf = self._get_vf(path) + vf.insert_record_stream([thunk_record]) + + def iter_lines_added_or_present_in_keys(self, keys, pb=None): + """Iterate over the lines in the versioned files from keys. + + This may return lines from other keys. Each item the returned + iterator yields is a tuple of a line and a text version that that line + is present in (not introduced in). + + Ordering of results is in whatever order is most suitable for the + underlying storage format. + + If a progress bar is supplied, it may be used to indicate progress. + The caller is responsible for cleaning up progress bars (because this + is an iterator). + + NOTES: + * Lines are normalised by the underlying store: they will all have \n + terminators. + * Lines are returned in arbitrary order. + + :return: An iterator over (line, key). + """ + for prefix, suffixes, vf in self._iter_keys_vf(keys): + for line, version in vf.iter_lines_added_or_present_in_versions(suffixes): + yield line, prefix + (version,) + + def _iter_all_components(self): + for path, prefix in self._get_all_prefixes(): + yield prefix, self._get_vf(path) + + def keys(self): + """See VersionedFiles.keys().""" + result = set() + for prefix, vf in self._iter_all_components(): + for suffix in vf.versions(): + result.add(prefix + (suffix,)) + return result + + +class VersionedFilesWithFallbacks(VersionedFiles): + + def without_fallbacks(self): + """Return a clone of this object without any fallbacks configured.""" + raise NotImplementedError(self.without_fallbacks) + + def add_fallback_versioned_files(self, a_versioned_files): + """Add a source of texts for texts not present in this knit. + + :param a_versioned_files: A VersionedFiles object. + """ + raise NotImplementedError(self.add_fallback_versioned_files) + + def get_known_graph_ancestry(self, keys): + """Get a KnownGraph instance with the ancestry of keys.""" + parent_map, missing_keys = self._index.find_ancestry(keys) + for fallback in self._transitive_fallbacks(): + if not missing_keys: + break + (f_parent_map, f_missing_keys) = fallback._index.find_ancestry( + missing_keys) + parent_map.update(f_parent_map) + missing_keys = f_missing_keys + kg = _mod_graph.KnownGraph(parent_map) + return kg + + +class _PlanMergeVersionedFile(VersionedFiles): + """A VersionedFile for uncommitted and committed texts. + + It is intended to allow merges to be planned with working tree texts. + It implements only the small part of the VersionedFiles interface used by + PlanMerge. It falls back to multiple versionedfiles for data not stored in + _PlanMergeVersionedFile itself. + + :ivar: fallback_versionedfiles a list of VersionedFiles objects that can be + queried for missing texts. + """ + + def __init__(self, file_id): + """Create a _PlanMergeVersionedFile. + + :param file_id: Used with _PlanMerge code which is not yet fully + tuple-keyspace aware. + """ + self._file_id = file_id + # fallback locations + self.fallback_versionedfiles = [] + # Parents for locally held keys. + self._parents = {} + # line data for locally held keys. + self._lines = {} + # key lookup providers + self._providers = [_mod_graph.DictParentsProvider(self._parents)] + + def plan_merge(self, ver_a, ver_b, base=None): + """See VersionedFile.plan_merge""" + from bzrlib.merge import _PlanMerge + if base is None: + return _PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge() + old_plan = list(_PlanMerge(ver_a, base, self, (self._file_id,)).plan_merge()) + new_plan = list(_PlanMerge(ver_a, ver_b, self, (self._file_id,)).plan_merge()) + return _PlanMerge._subtract_plans(old_plan, new_plan) + + def plan_lca_merge(self, ver_a, ver_b, base=None): + from bzrlib.merge import _PlanLCAMerge + graph = _mod_graph.Graph(self) + new_plan = _PlanLCAMerge(ver_a, ver_b, self, (self._file_id,), graph).plan_merge() + if base is None: + return new_plan + old_plan = _PlanLCAMerge(ver_a, base, self, (self._file_id,), graph).plan_merge() + return _PlanLCAMerge._subtract_plans(list(old_plan), list(new_plan)) + + def add_lines(self, key, parents, lines): + """See VersionedFiles.add_lines + + Lines are added locally, not to fallback versionedfiles. Also, ghosts + are permitted. Only reserved ids are permitted. + """ + if type(key) is not tuple: + raise TypeError(key) + if not revision.is_reserved_id(key[-1]): + raise ValueError('Only reserved ids may be used') + if parents is None: + raise ValueError('Parents may not be None') + if lines is None: + raise ValueError('Lines may not be None') + self._parents[key] = tuple(parents) + self._lines[key] = lines + + def get_record_stream(self, keys, ordering, include_delta_closure): + pending = set(keys) + for key in keys: + if key in self._lines: + lines = self._lines[key] + parents = self._parents[key] + pending.remove(key) + yield ChunkedContentFactory(key, parents, None, lines) + for versionedfile in self.fallback_versionedfiles: + for record in versionedfile.get_record_stream( + pending, 'unordered', True): + if record.storage_kind == 'absent': + continue + else: + pending.remove(record.key) + yield record + if not pending: + return + # report absent entries + for key in pending: + yield AbsentContentFactory(key) + + def get_parent_map(self, keys): + """See VersionedFiles.get_parent_map""" + # We create a new provider because a fallback may have been added. + # If we make fallbacks private we can update a stack list and avoid + # object creation thrashing. + keys = set(keys) + result = {} + if revision.NULL_REVISION in keys: + keys.remove(revision.NULL_REVISION) + result[revision.NULL_REVISION] = () + self._providers = self._providers[:1] + self.fallback_versionedfiles + result.update( + _mod_graph.StackedParentsProvider( + self._providers).get_parent_map(keys)) + for key, parents in result.iteritems(): + if parents == (): + result[key] = (revision.NULL_REVISION,) + return result + + +class PlanWeaveMerge(TextMerge): + """Weave merge that takes a plan as its input. + + This exists so that VersionedFile.plan_merge is implementable. + Most callers will want to use WeaveMerge instead. + """ + + def __init__(self, plan, a_marker=TextMerge.A_MARKER, + b_marker=TextMerge.B_MARKER): + TextMerge.__init__(self, a_marker, b_marker) + self.plan = list(plan) + + def _merge_struct(self): + lines_a = [] + lines_b = [] + ch_a = ch_b = False + + def outstanding_struct(): + if not lines_a and not lines_b: + return + elif ch_a and not ch_b: + # one-sided change: + yield(lines_a,) + elif ch_b and not ch_a: + yield (lines_b,) + elif lines_a == lines_b: + yield(lines_a,) + else: + yield (lines_a, lines_b) + + # We previously considered either 'unchanged' or 'killed-both' lines + # to be possible places to resynchronize. However, assuming agreement + # on killed-both lines may be too aggressive. -- mbp 20060324 + for state, line in self.plan: + if state == 'unchanged': + # resync and flush queued conflicts changes if any + for struct in outstanding_struct(): + yield struct + lines_a = [] + lines_b = [] + ch_a = ch_b = False + + if state == 'unchanged': + if line: + yield ([line],) + elif state == 'killed-a': + ch_a = True + lines_b.append(line) + elif state == 'killed-b': + ch_b = True + lines_a.append(line) + elif state == 'new-a': + ch_a = True + lines_a.append(line) + elif state == 'new-b': + ch_b = True + lines_b.append(line) + elif state == 'conflicted-a': + ch_b = ch_a = True + lines_a.append(line) + elif state == 'conflicted-b': + ch_b = ch_a = True + lines_b.append(line) + elif state == 'killed-both': + # This counts as a change, even though there is no associated + # line + ch_b = ch_a = True + else: + if state not in ('irrelevant', 'ghost-a', 'ghost-b', + 'killed-base'): + raise AssertionError(state) + for struct in outstanding_struct(): + yield struct + + def base_from_plan(self): + """Construct a BASE file from the plan text.""" + base_lines = [] + for state, line in self.plan: + if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'): + # If unchanged, then this line is straight from base. If a or b + # or both killed the line, then it *used* to be in base. + base_lines.append(line) + else: + if state not in ('killed-base', 'irrelevant', + 'ghost-a', 'ghost-b', + 'new-a', 'new-b', + 'conflicted-a', 'conflicted-b'): + # killed-base, irrelevant means it doesn't apply + # ghost-a/ghost-b are harder to say for sure, but they + # aren't in the 'inc_c' which means they aren't in the + # shared base of a & b. So we don't include them. And + # obviously if the line is newly inserted, it isn't in base + + # If 'conflicted-a' or b, then it is new vs one base, but + # old versus another base. However, if we make it present + # in the base, it will be deleted from the target, and it + # seems better to get a line doubled in the merge result, + # rather than have it deleted entirely. + # Example, each node is the 'text' at that point: + # MN + # / \ + # MaN MbN + # | X | + # MabN MbaN + # \ / + # ??? + # There was a criss-cross conflict merge. Both sides + # include the other, but put themselves first. + # Weave marks this as a 'clean' merge, picking OTHER over + # THIS. (Though the details depend on order inserted into + # weave, etc.) + # LCA generates a plan: + # [('unchanged', M), + # ('conflicted-b', b), + # ('unchanged', a), + # ('conflicted-a', b), + # ('unchanged', N)] + # If you mark 'conflicted-*' as part of BASE, then a 3-way + # merge tool will cleanly generate "MaN" (as BASE vs THIS + # removes one 'b', and BASE vs OTHER removes the other) + # If you include neither, 3-way creates a clean "MbabN" as + # THIS adds one 'b', and OTHER does too. + # It seems that having the line 2 times is better than + # having it omitted. (Easier to manually delete than notice + # it needs to be added.) + raise AssertionError('Unknown state: %s' % (state,)) + return base_lines + + +class WeaveMerge(PlanWeaveMerge): + """Weave merge that takes a VersionedFile and two versions as its input.""" + + def __init__(self, versionedfile, ver_a, ver_b, + a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER): + plan = versionedfile.plan_merge(ver_a, ver_b) + PlanWeaveMerge.__init__(self, plan, a_marker, b_marker) + + +class VirtualVersionedFiles(VersionedFiles): + """Dummy implementation for VersionedFiles that uses other functions for + obtaining fulltexts and parent maps. + + This is always on the bottom of the stack and uses string keys + (rather than tuples) internally. + """ + + def __init__(self, get_parent_map, get_lines): + """Create a VirtualVersionedFiles. + + :param get_parent_map: Same signature as Repository.get_parent_map. + :param get_lines: Should return lines for specified key or None if + not available. + """ + super(VirtualVersionedFiles, self).__init__() + self._get_parent_map = get_parent_map + self._get_lines = get_lines + + def check(self, progressbar=None): + """See VersionedFiles.check. + + :note: Always returns True for VirtualVersionedFiles. + """ + return True + + def add_mpdiffs(self, records): + """See VersionedFiles.mpdiffs. + + :note: Not implemented for VirtualVersionedFiles. + """ + raise NotImplementedError(self.add_mpdiffs) + + def get_parent_map(self, keys): + """See VersionedFiles.get_parent_map.""" + return dict([((k,), tuple([(p,) for p in v])) + for k,v in self._get_parent_map([k for (k,) in keys]).iteritems()]) + + def get_sha1s(self, keys): + """See VersionedFiles.get_sha1s.""" + ret = {} + for (k,) in keys: + lines = self._get_lines(k) + if lines is not None: + if not isinstance(lines, list): + raise AssertionError + ret[(k,)] = osutils.sha_strings(lines) + return ret + + def get_record_stream(self, keys, ordering, include_delta_closure): + """See VersionedFiles.get_record_stream.""" + for (k,) in list(keys): + lines = self._get_lines(k) + if lines is not None: + if not isinstance(lines, list): + raise AssertionError + yield ChunkedContentFactory((k,), None, + sha1=osutils.sha_strings(lines), + chunks=lines) + else: + yield AbsentContentFactory((k,)) + + def iter_lines_added_or_present_in_keys(self, keys, pb=None): + """See VersionedFile.iter_lines_added_or_present_in_versions().""" + for i, (key,) in enumerate(keys): + if pb is not None: + pb.update("Finding changed lines", i, len(keys)) + for l in self._get_lines(key): + yield (l, key) + + +class NoDupeAddLinesDecorator(object): + """Decorator for a VersionedFiles that skips doing an add_lines if the key + is already present. + """ + + def __init__(self, store): + self._store = store + + def add_lines(self, key, parents, lines, parent_texts=None, + left_matching_blocks=None, nostore_sha=None, random_id=False, + check_content=True): + """See VersionedFiles.add_lines. + + This implementation may return None as the third element of the return + value when the original store wouldn't. + """ + if nostore_sha: + raise NotImplementedError( + "NoDupeAddLinesDecorator.add_lines does not implement the " + "nostore_sha behaviour.") + if key[-1] is None: + sha1 = osutils.sha_strings(lines) + key = ("sha1:" + sha1,) + else: + sha1 = None + if key in self._store.get_parent_map([key]): + # This key has already been inserted, so don't do it again. + if sha1 is None: + sha1 = osutils.sha_strings(lines) + return sha1, sum(map(len, lines)), None + return self._store.add_lines(key, parents, lines, + parent_texts=parent_texts, + left_matching_blocks=left_matching_blocks, + nostore_sha=nostore_sha, random_id=random_id, + check_content=check_content) + + def __getattr__(self, name): + return getattr(self._store, name) + + +def network_bytes_to_kind_and_offset(network_bytes): + """Strip of a record kind from the front of network_bytes. + + :param network_bytes: The bytes of a record. + :return: A tuple (storage_kind, offset_of_remaining_bytes) + """ + line_end = network_bytes.find('\n') + storage_kind = network_bytes[:line_end] + return storage_kind, line_end + 1 + + +class NetworkRecordStream(object): + """A record_stream which reconstitures a serialised stream.""" + + def __init__(self, bytes_iterator): + """Create a NetworkRecordStream. + + :param bytes_iterator: An iterator of bytes. Each item in this + iterator should have been obtained from a record_streams' + record.get_bytes_as(record.storage_kind) call. + """ + self._bytes_iterator = bytes_iterator + self._kind_factory = { + 'fulltext': fulltext_network_to_record, + 'groupcompress-block': groupcompress.network_block_to_records, + 'knit-ft-gz': knit.knit_network_to_record, + 'knit-delta-gz': knit.knit_network_to_record, + 'knit-annotated-ft-gz': knit.knit_network_to_record, + 'knit-annotated-delta-gz': knit.knit_network_to_record, + 'knit-delta-closure': knit.knit_delta_closure_to_records, + } + + def read(self): + """Read the stream. + + :return: An iterator as per VersionedFiles.get_record_stream(). + """ + for bytes in self._bytes_iterator: + storage_kind, line_end = network_bytes_to_kind_and_offset(bytes) + for record in self._kind_factory[storage_kind]( + storage_kind, bytes, line_end): + yield record + + +def fulltext_network_to_record(kind, bytes, line_end): + """Convert a network fulltext record to record.""" + meta_len, = struct.unpack('!L', bytes[line_end:line_end+4]) + record_meta = bytes[line_end+4:line_end+4+meta_len] + key, parents = bencode.bdecode_as_tuple(record_meta) + if parents == 'nil': + parents = None + fulltext = bytes[line_end+4+meta_len:] + return [FulltextContentFactory(key, parents, None, fulltext)] + + +def _length_prefix(bytes): + return struct.pack('!L', len(bytes)) + + +def record_to_fulltext_bytes(record): + if record.parents is None: + parents = 'nil' + else: + parents = record.parents + record_meta = bencode.bencode((record.key, parents)) + record_content = record.get_bytes_as('fulltext') + return "fulltext\n%s%s%s" % ( + _length_prefix(record_meta), record_meta, record_content) + + +def sort_groupcompress(parent_map): + """Sort and group the keys in parent_map into groupcompress order. + + groupcompress is defined (currently) as reverse-topological order, grouped + by the key prefix. + + :return: A sorted-list of keys + """ + # gc-optimal ordering is approximately reverse topological, + # properly grouped by file-id. + per_prefix_map = {} + for item in parent_map.iteritems(): + key = item[0] + if isinstance(key, str) or len(key) == 1: + prefix = '' + else: + prefix = key[0] + try: + per_prefix_map[prefix].append(item) + except KeyError: + per_prefix_map[prefix] = [item] + + present_keys = [] + for prefix in sorted(per_prefix_map): + present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix]))) + return present_keys + + +class _KeyRefs(object): + + def __init__(self, track_new_keys=False): + # dict mapping 'key' to 'set of keys referring to that key' + self.refs = {} + if track_new_keys: + # set remembering all new keys + self.new_keys = set() + else: + self.new_keys = None + + def clear(self): + if self.refs: + self.refs.clear() + if self.new_keys: + self.new_keys.clear() + + def add_references(self, key, refs): + # Record the new references + for referenced in refs: + try: + needed_by = self.refs[referenced] + except KeyError: + needed_by = self.refs[referenced] = set() + needed_by.add(key) + # Discard references satisfied by the new key + self.add_key(key) + + def get_new_keys(self): + return self.new_keys + + def get_unsatisfied_refs(self): + return self.refs.iterkeys() + + def _satisfy_refs_for_key(self, key): + try: + del self.refs[key] + except KeyError: + # No keys depended on this key. That's ok. + pass + + def add_key(self, key): + # satisfy refs for key, and remember that we've seen this key. + self._satisfy_refs_for_key(key) + if self.new_keys is not None: + self.new_keys.add(key) + + def satisfy_refs_for_keys(self, keys): + for key in keys: + self._satisfy_refs_for_key(key) + + def get_referrers(self): + result = set() + for referrers in self.refs.itervalues(): + result.update(referrers) + return result + + + |