summaryrefslogtreecommitdiff
path: root/bzrlib/textmerge.py
blob: 4de0cdaf2512630ec3006539481dbf0e8608380f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# Copyright (C) 2006, 2009, 2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Author: Martin Pool <mbp@canonical.com>
#         Aaron Bentley <aaron.bentley@utoronto.ca>

from __future__ import absolute_import

from bzrlib.lazy_import import lazy_import
lazy_import(globals(), """
from bzrlib import patiencediff
""")


class TextMerge(object):
    """Base class for text-mergers
    Subclasses must implement _merge_struct.

    Many methods produce or consume structured merge information.
    This is an iterable of tuples of lists of lines.
    Each tuple may have a length of 1 - 3, depending on whether the region it
    represents is conflicted.

    Unconflicted region tuples have length 1.
    Conflicted region tuples have length 2 or 3.  Index 1 is text_a, e.g. THIS.
    Index 1 is text_b, e.g. OTHER.  Index 2 is optional.  If present, it
    represents BASE.
    """
    # TODO: Show some version information (e.g. author, date) on conflicted
    # regions.
    A_MARKER = '<<<<<<< \n'
    B_MARKER = '>>>>>>> \n'
    SPLIT_MARKER = '=======\n'
    def __init__(self, a_marker=A_MARKER, b_marker=B_MARKER,
                 split_marker=SPLIT_MARKER):
        self.a_marker = a_marker
        self.b_marker = b_marker
        self.split_marker = split_marker

    def _merge_struct(self):
        """Return structured merge info.  Must be implemented by subclasses.
        See TextMerge docstring for details on the format.
        """
        raise NotImplementedError('_merge_struct is abstract')

    def struct_to_lines(self, struct_iter):
        """Convert merge result tuples to lines"""
        for lines in struct_iter:
            if len(lines) == 1:
                for line in lines[0]:
                    yield line
            else:
                yield self.a_marker
                for line in lines[0]:
                    yield line
                yield self.split_marker
                for line in lines[1]:
                    yield line
                yield self.b_marker

    def iter_useful(self, struct_iter):
        """Iterate through input tuples, skipping empty ones."""
        for group in struct_iter:
            if len(group[0]) > 0:
                yield group
            elif len(group) > 1 and len(group[1]) > 0:
                yield group

    def merge_lines(self, reprocess=False):
        """Produce an iterable of lines, suitable for writing to a file
        Returns a tuple of (line iterable, conflict indicator)
        If reprocess is True, a two-way merge will be performed on the
        intermediate structure, to reduce conflict regions.
        """
        struct = []
        conflicts = False
        for group in self.merge_struct(reprocess):
            struct.append(group)
            if len(group) > 1:
                conflicts = True
        return self.struct_to_lines(struct), conflicts

    def merge_struct(self, reprocess=False):
        """Produce structured merge info"""
        struct_iter = self.iter_useful(self._merge_struct())
        if reprocess is True:
            return self.reprocess_struct(struct_iter)
        else:
            return struct_iter

    @staticmethod
    def reprocess_struct(struct_iter):
        """ Perform a two-way merge on structural merge info.
        This reduces the size of conflict regions, but breaks the connection
        between the BASE text and the conflict region.

        This process may split a single conflict region into several smaller
        ones, but will not introduce new conflicts.
        """
        for group in struct_iter:
            if len(group) == 1:
                yield group
            else:
                for newgroup in Merge2(group[0], group[1]).merge_struct():
                    yield newgroup


class Merge2(TextMerge):
    """ Two-way merge.
    In a two way merge, common regions are shown as unconflicting, and uncommon
    regions produce conflicts.
    """

    def __init__(self, lines_a, lines_b, a_marker=TextMerge.A_MARKER,
                 b_marker=TextMerge.B_MARKER,
                 split_marker=TextMerge.SPLIT_MARKER):
        TextMerge.__init__(self, a_marker, b_marker, split_marker)
        self.lines_a = lines_a
        self.lines_b = lines_b

    def _merge_struct(self):
        """Return structured merge info.
        See TextMerge docstring.
        """
        sm = patiencediff.PatienceSequenceMatcher(
            None, self.lines_a, self.lines_b)
        pos_a = 0
        pos_b = 0
        for ai, bi, l in sm.get_matching_blocks():
            # non-matching lines
            yield(self.lines_a[pos_a:ai], self.lines_b[pos_b:bi])
            # matching lines
            yield(self.lines_a[ai:ai+l],)
            pos_a = ai + l
            pos_b = bi + l
        # final non-matching lines
        yield(self.lines_a[pos_a:-1], self.lines_b[pos_b:-1])