summaryrefslogtreecommitdiff
path: root/bzrlib/bundle/serializer/v08.py
blob: 77e3cd4dffb12cd9ea0ba7efdf6d88367cf3019f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
# Copyright (C) 2005, 2006, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Serializer factory for reading and writing bundles.
"""

from __future__ import absolute_import

from bzrlib import (
    errors,
    ui,
    )
from bzrlib.bundle.serializer import (BundleSerializer,
                                      _get_bundle_header,
                                     )
from bzrlib.bundle.serializer import binary_diff
from bzrlib.bundle.bundle_data import (RevisionInfo, BundleInfo)
from bzrlib.diff import internal_diff
from bzrlib.revision import NULL_REVISION
from bzrlib.testament import StrictTestament
from bzrlib.timestamp import (
    format_highres_date,
    )
from bzrlib.textfile import text_file
from bzrlib.trace import mutter

# Textual spelling of boolean action properties as written into a bundle.
bool_text = {False: 'no', True: 'yes'}


class Action(object):
    """One tree-change line ("=== <name> <parameters> // <properties>").

    :ivar name: The action name, written first on the line.
    :ivar parameters: List of strings written after the name, separated by
        single spaces.
    :ivar properties: List of tuples.  A 1-tuple is written as-is, a
        2-tuple is written as "key:value".
    """

    def __init__(self, name, parameters=None, properties=None):
        """Create an action.

        :param name: The action name.
        :param parameters: Optional list of parameter strings; a fresh empty
            list is used when omitted.
        :param properties: Optional list of property tuples; a fresh empty
            list is used when omitted.
        """
        self.name = name
        if parameters is None:
            self.parameters = []
        else:
            self.parameters = parameters
        if properties is None:
            self.properties = []
        else:
            self.properties = properties

    def add_utf8_property(self, name, value):
        """Add a property whose value is currently utf8 to the action."""
        self.properties.append((name, value.decode('utf8')))

    def add_property(self, name, value):
        """Add a property to the action"""
        self.properties.append((name, value))

    def add_bool_property(self, name, value):
        """Add a boolean property to the action"""
        self.add_property(name, bool_text[value])

    def write(self, to_file):
        """Write the action line to a file, wrapping it at 79 bytes.

        The line is encoded as UTF-8.  When it is longer than 79 bytes it is
        continued on following lines, each of which starts with "... ".

        :param to_file: A file-like object accepting byte strings.
        :raises TypeError: If a property is not a 1-tuple or a 2-tuple.
        """
        p_texts = [' '.join([self.name] + self.parameters)]
        for prop in self.properties:
            if len(prop) == 1:
                p_texts.append(prop[0])
            else:
                try:
                    p_texts.append('%s:%s' % prop)
                except TypeError:
                    # Report the offending property instead of the generic
                    # string-formatting message.
                    raise TypeError('invalid action property: %r' % (prop,))
        text_line = ('=== ' + ' // '.join(p_texts)).encode('utf-8')
        available = 79
        while len(text_line) > available:
            # Never cut inside a multi-byte UTF-8 sequence: readers decode
            # each physical line separately.  A UTF-8 character has at most
            # three continuation bytes (0b10xxxxxx), so three steps suffice.
            cut = available
            for _ in range(3):
                if (bytearray(text_line[cut:cut + 1])[0] & 0xC0) != 0x80:
                    break
                cut -= 1
            to_file.write(text_line[:cut])
            text_line = text_line[cut:]
            to_file.write(b'\n... ')
            # Continuation lines lose room to the "... " prefix.
            available = 79 - len('... ')
        to_file.write(text_line + b'\n')


class BundleSerializerV08(BundleSerializer):
    """Reads and writes bundles using the '0.8' bundle header."""

    def read(self, f):
        """Read the rest of the bundles from the supplied file.

        :param f: The file to read from
        :return: The bundle info object built by BundleReader
        """
        return BundleReader(f).info

    def check_compatible(self):
        """Refuse sources that support rich roots.

        :raises errors.IncompatibleBundleFormat: If self.source reports
            supports_rich_root().
        """
        if self.source.supports_rich_root():
            raise errors.IncompatibleBundleFormat('0.8', repr(self.source))

    def write(self, source, revision_ids, forced_bases, f):
        """Write the bundles to the supplied file.

        :param source: A source for revision information
        :param revision_ids: The list of revision ids to serialize
        :param forced_bases: A dict of revision -> base that overrides default
        :param f: The file to output to
        """
        self.source = source
        self.revision_ids = revision_ids
        self.forced_bases = forced_bases
        self.to_file = f
        self.check_compatible()
        source.lock_read()
        try:
            self._write_main_header()
            pb = ui.ui_factory.nested_progress_bar()
            try:
                self._write_revisions(pb)
            finally:
                pb.finished()
        finally:
            source.unlock()

    def write_bundle(self, repository, target, base, fileobj):
        """Write a bundle by delegating to self._write_bundle.

        _write_bundle is not defined in this class; presumably it is
        inherited from BundleSerializer.
        """
        return self._write_bundle(repository, target, base, fileobj)

    def _write_main_header(self):
        """Write the header for the changes"""
        f = self.to_file
        f.write(_get_bundle_header('0.8'))
        f.write('#\n')

    def _write(self, key, value, indent=1, trailing_space_when_empty=False):
        """Write out meta information, with proper indenting, etc.

        :param key: The property name; it is encoded as UTF-8.
        :param value: A byte string, a unicode string, an iterable of
            strings (written one per line, indented two further columns),
            or a false value (only "key:" is written).
        :param indent: Number of spaces after the leading '#'; must be at
            least 1.
        :param trailing_space_when_empty: To work around a bug in earlier
            bundle readers, when writing an empty property, we use "prop: \n"
            rather than writing "prop:\n".
            If this parameter is True, and value is the empty string, we will
            write an extra space.
        :raises ValueError: If indent is less than 1.
        """
        if indent < 1:
            raise ValueError('indentation must be greater than 0')
        f = self.to_file
        f.write('#' + (' ' * indent))
        f.write(key.encode('utf-8'))
        if not value:
            if trailing_space_when_empty and value == '':
                f.write(': \n')
            else:
                f.write(':\n')
        elif isinstance(value, str):
            f.write(': ')
            f.write(value)
            f.write('\n')
        elif isinstance(value, unicode):
            f.write(': ')
            f.write(value.encode('utf-8'))
            f.write('\n')
        else:
            f.write(':\n')
            for entry in value:
                f.write('#' + (' ' * (indent+2)))
                if isinstance(entry, str):
                    f.write(entry)
                else:
                    f.write(entry.encode('utf-8'))
                f.write('\n')

    def _write_revisions(self, pb):
        """Write the information for all of the revisions.

        :param pb: Progress bar updated once per revision.
        """

        # Optimize for the case of revisions in order: the base tree of the
        # revision just written is remembered so it can be reused when the
        # next revision (or its base) is that same revision.
        last_rev_id = None
        last_rev_tree = None

        i_max = len(self.revision_ids)
        for i, rev_id in enumerate(self.revision_ids):
            pb.update("Generating revision data", i, i_max)
            rev = self.source.get_revision(rev_id)
            if rev_id == last_rev_id:
                rev_tree = last_rev_tree
            else:
                rev_tree = self.source.revision_tree(rev_id)
            if rev_id in self.forced_bases:
                explicit_base = True
                base_id = self.forced_bases[rev_id]
                if base_id is None:
                    base_id = NULL_REVISION
            else:
                explicit_base = False
                if rev.parent_ids:
                    base_id = rev.parent_ids[-1]
                else:
                    base_id = NULL_REVISION

            if base_id == last_rev_id:
                base_tree = last_rev_tree
            else:
                base_tree = self.source.revision_tree(base_id)
            # Only the first revision may be written as text diffs; every
            # later one is forced down the base64 (binary) path.
            force_binary = (i != 0)
            self._write_revision(rev, rev_tree, base_id, base_tree,
                                 explicit_base, force_binary)

            last_rev_id = base_id
            last_rev_tree = base_tree

    def _testament_sha1(self, revision_id):
        """Return the strict testament sha1 of revision_id in self.source."""
        return StrictTestament.from_revision(self.source,
                                             revision_id).as_sha1()

    def _write_revision(self, rev, rev_tree, base_rev, base_tree,
                        explicit_base, force_binary):
        """Write out the information for a revision.

        The message, committer and date are written first, then the tree
        delta, then the footer (revision id, sha1s, parents, optional base id
        and properties) followed by a blank line.

        :param rev: The revision object to describe.
        :param rev_tree: The tree of rev.
        :param base_rev: The revision id the delta is computed against.
        :param base_tree: The tree of base_rev.
        :param explicit_base: If True, a 'base id' entry is written.
        :param force_binary: Passed on to _write_delta.
        """
        def w(key, value):
            self._write(key, value, indent=1)

        w('message', rev.message.split('\n'))
        w('committer', rev.committer)
        w('date', format_highres_date(rev.timestamp, rev.timezone))
        self.to_file.write('\n')

        self._write_delta(rev_tree, base_tree, rev.revision_id, force_binary)

        w('revision id', rev.revision_id)
        w('sha1', self._testament_sha1(rev.revision_id))
        w('inventory sha1', rev.inventory_sha1)
        if rev.parent_ids:
            w('parent ids', rev.parent_ids)
        if explicit_base:
            w('base id', base_rev)
        if rev.properties:
            self._write('properties', None, indent=1)
            for name, value in sorted(rev.properties.items()):
                self._write(name, value, indent=3,
                            trailing_space_when_empty=True)

        # Add an extra blank space at the end
        self.to_file.write('\n')

    def _write_action(self, name, parameters, properties=None):
        """Write a single, unwrapped "=== ..." action line.

        :param name: The action name.
        :param parameters: List of strings written after the name.
        :param properties: Optional list of (key, value) pairs, each written
            as "key:value".  The name/parameters text and every property are
            separated by " // ".
        """
        if properties is None:
            properties = []
        # The name and parameters form the first " // "-separated field, so
        # that properties never run into the last parameter.
        p_texts = [' '.join([name]+parameters)]
        p_texts.extend('%s:%s' % v for v in properties)
        self.to_file.write('=== ')
        self.to_file.write(' // '.join(p_texts).encode('utf-8'))
        self.to_file.write('\n')

    def _write_delta(self, new_tree, old_tree, default_revision_id,
                     force_binary):
        """Write out the changes between the trees.

        :param new_tree: The tree being described.
        :param old_tree: The tree the changes are relative to.
        :param default_revision_id: Entries whose revision equals this id do
            not get a 'last-changed' property.
        :param force_binary: If True, file contents are always written as
            base64 binary diffs.
        """
        DEVNULL = '/dev/null'

        def do_diff(file_id, old_path, new_path, action, force_binary):
            def tree_lines(tree, require_text=False):
                if tree.has_id(file_id):
                    tree_file = tree.get_file(file_id)
                    if require_text is True:
                        tree_file = text_file(tree_file)
                    return tree_file.readlines()
                else:
                    return []

            try:
                if force_binary:
                    # Jump straight to the binary branch below.
                    raise errors.BinaryFile()
                # Both sides are read before anything is written, so a
                # BinaryFile error here leaves the output untouched.
                old_lines = tree_lines(old_tree, require_text=True)
                new_lines = tree_lines(new_tree, require_text=True)
                action.write(self.to_file)
                internal_diff(old_path, old_lines, new_path, new_lines,
                              self.to_file)
            except errors.BinaryFile:
                old_lines = tree_lines(old_tree, require_text=False)
                new_lines = tree_lines(new_tree, require_text=False)
                action.add_property('encoding', 'base64')
                action.write(self.to_file)
                binary_diff(old_path, old_lines, new_path, new_lines,
                            self.to_file)

        def finish_action(action, file_id, kind, meta_modified, text_modified,
                          old_path, new_path):
            entry = new_tree.root_inventory[file_id]
            if entry.revision != default_revision_id:
                action.add_utf8_property('last-changed', entry.revision)
            if meta_modified:
                action.add_bool_property('executable', entry.executable)
            if text_modified and kind == "symlink":
                action.add_property('target', entry.symlink_target)
            if text_modified and kind == "file":
                # do_diff writes the action line itself, once it knows
                # whether the 'encoding' property is needed.
                do_diff(file_id, old_path, new_path, action, force_binary)
            else:
                action.write(self.to_file)

        delta = new_tree.changes_from(old_tree, want_unchanged=True,
                                      include_root=True)
        for path, file_id, kind in delta.removed:
            Action('removed', [kind, path]).write(self.to_file)

        for path, file_id, kind in delta.added:
            action = Action('added', [kind, path], [('file-id', file_id)])
            meta_modified = (kind=='file' and
                             new_tree.is_executable(file_id))
            finish_action(action, file_id, kind, meta_modified, True,
                          DEVNULL, path)

        for (old_path, new_path, file_id, kind,
             text_modified, meta_modified) in delta.renamed:
            action = Action('renamed', [kind, old_path], [(new_path,)])
            finish_action(action, file_id, kind, meta_modified, text_modified,
                          old_path, new_path)

        for (path, file_id, kind,
             text_modified, meta_modified) in delta.modified:
            action = Action('modified', [kind, path])
            finish_action(action, file_id, kind, meta_modified, text_modified,
                          path, path)

        # Unchanged entries whose recorded file revision differs between the
        # two trees still get a 'modified' line carrying 'last-changed'.
        for path, file_id, kind in delta.unchanged:
            new_rev = new_tree.get_file_revision(file_id)
            if new_rev is None:
                continue
            old_rev = old_tree.get_file_revision(file_id)
            if new_rev != old_rev:
                action = Action('modified', [new_tree.kind(file_id),
                                             new_tree.id2path(file_id)])
                action.add_utf8_property('last-changed', new_rev)
                action.write(self.to_file)


class BundleReader(object):
    """This class reads in a bundle from a file, and returns
    a Bundle object, which can then be applied against a tree.

    Lines obtained from the file are treated as byte strings; header and
    action text is decoded from UTF-8.
    """
    def __init__(self, from_file):
        """Read in the bundle from the file.

        :param from_file: A file-like object (must have iterator support).
        """
        self.from_file = iter(from_file)
        # One line of look-ahead, maintained by _next().
        self._next_line = None

        self.info = self._get_info()
        # We put the actual inventory ids in the footer, so that the patch
        # is easier to read for humans.
        # Unfortunately, that means we need to read everything before we
        # can create a proper bundle.
        self._read()
        self._validate()

    def _get_info(self):
        """Return the empty info object that reading will fill in."""
        return BundleInfo08()

    def _read(self):
        """Read every revision (header, patches, footer) from the file."""
        # Prime the look-ahead; this also consumes the first line.
        next(self._next())
        while self._next_line is not None:
            if not self._read_revision_header():
                break
            if self._next_line is None:
                break
            self._read_patches()
            self._read_footer()

    def _validate(self):
        """Make sure that the information read in makes sense
        and passes appropriate checksums.
        """
        # Fill in all the missing blanks for the revisions
        # and generate the real_revisions list.
        self.info.complete_info()

    def _next(self):
        """yield the next line, but secretly
        keep 1 extra line for peeking.

        The peeked line lives in self._next_line, so a fresh generator can
        be created at any time and continues where the previous one stopped.
        Once the file is exhausted, self._next_line is None.
        """
        for line in self.from_file:
            last = self._next_line
            self._next_line = line
            if last is not None:
                #mutter('yielding line: %r' % last)
                yield last
        last = self._next_line
        self._next_line = None
        #mutter('yielding line: %r' % last)
        yield last

    def _read_revision_header(self):
        """Read one revision header into a new RevisionInfo.

        :return: True if at least one '#' line was found; otherwise the
            RevisionInfo that was added is removed again and False is
            returned.
        """
        found_something = False
        self.info.revisions.append(RevisionInfo(None))
        for line in self._next():
            # The bzr header is terminated with a blank line
            # which does not start with '#'
            if line is None or line == b'\n':
                break
            if not line.startswith(b'#'):
                continue
            found_something = True
            self._handle_next(line)
        if not found_something:
            # Nothing was there, so remove the added revision
            self.info.revisions.pop()
        return found_something

    def _read_next_entry(self, line, indent=1):
        """Read in a key-value pair

        :param line: A raw header line starting with '#'.
        :param indent: Number of spaces expected after the '#'.
        :return: (key, value), with spaces in key replaced by underscores,
            or (None, None) for a blank line.  value is a list when the
            entry is continued on following, deeper-indented lines.
        :raises errors.MalformedHeader: If the line does not start with '#'
            or has no colon.
        """
        if not line.startswith(b'#'):
            raise errors.MalformedHeader('Bzr header did not start with #')
        line = line[1:-1].decode('utf-8') # Remove the '#' and '\n'
        if line[:indent] == ' '*indent:
            line = line[indent:]
        if not line:
            return None, None# Ignore blank lines

        loc = line.find(': ')
        if loc != -1:
            key = line[:loc]
            value = line[loc+2:]
            if not value:
                value = self._read_many(indent=indent+2)
        elif line[-1:] == ':':
            key = line[:-1]
            value = self._read_many(indent=indent+2)
        else:
            raise errors.MalformedHeader('While looking for key: value pairs,'
                    ' did not find the colon %r' % (line))

        key = key.replace(' ', '_')
        #mutter('found %s: %s' % (key, value))
        return key, value

    def _handle_next(self, line):
        """Parse one header/footer line and store it on the last revision.

        :param line: A raw line, or None (ignored).
        :raises errors.MalformedHeader: If the key is not an attribute of
            the revision info, or has already been set.
        """
        if line is None:
            return
        key, value = self._read_next_entry(line, indent=1)
        mutter('_handle_next %r => %r' % (key, value))
        if key is None:
            return

        revision_info = self.info.revisions[-1]
        if key in revision_info.__dict__:
            if getattr(revision_info, key) is None:
                # Ids are stored as utf8 byte strings.
                if key in ('file_id', 'revision_id', 'base_id'):
                    value = value.encode('utf8')
                elif key == 'parent_ids':
                    value = [v.encode('utf8') for v in value]
                setattr(revision_info, key, value)
            else:
                raise errors.MalformedHeader('Duplicated Key: %s' % key)
        else:
            # What do we do with a key we don't recognize
            raise errors.MalformedHeader('Unknown Key: "%s"' % key)

    def _read_many(self, indent):
        """If a line ends with no entry, that means that it should be
        followed with multiple lines of values.

        This detects the end of the list, because it will be a line that
        does not start properly indented.

        :param indent: Number of spaces expected after the '#'.
        :return: A list of decoded values (possibly empty).
        """
        values = []
        start = b'#' + (b' '*indent)

        if self._next_line is None or self._next_line[:len(start)] != start:
            return values

        for line in self._next():
            values.append(line[len(start):-1].decode('utf-8'))
            if self._next_line is None or self._next_line[:len(start)] != start:
                break
        return values

    def _read_one_patch(self):
        """Read in one patch, return the complete patch, along with
        the next line.

        The action text may be wrapped over "... " continuation lines; the
        raw bytes are joined before decoding, so a UTF-8 character split
        across the wrap is still decoded correctly.

        :return: action, lines, do_continue
        """
        #mutter('_read_one_patch: %r' % self._next_line)
        # Peek and see if there are no patches
        if self._next_line is None or self._next_line.startswith(b'#'):
            return None, [], False

        first = True
        lines = []
        raw_action = b''
        for line in self._next():
            if first:
                if not line.startswith(b'==='):
                    raise errors.MalformedPatches('The first line of all patches'
                        ' should be a bzr meta line "==="'
                        ': %r' % line)
                raw_action = line[4:-1]
            elif line.startswith(b'... '):
                raw_action += line[len(b'... '):-1]

            if (self._next_line is not None and
                self._next_line.startswith(b'===')):
                return raw_action.decode('utf-8'), lines, True
            elif self._next_line is None or self._next_line.startswith(b'#'):
                return raw_action.decode('utf-8'), lines, False

            if first:
                first = False
            elif not line.startswith(b'... '):
                lines.append(line)

        return raw_action.decode('utf-8'), lines, False

    def _read_patches(self):
        """Read all patches of the current revision into its tree_actions.

        :raises AssertionError: If tree_actions was already set.
        """
        do_continue = True
        revision_actions = []
        while do_continue:
            action, lines, do_continue = self._read_one_patch()
            if action is not None:
                revision_actions.append((action, lines))
        if self.info.revisions[-1].tree_actions is not None:
            raise AssertionError()
        self.info.revisions[-1].tree_actions = revision_actions

    def _read_footer(self):
        """Read the rest of the meta information.

        Lines are handled until the look-ahead line no longer starts with
        '#'; that trailing line is consumed as well.
        """
        for line in self._next():
            self._handle_next(line)
            if self._next_line is None:
                break
            if not self._next_line.startswith(b'#'):
                # Consume the trailing \n and stop processing
                next(self._next())
                break

class BundleInfo08(BundleInfo):
    """BundleInfo variant created by BundleReader in this module."""

    def _update_tree(self, bundle_tree, revision_id):
        """Note revision_id as last change of '' before the base update."""
        bundle_tree.note_last_changed('', revision_id)
        BundleInfo._update_tree(self, bundle_tree, revision_id)

    def _testament_sha1_from_revision(self, repository, revision_id):
        """Return the strict testament sha1 of a revision in repository."""
        return StrictTestament.from_revision(repository,
                                             revision_id).as_sha1()

    def _testament_sha1(self, revision, tree):
        """Return the strict testament sha1 for a revision and its tree."""
        strict = StrictTestament(revision, tree)
        return strict.as_sha1()