summaryrefslogtreecommitdiff
path: root/bzrlib/_chk_map_py.py
blob: 211736e9d7ac4b42ac56e6210a1d244e36d8f44a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# Copyright (C) 2009, 2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Python implementation of _search_key functions, etc."""

from __future__ import absolute_import

import zlib
import struct

from bzrlib.static_tuple import StaticTuple

_LeafNode = None
_InternalNode = None
_unknown = None

def _crc32(bit):
    # Depending on python version and platform, zlib.crc32 will return either a
    # signed (<= 2.5 >= 3.0) or an unsigned (2.5, 2.6).
    # http://docs.python.org/library/zlib.html recommends using a mask to force
    # an unsigned value to ensure the same numeric value (unsigned) is obtained
    # across all python versions and platforms.
    # Note: However, on 32-bit platforms this causes an upcast to PyLong, which
    #       are generally slower than PyInts. However, if performance becomes
    #       critical, we should probably write the whole thing as an extension
    #       anyway.
    #       Though we really don't need that 32nd bit of accuracy. (even 2**24
    #       is probably enough node fan out for realistic trees.)
    return zlib.crc32(bit)&0xFFFFFFFF


def _search_key_16(key):
    """Map the key tuple into a search key string which has 16-way fan out."""
    return '\x00'.join(['%08X' % _crc32(bit) for bit in key])


def _search_key_255(key):
    """Map the key tuple into a search key string which has 255-way fan out.

    We use 255-way because '\n' is used as a delimiter, and causes problems
    while parsing.
    """
    bytes = '\x00'.join([struct.pack('>L', _crc32(bit)) for bit in key])
    return bytes.replace('\n', '_')


def _deserialise_leaf_node(bytes, key, search_key_func=None):
    """Deserialise bytes, with key key, into a LeafNode.

    :param bytes: The bytes of the node.
    :param key: The key that the serialised node has.
    """
    global _unknown, _LeafNode, _InternalNode
    if _LeafNode is None:
        from bzrlib import chk_map
        _unknown = chk_map._unknown
        _LeafNode = chk_map.LeafNode
        _InternalNode = chk_map.InternalNode
    result = _LeafNode(search_key_func=search_key_func)
    # Splitlines can split on '\r' so don't use it, split('\n') adds an
    # extra '' if the bytes ends in a final newline.
    lines = bytes.split('\n')
    trailing = lines.pop()
    if trailing != '':
        raise AssertionError('We did not have a final newline for %s'
                             % (key,))
    items = {}
    if lines[0] != 'chkleaf:':
        raise ValueError("not a serialised leaf node: %r" % bytes)
    maximum_size = int(lines[1])
    width = int(lines[2])
    length = int(lines[3])
    prefix = lines[4]
    pos = 5
    while pos < len(lines):
        line = prefix + lines[pos]
        elements = line.split('\x00')
        pos += 1
        if len(elements) != width + 1:
            raise AssertionError(
                'Incorrect number of elements (%d vs %d) for: %r'
                % (len(elements), width + 1, line))
        num_value_lines = int(elements[-1])
        value_lines = lines[pos:pos+num_value_lines]
        pos += num_value_lines
        value = '\n'.join(value_lines)
        items[StaticTuple.from_sequence(elements[:-1])] = value
    if len(items) != length:
        raise AssertionError("item count (%d) mismatch for key %s,"
            " bytes %r" % (length, key, bytes))
    result._items = items
    result._len = length
    result._maximum_size = maximum_size
    result._key = key
    result._key_width = width
    result._raw_size = (sum(map(len, lines[5:])) # the length of the suffix
        + (length)*(len(prefix))
        + (len(lines)-5))
    if not items:
        result._search_prefix = None
        result._common_serialised_prefix = None
    else:
        result._search_prefix = _unknown
        result._common_serialised_prefix = prefix
    if len(bytes) != result._current_size():
        raise AssertionError('_current_size computed incorrectly')
    return result


def _deserialise_internal_node(bytes, key, search_key_func=None):
    global _unknown, _LeafNode, _InternalNode
    if _InternalNode is None:
        from bzrlib import chk_map
        _unknown = chk_map._unknown
        _LeafNode = chk_map.LeafNode
        _InternalNode = chk_map.InternalNode
    result = _InternalNode(search_key_func=search_key_func)
    # Splitlines can split on '\r' so don't use it, remove the extra ''
    # from the result of split('\n') because we should have a trailing
    # newline
    lines = bytes.split('\n')
    if lines[-1] != '':
        raise ValueError("last line must be ''")
    lines.pop(-1)
    items = {}
    if lines[0] != 'chknode:':
        raise ValueError("not a serialised internal node: %r" % bytes)
    maximum_size = int(lines[1])
    width = int(lines[2])
    length = int(lines[3])
    common_prefix = lines[4]
    for line in lines[5:]:
        line = common_prefix + line
        prefix, flat_key = line.rsplit('\x00', 1)
        items[prefix] = StaticTuple(flat_key,)
    if len(items) == 0:
        raise AssertionError("We didn't find any item for %s" % key)
    result._items = items
    result._len = length
    result._maximum_size = maximum_size
    result._key = key
    result._key_width = width
    # XXX: InternalNodes don't really care about their size, and this will
    #      change if we add prefix compression
    result._raw_size = None # len(bytes)
    result._node_width = len(prefix)
    result._search_prefix = common_prefix
    return result


def _bytes_to_text_key(bytes):
    """Take a CHKInventory value string and return a (file_id, rev_id) tuple"""
    sections = bytes.split('\n')
    kind, file_id = sections[0].split(': ')
    return (intern(file_id), intern(sections[3]))