diff options
Diffstat (limited to 'bzrlib/_dirstate_helpers_py.py')
-rw-r--r-- | bzrlib/_dirstate_helpers_py.py | 319 |
1 files changed, 319 insertions, 0 deletions
diff --git a/bzrlib/_dirstate_helpers_py.py b/bzrlib/_dirstate_helpers_py.py new file mode 100644 index 0000000..1bf1e43 --- /dev/null +++ b/bzrlib/_dirstate_helpers_py.py @@ -0,0 +1,319 @@ +# Copyright (C) 2007, 2008 Canonical Ltd +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +"""Python implementations of Dirstate Helper functions.""" + +from __future__ import absolute_import + +import binascii +import os +import struct + +# We cannot import the dirstate module, because it loads this module +# All we really need is the IN_MEMORY_MODIFIED constant +from bzrlib import errors +from bzrlib.dirstate import DirState + + +def pack_stat(st, _b64=binascii.b2a_base64, _pack=struct.Struct('>6L').pack): + """Convert stat values into a packed representation + + Not all of the fields from the stat included are strictly needed, and by + just encoding the mtime and mode a slight speed increase could be gained. + However, using the pyrex version instead is a bigger win. + """ + # base64 encoding always adds a final newline, so strip it off + return _b64(_pack(st.st_size & 0xFFFFFFFF, int(st.st_mtime) & 0xFFFFFFFF, + int(st.st_ctime) & 0xFFFFFFFF, st.st_dev & 0xFFFFFFFF, + st.st_ino & 0xFFFFFFFF, st.st_mode))[:-1] + + +def _unpack_stat(packed_stat): + """Turn a packed_stat back into the stat fields. + + This is meant as a debugging tool, should not be used in real code. + """ + (st_size, st_mtime, st_ctime, st_dev, st_ino, + st_mode) = struct.unpack('>6L', binascii.a2b_base64(packed_stat)) + return dict(st_size=st_size, st_mtime=st_mtime, st_ctime=st_ctime, + st_dev=st_dev, st_ino=st_ino, st_mode=st_mode) + + +def _bisect_path_left(paths, path): + """Return the index where to insert path into paths. + + This uses the dirblock sorting. So all children in a directory come before + the children of children. For example:: + + a/ + b/ + c + d/ + e + b-c + d-e + a-a + a=c + + Will be sorted as:: + + a + a-a + a=c + a/b + a/b-c + a/d + a/d-e + a/b/c + a/d/e + + :param paths: A list of paths to search through + :param path: A single path to insert + :return: An offset where 'path' can be inserted. + :seealso: bisect.bisect_left + """ + hi = len(paths) + lo = 0 + while lo < hi: + mid = (lo + hi) // 2 + # Grab the dirname for the current dirblock + cur = paths[mid] + if _cmp_path_by_dirblock(cur, path) < 0: + lo = mid + 1 + else: + hi = mid + return lo + + +def _bisect_path_right(paths, path): + """Return the index where to insert path into paths. + + This uses a path-wise comparison so we get:: + a + a-b + a=b + a/b + Rather than:: + a + a-b + a/b + a=b + :param paths: A list of paths to search through + :param path: A single path to insert + :return: An offset where 'path' can be inserted. + :seealso: bisect.bisect_right + """ + hi = len(paths) + lo = 0 + while lo < hi: + mid = (lo+hi)//2 + # Grab the dirname for the current dirblock + cur = paths[mid] + if _cmp_path_by_dirblock(path, cur) < 0: + hi = mid + else: + lo = mid + 1 + return lo + + +def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache={}): + """Return the index where to insert dirname into the dirblocks. + + The return value idx is such that all directories blocks in dirblock[:idx] + have names < dirname, and all blocks in dirblock[idx:] have names >= + dirname. + + Optional args lo (default 0) and hi (default len(dirblocks)) bound the + slice of a to be searched. + """ + if hi is None: + hi = len(dirblocks) + try: + dirname_split = cache[dirname] + except KeyError: + dirname_split = dirname.split('/') + cache[dirname] = dirname_split + while lo < hi: + mid = (lo + hi) // 2 + # Grab the dirname for the current dirblock + cur = dirblocks[mid][0] + try: + cur_split = cache[cur] + except KeyError: + cur_split = cur.split('/') + cache[cur] = cur_split + if cur_split < dirname_split: lo = mid + 1 + else: hi = mid + return lo + + +def cmp_by_dirs(path1, path2): + """Compare two paths directory by directory. + + This is equivalent to doing:: + + cmp(path1.split('/'), path2.split('/')) + + The idea is that you should compare path components separately. This + differs from plain ``cmp(path1, path2)`` for paths like ``'a-b'`` and + ``a/b``. "a-b" comes after "a" but would come before "a/b" lexically. + + :param path1: first path + :param path2: second path + :return: negative number if ``path1`` comes first, + 0 if paths are equal, + and positive number if ``path2`` sorts first + """ + if not isinstance(path1, str): + raise TypeError("'path1' must be a plain string, not %s: %r" + % (type(path1), path1)) + if not isinstance(path2, str): + raise TypeError("'path2' must be a plain string, not %s: %r" + % (type(path2), path2)) + return cmp(path1.split('/'), path2.split('/')) + + +def _cmp_path_by_dirblock(path1, path2): + """Compare two paths based on what directory they are in. + + This generates a sort order, such that all children of a directory are + sorted together, and grandchildren are in the same order as the + children appear. But all grandchildren come after all children. + + :param path1: first path + :param path2: the second path + :return: negative number if ``path1`` comes first, + 0 if paths are equal + and a positive number if ``path2`` sorts first + """ + if not isinstance(path1, str): + raise TypeError("'path1' must be a plain string, not %s: %r" + % (type(path1), path1)) + if not isinstance(path2, str): + raise TypeError("'path2' must be a plain string, not %s: %r" + % (type(path2), path2)) + dirname1, basename1 = os.path.split(path1) + key1 = (dirname1.split('/'), basename1) + dirname2, basename2 = os.path.split(path2) + key2 = (dirname2.split('/'), basename2) + return cmp(key1, key2) + + +def _read_dirblocks(state): + """Read in the dirblocks for the given DirState object. + + This is tightly bound to the DirState internal representation. It should be + thought of as a member function, which is only separated out so that we can + re-write it in pyrex. + + :param state: A DirState object. + :return: None + """ + state._state_file.seek(state._end_of_header) + text = state._state_file.read() + # TODO: check the crc checksums. crc_measured = zlib.crc32(text) + + fields = text.split('\0') + # Remove the last blank entry + trailing = fields.pop() + if trailing != '': + raise errors.DirstateCorrupt(state, + 'trailing garbage: %r' % (trailing,)) + # consider turning fields into a tuple. + + # skip the first field which is the trailing null from the header. + cur = 1 + # Each line now has an extra '\n' field which is not used + # so we just skip over it + # entry size: + # 3 fields for the key + # + number of fields per tree_data (5) * tree count + # + newline + num_present_parents = state._num_present_parents() + tree_count = 1 + num_present_parents + entry_size = state._fields_per_entry() + expected_field_count = entry_size * state._num_entries + field_count = len(fields) + # this checks our adjustment, and also catches file too short. + if field_count - cur != expected_field_count: + raise errors.DirstateCorrupt(state, + 'field count incorrect %s != %s, entry_size=%s, '\ + 'num_entries=%s fields=%r' % ( + field_count - cur, expected_field_count, entry_size, + state._num_entries, fields)) + + if num_present_parents == 1: + # Bind external functions to local names + _int = int + # We access all fields in order, so we can just iterate over + # them. Grab an straight iterator over the fields. (We use an + # iterator because we don't want to do a lot of additions, nor + # do we want to do a lot of slicing) + next = iter(fields).next + # Move the iterator to the current position + for x in xrange(cur): + next() + # The two blocks here are deliberate: the root block and the + # contents-of-root block. + state._dirblocks = [('', []), ('', [])] + current_block = state._dirblocks[0][1] + current_dirname = '' + append_entry = current_block.append + for count in xrange(state._num_entries): + dirname = next() + name = next() + file_id = next() + if dirname != current_dirname: + # new block - different dirname + current_block = [] + current_dirname = dirname + state._dirblocks.append((current_dirname, current_block)) + append_entry = current_block.append + # we know current_dirname == dirname, so re-use it to avoid + # creating new strings + entry = ((current_dirname, name, file_id), + [(# Current Tree + next(), # minikind + next(), # fingerprint + _int(next()), # size + next() == 'y', # executable + next(), # packed_stat or revision_id + ), + ( # Parent 1 + next(), # minikind + next(), # fingerprint + _int(next()), # size + next() == 'y', # executable + next(), # packed_stat or revision_id + ), + ]) + trailing = next() + if trailing != '\n': + raise ValueError("trailing garbage in dirstate: %r" % trailing) + # append the entry to the current block + append_entry(entry) + state._split_root_dirblock_into_contents() + else: + fields_to_entry = state._get_fields_to_entry() + entries = [fields_to_entry(fields[pos:pos+entry_size]) + for pos in xrange(cur, field_count, entry_size)] + state._entries_to_current_state(entries) + # To convert from format 2 => format 3 + # state._dirblocks = sorted(state._dirblocks, + # key=lambda blk:blk[0].split('/')) + # To convert from format 3 => format 2 + # state._dirblocks = sorted(state._dirblocks) + state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED |