# Copyright (C) 2007, 2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Python implementations of Dirstate Helper functions.""" from __future__ import absolute_import import binascii import os import struct # We cannot import the dirstate module, because it loads this module # All we really need is the IN_MEMORY_MODIFIED constant from bzrlib import errors from bzrlib.dirstate import DirState def pack_stat(st, _b64=binascii.b2a_base64, _pack=struct.Struct('>6L').pack): """Convert stat values into a packed representation Not all of the fields from the stat included are strictly needed, and by just encoding the mtime and mode a slight speed increase could be gained. However, using the pyrex version instead is a bigger win. """ # base64 encoding always adds a final newline, so strip it off return _b64(_pack(st.st_size & 0xFFFFFFFF, int(st.st_mtime) & 0xFFFFFFFF, int(st.st_ctime) & 0xFFFFFFFF, st.st_dev & 0xFFFFFFFF, st.st_ino & 0xFFFFFFFF, st.st_mode))[:-1] def _unpack_stat(packed_stat): """Turn a packed_stat back into the stat fields. This is meant as a debugging tool, should not be used in real code. """ (st_size, st_mtime, st_ctime, st_dev, st_ino, st_mode) = struct.unpack('>6L', binascii.a2b_base64(packed_stat)) return dict(st_size=st_size, st_mtime=st_mtime, st_ctime=st_ctime, st_dev=st_dev, st_ino=st_ino, st_mode=st_mode) def _bisect_path_left(paths, path): """Return the index where to insert path into paths. This uses the dirblock sorting. So all children in a directory come before the children of children. For example:: a/ b/ c d/ e b-c d-e a-a a=c Will be sorted as:: a a-a a=c a/b a/b-c a/d a/d-e a/b/c a/d/e :param paths: A list of paths to search through :param path: A single path to insert :return: An offset where 'path' can be inserted. :seealso: bisect.bisect_left """ hi = len(paths) lo = 0 while lo < hi: mid = (lo + hi) // 2 # Grab the dirname for the current dirblock cur = paths[mid] if _cmp_path_by_dirblock(cur, path) < 0: lo = mid + 1 else: hi = mid return lo def _bisect_path_right(paths, path): """Return the index where to insert path into paths. This uses a path-wise comparison so we get:: a a-b a=b a/b Rather than:: a a-b a/b a=b :param paths: A list of paths to search through :param path: A single path to insert :return: An offset where 'path' can be inserted. :seealso: bisect.bisect_right """ hi = len(paths) lo = 0 while lo < hi: mid = (lo+hi)//2 # Grab the dirname for the current dirblock cur = paths[mid] if _cmp_path_by_dirblock(path, cur) < 0: hi = mid else: lo = mid + 1 return lo def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache={}): """Return the index where to insert dirname into the dirblocks. The return value idx is such that all directories blocks in dirblock[:idx] have names < dirname, and all blocks in dirblock[idx:] have names >= dirname. Optional args lo (default 0) and hi (default len(dirblocks)) bound the slice of a to be searched. """ if hi is None: hi = len(dirblocks) try: dirname_split = cache[dirname] except KeyError: dirname_split = dirname.split('/') cache[dirname] = dirname_split while lo < hi: mid = (lo + hi) // 2 # Grab the dirname for the current dirblock cur = dirblocks[mid][0] try: cur_split = cache[cur] except KeyError: cur_split = cur.split('/') cache[cur] = cur_split if cur_split < dirname_split: lo = mid + 1 else: hi = mid return lo def cmp_by_dirs(path1, path2): """Compare two paths directory by directory. This is equivalent to doing:: cmp(path1.split('/'), path2.split('/')) The idea is that you should compare path components separately. This differs from plain ``cmp(path1, path2)`` for paths like ``'a-b'`` and ``a/b``. "a-b" comes after "a" but would come before "a/b" lexically. :param path1: first path :param path2: second path :return: negative number if ``path1`` comes first, 0 if paths are equal, and positive number if ``path2`` sorts first """ if not isinstance(path1, str): raise TypeError("'path1' must be a plain string, not %s: %r" % (type(path1), path1)) if not isinstance(path2, str): raise TypeError("'path2' must be a plain string, not %s: %r" % (type(path2), path2)) return cmp(path1.split('/'), path2.split('/')) def _cmp_path_by_dirblock(path1, path2): """Compare two paths based on what directory they are in. This generates a sort order, such that all children of a directory are sorted together, and grandchildren are in the same order as the children appear. But all grandchildren come after all children. :param path1: first path :param path2: the second path :return: negative number if ``path1`` comes first, 0 if paths are equal and a positive number if ``path2`` sorts first """ if not isinstance(path1, str): raise TypeError("'path1' must be a plain string, not %s: %r" % (type(path1), path1)) if not isinstance(path2, str): raise TypeError("'path2' must be a plain string, not %s: %r" % (type(path2), path2)) dirname1, basename1 = os.path.split(path1) key1 = (dirname1.split('/'), basename1) dirname2, basename2 = os.path.split(path2) key2 = (dirname2.split('/'), basename2) return cmp(key1, key2) def _read_dirblocks(state): """Read in the dirblocks for the given DirState object. This is tightly bound to the DirState internal representation. It should be thought of as a member function, which is only separated out so that we can re-write it in pyrex. :param state: A DirState object. :return: None """ state._state_file.seek(state._end_of_header) text = state._state_file.read() # TODO: check the crc checksums. crc_measured = zlib.crc32(text) fields = text.split('\0') # Remove the last blank entry trailing = fields.pop() if trailing != '': raise errors.DirstateCorrupt(state, 'trailing garbage: %r' % (trailing,)) # consider turning fields into a tuple. # skip the first field which is the trailing null from the header. cur = 1 # Each line now has an extra '\n' field which is not used # so we just skip over it # entry size: # 3 fields for the key # + number of fields per tree_data (5) * tree count # + newline num_present_parents = state._num_present_parents() tree_count = 1 + num_present_parents entry_size = state._fields_per_entry() expected_field_count = entry_size * state._num_entries field_count = len(fields) # this checks our adjustment, and also catches file too short. if field_count - cur != expected_field_count: raise errors.DirstateCorrupt(state, 'field count incorrect %s != %s, entry_size=%s, '\ 'num_entries=%s fields=%r' % ( field_count - cur, expected_field_count, entry_size, state._num_entries, fields)) if num_present_parents == 1: # Bind external functions to local names _int = int # We access all fields in order, so we can just iterate over # them. Grab an straight iterator over the fields. (We use an # iterator because we don't want to do a lot of additions, nor # do we want to do a lot of slicing) next = iter(fields).next # Move the iterator to the current position for x in xrange(cur): next() # The two blocks here are deliberate: the root block and the # contents-of-root block. state._dirblocks = [('', []), ('', [])] current_block = state._dirblocks[0][1] current_dirname = '' append_entry = current_block.append for count in xrange(state._num_entries): dirname = next() name = next() file_id = next() if dirname != current_dirname: # new block - different dirname current_block = [] current_dirname = dirname state._dirblocks.append((current_dirname, current_block)) append_entry = current_block.append # we know current_dirname == dirname, so re-use it to avoid # creating new strings entry = ((current_dirname, name, file_id), [(# Current Tree next(), # minikind next(), # fingerprint _int(next()), # size next() == 'y', # executable next(), # packed_stat or revision_id ), ( # Parent 1 next(), # minikind next(), # fingerprint _int(next()), # size next() == 'y', # executable next(), # packed_stat or revision_id ), ]) trailing = next() if trailing != '\n': raise ValueError("trailing garbage in dirstate: %r" % trailing) # append the entry to the current block append_entry(entry) state._split_root_dirblock_into_contents() else: fields_to_entry = state._get_fields_to_entry() entries = [fields_to_entry(fields[pos:pos+entry_size]) for pos in xrange(cur, field_count, entry_size)] state._entries_to_current_state(entries) # To convert from format 2 => format 3 # state._dirblocks = sorted(state._dirblocks, # key=lambda blk:blk[0].split('/')) # To convert from format 3 => format 2 # state._dirblocks = sorted(state._dirblocks) state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED