summaryrefslogtreecommitdiff
path: root/bzrlib/_dirstate_helpers_pyx.pyx
diff options
context:
space:
mode:
authorLorry <lorry@roadtrain.codethink.co.uk>2012-08-22 15:47:16 +0100
committerLorry <lorry@roadtrain.codethink.co.uk>2012-08-22 15:47:16 +0100
commit25335618bf8755ce6b116ee14f47f5a1f2c821e9 (patch)
treed889d7ab3f9f985d0c54c534cb8052bd2e6d7163 /bzrlib/_dirstate_helpers_pyx.pyx
downloadbzr-tarball-25335618bf8755ce6b116ee14f47f5a1f2c821e9.tar.gz
Tarball conversion
Diffstat (limited to 'bzrlib/_dirstate_helpers_pyx.pyx')
-rw-r--r--bzrlib/_dirstate_helpers_pyx.pyx2031
1 files changed, 2031 insertions, 0 deletions
diff --git a/bzrlib/_dirstate_helpers_pyx.pyx b/bzrlib/_dirstate_helpers_pyx.pyx
new file mode 100644
index 0000000..76847b4
--- /dev/null
+++ b/bzrlib/_dirstate_helpers_pyx.pyx
@@ -0,0 +1,2031 @@
+# Copyright (C) 2007-2010 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Helper functions for DirState.
+
+This is the python implementation for DirState functions.
+"""
+
+import binascii
+import bisect
+import errno
+import os
+import stat
+import sys
+
+from bzrlib import cache_utf8, errors, osutils
+from bzrlib.dirstate import DirState
+from bzrlib.osutils import parent_directories, pathjoin, splitpath
+
+
+# This is the Windows equivalent of ENOTDIR
+# It is defined in pywin32.winerror, but we don't want a strong dependency for
+# just an error code.
+# XXX: Perhaps we could get it from a windows header ?
+cdef int ERROR_PATH_NOT_FOUND
+ERROR_PATH_NOT_FOUND = 3
+cdef int ERROR_DIRECTORY
+ERROR_DIRECTORY = 267
+
+#python2.4 support, and other platform-dependent includes
+cdef extern from "python-compat.h":
+ unsigned long htonl(unsigned long)
+
+# Give Pyrex some function definitions for it to understand.
+# All of these are just hints to Pyrex, so that it can try to convert python
+# objects into similar C objects. (such as PyInt => int).
+# In anything defined 'cdef extern from XXX' the real C header will be
+# imported, and the real definition will be used from there. So these are just
+# hints, and do not need to match exactly to the C definitions.
+
+cdef extern from *:
+ ctypedef unsigned long size_t
+
+cdef extern from "_dirstate_helpers_pyx.h":
+ ctypedef int intptr_t
+
+
+
+cdef extern from "stdlib.h":
+ unsigned long int strtoul(char *nptr, char **endptr, int base)
+
+
+cdef extern from 'sys/stat.h':
+ int S_ISDIR(int mode)
+ int S_ISREG(int mode)
+ # On win32, this actually comes from "python-compat.h"
+ int S_ISLNK(int mode)
+ int S_IXUSR
+
+# These functions allow us access to a bit of the 'bare metal' of python
+# objects, rather than going through the object abstraction. (For example,
+# PyList_Append, rather than getting the 'append' attribute of the object, and
+# creating a tuple, and then using PyCallObject).
+# Functions that return (or take) a void* are meant to grab a C PyObject*. This
+# differs from the Pyrex 'object'. If you declare a variable as 'object' Pyrex
+# will automatically Py_INCREF and Py_DECREF when appropriate. But for some
+# inner loops, we don't need to do that at all, as the reference only lasts for
+# a very short time.
+# Note that the C API GetItem calls borrow references, so pyrex does the wrong
+# thing if you declare e.g. object PyList_GetItem(object lst, int index) - you
+# need to manually Py_INCREF yourself.
+cdef extern from "Python.h":
+ ctypedef int Py_ssize_t
+ ctypedef struct PyObject:
+ pass
+ int PyList_Append(object lst, object item) except -1
+ void *PyList_GetItem_object_void "PyList_GET_ITEM" (object lst, int index)
+ void *PyList_GetItem_void_void "PyList_GET_ITEM" (void * lst, int index)
+ object PyList_GET_ITEM(object lst, Py_ssize_t index)
+ int PyList_CheckExact(object)
+ Py_ssize_t PyList_GET_SIZE (object p)
+
+ void *PyTuple_GetItem_void_void "PyTuple_GET_ITEM" (void* tpl, int index)
+ object PyTuple_GetItem_void_object "PyTuple_GET_ITEM" (void* tpl, int index)
+ object PyTuple_GET_ITEM(object tpl, Py_ssize_t index)
+
+ unsigned long PyInt_AsUnsignedLongMask(object number) except? -1
+
+ char *PyString_AsString(object p)
+ char *PyString_AsString_obj "PyString_AsString" (PyObject *string)
+ char *PyString_AS_STRING_void "PyString_AS_STRING" (void *p)
+ int PyString_AsStringAndSize(object str, char **buffer, Py_ssize_t *length) except -1
+ object PyString_FromString(char *)
+ object PyString_FromStringAndSize(char *, Py_ssize_t)
+ int PyString_Size(object p)
+ int PyString_GET_SIZE_void "PyString_GET_SIZE" (void *p)
+ int PyString_CheckExact(object p)
+ void Py_INCREF(object o)
+ void Py_DECREF(object o)
+
+
+cdef extern from "string.h":
+ int strncmp(char *s1, char *s2, int len)
+ void *memchr(void *s, int c, size_t len)
+ int memcmp(void *b1, void *b2, size_t len)
+ # ??? memrchr is a GNU extension :(
+ # void *memrchr(void *s, int c, size_t len)
+
+# cimport all of the definitions we will need to access
+from _static_tuple_c cimport import_static_tuple_c, StaticTuple, \
+ StaticTuple_New, StaticTuple_SET_ITEM
+
+import_static_tuple_c()
+
+cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise
+    """Return a pointer to the *last* occurrence of byte ``c`` in ``s[0:n]``.
+
+    :return: Pointer into ``s`` at the last match, or NULL if not found.
+    """
+    # memrchr seems to be a GNU extension, so we have to implement it ourselves
+    cdef char *pos
+    cdef char *start
+
+    # Walk backwards from the final byte; first hit is the last occurrence.
+    start = <char*>s
+    pos = start + n - 1
+    while pos >= start:
+        if pos[0] == c:
+            return <void*>pos
+        pos = pos - 1
+    return NULL
+
+
+def _py_memrchr(s, c):
+    """Just to expose _my_memrchr for testing.
+
+    :param s: The Python string to search
+    :param c: The character to search for
+    :return: The offset to the last instance of 'c' in s, or None if 'c'
+        does not occur in 's'.
+    """
+    cdef void *_s
+    cdef void *found
+    cdef int length
+    cdef char *_c
+
+    _s = PyString_AsString(s)
+    length = PyString_Size(s)
+
+    _c = PyString_AsString(c)
+    # NOTE: plain assert, so this check disappears under `python -O`.
+    assert PyString_Size(c) == 1,\
+        'Must be a single character string, not %s' % (c,)
+    found = _my_memrchr(_s, _c[0], length)
+    if found == NULL:
+        return None
+    # Pointer subtraction converts the match pointer back into an offset.
+    return <char*>found - <char*>_s
+
+
+cdef object safe_string_from_size(char *s, Py_ssize_t size):
+    """Create a Python string from ``s[0:size]``, refusing negative sizes.
+
+    Guards against a negative length (e.g. from bad pointer arithmetic)
+    being passed straight into PyString_FromStringAndSize.
+    """
+    if size < 0:
+        raise AssertionError(
+            'tried to create a string with an invalid size: %d'
+            % (size))
+    return PyString_FromStringAndSize(s, size)
+
+
+cdef int _is_aligned(void *ptr): # cannot_raise
+    """Is this pointer aligned to an integer size offset?
+
+    :return: 1 if this pointer is aligned, 0 otherwise.
+    """
+    # Alignment test: the low bits of an int-aligned address are all zero.
+    return ((<intptr_t>ptr) & ((sizeof(int))-1)) == 0
+
+
+cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2): # cannot_raise
+    """C-level worker for cmp_by_dirs: compare paths component-wise.
+
+    '/' sorts before any other byte, so children of a directory group
+    together.  Returns <0, 0 or >0 like memcmp.
+    """
+    cdef unsigned char *cur1
+    cdef unsigned char *cur2
+    cdef unsigned char *end1
+    cdef unsigned char *end2
+    cdef int *cur_int1
+    cdef int *cur_int2
+    cdef int *end_int1
+    cdef int *end_int2
+
+    # Same buffer and same length means literally the same string.
+    if path1 == path2 and size1 == size2:
+        return 0
+
+    end1 = <unsigned char*>path1+size1
+    end2 = <unsigned char*>path2+size2
+
+    # Use 32-bit comparisons for the matching portion of the string.
+    # Almost all CPU's are faster at loading and comparing 32-bit integers,
+    # than they are at 8-bit integers.
+    # 99% of the time, these will be aligned, but in case they aren't just skip
+    # this loop
+    if _is_aligned(path1) and _is_aligned(path2):
+        cur_int1 = <int*>path1
+        cur_int2 = <int*>path2
+        # Stop the word loop before any partial trailing word.
+        end_int1 = <int*>(path1 + size1 - (size1 % sizeof(int)))
+        end_int2 = <int*>(path2 + size2 - (size2 % sizeof(int)))
+
+        while cur_int1 < end_int1 and cur_int2 < end_int2:
+            if cur_int1[0] != cur_int2[0]:
+                break
+            cur_int1 = cur_int1 + 1
+            cur_int2 = cur_int2 + 1
+
+        # Fall back to byte-wise from the first differing (or trailing) word.
+        cur1 = <unsigned char*>cur_int1
+        cur2 = <unsigned char*>cur_int2
+    else:
+        cur1 = <unsigned char*>path1
+        cur2 = <unsigned char*>path2
+
+    while cur1 < end1 and cur2 < end2:
+        if cur1[0] == cur2[0]:
+            # This character matches, just go to the next one
+            cur1 = cur1 + 1
+            cur2 = cur2 + 1
+            continue
+        # The current characters do not match
+        if cur1[0] == c'/':
+            return -1 # Reached the end of path1 segment first
+        elif cur2[0] == c'/':
+            return 1 # Reached the end of path2 segment first
+        elif cur1[0] < cur2[0]:
+            return -1
+        else:
+            return 1
+
+    # We reached the end of at least one of the strings
+    if cur1 < end1:
+        return 1 # Not at the end of cur1, must be at the end of cur2
+    if cur2 < end2:
+        return -1 # At the end of cur1, but not at cur2
+    # We reached the end of both strings
+    return 0
+
+
+def cmp_by_dirs(path1, path2):
+    """Compare two paths directory by directory.
+
+    This is equivalent to doing::
+
+       cmp(path1.split('/'), path2.split('/'))
+
+    The idea is that you should compare path components separately. This
+    differs from plain ``cmp(path1, path2)`` for paths like ``'a-b'`` and
+    ``a/b``. "a-b" comes after "a" but would come before "a/b" lexically.
+
+    :param path1: first path
+    :param path2: second path
+    :return: negative number if ``path1`` comes first,
+        0 if paths are equal,
+        and positive number if ``path2`` sorts first
+    :raises TypeError: if either argument is not a plain str.
+    """
+    # Only plain (byte) strings are accepted; the C worker reads raw bytes.
+    if not PyString_CheckExact(path1):
+        raise TypeError("'path1' must be a plain string, not %s: %r"
+                        % (type(path1), path1))
+    if not PyString_CheckExact(path2):
+        raise TypeError("'path2' must be a plain string, not %s: %r"
+                        % (type(path2), path2))
+    return _cmp_by_dirs(PyString_AsString(path1),
+                        PyString_Size(path1),
+                        PyString_AsString(path2),
+                        PyString_Size(path2))
+
+
+def _cmp_path_by_dirblock(path1, path2):
+    """Compare two paths based on what directory they are in.
+
+    This generates a sort order, such that all children of a directory are
+    sorted together, and grandchildren are in the same order as the
+    children appear. But all grandchildren come after all children.
+
+    In other words, all entries in a directory are sorted together, and
+    directories are sorted in cmp_by_dirs order.
+
+    :param path1: first path
+    :param path2: the second path
+    :return: negative number if ``path1`` comes first,
+        0 if paths are equal
+        and a positive number if ``path2`` sorts first
+    :raises TypeError: if either argument is not a plain str.
+    """
+    if not PyString_CheckExact(path1):
+        raise TypeError("'path1' must be a plain string, not %s: %r"
+                        % (type(path1), path1))
+    if not PyString_CheckExact(path2):
+        raise TypeError("'path2' must be a plain string, not %s: %r"
+                        % (type(path2), path2))
+    # All real work happens in the C-level helper.
+    return _cmp_path_by_dirblock_intern(PyString_AsString(path1),
+                                        PyString_Size(path1),
+                                        PyString_AsString(path2),
+                                        PyString_Size(path2))
+
+
+cdef int _cmp_path_by_dirblock_intern(char *path1, int path1_len,
+                                      char *path2, int path2_len): # cannot_raise
+    """Compare two paths by what directory they are in.
+
+    see ``_cmp_path_by_dirblock`` for details.
+
+    Splits each path into (dirname, basename) at the last '/', compares the
+    dirnames with _cmp_by_dirs, and breaks ties on the basenames.
+    """
+    cdef char *dirname1
+    cdef int dirname1_len
+    cdef char *dirname2
+    cdef int dirname2_len
+    cdef char *basename1
+    cdef int basename1_len
+    cdef char *basename2
+    cdef int basename2_len
+    cdef int cur_len
+    cdef int cmp_val
+
+    # Fast-paths: both empty, or literally the same buffer.
+    if path1_len == 0 and path2_len == 0:
+        return 0
+
+    if path1 == path2 and path1_len == path2_len:
+        return 0
+
+    # The empty path sorts before everything else.
+    if path1_len == 0:
+        return -1
+
+    if path2_len == 0:
+        return 1
+
+    # Split path1 at its last '/', if any.
+    basename1 = <char*>_my_memrchr(path1, c'/', path1_len)
+
+    if basename1 == NULL:
+        basename1 = path1
+        basename1_len = path1_len
+        # '' coerces to a char* pointing at an empty C string here.
+        dirname1 = ''
+        dirname1_len = 0
+    else:
+        dirname1 = path1
+        dirname1_len = basename1 - path1
+        basename1 = basename1 + 1
+        basename1_len = path1_len - dirname1_len - 1
+
+    # Split path2 the same way.
+    basename2 = <char*>_my_memrchr(path2, c'/', path2_len)
+
+    if basename2 == NULL:
+        basename2 = path2
+        basename2_len = path2_len
+        dirname2 = ''
+        dirname2_len = 0
+    else:
+        dirname2 = path2
+        dirname2_len = basename2 - path2
+        basename2 = basename2 + 1
+        basename2_len = path2_len - dirname2_len - 1
+
+    # Directories decide the order; basenames only break ties.
+    cmp_val = _cmp_by_dirs(dirname1, dirname1_len,
+                           dirname2, dirname2_len)
+    if cmp_val != 0:
+        return cmp_val
+
+    # Compare the common prefix of the basenames, then fall back to length.
+    cur_len = basename1_len
+    if basename2_len < basename1_len:
+        cur_len = basename2_len
+
+    cmp_val = memcmp(basename1, basename2, cur_len)
+    if cmp_val != 0:
+        return cmp_val
+    if basename1_len == basename2_len:
+        return 0
+    if basename1_len < basename2_len:
+        return -1
+    return 1
+
+
+def _bisect_path_left(paths, path):
+    """Return the index where to insert path into paths.
+
+    This uses a path-wise comparison so we get::
+        a
+        a-b
+        a=b
+        a/b
+    Rather than::
+        a
+        a-b
+        a/b
+        a=b
+    :param paths: A list of paths to search through
+    :param path: A single path to insert
+    :return: An offset where 'path' can be inserted.
+    :raises TypeError: if paths is not a list or path is not a plain str.
+    :seealso: bisect.bisect_left
+    """
+    cdef int _lo
+    cdef int _hi
+    cdef int _mid
+    cdef char *path_cstr
+    cdef int path_size
+    cdef char *cur_cstr
+    cdef int cur_size
+    cdef void *cur
+
+    if not PyList_CheckExact(paths):
+        raise TypeError("you must pass a python list for 'paths' not: %s %r"
+                        % (type(paths), paths))
+    if not PyString_CheckExact(path):
+        raise TypeError("you must pass a string for 'path' not: %s %r"
+                        % (type(path), path))
+
+    _hi = len(paths)
+    _lo = 0
+
+    path_cstr = PyString_AsString(path)
+    path_size = PyString_Size(path)
+
+    # Classic binary search; 'left' flavour: equal elements keep _hi at _mid,
+    # so we land before any run of entries equal to 'path'.
+    while _lo < _hi:
+        _mid = (_lo + _hi) / 2
+        # Borrowed reference via PyList_GET_ITEM; the list keeps it alive.
+        cur = PyList_GetItem_object_void(paths, _mid)
+        cur_cstr = PyString_AS_STRING_void(cur)
+        cur_size = PyString_GET_SIZE_void(cur)
+        if _cmp_path_by_dirblock_intern(cur_cstr, cur_size,
+                                        path_cstr, path_size) < 0:
+            _lo = _mid + 1
+        else:
+            _hi = _mid
+    return _lo
+
+
+def _bisect_path_right(paths, path):
+    """Return the index where to insert path into paths.
+
+    This uses a path-wise comparison so we get::
+        a
+        a-b
+        a=b
+        a/b
+    Rather than::
+        a
+        a-b
+        a/b
+        a=b
+    :param paths: A list of paths to search through
+    :param path: A single path to insert
+    :return: An offset where 'path' can be inserted.
+    :raises TypeError: if paths is not a list or path is not a plain str.
+    :seealso: bisect.bisect_right
+    """
+    cdef int _lo
+    cdef int _hi
+    cdef int _mid
+    cdef char *path_cstr
+    cdef int path_size
+    cdef char *cur_cstr
+    cdef int cur_size
+    cdef void *cur
+
+    if not PyList_CheckExact(paths):
+        raise TypeError("you must pass a python list for 'paths' not: %s %r"
+                        % (type(paths), paths))
+    if not PyString_CheckExact(path):
+        raise TypeError("you must pass a string for 'path' not: %s %r"
+                        % (type(path), path))
+
+    _hi = len(paths)
+    _lo = 0
+
+    path_cstr = PyString_AsString(path)
+    path_size = PyString_Size(path)
+
+    # 'right' flavour: the comparison is reversed relative to
+    # _bisect_path_left, so equal elements push _lo past them.
+    while _lo < _hi:
+        _mid = (_lo + _hi) / 2
+        # Borrowed reference via PyList_GET_ITEM; the list keeps it alive.
+        cur = PyList_GetItem_object_void(paths, _mid)
+        cur_cstr = PyString_AS_STRING_void(cur)
+        cur_size = PyString_GET_SIZE_void(cur)
+        if _cmp_path_by_dirblock_intern(path_cstr, path_size,
+                                        cur_cstr, cur_size) < 0:
+            _hi = _mid
+        else:
+            _lo = _mid + 1
+    return _lo
+
+
+def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache=None):
+    """Return the index where to insert dirname into the dirblocks.
+
+    The return value idx is such that all directories blocks in dirblock[:idx]
+    have names < dirname, and all blocks in dirblock[idx:] have names >=
+    dirname.
+
+    Optional args lo (default 0) and hi (default len(dirblocks)) bound the
+    slice of a to be searched.
+
+    :param cache: unused here; accepted for signature compatibility with the
+        pure-python implementation.
+    :raises TypeError: if dirblocks is not a list or dirname not a plain str.
+    """
+    cdef int _lo
+    cdef int _hi
+    cdef int _mid
+    cdef char *dirname_cstr
+    cdef int dirname_size
+    cdef char *cur_cstr
+    cdef int cur_size
+    cdef void *cur
+
+    if not PyList_CheckExact(dirblocks):
+        raise TypeError("you must pass a python list for 'dirblocks' not: %s %r"
+                        % (type(dirblocks), dirblocks))
+    if not PyString_CheckExact(dirname):
+        raise TypeError("you must pass a string for dirname not: %s %r"
+                        % (type(dirname), dirname))
+    if hi is None:
+        _hi = len(dirblocks)
+    else:
+        _hi = hi
+
+    _lo = lo
+    dirname_cstr = PyString_AsString(dirname)
+    dirname_size = PyString_Size(dirname)
+
+    while _lo < _hi:
+        _mid = (_lo + _hi) / 2
+        # Grab the dirname for the current dirblock
+        # cur = dirblocks[_mid][0]
+        cur = PyTuple_GetItem_void_void(
+                PyList_GetItem_object_void(dirblocks, _mid), 0)
+        cur_cstr = PyString_AS_STRING_void(cur)
+        cur_size = PyString_GET_SIZE_void(cur)
+        if _cmp_by_dirs(cur_cstr, cur_size, dirname_cstr, dirname_size) < 0:
+            _lo = _mid + 1
+        else:
+            _hi = _mid
+    return _lo
+
+
+cdef class Reader:
+    """Maintain the current location, and return fields as you parse them.
+
+    Fields in the dirstate text are NUL-separated; each call to get_next()
+    advances cur_cstr past one field.
+    """
+
+    cdef object state # The DirState object
+    cdef object text # The overall string object
+    cdef char *text_cstr # Pointer to the beginning of text
+    cdef int text_size # Length of text
+
+    cdef char *end_cstr # End of text
+    cdef char *cur_cstr # Pointer to the current record
+    cdef char *next # Pointer to the end of this record
+
+    def __init__(self, text, state):
+        """Wrap ``text`` (the raw dirblock string) for parsing into ``state``."""
+        self.state = state
+        # Keep a reference to text so the char* pointers below stay valid.
+        self.text = text
+        self.text_cstr = PyString_AsString(text)
+        self.text_size = PyString_Size(text)
+        self.end_cstr = self.text_cstr + self.text_size
+        self.cur_cstr = self.text_cstr
+
+    cdef char *get_next(self, int *size) except NULL:
+        """Return a pointer to the start of the next field.
+
+        :param size: (out) set to the length of the field, excluding the
+            trailing NUL.
+        :raises AssertionError: if called past the end of the text.
+        :raises errors.DirstateCorrupt: if no field terminator is found.
+        """
+        cdef char *next
+        cdef Py_ssize_t extra_len
+
+        if self.cur_cstr == NULL:
+            raise AssertionError('get_next() called when cur_str is NULL')
+        elif self.cur_cstr >= self.end_cstr:
+            raise AssertionError('get_next() called when there are no chars'
+                                 ' left')
+        next = self.cur_cstr
+        # Find the NUL that terminates this field.
+        self.cur_cstr = <char*>memchr(next, c'\0', self.end_cstr - next)
+        if self.cur_cstr == NULL:
+            extra_len = self.end_cstr - next
+            raise errors.DirstateCorrupt(self.state,
+                'failed to find trailing NULL (\\0).'
+                ' Trailing garbage: %r'
+                % safe_string_from_size(next, extra_len))
+        size[0] = self.cur_cstr - next
+        # Step over the NUL so the next call starts at the next field.
+        self.cur_cstr = self.cur_cstr + 1
+        return next
+
+    cdef object get_next_str(self):
+        """Get the next field as a Python string."""
+        cdef int size
+        cdef char *next
+        next = self.get_next(&size)
+        return safe_string_from_size(next, size)
+
+    cdef int _init(self) except -1:
+        """Get the pointer ready.
+
+        This assumes that the dirstate header has already been read, and we
+        already have the dirblock string loaded into memory.
+        This just initializes our memory pointers, etc for parsing of the
+        dirblock string.
+        """
+        cdef char *first
+        cdef int size
+        # The first field should be an empty string left over from the Header
+        # NOTE(review): this condition looks like it can never be true --
+        # when size == 0, first[0] is the field's NUL terminator.  It was
+        # probably meant to be `if size != 0 or first[0] != c'\0'`; confirm
+        # before changing.
+        first = self.get_next(&size)
+        if first[0] != c'\0' and size == 0:
+            raise AssertionError('First character should be null not: %s'
+                                 % (first,))
+        return 0
+
+    cdef object _get_entry(self, int num_trees, void **p_current_dirname,
+                           int *new_block):
+        """Extract the next entry.
+
+        This parses the next entry based on the current location in
+        ``self.cur_cstr``.
+        Each entry can be considered a "row" in the total table. And each row
+        has a fixed number of columns. It is generally broken up into "key"
+        columns, then "current" columns, and then "parent" columns.
+
+        :param num_trees: How many parent trees need to be parsed
+        :param p_current_dirname: A pointer to the current PyString
+            representing the directory name.
+            We pass this in as a void * so that pyrex doesn't have to
+            increment/decrement the PyObject reference counter for each
+            _get_entry call.
+            We use a pointer so that _get_entry can update it with the new
+            value.
+        :param new_block: This is to let the caller know that it needs to
+            create a new directory block to store the next entry.
+        :return: A tuple of (key, [tree_details, ...]).
+        """
+        cdef StaticTuple path_name_file_id_key
+        cdef StaticTuple tmp
+        cdef char *entry_size_cstr
+        cdef unsigned long int entry_size
+        cdef char* executable_cstr
+        cdef int is_executable
+        cdef char* dirname_cstr
+        cdef char* trailing
+        cdef int cur_size
+        cdef int i
+        cdef object minikind
+        cdef object fingerprint
+        cdef object info
+
+        # Read the 'key' information (dirname, name, file_id)
+        dirname_cstr = self.get_next(&cur_size)
+        # Check to see if we have started a new directory block.
+        # If so, then we need to create a new dirname PyString, so that it can
+        # be used in all of the tuples. This saves time and memory, by re-using
+        # the same object repeatedly.
+
+        # Do the cheap 'length of string' check first. If the string is a
+        # different length, then we *have* to be a different directory.
+        if (cur_size != PyString_GET_SIZE_void(p_current_dirname[0])
+            or strncmp(dirname_cstr,
+                       # Extract the char* from our current dirname string.  We
+                       # know it is a PyString, so we can use
+                       # PyString_AS_STRING, we use the _void version because
+                       # we are tricking Pyrex by using a void* rather than an
+                       # <object>
+                       PyString_AS_STRING_void(p_current_dirname[0]),
+                       cur_size+1) != 0):
+            dirname = safe_string_from_size(dirname_cstr, cur_size)
+            p_current_dirname[0] = <void*>dirname
+            new_block[0] = 1
+        else:
+            new_block[0] = 0
+
+        # Build up the key that will be used.
+        # By using <object>(void *) Pyrex will automatically handle the
+        # Py_INCREF that we need.
+        cur_dirname = <object>p_current_dirname[0]
+        # Use StaticTuple_New to pre-allocate, rather than creating a regular
+        # tuple and passing it to the StaticTuple constructor.
+        # path_name_file_id_key = StaticTuple(<object>p_current_dirname[0],
+        #                          self.get_next_str(),
+        #                          self.get_next_str(),
+        #                         )
+        tmp = StaticTuple_New(3)
+        # SET_ITEM steals a reference, so INCREF manually before each store.
+        Py_INCREF(cur_dirname); StaticTuple_SET_ITEM(tmp, 0, cur_dirname)
+        cur_basename = self.get_next_str()
+        cur_file_id = self.get_next_str()
+        Py_INCREF(cur_basename); StaticTuple_SET_ITEM(tmp, 1, cur_basename)
+        Py_INCREF(cur_file_id); StaticTuple_SET_ITEM(tmp, 2, cur_file_id)
+        path_name_file_id_key = tmp
+
+        # Parse all of the per-tree information. current has the information in
+        # the same location as parent trees. The only difference is that 'info'
+        # is a 'packed_stat' for current, while it is a 'revision_id' for
+        # parent trees.
+        # minikind, fingerprint, and info will be returned as regular python
+        # strings
+        # entry_size and is_executable will be parsed into a python Long and
+        # python Boolean, respectively.
+        # TODO: jam 20070718 Consider changing the entry_size conversion to
+        #       prefer python Int when possible. They are generally faster to
+        #       work with, and it will be rare that we have a file >2GB.
+        #       Especially since this code is pretty much fixed at a max of
+        #       4GB.
+        trees = []
+        for i from 0 <= i < num_trees:
+            minikind = self.get_next_str()
+            fingerprint = self.get_next_str()
+            entry_size_cstr = self.get_next(&cur_size)
+            entry_size = strtoul(entry_size_cstr, NULL, 10)
+            executable_cstr = self.get_next(&cur_size)
+            is_executable = (executable_cstr[0] == c'y')
+            info = self.get_next_str()
+            # TODO: If we want to use StaticTuple_New here we need to be pretty
+            #       careful. We are relying on a bit of Pyrex
+            #       automatic-conversion from 'int' to PyInt, and that doesn't
+            #       play well with the StaticTuple_SET_ITEM macro.
+            #       Timing doesn't (yet) show a worthwhile improvement in speed
+            #       versus complexity and maintainability.
+            # tmp = StaticTuple_New(5)
+            # Py_INCREF(minikind); StaticTuple_SET_ITEM(tmp, 0, minikind)
+            # Py_INCREF(fingerprint); StaticTuple_SET_ITEM(tmp, 1, fingerprint)
+            # Py_INCREF(entry_size); StaticTuple_SET_ITEM(tmp, 2, entry_size)
+            # Py_INCREF(is_executable); StaticTuple_SET_ITEM(tmp, 3, is_executable)
+            # Py_INCREF(info); StaticTuple_SET_ITEM(tmp, 4, info)
+            # PyList_Append(trees, tmp)
+            PyList_Append(trees, StaticTuple(
+                minikind,     # minikind
+                fingerprint,  # fingerprint
+                entry_size,   # size
+                is_executable,# executable
+                info,         # packed_stat or revision_id
+            ))
+
+        # The returned tuple is (key, [trees])
+        ret = (path_name_file_id_key, trees)
+        # Ignore the trailing newline, but assert that it does exist, this
+        # ensures that we always finish parsing a line on an end-of-entry
+        # marker.
+        trailing = self.get_next(&cur_size)
+        if cur_size != 1 or trailing[0] != c'\n':
+            raise errors.DirstateCorrupt(self.state,
+                'Bad parse, we expected to end on \\n, not: %d %s: %s'
+                % (cur_size, safe_string_from_size(trailing, cur_size),
+                   ret))
+        return ret
+
+    def _parse_dirblocks(self):
+        """Parse all dirblocks in the state file.
+
+        Populates ``self.state._dirblocks`` and validates the entry count
+        against ``self.state._num_entries``.
+        """
+        cdef int num_trees
+        cdef object current_block
+        cdef object entry
+        cdef void * current_dirname
+        cdef int new_block
+        cdef int expected_entry_count
+        cdef int entry_count
+
+        num_trees = self.state._num_present_parents() + 1
+        expected_entry_count = self.state._num_entries
+
+        # Ignore the first record
+        self._init()
+
+        current_block = []
+        dirblocks = [('', current_block), ('', [])]
+        self.state._dirblocks = dirblocks
+        obj = ''
+        # Borrowed pointer to the current dirname string; the interned ''
+        # and the dirblocks list keep the underlying objects alive.
+        current_dirname = <void*>obj
+        new_block = 0
+        entry_count = 0
+
+        # TODO: jam 2007-05-07 Consider pre-allocating some space for the
+        #       members, and then growing and shrinking from there. If most
+        #       directories have close to 10 entries in them, it would save a
+        #       few mallocs if we default our list size to something
+        #       reasonable. Or we could malloc it to something large (100 or
+        #       so), and then truncate. That would give us a malloc + realloc,
+        #       rather than lots of reallocs.
+        while self.cur_cstr < self.end_cstr:
+            entry = self._get_entry(num_trees, &current_dirname, &new_block)
+            if new_block:
+                # new block - different dirname
+                current_block = []
+                PyList_Append(dirblocks,
+                              (<object>current_dirname, current_block))
+            PyList_Append(current_block, entry)
+            entry_count = entry_count + 1
+        if entry_count != expected_entry_count:
+            raise errors.DirstateCorrupt(self.state,
+                    'We read the wrong number of entries.'
+                    ' We expected to read %s, but read %s'
+                    % (expected_entry_count, entry_count))
+        self.state._split_root_dirblock_into_contents()
+
+
+def _read_dirblocks(state):
+    """Read in the dirblocks for the given DirState object.
+
+    This is tightly bound to the DirState internal representation. It should be
+    thought of as a member function, which is only separated out so that we can
+    re-write it in pyrex.
+
+    :param state: A DirState object.
+    :return: None
+    :postcondition: The dirblocks will be loaded into the appropriate fields in
+        the DirState object.
+    """
+    # Everything after the header is dirblock data; slurp it in one read.
+    state._state_file.seek(state._end_of_header)
+    raw_text = state._state_file.read()
+    # TODO: check the crc checksums. crc_measured = zlib.crc32(text)
+    parser = Reader(raw_text, state)
+    parser._parse_dirblocks()
+    # Freshly loaded from disk, so the in-memory copy is clean.
+    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
+
+
+cdef int minikind_from_mode(int mode): # cannot_raise
+    """Map a stat st_mode to a dirstate minikind char (f/d/l), or 0.
+
+    :return: c'f' for regular files, c'd' for directories, c'l' for
+        symlinks, 0 for anything else (fifo, socket, device, ...).
+    """
+    # in order of frequency:
+    if S_ISREG(mode):
+        return c"f"
+    if S_ISDIR(mode):
+        return c"d"
+    if S_ISLNK(mode):
+        return c"l"
+    return 0
+
+
+# Base64-encoder used by _pack_stat; bound once at module level for speed.
+_encode = binascii.b2a_base64
+
+
+cdef _pack_stat(stat_value):
+    """return a string representing the stat value's key fields.
+
+    :param stat_value: A stat object with st_size, st_mtime, st_ctime, st_dev,
+        st_ino and st_mode fields.
+    :return: A base64-encoded string (trailing newline stripped) of the six
+        fields packed as 32-bit big-endian integers.
+    """
+    cdef char result[6*4] # six 32-bit ints
+    cdef int *aliased
+    # View the byte buffer as an int array; htonl gives network (big-endian)
+    # byte order so the packing is platform-independent.
+    aliased = <int *>result
+    aliased[0] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_size))
+    # mtime and ctime will often be floats but get converted to PyInt within
+    aliased[1] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_mtime))
+    aliased[2] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_ctime))
+    aliased[3] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_dev))
+    aliased[4] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_ino))
+    aliased[5] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_mode))
+    packed = PyString_FromStringAndSize(result, 6*4)
+    # b2a_base64 appends a '\n'; strip it.
+    return _encode(packed)[:-1]
+
+
+def pack_stat(stat_value):
+    """Convert stat value into a packed representation quickly with pyrex.
+
+    Python-visible wrapper around the cdef _pack_stat.
+    """
+    return _pack_stat(stat_value)
+
+
+def update_entry(self, entry, abspath, stat_value):
+    """Update the entry based on what is actually on disk.
+
+    This function only calculates the sha if it needs to - if the entry is
+    uncachable, or clearly different to the first parent's entry, no sha
+    is calculated, and None is returned.
+
+    :param self: The dirstate object this is operating on.
+    :param entry: This is the dirblock entry for the file in question.
+    :param abspath: The path on disk for this file.
+    :param stat_value: The result of a stat on the file (required here,
+        despite the pure-python variant treating it as optional).
+    :return: None, or The sha1 hexdigest of the file (40 bytes) or link
+        target of a symlink.
+    """
+    # Python-visible wrapper; all work happens in the cdef _update_entry.
+    return _update_entry(self, entry, abspath, stat_value)
+
+
+cdef _update_entry(self, entry, abspath, stat_value):
+    """Update the entry based on what is actually on disk.
+
+    This function only calculates the sha if it needs to - if the entry is
+    uncachable, or clearly different to the first parent's entry, no sha
+    is calculated, and None is returned.
+
+    :param self: The dirstate object this is operating on.
+    :param entry: This is the dirblock entry for the file in question.
+    :param abspath: The path on disk for this file.
+    :param stat_value: The stat value done on the path.
+    :return: None, or The sha1 hexdigest of the file (40 bytes) or link
+        target of a symlink.
+    """
+    # TODO - require pyrex 0.9.8, then use a pyd file to define access to the
+    # _st mode of the compiled stat objects.
+    cdef int minikind, saved_minikind
+    cdef void * details
+    cdef int worth_saving
+    minikind = minikind_from_mode(stat_value.st_mode)
+    if 0 == minikind:
+        # Not a file/dir/symlink - nothing we track.
+        return None
+    packed_stat = _pack_stat(stat_value)
+    # details = entry[1][0], fetched via borrowed-reference C API calls.
+    details = PyList_GetItem_void_void(PyTuple_GetItem_void_void(<void *>entry, 1), 0)
+    saved_minikind = PyString_AsString_obj(<PyObject *>PyTuple_GetItem_void_void(details, 0))[0]
+    # A tree-reference stats as a directory; preserve the 't' kind.
+    if minikind == c'd' and saved_minikind == c't':
+        minikind = c't'
+    saved_link_or_sha1 = PyTuple_GetItem_void_object(details, 1)
+    saved_file_size = PyTuple_GetItem_void_object(details, 2)
+    saved_executable = PyTuple_GetItem_void_object(details, 3)
+    saved_packed_stat = PyTuple_GetItem_void_object(details, 4)
+    # Deal with pyrex decrefing the objects
+    Py_INCREF(saved_link_or_sha1)
+    Py_INCREF(saved_file_size)
+    Py_INCREF(saved_executable)
+    Py_INCREF(saved_packed_stat)
+    #(saved_minikind, saved_link_or_sha1, saved_file_size,
+    # saved_executable, saved_packed_stat) = entry[1][0]
+
+    if (minikind == saved_minikind
+        and packed_stat == saved_packed_stat):
+        # The stat hasn't changed since we saved, so we can re-use the
+        # saved sha hash.
+        if minikind == c'd':
+            return None
+
+        # size should also be in packed_stat
+        if saved_file_size == stat_value.st_size:
+            return saved_link_or_sha1
+
+    # If we have gotten this far, that means that we need to actually
+    # process this entry.
+    link_or_sha1 = None
+    worth_saving = 1
+    if minikind == c'f':
+        executable = self._is_executable(stat_value.st_mode,
+                                         saved_executable)
+        if self._cutoff_time is None:
+            self._sha_cutoff_time()
+        # Only cache the sha1 when the file is safely older than the
+        # cutoff (so it can't change within stat granularity).
+        if (stat_value.st_mtime < self._cutoff_time
+            and stat_value.st_ctime < self._cutoff_time
+            and len(entry[1]) > 1
+            and entry[1][1][0] != 'a'):
+                # Could check for size changes for further optimised
+                # avoidance of sha1's. However the most prominent case of
+                # over-shaing is during initial add, which this catches.
+                link_or_sha1 = self._sha1_file(abspath)
+                entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
+                               executable, packed_stat)
+        else:
+            # This file is not worth caching the sha1. Either it is too new, or
+            # it is newly added. Regardless, the only things we are changing
+            # are derived from the stat, and so are not worth caching. So we do
+            # *not* set the IN_MEMORY_MODIFIED flag. (But we'll save the
+            # updated values if there is *other* data worth saving.)
+            entry[1][0] = ('f', '', stat_value.st_size, executable,
+                           DirState.NULLSTAT)
+            worth_saving = 0
+    elif minikind == c'd':
+        entry[1][0] = ('d', '', 0, False, packed_stat)
+        if saved_minikind != c'd':
+            # This changed from something into a directory. Make sure we
+            # have a directory block for it. This doesn't happen very
+            # often, so this doesn't have to be super fast.
+            block_index, entry_index, dir_present, file_present = \
+                self._get_block_entry_index(entry[0][0], entry[0][1], 0)
+            self._ensure_block(block_index, entry_index,
+                               pathjoin(entry[0][0], entry[0][1]))
+        else:
+            # Any changes are derived trivially from the stat object, not worth
+            # re-writing a dirstate for just this
+            worth_saving = 0
+    elif minikind == c'l':
+        if saved_minikind == c'l':
+            # If the object hasn't changed kind, it isn't worth saving the
+            # dirstate just for a symlink. The default is 'fast symlinks' which
+            # save the target in the inode entry, rather than separately. So to
+            # stat, we've already read everything off disk.
+            worth_saving = 0
+        link_or_sha1 = self._read_link(abspath, saved_link_or_sha1)
+        if self._cutoff_time is None:
+            self._sha_cutoff_time()
+        if (stat_value.st_mtime < self._cutoff_time
+            and stat_value.st_ctime < self._cutoff_time):
+            entry[1][0] = ('l', link_or_sha1, stat_value.st_size,
+                           False, packed_stat)
+        else:
+            entry[1][0] = ('l', '', stat_value.st_size,
+                           False, DirState.NULLSTAT)
+    if worth_saving:
+        # Note, even though _mark_modified will only set
+        # IN_MEMORY_HASH_MODIFIED, it still isn't worth
+        self._mark_modified([entry])
+    return link_or_sha1
+
+
+# TODO: Do we want to worry about exceptions here?
cdef char _minikind_from_string(object string) except? -1:
    """Return the leading byte of a minikind python string as a C char."""
    cdef char *c_str
    c_str = PyString_AsString(string)
    return c_str[0]
+
+
# Canonical kind strings, created once at module load so _minikind_to_kind
# can return shared string objects instead of building a new one per call.
cdef object _kind_absent
cdef object _kind_file
cdef object _kind_directory
cdef object _kind_symlink
cdef object _kind_relocated
cdef object _kind_tree_reference
_kind_absent = "absent"
_kind_file = "file"
_kind_directory = "directory"
_kind_symlink = "symlink"
_kind_relocated = "relocated"
_kind_tree_reference = "tree-reference"
+
+
cdef object _minikind_to_kind(char minikind):
    """Map a single-character minikind to its shared full kind string.

    Raises KeyError (carrying the unknown byte as a 1-char string) for any
    character outside the known minikind set.
    """
    cdef char _minikind[1]
    if minikind == c'a':
        return _kind_absent
    elif minikind == c'f':
        return _kind_file
    elif minikind == c'd':
        return _kind_directory
    elif minikind == c'l':
        return _kind_symlink
    elif minikind == c'r':
        return _kind_relocated
    elif minikind == c't':
        return _kind_tree_reference
    # Unknown minikind: surface the raw byte in the exception.
    _minikind[0] = minikind
    raise KeyError(PyString_FromStringAndSize(_minikind, 1))
+
+
cdef int _versioned_minikind(char minikind): # cannot_raise
    """Return non-zero if minikind is one of f, d, l or t (a versioned kind)."""
    if minikind == c'f':
        return 1
    if minikind == c'd':
        return 1
    if minikind == c'l':
        return 1
    if minikind == c't':
        return 1
    return 0
+
+
cdef class ProcessEntryC:
    """Pyrex implementation of the dirstate iter_changes machinery.

    An instance walks the dirstate blocks and the on-disk directory listing
    in step, producing one iter_changes result tuple per call of __next__.
    State is kept on the instance so iteration can be resumed between calls.
    """

    cdef int doing_consistency_expansion
    cdef object old_dirname_to_file_id # dict
    cdef object new_dirname_to_file_id # dict
    # [dirname, file_id] caches of the most recently resolved parent; used to
    # skip repeated _get_entry lookups for entries in the same directory.
    cdef object last_source_parent
    cdef object last_target_parent
    cdef int include_unchanged
    cdef int partial
    cdef object use_filesystem_for_exec
    cdef object utf8_decode
    cdef readonly object searched_specific_files
    cdef readonly object searched_exact_paths
    cdef object search_specific_files
    # The parents up to the root of the paths we are searching.
    # After all normal paths are returned, these specific items are returned.
    cdef object search_specific_file_parents
    cdef object state
    # Current iteration variables:
    cdef object current_root
    cdef object current_root_unicode
    cdef object root_entries
    cdef int root_entries_pos, root_entries_len
    cdef object root_abspath
    cdef int source_index, target_index
    cdef int want_unversioned
    cdef object tree
    cdef object dir_iterator
    cdef int block_index
    cdef object current_block
    cdef int current_block_pos
    cdef object current_block_list
    cdef object current_dir_info
    cdef object current_dir_list
    cdef object _pending_consistent_entries # list
    cdef int path_index
    cdef object root_dir_info
    # Bound functions cached as attributes to avoid module/attribute lookups
    # in the inner loops.
    cdef object bisect_left
    cdef object pathjoin
    cdef object fstat
    # A set of the ids we've output when doing partial output.
    cdef object seen_ids
    cdef object sha_file
    def __init__(self, include_unchanged, use_filesystem_for_exec,
        search_specific_files, state, source_index, target_index,
        want_unversioned, tree):
        """Create a ProcessEntryC.

        :param include_unchanged: If truthy, results are returned for
            unchanged entries as well as changed ones; None is treated
            as False.
        :param use_filesystem_for_exec: If truthy, the executable bit is
            read from the on-disk stat mode rather than the dirstate entry.
        :param search_specific_files: Set of utf8 paths to process; anything
            other than set(['']) makes this a partial iteration.
        :param state: The DirState being compared.
        :param source_index: Index of the source parent details in each
            entry, or None to compare against NULL_PARENT_DETAILS.
        :param target_index: Must be 0 (the working tree details); any other
            value raises BzrError.
        :param want_unversioned: If truthy, unversioned on-disk paths are
            reported too.
        :param tree: The tree object used for abspath and tree-reference
            queries.
        """
        self.doing_consistency_expansion = 0
        self.old_dirname_to_file_id = {}
        self.new_dirname_to_file_id = {}
        # Are we doing a partial iter_changes?
        self.partial = set(['']).__ne__(search_specific_files)
        # Using a list so that we can access the values and change them in
        # nested scope. Each one is [path, file_id, entry]
        self.last_source_parent = [None, None]
        self.last_target_parent = [None, None]
        if include_unchanged is None:
            self.include_unchanged = False
        else:
            self.include_unchanged = int(include_unchanged)
        self.use_filesystem_for_exec = use_filesystem_for_exec
        self.utf8_decode = cache_utf8._utf8_decode
        # for all search_indexs in each path at or under each element of
        # search_specific_files, if the detail is relocated: add the id, and
        # add the relocated path as one to search if its not searched already.
        # If the detail is not relocated, add the id.
        self.searched_specific_files = set()
        # When we search exact paths without expanding downwards, we record
        # that here.
        self.searched_exact_paths = set()
        self.search_specific_files = search_specific_files
        # The parents up to the root of the paths we are searching.
        # After all normal paths are returned, these specific items are returned.
        self.search_specific_file_parents = set()
        # The ids we've sent out in the delta.
        self.seen_ids = set()
        self.state = state
        self.current_root = None
        self.current_root_unicode = None
        self.root_entries = None
        self.root_entries_pos = 0
        self.root_entries_len = 0
        self.root_abspath = None
        if source_index is None:
            self.source_index = -1
        else:
            self.source_index = source_index
        self.target_index = target_index
        self.want_unversioned = want_unversioned
        self.tree = tree
        self.dir_iterator = None
        self.block_index = -1
        self.current_block = None
        self.current_block_list = None
        self.current_block_pos = -1
        self.current_dir_info = None
        self.current_dir_list = None
        self._pending_consistent_entries = []
        self.path_index = 0
        self.root_dir_info = None
        self.bisect_left = bisect.bisect_left
        self.pathjoin = osutils.pathjoin
        self.fstat = os.fstat
        self.sha_file = osutils.sha_file
        if target_index != 0:
            # A lot of code in here depends on target_index == 0
            raise errors.BzrError('unsupported target index')
+
    cdef _process_entry(self, entry, path_info):
        """Compare an entry and real disk to generate delta information.

        :param path_info: top_relpath, basename, kind, lstat, abspath for
            the path of entry. If None, then the path is considered absent in
            the target (Perhaps we should pass in a concrete entry for this ?)
            Basename is returned as a utf8 string because we expect this
            tuple will be ignored, and don't want to take the time to
            decode.
        :return: (iter_changes_result, changed). If the entry has not been
            handled then changed is None. Otherwise it is False if no content
            or metadata changes have occurred, and True if any content or
            metadata change has occurred. If self.include_unchanged is True then
            if changed is not None, iter_changes_result will always be a result
            tuple. Otherwise, iter_changes_result is None unless changed is
            True.
        """
        # Minikind letters: f=file, d=directory, l=symlink, t=tree-reference,
        # a=absent, r=relocated (see _minikind_to_kind).
        cdef char target_minikind
        cdef char source_minikind
        cdef object file_id
        cdef int content_change
        cdef object details_list
        file_id = None
        details_list = entry[1]
        if -1 == self.source_index:
            source_details = DirState.NULL_PARENT_DETAILS
        else:
            source_details = details_list[self.source_index]
        target_details = details_list[self.target_index]
        target_minikind = _minikind_from_string(target_details[0])
        if path_info is not None and _versioned_minikind(target_minikind):
            if self.target_index != 0:
                raise AssertionError("Unsupported target index %d" %
                                     self.target_index)
            link_or_sha1 = _update_entry(self.state, entry, path_info[4], path_info[3])
            # The entry may have been modified by update_entry
            target_details = details_list[self.target_index]
            target_minikind = _minikind_from_string(target_details[0])
        else:
            link_or_sha1 = None
        # the rest of this function is 0.3 seconds on 50K paths, or
        # 0.000006 seconds per call.
        source_minikind = _minikind_from_string(source_details[0])
        if ((_versioned_minikind(source_minikind) or source_minikind == c'r')
            and _versioned_minikind(target_minikind)):
            # claimed content in both: diff
            # r | fdlt | | add source to search, add id path move and perform
            # | | | diff check on source-target
            # r | fdlt | a | dangling file that was present in the basis.
            # | | | ???
            if source_minikind != c'r':
                old_dirname = entry[0][0]
                old_basename = entry[0][1]
                old_path = path = None
            else:
                # add the source to the search path to find any children it
                # has. TODO ? : only add if it is a container ?
                if (not self.doing_consistency_expansion and
                    not osutils.is_inside_any(self.searched_specific_files,
                        source_details[1])):
                    self.search_specific_files.add(source_details[1])
                    # expanding from a user requested path, parent expansion
                    # for delta consistency happens later.
                # generate the old path; this is needed for stating later
                # as well.
                old_path = source_details[1]
                old_dirname, old_basename = os.path.split(old_path)
                path = self.pathjoin(entry[0][0], entry[0][1])
                old_entry = self.state._get_entry(self.source_index,
                    path_utf8=old_path)
                # update the source details variable to be the real
                # location.
                if old_entry == (None, None):
                    raise errors.CorruptDirstate(self.state._filename,
                        "entry '%s/%s' is considered renamed from %r"
                        " but source does not exist\n"
                        "entry: %s" % (entry[0][0], entry[0][1], old_path, entry))
                source_details = old_entry[1][self.source_index]
                source_minikind = _minikind_from_string(source_details[0])
            if path_info is None:
                # the file is missing on disk, show as removed.
                content_change = 1
                target_kind = None
                target_exec = False
            else:
                # source and target are both versioned and disk file is present.
                target_kind = path_info[2]
                if target_kind == 'directory':
                    if path is None:
                        old_path = path = self.pathjoin(old_dirname, old_basename)
                    file_id = entry[0][2]
                    self.new_dirname_to_file_id[path] = file_id
                    if source_minikind != c'd':
                        content_change = 1
                    else:
                        # directories have no fingerprint
                        content_change = 0
                    target_exec = False
                elif target_kind == 'file':
                    if source_minikind != c'f':
                        content_change = 1
                    else:
                        # Check the sha. We can't just rely on the size as
                        # content filtering may mean differ sizes actually
                        # map to the same content
                        if link_or_sha1 is None:
                            # Stat cache miss:
                            statvalue, link_or_sha1 = \
                                self.state._sha1_provider.stat_and_sha1(
                                    path_info[4])
                            self.state._observed_sha1(entry, link_or_sha1,
                                statvalue)
                        content_change = (link_or_sha1 != source_details[1])
                    # Target details is updated at update_entry time
                    if self.use_filesystem_for_exec:
                        # We don't need S_ISREG here, because we are sure
                        # we are dealing with a file.
                        target_exec = bool(S_IXUSR & path_info[3].st_mode)
                    else:
                        target_exec = target_details[3]
                elif target_kind == 'symlink':
                    if source_minikind != c'l':
                        content_change = 1
                    else:
                        content_change = (link_or_sha1 != source_details[1])
                    target_exec = False
                elif target_kind == 'tree-reference':
                    if source_minikind != c't':
                        content_change = 1
                    else:
                        content_change = 0
                    target_exec = False
                else:
                    if path is None:
                        path = self.pathjoin(old_dirname, old_basename)
                    raise errors.BadFileKindError(path, path_info[2])
            if source_minikind == c'd':
                if path is None:
                    old_path = path = self.pathjoin(old_dirname, old_basename)
                if file_id is None:
                    file_id = entry[0][2]
                self.old_dirname_to_file_id[old_path] = file_id
            # parent id is the entry for the path in the target tree
            if old_basename and old_dirname == self.last_source_parent[0]:
                # use a cached hit for non-root source entries.
                source_parent_id = self.last_source_parent[1]
            else:
                try:
                    source_parent_id = self.old_dirname_to_file_id[old_dirname]
                except KeyError, _:
                    source_parent_entry = self.state._get_entry(self.source_index,
                                                                path_utf8=old_dirname)
                    source_parent_id = source_parent_entry[0][2]
                if source_parent_id == entry[0][2]:
                    # This is the root, so the parent is None
                    source_parent_id = None
                else:
                    self.last_source_parent[0] = old_dirname
                    self.last_source_parent[1] = source_parent_id
            new_dirname = entry[0][0]
            if entry[0][1] and new_dirname == self.last_target_parent[0]:
                # use a cached hit for non-root target entries.
                target_parent_id = self.last_target_parent[1]
            else:
                try:
                    target_parent_id = self.new_dirname_to_file_id[new_dirname]
                except KeyError, _:
                    # TODO: We don't always need to do the lookup, because the
                    # parent entry will be the same as the source entry.
                    target_parent_entry = self.state._get_entry(self.target_index,
                                                                path_utf8=new_dirname)
                    if target_parent_entry == (None, None):
                        raise AssertionError(
                            "Could not find target parent in wt: %s\nparent of: %s"
                            % (new_dirname, entry))
                    target_parent_id = target_parent_entry[0][2]
                if target_parent_id == entry[0][2]:
                    # This is the root, so the parent is None
                    target_parent_id = None
                else:
                    self.last_target_parent[0] = new_dirname
                    self.last_target_parent[1] = target_parent_id

            source_exec = source_details[3]
            changed = (content_change
                or source_parent_id != target_parent_id
                or old_basename != entry[0][1]
                or source_exec != target_exec
                )
            if not changed and not self.include_unchanged:
                return None, False
            else:
                if old_path is None:
                    path = self.pathjoin(old_dirname, old_basename)
                    old_path = path
                    old_path_u = self.utf8_decode(old_path)[0]
                    path_u = old_path_u
                else:
                    old_path_u = self.utf8_decode(old_path)[0]
                    if old_path == path:
                        path_u = old_path_u
                    else:
                        path_u = self.utf8_decode(path)[0]
                source_kind = _minikind_to_kind(source_minikind)
                return (entry[0][2],
                   (old_path_u, path_u),
                   content_change,
                   (True, True),
                   (source_parent_id, target_parent_id),
                   (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
                   (source_kind, target_kind),
                   (source_exec, target_exec)), changed
        elif source_minikind == c'a' and _versioned_minikind(target_minikind):
            # looks like a new file
            path = self.pathjoin(entry[0][0], entry[0][1])
            # parent id is the entry for the path in the target tree
            # TODO: these are the same for an entire directory: cache em.
            parent_entry = self.state._get_entry(self.target_index,
                                                 path_utf8=entry[0][0])
            # NOTE(review): _get_entry appears to return a (None, None) tuple
            # rather than None when nothing is found (see the == (None, None)
            # checks elsewhere in this method), so this guard may be
            # unreachable -- confirm before relying on it.
            if parent_entry is None:
                raise errors.DirstateCorrupt(self.state,
                    "We could not find the parent entry in index %d"
                    " for the entry: %s"
                    % (self.target_index, entry[0]))
            parent_id = parent_entry[0][2]
            if parent_id == entry[0][2]:
                parent_id = None
            if path_info is not None:
                # Present on disk:
                if self.use_filesystem_for_exec:
                    # We need S_ISREG here, because we aren't sure if this
                    # is a file or not.
                    target_exec = bool(
                        S_ISREG(path_info[3].st_mode)
                        and S_IXUSR & path_info[3].st_mode)
                else:
                    target_exec = target_details[3]
                return (entry[0][2],
                   (None, self.utf8_decode(path)[0]),
                   True,
                   (False, True),
                   (None, parent_id),
                   (None, self.utf8_decode(entry[0][1])[0]),
                   (None, path_info[2]),
                   (None, target_exec)), True
            else:
                # Its a missing file, report it as such.
                return (entry[0][2],
                   (None, self.utf8_decode(path)[0]),
                   False,
                   (False, True),
                   (None, parent_id),
                   (None, self.utf8_decode(entry[0][1])[0]),
                   (None, None),
                   (None, False)), True
        elif _versioned_minikind(source_minikind) and target_minikind == c'a':
            # unversioned, possibly, or possibly not deleted: we dont care.
            # if its still on disk, *and* theres no other entry at this
            # path [we dont know this in this routine at the moment -
            # perhaps we should change this - then it would be an unknown.
            old_path = self.pathjoin(entry[0][0], entry[0][1])
            # parent id is the entry for the path in the target tree
            parent_id = self.state._get_entry(self.source_index, path_utf8=entry[0][0])[0][2]
            if parent_id == entry[0][2]:
                parent_id = None
            return (entry[0][2],
                (self.utf8_decode(old_path)[0], None),
                True,
                (True, False),
                (parent_id, None),
                (self.utf8_decode(entry[0][1])[0], None),
                (_minikind_to_kind(source_minikind), None),
                (source_details[3], None)), True
        elif _versioned_minikind(source_minikind) and target_minikind == c'r':
            # a rename; could be a true rename, or a rename inherited from
            # a renamed parent. TODO: handle this efficiently. Its not
            # common case to rename dirs though, so a correct but slow
            # implementation will do.
            if (not self.doing_consistency_expansion and
                not osutils.is_inside_any(self.searched_specific_files,
                    target_details[1])):
                self.search_specific_files.add(target_details[1])
                # We don't expand the specific files parents list here as
                # the path is absent in target and won't create a delta with
                # missing parent.
        elif ((source_minikind == c'r' or source_minikind == c'a') and
              (target_minikind == c'r' or target_minikind == c'a')):
            # neither of the selected trees contain this path,
            # so skip over it. This is not currently directly tested, but
            # is indirectly via test_too_much.TestCommands.test_conflicts.
            pass
        else:
            raise AssertionError("don't know how to compare "
                "source_minikind=%r, target_minikind=%r"
                % (source_minikind, target_minikind))
        ## import pdb;pdb.set_trace()
        return None, None
+
    def __iter__(self):
        """This object is its own iterator (see __next__)."""
        return self
+
    def iter_changes(self):
        """Return the iterator of changes; this object iterates itself."""
        return self
+
+ cdef int _gather_result_for_consistency(self, result) except -1:
+ """Check a result we will yield to make sure we are consistent later.
+
+ This gathers result's parents into a set to output later.
+
+ :param result: A result tuple.
+ """
+ if not self.partial or not result[0]:
+ return 0
+ self.seen_ids.add(result[0])
+ new_path = result[1][1]
+ if new_path:
+ # Not the root and not a delete: queue up the parents of the path.
+ self.search_specific_file_parents.update(
+ osutils.parent_directories(new_path.encode('utf8')))
+ # Add the root directory which parent_directories does not
+ # provide.
+ self.search_specific_file_parents.add('')
+ return 0
+
+ cdef int _update_current_block(self) except -1:
+ if (self.block_index < len(self.state._dirblocks) and
+ osutils.is_inside(self.current_root, self.state._dirblocks[self.block_index][0])):
+ self.current_block = self.state._dirblocks[self.block_index]
+ self.current_block_list = self.current_block[1]
+ self.current_block_pos = 0
+ else:
+ self.current_block = None
+ self.current_block_list = None
+ return 0
+
    def __next__(self):
        """Return the next change result, delegating to _iter_next."""
        # Simple thunk to allow tail recursion without pyrex confusion
        return self._iter_next()
+
    cdef _iter_next(self):
        """Single-step the iteration and return one change result.

        Raises StopIteration when every search path (and the consistency
        expansion entries) has been processed.
        """
        # This function single steps through an iterator. As such while loops
        # are often exited by 'return' - the code is structured so that the
        # next call into the function will return to the same while loop. Note
        # that all flow control needed to re-reach that step is reexecuted,
        # which can be a performance problem. It has not yet been tuned to
        # minimise this; a state machine is probably the simplest restructuring
        # to both minimise this overhead and make the code considerably more
        # understandable.

        # sketch:
        # compare source_index and target_index at or under each element of search_specific_files.
        # follow the following comparison table. Note that we only want to do diff operations when
        # the target is fdl because thats when the walkdirs logic will have exposed the pathinfo
        # for the target.
        # cases:
        #
        # Source | Target | disk | action
        # r | fdlt | | add source to search, add id path move and perform
        # | | | diff check on source-target
        # r | fdlt | a | dangling file that was present in the basis.
        # | | | ???
        # r | a | | add source to search
        # r | a | a |
        # r | r | | this path is present in a non-examined tree, skip.
        # r | r | a | this path is present in a non-examined tree, skip.
        # a | fdlt | | add new id
        # a | fdlt | a | dangling locally added file, skip
        # a | a | | not present in either tree, skip
        # a | a | a | not present in any tree, skip
        # a | r | | not present in either tree at this path, skip as it
        # | | | may not be selected by the users list of paths.
        # a | r | a | not present in either tree at this path, skip as it
        # | | | may not be selected by the users list of paths.
        # fdlt | fdlt | | content in both: diff them
        # fdlt | fdlt | a | deleted locally, but not unversioned - show as deleted ?
        # fdlt | a | | unversioned: output deleted id for now
        # fdlt | a | a | unversioned and deleted: output deleted id
        # fdlt | r | | relocated in this tree, so add target to search.
        # | | | Dont diff, we will see an r,fd; pair when we reach
        # | | | this id at the other path.
        # fdlt | r | a | relocated in this tree, so add target to search.
        # | | | Dont diff, we will see an r,fd; pair when we reach
        # | | | this id at the other path.

        # TODO: jam 20070516 - Avoid the _get_entry lookup overhead by
        # keeping a cache of directories that we have seen.
        cdef object current_dirname, current_blockname
        cdef char * current_dirname_c, * current_blockname_c
        cdef int advance_entry, advance_path
        cdef int path_handled
        searched_specific_files = self.searched_specific_files
        # Are we walking a root?
        while self.root_entries_pos < self.root_entries_len:
            entry = self.root_entries[self.root_entries_pos]
            self.root_entries_pos = self.root_entries_pos + 1
            result, changed = self._process_entry(entry, self.root_dir_info)
            if changed is not None:
                if changed:
                    self._gather_result_for_consistency(result)
                if changed or self.include_unchanged:
                    return result
        # Have we finished the prior root, or never started one ?
        if self.current_root is None:
            # TODO: the pending list should be lexically sorted?  the
            # interface doesn't require it.
            try:
                self.current_root = self.search_specific_files.pop()
            except KeyError, _:
                raise StopIteration()
            self.searched_specific_files.add(self.current_root)
            # process the entries for this containing directory: the rest will be
            # found by their parents recursively.
            self.root_entries = self.state._entries_for_path(self.current_root)
            self.root_entries_len = len(self.root_entries)
            self.current_root_unicode = self.current_root.decode('utf8')
            self.root_abspath = self.tree.abspath(self.current_root_unicode)
            try:
                root_stat = os.lstat(self.root_abspath)
            except OSError, e:
                if e.errno == errno.ENOENT:
                    # the path does not exist: let _process_entry know that.
                    self.root_dir_info = None
                else:
                    # some other random error: hand it up.
                    raise
            else:
                self.root_dir_info = ('', self.current_root,
                    osutils.file_kind_from_stat_mode(root_stat.st_mode), root_stat,
                    self.root_abspath)
                if self.root_dir_info[2] == 'directory':
                    if self.tree._directory_is_tree_reference(
                        self.current_root_unicode):
                        self.root_dir_info = self.root_dir_info[:2] + \
                            ('tree-reference',) + self.root_dir_info[3:]
        if not self.root_entries and not self.root_dir_info:
            # this specified path is not present at all, skip it.
            # (tail recursion, can do a loop once the full structure is
            # known).
            return self._iter_next()
        path_handled = 0
        self.root_entries_pos = 0
        # XXX Clarity: This loop duplicates the one above the
        # self.current_root is None guard: if we return from it, iteration
        # completes there (and the following if block cannot trigger because
        # path_handled must be true), so the if block is not duplicated.
        while self.root_entries_pos < self.root_entries_len:
            entry = self.root_entries[self.root_entries_pos]
            self.root_entries_pos = self.root_entries_pos + 1
            result, changed = self._process_entry(entry, self.root_dir_info)
            if changed is not None:
                path_handled = -1
                if changed:
                    self._gather_result_for_consistency(result)
                if changed or self.include_unchanged:
                    return result
        # handle unversioned specified paths:
        if self.want_unversioned and not path_handled and self.root_dir_info:
            new_executable = bool(
                stat.S_ISREG(self.root_dir_info[3].st_mode)
                and stat.S_IEXEC & self.root_dir_info[3].st_mode)
            return (None,
                (None, self.current_root_unicode),
                True,
                (False, False),
                (None, None),
                (None, splitpath(self.current_root_unicode)[-1]),
                (None, self.root_dir_info[2]),
                (None, new_executable)
                )
        # If we reach here, the outer flow continues, which enters into the
        # per-root setup logic.
        if (self.current_dir_info is None and self.current_block is None and not
            self.doing_consistency_expansion):
            # setup iteration of this root:
            self.current_dir_list = None
            if self.root_dir_info and self.root_dir_info[2] == 'tree-reference':
                self.current_dir_info = None
            else:
                self.dir_iterator = osutils._walkdirs_utf8(self.root_abspath,
                    prefix=self.current_root)
                self.path_index = 0
                try:
                    self.current_dir_info = self.dir_iterator.next()
                    self.current_dir_list = self.current_dir_info[1]
                except OSError, e:
                    # there may be directories in the inventory even though
                    # this path is not a file on disk: so mark it as end of
                    # iterator
                    if e.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
                        self.current_dir_info = None
                    elif sys.platform == 'win32':
                        # on win32, python2.4 has e.errno == ERROR_DIRECTORY, but
                        # python 2.5 has e.errno == EINVAL,
                        #            and e.winerror == ERROR_DIRECTORY
                        try:
                            e_winerror = e.winerror
                        except AttributeError, _:
                            e_winerror = None
                        win_errors = (ERROR_DIRECTORY, ERROR_PATH_NOT_FOUND)
                        if (e.errno in win_errors or e_winerror in win_errors):
                            self.current_dir_info = None
                        else:
                            # Will this really raise the right exception ?
                            raise
                    else:
                        raise
                else:
                    if self.current_dir_info[0][0] == '':
                        # remove .bzr from iteration
                        bzr_index = self.bisect_left(self.current_dir_list, ('.bzr',))
                        if self.current_dir_list[bzr_index][0] != '.bzr':
                            raise AssertionError()
                        del self.current_dir_list[bzr_index]
            initial_key = (self.current_root, '', '')
            self.block_index, _ = self.state._find_block_index_from_key(initial_key)
            if self.block_index == 0:
                # we have processed the total root already, but because the
                # initial key matched it we should skip it here.
                self.block_index = self.block_index + 1
            self._update_current_block()
        # walk until both the directory listing and the versioned metadata
        # are exhausted.
        while (self.current_dir_info is not None
            or self.current_block is not None):
            # Uncommon case - a missing directory or an unversioned directory:
            if (self.current_dir_info and self.current_block
                and self.current_dir_info[0][0] != self.current_block[0]):
                # Work around pyrex broken heuristic - current_dirname has
                # the same scope as current_dirname_c
                current_dirname = self.current_dir_info[0][0]
                current_dirname_c = PyString_AS_STRING_void(
                    <void *>current_dirname)
                current_blockname = self.current_block[0]
                current_blockname_c = PyString_AS_STRING_void(
                    <void *>current_blockname)
                # In the python generator we evaluate this if block once per
                # dir+block; because we reenter in the pyrex version its being
                # evaluated once per path: we could cache the result before
                # doing the while loop and probably save time.
                if _cmp_by_dirs(current_dirname_c,
                    PyString_Size(current_dirname),
                    current_blockname_c,
                    PyString_Size(current_blockname)) < 0:
                    # filesystem data refers to paths not covered by the
                    # dirblock.  this has two possibilities:
                    # A) it is versioned but empty, so there is no block for it
                    # B) it is not versioned.

                    # if (A) then we need to recurse into it to check for
                    # new unknown files or directories.
                    # if (B) then we should ignore it, because we don't
                    # recurse into unknown directories.
                    # We are doing a loop
                    while self.path_index < len(self.current_dir_list):
                        current_path_info = self.current_dir_list[self.path_index]
                        # dont descend into this unversioned path if it is
                        # a dir
                        if current_path_info[2] in ('directory',
                                                    'tree-reference'):
                            del self.current_dir_list[self.path_index]
                            self.path_index = self.path_index - 1
                        self.path_index = self.path_index + 1
                        if self.want_unversioned:
                            if current_path_info[2] == 'directory':
                                if self.tree._directory_is_tree_reference(
                                    self.utf8_decode(current_path_info[0])[0]):
                                    current_path_info = current_path_info[:2] + \
                                        ('tree-reference',) + current_path_info[3:]
                            new_executable = bool(
                                stat.S_ISREG(current_path_info[3].st_mode)
                                and stat.S_IEXEC & current_path_info[3].st_mode)
                            return (None,
                                (None, self.utf8_decode(current_path_info[0])[0]),
                                True,
                                (False, False),
                                (None, None),
                                (None, self.utf8_decode(current_path_info[1])[0]),
                                (None, current_path_info[2]),
                                (None, new_executable))
                    # This dir info has been handled, go to the next
                    self.path_index = 0
                    self.current_dir_list = None
                    try:
                        self.current_dir_info = self.dir_iterator.next()
                        self.current_dir_list = self.current_dir_info[1]
                    except StopIteration, _:
                        self.current_dir_info = None
                else: #(dircmp > 0)
                    # We have a dirblock entry for this location, but there
                    # is no filesystem path for this. This is most likely
                    # because a directory was removed from the disk.
                    # We don't have to report the missing directory,
                    # because that should have already been handled, but we
                    # need to handle all of the files that are contained
                    # within.
                    while self.current_block_pos < len(self.current_block_list):
                        current_entry = self.current_block_list[self.current_block_pos]
                        self.current_block_pos = self.current_block_pos + 1
                        # entry referring to file not present on disk.
                        # advance the entry only, after processing.
                        result, changed = self._process_entry(current_entry, None)
                        if changed is not None:
                            if changed:
                                self._gather_result_for_consistency(result)
                            if changed or self.include_unchanged:
                                return result
                    self.block_index = self.block_index + 1
                    self._update_current_block()
                continue # next loop-on-block/dir
            result = self._loop_one_block()
            if result is not None:
                return result
        if len(self.search_specific_files):
            # More supplied paths to process
            self.current_root = None
            return self._iter_next()
        # Start expanding more conservatively, adding paths the user may not
        # have intended but required for consistent deltas.
        self.doing_consistency_expansion = 1
        if not self._pending_consistent_entries:
            self._pending_consistent_entries = self._next_consistent_entries()
        while self._pending_consistent_entries:
            result, changed = self._pending_consistent_entries.pop()
            if changed is not None:
                return result
        raise StopIteration()
+
+ cdef object _maybe_tree_ref(self, current_path_info):
+ if self.tree._directory_is_tree_reference(
+ self.utf8_decode(current_path_info[0])[0]):
+ return current_path_info[:2] + \
+ ('tree-reference',) + current_path_info[3:]
+ else:
+ return current_path_info
+
+ cdef object _loop_one_block(self):
+ # current_dir_info and current_block refer to the same directory -
+ # this is the common case code.
+ # Assign local variables for current path and entry:
+ cdef object current_entry
+ cdef object current_path_info
+ cdef int path_handled
+ cdef char minikind
+ cdef int cmp_result
+ # cdef char * temp_str
+ # cdef Py_ssize_t temp_str_length
+ # PyString_AsStringAndSize(disk_kind, &temp_str, &temp_str_length)
+ # if not strncmp(temp_str, "directory", temp_str_length):
+ if (self.current_block is not None and
+ self.current_block_pos < PyList_GET_SIZE(self.current_block_list)):
+ current_entry = PyList_GET_ITEM(self.current_block_list,
+ self.current_block_pos)
+ # accomodate pyrex
+ Py_INCREF(current_entry)
+ else:
+ current_entry = None
+ if (self.current_dir_info is not None and
+ self.path_index < PyList_GET_SIZE(self.current_dir_list)):
+ current_path_info = PyList_GET_ITEM(self.current_dir_list,
+ self.path_index)
+ # accomodate pyrex
+ Py_INCREF(current_path_info)
+ disk_kind = PyTuple_GET_ITEM(current_path_info, 2)
+ # accomodate pyrex
+ Py_INCREF(disk_kind)
+ if disk_kind == "directory":
+ current_path_info = self._maybe_tree_ref(current_path_info)
+ else:
+ current_path_info = None
+ while (current_entry is not None or current_path_info is not None):
+ advance_entry = -1
+ advance_path = -1
+ result = None
+ changed = None
+ path_handled = 0
+ if current_entry is None:
+ # unversioned - the check for path_handled when the path
+ # is advanced will yield this path if needed.
+ pass
+ elif current_path_info is None:
+ # no path is fine: the per entry code will handle it.
+ result, changed = self._process_entry(current_entry,
+ current_path_info)
+ else:
+ minikind = _minikind_from_string(
+ current_entry[1][self.target_index][0])
+ cmp_result = cmp(current_path_info[1], current_entry[0][1])
+ if (cmp_result or minikind == c'a' or minikind == c'r'):
+ # The current path on disk doesn't match the dirblock
+ # record. Either the dirblock record is marked as
+ # absent/renamed, or the file on disk is not present at all
+ # in the dirblock. Either way, report about the dirblock
+ # entry, and let other code handle the filesystem one.
+
+ # Compare the basename for these files to determine
+ # which comes first
+ if cmp_result < 0:
+ # extra file on disk: pass for now, but only
+ # increment the path, not the entry
+ advance_entry = 0
+ else:
+ # entry referring to file not present on disk.
+ # advance the entry only, after processing.
+ result, changed = self._process_entry(current_entry,
+ None)
+ advance_path = 0
+ else:
+ # paths are the same,and the dirstate entry is not
+ # absent or renamed.
+ result, changed = self._process_entry(current_entry,
+ current_path_info)
+ if changed is not None:
+ path_handled = -1
+ if not changed and not self.include_unchanged:
+ changed = None
+ # >- loop control starts here:
+ # >- entry
+ if advance_entry and current_entry is not None:
+ self.current_block_pos = self.current_block_pos + 1
+ if self.current_block_pos < PyList_GET_SIZE(self.current_block_list):
+ current_entry = self.current_block_list[self.current_block_pos]
+ else:
+ current_entry = None
+ # >- path
+ if advance_path and current_path_info is not None:
+ if not path_handled:
+ # unversioned in all regards
+ if self.want_unversioned:
+ new_executable = bool(
+ stat.S_ISREG(current_path_info[3].st_mode)
+ and stat.S_IEXEC & current_path_info[3].st_mode)
+ try:
+ relpath_unicode = self.utf8_decode(current_path_info[0])[0]
+ except UnicodeDecodeError, _:
+ raise errors.BadFilenameEncoding(
+ current_path_info[0], osutils._fs_enc)
+ if changed is not None:
+ raise AssertionError(
+ "result is not None: %r" % result)
+ result = (None,
+ (None, relpath_unicode),
+ True,
+ (False, False),
+ (None, None),
+ (None, self.utf8_decode(current_path_info[1])[0]),
+ (None, current_path_info[2]),
+ (None, new_executable))
+ changed = True
+ # dont descend into this unversioned path if it is
+ # a dir
+ if current_path_info[2] in ('directory'):
+ del self.current_dir_list[self.path_index]
+ self.path_index = self.path_index - 1
+ # dont descend the disk iterator into any tree
+ # paths.
+ if current_path_info[2] == 'tree-reference':
+ del self.current_dir_list[self.path_index]
+ self.path_index = self.path_index - 1
+ self.path_index = self.path_index + 1
+ if self.path_index < len(self.current_dir_list):
+ current_path_info = self.current_dir_list[self.path_index]
+ if current_path_info[2] == 'directory':
+ current_path_info = self._maybe_tree_ref(
+ current_path_info)
+ else:
+ current_path_info = None
+ if changed is not None:
+ # Found a result on this pass, yield it
+ if changed:
+ self._gather_result_for_consistency(result)
+ if changed or self.include_unchanged:
+ return result
+ if self.current_block is not None:
+ self.block_index = self.block_index + 1
+ self._update_current_block()
+ if self.current_dir_info is not None:
+ self.path_index = 0
+ self.current_dir_list = None
+ try:
+ self.current_dir_info = self.dir_iterator.next()
+ self.current_dir_list = self.current_dir_info[1]
+ except StopIteration, _:
+ self.current_dir_info = None
+
    cdef object _next_consistent_entries(self):
        """Grabs the next specific file parent case to consider.

        Drains self.search_specific_file_parents (a set that is also added
        to while this loop runs, e.g. for rename targets), processing each
        not-yet-examined parent path and collecting its change results.

        :return: A list of the results, each of which is as for _process_entry.
        """
        results = []
        while self.search_specific_file_parents:
            # Process the parent directories for the paths we were iterating.
            # Even in extremely large trees this should be modest, so currently
            # no attempt is made to optimise.
            path_utf8 = self.search_specific_file_parents.pop()
            if path_utf8 in self.searched_exact_paths:
                # We've examined this path.
                continue
            if osutils.is_inside_any(self.searched_specific_files, path_utf8):
                # We've examined this path.
                continue
            path_entries = self.state._entries_for_path(path_utf8)
            # We need either one or two entries. If the path in
            # self.target_index has moved (so the entry in source_index is in
            # 'ar') then we need to also look for the entry for this path in
            # self.source_index, to output the appropriate delete-or-rename.
            selected_entries = []
            found_item = False
            for candidate_entry in path_entries:
                # Find entries present in target at this path:
                if candidate_entry[1][self.target_index][0] not in 'ar':
                    found_item = True
                    selected_entries.append(candidate_entry)
                # Find entries present in source at this path:
                elif (self.source_index is not None and
                    candidate_entry[1][self.source_index][0] not in 'ar'):
                    found_item = True
                    if candidate_entry[1][self.target_index][0] == 'a':
                        # Deleted, emit it here.
                        selected_entries.append(candidate_entry)
                    else:
                        # renamed, emit it when we process the directory it
                        # ended up at.
                        self.search_specific_file_parents.add(
                            candidate_entry[1][self.target_index][1])
            if not found_item:
                raise AssertionError(
                    "Missing entry for specific path parent %r, %r" % (
                    path_utf8, path_entries))
            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
            for entry in selected_entries:
                # Skip file-ids already reported elsewhere.
                if entry[0][2] in self.seen_ids:
                    continue
                result, changed = self._process_entry(entry, path_info)
                if changed is None:
                    raise AssertionError(
                        "Got entry<->path mismatch for specific path "
                        "%r entry %r path_info %r " % (
                        path_utf8, entry, path_info))
                # Only include changes - we're outside the users requested
                # expansion.
                if changed:
                    self._gather_result_for_consistency(result)
                    # result[6] is the (source_kind, target_kind) pair.
                    if (result[6][0] == 'directory' and
                        result[6][1] != 'directory'):
                        # This stopped being a directory, the old children have
                        # to be included.
                        # NOTE(review): this branch indexes
                        # entry[1][self.source_index] without a None check on
                        # source_index - presumably guaranteed by the caller
                        # when a kind change can be observed; confirm.
                        if entry[1][self.source_index][0] == 'r':
                            # renamed, take the source path
                            entry_path_utf8 = entry[1][self.source_index][1]
                        else:
                            entry_path_utf8 = path_utf8
                        initial_key = (entry_path_utf8, '', '')
                        block_index, _ = self.state._find_block_index_from_key(
                            initial_key)
                        if block_index == 0:
                            # The children of the root are in block index 1.
                            block_index = block_index + 1
                        current_block = None
                        if block_index < len(self.state._dirblocks):
                            current_block = self.state._dirblocks[block_index]
                            if not osutils.is_inside(
                                entry_path_utf8, current_block[0]):
                                # No entries for this directory at all.
                                current_block = None
                        if current_block is not None:
                            # NOTE(review): this inner loop reuses the name
                            # 'entry', shadowing the outer loop variable; the
                            # outer 'for' rebinding keeps behaviour correct,
                            # but a distinct name would be clearer.
                            for entry in current_block[1]:
                                if entry[1][self.source_index][0] in 'ar':
                                    # Not in the source tree, so doesn't have to be
                                    # included.
                                    continue
                                # Path of the entry itself.
                                self.search_specific_file_parents.add(
                                    self.pathjoin(*entry[0][:2]))
                if changed or self.include_unchanged:
                    results.append((result, changed))
            self.searched_exact_paths.add(path_utf8)
        return results
+
+ cdef object _path_info(self, utf8_path, unicode_path):
+ """Generate path_info for unicode_path.
+
+ :return: None if unicode_path does not exist, or a path_info tuple.
+ """
+ abspath = self.tree.abspath(unicode_path)
+ try:
+ stat = os.lstat(abspath)
+ except OSError, e:
+ if e.errno == errno.ENOENT:
+ # the path does not exist.
+ return None
+ else:
+ raise
+ utf8_basename = utf8_path.rsplit('/', 1)[-1]
+ dir_info = (utf8_path, utf8_basename,
+ osutils.file_kind_from_stat_mode(stat.st_mode), stat,
+ abspath)
+ if dir_info[2] == 'directory':
+ if self.tree._directory_is_tree_reference(
+ unicode_path):
+ self.root_dir_info = self.root_dir_info[:2] + \
+ ('tree-reference',) + self.root_dir_info[3:]
+ return dir_info