Tarball conversion

author: Lorry <lorry@roadtrain.codethink.co.uk> 2012-08-22 15:47:16 +0100
committer: Lorry <lorry@roadtrain.codethink.co.uk> 2012-08-22 15:47:16 +0100
commit: 25335618bf8755ce6b116ee14f47f5a1f2c821e9 (patch)
tree: d889d7ab3f9f985d0c54c534cb8052bd2e6d7163 /bzrlib/_dirstate_helpers_pyx.pyx
download: bzr-tarball-25335618bf8755ce6b116ee14f47f5a1f2c821e9.tar.gz
1 files changed, 2031 insertions, 0 deletions
diff --git a/bzrlib/_dirstate_helpers_pyx.pyx b/bzrlib/_dirstate_helpers_pyx.pyx
new file mode 100644
index 0000000..76847b4
--- /dev/null
+++ b/bzrlib/_dirstate_helpers_pyx.pyx
@@ -0,0 +1,2031 @@
+# Copyright (C) 2007-2010 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Helper functions for DirState.
+
+This is the python implementation for DirState functions.
+"""
+
+import binascii
+import bisect
+import errno
+import os
+import stat
+import sys
+
+from bzrlib import cache_utf8, errors, osutils
+from bzrlib.dirstate import DirState
+from bzrlib.osutils import parent_directories, pathjoin, splitpath
+
+
+# This is the Windows equivalent of ENOTDIR
+# It is defined in pywin32.winerror, but we don't want a strong dependency for
+# just an error code.
+# XXX: Perhaps we could get it from a windows header ?
+cdef int ERROR_PATH_NOT_FOUND
+ERROR_PATH_NOT_FOUND = 3
+cdef int ERROR_DIRECTORY
+ERROR_DIRECTORY = 267
+
+#python2.4 support, and other platform-dependent includes
+cdef extern from "python-compat.h":
+    unsigned long htonl(unsigned long)
+
+# Give Pyrex some function definitions for it to understand.
+# All of these are just hints to Pyrex, so that it can try to convert python
+# objects into similar C objects. (such as PyInt => int).
+# In anything defined 'cdef extern from XXX' the real C header will be
+# imported, and the real definition will be used from there. So these are just
+# hints, and do not need to match exactly to the C definitions.
+
+cdef extern from *:
+    ctypedef unsigned long size_t
+
+cdef extern from "_dirstate_helpers_pyx.h":
+    ctypedef int intptr_t
+
+
+
+cdef extern from "stdlib.h":
+    unsigned long int strtoul(char *nptr, char **endptr, int base)
+
+
+cdef extern from 'sys/stat.h':
+    int S_ISDIR(int mode)
+    int S_ISREG(int mode)
+    # On win32, this actually comes from "python-compat.h"
+    int S_ISLNK(int mode)
+    int S_IXUSR
+
+# These functions allow us access to a bit of the 'bare metal' of python
+# objects, rather than going through the object abstraction. (For example,
+# PyList_Append, rather than getting the 'append' attribute of the object, and
+# creating a tuple, and then using PyCallObject).
+# Functions that return (or take) a void* are meant to grab a C PyObject*. This
+# differs from the Pyrex 'object'. If you declare a variable as 'object' Pyrex
+# will automatically Py_INCREF and Py_DECREF when appropriate. But for some
+# inner loops, we don't need to do that at all, as the reference only lasts for
+# a very short time.
+# Note that the C API GetItem calls borrow references, so pyrex does the wrong
+# thing if you declare e.g. object PyList_GetItem(object lst, int index) - you
+# need to manually Py_INCREF yourself.
+cdef extern from "Python.h":
+    ctypedef int Py_ssize_t
+    ctypedef struct PyObject:
+        pass
+    int PyList_Append(object lst, object item) except -1
+    void *PyList_GetItem_object_void "PyList_GET_ITEM" (object lst, int index)
+    void *PyList_GetItem_void_void "PyList_GET_ITEM" (void * lst, int index)
+    object PyList_GET_ITEM(object lst, Py_ssize_t index)
+    int PyList_CheckExact(object)
+    Py_ssize_t PyList_GET_SIZE (object p)
+
+    void *PyTuple_GetItem_void_void "PyTuple_GET_ITEM" (void* tpl, int index)
+    object PyTuple_GetItem_void_object "PyTuple_GET_ITEM" (void* tpl, int index)
+    object PyTuple_GET_ITEM(object tpl, Py_ssize_t index)
+
+    unsigned long PyInt_AsUnsignedLongMask(object number) except? -1
+
+    char *PyString_AsString(object p)
+    char *PyString_AsString_obj "PyString_AsString" (PyObject *string)
+    char *PyString_AS_STRING_void "PyString_AS_STRING" (void *p)
+    int PyString_AsStringAndSize(object str, char **buffer, Py_ssize_t *length) except -1
+    object PyString_FromString(char *)
+    object PyString_FromStringAndSize(char *, Py_ssize_t)
+    int PyString_Size(object p)
+    int PyString_GET_SIZE_void "PyString_GET_SIZE" (void *p)
+    int PyString_CheckExact(object p)
+    void Py_INCREF(object o)
+    void Py_DECREF(object o)
+
+
+cdef extern from "string.h":
+    int strncmp(char *s1, char *s2, int len)
+    void *memchr(void *s, int c, size_t len)
+    int memcmp(void *b1, void *b2, size_t len)
+    # ??? memrchr is a GNU extension :(
+    # void *memrchr(void *s, int c, size_t len)
+
+# cimport all of the definitions we will need to access
+from _static_tuple_c cimport import_static_tuple_c, StaticTuple, \
+    StaticTuple_New, StaticTuple_SET_ITEM
+
+import_static_tuple_c()
+
+cdef void* _my_memrchr(void *s, int c, size_t n): # cannot_raise
+    # memrchr seems to be a GNU extension, so we have to implement it ourselves
+    cdef char *pos
+    cdef char *start
+
+    start = <char*>s
+    pos = start + n - 1
+    while pos >= start:
+        if pos[0] == c:
+            return <void*>pos
+        pos = pos - 1
+    return NULL
+
+
+def _py_memrchr(s, c):
+    """Just to expose _my_memrchr for testing.
+
+    :param s: The Python string to search
+    :param c: The character to search for
+    :return: The offset to the last instance of 'c' in s
+    """
+    cdef void *_s
+    cdef void *found
+    cdef int length
+    cdef char *_c
+
+    _s = PyString_AsString(s)
+    length = PyString_Size(s)
+
+    _c = PyString_AsString(c)
+    assert PyString_Size(c) == 1,\
+        'Must be a single character string, not %s' % (c,)
+    found = _my_memrchr(_s, _c[0], length)
+    if found == NULL:
+        return None
+    return <char*>found - <char*>_s
+
+
+cdef object safe_string_from_size(char *s, Py_ssize_t size):
+    if size < 0:
+        raise AssertionError(
+            'tried to create a string with an invalid size: %d'
+            % (size))
+    return PyString_FromStringAndSize(s, size)
+
+
+cdef int _is_aligned(void *ptr): # cannot_raise
+    """Is this pointer aligned to an integer size offset?
+
+    :return: 1 if this pointer is aligned, 0 otherwise.
+    """
+    return ((<intptr_t>ptr) & ((sizeof(int))-1)) == 0
+
+
+cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2): # cannot_raise
+    cdef unsigned char *cur1
+    cdef unsigned char *cur2
+    cdef unsigned char *end1
+    cdef unsigned char *end2
+    cdef int *cur_int1
+    cdef int *cur_int2
+    cdef int *end_int1
+    cdef int *end_int2
+
+    if path1 == path2 and size1 == size2:
+        return 0
+
+    end1 = <unsigned char*>path1+size1
+    end2 = <unsigned char*>path2+size2
+
+    # Use 32-bit comparisons for the matching portion of the string.
+    # Almost all CPU's are faster at loading and comparing 32-bit integers,
+    # than they are at 8-bit integers.
+    # 99% of the time, these will be aligned, but in case they aren't just skip
+    # this loop
+    if _is_aligned(path1) and _is_aligned(path2):
+        cur_int1 = <int*>path1
+        cur_int2 = <int*>path2
+        end_int1 = <int*>(path1 + size1 - (size1 % sizeof(int)))
+        end_int2 = <int*>(path2 + size2 - (size2 % sizeof(int)))
+
+        while cur_int1 < end_int1 and cur_int2 < end_int2:
+            if cur_int1[0] != cur_int2[0]:
+                break
+            cur_int1 = cur_int1 + 1
+            cur_int2 = cur_int2 + 1
+
+        cur1 = <unsigned char*>cur_int1
+        cur2 = <unsigned char*>cur_int2
+    else:
+        cur1 = <unsigned char*>path1
+        cur2 = <unsigned char*>path2
+
+    while cur1 < end1 and cur2 < end2:
+        if cur1[0] == cur2[0]:
+            # This character matches, just go to the next one
+            cur1 = cur1 + 1
+            cur2 = cur2 + 1
+            continue
+        # The current characters do not match
+        if cur1[0] == c'/':
+            return -1 # Reached the end of path1 segment first
+        elif cur2[0] == c'/':
+            return 1 # Reached the end of path2 segment first
+        elif cur1[0] < cur2[0]:
+            return -1
+        else:
+            return 1
+
+    # We reached the end of at least one of the strings
+    if cur1 < end1:
+        return 1 # Not at the end of cur1, must be at the end of cur2
+    if cur2 < end2:
+        return -1 # At the end of cur1, but not at cur2
+    # We reached the end of both strings
+    return 0
+
+
+def cmp_by_dirs(path1, path2):
+    """Compare two paths directory by directory.
+
+    This is equivalent to doing::
+
+       cmp(path1.split('/'), path2.split('/'))
+
+    The idea is that you should compare path components separately. This
+    differs from plain ``cmp(path1, path2)`` for paths like ``'a-b'`` and
+    ``a/b``. "a-b" comes after "a" but would come before "a/b" lexically.
+
+    :param path1: first path
+    :param path2: second path
+    :return: negative number if ``path1`` comes first,
+        0 if paths are equal,
+        and positive number if ``path2`` sorts first
+    """
+    if not PyString_CheckExact(path1):
+        raise TypeError("'path1' must be a plain string, not %s: %r"
+                        % (type(path1), path1))
+    if not PyString_CheckExact(path2):
+        raise TypeError("'path2' must be a plain string, not %s: %r"
+                        % (type(path2), path2))
+    return _cmp_by_dirs(PyString_AsString(path1),
+                        PyString_Size(path1),
+                        PyString_AsString(path2),
+                        PyString_Size(path2))
+
+
+def _cmp_path_by_dirblock(path1, path2):
+    """Compare two paths based on what directory they are in.
+
+    This generates a sort order, such that all children of a directory are
+    sorted together, and grandchildren are in the same order as the
+    children appear. But all grandchildren come after all children.
+
+    In other words, all entries in a directory are sorted together, and
+    directorys are sorted in cmp_by_dirs order.
+
+    :param path1: first path
+    :param path2: the second path
+    :return: negative number if ``path1`` comes first,
+        0 if paths are equal
+        and a positive number if ``path2`` sorts first
+    """
+    if not PyString_CheckExact(path1):
+        raise TypeError("'path1' must be a plain string, not %s: %r"
+                        % (type(path1), path1))
+    if not PyString_CheckExact(path2):
+        raise TypeError("'path2' must be a plain string, not %s: %r"
+                        % (type(path2), path2))
+    return _cmp_path_by_dirblock_intern(PyString_AsString(path1),
+                                        PyString_Size(path1),
+                                        PyString_AsString(path2),
+                                        PyString_Size(path2))
+
+
+cdef int _cmp_path_by_dirblock_intern(char *path1, int path1_len,
+                                      char *path2, int path2_len): # cannot_raise
+    """Compare two paths by what directory they are in.
+
+    see ``_cmp_path_by_dirblock`` for details.
+    """
+    cdef char *dirname1
+    cdef int dirname1_len
+    cdef char *dirname2
+    cdef int dirname2_len
+    cdef char *basename1
+    cdef int basename1_len
+    cdef char *basename2
+    cdef int basename2_len
+    cdef int cur_len
+    cdef int cmp_val
+
+    if path1_len == 0 and path2_len == 0:
+        return 0
+
+    if path1 == path2 and path1_len == path2_len:
+        return 0
+
+    if path1_len == 0:
+        return -1
+
+    if path2_len == 0:
+        return 1
+
+    basename1 = <char*>_my_memrchr(path1, c'/', path1_len)
+
+    if basename1 == NULL:
+        basename1 = path1
+        basename1_len = path1_len
+        dirname1 = ''
+        dirname1_len = 0
+    else:
+        dirname1 = path1
+        dirname1_len = basename1 - path1
+        basename1 = basename1 + 1
+        basename1_len = path1_len - dirname1_len - 1
+
+    basename2 = <char*>_my_memrchr(path2, c'/', path2_len)
+
+    if basename2 == NULL:
+        basename2 = path2
+        basename2_len = path2_len
+        dirname2 = ''
+        dirname2_len = 0
+    else:
+        dirname2 = path2
+        dirname2_len = basename2 - path2
+        basename2 = basename2 + 1
+        basename2_len = path2_len - dirname2_len - 1
+
+    cmp_val = _cmp_by_dirs(dirname1, dirname1_len,
+                           dirname2, dirname2_len)
+    if cmp_val != 0:
+        return cmp_val
+
+    cur_len = basename1_len
+    if basename2_len < basename1_len:
+        cur_len = basename2_len
+
+    cmp_val = memcmp(basename1, basename2, cur_len)
+    if cmp_val != 0:
+        return cmp_val
+    if basename1_len == basename2_len:
+        return 0
+    if basename1_len < basename2_len:
+        return -1
+    return 1
+
+
+def _bisect_path_left(paths, path):
+    """Return the index where to insert path into paths.
+
+    This uses a path-wise comparison so we get::
+        a
+        a-b
+        a=b
+        a/b
+    Rather than::
+        a
+        a-b
+        a/b
+        a=b
+    :param paths: A list of paths to search through
+    :param path: A single path to insert
+    :return: An offset where 'path' can be inserted.
+    :seealso: bisect.bisect_left
+    """
+    cdef int _lo
+    cdef int _hi
+    cdef int _mid
+    cdef char *path_cstr
+    cdef int path_size
+    cdef char *cur_cstr
+    cdef int cur_size
+    cdef void *cur
+
+    if not PyList_CheckExact(paths):
+        raise TypeError("you must pass a python list for 'paths' not: %s %r"
+                        % (type(paths), paths))
+    if not PyString_CheckExact(path):
+        raise TypeError("you must pass a string for 'path' not: %s %r"
+                        % (type(path), path))
+
+    _hi = len(paths)
+    _lo = 0
+
+    path_cstr = PyString_AsString(path)
+    path_size = PyString_Size(path)
+
+    while _lo < _hi:
+        _mid = (_lo + _hi) / 2
+        cur = PyList_GetItem_object_void(paths, _mid)
+        cur_cstr = PyString_AS_STRING_void(cur)
+        cur_size = PyString_GET_SIZE_void(cur)
+        if _cmp_path_by_dirblock_intern(cur_cstr, cur_size,
+                                        path_cstr, path_size) < 0:
+            _lo = _mid + 1
+        else:
+            _hi = _mid
+    return _lo
+
+
+def _bisect_path_right(paths, path):
+    """Return the index where to insert path into paths.
+
+    This uses a path-wise comparison so we get::
+        a
+        a-b
+        a=b
+        a/b
+    Rather than::
+        a
+        a-b
+        a/b
+        a=b
+    :param paths: A list of paths to search through
+    :param path: A single path to insert
+    :return: An offset where 'path' can be inserted.
+    :seealso: bisect.bisect_right
+    """
+    cdef int _lo
+    cdef int _hi
+    cdef int _mid
+    cdef char *path_cstr
+    cdef int path_size
+    cdef char *cur_cstr
+    cdef int cur_size
+    cdef void *cur
+
+    if not PyList_CheckExact(paths):
+        raise TypeError("you must pass a python list for 'paths' not: %s %r"
+                        % (type(paths), paths))
+    if not PyString_CheckExact(path):
+        raise TypeError("you must pass a string for 'path' not: %s %r"
+                        % (type(path), path))
+
+    _hi = len(paths)
+    _lo = 0
+
+    path_cstr = PyString_AsString(path)
+    path_size = PyString_Size(path)
+
+    while _lo < _hi:
+        _mid = (_lo + _hi) / 2
+        cur = PyList_GetItem_object_void(paths, _mid)
+        cur_cstr = PyString_AS_STRING_void(cur)
+        cur_size = PyString_GET_SIZE_void(cur)
+        if _cmp_path_by_dirblock_intern(path_cstr, path_size,
+                                        cur_cstr, cur_size) < 0:
+            _hi = _mid
+        else:
+            _lo = _mid + 1
+    return _lo
+
+
+def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache=None):
+    """Return the index where to insert dirname into the dirblocks.
+
+    The return value idx is such that all directories blocks in dirblock[:idx]
+    have names < dirname, and all blocks in dirblock[idx:] have names >=
+    dirname.
+
+    Optional args lo (default 0) and hi (default len(dirblocks)) bound the
+    slice of a to be searched.
+    """
+    cdef int _lo
+    cdef int _hi
+    cdef int _mid
+    cdef char *dirname_cstr
+    cdef int dirname_size
+    cdef char *cur_cstr
+    cdef int cur_size
+    cdef void *cur
+
+    if not PyList_CheckExact(dirblocks):
+        raise TypeError("you must pass a python list for 'dirblocks' not: %s %r"
+                        % (type(dirblocks), dirblocks))
+    if not PyString_CheckExact(dirname):
+        raise TypeError("you must pass a string for dirname not: %s %r"
+                        % (type(dirname), dirname))
+    if hi is None:
+        _hi = len(dirblocks)
+    else:
+        _hi = hi
+
+    _lo = lo
+    dirname_cstr = PyString_AsString(dirname)
+    dirname_size = PyString_Size(dirname)
+
+    while _lo < _hi:
+        _mid = (_lo + _hi) / 2
+        # Grab the dirname for the current dirblock
+        # cur = dirblocks[_mid][0]
+        cur = PyTuple_GetItem_void_void(
+                PyList_GetItem_object_void(dirblocks, _mid), 0)
+        cur_cstr = PyString_AS_STRING_void(cur)
+        cur_size = PyString_GET_SIZE_void(cur)
+        if _cmp_by_dirs(cur_cstr, cur_size, dirname_cstr, dirname_size) < 0:
+            _lo = _mid + 1
+        else:
+            _hi = _mid
+    return _lo
+
+
+cdef class Reader:
+    """Maintain the current location, and return fields as you parse them."""
+
+    cdef object state # The DirState object
+    cdef object text # The overall string object
+    cdef char *text_cstr # Pointer to the beginning of text
+    cdef int text_size # Length of text
+
+    cdef char *end_cstr # End of text
+    cdef char *cur_cstr # Pointer to the current record
+    cdef char *next # Pointer to the end of this record
+
+    def __init__(self, text, state):
+        self.state = state
+        self.text = text
+        self.text_cstr = PyString_AsString(text)
+        self.text_size = PyString_Size(text)
+        self.end_cstr = self.text_cstr + self.text_size
+        self.cur_cstr = self.text_cstr
+
+    cdef char *get_next(self, int *size) except NULL:
+        """Return a pointer to the start of the next field."""
+        cdef char *next
+        cdef Py_ssize_t extra_len
+
+        if self.cur_cstr == NULL:
+            raise AssertionError('get_next() called when cur_str is NULL')
+        elif self.cur_cstr >= self.end_cstr:
+            raise AssertionError('get_next() called when there are no chars'
+                                 ' left')
+        next = self.cur_cstr
+        self.cur_cstr = <char*>memchr(next, c'\0', self.end_cstr - next)
+        if self.cur_cstr == NULL:
+            extra_len = self.end_cstr - next
+            raise errors.DirstateCorrupt(self.state,
+                'failed to find trailing NULL (\\0).'
+                ' Trailing garbage: %r'
+                % safe_string_from_size(next, extra_len))
+        size[0] = self.cur_cstr - next
+        self.cur_cstr = self.cur_cstr + 1
+        return next
+
+    cdef object get_next_str(self):
+        """Get the next field as a Python string."""
+        cdef int size
+        cdef char *next
+        next = self.get_next(&size)
+        return safe_string_from_size(next, size)
+
+    cdef int _init(self) except -1:
+        """Get the pointer ready.
+
+        This assumes that the dirstate header has already been read, and we
+        already have the dirblock string loaded into memory.
+        This just initializes our memory pointers, etc for parsing of the
+        dirblock string.
+        """
+        cdef char *first
+        cdef int size
+        # The first field should be an empty string left over from the Header
+        first = self.get_next(&size)
+        if first[0] != c'\0' and size == 0:
+            raise AssertionError('First character should be null not: %s'
+                                 % (first,))
+        return 0
+
+    cdef object _get_entry(self, int num_trees, void **p_current_dirname,
+                           int *new_block):
+        """Extract the next entry.
+
+        This parses the next entry based on the current location in
+        ``self.cur_cstr``.
+        Each entry can be considered a "row" in the total table. And each row
+        has a fixed number of columns. It is generally broken up into "key"
+        columns, then "current" columns, and then "parent" columns.
+
+        :param num_trees: How many parent trees need to be parsed
+        :param p_current_dirname: A pointer to the current PyString
+            representing the directory name.
+            We pass this in as a void * so that pyrex doesn't have to
+            increment/decrement the PyObject reference counter for each
+            _get_entry call.
+            We use a pointer so that _get_entry can update it with the new
+            value.
+        :param new_block: This is to let the caller know that it needs to
+            create a new directory block to store the next entry.
+        """
+        cdef StaticTuple path_name_file_id_key
+        cdef StaticTuple tmp
+        cdef char *entry_size_cstr
+        cdef unsigned long int entry_size
+        cdef char* executable_cstr
+        cdef int is_executable
+        cdef char* dirname_cstr
+        cdef char* trailing
+        cdef int cur_size
+        cdef int i
+        cdef object minikind
+        cdef object fingerprint
+        cdef object info
+
+        # Read the 'key' information (dirname, name, file_id)
+        dirname_cstr = self.get_next(&cur_size)
+        # Check to see if we have started a new directory block.
+        # If so, then we need to create a new dirname PyString, so that it can
+        # be used in all of the tuples. This saves time and memory, by re-using
+        # the same object repeatedly.
+
+        # Do the cheap 'length of string' check first. If the string is a
+        # different length, then we *have* to be a different directory.
+        if (cur_size != PyString_GET_SIZE_void(p_current_dirname[0])
+            or strncmp(dirname_cstr,
+                       # Extract the char* from our current dirname string.  We
+                       # know it is a PyString, so we can use
+                       # PyString_AS_STRING, we use the _void version because
+                       # we are tricking Pyrex by using a void* rather than an
+                       # <object>
+                       PyString_AS_STRING_void(p_current_dirname[0]),
+                       cur_size+1) != 0):
+            dirname = safe_string_from_size(dirname_cstr, cur_size)
+            p_current_dirname[0] = <void*>dirname
+            new_block[0] = 1
+        else:
+            new_block[0] = 0
+
+        # Build up the key that will be used.
+        # By using <object>(void *) Pyrex will automatically handle the
+        # Py_INCREF that we need.
+        cur_dirname = <object>p_current_dirname[0]
+        # Use StaticTuple_New to pre-allocate, rather than creating a regular
+        # tuple and passing it to the StaticTuple constructor.
+        # path_name_file_id_key = StaticTuple(<object>p_current_dirname[0],
+        #                          self.get_next_str(),
+        #                          self.get_next_str(),
+        #                         )
+        tmp = StaticTuple_New(3)
+        Py_INCREF(cur_dirname); StaticTuple_SET_ITEM(tmp, 0, cur_dirname)
+        cur_basename = self.get_next_str()
+        cur_file_id = self.get_next_str()
+        Py_INCREF(cur_basename); StaticTuple_SET_ITEM(tmp, 1, cur_basename)
+        Py_INCREF(cur_file_id); StaticTuple_SET_ITEM(tmp, 2, cur_file_id)
+        path_name_file_id_key = tmp
+
+        # Parse all of the per-tree information. current has the information in
+        # the same location as parent trees. The only difference is that 'info'
+        # is a 'packed_stat' for current, while it is a 'revision_id' for
+        # parent trees.
+        # minikind, fingerprint, and info will be returned as regular python
+        # strings
+        # entry_size and is_executable will be parsed into a python Long and
+        # python Boolean, respectively.
+        # TODO: jam 20070718 Consider changin the entry_size conversion to
+        #       prefer python Int when possible. They are generally faster to
+        #       work with, and it will be rare that we have a file >2GB.
+        #       Especially since this code is pretty much fixed at a max of
+        #       4GB.
+        trees = []
+        for i from 0 <= i < num_trees:
+            minikind = self.get_next_str()
+            fingerprint = self.get_next_str()
+            entry_size_cstr = self.get_next(&cur_size)
+            entry_size = strtoul(entry_size_cstr, NULL, 10)
+            executable_cstr = self.get_next(&cur_size)
+            is_executable = (executable_cstr[0] == c'y')
+            info = self.get_next_str()
+            # TODO: If we want to use StaticTuple_New here we need to be pretty
+            #       careful. We are relying on a bit of Pyrex
+            #       automatic-conversion from 'int' to PyInt, and that doesn't
+            #       play well with the StaticTuple_SET_ITEM macro.
+            #       Timing doesn't (yet) show a worthwile improvement in speed
+            #       versus complexity and maintainability.
+            # tmp = StaticTuple_New(5)
+            # Py_INCREF(minikind); StaticTuple_SET_ITEM(tmp, 0, minikind)
+            # Py_INCREF(fingerprint); StaticTuple_SET_ITEM(tmp, 1, fingerprint)
+            # Py_INCREF(entry_size); StaticTuple_SET_ITEM(tmp, 2, entry_size)
+            # Py_INCREF(is_executable); StaticTuple_SET_ITEM(tmp, 3, is_executable)
+            # Py_INCREF(info); StaticTuple_SET_ITEM(tmp, 4, info)
+            # PyList_Append(trees, tmp)
+            PyList_Append(trees, StaticTuple(
+                minikind,     # minikind
+                fingerprint,  # fingerprint
+                entry_size,   # size
+                is_executable,# executable
+                info,         # packed_stat or revision_id
+            ))
+
+        # The returned tuple is (key, [trees])
+        ret = (path_name_file_id_key, trees)
+        # Ignore the trailing newline, but assert that it does exist, this
+        # ensures that we always finish parsing a line on an end-of-entry
+        # marker.
+        trailing = self.get_next(&cur_size)
+        if cur_size != 1 or trailing[0] != c'\n':
+            raise errors.DirstateCorrupt(self.state,
+                'Bad parse, we expected to end on \\n, not: %d %s: %s'
+                % (cur_size, safe_string_from_size(trailing, cur_size),
+                   ret))
+        return ret
+
+    def _parse_dirblocks(self):
+        """Parse all dirblocks in the state file."""
+        cdef int num_trees
+        cdef object current_block
+        cdef object entry
+        cdef void * current_dirname
+        cdef int new_block
+        cdef int expected_entry_count
+        cdef int entry_count
+
+        num_trees = self.state._num_present_parents() + 1
+        expected_entry_count = self.state._num_entries
+
+        # Ignore the first record
+        self._init()
+
+        current_block = []
+        dirblocks = [('', current_block), ('', [])]
+        self.state._dirblocks = dirblocks
+        obj = ''
+        current_dirname = <void*>obj
+        new_block = 0
+        entry_count = 0
+
+        # TODO: jam 2007-05-07 Consider pre-allocating some space for the
+        #       members, and then growing and shrinking from there. If most
+        #       directories have close to 10 entries in them, it would save a
+        #       few mallocs if we default our list size to something
+        #       reasonable. Or we could malloc it to something large (100 or
+        #       so), and then truncate. That would give us a malloc + realloc,
+        #       rather than lots of reallocs.
+        while self.cur_cstr < self.end_cstr:
+            entry = self._get_entry(num_trees, &current_dirname, &new_block)
+            if new_block:
+                # new block - different dirname
+                current_block = []
+                PyList_Append(dirblocks,
+                              (<object>current_dirname, current_block))
+            PyList_Append(current_block, entry)
+            entry_count = entry_count + 1
+        if entry_count != expected_entry_count:
+            raise errors.DirstateCorrupt(self.state,
+                    'We read the wrong number of entries.'
+                    ' We expected to read %s, but read %s'
+                    % (expected_entry_count, entry_count))
+        self.state._split_root_dirblock_into_contents()
+
+
+def _read_dirblocks(state):
+    """Read in the dirblocks for the given DirState object.
+
+    This is tightly bound to the DirState internal representation. It should be
+    thought of as a member function, which is only separated out so that we can
+    re-write it in pyrex.
+
+    :param state: A DirState object.
+    :return: None
+    :postcondition: The dirblocks will be loaded into the appropriate fields in
+        the DirState object.
+    """
+    state._state_file.seek(state._end_of_header)
+    text = state._state_file.read()
+    # TODO: check the crc checksums. crc_measured = zlib.crc32(text)
+
+    reader = Reader(text, state)
+
+    reader._parse_dirblocks()
+    state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
+
+
+cdef int minikind_from_mode(int mode): # cannot_raise
+    # in order of frequency:
+    if S_ISREG(mode):
+        return c"f"
+    if S_ISDIR(mode):
+        return c"d"
+    if S_ISLNK(mode):
+        return c"l"
+    return 0
+
+
+_encode = binascii.b2a_base64
+
+
+cdef _pack_stat(stat_value):
+    """return a string representing the stat value's key fields.
+
+    :param stat_value: A stat oject with st_size, st_mtime, st_ctime, st_dev,
+        st_ino and st_mode fields.
+    """
+    cdef char result[6*4] # 6 long ints
+    cdef int *aliased
+    aliased = <int *>result
+    aliased[0] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_size))
+    # mtime and ctime will often be floats but get converted to PyInt within
+    aliased[1] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_mtime))
+    aliased[2] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_ctime))
+    aliased[3] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_dev))
+    aliased[4] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_ino))
+    aliased[5] = htonl(PyInt_AsUnsignedLongMask(stat_value.st_mode))
+    packed = PyString_FromStringAndSize(result, 6*4)
+    return _encode(packed)[:-1]
+
+
+def pack_stat(stat_value):
+    """Convert stat value into a packed representation quickly with pyrex"""
+    return _pack_stat(stat_value)
+
+
+def update_entry(self, entry, abspath, stat_value):
+    """Update the entry based on what is actually on disk.
+
+    This function only calculates the sha if it needs to - if the entry is
+    uncachable, or clearly different to the first parent's entry, no sha
+    is calculated, and None is returned.
+
+    :param entry: This is the dirblock entry for the file in question.
+    :param abspath: The path on disk for this file.
+    :param stat_value: (optional) if we already have done a stat on the
+        file, re-use it.
+    :return: None, or The sha1 hexdigest of the file (40 bytes) or link
+        target of a symlink.
+    """
+    return _update_entry(self, entry, abspath, stat_value)
+
+
+cdef _update_entry(self, entry, abspath, stat_value):
+    """Update the entry based on what is actually on disk.
+
+    This function only calculates the sha if it needs to - if the entry is
+    uncachable, or clearly different to the first parent's entry, no sha
+    is calculated, and None is returned.
+
+    :param self: The dirstate object this is operating on.
+    :param entry: This is the dirblock entry for the file in question.
+    :param abspath: The path on disk for this file.
+    :param stat_value: The stat value done on the path.
+    :return: None, or The sha1 hexdigest of the file (40 bytes) or link
+        target of a symlink.
+    """
+    # TODO - require pyrex 0.9.8, then use a pyd file to define access to the
+    # _st mode of the compiled stat objects.
+    cdef int minikind, saved_minikind
+    cdef void * details
+    cdef int worth_saving
+    minikind = minikind_from_mode(stat_value.st_mode)
+    if 0 == minikind:
+        return None
+    packed_stat = _pack_stat(stat_value)
+    details = PyList_GetItem_void_void(PyTuple_GetItem_void_void(<void *>entry, 1), 0)
+    saved_minikind = PyString_AsString_obj(<PyObject *>PyTuple_GetItem_void_void(details, 0))[0]
+    if minikind == c'd' and saved_minikind == c't':
+        minikind = c't'
+    saved_link_or_sha1 = PyTuple_GetItem_void_object(details, 1)
+    saved_file_size = PyTuple_GetItem_void_object(details, 2)
+    saved_executable = PyTuple_GetItem_void_object(details, 3)
+    saved_packed_stat = PyTuple_GetItem_void_object(details, 4)
+    # Deal with pyrex decrefing the objects
+    Py_INCREF(saved_link_or_sha1)
+    Py_INCREF(saved_file_size)
+    Py_INCREF(saved_executable)
+    Py_INCREF(saved_packed_stat)
+    #(saved_minikind, saved_link_or_sha1, saved_file_size,
+    # saved_executable, saved_packed_stat) = entry[1][0]
+
+    if (minikind == saved_minikind
+        and packed_stat == saved_packed_stat):
+        # The stat hasn't changed since we saved, so we can re-use the
+        # saved sha hash.
+        if minikind == c'd':
+            return None
+
+        # size should also be in packed_stat
+        if saved_file_size == stat_value.st_size:
+            return saved_link_or_sha1
+
+    # If we have gotten this far, that means that we need to actually
+    # process this entry.
+    link_or_sha1 = None
+    worth_saving = 1
+    if minikind == c'f':
+        executable = self._is_executable(stat_value.st_mode,
+                                         saved_executable)
+        if self._cutoff_time is None:
+            self._sha_cutoff_time()
+        if (stat_value.st_mtime < self._cutoff_time
+            and stat_value.st_ctime < self._cutoff_time
+            and len(entry[1]) > 1
+            and entry[1][1][0] != 'a'):
+                # Could check for size changes for further optimised
+                # avoidance of sha1's. However the most prominent case of
+                # over-shaing is during initial add, which this catches.
+            link_or_sha1 = self._sha1_file(abspath)
+            entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
+                           executable, packed_stat)
+        else:
+            # This file is not worth caching the sha1. Either it is too new, or
+            # it is newly added. Regardless, the only things we are changing
+            # are derived from the stat, and so are not worth caching. So we do
+            # *not* set the IN_MEMORY_MODIFIED flag. (But we'll save the
+            # updated values if there is *other* data worth saving.)
+            entry[1][0] = ('f', '', stat_value.st_size, executable,
+                           DirState.NULLSTAT)
+            worth_saving = 0
+    elif minikind == c'd':
+        entry[1][0] = ('d', '', 0, False, packed_stat)
+        if saved_minikind != c'd':
+            # This changed from something into a directory. Make sure we
+            # have a directory block for it. This doesn't happen very
+            # often, so this doesn't have to be super fast.
+            block_index, entry_index, dir_present, file_present = \
+                self._get_block_entry_index(entry[0][0], entry[0][1], 0)
+            self._ensure_block(block_index, entry_index,
+                               pathjoin(entry[0][0], entry[0][1]))
+        else:
+            # Any changes are derived trivially from the stat object, not worth
+            # re-writing a dirstate for just this
+            worth_saving = 0
+    elif minikind == c'l':
+        if saved_minikind == c'l':
+            # If the object hasn't changed kind, it isn't worth saving the
+            # dirstate just for a symlink. The default is 'fast symlinks' which
+            # save the target in the inode entry, rather than separately. So to
+            # stat, we've already read everything off disk.
+            worth_saving = 0
+        link_or_sha1 = self._read_link(abspath, saved_link_or_sha1)
+        if self._cutoff_time is None:
+            self._sha_cutoff_time()
+        if (stat_value.st_mtime < self._cutoff_time
+            and stat_value.st_ctime < self._cutoff_time):
+            entry[1][0] = ('l', link_or_sha1, stat_value.st_size,
+                           False, packed_stat)
+        else:
+            entry[1][0] = ('l', '', stat_value.st_size,
+                           False, DirState.NULLSTAT)
+    if worth_saving:
+        # Note, even though _mark_modified will only set
+        # IN_MEMORY_HASH_MODIFIED, it still isn't worth 
+        self._mark_modified([entry])
+    return link_or_sha1
+
+
+# TODO: Do we want to worry about exceptions here?
+cdef char _minikind_from_string(object string) except? -1:
+    """Convert a python string to a char."""
+    return PyString_AsString(string)[0]
+
+
+cdef object _kind_absent
+cdef object _kind_file
+cdef object _kind_directory
+cdef object _kind_symlink
+cdef object _kind_relocated
+cdef object _kind_tree_reference
+_kind_absent = "absent"
+_kind_file = "file"
+_kind_directory = "directory"
+_kind_symlink = "symlink"
+_kind_relocated = "relocated"
+_kind_tree_reference = "tree-reference"
+
+
+cdef object _minikind_to_kind(char minikind):
+    """Create a string kind for minikind."""
+    cdef char _minikind[1]
+    if minikind == c'f':
+        return _kind_file
+    elif minikind == c'd':
+        return _kind_directory
+    elif minikind == c'a':
+        return _kind_absent
+    elif minikind == c'r':
+        return _kind_relocated
+    elif minikind == c'l':
+        return _kind_symlink
+    elif minikind == c't':
+        return _kind_tree_reference
+    _minikind[0] = minikind
+    raise KeyError(PyString_FromStringAndSize(_minikind, 1))
+
+
+cdef int _versioned_minikind(char minikind): # cannot_raise
+    """Return non-zero if minikind is in fltd"""
+    return (minikind == c'f' or
+            minikind == c'd' or
+            minikind == c'l' or
+            minikind == c't')
+
+
+cdef class ProcessEntryC:
+
+    cdef int doing_consistency_expansion
+    cdef object old_dirname_to_file_id # dict
+    cdef object new_dirname_to_file_id # dict
+    cdef object last_source_parent
+    cdef object last_target_parent
+    cdef int include_unchanged
+    cdef int partial
+    cdef object use_filesystem_for_exec
+    cdef object utf8_decode
+    cdef readonly object searched_specific_files
+    cdef readonly object searched_exact_paths
+    cdef object search_specific_files
+    # The parents up to the root of the paths we are searching.
+    # After all normal paths are returned, these specific items are returned.
+    cdef object search_specific_file_parents
+    cdef object state
+    # Current iteration variables:
+    cdef object current_root
+    cdef object current_root_unicode
+    cdef object root_entries
+    cdef int root_entries_pos, root_entries_len
+    cdef object root_abspath
+    cdef int source_index, target_index
+    cdef int want_unversioned
+    cdef object tree
+    cdef object dir_iterator
+    cdef int block_index
+    cdef object current_block
+    cdef int current_block_pos
+    cdef object current_block_list
+    cdef object current_dir_info
+    cdef object current_dir_list
+    cdef object _pending_consistent_entries # list
+    cdef int path_index
+    cdef object root_dir_info
+    cdef object bisect_left
+    cdef object pathjoin
+    cdef object fstat
+    # A set of the ids we've output when doing partial output.
+    cdef object seen_ids
+    cdef object sha_file
+
+    def __init__(self, include_unchanged, use_filesystem_for_exec,
+        search_specific_files, state, source_index, target_index,
+        want_unversioned, tree):
+        self.doing_consistency_expansion = 0
+        self.old_dirname_to_file_id = {}
+        self.new_dirname_to_file_id = {}
+        # Are we doing a partial iter_changes?
+        self.partial = set(['']).__ne__(search_specific_files)
+        # Using a list so that we can access the values and change them in
+        # nested scope. Each one is [path, file_id, entry]
+        self.last_source_parent = [None, None]
+        self.last_target_parent = [None, None]
+        if include_unchanged is None:
+            self.include_unchanged = False
+        else:
+            self.include_unchanged = int(include_unchanged)
+        self.use_filesystem_for_exec = use_filesystem_for_exec
+        self.utf8_decode = cache_utf8._utf8_decode
+        # for all search_indexs in each path at or under each element of
+        # search_specific_files, if the detail is relocated: add the id, and
+        # add the relocated path as one to search if its not searched already.
+        # If the detail is not relocated, add the id.
+        self.searched_specific_files = set()
+        # When we search exact paths without expanding downwards, we record
+        # that here.
+        self.searched_exact_paths = set()
+        self.search_specific_files = search_specific_files
+        # The parents up to the root of the paths we are searching.
+        # After all normal paths are returned, these specific items are returned.
+        self.search_specific_file_parents = set()
+        # The ids we've sent out in the delta.
+        self.seen_ids = set()
+        self.state = state
+        self.current_root = None
+        self.current_root_unicode = None
+        self.root_entries = None
+        self.root_entries_pos = 0
+        self.root_entries_len = 0
+        self.root_abspath = None
+        if source_index is None:
+            self.source_index = -1
+        else:
+            self.source_index = source_index
+        self.target_index = target_index
+        self.want_unversioned = want_unversioned
+        self.tree = tree
+        self.dir_iterator = None
+        self.block_index = -1
+        self.current_block = None
+        self.current_block_list = None
+        self.current_block_pos = -1
+        self.current_dir_info = None
+        self.current_dir_list = None
+        self._pending_consistent_entries = []
+        self.path_index = 0
+        self.root_dir_info = None
+        self.bisect_left = bisect.bisect_left
+        self.pathjoin = osutils.pathjoin
+        self.fstat = os.fstat
+        self.sha_file = osutils.sha_file
+        if target_index != 0:
+            # A lot of code in here depends on target_index == 0
+            raise errors.BzrError('unsupported target index')
+
+    cdef _process_entry(self, entry, path_info):
+        """Compare an entry and real disk to generate delta information.
+
+        :param path_info: top_relpath, basename, kind, lstat, abspath for
+            the path of entry. If None, then the path is considered absent in 
+            the target (Perhaps we should pass in a concrete entry for this ?)
+            Basename is returned as a utf8 string because we expect this
+            tuple will be ignored, and don't want to take the time to
+            decode.
+        :return: (iter_changes_result, changed). If the entry has not been
+            handled then changed is None. Otherwise it is False if no content
+            or metadata changes have occured, and True if any content or
+            metadata change has occurred. If self.include_unchanged is True then
+            if changed is not None, iter_changes_result will always be a result
+            tuple. Otherwise, iter_changes_result is None unless changed is
+            True.
+        """
+        cdef char target_minikind
+        cdef char source_minikind
+        cdef object file_id
+        cdef int content_change
+        cdef object details_list
+        file_id = None
+        details_list = entry[1]
+        if -1 == self.source_index:
+            source_details = DirState.NULL_PARENT_DETAILS
+        else:
+            source_details = details_list[self.source_index]
+        target_details = details_list[self.target_index]
+        target_minikind = _minikind_from_string(target_details[0])
+        if path_info is not None and _versioned_minikind(target_minikind):
+            if self.target_index != 0:
+                raise AssertionError("Unsupported target index %d" %
+                                     self.target_index)
+            link_or_sha1 = _update_entry(self.state, entry, path_info[4], path_info[3])
+            # The entry may have been modified by update_entry
+            target_details = details_list[self.target_index]
+            target_minikind = _minikind_from_string(target_details[0])
+        else:
+            link_or_sha1 = None
+        # the rest of this function is 0.3 seconds on 50K paths, or
+        # 0.000006 seconds per call.
+        source_minikind = _minikind_from_string(source_details[0])
+        if ((_versioned_minikind(source_minikind) or source_minikind == c'r')
+            and _versioned_minikind(target_minikind)):
+            # claimed content in both: diff
+            #   r    | fdlt   |      | add source to search, add id path move and perform
+            #        |        |      | diff check on source-target
+            #   r    | fdlt   |  a   | dangling file that was present in the basis.
+            #        |        |      | ???
+            if source_minikind != c'r':
+                old_dirname = entry[0][0]
+                old_basename = entry[0][1]
+                old_path = path = None
+            else:
+                # add the source to the search path to find any children it
+                # has.  TODO ? : only add if it is a container ?
+                if (not self.doing_consistency_expansion and 
+                    not osutils.is_inside_any(self.searched_specific_files,
+                                             source_details[1])):
+                    self.search_specific_files.add(source_details[1])
+                    # expanding from a user requested path, parent expansion
+                    # for delta consistency happens later.
+                # generate the old path; this is needed for stating later
+                # as well.
+                old_path = source_details[1]
+                old_dirname, old_basename = os.path.split(old_path)
+                path = self.pathjoin(entry[0][0], entry[0][1])
+                old_entry = self.state._get_entry(self.source_index,
+                                             path_utf8=old_path)
+                # update the source details variable to be the real
+                # location.
+                if old_entry == (None, None):
+                    raise errors.CorruptDirstate(self.state._filename,
+                        "entry '%s/%s' is considered renamed from %r"
+                        " but source does not exist\n"
+                        "entry: %s" % (entry[0][0], entry[0][1], old_path, entry))
+                source_details = old_entry[1][self.source_index]
+                source_minikind = _minikind_from_string(source_details[0])
+            if path_info is None:
+                # the file is missing on disk, show as removed.
+                content_change = 1
+                target_kind = None
+                target_exec = False
+            else:
+                # source and target are both versioned and disk file is present.
+                target_kind = path_info[2]
+                if target_kind == 'directory':
+                    if path is None:
+                        old_path = path = self.pathjoin(old_dirname, old_basename)
+                    file_id = entry[0][2]
+                    self.new_dirname_to_file_id[path] = file_id
+                    if source_minikind != c'd':
+                        content_change = 1
+                    else:
+                        # directories have no fingerprint
+                        content_change = 0
+                    target_exec = False
+                elif target_kind == 'file':
+                    if source_minikind != c'f':
+                        content_change = 1
+                    else:
+                        # Check the sha. We can't just rely on the size as
+                        # content filtering may mean differ sizes actually
+                        # map to the same content
+                        if link_or_sha1 is None:
+                            # Stat cache miss:
+                            statvalue, link_or_sha1 = \
+                                self.state._sha1_provider.stat_and_sha1(
+                                path_info[4])
+                            self.state._observed_sha1(entry, link_or_sha1,
+                                statvalue)
+                        content_change = (link_or_sha1 != source_details[1])
+                    # Target details is updated at update_entry time
+                    if self.use_filesystem_for_exec:
+                        # We don't need S_ISREG here, because we are sure
+                        # we are dealing with a file.
+                        target_exec = bool(S_IXUSR & path_info[3].st_mode)
+                    else:
+                        target_exec = target_details[3]
+                elif target_kind == 'symlink':
+                    if source_minikind != c'l':
+                        content_change = 1
+                    else:
+                        content_change = (link_or_sha1 != source_details[1])
+                    target_exec = False
+                elif target_kind == 'tree-reference':
+                    if source_minikind != c't':
+                        content_change = 1
+                    else:
+                        content_change = 0
+                    target_exec = False
+                else:
+                    if path is None:
+                        path = self.pathjoin(old_dirname, old_basename)
+                    raise errors.BadFileKindError(path, path_info[2])
+            if source_minikind == c'd':
+                if path is None:
+                    old_path = path = self.pathjoin(old_dirname, old_basename)
+                if file_id is None:
+                    file_id = entry[0][2]
+                self.old_dirname_to_file_id[old_path] = file_id
+            # parent id is the entry for the path in the target tree
+            if old_basename and old_dirname == self.last_source_parent[0]:
+                # use a cached hit for non-root source entries.
+                source_parent_id = self.last_source_parent[1]
+            else:
+                try:
+                    source_parent_id = self.old_dirname_to_file_id[old_dirname]
+                except KeyError, _:
+                    source_parent_entry = self.state._get_entry(self.source_index,
+                                                           path_utf8=old_dirname)
+                    source_parent_id = source_parent_entry[0][2]
+                if source_parent_id == entry[0][2]:
+                    # This is the root, so the parent is None
+                    source_parent_id = None
+                else:
+                    self.last_source_parent[0] = old_dirname
+                    self.last_source_parent[1] = source_parent_id
+            new_dirname = entry[0][0]
+            if entry[0][1] and new_dirname == self.last_target_parent[0]:
+                # use a cached hit for non-root target entries.
+                target_parent_id = self.last_target_parent[1]
+            else:
+                try:
+                    target_parent_id = self.new_dirname_to_file_id[new_dirname]
+                except KeyError, _:
+                    # TODO: We don't always need to do the lookup, because the
+                    #       parent entry will be the same as the source entry.
+                    target_parent_entry = self.state._get_entry(self.target_index,
+                                                           path_utf8=new_dirname)
+                    if target_parent_entry == (None, None):
+                        raise AssertionError(
+                            "Could not find target parent in wt: %s\nparent of: %s"
+                            % (new_dirname, entry))
+                    target_parent_id = target_parent_entry[0][2]
+                if target_parent_id == entry[0][2]:
+                    # This is the root, so the parent is None
+                    target_parent_id = None
+                else:
+                    self.last_target_parent[0] = new_dirname
+                    self.last_target_parent[1] = target_parent_id
+
+            source_exec = source_details[3]
+            changed = (content_change
+                or source_parent_id != target_parent_id
+                or old_basename != entry[0][1]
+                or source_exec != target_exec
+                )
+            if not changed and not self.include_unchanged:
+                return None, False
+            else:
+                if old_path is None:
+                    path = self.pathjoin(old_dirname, old_basename)
+                    old_path = path
+                    old_path_u = self.utf8_decode(old_path)[0]
+                    path_u = old_path_u
+                else:
+                    old_path_u = self.utf8_decode(old_path)[0]
+                    if old_path == path:
+                        path_u = old_path_u
+                    else:
+                        path_u = self.utf8_decode(path)[0]
+                source_kind = _minikind_to_kind(source_minikind)
+                return (entry[0][2],
+                       (old_path_u, path_u),
+                       content_change,
+                       (True, True),
+                       (source_parent_id, target_parent_id),
+                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
+                       (source_kind, target_kind),
+                       (source_exec, target_exec)), changed
+        elif source_minikind == c'a' and _versioned_minikind(target_minikind):
+            # looks like a new file
+            path = self.pathjoin(entry[0][0], entry[0][1])
+            # parent id is the entry for the path in the target tree
+            # TODO: these are the same for an entire directory: cache em.
+            parent_entry = self.state._get_entry(self.target_index,
+                                                 path_utf8=entry[0][0])
+            if parent_entry is None:
+                raise errors.DirstateCorrupt(self.state,
+                    "We could not find the parent entry in index %d"
+                    " for the entry: %s"
+                    % (self.target_index, entry[0]))
+            parent_id = parent_entry[0][2]
+            if parent_id == entry[0][2]:
+                parent_id = None
+            if path_info is not None:
+                # Present on disk:
+                if self.use_filesystem_for_exec:
+                    # We need S_ISREG here, because we aren't sure if this
+                    # is a file or not.
+                    target_exec = bool(
+                        S_ISREG(path_info[3].st_mode)
+                        and S_IXUSR & path_info[3].st_mode)
+                else:
+                    target_exec = target_details[3]
+                return (entry[0][2],
+                       (None, self.utf8_decode(path)[0]),
+                       True,
+                       (False, True),
+                       (None, parent_id),
+                       (None, self.utf8_decode(entry[0][1])[0]),
+                       (None, path_info[2]),
+                       (None, target_exec)), True
+            else:
+                # Its a missing file, report it as such.
+                return (entry[0][2],
+                       (None, self.utf8_decode(path)[0]),
+                       False,
+                       (False, True),
+                       (None, parent_id),
+                       (None, self.utf8_decode(entry[0][1])[0]),
+                       (None, None),
+                       (None, False)), True
+        elif _versioned_minikind(source_minikind) and target_minikind == c'a':
+            # unversioned, possibly, or possibly not deleted: we dont care.
+            # if its still on disk, *and* theres no other entry at this
+            # path [we dont know this in this routine at the moment -
+            # perhaps we should change this - then it would be an unknown.
+            old_path = self.pathjoin(entry[0][0], entry[0][1])
+            # parent id is the entry for the path in the target tree
+            parent_id = self.state._get_entry(self.source_index, path_utf8=entry[0][0])[0][2]
+            if parent_id == entry[0][2]:
+                parent_id = None
+            return (entry[0][2],
+                   (self.utf8_decode(old_path)[0], None),
+                   True,
+                   (True, False),
+                   (parent_id, None),
+                   (self.utf8_decode(entry[0][1])[0], None),
+                   (_minikind_to_kind(source_minikind), None),
+                   (source_details[3], None)), True
+        elif _versioned_minikind(source_minikind) and target_minikind == c'r':
+            # a rename; could be a true rename, or a rename inherited from
+            # a renamed parent. TODO: handle this efficiently. Its not
+            # common case to rename dirs though, so a correct but slow
+            # implementation will do.
+            if (not self.doing_consistency_expansion and 
+                not osutils.is_inside_any(self.searched_specific_files,
+                    target_details[1])):
+                self.search_specific_files.add(target_details[1])
+                # We don't expand the specific files parents list here as
+                # the path is absent in target and won't create a delta with
+                # missing parent.
+        elif ((source_minikind == c'r' or source_minikind == c'a') and
+              (target_minikind == c'r' or target_minikind == c'a')):
+            # neither of the selected trees contain this path,
+            # so skip over it. This is not currently directly tested, but
+            # is indirectly via test_too_much.TestCommands.test_conflicts.
+            pass
+        else:
+            raise AssertionError("don't know how to compare "
+                "source_minikind=%r, target_minikind=%r"
+                % (source_minikind, target_minikind))
+            ## import pdb;pdb.set_trace()
+        return None, None
+
+    def __iter__(self):
+        return self
+
+    def iter_changes(self):
+        return self
+
+    cdef int _gather_result_for_consistency(self, result) except -1:
+        """Check a result we will yield to make sure we are consistent later.
+        
+        This gathers result's parents into a set to output later.
+
+        :param result: A result tuple.
+        """
+        if not self.partial or not result[0]:
+            return 0
+        self.seen_ids.add(result[0])
+        new_path = result[1][1]
+        if new_path:
+            # Not the root and not a delete: queue up the parents of the path.
+            self.search_specific_file_parents.update(
+                osutils.parent_directories(new_path.encode('utf8')))
+            # Add the root directory which parent_directories does not
+            # provide.
+            self.search_specific_file_parents.add('')
+        return 0
+
+    cdef int _update_current_block(self) except -1:
+        if (self.block_index < len(self.state._dirblocks) and
+            osutils.is_inside(self.current_root, self.state._dirblocks[self.block_index][0])):
+            self.current_block = self.state._dirblocks[self.block_index]
+            self.current_block_list = self.current_block[1]
+            self.current_block_pos = 0
+        else:
+            self.current_block = None
+            self.current_block_list = None
+        return 0
+
+    def __next__(self):
+        # Simple thunk to allow tail recursion without pyrex confusion
+        return self._iter_next()
+
+    cdef _iter_next(self):
+        """Iterate over the changes."""
+        # This function single steps through an iterator. As such while loops
+        # are often exited by 'return' - the code is structured so that the
+        # next call into the function will return to the same while loop. Note
+        # that all flow control needed to re-reach that step is reexecuted,
+        # which can be a performance problem. It has not yet been tuned to
+        # minimise this; a state machine is probably the simplest restructuring
+        # to both minimise this overhead and make the code considerably more
+        # understandable.
+
+        # sketch: 
+        # compare source_index and target_index at or under each element of search_specific_files.
+        # follow the following comparison table. Note that we only want to do diff operations when
+        # the target is fdl because thats when the walkdirs logic will have exposed the pathinfo 
+        # for the target.
+        # cases:
+        # 
+        # Source | Target | disk | action
+        #   r    | fdlt   |      | add source to search, add id path move and perform
+        #        |        |      | diff check on source-target
+        #   r    | fdlt   |  a   | dangling file that was present in the basis. 
+        #        |        |      | ???
+        #   r    |  a     |      | add source to search
+        #   r    |  a     |  a   | 
+        #   r    |  r     |      | this path is present in a non-examined tree, skip.
+        #   r    |  r     |  a   | this path is present in a non-examined tree, skip.
+        #   a    | fdlt   |      | add new id
+        #   a    | fdlt   |  a   | dangling locally added file, skip
+        #   a    |  a     |      | not present in either tree, skip
+        #   a    |  a     |  a   | not present in any tree, skip
+        #   a    |  r     |      | not present in either tree at this path, skip as it
+        #        |        |      | may not be selected by the users list of paths.
+        #   a    |  r     |  a   | not present in either tree at this path, skip as it
+        #        |        |      | may not be selected by the users list of paths.
+        #  fdlt  | fdlt   |      | content in both: diff them
+        #  fdlt  | fdlt   |  a   | deleted locally, but not unversioned - show as deleted ?
+        #  fdlt  |  a     |      | unversioned: output deleted id for now
+        #  fdlt  |  a     |  a   | unversioned and deleted: output deleted id
+        #  fdlt  |  r     |      | relocated in this tree, so add target to search.
+        #        |        |      | Dont diff, we will see an r,fd; pair when we reach
+        #        |        |      | this id at the other path.
+        #  fdlt  |  r     |  a   | relocated in this tree, so add target to search.
+        #        |        |      | Dont diff, we will see an r,fd; pair when we reach
+        #        |        |      | this id at the other path.
+
+        # TODO: jam 20070516 - Avoid the _get_entry lookup overhead by
+        #       keeping a cache of directories that we have seen.
+        cdef object current_dirname, current_blockname
+        cdef char * current_dirname_c, * current_blockname_c
+        cdef int advance_entry, advance_path
+        cdef int path_handled
+        searched_specific_files = self.searched_specific_files
+        # Are we walking a root?
+        while self.root_entries_pos < self.root_entries_len:
+            entry = self.root_entries[self.root_entries_pos]
+            self.root_entries_pos = self.root_entries_pos + 1
+            result, changed = self._process_entry(entry, self.root_dir_info)
+            if changed is not None:
+                if changed:
+                    self._gather_result_for_consistency(result)
+                if changed or self.include_unchanged:
+                    return result
+        # Have we finished the prior root, or never started one ?
+        if self.current_root is None:
+            # TODO: the pending list should be lexically sorted?  the
+            # interface doesn't require it.
+            try:
+                self.current_root = self.search_specific_files.pop()
+            except KeyError, _:
+                raise StopIteration()
+            self.searched_specific_files.add(self.current_root)
+            # process the entries for this containing directory: the rest will be
+            # found by their parents recursively.
+            self.root_entries = self.state._entries_for_path(self.current_root)
+            self.root_entries_len = len(self.root_entries)
+            self.current_root_unicode = self.current_root.decode('utf8')
+            self.root_abspath = self.tree.abspath(self.current_root_unicode)
+            try:
+                root_stat = os.lstat(self.root_abspath)
+            except OSError, e:
+                if e.errno == errno.ENOENT:
+                    # the path does not exist: let _process_entry know that.
+                    self.root_dir_info = None
+                else:
+                    # some other random error: hand it up.
+                    raise
+            else:
+                self.root_dir_info = ('', self.current_root,
+                    osutils.file_kind_from_stat_mode(root_stat.st_mode), root_stat,
+                    self.root_abspath)
+                if self.root_dir_info[2] == 'directory':
+                    if self.tree._directory_is_tree_reference(
+                        self.current_root_unicode):
+                        self.root_dir_info = self.root_dir_info[:2] + \
+                            ('tree-reference',) + self.root_dir_info[3:]
+            if not self.root_entries and not self.root_dir_info:
+                # this specified path is not present at all, skip it.
+                # (tail recursion, can do a loop once the full structure is
+                # known).
+                return self._iter_next()
+            path_handled = 0
+            self.root_entries_pos = 0
+            # XXX Clarity: This loop is duplicated a out the self.current_root
+            # is None guard above: if we return from it, it completes there
+            # (and the following if block cannot trigger because
+            # path_handled must be true, so the if block is not # duplicated.
+            while self.root_entries_pos < self.root_entries_len:
+                entry = self.root_entries[self.root_entries_pos]
+                self.root_entries_pos = self.root_entries_pos + 1
+                result, changed = self._process_entry(entry, self.root_dir_info)
+                if changed is not None:
+                    path_handled = -1
+                    if changed:
+                        self._gather_result_for_consistency(result)
+                    if changed or self.include_unchanged:
+                        return result
+            # handle unversioned specified paths:
+            if self.want_unversioned and not path_handled and self.root_dir_info:
+                new_executable = bool(
+                    stat.S_ISREG(self.root_dir_info[3].st_mode)
+                    and stat.S_IEXEC & self.root_dir_info[3].st_mode)
+                return (None,
+                       (None, self.current_root_unicode),
+                       True,
+                       (False, False),
+                       (None, None),
+                       (None, splitpath(self.current_root_unicode)[-1]),
+                       (None, self.root_dir_info[2]),
+                       (None, new_executable)
+                      )
+            # If we reach here, the outer flow continues, which enters into the
+            # per-root setup logic.
+        if (self.current_dir_info is None and self.current_block is None and not
+            self.doing_consistency_expansion):
+            # setup iteration of this root:
+            self.current_dir_list = None
+            if self.root_dir_info and self.root_dir_info[2] == 'tree-reference':
+                self.current_dir_info = None
+            else:
+                self.dir_iterator = osutils._walkdirs_utf8(self.root_abspath,
+                    prefix=self.current_root)
+                self.path_index = 0
+                try:
+                    self.current_dir_info = self.dir_iterator.next()
+                    self.current_dir_list = self.current_dir_info[1]
+                except OSError, e:
+                    # there may be directories in the inventory even though
+                    # this path is not a file on disk: so mark it as end of
+                    # iterator
+                    if e.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
+                        self.current_dir_info = None
+                    elif sys.platform == 'win32':
+                        # on win32, python2.4 has e.errno == ERROR_DIRECTORY, but
+                        # python 2.5 has e.errno == EINVAL,
+                        #            and e.winerror == ERROR_DIRECTORY
+                        try:
+                            e_winerror = e.winerror
+                        except AttributeError, _:
+                            e_winerror = None
+                        win_errors = (ERROR_DIRECTORY, ERROR_PATH_NOT_FOUND)
+                        if (e.errno in win_errors or e_winerror in win_errors):
+                            self.current_dir_info = None
+                        else:
+                            # Will this really raise the right exception ?
+                            raise
+                    else:
+                        raise
+                else:
+                    if self.current_dir_info[0][0] == '':
+                        # remove .bzr from iteration
+                        bzr_index = self.bisect_left(self.current_dir_list, ('.bzr',))
+                        if self.current_dir_list[bzr_index][0] != '.bzr':
+                            raise AssertionError()
+                        del self.current_dir_list[bzr_index]
+            initial_key = (self.current_root, '', '')
+            self.block_index, _ = self.state._find_block_index_from_key(initial_key)
+            if self.block_index == 0:
+                # we have processed the total root already, but because the
+                # initial key matched it we should skip it here.
+                self.block_index = self.block_index + 1
+            self._update_current_block()
+        # walk until both the directory listing and the versioned metadata
+        # are exhausted. 
+        while (self.current_dir_info is not None
+            or self.current_block is not None):
+            # Uncommon case - a missing directory or an unversioned directory:
+            if (self.current_dir_info and self.current_block
+                and self.current_dir_info[0][0] != self.current_block[0]):
+                # Work around pyrex broken heuristic - current_dirname has
+                # the same scope as current_dirname_c
+                current_dirname = self.current_dir_info[0][0]
+                current_dirname_c = PyString_AS_STRING_void(
+                    <void *>current_dirname)
+                current_blockname = self.current_block[0]
+                current_blockname_c = PyString_AS_STRING_void(
+                    <void *>current_blockname)
+                # In the python generator we evaluate this if block once per
+                # dir+block; because we reenter in the pyrex version its being
+                # evaluated once per path: we could cache the result before
+                # doing the while loop and probably save time.
+                if _cmp_by_dirs(current_dirname_c,
+                    PyString_Size(current_dirname),
+                    current_blockname_c,
+                    PyString_Size(current_blockname)) < 0:
+                    # filesystem data refers to paths not covered by the
+                    # dirblock.  this has two possibilities:
+                    # A) it is versioned but empty, so there is no block for it
+                    # B) it is not versioned.
+
+                    # if (A) then we need to recurse into it to check for
+                    # new unknown files or directories.
+                    # if (B) then we should ignore it, because we don't
+                    # recurse into unknown directories.
+                    # We are doing a loop
+                    while self.path_index < len(self.current_dir_list):
+                        current_path_info = self.current_dir_list[self.path_index]
+                        # dont descend into this unversioned path if it is
+                        # a dir
+                        if current_path_info[2] in ('directory',
+                                                    'tree-reference'):
+                            del self.current_dir_list[self.path_index]
+                            self.path_index = self.path_index - 1
+                        self.path_index = self.path_index + 1
+                        if self.want_unversioned:
+                            if current_path_info[2] == 'directory':
+                                if self.tree._directory_is_tree_reference(
+                                    self.utf8_decode(current_path_info[0])[0]):
+                                    current_path_info = current_path_info[:2] + \
+                                        ('tree-reference',) + current_path_info[3:]
+                            new_executable = bool(
+                                stat.S_ISREG(current_path_info[3].st_mode)
+                                and stat.S_IEXEC & current_path_info[3].st_mode)
+                            return (None,
+                                (None, self.utf8_decode(current_path_info[0])[0]),
+                                True,
+                                (False, False),
+                                (None, None),
+                                (None, self.utf8_decode(current_path_info[1])[0]),
+                                (None, current_path_info[2]),
+                                (None, new_executable))
+                    # This dir info has been handled, go to the next
+                    self.path_index = 0
+                    self.current_dir_list = None
+                    try:
+                        self.current_dir_info = self.dir_iterator.next()
+                        self.current_dir_list = self.current_dir_info[1]
+                    except StopIteration, _:
+                        self.current_dir_info = None
+                else: #(dircmp > 0)
+                    # We have a dirblock entry for this location, but there
+                    # is no filesystem path for this. This is most likely
+                    # because a directory was removed from the disk.
+                    # We don't have to report the missing directory,
+                    # because that should have already been handled, but we
+                    # need to handle all of the files that are contained
+                    # within.
+                    while self.current_block_pos < len(self.current_block_list):
+                        current_entry = self.current_block_list[self.current_block_pos]
+                        self.current_block_pos = self.current_block_pos + 1
+                        # entry referring to file not present on disk.
+                        # advance the entry only, after processing.
+                        result, changed = self._process_entry(current_entry, None)
+                        if changed is not None:
+                            if changed:
+                                self._gather_result_for_consistency(result)
+                            if changed or self.include_unchanged:
+                                return result
+                    self.block_index = self.block_index + 1
+                    self._update_current_block()
+                continue # next loop-on-block/dir
+            result = self._loop_one_block()
+            if result is not None:
+                return result
+        if len(self.search_specific_files):
+            # More supplied paths to process
+            self.current_root = None
+            return self._iter_next()
+        # Start expanding more conservatively, adding paths the user may not
+        # have intended but required for consistent deltas.
+        self.doing_consistency_expansion = 1
+        if not self._pending_consistent_entries:
+            self._pending_consistent_entries = self._next_consistent_entries()
+        while self._pending_consistent_entries:
+            result, changed = self._pending_consistent_entries.pop()
+            if changed is not None:
+                return result
+        raise StopIteration()
+
+    cdef object _maybe_tree_ref(self, current_path_info):
+        if self.tree._directory_is_tree_reference(
+            self.utf8_decode(current_path_info[0])[0]):
+            return current_path_info[:2] + \
+                ('tree-reference',) + current_path_info[3:]
+        else:
+            return current_path_info
+
+    cdef object _loop_one_block(self):
+            # current_dir_info and current_block refer to the same directory -
+            # this is the common case code.
+            # Assign local variables for current path and entry:
+            cdef object current_entry
+            cdef object current_path_info
+            cdef int path_handled
+            cdef char minikind
+            cdef int cmp_result
+            # cdef char * temp_str
+            # cdef Py_ssize_t temp_str_length
+            # PyString_AsStringAndSize(disk_kind, &temp_str, &temp_str_length)
+            # if not strncmp(temp_str, "directory", temp_str_length):
+            if (self.current_block is not None and
+                self.current_block_pos < PyList_GET_SIZE(self.current_block_list)):
+                current_entry = PyList_GET_ITEM(self.current_block_list,
+                    self.current_block_pos)
+                # accomodate pyrex
+                Py_INCREF(current_entry)
+            else:
+                current_entry = None
+            if (self.current_dir_info is not None and
+                self.path_index < PyList_GET_SIZE(self.current_dir_list)):
+                current_path_info = PyList_GET_ITEM(self.current_dir_list,
+                    self.path_index)
+                # accomodate pyrex
+                Py_INCREF(current_path_info)
+                disk_kind = PyTuple_GET_ITEM(current_path_info, 2)
+                # accomodate pyrex
+                Py_INCREF(disk_kind)
+                if disk_kind == "directory":
+                    current_path_info = self._maybe_tree_ref(current_path_info)
+            else:
+                current_path_info = None
+            while (current_entry is not None or current_path_info is not None):
+                advance_entry = -1
+                advance_path = -1
+                result = None
+                changed = None
+                path_handled = 0
+                if current_entry is None:
+                    # unversioned -  the check for path_handled when the path
+                    # is advanced will yield this path if needed.
+                    pass
+                elif current_path_info is None:
+                    # no path is fine: the per entry code will handle it.
+                    result, changed = self._process_entry(current_entry,
+                        current_path_info)
+                else:
+                    minikind = _minikind_from_string(
+                        current_entry[1][self.target_index][0])
+                    cmp_result = cmp(current_path_info[1], current_entry[0][1])
+                    if (cmp_result or minikind == c'a' or minikind == c'r'):
+                        # The current path on disk doesn't match the dirblock
+                        # record. Either the dirblock record is marked as
+                        # absent/renamed, or the file on disk is not present at all
+                        # in the dirblock. Either way, report about the dirblock
+                        # entry, and let other code handle the filesystem one.
+
+                        # Compare the basename for these files to determine
+                        # which comes first
+                        if cmp_result < 0:
+                            # extra file on disk: pass for now, but only
+                            # increment the path, not the entry
+                            advance_entry = 0
+                        else:
+                            # entry referring to file not present on disk.
+                            # advance the entry only, after processing.
+                            result, changed = self._process_entry(current_entry,
+                                None)
+                            advance_path = 0
+                    else:
+                        # paths are the same,and the dirstate entry is not
+                        # absent or renamed.
+                        result, changed = self._process_entry(current_entry,
+                            current_path_info)
+                        if changed is not None:
+                            path_handled = -1
+                            if not changed and not self.include_unchanged:
+                                changed = None
+                # >- loop control starts here:
+                # >- entry
+                if advance_entry and current_entry is not None:
+                    self.current_block_pos = self.current_block_pos + 1
+                    if self.current_block_pos < PyList_GET_SIZE(self.current_block_list):
+                        current_entry = self.current_block_list[self.current_block_pos]
+                    else:
+                        current_entry = None
+                # >- path
+                if advance_path and current_path_info is not None:
+                    if not path_handled:
+                        # unversioned in all regards
+                        if self.want_unversioned:
+                            new_executable = bool(
+                                stat.S_ISREG(current_path_info[3].st_mode)
+                                and stat.S_IEXEC & current_path_info[3].st_mode)
+                            try:
+                                relpath_unicode = self.utf8_decode(current_path_info[0])[0]
+                            except UnicodeDecodeError, _:
+                                raise errors.BadFilenameEncoding(
+                                    current_path_info[0], osutils._fs_enc)
+                            if changed is not None:
+                                raise AssertionError(
+                                    "result is not None: %r" % result)
+                            result = (None,
+                                (None, relpath_unicode),
+                                True,
+                                (False, False),
+                                (None, None),
+                                (None, self.utf8_decode(current_path_info[1])[0]),
+                                (None, current_path_info[2]),
+                                (None, new_executable))
+                            changed = True
+                        # dont descend into this unversioned path if it is
+                        # a dir
+                        if current_path_info[2] in ('directory'):
+                            del self.current_dir_list[self.path_index]
+                            self.path_index = self.path_index - 1
+                    # dont descend the disk iterator into any tree 
+                    # paths.
+                    if current_path_info[2] == 'tree-reference':
+                        del self.current_dir_list[self.path_index]
+                        self.path_index = self.path_index - 1
+                    self.path_index = self.path_index + 1
+                    if self.path_index < len(self.current_dir_list):
+                        current_path_info = self.current_dir_list[self.path_index]
+                        if current_path_info[2] == 'directory':
+                            current_path_info = self._maybe_tree_ref(
+                                current_path_info)
+                    else:
+                        current_path_info = None
+                if changed is not None:
+                    # Found a result on this pass, yield it
+                    if changed:
+                        self._gather_result_for_consistency(result)
+                    if changed or self.include_unchanged:
+                        return result
+            if self.current_block is not None:
+                self.block_index = self.block_index + 1
+                self._update_current_block()
+            if self.current_dir_info is not None:
+                self.path_index = 0
+                self.current_dir_list = None
+                try:
+                    self.current_dir_info = self.dir_iterator.next()
+                    self.current_dir_list = self.current_dir_info[1]
+                except StopIteration, _:
+                    self.current_dir_info = None
+
+    cdef object _next_consistent_entries(self):
+        """Grabs the next specific file parent case to consider.
+        
+        :return: A list of the results, each of which is as for _process_entry.
+        """
+        results = []
+        while self.search_specific_file_parents:
+            # Process the parent directories for the paths we were iterating.
+            # Even in extremely large trees this should be modest, so currently
+            # no attempt is made to optimise.
+            path_utf8 = self.search_specific_file_parents.pop()
+            if path_utf8 in self.searched_exact_paths:
+                # We've examined this path.
+                continue
+            if osutils.is_inside_any(self.searched_specific_files, path_utf8):
+                # We've examined this path.
+                continue
+            path_entries = self.state._entries_for_path(path_utf8)
+            # We need either one or two entries. If the path in
+            # self.target_index has moved (so the entry in source_index is in
+            # 'ar') then we need to also look for the entry for this path in
+            # self.source_index, to output the appropriate delete-or-rename.
+            selected_entries = []
+            found_item = False
+            for candidate_entry in path_entries:
+                # Find entries present in target at this path:
+                if candidate_entry[1][self.target_index][0] not in 'ar':
+                    found_item = True
+                    selected_entries.append(candidate_entry)
+                # Find entries present in source at this path:
+                elif (self.source_index is not None and
+                    candidate_entry[1][self.source_index][0] not in 'ar'):
+                    found_item = True
+                    if candidate_entry[1][self.target_index][0] == 'a':
+                        # Deleted, emit it here.
+                        selected_entries.append(candidate_entry)
+                    else:
+                        # renamed, emit it when we process the directory it
+                        # ended up at.
+                        self.search_specific_file_parents.add(
+                            candidate_entry[1][self.target_index][1])
+            if not found_item:
+                raise AssertionError(
+                    "Missing entry for specific path parent %r, %r" % (
+                    path_utf8, path_entries))
+            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
+            for entry in selected_entries:
+                if entry[0][2] in self.seen_ids:
+                    continue
+                result, changed = self._process_entry(entry, path_info)
+                if changed is None:
+                    raise AssertionError(
+                        "Got entry<->path mismatch for specific path "
+                        "%r entry %r path_info %r " % (
+                        path_utf8, entry, path_info))
+                # Only include changes - we're outside the users requested
+                # expansion.
+                if changed:
+                    self._gather_result_for_consistency(result)
+                    if (result[6][0] == 'directory' and
+                        result[6][1] != 'directory'):
+                        # This stopped being a directory, the old children have
+                        # to be included.
+                        if entry[1][self.source_index][0] == 'r':
+                            # renamed, take the source path
+                            entry_path_utf8 = entry[1][self.source_index][1]
+                        else:
+                            entry_path_utf8 = path_utf8
+                        initial_key = (entry_path_utf8, '', '')
+                        block_index, _ = self.state._find_block_index_from_key(
+                            initial_key)
+                        if block_index == 0:
+                            # The children of the root are in block index 1.
+                            block_index = block_index + 1
+                        current_block = None
+                        if block_index < len(self.state._dirblocks):
+                            current_block = self.state._dirblocks[block_index]
+                            if not osutils.is_inside(
+                                entry_path_utf8, current_block[0]):
+                                # No entries for this directory at all.
+                                current_block = None
+                        if current_block is not None:
+                            for entry in current_block[1]:
+                                if entry[1][self.source_index][0] in 'ar':
+                                    # Not in the source tree, so doesn't have to be
+                                    # included.
+                                    continue
+                                # Path of the entry itself.
+                                self.search_specific_file_parents.add(
+                                    self.pathjoin(*entry[0][:2]))
+                if changed or self.include_unchanged:
+                    results.append((result, changed))
+            self.searched_exact_paths.add(path_utf8)
+        return results
+
+    cdef object _path_info(self, utf8_path, unicode_path):
+        """Generate path_info for unicode_path.
+
+        :return: None if unicode_path does not exist, or a path_info tuple.
+        """
+        abspath = self.tree.abspath(unicode_path)
+        try:
+            stat = os.lstat(abspath)
+        except OSError, e:
+            if e.errno == errno.ENOENT:
+                # the path does not exist.
+                return None
+            else:
+                raise
+        utf8_basename = utf8_path.rsplit('/', 1)[-1]
+        dir_info = (utf8_path, utf8_basename,
+            osutils.file_kind_from_stat_mode(stat.st_mode), stat,
+            abspath)
+        if dir_info[2] == 'directory':
+            if self.tree._directory_is_tree_reference(
+                unicode_path):
+                self.root_dir_info = self.root_dir_info[:2] + \
+                    ('tree-reference',) + self.root_dir_info[3:]
+        return dir_info
author	Lorry <lorry@roadtrain.codethink.co.uk>	2012-08-22 15:47:16 +0100
committer	Lorry <lorry@roadtrain.codethink.co.uk>	2012-08-22 15:47:16 +0100
commit	25335618bf8755ce6b116ee14f47f5a1f2c821e9 (patch)
tree	d889d7ab3f9f985d0c54c534cb8052bd2e6d7163 /bzrlib/_dirstate_helpers_pyx.pyx
download	bzr-tarball-25335618bf8755ce6b116ee14f47f5a1f2c821e9.tar.gz