1 files changed, 304 insertions, 0 deletions
diff --git a/bzrlib/_knit_load_data_pyx.pyx b/bzrlib/_knit_load_data_pyx.pyx
new file mode 100644
index 0000000..746366d
--- /dev/null
+++ b/bzrlib/_knit_load_data_pyx.pyx
@@ -0,0 +1,304 @@
+# Copyright (C) 2007-2010 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Pyrex extensions to knit parsing."""
+
+import sys
+
+from bzrlib import errors
+
+
+cdef extern from "stdlib.h":
+    # Only the names this module uses are declared here; the real definitions
+    # are taken from the C header when the generated code is compiled.
+    ctypedef unsigned size_t
+    # Parses an integer written in the given base starting at nptr and stores
+    # in *endptr a pointer to the first character that was not consumed.
+    long int strtol(char *nptr, char **endptr, int base)
+
+
+cdef extern from "Python.h":
+    # Functions declared with an 'except' value have their return value
+    # checked by Pyrex so that a Python exception set by the C API is
+    # propagated instead of being silently left pending.
+    int PyDict_CheckExact(object)
+    # Declared as returning void* so that Pyrex does no reference counting on
+    # the (borrowed) result and so that a missing key can be seen as NULL.
+    void *PyDict_GetItem_void "PyDict_GetItem" (object p, object key)
+    int PyDict_SetItem(object p, object key, object val) except -1
+
+    int PyList_Append(object lst, object item) except -1
+    # This macro returns a borrowed reference and does no bounds checking.
+    # Because it is declared as returning 'object', callers must Py_INCREF the
+    # result themselves and must validate the index first.
+    object PyList_GET_ITEM(object lst, int index)
+    int PyList_CheckExact(object)
+
+    # Borrowed reference, no type or bounds checking.
+    void *PyTuple_GetItem_void_void "PyTuple_GET_ITEM" (void* tpl, int index)
+
+    # Both of these fail with TypeError when p is not a string; without the
+    # 'except' clauses that error would be ignored and the caller would go on
+    # to use a NULL pointer / a size of -1.
+    char *PyString_AsString(object p) except NULL
+    object PyString_FromStringAndSize(char *, int)
+    int PyString_Size(object p) except -1
+
+    void Py_INCREF(object)
+
+
+cdef extern from "string.h":
+    # Returns a pointer to the first occurrence of byte c within the first n
+    # bytes of s, or NULL when it is not present.
+    void *memchr(void *s, int c, size_t n)
+
+
+cdef int string_to_int_safe(char *s, char *end, int *out) except -1:
+    """Convert a base10 string to an integer.
+
+    This makes sure the whole string is consumed, or it raises ValueError.
+    This is similar to how int(s) works, except you don't need a Python
+    String object.
+
+    :param s: The string to convert
+    :param end: The character after the integer. So if the string is '12\0',
+        this should be pointing at the '\0'. If the string was '12 ' then this
+        should point at the ' '.
+    :param out: This is the integer that will be returned. It is only written
+        when the text was parsed successfully.
+    :return: -1 if an exception is raised. 0 otherwise
+    :raises ValueError: if the text between s and end is empty, is not
+        entirely an integer, or does not fit in a C int.
+    """
+    cdef char *integer_end
+    cdef long value
+
+    # We can't just return the integer because of how pyrex determines when
+    # there is an exception.
+    value = strtol(s, &integer_end, 10)
+    # strtol() signals "no digits found" by leaving integer_end == s, so an
+    # empty field (s == end) would otherwise be accepted as 0, unlike int('').
+    if s == end or integer_end != end:
+        py_s = PyString_FromStringAndSize(s, end-s)
+        raise ValueError('%r is not a valid integer' % (py_s,))
+    out[0] = <int>value
+    # Where long is wider than int the cast above can truncate; comparing the
+    # stored value with the parsed one detects that instead of handing back a
+    # silently wrapped number.
+    if out[0] != value:
+        py_s = PyString_FromStringAndSize(s, end-s)
+        raise ValueError('%r does not fit in a C int' % (py_s,))
+    return 0
+
+
+cdef class KnitIndexReader:
+    """Parse the text of a knit index file into a knit index object.
+
+    Every complete record found in the file is stored in ``kndx._cache``
+    (a dict keyed by version id); version ids that were not in the cache yet
+    are also appended to ``kndx._history`` (a list).
+    """
+
+    # The index object being filled in, and the file-like object to read.
+    cdef object kndx
+    cdef object fp
+
+    # Direct references to kndx._cache (dict) and kndx._history (list).
+    cdef object cache
+    cdef object history
+
+    # Start of the next unparsed record, and one past the last byte of the
+    # text being parsed. Both point into the string read by read().
+    cdef char * cur_str
+    cdef char * end_str
+
+    # Number of entries in self.history, kept as a C int so that parent
+    # indexes can be bounds checked without calling back into Python.
+    cdef int history_len
+
+    def __init__(self, kndx, fp):
+        self.kndx = kndx
+        self.fp = fp
+
+        self.cache = kndx._cache
+        self.history = kndx._history
+
+        self.cur_str = NULL
+        self.end_str = NULL
+        self.history_len = 0
+
+    cdef int validate(self) except -1:
+        """Check that the cache and history can be used with the C API.
+
+        The exact types are required because the parsing code uses the
+        concrete dict/list C functions and macros on them.
+
+        :raises TypeError: if kndx._cache is not exactly a dict or
+            kndx._history is not exactly a list.
+        """
+        if not PyDict_CheckExact(self.cache):
+            raise TypeError('kndx._cache must be a python dict')
+        if not PyList_CheckExact(self.history):
+            raise TypeError('kndx._history must be a python list')
+        return 0
+
+    cdef object process_options(self, char *option_str, char *end):
+        """Process the options string into a list.
+
+        :param option_str: The first character of the comma separated options.
+        :param end: The character just after the last option.
+        :return: A list with one string per option. An empty options field
+            gives an empty list, and a trailing ',' does not add an empty
+            option at the end.
+        """
+        cdef char *next
+
+        # This is alternative code which creates a python string and splits it.
+        # It is "correct" and more obvious, but slower than the following code.
+        # It can be uncommented to switch in case the other code is seen as
+        # suspect.
+        # options = PyString_FromStringAndSize(option_str,
+        #                                      end - option_str)
+        # return options.split(',')
+
+        final_options = []
+
+        while option_str < end:
+            next = <char*>memchr(option_str, c',', end - option_str)
+            if next == NULL:
+                # No more commas, the rest of the field is the last option
+                next = end
+            next_option = PyString_FromStringAndSize(option_str,
+                                                     next - option_str)
+            PyList_Append(final_options, next_option)
+
+            # Move past the ','
+            option_str = next+1
+
+        return final_options
+
+    cdef object process_parents(self, char *parent_str, char *end):
+        """Convert the parents field of a record into a tuple of revision ids.
+
+        Each parent must be followed by a space. A parent starting with '.'
+        is an explicit revision id, anything else is an integer index into
+        self.history.
+
+        :param parent_str: The first character of the parents field.
+        :param end: The character just after the parents field.
+        :return: A tuple of revision ids.
+        :raises ValueError: if a parent index is not a valid integer.
+        :raises IndexError: if a parent index is negative or refers to a
+            revision that is not in self.history yet.
+        """
+        cdef char *next
+        cdef int int_parent
+
+        # Alternative, correct but slower code.
+        #
+        # parents = PyString_FromStringAndSize(parent_str,
+        #                                      end - parent_str)
+        # real_parents = []
+        # for parent in parents.split():
+        #     if parent[0].startswith('.'):
+        #         real_parents.append(parent[1:])
+        #     else:
+        #         real_parents.append(self.history[int(parent)])
+        # return real_parents
+
+        parents = []
+        while parent_str <= end:
+            next = <char*>memchr(parent_str, c' ', end - parent_str)
+            if next == NULL or next >= end or next == parent_str:
+                # No terminating space left, or an empty entry: stop here
+                break
+
+            if parent_str[0] == c'.':
+                # This is an explicit revision id
+                parent_str = parent_str + 1
+                parent = PyString_FromStringAndSize(parent_str,
+                                                    next - parent_str)
+            else:
+                # This in an integer mapping to original
+                string_to_int_safe(parent_str, next, &int_parent)
+
+                # PyList_GET_ITEM does no bounds checking at all, so both
+                # ends of the range have to be checked here. A negative
+                # index would otherwise read memory in front of the list.
+                if int_parent < 0:
+                    raise IndexError('Parent index must not be negative:'
+                        ' %d' % (int_parent,))
+                if int_parent >= self.history_len:
+                    raise IndexError('Parent index refers to a revision which'
+                        ' does not exist yet.'
+                        ' %d > %d' % (int_parent, self.history_len))
+                parent = PyList_GET_ITEM(self.history, int_parent)
+                # PyList_GET_ITEM returns a borrowed reference, but it is
+                # declared as returning an object, so Pyrex will release a
+                # reference when 'parent' goes away. Take one to balance it.
+                Py_INCREF(parent)
+            PyList_Append(parents, parent)
+            parent_str = next + 1
+        return tuple(parents)
+
+    cdef int process_one_record(self, char *start, char *end) except -1:
+        """Take a simple string and split it into an index record.
+
+        The record is expected to hold, separated by single spaces, the
+        version id, the options, the position, the size and the parents.
+
+        :param start: The first character of the record.
+        :param end: The ':' which terminates the record.
+        :return: 1 if the record was added to the cache, 0 if it was skipped
+            because it has too few fields.
+        :raises errors.KnitCorrupt: if the position, size or a parent index
+            is not a valid integer, or a parent index is out of range.
+        """
+        cdef char *version_id_str
+        cdef int version_id_size
+        cdef char *option_str
+        cdef char *option_end
+        cdef char *pos_str
+        cdef int pos
+        cdef char *size_str
+        cdef int size
+        cdef char *parent_str
+        cdef void *cache_entry
+
+        # Find the space that ends each field in turn. Nothing is converted
+        # until all five fields are known to be present.
+        version_id_str = start
+        option_str = <char*>memchr(version_id_str, c' ', end - version_id_str)
+        if option_str == NULL or option_str >= end:
+            # Short entry
+            return 0
+        version_id_size = <int>(option_str - version_id_str)
+        # Move past the space character
+        option_str = option_str + 1
+
+        pos_str = <char*>memchr(option_str, c' ', end - option_str)
+        if pos_str == NULL or pos_str >= end:
+            # Short entry
+            return 0
+        option_end = pos_str
+        pos_str = pos_str + 1
+
+        size_str = <char*>memchr(pos_str, c' ', end - pos_str)
+        if size_str == NULL or size_str >= end:
+            # Short entry
+            return 0
+        size_str = size_str + 1
+
+        parent_str = <char*>memchr(size_str, c' ', end - size_str)
+        if parent_str == NULL or parent_str >= end:
+            # Missing parents
+            return 0
+        parent_str = parent_str + 1
+
+        version_id = PyString_FromStringAndSize(version_id_str,
+                                                version_id_size)
+        options = self.process_options(option_str, option_end)
+
+        try:
+            # size_str and parent_str point just after the space which ends
+            # the previous field, so "- 1" gives the end of that field.
+            string_to_int_safe(pos_str, size_str - 1, &pos)
+            string_to_int_safe(size_str, parent_str - 1, &size)
+            parents = self.process_parents(parent_str, end)
+        except (ValueError, IndexError), e:
+            py_line = PyString_FromStringAndSize(start, end - start)
+            raise errors.KnitCorrupt(self.kndx._filename,
+                                     "line %r: %s" % (py_line, e))
+
+        cache_entry = PyDict_GetItem_void(self.cache, version_id)
+        if cache_entry == NULL:
+            # First time this version is seen, it gets the next index
+            PyList_Append(self.history, version_id)
+            index = self.history_len
+            self.history_len = self.history_len + 1
+        else:
+            # The version is already known: keep the index stored in the
+            # existing entry (the last item of the tuple built below).
+            # PyTuple_GetItem_void_void does *not* increment the reference
+            # counter, but casting to <object> does.
+            index = <object>PyTuple_GetItem_void_void(cache_entry, 5)
+
+        PyDict_SetItem(self.cache, version_id,
+                       (version_id,
+                        options,
+                        pos,
+                        size,
+                        parents,
+                        index,
+                       ))
+        return 1
+
+    cdef int process_next_record(self) except -1:
+        """Process the next record in the file.
+
+        Advances self.cur_str past the next line, and hands the line to
+        process_one_record if it ends with ':'.
+
+        :return: 0 if the line was empty or did not end with ':', otherwise
+            whatever process_one_record returned.
+        """
+        cdef char *last
+        cdef char *start
+
+        start = self.cur_str
+        # Find the next newline
+        last = <char*>memchr(start, c'\n', self.end_str - start)
+        if last == NULL:
+            # Process until the end of the file
+            last = self.end_str - 1
+            self.cur_str = self.end_str
+        else:
+            # The last character is right before the '\n'
+            # And the next string is right after it
+            self.cur_str = last + 1
+            last = last - 1
+
+        # The position is compared before last[0] is read, so that an empty
+        # line never causes a read in front of 'start'.
+        if last <= start or last[0] != c':':
+            # Incomplete record
+            return 0
+
+        return self.process_one_record(start, last)
+
+    def read(self):
+        """Read all of self.fp and load every record into the index.
+
+        :raises TypeError: if kndx._cache is not a dict or kndx._history is
+            not a list.
+        :raises errors.KnitCorrupt: if a record contains an invalid integer
+            or parent index.
+        """
+        cdef int text_size
+
+        self.validate()
+
+        # Indexes for new versions, and the bound for parent indexes, are
+        # taken from history_len, so it has to match the list even when the
+        # history already holds entries before the file is read.
+        self.history_len = len(self.history)
+
+        self.kndx.check_header(self.fp)
+
+        # We read the whole thing at once
+        # TODO: jam 2007-05-09 Consider reading incrementally rather than
+        #       having to have the whole thing read up front.
+        #       we already know that calling f.readlines() versus lots of
+        #       f.readline() calls is faster.
+        #       The other possibility is to avoid a Python String here
+        #       completely. However self.fp may be a 'file-like' object
+        #       it is not guaranteed to be a real file.
+        # 'text' has to stay referenced until parsing is done, because
+        # cur_str and end_str point into its buffer.
+        text = self.fp.read()
+        text_size = PyString_Size(text)
+        self.cur_str = PyString_AsString(text)
+        # This points just past the last character in the string
+        self.end_str = self.cur_str + text_size
+
+        while self.cur_str < self.end_str:
+            self.process_next_record()
+
+
+def _load_data_c(kndx, fp):
+    """Parse the knit index file fp and store its records on kndx."""
+    KnitIndexReader(kndx, fp).read()