# Copyright (C) 2007-2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Pyrex extensions to knit parsing."""

import sys

from bzrlib import errors


cdef extern from "stdlib.h":
    ctypedef unsigned size_t
    long int strtol(char *nptr, char **endptr, int base)


cdef extern from "Python.h":
    int PyDict_CheckExact(object)
    void *PyDict_GetItem_void "PyDict_GetItem" (object p, object key)
    int PyDict_SetItem(object p, object key, object val) except -1

    int PyList_Append(object lst, object item) except -1
    object PyList_GET_ITEM(object lst, int index)
    int PyList_CheckExact(object)

    void *PyTuple_GetItem_void_void "PyTuple_GET_ITEM" (void* tpl, int index)

    char *PyString_AsString(object p)
    object PyString_FromStringAndSize(char *, int)
    int PyString_Size(object p)

    void Py_INCREF(object)


cdef extern from "string.h":
    void *memchr(void *s, int c, size_t n)


cdef int string_to_int_safe(char *s, char *end, int *out) except -1:
    """Convert a base10 string to an integer.

    This makes sure the whole string is consumed, or it raises ValueError.
    This is similar to how int(s) works, except you don't need a Python
    String object.

    :param s: The string to convert
    :param end: The character after the integer. So if the string is '12\0',
        this should be pointing at the '\0'. If the string was '12 ' then this
        should point at the ' '.
    :param out: This is the integer that will be returned
    :return: -1 if an exception is raised. 0 otherwise
    :raises ValueError: if [s, end) is empty or is not entirely consumed by
        the integer conversion.
    """
    cdef char *integer_end

    # We can't just return the integer because of how pyrex determines when
    # there is an exception.
    out[0] = <int>strtol(s, &integer_end, 10)
    # When strtol() performs no conversion it leaves integer_end == s, so an
    # empty field (s == end) would otherwise be silently accepted as 0.
    # int('') raises ValueError, so we do the same.
    if s == end or integer_end != end:
        py_s = PyString_FromStringAndSize(s, end-s)
        raise ValueError('%r is not a valid integer' % (py_s,))
    return 0


cdef class KnitIndexReader:
    """Parse the text of a knit index into kndx._cache and kndx._history.

    The reader walks the raw bytes returned by fp.read() with C pointers,
    one newline-delimited record at a time.
    """

    # The index object being populated, and the file-like object to read.
    cdef object kndx
    cdef object fp

    # kndx._cache (must be an exact dict) and kndx._history (an exact list).
    cdef object cache
    cdef object history

    # Start of the next unparsed record, and one past the end of the text.
    cdef char * cur_str
    cdef char * end_str

    # Number of entries this reader has appended to self.history.
    cdef int history_len

    def __init__(self, kndx, fp):
        self.kndx = kndx
        self.fp = fp

        self.cache = kndx._cache
        self.history = kndx._history

        self.cur_str = NULL
        self.end_str = NULL
        self.history_len = 0

    cdef int validate(self) except -1:
        """Check the containers are exact builtin types.

        The C-API macros used while parsing do no type checking, so this
        must pass before any record is processed.

        :raises TypeError: if the cache is not a dict or the history is not
            a list.
        """
        if not PyDict_CheckExact(self.cache):
            raise TypeError('kndx._cache must be a python dict')
        if not PyList_CheckExact(self.history):
            raise TypeError('kndx._history must be a python list')
        return 0

    cdef object process_options(self, char *option_str, char *end):
        """Process the options string into a list.

        :param option_str: Start of the comma separated options.
        :param end: One past the last character of the options.
        :return: A list of option strings.
        """
        cdef char *next

        # This is alternative code which creates a python string and splits it.
        # It is "correct" and more obvious, but slower than the following code.
        # It can be uncommented to switch in case the other code is seen as
        # suspect.
        # options = PyString_FromStringAndSize(option_str,
        #                                      end - option_str)
        # return options.split(',')

        final_options = []

        while option_str < end:
            # memchr() returns void*, which needs an explicit cast.
            next = <char*>memchr(option_str, c',', end - option_str)
            if next == NULL:
                next = end
            next_option = PyString_FromStringAndSize(option_str,
                                                     next - option_str)
            PyList_Append(final_options, next_option)

            # Move past the ','
            option_str = next+1

        return final_options

    cdef object process_parents(self, char *parent_str, char *end):
        """Convert the space separated parent references into a tuple.

        A reference starting with '.' is an explicit revision id; anything
        else is an integer index into self.history. Every reference must be
        followed by a space; parsing stops at the first one that is not.

        :param parent_str: Start of the parent references.
        :param end: One past the last character to consider.
        :return: A tuple of parent revision ids.
        :raises ValueError: if an index is not a valid integer.
        :raises IndexError: if an index is negative or refers to an entry
            which has not been added to the history yet.
        """
        cdef char *next
        cdef int int_parent

        # Alternative, correct but slower code.
        #
        # parents = PyString_FromStringAndSize(parent_str,
        #                                      end - parent_str)
        # real_parents = []
        # for parent in parents.split():
        #     if parent[0].startswith('.'):
        #         real_parents.append(parent[1:])
        #     else:
        #         real_parents.append(self.history[int(parent)])
        # return real_parents

        parents = []
        while parent_str <= end:
            next = <char*>memchr(parent_str, c' ', end - parent_str)
            if next == NULL or next >= end or next == parent_str:
                break

            if parent_str[0] == c'.':
                # This is an explicit revision id
                parent_str = parent_str + 1
                parent = PyString_FromStringAndSize(parent_str,
                                                    next - parent_str)
            else:
                # This in an integer mapping to original
                string_to_int_safe(parent_str, next, &int_parent)

                # PyList_GET_ITEM is a macro without any bounds checking, so
                # both ends of the range have to be validated here.
                if int_parent < 0:
                    raise IndexError('Parent index must not be negative:'
                                     ' %d' % (int_parent,))
                if int_parent >= self.history_len:
                    raise IndexError('Parent index refers to a revision which'
                        ' does not exist yet.'
                        ' %d > %d' % (int_parent, self.history_len))

                parent = PyList_GET_ITEM(self.history, int_parent)
                # PyList_GET_ITEM returns a borrowed reference, but it is
                # declared as returning 'object', so pyrex will DECREF it
                # when 'parent' is released. Take our own reference.
                Py_INCREF(parent)
            PyList_Append(parents, parent)
            parent_str = next + 1
        return tuple(parents)

    cdef int process_one_record(self, char *start, char *end) except -1:
        """Take a simple string and split it into an index record.

        The record has the form:
            version_id options pos size parents...
        with the fields separated by single spaces.

        :param start: First character of the record.
        :param end: One past the last character of the record (the caller
            passes the position of the trailing ':').
        :return: 1 if a record was stored in the cache, 0 if the record was
            too short and was skipped.
        :raises KnitCorrupt: if pos, size or a parent reference is invalid.
        """
        cdef char *version_id_str
        cdef int version_id_size
        cdef char *option_str
        cdef char *option_end
        cdef char *pos_str
        cdef int pos
        cdef char *size_str
        cdef int size
        cdef char *parent_str
        cdef void *cache_entry

        version_id_str = start
        option_str = <char*>memchr(version_id_str, c' ',
                                   end - version_id_str)
        if option_str == NULL or option_str >= end:
            # Short entry
            return 0
        version_id_size = <int>(option_str - version_id_str)
        # Move past the space character
        option_str = option_str + 1

        pos_str = <char*>memchr(option_str, c' ', end - option_str)
        if pos_str == NULL or pos_str >= end:
            # Short entry
            return 0
        option_end = pos_str
        pos_str = pos_str + 1

        size_str = <char*>memchr(pos_str, c' ', end - pos_str)
        if size_str == NULL or size_str >= end:
            # Short entry
            return 0
        size_str = size_str + 1

        parent_str = <char*>memchr(size_str, c' ', end - size_str)
        if parent_str == NULL or parent_str >= end:
            # Missing parents
            return 0
        parent_str = parent_str + 1

        version_id = PyString_FromStringAndSize(version_id_str,
                                                version_id_size)
        options = self.process_options(option_str, option_end)

        try:
            # size_str and parent_str were advanced past their separating
            # space, so each field ends one character before the next starts.
            string_to_int_safe(pos_str, size_str - 1, &pos)
            string_to_int_safe(size_str, parent_str - 1, &size)
            parents = self.process_parents(parent_str, end)
        except (ValueError, IndexError), e:
            py_line = PyString_FromStringAndSize(start, end - start)
            raise errors.KnitCorrupt(self.kndx._filename,
                                     "line %r: %s" % (py_line, e))

        cache_entry = PyDict_GetItem_void(self.cache, version_id)
        if cache_entry == NULL:
            # First time we see this version: it gets the next history slot.
            PyList_Append(self.history, version_id)
            index = self.history_len
            self.history_len = self.history_len + 1
        else:
            # PyTuple_GetItem_void_void does *not* increment the reference
            # counter, but casting to <object> does.
            index = <object>PyTuple_GetItem_void_void(cache_entry, 5)

        PyDict_SetItem(self.cache, version_id,
                       (version_id,
                        options,
                        pos,
                        size,
                        parents,
                        index,
                       ))
        return 1

    cdef int process_next_record(self) except -1:
        """Process the next record in the file.

        Advances self.cur_str past the record that was examined.

        :return: 0 if the line was not a complete record (it must end with
            ':'), otherwise the result of process_one_record().
        """
        cdef char *last
        cdef char *start

        start = self.cur_str
        # Find the next newline
        last = <char*>memchr(start, c'\n', self.end_str - start)
        if last == NULL:
            # Process until the end of the file
            last = self.end_str - 1
            self.cur_str = self.end_str
        else:
            # The last character is right before the '\n'
            # And the next string is right after it
            self.cur_str = last + 1
            last = last - 1

        # The 'last <= start' test comes first so that 'last' is never
        # dereferenced when an empty line left it pointing before 'start'.
        if last <= start or last[0] != c':':
            # Incomplete record
            return 0

        return self.process_one_record(start, last)

    def read(self):
        """Validate the index, then parse every record from self.fp.

        :raises TypeError: if kndx._cache or kndx._history have the wrong
            type.
        """
        cdef int text_size

        self.validate()

        self.kndx.check_header(self.fp)

        # We read the whole thing at once
        # TODO: jam 2007-05-09 Consider reading incrementally rather than
        #       having to have the whole thing read up front.
        #       we already know that calling f.readlines() versus lots of
        #       f.readline() calls is faster.
        #       The other possibility is to avoid a Python String here
        #       completely. However self.fp may be a 'file-like' object
        #       it is not guaranteed to be a real file.
        # 'text' must stay referenced for as long as cur_str/end_str are in
        # use, because they point directly into its buffer.
        text = self.fp.read()
        text_size = PyString_Size(text)
        self.cur_str = PyString_AsString(text)
        # This points one past the last character in the string
        self.end_str = self.cur_str + text_size

        while self.cur_str < self.end_str:
            self.process_next_record()


def _load_data_c(kndx, fp):
    """Load the knit index file into memory.

    :param kndx: The index object; its _cache and _history are updated.
    :param fp: A file-like object positioned at the start of the index.
    """
    reader = KnitIndexReader(kndx, fp)
    reader.read()