diff options
Diffstat (limited to 'build/transform_sql.py')
-rwxr-xr-x | build/transform_sql.py | 150 |
1 files changed, 142 insertions, 8 deletions
diff --git a/build/transform_sql.py b/build/transform_sql.py index 12f3f6b..90e1b79 100755 --- a/build/transform_sql.py +++ b/build/transform_sql.py @@ -25,11 +25,19 @@ # +import operator import os import re import sys +# operator.methodcaller doesn't exist in Python 2.5. +if not hasattr(operator, 'methodcaller'): + def methodcaller(method, *args, **kwargs): + return lambda x: getattr(x, method)(*args, **kwargs) + operator.methodcaller = methodcaller + del methodcaller + DEFINE_END = ' ""\n\n' @@ -49,7 +57,7 @@ class Processor(object): # a few SQL comments that act as directives for this transform system re_format = re.compile('-- *format: *([0-9]+)') - re_statement = re.compile('-- *STMT_([A-Z_0-9]+)') + re_statement = re.compile('-- *STMT_([A-Z_0-9]+)( +\(([^\)]*)\))?') re_include = re.compile('-- *include: *([-a-z]+)') re_define = re.compile('-- *define: *([A-Z_0-9]+)') @@ -66,6 +74,13 @@ class Processor(object): self.close_define() self.output.write('#define STMT_%s %d\n' % (match.group(1), self.stmt_count)) + + if match.group(3) == None: + info = 'NULL' + else: + info = '"' + match.group(3) + '"' + self.output.write('#define STMT_%d_INFO {"STMT_%s", %s}\n' % + (self.stmt_count, match.group(1), info)) self.output.write('#define STMT_%d \\\n' % (self.stmt_count,)) self.var_printed = True @@ -82,10 +97,11 @@ class Processor(object): self.output.write(' APR_STRINGIFY(%s) \\\n' % define) - def __init__(self, dirpath, output, var_name): + def __init__(self, dirpath, output, var_name, token_map): self.dirpath = dirpath self.output = output self.var_name = var_name + self.token_map = token_map self.stmt_count = 0 self.var_printed = False @@ -102,9 +118,79 @@ class Processor(object): for line in input.split('\n'): line = line.replace('"', '\\"') - line = re.sub(r'IS_STRICT_DESCENDANT_OF[(]([A-Za-z_.]+), ([?][0-9]+)[)]', - r"((\1) > (\2) || '/' AND (\1) < (\2) || '0') ", - line) + + # IS_STRICT_DESCENDANT_OF() + + # A common operation in the working copy is determining 
descendants of + # a node. To allow Sqlite to use its indexes to provide the answer we + # must provide simple less than and greater than operations. + # + # For relative paths that consist of one or more components like 'subdir' + # we can accomplish this by comparing local_relpath with 'subdir/' and + # 'subdir0' ('/'+1 = '0') + # + # For the working copy root this case is less simple and not strictly + # valid utf-8/16 (but luckily Sqlite doesn't validate utf-8 nor utf-16). + # The binary blob x'FFFF' is higher than any valid utf-8 and utf-16 + # sequence. + # + # So for the root we can compare with > '' and < x'FFFF'. (This skips the + # root itself and selects all descendants) + # + + # '/'+1 == '0' + line = re.sub( + r'IS_STRICT_DESCENDANT_OF[(]([A-Za-z_.]+), ([?][0-9]+)[)]', + r"(((\1) > (CASE (\2) WHEN '' THEN '' ELSE (\2) || '/' END))" + + r" AND ((\1) < CASE (\2) WHEN '' THEN X'FFFF' ELSE (\2) || '0' END))", + line) + + # RELPATH_SKIP_JOIN(x, y, z) skips the x prefix from z and then joins the + # result after y. In other words it replaces x with y, but follows the + # relpath rules. + line = re.sub( + r'RELPATH_SKIP_JOIN[(]([?]?[A-Za-z0-9_.]+), ' + + r'([?]?[A-Za-z0-9_.]+), ' + + r'([?]?[A-Za-z0-9_.]+)[)]', + r"(CASE WHEN (\1) = '' THEN RELPATH_JOIN(\2, \3) " + + r"WHEN (\2) = '' THEN RELPATH_SKIP_ANCESTOR(\1, \3) " + + r"WHEN SUBSTR((\3), 1, LENGTH(\1)) = (\1) " + + r"THEN " + + r"CASE WHEN LENGTH(\1) = LENGTH(\3) THEN (\2) " + + r"WHEN SUBSTR((\3), LENGTH(\1)+1, 1) = '/' " + + r"THEN (\2) || SUBSTR((\3), LENGTH(\1)+1) " + + r"END " + + r"END)", + line) + + # RELPATH_JOIN(x, y) joins x to y following the svn_relpath_join() rules + line = re.sub( + r'RELPATH_JOIN[(]([?]?[A-Za-z0-9_.]+), ([?]?[A-Za-z0-9_.]+)[)]', + r"(CASE WHEN (\1) = '' THEN (\2) " + + r"WHEN (\2) = '' THEN (\1) " + + r"ELSE (\1) || '/' || (\2) " + + r"END)", + line) + + # RELPATH_SKIP_ANCESTOR(x, y) skips the x prefix from y following the + # svn_relpath_skip_ancestor() rules. 
Returns NULL when y is not below X. + line = re.sub( + r'RELPATH_SKIP_ANCESTOR[(]([?]?[A-Za-z0-9_.]+), ' + + r'([?]?[A-Za-z0-9_.]+)[)]', + r"(CASE WHEN (\1) = '' THEN (\2) " + + r" WHEN SUBSTR((\2), 1, LENGTH(\1)) = (\1) " + + r" THEN " + + r"CASE WHEN LENGTH(\1) = LENGTH(\2) THEN '' " + + r"WHEN SUBSTR((\2), LENGTH(\1)+1, 1) = '/' " + + r"THEN SUBSTR((\2), LENGTH(\1)+2) " + + r"END" + + r" END)", + line) + + # Another preprocessing. + for symbol, string in self.token_map.iteritems(): + # ### This doesn't sql-escape 'string' + line = re.sub(r'\b%s\b' % re.escape(symbol), "'%s'" % string, line) if line.strip(): handled = False @@ -138,19 +224,60 @@ class Processor(object): self.var_printed = False +class NonRewritableDict(dict): + """A dictionary that does not allow self[k]=v when k in self + (unless v is equal to the stored value). + + (An entry would have to be explicitly deleted before a new value + may be entered.) + """ + + def __setitem__(self, key, val): + if self.__contains__(key) and self.__getitem__(key) != val: + raise Exception("Can't re-insert key %r with value %r " + "(already present with value %r)" + % (key, val, self.__getitem__(key))) + super(NonRewritableDict, self).__setitem__(key, val) + +def hotspots(fd): + hotspot = False + for line in fd: + # hotspot is TRUE within definitions of static const svn_token_map_t[]. 
+ hotspot ^= int(('svn_token_map_t', '\x7d;')[hotspot] in line) + if hotspot: + yield line + +def extract_token_map(filename): + try: + fd = open(filename) + except IOError: + return {} + + pattern = re.compile(r'"(.*?)".*?(MAP_\w*)') + return \ + NonRewritableDict( + map(operator.itemgetter(1,0), + map(operator.methodcaller('groups'), + filter(None, + map(pattern.search, + hotspots(fd)))))) + def main(input_filepath, output): filename = os.path.basename(input_filepath) input = open(input_filepath, 'r').read() + token_map_filename = os.path.dirname(input_filepath) + '/token-map.h' + token_map = extract_token_map(token_map_filename) + var_name = re.sub('[-.]', '_', filename).upper() output.write( - '/* This file is automatically generated from %s.\n' + '/* This file is automatically generated from %s and %s.\n' ' * Do not edit this file -- edit the source and rerun gen-make.py */\n' '\n' - % (filename,)) + % (filename, token_map_filename)) - proc = Processor(os.path.dirname(input_filepath), output, var_name) + proc = Processor(os.path.dirname(input_filepath), output, var_name, token_map) proc.process_file(input) ### the STMT_%d naming precludes *multiple* transform_sql headers from @@ -165,6 +292,13 @@ def main(input_filepath, output): + ', \\\n'.join(' STMT_%d' % (i,) for i in range(proc.stmt_count)) + ', \\\n NULL \\\n }\n') + output.write('\n') + + output.write( + '#define %s_DECLARE_STATEMENT_INFO(varname) \\\n' % (var_name,) + + ' static const char * const varname[][2] = { \\\n' + + ', \\\n'.join(' STMT_%d_INFO' % (i) for i in range(proc.stmt_count)) + + ', \\\n {NULL, NULL} \\\n }\n') if __name__ == '__main__': if len(sys.argv) < 2 or len(sys.argv) > 3: |