Diffstat (limited to 'build/transform_sql.py')
-rwxr-xr-x  build/transform_sql.py  150
1 file changed, 142 insertions, 8 deletions
diff --git a/build/transform_sql.py b/build/transform_sql.py
index 12f3f6b..90e1b79 100755
--- a/build/transform_sql.py
+++ b/build/transform_sql.py
@@ -25,11 +25,19 @@
#
+import operator
import os
import re
import sys
+# operator.methodcaller doesn't exist in Python 2.5.
+if not hasattr(operator, 'methodcaller'):
+  def methodcaller(method, *args, **kwargs):
+    return lambda x: getattr(x, method)(*args, **kwargs)
+  operator.methodcaller = methodcaller
+  del methodcaller
+
DEFINE_END = ' ""\n\n'
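For illustration, a small standalone sketch of the equivalence this shim restores; the pattern and string below are made up and are not part of the build script:

import operator, re

# operator.methodcaller('groups') builds a callable equivalent to
# lambda m: m.groups(); extract_token_map() further down relies on this.
m = re.search(r'(\w+)=(\w+)', 'key=value')
assert operator.methodcaller('groups')(m) == m.groups() == ('key', 'value')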
@@ -49,7 +57,7 @@ class Processor(object):
  # a few SQL comments that act as directives for this transform system
  re_format = re.compile('-- *format: *([0-9]+)')
-  re_statement = re.compile('-- *STMT_([A-Z_0-9]+)')
+  re_statement = re.compile('-- *STMT_([A-Z_0-9]+)( +\(([^\)]*)\))?')
  re_include = re.compile('-- *include: *([-a-z]+)')
  re_define = re.compile('-- *define: *([A-Z_0-9]+)')
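A quick sketch of what the widened re_statement pattern accepts; the statement name and the parenthesized info text are hypothetical:

import re

re_statement = re.compile('-- *STMT_([A-Z_0-9]+)( +\(([^\)]*)\))?')

# Old-style directive: no parenthesized group, so group(3) stays None.
m = re_statement.match('-- STMT_SELECT_SOMETHING')
assert m.group(1) == 'SELECT_SOMETHING' and m.group(3) is None

# New-style directive carrying an info string in parentheses.
m = re_statement.match('-- STMT_SELECT_SOMETHING (some, info)')
assert m.group(3) == 'some, info'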
@@ -66,6 +74,13 @@ class Processor(object):
    self.close_define()
    self.output.write('#define STMT_%s %d\n' % (match.group(1),
                                                self.stmt_count))
+
+    if match.group(3) == None:
+      info = 'NULL'
+    else:
+      info = '"' + match.group(3) + '"'
+    self.output.write('#define STMT_%d_INFO {"STMT_%s", %s}\n' %
+                      (self.stmt_count, match.group(1), info))
    self.output.write('#define STMT_%d \\\n' % (self.stmt_count,))
    self.var_printed = True
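To see what these writes emit for a single directive, a standalone sketch; the statement name and info text are hypothetical:

import sys

# Hypothetical first directive in a .sql file: "-- STMT_SELECT_SOMETHING (x)"
stmt_count, name, info = 0, 'SELECT_SOMETHING', '"x"'
sys.stdout.write('#define STMT_%s %d\n' % (name, stmt_count))
sys.stdout.write('#define STMT_%d_INFO {"STMT_%s", %s}\n' % (stmt_count, name, info))
sys.stdout.write('#define STMT_%d \\\n' % (stmt_count,))
# Output:
#   #define STMT_SELECT_SOMETHING 0
#   #define STMT_0_INFO {"STMT_SELECT_SOMETHING", "x"}
#   #define STMT_0 \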
@@ -82,10 +97,11 @@ class Processor(object):
    self.output.write(' APR_STRINGIFY(%s) \\\n' % define)
-  def __init__(self, dirpath, output, var_name):
+  def __init__(self, dirpath, output, var_name, token_map):
    self.dirpath = dirpath
    self.output = output
    self.var_name = var_name
+    self.token_map = token_map
    self.stmt_count = 0
    self.var_printed = False
@@ -102,9 +118,79 @@ class Processor(object):
    for line in input.split('\n'):
      line = line.replace('"', '\\"')
-      line = re.sub(r'IS_STRICT_DESCENDANT_OF[(]([A-Za-z_.]+), ([?][0-9]+)[)]',
-                    r"((\1) > (\2) || '/' AND (\1) < (\2) || '0') ",
-                    line)
+
+      # IS_STRICT_DESCENDANT_OF()
+
+      # A common operation in the working copy is determining the descendants
+      # of a node.  To allow Sqlite to use its indexes to provide the answer
+      # we must provide simple less-than and greater-than operations.
+      #
+      # For relative paths that consist of one or more components, like
+      # 'subdir', we can accomplish this by comparing local_relpath with
+      # 'subdir/' and 'subdir0' ('/'+1 = '0').
+      #
+      # For the working copy root this case is less simple, and the comparison
+      # value is not strictly valid utf-8/16 (but luckily Sqlite validates
+      # neither utf-8 nor utf-16).  The binary blob x'FFFF' is higher than any
+      # valid utf-8 or utf-16 sequence.
+      #
+      # So for the root we can compare with > '' and < x'FFFF'.  (This skips
+      # the root itself and selects all descendants.)
+      #
+
+      # '/'+1 == '0'
+      line = re.sub(
+          r'IS_STRICT_DESCENDANT_OF[(]([A-Za-z_.]+), ([?][0-9]+)[)]',
+          r"(((\1) > (CASE (\2) WHEN '' THEN '' ELSE (\2) || '/' END))" +
+          r" AND ((\1) < CASE (\2) WHEN '' THEN X'FFFF' ELSE (\2) || '0' END))",
+          line)
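As a standalone illustration, applying the substitution above to a hypothetical query fragment (the column and bind-parameter names are only examples):

import re

line = "WHERE IS_STRICT_DESCENDANT_OF(local_relpath, ?1)"
line = re.sub(
    r'IS_STRICT_DESCENDANT_OF[(]([A-Za-z_.]+), ([?][0-9]+)[)]',
    r"(((\1) > (CASE (\2) WHEN '' THEN '' ELSE (\2) || '/' END))"
    r" AND ((\1) < CASE (\2) WHEN '' THEN X'FFFF' ELSE (\2) || '0' END))",
    line)
# line is now (wrapped here for readability):
#   WHERE (((local_relpath) > (CASE (?1) WHEN '' THEN '' ELSE (?1) || '/' END))
#    AND ((local_relpath) < CASE (?1) WHEN '' THEN X'FFFF' ELSE (?1) || '0' END))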
+
+      # RELPATH_SKIP_JOIN(x, y, z) skips the x prefix from z and then joins
+      # the result after y.  In other words it replaces x with y, but follows
+      # the relpath rules.
+      line = re.sub(
+          r'RELPATH_SKIP_JOIN[(]([?]?[A-Za-z0-9_.]+), ' +
+          r'([?]?[A-Za-z0-9_.]+), ' +
+          r'([?]?[A-Za-z0-9_.]+)[)]',
+          r"(CASE WHEN (\1) = '' THEN RELPATH_JOIN(\2, \3) " +
+          r"WHEN (\2) = '' THEN RELPATH_SKIP_ANCESTOR(\1, \3) " +
+          r"WHEN SUBSTR((\3), 1, LENGTH(\1)) = (\1) " +
+          r"THEN " +
+          r"CASE WHEN LENGTH(\1) = LENGTH(\3) THEN (\2) " +
+          r"WHEN SUBSTR((\3), LENGTH(\1)+1, 1) = '/' " +
+          r"THEN (\2) || SUBSTR((\3), LENGTH(\1)+1) " +
+          r"END " +
+          r"END)",
+          line)
+
+      # RELPATH_JOIN(x, y) joins x to y following the svn_relpath_join() rules
+      line = re.sub(
+          r'RELPATH_JOIN[(]([?]?[A-Za-z0-9_.]+), ([?]?[A-Za-z0-9_.]+)[)]',
+          r"(CASE WHEN (\1) = '' THEN (\2) " +
+          r"WHEN (\2) = '' THEN (\1) " +
+          r"ELSE (\1) || '/' || (\2) " +
+          r"END)",
+          line)
+
+      # RELPATH_SKIP_ANCESTOR(x, y) skips the x prefix from y following the
+      # svn_relpath_skip_ancestor() rules.  Returns NULL when y is not below x.
+      line = re.sub(
+          r'RELPATH_SKIP_ANCESTOR[(]([?]?[A-Za-z0-9_.]+), ' +
+          r'([?]?[A-Za-z0-9_.]+)[)]',
+          r"(CASE WHEN (\1) = '' THEN (\2) " +
+          r" WHEN SUBSTR((\2), 1, LENGTH(\1)) = (\1) " +
+          r" THEN " +
+          r"CASE WHEN LENGTH(\1) = LENGTH(\2) THEN '' " +
+          r"WHEN SUBSTR((\2), LENGTH(\1)+1, 1) = '/' " +
+          r"THEN SUBSTR((\2), LENGTH(\1)+2) " +
+          r"END" +
+          r" END)",
+          line)
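To make the intent of the three RELPATH_* rewrites concrete, here is a hypothetical Python model of the semantics the generated CASE expressions implement; it is an illustration only, not part of the build script or of any svn API:

def relpath_join(x, y):
    # RELPATH_JOIN(x, y)
    if x == '':
        return y
    if y == '':
        return x
    return x + '/' + y

def relpath_skip_ancestor(x, y):
    # RELPATH_SKIP_ANCESTOR(x, y); None models the SQL NULL result
    if x == '':
        return y
    if y == x:
        return ''
    if y.startswith(x + '/'):
        return y[len(x) + 1:]
    return None                      # y is not x or below x

def relpath_skip_join(x, y, z):
    # RELPATH_SKIP_JOIN(x, y, z): replace the x prefix of z with y
    if x == '':
        return relpath_join(y, z)
    if y == '':
        return relpath_skip_ancestor(x, z)
    if z == x:
        return y
    if z.startswith(x + '/'):
        return y + z[len(x):]
    return None                      # z is not x or below x

assert relpath_join('A', 'B/C') == 'A/B/C'
assert relpath_skip_ancestor('A/B', 'A/B/C') == 'C'
assert relpath_skip_join('A/B', 'X', 'A/B/C') == 'X/C'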
+
+      # Another preprocessing step: map tokens from token-map.h to their
+      # string values.
+      for symbol, string in self.token_map.iteritems():
+        # ### This doesn't sql-escape 'string'
+        line = re.sub(r'\b%s\b' % re.escape(symbol), "'%s'" % string, line)
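A sketch of the token substitution with a made-up mapping (the real pairs come from token-map.h; the column and symbol names here are hypothetical):

import re

token_map = {'MAP_NORMAL': 'normal'}          # hypothetical entry
line = 'WHERE presence = MAP_NORMAL'
for symbol, string in token_map.items():
    line = re.sub(r'\b%s\b' % re.escape(symbol), "'%s'" % string, line)
assert line == "WHERE presence = 'normal'"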
      if line.strip():
        handled = False
@@ -138,19 +224,60 @@ class Processor(object):
      self.var_printed = False
+class NonRewritableDict(dict):
+  """A dictionary that does not allow self[k]=v when k in self
+  (unless v is equal to the stored value).
+
+  (An entry would have to be explicitly deleted before a new value
+  may be entered.)
+  """
+
+  def __setitem__(self, key, val):
+    if self.__contains__(key) and self.__getitem__(key) != val:
+      raise Exception("Can't re-insert key %r with value %r "
+                      "(already present with value %r)"
+                      % (key, val, self.__getitem__(key)))
+    super(NonRewritableDict, self).__setitem__(key, val)
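A quick illustration of the guard this class provides, assuming the definition above is in scope (the key and values are hypothetical):

d = NonRewritableDict()
d['MAP_X'] = 'x'
d['MAP_X'] = 'x'          # re-inserting the same value is allowed
try:
    d['MAP_X'] = 'y'      # a different value for an existing key is rejected
except Exception:
    pass                  # "Can't re-insert key 'MAP_X' with value 'y' ..."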
+
+def hotspots(fd):
+  hotspot = False
+  for line in fd:
+    # hotspot is TRUE within definitions of static const svn_token_map_t[].
+    hotspot ^= int(('svn_token_map_t', '\x7d;')[hotspot] in line)
+    if hotspot:
+      yield line
+
+def extract_token_map(filename):
+  try:
+    fd = open(filename)
+  except IOError:
+    return {}
+
+  pattern = re.compile(r'"(.*?)".*?(MAP_\w*)')
+  return \
+    NonRewritableDict(
+      map(operator.itemgetter(1,0),
+        map(operator.methodcaller('groups'),
+          filter(None,
+            map(pattern.search,
+              hotspots(fd))))))
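Run over a hypothetical token-map.h fragment (the entries below are made up), the pipeline above produces a symbol-to-string mapping; a condensed sketch that bypasses the file handling, assuming hotspots() and NonRewritableDict from above are in scope:

import operator, re

lines = ['static const svn_token_map_t presence_map[] = {\n',
         '  { "normal", MAP_NORMAL },\n',
         '  { "not-present", MAP_NOT_PRESENT },\n',
         '  { NULL }\n',
         '};\n']
pattern = re.compile(r'"(.*?)".*?(MAP_\w*)')
# hotspots() yields only the lines between 'svn_token_map_t' and '};';
# lines without a quoted string produce no match and are filtered out.
token_map = NonRewritableDict(
    map(operator.itemgetter(1, 0),
        map(operator.methodcaller('groups'),
            filter(None,
                   map(pattern.search, hotspots(iter(lines)))))))
assert token_map == {'MAP_NORMAL': 'normal', 'MAP_NOT_PRESENT': 'not-present'}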
+
def main(input_filepath, output):
  filename = os.path.basename(input_filepath)
  input = open(input_filepath, 'r').read()
+  token_map_filename = os.path.dirname(input_filepath) + '/token-map.h'
+  token_map = extract_token_map(token_map_filename)
+
  var_name = re.sub('[-.]', '_', filename).upper()
  output.write(
-    '/* This file is automatically generated from %s.\n'
+    '/* This file is automatically generated from %s and %s.\n'
    ' * Do not edit this file -- edit the source and rerun gen-make.py */\n'
    '\n'
-    % (filename,))
+    % (filename, token_map_filename))
-  proc = Processor(os.path.dirname(input_filepath), output, var_name)
+  proc = Processor(os.path.dirname(input_filepath), output, var_name, token_map)
  proc.process_file(input)
  ### the STMT_%d naming precludes *multiple* transform_sql headers from
@@ -165,6 +292,13 @@ def main(input_filepath, output):
    + ', \\\n'.join(' STMT_%d' % (i,) for i in range(proc.stmt_count))
    + ', \\\n NULL \\\n }\n')
+  output.write('\n')
+
+  output.write(
+    '#define %s_DECLARE_STATEMENT_INFO(varname) \\\n' % (var_name,)
+    + ' static const char * const varname[][2] = { \\\n'
+    + ', \\\n'.join(' STMT_%d_INFO' % (i) for i in range(proc.stmt_count))
+    + ', \\\n {NULL, NULL} \\\n }\n')
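For a run that produced, say, two statements, the expression above formats a macro of roughly this shape (the variable name and count are hypothetical):

var_name, stmt_count = 'WC_QUERIES_SQL', 2    # hypothetical values
text = ('#define %s_DECLARE_STATEMENT_INFO(varname) \\\n' % (var_name,)
        + ' static const char * const varname[][2] = { \\\n'
        + ', \\\n'.join(' STMT_%d_INFO' % (i) for i in range(stmt_count))
        + ', \\\n {NULL, NULL} \\\n }\n')
# text now reads roughly:
#   #define WC_QUERIES_SQL_DECLARE_STATEMENT_INFO(varname) \
#    static const char * const varname[][2] = { \
#    STMT_0_INFO, \
#    STMT_1_INFO, \
#    {NULL, NULL} \
#    }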
if __name__ == '__main__':
  if len(sys.argv) < 2 or len(sys.argv) > 3: