1 files changed, 142 insertions, 8 deletions
diff --git a/build/transform_sql.py b/build/transform_sql.py
index 12f3f6b..90e1b79 100755
--- a/build/transform_sql.py
+++ b/build/transform_sql.py
@@ -25,11 +25,19 @@
 #
 
 
+import operator
 import os
 import re
 import sys
 
 
+# operator.methodcaller doesn't exist in Python 2.5.
+if not hasattr(operator, 'methodcaller'):
+  def methodcaller(method, *args, **kwargs):
+    return lambda x: getattr(x, method)(*args, **kwargs)
+  operator.methodcaller = methodcaller
+  del methodcaller
+
 DEFINE_END = '  ""\n\n'
 
 
@@ -49,7 +57,7 @@ class Processor(object):
 
   # a few SQL comments that act as directives for this transform system
   re_format = re.compile('-- *format: *([0-9]+)')
-  re_statement = re.compile('-- *STMT_([A-Z_0-9]+)')
+  re_statement = re.compile('-- *STMT_([A-Z_0-9]+)( +\(([^\)]*)\))?')
   re_include = re.compile('-- *include: *([-a-z]+)')
   re_define = re.compile('-- *define: *([A-Z_0-9]+)')
 
@@ -66,6 +74,13 @@ class Processor(object):
     self.close_define()
     self.output.write('#define STMT_%s %d\n' % (match.group(1),
                                                 self.stmt_count))
+
+    if match.group(3) == None:
+      info = 'NULL'
+    else:
+      info = '"' + match.group(3) + '"'
+    self.output.write('#define STMT_%d_INFO {"STMT_%s", %s}\n' %
+                      (self.stmt_count, match.group(1), info))
     self.output.write('#define STMT_%d \\\n' % (self.stmt_count,))
     self.var_printed = True
 
@@ -82,10 +97,11 @@ class Processor(object):
 
     self.output.write('  APR_STRINGIFY(%s) \\\n' % define)
 
-  def __init__(self, dirpath, output, var_name):
+  def __init__(self, dirpath, output, var_name, token_map):
     self.dirpath = dirpath
     self.output = output
     self.var_name = var_name
+    self.token_map = token_map
 
     self.stmt_count = 0
     self.var_printed = False
@@ -102,9 +118,79 @@ class Processor(object):
 
     for line in input.split('\n'):
       line = line.replace('"', '\\"')
-      line = re.sub(r'IS_STRICT_DESCENDANT_OF[(]([A-Za-z_.]+), ([?][0-9]+)[)]',
-                    r"((\1) > (\2) || '/' AND (\1) < (\2) || '0') ",
-                    line)
+
+      # IS_STRICT_DESCENDANT_OF()
+
+      # A common operation in the working copy is determining descendants of
+      # a node. To allow Sqlite to use its indexes to provide the answer we
+      # must provide simple less than and greater than operations.
+      #
+      # For relative paths that consist of one or more components like 'subdir'
+      # we can accomplish this by comparing local_relpath with 'subdir/' and
+      # 'subdir0' ('/'+1 = '0')
+      #
+      # For the working copy root this case is less simple and not strictly
+      # valid utf-8/16 (but luckily Sqlite doesn't validate utf-8 nor utf-16).
+      # The binary blob x'FFFF' is higher than any valid utf-8 and utf-16
+      # sequence.
+      #
+      # So for the root we can compare with > '' and < x'FFFF'. (This skips the
+      # root itself and selects all descendants)
+      #
+
+      # '/'+1 == '0'
+      line = re.sub(
+            r'IS_STRICT_DESCENDANT_OF[(]([A-Za-z_.]+), ([?][0-9]+)[)]',
+            r"(((\1) > (CASE (\2) WHEN '' THEN '' ELSE (\2) || '/' END))" +
+            r" AND ((\1) < CASE (\2) WHEN '' THEN X'FFFF' ELSE (\2) || '0' END))",
+            line)
+
+      # RELPATH_SKIP_JOIN(x, y, z) skips the x prefix from z and the joins the
+      # result after y. In other words it replaces x with y, but follows the
+      # relpath rules.
+      line = re.sub(
+             r'RELPATH_SKIP_JOIN[(]([?]?[A-Za-z0-9_.]+), ' +
+                                 r'([?]?[A-Za-z0-9_.]+), ' +
+                                 r'([?]?[A-Za-z0-9_.]+)[)]',
+             r"(CASE WHEN (\1) = '' THEN RELPATH_JOIN(\2, \3) " +
+             r"WHEN (\2) = '' THEN RELPATH_SKIP_ANCESTOR(\1, \3) " +
+             r"WHEN SUBSTR((\3), 1, LENGTH(\1)) = (\1) " +
+             r"THEN " +
+                   r"CASE WHEN LENGTH(\1) = LENGTH(\3) THEN (\2) " +
+                        r"WHEN SUBSTR((\3), LENGTH(\1)+1, 1) = '/' " +
+                        r"THEN (\2) || SUBSTR((\3), LENGTH(\1)+1) " +
+                   r"END " +
+             r"END)",
+             line)
+
+      # RELPATH_JOIN(x, y) joins x to y following the svn_relpath_join() rules
+      line = re.sub(
+            r'RELPATH_JOIN[(]([?]?[A-Za-z0-9_.]+), ([?]?[A-Za-z0-9_.]+)[)]',
+            r"(CASE WHEN (\1) = '' THEN (\2) " +
+                  r"WHEN (\2) = '' THEN (\1) " +
+                 r"ELSE (\1) || '/' || (\2) " +
+            r"END)",
+            line)
+
+      # RELPATH_SKIP_ANCESTOR(x, y) skips the x prefix from y following the
+      # svn_relpath_skip_ancestor() rules. Returns NULL when y is not below X.
+      line = re.sub(
+             r'RELPATH_SKIP_ANCESTOR[(]([?]?[A-Za-z0-9_.]+), ' +
+                                     r'([?]?[A-Za-z0-9_.]+)[)]',
+             r"(CASE WHEN (\1) = '' THEN (\2) " +
+             r" WHEN SUBSTR((\2), 1, LENGTH(\1)) = (\1) " +
+             r" THEN " +
+                   r"CASE WHEN LENGTH(\1) = LENGTH(\2) THEN '' " +
+                        r"WHEN SUBSTR((\2), LENGTH(\1)+1, 1) = '/' " +
+                        r"THEN SUBSTR((\2), LENGTH(\1)+2) " +
+                   r"END" +
+             r" END)",
+            line)
+
+      # Another preprocessing.
+      for symbol, string in self.token_map.iteritems():
+        # ### This doesn't sql-escape 'string'
+        line = re.sub(r'\b%s\b' % re.escape(symbol), "'%s'" % string, line)
 
       if line.strip():
         handled = False
@@ -138,19 +224,60 @@ class Processor(object):
       self.var_printed = False
 
 
+class NonRewritableDict(dict):
+  """A dictionary that does not allow self[k]=v when k in self
+  (unless v is equal to the stored value).
+
+  (An entry would have to be explicitly deleted before a new value
+  may be entered.)
+  """
+
+  def __setitem__(self, key, val):
+    if self.__contains__(key) and self.__getitem__(key) != val:
+      raise Exception("Can't re-insert key %r with value %r "
+                      "(already present with value %r)"
+                      % (key, val, self.__getitem__(key)))
+    super(NonRewritableDict, self).__setitem__(key, val)
+
+def hotspots(fd):
+  hotspot = False
+  for line in fd:
+    # hotspot is TRUE within definitions of static const svn_token_map_t[].
+    hotspot ^= int(('svn_token_map_t', '\x7d;')[hotspot] in line)
+    if hotspot:
+      yield line
+
+def extract_token_map(filename):
+  try:
+    fd = open(filename)
+  except IOError:
+    return {}
+
+  pattern = re.compile(r'"(.*?)".*?(MAP_\w*)')
+  return \
+    NonRewritableDict(
+      map(operator.itemgetter(1,0),
+        map(operator.methodcaller('groups'),
+          filter(None,
+            map(pattern.search,
+              hotspots(fd))))))
+
 def main(input_filepath, output):
   filename = os.path.basename(input_filepath)
   input = open(input_filepath, 'r').read()
 
+  token_map_filename = os.path.dirname(input_filepath) + '/token-map.h'
+  token_map = extract_token_map(token_map_filename)
+
   var_name = re.sub('[-.]', '_', filename).upper()
 
   output.write(
-    '/* This file is automatically generated from %s.\n'
+    '/* This file is automatically generated from %s and %s.\n'
     ' * Do not edit this file -- edit the source and rerun gen-make.py */\n'
     '\n'
-    % (filename,))
+    % (filename, token_map_filename))
 
-  proc = Processor(os.path.dirname(input_filepath), output, var_name)
+  proc = Processor(os.path.dirname(input_filepath), output, var_name, token_map)
   proc.process_file(input)
 
   ### the STMT_%d naming precludes *multiple* transform_sql headers from
@@ -165,6 +292,13 @@ def main(input_filepath, output):
       + ', \\\n'.join('    STMT_%d' % (i,) for i in range(proc.stmt_count))
       + ', \\\n    NULL \\\n  }\n')
 
+    output.write('\n')
+
+    output.write(
+      '#define %s_DECLARE_STATEMENT_INFO(varname) \\\n' % (var_name,)
+      + '  static const char * const varname[][2] = { \\\n'
+      + ', \\\n'.join('    STMT_%d_INFO' % (i) for i in range(proc.stmt_count))
+      + ', \\\n    {NULL, NULL} \\\n  }\n')
 
 if __name__ == '__main__':
   if len(sys.argv) < 2 or len(sys.argv) > 3: