#!/usr/bin/env python
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
#
# transform_sql.py -- create a header file with the appropriate SQL variables
# from an SQL file
#

import operator
import os
import re
import sys


# operator.methodcaller doesn't exist in Python 2.5.
if not hasattr(operator, 'methodcaller'):
    def methodcaller(method, *args, **kwargs):
        return lambda x: getattr(x, method)(*args, **kwargs)
    operator.methodcaller = methodcaller
    del methodcaller

# Text that terminates a multi-line '#define' whose previous line ended with
# a continuation backslash.
DEFINE_END = ' ""\n\n'


def usage_and_exit(msg):
    """Print MSG (if any) and the usage text to stderr, then exit with 1."""
    if msg:
        sys.stderr.write('%s\n\n' % msg)
    sys.stderr.write(
        'USAGE: %s SQLITE_FILE [OUTPUT_FILE]\n'
        ' stdout will be used if OUTPUT_FILE is not provided.\n'
        % os.path.basename(sys.argv[0]))
    sys.stderr.flush()
    sys.exit(1)


def _read_file(path):
    """Return the whole text content of PATH, always closing the file."""
    fd = open(path)
    try:
        return fd.read()
    finally:
        fd.close()


class Processor(object):
    """Translate SQL text into C preprocessor '#define's written to OUTPUT.

    DIRPATH is the directory searched for '-- include:' files, OUTPUT is a
    writable file-like object, VAR_NAME is the macro name used for SQL that
    is not part of a named statement, and TOKEN_MAP maps symbols to the
    strings that replace them (single-quoted) in the SQL text.

    After processing, 'stmt_count' holds the number of '-- STMT_*'
    directives that were seen.
    """

    re_comments = re.compile(r'/\*.*?\*/', re.MULTILINE|re.DOTALL)

    # a few SQL comments that act as directives for this transform system
    re_format = re.compile(r'-- *format: *([0-9]+)')
    re_statement = re.compile(r'-- *STMT_([A-Z_0-9]+)( +\(([^\)]*)\))?')
    re_include = re.compile(r'-- *include: *([-a-z]+)')
    re_define = re.compile(r'-- *define: *([A-Z_0-9]+)')

    # Pseudo-functions expanded inline in the SQL text, as (pattern,
    # replacement) pairs.  They are applied in this order on every line; the
    # order matters because the RELPATH_SKIP_JOIN expansion itself contains
    # RELPATH_JOIN() and RELPATH_SKIP_ANCESTOR() calls that the later entries
    # expand.
    _macros = (
        # IS_STRICT_DESCENDANT_OF()
        # A common operation in the working copy is determining descendants
        # of a node. To allow Sqlite to use its indexes to provide the answer
        # we must provide simple less than and greater than operations.
        #
        # For relative paths that consist of one or more components like
        # 'subdir' we can accomplish this by comparing local_relpath with
        # 'subdir/' and 'subdir0' ('/'+1 = '0')
        #
        # For the working copy root this case is less simple and not strictly
        # valid utf-8/16 (but luckily Sqlite doesn't validate utf-8 nor
        # utf-16). The binary blob x'FFFF' is higher than any valid utf-8 and
        # utf-16 sequence.
        #
        # So for the root we can compare with > '' and < x'FFFF'. (This skips
        # the root itself and selects all descendants)
        #
        # '/'+1 == '0'
        (re.compile(
            r'IS_STRICT_DESCENDANT_OF[(]([?]?[A-Za-z0-9_.]+), ([?]?[A-Za-z0-9_.]+)[)]'),
         r"(((\1) > (CASE (\2) WHEN '' THEN '' ELSE (\2) || '/' END))" +
         r" AND ((\1) < CASE (\2) WHEN '' THEN X'FFFF' ELSE (\2) || '0' END))"),

        # RELPATH_SKIP_JOIN(x, y, z) skips the x prefix from z and then joins
        # the result after y. In other words it replaces x with y, but
        # follows the relpath rules.
        #
        # This matches the C version of:
        #   svn_relpath_join(y, svn_relpath_skip_ancestor(x, z), pool)
        # but returns an SQL NULL in case z is not below x.
        (re.compile(
            r'RELPATH_SKIP_JOIN[(]([?]?[A-Za-z0-9_.]+), ' +
            r'([?]?[A-Za-z0-9_.]+), ' +
            r'([?]?[A-Za-z0-9_.]+)[)]'),
         r"(CASE WHEN (\1) = '' THEN RELPATH_JOIN(\2, \3) " +
         r"WHEN (\2) = '' THEN RELPATH_SKIP_ANCESTOR(\1, \3) " +
         r"WHEN SUBSTR((\3), 1, LENGTH(\1)) = (\1) " +
         r"THEN " +
         r"CASE WHEN LENGTH(\1) = LENGTH(\3) THEN (\2) " +
         r"WHEN SUBSTR((\3), LENGTH(\1)+1, 1) = '/' " +
         r"THEN (\2) || SUBSTR((\3), LENGTH(\1)+1) " +
         r"END " +
         r"END)"),

        # RELPATH_JOIN(x, y) joins x to y following the svn_relpath_join()
        # rules
        (re.compile(
            r'RELPATH_JOIN[(]([?]?[A-Za-z0-9_.]+), ([?]?[A-Za-z0-9_.]+)[)]'),
         r"(CASE WHEN (\1) = '' THEN (\2) " +
         r"WHEN (\2) = '' THEN (\1) " +
         r"ELSE (\1) || '/' || (\2) " +
         r"END)"),

        # RELPATH_SKIP_ANCESTOR(x, y) skips the x prefix from y following the
        # svn_relpath_skip_ancestor() rules. Returns NULL when y is not below
        # X.
        (re.compile(
            r'RELPATH_SKIP_ANCESTOR[(]([?]?[A-Za-z0-9_.]+), ' +
            r'([?]?[A-Za-z0-9_.]+)[)]'),
         r"(CASE WHEN (\1) = '' THEN (\2) " +
         r" WHEN SUBSTR((\2), 1, LENGTH(\1)) = (\1) " +
         r" THEN " +
         r"CASE WHEN LENGTH(\1) = LENGTH(\2) THEN '' " +
         r"WHEN SUBSTR((\2), LENGTH(\1)+1, 1) = '/' " +
         r"THEN SUBSTR((\2), LENGTH(\1)+2) " +
         r"END" +
         r" END)"),
    )

    def _sub_format(self, match):
        """Handle '-- format: N': start the '#define <VAR_NAME>_N' macro."""
        vsn = match.group(1)

        self.close_define()
        self.output.write('#define %s_%s \\\n' % (self.var_name, vsn))
        self.var_printed = True

    def _sub_statement(self, match):
        """Handle '-- STMT_NAME [(info)]': start a new numbered statement."""
        name = match.group(1)

        self.close_define()
        self.output.write('#define STMT_%s %d\n' % (name, self.stmt_count))

        # The parenthesized info text is optional in the directive.
        if match.group(3) is None:
            info = 'NULL'
        else:
            info = '"' + match.group(3) + '"'
        self.output.write('#define STMT_%d_INFO {"STMT_%s", %s}\n' %
                          (self.stmt_count, name, info))
        self.output.write('#define STMT_%d \\\n' % (self.stmt_count,))
        self.var_printed = True

        self.stmt_count += 1

    def _sub_include(self, match):
        """Handle '-- include: NAME': process DIRPATH/NAME.sql in place."""
        filepath = os.path.join(self.dirpath, match.group(1) + '.sql')

        self.close_define()
        self.process_file(_read_file(filepath))

    def _sub_define(self, match):
        """Handle '-- define: NAME': emit NAME via APR_STRINGIFY()."""
        define = match.group(1)

        self.output.write(' APR_STRINGIFY(%s) \\\n' % define)

    def __init__(self, dirpath, output, var_name, token_map):
        self.dirpath = dirpath
        self.output = output
        self.var_name = var_name
        self.token_map = token_map

        self.stmt_count = 0
        # True while a '#define' with a trailing continuation is open.
        self.var_printed = False

        self._directives = {
            self.re_format : self._sub_format,
            self.re_statement : self._sub_statement,
            self.re_include : self._sub_include,
            self.re_define : self._sub_define,
        }

    def process_file(self, input):
        """Transform the SQL text INPUT and write the result to the output.

        C-style comments are removed, double quotes are backslash-escaped,
        the pseudo-functions and token-map symbols are expanded, directive
        lines are dispatched to their handlers, and every other non-blank
        line is written as one quoted piece of a continued '#define'.
        """
        input = self.re_comments.sub('', input)

        for line in input.split('\n'):
            line = line.replace('"', '\\"')

            for pattern, replacement in self._macros:
                line = pattern.sub(replacement, line)

            # Another preprocessing.
            for symbol, string in self.token_map.items():
                # SQL-escape embedded single quotes by doubling them.
                quoted = "'%s'" % string.replace("'", "''")
                # Use a callable so that backslashes in the value are
                # inserted literally rather than being interpreted as
                # replacement-template escapes by re.sub().
                line = re.sub(r'\b%s\b' % re.escape(symbol),
                              lambda _match, _text=quoted: _text,
                              line)

            if not line.strip():
                continue

            handled = False
            for regex, handler in self._directives.items():
                match = regex.match(line)
                if match:
                    handler(match)
                    handled = True
                    break

            # we've handled the line, so skip it
            if handled:
                continue

            if not self.var_printed:
                self.output.write('#define %s \\\n' % self.var_name)
                self.var_printed = True

            # got something besides whitespace. write it out. include some
            # whitespace to separate the SQL commands. and a backslash to
            # continue the string onto the next line.
            self.output.write(' "%s " \\\n' % line.rstrip())

        # previous line had a continuation. end the madness.
        self.close_define()

    def close_define(self):
        """Terminate the currently open '#define', if there is one."""
        if self.var_printed:
            self.output.write(DEFINE_END)
            self.var_printed = False


class NonRewritableDict(dict):
    """A dictionary that does not allow self[k]=v when k in self
    (unless v is equal to the stored value).

    (An entry would have to be explicitly deleted before a new value
    may be entered.)

    The constructor and update() apply the same rule: the built-in dict
    versions do not call an overridden __setitem__, so they are routed
    through it explicitly here.
    """

    def __init__(self, *args, **kwargs):
        super(NonRewritableDict, self).__init__()
        self.update(*args, **kwargs)

    def update(self, *args, **kwargs):
        """Insert items like dict.update(), but via the checked __setitem__."""
        if len(args) > 1:
            raise TypeError('update expected at most 1 arguments, got %d'
                            % len(args))
        if args:
            source = args[0]
            if hasattr(source, 'keys'):
                for key in source.keys():
                    self[key] = source[key]
            else:
                for key, val in source:
                    self[key] = val
        for key, val in kwargs.items():
            self[key] = val

    def __setitem__(self, key, val):
        if self.__contains__(key) and self.__getitem__(key) != val:
            raise Exception("Can't re-insert key %r with value %r "
                            "(already present with value %r)"
                            % (key, val, self.__getitem__(key)))
        super(NonRewritableDict, self).__setitem__(key, val)


def hotspots(fd):
    """Yield the lines of FD that lie within svn_token_map_t definitions.

    A line containing 'svn_token_map_t' opens a hotspot and is itself
    yielded; the next line containing '};' closes it and is not yielded.
    """
    inside = False
    for line in fd:
        # INSIDE is true within definitions of static const svn_token_map_t[].
        if inside:
            marker = '\x7d;'
        else:
            marker = 'svn_token_map_t'
        if marker in line:
            inside = not inside
        if inside:
            yield line


def extract_token_map(filename):
    """Return a mapping of MAP_* symbols to strings parsed from FILENAME.

    Within each svn_token_map_t definition, every line holding a quoted
    string followed by a MAP_* word contributes one symbol -> string entry.
    Returns an empty dict if FILENAME cannot be opened.  Raises Exception if
    one symbol is found with two different strings.
    """
    try:
        fd = open(filename)
    except IOError:
        return {}

    pattern = re.compile(r'"(.*?)".*?(MAP_\w*)')
    token_map = NonRewritableDict()
    try:
        for line in hotspots(fd):
            match = pattern.search(line)
            if match:
                string, symbol = match.groups()
                token_map[symbol] = string
    finally:
        fd.close()
    return token_map


def main(input_filepath, output):
    """Transform the SQL file INPUT_FILEPATH and write the header to OUTPUT.

    The token map is read from 'token-map.h' in the same directory as
    INPUT_FILEPATH.  OUTPUT is a writable file-like object; it is not closed
    here.
    """
    filename = os.path.basename(input_filepath)
    sql_text = _read_file(input_filepath)

    token_map_filename = os.path.dirname(input_filepath) + '/token-map.h'
    token_map = extract_token_map(token_map_filename)

    var_name = re.sub('[-.]', '_', filename).upper()

    output.write(
        '/* This file is automatically generated from %s and %s.\n'
        ' * Do not edit this file -- edit the source and rerun gen-make.py */\n'
        '\n'
        % (filename, token_map_filename))

    proc = Processor(os.path.dirname(input_filepath), output, var_name,
                     token_map)
    proc.process_file(sql_text)

    ### the STMT_%d naming precludes *multiple* transform_sql headers from
    ### being used within the same .c file. for now, that's more than fine.
    ### in the future, we can always add a var_name discriminator or use
    ### the statement name itself (which should hopefully be unique across
    ### all names in use; or can easily be made so)
    if proc.stmt_count > 0:
        output.write(
            '#define %s_DECLARE_STATEMENTS(varname) \\\n' % (var_name,)
            + ' static const char * const varname[] = { \\\n'
            + ', \\\n'.join(' STMT_%d' % (i,) for i in range(proc.stmt_count))
            + ', \\\n NULL \\\n }\n')

        output.write('\n')

        output.write(
            '#define %s_DECLARE_STATEMENT_INFO(varname) \\\n' % (var_name,)
            + ' static const char * const varname[][2] = { \\\n'
            + ', \\\n'.join(' STMT_%d_INFO' % (i)
                            for i in range(proc.stmt_count))
            + ', \\\n {NULL, NULL} \\\n }\n')


if __name__ == '__main__':
    if len(sys.argv) < 2 or len(sys.argv) > 3:
        usage_and_exit('Incorrect number of arguments')

    # Note: we could use stdin, but then we'd have no var_name
    input_filepath = sys.argv[1]

    if len(sys.argv) > 2:
        output_file = open(sys.argv[2], 'w')
        try:
            main(input_filepath, output_file)
        finally:
            # Make sure the generated header is flushed and closed.
            output_file.close()
    else:
        main(input_filepath, sys.stdout)