diff options
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | Makefile.am | 5 | ||||
-rwxr-xr-x | doc/apibuild.py | 2939 | ||||
-rw-r--r-- | libxslt/keys.c | 61 | ||||
-rw-r--r-- | libxslt/transform.c | 8 | ||||
-rw-r--r-- | tests/docs/Makefile.am | 2 | ||||
-rw-r--r-- | tests/docs/bug-133.xml | 7 | ||||
-rw-r--r-- | tests/general/Makefile.am | 2 | ||||
-rw-r--r-- | tests/general/bug-133.out | 6 | ||||
-rw-r--r-- | tests/general/bug-133.xsl | 32 |
10 files changed, 1791 insertions, 1281 deletions
@@ -1,3 +1,13 @@ +Thu Nov 20 00:22:14 CET 2003 Daniel Veillard <daniel@veillard.com> + + * libxslt/keys.c: fixed a bug in the keys selector parsing + #120684 when | is in a predicate or a string. + * tests/docs/Makefile.am tests/docs/bug-132.xml + tests/general/Makefile.am tests/docs/bug-132*: added tests + to the regression suite for bug #120684. + * Makefile.am: don't package cvs temp files + * doc/apibuild.py: update from libxml2 one + Tue Nov 18 13:42:12 HKT 2003 William Brack <wbrack@mmm.com.hk> * libexslt/strings.c: fixed entity problem in exslt:tokenize diff --git a/Makefile.am b/Makefile.am index 6c2dc388..52d3c38f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -13,7 +13,7 @@ confexec_DATA = xsltConf.sh bin_SCRIPTS = xslt-config -dist-hook: libxslt.spec +dist-hook: cleanup libxslt.spec (cd $(srcdir) ; tar -cf - --exclude CVS win32 vms) | (cd $(distdir); tar xf -) EXTRA_DIST = xsltConf.sh.in xslt-config.in libxslt.spec libxslt.spec.in \ @@ -50,6 +50,9 @@ valgrind: @echo '## Go get a cup of coffee it is gonna take a while ...' @(cd tests ; $(MAKE) CHECKER='valgrind -q' tests) +cleanup: + -@(find . -name .\#\* -exec rm {} \;) + cleantar: @(rm -f libxslt*.tar.gz) diff --git a/doc/apibuild.py b/doc/apibuild.py index cf803839..223214de 100755 --- a/doc/apibuild.py +++ b/doc/apibuild.py @@ -7,209 +7,12 @@ # # daniel@veillard.com # -import sys +import os, sys import string import glob debug=0 -def escape(raw): - raw = string.replace(raw, '&', '&') - raw = string.replace(raw, '<', '<') - raw = string.replace(raw, '>', '>') - raw = string.replace(raw, "'", ''') - raw = string.replace(raw, '"', '"') - return raw - -class identifier: - def __init__(self, name, module=None, type=None, info=None, extra=None): - self.name = name - self.module = module - self.type = type - self.info = info - self.extra = extra - self.static = 0 - - def __repr__(self): - r = "%s %s:" % (self.type, self.name) - if self.static: - r = r + " static" - if self.module != None: - r = r + " from %s" % (self.module) - if self.info != None: - r = r + " " + `self.info` - if self.extra != None: - r = r + " " + `self.extra` - return r - - - def set_module(self, module): - self.module = module - def set_type(self, type): - self.type = type - def set_info(self, info): - self.info = info - def set_extra(self, extra): - self.extra = extra - def set_static(self, static): - self.static = static - - def update(self, module, type = None, info = None, extra=None): - if module != None and self.module == None: - self.set_module(module) - if type != None and self.type == None: - self.set_type(type) - if info != None: - self.set_info(info) - if extra != None: - self.set_extra(extra) - - -class index: - def __init__(self, name = "noname"): - self.name = name; - self.identifiers = {} - self.functions = {} - self.variables = {} - self.includes = {} - self.structs = {} - self.enums = {} - self.typedefs = {} - self.macros = {} - self.references = {} - - def add(self, name, module, static, type, info=None, extra=None): - if name[0:2] == '__': - return None - d = None - try: - d = self.identifiers[name] - d.update(module, type, info, extra) - except: - d = identifier(name, module, type, info, extra) - self.identifiers[name] = d - - if d != None and static == 1: - d.set_static(1) - - if d != None and name != None and type != None: - if type == "function": - self.functions[name] = d - elif type == "functype": - self.functions[name] = d - elif type == "variable": - self.variables[name] = d - elif type == "include": - self.includes[name] = d - elif type == "struct": - self.structs[name] = d - elif type == "enum": - self.enums[name] = d - elif type == "typedef": - self.typedefs[name] = d - elif type == "macro": - self.macros[name] = d - else: - print "Unable to register type ", type - return d - - def merge(self, idx): - for id in idx.functions.keys(): - # - # macro might be used to override functions or variables - # definitions - # - if self.macros.has_key(id): - del self.macros[id] - if self.functions.has_key(id): - print "function %s from %s redeclared in %s" % ( - id, self.functions[id].module, idx.functions[id].module) - else: - self.functions[id] = idx.functions[id] - self.identifiers[id] = idx.functions[id] - for id in idx.variables.keys(): - # - # macro might be used to override functions or variables - # definitions - # - if self.macros.has_key(id): - del self.macros[id] - if self.variables.has_key(id): - print "variable %s from %s redeclared in %s" % ( - id, self.variables[id].module, idx.variables[id].module) - else: - self.variables[id] = idx.variables[id] - self.identifiers[id] = idx.variables[id] - for id in idx.structs.keys(): - if self.structs.has_key(id): - print "struct %s from %s redeclared in %s" % ( - id, self.structs[id].module, idx.structs[id].module) - else: - self.structs[id] = idx.structs[id] - self.identifiers[id] = idx.structs[id] - for id in idx.typedefs.keys(): - if self.typedefs.has_key(id): - print "typedef %s from %s redeclared in %s" % ( - id, self.typedefs[id].module, idx.typedefs[id].module) - else: - self.typedefs[id] = idx.typedefs[id] - self.identifiers[id] = idx.typedefs[id] - for id in idx.macros.keys(): - # - # macro might be used to override functions or variables - # definitions - # - if self.variables.has_key(id): - continue - if self.functions.has_key(id): - continue - if self.enums.has_key(id): - continue - if self.macros.has_key(id): - print "macro %s from %s redeclared in %s" % ( - id, self.macros[id].module, idx.macros[id].module) - else: - self.macros[id] = idx.macros[id] - self.identifiers[id] = idx.macros[id] - for id in idx.enums.keys(): - if self.enums.has_key(id): - print "enum %s from %s redeclared in %s" % ( - id, self.enums[id].module, idx.enums[id].module) - else: - self.enums[id] = idx.enums[id] - self.identifiers[id] = idx.enums[id] - - def merge_public(self, idx): - for id in idx.functions.keys(): - if self.functions.has_key(id): - up = idx.functions[id] - self.functions[id].update(None, up.type, up.info, up.extra) - # else: - # print "Function %s from %s is not declared in headers" % ( - # id, idx.functions[id].module) - # TODO: do the same for variables. - - def analyze_dict(self, type, dict): - count = 0 - public = 0 - for name in dict.keys(): - id = dict[name] - count = count + 1 - if id.static == 0: - public = public + 1 - if count != public: - print " %d %s , %d public" % (count, type, public) - elif count != 0: - print " %d public %s" % (count, type) - - - def analyze(self): - self.analyze_dict("functions", self.functions) - self.analyze_dict("variables", self.variables) - self.analyze_dict("structs", self.structs) - self.analyze_dict("typedefs", self.typedefs) - self.analyze_dict("macros", self.macros) - # # C parser analysis code # @@ -221,6 +24,11 @@ ignored_files = { "acconfig.h": "generated portability layer", "config.h": "generated portability layer", "libxml.h": "internal only", + "testOOM.c": "out of memory tester", + "testOOMlib.h": "out of memory tester", + "testOOMlib.c": "out of memory tester", + "pattern.c": "not integrated yet", + "pattern.h": "not integrated yet", } ignored_words = { @@ -241,190 +49,430 @@ ignored_words = { "X_IN_Y": (5, "macro function builder"), } -class CLexer: - """A lexer for the C language, tokenize the input by reading and - analyzing it line by line""" - def __init__(self, input): - self.input = input - self.tokens = [] - self.line = "" - self.lineno = 0 - - def getline(self): - line = '' - while line == '': - line = self.input.readline() - if not line: - return None - self.lineno = self.lineno + 1 - line = string.lstrip(line) - line = string.rstrip(line) - if line == '': - continue - while line[-1] == '\\': - line = line[:-1] - n = self.input.readline() - self.lineno = self.lineno + 1 - n = string.lstrip(n) - n = string.rstrip(n) - if not n: - break - else: - line = line + n - return line - - def getlineno(self): - return self.lineno +def escape(raw): + raw = string.replace(raw, '&', '&') + raw = string.replace(raw, '<', '<') + raw = string.replace(raw, '>', '>') + raw = string.replace(raw, "'", ''') + raw = string.replace(raw, '"', '"') + return raw + +def uniq(items): + d = {} + for item in items: + d[item]=1 + return d.keys() - def push(self, token): - self.tokens.insert(0, token); +class identifier: + def __init__(self, name, module=None, type=None, lineno = 0, + info=None, extra=None): + self.name = name + self.module = module + self.type = type + self.info = info + self.extra = extra + self.lineno = lineno + self.static = 0 + + def __repr__(self): + r = "%s %s:" % (self.type, self.name) + if self.static: + r = r + " static" + if self.module != None: + r = r + " from %s" % (self.module) + if self.info != None: + r = r + " " + `self.info` + if self.extra != None: + r = r + " " + `self.extra` + return r + + + def set_module(self, module): + self.module = module + def set_type(self, type): + self.type = type + def set_info(self, info): + self.info = info + def set_extra(self, extra): + self.extra = extra + def set_lineno(self, lineno): + self.lineno = lineno + def set_static(self, static): + self.static = static + + def get_name(self): + return self.name + def get_module(self): + return self.module + def get_type(self): + return self.type + def get_info(self): + return self.info + def get_lineno(self): + return self.lineno + def get_extra(self): + return self.extra + def get_static(self): + return self.static + + def update(self, module, type = None, info = None, extra=None): + if module != None and self.module == None: + self.set_module(module) + if type != None and self.type == None: + self.set_type(type) + if info != None: + self.set_info(info) + if extra != None: + self.set_extra(extra) - def debug(self): - print "Last token: ", self.last - print "Token queue: ", self.tokens - print "Line %d end: " % (self.lineno), self.line - def token(self): - while self.tokens == []: - if self.line == "": - line = self.getline() +class index: + def __init__(self, name = "noname"): + self.name = name + self.identifiers = {} + self.functions = {} + self.variables = {} + self.includes = {} + self.structs = {} + self.enums = {} + self.typedefs = {} + self.macros = {} + self.references = {} + self.info = {} + + def add_ref(self, name, module, static, type, lineno, info=None, extra=None): + if name[0:2] == '__': + return None + d = None + try: + d = self.identifiers[name] + d.update(module, type, lineno, info, extra) + except: + d = identifier(name, module, type, lineno, info, extra) + self.identifiers[name] = d + + if d != None and static == 1: + d.set_static(1) + + if d != None and name != None and type != None: + self.references[name] = d + + def add(self, name, module, static, type, lineno, info=None, extra=None): + if name[0:2] == '__': + return None + d = None + try: + d = self.identifiers[name] + d.update(module, type, lineno, info, extra) + except: + d = identifier(name, module, type, lineno, info, extra) + self.identifiers[name] = d + + if d != None and static == 1: + d.set_static(1) + + if d != None and name != None and type != None: + if type == "function": + self.functions[name] = d + elif type == "functype": + self.functions[name] = d + elif type == "variable": + self.variables[name] = d + elif type == "include": + self.includes[name] = d + elif type == "struct": + self.structs[name] = d + elif type == "enum": + self.enums[name] = d + elif type == "typedef": + self.typedefs[name] = d + elif type == "macro": + self.macros[name] = d + else: + print "Unable to register type ", type + return d + + def merge(self, idx): + for id in idx.functions.keys(): + # + # macro might be used to override functions or variables + # definitions + # + if self.macros.has_key(id): + del self.macros[id] + if self.functions.has_key(id): + print "function %s from %s redeclared in %s" % ( + id, self.functions[id].module, idx.functions[id].module) + else: + self.functions[id] = idx.functions[id] + self.identifiers[id] = idx.functions[id] + for id in idx.variables.keys(): + # + # macro might be used to override functions or variables + # definitions + # + if self.macros.has_key(id): + del self.macros[id] + if self.variables.has_key(id): + print "variable %s from %s redeclared in %s" % ( + id, self.variables[id].module, idx.variables[id].module) + else: + self.variables[id] = idx.variables[id] + self.identifiers[id] = idx.variables[id] + for id in idx.structs.keys(): + if self.structs.has_key(id): + print "struct %s from %s redeclared in %s" % ( + id, self.structs[id].module, idx.structs[id].module) + else: + self.structs[id] = idx.structs[id] + self.identifiers[id] = idx.structs[id] + for id in idx.typedefs.keys(): + if self.typedefs.has_key(id): + print "typedef %s from %s redeclared in %s" % ( + id, self.typedefs[id].module, idx.typedefs[id].module) + else: + self.typedefs[id] = idx.typedefs[id] + self.identifiers[id] = idx.typedefs[id] + for id in idx.macros.keys(): + # + # macro might be used to override functions or variables + # definitions + # + if self.variables.has_key(id): + continue + if self.functions.has_key(id): + continue + if self.enums.has_key(id): + continue + if self.macros.has_key(id): + print "macro %s from %s redeclared in %s" % ( + id, self.macros[id].module, idx.macros[id].module) + else: + self.macros[id] = idx.macros[id] + self.identifiers[id] = idx.macros[id] + for id in idx.enums.keys(): + if self.enums.has_key(id): + print "enum %s from %s redeclared in %s" % ( + id, self.enums[id].module, idx.enums[id].module) else: - line = self.line - self.line = "" - if line == None: - return None - - if line[0] == '#': - self.tokens = map((lambda x: ('preproc', x)), - string.split(line)) - break; - l = len(line) - if line[0] == '"' or line[0] == "'": - end = line[0] - line = line[1:] - found = 0 - tok = "" - while found == 0: - i = 0 - l = len(line) - while i < l: - if line[i] == end: - self.line = line[i+1:] - line = line[:i] - l = i - found = 1 - break - if line[i] == '\\': - i = i + 1 - i = i + 1 - tok = tok + line - if found == 0: - line = self.getline() - if line == None: - return None - self.last = ('string', tok) - return self.last - - if l >= 2 and line[0] == '/' and line[1] == '*': - line = line[2:] - found = 0 - tok = "" - while found == 0: - i = 0 - l = len(line) - while i < l: - if line[i] == '*' and i+1 < l and line[i+1] == '/': - self.line = line[i+2:] - line = line[:i-1] - l = i - found = 1 - break - i = i + 1 - if tok != "": - tok = tok + "\n" - tok = tok + line - if found == 0: - line = self.getline() - if line == None: - return None - self.last = ('comment', tok) - return self.last - if l >= 2 and line[0] == '/' and line[1] == '/': - line = line[2:] - self.last = ('comment', line) - return self.last - i = 0 - while i < l: - if line[i] == '/' and i+1 < l and line[i+1] == '/': - self.line = line[i:] - line = line[:i] - break - if line[i] == '/' and i+1 < l and line[i+1] == '*': - self.line = line[i:] - line = line[:i] - break - if line[i] == '"' or line[i] == "'": - self.line = line[i:] - line = line[:i] - break - i = i + 1 - l = len(line) - i = 0 - while i < l: - if line[i] == ' ' or line[i] == '\t': - i = i + 1 - continue - o = ord(line[i]) - if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ - (o >= 48 and o <= 57): - s = i - while i < l: - o = ord(line[i]) - if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ - (o >= 48 and o <= 57) or string.find( + self.enums[id] = idx.enums[id] + self.identifiers[id] = idx.enums[id] + + def merge_public(self, idx): + for id in idx.functions.keys(): + if self.functions.has_key(id): + up = idx.functions[id] + self.functions[id].update(None, up.type, up.info, up.extra) + # else: + # print "Function %s from %s is not declared in headers" % ( + # id, idx.functions[id].module) + # TODO: do the same for variables. + + def analyze_dict(self, type, dict): + count = 0 + public = 0 + for name in dict.keys(): + id = dict[name] + count = count + 1 + if id.static == 0: + public = public + 1 + if count != public: + print " %d %s , %d public" % (count, type, public) + elif count != 0: + print " %d public %s" % (count, type) + + + def analyze(self): + self.analyze_dict("functions", self.functions) + self.analyze_dict("variables", self.variables) + self.analyze_dict("structs", self.structs) + self.analyze_dict("typedefs", self.typedefs) + self.analyze_dict("macros", self.macros) + +class CLexer: + """A lexer for the C language, tokenize the input by reading and + analyzing it line by line""" + def __init__(self, input): + self.input = input + self.tokens = [] + self.line = "" + self.lineno = 0 + + def getline(self): + line = '' + while line == '': + line = self.input.readline() + if not line: + return None + self.lineno = self.lineno + 1 + line = string.lstrip(line) + line = string.rstrip(line) + if line == '': + continue + while line[-1] == '\\': + line = line[:-1] + n = self.input.readline() + self.lineno = self.lineno + 1 + n = string.lstrip(n) + n = string.rstrip(n) + if not n: + break + else: + line = line + n + return line + + def getlineno(self): + return self.lineno + + def push(self, token): + self.tokens.insert(0, token); + + def debug(self): + print "Last token: ", self.last + print "Token queue: ", self.tokens + print "Line %d end: " % (self.lineno), self.line + + def token(self): + while self.tokens == []: + if self.line == "": + line = self.getline() + else: + line = self.line + self.line = "" + if line == None: + return None + + if line[0] == '#': + self.tokens = map((lambda x: ('preproc', x)), + string.split(line)) + break; + l = len(line) + if line[0] == '"' or line[0] == "'": + end = line[0] + line = line[1:] + found = 0 + tok = "" + while found == 0: + i = 0 + l = len(line) + while i < l: + if line[i] == end: + self.line = line[i+1:] + line = line[:i] + l = i + found = 1 + break + if line[i] == '\\': + i = i + 1 + i = i + 1 + tok = tok + line + if found == 0: + line = self.getline() + if line == None: + return None + self.last = ('string', tok) + return self.last + + if l >= 2 and line[0] == '/' and line[1] == '*': + line = line[2:] + found = 0 + tok = "" + while found == 0: + i = 0 + l = len(line) + while i < l: + if line[i] == '*' and i+1 < l and line[i+1] == '/': + self.line = line[i+2:] + line = line[:i-1] + l = i + found = 1 + break + i = i + 1 + if tok != "": + tok = tok + "\n" + tok = tok + line + if found == 0: + line = self.getline() + if line == None: + return None + self.last = ('comment', tok) + return self.last + if l >= 2 and line[0] == '/' and line[1] == '/': + line = line[2:] + self.last = ('comment', line) + return self.last + i = 0 + while i < l: + if line[i] == '/' and i+1 < l and line[i+1] == '/': + self.line = line[i:] + line = line[:i] + break + if line[i] == '/' and i+1 < l and line[i+1] == '*': + self.line = line[i:] + line = line[:i] + break + if line[i] == '"' or line[i] == "'": + self.line = line[i:] + line = line[:i] + break + i = i + 1 + l = len(line) + i = 0 + while i < l: + if line[i] == ' ' or line[i] == '\t': + i = i + 1 + continue + o = ord(line[i]) + if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ + (o >= 48 and o <= 57): + s = i + while i < l: + o = ord(line[i]) + if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ + (o >= 48 and o <= 57) or string.find( " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1: - i = i + 1 - else: - break - self.tokens.append(('name', line[s:i])) - continue - if string.find("(){}:;,[]", line[i]) != -1: + i = i + 1 + else: + break + self.tokens.append(('name', line[s:i])) + continue + if string.find("(){}:;,[]", line[i]) != -1: # if line[i] == '(' or line[i] == ')' or line[i] == '{' or \ # line[i] == '}' or line[i] == ':' or line[i] == ';' or \ # line[i] == ',' or line[i] == '[' or line[i] == ']': - self.tokens.append(('sep', line[i])) - i = i + 1 - continue - if string.find("+-*><=/%&!|.", line[i]) != -1: + self.tokens.append(('sep', line[i])) + i = i + 1 + continue + if string.find("+-*><=/%&!|.", line[i]) != -1: # if line[i] == '+' or line[i] == '-' or line[i] == '*' or \ # line[i] == '>' or line[i] == '<' or line[i] == '=' or \ # line[i] == '/' or line[i] == '%' or line[i] == '&' or \ # line[i] == '!' or line[i] == '|' or line[i] == '.': - if line[i] == '.' and i + 2 < l and \ - line[i+1] == '.' and line[i+2] == '.': - self.tokens.append(('name', '...')) - i = i + 3 - continue - - j = i + 1 - if j < l and ( - string.find("+-*><=/%&!|", line[j]) != -1): + if line[i] == '.' and i + 2 < l and \ + line[i+1] == '.' and line[i+2] == '.': + self.tokens.append(('name', '...')) + i = i + 3 + continue + + j = i + 1 + if j < l and ( + string.find("+-*><=/%&!|", line[j]) != -1): # line[j] == '+' or line[j] == '-' or line[j] == '*' or \ # line[j] == '>' or line[j] == '<' or line[j] == '=' or \ # line[j] == '/' or line[j] == '%' or line[j] == '&' or \ # line[j] == '!' or line[j] == '|'): - self.tokens.append(('op', line[i:j+1])) - i = j + 1 - else: - self.tokens.append(('op', line[i])) - i = i + 1 - continue - s = i - while i < l: - o = ord(line[i]) - if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ - (o >= 48 and o <= 57) or ( - string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1): + self.tokens.append(('op', line[i:j+1])) + i = j + 1 + else: + self.tokens.append(('op', line[i])) + i = i + 1 + continue + s = i + while i < l: + o = ord(line[i]) + if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ + (o >= 48 and o <= 57) or ( + string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1): # line[i] != ' ' and line[i] != '\t' and # line[i] != '(' and line[i] != ')' and # line[i] != '{' and line[i] != '}' and @@ -437,1044 +485,1377 @@ class CLexer: # line[i] != ']' and line[i] != '=' and # line[i] != '*' and line[i] != '>' and # line[i] != '<'): - i = i + 1 - else: - break - self.tokens.append(('name', line[s:i])) + i = i + 1 + else: + break + self.tokens.append(('name', line[s:i])) - tok = self.tokens[0] - self.tokens = self.tokens[1:] - self.last = tok - return tok + tok = self.tokens[0] + self.tokens = self.tokens[1:] + self.last = tok + return tok class CParser: - """The C module parser""" - def __init__(self, filename, idx = None): - self.filename = filename - if len(filename) > 2 and filename[-2:] == '.h': - self.is_header = 1 - else: - self.is_header = 0 - self.input = open(filename) - self.lexer = CLexer(self.input) - if idx == None: - self.index = index() - else: - self.index = idx - self.top_comment = "" - self.last_comment = "" - self.comment = None - - def lineno(self): - return self.lexer.getlineno() - - def error(self, msg, token=-1): - print "Parse Error: " + msg - if token != -1: - print "Got token ", token - self.lexer.debug() - sys.exit(1) - - def debug(self, msg, token=-1): - print "Debug: " + msg - if token != -1: - print "Got token ", token - self.lexer.debug() - - def parseComment(self, token): - if self.top_comment == "": - self.top_comment = token[1] - if self.comment == None or token[1][0] == '*': - self.comment = token[1]; - else: - self.comment = self.comment + token[1] - token = self.lexer.token() - return token + """The C module parser""" + def __init__(self, filename, idx = None): + self.filename = filename + if len(filename) > 2 and filename[-2:] == '.h': + self.is_header = 1 + else: + self.is_header = 0 + self.input = open(filename) + self.lexer = CLexer(self.input) + if idx == None: + self.index = index() + else: + self.index = idx + self.top_comment = "" + self.last_comment = "" + self.comment = None + self.collect_ref = 0 + + def collect_references(self): + self.collect_ref = 1 + + def lineno(self): + return self.lexer.getlineno() + + def index_add(self, name, module, static, type, info=None, extra = None): + self.index.add(name, module, static, type, self.lineno(), + info, extra) + + def index_add_ref(self, name, module, static, type, info=None, + extra = None): + self.index.add_ref(name, module, static, type, self.lineno(), + info, extra) + + def error(self, msg, token=-1): + print "Parse Error: " + msg + if token != -1: + print "Got token ", token + self.lexer.debug() + sys.exit(1) + + def debug(self, msg, token=-1): + print "Debug: " + msg + if token != -1: + print "Got token ", token + self.lexer.debug() + + def parseTopComment(self, comment): + res = {} + lines = string.split(comment, "\n") + item = None + for line in lines: + while line != "" and (line[0] == ' ' or line[0] == '\t'): + line = line[1:] + while line != "" and line[0] == '*': + line = line[1:] + while line != "" and (line[0] == ' ' or line[0] == '\t'): + line = line[1:] + try: + (it, line) = string.split(line, ":", 1) + item = it + while line != "" and (line[0] == ' ' or line[0] == '\t'): + line = line[1:] + if res.has_key(item): + res[item] = res[item] + " " + line + else: + res[item] = line + except: + if item != None: + if res.has_key(item): + res[item] = res[item] + " " + line + else: + res[item] = line + self.index.info = res + + def parseComment(self, token): + if self.top_comment == "": + self.top_comment = token[1] + if self.comment == None or token[1][0] == '*': + self.comment = token[1]; + else: + self.comment = self.comment + token[1] + token = self.lexer.token() + return token # # Parse a comment block associate to a macro # - def parseMacroComment(self, name, quiet = 0): - if name[0:2] == '__': - quiet = 1 - - args = [] - desc = "" - - if self.comment == None: - if not quiet: - print "Missing comment for macro %s" % (name) - return((args, desc)) - if self.comment[0] != '*': - if not quiet: - print "Missing * in macro comment for %s" % (name) - return((args, desc)) - lines = string.split(self.comment, '\n') - if lines[0] == '*': - del lines[0] - if lines[0] != "* %s:" % (name): - if not quiet: - print "Misformatted macro comment for %s" % (name) - print " Expecting '* %s:' got '%s'" % (name, lines[0]) - return((args, desc)) - del lines[0] - while lines[0] == '*': - del lines[0] - while len(lines) > 0 and lines[0][0:3] == '* @': - l = lines[0][3:] - try: - (arg, desc) = string.split(l, ':', 1) - desc=string.strip(desc) - arg=string.strip(arg) - except: - if not quiet: - print "Misformatted macro comment for %s" % (name) - print " problem with '%s'" % (lines[0]) - del lines[0] - continue - del lines[0] - l = string.strip(lines[0]) - while len(l) > 2 and l[0:3] != '* @': - while l[0] == '*': - l = l[1:] - desc = desc + ' ' + string.strip(l) - del lines[0] - if len(lines) == 0: - break - l = lines[0] - args.append((arg, desc)) - while len(lines) > 0 and lines[0] == '*': - del lines[0] - desc = "" - while len(lines) > 0: - l = lines[0] - while len(l) > 0 and l[0] == '*': - l = l[1:] - l = string.strip(l) - desc = desc + " " + l - del lines[0] + def parseMacroComment(self, name, quiet = 0): + if name[0:2] == '__': + quiet = 1 + + args = [] + desc = "" + + if self.comment == None: + if not quiet: + print "Missing comment for macro %s" % (name) + return((args, desc)) + if self.comment[0] != '*': + if not quiet: + print "Missing * in macro comment for %s" % (name) + return((args, desc)) + lines = string.split(self.comment, '\n') + if lines[0] == '*': + del lines[0] + if lines[0] != "* %s:" % (name): + if not quiet: + print "Misformatted macro comment for %s" % (name) + print " Expecting '* %s:' got '%s'" % (name, lines[0]) + return((args, desc)) + del lines[0] + while lines[0] == '*': + del lines[0] + while len(lines) > 0 and lines[0][0:3] == '* @': + l = lines[0][3:] + try: + (arg, desc) = string.split(l, ':', 1) + desc=string.strip(desc) + arg=string.strip(arg) + except: + if not quiet: + print "Misformatted macro comment for %s" % (name) + print " problem with '%s'" % (lines[0]) + del lines[0] + continue + del lines[0] + l = string.strip(lines[0]) + while len(l) > 2 and l[0:3] != '* @': + while l[0] == '*': + l = l[1:] + desc = desc + ' ' + string.strip(l) + del lines[0] + if len(lines) == 0: + break + l = lines[0] + args.append((arg, desc)) + while len(lines) > 0 and lines[0] == '*': + del lines[0] + desc = "" + while len(lines) > 0: + l = lines[0] + while len(l) > 0 and l[0] == '*': + l = l[1:] + l = string.strip(l) + desc = desc + " " + l + del lines[0] - desc = string.strip(desc) + desc = string.strip(desc) - if quiet == 0: - if desc == "": - print "Macro comment for %s lack description of the macro" % (name) + if quiet == 0: + if desc == "": + print "Macro comment for %s lack description of the macro" % (name) - return((args, desc)) + return((args, desc)) # # Parse a comment block and merge the informations found in the # parameters descriptions, finally returns a block as complete # as possible # - def mergeFunctionComment(self, name, description, quiet = 0): - if name == 'main': - quiet = 1 - if name[0:2] == '__': - quiet = 1 - - (ret, args) = description - desc = "" - retdesc = "" - - if self.comment == None: - if not quiet: - print "Missing comment for function %s" % (name) - return(((ret[0], retdesc), args, desc)) - if self.comment[0] != '*': - if not quiet: - print "Missing * in function comment for %s" % (name) - return(((ret[0], retdesc), args, desc)) - lines = string.split(self.comment, '\n') - if lines[0] == '*': - del lines[0] - if lines[0] != "* %s:" % (name): - if not quiet: - print "Misformatted function comment for %s" % (name) - print " Expecting '* %s:' got '%s'" % (name, lines[0]) - return(((ret[0], retdesc), args, desc)) - del lines[0] - while lines[0] == '*': - del lines[0] - nbargs = len(args) - while len(lines) > 0 and lines[0][0:3] == '* @': - l = lines[0][3:] - try: - (arg, desc) = string.split(l, ':', 1) - desc=string.strip(desc) - arg=string.strip(arg) - except: - if not quiet: - print "Misformatted function comment for %s" % (name) - print " problem with '%s'" % (lines[0]) - del lines[0] - continue - del lines[0] - l = string.strip(lines[0]) - while len(l) > 2 and l[0:3] != '* @': - while l[0] == '*': - l = l[1:] - desc = desc + ' ' + string.strip(l) - del lines[0] - if len(lines) == 0: - break - l = lines[0] - i = 0 - while i < nbargs: - if args[i][1] == arg: - args[i] = (args[i][0], arg, desc) - break; - i = i + 1 - if i >= nbargs: - if not quiet: - print "Uname to find arg %s from function comment for %s" % ( - arg, name) - while len(lines) > 0 and lines[0] == '*': - del lines[0] - desc = "" - while len(lines) > 0: - l = lines[0] - while len(l) > 0 and l[0] == '*': - l = l[1:] - l = string.strip(l) - if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return": - try: - l = string.split(l, ' ', 1)[1] - except: - l = "" - retdesc = string.strip(l) - del lines[0] - while len(lines) > 0: - l = lines[0] - while len(l) > 0 and l[0] == '*': - l = l[1:] - l = string.strip(l) - retdesc = retdesc + " " + l - del lines[0] - else: - desc = desc + " " + l - del lines[0] + def mergeFunctionComment(self, name, description, quiet = 0): + if name == 'main': + quiet = 1 + if name[0:2] == '__': + quiet = 1 + + (ret, args) = description + desc = "" + retdesc = "" + + if self.comment == None: + if not quiet: + print "Missing comment for function %s" % (name) + return(((ret[0], retdesc), args, desc)) + if self.comment[0] != '*': + if not quiet: + print "Missing * in function comment for %s" % (name) + return(((ret[0], retdesc), args, desc)) + lines = string.split(self.comment, '\n') + if lines[0] == '*': + del lines[0] + if lines[0] != "* %s:" % (name): + if not quiet: + print "Misformatted function comment for %s" % (name) + print " Expecting '* %s:' got '%s'" % (name, lines[0]) + return(((ret[0], retdesc), args, desc)) + del lines[0] + while lines[0] == '*': + del lines[0] + nbargs = len(args) + while len(lines) > 0 and lines[0][0:3] == '* @': + l = lines[0][3:] + try: + (arg, desc) = string.split(l, ':', 1) + desc=string.strip(desc) + arg=string.strip(arg) + except: + if not quiet: + print "Misformatted function comment for %s" % (name) + print " problem with '%s'" % (lines[0]) + del lines[0] + continue + del lines[0] + l = string.strip(lines[0]) + while len(l) > 2 and l[0:3] != '* @': + while l[0] == '*': + l = l[1:] + desc = desc + ' ' + string.strip(l) + del lines[0] + if len(lines) == 0: + break + l = lines[0] + i = 0 + while i < nbargs: + if args[i][1] == arg: + args[i] = (args[i][0], arg, desc) + break; + i = i + 1 + if i >= nbargs: + if not quiet: + print "Unable to find arg %s from function comment for %s" % ( + arg, name) + while len(lines) > 0 and lines[0] == '*': + del lines[0] + desc = "" + while len(lines) > 0: + l = lines[0] + while len(l) > 0 and l[0] == '*': + l = l[1:] + l = string.strip(l) + if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return": + try: + l = string.split(l, ' ', 1)[1] + except: + l = "" + retdesc = string.strip(l) + del lines[0] + while len(lines) > 0: + l = lines[0] + while len(l) > 0 and l[0] == '*': + l = l[1:] + l = string.strip(l) + retdesc = retdesc + " " + l + del lines[0] + else: + desc = desc + " " + l + del lines[0] - retdesc = string.strip(retdesc) - desc = string.strip(desc) + retdesc = string.strip(retdesc) + desc = string.strip(desc) - if quiet == 0: + if quiet == 0: # # report missing comments # - i = 0 - while i < nbargs: - if args[i][2] == None and args[i][0] != "void" and args[i][1] != None: - print "Function comment for %s lack description of arg %s" % (name, args[i][1]) - i = i + 1 - if retdesc == "" and ret[0] != "void": - print "Function comment for %s lack description of return value" % (name) - if desc == "": - print "Function comment for %s lack description of the function" % (name) - - - return(((ret[0], retdesc), args, desc)) - - def parsePreproc(self, token): - name = token[1] - if name == "#include": - token = self.lexer.token() - if token == None: - return None - if token[0] == 'preproc': - self.index.add(token[1], self.filename, not self.is_header, + i = 0 + while i < nbargs: + if args[i][2] == None and args[i][0] != "void" and args[i][1] != None: + print "Function comment for %s lack description of arg %s" % (name, args[i][1]) + i = i + 1 + if retdesc == "" and ret[0] != "void": + print "Function comment for %s lack description of return value" % (name) + if desc == "": + print "Function comment for %s lack description of the function" % (name) + + + return(((ret[0], retdesc), args, desc)) + + def parsePreproc(self, token): + name = token[1] + if name == "#include": + token = self.lexer.token() + if token == None: + return None + if token[0] == 'preproc': + self.index_add(token[1], self.filename, not self.is_header, "include") - return self.lexer.token() - return token - if name == "#define": - token = self.lexer.token() - if token == None: - return None - if token[0] == 'preproc': + return self.lexer.token() + return token + if name == "#define": + token = self.lexer.token() + if token == None: + return None + if token[0] == 'preproc': # TODO macros with arguments - name = token[1] - lst = [] - token = self.lexer.token() - while token != None and token[0] == 'preproc' and \ - token[1][0] != '#': - lst.append(token[1]) - token = self.lexer.token() - try: - name = string.split(name, '(') [0] - except: - pass - info = self.parseMacroComment(name, not self.is_header) - self.index.add(name, self.filename, not self.is_header, + name = token[1] + lst = [] + token = self.lexer.token() + while token != None and token[0] == 'preproc' and \ + token[1][0] != '#': + lst.append(token[1]) + token = self.lexer.token() + try: + name = string.split(name, '(') [0] + except: + pass + info = self.parseMacroComment(name, not self.is_header) + self.index_add(name, self.filename, not self.is_header, "macro", info) - return token - token = self.lexer.token() - while token != None and token[0] == 'preproc' and \ - token[1][0] != '#': - token = self.lexer.token() - return token + return token + token = self.lexer.token() + while token != None and token[0] == 'preproc' and \ + token[1][0] != '#': + token = self.lexer.token() + return token # # token acquisition on top of the lexer, it handle internally # preprocessor and comments since they are logically not part of # the program structure. # - def token(self): - global ignored_words - - token = self.lexer.token() - while token != None: - if token[0] == 'comment': - token = self.parseComment(token) - continue - elif token[0] == 'preproc': - token = self.parsePreproc(token) - continue - elif token[0] == "name" and ignored_words.has_key(token[1]): - (n, info) = ignored_words[token[1]] - i = 0 - while i < n: - token = self.lexer.token() - i = i + 1 - token = self.lexer.token() - continue - else: - if debug: - print "=> ", token - return token - return None + def token(self): + global ignored_words + + token = self.lexer.token() + while token != None: + if token[0] == 'comment': + token = self.parseComment(token) + continue + elif token[0] == 'preproc': + token = self.parsePreproc(token) + continue + elif token[0] == "name" and ignored_words.has_key(token[1]): + (n, info) = ignored_words[token[1]] + i = 0 + while i < n: + token = self.lexer.token() + i = i + 1 + token = self.lexer.token() + continue + else: + if debug: + print "=> ", token + return token + return None # # Parse a typedef, it records the type and its name. # - def parseTypedef(self, token): - if token == None: - return None - token = self.parseType(token) - if token == None: - self.error("parsing typedef") - return None - base_type = self.type - type = base_type + def parseTypedef(self, token): + if token == None: + return None + token = self.parseType(token) + if token == None: + self.error("parsing typedef") + return None + base_type = self.type + type = base_type #self.debug("end typedef type", token) - while token != None: - if token[0] == "name": - name = token[1] - signature = self.signature - if signature != None: - type = string.split(type, '(')[0] - d = self.mergeFunctionComment(name, - ((type, None), signature), 1) - self.index.add(name, self.filename, not self.is_header, + while token != None: + if token[0] == "name": + name = token[1] + signature = self.signature + if signature != None: + type = string.split(type, '(')[0] + d = self.mergeFunctionComment(name, + ((type, None), signature), 1) + self.index_add(name, self.filename, not self.is_header, "functype", d) - else: - if base_type == "struct": - self.index.add(name, self.filename, not self.is_header, + else: + if base_type == "struct": + self.index_add(name, self.filename, not self.is_header, "struct", type) - base_type = "struct " + name - else: - self.index.add(name, self.filename, not self.is_header, + base_type = "struct " + name + else: + self.index_add(name, self.filename, not self.is_header, "typedef", type) - token = self.token() - else: - self.error("parsing typedef: expecting a name") - return token + token = self.token() + else: + self.error("parsing typedef: expecting a name") + return token #self.debug("end typedef", token) - if token != None and token[0] == 'sep' and token[1] == ',': - type = base_type - token = self.token() - while token != None and token[0] == "op": - type = type + token[1] - token = self.token() - elif token != None and token[0] == 'sep' and token[1] == ';': - break; - elif token != None and token[0] == 'name': - type = base_type - continue; - else: - self.error("parsing typedef: expecting ';'", token) - return token - token = self.token() - return token + if token != None and token[0] == 'sep' and token[1] == ',': + type = base_type + token = self.token() + while token != None and token[0] == "op": + type = type + token[1] + token = self.token() + elif token != None and token[0] == 'sep' and token[1] == ';': + break; + elif token != None and token[0] == 'name': + type = base_type + continue; + else: + self.error("parsing typedef: expecting ';'", token) + return token + token = self.token() + return token # # Parse a C code block, used for functions it parse till # the balancing } included # - def parseBlock(self, token): - while token != None: - if token[0] == "sep" and token[1] == "{": - token = self.token() - token = self.parseBlock(token) - elif token[0] == "sep" and token[1] == "}": - self.comment = None - token = self.token() - return token - else: - token = self.token() - return token + def parseBlock(self, token): + while token != None: + if token[0] == "sep" and token[1] == "{": + token = self.token() + token = self.parseBlock(token) + elif token[0] == "sep" and token[1] == "}": + self.comment = None + token = self.token() + return token + else: + if self.collect_ref == 1: + oldtok = token + token = self.token() + if oldtok[0] == "name" and oldtok[1][0:3] == "xml": + if token[0] == "sep" and token[1] == "(": + self.index_add_ref(oldtok[1], self.filename, + 0, "function") + token = self.token() + elif token[0] == "name": + token = self.token() + if token[0] == "sep" and (token[1] == ";" or + token[1] == "," or token[1] == "="): + self.index_add_ref(oldtok[1], self.filename, + 0, "type") + elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_": + self.index_add_ref(oldtok[1], self.filename, + 0, "typedef") + elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_": + self.index_add_ref(oldtok[1], self.filename, + 0, "typedef") + + else: + token = self.token() + return token # # Parse a C struct definition till the balancing } # - def parseStruct(self, token): - fields = [] + def parseStruct(self, token): + fields = [] #self.debug("start parseStruct", token) - while token != None: - if token[0] == "sep" and token[1] == "{": - token = self.token() - token = self.parseTypeBlock(token) - elif token[0] == "sep" and token[1] == "}": - self.struct_fields = fields + while token != None: + if token[0] == "sep" and token[1] == "{": + token = self.token() + token = self.parseTypeBlock(token) + elif token[0] == "sep" and token[1] == "}": + self.struct_fields = fields #self.debug("end parseStruct", token) #print fields - token = self.token() - return token - else: - base_type = self.type + token = self.token() + return token + else: + base_type = self.type #self.debug("before parseType", token) - token = self.parseType(token) + token = self.parseType(token) #self.debug("after parseType", token) - if token != None and token[0] == "name": - fname = token[1] - token = self.token() - if token[0] == "sep" and token[1] == ";": - self.comment = None - token = self.token() - fields.append((self.type, fname, self.comment)) - self.comment = None - else: - self.error("parseStruct: expecting ;", token) - elif token != None and token[0] == "sep" and token[1] == "{": - token = self.token() - token = self.parseTypeBlock(token) - if token != None and token[0] == "name": - token = self.token() - if token != None and token[0] == "sep" and token[1] == ";": - token = self.token() - else: - self.error("parseStruct: expecting ;", token) - else: - self.error("parseStruct: name", token) - token = self.token() - self.type = base_type; - self.struct_fields = fields + if token != None and token[0] == "name": + fname = token[1] + token = self.token() + if token[0] == "sep" and token[1] == ";": + self.comment = None + token = self.token() + fields.append((self.type, fname, self.comment)) + self.comment = None + else: + self.error("parseStruct: expecting ;", token) + elif token != None and token[0] == "sep" and token[1] == "{": + token = self.token() + token = self.parseTypeBlock(token) + if token != None and token[0] == "name": + token = self.token() + if token != None and token[0] == "sep" and token[1] == ";": + token = self.token() + else: + self.error("parseStruct: expecting ;", token) + else: + self.error("parseStruct: name", token) + token = self.token() + self.type = base_type; + self.struct_fields = fields #self.debug("end parseStruct", token) #print fields - return token + return token # # Parse a C enum block, parse till the balancing } # - def parseEnumBlock(self, token): - self.enums = [] - name = None - self.comment = None - comment = "" - value = "0" - while token != None: - if token[0] == "sep" and token[1] == "{": - token = self.token() - token = self.parseTypeBlock(token) - elif token[0] == "sep" and token[1] == "}": - if name != None: - if self.comment != None: - comment = self.comment - self.comment = None - self.enums.append((name, value, comment)) - token = self.token() - return token - elif token[0] == "name": - if name != None: - if self.comment != None: - comment = string.strip(self.comment) - self.comment = None - self.enums.append((name, value, comment)) - name = token[1] - comment = "" - token = self.token() - if token[0] == "op" and token[1][0] == "=": - value = "" - if len(token[1]) > 1: - value = token[1][1:] - token = self.token() - while token[0] != "sep" or (token[1] != ',' and - token[1] != '}'): - value = value + token[1] - token = self.token() - else: - try: - value = "%d" % (int(value) + 1) - except: - print "Failed to compute value of enum %s" % (name) - value="" - if token[0] == "sep" and token[1] == ",": - token = self.token() - else: - token = self.token() - return token + def parseEnumBlock(self, token): + self.enums = [] + name = None + self.comment = None + comment = "" + value = "0" + while token != None: + if token[0] == "sep" and token[1] == "{": + token = self.token() + token = self.parseTypeBlock(token) + elif token[0] == "sep" and token[1] == "}": + if name != None: + if self.comment != None: + comment = self.comment + self.comment = None + self.enums.append((name, value, comment)) + token = self.token() + return token + elif token[0] == "name": + if name != None: + if self.comment != None: + comment = string.strip(self.comment) + self.comment = None + self.enums.append((name, value, comment)) + name = token[1] + comment = "" + token = self.token() + if token[0] == "op" and token[1][0] == "=": + value = "" + if len(token[1]) > 1: + value = token[1][1:] + token = self.token() + while token[0] != "sep" or (token[1] != ',' and + token[1] != '}'): + value = value + token[1] + token = self.token() + else: + try: + value = "%d" % (int(value) + 1) + except: + print "Failed to compute value of enum %s" % (name) + value="" + if token[0] == "sep" and token[1] == ",": + token = self.token() + else: + token = self.token() + return token # # Parse a C definition block, used for structs it parse till # the balancing } # - def parseTypeBlock(self, token): - while token != None: - if token[0] == "sep" and token[1] == "{": - token = self.token() - token = self.parseTypeBlock(token) - elif token[0] == "sep" and token[1] == "}": - token = self.token() - return token - else: - token = self.token() - return token + def parseTypeBlock(self, token): + while token != None: + if token[0] == "sep" and token[1] == "{": + token = self.token() + token = self.parseTypeBlock(token) + elif token[0] == "sep" and token[1] == "}": + token = self.token() + return token + else: + token = self.token() + return token # # Parse a type: the fact that the type name can either occur after # the definition or within the definition makes it a little harder # if inside, the name token is pushed back before returning # - def parseType(self, token): - self.type = "" - self.struct_fields = [] - self.signature = None - if token == None: - return token - - while token[0] == "name" and ( - token[1] == "const" or token[1] == "unsigned"): - if self.type == "": - self.type = token[1] - else: - self.type = self.type + " " + token[1] - token = self.token() - - if token[0] == "name" and (token[1] == "long" or token[1] == "short"): - if self.type == "": - self.type = token[1] - else: - self.type = self.type + " " + token[1] - if token[0] == "name" and token[1] == "int": - if self.type == "": - self.type = tmp[1] - else: - self.type = self.type + " " + tmp[1] + def parseType(self, token): + self.type = "" + self.struct_fields = [] + self.signature = None + if token == None: + return token + + while token[0] == "name" and ( + token[1] == "const" or token[1] == "unsigned"): + if self.type == "": + self.type = token[1] + else: + self.type = self.type + " " + token[1] + token = self.token() + + if token[0] == "name" and (token[1] == "long" or token[1] == "short"): + if self.type == "": + self.type = token[1] + else: + self.type = self.type + " " + token[1] + if token[0] == "name" and token[1] == "int": + if self.type == "": + self.type = tmp[1] + else: + self.type = self.type + " " + tmp[1] - elif token[0] == "name" and token[1] == "struct": - if self.type == "": - self.type = token[1] - else: - self.type = self.type + " " + token[1] - token = self.token() - nametok = None - if token[0] == "name": - nametok = token - token = self.token() - if token != None and token[0] == "sep" and token[1] == "{": - token = self.token() - token = self.parseStruct(token) - elif token != None and token[0] == "op" and token[1] == "*": - self.type = self.type + " " + nametok[1] + " *" - token = self.token() - while token != None and token[0] == "op" and token[1] == "*": - self.type = self.type + " *" - token = self.token() - if token[0] == "name": - nametok = token - token = self.token() - else: - self.error("struct : expecting name", token) - return token - elif token != None and token[0] == "name" and nametok != None: - self.type = self.type + " " + nametok[1] - return token - - if nametok != None: - self.lexer.push(token) - token = nametok - return token - - elif token[0] == "name" and token[1] == "enum": - if self.type == "": - self.type = token[1] - else: - self.type = self.type + " " + token[1] - self.enums = [] - token = self.token() - if token != None and token[0] == "sep" and token[1] == "{": - token = self.token() - token = self.parseEnumBlock(token) - else: - self.error("parsing enum: expecting '{'", token) - enum_type = None - if token != None and token[0] != "name": - self.lexer.push(token) - token = ("name", "enum") - else: - enum_type = token[1] - for enum in self.enums: - self.index.add(enum[0], self.filename, - not self.is_header, "enum", - (enum[1], enum[2], enum_type)) - return token - - elif token[0] == "name": - if self.type == "": - self.type = token[1] - else: - self.type = self.type + " " + token[1] - else: - self.error("parsing type %s: expecting a name" % (self.type), - token) - return token - token = self.token() - while token != None and (token[0] == "op" or - token[0] == "name" and token[1] == "const"): - self.type = self.type + " " + token[1] - token = self.token() + elif token[0] == "name" and token[1] == "struct": + if self.type == "": + self.type = token[1] + else: + self.type = self.type + " " + token[1] + token = self.token() + nametok = None + if token[0] == "name": + nametok = token + token = self.token() + if token != None and token[0] == "sep" and token[1] == "{": + token = self.token() + token = self.parseStruct(token) + elif token != None and token[0] == "op" and token[1] == "*": + self.type = self.type + " " + nametok[1] + " *" + token = self.token() + while token != None and token[0] == "op" and token[1] == "*": + self.type = self.type + " *" + token = self.token() + if token[0] == "name": + nametok = token + token = self.token() + else: + self.error("struct : expecting name", token) + return token + elif token != None and token[0] == "name" and nametok != None: + self.type = self.type + " " + nametok[1] + return token + + if nametok != None: + self.lexer.push(token) + token = nametok + return token + + elif token[0] == "name" and token[1] == "enum": + if self.type == "": + self.type = token[1] + else: + self.type = self.type + " " + token[1] + self.enums = [] + token = self.token() + if token != None and token[0] == "sep" and token[1] == "{": + token = self.token() + token = self.parseEnumBlock(token) + else: + self.error("parsing enum: expecting '{'", token) + enum_type = None + if token != None and token[0] != "name": + self.lexer.push(token) + token = ("name", "enum") + else: + enum_type = token[1] + for enum in self.enums: + self.index_add(enum[0], self.filename, + not self.is_header, "enum", + (enum[1], enum[2], enum_type)) + return token + + elif token[0] == "name": + if self.type == "": + self.type = token[1] + else: + self.type = self.type + " " + token[1] + else: + self.error("parsing type %s: expecting a name" % (self.type), + token) + return token + token = self.token() + while token != None and (token[0] == "op" or + token[0] == "name" and token[1] == "const"): + self.type = self.type + " " + token[1] + token = self.token() # # if there is a parenthesis here, this means a function type # - if token != None and token[0] == "sep" and token[1] == '(': - self.type = self.type + token[1] - token = self.token() - while token != None and token[0] == "op" and token[1] == '*': - self.type = self.type + token[1] - token = self.token() - if token == None or token[0] != "name" : - self.error("parsing function type, name expected", token); - return token - self.type = self.type + token[1] - nametok = token - token = self.token() - if token != None and token[0] == "sep" and token[1] == ')': - self.type = self.type + token[1] - token = self.token() - if token != None and token[0] == "sep" and token[1] == '(': - token = self.token() - type = self.type; - token = self.parseSignature(token); - self.type = type; - else: - self.error("parsing function type, '(' expected", token); - return token - else: - self.error("parsing function type, ')' expected", token); - return token - self.lexer.push(token) - token = nametok - return token + if token != None and token[0] == "sep" and token[1] == '(': + self.type = self.type + token[1] + token = self.token() + while token != None and token[0] == "op" and token[1] == '*': + self.type = self.type + token[1] + token = self.token() + if token == None or token[0] != "name" : + self.error("parsing function type, name expected", token); + return token + self.type = self.type + token[1] + nametok = token + token = self.token() + if token != None and token[0] == "sep" and token[1] == ')': + self.type = self.type + token[1] + token = self.token() + if token != None and token[0] == "sep" and token[1] == '(': + token = self.token() + type = self.type; + token = self.parseSignature(token); + self.type = type; + else: + self.error("parsing function type, '(' expected", token); + return token + else: + self.error("parsing function type, ')' expected", token); + return token + self.lexer.push(token) + token = nametok + return token # # do some lookahead for arrays # - if token != None and token[0] == "name": - nametok = token - token = self.token() - if token != None and token[0] == "sep" and token[1] == '[': - self.type = self.type + nametok[1] - while token != None and token[0] == "sep" and token[1] == '[': - self.type = self.type + token[1] - token = self.token() - while token != None and token[0] != 'sep' and \ - token[1] != ']' and token[1] != ';': - self.type = self.type + token[1] - token = self.token() - if token != None and token[0] == 'sep' and token[1] == ']': - self.type = self.type + token[1] - token = self.token() - else: - self.error("parsing array type, ']' expected", token); - return token - elif token != None and token[0] == "sep" and token[1] == ':': + if token != None and token[0] == "name": + nametok = token + token = self.token() + if token != None and token[0] == "sep" and token[1] == '[': + self.type = self.type + nametok[1] + while token != None and token[0] == "sep" and token[1] == '[': + self.type = self.type + token[1] + token = self.token() + while token != None and token[0] != 'sep' and \ + token[1] != ']' and token[1] != ';': + self.type = self.type + token[1] + token = self.token() + if token != None and token[0] == 'sep' and token[1] == ']': + self.type = self.type + token[1] + token = self.token() + else: + self.error("parsing array type, ']' expected", token); + return token + elif token != None and token[0] == "sep" and token[1] == ':': # remove :12 in case it's a limited int size - token = self.token() - token = self.token() - self.lexer.push(token) - token = nametok + token = self.token() + token = self.token() + self.lexer.push(token) + token = nametok - return token + return token # # Parse a signature: '(' has been parsed and we scan the type definition # up to the ')' included - def parseSignature(self, token): - signature = [] - if token != None and token[0] == "sep" and token[1] == ')': - self.signature = [] - token = self.token() - return token - while token != None: - token = self.parseType(token) - if token != None and token[0] == "name": - signature.append((self.type, token[1], None)) - token = self.token() - elif token != None and token[0] == "sep" and token[1] == ',': - token = self.token() - continue - elif token != None and token[0] == "sep" and token[1] == ')': + def parseSignature(self, token): + signature = [] + if token != None and token[0] == "sep" and token[1] == ')': + self.signature = [] + token = self.token() + return token + while token != None: + token = self.parseType(token) + if token != None and token[0] == "name": + signature.append((self.type, token[1], None)) + token = self.token() + elif token != None and token[0] == "sep" and token[1] == ',': + token = self.token() + continue + elif token != None and token[0] == "sep" and token[1] == ')': # only the type was provided - if self.type == "...": - signature.append((self.type, "...", None)) - else: - signature.append((self.type, None, None)) - if token != None and token[0] == "sep": - if token[1] == ',': - token = self.token() - continue - elif token[1] == ')': - token = self.token() - break - self.signature = signature - return token + if self.type == "...": + signature.append((self.type, "...", None)) + else: + signature.append((self.type, None, None)) + if token != None and token[0] == "sep": + if token[1] == ',': + token = self.token() + continue + elif token[1] == ')': + token = self.token() + break + self.signature = signature + return token # # Parse a global definition, be it a type, variable or function # the extern "C" blocks are a bit nasty and require it to recurse. # - def parseGlobal(self, token): - static = 0 - if token[1] == 'extern': - token = self.token() - if token == None: - return token - if token[0] == 'string': - if token[1] == 'C': - token = self.token() - if token == None: - return token - if token[0] == 'sep' and token[1] == "{": - token = self.token() + def parseGlobal(self, token): + static = 0 + if token[1] == 'extern': + token = self.token() + if token == None: + return token + if token[0] == 'string': + if token[1] == 'C': + token = self.token() + if token == None: + return token + if token[0] == 'sep' and token[1] == "{": + token = self.token() # print 'Entering extern "C line ', self.lineno() - while token != None and (token[0] != 'sep' or - token[1] != "}"): - if token[0] == 'name': - token = self.parseGlobal(token) - else: - self.error( + while token != None and (token[0] != 'sep' or + token[1] != "}"): + if token[0] == 'name': + token = self.parseGlobal(token) + else: + self.error( "token %s %s unexpected at the top level" % ( token[0], token[1])) - token = self.parseGlobal(token) + token = self.parseGlobal(token) # print 'Exiting extern "C" line', self.lineno() - token = self.token() - return token - else: - return token - elif token[1] == 'static': - static = 1 - token = self.token() - if token == None or token[0] != 'name': - return token - - if token[1] == 'typedef': - token = self.token() - return self.parseTypedef(token) - else: - token = self.parseType(token) - type_orig = self.type - if token == None or token[0] != "name": - return token - type = type_orig - self.name = token[1] - token = self.token() - while token != None and (token[0] == "sep" or token[0] == "op"): - if token[0] == "sep": - if token[1] == "[": - type = type + token[1] - token = self.token() - while token != None and (token[0] != "sep" or \ - token[1] != ";"): - type = type + token[1] - token = self.token() - - if token != None and token[0] == "op" and token[1] == "=": + token = self.token() + return token + else: + return token + elif token[1] == 'static': + static = 1 + token = self.token() + if token == None or token[0] != 'name': + return token + + if token[1] == 'typedef': + token = self.token() + return self.parseTypedef(token) + else: + token = self.parseType(token) + type_orig = self.type + if token == None or token[0] != "name": + return token + type = type_orig + self.name = token[1] + token = self.token() + while token != None and (token[0] == "sep" or token[0] == "op"): + if token[0] == "sep": + if token[1] == "[": + type = type + token[1] + token = self.token() + while token != None and (token[0] != "sep" or \ + token[1] != ";"): + type = type + token[1] + token = self.token() + + if token != None and token[0] == "op" and token[1] == "=": # # Skip the initialization of the variable # - token = self.token() - if token[0] == 'sep' and token[1] == '{': - token = self.token() - token = self.parseBlock(token) - else: - self.comment = None - while token != None and (token[0] != "sep" or \ - (token[1] != ';' and token[1] != ',')): - token = self.token() - self.comment = None - if token == None or token[0] != "sep" or (token[1] != ';' and - token[1] != ','): - self.error("missing ';' or ',' after value") - - if token != None and token[0] == "sep": - if token[1] == ";": - self.comment = None - token = self.token() - if type == "struct": - self.index.add(self.name, self.filename, - not self.is_header, "struct", self.struct_fields) - else: - self.index.add(self.name, self.filename, - not self.is_header, "variable", type) - break - elif token[1] == "(": - token = self.token() - token = self.parseSignature(token) - if token == None: - return None - if token[0] == "sep" and token[1] == ";": - d = self.mergeFunctionComment(self.name, - ((type, None), self.signature), 1) - self.index.add(self.name, self.filename, static, + token = self.token() + if token[0] == 'sep' and token[1] == '{': + token = self.token() + token = self.parseBlock(token) + else: + self.comment = None + while token != None and (token[0] != "sep" or \ + (token[1] != ';' and token[1] != ',')): + token = self.token() + self.comment = None + if token == None or token[0] != "sep" or (token[1] != ';' and + token[1] != ','): + self.error("missing ';' or ',' after value") + + if token != None and token[0] == "sep": + if token[1] == ";": + self.comment = None + token = self.token() + if type == "struct": + self.index_add(self.name, self.filename, + not self.is_header, "struct", self.struct_fields) + else: + self.index_add(self.name, self.filename, + not self.is_header, "variable", type) + break + elif token[1] == "(": + token = self.token() + token = self.parseSignature(token) + if token == None: + return None + if token[0] == "sep" and token[1] == ";": + d = self.mergeFunctionComment(self.name, + ((type, None), self.signature), 1) + self.index_add(self.name, self.filename, static, "function", d) - token = self.token() - elif token[0] == "sep" and token[1] == "{": - d = self.mergeFunctionComment(self.name, - ((type, None), self.signature), static) - self.index.add(self.name, self.filename, static, + token = self.token() + elif token[0] == "sep" and token[1] == "{": + d = self.mergeFunctionComment(self.name, + ((type, None), self.signature), static) + self.index_add(self.name, self.filename, static, "function", d) - token = self.token() - token = self.parseBlock(token); - elif token[1] == ',': - self.comment = None - self.index.add(self.name, self.filename, static, + token = self.token() + token = self.parseBlock(token); + elif token[1] == ',': + self.comment = None + self.index_add(self.name, self.filename, static, "variable", type) - type = type_orig - token = self.token() - while token != None and token[0] == "sep": - type = type + token[1] - token = self.token() - if token != None and token[0] == "name": - self.name = token[1] - token = self.token() - else: - break + type = type_orig + token = self.token() + while token != None and token[0] == "sep": + type = type + token[1] + token = self.token() + if token != None and token[0] == "name": + self.name = token[1] + token = self.token() + else: + break - return token - - def parse(self): - print "Parsing %s" % (self.filename) - token = self.token() - while token != None: - if token[0] == 'name': - token = self.parseGlobal(token) - else: - self.error("token %s %s unexpected at the top level" % ( - token[0], token[1])) - token = self.parseGlobal(token) - return - return self.index + return token + + def parse(self): + print "Parsing %s" % (self.filename) + token = self.token() + while token != None: + if token[0] == 'name': + token = self.parseGlobal(token) + else: + self.error("token %s %s unexpected at the top level" % ( + token[0], token[1])) + token = self.parseGlobal(token) + return + self.parseTopComment(self.top_comment) + return self.index class docBuilder: - """A documentation builder""" - def __init__(self, name, directories=['.'], excludes=[]): - self.name = name - self.directories = directories - self.excludes = excludes + ignored_files.keys() - self.modules = {} - self.headers = {} - self.idx = index() - - def analyze(self): - print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys())) - self.idx.analyze() - - def scanHeaders(self): - for header in self.headers.keys(): + """A documentation builder""" + def __init__(self, name, directories=['.'], excludes=[]): + self.name = name + self.directories = directories + self.excludes = excludes + ignored_files.keys() + self.modules = {} + self.headers = {} + self.idx = index() + self.xref = {} + self.index = {} + if name == 'libxml2': + self.basename = 'libxml' + else: + self.basename = name + + def indexString(self, id, str): + if str == None: + return + str = string.replace(str, "'", ' ') + str = string.replace(str, '"', ' ') + str = string.replace(str, "/", ' ') + str = string.replace(str, '*', ' ') + str = string.replace(str, "[", ' ') + str = string.replace(str, "]", ' ') + str = string.replace(str, "(", ' ') + str = string.replace(str, ")", ' ') + str = string.replace(str, "<", ' ') + str = string.replace(str, '>', ' ') + str = string.replace(str, "&", ' ') + str = string.replace(str, '#', ' ') + str = string.replace(str, ",", ' ') + str = string.replace(str, '.', ' ') + str = string.replace(str, ';', ' ') + tokens = string.split(str) + for token in tokens: + try: + c = token[0] + if string.find(string.letters, c) < 0: + pass + elif len(token) < 3: + pass + else: + lower = string.lower(token) + # TODO: generalize this a bit + if lower == 'and' or lower == 'the': + pass + elif self.xref.has_key(token): + self.xref[token].append(id) + else: + self.xref[token] = [id] + except: + pass + + def analyze(self): + print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys())) + self.idx.analyze() + + def scanHeaders(self): + for header in self.headers.keys(): parser = CParser(header) idx = parser.parse() self.headers[header] = idx; self.idx.merge(idx) - def scanModules(self): - for module in self.modules.keys(): + def scanModules(self): + for module in self.modules.keys(): parser = CParser(module) idx = parser.parse() # idx.analyze() self.modules[module] = idx self.idx.merge_public(idx) - def scan(self): - for directory in self.directories: - files = glob.glob(directory + "/*.c") - for file in files: - skip = 0 - for excl in self.excludes: - if string.find(file, excl) != -1: - skip = 1; - break - if skip == 0: - self.modules[file] = None; - files = glob.glob(directory + "/*.h") - for file in files: - skip = 0 - for excl in self.excludes: - if string.find(file, excl) != -1: - skip = 1; - break - if skip == 0: - self.headers[file] = None; - self.scanHeaders() - self.scanModules() + def scan(self): + for directory in self.directories: + files = glob.glob(directory + "/*.c") + for file in files: + skip = 0 + for excl in self.excludes: + if string.find(file, excl) != -1: + skip = 1; + break + if skip == 0: + self.modules[file] = None; + files = glob.glob(directory + "/*.h") + for file in files: + skip = 0 + for excl in self.excludes: + if string.find(file, excl) != -1: + skip = 1; + break + if skip == 0: + self.headers[file] = None; + self.scanHeaders() + self.scanModules() - def modulename_file(self, file): - module = string.split(file, '/')[-1] - if module[-2:] == '.h': - module = module[:-2] - return module - - def serialize_enum(self, output, name): - id = self.idx.enums[name] - output.write(" <enum name='%s' file='%s'" % (name, - self.modulename_file(id.module))) - if id.info != None: - info = id.info - if info[0] != None and info[0] != '': - output.write(" value='%s'" % info[0]); - if info[2] != None and info[2] != '': - output.write(" type='%s'" % info[2]); - if info[1] != None and info[1] != '': - output.write(" info='%s'" % escape(info[1])); - output.write("/>\n") - - def serialize_macro(self, output, name): - id = self.idx.macros[name] - output.write(" <macro name='%s' file='%s'>\n" % (name, - self.modulename_file(id.module))) - if id.info != None: - try: - (args, desc) = id.info - if desc != None and desc != "": - output.write(" <info>%s</info>\n" % (escape(desc))) - for arg in args: - (name, desc) = arg - if desc != None and desc != "": - output.write(" <arg name='%s' info='%s'/>\n" % ( - name, escape(desc))) - else: - output.write(" <arg name='%s'/>\n" % (name)) - except: - pass - output.write(" </macro>\n") - - def serialize_typedef(self, output, name): - id = self.idx.typedefs[name] - if id.info[0:7] == 'struct ': - output.write(" <struct name='%s' file='%s' type='%s'" % ( - name, self.modulename_file(id.module), id.info)) - name = id.info[7:] - if self.idx.structs.has_key(name) and ( \ - type(self.idx.structs[name].info) == type(()) or + def modulename_file(self, file): + module = os.path.basename(file) + if module[-2:] == '.h': + module = module[:-2] + return module + + def serialize_enum(self, output, name): + id = self.idx.enums[name] + output.write(" <enum name='%s' file='%s'" % (name, + self.modulename_file(id.module))) + if id.info != None: + info = id.info + if info[0] != None and info[0] != '': + try: + val = eval(info[0]) + except: + val = info[0] + output.write(" value='%s'" % (val)); + if info[2] != None and info[2] != '': + output.write(" type='%s'" % info[2]); + if info[1] != None and info[1] != '': + output.write(" info='%s'" % escape(info[1])); + output.write("/>\n") + + def serialize_macro(self, output, name): + id = self.idx.macros[name] + output.write(" <macro name='%s' file='%s'>\n" % (name, + self.modulename_file(id.module))) + if id.info != None: + try: + (args, desc) = id.info + if desc != None and desc != "": + output.write(" <info>%s</info>\n" % (escape(desc))) + self.indexString(name, desc) + for arg in args: + (name, desc) = arg + if desc != None and desc != "": + output.write(" <arg name='%s' info='%s'/>\n" % ( + name, escape(desc))) + self.indexString(name, desc) + else: + output.write(" <arg name='%s'/>\n" % (name)) + except: + pass + output.write(" </macro>\n") + + def serialize_typedef(self, output, name): + id = self.idx.typedefs[name] + if id.info[0:7] == 'struct ': + output.write(" <struct name='%s' file='%s' type='%s'" % ( + name, self.modulename_file(id.module), id.info)) + name = id.info[7:] + if self.idx.structs.has_key(name) and ( \ + type(self.idx.structs[name].info) == type(()) or type(self.idx.structs[name].info) == type([])): - output.write(">\n"); - try: - for field in self.idx.structs[name].info: - desc = field[2] - if desc == None: - desc = '' - else: - desc = escape(desc) - output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc)) - except: - print "Failed to serialize struct %s" % (name) - output.write(" </struct>\n") - else: - output.write("/>\n"); - else : - output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % ( - name, self.modulename_file(id.module), id.info)) - - def serialize_variable(self, output, name): - id = self.idx.variables[name] - if id.info != None: - output.write(" <variable name='%s' file='%s' type='%s'/>\n" % ( - name, self.modulename_file(id.module), id.info)) - else: - output.write(" <variable name='%s' file='%s'/>\n" % ( - name, self.modulename_file(id.module))) + output.write(">\n"); + try: + for field in self.idx.structs[name].info: + desc = field[2] + self.indexString(name, desc) + if desc == None: + desc = '' + else: + desc = escape(desc) + output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc)) + except: + print "Failed to serialize struct %s" % (name) + output.write(" </struct>\n") + else: + output.write("/>\n"); + else : + output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % ( + name, self.modulename_file(id.module), id.info)) + + def serialize_variable(self, output, name): + id = self.idx.variables[name] + if id.info != None: + output.write(" <variable name='%s' file='%s' type='%s'/>\n" % ( + name, self.modulename_file(id.module), id.info)) + else: + output.write(" <variable name='%s' file='%s'/>\n" % ( + name, self.modulename_file(id.module))) - def serialize_function(self, output, name): - id = self.idx.functions[name] - output.write(" <%s name='%s' file='%s'>\n" % (id.type, name, - self.modulename_file(id.module))) - try: - (ret, params, desc) = id.info - output.write(" <info>%s</info>\n" % (escape(desc))) - if ret[0] != None: - if ret[0] == "void": - output.write(" <return type='void'/>\n") - else: - output.write(" <return type='%s' info='%s'/>\n" % ( - ret[0], escape(ret[1]))) - for param in params: - if param[0] == 'void': - continue - if param[2] == None: - output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0])) - else: - output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2]))) - except: - print "Failed to save function %s info: " % name, `id.info` - output.write(" </%s>\n" % (id.type)) - - def serialize_exports(self, output, file): - module = self.modulename_file(file) - output.write(" <file name='%s'>\n" % (module)) - dict = self.headers[file] - ids = dict.functions.keys() + dict.variables.keys() + \ - dict.macros.keys() + dict.typedefs.keys() + \ - dict.structs.keys() + dict.enums.keys() - ids.sort() - for id in ids: - output.write(" <exports symbol='%s'/>\n" % (id)) - output.write(" </file>\n") - - - def serialize(self, filename = None): - if filename == None: - filename = "%s-api.xml" % self.name - print "Saving XML description %s" % (filename) - output = open(filename, "w") - output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n') - output.write("<api name='%s'>\n" % self.name) - output.write(" <files>\n") - for file in self.headers.keys(): - self.serialize_exports(output, file) - output.write(" </files>\n") - output.write(" <symbols>\n") - macros = self.idx.macros.keys() - macros.sort() - for macro in macros: - self.serialize_macro(output, macro) - enums = self.idx.enums.keys() - enums.sort() - for enum in enums: - self.serialize_enum(output, enum) - typedefs = self.idx.typedefs.keys() - typedefs.sort() - for typedef in typedefs: - self.serialize_typedef(output, typedef) - variables = self.idx.variables.keys() - variables.sort() - for variable in variables: - self.serialize_variable(output, variable) - functions = self.idx.functions.keys() - functions.sort() - for function in functions: - self.serialize_function(output, function) - output.write(" </symbols>\n") - output.write("</api>\n") - output.close() + def serialize_function(self, output, name): + id = self.idx.functions[name] + output.write(" <%s name='%s' file='%s'>\n" % (id.type, name, + self.modulename_file(id.module))) + try: + (ret, params, desc) = id.info + output.write(" <info>%s</info>\n" % (escape(desc))) + self.indexString(name, desc) + if ret[0] != None: + if ret[0] == "void": + output.write(" <return type='void'/>\n") + else: + output.write(" <return type='%s' info='%s'/>\n" % ( + ret[0], escape(ret[1]))) + self.indexString(name, ret[1]) + for param in params: + if param[0] == 'void': + continue + if param[2] == None: + output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0])) + else: + output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2]))) + self.indexString(name, param[2]) + except: + print "Failed to save function %s info: " % name, `id.info` + output.write(" </%s>\n" % (id.type)) + + def serialize_exports(self, output, file): + module = self.modulename_file(file) + output.write(" <file name='%s'>\n" % (module)) + dict = self.headers[file] + if dict.info != None: + for data in ('Summary', 'Description', 'Author'): + try: + output.write(" <%s>%s</%s>\n" % ( + string.lower(data), + escape(dict.info[data]), + string.lower(data))) + except: + print "Header %s lacks a %s description" % (module, data) + if dict.info.has_key('Description'): + desc = dict.info['Description'] + if string.find(desc, "DEPRECATED") != -1: + output.write(" <deprecated/>\n") + + ids = dict.macros.keys() + ids.sort() + for id in uniq(ids): + # Macros are sometime used to masquerade other types. + if dict.functions.has_key(id): + continue + if dict.variables.has_key(id): + continue + if dict.typedefs.has_key(id): + continue + if dict.structs.has_key(id): + continue + if dict.enums.has_key(id): + continue + output.write(" <exports symbol='%s' type='macro'/>\n" % (id)) + ids = dict.enums.keys() + ids.sort() + for id in uniq(ids): + output.write(" <exports symbol='%s' type='enum'/>\n" % (id)) + ids = dict.typedefs.keys() + ids.sort() + for id in uniq(ids): + output.write(" <exports symbol='%s' type='typedef'/>\n" % (id)) + ids = dict.structs.keys() + ids.sort() + for id in uniq(ids): + output.write(" <exports symbol='%s' type='struct'/>\n" % (id)) + ids = dict.variables.keys() + ids.sort() + for id in uniq(ids): + output.write(" <exports symbol='%s' type='variable'/>\n" % (id)) + ids = dict.functions.keys() + ids.sort() + for id in uniq(ids): + output.write(" <exports symbol='%s' type='function'/>\n" % (id)) + output.write(" </file>\n") + + def serialize_xrefs_files(self, output): + headers = self.headers.keys() + headers.sort() + for file in headers: + module = self.modulename_file(file) + output.write(" <file name='%s'>\n" % (module)) + dict = self.headers[file] + ids = dict.functions.keys() + dict.variables.keys() + \ + dict.macros.keys() + dict.typedefs.keys() + \ + dict.structs.keys() + dict.enums.keys() + ids.sort() + for id in uniq(ids): + output.write(" <ref name='%s'/>\n" % (id)) + output.write(" </file>\n") + pass + + def serialize_xrefs_functions(self, output): + funcs = {} + for name in self.idx.functions.keys(): + id = self.idx.functions[name] + try: + (ret, params, desc) = id.info + for param in params: + if param[0] == 'void': + continue + if funcs.has_key(param[0]): + funcs[param[0]].append(name) + else: + funcs[param[0]] = [name] + except: + pass + typ = funcs.keys() + typ.sort() + for type in typ: + if type == '' or type == 'void' or type == "int" or \ + type == "char *" or type == "const char *" : + continue + output.write(" <type name='%s'>\n" % (type)) + ids = funcs[type] + ids.sort() + for id in ids: + output.write(" <ref name='%s'/>\n" % (id)) + output.write(" </type>\n") + + def serialize_xrefs_constructors(self, output): + funcs = {} + for name in self.idx.functions.keys(): + id = self.idx.functions[name] + try: + (ret, params, desc) = id.info + if ret[0] == "void": + continue + if funcs.has_key(ret[0]): + funcs[ret[0]].append(name) + else: + funcs[ret[0]] = [name] + except: + pass + typ = funcs.keys() + typ.sort() + for type in typ: + if type == '' or type == 'void' or type == "int" or \ + type == "char *" or type == "const char *" : + continue + output.write(" <type name='%s'>\n" % (type)) + ids = funcs[type] + for id in ids: + output.write(" <ref name='%s'/>\n" % (id)) + output.write(" </type>\n") + + def serialize_xrefs_alpha(self, output): + letter = None + ids = self.idx.identifiers.keys() + ids.sort() + for id in ids: + if id[0] != letter: + if letter != None: + output.write(" </letter>\n") + letter = id[0] + output.write(" <letter name='%s'>\n" % (letter)) + output.write(" <ref name='%s'/>\n" % (id)) + if letter != None: + output.write(" </letter>\n") + + def serialize_xrefs_references(self, output): + typ = self.idx.identifiers.keys() + typ.sort() + for id in typ: + idf = self.idx.identifiers[id] + module = idf.module + output.write(" <reference name='%s' href='%s'/>\n" % (id, + 'html/' + self.basename + '-' + + self.modulename_file(module) + '.html#' + + id)) + + def serialize_xrefs_index(self, output): + index = self.xref + typ = index.keys() + typ.sort() + letter = None + count = 0 + chunk = 0 + chunks = [] + for id in typ: + if len(index[id]) > 30: + continue + if id[0] != letter: + if letter == None or count > 200: + if letter != None: + output.write(" </letter>\n") + output.write(" </chunk>\n") + count = 0 + chunks.append(["chunk%s" % (chunk -1), first_letter, letter]) + output.write(" <chunk name='chunk%s'>\n" % (chunk)) + first_letter = id[0] + chunk = chunk + 1 + elif letter != None: + output.write(" </letter>\n") + letter = id[0] + output.write(" <letter name='%s'>\n" % (letter)) + output.write(" <word name='%s'>\n" % (id)) + tokens = index[id]; + tokens.sort() + tok = None + for token in index[id]: + if tok == token: + continue + tok = token + output.write(" <ref name='%s'/>\n" % (token)) + count = count + 1 + output.write(" </word>\n") + if letter != None: + output.write(" </letter>\n") + output.write(" </chunk>\n") + output.write(" <chunks>\n") + for ch in chunks: + output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % ( + ch[0], ch[1], ch[2])) + output.write(" </chunks>\n") + + def serialize_xrefs(self, output): + output.write(" <references>\n") + self.serialize_xrefs_references(output) + output.write(" </references>\n") + output.write(" <alpha>\n") + self.serialize_xrefs_alpha(output) + output.write(" </alpha>\n") + output.write(" <constructors>\n") + self.serialize_xrefs_constructors(output) + output.write(" </constructors>\n") + output.write(" <functions>\n") + self.serialize_xrefs_functions(output) + output.write(" </functions>\n") + output.write(" <files>\n") + self.serialize_xrefs_files(output) + output.write(" </files>\n") + output.write(" <index>\n") + self.serialize_xrefs_index(output) + output.write(" </index>\n") + + def serialize(self): + filename = "%s-api.xml" % self.name + print "Saving XML description %s" % (filename) + output = open(filename, "w") + output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n') + output.write("<api name='%s'>\n" % self.name) + output.write(" <files>\n") + headers = self.headers.keys() + headers.sort() + for file in headers: + self.serialize_exports(output, file) + output.write(" </files>\n") + output.write(" <symbols>\n") + macros = self.idx.macros.keys() + macros.sort() + for macro in macros: + self.serialize_macro(output, macro) + enums = self.idx.enums.keys() + enums.sort() + for enum in enums: + self.serialize_enum(output, enum) + typedefs = self.idx.typedefs.keys() + typedefs.sort() + for typedef in typedefs: + self.serialize_typedef(output, typedef) + variables = self.idx.variables.keys() + variables.sort() + for variable in variables: + self.serialize_variable(output, variable) + functions = self.idx.functions.keys() + functions.sort() + for function in functions: + self.serialize_function(output, function) + output.write(" </symbols>\n") + output.write("</api>\n") + output.close() + + filename = "%s-refs.xml" % self.name + print "Saving XML Cross References %s" % (filename) + output = open(filename, "w") + output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n') + output.write("<apirefs name='%s'>\n" % self.name) + self.serialize_xrefs(output) + output.write("</apirefs>\n") + output.close() def rebuild(): builder = None - if glob.glob("../parser.c") != [] : + if glob.glob("parser.c") != [] : + print "Rebuilding API description for libxml2" + builder = docBuilder("libxml2", [".", "."], + ["xmlwin32version.h", "tst.c"]) + elif glob.glob("../parser.c") != [] : print "Rebuilding API description for libxml2" builder = docBuilder("libxml2", ["..", "../include/libxml"], - ["xmlwin32version.h", "tst.c", - "schemasInternals.h", "xmlschemas" ]) + ["xmlwin32version.h", "tst.c"]) elif glob.glob("../libxslt/transform.c") != [] : print "Rebuilding API description for libxslt" builder = docBuilder("libxslt", ["../libxslt"], diff --git a/libxslt/keys.c b/libxslt/keys.c index ad210a17..3f5ced2f 100644 --- a/libxslt/keys.c +++ b/libxslt/keys.c @@ -217,6 +217,58 @@ xsltFreeKeys(xsltStylesheetPtr style) { } /** + * skipString: + * @cur: the current pointer + * @end: the current offset + * + * skip a string delimited by " or ' + * + * Returns the byte after the string or -1 in case of error + */ +static int +skipString(const xmlChar *cur, int end) { + xmlChar limit; + + if ((cur == NULL) || (end < 0)) return(-1); + if ((cur[end] == '\'') || (cur[end] == '"')) limit = cur[end]; + else return(end); + end++; + while (cur[end] != 0) { + if (cur[end] == limit) + return(end + 1); + end++; + } + return(-1); +} + +/** + * skipPredicate: + * @cur: the current pointer + * @end: the current offset + * + * skip a predicate + * + * Returns the byte after the predicate or -1 in case of error + */ +static int +skipPredicate(const xmlChar *cur, int end) { + if ((cur == NULL) || (end < 0)) return(-1); + if (cur[end] != '[') return(end); + end++; + while (cur[end] != 0) { + if ((cur[end] == '\'') || (cur[end] == '"')) { + end = skipString(cur, end); + if (end <= 0) + return(-1); + } + if (cur[end] == ']') + return(end + 1); + end++; + } + return(-1); +} + +/** * xsltAddKey: * @style: an XSLT stylesheet * @name: the key name or NULL @@ -260,11 +312,18 @@ xsltAddKey(xsltStylesheetPtr style, const xmlChar *name, current++; end = current; while ((match[end] != 0) && (match[end] != '|')) { + if (match[end] == '[') { + end = skipPredicate(match, end); + if (end <= 0) + xsltTransformError(NULL, style, inst, + "key pattern is malformed: %s", + key->match); + } end++; } if (current == end) { xsltTransformError(NULL, style, inst, - "key pattern is empty\n"); + "key pattern is empty\n"); if (style != NULL) style->errors++; goto error; } diff --git a/libxslt/transform.c b/libxslt/transform.c index b107cfe4..8e0bcc6c 100644 --- a/libxslt/transform.c +++ b/libxslt/transform.c @@ -3211,6 +3211,10 @@ xsltApplyTemplates(xsltTransformContextPtr ctxt, xmlNodePtr node, (list->nodeTab[i]->doc->URL != NULL)) { ctxt->document = xsltFindDocument(ctxt, list->nodeTab[i]->doc->doc); + if (ctxt->document == NULL) { + /* restore the previous context */ + ctxt->document = oldCDocPtr; + } ctxt->xpathCtxt->node = list->nodeTab[i]; #ifdef WITH_XSLT_DEBUG_PROCESS if ((ctxt->document != NULL) && @@ -3577,6 +3581,10 @@ xsltForEach(xsltTransformContextPtr ctxt, xmlNodePtr node, (list->nodeTab[i]->doc->URL != NULL)) { ctxt->document = xsltFindDocument(ctxt, list->nodeTab[i]->doc->doc); + if (ctxt->document == NULL) { + /* restore the previous context */ + ctxt->document = oldCDocPtr; + } ctxt->xpathCtxt->node = list->nodeTab[i]; #ifdef WITH_XSLT_DEBUG_PROCESS if ((ctxt->document != NULL) && diff --git a/tests/docs/Makefile.am b/tests/docs/Makefile.am index cd84a182..ed02e581 100644 --- a/tests/docs/Makefile.am +++ b/tests/docs/Makefile.am @@ -131,6 +131,8 @@ EXTRA_DIST = \ bug-129.xml \ bug-130.xml bug-130.doc \ bug-131.xml \ + bug-132.xml \ + bug-133.xml \ character.xml \ array.xml \ items.xml diff --git a/tests/docs/bug-133.xml b/tests/docs/bug-133.xml new file mode 100644 index 00000000..07164941 --- /dev/null +++ b/tests/docs/bug-133.xml @@ -0,0 +1,7 @@ +<html> +<body> +<trash /> +<h2>title</h2> +<div>test</div> +</body> +</html> diff --git a/tests/general/Makefile.am b/tests/general/Makefile.am index 28ec1ac0..c24faa03 100644 --- a/tests/general/Makefile.am +++ b/tests/general/Makefile.am @@ -137,6 +137,8 @@ EXTRA_DIST = \ bug-130.out bug-130.xsl \ bug-130-imp1.imp bug-130-imp2.imp bug-130-imp3.imp bug-130-imp4.imp \ bug-131.out bug-131.xsl bug-131-imp.imp \ + bug-132.out bug-132.xsl \ + bug-133.out bug-133.xsl \ character.out character.xsl \ character2.out character2.xsl \ itemschoose.out itemschoose.xsl \ diff --git a/tests/general/bug-133.out b/tests/general/bug-133.out new file mode 100644 index 00000000..769536c3 --- /dev/null +++ b/tests/general/bug-133.out @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<html> +<body><hintsection><hinttitle>title</hinttitle> +<div>test</div> +</hintsection></body> +</html> diff --git a/tests/general/bug-133.xsl b/tests/general/bug-133.xsl new file mode 100644 index 00000000..ffbe0ca6 --- /dev/null +++ b/tests/general/bug-133.xsl @@ -0,0 +1,32 @@ +<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> +<xsl:output method="xml" /> + +<xsl:key name="next-headings" match="h2" + use="generate-id(preceding-sibling::h1[1])" /> + +<xsl:key name="immediate-nodes" + match="node()[not(self::h1 | self::h2)]" + use="generate-id(preceding-sibling::*[self::h1 or self::h2][1])" /> + +<xsl:template match="h1 | h2"> + <hintsection> + <hinttitle><xsl:apply-templates /></hinttitle> + <xsl:apply-templates + select="key('immediate-nodes', generate-id(.))" /> + <xsl:apply-templates select="key('next-headings', generate-id(.))" /> + </hintsection> +</xsl:template> + +<xsl:template match="node()|@*"> + <xsl:copy> + <xsl:apply-templates select="node()|@*"/> + </xsl:copy> +</xsl:template> + +<xsl:template match="body"> + <xsl:copy> + <xsl:apply-templates select="(h1|h2)[1]" /> + </xsl:copy> +</xsl:template> + +</xsl:stylesheet> |