From cfe3c4e9b5cbbe7a92aa6b1ddf5b8b86dc6ae3ee Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer
Date: Fri, 28 Apr 2023 15:35:50 +0200
Subject: doc: Update apibuild.py

Copy the current version from libxml2.
---
 doc/apibuild.py | 3082 ++++++++++++++++++++++++++++---------------------------
 1 file changed, 1551 insertions(+), 1531 deletions(-)

diff --git a/doc/apibuild.py b/doc/apibuild.py
index 6e59677c..04eeabf8 100755
--- a/doc/apibuild.py
+++ b/doc/apibuild.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # This is the API builder, it parses the C sources and build the
 # API formal description in XML.
@@ -12,6 +12,8 @@ import string
 import glob
 debug=0
+#debugsym='ignorableWhitespaceSAXFunc'
+debugsym=None
 
 #
 # C parser analysis code
 #
@@ -21,12 +23,22 @@ ignored_files = {
   "trio.c": "too many non standard macros",
   "trionan.c": "too many non standard macros",
   "triostr.c": "too many non standard macros",
-  "acconfig.h": "generated portability layer",
   "config.h": "generated portability layer",
   "libxml.h": "internal only",
   "testOOM.c": "out of memory tester",
   "testOOMlib.h": "out of memory tester",
   "testOOMlib.c": "out of memory tester",
+  "rngparser.c": "not yet integrated",
+  "testModule.c": "test tool",
+  "testThreads.c": "test tool",
+  "testapi.c": "generated regression tests",
+  "runtest.c": "regression tests program",
+  "runsuite.c": "regression tests program",
+  "tst.c": "not part of the library",
+  "test.c": "not part of the library",
+  "testdso.c": "test for dynamid shared libraries",
+  "testrecurse.c": "test for entities recursions",
+  "timsort.h": "Internal header only for xpath.c 2.9.0",
 }
 
 ignored_words = {
@@ -38,56 +50,69 @@ ignored_words = {
   "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
   "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
   "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
-  "XMLCALL": (0, "Special macro for win32 calls"),
   "XSLTCALL": (0, "Special macro for win32 calls"),
   "EXSLTCALL": (0, "Special macro for win32 calls"),
   "__declspec": (3, "Windows keyword"),
+  "__stdcall": (0, "Windows keyword"),
   "ATTRIBUTE_UNUSED": (0, "macro keyword"),
+  "ATTRIBUTE_DESTRUCTOR": (0, "macro keyword"),
   "LIBEXSLT_PUBLIC": (0, "macro keyword"),
   "X_IN_Y": (5, "macro function builder"),
   "XSLT_ITEM_COMMON_FIELDS": (0, "Special macro"),
   "CALLBACK": (0, "macro keyword"),
+  "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
+  "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
+  "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
+  "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
   "LIBXSLT_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
+  "ATTRIBUTE_NO_SANITIZE": (3, "macro keyword"),
+  "XML_DEPRECATED": (0, "macro keyword"),
 }
 
 def escape(raw):
-    raw = string.replace(raw, '&', '&amp;')
-    raw = string.replace(raw, '<', '&lt;')
-    raw = string.replace(raw, '>', '&gt;')
-    raw = string.replace(raw, "'", '&apos;')
-    raw = string.replace(raw, '"', '&quot;')
+    raw = raw.replace('&', '&amp;')
+    raw = raw.replace('<', '&lt;')
+    raw = raw.replace('>', '&gt;')
+    raw = raw.replace("'", '&apos;')
+    raw = raw.replace('"', '&quot;')
     return raw
 
-def uniq(items):
-    d = {}
-    for item in items:
-        d[item]=1
-    return d.keys()
-
 class identifier:
-    def __init__(self, name, module=None, type=None, lineno = 0,
-                 info=None, extra=None):
+    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
+                 info=None, extra=None, conditionals = None):
         self.name = name
-        self.module =
module - self.type = type - self.info = info - self.extra = extra - self.lineno = lineno - self.static = 0 + self.header = header + self.module = module + self.type = type + self.info = info + self.extra = extra + self.lineno = lineno + self.static = 0 + if conditionals == None or len(conditionals) == 0: + self.conditionals = None + else: + self.conditionals = conditionals[:] + if self.name == debugsym: + print("=> define %s : %s" % (debugsym, (module, type, info, + extra, conditionals))) def __repr__(self): r = "%s %s:" % (self.type, self.name) - if self.static: - r = r + " static" - if self.module != None: - r = r + " from %s" % (self.module) - if self.info != None: - r = r + " " + `self.info` - if self.extra != None: - r = r + " " + `self.extra` - return r - - + if self.static: + r = r + " static" + if self.module != None: + r = r + " from %s" % (self.module) + if self.info != None: + r = r + " " + repr(self.info) + if self.extra != None: + r = r + " " + repr(self.extra) + if self.conditionals != None: + r = r + " " + repr(self.conditionals) + return r + + + def set_header(self, header): + self.header = header def set_module(self, module): self.module = module def set_type(self, type): @@ -100,9 +125,16 @@ class identifier: self.lineno = lineno def set_static(self, static): self.static = static + def set_conditionals(self, conditionals): + if conditionals == None or len(conditionals) == 0: + self.conditionals = None + else: + self.conditionals = conditionals[:] def get_name(self): return self.name + def get_header(self): + return self.module def get_module(self): return self.module def get_type(self): @@ -115,213 +147,250 @@ class identifier: return self.extra def get_static(self): return self.static - - def update(self, module, type = None, info = None, extra=None): - if module != None and self.module == None: - self.set_module(module) + def get_conditionals(self): + return self.conditionals + + def update(self, header, module, type = None, info = None, extra=None, + conditionals=None): + if self.name == debugsym: + print("=> update %s : %s" % (debugsym, (module, type, info, + extra, conditionals))) + if header != None and self.header == None: + self.set_header(module) + if module != None and (self.module == None or self.header == self.module): + self.set_module(module) if type != None and self.type == None: - self.set_type(type) + self.set_type(type) if info != None: - self.set_info(info) + self.set_info(info) if extra != None: - self.set_extra(extra) - + self.set_extra(extra) + if conditionals != None: + self.set_conditionals(conditionals) class index: def __init__(self, name = "noname"): self.name = name self.identifiers = {} self.functions = {} - self.variables = {} - self.includes = {} - self.structs = {} - self.enums = {} - self.typedefs = {} - self.macros = {} - self.references = {} - self.info = {} - - def add_ref(self, name, module, static, type, lineno, info=None, extra=None): + self.variables = {} + self.includes = {} + self.structs = {} + self.enums = {} + self.typedefs = {} + self.macros = {} + self.references = {} + self.info = {} + + def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None): if name[0:2] == '__': - return None + return None d = None - try: - d = self.identifiers[name] - d.update(module, type, lineno, info, extra) - except: - d = identifier(name, module, type, lineno, info, extra) - self.identifiers[name] = d + if name in self.identifiers: + d = self.identifiers[name] + d.update(header, module, type, info, 
extra, conditionals) + else: + d = identifier(name, header, module, type, lineno, info, extra, conditionals) + self.identifiers[name] = d + + if d != None and static == 1: + d.set_static(1) - if d != None and static == 1: - d.set_static(1) + if d != None and name != None and type != None: + self.references[name] = d - if d != None and name != None and type != None: - self.references[name] = d + if name == debugsym: + print("New ref: %s" % (d)) - def add(self, name, module, static, type, lineno, info=None, extra=None): + return d + + def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None): if name[0:2] == '__': - return None + return None d = None - try: - d = self.identifiers[name] - d.update(module, type, lineno, info, extra) - except: - d = identifier(name, module, type, lineno, info, extra) - self.identifiers[name] = d - - if d != None and static == 1: - d.set_static(1) - - if d != None and name != None and type != None: - if type == "function": - self.functions[name] = d - elif type == "functype": - self.functions[name] = d - elif type == "variable": - self.variables[name] = d - elif type == "include": - self.includes[name] = d - elif type == "struct": - self.structs[name] = d - elif type == "enum": - self.enums[name] = d - elif type == "typedef": - self.typedefs[name] = d - elif type == "macro": - self.macros[name] = d - else: - print "Unable to register type ", type - return d + if name in self.identifiers: + d = self.identifiers[name] + d.update(header, module, type, info, extra, conditionals) + else: + d = identifier(name, header, module, type, lineno, info, extra, conditionals) + self.identifiers[name] = d + + if d != None and static == 1: + d.set_static(1) + + if d != None and name != None and type != None: + if type == "function": + self.functions[name] = d + elif type == "functype": + self.functions[name] = d + elif type == "variable": + self.variables[name] = d + elif type == "include": + self.includes[name] = d + elif type == "struct": + self.structs[name] = d + elif type == "enum": + self.enums[name] = d + elif type == "typedef": + self.typedefs[name] = d + elif type == "macro": + self.macros[name] = d + else: + print("Unable to register type ", type) + + if name == debugsym: + print("New symbol: %s" % (d)) + + return d def merge(self, idx): - for id in idx.functions.keys(): + for id in list(idx.functions.keys()): # # macro might be used to override functions or variables # definitions # - if self.macros.has_key(id): - del self.macros[id] - if self.functions.has_key(id): - print "function %s from %s redeclared in %s" % ( - id, self.functions[id].module, idx.functions[id].module) - else: - self.functions[id] = idx.functions[id] - self.identifiers[id] = idx.functions[id] - for id in idx.variables.keys(): + if id in self.macros: + del self.macros[id] + if id in self.functions: + print("function %s from %s redeclared in %s" % ( + id, self.functions[id].header, idx.functions[id].header)) + else: + self.functions[id] = idx.functions[id] + self.identifiers[id] = idx.functions[id] + for id in list(idx.variables.keys()): # # macro might be used to override functions or variables # definitions # - if self.macros.has_key(id): - del self.macros[id] - if self.variables.has_key(id): - print "variable %s from %s redeclared in %s" % ( - id, self.variables[id].module, idx.variables[id].module) - else: - self.variables[id] = idx.variables[id] - self.identifiers[id] = idx.variables[id] - for id in idx.structs.keys(): - if self.structs.has_key(id): 
- print "struct %s from %s redeclared in %s" % ( - id, self.structs[id].module, idx.structs[id].module) - else: - self.structs[id] = idx.structs[id] - self.identifiers[id] = idx.structs[id] - for id in idx.typedefs.keys(): - if self.typedefs.has_key(id): - print "typedef %s from %s redeclared in %s" % ( - id, self.typedefs[id].module, idx.typedefs[id].module) - else: - self.typedefs[id] = idx.typedefs[id] - self.identifiers[id] = idx.typedefs[id] - for id in idx.macros.keys(): + if id in self.macros: + del self.macros[id] + if id in self.variables: + print("variable %s from %s redeclared in %s" % ( + id, self.variables[id].header, idx.variables[id].header)) + else: + self.variables[id] = idx.variables[id] + self.identifiers[id] = idx.variables[id] + for id in list(idx.structs.keys()): + if id in self.structs: + print("struct %s from %s redeclared in %s" % ( + id, self.structs[id].header, idx.structs[id].header)) + else: + self.structs[id] = idx.structs[id] + self.identifiers[id] = idx.structs[id] + for id in list(idx.typedefs.keys()): + if id in self.typedefs: + print("typedef %s from %s redeclared in %s" % ( + id, self.typedefs[id].header, idx.typedefs[id].header)) + else: + self.typedefs[id] = idx.typedefs[id] + self.identifiers[id] = idx.typedefs[id] + for id in list(idx.macros.keys()): # # macro might be used to override functions or variables # definitions # - if self.variables.has_key(id): + if id in self.variables: continue - if self.functions.has_key(id): + if id in self.functions: continue - if self.enums.has_key(id): + if id in self.enums: continue - if self.macros.has_key(id): - print "macro %s from %s redeclared in %s" % ( - id, self.macros[id].module, idx.macros[id].module) - else: - self.macros[id] = idx.macros[id] - self.identifiers[id] = idx.macros[id] - for id in idx.enums.keys(): - if self.enums.has_key(id): - print "enum %s from %s redeclared in %s" % ( - id, self.enums[id].module, idx.enums[id].module) - else: - self.enums[id] = idx.enums[id] - self.identifiers[id] = idx.enums[id] + if id in self.macros: + print("macro %s from %s redeclared in %s" % ( + id, self.macros[id].header, idx.macros[id].header)) + else: + self.macros[id] = idx.macros[id] + self.identifiers[id] = idx.macros[id] + for id in list(idx.enums.keys()): + if id in self.enums: + print("enum %s from %s redeclared in %s" % ( + id, self.enums[id].header, idx.enums[id].header)) + else: + self.enums[id] = idx.enums[id] + self.identifiers[id] = idx.enums[id] def merge_public(self, idx): - for id in idx.functions.keys(): - if self.functions.has_key(id): - up = idx.functions[id] - self.functions[id].update(None, up.type, up.info, up.extra) - # else: - # print "Function %s from %s is not declared in headers" % ( - # id, idx.functions[id].module) - # TODO: do the same for variables. 
+ for id in list(idx.functions.keys()): + if id in self.functions: + # check that function condition agrees with header + if idx.functions[id].conditionals != \ + self.functions[id].conditionals: + print("Header condition differs from Function for %s:" \ + % id) + print(" H: %s" % self.functions[id].conditionals) + print(" C: %s" % idx.functions[id].conditionals) + up = idx.functions[id] + self.functions[id].update(None, up.module, up.type, up.info, up.extra) + # else: + # print "Function %s from %s is not declared in headers" % ( + # id, idx.functions[id].module) + + for id in list(idx.variables.keys()): + if id in self.variables: + # check that variable condition agrees with header + # TODO: produces many false positives + #if idx.variables[id].conditionals != \ + # self.variables[id].conditionals: + # print("Header condition differs from Variable for %s:" \ + # % id) + # print(" H: %s" % self.variables[id].conditionals) + # print(" C: %s" % idx.variables[id].conditionals) + up = idx.variables[id] + self.variables[id].update(None, up.module, up.type, up.info, up.extra) def analyze_dict(self, type, dict): count = 0 - public = 0 - for name in dict.keys(): - id = dict[name] - count = count + 1 - if id.static == 0: - public = public + 1 + public = 0 + for name in list(dict.keys()): + id = dict[name] + count = count + 1 + if id.static == 0: + public = public + 1 if count != public: - print " %d %s , %d public" % (count, type, public) - elif count != 0: - print " %d public %s" % (count, type) + print(" %d %s , %d public" % (count, type, public)) + elif count != 0: + print(" %d public %s" % (count, type)) def analyze(self): - self.analyze_dict("functions", self.functions) - self.analyze_dict("variables", self.variables) - self.analyze_dict("structs", self.structs) - self.analyze_dict("typedefs", self.typedefs) - self.analyze_dict("macros", self.macros) - + self.analyze_dict("functions", self.functions) + self.analyze_dict("variables", self.variables) + self.analyze_dict("structs", self.structs) + self.analyze_dict("typedefs", self.typedefs) + self.analyze_dict("macros", self.macros) + class CLexer: """A lexer for the C language, tokenize the input by reading and analyzing it line by line""" def __init__(self, input): self.input = input - self.tokens = [] - self.line = "" - self.lineno = 0 + self.tokens = [] + self.line = "" + self.lineno = 0 def getline(self): line = '' - while line == '': - line = self.input.readline() - if not line: - return None - self.lineno = self.lineno + 1 - line = string.lstrip(line) - line = string.rstrip(line) - if line == '': - continue - while line[-1] == '\\': - line = line[:-1] - n = self.input.readline() - self.lineno = self.lineno + 1 - n = string.lstrip(n) - n = string.rstrip(n) - if not n: - break - else: - line = line + n + while line == '': + line = self.input.readline() + if not line: + return None + self.lineno = self.lineno + 1 + line = line.lstrip() + line = line.rstrip() + if line == '': + continue + while line[-1] == '\\': + line = line[:-1] + n = self.input.readline() + self.lineno = self.lineno + 1 + n = n.lstrip() + n = n.rstrip() + if not n: + break + else: + line = line + n return line - + def getlineno(self): return self.lineno @@ -329,350 +398,407 @@ class CLexer: self.tokens.insert(0, token); def debug(self): - print "Last token: ", self.last - print "Token queue: ", self.tokens - print "Line %d end: " % (self.lineno), self.line + print("Last token: ", self.last) + print("Token queue: ", self.tokens) + print("Line %d end: " % (self.lineno), 
self.line) def token(self): while self.tokens == []: - if self.line == "": - line = self.getline() - else: - line = self.line - self.line = "" - if line == None: - return None - - if line[0] == '#': - self.tokens = map((lambda x: ('preproc', x)), - string.split(line)) - break; - l = len(line) - if line[0] == '"' or line[0] == "'": - end = line[0] - line = line[1:] - found = 0 - tok = "" - while found == 0: - i = 0 - l = len(line) - while i < l: - if line[i] == end: - self.line = line[i+1:] - line = line[:i] - l = i - found = 1 - break - if line[i] == '\\': - i = i + 1 - i = i + 1 - tok = tok + line - if found == 0: - line = self.getline() - if line == None: - return None - self.last = ('string', tok) - return self.last - - if l >= 2 and line[0] == '/' and line[1] == '*': - line = line[2:] - found = 0 - tok = "" - while found == 0: - i = 0 - l = len(line) - while i < l: - if line[i] == '*' and i+1 < l and line[i+1] == '/': - self.line = line[i+2:] - line = line[:i-1] - l = i - found = 1 - break - i = i + 1 - if tok != "": - tok = tok + "\n" - tok = tok + line - if found == 0: - line = self.getline() - if line == None: - return None - self.last = ('comment', tok) - return self.last - if l >= 2 and line[0] == '/' and line[1] == '/': - line = line[2:] - self.last = ('comment', line) - return self.last - i = 0 - while i < l: - if line[i] == '/' and i+1 < l and line[i+1] == '/': - self.line = line[i:] - line = line[:i] - break - if line[i] == '/' and i+1 < l and line[i+1] == '*': - self.line = line[i:] - line = line[:i] - break - if line[i] == '"' or line[i] == "'": - self.line = line[i:] - line = line[:i] - break - i = i + 1 - l = len(line) - i = 0 - while i < l: - if line[i] == ' ' or line[i] == '\t': - i = i + 1 - continue - o = ord(line[i]) - if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ - (o >= 48 and o <= 57): - s = i - while i < l: - o = ord(line[i]) - if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ - (o >= 48 and o <= 57) or string.find( - " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1: - i = i + 1 - else: - break - self.tokens.append(('name', line[s:i])) - continue - if string.find("(){}:;,[]", line[i]) != -1: + if self.line == "": + line = self.getline() + else: + line = self.line + self.line = "" + if line == None: + return None + + if line[0] == '#': + self.tokens = list(map((lambda x: ('preproc', x)), + line.split())) + break; + l = len(line) + if line[0] == '"' or line[0] == "'": + end = line[0] + line = line[1:] + found = 0 + tok = "" + while found == 0: + i = 0 + l = len(line) + while i < l: + if line[i] == end: + self.line = line[i+1:] + line = line[:i] + l = i + found = 1 + break + if line[i] == '\\': + i = i + 1 + i = i + 1 + tok = tok + line + if found == 0: + line = self.getline() + if line == None: + return None + self.last = ('string', tok) + return self.last + + if l >= 2 and line[0] == '/' and line[1] == '*': + line = line[2:] + found = 0 + tok = "" + while found == 0: + i = 0 + l = len(line) + while i < l: + if line[i] == '*' and i+1 < l and line[i+1] == '/': + self.line = line[i+2:] + line = line[:i-1] + l = i + found = 1 + break + i = i + 1 + if tok != "": + tok = tok + "\n" + tok = tok + line + if found == 0: + line = self.getline() + if line == None: + return None + self.last = ('comment', tok) + return self.last + if l >= 2 and line[0] == '/' and line[1] == '/': + line = line[2:] + self.last = ('comment', line) + return self.last + i = 0 + while i < l: + if line[i] == '/' and i+1 < l and line[i+1] == '/': + self.line = line[i:] + line = line[:i] + 
break + if line[i] == '/' and i+1 < l and line[i+1] == '*': + self.line = line[i:] + line = line[:i] + break + if line[i] == '"' or line[i] == "'": + self.line = line[i:] + line = line[:i] + break + i = i + 1 + l = len(line) + i = 0 + while i < l: + if line[i] == ' ' or line[i] == '\t': + i = i + 1 + continue + o = ord(line[i]) + if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ + (o >= 48 and o <= 57): + s = i + while i < l: + o = ord(line[i]) + if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ + (o >= 48 and o <= 57) or \ + (" \t(){}:;,+-*/%&!|[]=><".find(line[i])) == -1: + i = i + 1 + else: + break + self.tokens.append(('name', line[s:i])) + continue + if "(){}:;,[]".find(line[i]) != -1: # if line[i] == '(' or line[i] == ')' or line[i] == '{' or \ -# line[i] == '}' or line[i] == ':' or line[i] == ';' or \ -# line[i] == ',' or line[i] == '[' or line[i] == ']': - self.tokens.append(('sep', line[i])) - i = i + 1 - continue - if string.find("+-*><=/%&!|.", line[i]) != -1: +# line[i] == '}' or line[i] == ':' or line[i] == ';' or \ +# line[i] == ',' or line[i] == '[' or line[i] == ']': + self.tokens.append(('sep', line[i])) + i = i + 1 + continue + if "+-*><=/%&!|.".find(line[i]) != -1: # if line[i] == '+' or line[i] == '-' or line[i] == '*' or \ -# line[i] == '>' or line[i] == '<' or line[i] == '=' or \ -# line[i] == '/' or line[i] == '%' or line[i] == '&' or \ -# line[i] == '!' or line[i] == '|' or line[i] == '.': - if line[i] == '.' and i + 2 < l and \ - line[i+1] == '.' and line[i+2] == '.': - self.tokens.append(('name', '...')) - i = i + 3 - continue - - j = i + 1 - if j < l and ( - string.find("+-*><=/%&!|", line[j]) != -1): -# line[j] == '+' or line[j] == '-' or line[j] == '*' or \ -# line[j] == '>' or line[j] == '<' or line[j] == '=' or \ -# line[j] == '/' or line[j] == '%' or line[j] == '&' or \ -# line[j] == '!' or line[j] == '|'): - self.tokens.append(('op', line[i:j+1])) - i = j + 1 - else: - self.tokens.append(('op', line[i])) - i = i + 1 - continue - s = i - while i < l: - o = ord(line[i]) - if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ - (o >= 48 and o <= 57) or ( - string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1): -# line[i] != ' ' and line[i] != '\t' and -# line[i] != '(' and line[i] != ')' and -# line[i] != '{' and line[i] != '}' and -# line[i] != ':' and line[i] != ';' and -# line[i] != ',' and line[i] != '+' and -# line[i] != '-' and line[i] != '*' and -# line[i] != '/' and line[i] != '%' and -# line[i] != '&' and line[i] != '!' and -# line[i] != '|' and line[i] != '[' and -# line[i] != ']' and line[i] != '=' and -# line[i] != '*' and line[i] != '>' and -# line[i] != '<'): - i = i + 1 - else: - break - self.tokens.append(('name', line[s:i])) - - tok = self.tokens[0] - self.tokens = self.tokens[1:] - self.last = tok - return tok - +# line[i] == '>' or line[i] == '<' or line[i] == '=' or \ +# line[i] == '/' or line[i] == '%' or line[i] == '&' or \ +# line[i] == '!' or line[i] == '|' or line[i] == '.': + if line[i] == '.' and i + 2 < l and \ + line[i+1] == '.' and line[i+2] == '.': + self.tokens.append(('name', '...')) + i = i + 3 + continue + + j = i + 1 + if j < l and ( + "+-*><=/%&!|".find(line[j]) != -1): +# line[j] == '+' or line[j] == '-' or line[j] == '*' or \ +# line[j] == '>' or line[j] == '<' or line[j] == '=' or \ +# line[j] == '/' or line[j] == '%' or line[j] == '&' or \ +# line[j] == '!' 
or line[j] == '|'): + self.tokens.append(('op', line[i:j+1])) + i = j + 1 + else: + self.tokens.append(('op', line[i])) + i = i + 1 + continue + s = i + while i < l: + o = ord(line[i]) + if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \ + (o >= 48 and o <= 57) or ( + " \t(){}:;,+-*/%&!|[]=><".find(line[i]) == -1): +# line[i] != ' ' and line[i] != '\t' and +# line[i] != '(' and line[i] != ')' and +# line[i] != '{' and line[i] != '}' and +# line[i] != ':' and line[i] != ';' and +# line[i] != ',' and line[i] != '+' and +# line[i] != '-' and line[i] != '*' and +# line[i] != '/' and line[i] != '%' and +# line[i] != '&' and line[i] != '!' and +# line[i] != '|' and line[i] != '[' and +# line[i] != ']' and line[i] != '=' and +# line[i] != '*' and line[i] != '>' and +# line[i] != '<'): + i = i + 1 + else: + break + self.tokens.append(('name', line[s:i])) + + tok = self.tokens[0] + self.tokens = self.tokens[1:] + self.last = tok + return tok + class CParser: """The C module parser""" def __init__(self, filename, idx = None): self.filename = filename - if len(filename) > 2 and filename[-2:] == '.h': - self.is_header = 1 - else: - self.is_header = 0 + if len(filename) > 2 and filename[-2:] == '.h': + self.is_header = 1 + else: + self.is_header = 0 self.input = open(filename) - self.lexer = CLexer(self.input) - if idx == None: - self.index = index() - else: - self.index = idx - self.top_comment = "" - self.last_comment = "" - self.comment = None - self.collect_ref = 0 - self.no_error = 0 + self.lexer = CLexer(self.input) + if idx == None: + self.index = index() + else: + self.index = idx + self.top_comment = "" + self.last_comment = "" + self.comment = None + self.collect_ref = 0 + self.doc_disable = 0 + self.conditionals = [] + self.defines = [] def collect_references(self): self.collect_ref = 1 - def stop_error(self): - self.no_error = 1 + def disable(self): + self.doc_disable = 1 - def start_error(self): - self.no_error = 0 + def enable(self): + self.doc_disable = 0 def lineno(self): return self.lexer.getlineno() def index_add(self, name, module, static, type, info=None, extra = None): - self.index.add(name, module, static, type, self.lineno(), - info, extra) + if self.doc_disable: + return + if self.is_header == 1: + self.index.add(name, module, module, static, type, self.lineno(), + info, extra, self.conditionals) + else: + self.index.add(name, None, module, static, type, self.lineno(), + info, extra, self.conditionals) def index_add_ref(self, name, module, static, type, info=None, extra = None): - self.index.add_ref(name, module, static, type, self.lineno(), - info, extra) + if self.is_header == 1: + self.index.add_ref(name, module, module, static, type, + self.lineno(), info, extra, self.conditionals) + else: + self.index.add_ref(name, None, module, static, type, self.lineno(), + info, extra, self.conditionals) def warning(self, msg): - if self.no_error: - return - print msg + if self.doc_disable: + return + print(msg) def error(self, msg, token=-1): - if self.no_error: - return + if self.doc_disable: + return - print "Parse Error: " + msg - if token != -1: - print "Got token ", token - self.lexer.debug() - sys.exit(1) + print("Parse Error: " + msg) + if token != -1: + print("Got token ", token) + self.lexer.debug() + sys.exit(1) def debug(self, msg, token=-1): - print "Debug: " + msg - if token != -1: - print "Got token ", token - self.lexer.debug() + print("Debug: " + msg) + if token != -1: + print("Got token ", token) + self.lexer.debug() def parseTopComment(self, comment): - res = {} - 
lines = string.split(comment, "\n") - item = None - for line in lines: - while line != "" and (line[0] == ' ' or line[0] == '\t'): - line = line[1:] - while line != "" and line[0] == '*': - line = line[1:] - while line != "" and (line[0] == ' ' or line[0] == '\t'): - line = line[1:] - try: - (it, line) = string.split(line, ":", 1) - item = it - while line != "" and (line[0] == ' ' or line[0] == '\t'): - line = line[1:] - if res.has_key(item): - res[item] = res[item] + " " + line - else: - res[item] = line - except: - if item != None: - if res.has_key(item): - res[item] = res[item] + " " + line - else: - res[item] = line - self.index.info = res + res = {} + lines = comment.split("\n") + item = None + for line in lines: + while line != "" and (line[0] == ' ' or line[0] == '\t'): + line = line[1:] + while line != "" and line[0] == '*': + line = line[1:] + while line != "" and (line[0] == ' ' or line[0] == '\t'): + line = line[1:] + try: + (it, line) = line.split(":", 1) + item = it + while line != "" and (line[0] == ' ' or line[0] == '\t'): + line = line[1:] + if item in res: + res[item] = res[item] + " " + line + else: + res[item] = line + except: + if item != None: + if item in res: + res[item] = res[item] + " " + line + else: + res[item] = line + self.index.info = res def parseComment(self, token): if self.top_comment == "": - self.top_comment = token[1] - if self.comment == None or token[1][0] == '*': - self.comment = token[1]; - else: - self.comment = self.comment + token[1] - token = self.lexer.token() + self.top_comment = token[1] + if self.comment == None or token[1][0] == '*': + self.comment = token[1]; + else: + self.comment = self.comment + token[1] + token = self.lexer.token() - if string.find(self.comment, "DOC_DISABLE") != -1: - self.stop_error() + if self.comment.find("DOC_DISABLE") != -1: + self.disable() - if string.find(self.comment, "DOC_ENABLE") != -1: - self.start_error() + if self.comment.find("DOC_ENABLE") != -1: + self.enable() - return token + return token - # - # Parse a comment block associate to a macro - # + # + # Parse a simple comment block for typedefs or global variables + # + def parseSimpleComment(self, name, quiet = False): + if name[0:2] == '__': + quiet = 1 + + args = [] + desc = "" + + if self.comment == None: + if not quiet: + self.warning("Missing comment for %s" % (name)) + return(None) + if self.comment[0] != '*': + if not quiet: + self.warning("Missing * in comment for %s" % (name)) + return(None) + lines = self.comment.split('\n') + if lines[0] == '*': + del lines[0] + if lines[0] != "* %s:" % (name): + if not quiet: + self.warning("Misformatted comment for %s" % (name)) + self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) + return(None) + del lines[0] + while len(lines) > 0 and lines[0] == '*': + del lines[0] + desc = "" + while len(lines) > 0: + l = lines[0] + while len(l) > 0 and l[0] == '*': + l = l[1:] + l = l.strip() + desc = desc + " " + l + del lines[0] + + desc = desc.strip() + + if quiet == 0: + if desc == "": + self.warning("Comment for %s lacks description" % (name)) + + return(desc) + # + # Parse a comment block associate to a macro + # def parseMacroComment(self, name, quiet = 0): if name[0:2] == '__': - quiet = 1 + quiet = 1 args = [] - desc = "" + desc = "" if self.comment == None: - if not quiet: - self.warning("Missing comment for macro %s" % (name)) - return((args, desc)) + if not quiet: + self.warning("Missing comment for macro %s" % (name)) + return((args, desc)) if self.comment[0] != '*': - if not quiet: - 
self.warning("Missing * in macro comment for %s" % (name)) - return((args, desc)) - lines = string.split(self.comment, '\n') - if lines[0] == '*': - del lines[0] - if lines[0] != "* %s:" % (name): - if not quiet: - self.warning("Misformatted macro comment for %s" % (name)) - self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) - return((args, desc)) - del lines[0] - while lines[0] == '*': - del lines[0] - while len(lines) > 0 and lines[0][0:3] == '* @': - l = lines[0][3:] - try: - (arg, desc) = string.split(l, ':', 1) - desc=string.strip(desc) - arg=string.strip(arg) + if not quiet: + self.warning("Missing * in macro comment for %s" % (name)) + return((args, desc)) + lines = self.comment.split('\n') + if lines[0] == '*': + del lines[0] + if lines[0] != "* %s:" % (name): + if not quiet: + self.warning("Misformatted macro comment for %s" % (name)) + self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) + return((args, desc)) + del lines[0] + while lines[0] == '*': + del lines[0] + while len(lines) > 0 and lines[0][0:3] == '* @': + l = lines[0][3:] + try: + (arg, desc) = l.split(':', 1) + desc=desc.strip() + arg=arg.strip() except: - if not quiet: - self.warning("Misformatted macro comment for %s" % (name)) - self.warning(" problem with '%s'" % (lines[0])) - del lines[0] - continue - del lines[0] - l = string.strip(lines[0]) - while len(l) > 2 and l[0:3] != '* @': - while l[0] == '*': - l = l[1:] - desc = desc + ' ' + string.strip(l) - del lines[0] - if len(lines) == 0: - break - l = lines[0] + if not quiet: + self.warning("Misformatted macro comment for %s" % (name)) + self.warning(" problem with '%s'" % (lines[0])) + del lines[0] + continue + del lines[0] + l = lines[0].strip() + while len(l) > 2 and l[0:3] != '* @': + while l[0] == '*': + l = l[1:] + desc = desc + ' ' + l.strip() + del lines[0] + if len(lines) == 0: + break + l = lines[0] args.append((arg, desc)) - while len(lines) > 0 and lines[0] == '*': - del lines[0] - desc = "" - while len(lines) > 0: - l = lines[0] - while len(l) > 0 and l[0] == '*': - l = l[1:] - l = string.strip(l) - desc = desc + " " + l - del lines[0] - - desc = string.strip(desc) - - if quiet == 0: - if desc == "": - self.warning("Macro comment for %s lack description of the macro" % (name)) - - return((args, desc)) + while len(lines) > 0 and lines[0] == '*': + del lines[0] + desc = "" + while len(lines) > 0: + l = lines[0] + while len(l) > 0 and l[0] == '*': + l = l[1:] + l = l.strip() + desc = desc + " " + l + del lines[0] + + desc = desc.strip() + + if quiet == 0: + if desc == "": + self.warning("Macro comment for %s lack description of the macro" % (name)) + + return((args, desc)) # # Parse a comment block and merge the information found in the @@ -681,149 +807,206 @@ class CParser: # def mergeFunctionComment(self, name, description, quiet = 0): if name == 'main': - quiet = 1 + quiet = 1 if name[0:2] == '__': - quiet = 1 + quiet = 1 - (ret, args) = description - desc = "" - retdesc = "" + (ret, args) = description + desc = "" + retdesc = "" if self.comment == None: - if not quiet: - self.warning("Missing comment for function %s" % (name)) - return(((ret[0], retdesc), args, desc)) + if not quiet: + self.warning("Missing comment for function %s" % (name)) + return(((ret[0], retdesc), args, desc)) if self.comment[0] != '*': - if not quiet: - self.warning("Missing * in function comment for %s" % (name)) - return(((ret[0], retdesc), args, desc)) - lines = string.split(self.comment, '\n') - if lines[0] == '*': - del lines[0] - if lines[0] != "* 
%s:" % (name): - if not quiet: - self.warning("Misformatted function comment for %s" % (name)) - self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) - return(((ret[0], retdesc), args, desc)) - del lines[0] - while len(lines) > 0 and lines[0] == '*': - del lines[0] - nbargs = len(args) - while len(lines) > 0 and lines[0][0:3] == '* @': - l = lines[0][3:] - try: - (arg, desc) = string.split(l, ':', 1) - desc=string.strip(desc) - arg=string.strip(arg) + if not quiet: + self.warning("Missing * in function comment for %s" % (name)) + return(((ret[0], retdesc), args, desc)) + lines = self.comment.split('\n') + if lines[0] == '*': + del lines[0] + if lines[0] != "* %s:" % (name): + if not quiet: + self.warning("Misformatted function comment for %s" % (name)) + self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0])) + return(((ret[0], retdesc), args, desc)) + del lines[0] + while lines[0] == '*': + del lines[0] + nbargs = len(args) + while len(lines) > 0 and lines[0][0:3] == '* @': + l = lines[0][3:] + try: + (arg, desc) = l.split(':', 1) + desc=desc.strip() + arg=arg.strip() except: - if not quiet: - self.warning("Misformatted function comment for %s" % (name)) - self.warning(" problem with '%s'" % (lines[0])) - del lines[0] - continue - del lines[0] - l = string.strip(lines[0]) - while len(l) > 2 and l[0:3] != '* @': - while l[0] == '*': - l = l[1:] - desc = desc + ' ' + string.strip(l) - del lines[0] - if len(lines) == 0: - break - l = lines[0] - i = 0 - while i < nbargs: - if args[i][1] == arg: - args[i] = (args[i][0], arg, desc) - break; - i = i + 1 - if i >= nbargs: - if not quiet: - self.warning("Unable to find arg %s from function comment for %s" % ( - arg, name)) - while len(lines) > 0 and lines[0] == '*': - del lines[0] - desc = "" - while len(lines) > 0: - l = lines[0] - while len(l) > 0 and l[0] == '*': - l = l[1:] - l = string.strip(l) - if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return": - try: - l = string.split(l, ' ', 1)[1] - except: - l = "" - retdesc = string.strip(l) - del lines[0] - while len(lines) > 0: - l = lines[0] - while len(l) > 0 and l[0] == '*': - l = l[1:] - l = string.strip(l) - retdesc = retdesc + " " + l - del lines[0] - else: - desc = desc + " " + l - del lines[0] - - retdesc = string.strip(retdesc) - desc = string.strip(desc) - - if quiet == 0: - # - # report missing comments - # - i = 0 - while i < nbargs: - if args[i][2] == None and args[i][0] != "void" and args[i][1] != None: - self.warning("Function comment for %s lack description of arg %s" % (name, args[i][1])) - i = i + 1 - if retdesc == "" and ret[0] != "void": - self.warning("Function comment for %s lack description of return value" % (name)) - if desc == "": - self.warning("Function comment for %s lack description of the function" % (name)) - - - return(((ret[0], retdesc), args, desc)) + if not quiet: + self.warning("Misformatted function comment for %s" % (name)) + self.warning(" problem with '%s'" % (lines[0])) + del lines[0] + continue + del lines[0] + l = lines[0].strip() + while len(l) > 2 and l[0:3] != '* @': + while l[0] == '*': + l = l[1:] + desc = desc + ' ' + l.strip() + del lines[0] + if len(lines) == 0: + break + l = lines[0] + i = 0 + while i < nbargs: + if args[i][1] == arg: + args[i] = (args[i][0], arg, desc) + break; + i = i + 1 + if i >= nbargs: + if not quiet: + self.warning("Unable to find arg %s from function comment for %s" % ( + arg, name)) + while len(lines) > 0 and lines[0] == '*': + del lines[0] + desc = "" + while len(lines) > 0: + l = lines[0] + 
while len(l) > 0 and l[0] == '*': + l = l[1:] + l = l.strip() + if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return": + try: + l = l.split(' ', 1)[1] + except: + l = "" + retdesc = l.strip() + del lines[0] + while len(lines) > 0: + l = lines[0] + while len(l) > 0 and l[0] == '*': + l = l[1:] + l = l.strip() + retdesc = retdesc + " " + l + del lines[0] + else: + desc = desc + " " + l + del lines[0] + + retdesc = retdesc.strip() + desc = desc.strip() + + if quiet == 0: + # + # report missing comments + # + i = 0 + while i < nbargs: + if args[i][2] == None and args[i][0] != "void" and \ + ((args[i][1] != None) or (args[i][1] == '')): + self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1])) + i = i + 1 + if retdesc == "" and ret[0] != "void": + self.warning("Function comment for %s lacks description of return value" % (name)) + if desc == "": + self.warning("Function comment for %s lacks description of the function" % (name)) + + return(((ret[0], retdesc), args, desc)) def parsePreproc(self, token): + if debug: + print("=> preproc ", token, self.lexer.tokens) name = token[1] - if name == "#include": - token = self.lexer.token() - if token == None: - return None - if token[0] == 'preproc': - self.index_add(token[1], self.filename, not self.is_header, - "include") - return self.lexer.token() - return token - if name == "#define": - token = self.lexer.token() - if token == None: - return None - if token[0] == 'preproc': - # TODO macros with arguments - name = token[1] - lst = [] - token = self.lexer.token() - while token != None and token[0] == 'preproc' and \ - token[1][0] != '#': - lst.append(token[1]) - token = self.lexer.token() + if name == "#include": + token = self.lexer.token() + if token == None: + return None + if token[0] == 'preproc': + self.index_add(token[1], self.filename, not self.is_header, + "include") + return self.lexer.token() + return token + if name == "#define": + token = self.lexer.token() + if token == None: + return None + if token[0] == 'preproc': + # TODO macros with arguments + name = token[1] + lst = [] + token = self.lexer.token() + while token != None and token[0] == 'preproc' and \ + token[1][0] != '#': + lst.append(token[1]) + token = self.lexer.token() try: - name = string.split(name, '(') [0] + name = name.split('(') [0] except: pass - info = self.parseMacroComment(name, not self.is_header) - self.index_add(name, self.filename, not self.is_header, - "macro", info) - return token - token = self.lexer.token() - while token != None and token[0] == 'preproc' and \ - token[1][0] != '#': - token = self.lexer.token() - return token + info = self.parseMacroComment(name, True) + self.index_add(name, self.filename, not self.is_header, + "macro", info) + return token + + # + # Processing of conditionals modified by Bill 1/1/05 + # + # We process conditionals (i.e. tokens from #ifdef, #ifndef, + # #if, #else and #endif) for headers and mainline code, + # store the ones from the header in libxml2-api.xml, and later + # (in the routine merge_public) verify that the two (header and + # mainline code) agree. + # + # There is a small problem with processing the headers. Some of + # the variables are not concerned with enabling / disabling of + # library functions (e.g. '__XML_PARSER_H__'), and we don't want + # them to be included in libxml2-api.xml, or involved in + # the check between the header and the mainline code. 
To + # accomplish this, we ignore any conditional which doesn't include + # the string 'ENABLED' + # + if name == "#ifdef": + apstr = self.lexer.tokens[0][1] + try: + self.defines.append(apstr) + if apstr.find('ENABLED') != -1: + self.conditionals.append("defined(%s)" % apstr) + except: + pass + elif name == "#ifndef": + apstr = self.lexer.tokens[0][1] + try: + self.defines.append(apstr) + if apstr.find('ENABLED') != -1: + self.conditionals.append("!defined(%s)" % apstr) + except: + pass + elif name == "#if": + apstr = "" + for tok in self.lexer.tokens: + if apstr != "": + apstr = apstr + " " + apstr = apstr + tok[1] + try: + self.defines.append(apstr) + if apstr.find('ENABLED') != -1: + self.conditionals.append(apstr) + except: + pass + elif name == "#else": + if self.conditionals != [] and \ + self.defines[-1].find('ENABLED') != -1: + self.conditionals[-1] = "!(%s)" % self.conditionals[-1] + elif name == "#endif": + if self.conditionals != [] and \ + self.defines[-1].find('ENABLED') != -1: + self.conditionals = self.conditionals[:-1] + self.defines = self.defines[:-1] + token = self.lexer.token() + while token != None and token[0] == 'preproc' and \ + token[1][0] != '#': + token = self.lexer.token() + return token # # token acquisition on top of the lexer, it handle internally @@ -834,218 +1017,227 @@ class CParser: global ignored_words token = self.lexer.token() - while token != None: - if token[0] == 'comment': - token = self.parseComment(token) - continue - elif token[0] == 'preproc': - token = self.parsePreproc(token) - continue - elif token[0] == "name" and ignored_words.has_key(token[1]): - (n, info) = ignored_words[token[1]] - i = 0 - while i < n: - token = self.lexer.token() - i = i + 1 - token = self.lexer.token() - continue - else: - if debug: - print "=> ", token - return token - return None + while token != None: + if token[0] == 'comment': + token = self.parseComment(token) + continue + elif token[0] == 'preproc': + token = self.parsePreproc(token) + continue + elif token[0] == "name" and token[1] == "__const": + token = ("name", "const") + return token + elif token[0] == "name" and token[1] == "__attribute": + token = self.lexer.token() + while token != None and token[1] != ";": + token = self.lexer.token() + return token + elif token[0] == "name" and token[1] in ignored_words: + (n, info) = ignored_words[token[1]] + i = 0 + while i < n: + token = self.lexer.token() + i = i + 1 + token = self.lexer.token() + continue + else: + if debug: + print("=> ", token) + return token + return None # # Parse a typedef, it records the type and its name. 
# def parseTypedef(self, token): if token == None: - return None - token = self.parseType(token) - if token == None: - self.error("parsing typedef") - return None - base_type = self.type - type = base_type - #self.debug("end typedef type", token) - while token != None: - if token[0] == "name": - name = token[1] - signature = self.signature - if signature != None: - type = string.split(type, '(')[0] - d = self.mergeFunctionComment(name, - ((type, None), signature), 1) - self.index_add(name, self.filename, not self.is_header, - "functype", d) - else: - if base_type == "struct": - self.index_add(name, self.filename, not self.is_header, - "struct", type) - base_type = "struct " + name - else: - self.index_add(name, self.filename, not self.is_header, - "typedef", type) - token = self.token() - else: - self.error("parsing typedef: expecting a name") - return token - #self.debug("end typedef", token) - if token != None and token[0] == 'sep' and token[1] == ',': - type = base_type - token = self.token() - while token != None and token[0] == "op": - type = type + token[1] - token = self.token() - elif token != None and token[0] == 'sep' and token[1] == ';': - break; - elif token != None and token[0] == 'name': - type = base_type - continue; - else: - self.error("parsing typedef: expecting ';'", token) - return token - token = self.token() - return token - + return None + token = self.parseType(token) + if token == None: + self.error("parsing typedef") + return None + base_type = self.type + type = base_type + #self.debug("end typedef type", token) + while token != None: + if token[0] == "name": + name = token[1] + signature = self.signature + if signature != None: + type = type.split('(')[0] + d = self.mergeFunctionComment(name, + ((type, None), signature), 1) + self.index_add(name, self.filename, not self.is_header, + "functype", d) + else: + if base_type == "struct": + self.index_add(name, self.filename, not self.is_header, + "struct", type) + base_type = "struct " + name + else: + # TODO report missing or misformatted comments + info = self.parseSimpleComment(name, True) + self.index_add(name, self.filename, not self.is_header, + "typedef", type, info) + token = self.token() + else: + self.error("parsing typedef: expecting a name") + return token + #self.debug("end typedef", token) + if token != None and token[0] == 'sep' and token[1] == ',': + type = base_type + token = self.token() + while token != None and token[0] == "op": + type = type + token[1] + token = self.token() + elif token != None and token[0] == 'sep' and token[1] == ';': + break; + elif token != None and token[0] == 'name': + type = base_type + continue; + else: + self.error("parsing typedef: expecting ';'", token) + return token + token = self.token() + return token + # # Parse a C code block, used for functions it parse till # the balancing } included # def parseBlock(self, token): while token != None: - if token[0] == "sep" and token[1] == "{": - token = self.token() - token = self.parseBlock(token) - elif token[0] == "sep" and token[1] == "}": - self.comment = None - token = self.token() - return token - else: - if self.collect_ref == 1: - oldtok = token - token = self.token() - if oldtok[0] == "name" and oldtok[1][0:3] == "xml": - if token[0] == "sep" and token[1] == "(": - self.index_add_ref(oldtok[1], self.filename, - 0, "function") - token = self.token() - elif token[0] == "name": - token = self.token() - if token[0] == "sep" and (token[1] == ";" or - token[1] == "," or token[1] == "="): - self.index_add_ref(oldtok[1], 
self.filename, - 0, "type") - elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_": - self.index_add_ref(oldtok[1], self.filename, - 0, "typedef") - elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_": - self.index_add_ref(oldtok[1], self.filename, - 0, "typedef") - - else: - token = self.token() - return token + if token[0] == "sep" and token[1] == "{": + token = self.token() + token = self.parseBlock(token) + elif token[0] == "sep" and token[1] == "}": + token = self.token() + return token + else: + if self.collect_ref == 1: + oldtok = token + token = self.token() + if oldtok[0] == "name" and oldtok[1][0:3] == "xml": + if token[0] == "sep" and token[1] == "(": + self.index_add_ref(oldtok[1], self.filename, + 0, "function") + token = self.token() + elif token[0] == "name": + token = self.token() + if token[0] == "sep" and (token[1] == ";" or + token[1] == "," or token[1] == "="): + self.index_add_ref(oldtok[1], self.filename, + 0, "type") + elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_": + self.index_add_ref(oldtok[1], self.filename, + 0, "typedef") + elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_": + self.index_add_ref(oldtok[1], self.filename, + 0, "typedef") + + else: + token = self.token() + return token # # Parse a C struct definition till the balancing } # def parseStruct(self, token): fields = [] - #self.debug("start parseStruct", token) + #self.debug("start parseStruct", token) while token != None: - if token[0] == "sep" and token[1] == "{": - token = self.token() - token = self.parseTypeBlock(token) - elif token[0] == "sep" and token[1] == "}": - self.struct_fields = fields - #self.debug("end parseStruct", token) - #print fields - token = self.token() - return token - else: - base_type = self.type - #self.debug("before parseType", token) - token = self.parseType(token) - #self.debug("after parseType", token) - if token != None and token[0] == "name": - fname = token[1] - token = self.token() - if token[0] == "sep" and token[1] == ";": - self.comment = None - token = self.token() - fields.append((self.type, fname, self.comment)) - self.comment = None - else: - self.error("parseStruct: expecting ;", token) - elif token != None and token[0] == "sep" and token[1] == "{": - token = self.token() - token = self.parseTypeBlock(token) - if token != None and token[0] == "name": - token = self.token() - if token != None and token[0] == "sep" and token[1] == ";": - token = self.token() - else: - self.error("parseStruct: expecting ;", token) - else: - self.error("parseStruct: name", token) - token = self.token() - self.type = base_type; + if token[0] == "sep" and token[1] == "{": + token = self.token() + token = self.parseTypeBlock(token) + elif token[0] == "sep" and token[1] == "}": + self.struct_fields = fields + #self.debug("end parseStruct", token) + #print fields + token = self.token() + return token + else: + base_type = self.type + #self.debug("before parseType", token) + token = self.parseType(token) + #self.debug("after parseType", token) + if token != None and token[0] == "name": + fname = token[1] + token = self.token() + if token[0] == "sep" and token[1] == ";": + self.comment = None + token = self.token() + fields.append((self.type, fname, self.comment)) + self.comment = None + else: + self.error("parseStruct: expecting ;", token) + elif token != None and token[0] == "sep" and token[1] == "{": + token = self.token() + token = self.parseTypeBlock(token) + if token != None and token[0] == "name": + token = self.token() + if token != None and token[0] == "sep" 
and token[1] == ";": + token = self.token() + else: + self.error("parseStruct: expecting ;", token) + else: + self.error("parseStruct: name", token) + token = self.token() + self.type = base_type; self.struct_fields = fields - #self.debug("end parseStruct", token) - #print fields - return token + #self.debug("end parseStruct", token) + #print fields + return token # # Parse a C enum block, parse till the balancing } # def parseEnumBlock(self, token): self.enums = [] - name = None - self.comment = None - comment = "" - value = "0" + name = None + self.comment = None + comment = "" + value = "0" while token != None: - if token[0] == "sep" and token[1] == "{": - token = self.token() - token = self.parseTypeBlock(token) - elif token[0] == "sep" and token[1] == "}": - if name != None: - if self.comment != None: - comment = self.comment - self.comment = None - self.enums.append((name, value, comment)) - token = self.token() - return token - elif token[0] == "name": - if name != None: - if self.comment != None: - comment = string.strip(self.comment) - self.comment = None - self.enums.append((name, value, comment)) - name = token[1] - comment = "" - token = self.token() - if token[0] == "op" and token[1][0] == "=": - value = "" - if len(token[1]) > 1: - value = token[1][1:] - token = self.token() - while token[0] != "sep" or (token[1] != ',' and - token[1] != '}'): - value = value + token[1] - token = self.token() - else: - try: - value = "%d" % (int(value) + 1) - except: - self.warning("Failed to compute value of enum %s" % (name)) - value="" - if token[0] == "sep" and token[1] == ",": - token = self.token() - else: - token = self.token() - return token + if token[0] == "sep" and token[1] == "{": + token = self.token() + token = self.parseTypeBlock(token) + elif token[0] == "sep" and token[1] == "}": + if name != None: + if self.comment != None: + comment = self.comment + self.comment = None + self.enums.append((name, value, comment)) + token = self.token() + return token + elif token[0] == "name": + if name != None: + if self.comment != None: + comment = self.comment.strip() + self.comment = None + self.enums.append((name, value, comment)) + name = token[1] + comment = "" + token = self.token() + if token[0] == "op" and token[1][0] == "=": + value = "" + if len(token[1]) > 1: + value = token[1][1:] + token = self.token() + while token[0] != "sep" or (token[1] != ',' and + token[1] != '}'): + value = value + token[1] + token = self.token() + else: + try: + value = "%d" % (int(value) + 1) + except: + self.warning("Failed to compute value of enum %s" % (name)) + value="" + if token[0] == "sep" and token[1] == ",": + token = self.token() + else: + token = self.token() + return token # # Parse a C definition block, used for structs it parse till @@ -1053,15 +1245,15 @@ class CParser: # def parseTypeBlock(self, token): while token != None: - if token[0] == "sep" and token[1] == "{": - token = self.token() - token = self.parseTypeBlock(token) - elif token[0] == "sep" and token[1] == "}": - token = self.token() - return token - else: - token = self.token() - return token + if token[0] == "sep" and token[1] == "{": + token = self.token() + token = self.parseTypeBlock(token) + elif token[0] == "sep" and token[1] == "}": + token = self.token() + return token + else: + token = self.token() + return token # # Parse a type: the fact that the type name can either occur after @@ -1070,199 +1262,200 @@ class CParser: # def parseType(self, token): self.type = "" - self.struct_fields = [] + self.struct_fields = [] 
self.signature = None - if token == None: - return token - - while token[0] == "name" and ( - token[1] == "const" or token[1] == "unsigned" or - token[1] == "signed"): - if self.type == "": - self.type = token[1] - else: - self.type = self.type + " " + token[1] - token = self.token() + if token == None: + return token + + while token[0] == "name" and ( + token[1] == "const" or \ + token[1] == "unsigned" or \ + token[1] == "signed"): + if self.type == "": + self.type = token[1] + else: + self.type = self.type + " " + token[1] + token = self.token() if token[0] == "name" and (token[1] == "long" or token[1] == "short"): - if self.type == "": - self.type = token[1] - else: - self.type = self.type + " " + token[1] - if token[0] == "name" and token[1] == "int": - if self.type == "": - self.type = tmp[1] - else: - self.type = self.type + " " + tmp[1] - + if self.type == "": + self.type = token[1] + else: + self.type = self.type + " " + token[1] + if token[0] == "name" and token[1] == "int": + if self.type == "": + self.type = tmp[1] + else: + self.type = self.type + " " + tmp[1] + elif token[0] == "name" and token[1] == "struct": - if self.type == "": - self.type = token[1] - else: - self.type = self.type + " " + token[1] - token = self.token() - nametok = None - if token[0] == "name": - nametok = token - token = self.token() - if token != None and token[0] == "sep" and token[1] == "{": - token = self.token() - token = self.parseStruct(token) - elif token != None and token[0] == "op" and token[1] == "*": - self.type = self.type + " " + nametok[1] + " *" - token = self.token() - while token != None and token[0] == "op" and token[1] == "*": - self.type = self.type + " *" - token = self.token() - if token[0] == "name": - nametok = token - token = self.token() - else: - self.error("struct : expecting name", token) - return token - elif token != None and token[0] == "name" and nametok != None: - self.type = self.type + " " + nametok[1] - return token - - if nametok != None: - self.lexer.push(token) - token = nametok - return token + if self.type == "": + self.type = token[1] + else: + self.type = self.type + " " + token[1] + token = self.token() + nametok = None + if token[0] == "name": + nametok = token + token = self.token() + if token != None and token[0] == "sep" and token[1] == "{": + token = self.token() + token = self.parseStruct(token) + elif token != None and token[0] == "op" and token[1] == "*": + self.type = self.type + " " + nametok[1] + " *" + token = self.token() + while token != None and token[0] == "op" and token[1] == "*": + self.type = self.type + " *" + token = self.token() + if token[0] == "name": + nametok = token + token = self.token() + else: + self.error("struct : expecting name", token) + return token + elif token != None and token[0] == "name" and nametok != None: + self.type = self.type + " " + nametok[1] + return token + + if nametok != None: + self.lexer.push(token) + token = nametok + return token elif token[0] == "name" and token[1] == "enum": - if self.type == "": - self.type = token[1] - else: - self.type = self.type + " " + token[1] - self.enums = [] - token = self.token() - if token != None and token[0] == "sep" and token[1] == "{": - token = self.token() - token = self.parseEnumBlock(token) - else: - self.error("parsing enum: expecting '{'", token) - enum_type = None - if token != None and token[0] != "name": - self.lexer.push(token) - token = ("name", "enum") - else: - enum_type = token[1] - for enum in self.enums: - self.index_add(enum[0], self.filename, - not 
self.is_header, "enum", - (enum[1], enum[2], enum_type)) - return token - - elif token[0] == "name": - if self.type == "": - self.type = token[1] - else: - self.type = self.type + " " + token[1] - else: - self.error("parsing type %s: expecting a name" % (self.type), - token) - return token - token = self.token() + if self.type == "": + self.type = token[1] + else: + self.type = self.type + " " + token[1] + self.enums = [] + token = self.token() + if token != None and token[0] == "sep" and token[1] == "{": + token = self.token() + token = self.parseEnumBlock(token) + else: + self.error("parsing enum: expecting '{'", token) + enum_type = None + if token != None and token[0] != "name": + self.lexer.push(token) + token = ("name", "enum") + else: + enum_type = token[1] + for enum in self.enums: + self.index_add(enum[0], self.filename, + not self.is_header, "enum", + (enum[1], enum[2], enum_type)) + return token + + elif token[0] == "name": + if self.type == "": + self.type = token[1] + else: + self.type = self.type + " " + token[1] + else: + self.error("parsing type %s: expecting a name" % (self.type), + token) + return token + token = self.token() while token != None and (token[0] == "op" or - token[0] == "name" and token[1] == "const"): - self.type = self.type + " " + token[1] - token = self.token() - - # - # if there is a parenthesis here, this means a function type - # - if token != None and token[0] == "sep" and token[1] == '(': - self.type = self.type + token[1] - token = self.token() - while token != None and token[0] == "op" and token[1] == '*': - self.type = self.type + token[1] - token = self.token() - if token == None or token[0] != "name" : - self.error("parsing function type, name expected", token); - return token - self.type = self.type + token[1] - nametok = token - token = self.token() - if token != None and token[0] == "sep" and token[1] == ')': - self.type = self.type + token[1] - token = self.token() - if token != None and token[0] == "sep" and token[1] == '(': - token = self.token() - type = self.type; - token = self.parseSignature(token); - self.type = type; - else: - self.error("parsing function type, '(' expected", token); - return token - else: - self.error("parsing function type, ')' expected", token); - return token - self.lexer.push(token) - token = nametok - return token + token[0] == "name" and token[1] == "const"): + self.type = self.type + " " + token[1] + token = self.token() + + # + # if there is a parenthesis here, this means a function type + # + if token != None and token[0] == "sep" and token[1] == '(': + self.type = self.type + token[1] + token = self.token() + while token != None and token[0] == "op" and token[1] == '*': + self.type = self.type + token[1] + token = self.token() + if token == None or token[0] != "name" : + self.error("parsing function type, name expected", token); + return token + self.type = self.type + token[1] + nametok = token + token = self.token() + if token != None and token[0] == "sep" and token[1] == ')': + self.type = self.type + token[1] + token = self.token() + if token != None and token[0] == "sep" and token[1] == '(': + token = self.token() + type = self.type; + token = self.parseSignature(token); + self.type = type; + else: + self.error("parsing function type, '(' expected", token); + return token + else: + self.error("parsing function type, ')' expected", token); + return token + self.lexer.push(token) + token = nametok + return token # - # do some lookahead for arrays - # - if token != None and token[0] == "name": - nametok 
= token - token = self.token() - if token != None and token[0] == "sep" and token[1] == '[': - self.type = self.type + nametok[1] - while token != None and token[0] == "sep" and token[1] == '[': - self.type = self.type + token[1] - token = self.token() - while token != None and token[0] != 'sep' and \ - token[1] != ']' and token[1] != ';': - self.type = self.type + token[1] - token = self.token() - if token != None and token[0] == 'sep' and token[1] == ']': - self.type = self.type + token[1] - token = self.token() - else: - self.error("parsing array type, ']' expected", token); - return token - elif token != None and token[0] == "sep" and token[1] == ':': - # remove :12 in case it's a limited int size - token = self.token() - token = self.token() - self.lexer.push(token) - token = nametok - - return token + # do some lookahead for arrays + # + if token != None and token[0] == "name": + nametok = token + token = self.token() + if token != None and token[0] == "sep" and token[1] == '[': + self.type = self.type + nametok[1] + while token != None and token[0] == "sep" and token[1] == '[': + self.type = self.type + token[1] + token = self.token() + while token != None and token[0] != 'sep' and \ + token[1] != ']' and token[1] != ';': + self.type = self.type + token[1] + token = self.token() + if token != None and token[0] == 'sep' and token[1] == ']': + self.type = self.type + token[1] + token = self.token() + else: + self.error("parsing array type, ']' expected", token); + return token + elif token != None and token[0] == "sep" and token[1] == ':': + # remove :12 in case it's a limited int size + token = self.token() + token = self.token() + self.lexer.push(token) + token = nametok + + return token # # Parse a signature: '(' has been parsed and we scan the type definition # up to the ')' included def parseSignature(self, token): signature = [] - if token != None and token[0] == "sep" and token[1] == ')': - self.signature = [] - token = self.token() - return token - while token != None: - token = self.parseType(token) - if token != None and token[0] == "name": - signature.append((self.type, token[1], None)) - token = self.token() - elif token != None and token[0] == "sep" and token[1] == ',': - token = self.token() - continue - elif token != None and token[0] == "sep" and token[1] == ')': - # only the type was provided - if self.type == "...": - signature.append((self.type, "...", None)) - else: - signature.append((self.type, None, None)) - if token != None and token[0] == "sep": - if token[1] == ',': - token = self.token() - continue - elif token[1] == ')': - token = self.token() - break - self.signature = signature - return token + if token != None and token[0] == "sep" and token[1] == ')': + self.signature = [] + token = self.token() + return token + while token != None: + token = self.parseType(token) + if token != None and token[0] == "name": + signature.append((self.type, token[1], None)) + token = self.token() + elif token != None and token[0] == "sep" and token[1] == ',': + token = self.token() + continue + elif token != None and token[0] == "sep" and token[1] == ')': + # only the type was provided + if self.type == "...": + signature.append((self.type, "...", None)) + else: + signature.append((self.type, None, None)) + if token != None and token[0] == "sep": + if token[1] == ',': + token = self.token() + continue + elif token[1] == ')': + token = self.token() + break + self.signature = signature + return token # # Parse a global definition, be it a type, variable or function @@ 
-1271,594 +1464,425 @@ class CParser: def parseGlobal(self, token): static = 0 if token[1] == 'extern': - token = self.token() - if token == None: - return token - if token[0] == 'string': - if token[1] == 'C': - token = self.token() - if token == None: - return token - if token[0] == 'sep' and token[1] == "{": - token = self.token() -# print 'Entering extern "C line ', self.lineno() - while token != None and (token[0] != 'sep' or - token[1] != "}"): - if token[0] == 'name': - token = self.parseGlobal(token) - else: - self.error( - "token %s %s unexpected at the top level" % ( - token[0], token[1])) - token = self.parseGlobal(token) -# print 'Exiting extern "C" line', self.lineno() - token = self.token() - return token - else: - return token - elif token[1] == 'static': - static = 1 - token = self.token() - if token == None or token[0] != 'name': - return token - - if token[1] == 'typedef': - token = self.token() - return self.parseTypedef(token) - else: - token = self.parseType(token) - type_orig = self.type - if token == None or token[0] != "name": - return token - type = type_orig - self.name = token[1] - token = self.token() - while token != None and (token[0] == "sep" or token[0] == "op"): - if token[0] == "sep": - if token[1] == "[": - type = type + token[1] - token = self.token() - while token != None and (token[0] != "sep" or \ - token[1] != ";"): - type = type + token[1] - token = self.token() - - if token != None and token[0] == "op" and token[1] == "=": - # - # Skip the initialization of the variable - # - token = self.token() - if token[0] == 'sep' and token[1] == '{': - token = self.token() - token = self.parseBlock(token) - else: - self.comment = None - while token != None and (token[0] != "sep" or \ - (token[1] != ';' and token[1] != ',')): - token = self.token() - self.comment = None - if token == None or token[0] != "sep" or (token[1] != ';' and - token[1] != ','): - self.error("missing ';' or ',' after value") - - if token != None and token[0] == "sep": - if token[1] == ";": - self.comment = None - token = self.token() - if type == "struct": - self.index_add(self.name, self.filename, - not self.is_header, "struct", self.struct_fields) - else: - self.index_add(self.name, self.filename, - not self.is_header, "variable", type) - break - elif token[1] == "(": - token = self.token() - token = self.parseSignature(token) - if token == None: - return None - if token[0] == "sep" and token[1] == ";": - d = self.mergeFunctionComment(self.name, - ((type, None), self.signature), 1) - self.index_add(self.name, self.filename, static, - "function", d) - token = self.token() - elif token[0] == "sep" and token[1] == "{": - d = self.mergeFunctionComment(self.name, - ((type, None), self.signature), static) - self.index_add(self.name, self.filename, static, - "function", d) - token = self.token() - token = self.parseBlock(token); - elif token[1] == ',': - self.comment = None - self.index_add(self.name, self.filename, static, - "variable", type) - type = type_orig - token = self.token() - while token != None and token[0] == "sep": - type = type + token[1] - token = self.token() - if token != None and token[0] == "name": - self.name = token[1] - token = self.token() - else: - break - - return token + token = self.token() + if token == None: + return token + if token[0] == 'string': + if token[1] == 'C': + token = self.token() + if token == None: + return token + if token[0] == 'sep' and token[1] == "{": + token = self.token() +# print 'Entering extern "C line ', self.lineno() + while token != 
None and (token[0] != 'sep' or + token[1] != "}"): + if token[0] == 'name': + token = self.parseGlobal(token) + else: + self.error( + "token %s %s unexpected at the top level" % ( + token[0], token[1])) + token = self.parseGlobal(token) +# print 'Exiting extern "C" line', self.lineno() + token = self.token() + return token + else: + return token + elif token[1] == 'static': + static = 1 + token = self.token() + if token == None or token[0] != 'name': + return token + + if token[1] == 'typedef': + token = self.token() + return self.parseTypedef(token) + else: + token = self.parseType(token) + type_orig = self.type + if token == None or token[0] != "name": + return token + type = type_orig + self.name = token[1] + token = self.token() + while token != None and (token[0] == "sep" or token[0] == "op"): + if token[0] == "sep": + if token[1] == "[": + type = type + token[1] + token = self.token() + while token != None and (token[0] != "sep" or \ + token[1] != ";"): + type = type + token[1] + token = self.token() + + if token != None and token[0] == "op" and token[1] == "=": + # + # Skip the initialization of the variable + # + token = self.token() + if token[0] == 'sep' and token[1] == '{': + token = self.token() + token = self.parseBlock(token) + else: + while token != None and (token[0] != "sep" or \ + (token[1] != ';' and token[1] != ',')): + token = self.token() + if token == None or token[0] != "sep" or (token[1] != ';' and + token[1] != ','): + self.error("missing ';' or ',' after value") + + if token != None and token[0] == "sep": + if token[1] == ";": + if type == "struct": + self.index_add(self.name, self.filename, + not self.is_header, "struct", self.struct_fields) + else: + info = self.parseSimpleComment(self.name, True) + self.index_add(self.name, self.filename, + not self.is_header, "variable", type, info) + self.comment = None + token = self.token() + break + elif token[1] == "(": + token = self.token() + token = self.parseSignature(token) + if token == None: + return None + if token[0] == "sep" and token[1] == ";": + d = self.mergeFunctionComment(self.name, + ((type, None), self.signature), 1) + self.index_add(self.name, self.filename, static, + "function", d) + self.comment = None + token = self.token() + elif token[0] == "sep" and token[1] == "{": + d = self.mergeFunctionComment(self.name, + ((type, None), self.signature), static) + self.index_add(self.name, self.filename, static, + "function", d) + self.comment = None + token = self.token() + token = self.parseBlock(token); + elif token[1] == ',': + self.index_add(self.name, self.filename, static, + "variable", type) + self.comment = None + type = type_orig + token = self.token() + while token != None and token[0] == "sep": + type = type + token[1] + token = self.token() + if token != None and token[0] == "name": + self.name = token[1] + token = self.token() + else: + break + + return token def parse(self): self.warning("Parsing %s" % (self.filename)) token = self.token() - while token != None: + while token != None: if token[0] == 'name': - token = self.parseGlobal(token) + token = self.parseGlobal(token) else: - self.error("token %s %s unexpected at the top level" % ( - token[0], token[1])) - token = self.parseGlobal(token) - return - self.parseTopComment(self.top_comment) + self.error("token %s %s unexpected at the top level" % ( + token[0], token[1])) + token = self.parseGlobal(token) + return + self.parseTopComment(self.top_comment) return self.index - + class docBuilder: """A documentation builder""" def __init__(self, 
name, directories=['.'], excludes=[]): self.name = name self.directories = directories - self.excludes = excludes + ignored_files.keys() - self.modules = {} - self.headers = {} - self.idx = index() - self.xref = {} - self.index = {} - if name == 'libxml2': - self.basename = 'libxml' - else: - self.basename = name - - def indexString(self, id, str): - if str == None: - return - str = string.replace(str, "'", ' ') - str = string.replace(str, '"', ' ') - str = string.replace(str, "/", ' ') - str = string.replace(str, '*', ' ') - str = string.replace(str, "[", ' ') - str = string.replace(str, "]", ' ') - str = string.replace(str, "(", ' ') - str = string.replace(str, ")", ' ') - str = string.replace(str, "<", ' ') - str = string.replace(str, '>', ' ') - str = string.replace(str, "&", ' ') - str = string.replace(str, '#', ' ') - str = string.replace(str, ",", ' ') - str = string.replace(str, '.', ' ') - str = string.replace(str, ';', ' ') - tokens = string.split(str) - for token in tokens: - try: - c = token[0] - if string.find(string.letters, c) < 0: - pass - elif len(token) < 3: - pass - else: - lower = string.lower(token) - # TODO: generalize this a bit - if lower == 'and' or lower == 'the': - pass - elif self.xref.has_key(token): - self.xref[token].append(id) - else: - self.xref[token] = [id] - except: - pass + self.excludes = excludes + list(ignored_files.keys()) + self.modules = {} + self.headers = {} + self.idx = index() + self.index = {} + if name == 'libxml2': + self.basename = 'libxml' + else: + self.basename = name + self.outdir = None def analyze(self): - print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys())) - self.idx.analyze() + print("Project %s : %d headers, %d modules" % (self.name, len(list(self.headers.keys())), len(list(self.modules.keys())))) + self.idx.analyze() def scanHeaders(self): - for header in self.headers.keys(): - parser = CParser(header) - idx = parser.parse() - self.headers[header] = idx; - self.idx.merge(idx) + for header in list(self.headers.keys()): + parser = CParser(header) + idx = parser.parse() + self.headers[header] = idx; + self.idx.merge(idx) def scanModules(self): - for module in self.modules.keys(): - parser = CParser(module) - idx = parser.parse() - # idx.analyze() - self.modules[module] = idx - self.idx.merge_public(idx) + for module in list(self.modules.keys()): + parser = CParser(module) + idx = parser.parse() + # idx.analyze() + self.modules[module] = idx + self.idx.merge_public(idx) def scan(self): for directory in self.directories: - files = glob.glob(directory + "/*.c") - for file in files: - skip = 0 - for excl in self.excludes: - if string.find(file, excl) != -1: - skip = 1; - break - if skip == 0: - self.modules[file] = None; - files = glob.glob(directory + "/*.h") - for file in files: - skip = 0 - for excl in self.excludes: - if string.find(file, excl) != -1: - skip = 1; - break - if skip == 0: - self.headers[file] = None; - self.scanHeaders() - self.scanModules() - + files = glob.glob(directory + "/*.c") + for file in files: + skip = 0 + for excl in self.excludes: + if file.find(excl) != -1: + print("Skipping %s" % file) + skip = 1 + break + if skip == 0: + self.modules[file] = None; + files = glob.glob(directory + "/*.h") + for file in files: + skip = 0 + for excl in self.excludes: + if file.find(excl) != -1: + print("Skipping %s" % file) + skip = 1 + break + if skip == 0: + self.headers[file] = None; + self.scanHeaders() + self.scanModules() + def modulename_file(self, file): 
module = os.path.basename(file) - if module[-2:] == '.h': - module = module[:-2] - return module + if module[-2:] == '.h': + module = module[:-2] + elif module[-2:] == '.c': + module = module[:-2] + return module def serialize_enum(self, output, name): id = self.idx.enums[name] output.write(" \n") def serialize_macro(self, output, name): id = self.idx.macros[name] output.write(" \n" % (name, - self.modulename_file(id.module))) - if id.info != None: + self.modulename_file(id.header))) + if id.info != None: try: - (args, desc) = id.info - if desc != None and desc != "": - output.write(" %s\n" % (escape(desc))) - self.indexString(name, desc) - for arg in args: - (name, desc) = arg - if desc != None and desc != "": - output.write(" \n" % ( - name, escape(desc))) - self.indexString(name, desc) - else: - output.write(" \n" % (name)) + (args, desc) = id.info + if desc != None and desc != "": + output.write(" %s\n" % (escape(desc))) + for arg in args: + (name, desc) = arg + if desc != None and desc != "": + output.write(" \n" % ( + name, escape(desc))) + else: + output.write(" \n" % (name)) except: pass output.write(" \n") def serialize_typedef(self, output, name): id = self.idx.typedefs[name] - if id.info[0:7] == 'struct ': - output.write(" \n"); - try: - for field in self.idx.structs[name].info: - desc = field[2] - self.indexString(name, desc) - if desc == None: - desc = '' - else: - desc = escape(desc) - output.write(" \n" % (field[1] , field[0], desc)) - except: - print "Failed to serialize struct %s" % (name) - output.write(" \n") - else: - output.write("/>\n"); - else : - output.write(" \n" % ( - name, self.modulename_file(id.module), id.info)) + if id.info[0:7] == 'struct ': + output.write(" \n"); + try: + for field in self.idx.structs[name].info: + desc = field[2] + if desc == None: + desc = '' + else: + desc = escape(desc) + output.write(" \n" % (field[1] , field[0], desc)) + except: + print("Failed to serialize struct %s" % (name)) + output.write(" \n") + else: + output.write("/>\n"); + else : + output.write(" \n %s\n" % (escape(desc))) + output.write(" \n") + else: + output.write("/>\n") + except: + output.write("/>\n") def serialize_variable(self, output, name): id = self.idx.variables[name] - if id.info != None: - output.write(" \n" % ( - name, self.modulename_file(id.module), id.info)) - else: - output.write(" \n" % ( - name, self.modulename_file(id.module))) - + if id.info != None: + output.write(" \n %s\n" % (escape(desc))) + output.write(" \n") + else: + output.write("/>\n") + def serialize_function(self, output, name): id = self.idx.functions[name] - output.write(" <%s name='%s' file='%s'>\n" % (id.type, name, - self.modulename_file(id.module))) - try: - (ret, params, desc) = id.info - output.write(" %s\n" % (escape(desc))) - self.indexString(name, desc) - if ret[0] != None: - if ret[0] == "void": - output.write(" \n") - else: - output.write(" \n" % ( - ret[0], escape(ret[1]))) - self.indexString(name, ret[1]) - for param in params: - if param[0] == 'void': - continue - if param[2] == None: - output.write(" \n" % (param[1], param[0])) - else: - output.write(" \n" % (param[1], param[0], escape(param[2]))) - self.indexString(name, param[2]) - except: - print "Failed to save function %s info: " % name, `id.info` + if name == debugsym: + print("=>", id) + + output.write(" <%s name='%s' file='%s' module='%s'>\n" % (id.type, + name, self.modulename_file(id.header), + self.modulename_file(id.module))) + # + # Processing of conditionals modified by Bill 1/1/05 + # + if id.conditionals != 
None: + apstr = "" + for cond in id.conditionals: + if apstr != "": + apstr = apstr + " && " + apstr = apstr + cond + output.write(" %s\n"% (apstr)); + try: + (ret, params, desc) = id.info + if (desc == None or desc == '') and \ + name[0:9] != "xmlThrDef" and name != "xmlDllMain": + print("%s %s from %s has no description" % (id.type, name, + self.modulename_file(id.module))) + + output.write(" %s\n" % (escape(desc))) + if ret[0] != None: + if ret[0] == "void": + output.write(" \n") + else: + output.write(" \n" % ( + ret[0], escape(ret[1]))) + for param in params: + if param[0] == 'void': + continue + if param[2] == None: + output.write(" \n" % (param[1], param[0])) + else: + output.write(" \n" % (param[1], param[0], escape(param[2]))) + except: + print("Failed to save function %s info: " % name, repr(id.info)) output.write(" \n" % (id.type)) def serialize_exports(self, output, file): module = self.modulename_file(file) - output.write(" \n" % (module)) - dict = self.headers[file] - if dict.info != None: - for data in ('Summary', 'Description', 'Author'): - try: - output.write(" <%s>%s\n" % ( - string.lower(data), - escape(dict.info[data]), - string.lower(data))) - except: - print "Header %s lacks a %s description" % (module, data) - if dict.info.has_key('Description'): - desc = dict.info['Description'] - if string.find(desc, "DEPRECATED") != -1: - output.write(" \n") - - ids = dict.macros.keys() - ids.sort() - for id in uniq(ids): - # Macros are sometime used to masquerade other types. - if dict.functions.has_key(id): - continue - if dict.variables.has_key(id): - continue - if dict.typedefs.has_key(id): - continue - if dict.structs.has_key(id): - continue - if dict.enums.has_key(id): - continue - output.write(" \n" % (id)) - ids = dict.enums.keys() - ids.sort() - for id in uniq(ids): - output.write(" \n" % (id)) - ids = dict.typedefs.keys() - ids.sort() - for id in uniq(ids): - output.write(" \n" % (id)) - ids = dict.structs.keys() - ids.sort() - for id in uniq(ids): - output.write(" \n" % (id)) - ids = dict.variables.keys() - ids.sort() - for id in uniq(ids): - output.write(" \n" % (id)) - ids = dict.functions.keys() - ids.sort() - for id in uniq(ids): - output.write(" \n" % (id)) - output.write(" \n") - - def serialize_xrefs_files(self, output): - headers = self.headers.keys() - headers.sort() - for file in headers: - module = self.modulename_file(file) - output.write(" \n" % (module)) - dict = self.headers[file] - ids = uniq(dict.functions.keys() + dict.variables.keys() + \ - dict.macros.keys() + dict.typedefs.keys() + \ - dict.structs.keys() + dict.enums.keys()) - ids.sort() - for id in ids: - output.write(" \n" % (id)) - output.write(" \n") - pass - - def serialize_xrefs_functions(self, output): - funcs = {} - for name in self.idx.functions.keys(): - id = self.idx.functions[name] - try: - (ret, params, desc) = id.info - for param in params: - if param[0] == 'void': - continue - if funcs.has_key(param[0]): - funcs[param[0]].append(name) - else: - funcs[param[0]] = [name] - except: - pass - typ = funcs.keys() - typ.sort() - for type in typ: - if type == '' or type == 'void' or type == "int" or \ - type == "char *" or type == "const char *" : - continue - output.write(" \n" % (type)) - ids = funcs[type] - ids.sort() - pid = '' # not sure why we have dups, but get rid of them! 
- for id in ids: - if id != pid: - output.write(" \n" % (id)) - pid = id - output.write(" \n") - - def serialize_xrefs_constructors(self, output): - funcs = {} - for name in self.idx.functions.keys(): - id = self.idx.functions[name] - try: - (ret, params, desc) = id.info - if ret[0] == "void": - continue - if funcs.has_key(ret[0]): - funcs[ret[0]].append(name) - else: - funcs[ret[0]] = [name] - except: - pass - typ = funcs.keys() - typ.sort() - for type in typ: - if type == '' or type == 'void' or type == "int" or \ - type == "char *" or type == "const char *" : - continue - output.write(" \n" % (type)) - ids = funcs[type] - ids.sort() - for id in ids: - output.write(" \n" % (id)) - output.write(" \n") - - def serialize_xrefs_alpha(self, output): - letter = None - ids = self.idx.identifiers.keys() - ids.sort() - for id in ids: - if id[0] != letter: - if letter != None: - output.write(" \n") - letter = id[0] - output.write(" \n" % (letter)) - output.write(" \n" % (id)) - if letter != None: - output.write(" \n") - - def serialize_xrefs_references(self, output): - typ = self.idx.identifiers.keys() - typ.sort() - for id in typ: - idf = self.idx.identifiers[id] - module = idf.module - output.write(" \n" % (id, - 'html/' + self.basename + '-' + - self.modulename_file(module) + '.html#' + - id)) - - def serialize_xrefs_index(self, output): - index = self.xref - typ = index.keys() - typ.sort() - letter = None - count = 0 - chunk = 0 - chunks = [] - for id in typ: - if len(index[id]) > 30: - continue - if id[0] != letter: - if letter == None or count > 200: - if letter != None: - output.write(" \n") - output.write(" \n") - count = 0 - chunks.append(["chunk%s" % (chunk -1), first_letter, letter]) - output.write(" \n" % (chunk)) - first_letter = id[0] - chunk = chunk + 1 - elif letter != None: - output.write(" \n") - letter = id[0] - output.write(" \n" % (letter)) - output.write(" \n" % (id)) - tokens = index[id]; - tokens.sort() - tok = None - for token in tokens: - if tok == token: - continue - tok = token - output.write(" \n" % (token)) - count = count + 1 - output.write(" \n") - if letter != None: - output.write(" \n") - output.write(" \n") - if count != 0: - chunks.append(["chunk%s" % (chunk -1), first_letter, letter]) - output.write(" \n") - for ch in chunks: - output.write(" \n" % ( - ch[0], ch[1], ch[2])) - output.write(" \n") - - def serialize_xrefs(self, output): - output.write(" \n") - self.serialize_xrefs_references(output) - output.write(" \n") - output.write(" \n") - self.serialize_xrefs_alpha(output) - output.write(" \n") - output.write(" \n") - self.serialize_xrefs_constructors(output) - output.write(" \n") - output.write(" \n") - self.serialize_xrefs_functions(output) - output.write(" \n") - output.write(" \n") - self.serialize_xrefs_files(output) - output.write(" \n") - output.write(" \n") - self.serialize_xrefs_index(output) - output.write(" \n") - - def serialize(self, outdir): - filename = outdir + "%s-api.xml" % self.name - print "Saving XML description %s" % (filename) + output.write(" \n" % (module)) + dict = self.headers[file] + if dict.info != None: + for data in ('Summary', 'Description', 'Author'): + try: + output.write(" <%s>%s\n" % ( + data.lower(), + escape(dict.info[data]), + data.lower())) + except: + print("Header %s lacks a %s description" % (module, data)) + if 'Description' in dict.info: + desc = dict.info['Description'] + if desc.find("DEPRECATED") != -1: + output.write(" \n") + + ids = list(dict.macros.keys()) + ids.sort() + for id in ids: + # Macros are 
sometime used to masquerade other types. + if id in dict.functions: + continue + if id in dict.variables: + continue + if id in dict.typedefs: + continue + if id in dict.structs: + continue + if id in dict.enums: + continue + output.write(" \n" % (id)) + ids = list(dict.enums.keys()) + ids.sort() + for id in ids: + output.write(" \n" % (id)) + ids = list(dict.typedefs.keys()) + ids.sort() + for id in ids: + output.write(" \n" % (id)) + ids = list(dict.structs.keys()) + ids.sort() + for id in ids: + output.write(" \n" % (id)) + ids = list(dict.variables.keys()) + ids.sort() + for id in ids: + output.write(" \n" % (id)) + ids = list(dict.functions.keys()) + ids.sort() + for id in ids: + output.write(" \n" % (id)) + output.write(" \n") + + def serialize(self): + filename = "%s-api.xml" % self.name + if self.outdir: + filename = "%s/%s" % (self.outdir, filename) + print("Saving XML description %s" % (filename)) output = open(filename, "w") output.write('\n') output.write("\n" % self.name) output.write(" \n") - headers = self.headers.keys() + headers = list(self.headers.keys()) headers.sort() for file in headers: self.serialize_exports(output, file) output.write(" \n") output.write(" \n") - macros = self.idx.macros.keys() + macros = list(self.idx.macros.keys()) macros.sort() for macro in macros: self.serialize_macro(output, macro) - enums = self.idx.enums.keys() + enums = list(self.idx.enums.keys()) enums.sort() for enum in enums: self.serialize_enum(output, enum) - typedefs = self.idx.typedefs.keys() + typedefs = list(self.idx.typedefs.keys()) typedefs.sort() for typedef in typedefs: self.serialize_typedef(output, typedef) - variables = self.idx.variables.keys() + variables = list(self.idx.variables.keys()) variables.sort() for variable in variables: self.serialize_variable(output, variable) - functions = self.idx.functions.keys() + functions = list(self.idx.functions.keys()) functions.sort() for function in functions: self.serialize_function(output, function) @@ -1866,41 +1890,33 @@ class docBuilder: output.write("\n") output.close() - filename = outdir + "%s-refs.xml" % self.name - print "Saving XML Cross References %s" % (filename) - output = open(filename, "w") - output.write('\n') - output.write("\n" % self.name) - self.serialize_xrefs(output) - output.write("\n") - output.close() - def rebuild(): builder = None if glob.glob("parser.c") != [] : - print "Rebuilding API description for libxml2" - builder = docBuilder("libxml2", [".", "."], - ["xmlwin32version.h", "tst.c"]) + print("Rebuilding API description for libxml2") + builder = docBuilder("libxml2", [".", "."], + ["tst.c"]) elif glob.glob("../parser.c") != [] : - print "Rebuilding API description for libxml2" - builder = docBuilder("libxml2", ["..", "../include/libxml"], - ["xmlwin32version.h", "tst.c"]) + print("Rebuilding API description for libxml2") + builder = docBuilder("libxml2", ["..", "../include/libxml"], + ["tst.c"]) elif glob.glob("../libxslt/transform.c") != [] : - print "Rebuilding API description for libxslt" - builder = docBuilder("libxslt", ["../libxslt"], - ["win32config.h", "libxslt.h", "tst.c"]) + print("Rebuilding API description for libxslt") + builder = docBuilder("libxslt", ["../libxslt"], + ["win32config.h", "libxslt.h", "tst.c"]) else: - print "rebuild() failed, unable to guess the module" - return None + print("rebuild() failed, unable to guess the module") + return None builder.scan() builder.analyze() - builder.serialize("./") + builder.serialize() if glob.glob("../libexslt/exslt.c") != [] : extra = 
docBuilder("libexslt", ["../libexslt"], ["libexslt.h"]) - extra.scan() - extra.analyze() - extra.serialize("EXSLT/") + extra.outdir = "EXSLT" + extra.scan() + extra.analyze() + extra.serialize() return builder # @@ -1912,4 +1928,8 @@ def parse(filename): return idx if __name__ == "__main__": - rebuild() + if len(sys.argv) > 1: + debug = 1 + parse(sys.argv[1]) + else: + rebuild() -- cgit v1.2.1