diff options
author | Dirk Baechle <dl9obn@darc.de> | 2013-05-04 00:01:09 +0200 |
---|---|---|
committer | Dirk Baechle <dl9obn@darc.de> | 2013-05-04 00:01:09 +0200 |
commit | ab18361bfaedcc3247a78a66522f314d0d9ad708 (patch) | |
tree | a70fac176ccfba4a95aa971a84a1e63b15a1f2e9 /bin/SConsDoc.py | |
parent | da92ab79fa66c92291f72119c7e081c60e9b9ca5 (diff) | |
download | scons-ab18361bfaedcc3247a78a66522f314d0d9ad708.tar.gz |
- started to implement new XML tools
- prepared introduction of SCons XSD namespace
Diffstat (limited to 'bin/SConsDoc.py')
-rw-r--r-- | bin/SConsDoc.py | 421 |
1 files changed, 232 insertions, 189 deletions
diff --git a/bin/SConsDoc.py b/bin/SConsDoc.py index 4927dc04..8889923f 100644 --- a/bin/SConsDoc.py +++ b/bin/SConsDoc.py @@ -112,7 +112,173 @@ import imp import os.path import re import sys -import xml.sax.handler + +# Do we have libxml2/libxslt/lxml? +has_libxml2 = True +has_lxml = True +try: + import libxml2 + import libxslt +except: + has_libxml2 = False +try: + import lxml +except: + has_lxml = False + + +re_entity = re.compile("\&([^;]+);") + +entity_header = """<!DOCTYPE sconsdoc [ +<!ENTITY % scons SYSTEM 'scons.mod'> +%scons; +<!ENTITY % builders-mod SYSTEM 'builders.mod'> +%builders-mod; +<!ENTITY % functions-mod SYSTEM 'functions.mod'> +%functions-mod; +<!ENTITY % tools-mod SYSTEM 'tools.mod'> +%tools-mod; +<!ENTITY % variables-mod SYSTEM 'variables.mod'> +%variables-mod; +]>""" + +# Namespace for the SCons Docbook XSD +dbxsd="http://www.scons.org/dbxsd/v1.0" + +xml_header = """<?xml version="1.0" encoding="UTF-8"?> +<!-- +__COPYRIGHT__ + +This file is processed by the bin/SConsDoc.py module. +See its __doc__ string for a discussion of the format. +--> + +%s + +<sconsdoc xmlns="http://www.scons.org/dbxsd/v1.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="%s scons.xsd"> +""" % (entity_header, dbxsd) + +def remove_entities(content): + # Cut out entity inclusions + content = content.replace(entity_header, "") + # Cut out entities themselves + content = re_entity.sub(lambda match: match.group(1), content) + + return content + +default_xsd = os.path.join('doc','xsd','scons.xsd') + +def validate_xml(fpath, xmlschema_context): + if not has_libxml2: + # At the moment we prefer libxml2 over lxml, the latter can lead + # to conflicts when installed together with libxml2. + if has_lxml: + # Use lxml + from lxml import etree + xmlschema = etree.XMLSchema(xmlschema_context) + doc = etree.parse(fpath) + try: + xmlschema.assertValid(doc) + except: + return False + return True + else: + # Try xmllint as a last fallback + try: + import subprocess + p = subprocess.Popen(['xmllint','--noout','--noent','--schema',default_xsd,fpath], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + sout, serr = p.communicate() + if serr and not 'validates' in serr: + print serr + return False + + return True + + except: + print "Can't validate %s! Neither lxml/libxml2, nor xmllint found." % fpath + return False + + # Read file and resolve entities + doc = libxml2.readFile(fpath, None, libxml2.XML_PARSE_NOENT) + err = xmlschema_context.schemaValidateDoc(doc) + # Cleanup + doc.freeDoc() + + if err: + # TODO: print error message "Haha",err + return False + + return True + +perc="%" + +def validate_all_xml(dpath='src', xsdfile=default_xsd): + xmlschema_context = None + if not has_libxml2: + # At the moment we prefer libxml2 over lxml, the latter can lead + # to conflicts when installed together with libxml2. + if has_lxml: + # Use lxml + from lxml import etree + xmlschema_context = etree.parse(xsdfile) + else: + # Use libxml2 and prepare the schema validation context + ctxt = libxml2.schemaNewParserCtxt(xsdfile) + schema = ctxt.schemaParse() + del ctxt + xmlschema_context = schema.schemaNewValidCtxt() + + fpaths = [] + for path, dirs, files in os.walk(dpath): + for f in files: + if f.endswith('.xml'): + fp = os.path.join(path, f) + fpaths.append(fp) + + fails = [] + for idx, fp in enumerate(fpaths): + fpath = os.path.join(path, f) + print "%.2f%s (%d/%d) %s" % (float(idx+1)*100.0/float(len(fpaths)), + perc, idx+1, len(fpaths),fp) + + if not validate_xml(fp, xmlschema_context): + fails.append(fp) + continue + + if has_libxml2: + # Cleanup + del xmlschema_context + del schema + + if fails: + return False + + return True + +try: + from lxml import etree +except ImportError: + try: + # Python 2.5 + import xml.etree.cElementTree as etree + except ImportError: + try: + # Python 2.5 + import xml.etree.ElementTree as etree + except ImportError: + try: + # normal cElementTree install + import cElementTree as etree + except ImportError: + try: + # normal ElementTree install + import elementtree.ElementTree as etree + except ImportError: + print("Failed to import ElementTree from any known place") + sys.exit(1) class Item(object): def __init__(self, name): @@ -121,8 +287,8 @@ class Item(object): if self.sort_name[0] == '_': self.sort_name = self.sort_name[1:] self.summary = [] - self.sets = None - self.uses = None + self.sets = [] + self.uses = [] def cmp_name(self, name): if name[0] == '_': name = name[1:] @@ -175,201 +341,78 @@ class Arguments(object): def append(self, data): self.body.append(data) -class Summary(object): +class SConsDocHandler(object): def __init__(self): - self.body = [] - self.collect = [] - def append(self, data): - self.collect.append(data) - def end_para(self): - text = ''.join(self.collect) - paras = text.split('\n\n') - if paras == ['\n']: - return - if paras[0] == '': - self.body.append('\n') - paras = paras[1:] - paras[0] = '\n' + paras[0] - if paras[-1] == '': - paras = paras[:-1] - paras[-1] = paras[-1] + '\n' - last = '\n' - else: - last = None - sep = None - for p in paras: - c = Chunk("para", p) - if sep: - self.body.append(sep) - self.body.append(c) - sep = '\n' - if last: - self.body.append(last) - def begin_chunk(self, chunk): - self.end_para() - self.collect = chunk - def end_chunk(self): - self.body.append(self.collect) - self.collect = [] - -class SConsDocHandler(xml.sax.handler.ContentHandler, - xml.sax.handler.ErrorHandler): - def __init__(self): - self._start_dispatch = {} - self._end_dispatch = {} - keys = list(self.__class__.__dict__.keys()) - start_tag_method_names = [k for k in keys if k[:6] == 'start_'] - end_tag_method_names = [k for k in keys if k[:4] == 'end_'] - for method_name in start_tag_method_names: - tag = method_name[6:] - self._start_dispatch[tag] = getattr(self, method_name) - for method_name in end_tag_method_names: - tag = method_name[4:] - self._end_dispatch[tag] = getattr(self, method_name) - self.stack = [] - self.collect = [] - self.current_object = [] self.builders = {} self.functions = {} self.tools = {} self.cvars = {} - def startElement(self, name, attrs): - try: - start_element_method = self._start_dispatch[name] - except KeyError: - self.characters('<%s>' % name) - else: - start_element_method(attrs) + def parseText(self, root): + txt = "" + for e in root.childNodes: + if (e.nodeType == e.TEXT_NODE): + txt += e.data + return txt - def endElement(self, name): - try: - end_element_method = self._end_dispatch[name] - except KeyError: - self.characters('</%s>' % name) - else: - end_element_method() - - # - # - def characters(self, chars): - self.collect.append(chars) - - def begin_collecting(self, chunk): - self.collect = chunk - def end_collecting(self): - self.collect = [] - - def begin_chunk(self): - pass - def end_chunk(self): - pass - - # - # - # - - def begin_xxx(self, obj): - self.stack.append(self.current_object) - self.current_object = obj - def end_xxx(self): - self.current_object = self.stack.pop() - - # - # - # - def start_scons_doc(self, attrs): - pass - def end_scons_doc(self): - pass - - def start_builder(self, attrs): - name = attrs.get('name') - try: - builder = self.builders[name] - except KeyError: - builder = Builder(name) - self.builders[name] = builder - self.begin_xxx(builder) - def end_builder(self): - self.end_xxx() - - def start_scons_function(self, attrs): - name = attrs.get('name') - try: - function = self.functions[name] - except KeyError: - function = Function(name) - self.functions[name] = function - self.begin_xxx(function) - def end_scons_function(self): - self.end_xxx() - - def start_tool(self, attrs): - name = attrs.get('name') - try: - tool = self.tools[name] - except KeyError: - tool = Tool(name) - self.tools[name] = tool - self.begin_xxx(tool) - def end_tool(self): - self.end_xxx() - - def start_cvar(self, attrs): - name = attrs.get('name') + def parseItems(self, domelem): + items = [] + + for i in domelem.iterchildren(tag="item"): + items.append(self.parseText(i)) + + return items + + def parseUsesSets(self, domelem): + uses = [] + sets = [] + + for u in domelem.iterchildren(tag="uses"): + uses.extend(self.parseItems(u)) + for s in domelem.iterchildren(tag="sets"): + sets.extend(self.parseItems(s)) + + return sorted(uses), sorted(sets) + + def parseInstance(self, domelem, map, Class): + name = domelem.attrib.get('name','unknown') try: - cvar = self.cvars[name] + instance = map[name] except KeyError: - cvar = ConstructionVariable(name) - self.cvars[name] = cvar - self.begin_xxx(cvar) - def end_cvar(self): - self.end_xxx() - - def start_arguments(self, attrs): - arguments = Arguments(attrs.get('signature', "both")) - self.current_object.arguments.append(arguments) - self.begin_xxx(arguments) - self.begin_collecting(arguments) - def end_arguments(self): - self.end_xxx() - - def start_summary(self, attrs): - summary = Summary() - self.current_object.summary = summary - self.begin_xxx(summary) - self.begin_collecting(summary) - def end_summary(self): - self.current_object.end_para() - self.end_xxx() - - def start_example(self, attrs): - example = Chunk("programlisting") - self.current_object.begin_chunk(example) - def end_example(self): - self.current_object.end_chunk() - - def start_uses(self, attrs): - self.begin_collecting([]) - def end_uses(self): - self.current_object.uses = sorted(''.join(self.collect).split()) - self.end_collecting() - - def start_sets(self, attrs): - self.begin_collecting([]) - def end_sets(self): - self.current_object.sets = sorted(''.join(self.collect).split()) - self.end_collecting() - - # Stuff for the ErrorHandler portion. - def error(self, exception): - linenum = exception._linenum - self.preamble_lines - sys.stderr.write('%s:%d:%d: %s (error)\n' % (self.filename, linenum, exception._colnum, ''.join(exception.args))) - - def fatalError(self, exception): - linenum = exception._linenum - self.preamble_lines - sys.stderr.write('%s:%d:%d: %s (fatalError)\n' % (self.filename, linenum, exception._colnum, ''.join(exception.args))) + instance = Class(name) + map[name] = instance + uses, sets = self.parseUsesSets(domelem) + instance.uses.extend(uses) + instance.sets.extend(sets) + # Parse summary and function blobs + + def parseDomtree(self, root): + # Process Builders + for b in root.iterchildren(tag="{%s}builder" % dbxsd): + self.parseInstance(b, self.builders, Builder) + # Process Functions + for f in root.iterchildren(tag="{%s}scons_function" % dbxsd): + self.parseInstance(f, self.functions, Function) + # Process Tools + for t in root.iterchildren(tag="{%s}tool" % dbxsd): + self.parseInstance(t, self.tools, Tool) + # Process CVars + for c in root.iterchildren(tag="{%s}cvar" % dbxsd): + self.parseInstance(c, self.cvars, ConstructionVariable) + + def parseContent(self, content, include_entities=True): + if not include_entities: + content = remove_entities(content) + # Create domtree from given content string + root = etree.fromstring(content) + # Parse it + self.parseDomtree(root) + + def parseXmlFile(self, fpath): + # Create domtree from file + domtree = etree.parse(fpath) + # Parse it + self.parseDomtree(domtree.getroot()) def set_file_info(self, filename, preamble_lines): self.filename = filename |