- started to implement new XML tools

- prepared introduction of SCons XSD namespace
author: Dirk Baechle <dl9obn@darc.de> 2013-05-04 00:01:09 +0200
committer: Dirk Baechle <dl9obn@darc.de> 2013-05-04 00:01:09 +0200
commit: ab18361bfaedcc3247a78a66522f314d0d9ad708 (patch)
tree: a70fac176ccfba4a95aa971a84a1e63b15a1f2e9 /bin/SConsDoc.py
parent: da92ab79fa66c92291f72119c7e081c60e9b9ca5 (diff)
download: scons-ab18361bfaedcc3247a78a66522f314d0d9ad708.tar.gz
1 files changed, 232 insertions, 189 deletions
diff --git a/bin/SConsDoc.py b/bin/SConsDoc.py
index 4927dc04..8889923f 100644
--- a/bin/SConsDoc.py
+++ b/bin/SConsDoc.py
@@ -112,7 +112,173 @@ import imp
 import os.path
 import re
 import sys
-import xml.sax.handler
+
+# Probe for the optional libxml2/libxslt/lxml bindings; a miss only clears the flag
+has_libxml2 = True
+has_lxml = True
+try:
+    import libxml2
+    import libxslt
+except ImportError:
+    has_libxml2 = False
+try:
+    import lxml
+except ImportError:
+    has_lxml = False
+
+
+re_entity = re.compile(r"&([^;]+);")
+
+entity_header = """<!DOCTYPE sconsdoc [
+<!ENTITY % scons SYSTEM 'scons.mod'>
+%scons;
+<!ENTITY % builders-mod SYSTEM 'builders.mod'>
+%builders-mod;
+<!ENTITY % functions-mod SYSTEM 'functions.mod'>
+%functions-mod;
+<!ENTITY % tools-mod SYSTEM 'tools.mod'>
+%tools-mod;
+<!ENTITY % variables-mod SYSTEM 'variables.mod'>
+%variables-mod;
+]>"""
+
+# Namespace for the SCons Docbook XSD
+dbxsd="http://www.scons.org/dbxsd/v1.0"
+
+xml_header = """<?xml version="1.0" encoding="UTF-8"?>
+<!--
+__COPYRIGHT__
+
+This file is processed by the bin/SConsDoc.py module.
+See its __doc__ string for a discussion of the format.
+-->
+
+%s
+
+<sconsdoc xmlns="http://www.scons.org/dbxsd/v1.0"
+          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="%s scons.xsd">
+""" % (entity_header, dbxsd)
+
+def remove_entities(content):
+    # Cut out entity inclusions
+    content = content.replace(entity_header, "")
+    # Cut out entities themselves
+    content = re_entity.sub(lambda match: match.group(1), content)
+    
+    return content
+
+default_xsd = os.path.join('doc','xsd','scons.xsd')
+
+def validate_xml(fpath, xmlschema_context):
+    """Return True if fpath validates (via libxml2, lxml or xmllint), else False."""
+    if not has_libxml2:
+        # At the moment we prefer libxml2 over lxml, the latter can lead
+        # to conflicts when installed together with libxml2.
+        if has_lxml:
+            # Use lxml
+            from lxml import etree
+            xmlschema = etree.XMLSchema(xmlschema_context)
+            doc = etree.parse(fpath)
+            try:
+                xmlschema.assertValid(doc)
+            except etree.DocumentInvalid:
+                return False
+            return True
+        else:
+            # Try xmllint as a last fallback
+            try:
+                import subprocess
+                p = subprocess.Popen(['xmllint','--noout','--noent','--schema',default_xsd,fpath],
+                                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                sout, serr = p.communicate()
+                if serr and not 'validates' in serr:
+                    print serr
+                    return False
+                
+                return True
+            except OSError:
+                print "Can't validate %s! Neither lxml/libxml2, nor xmllint found." % fpath
+                return False
+
+    # Read file and resolve entities
+    doc = libxml2.readFile(fpath, None, libxml2.XML_PARSE_NOENT)
+    err = xmlschema_context.schemaValidateDoc(doc)
+    # Cleanup
+    doc.freeDoc()
+    
+    if err:
+        # TODO: report the validation result code (err) to the user
+        return False
+        
+    return True
+
+perc="%"
+
+def validate_all_xml(dpath='src', xsdfile=default_xsd):
+    """Validate every *.xml file below dpath against xsdfile; True if none fail."""
+    xmlschema_context = None
+    if not has_libxml2:
+        # At the moment we prefer libxml2 over lxml, the latter can lead
+        # to conflicts when installed together with libxml2.
+        if has_lxml:
+            # Use lxml
+            from lxml import etree
+            xmlschema_context = etree.parse(xsdfile)
+    else:
+        # Use libxml2 and prepare the schema validation context
+        ctxt = libxml2.schemaNewParserCtxt(xsdfile)
+        schema = ctxt.schemaParse()
+        del ctxt
+        xmlschema_context = schema.schemaNewValidCtxt()
+    
+    fpaths = []
+    for path, dirs, files in os.walk(dpath):
+        for f in files:
+            if f.endswith('.xml'):
+                fp = os.path.join(path, f)
+                fpaths.append(fp)
+                
+    fails = []
+    for idx, fp in enumerate(fpaths):
+        print "%.2f%s (%d/%d) %s" % (float(idx+1)*100.0/float(len(fpaths)),
+                                     perc, idx+1, len(fpaths),fp)
+                                              
+        if not validate_xml(fp, xmlschema_context):
+            fails.append(fp)
+            continue
+
+    if has_libxml2:
+        # Cleanup
+        del xmlschema_context
+        del schema
+
+    if fails:
+        return False
+    
+    return True
+
+try:
+  from lxml import etree
+except ImportError:
+  try:
+    # Python 2.5
+    import xml.etree.cElementTree as etree
+  except ImportError:
+    try:
+      # Python 2.5
+      import xml.etree.ElementTree as etree
+    except ImportError:
+      try:
+        # normal cElementTree install
+        import cElementTree as etree
+      except ImportError:
+        try:
+          # normal ElementTree install
+          import elementtree.ElementTree as etree
+        except ImportError:
+          print("Failed to import ElementTree from any known place")
+          sys.exit(1)
 
 class Item(object):
     def __init__(self, name):
@@ -121,8 +287,8 @@ class Item(object):
         if self.sort_name[0] == '_':
             self.sort_name = self.sort_name[1:]
         self.summary = []
-        self.sets = None
-        self.uses = None
+        self.sets = []
+        self.uses = []
     def cmp_name(self, name):
         if name[0] == '_':
             name = name[1:]
@@ -175,201 +341,78 @@ class Arguments(object):
     def append(self, data):
         self.body.append(data)
 
-class Summary(object):
+class SConsDocHandler(object):
     def __init__(self):
-        self.body = []
-        self.collect = []
-    def append(self, data):
-        self.collect.append(data)
-    def end_para(self):
-        text = ''.join(self.collect)
-        paras = text.split('\n\n')
-        if paras == ['\n']:
-            return
-        if paras[0] == '':
-            self.body.append('\n')
-            paras = paras[1:]
-            paras[0] = '\n' + paras[0]
-        if paras[-1] == '':
-            paras = paras[:-1]
-            paras[-1] = paras[-1] + '\n'
-            last = '\n'
-        else:
-            last = None
-        sep = None
-        for p in paras:
-            c = Chunk("para", p)
-            if sep:
-                self.body.append(sep)
-            self.body.append(c)
-            sep = '\n'
-        if last:
-            self.body.append(last)
-    def begin_chunk(self, chunk):
-        self.end_para()
-        self.collect = chunk
-    def end_chunk(self):
-        self.body.append(self.collect)
-        self.collect = []
-
-class SConsDocHandler(xml.sax.handler.ContentHandler,
-                      xml.sax.handler.ErrorHandler):
-    def __init__(self):
-        self._start_dispatch = {}
-        self._end_dispatch = {}
-        keys = list(self.__class__.__dict__.keys())
-        start_tag_method_names = [k for k in keys if k[:6] == 'start_']
-        end_tag_method_names = [k for k in keys if k[:4] == 'end_']
-        for method_name in start_tag_method_names:
-            tag = method_name[6:]
-            self._start_dispatch[tag] = getattr(self, method_name)
-        for method_name in end_tag_method_names:
-            tag = method_name[4:]
-            self._end_dispatch[tag] = getattr(self, method_name)
-        self.stack = []
-        self.collect = []
-        self.current_object = []
         self.builders = {}
         self.functions = {}
         self.tools = {}
         self.cvars = {}
 
-    def startElement(self, name, attrs):
-        try:
-            start_element_method = self._start_dispatch[name]
-        except KeyError:
-            self.characters('<%s>' % name)
-        else:
-            start_element_method(attrs)
+    def parseText(self, root):
+        txt = root.text or ""  # etree keeps leading text on the element itself
+        for e in root:
+            if e.tail:  # text following a child element belongs to root
+                txt += e.tail
+        return txt
 
-    def endElement(self, name):
-        try:
-            end_element_method = self._end_dispatch[name]
-        except KeyError:
-            self.characters('</%s>' % name)
-        else:
-            end_element_method()
-
-    #
-    #
-    def characters(self, chars):
-        self.collect.append(chars)
-
-    def begin_collecting(self, chunk):
-        self.collect = chunk
-    def end_collecting(self):
-        self.collect = []
-
-    def begin_chunk(self):
-        pass
-    def end_chunk(self):
-        pass
-
-    #
-    #
-    #
-
-    def begin_xxx(self, obj):
-        self.stack.append(self.current_object)
-        self.current_object = obj
-    def end_xxx(self):
-        self.current_object = self.stack.pop()
-
-    #
-    #
-    #
-    def start_scons_doc(self, attrs):
-        pass
-    def end_scons_doc(self):
-        pass
-
-    def start_builder(self, attrs):
-        name = attrs.get('name')
-        try:
-            builder = self.builders[name]
-        except KeyError:
-            builder = Builder(name)
-            self.builders[name] = builder
-        self.begin_xxx(builder)
-    def end_builder(self):
-        self.end_xxx()
-
-    def start_scons_function(self, attrs):
-        name = attrs.get('name')
-        try:
-            function = self.functions[name]
-        except KeyError:
-            function = Function(name)
-            self.functions[name] = function
-        self.begin_xxx(function)
-    def end_scons_function(self):
-        self.end_xxx()
-
-    def start_tool(self, attrs):
-        name = attrs.get('name')
-        try:
-            tool = self.tools[name]
-        except KeyError:
-            tool = Tool(name)
-            self.tools[name] = tool
-        self.begin_xxx(tool)
-    def end_tool(self):
-        self.end_xxx()
-
-    def start_cvar(self, attrs):
-        name = attrs.get('name')
+    def parseItems(self, domelem):
+        """Return the text of every namespaced <item> child of domelem."""
+        items = []
+        for i in domelem.iterchildren(tag="{%s}item" % dbxsd):
+            items.append(self.parseText(i))
+
+        return items
+
+    def parseUsesSets(self, domelem):
+        """Return the sorted (uses, sets) item lists found directly below domelem."""
+        uses = []
+        sets = []
+        for u in domelem.iterchildren(tag="{%s}uses" % dbxsd):
+            uses.extend(self.parseItems(u))
+        for s in domelem.iterchildren(tag="{%s}sets" % dbxsd):
+            sets.extend(self.parseItems(s))
+        
+        return sorted(uses), sorted(sets)
+
+    def parseInstance(self, domelem, map, Class):
+        name = domelem.attrib.get('name','unknown')
         try:
-            cvar = self.cvars[name]
+            instance = map[name]
         except KeyError:
-            cvar = ConstructionVariable(name)
-            self.cvars[name] = cvar
-        self.begin_xxx(cvar)
-    def end_cvar(self):
-        self.end_xxx()
-
-    def start_arguments(self, attrs):
-        arguments = Arguments(attrs.get('signature', "both"))
-        self.current_object.arguments.append(arguments)
-        self.begin_xxx(arguments)
-        self.begin_collecting(arguments)
-    def end_arguments(self):
-        self.end_xxx()
-
-    def start_summary(self, attrs):
-        summary = Summary()
-        self.current_object.summary = summary
-        self.begin_xxx(summary)
-        self.begin_collecting(summary)
-    def end_summary(self):
-        self.current_object.end_para()
-        self.end_xxx()
-
-    def start_example(self, attrs):
-        example = Chunk("programlisting")
-        self.current_object.begin_chunk(example)
-    def end_example(self):
-        self.current_object.end_chunk()
-
-    def start_uses(self, attrs):
-        self.begin_collecting([])
-    def end_uses(self):
-        self.current_object.uses = sorted(''.join(self.collect).split())
-        self.end_collecting()
-
-    def start_sets(self, attrs):
-        self.begin_collecting([])
-    def end_sets(self):
-        self.current_object.sets = sorted(''.join(self.collect).split())
-        self.end_collecting()
-
-    # Stuff for the ErrorHandler portion.
-    def error(self, exception):
-        linenum = exception._linenum - self.preamble_lines
-        sys.stderr.write('%s:%d:%d: %s (error)\n' % (self.filename, linenum, exception._colnum, ''.join(exception.args)))
-
-    def fatalError(self, exception):
-        linenum = exception._linenum - self.preamble_lines
-        sys.stderr.write('%s:%d:%d: %s (fatalError)\n' % (self.filename, linenum, exception._colnum, ''.join(exception.args)))
+            instance = Class(name)
+            map[name] = instance
+        uses, sets = self.parseUsesSets(domelem)
+        instance.uses.extend(uses)
+        instance.sets.extend(sets)
+        # TODO: parse summary and function blobs (not implemented yet)
+        
+    def parseDomtree(self, root):    
+        # Process Builders
+        for b in root.iterchildren(tag="{%s}builder" % dbxsd):
+            self.parseInstance(b, self.builders, Builder)
+        # Process Functions
+        for f in root.iterchildren(tag="{%s}scons_function" % dbxsd):
+            self.parseInstance(f, self.functions, Function)
+        # Process Tools
+        for t in root.iterchildren(tag="{%s}tool" % dbxsd):
+            self.parseInstance(t, self.tools, Tool)
+        # Process CVars
+        for c in root.iterchildren(tag="{%s}cvar" % dbxsd):
+            self.parseInstance(c, self.cvars, ConstructionVariable)
+        
+    def parseContent(self, content, include_entities=True):
+        if not include_entities:
+            content = remove_entities(content)
+        # Create domtree from given content string
+        root = etree.fromstring(content)
+        # Parse it
+        self.parseDomtree(root)
+
+    def parseXmlFile(self, fpath):
+        # Create domtree from file
+        domtree = etree.parse(fpath)
+        # Parse it
+        self.parseDomtree(domtree.getroot())
 
     def set_file_info(self, filename, preamble_lines):
         self.filename = filename
author	Dirk Baechle <dl9obn@darc.de>	2013-05-04 00:01:09 +0200
committer	Dirk Baechle <dl9obn@darc.de>	2013-05-04 00:01:09 +0200
commit	ab18361bfaedcc3247a78a66522f314d0d9ad708 (patch)
tree	a70fac176ccfba4a95aa971a84a1e63b15a1f2e9 /bin/SConsDoc.py
parent	da92ab79fa66c92291f72119c7e081c60e9b9ca5 (diff)
download	scons-ab18361bfaedcc3247a78a66522f314d0d9ad708.tar.gz