mkhtml2: add a fake resolver to improve speed

This is a workaround for missing DTD caching in libxml2 that turns into extreme overhead for documents with many xincludes.
author: Stefan Sauer <ensonic@users.sf.net> 2019-12-23 12:07:23 +0100
committer: Stefan Sauer <ensonic@users.sf.net> 2019-12-23 12:21:02 +0100
commit: fe0ee24ab59c6d81fe219e9875fab6879d5646d2 (patch)
tree: 96d6756b5e620af45d3fa4a152cdc35a2a3221c9
parent: 643d6b00d54118adec92e38f039a31efc7f87c69 (diff)
download: gtk-doc-fe0ee24ab59c6d81fe219e9875fab6879d5646d2.tar.gz
1 files changed, 18 insertions, 4 deletions
diff --git a/gtkdoc/mkhtml2.py b/gtkdoc/mkhtml2.py
index f54a80d..2eb5c73 100644
--- a/gtkdoc/mkhtml2.py
+++ b/gtkdoc/mkhtml2.py
@@ -1751,6 +1751,21 @@ def create_devhelp2(out_dir, module, xml, files):
             idx.write(line)
 
 
+class FakeDTDResolver(etree.Resolver):
+    """Don't load the docbookx.dtd since we disable the validation anyway.
+
+    libxml2 does not cache DTDs. If we produce a docbook file with 100 chunks
+    loading such a doc with xincluding will load and parse the docbook DTD 100
+    times. This causes tons of memory allocations and is slow.
+    """
+
+    def resolve(self, url, id, context):
+        if not url.endswith('.dtd'):
+            return None
+        # http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd
+        return self.resolve_string('', context)
+
+
 def main(module, index_file, out_dir, uninstalled, src_lang, paths):
 
     # == Loading phase ==
@@ -1758,10 +1773,9 @@ def main(module, index_file, out_dir, uninstalled, src_lang, paths):
 
     # 1) load the docuemnt
     _t = timer()
-    # does not seem to be faster
-    # parser = etree.XMLParser(dtd_validation=False, collect_ids=False)
-    # tree = etree.parse(index_file, parser)
-    tree = etree.parse(index_file)
+    parser = etree.XMLParser(dtd_validation=False, collect_ids=False)
+    parser.resolvers.add(FakeDTDResolver())
+    tree = etree.parse(index_file, parser)
     logging.warning("1a: %7.3lf: load doc", timer() - _t)
     _t = timer()
     tree.xinclude()
author	Stefan Sauer <ensonic@users.sf.net>	2019-12-23 12:07:23 +0100
committer	Stefan Sauer <ensonic@users.sf.net>	2019-12-23 12:21:02 +0100
commit	fe0ee24ab59c6d81fe219e9875fab6879d5646d2 (patch)
tree	96d6756b5e620af45d3fa4a152cdc35a2a3221c9
parent	643d6b00d54118adec92e38f039a31efc7f87c69 (diff)
download	gtk-doc-fe0ee24ab59c6d81fe219e9875fab6879d5646d2.tar.gz