diff options
author | Nick Wellnhofer <wellnhofer@aevum.de> | 2023-03-31 16:47:48 +0200 |
---|---|---|
committer | Nick Wellnhofer <wellnhofer@aevum.de> | 2023-04-07 12:03:28 +0200 |
commit | 73210eeda5782da3414517cab20550d137e2dd25 (patch) | |
tree | 65998c7a7190d8287dc4f30e9675b66f1fe0e319 | |
parent | 9a76bfef0f8ea9731155dfa4ed0338792e374a79 (diff) | |
download | libxml2-73210eeda5782da3414517cab20550d137e2dd25.tar.gz |
SAX2: Ignore namespaces in HTML documents
In commit 21ca8829, we started to ignore namespaces in HTML element
names but we still called xmlSplitQName, effectively stripping the
namespace prefix. This would cause elements like <o:p> to be parsed
as <p>. Now we leave the name untouched.
Fixes #508.
-rw-r--r-- | SAX2.c | 15 | ||||
-rw-r--r-- | result/HTML/names.html | 6 | ||||
-rw-r--r-- | result/HTML/names.html.err | 3 | ||||
-rw-r--r-- | result/HTML/names.html.sax | 20 | ||||
-rw-r--r-- | test/HTML/names.html | 5 |
5 files changed, 43 insertions, 6 deletions
```diff
@@ -1608,12 +1608,15 @@ xmlSAX2StartElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
         ctxt->validate = 0;
     }
 
-
-    /*
-     * Split the full name into a namespace prefix and the tag name
-     */
-    name = xmlSplitQName(ctxt, fullname, &prefix);
-
+    if (ctxt->html) {
+        prefix = NULL;
+        name = xmlStrdup(fullname);
+    } else {
+        /*
+         * Split the full name into a namespace prefix and the tag name
+         */
+        name = xmlSplitQName(ctxt, fullname, &prefix);
+    }
 
     /*
      * Note : the namespace resolution is deferred until the end of the
diff --git a/result/HTML/names.html b/result/HTML/names.html
new file mode 100644
index 00000000..dd7dcc2e
--- /dev/null
+++ b/result/HTML/names.html
@@ -0,0 +1,6 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<html>
+<body>
+  <o:p></o:p>
+</body>
+</html>
diff --git a/result/HTML/names.html.err b/result/HTML/names.html.err
new file mode 100644
index 00000000..4d91a5d2
--- /dev/null
+++ b/result/HTML/names.html.err
@@ -0,0 +1,3 @@
+./test/HTML/names.html:3: HTML parser error : Tag o:p invalid
+  <o:p></o:p>
+ ^
diff --git a/result/HTML/names.html.sax b/result/HTML/names.html.sax
new file mode 100644
index 00000000..12a107f8
--- /dev/null
+++ b/result/HTML/names.html.sax
@@ -0,0 +1,20 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElement(html)
+SAX.characters(
+, 1)
+SAX.startElement(body)
+SAX.characters(
+  , 3)
+SAX.startElement(o:p)
+SAX.error: Tag o:p invalid
+SAX.endElement(o:p)
+SAX.characters(
+, 1)
+SAX.endElement(body)
+SAX.characters(
+, 1)
+SAX.endElement(html)
+SAX.characters(
+, 1)
+SAX.endDocument()
diff --git a/test/HTML/names.html b/test/HTML/names.html
new file mode 100644
index 00000000..0dac7a47
--- /dev/null
+++ b/test/HTML/names.html
@@ -0,0 +1,5 @@
+<html>
+<body>
+  <o:p></o:p>
+</body>
+</html>
```