summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--result/boundaries1.xml15
-rw-r--r--result/boundaries1.xml.rde19
-rw-r--r--result/boundaries1.xml.rdr21
-rw-r--r--result/boundaries1.xml.sax32
-rw-r--r--result/boundaries1.xml.sax233
-rw-r--r--result/noent/boundaries1.xml15
-rw-r--r--result/noent/boundaries1.xml.sax232
-rw-r--r--runtest.c306
-rw-r--r--test/boundaries1.xml16
9 files changed, 489 insertions, 0 deletions
diff --git a/result/boundaries1.xml b/result/boundaries1.xml
new file mode 100644
index 00000000..dc1848f4
--- /dev/null
+++ b/result/boundaries1.xml
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<!DOCTYPE d [
+<!ENTITY a "]>">
+<!ENTITY b "]>">
+<!--> ]> -->]>
+<?pi p1?>
+<!--> c1 -->
+<d a="&gt;" b="&gt;">
+text&a;text
+<![CDATA[cdata]]>
+<?pi p2?>
+<!--> c2 -->
+</d>
+<?pi p3?>
+<!--> c3 -->
diff --git a/result/boundaries1.xml.rde b/result/boundaries1.xml.rde
new file mode 100644
index 00000000..113b487a
--- /dev/null
+++ b/result/boundaries1.xml.rde
@@ -0,0 +1,19 @@
+0 10 d 0 0
+0 7 pi 0 1 p1
+0 8 #comment 0 1 > c1
+0 1 d 0 0
+1 3 #text 0 1
+text]>text
+
+1 4 #cdata-section 0 1 cdata
+1 14 #text 0 1
+
+1 7 pi 0 1 p2
+1 14 #text 0 1
+
+1 8 #comment 0 1 > c2
+1 14 #text 0 1
+
+0 15 d 0 0
+0 7 pi 0 1 p3
+0 8 #comment 0 1 > c3
diff --git a/result/boundaries1.xml.rdr b/result/boundaries1.xml.rdr
new file mode 100644
index 00000000..784ece01
--- /dev/null
+++ b/result/boundaries1.xml.rdr
@@ -0,0 +1,21 @@
+0 10 d 0 0
+0 7 pi 0 1 p1
+0 8 #comment 0 1 > c1
+0 1 d 0 0
+1 3 #text 0 1
+text
+1 5 a 0 0
+1 3 #text 0 1 text
+
+1 4 #cdata-section 0 1 cdata
+1 14 #text 0 1
+
+1 7 pi 0 1 p2
+1 14 #text 0 1
+
+1 8 #comment 0 1 > c2
+1 14 #text 0 1
+
+0 15 d 0 0
+0 7 pi 0 1 p3
+0 8 #comment 0 1 > c3
diff --git a/result/boundaries1.xml.sax b/result/boundaries1.xml.sax
new file mode 100644
index 00000000..19e31815
--- /dev/null
+++ b/result/boundaries1.xml.sax
@@ -0,0 +1,32 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.internalSubset(d, , )
+SAX.entityDecl(a, 1, (null), (null), ]>)
+SAX.getEntity(a)
+SAX.entityDecl(b, 1, (null), (null), ]>)
+SAX.getEntity(b)
+SAX.comment(> ]> )
+SAX.externalSubset(d, , )
+SAX.processingInstruction(pi, p1)
+SAX.comment(> c1 )
+SAX.startElement(d, a='>', b='>')
+SAX.characters(
+text, 5)
+SAX.getEntity(a)
+SAX.characters(]>, 2)
+SAX.reference(a)
+SAX.characters(text
+, 5)
+SAX.pcdata(cdata, 5)
+SAX.characters(
+, 1)
+SAX.processingInstruction(pi, p2)
+SAX.characters(
+, 1)
+SAX.comment(> c2 )
+SAX.characters(
+, 1)
+SAX.endElement(d)
+SAX.processingInstruction(pi, p3)
+SAX.comment(> c3 )
+SAX.endDocument()
diff --git a/result/boundaries1.xml.sax2 b/result/boundaries1.xml.sax2
new file mode 100644
index 00000000..b3ad5e8e
--- /dev/null
+++ b/result/boundaries1.xml.sax2
@@ -0,0 +1,33 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.internalSubset(d, , )
+SAX.entityDecl(a, 1, (null), (null), ]>)
+SAX.getEntity(a)
+SAX.entityDecl(b, 1, (null), (null), ]>)
+SAX.getEntity(b)
+SAX.comment(> ]> )
+SAX.externalSubset(d, , )
+SAX.processingInstruction(pi, p1)
+SAX.comment(> c1 )
+SAX.startElementNs(d, NULL, NULL, 0, 2, 0, a='>" b...', 1, b='>'>
+...', 1)
+SAX.characters(
+text, 5)
+SAX.getEntity(a)
+SAX.characters(]>, 2)
+SAX.reference(a)
+SAX.characters(text
+, 5)
+SAX.pcdata(cdata, 5)
+SAX.characters(
+, 1)
+SAX.processingInstruction(pi, p2)
+SAX.characters(
+, 1)
+SAX.comment(> c2 )
+SAX.characters(
+, 1)
+SAX.endElementNs(d, NULL, NULL)
+SAX.processingInstruction(pi, p3)
+SAX.comment(> c3 )
+SAX.endDocument()
diff --git a/result/noent/boundaries1.xml b/result/noent/boundaries1.xml
new file mode 100644
index 00000000..da20872a
--- /dev/null
+++ b/result/noent/boundaries1.xml
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<!DOCTYPE d [
+<!ENTITY a "]>">
+<!ENTITY b "]>">
+<!--> ]> -->]>
+<?pi p1?>
+<!--> c1 -->
+<d a="&gt;" b="&gt;">
+text]&gt;text
+<![CDATA[cdata]]>
+<?pi p2?>
+<!--> c2 -->
+</d>
+<?pi p3?>
+<!--> c3 -->
diff --git a/result/noent/boundaries1.xml.sax2 b/result/noent/boundaries1.xml.sax2
new file mode 100644
index 00000000..093ac3be
--- /dev/null
+++ b/result/noent/boundaries1.xml.sax2
@@ -0,0 +1,32 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.internalSubset(d, , )
+SAX.entityDecl(a, 1, (null), (null), ]>)
+SAX.getEntity(a)
+SAX.entityDecl(b, 1, (null), (null), ]>)
+SAX.getEntity(b)
+SAX.comment(> ]> )
+SAX.externalSubset(d, , )
+SAX.processingInstruction(pi, p1)
+SAX.comment(> c1 )
+SAX.startElementNs(d, NULL, NULL, 0, 2, 0, a='>" b...', 1, b='>'>
+...', 1)
+SAX.characters(
+text, 5)
+SAX.getEntity(a)
+SAX.characters(]>, 2)
+SAX.characters(text
+, 5)
+SAX.pcdata(cdata, 5)
+SAX.characters(
+, 1)
+SAX.processingInstruction(pi, p2)
+SAX.characters(
+, 1)
+SAX.comment(> c2 )
+SAX.characters(
+, 1)
+SAX.endElementNs(d, NULL, NULL)
+SAX.processingInstruction(pi, p3)
+SAX.comment(> c3 )
+SAX.endDocument()
diff --git a/runtest.c b/runtest.c
index 4c1c2677..eedd26e5 100644
--- a/runtest.c
+++ b/runtest.c
@@ -1970,6 +1970,306 @@ pushParseTest(const char *filename, const char *result,
}
return(0);
}
+
+static int pushBoundaryCount;
+static int pushBoundaryRefCount;
+static int pushBoundaryCharsCount;
+static int pushBoundaryCDataCount;
+
+static void
+internalSubsetBnd(void *ctx, const xmlChar *name, const xmlChar *externalID,
+ const xmlChar *systemID) {
+ pushBoundaryCount++;
+ xmlSAX2InternalSubset(ctx, name, externalID, systemID);
+}
+
+static void
+referenceBnd(void *ctx, const xmlChar *name) {
+ pushBoundaryRefCount++;
+ xmlSAX2Reference(ctx, name);
+}
+
+static void
+charactersBnd(void *ctx, const xmlChar *ch, int len) {
+ pushBoundaryCount++;
+ pushBoundaryCharsCount++;
+ xmlSAX2Characters(ctx, ch, len);
+}
+
+static void
+cdataBlockBnd(void *ctx, const xmlChar *ch, int len) {
+ pushBoundaryCount++;
+ pushBoundaryCDataCount++;
+ xmlSAX2CDataBlock(ctx, ch, len);
+}
+
+static void
+processingInstructionBnd(void *ctx, const xmlChar *target,
+ const xmlChar *data) {
+ pushBoundaryCount++;
+ xmlSAX2ProcessingInstruction(ctx, target, data);
+}
+
+static void
+commentBnd(void *ctx, const xmlChar *value) {
+ xmlParserCtxtPtr ctxt = ctx;
+ if (ctxt->inSubset == 0)
+ pushBoundaryCount++;
+ xmlSAX2Comment(ctx, value);
+}
+
+static void
+startElementBnd(void *ctx, const xmlChar *xname, const xmlChar **atts) {
+ const char *name = (const char *)xname;
+
+ /* Some elements might be created automatically. */
+ if ((strcmp(name, "html") != 0) &&
+ (strcmp(name, "body") != 0) &&
+ (strcmp(name, "head") != 0) &&
+ (strcmp(name, "p") != 0)) {
+ pushBoundaryCount++;
+ }
+ xmlSAX2StartElement(ctx, xname, atts);
+}
+
+static void
+endElementBnd(void *ctx, const xmlChar *name) {
+ /*pushBoundaryCount++;*/
+ xmlSAX2EndElement(ctx, name);
+}
+
+static void
+startElementNsBnd(void *ctx, const xmlChar *localname, const xmlChar *prefix,
+ const xmlChar *URI, int nb_namespaces,
+ const xmlChar **namespaces, int nb_attributes,
+ int nb_defaulted, const xmlChar **attributes) {
+ pushBoundaryCount++;
+ xmlSAX2StartElementNs(ctx, localname, prefix, URI, nb_namespaces,
+ namespaces, nb_attributes, nb_defaulted, attributes);
+}
+
+static void
+endElementNsBnd(void *ctx, const xmlChar *localname, const xmlChar *prefix,
+ const xmlChar *URI) {
+ /*pushBoundaryCount++;*/
+ xmlSAX2EndElementNs(ctx, localname, prefix, URI);
+}
+
+/**
+ * pushBoundaryTest:
+ * @filename: the file to parse
+ * @result: the file with expected result
+ * @err: the file with error messages: unused
+ *
+ * Test whether the push parser detects boundaries between syntactical
+ * elements correctly.
+ *
+ * Returns 0 in case of success, an error code otherwise
+ */
+static int
+pushBoundaryTest(const char *filename, const char *result,
+ const char *err ATTRIBUTE_UNUSED,
+ int options) {
+ xmlParserCtxtPtr ctxt;
+ xmlDocPtr doc;
+ xmlSAXHandler bndSAX;
+ const char *base;
+ int size, res, numCallbacks;
+ int cur = 0;
+ unsigned long avail, oldConsumed, consumed;
+
+ /*
+ * If the parser made progress, check that exactly one construct was
+ * processed and that the input buffer is (almost) empty.
+ * Since we use a chunk size of 1, this tests whether content is
+ * processed as early as possible.
+ */
+
+ nb_tests++;
+
+ memset(&bndSAX, 0, sizeof(bndSAX));
+#ifdef LIBXML_HTML_ENABLED
+ if (options & XML_PARSE_HTML) {
+ xmlSAX2InitHtmlDefaultSAXHandler(&bndSAX);
+ bndSAX.startElement = startElementBnd;
+ bndSAX.endElement = endElementBnd;
+ } else
+#endif
+ {
+ xmlSAXVersion(&bndSAX, 2);
+ bndSAX.startElementNs = startElementNsBnd;
+ bndSAX.endElementNs = endElementNsBnd;
+ }
+
+ bndSAX.internalSubset = internalSubsetBnd;
+ bndSAX.reference = referenceBnd;
+ bndSAX.characters = charactersBnd;
+ bndSAX.cdataBlock = cdataBlockBnd;
+ bndSAX.processingInstruction = processingInstructionBnd;
+ bndSAX.comment = commentBnd;
+
+ /*
+ * load the document in memory and work from there.
+ */
+ if (loadMem(filename, &base, &size) != 0) {
+ fprintf(stderr, "Failed to load %s\n", filename);
+ return(-1);
+ }
+
+#ifdef LIBXML_HTML_ENABLED
+ if (options & XML_PARSE_HTML)
+ ctxt = htmlCreatePushParserCtxt(&bndSAX, NULL, base, 1, filename,
+ XML_CHAR_ENCODING_NONE);
+ else
+#endif
+ ctxt = xmlCreatePushParserCtxt(&bndSAX, NULL, base, 1, filename);
+ xmlCtxtUseOptions(ctxt, options);
+ cur = 1;
+ consumed = 0;
+ numCallbacks = 0;
+ avail = 0;
+ while ((cur < size) && (numCallbacks <= 1) && (avail <= 0)) {
+ int terminate = (cur + 1 >= size);
+ int isText = 0;
+
+ if (ctxt->instate == XML_PARSER_CONTENT) {
+ int firstChar = (ctxt->input->end > ctxt->input->cur) ?
+ *ctxt->input->cur :
+ base[cur];
+
+ if ((firstChar != '<') &&
+ ((options & XML_PARSE_HTML) || (firstChar != '&')))
+ isText = 1;
+ }
+
+ oldConsumed = ctxt->input->consumed +
+ (unsigned long) (ctxt->input->cur - ctxt->input->base);
+
+ pushBoundaryCount = 0;
+ pushBoundaryRefCount = 0;
+ pushBoundaryCharsCount = 0;
+ pushBoundaryCDataCount = 0;
+
+#ifdef LIBXML_HTML_ENABLED
+ if (options & XML_PARSE_HTML)
+ htmlParseChunk(ctxt, base + cur, 1, terminate);
+ else
+#endif
+ xmlParseChunk(ctxt, base + cur, 1, terminate);
+ cur += 1;
+
+ /*
+ * Callback check: Check that only a single construct was parsed.
+ */
+ if (pushBoundaryRefCount > 0) {
+ numCallbacks = 1;
+ } else {
+ numCallbacks = pushBoundaryCount;
+ if (pushBoundaryCharsCount > 1) {
+ if (options & XML_PARSE_HTML) {
+ /*
+ * The HTML parser can generate a mix of chars and
+ * references.
+ */
+ numCallbacks -= pushBoundaryCharsCount - 1;
+ } else {
+ /*
+ * Allow two chars callbacks. This can happen when
+ * multi-byte chars are split across buffer boundaries.
+ */
+ numCallbacks -= 1;
+ }
+ }
+ if (options & XML_PARSE_HTML) {
+ /*
+ * Allow multiple cdata callbacks in HTML mode.
+ */
+ if (pushBoundaryCDataCount > 1)
+ numCallbacks -= pushBoundaryCDataCount - 1;
+ }
+ }
+
+ /*
+ * Buffer check: If input was consumed, check that the input
+ * buffer is (almost) empty.
+ */
+ consumed = ctxt->input->consumed +
+ (unsigned long) (ctxt->input->cur - ctxt->input->base);
+ if ((ctxt->instate != XML_PARSER_DTD) &&
+ (consumed >= 4) &&
+ (consumed != oldConsumed)) {
+ size_t max = 0;
+
+ avail = ctxt->input->end - ctxt->input->cur;
+
+ if ((options & XML_PARSE_HTML) &&
+ (ctxt->instate == XML_PARSER_END_TAG)) {
+ /* Something related to script parsing. */
+ max = 3;
+ } else if (isText) {
+ int c = *ctxt->input->cur;
+
+ /* 3 bytes for partial UTF-8 */
+ max = ((c == '<') || (c == '&')) ? 1 : 3;
+ } else if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
+ /* 2 bytes for terminator, 3 bytes for UTF-8 */
+ max = 5;
+ }
+
+ if (avail <= max)
+ avail = 0;
+ }
+ }
+ doc = ctxt->myDoc;
+#ifdef LIBXML_HTML_ENABLED
+ if (options & XML_PARSE_HTML)
+ res = 1;
+ else
+#endif
+ res = ctxt->wellFormed;
+ xmlFreeParserCtxt(ctxt);
+ free((char *)base);
+ if (numCallbacks > 1) {
+ xmlFreeDoc(doc);
+ fprintf(stderr, "Failed push boundary callback test (%d@%lu-%lu): %s\n",
+ numCallbacks, oldConsumed, consumed, filename);
+ return(-1);
+ }
+ if (avail > 0) {
+ xmlFreeDoc(doc);
+ fprintf(stderr, "Failed push boundary buffer test (%lu@%lu): %s\n",
+ avail, consumed, filename);
+ return(-1);
+ }
+ if (!res) {
+ xmlFreeDoc(doc);
+ fprintf(stderr, "Failed to parse %s\n", filename);
+ return(-1);
+ }
+#ifdef LIBXML_HTML_ENABLED
+ if (options & XML_PARSE_HTML)
+ htmlDocDumpMemory(doc, (xmlChar **) &base, &size);
+ else
+#endif
+ xmlDocDumpMemory(doc, (xmlChar **) &base, &size);
+ xmlFreeDoc(doc);
+ res = compareFileMem(result, base, size);
+ if ((base == NULL) || (res != 0)) {
+ if (base != NULL)
+ xmlFree((char *)base);
+ fprintf(stderr, "Result for %s failed in %s\n", filename, result);
+ return(-1);
+ }
+ xmlFree((char *)base);
+ if (err != NULL) {
+ res = compareFileMem(err, testErrors, testErrorsSize);
+ if (res != 0) {
+ fprintf(stderr, "Error for %s failed\n", filename);
+ return(-1);
+ }
+ }
+ return(0);
+}
#endif
/**
@@ -4660,6 +4960,9 @@ testDesc testDescriptions[] = {
{ "XML push regression tests" ,
pushParseTest, "./test/*", "result/", "", NULL,
0 },
+ { "XML push boundary tests" ,
+ pushBoundaryTest, "./test/*", "result/", "", NULL,
+ 0 },
#endif
#ifdef LIBXML_HTML_ENABLED
{ "HTML regression tests" ,
@@ -4672,6 +4975,9 @@ testDesc testDescriptions[] = {
{ "Push HTML regression tests" ,
pushParseTest, "./test/HTML/*", "result/HTML/", "", ".err",
XML_PARSE_HTML },
+ { "Push HTML boundary tests" ,
+ pushBoundaryTest, "./test/HTML/*", "result/HTML/", "", NULL,
+ XML_PARSE_HTML },
#endif
{ "HTML SAX regression tests" ,
saxParseTest, "./test/HTML/*", "result/HTML/", ".sax", NULL,
diff --git a/test/boundaries1.xml b/test/boundaries1.xml
new file mode 100644
index 00000000..c4301cd3
--- /dev/null
+++ b/test/boundaries1.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0"?>
+<!DOCTYPE d [
+ <!ENTITY a "]>">
+ <!ENTITY b ']>'>
+ <!--> ]> -->
+] >
+<?pi p1?>
+<!--> c1 -->
+<d a=">" b='>'>
+text&a;text
+<![CDATA[cdata]]>
+<?pi p2?>
+<!--> c2 -->
+</d>
+<?pi p3?>
+<!--> c3 -->