summaryrefslogtreecommitdiff
path: root/runtest.c
diff options
context:
space:
mode:
authorNick Wellnhofer <wellnhofer@aevum.de>2022-11-20 15:36:41 +0100
committerNick Wellnhofer <wellnhofer@aevum.de>2022-11-20 21:27:59 +0100
commit9c63cea5a6154295ea953b125788ee7b97010bd1 (patch)
tree76fab28016459948392e5df6cede44044e00c009 /runtest.c
parent691a7719566141bb5fbe6212498d1f0568c2610f (diff)
downloadlibxml2-9c63cea5a6154295ea953b125788ee7b97010bd1.tar.gz
test: Add test for push parser boundaries
Diffstat (limited to 'runtest.c')
-rw-r--r--runtest.c306
1 files changed, 306 insertions, 0 deletions
diff --git a/runtest.c b/runtest.c
index 4c1c2677..eedd26e5 100644
--- a/runtest.c
+++ b/runtest.c
@@ -1970,6 +1970,306 @@ pushParseTest(const char *filename, const char *result,
}
return(0);
}
+
+static int pushBoundaryCount;
+static int pushBoundaryRefCount;
+static int pushBoundaryCharsCount;
+static int pushBoundaryCDataCount;
+
+static void
+internalSubsetBnd(void *ctx, const xmlChar *name, const xmlChar *externalID,
+ const xmlChar *systemID) {
+ pushBoundaryCount++;
+ xmlSAX2InternalSubset(ctx, name, externalID, systemID);
+}
+
+static void
+referenceBnd(void *ctx, const xmlChar *name) {
+ pushBoundaryRefCount++;
+ xmlSAX2Reference(ctx, name);
+}
+
+static void
+charactersBnd(void *ctx, const xmlChar *ch, int len) {
+ pushBoundaryCount++;
+ pushBoundaryCharsCount++;
+ xmlSAX2Characters(ctx, ch, len);
+}
+
+static void
+cdataBlockBnd(void *ctx, const xmlChar *ch, int len) {
+ pushBoundaryCount++;
+ pushBoundaryCDataCount++;
+ xmlSAX2CDataBlock(ctx, ch, len);
+}
+
+static void
+processingInstructionBnd(void *ctx, const xmlChar *target,
+ const xmlChar *data) {
+ pushBoundaryCount++;
+ xmlSAX2ProcessingInstruction(ctx, target, data);
+}
+
+static void
+commentBnd(void *ctx, const xmlChar *value) {
+ xmlParserCtxtPtr ctxt = ctx;
+ if (ctxt->inSubset == 0)
+ pushBoundaryCount++;
+ xmlSAX2Comment(ctx, value);
+}
+
+static void
+startElementBnd(void *ctx, const xmlChar *xname, const xmlChar **atts) {
+ const char *name = (const char *)xname;
+
+ /* Some elements might be created automatically. */
+ if ((strcmp(name, "html") != 0) &&
+ (strcmp(name, "body") != 0) &&
+ (strcmp(name, "head") != 0) &&
+ (strcmp(name, "p") != 0)) {
+ pushBoundaryCount++;
+ }
+ xmlSAX2StartElement(ctx, xname, atts);
+}
+
+static void
+endElementBnd(void *ctx, const xmlChar *name) {
+ /*pushBoundaryCount++;*/
+ xmlSAX2EndElement(ctx, name);
+}
+
+static void
+startElementNsBnd(void *ctx, const xmlChar *localname, const xmlChar *prefix,
+ const xmlChar *URI, int nb_namespaces,
+ const xmlChar **namespaces, int nb_attributes,
+ int nb_defaulted, const xmlChar **attributes) {
+ pushBoundaryCount++;
+ xmlSAX2StartElementNs(ctx, localname, prefix, URI, nb_namespaces,
+ namespaces, nb_attributes, nb_defaulted, attributes);
+}
+
+static void
+endElementNsBnd(void *ctx, const xmlChar *localname, const xmlChar *prefix,
+ const xmlChar *URI) {
+ /*pushBoundaryCount++;*/
+ xmlSAX2EndElementNs(ctx, localname, prefix, URI);
+}
+
+/**
+ * pushBoundaryTest:
+ * @filename: the file to parse
+ * @result: the file with expected result
+ * @err: the file with error messages: unused
+ *
+ * Test whether the push parser detects boundaries between syntactical
+ * elements correctly.
+ *
+ * Returns 0 in case of success, an error code otherwise
+ */
+static int
+pushBoundaryTest(const char *filename, const char *result,
+ const char *err ATTRIBUTE_UNUSED,
+ int options) {
+ xmlParserCtxtPtr ctxt;
+ xmlDocPtr doc;
+ xmlSAXHandler bndSAX;
+ const char *base;
+ int size, res, numCallbacks;
+ int cur = 0;
+ unsigned long avail, oldConsumed, consumed;
+
+ /*
+ * If the parser made progress, check that exactly one construct was
+ * processed and that the input buffer is (almost) empty.
+ * Since we use a chunk size of 1, this tests whether content is
+ * processed as early as possible.
+ */
+
+ nb_tests++;
+
+ memset(&bndSAX, 0, sizeof(bndSAX));
+#ifdef LIBXML_HTML_ENABLED
+ if (options & XML_PARSE_HTML) {
+ xmlSAX2InitHtmlDefaultSAXHandler(&bndSAX);
+ bndSAX.startElement = startElementBnd;
+ bndSAX.endElement = endElementBnd;
+ } else
+#endif
+ {
+ xmlSAXVersion(&bndSAX, 2);
+ bndSAX.startElementNs = startElementNsBnd;
+ bndSAX.endElementNs = endElementNsBnd;
+ }
+
+ bndSAX.internalSubset = internalSubsetBnd;
+ bndSAX.reference = referenceBnd;
+ bndSAX.characters = charactersBnd;
+ bndSAX.cdataBlock = cdataBlockBnd;
+ bndSAX.processingInstruction = processingInstructionBnd;
+ bndSAX.comment = commentBnd;
+
+ /*
+ * load the document in memory and work from there.
+ */
+ if (loadMem(filename, &base, &size) != 0) {
+ fprintf(stderr, "Failed to load %s\n", filename);
+ return(-1);
+ }
+
+#ifdef LIBXML_HTML_ENABLED
+ if (options & XML_PARSE_HTML)
+ ctxt = htmlCreatePushParserCtxt(&bndSAX, NULL, base, 1, filename,
+ XML_CHAR_ENCODING_NONE);
+ else
+#endif
+ ctxt = xmlCreatePushParserCtxt(&bndSAX, NULL, base, 1, filename);
+ xmlCtxtUseOptions(ctxt, options);
+ cur = 1;
+ consumed = 0;
+ numCallbacks = 0;
+ avail = 0;
+ while ((cur < size) && (numCallbacks <= 1) && (avail <= 0)) {
+ int terminate = (cur + 1 >= size);
+ int isText = 0;
+
+ if (ctxt->instate == XML_PARSER_CONTENT) {
+ int firstChar = (ctxt->input->end > ctxt->input->cur) ?
+ *ctxt->input->cur :
+ base[cur];
+
+ if ((firstChar != '<') &&
+ ((options & XML_PARSE_HTML) || (firstChar != '&')))
+ isText = 1;
+ }
+
+ oldConsumed = ctxt->input->consumed +
+ (unsigned long) (ctxt->input->cur - ctxt->input->base);
+
+ pushBoundaryCount = 0;
+ pushBoundaryRefCount = 0;
+ pushBoundaryCharsCount = 0;
+ pushBoundaryCDataCount = 0;
+
+#ifdef LIBXML_HTML_ENABLED
+ if (options & XML_PARSE_HTML)
+ htmlParseChunk(ctxt, base + cur, 1, terminate);
+ else
+#endif
+ xmlParseChunk(ctxt, base + cur, 1, terminate);
+ cur += 1;
+
+ /*
+ * Callback check: Check that only a single construct was parsed.
+ */
+ if (pushBoundaryRefCount > 0) {
+ numCallbacks = 1;
+ } else {
+ numCallbacks = pushBoundaryCount;
+ if (pushBoundaryCharsCount > 1) {
+ if (options & XML_PARSE_HTML) {
+ /*
+ * The HTML parser can generate a mix of chars and
+ * references.
+ */
+ numCallbacks -= pushBoundaryCharsCount - 1;
+ } else {
+ /*
+ * Allow two chars callbacks. This can happen when
+ * multi-byte chars are split across buffer boundaries.
+ */
+ numCallbacks -= 1;
+ }
+ }
+ if (options & XML_PARSE_HTML) {
+ /*
+ * Allow multiple cdata callbacks in HTML mode.
+ */
+ if (pushBoundaryCDataCount > 1)
+ numCallbacks -= pushBoundaryCDataCount - 1;
+ }
+ }
+
+ /*
+ * Buffer check: If input was consumed, check that the input
+ * buffer is (almost) empty.
+ */
+ consumed = ctxt->input->consumed +
+ (unsigned long) (ctxt->input->cur - ctxt->input->base);
+ if ((ctxt->instate != XML_PARSER_DTD) &&
+ (consumed >= 4) &&
+ (consumed != oldConsumed)) {
+ size_t max = 0;
+
+ avail = ctxt->input->end - ctxt->input->cur;
+
+ if ((options & XML_PARSE_HTML) &&
+ (ctxt->instate == XML_PARSER_END_TAG)) {
+ /* Something related to script parsing. */
+ max = 3;
+ } else if (isText) {
+ int c = *ctxt->input->cur;
+
+ /* 3 bytes for partial UTF-8 */
+ max = ((c == '<') || (c == '&')) ? 1 : 3;
+ } else if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
+ /* 2 bytes for terminator, 3 bytes for UTF-8 */
+ max = 5;
+ }
+
+ if (avail <= max)
+ avail = 0;
+ }
+ }
+ doc = ctxt->myDoc;
+#ifdef LIBXML_HTML_ENABLED
+ if (options & XML_PARSE_HTML)
+ res = 1;
+ else
+#endif
+ res = ctxt->wellFormed;
+ xmlFreeParserCtxt(ctxt);
+ free((char *)base);
+ if (numCallbacks > 1) {
+ xmlFreeDoc(doc);
+ fprintf(stderr, "Failed push boundary callback test (%d@%lu-%lu): %s\n",
+ numCallbacks, oldConsumed, consumed, filename);
+ return(-1);
+ }
+ if (avail > 0) {
+ xmlFreeDoc(doc);
+ fprintf(stderr, "Failed push boundary buffer test (%lu@%lu): %s\n",
+ avail, consumed, filename);
+ return(-1);
+ }
+ if (!res) {
+ xmlFreeDoc(doc);
+ fprintf(stderr, "Failed to parse %s\n", filename);
+ return(-1);
+ }
+#ifdef LIBXML_HTML_ENABLED
+ if (options & XML_PARSE_HTML)
+ htmlDocDumpMemory(doc, (xmlChar **) &base, &size);
+ else
+#endif
+ xmlDocDumpMemory(doc, (xmlChar **) &base, &size);
+ xmlFreeDoc(doc);
+ res = compareFileMem(result, base, size);
+ if ((base == NULL) || (res != 0)) {
+ if (base != NULL)
+ xmlFree((char *)base);
+ fprintf(stderr, "Result for %s failed in %s\n", filename, result);
+ return(-1);
+ }
+ xmlFree((char *)base);
+ if (err != NULL) {
+ res = compareFileMem(err, testErrors, testErrorsSize);
+ if (res != 0) {
+ fprintf(stderr, "Error for %s failed\n", filename);
+ return(-1);
+ }
+ }
+ return(0);
+}
#endif
/**
@@ -4660,6 +4960,9 @@ testDesc testDescriptions[] = {
{ "XML push regression tests" ,
pushParseTest, "./test/*", "result/", "", NULL,
0 },
+ { "XML push boundary tests" ,
+ pushBoundaryTest, "./test/*", "result/", "", NULL,
+ 0 },
#endif
#ifdef LIBXML_HTML_ENABLED
{ "HTML regression tests" ,
@@ -4672,6 +4975,9 @@ testDesc testDescriptions[] = {
{ "Push HTML regression tests" ,
pushParseTest, "./test/HTML/*", "result/HTML/", "", ".err",
XML_PARSE_HTML },
+ { "Push HTML boundary tests" ,
+ pushBoundaryTest, "./test/HTML/*", "result/HTML/", "", NULL,
+ XML_PARSE_HTML },
#endif
{ "HTML SAX regression tests" ,
saxParseTest, "./test/HTML/*", "result/HTML/", ".sax", NULL,