summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Wellnhofer <wellnhofer@aevum.de>2019-04-30 14:10:19 +0200
committerNick Wellnhofer <wellnhofer@aevum.de>2019-05-08 12:21:50 +0200
commit311da8c8864e4f4f838434d769e0644cc02c9da9 (patch)
tree551b80220c322b705f154b534c8ca6794617ff3e
parentc491e549a6fed6851f15979f0bcf14d2d50340e1 (diff)
downloadlibxslt-311da8c8864e4f4f838434d769e0644cc02c9da9.tar.gz
Reorganize fuzzing code
- Move core fuzzing code into a single file fuzz.c - Add tests for fuzz targets - Reduce XSLT operation limit
-rw-r--r--.travis.yml1
-rw-r--r--tests/fuzz/.gitignore1
-rw-r--r--tests/fuzz/Makefile.am16
-rw-r--r--tests/fuzz/fuzz.c371
-rw-r--r--tests/fuzz/fuzz.h35
-rw-r--r--tests/fuzz/testTargets.c86
-rw-r--r--tests/fuzz/xpath.c208
-rw-r--r--tests/fuzz/xslt.c130
8 files changed, 519 insertions, 329 deletions
diff --git a/.travis.yml b/.travis.yml
index 45e013a3..172d1e2f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -26,6 +26,7 @@ script:
for target in libxslt libexslt xsltproc python tests/plugins; do
make -j2 -C $target V=1
done
+ make -j2 -C tests/fuzz testTargets
make tests | tee test.log
! grep -qv '^## Running' test.log
git:
diff --git a/tests/fuzz/.gitignore b/tests/fuzz/.gitignore
index 15b2878d..7515dea5 100644
--- a/tests/fuzz/.gitignore
+++ b/tests/fuzz/.gitignore
@@ -1,3 +1,4 @@
/corpus/
+/testTargets
/xpath
/xslt
diff --git a/tests/fuzz/Makefile.am b/tests/fuzz/Makefile.am
index afb5e322..522f994d 100644
--- a/tests/fuzz/Makefile.am
+++ b/tests/fuzz/Makefile.am
@@ -2,22 +2,34 @@ LIBXSLT_LIBS = $(top_builddir)/libxslt/libxslt.la \
$(top_builddir)/libexslt/libexslt.la
EXTRA_PROGRAMS = xpath xslt
+check_PROGRAMS = testTargets
EXTRA_DIST = xpath.dict xpath.xml xslt.dict xslt.xml seed
CLEANFILES = $(EXTRA_PROGRAMS)
AM_CPPFLAGS = -I$(top_srcdir)
AM_CFLAGS = $(LIBXML_CFLAGS)
-AM_LDFLAGS = -fsanitize=fuzzer
DEPENDENCIES = $(LIBXSLT_LIBS)
LDADD = $(LIBXSLT_LIBS) \
$(LIBGCRYPT_LIBS) $(LIBXML_LIBS) $(EXTRA_LIBS) $(M_LIBS)
+xpath_SOURCES = xpath.c fuzz.c
+xpath_LDFLAGS = -fsanitize=fuzzer
+
+xslt_SOURCES = xslt.c fuzz.c
+xslt_LDFLAGS = -fsanitize=fuzzer
+
+testTargets_SOURCES = testTargets.c fuzz.c
+
$(top_builddir)/libxslt/libxslt.la:
cd $(top_builddir)/libxslt && $(MAKE) libxslt.la
$(top_builddir)/libexslt/libexslt.la: $(top_builddir)/libxslt/libxslt.la
cd $(top_builddir)/libexslt && $(MAKE) libexslt.la
-.PHONY: fuzz-xpath fuzz-xslt
+.PHONY: tests fuzz-xpath fuzz-xslt
+
+tests: $(check_PROGRAMS)
+ @echo '## Running fuzz target tests'
+ @./testTargets $(srcdir)
fuzz-xpath: xpath$(EXEEXT)
@mkdir -p corpus/xpath
diff --git a/tests/fuzz/fuzz.c b/tests/fuzz/fuzz.c
new file mode 100644
index 00000000..0ef89db5
--- /dev/null
+++ b/tests/fuzz/fuzz.c
@@ -0,0 +1,371 @@
+/*
+ * fuzz.c: Fuzz targets for libxslt
+ *
+ * See Copyright for the status of this software.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "fuzz.h"
+
+#include <libxml/tree.h>
+#include <libxml/parser.h>
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
+#include <libxslt/extensions.h>
+#include <libxslt/functions.h>
+#include <libxslt/security.h>
+#include <libxslt/transform.h>
+#include <libxslt/xslt.h>
+#include <libxslt/xsltInternals.h>
+#include <libxslt/xsltutils.h>
+#include <libexslt/exslt.h>
+
+/* Path separator used to locate the directory part of argv[0]. */
+#if defined(_WIN32)
+ #define DIR_SEP '\\'
+#else
+ #define DIR_SEP '/'
+#endif
+
+/* Source document the fuzz inputs are run against; set by the init functions. */
+static xmlDocPtr doc;
+/* Security preferences forbidding file and network access (see xsltFuzzInit). */
+static xsltSecurityPrefsPtr sec;
+/* Transform context created once and reused across inputs by the XPath target. */
+static xsltTransformContextPtr tctxt;
+/* Extension data looked up for SAXON_NAMESPACE; checked and refreshed in xsltFuzzXPath. */
+static xmlHashTablePtr saxonExtHash;
+
+/*
+ * Error callback that silently drops every message. It is installed as the
+ * generic error handler for both libxml2 and libxslt in xsltFuzzInit.
+ */
+static void
+xsltFuzzErrorFunc(void *ctx ATTRIBUTE_UNUSED,
+                  const char *msg ATTRIBUTE_UNUSED, ...)
+{
+    /* Intentionally empty. */
+}
+
+/*
+ * Setup shared by both fuzz targets: initializes the libraries, silences
+ * error output and creates the security preferences stored in the global sec.
+ */
+static void
+xsltFuzzInit(void) {
+    /* Every option listed here is set to xsltSecurityForbid. */
+    static const xsltSecurityOption forbidden[] = {
+        XSLT_SECPREF_READ_FILE,
+        XSLT_SECPREF_WRITE_FILE,
+        XSLT_SECPREF_CREATE_DIRECTORY,
+        XSLT_SECPREF_READ_NETWORK,
+        XSLT_SECPREF_WRITE_NETWORK
+    };
+    size_t i;
+
+    /* Bring up libxml2, libxslt and libexslt */
+    xmlInitParser();
+    xmlXPathInit();
+    xsltInit();
+    exsltRegisterAll();
+
+    /* Route all diagnostics to the no-op handler */
+    xmlSetGenericErrorFunc(NULL, xsltFuzzErrorFunc);
+    xsltSetGenericErrorFunc(NULL, xsltFuzzErrorFunc);
+
+    /* Disallow I/O */
+    sec = xsltNewSecurityPrefs();
+    for (i = 0; i < sizeof(forbidden) / sizeof(forbidden[0]); i++)
+        xsltSetSecurityPrefs(sec, forbidden[i], xsltSecurityForbid);
+}
+
+/*
+ * Parse the XML file named filename and return the resulting document.
+ *
+ * argv0:    program path; when dir is NULL the file is looked up in the
+ *           directory part of this path (everything up to and including
+ *           the last DIR_SEP, or the current directory if there is none)
+ * dir:      directory containing the file, or NULL
+ * filename: name of the file to load
+ *
+ * Returns the parsed document, or NULL if the path could not be allocated
+ * or the file could not be parsed; an error is printed to stderr in both
+ * cases. The callers in this file store the result in the global doc and
+ * release it in their cleanup functions.
+ */
+static xmlDocPtr
+xsltFuzzLoadDoc(const char *argv0, const char *dir, const char *filename) {
+    xmlDocPtr ret;
+    char *path;
+
+    if (dir != NULL) {
+        size_t pathSize = strlen(dir) + 1 + strlen(filename) + 1;
+
+        path = malloc(pathSize);
+        if (path != NULL)
+            snprintf(path, pathSize, "%s/%s", dir, filename);
+    } else {
+        const char *end = strrchr(argv0, DIR_SEP);
+        size_t dirLen = (end == NULL) ? 0 : (size_t) (end - argv0) + 1;
+        size_t nameLen = strlen(filename);
+
+        path = malloc(dirLen + nameLen + 1);
+        if (path != NULL) {
+            memcpy(path, argv0, dirLen);
+            /* Copies the terminating null byte as well. */
+            memcpy(path + dirLen, filename, nameLen + 1);
+        }
+    }
+
+    if (path == NULL) {
+        fprintf(stderr, "Error: out of memory building path for '%s'\n",
+                filename);
+        return NULL;
+    }
+
+    ret = xmlReadFile(path, NULL, 0);
+    if (ret == NULL)
+        fprintf(stderr, "Error: unable to parse file '%s'\n", path);
+    free(path);
+
+    return ret;
+}
+
+/* XPath fuzzer
+ *
+ * This fuzz target parses and evaluates XPath expressions in an (E)XSLT
+ * context using a static XML document. It heavily exercises the libxml2
+ * XPath engine (xpath.c), a few other parts of libxml2, and most of
+ * libexslt.
+ *
+ * Some EXSLT functions need the transform context to create RVTs for
+ * node-sets. A couple of functions also access the stylesheet. The
+ * XPath context from the transform context is used to parse and
+ * evaluate expressions.
+ *
+ * All these objects are created once at startup. After fuzzing each input,
+ * they're reset as cheaply as possible.
+ *
+ * TODO
+ *
+ * - Some expressions can create lots of temporary node sets (RVTs) which
+ * aren't freed until the whole expression has been evaluated, leading to
+ * excessive memory usage. Cleaning them up earlier would require
+ * callbacks from the XPath engine, for example after evaluating a
+ * predicate expression, which doesn't seem feasible. Terminating the
+ * evaluation after creating a certain number of RVTs is a simple
+ * workaround.
+ * - Register a custom xsl:decimal-format declaration for format-number().
+ * - Some functions add strings to the stylesheet or transform context
+ * dictionary, for example via xsltGetQName, requiring a clean up of the
+ * dicts after fuzzing each input. This behavior seems questionable.
+ * Extension functions shouldn't needlessly modify the transform context
+ * or stylesheet.
+ * - Register xsl:keys and fuzz the key() function.
+ * - Add a few custom func:functions.
+ * - Fuzz the document() function with external documents.
+ */
+
+/*
+ * Set up the XPath fuzz target: initialize the libraries, load xpath.xml,
+ * create the shared stylesheet and transform context, and register the
+ * namespaces and variables that fuzzed expressions may refer to.
+ *
+ * argc_p: unused
+ * argv_p: pointer to the argument vector; (*argv_p)[0] is used to locate
+ *         xpath.xml when dir is NULL
+ * dir:    directory containing xpath.xml, or NULL
+ *
+ * Returns 0 on success and -1 if the document could not be loaded or the
+ * stylesheet or transform context could not be created. On failure the
+ * globals that were already set up (sec, possibly doc) are left in place
+ * for the caller to release.
+ */
+int
+xsltFuzzXPathInit(int *argc_p ATTRIBUTE_UNUSED, char ***argv_p,
+                  const char *dir) {
+    const char *xmlFilename = "xpath.xml";
+    xsltStylesheetPtr style;
+    xmlXPathContextPtr xpctxt;
+
+    xsltFuzzInit();
+
+    /* Load XML document */
+    doc = xsltFuzzLoadDoc((*argv_p)[0], dir, xmlFilename);
+    if (doc == NULL)
+        return -1;
+
+    style = xsltNewStylesheet();
+    if (style == NULL)
+        return -1;
+    tctxt = xsltNewTransformContext(style, doc);
+    if (tctxt == NULL) {
+        /* Nothing else refers to the stylesheet yet. */
+        xsltFreeStylesheet(style);
+        return -1;
+    }
+    xsltSetCtxtSecurityPrefs(sec, tctxt);
+
+    /*
+     * Some extension functions need the current instruction.
+     *
+     * - format-number() for namespaces.
+     * - document() for the base URL.
+     * - maybe others?
+     *
+     * For fuzzing, it's enough to use the source document's root element.
+     */
+    tctxt->inst = xmlDocGetRootElement(doc);
+
+    saxonExtHash = (xmlHashTablePtr)
+        xsltStyleGetExtData(style, SAXON_NAMESPACE);
+
+    /* Set up XPath context */
+    xpctxt = tctxt->xpathCtxt;
+
+    /* Resource limits to avoid timeouts and call stack overflows */
+    xpctxt->maxParserDepth = 15;
+    xpctxt->maxDepth = 100;
+    xpctxt->opLimit = 500000;
+
+    /* Test namespaces used in xpath.xml */
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "a", BAD_CAST "a");
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "b", BAD_CAST "b");
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "c", BAD_CAST "c");
+
+    /* EXSLT namespaces */
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "crypto", EXSLT_CRYPTO_NAMESPACE);
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "date", EXSLT_DATE_NAMESPACE);
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "dyn", EXSLT_DYNAMIC_NAMESPACE);
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "exsl", EXSLT_COMMON_NAMESPACE);
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "math", EXSLT_MATH_NAMESPACE);
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "saxon", SAXON_NAMESPACE);
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "set", EXSLT_SETS_NAMESPACE);
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "str", EXSLT_STRINGS_NAMESPACE);
+
+    /* Register variables: $f (number), $b (boolean), $s (string), $n (node-set) */
+    xmlXPathRegisterVariable(xpctxt, BAD_CAST "f", xmlXPathNewFloat(-1.5));
+    xmlXPathRegisterVariable(xpctxt, BAD_CAST "b", xmlXPathNewBoolean(1));
+    xmlXPathRegisterVariable(xpctxt, BAD_CAST "s",
+                             xmlXPathNewString(BAD_CAST "var"));
+    xmlXPathRegisterVariable(
+        xpctxt, BAD_CAST "n",
+        xmlXPathEval(BAD_CAST "//node() | /*/*/namespace::*", xpctxt));
+
+    return 0;
+}
+
+/*
+ * Run one XPath fuzz input: compile data as an XPath expression and evaluate
+ * it against the static document using the shared transform context, then
+ * reset the per-input state of that context (object cache, dictionaries,
+ * saxon:expression cache).
+ *
+ * data: expression bytes (need not be null-terminated)
+ * size: number of bytes in data
+ *
+ * Returns the evaluation result, or NULL if memory could not be allocated,
+ * the input is not valid UTF-8, the expression does not compile, or the
+ * evaluation yields no object. RVTs created during evaluation are not freed
+ * here; pass the result (NULL included) to xsltFuzzXPathFreeObject.
+ */
+xmlXPathObjectPtr
+xsltFuzzXPath(const char *data, size_t size) {
+    xmlXPathContextPtr xpctxt = tctxt->xpathCtxt;
+    xmlXPathCompExprPtr compExpr;
+    xmlXPathObjectPtr xpathObj;
+    xmlChar *xpathExpr;
+
+    /* Null-terminate */
+    xpathExpr = malloc(size + 1);
+    if (xpathExpr == NULL)
+        return NULL;
+    if (size > 0)
+        memcpy(xpathExpr, data, size);
+    xpathExpr[size] = 0;
+
+    /*
+     * format-number() can still cause memory errors with invalid UTF-8 in
+     * prefixes or suffixes. This shouldn't be exploitable in practice, but
+     * should be fixed. Check UTF-8 validity for now.
+     */
+    if (xmlCheckUTF8(xpathExpr) == 0) {
+        free(xpathExpr);
+        return NULL;
+    }
+
+    /* Compile and return early if the expression is invalid */
+    compExpr = xmlXPathCtxtCompile(xpctxt, xpathExpr);
+    free(xpathExpr);
+    if (compExpr == NULL)
+        return NULL;
+
+    /* Initialize XPath evaluation context and evaluate */
+    xpctxt->node = (xmlNodePtr) doc; /* Maybe test different context nodes? */
+    xpctxt->contextSize = 1;
+    xpctxt->proximityPosition = 1;
+    xpctxt->opCount = 0;
+    xpathObj = xmlXPathCompiledEval(compExpr, xpctxt);
+    xmlXPathFreeCompExpr(compExpr);
+
+    /* Clean object cache: switch it off, then on again */
+    xmlXPathContextSetCache(xpctxt, 0, 0, 0);
+    xmlXPathContextSetCache(xpctxt, 1, -1, 0);
+
+    /* Clean dictionaries */
+    if (xmlDictSize(tctxt->dict) > 0) {
+        xmlDictFree(tctxt->dict);
+        xmlDictFree(tctxt->style->dict);
+        tctxt->style->dict = xmlDictCreate();
+        tctxt->dict = xmlDictCreateSub(tctxt->style->dict);
+    }
+
+    /* Clean saxon:expression cache */
+    if (xmlHashSize(saxonExtHash) > 0) {
+        /* There doesn't seem to be a cheaper way with the public API. */
+        xsltShutdownCtxtExts(tctxt);
+        xsltInitCtxtExts(tctxt);
+        saxonExtHash = (xmlHashTablePtr)
+            xsltStyleGetExtData(tctxt->style, SAXON_NAMESPACE);
+    }
+
+    return xpathObj;
+}
+
+/*
+ * Release an object returned by xsltFuzzXPath, then free the RVTs of the
+ * shared transform context.
+ *
+ * obj: result of xsltFuzzXPath
+ */
+void
+xsltFuzzXPathFreeObject(xmlXPathObjectPtr obj) {
+ xmlXPathFreeObject(obj);
+
+ /* Some XSLT extension functions create RVTs. */
+ xsltFreeRVTs(tctxt);
+}
+
+/*
+ * Release everything set up by xsltFuzzXPathInit and reset the globals.
+ *
+ * Safe to call after a failed xsltFuzzXPathInit, when the transform context
+ * (and possibly the document) was never created.
+ */
+void
+xsltFuzzXPathCleanup(void) {
+    if (tctxt != NULL) {
+        /* Remember the stylesheet; it is freed after the context. */
+        xsltStylesheetPtr style = tctxt->style;
+
+        xmlXPathRegisteredNsCleanup(tctxt->xpathCtxt);
+        xsltFreeTransformContext(tctxt);
+        tctxt = NULL;
+        xsltFreeStylesheet(style);
+    }
+    /* Obtained via the stylesheet in xsltFuzzXPathInit; don't keep it around. */
+    saxonExtHash = NULL;
+
+    xsltFreeSecurityPrefs(sec);
+    sec = NULL;
+    xmlFreeDoc(doc);
+    doc = NULL;
+}
+
+/*
+ * XSLT fuzzer
+ *
+ * This is a rather naive fuzz target using a static XML document.
+ *
+ * TODO
+ *
+ * - Improve seed corpus
+ * - Mutate multiple input documents: source, xsl:import, xsl:include
+ * - format-number() with xsl:decimal-format
+ * - Better coverage for xsl:key and key() function
+ * - EXSLT func:function
+ * - xsl:document
+ */
+
+/*
+ * Set up the XSLT fuzz target: initialize the libraries and load xslt.xml
+ * into the global doc.
+ *
+ * argc_p: unused
+ * argv_p: pointer to the argument vector; (*argv_p)[0] is used to locate
+ *         xslt.xml when dir is NULL
+ * dir:    directory containing xslt.xml, or NULL
+ *
+ * Returns 0 on success, -1 if the document could not be loaded.
+ */
+int
+xsltFuzzXsltInit(int *argc_p ATTRIBUTE_UNUSED, char ***argv_p,
+                 const char *dir) {
+    const char *programPath;
+
+    xsltFuzzInit();
+
+    /* Source document every fuzzed stylesheet is applied to */
+    programPath = (*argv_p)[0];
+    doc = xsltFuzzLoadDoc(programPath, dir, "xslt.xml");
+
+    return (doc != NULL) ? 0 : -1;
+}
+
+/*
+ * Apply the XSLT target's XPath limits (operation count, evaluation depth
+ * and parser depth) to the given XPath context.
+ */
+static void
+xsltSetXPathResourceLimits(xmlXPathContextPtr xpctxt)
+{
+    xpctxt->opLimit = 100000;
+    xpctxt->maxDepth = 100;
+    xpctxt->maxParserDepth = 15;
+}
+
+/*
+ * Run one XSLT fuzz input: parse data as a stylesheet, apply it to the
+ * static source document loaded by xsltFuzzXsltInit and serialize the result.
+ *
+ * data: stylesheet bytes (need not be null-terminated)
+ * size: number of bytes in data
+ *
+ * Returns the serialized result, which the caller releases with xmlFree,
+ * or NULL if parsing, compiling or transforming failed.
+ */
+xmlChar *
+xsltFuzzXslt(const char *data, size_t size) {
+    xmlDocPtr xsltDoc;
+    xmlDocPtr result;
+    xmlNodePtr xsltRoot;
+    xsltStylesheetPtr sheet;
+    xsltTransformContextPtr ctxt;
+    xmlChar *ret = NULL;
+    int retLen = 0;
+
+    xsltDoc = xmlReadMemory(data, size, NULL, NULL, 0);
+    if (xsltDoc == NULL)
+        return NULL;
+
+    /* Declare the EXSLT and Saxon namespace prefixes on the root element */
+    xsltRoot = xmlDocGetRootElement(xsltDoc);
+    xmlNewNs(xsltRoot, EXSLT_COMMON_NAMESPACE, BAD_CAST "exsl");
+    xmlNewNs(xsltRoot, EXSLT_COMMON_NAMESPACE, BAD_CAST "exslt");
+    xmlNewNs(xsltRoot, EXSLT_CRYPTO_NAMESPACE, BAD_CAST "crypto");
+    xmlNewNs(xsltRoot, EXSLT_DATE_NAMESPACE, BAD_CAST "date");
+    xmlNewNs(xsltRoot, EXSLT_DYNAMIC_NAMESPACE, BAD_CAST "dyn");
+    xmlNewNs(xsltRoot, EXSLT_MATH_NAMESPACE, BAD_CAST "math");
+    xmlNewNs(xsltRoot, EXSLT_SETS_NAMESPACE, BAD_CAST "set");
+    xmlNewNs(xsltRoot, EXSLT_STRINGS_NAMESPACE, BAD_CAST "str");
+    xmlNewNs(xsltRoot, SAXON_NAMESPACE, BAD_CAST "saxon");
+
+    sheet = xsltNewStylesheet();
+    if (sheet == NULL) {
+        xmlFreeDoc(xsltDoc);
+        return NULL;
+    }
+    xsltSetXPathResourceLimits(sheet->xpathCtxt);
+    sheet->xpathCtxt->opCount = 0;
+    if (xsltParseStylesheetUser(sheet, xsltDoc) != 0) {
+        xsltFreeStylesheet(sheet);
+        xmlFreeDoc(xsltDoc);
+        return NULL;
+    }
+
+    ctxt = xsltNewTransformContext(sheet, doc);
+    if (ctxt == NULL) {
+        /*
+         * As on the normal exit path below, freeing the stylesheet is
+         * enough here; xsltDoc is not freed separately after a
+         * successful parse.
+         */
+        xsltFreeStylesheet(sheet);
+        return NULL;
+    }
+    xsltSetCtxtSecurityPrefs(sec, ctxt);
+    ctxt->maxTemplateDepth = 100;
+    ctxt->opLimit = 20000;
+    xsltSetXPathResourceLimits(ctxt->xpathCtxt);
+    /* XPath operations counted while parsing carry over into the transform */
+    ctxt->xpathCtxt->opCount = sheet->xpathCtxt->opCount;
+
+    result = xsltApplyStylesheetUser(sheet, doc, NULL, NULL, NULL, ctxt);
+    if (result != NULL)
+        xsltSaveResultToString(&ret, &retLen, result, sheet);
+
+    xmlFreeDoc(result);
+    xsltFreeTransformContext(ctxt);
+    xsltFreeStylesheet(sheet);
+
+    return ret;
+}
+
+/*
+ * Release the source document and the security preferences set up by
+ * xsltFuzzXsltInit and reset both globals to NULL.
+ */
+void
+xsltFuzzXsltCleanup(void)
+{
+    xmlDocPtr oldDoc = doc;
+    xsltSecurityPrefsPtr oldSec = sec;
+
+    doc = NULL;
+    sec = NULL;
+
+    xsltFreeSecurityPrefs(oldSec);
+    xmlFreeDoc(oldDoc);
+}
diff --git a/tests/fuzz/fuzz.h b/tests/fuzz/fuzz.h
new file mode 100644
index 00000000..7dff3dbe
--- /dev/null
+++ b/tests/fuzz/fuzz.h
@@ -0,0 +1,35 @@
+/*
+ * fuzz.h: Header for fuzz targets
+ *
+ * See Copyright for the status of this software.
+ */
+
+#ifndef __XML_XSLT_TESTS_FUZZ_H__
+#define __XML_XSLT_TESTS_FUZZ_H__
+
+#include <stddef.h>
+#include <libxml/xmlstring.h>
+#include <libxml/xpath.h>
+
+/*
+ * Set up the XPath target. argc_p is unused; (*argv_p)[0] is used to locate
+ * xpath.xml when dir is NULL, otherwise the file is read from dir.
+ * Returns 0 on success, -1 on failure.
+ */
+int
+xsltFuzzXPathInit(int *argc_p, char ***argv_p, const char *dir);
+
+/*
+ * Compile and evaluate the size bytes at data as an XPath expression.
+ * Returns the result object or NULL; pass the result to
+ * xsltFuzzXPathFreeObject.
+ */
+xmlXPathObjectPtr
+xsltFuzzXPath(const char *data, size_t size);
+
+/* Free a result of xsltFuzzXPath and the RVTs of the transform context. */
+void
+xsltFuzzXPathFreeObject(xmlXPathObjectPtr obj);
+
+/* Release the state created by xsltFuzzXPathInit. */
+void
+xsltFuzzXPathCleanup(void);
+
+/*
+ * Set up the XSLT target. argc_p is unused; (*argv_p)[0] is used to locate
+ * xslt.xml when dir is NULL, otherwise the file is read from dir.
+ * Returns 0 on success, -1 on failure.
+ */
+int
+xsltFuzzXsltInit(int *argc_p, char ***argv_p, const char *dir);
+
+/*
+ * Parse the size bytes at data as a stylesheet and apply it to xslt.xml.
+ * Returns the serialized result (release with xmlFree) or NULL.
+ */
+xmlChar *
+xsltFuzzXslt(const char *data, size_t size);
+
+/* Release the state created by xsltFuzzXsltInit. */
+void
+xsltFuzzXsltCleanup(void);
+
+#endif
diff --git a/tests/fuzz/testTargets.c b/tests/fuzz/testTargets.c
new file mode 100644
index 00000000..114304bd
--- /dev/null
+++ b/tests/fuzz/testTargets.c
@@ -0,0 +1,86 @@
+/*
+ * testTargets.c: Test the fuzz targets
+ *
+ * See Copyright for the status of this software.
+ */
+
+#include <stdio.h>
+
+#include "fuzz.h"
+#include <libxml/globals.h>
+
+/*
+ * Smoke test for the XPath target: evaluates count(//node()) against
+ * xpath.xml and expects the number 39.
+ *
+ * argv[1] is handed to xsltFuzzXPathInit as the data directory.
+ * Returns 0 on success, 1 on failure.
+ */
+int
+testXPath(int argc, char **argv) {
+    const char expr[] = "count(//node())";
+    xmlXPathObjectPtr value;
+    int failed = 0;
+
+    if (xsltFuzzXPathInit(&argc, &argv, argv[1]) != 0) {
+        xsltFuzzXPathCleanup();
+        return 1;
+    }
+
+    value = xsltFuzzXPath(expr, sizeof(expr) - 1);
+    if ((value != NULL) && (value->type == XPATH_NUMBER)) {
+        if (value->floatval != 39.0) {
+            fprintf(stderr, "Expression returned %f, expected %f\n",
+                    value->floatval, 39.0);
+            failed = 1;
+        }
+    } else {
+        fprintf(stderr, "Expression doesn't evaluate to number\n");
+        failed = 1;
+    }
+
+    xsltFuzzXPathFreeObject(value);
+    xsltFuzzXPathCleanup();
+
+    return failed;
+}
+
+/*
+ * Smoke test for the XSLT target: runs a stylesheet that outputs
+ * count(//node()) wrapped in <r> and expects "<r>39</r>\n".
+ *
+ * argv[1] is handed to xsltFuzzXsltInit as the data directory.
+ * Returns 0 on success, 1 on failure.
+ */
+int
+testXslt(int argc, char **argv) {
+    const char stylesheet[] =
+        "<xsl:stylesheet"
+        " xmlns:xsl='http://www.w3.org/1999/XSL/Transform'"
+        " version='1.0'"
+        " extension-element-prefixes='"
+        " exsl exslt crypto date dyn math set str saxon"
+        "'>\n"
+        "<xsl:output omit-xml-declaration='yes'/>\n"
+        "<xsl:template match='/'>\n"
+        " <r><xsl:value-of select='count(//node())'/></r>\n"
+        "</xsl:template>\n"
+        "</xsl:stylesheet>\n";
+    xmlChar *output;
+    int status;
+
+    if (xsltFuzzXsltInit(&argc, &argv, argv[1]) != 0) {
+        xsltFuzzXsltCleanup();
+        return 1;
+    }
+
+    output = xsltFuzzXslt(stylesheet, sizeof(stylesheet) - 1);
+    if (output == NULL) {
+        fprintf(stderr, "Result is NULL\n");
+        status = 1;
+    } else if (xmlStrcmp(output, BAD_CAST "<r>39</r>\n") == 0) {
+        status = 0;
+    } else {
+        fprintf(stderr, "Stylesheet returned\n%sexpected \n%s\n",
+                output, "<r>39</r>");
+        status = 1;
+    }
+
+    xmlFree(output);
+    xsltFuzzXsltCleanup();
+
+    return status;
+}
+
+/*
+ * Run both target tests (the second runs even if the first fails) and
+ * exit with 1 if either of them failed.
+ */
+int main(int argc, char **argv) {
+    int xpathFailed = (testXPath(argc, argv) != 0);
+    int xsltFailed = (testXslt(argc, argv) != 0);
+
+    return (xpathFailed || xsltFailed) ? 1 : 0;
+}
diff --git a/tests/fuzz/xpath.c b/tests/fuzz/xpath.c
index 9bbfe26c..475cb073 100644
--- a/tests/fuzz/xpath.c
+++ b/tests/fuzz/xpath.c
@@ -2,217 +2,19 @@
* xpath.c: libFuzzer target for XPath expressions
*
* See Copyright for the status of this software.
- *
- * This fuzz target parses and evaluates XPath expressions in an (E)XSLT
- * context using a static XML document. It heavily exercises the libxml2
- * XPath engine (xpath.c), a few other parts of libxml2, and most of
- * libexslt.
- *
- * Some EXSLT functions need the transform context to create RVTs for
- * node-sets. A couple of functions also access the stylesheet. The
- * XPath context from the transform context is used to parse and
- * evaluate expressions.
- *
- * All these objects are created once at startup. After fuzzing each input,
- * they're reset as cheaply as possible.
- *
- * TODO
- *
- * - Some expressions can create lots of temporary node sets (RVTs) which
- * aren't freed until the whole expression was evaluated, leading to
- * extensive memory usage. Cleaning them up earlier would require
- * callbacks from the XPath engine, for example after evaluating a
- * predicate expression, which doesn't seem feasible. Terminating the
- * evaluation after creating a certain number of RVTs is a simple
- * workaround.
- * - Register a custom xsl:decimal-format declaration for format-number().
- * - Some functions add strings to the stylesheet or transform context
- * dictionary, for example via xsltGetQName, requiring a clean up of the
- * dicts after fuzzing each input. This behavior seems questionable.
- * Extension functions shouldn't needlessly modify the transform context
- * or stylesheet.
- * - Register xsl:keys and fuzz the key() function.
- * - Add a few custom func:functions.
- * - Fuzz the document() function with external documents.
*/
-#include <libgen.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include <libxml/tree.h>
-#include <libxml/parser.h>
-#include <libxml/xpath.h>
-#include <libxml/xpathInternals.h>
-#include <libxslt/extensions.h>
-#include <libxslt/functions.h>
-#include <libxslt/security.h>
-#include <libxslt/transform.h>
-#include <libxslt/xsltutils.h>
-#include <libexslt/exslt.h>
-
-static xmlDocPtr doc;
-static xsltTransformContextPtr tctxt;
-static xmlHashTablePtr saxonExtHash;
-
-static void
-xmlFuzzErrorFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg ATTRIBUTE_UNUSED,
- ...) {
-}
+#include "fuzz.h"
int
-LLVMFuzzerInitialize(int *argc_p ATTRIBUTE_UNUSED,
- char ***argv_p ATTRIBUTE_UNUSED) {
- const char *xmlFilename = "xpath.xml";
- const char *dir;
- char *argv0;
- char *xmlPath;
- xsltSecurityPrefsPtr sec;
- xsltStylesheetPtr style;
- xmlXPathContextPtr xpctxt;
-
- /* Init libxml2 and libexslt */
- xmlInitParser();
- xmlXPathInit();
- exsltRegisterAll();
-
- /* Load XML document */
- argv0 = strdup((*argv_p)[0]);
- dir = dirname(argv0);
- xmlPath = malloc(strlen(dir) + 1 + strlen(xmlFilename) + 1);
- sprintf(xmlPath, "%s/%s", dir, xmlFilename);
- doc = xmlReadFile(xmlPath, NULL, 0);
- free(xmlPath);
- free(argv0);
- if (doc == NULL) {
- fprintf(stderr, "Error: unable to parse file \"%s\"\n", xmlPath);
- return -1;
- }
-
- /* Suppress error messages */
- xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
- xsltSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
-
- style = xsltNewStylesheet();
- tctxt = xsltNewTransformContext(style, doc);
-
- /* Disallow I/O */
- sec = xsltNewSecurityPrefs();
- xsltSetSecurityPrefs(sec, XSLT_SECPREF_READ_FILE, xsltSecurityForbid);
- xsltSetSecurityPrefs(sec, XSLT_SECPREF_WRITE_FILE, xsltSecurityForbid);
- xsltSetSecurityPrefs(sec, XSLT_SECPREF_CREATE_DIRECTORY, xsltSecurityForbid);
- xsltSetSecurityPrefs(sec, XSLT_SECPREF_READ_NETWORK, xsltSecurityForbid);
- xsltSetSecurityPrefs(sec, XSLT_SECPREF_WRITE_NETWORK, xsltSecurityForbid);
- xsltSetCtxtSecurityPrefs(sec, tctxt);
-
- /*
- * Some extension functions need the current instruction.
- *
- * - format-number() for namespaces.
- * - document() for the base URL.
- * - maybe others?
- *
- * For fuzzing, it's enough to use the source document's root element.
- */
- tctxt->inst = xmlDocGetRootElement(doc);
-
- saxonExtHash = (xmlHashTablePtr)
- xsltStyleGetExtData(style, SAXON_NAMESPACE);
-
- /* Set up XPath context */
- xpctxt = tctxt->xpathCtxt;
-
- /* Resource limits to avoid timeouts and call stack overflows */
- xpctxt->maxParserDepth = 15;
- xpctxt->maxDepth = 100;
- xpctxt->opLimit = 500000;
-
- /* Test namespaces used in xpath.xml */
- xmlXPathRegisterNs(xpctxt, BAD_CAST "a", BAD_CAST "a");
- xmlXPathRegisterNs(xpctxt, BAD_CAST "b", BAD_CAST "b");
- xmlXPathRegisterNs(xpctxt, BAD_CAST "c", BAD_CAST "c");
-
- /* EXSLT namespaces */
- xmlXPathRegisterNs(xpctxt, BAD_CAST "crypto", EXSLT_CRYPTO_NAMESPACE);
- xmlXPathRegisterNs(xpctxt, BAD_CAST "date", EXSLT_DATE_NAMESPACE);
- xmlXPathRegisterNs(xpctxt, BAD_CAST "dyn", EXSLT_DYNAMIC_NAMESPACE);
- xmlXPathRegisterNs(xpctxt, BAD_CAST "exsl", EXSLT_COMMON_NAMESPACE);
- xmlXPathRegisterNs(xpctxt, BAD_CAST "math", EXSLT_MATH_NAMESPACE);
- xmlXPathRegisterNs(xpctxt, BAD_CAST "saxon", SAXON_NAMESPACE);
- xmlXPathRegisterNs(xpctxt, BAD_CAST "set", EXSLT_SETS_NAMESPACE);
- xmlXPathRegisterNs(xpctxt, BAD_CAST "str", EXSLT_STRINGS_NAMESPACE);
-
- /* Register variables */
- xmlXPathRegisterVariable(xpctxt, BAD_CAST "f", xmlXPathNewFloat(-1.5));
- xmlXPathRegisterVariable(xpctxt, BAD_CAST "b", xmlXPathNewBoolean(1));
- xmlXPathRegisterVariable(xpctxt, BAD_CAST "s",
- xmlXPathNewString(BAD_CAST "var"));
- xmlXPathRegisterVariable(
- xpctxt, BAD_CAST "n",
- xmlXPathEval(BAD_CAST "//node() | /*/*/namespace::*", xpctxt));
-
- return 0;
+LLVMFuzzerInitialize(int *argc_p, char ***argv_p) {
+ return xsltFuzzXPathInit(argc_p, argv_p, NULL);
}
int
LLVMFuzzerTestOneInput(const char *data, size_t size) {
- xmlXPathContextPtr xpctxt = tctxt->xpathCtxt;
- xmlChar *xpathExpr;
-
- /* Null-terminate */
- xpathExpr = malloc(size + 1);
- memcpy(xpathExpr, data, size);
- xpathExpr[size] = 0;
-
- /*
- * format-number() can still cause memory errors with invalid UTF-8 in
- * prefixes or suffixes. This shouldn't be exploitable in practice, but
- * should be fixed. Check UTF-8 validity for now.
- */
- if (xmlCheckUTF8(xpathExpr) == 0) {
- free(xpathExpr);
- return 0;
- }
-
- /* Compile and return early if the expression is invalid */
- xmlXPathCompExprPtr compExpr = xmlXPathCtxtCompile(xpctxt, xpathExpr);
- free(xpathExpr);
- if (compExpr == NULL)
- return 0;
-
- /* Initialize XPath evaluation context and evaluate */
- xpctxt->node = (xmlNodePtr) doc; /* Maybe test different context nodes? */
- xpctxt->contextSize = 1;
- xpctxt->proximityPosition = 1;
- xpctxt->opCount = 0;
- xmlXPathObjectPtr xpathObj = xmlXPathCompiledEval(compExpr, xpctxt);
- xmlXPathFreeObject(xpathObj);
- xmlXPathFreeCompExpr(compExpr);
-
- /* Some XSLT extension functions create RVTs. */
- xsltFreeRVTs(tctxt);
-
- /* Clean object cache */
- xmlXPathContextSetCache(xpctxt, 0, 0, 0);
- xmlXPathContextSetCache(xpctxt, 1, -1, 0);
-
- /* Clean dictionaries */
- if (xmlDictSize(tctxt->dict) > 0) {
- xmlDictFree(tctxt->dict);
- xmlDictFree(tctxt->style->dict);
- tctxt->style->dict = xmlDictCreate();
- tctxt->dict = xmlDictCreateSub(tctxt->style->dict);
- }
-
- /* Clean saxon:expression cache */
- if (xmlHashSize(saxonExtHash) > 0) {
- /* There doesn't seem to be a cheaper way with the public API. */
- xsltShutdownCtxtExts(tctxt);
- xsltInitCtxtExts(tctxt);
- saxonExtHash = (xmlHashTablePtr)
- xsltStyleGetExtData(tctxt->style, SAXON_NAMESPACE);
- }
+ xmlXPathObjectPtr xpathObj = xsltFuzzXPath(data, size);
+ xsltFuzzXPathFreeObject(xpathObj);
return 0;
}
diff --git a/tests/fuzz/xslt.c b/tests/fuzz/xslt.c
index 0d7bfebf..42ba7881 100644
--- a/tests/fuzz/xslt.c
+++ b/tests/fuzz/xslt.c
@@ -2,138 +2,20 @@
* xslt.c: libFuzzer target for XSLT stylesheets
*
* See Copyright for the status of this software.
- *
- * This is a rather naive fuzz target using a static XML document.
- *
- * TODO
- *
- * - Improve seed corpus
- * - Mutate multiple input documents: source, xsl:import, xsl:include
- * - format-number() with xsl:decimal-format
- * - Better coverage for xsl:key and key() function
- * - EXSLT func:function
- * - xsl:document
*/
-#include <libgen.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include <libxml/tree.h>
-#include <libxml/parser.h>
-#include <libxslt/security.h>
-#include <libxslt/transform.h>
-#include <libxslt/xslt.h>
-#include <libxslt/xsltInternals.h>
-#include <libxslt/xsltutils.h>
-#include <libexslt/exslt.h>
-
-static xmlDocPtr doc;
-static xsltSecurityPrefsPtr sec;
-
-static void
-errorFunc(void *ctx, const char *msg, ...) {
- /* Discard error messages. */
-}
+#include "fuzz.h"
+#include <libxml/globals.h>
int
-LLVMFuzzerInitialize(int *argc_p ATTRIBUTE_UNUSED,
- char ***argv_p ATTRIBUTE_UNUSED) {
- const char *xmlFilename = "xslt.xml";
- const char *dir;
- char *argv0;
- char *xmlPath;
-
- /* Init libraries */
- xmlInitParser();
- xmlXPathInit();
- xsltInit();
- exsltRegisterAll();
-
- /* Load XML document */
- argv0 = strdup((*argv_p)[0]);
- dir = dirname(argv0);
- xmlPath = malloc(strlen(dir) + 1 + strlen(xmlFilename) + 1);
- sprintf(xmlPath, "%s/%s", dir, xmlFilename);
- doc = xmlReadFile(xmlPath, NULL, 0);
- free(xmlPath);
- free(argv0);
- if (doc == NULL) {
- fprintf(stderr, "Error: unable to parse file \"%s\"\n", xmlPath);
- return -1;
- }
-
- /* Suppress error messages */
- xmlSetGenericErrorFunc(NULL, errorFunc);
- xsltSetGenericErrorFunc(NULL, errorFunc);
-
- /* Disallow I/O */
- sec = xsltNewSecurityPrefs();
- xsltSetSecurityPrefs(sec, XSLT_SECPREF_READ_FILE, xsltSecurityForbid);
- xsltSetSecurityPrefs(sec, XSLT_SECPREF_WRITE_FILE, xsltSecurityForbid);
- xsltSetSecurityPrefs(sec, XSLT_SECPREF_CREATE_DIRECTORY, xsltSecurityForbid);
- xsltSetSecurityPrefs(sec, XSLT_SECPREF_READ_NETWORK, xsltSecurityForbid);
- xsltSetSecurityPrefs(sec, XSLT_SECPREF_WRITE_NETWORK, xsltSecurityForbid);
-
- return 0;
-}
-
-static void
-xsltSetXPathResourceLimits(xmlXPathContextPtr ctxt) {
- ctxt->maxParserDepth = 15;
- ctxt->maxDepth = 100;
- ctxt->opLimit = 100000;
+LLVMFuzzerInitialize(int *argc_p, char ***argv_p) {
+ return xsltFuzzXsltInit(argc_p, argv_p, NULL);
}
int
LLVMFuzzerTestOneInput(const char *data, size_t size) {
- xmlDocPtr xsltDoc;
- xmlDocPtr result;
- xmlNodePtr xsltRoot;
- xsltStylesheetPtr sheet;
- xsltTransformContextPtr ctxt;
-
- xsltDoc = xmlReadMemory(data, size, NULL, NULL, 0);
- if (xsltDoc == NULL)
- return 0;
- xsltRoot = xmlDocGetRootElement(xsltDoc);
- xmlNewNs(xsltRoot, EXSLT_COMMON_NAMESPACE, BAD_CAST "exsl");
- xmlNewNs(xsltRoot, EXSLT_COMMON_NAMESPACE, BAD_CAST "exslt");
- xmlNewNs(xsltRoot, EXSLT_CRYPTO_NAMESPACE, BAD_CAST "crypto");
- xmlNewNs(xsltRoot, EXSLT_DATE_NAMESPACE, BAD_CAST "date");
- xmlNewNs(xsltRoot, EXSLT_DYNAMIC_NAMESPACE, BAD_CAST "dyn");
- xmlNewNs(xsltRoot, EXSLT_MATH_NAMESPACE, BAD_CAST "math");
- xmlNewNs(xsltRoot, EXSLT_SETS_NAMESPACE, BAD_CAST "set");
- xmlNewNs(xsltRoot, EXSLT_STRINGS_NAMESPACE, BAD_CAST "str");
- xmlNewNs(xsltRoot, SAXON_NAMESPACE, BAD_CAST "saxon");
-
- sheet = xsltNewStylesheet();
- if (sheet == NULL) {
- xmlFreeDoc(xsltDoc);
- return 0;
- }
- xsltSetXPathResourceLimits(sheet->xpathCtxt);
- sheet->xpathCtxt->opCount = 0;
- if (xsltParseStylesheetUser(sheet, xsltDoc) != 0) {
- xsltFreeStylesheet(sheet);
- xmlFreeDoc(xsltDoc);
- return 0;
- }
-
- ctxt = xsltNewTransformContext(sheet, doc);
- xsltSetCtxtSecurityPrefs(sec, ctxt);
- ctxt->maxTemplateDepth = 100;
- ctxt->opLimit = 200000;
- xsltSetXPathResourceLimits(ctxt->xpathCtxt);
- ctxt->xpathCtxt->opCount = sheet->xpathCtxt->opCount;
-
- result = xsltApplyStylesheetUser(sheet, doc, NULL, NULL, NULL, ctxt);
-
- xmlFreeDoc(result);
- xsltFreeTransformContext(ctxt);
- xsltFreeStylesheet(sheet);
+ xmlChar *result = xsltFuzzXslt(data, size);
+ xmlFree(result);
return 0;
}
-