summaryrefslogtreecommitdiff
path: root/tests/fuzz/fuzz.c
blob: 0ef89db5144c1657e584f49a5ce8fb2a1c6f7512 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
/*
 * fuzz.c: Fuzz targets for libxslt
 *
 * See Copyright for the status of this software.
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "fuzz.h"

#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>
#include <libxslt/extensions.h>
#include <libxslt/functions.h>
#include <libxslt/security.h>
#include <libxslt/transform.h>
#include <libxslt/xslt.h>
#include <libxslt/xsltInternals.h>
#include <libxslt/xsltutils.h>
#include <libexslt/exslt.h>

/*
 * Directory separator of the host platform. Used to find the directory
 * part of argv[0] when locating the static input documents.
 */
#if defined(_WIN32)
  #define DIR_SEP '\\'
#else
  #define DIR_SEP '/'
#endif

/* Static XML source document shared by all fuzz iterations. */
static xmlDocPtr doc;
/* Security preferences forbidding file and network I/O (see xsltFuzzInit). */
static xsltSecurityPrefsPtr sec;
/* Long-lived transform context used by the XPath fuzz target. */
static xsltTransformContextPtr tctxt;
/*
 * Extension data registered for SAXON_NAMESPACE. The XPath fuzz target
 * checks its size to decide whether the saxon:expression cache must be
 * reset.
 */
static xmlHashTablePtr saxonExtHash;

/*
 * Error callback that discards every message. xsltFuzzInit installs it as
 * the generic error handler of both libxml2 and libxslt.
 */
static void
xsltFuzzErrorFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg ATTRIBUTE_UNUSED,
                  ...) {
}

/*
 * Common setup shared by the fuzz targets: initializes the libraries,
 * silences error output and creates the global security preferences
 * (sec) with every kind of file and network access forbidden.
 */
static void
xsltFuzzInit(void) {
    /* Kinds of I/O that fuzzed input must never be allowed to perform */
    static const int forbidden[] = {
        XSLT_SECPREF_READ_FILE,
        XSLT_SECPREF_WRITE_FILE,
        XSLT_SECPREF_CREATE_DIRECTORY,
        XSLT_SECPREF_READ_NETWORK,
        XSLT_SECPREF_WRITE_NETWORK
    };
    size_t i;

    /* Bring up libxml2, libxslt and libexslt */
    xmlInitParser();
    xmlXPathInit();
    xsltInit();
    exsltRegisterAll();

    /* Route all diagnostics to the no-op handler */
    xmlSetGenericErrorFunc(NULL, xsltFuzzErrorFunc);
    xsltSetGenericErrorFunc(NULL, xsltFuzzErrorFunc);

    /* Forbid each I/O category in turn */
    sec = xsltNewSecurityPrefs();
    for (i = 0; i < sizeof(forbidden) / sizeof(forbidden[0]); i++)
        xsltSetSecurityPrefs(sec, forbidden[i], xsltSecurityForbid);
}

/*
 * Load one of the static XML documents used as fuzzer input.
 *
 * argv0:    path of the running executable. Only used when dir is NULL:
 *           everything up to and including the last DIR_SEP is taken as
 *           the directory of the file. May be NULL, in which case the
 *           bare filename is used.
 * dir:      directory containing the file, or NULL.
 * filename: name of the file to load.
 *
 * The parse result is stored in the global doc and also returned. Returns
 * NULL if the path buffer can't be allocated (doc is left untouched) or if
 * the file can't be parsed; a message is printed to stderr in both cases.
 */
static xmlDocPtr
xsltFuzzLoadDoc(const char *argv0, const char *dir, const char *filename) {
    char *path;
    size_t pathSize;

    if (dir != NULL) {
        /* dir + '/' + filename + terminator */
        pathSize = strlen(dir) + 1 + strlen(filename) + 1;
        path = malloc(pathSize);
        if (path != NULL)
            snprintf(path, pathSize, "%s/%s", dir, filename);
    } else {
        const char *end;
        size_t dirLen;
        size_t nameLen = strlen(filename);

        end = (argv0 != NULL) ? strrchr(argv0, DIR_SEP) : NULL;
        dirLen = (end == NULL) ? 0 : (size_t) (end - argv0) + 1;
        pathSize = dirLen + nameLen + 1;
        path = malloc(pathSize);
        if (path != NULL) {
            if (dirLen > 0)
                memcpy(path, argv0, dirLen);
            /* Copies the terminator as well */
            memcpy(path + dirLen, filename, nameLen + 1);
        }
    }

    if (path == NULL) {
        fprintf(stderr, "Error: out of memory building path for '%s'\n",
                filename);
        return NULL;
    }

    doc = xmlReadFile(path, NULL, 0);
    if (doc == NULL)
        fprintf(stderr, "Error: unable to parse file '%s'\n", path);
    free(path);

    return doc;
}

/* XPath fuzzer
 *
 * This fuzz target parses and evaluates XPath expressions in an (E)XSLT
 * context using a static XML document. It heavily exercises the libxml2
 * XPath engine (xpath.c), a few other parts of libxml2, and most of
 * libexslt.
 *
 * Some EXSLT functions need the transform context to create RVTs for
 * node-sets. A couple of functions also access the stylesheet. The
 * XPath context from the transform context is used to parse and
 * evaluate expressions.
 *
 * All these objects are created once at startup. After fuzzing each input,
 * they're reset as cheaply as possible.
 *
 * TODO
 *
 * - Some expressions can create lots of temporary node sets (RVTs) which
 *   aren't freed until the whole expression has been evaluated, leading to
 *   excessive memory usage. Cleaning them up earlier would require
 *   callbacks from the XPath engine, for example after evaluating a
 *   predicate expression, which doesn't seem feasible. Terminating the
 *   evaluation after creating a certain number of RVTs is a simple
 *   workaround.
 * - Register a custom xsl:decimal-format declaration for format-number().
 * - Some functions add strings to the stylesheet or transform context
 *   dictionary, for example via xsltGetQName, requiring a cleanup of the
 *   dicts after fuzzing each input. This behavior seems questionable.
 *   Extension functions shouldn't needlessly modify the transform context
 *   or stylesheet.
 * - Register xsl:keys and fuzz the key() function.
 * - Add a few custom func:functions.
 * - Fuzz the document() function with external documents.
 */

/*
 * One-time setup of the XPath fuzz target.
 *
 * argc_p: unused.
 * argv_p: pointer to the argument vector; (*argv_p)[0] is used to locate
 *         "xpath.xml" when dir is NULL.
 * dir:    directory containing "xpath.xml", or NULL.
 *
 * Loads the source document, creates an empty stylesheet and a transform
 * context (stored in the global tctxt), and prepares the context's XPath
 * context with resource limits, namespaces and variables.
 *
 * Returns 0 on success and -1 if the document can't be loaded or the
 * stylesheet or transform context can't be created. On those failures
 * the objects created so far by this function are released again; the
 * security preferences created by xsltFuzzInit are left in place, as on
 * the document-load failure path.
 */
int
xsltFuzzXPathInit(int *argc_p ATTRIBUTE_UNUSED, char ***argv_p,
                  const char *dir) {
    const char *xmlFilename = "xpath.xml";
    xsltStylesheetPtr style;
    xmlXPathContextPtr xpctxt;

    xsltFuzzInit();

    /* Load XML document */
    doc = xsltFuzzLoadDoc((*argv_p)[0], dir, xmlFilename);
    if (doc == NULL)
        return -1;

    style = xsltNewStylesheet();
    if (style == NULL) {
        xmlFreeDoc(doc);
        doc = NULL;
        return -1;
    }

    tctxt = xsltNewTransformContext(style, doc);
    if (tctxt == NULL) {
        xsltFreeStylesheet(style);
        xmlFreeDoc(doc);
        doc = NULL;
        return -1;
    }
    xsltSetCtxtSecurityPrefs(sec, tctxt);

    /*
     * Some extension functions need the current instruction.
     *
     * - format-number() for namespaces.
     * - document() for the base URL.
     * - maybe others?
     *
     * For fuzzing, it's enough to use the source document's root element.
     */
    tctxt->inst = xmlDocGetRootElement(doc);

    saxonExtHash = (xmlHashTablePtr)
        xsltStyleGetExtData(style, SAXON_NAMESPACE);

    /* Set up XPath context */
    xpctxt = tctxt->xpathCtxt;

    /* Resource limits to avoid timeouts and call stack overflows */
    xpctxt->maxParserDepth = 15;
    xpctxt->maxDepth = 100;
    xpctxt->opLimit = 500000;

    /* Test namespaces used in xpath.xml */
    xmlXPathRegisterNs(xpctxt, BAD_CAST "a", BAD_CAST "a");
    xmlXPathRegisterNs(xpctxt, BAD_CAST "b", BAD_CAST "b");
    xmlXPathRegisterNs(xpctxt, BAD_CAST "c", BAD_CAST "c");

    /* EXSLT namespaces */
    xmlXPathRegisterNs(xpctxt, BAD_CAST "crypto", EXSLT_CRYPTO_NAMESPACE);
    xmlXPathRegisterNs(xpctxt, BAD_CAST "date", EXSLT_DATE_NAMESPACE);
    xmlXPathRegisterNs(xpctxt, BAD_CAST "dyn", EXSLT_DYNAMIC_NAMESPACE);
    xmlXPathRegisterNs(xpctxt, BAD_CAST "exsl", EXSLT_COMMON_NAMESPACE);
    xmlXPathRegisterNs(xpctxt, BAD_CAST "math", EXSLT_MATH_NAMESPACE);
    xmlXPathRegisterNs(xpctxt, BAD_CAST "saxon", SAXON_NAMESPACE);
    xmlXPathRegisterNs(xpctxt, BAD_CAST "set", EXSLT_SETS_NAMESPACE);
    xmlXPathRegisterNs(xpctxt, BAD_CAST "str", EXSLT_STRINGS_NAMESPACE);

    /* Register variables: a number, a boolean, a string and a node-set */
    xmlXPathRegisterVariable(xpctxt, BAD_CAST "f", xmlXPathNewFloat(-1.5));
    xmlXPathRegisterVariable(xpctxt, BAD_CAST "b", xmlXPathNewBoolean(1));
    xmlXPathRegisterVariable(xpctxt, BAD_CAST "s",
                             xmlXPathNewString(BAD_CAST "var"));
    xmlXPathRegisterVariable(
            xpctxt, BAD_CAST "n",
            xmlXPathEval(BAD_CAST "//node() | /*/*/namespace::*", xpctxt));

    return 0;
}

/*
 * Run one iteration of the XPath fuzz target.
 *
 * data: fuzz input, not necessarily null-terminated.
 * size: number of bytes in data.
 *
 * The input is copied into a null-terminated buffer, rejected if it isn't
 * valid UTF-8, compiled and evaluated against the global document using
 * the XPath context of the global transform context. Afterwards the
 * per-input state (object cache, dictionaries, saxon:expression cache) is
 * reset.
 *
 * Returns the evaluation result, or NULL if the copy can't be allocated,
 * the input isn't valid UTF-8, or the expression doesn't compile. The
 * evaluation itself may also yield NULL. The result is meant to be
 * released with xsltFuzzXPathFreeObject.
 */
xmlXPathObjectPtr
xsltFuzzXPath(const char *data, size_t size) {
    xmlXPathContextPtr xpctxt = tctxt->xpathCtxt;
    xmlXPathCompExprPtr compExpr;
    xmlXPathObjectPtr xpathObj;
    xmlChar *xpathExpr;

    /* Null-terminate */
    xpathExpr = malloc(size + 1);
    if (xpathExpr == NULL)
        return NULL;
    if (size > 0)
        memcpy(xpathExpr, data, size);
    xpathExpr[size] = 0;

    /*
     * format-number() can still cause memory errors with invalid UTF-8 in
     * prefixes or suffixes. This shouldn't be exploitable in practice, but
     * should be fixed. Check UTF-8 validity for now.
     */
    if (xmlCheckUTF8(xpathExpr) == 0) {
        free(xpathExpr);
        return NULL;
    }

    /* Compile and return early if the expression is invalid */
    compExpr = xmlXPathCtxtCompile(xpctxt, xpathExpr);
    free(xpathExpr);
    if (compExpr == NULL)
        return NULL;

    /* Initialize XPath evaluation context and evaluate */
    xpctxt->node = (xmlNodePtr) doc; /* Maybe test different context nodes? */
    xpctxt->contextSize = 1;
    xpctxt->proximityPosition = 1;
    xpctxt->opCount = 0; /* fresh operation budget for every input */
    xpathObj = xmlXPathCompiledEval(compExpr, xpctxt);
    xmlXPathFreeCompExpr(compExpr);

    /*
     * Clean object cache: switch it off, then on again.
     * NOTE(review): a value of -1 presumably selects the default cache
     * size; confirm against the libxml2 documentation.
     */
    xmlXPathContextSetCache(xpctxt, 0, 0, 0);
    xmlXPathContextSetCache(xpctxt, 1, -1, 0);

    /*
     * Clean dictionaries: if anything was added to the transform context
     * dictionary, replace both it and the stylesheet dictionary.
     */
    if (xmlDictSize(tctxt->dict) > 0) {
        xmlDictFree(tctxt->dict);
        xmlDictFree(tctxt->style->dict);
        tctxt->style->dict = xmlDictCreate();
        tctxt->dict = xmlDictCreateSub(tctxt->style->dict);
    }

    /* Clean saxon:expression cache */
    if (xmlHashSize(saxonExtHash) > 0) {
        /* There doesn't seem to be a cheaper way with the public API. */
        xsltShutdownCtxtExts(tctxt);
        xsltInitCtxtExts(tctxt);
        /* The extension data was recreated, so look it up again */
        saxonExtHash = (xmlHashTablePtr)
            xsltStyleGetExtData(tctxt->style, SAXON_NAMESPACE);
    }

    return xpathObj;
}

/*
 * Release an object returned by xsltFuzzXPath, then free the result value
 * trees (RVTs) held by the global transform context.
 *
 * obj: the XPath object to free.
 */
void
xsltFuzzXPathFreeObject(xmlXPathObjectPtr obj) {
    xmlXPathFreeObject(obj);

    /* Some XSLT extension functions create RVTs. */
    xsltFreeRVTs(tctxt);
}

void
xsltFuzzXPathCleanup(void) {
    xsltStylesheetPtr style = tctxt->style;

    xmlXPathRegisteredNsCleanup(tctxt->xpathCtxt);
    xsltFreeSecurityPrefs(sec);
    sec = NULL;
    xsltFreeTransformContext(tctxt);
    tctxt = NULL;
    xsltFreeStylesheet(style);
    style = NULL;
    xmlFreeDoc(doc);
    doc = NULL;
}

/*
 * XSLT fuzzer
 *
 * This is a rather naive fuzz target using a static XML document.
 *
 * TODO
 *
 * - Improve seed corpus
 * - Mutate multiple input documents: source, xsl:import, xsl:include
 * - format-number() with xsl:decimal-format
 * - Better coverage for xsl:key and key() function
 * - EXSLT func:function
 * - xsl:document
 */

/*
 * One-time setup of the XSLT fuzz target: performs the common
 * initialization and loads "xslt.xml" into the global doc.
 *
 * argc_p: unused.
 * argv_p: pointer to the argument vector; (*argv_p)[0] is used to locate
 *         the document when dir is NULL.
 * dir:    directory containing the document, or NULL.
 *
 * Returns 0 on success, -1 if the document couldn't be loaded.
 */
int
xsltFuzzXsltInit(int *argc_p ATTRIBUTE_UNUSED, char ***argv_p,
                 const char *dir) {
    const char *exePath;

    xsltFuzzInit();

    /* The static source document every stylesheet is applied to */
    exePath = (*argv_p)[0];
    doc = xsltFuzzLoadDoc(exePath, dir, "xslt.xml");

    return (doc != NULL) ? 0 : -1;
}

/*
 * Apply the XSLT fuzz target's resource limits to an XPath context.
 *
 * ctxt: the XPath context to restrict.
 */
static void
xsltSetXPathResourceLimits(xmlXPathContextPtr ctxt) {
    /* Cap on the number of operations */
    ctxt->opLimit = 100000;

    /* Depth caps */
    ctxt->maxDepth = 100;
    ctxt->maxParserDepth = 15;
}

/*
 * Run one iteration of the XSLT fuzz target.
 *
 * data: fuzz input, parsed as an XML document and used as stylesheet.
 * size: number of bytes in data.
 *
 * The EXSLT and Saxon namespaces are declared on the stylesheet's root
 * element, the stylesheet is compiled under XPath resource limits and
 * then applied to the global source document with the global security
 * preferences in effect.
 *
 * Returns the serialized transformation result, or NULL if the input
 * can't be parsed or compiled, a required object can't be created, or
 * the transformation produces no result.
 * NOTE(review): the returned buffer presumably has to be released with
 * xmlFree by the caller; confirm against the libxslt documentation.
 */
xmlChar *
xsltFuzzXslt(const char *data, size_t size) {
    xmlDocPtr xsltDoc;
    xmlDocPtr result;
    xmlNodePtr xsltRoot;
    xsltStylesheetPtr sheet;
    xsltTransformContextPtr ctxt;
    xmlChar *ret = NULL;
    int retLen;

    xsltDoc = xmlReadMemory(data, size, NULL, NULL, 0);
    if (xsltDoc == NULL)
        return NULL;

    xsltRoot = xmlDocGetRootElement(xsltDoc);
    if (xsltRoot == NULL) {
        /* Nothing to attach the namespace declarations to */
        xmlFreeDoc(xsltDoc);
        return NULL;
    }
    xmlNewNs(xsltRoot, EXSLT_COMMON_NAMESPACE, BAD_CAST "exsl");
    xmlNewNs(xsltRoot, EXSLT_COMMON_NAMESPACE, BAD_CAST "exslt");
    xmlNewNs(xsltRoot, EXSLT_CRYPTO_NAMESPACE, BAD_CAST "crypto");
    xmlNewNs(xsltRoot, EXSLT_DATE_NAMESPACE, BAD_CAST "date");
    xmlNewNs(xsltRoot, EXSLT_DYNAMIC_NAMESPACE, BAD_CAST "dyn");
    xmlNewNs(xsltRoot, EXSLT_MATH_NAMESPACE, BAD_CAST "math");
    xmlNewNs(xsltRoot, EXSLT_SETS_NAMESPACE, BAD_CAST "set");
    xmlNewNs(xsltRoot, EXSLT_STRINGS_NAMESPACE, BAD_CAST "str");
    xmlNewNs(xsltRoot, SAXON_NAMESPACE, BAD_CAST "saxon");

    sheet = xsltNewStylesheet();
    if (sheet == NULL) {
        xmlFreeDoc(xsltDoc);
        return NULL;
    }
    xsltSetXPathResourceLimits(sheet->xpathCtxt);
    sheet->xpathCtxt->opCount = 0;
    if (xsltParseStylesheetUser(sheet, xsltDoc) != 0) {
        /* On failure the document has to be freed separately */
        xsltFreeStylesheet(sheet);
        xmlFreeDoc(xsltDoc);
        return NULL;
    }

    /*
     * From here on xsltDoc is released together with the stylesheet, as
     * in the regular cleanup at the end of this function.
     */
    ctxt = xsltNewTransformContext(sheet, doc);
    if (ctxt == NULL) {
        xsltFreeStylesheet(sheet);
        return NULL;
    }
    xsltSetCtxtSecurityPrefs(sec, ctxt);
    ctxt->maxTemplateDepth = 100;
    ctxt->opLimit = 20000;
    xsltSetXPathResourceLimits(ctxt->xpathCtxt);
    /*
     * Continue counting where stylesheet compilation stopped, so that
     * compilation and transformation share one XPath operation budget.
     */
    ctxt->xpathCtxt->opCount = sheet->xpathCtxt->opCount;

    result = xsltApplyStylesheetUser(sheet, doc, NULL, NULL, NULL, ctxt);
    if (result != NULL)
        xsltSaveResultToString(&ret, &retLen, result, sheet);

    xmlFreeDoc(result);
    xsltFreeTransformContext(ctxt);
    xsltFreeStylesheet(sheet);

    return ret;
}

void
xsltFuzzXsltCleanup(void) {
    xsltFreeSecurityPrefs(sec);
    sec = NULL;
    xmlFreeDoc(doc);
    doc = NULL;
}