summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Nick Wellnhofer <wellnhofer@aevum.de> 2020-06-15 12:20:54 +0200
committer Nick Wellnhofer <wellnhofer@aevum.de> 2020-06-15 15:23:38 +0200
commit 536f421d37470b726f023dc568d461443a1382bd (patch)
tree 765a230dc39f253e017a84eab2291003a595a2e6
parent a697ed1e24234a9e6a4a4639555dcca230f752c1 (diff)
download libxml2-536f421d37470b726f023dc568d461443a1382bd.tar.gz
Fuzz target for HTML parser
-rw-r--r-- fuzz/.gitignore 1
-rw-r--r-- fuzz/Makefile.am 13
-rw-r--r-- fuzz/fuzz.c 17
-rw-r--r-- fuzz/fuzz.h 3
-rw-r--r-- fuzz/html.c 70
-rw-r--r-- fuzz/html.dict 107
-rw-r--r-- fuzz/html.options 2
7 files changed, 212 insertions, 1 deletions
diff --git a/fuzz/.gitignore b/fuzz/.gitignore
index 28b71084..178a6592 100644
--- a/fuzz/.gitignore
+++ b/fuzz/.gitignore
@@ -1,4 +1,5 @@
corpus/
+html
regexp
seed/xml*
testFuzzer
diff --git a/fuzz/Makefile.am b/fuzz/Makefile.am
index 0e7391ba..a286f867 100644
--- a/fuzz/Makefile.am
+++ b/fuzz/Makefile.am
@@ -1,4 +1,4 @@
-EXTRA_PROGRAMS = regexp uri xml xmlSeed
+EXTRA_PROGRAMS = html regexp uri xml xmlSeed
check_PROGRAMS = testFuzzer
CLEANFILES = $(EXTRA_PROGRAMS)
AM_CPPFLAGS = -I$(top_srcdir)/include
@@ -52,6 +52,17 @@ fuzz-xml: xml$(EXEEXT) seed/xml.stamp
-timeout=20 \
corpus/xml seed/xml
+html_SOURCES = html.c fuzz.c
+html_LDFLAGS = -fsanitize=fuzzer
+
+fuzz-html: html$(EXEEXT)
+ @mkdir -p corpus/html
+ ./html$(EXEEXT) \
+ -dict=html.dict \
+ -max_len=1000000 \
+ -timeout=20 \
+ corpus/html $(top_srcdir)/test/HTML
+
regexp_SOURCES = regexp.c fuzz.c
regexp_LDFLAGS = -fsanitize=fuzzer
diff --git a/fuzz/fuzz.c b/fuzz/fuzz.c
index 6955f280..57770c53 100644
--- a/fuzz/fuzz.c
+++ b/fuzz/fuzz.c
@@ -105,6 +105,23 @@ xmlFuzzReadInt() {
}
/**
+ * xmlFuzzReadRemaining:
+ * @size: output, set to the number of bytes handed back
+ *
+ * Consume everything that is still unread in the fuzz data.
+ *
+ * Returns the position the read cursor had on entry. Afterwards the
+ * cursor has been advanced past those bytes and the remaining count
+ * is zero.
+ */
+const char *
+xmlFuzzReadRemaining(size_t *size) {
+    const char *start = fuzzData.ptr;
+    size_t avail = fuzzData.remaining;
+
+    /* Report the length first, then move the cursor to the end. */
+    *size = avail;
+    fuzzData.ptr += avail;
+    fuzzData.remaining = 0;
+
+    return(start);
+}
+
+/**
* xmlFuzzReadString:
* @size: size of string in bytes
*
diff --git a/fuzz/fuzz.h b/fuzz/fuzz.h
index 1093be14..35702855 100644
--- a/fuzz/fuzz.h
+++ b/fuzz/fuzz.h
@@ -33,6 +33,9 @@ xmlFuzzDataCleanup(void);
int
xmlFuzzReadInt(void);
+const char *
+xmlFuzzReadRemaining(size_t *size);
+
void
xmlFuzzReadEntities(void);
diff --git a/fuzz/html.c b/fuzz/html.c
new file mode 100644
index 00000000..d212c1f0
--- /dev/null
+++ b/fuzz/html.c
@@ -0,0 +1,70 @@
+/*
+ * html.c: a libFuzzer target to test several HTML parser interfaces.
+ *
+ * See Copyright for the status of this software.
+ */
+
+#include <libxml/HTMLparser.h>
+#include <libxml/HTMLtree.h>
+#include "fuzz.h"
+
+/*
+ * libFuzzer start-up hook: initializes the libxml2 parser and installs
+ * xmlFuzzErrorFunc as the generic error handler. The command-line
+ * arguments are not used.
+ *
+ * Always returns 0.
+ */
+int
+LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
+                     char ***argv ATTRIBUTE_UNUSED) {
+    /* Bring the library up before redirecting its generic errors. */
+    xmlInitParser();
+    xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
+
+    return(0);
+}
+
+/*
+ * libFuzzer entry point: feeds one fuzz input to both the pull and the
+ * push HTML parser interfaces.
+ *
+ * @data: raw fuzz input
+ * @size: number of bytes in @data
+ *
+ * The value returned by xmlFuzzReadInt() is used as the parser option
+ * set; whatever xmlFuzzReadRemaining() hands back is parsed as the
+ * document. The pull-parsed document is also serialized once.
+ *
+ * Always returns 0.
+ */
+int
+LLVMFuzzerTestOneInput(const char *data, size_t size) {
+    static const size_t maxChunkSize = 128;
+    htmlDocPtr doc;
+    htmlParserCtxtPtr ctxt;
+    /* Initialized so the xmlFree() below is well-defined even if the
+     * dump call leaves the output arguments untouched. */
+    xmlChar *out = NULL;
+    const char *docBuffer;
+    size_t docSize, consumed, chunkSize;
+    int opts, outSize = 0;
+
+    xmlFuzzDataInit(data, size);
+    opts = xmlFuzzReadInt();
+
+    docBuffer = xmlFuzzReadRemaining(&docSize);
+    if (docBuffer == NULL) {
+        xmlFuzzDataCleanup();
+        return(0);
+    }
+
+    /* Pull parser */
+
+    doc = htmlReadMemory(docBuffer, docSize, NULL, NULL, opts);
+    /* Also test the serializer. */
+    htmlDocDumpMemory(doc, &out, &outSize);
+    xmlFree(out);
+    xmlFreeDoc(doc);
+
+    /* Push parser */
+
+    ctxt = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL,
+                                    XML_CHAR_ENCODING_NONE);
+
+    /*
+     * Context creation can fail (e.g. out of memory). Skip the push
+     * test in that case instead of dereferencing a NULL context when
+     * reading ctxt->myDoc below.
+     */
+    if (ctxt != NULL) {
+        htmlCtxtUseOptions(ctxt, opts);
+
+        /* Feed the document in pieces of at most maxChunkSize bytes. */
+        for (consumed = 0; consumed < docSize; consumed += chunkSize) {
+            chunkSize = docSize - consumed;
+            if (chunkSize > maxChunkSize)
+                chunkSize = maxChunkSize;
+            htmlParseChunk(ctxt, docBuffer + consumed, chunkSize, 0);
+        }
+
+        /* An empty chunk with the final flag set ends the document. */
+        htmlParseChunk(ctxt, NULL, 0, 1);
+        xmlFreeDoc(ctxt->myDoc);
+        htmlFreeParserCtxt(ctxt);
+    }
+
+    /* Cleanup */
+
+    xmlFuzzDataCleanup();
+
+    return(0);
+}
+
diff --git a/fuzz/html.dict b/fuzz/html.dict
new file mode 100644
index 00000000..9f58ed1e
--- /dev/null
+++ b/fuzz/html.dict
@@ -0,0 +1,107 @@
+elem_a="<a></a>"
+elem_abbr="<abbr></abbr>"
+elem_acronym="<acronym></acronym>"
+elem_address="<address></address>"
+elem_applet="<applet></applet>"
+elem_area="<area>"
+elem_b="<b></b>"
+elem_base="<base>"
+elem_basefont="<basefont>"
+elem_bdo="<bdo></bdo>"
+elem_big="<big></big>"
+elem_blockquote="<blockquote></blockquote>"
+elem_body="<body></body>"
+elem_br="<br>"
+elem_button="<button></button>"
+elem_caption="<caption></caption>"
+elem_center="<center></center>"
+elem_cite="<cite></cite>"
+elem_code="<code></code>"
+elem_col="<col>"
+elem_colgroup="<colgroup></colgroup>"
+elem_dd="<dd></dd>"
+elem_del="<del></del>"
+elem_dfn="<dfn></dfn>"
+elem_dir="<dir></dir>"
+elem_div="<div></div>"
+elem_dl="<dl></dl>"
+elem_dt="<dt></dt>"
+elem_em="<em></em>"
+elem_embed="<embed></embed>"
+elem_fieldset="<fieldset></fieldset>"
+elem_font="<font></font>"
+elem_form="<form></form>"
+elem_frame="<frame>"
+elem_frameset="<frameset></frameset>"
+elem_h1="<h1></h1>"
+elem_h2="<h2></h2>"
+elem_h3="<h3></h3>"
+elem_h4="<h4></h4>"
+elem_h5="<h5></h5>"
+elem_h6="<h6></h6>"
+elem_head="<head></head>"
+elem_hr="<hr>"
+elem_html="<html></html>"
+elem_i="<i></i>"
+elem_iframe="<iframe></iframe>"
+elem_img="<img>"
+elem_input="<input>"
+elem_ins="<ins></ins>"
+elem_isindex="<isindex>"
+elem_kbd="<kbd></kbd>"
+elem_label="<label></label>"
+elem_legend="<legend></legend>"
+elem_li="<li></li>"
+elem_link="<link>"
+elem_map="<map></map>"
+elem_menu="<menu></menu>"
+elem_meta="<meta>"
+elem_noframes="<noframes></noframes>"
+elem_noscript="<noscript></noscript>"
+elem_object="<object></object>"
+elem_ol="<ol></ol>"
+elem_optgroup="<optgroup></optgroup>"
+elem_option="<option></option>"
+elem_p="<p></p>"
+elem_param="<param>"
+elem_pre="<pre></pre>"
+elem_q="<q></q>"
+elem_s="<s></s>"
+elem_samp="<samp></samp>"
+elem_script="<script></script>"
+elem_select="<select></select>"
+elem_small="<small></small>"
+elem_span="<span></span>"
+elem_strike="<strike></strike>"
+elem_strong="<strong></strong>"
+elem_style="<style></style>"
+elem_sub="<sub></sub>"
+elem_sup="<sup></sup>"
+elem_table="<table></table>"
+elem_tbody="<tbody></tbody>"
+elem_td="<td></td>"
+elem_textarea="<textarea></textarea>"
+elem_tfoot="<tfoot></tfoot>"
+elem_th="<th></th>"
+elem_thead="<thead></thead>"
+elem_title="<title></title>"
+elem_tr="<tr></tr>"
+elem_tt="<tt></tt>"
+elem_u="<u></u>"
+elem_ul="<ul></ul>"
+elem_var="<var></var>"
+
+attr_id=" id=\"\""
+attr_style=" style=\"\""
+
+comment="<!-- -->"
+
+pi="<?a?>"
+
+ref_lt="&lt;"
+ref_gt="&gt;"
+ref_amp="&amp;"
+ref_apos="&apos;"
+ref_quot="&quot;"
+ref_dec="&#9;"
+ref_hex="&#xA;"
diff --git a/fuzz/html.options b/fuzz/html.options
new file mode 100644
index 00000000..e5ae71b9
--- /dev/null
+++ b/fuzz/html.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+max_len = 1000000