summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/test/fuzz/fuzz_util.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/test/fuzz/fuzz_util.c')
-rw-r--r--src/third_party/wiredtiger/test/fuzz/fuzz_util.c173
1 files changed, 173 insertions, 0 deletions
diff --git a/src/third_party/wiredtiger/test/fuzz/fuzz_util.c b/src/third_party/wiredtiger/test/fuzz/fuzz_util.c
new file mode 100644
index 00000000000..b6db660b0d8
--- /dev/null
+++ b/src/third_party/wiredtiger/test/fuzz/fuzz_util.c
@@ -0,0 +1,173 @@
+/*-
+ * Public Domain 2014-2020 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "fuzz_util.h"
+
+#include <assert.h>
+#include <stdint.h>
+#include <stddef.h>
+
+FUZZ_GLOBAL_STATE fuzz_state = {.conn = NULL, .session = NULL};
+
+/*
+ * fuzzutil_generate_home_name --
+ * Create a unique home directory per worker that LibFuzzer creates.
+ */
+static void
+fuzzutil_generate_home_name(char *buf)
+{
+ pid_t pid;
+
+ /*
+ * Once we're exercising the fuzz target, we don't know which worker we are. An easy way to make
+ * sure that workers don't clobber each other's home directories is to tack the process ID on
+ * the end of the name.
+ */
+ pid = getpid();
+ sprintf(buf, "WT_TEST_%d", pid);
+}
+
+/*
+ * fuzzutil_setup --
+ * Initialize the connection and session the first time LibFuzzer executes the target.
+ */
+void
+fuzzutil_setup(void)
+{
+ char home[100];
+
+ if (fuzz_state.conn != NULL) {
+ assert(fuzz_state.session != NULL);
+ return;
+ }
+
+ WT_CLEAR(home);
+ fuzzutil_generate_home_name(home);
+ testutil_make_work_dir(home);
+ testutil_check(wiredtiger_open(home, NULL, "create,cache_size=5MB", &fuzz_state.conn));
+ testutil_check(fuzz_state.conn->open_session(fuzz_state.conn, NULL, NULL, &fuzz_state.session));
+}
+
+/*
+ * fuzzutil_sliced_input_init --
+ * Often, our fuzz target requires multiple inputs. For example, for configuration parsing we'd
+ * need a configuration string and a key to search for. We can do this by requiring the fuzzer
+ * to provide data with a number of arbitrary multi-byte separators. If you have a sentinel
+ * character, you can use that, otherwise patterns like 0xdeadbeef work fine too but make the
+ * corpus less readable. If the fuzzer doesn't supply data in that format, we can return out of
+ * the fuzz target. While our fuzz target will reject lots of input to begin with, the fuzzer
+ * will figure out that inputs with these separators yield better coverage and will craft more
+ * sensible inputs over time. This is what the sliced input component is designed for. It takes
+ * the data input, a separator and the number of slices that it should expect and populates a
+ * heap allocated array of data pointers to each separate input and their respective size.
+ */
+bool
+fuzzutil_sliced_input_init(FUZZ_SLICED_INPUT *input, const uint8_t *data, size_t size,
+ const uint8_t *sep, size_t sep_size, size_t req_slices)
+{
+ size_t *sizes;
+ const uint8_t *begin, *end, *pos, **slices;
+ u_int i;
+
+ pos = NULL;
+ i = 0;
+ begin = data;
+ end = data + size;
+
+ /*
+ * It might be better to do an initial pass to check that we have the right number of separators
+ * before actually storing them. Currently, we're dynamically allocating even in the case of
+ * invalid input.
+ */
+ slices = malloc(sizeof(uint8_t *) * req_slices);
+ sizes = malloc(sizeof(size_t) * req_slices);
+ if (slices == NULL || sizes == NULL)
+ goto err;
+
+ /*
+ * Store pointers and sizes for each slice of the input. This code is implementing the idea
+ * described at:
+ * https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#magic-separator.
+ */
+ while ((pos = memmem(begin, (size_t)(end - begin), sep, sep_size)) != NULL) {
+ if (i >= req_slices)
+ goto err;
+ slices[i] = begin;
+ sizes[i] = (size_t)(pos - begin);
+ begin = pos + sep_size;
+ ++i;
+ }
+ if (begin < end) {
+ if (i >= req_slices)
+ goto err;
+ slices[i] = begin;
+ sizes[i] = (size_t)(end - begin);
+ ++i;
+ }
+ if (i != req_slices)
+ goto err;
+ input->slices = slices;
+ input->sizes = sizes;
+ input->num_slices = req_slices;
+ return (true);
+
+err:
+ free(slices);
+ free(sizes);
+ return (false);
+}
+
+/*
+ * fuzzutil_sliced_input_free --
+ * Free any resources on the sliced input.
+ */
+void
+fuzzutil_sliced_input_free(FUZZ_SLICED_INPUT *input)
+{
+ free(input->slices);
+ free(input->sizes);
+ input->slices = NULL;
+ input->sizes = NULL;
+}
+
+/*
+ * fuzzutil_slice_to_cstring --
+ * A conversion function to help convert from a data, size pair to a cstring.
+ */
+char *
+fuzzutil_slice_to_cstring(const uint8_t *data, size_t size)
+{
+ char *str;
+
+ str = malloc(size + 1);
+ if (str == NULL)
+ return NULL;
+ memcpy(str, data, size);
+ str[size] = '\0';
+
+ return (str);
+}