path: root/test/csuite/wt2909_checkpoint_integrity/main.c
diff options
Diffstat (limited to 'test/csuite/wt2909_checkpoint_integrity/main.c')
1 files changed, 666 insertions, 0 deletions
diff --git a/test/csuite/wt2909_checkpoint_integrity/main.c b/test/csuite/wt2909_checkpoint_integrity/main.c
new file mode 100644
index 00000000000..ddf249fb406
--- /dev/null
+++ b/test/csuite/wt2909_checkpoint_integrity/main.c
@@ -0,0 +1,666 @@
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ */
+#include "test_util.h"
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+ * JIRA ticket reference: WT-2909
+ * Test case description:
+ *
+ * This test attempts to check the integrity of checkpoints by injecting
+ * failures (by means of a custom file system) and then trying to recover. To
+ * insulate the top level program from various crashes that may occur when
+ * injecting failures, the "populate" code runs in another process, and is
+ * expected to sometimes fail. Then the top level program runs recovery (with
+ * the normal file system) and checks the results. Any failure at the top level
+ * indicates a checkpoint integrity problem.
+ *
+ * Each subtest uses the same kind of schema and data, the only variance is
+ * when the faults are injected. At the moment, this test only injects during
+ * checkpoints, and only injects write failures. It varies in the number of
+ * successful writes that occur before an injected failure (during a checkpoint
+ * operation), this can be indicated with "-o N". When N is not specified, the
+ * test attempts to find the optimal range of N for testing. Clearly when N is
+ * large, then the checkpoint may be successfully written, and the data
+ * represented by the checkpoint will be fully present. When N is small,
+ * nothing of interest is written and no data is present. To find the sweet
+ * spot where interesting failures occur, the test does a binary search to find
+ * the approximate N that divides the "small" and "large" cases. This is not
+ * strictly deterministic, a given N may give different results on different
+ * runs. But approximate optimal N can be determined, allowing a series of
+ * additional tests clustered around this N.
+ *
+ * The data is stored in two tables, one having indices. Both tables have
+ * the same keys and are updated with the same key in a single transaction.
+ *
+ * Failure mode:
+ * If one table is out of step with the other, that is detected as a failure at
+ * the top level. If an index is missing values (or has extra values), that is
+ * likewise a failure at the top level. If the tables or the home directory
+ * cannot be opened, that is a top level error. The tables must be present
+ * as an initial checkpoint is done without any injected fault.
+ */
/*
 * This program does not run on Windows. The non-portable aspects at minimum
 * are fork/exec, the use of environment variables (used by fail_fs), and the
 * file name and build locations of dynamically loaded libraries.
 */
/* Length, excluding the terminating NUL, of the reference "big" string. */
#define BIG_SIZE (1024 * 10)
/* Pattern repeated to fill the reference "big" string. */
#define BIG_CONTENTS "<Big String Contents>"
/* Capacity of the argument vector built for the child process. */
#define MAX_ARGS 20
/* Upper bound of the binary search over "operations until failure". */
#define MAX_OP_RANGE 1000
/* Files, inside the home directory, that receive the subtest's output. */
#define STDERR_FILE "stderr.txt"
#define STDOUT_FILE "stdout.txt"
/*
 * Number of times each operation count is retried near the crossover point.
 * NOTE(review): run_check_subtest_range() uses this name but no definition is
 * visible in this file; 3 is an assumed value -- confirm before relying on it.
 */
#ifndef TESTS_PER_OP_VALUE
#define TESTS_PER_OP_VALUE 3
#endif
/* When verbose, progress is printed every VERBOSE_PRINT records. */
#define VERBOSE_PRINT 10000
+static int check_results(TEST_OPTS *, uint64_t *);
+static void check_values(WT_CURSOR *, int, int, int, char *);
+static int create_big_string(char **);
+static void cursor_count_items(WT_CURSOR *, uint64_t *);
+static void disable_failures(void);
+static void enable_failures(uint64_t, uint64_t);
+static void generate_key(uint64_t, int *);
+static void generate_value(uint32_t, uint64_t, char *, int *, int *, int *,
+ char **);
+static void run_check_subtest(TEST_OPTS *, const char *, uint64_t, bool,
+ uint64_t *);
+static void run_check_subtest_range(TEST_OPTS *, const char *, bool);
+static int run_process(TEST_OPTS *, const char *, char *[], int *);
+static int subtest_main(int, char *[], bool);
+static void subtest_populate(TEST_OPTS *, bool);
+int main(int, char *[]);
+extern int __wt_optind;
+#define WT_FAIL_FS_LIB "../../ext/test/fail_fs/.libs/"
+ * check_results --
+ * Check all the tables and verify the results.
+ */
+static int
+check_results(TEST_OPTS *opts, uint64_t *foundp)
+ WT_CURSOR *maincur, *maincur2, *v0cur, *v1cur, *v2cur;
+ WT_SESSION *session;
+ uint64_t count, idxcount, nrecords;
+ uint32_t rndint;
+ int key, key_got, ret, v0, v1, v2;
+ char *bigref, *big;
+ testutil_check(create_big_string(&bigref));
+ nrecords = opts->nrecords;
+ testutil_check(wiredtiger_open(opts->home, NULL,
+ "create,log=(enabled)", &opts->conn));
+ testutil_check(
+ opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(session->open_cursor(session, "table:subtest", NULL,
+ NULL, &maincur));
+ testutil_check(session->open_cursor(session, "table:subtest2", NULL,
+ NULL, &maincur2));
+ testutil_check(session->open_cursor(session, "index:subtest:v0", NULL,
+ NULL, &v0cur));
+ testutil_check(session->open_cursor(session, "index:subtest:v1", NULL,
+ NULL, &v1cur));
+ testutil_check(session->open_cursor(session, "index:subtest:v2", NULL,
+ NULL, &v2cur));
+ count = 0;
+ while ((ret = maincur->next(maincur)) == 0) {
+ testutil_check(maincur2->next(maincur2));
+ testutil_check(maincur2->get_key(maincur2, &key_got));
+ testutil_check(maincur2->get_value(maincur2, &rndint));
+ generate_key(count, &key);
+ generate_value(rndint, count, bigref, &v0, &v1, &v2, &big);
+ testutil_assert(key == key_got);
+ /* Check the key/values in main table. */
+ testutil_check(maincur->get_key(maincur, &key_got));
+ testutil_assert(key == key_got);
+ check_values(maincur, v0, v1, v2, big);
+ /* Check the values in the indices. */
+ v0cur->set_key(v0cur, v0);
+ testutil_check(v0cur->search(v0cur));
+ check_values(v0cur, v0, v1, v2, big);
+ v1cur->set_key(v1cur, v1);
+ testutil_check(v1cur->search(v1cur));
+ check_values(v1cur, v0, v1, v2, big);
+ v2cur->set_key(v2cur, v2);
+ testutil_check(v2cur->search(v2cur));
+ check_values(v2cur, v0, v1, v2, big);
+ count++;
+ if (count % VERBOSE_PRINT == 0 && opts->verbose)
+ printf("checked %" PRIu64 "/%" PRIu64 "\n", count,
+ nrecords);
+ }
+ if (count % VERBOSE_PRINT != 0 && opts->verbose)
+ printf("checked %" PRIu64 "/%" PRIu64 "\n", count, nrecords);
+ /*
+ * Always expect at least one entry, as populate does a
+ * checkpoint after the first insert.
+ */
+ testutil_assert(count > 0);
+ testutil_assert(ret == WT_NOTFOUND);
+ testutil_assert(maincur2->next(maincur2) == WT_NOTFOUND);
+ cursor_count_items(v0cur, &idxcount);
+ testutil_assert(count == idxcount);
+ cursor_count_items(v1cur, &idxcount);
+ testutil_assert(count == idxcount);
+ cursor_count_items(v2cur, &idxcount);
+ testutil_assert(count == idxcount);
+ testutil_check(opts->conn->close(opts->conn, NULL));
+ opts->conn = NULL;
+ free(bigref);
+ *foundp = count;
+ return (0);
+ * check_values --
+ * Check that the values in the cursor match the given values.
+ */
+static void
+check_values(WT_CURSOR *cursor, int v0, int v1, int v2, char *big)
+ int v0_got, v1_got, v2_got;
+ char *big_got;
+ testutil_check(cursor->get_value(cursor, &v0_got, &v1_got, &v2_got,
+ &big_got));
+ testutil_assert(v0 == v0_got);
+ testutil_assert(v1 == v1_got);
+ testutil_assert(v2 == v2_got);
+ testutil_assert(strcmp(big, big_got) == 0);
+ * create_big_string --
+ * Create and fill the "reference" big array.
+ */
+static int create_big_string(char **bigp)
+ size_t i, mod;
+ char *big;
+ if ((big = malloc(BIG_SIZE + 1)) == NULL)
+ return (ENOMEM);
+ mod = strlen(BIG_CONTENTS);
+ for (i = 0; i < BIG_SIZE; i++) {
+ big[i] = BIG_CONTENTS[i % mod];
+ }
+ big[BIG_SIZE] = '\0';
+ *bigp = big;
+ return (0);
+ * cursor_count_items --
+ * Count the number of items in the table by traversing
+ * through the cursor.
+ */
+static void
+cursor_count_items(WT_CURSOR *cursor, uint64_t *countp)
+ int ret;
+ *countp = 0;
+ testutil_check(cursor->reset(cursor));
+ while ((ret = cursor->next(cursor)) == 0)
+ (*countp)++;
+ testutil_assert(ret == WT_NOTFOUND);
/*
 * disable_failures --
 *	Disable failures in the fail file system.
 *
 *	Sets the WT_FAIL_FS_ENABLE environment variable to "0", overwriting
 *	any existing value.
 */
static void
disable_failures(void)
{
	testutil_check(setenv("WT_FAIL_FS_ENABLE", "0", 1));
}
/*
 * enable_failures --
 *	Enable failures in the fail file system.
 *
 *	Sets WT_FAIL_FS_ENABLE to "1" and publishes the two limits, formatted
 *	as decimal numbers, in WT_FAIL_FS_WRITE_ALLOW and WT_FAIL_FS_READ_ALLOW.
 *	Existing values of all three variables are overwritten.
 */
static void
enable_failures(uint64_t allow_writes, uint64_t allow_reads)
{
	char value[100];

	testutil_check(setenv("WT_FAIL_FS_ENABLE", "1", 1));
	snprintf(value, sizeof(value), "%" PRIu64, allow_writes);
	testutil_check(setenv("WT_FAIL_FS_WRITE_ALLOW", value, 1));
	snprintf(value, sizeof(value), "%" PRIu64, allow_reads);
	testutil_check(setenv("WT_FAIL_FS_READ_ALLOW", value, 1));
}
/*
 * generate_key --
 *	Generate a key used by the "subtest" and "subtest2" tables.
 *
 *	The key for record number i is i itself, converted to int.
 */
static void
generate_key(uint64_t i, int *keyp)
{
	*keyp = (int)i;
}
+ * generate_value --
+ * Generate values for the "subtest" table.
+ */
+static void
+generate_value(uint32_t rndint, uint64_t i, char *bigref,
+ int *v0p, int *v1p, int *v2p, char **bigp)
+ *v0p = (int)(i * 7);
+ *v1p = (int)(i * 10007);
+ *v2p = (int)(i * 100000007);
+ *bigp = &bigref[rndint % BIG_SIZE];
+ * run_check_subtest --
+ * Run the subtest with the given parameters and check the results.
+ */
+static void
+run_check_subtest(TEST_OPTS *opts, const char *debugger, uint64_t nops,
+ bool close_test, uint64_t *nresultsp)
+ int estatus, narg;
+ char rarg[20], sarg[20], *subtest_args[MAX_ARGS];
+ narg = 0;
+ if (debugger != NULL) {
+ subtest_args[narg++] = (char *)debugger;
+ subtest_args[narg++] = (char *)"--";
+ }
+ subtest_args[narg++] = (char *)opts->progname;
+ /* "subtest" must appear before arguments */
+ if (close_test)
+ subtest_args[narg++] = (char *)"subtest_close";
+ else
+ subtest_args[narg++] = (char *)"subtest";
+ subtest_args[narg++] = (char *)"-h";
+ subtest_args[narg++] = opts->home;
+ subtest_args[narg++] = (char *)"-v"; /* subtest is always verbose */
+ subtest_args[narg++] = (char *)"-p";
+ subtest_args[narg++] = (char *)"-o";
+ snprintf(sarg, sizeof(sarg), "%" PRIu64, nops);
+ subtest_args[narg++] = sarg; /* number of operations */
+ subtest_args[narg++] = (char *)"-n";
+ snprintf(rarg, sizeof(rarg), "%" PRIu64, opts->nrecords);
+ subtest_args[narg++] = rarg; /* number of records */
+ subtest_args[narg++] = NULL;
+ testutil_assert(narg <= MAX_ARGS);
+ if (opts->verbose)
+ printf("running a separate process with %" PRIu64
+ " operations until fail...\n", nops);
+ testutil_clean_work_dir(opts->home);
+ testutil_check(run_process(
+ opts, debugger != NULL ? debugger : opts->progname,
+ subtest_args, &estatus));
+ if (opts->verbose)
+ printf("process exited %d\n", estatus);
+ /*
+ * Verify results in parent process.
+ */
+ testutil_check(check_results(opts, nresultsp));
+ * run_check_subtest_range --
+ *
+ * Run successive tests via binary search that determines the approximate
+ * crossover point between when data is recoverable or not. Once that is
+ * determined, run the subtest in a range near that crossover point.
+ *
+ * The theory is that running at the crossover point will tend to trigger
+ * "interesting" failures at the borderline when the checkpoint is about to,
+ * or has, succeeded. If any of those failures creates a WT home directory
+ * that cannot be recovered, the top level test will fail.
+ */
+static void
+run_check_subtest_range(TEST_OPTS *opts, const char *debugger, bool close_test)
+ uint64_t cutoff, high, low, mid, nops, nresults;
+ int i;
+ bool got_failure, got_success;
+ if (opts->verbose)
+ printf("Determining best range of operations until failure, "
+ "with close_test %s.\n",
+ (close_test ? "enabled" : "disabled"));
+ run_check_subtest(opts, debugger, 1, close_test, &cutoff);
+ low = 0;
+ high = MAX_OP_RANGE;
+ mid = (low + high) / 2;
+ while (mid != low) {
+ run_check_subtest(opts, debugger, mid, close_test,
+ &nresults);
+ if (nresults > cutoff)
+ high = mid;
+ else
+ low = mid;
+ mid = (low + high) / 2;
+ }
+ /*
+ * mid is the number of ops that is the crossover point.
+ * Run some tests near that point to try to trigger weird
+ * failures. If mid is too low or too high, it indicates
+ * there is a fundamental problem with the test.
+ */
+ testutil_assert(mid > 1 && mid < MAX_OP_RANGE - 1);
+ if (opts->verbose)
+ printf("Retesting around %" PRIu64 " operations.\n",
+ mid);
+ got_failure = false;
+ got_success = false;
+ for (nops = mid - 10; nops < mid + 10; nops++) {
+ for (i = 0; i < TESTS_PER_OP_VALUE; i++) {
+ run_check_subtest(opts, debugger, nops,
+ close_test, &nresults);
+ if (nresults > cutoff)
+ got_failure = true;
+ else
+ got_success = true;
+ }
+ }
+ /*
+ * Check that it really ran with a crossover point.
+ */
+ testutil_assert(got_failure);
+ testutil_assert(got_success);
+ * run_process --
+ * Run a program with arguments, wait until it completes.
+ */
+static int
+run_process(TEST_OPTS *opts, const char *prog, char *argv[], int *status)
+ int pid;
+ char **arg;
+ if (opts->verbose) {
+ printf("running: ");
+ for (arg = argv; *arg != NULL; arg++)
+ printf("%s ", *arg);
+ printf("\n");
+ }
+ if ((pid = fork()) == 0) {
+ (void)execv(prog, argv);
+ testutil_die(errno, "%s", prog);
+ } else if (pid < 0)
+ return (errno);
+ (void)waitpid(pid, status, 0);
+ return (0);
+ * subtest_main --
+ * The main program for the subtest
+ */
+static int
+subtest_main(int argc, char *argv[], bool close_test)
+ TEST_OPTS *opts, _opts;
+ WT_SESSION *session;
+ char config[1024], filename[1024];
+ struct rlimit rlim;
+ if (testutil_disable_long_tests())
+ return (0);
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ memset(&rlim, 0, sizeof(rlim));
+ /* No core files during fault injection tests. */
+ testutil_check(setrlimit(RLIMIT_CORE, &rlim));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+ /* Redirect stderr, stdout. */
+ sprintf(filename, "%s/%s", opts->home, STDERR_FILE);
+ testutil_assert(freopen(filename, "a", stderr) != NULL);
+ sprintf(filename, "%s/%s", opts->home, STDOUT_FILE);
+ testutil_assert(freopen(filename, "a", stdout) != NULL);
+ snprintf(config, sizeof(config),
+ "create,cache_size=250M,log=(enabled),"
+ "transaction_sync=(enabled,method=none),extensions=("
+ "=(early_load,config={environment=true,verbose=true})]");
+ testutil_check(wiredtiger_open(opts->home, NULL, config, &opts->conn));
+ testutil_check(
+ opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(session->create(session, "table:subtest",
+ "key_format=i,value_format=iiiS,"
+ "columns=(id,v0,v1,v2,big)"));
+ testutil_check(session->create(session, "table:subtest2",
+ "key_format=i,value_format=i"));
+ testutil_check(session->create(session, "index:subtest:v0",
+ "columns=(v0)"));
+ testutil_check(session->create(session, "index:subtest:v1",
+ "columns=(v1)"));
+ testutil_check(session->create(session, "index:subtest:v2",
+ "columns=(v2)"));
+ testutil_check(session->close(session, NULL));
+ subtest_populate(opts, close_test);
+ testutil_cleanup(opts);
+ return (0);
/*
 * This macro is used as a substitute for testutil_check, except that it is
 * aware of when a failure may be expected due to the effects of the fail_fs.
 * This macro is used only in subtest_populate(), it uses local variables.
 *
 * It reads the local "failmode" and writes the local "failed". While
 * failmode is set, a WT_RUN_RECOVERY or EIO return only sets "failed"; any
 * other non-zero return, or any non-zero return outside failmode, is reported
 * and passed to testutil_check. The expression is evaluated exactly once, and
 * the do/while wrapper makes an invocation behave as a single statement.
 */
#define CHECK(expr) do {						\
	int _ret;							\
	_ret = (expr);							\
	if (_ret != 0) {						\
		if (!failmode ||					\
		    (_ret != WT_RUN_RECOVERY && _ret != EIO)) {		\
			fprintf(stderr, " BAD RETURN %d for \"%s\"\n",	\
			    _ret, #expr);				\
			testutil_check(_ret);				\
		} else							\
			failed = true;					\
	}								\
} while (0)
+ * subtest_populate --
+ * Populate the tables.
+ */
+static void
+subtest_populate(TEST_OPTS *opts, bool close_test)
+ WT_CURSOR *maincur, *maincur2;
+ WT_SESSION *session;
+ uint64_t i, nrecords;
+ uint32_t rndint;
+ int key, v0, v1, v2;
+ char *big, *bigref;
+ bool failed, failmode;
+ failmode = failed = false;
+ __wt_random_init_seed(NULL, &rnd);
+ CHECK(create_big_string(&bigref));
+ nrecords = opts->nrecords;
+ CHECK(opts->conn->open_session(
+ opts->conn, NULL, NULL, &session));
+ CHECK(session->open_cursor(session, "table:subtest", NULL,
+ NULL, &maincur));
+ CHECK(session->open_cursor(session, "table:subtest2", NULL,
+ NULL, &maincur2));
+ for (i = 0; i < nrecords && !failed; i++) {
+ rndint = __wt_random(&rnd);
+ generate_key(i, &key);
+ generate_value(rndint, i, bigref, &v0, &v1, &v2, &big);
+ CHECK(session->begin_transaction(session, NULL));
+ maincur->set_key(maincur, key);
+ maincur->set_value(maincur, v0, v1, v2, big);
+ CHECK(maincur->insert(maincur));
+ maincur2->set_key(maincur2, key);
+ maincur2->set_value(maincur2, rndint);
+ CHECK(maincur2->insert(maincur2));
+ CHECK(session->commit_transaction(session, NULL));
+ if (i == 0)
+ /*
+ * Force an initial checkpoint, that helps to
+ * distinguish a clear failure from just not running
+ * long enough.
+ */
+ CHECK(session->checkpoint(session, NULL));
+ if ((i + 1) % VERBOSE_PRINT == 0 && opts->verbose)
+ printf(" %" PRIu64 "/%" PRIu64 "\n",
+ (i + 1), nrecords);
+ /* Attempt to isolate the failures to checkpointing. */
+ if (i == (nrecords/100)) {
+ enable_failures(opts->nops, 1000000);
+ failmode = true; /* CHECK should expect failures. */
+ CHECK(session->checkpoint(session, NULL));
+ failmode = false;
+ disable_failures();
+ if (failed && opts->verbose)
+ printf("checkpoint failed (expected).\n");
+ }
+ }
+ /*
+ * Closing handles after an extreme fail is likely to cause
+ * cascading failures (or crashes), so recommended practice is
+ * to immediately exit. We're interested in testing both with
+ * and without the recommended practice.
+ */
+ if (failed) {
+ if (!close_test) {
+ fprintf(stderr, "exit early.\n");
+ exit(0);
+ } else
+ fprintf(stderr, "closing after failure.\n");
+ }
+ free(bigref);
+ CHECK(maincur->close(maincur));
+ CHECK(maincur2->close(maincur2));
+ CHECK(session->close(session, NULL));
+ * main --
+ * The main program for the test. When invoked with "subtest"
+ * argument, run the subtest. Otherwise, run a separate process
+ * for each needed subtest, and check the results.
+ */
+main(int argc, char *argv[])
+ TEST_OPTS *opts, _opts;
+ uint64_t nresults;
+ const char *debugger;
+ if (testutil_disable_long_tests())
+ return (0);
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ debugger = NULL;
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ argc -= __wt_optind;
+ argv += __wt_optind;
+ if (opts->nrecords == 0)
+ opts->nrecords = 50000;
+ while (argc > 0) {
+ if (strcmp(argv[0], "subtest") == 0)
+ return (subtest_main(argc, argv, false));
+ else if (strcmp(argv[0], "subtest_close") == 0)
+ return (subtest_main(argc, argv, true));
+ else if (strcmp(argv[0], "gdb") == 0)
+ debugger = "/usr/bin/gdb";
+ else
+ testutil_assert(false);
+ argc--;
+ argv++;
+ }
+ if (opts->verbose) {
+ printf("Number of operations until failure: %" PRIu64
+ " (change with -o N)\n", opts->nops);
+ printf("Number of records: %" PRIu64
+ " (change with -n N)\n", opts->nrecords);
+ }
+ if (opts->nops == 0) {
+ run_check_subtest_range(opts, debugger, false);
+ run_check_subtest_range(opts, debugger, true);
+ } else
+ run_check_subtest(opts, debugger, opts->nops,
+ opts->nrecords, &nresults);
+ testutil_clean_work_dir(opts->home);
+ testutil_cleanup(opts);
+ return (0);