diff options
Diffstat (limited to 'test/csuite/wt2909_checkpoint_integrity/main.c')
-rw-r--r-- | test/csuite/wt2909_checkpoint_integrity/main.c | 666 |
1 files changed, 666 insertions, 0 deletions
diff --git a/test/csuite/wt2909_checkpoint_integrity/main.c b/test/csuite/wt2909_checkpoint_integrity/main.c new file mode 100644 index 00000000000..ddf249fb406 --- /dev/null +++ b/test/csuite/wt2909_checkpoint_integrity/main.c @@ -0,0 +1,666 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" + +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/wait.h> + +/* + * JIRA ticket reference: WT-2909 + * Test case description: + * + * This test attempts to check the integrity of checkpoints by injecting + * failures (by means of a custom file system) and then trying to recover. To + * insulate the top level program from various crashes that may occur when + * injecting failures, the "populate" code runs in another process, and is + * expected to sometimes fail. Then the top level program runs recovery (with + * the normal file system) and checks the results. Any failure at the top level + * indicates a checkpoint integrity problem. + * + * Each subtest uses the same kind of schema and data, the only variance is + * when the faults are injected. At the moment, this test only injects during + * checkpoints, and only injects write failures. It varies in the number of + * successful writes that occur before an injected failure (during a checkpoint + * operation), this can be indicated with "-o N". When N is not specified, the + * test attempts to find the optimal range of N for testing. Clearly when N is + * large, then the checkpoint may be successfully written, and the data + * represented by the checkpoint will be fully present. When N is small, + * nothing of interest is written and no data is present. To find the sweet + * spot where interesting failures occur, the test does a binary search to find + * the approximate N that divides the "small" and "large" cases. This is not + * strictly deterministic, a given N may give different results on different + * runs. But approximate optimal N can be determined, allowing a series of + * additional tests clustered around this N. + * + * The data is stored in two tables, one having indices. Both tables have + * the same keys and are updated with the same key in a single transaction. + * + * Failure mode: + * If one table is out of step with the other, that is detected as a failure at + * the top level. If an index is missing values (or has extra values), that is + * likewise a failure at the top level. If the tables or the home directory + * cannot be opened, that is a top level error. The tables must be present + * as an initial checkpoint is done without any injected fault. + */ + +/* + * This program does not run on Windows. The non-portable aspects at minimum + * are fork/exec the use of environment variables (used by fail_fs), and file + * name and build locations of dynamically loaded libraries. + */ +#define BIG_SIZE (1024 * 10) +#define BIG_CONTENTS "<Big String Contents>" +#define MAX_ARGS 20 +#define MAX_OP_RANGE 1000 +#define STDERR_FILE "stderr.txt" +#define STDOUT_FILE "stdout.txt" +#define TESTS_PER_OP_VALUE 3 +#define VERBOSE_PRINT 10000 + +static int check_results(TEST_OPTS *, uint64_t *); +static void check_values(WT_CURSOR *, int, int, int, char *); +static int create_big_string(char **); +static void cursor_count_items(WT_CURSOR *, uint64_t *); +static void disable_failures(void); +static void enable_failures(uint64_t, uint64_t); +static void generate_key(uint64_t, int *); +static void generate_value(uint32_t, uint64_t, char *, int *, int *, int *, + char **); +static void run_check_subtest(TEST_OPTS *, const char *, uint64_t, bool, + uint64_t *); +static void run_check_subtest_range(TEST_OPTS *, const char *, bool); +static int run_process(TEST_OPTS *, const char *, char *[], int *); +static int subtest_main(int, char *[], bool); +static void subtest_populate(TEST_OPTS *, bool); +int main(int, char *[]); + +extern int __wt_optind; + +#define WT_FAIL_FS_LIB "../../ext/test/fail_fs/.libs/libwiredtiger_fail_fs.so" + +/* + * check_results -- + * Check all the tables and verify the results. + */ +static int +check_results(TEST_OPTS *opts, uint64_t *foundp) +{ + WT_CURSOR *maincur, *maincur2, *v0cur, *v1cur, *v2cur; + WT_SESSION *session; + uint64_t count, idxcount, nrecords; + uint32_t rndint; + int key, key_got, ret, v0, v1, v2; + char *bigref, *big; + + testutil_check(create_big_string(&bigref)); + nrecords = opts->nrecords; + testutil_check(wiredtiger_open(opts->home, NULL, + "create,log=(enabled)", &opts->conn)); + testutil_check( + opts->conn->open_session(opts->conn, NULL, NULL, &session)); + + testutil_check(session->open_cursor(session, "table:subtest", NULL, + NULL, &maincur)); + testutil_check(session->open_cursor(session, "table:subtest2", NULL, + NULL, &maincur2)); + testutil_check(session->open_cursor(session, "index:subtest:v0", NULL, + NULL, &v0cur)); + testutil_check(session->open_cursor(session, "index:subtest:v1", NULL, + NULL, &v1cur)); + testutil_check(session->open_cursor(session, "index:subtest:v2", NULL, + NULL, &v2cur)); + + count = 0; + while ((ret = maincur->next(maincur)) == 0) { + testutil_check(maincur2->next(maincur2)); + testutil_check(maincur2->get_key(maincur2, &key_got)); + testutil_check(maincur2->get_value(maincur2, &rndint)); + + generate_key(count, &key); + generate_value(rndint, count, bigref, &v0, &v1, &v2, &big); + testutil_assert(key == key_got); + + /* Check the key/values in main table. */ + testutil_check(maincur->get_key(maincur, &key_got)); + testutil_assert(key == key_got); + check_values(maincur, v0, v1, v2, big); + + /* Check the values in the indices. */ + v0cur->set_key(v0cur, v0); + testutil_check(v0cur->search(v0cur)); + check_values(v0cur, v0, v1, v2, big); + v1cur->set_key(v1cur, v1); + testutil_check(v1cur->search(v1cur)); + check_values(v1cur, v0, v1, v2, big); + v2cur->set_key(v2cur, v2); + testutil_check(v2cur->search(v2cur)); + check_values(v2cur, v0, v1, v2, big); + + count++; + if (count % VERBOSE_PRINT == 0 && opts->verbose) + printf("checked %" PRIu64 "/%" PRIu64 "\n", count, + nrecords); + } + if (count % VERBOSE_PRINT != 0 && opts->verbose) + printf("checked %" PRIu64 "/%" PRIu64 "\n", count, nrecords); + + /* + * Always expect at least one entry, as populate does a + * checkpoint after the first insert. + */ + testutil_assert(count > 0); + testutil_assert(ret == WT_NOTFOUND); + testutil_assert(maincur2->next(maincur2) == WT_NOTFOUND); + cursor_count_items(v0cur, &idxcount); + testutil_assert(count == idxcount); + cursor_count_items(v1cur, &idxcount); + testutil_assert(count == idxcount); + cursor_count_items(v2cur, &idxcount); + testutil_assert(count == idxcount); + + testutil_check(opts->conn->close(opts->conn, NULL)); + opts->conn = NULL; + + free(bigref); + *foundp = count; + return (0); +} + +/* + * check_values -- + * Check that the values in the cursor match the given values. + */ +static void +check_values(WT_CURSOR *cursor, int v0, int v1, int v2, char *big) +{ + int v0_got, v1_got, v2_got; + char *big_got; + + testutil_check(cursor->get_value(cursor, &v0_got, &v1_got, &v2_got, + &big_got)); + testutil_assert(v0 == v0_got); + testutil_assert(v1 == v1_got); + testutil_assert(v2 == v2_got); + testutil_assert(strcmp(big, big_got) == 0); +} + +/* + * create_big_string -- + * Create and fill the "reference" big array. + */ +static int create_big_string(char **bigp) +{ + size_t i, mod; + char *big; + + if ((big = malloc(BIG_SIZE + 1)) == NULL) + return (ENOMEM); + mod = strlen(BIG_CONTENTS); + for (i = 0; i < BIG_SIZE; i++) { + big[i] = BIG_CONTENTS[i % mod]; + } + big[BIG_SIZE] = '\0'; + *bigp = big; + return (0); +} + +/* + * cursor_count_items -- + * Count the number of items in the table by traversing + * through the cursor. + */ +static void +cursor_count_items(WT_CURSOR *cursor, uint64_t *countp) +{ + int ret; + + *countp = 0; + + testutil_check(cursor->reset(cursor)); + while ((ret = cursor->next(cursor)) == 0) + (*countp)++; + testutil_assert(ret == WT_NOTFOUND); +} + +/* + * disable_failures -- + * Disable failures in the fail file system. + */ +static void +disable_failures(void) +{ + testutil_check(setenv("WT_FAIL_FS_ENABLE", "0", 1)); +} + +/* + * enable_failures -- + * Enable failures in the fail file system. + */ +static void +enable_failures(uint64_t allow_writes, uint64_t allow_reads) +{ + char value[100]; + + testutil_check(setenv("WT_FAIL_FS_ENABLE", "1", 1)); + snprintf(value, sizeof(value), "%" PRIu64, allow_writes); + testutil_check(setenv("WT_FAIL_FS_WRITE_ALLOW", value, 1)); + snprintf(value, sizeof(value), "%" PRIu64, allow_reads); + testutil_check(setenv("WT_FAIL_FS_READ_ALLOW", value, 1)); +} + +/* + * generate_key -- + * Generate a key used by the "subtest" and "subtest2" tables. + */ +static void +generate_key(uint64_t i, int *keyp) +{ + *keyp = (int)i; +} + +/* + * generate_value -- + * Generate values for the "subtest" table. + */ +static void +generate_value(uint32_t rndint, uint64_t i, char *bigref, + int *v0p, int *v1p, int *v2p, char **bigp) +{ + *v0p = (int)(i * 7); + *v1p = (int)(i * 10007); + *v2p = (int)(i * 100000007); + *bigp = &bigref[rndint % BIG_SIZE]; +} + +/* + * run_check_subtest -- + * Run the subtest with the given parameters and check the results. + */ +static void +run_check_subtest(TEST_OPTS *opts, const char *debugger, uint64_t nops, + bool close_test, uint64_t *nresultsp) +{ + int estatus, narg; + char rarg[20], sarg[20], *subtest_args[MAX_ARGS]; + + narg = 0; + if (debugger != NULL) { + subtest_args[narg++] = (char *)debugger; + subtest_args[narg++] = (char *)"--"; + } + + subtest_args[narg++] = (char *)opts->progname; + /* "subtest" must appear before arguments */ + if (close_test) + subtest_args[narg++] = (char *)"subtest_close"; + else + subtest_args[narg++] = (char *)"subtest"; + subtest_args[narg++] = (char *)"-h"; + subtest_args[narg++] = opts->home; + subtest_args[narg++] = (char *)"-v"; /* subtest is always verbose */ + subtest_args[narg++] = (char *)"-p"; + subtest_args[narg++] = (char *)"-o"; + snprintf(sarg, sizeof(sarg), "%" PRIu64, nops); + subtest_args[narg++] = sarg; /* number of operations */ + subtest_args[narg++] = (char *)"-n"; + snprintf(rarg, sizeof(rarg), "%" PRIu64, opts->nrecords); + subtest_args[narg++] = rarg; /* number of records */ + subtest_args[narg++] = NULL; + testutil_assert(narg <= MAX_ARGS); + if (opts->verbose) + printf("running a separate process with %" PRIu64 + " operations until fail...\n", nops); + testutil_clean_work_dir(opts->home); + testutil_check(run_process( + opts, debugger != NULL ? debugger : opts->progname, + subtest_args, &estatus)); + if (opts->verbose) + printf("process exited %d\n", estatus); + + /* + * Verify results in parent process. + */ + testutil_check(check_results(opts, nresultsp)); +} + +/* + * run_check_subtest_range -- + * + * Run successive tests via binary search that determines the approximate + * crossover point between when data is recoverable or not. Once that is + * determined, run the subtest in a range near that crossover point. + * + * The theory is that running at the crossover point will tend to trigger + * "interesting" failures at the borderline when the checkpoint is about to, + * or has, succeeded. If any of those failures creates a WT home directory + * that cannot be recovered, the top level test will fail. + */ +static void +run_check_subtest_range(TEST_OPTS *opts, const char *debugger, bool close_test) +{ + uint64_t cutoff, high, low, mid, nops, nresults; + int i; + bool got_failure, got_success; + + if (opts->verbose) + printf("Determining best range of operations until failure, " + "with close_test %s.\n", + (close_test ? "enabled" : "disabled")); + + run_check_subtest(opts, debugger, 1, close_test, &cutoff); + low = 0; + high = MAX_OP_RANGE; + mid = (low + high) / 2; + while (mid != low) { + run_check_subtest(opts, debugger, mid, close_test, + &nresults); + if (nresults > cutoff) + high = mid; + else + low = mid; + mid = (low + high) / 2; + } + /* + * mid is the number of ops that is the crossover point. + * Run some tests near that point to try to trigger weird + * failures. If mid is too low or too high, it indicates + * there is a fundamental problem with the test. + */ + testutil_assert(mid > 1 && mid < MAX_OP_RANGE - 1); + if (opts->verbose) + printf("Retesting around %" PRIu64 " operations.\n", + mid); + + got_failure = false; + got_success = false; + for (nops = mid - 10; nops < mid + 10; nops++) { + for (i = 0; i < TESTS_PER_OP_VALUE; i++) { + run_check_subtest(opts, debugger, nops, + close_test, &nresults); + if (nresults > cutoff) + got_failure = true; + else + got_success = true; + } + } + /* + * Check that it really ran with a crossover point. + */ + testutil_assert(got_failure); + testutil_assert(got_success); +} + +/* + * run_process -- + * Run a program with arguments, wait until it completes. + */ +static int +run_process(TEST_OPTS *opts, const char *prog, char *argv[], int *status) +{ + int pid; + char **arg; + + if (opts->verbose) { + printf("running: "); + for (arg = argv; *arg != NULL; arg++) + printf("%s ", *arg); + printf("\n"); + } + if ((pid = fork()) == 0) { + (void)execv(prog, argv); + testutil_die(errno, "%s", prog); + } else if (pid < 0) + return (errno); + + (void)waitpid(pid, status, 0); + return (0); +} + +/* + * subtest_main -- + * The main program for the subtest + */ +static int +subtest_main(int argc, char *argv[], bool close_test) +{ + TEST_OPTS *opts, _opts; + WT_SESSION *session; + char config[1024], filename[1024]; + struct rlimit rlim; + + if (testutil_disable_long_tests()) + return (0); + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + memset(&rlim, 0, sizeof(rlim)); + + /* No core files during fault injection tests. */ + testutil_check(setrlimit(RLIMIT_CORE, &rlim)); + testutil_check(testutil_parse_opts(argc, argv, opts)); + testutil_make_work_dir(opts->home); + + /* Redirect stderr, stdout. */ + sprintf(filename, "%s/%s", opts->home, STDERR_FILE); + testutil_assert(freopen(filename, "a", stderr) != NULL); + sprintf(filename, "%s/%s", opts->home, STDOUT_FILE); + testutil_assert(freopen(filename, "a", stdout) != NULL); + snprintf(config, sizeof(config), + "create,cache_size=250M,log=(enabled)," + "transaction_sync=(enabled,method=none),extensions=(" + WT_FAIL_FS_LIB + "=(early_load,config={environment=true,verbose=true})]"); + + testutil_check(wiredtiger_open(opts->home, NULL, config, &opts->conn)); + testutil_check( + opts->conn->open_session(opts->conn, NULL, NULL, &session)); + + testutil_check(session->create(session, "table:subtest", + "key_format=i,value_format=iiiS," + "columns=(id,v0,v1,v2,big)")); + + testutil_check(session->create(session, "table:subtest2", + "key_format=i,value_format=i")); + + testutil_check(session->create(session, "index:subtest:v0", + "columns=(v0)")); + testutil_check(session->create(session, "index:subtest:v1", + "columns=(v1)")); + testutil_check(session->create(session, "index:subtest:v2", + "columns=(v2)")); + + testutil_check(session->close(session, NULL)); + + subtest_populate(opts, close_test); + + testutil_cleanup(opts); + + return (0); +} + +/* + * This macro is used as a substitute for testutil_check, except that it is + * aware of when a failure may be expected due to the effects of the fail_fs. + * This macro is used only in subtest_populate(), it uses local variables. + */ +#define CHECK(expr) { \ + int _ret; \ + _ret = expr; \ + if (_ret != 0) { \ + if (!failmode || \ + (_ret != WT_RUN_RECOVERY && _ret != EIO)) { \ + fprintf(stderr, " BAD RETURN %d for \"%s\"\n", \ + _ret, #expr); \ + testutil_check(_ret); \ + } else \ + failed = true; \ + } \ +} + +/* + * subtest_populate -- + * Populate the tables. + */ +static void +subtest_populate(TEST_OPTS *opts, bool close_test) +{ + WT_CURSOR *maincur, *maincur2; + WT_RAND_STATE rnd; + WT_SESSION *session; + uint64_t i, nrecords; + uint32_t rndint; + int key, v0, v1, v2; + char *big, *bigref; + bool failed, failmode; + + failmode = failed = false; + __wt_random_init_seed(NULL, &rnd); + CHECK(create_big_string(&bigref)); + nrecords = opts->nrecords; + + CHECK(opts->conn->open_session( + opts->conn, NULL, NULL, &session)); + + CHECK(session->open_cursor(session, "table:subtest", NULL, + NULL, &maincur)); + + CHECK(session->open_cursor(session, "table:subtest2", NULL, + NULL, &maincur2)); + + for (i = 0; i < nrecords && !failed; i++) { + rndint = __wt_random(&rnd); + generate_key(i, &key); + generate_value(rndint, i, bigref, &v0, &v1, &v2, &big); + CHECK(session->begin_transaction(session, NULL)); + maincur->set_key(maincur, key); + maincur->set_value(maincur, v0, v1, v2, big); + CHECK(maincur->insert(maincur)); + + maincur2->set_key(maincur2, key); + maincur2->set_value(maincur2, rndint); + CHECK(maincur2->insert(maincur2)); + CHECK(session->commit_transaction(session, NULL)); + + if (i == 0) + /* + * Force an initial checkpoint, that helps to + * distinguish a clear failure from just not running + * long enough. + */ + CHECK(session->checkpoint(session, NULL)); + + if ((i + 1) % VERBOSE_PRINT == 0 && opts->verbose) + printf(" %" PRIu64 "/%" PRIu64 "\n", + (i + 1), nrecords); + /* Attempt to isolate the failures to checkpointing. */ + if (i == (nrecords/100)) { + enable_failures(opts->nops, 1000000); + failmode = true; /* CHECK should expect failures. */ + CHECK(session->checkpoint(session, NULL)); + failmode = false; + disable_failures(); + if (failed && opts->verbose) + printf("checkpoint failed (expected).\n"); + } + } + + /* + * Closing handles after an extreme fail is likely to cause + * cascading failures (or crashes), so recommended practice is + * to immediately exit. We're interested in testing both with + * and without the recommended practice. + */ + if (failed) { + if (!close_test) { + fprintf(stderr, "exit early.\n"); + exit(0); + } else + fprintf(stderr, "closing after failure.\n"); + } + + free(bigref); + CHECK(maincur->close(maincur)); + CHECK(maincur2->close(maincur2)); + CHECK(session->close(session, NULL)); +} + +/* + * main -- + * The main program for the test. When invoked with "subtest" + * argument, run the subtest. Otherwise, run a separate process + * for each needed subtest, and check the results. + */ +int +main(int argc, char *argv[]) +{ + TEST_OPTS *opts, _opts; + uint64_t nresults; + const char *debugger; + + if (testutil_disable_long_tests()) + return (0); + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + debugger = NULL; + + testutil_check(testutil_parse_opts(argc, argv, opts)); + argc -= __wt_optind; + argv += __wt_optind; + if (opts->nrecords == 0) + opts->nrecords = 50000; + + while (argc > 0) { + if (strcmp(argv[0], "subtest") == 0) + return (subtest_main(argc, argv, false)); + else if (strcmp(argv[0], "subtest_close") == 0) + return (subtest_main(argc, argv, true)); + else if (strcmp(argv[0], "gdb") == 0) + debugger = "/usr/bin/gdb"; + else + testutil_assert(false); + argc--; + argv++; + } + if (opts->verbose) { + printf("Number of operations until failure: %" PRIu64 + " (change with -o N)\n", opts->nops); + printf("Number of records: %" PRIu64 + " (change with -n N)\n", opts->nrecords); + } + if (opts->nops == 0) { + run_check_subtest_range(opts, debugger, false); + run_check_subtest_range(opts, debugger, true); + } else + run_check_subtest(opts, debugger, opts->nops, + opts->nrecords, &nresults); + + testutil_clean_work_dir(opts->home); + testutil_cleanup(opts); + + return (0); +} |