diff options
author | Sulabh Mahajan <sulabh.mahajan@mongodb.com> | 2023-04-17 11:09:14 +1000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-04-17 01:57:50 +0000 |
commit | 044d9cfb46dfc8a9577ca9e84908aa251cf6e587 (patch) | |
tree | 56be102eb14ef79096e1cb52adbecfea1d87de9d | |
parent | b38874f5754958cc44e07b03a6a651df0cdd0f89 (diff) | |
download | mongo-044d9cfb46dfc8a9577ca9e84908aa251cf6e587.tar.gz |
Import wiredtiger: 723fc71346d686d39d0eb3eac96e618c95eab928 from branch mongodb-master
ref: b3e1507ca3..723fc71346
for: 7.0.0-rc0
WT-9914 test tiered: Convert test/checkpoint to do predictable replay
-rw-r--r-- | src/third_party/wiredtiger/dist/s_string.ok | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/import.data | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/test/checkpoint/checkpointer.c | 166 | ||||
-rw-r--r-- | src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c | 93 | ||||
-rw-r--r-- | src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h | 28 | ||||
-rw-r--r-- | src/third_party/wiredtiger/test/checkpoint/workers.c | 82 | ||||
-rwxr-xr-x | src/third_party/wiredtiger/test/evergreen.yml | 75 |
7 files changed, 367 insertions, 81 deletions
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index ae85c26c330..10bd3eb55ed 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -100,7 +100,7 @@ Decrypt DeleteFileW Deterministically Dk -DmpvXx +DmpRvXx Dxxx EACCES EB diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index f4f7e4276d4..8c90c3bb1d2 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-master", - "commit": "b3e1507ca31173e2d19ebfb647096231b7808f01" + "commit": "723fc71346d686d39d0eb3eac96e618c95eab928" } diff --git a/src/third_party/wiredtiger/test/checkpoint/checkpointer.c b/src/third_party/wiredtiger/test/checkpoint/checkpointer.c index 686fcf1b12b..20dadce9bcd 100644 --- a/src/third_party/wiredtiger/test/checkpoint/checkpointer.c +++ b/src/third_party/wiredtiger/test/checkpoint/checkpointer.c @@ -32,22 +32,22 @@ static WT_THREAD_RET checkpointer(void *); static WT_THREAD_RET clock_thread(void *); static int compare_cursors(WT_CURSOR *, table_type, WT_CURSOR *, table_type); static int diagnose_key_error(WT_CURSOR *, table_type, int, WT_CURSOR *, table_type, int); -static int real_checkpointer(void); +static int real_checkpointer(THREAD_DATA *); /* * set_stable -- - * Set the stable timestamp from g.ts_stable. + * Set the given timestamp as the stable timestamp. */ static void -set_stable(void) +set_stable(uint64_t stable_ts) { char buf[128]; if (g.race_timestamps) testutil_check(__wt_snprintf(buf, sizeof(buf), - "stable_timestamp=%" PRIx64 ",oldest_timestamp=%" PRIx64, g.ts_stable, g.ts_stable)); + "stable_timestamp=%" PRIx64 ",oldest_timestamp=%" PRIx64, stable_ts, stable_ts)); else - testutil_check(__wt_snprintf(buf, sizeof(buf), "stable_timestamp=%" PRIx64, g.ts_stable)); + testutil_check(__wt_snprintf(buf, sizeof(buf), "stable_timestamp=%" PRIx64, stable_ts)); testutil_check(g.conn->set_timestamp(g.conn, buf)); } @@ -58,11 +58,16 @@ set_stable(void) void start_threads(void) { - set_stable(); - testutil_check(__wt_thread_create(NULL, &g.checkpoint_thread, checkpointer, NULL)); + set_stable(1); /* Let's start with 1 as the stable as 0 is not a valid timestamp. */ + /* + * If there are N worker threads (0 - N-1), the checkpoint thread has an ID of N and the clock + * thread an ID of N + 1. + */ + testutil_check(__wt_thread_create(NULL, &g.checkpoint_thread, checkpointer, &g.td[g.nworkers])); if (g.use_timestamps) { testutil_check(__wt_rwlock_init(NULL, &g.clock_lock)); - testutil_check(__wt_thread_create(NULL, &g.clock_thread, clock_thread, NULL)); + testutil_check( + __wt_thread_create(NULL, &g.clock_thread, clock_thread, &g.td[g.nworkers + 1])); } } @@ -87,46 +92,92 @@ end_threads(void) } /* + * get_all_committed_ts -- + * Returns the least of commit timestamps across all the threads. Returns UINT64_MAX if one of + * the threads has not yet started. + */ +static uint64_t +get_all_committed_ts(void) +{ + uint64_t ret; + int i; + + ret = UINT64_MAX; + for (i = 0; i < g.nworkers; ++i) { + if (g.td[i].ts < ret) + ret = g.td[i].ts; + if (ret == 0) + return (UINT64_MAX); + } + + return (ret); +} + +/* * clock_thread -- * Clock thread: ticks up timestamps. */ static WT_THREAD_RET clock_thread(void *arg) { - WT_RAND_STATE rnd; WT_SESSION *wt_session; WT_SESSION_IMPL *session; - uint64_t delay; + THREAD_DATA *td; + uint64_t delay, last_ts, oldest_ts; + char tid[128]; + + testutil_check(__wt_thread_str(tid, sizeof(tid))); + printf("clock thread starting: tid: %s\n", tid); + fflush(stdout); - WT_UNUSED(arg); + td = (THREAD_DATA *)arg; + last_ts = 0; - __wt_random_init(&rnd); testutil_check(g.conn->open_session(g.conn, NULL, NULL, &wt_session)); session = (WT_SESSION_IMPL *)wt_session; while (g.opts.running) { - __wt_writelock(session, &g.clock_lock); - if (g.prepare) - /* - * Leave a gap between timestamps so prepared insert followed by remove don't overlap - * with stable timestamp. - */ - g.ts_stable += 5; - else - ++g.ts_stable; - set_stable(); - if (g.ts_stable % 997 == 0) { - /* - * Random value between 6 and 10 seconds. - */ - delay = __wt_random(&rnd) % 5; - __wt_sleep(delay + 6, 0); + if (g.predictable_replay) { + oldest_ts = get_all_committed_ts(); + if (oldest_ts != UINT64_MAX && oldest_ts - last_ts > PRED_REPLAY_STABLE_PERIOD) { + /* + * If we are doing a predictable rerun, don't go past the provided stop timestamp. + */ + if (g.stop_ts > 0 && oldest_ts >= g.stop_ts) { + printf("Clock thread at %" PRIu64 + " has reached the provided stop timestamp. " + "Stopping the clock.\n", + g.stop_ts); + set_stable(g.stop_ts); + break; + } + set_stable(oldest_ts); + last_ts = oldest_ts; + } + } else { + __wt_writelock(session, &g.clock_lock); + if (g.prepare) + /* + * Leave a gap between timestamps so prepared insert followed by remove don't + * overlap with stable timestamp. + */ + g.ts_stable += 5; + else + ++g.ts_stable; + set_stable(g.ts_stable); + if (g.ts_stable % 997 == 0) { + /* + * Random value between 6 and 10 seconds. + */ + delay = __wt_random(&td->extra_rnd) % 5; + __wt_sleep(delay + 6, 0); + } + __wt_writeunlock(session, &g.clock_lock); } - __wt_writeunlock(session, &g.clock_lock); /* * Random value between 5000 and 10000. */ - delay = __wt_random(&rnd) % 5001; + delay = __wt_random(&td->extra_rnd) % 5001; __wt_sleep(0, delay + 5 * WT_THOUSAND); } @@ -144,13 +195,11 @@ checkpointer(void *arg) { char tid[128]; - WT_UNUSED(arg); - testutil_check(__wt_thread_str(tid, sizeof(tid))); printf("checkpointer thread starting: tid: %s\n", tid); fflush(stdout); - (void)real_checkpointer(); + (void)real_checkpointer((THREAD_DATA *)arg); return (WT_THREAD_RET_VALUE); } @@ -179,12 +228,11 @@ set_flush_tier_delay(WT_RAND_STATE *rnd) * in a timely fashion. */ static int -real_checkpointer(void) +real_checkpointer(THREAD_DATA *td) { - WT_RAND_STATE rnd; WT_SESSION *session; wt_timestamp_t stable_ts, oldest_ts, verify_ts; - uint64_t delay; + uint64_t delay, tmp_ts; int ret; char buf[128], flush_tier_config[128], timestamp_buf[64]; const char *checkpoint_config, *ts_config; @@ -197,7 +245,6 @@ real_checkpointer(void) if (!g.opts.running) return (log_print_err("Checkpoint thread started stopped\n", EINVAL, 1)); - __wt_random_init(&rnd); while (g.ntables > g.ntables_created && g.opts.running) __wt_yield(); @@ -216,7 +263,8 @@ real_checkpointer(void) testutil_check(__wt_snprintf( flush_tier_config, sizeof(flush_tier_config), "flush_tier=(enabled,force),%s", ts_config)); - set_flush_tier_delay(&rnd); + /* Use the extra random generator as the tier delay doesn't affect the actual data content. */ + set_flush_tier_delay(&td->extra_rnd); while (g.opts.running) { /* @@ -233,10 +281,25 @@ real_checkpointer(void) if (stable_ts <= oldest_ts) verify_ts = stable_ts; else - verify_ts = __wt_random(&rnd) % (stable_ts - oldest_ts + 1) + oldest_ts; - __wt_writelock((WT_SESSION_IMPL *)session, &g.clock_lock); - g.ts_oldest = g.ts_stable; - __wt_writeunlock((WT_SESSION_IMPL *)session, &g.clock_lock); + /* Use the extra random generator as the data is not getting modified. */ + verify_ts = __wt_random(&td->extra_rnd) % (stable_ts - oldest_ts + 1) + oldest_ts; + if (g.predictable_replay) { + tmp_ts = WT_MIN(get_all_committed_ts(), stable_ts); + /* Update the oldest timestamp, but do not go past the provided stop timestamp. */ + if (tmp_ts != UINT64_MAX && (g.stop_ts == 0 || tmp_ts <= g.stop_ts)) + g.ts_oldest = tmp_ts; + if (g.stop_ts > 0 && stable_ts >= g.stop_ts) { + printf( + "The checkpoint thread has reached the stop timestamp of " + "%" PRIu64 ". Finish the test run.\n", + g.stop_ts); + g.opts.running = false; + } + } else { + __wt_writelock((WT_SESSION_IMPL *)session, &g.clock_lock); + g.ts_oldest = g.ts_stable; + __wt_writeunlock((WT_SESSION_IMPL *)session, &g.clock_lock); + } } /* Execute a checkpoint */ @@ -255,7 +318,11 @@ real_checkpointer(void) flush_tier = false; printf("Finished a flush_tier\n"); - set_flush_tier_delay(&rnd); + /* + * Use the extra random generator as the tier delay doesn't affect the actual data + * content. + */ + set_flush_tier_delay(&td->extra_rnd); } if (!g.opts.running) @@ -277,8 +344,11 @@ real_checkpointer(void) } if (g.sweep_stress) - /* Random value between 4 and 8 seconds. */ - delay = __wt_random(&rnd) % 5 + 4; + /* + * Random value between 4 and 8 seconds. Use the extra random generator as the tier + * sleep delay doesn't affect the actual data content. + */ + delay = __wt_random(&td->extra_rnd) % 5 + 4; else /* Just find out if we should flush_tier. */ delay = 0; @@ -286,6 +356,12 @@ real_checkpointer(void) } done: + /* To be able to replay, print the stable timestamp the test stopped at. */ + if (g.predictable_replay && g.use_timestamps) { + testutil_check(g.conn->query_timestamp(g.conn, timestamp_buf, "get=stable_timestamp")); + stable_ts = testutil_timestamp_parse(timestamp_buf); + printf("Test stopped at a stable timestamp of %" PRIu64 ".\n", stable_ts); + } if ((ret = session->close(session, NULL)) != 0) return (log_print_err("session.close", ret, 1)); diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c index 5272b60cbfa..01738ddfda8 100644 --- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c +++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c @@ -44,6 +44,36 @@ extern int __wt_optind; extern char *__wt_optarg; /* + * init_thread_data -- + * Initialize the thread data struct. + */ +static void +init_thread_data(THREAD_DATA *td, int info) +{ + td->info = info; + /* + * For a predictable replay have a non-overlapping key space for each thread. Also divide the + * key range between the threads. Otherwise, share the key space among all the threads. + */ + if (g.predictable_replay) { + td->start_key = (u_int)info * WT_MILLION + 1; + td->key_range = g.nkeys / (u_int)g.nworkers; + } else { + td->start_key = 1; + td->key_range = g.nkeys; + } + + /* + * For a predictable replay the worker threads use a predetermined set of timestamps. They + * publish their most recently used timestamps for the clock thread to read across the workers + * to base their decision on. + */ + td->ts = 0; + testutil_random_from_random(&td->data_rnd, &g.opts.data_rnd); + testutil_random_from_random(&td->extra_rnd, &g.opts.extra_rnd); +} + +/* * main -- * TODO: Add a comment describing this function. */ @@ -51,8 +81,9 @@ int main(int argc, char *argv[]) { table_type ttype; - int ch, cnt, i, ret, runs; + int base, ch, cnt, i, ret, runs; const char *config_open; + char *end_number, *stop_arg; bool verify_only; (void)testutil_set_progname(argv); @@ -65,6 +96,7 @@ main(int argc, char *argv[]) g.home = dmalloc(512); g.nkeys = 10 * WT_THOUSAND; g.nops = 100 * WT_THOUSAND; + g.stop_ts = 0; g.ntables = 3; g.nworkers = 1; g.evict_reposition_timing_stress = false; @@ -74,13 +106,14 @@ main(int argc, char *argv[]) g.hs_checkpoint_timing_stress = false; g.checkpoint_slow_timing_stress = false; g.no_ts_deletes = false; + g.predictable_replay = false; runs = 1; verify_only = false; testutil_parse_begin_opt(argc, argv, SHARED_PARSE_OPTIONS, &g.opts); while ((ch = __wt_getopt( - progname, argc, argv, "C:c:Dk:l:mn:pr:s:T:t:vW:xX" SHARED_PARSE_OPTIONS)) != EOF) + progname, argc, argv, "C:c:Dk:l:mn:pr:Rs:S:T:t:vW:xX" SHARED_PARSE_OPTIONS)) != EOF) switch (ch) { case 'c': g.checkpoint_name = __wt_optarg; @@ -112,6 +145,9 @@ main(int argc, char *argv[]) case 'r': /* runs */ runs = atoi(__wt_optarg); break; + case 'R': /* predictable replay */ + g.predictable_replay = true; + break; case 's': switch (__wt_optarg[0]) { case '1': @@ -136,6 +172,17 @@ main(int argc, char *argv[]) return (usage()); } break; + case 'S': /* run until this stable timestamp */ + stop_arg = __wt_optarg; + if (WT_PREFIX_MATCH(stop_arg, "0x")) { + base = 16; + stop_arg += 2; + } else + base = 10; + g.stop_ts = (uint64_t)strtoll(stop_arg, &end_number, base); + if (*end_number) + return (usage()); + break; case 't': switch (__wt_optarg[0]) { case 'c': @@ -182,6 +229,14 @@ main(int argc, char *argv[]) if (argc != 0) return (usage()); + if (g.stop_ts > 0 && (!g.predictable_replay || !g.use_timestamps)) { + fprintf(stderr, "-S is only valid if specified along with -X and -R.\n"); + return (EXIT_FAILURE); + } + + /* + * Among other things, this initializes the random number generators in the option structure. + */ testutil_parse_end_opt(&g.opts); /* Clean up on signal. */ (void)signal(SIGINT, onint); @@ -193,6 +248,10 @@ main(int argc, char *argv[]) g.ts_oldest = 1; printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid()); + if (g.predictable_replay) + printf("Config to seed for replay: " TESTUTIL_SEED_FORMAT "\n", g.opts.data_seed, + g.opts.extra_seed); + for (cnt = 1; (runs == 0 || cnt <= runs) && g.status == 0; ++cnt) { cleanup(cnt == 1 && !verify_only); /* Clean up previous runs */ @@ -217,6 +276,21 @@ main(int argc, char *argv[]) g.cookies[i].uri, sizeof(g.cookies[i].uri), "%s%04d", URI_BASE, g.cookies[i].id)); } + /* + * Setup thread data. There are N worker threads, a checkpoint thread and possibly a clock + * thread. The workers have ID 0 to N-1, checkpoint thread has N, and the clock thread has N + * + 1. + */ + if ((g.td = calloc((size_t)(g.nworkers + 2), sizeof(THREAD_DATA))) == NULL) { + (void)log_print_err("No memory", ENOMEM, 1); + break; + } + for (i = 0; i < g.nworkers; ++i) + init_thread_data(&g.td[i], i); + init_thread_data(&g.td[g.nworkers], g.nworkers); /* Checkpoint thread. */ + if (g.use_timestamps) + init_thread_data(&g.td[g.nworkers + 1], g.nworkers + 1); /* Clock thread. */ + g.opts.running = true; wt_connect(config_open); @@ -245,6 +319,8 @@ main(int argc, char *argv[]) run_complete: free(g.cookies); g.cookies = NULL; + free(g.td); + g.td = NULL; if ((ret = wt_shutdown()) != 0) { (void)log_print_err("Shutdown failed", ret, 1); break; @@ -271,7 +347,6 @@ static void wt_connect(const char *config_open) { static WT_EVENT_HANDLER event_handler = {handle_error, handle_message, NULL, NULL, NULL}; - WT_RAND_STATE rnd; char buf[512], config[1024]; bool fast_eviction; @@ -280,8 +355,7 @@ wt_connect(const char *config_open) /* * Randomly decide on the eviction rate (fast or default). */ - __wt_random_init_seed(NULL, &rnd); - if ((__wt_random(&rnd) % 15) % 2 == 0) + if ((__wt_random(&g.opts.extra_rnd) % 15) % 2 == 0) fast_eviction = true; /* Set up the basic configuration string first. */ @@ -331,7 +405,7 @@ wt_connect(const char *config_open) g.opts.conn = g.conn; /* Set up a random delay for the first flush. */ - set_flush_tier_delay(&rnd); + set_flush_tier_delay(&g.opts.extra_rnd); testutil_tiered_begin(&g.opts); } } @@ -601,8 +675,9 @@ usage(void) { fprintf(stderr, "usage: %s\n" - " [-DmpvXx] [-C wiredtiger-config] [-c checkpoint] [-h home] [-k keys] [-l log]\n" - " [-n ops] [-r runs] [-s 1|2|3|4|5] [-T table-config] [-t f|r|v] [-W workers]\n", + " [-DmpRvXx] [-C wiredtiger-config] [-c checkpoint] [-h home] [-k keys] [-l log]\n" + " [-n ops] [-r runs] [-s 1|2|3|4|5] [-T table-config] [-t f|r|v]\n" + " [-W workers]\n", progname); fprintf(stderr, "%s", "\t-C specify wiredtiger_open configuration arguments\n" @@ -615,7 +690,9 @@ usage(void) "\t-n set number of operations each thread does\n" "\t-p use prepare\n" "\t-r set number of runs (0 for continuous)\n" + "\t-R configure predictable replay\n" "\t-s specify which timing stress configuration to use ( 1 | 2 | 3 | 4 | 5 )\n" + "\t-S set a stable timestamp to stop the test run\n" "\t\t1: sweep_stress\n" "\t\t2: failpoint_hs_delete_key_from_ts\n" "\t\t3: hs_checkpoint_timing_stress\n" diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h index ccab3cdf4c2..babe569d0c3 100644 --- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h +++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h @@ -46,6 +46,18 @@ typedef enum { MIX = 0, COL, FIX, LSM, ROW } table_type; /* File type */ /* + * For a predictable run we reserve timestamps for each thread for the entire run. The timestamp for + * the i-th key that a thread writes is given by the macro below. In a given iteration for each + * thread, there are three timestamps available. We never use the second and only sometimes use the + * third. The first is used as the commit and optionally as the prepared timestamp. The third as the + * durable timestamp ahead of the commit timestamp. + */ +#define RESERVED_TIMESTAMPS_FOR_ITERATION(threadcount, td, iter) \ + (((iter) * (uint64_t)(threadcount) + (uint64_t)((td)->info)) * 3 + 1) + +#define PRED_REPLAY_STABLE_PERIOD 100 + +/* * Per-table cookie structure. */ typedef struct { @@ -55,6 +67,15 @@ typedef struct { } COOKIE; typedef struct { + int info; + u_int start_key; + u_int key_range; + uint64_t ts; /* Only used for runs with predictable replay. */ + WT_RAND_STATE data_rnd; + WT_RAND_STATE extra_rnd; +} THREAD_DATA; + +typedef struct { TEST_OPTS opts; /* Shared test options */ char *home; /* Home directory */ const char *checkpoint_name; /* Checkpoint name */ @@ -82,9 +103,12 @@ typedef struct { bool prepare; /* Use prepare transactions */ bool race_timestamps; /* Async update to oldest timestamp */ - bool use_timestamps; /* Use txn timestamps. Start clock thread */ + bool use_timestamps; /* Use txn timestamps. Start clock thread */ + bool predictable_replay; /* Run such that a predictable replay is possible. */ + uint64_t stop_ts; /* Run a replay until the stable timestamp reaches this stop timestamp. */ - COOKIE *cookies; /* Per-thread info */ + COOKIE *cookies; /* Per-table info */ + THREAD_DATA *td; /* Per-thread info */ WT_RWLOCK clock_lock; /* Clock synchronization */ wt_thread_t checkpoint_thread; /* Checkpoint thread */ wt_thread_t clock_thread; /* Clock thread */ diff --git a/src/third_party/wiredtiger/test/checkpoint/workers.c b/src/third_party/wiredtiger/test/checkpoint/workers.c index 69fa646616c..20a3427e855 100644 --- a/src/third_party/wiredtiger/test/checkpoint/workers.c +++ b/src/third_party/wiredtiger/test/checkpoint/workers.c @@ -31,7 +31,7 @@ #define MAX_MODIFY_ENTRIES 5 static char modify_repl[256]; -static int real_worker(void); +static int real_worker(THREAD_DATA *); static WT_THREAD_RET worker(void *); /* @@ -120,9 +120,9 @@ start_workers(void) (void)gettimeofday(&start, NULL); - /* Create threads. */ + /* Create threads. The N workers have ID 0 to N - 1. */ for (i = 0; i < g.nworkers; ++i) - testutil_check(__wt_thread_create(NULL, &tids[i], worker, &g.cookies[i])); + testutil_check(__wt_thread_create(NULL, &tids[i], worker, &g.td[i])); /* Wait for the threads. */ for (i = 0; i < g.nworkers; ++i) @@ -334,15 +334,17 @@ worker_op(WT_CURSOR *cursor, table_type type, uint64_t keyno, u_int new_val) static WT_THREAD_RET worker(void *arg) { + THREAD_DATA *td; char tid[128]; - WT_UNUSED(arg); + td = (THREAD_DATA *)arg; testutil_check(__wt_thread_str(tid, sizeof(tid))); - printf("worker thread starting: tid: %s\n", tid); + printf("worker thread starting: tid: %s key-range: %" PRIu32 " - %" PRIu32 "\n", tid, + td->start_key, td->start_key + td->key_range); fflush(stdout); - (void)real_worker(); + (void)real_worker(td); return (WT_THREAD_RET_VALUE); } @@ -351,11 +353,11 @@ worker(void *arg) * A single worker thread that transactionally updates all tables with consistent values. */ static int -real_worker(void) +real_worker(THREAD_DATA *td) { WT_CURSOR **cursors; - WT_RAND_STATE rnd; WT_SESSION *session; + uint64_t base_ts; u_int i, keyno, next_rnd; int j, ret, t_ret; char buf[128]; @@ -379,19 +381,31 @@ real_worker(void) if (g.use_timestamps) { if (g.no_ts_deletes) begin_cfg = "no_timestamp=true,read_timestamp=1,roundup_timestamps=(read=true)"; - else + else if (!g.predictable_replay) begin_cfg = "read_timestamp=1,roundup_timestamps=(read=true)"; + /* + * Note: For predictable replays we do not specify a read timestamp, hence reading the + * latest committed values. This is important for a predictable outcome as reading at the + * oldest timestamp depends on where the clock and the checkpoint threads have placed the + * oldest at this moment. + */ } - __wt_random_init_seed((WT_SESSION_IMPL *)session, &rnd); - for (j = 0; j < g.ntables; j++) if ((ret = session->open_cursor(session, g.cookies[j].uri, NULL, NULL, &cursors[j])) != 0) { (void)log_print_err("session.open_cursor", ret, 1); goto err; } - for (i = 0; i < g.nops && g.opts.running; ++i, __wt_yield()) { + for (i = 0; g.opts.running; ++i, __wt_yield()) { + /* + * If a stop timestamp has been provided, the workers will continue to run until the clock + * thread reaches a stable equal to the stop timestamp. Ignore the provided operation count + * in such a case. + */ + if (g.stop_ts == 0 && i >= g.nops) + break; + if (i > 0 && i % (5 * WT_THOUSAND) == 0) printf("Worker %u of %u ops\n", i, g.nops); if (start_txn) { @@ -402,9 +416,10 @@ real_worker(void) new_txn = true; start_txn = false; } - keyno = __wt_random(&rnd) % g.nkeys + 1; + keyno = __wt_random(&td->data_rnd) % td->key_range + td->start_key; /* If we have specified to run with mix mode deletes we need to do it in it's own txn. */ - if (g.use_timestamps && g.no_ts_deletes && new_txn && __wt_random(&rnd) % 72 == 0) { + if (g.use_timestamps && g.no_ts_deletes && new_txn && + __wt_random(&td->data_rnd) % 72 == 0) { new_txn = false; for (j = 0; j < g.ntables; j++) { ret = worker_no_ts_delete(cursors[j], keyno); @@ -436,41 +451,60 @@ real_worker(void) (void)log_print_err("worker op failed", ret, 1); goto err; } else if (ret == 0) { - next_rnd = __wt_random(&rnd); + next_rnd = __wt_random(&td->data_rnd); if (next_rnd % 7 == 0) { if (g.use_timestamps) { - if (__wt_try_readlock((WT_SESSION_IMPL *)session, &g.clock_lock) == 0) { - next_rnd = __wt_random(&rnd); + /* + * For a predictable run, the timestamps for worker's operations are managed by + * reserving them across the threads and the iterations, such that they don't + * overlap. For a regular run, the timestamp thread manages the advance of the + * global clock. The workers synchronize with the clock using a reader - writer + * lock, and decide the operation timestamp based on the global clock. + */ + if (g.predictable_replay || + (__wt_try_readlock((WT_SESSION_IMPL *)session, &g.clock_lock) == 0)) { + if (g.predictable_replay) + /* i + 1 because we don't want a thread to start with commit-ts of 1 */ + base_ts = RESERVED_TIMESTAMPS_FOR_ITERATION(g.nworkers, td, i + 1); + else + base_ts = g.ts_stable + 1; + next_rnd = __wt_random(&td->data_rnd); if (g.prepare && next_rnd % 2 == 0) { testutil_check(__wt_snprintf( - buf, sizeof(buf), "prepare_timestamp=%" PRIx64, g.ts_stable + 1)); + buf, sizeof(buf), "prepare_timestamp=%" PRIx64, base_ts)); if ((ret = session->prepare_transaction(session, buf)) != 0) { - __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock); + if (!g.predictable_replay) + __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock); (void)log_print_err("real_worker:prepare_transaction", ret, 1); goto err; } testutil_check(__wt_snprintf(buf, sizeof(buf), "durable_timestamp=%" PRIx64 ",commit_timestamp=%" PRIx64, - g.ts_stable + 3, g.ts_stable + 1)); + base_ts + 2, base_ts)); } else testutil_check(__wt_snprintf( - buf, sizeof(buf), "commit_timestamp=%" PRIx64, g.ts_stable + 1)); + buf, sizeof(buf), "commit_timestamp=%" PRIx64, base_ts)); /* Commit majority of times. */ if (next_rnd % 49 != 0) { if ((ret = session->commit_transaction(session, buf)) != 0) { - __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock); + if (!g.predictable_replay) + __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock); (void)log_print_err("real_worker:commit_transaction", ret, 1); goto err; } + if (g.predictable_replay) + WT_PUBLISH(td->ts, base_ts); } else { if ((ret = session->rollback_transaction(session, NULL)) != 0) { - __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock); + if (!g.predictable_replay) + __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock); (void)log_print_err("real_worker:rollback_transaction", ret, 1); goto err; } } - __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock); + if (!g.predictable_replay) + __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock); start_txn = true; /* Occasionally reopen cursors after transaction finish. */ if (next_rnd % 13 == 0) diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml index 1a9e00ff108..3c7cd2d5220 100755 --- a/src/third_party/wiredtiger/test/evergreen.yml +++ b/src/third_party/wiredtiger/test/evergreen.yml @@ -776,6 +776,54 @@ functions: set -o verbose ${test_env_vars|} ./test_checkpoint ${checkpoint_args} 2>&1 + "checkpoint test predictable": + command: shell.exec + params: + working_dir: "wiredtiger/cmake_build/test/checkpoint" + script: | + # Get a random value with leading zeroes removed, /bin/sh version. + rando() { + tr -cd 0-9 </dev/urandom | head -c 5 | sed -e 's/0*\(.\)/\1/' + } + + # Run test/checkpoint in a way that can test predictable replay. + set -o errexit + set -o verbose + + toolsdir=../../../tools + wtutil=../../wt + + r=$(rando)$(rando) + x0=$(rando)$(rando) + + # Always run with timestamps and in the predictable mode + base_args="-x -R" + + rm -rf RUNDIR_0 + # The first run is for calibration only. We just want to run for the designated + # time and get an approriate stop timestamp that can be used in later runs. + calibration_run_args="-PSD$r,E$x0" + ${test_env_vars|} ./test_checkpoint -h RUNDIR_0 $base_args ${checkpoint_args} $calibration_run_args || exit 1 + echo "Finished calibration run" + stable_hex=$($toolsdir/wt_timestamps RUNDIR_0 | sed -e '/stable=/!d' -e 's/.*=//') + stop_ts=$(echo $((0x$stable_hex))) + for i in $(seq ${times}); do + echo Iteration $i/${times} + x1=$(rando)$(rando) + x2=$(rando)$(rando) + rm -rf RUNDIR_1 RUNDIR_2 + # Do two runs up to the stable timestamp, using the same data seed, + # but with a different extra seed. Compare it when done. + first_run_args="-PSD$r,E$x1 -S $stop_ts" + echo "First run with args $base_args ${checkpoint_args} $first_run_args" + ${test_env_vars|} ./test_checkpoint -h RUNDIR_1 $base_args ${checkpoint_args} $first_run_args || exit 1 + second_run_args="-PSD$r,E$x2 -S $stop_ts" + echo "Second run with args $base_args ${checkpoint_args} $second_run_args" + ${test_env_vars|} ./test_checkpoint -h RUNDIR_2 $base_args ${checkpoint_args} $second_run_args || exit 1 + # Compare the runs. + $toolsdir/wt_cmp_dir RUNDIR_1 RUNDIR_2 || exit 1 + done + "checkpoint stress test": command: shell.exec params: @@ -4115,6 +4163,31 @@ tasks: vars: times: 5 + - name: checkpoint-filetypes-predictable-test + commands: + - func: "get project" + - func: "compile wiredtiger" + vars: + # Don't use diagnostic - this test looks for timing problems that are more likely to occur without it + HAVE_DIAGNOSTIC: -DHAVE_DIAGNOSTIC=0 + # FIXME-WT-10936: Enable once predictable replay supports column store + #- func: "checkpoint test predictable" + # vars: + # checkpoint_args: -t m -n 1000000 -k 5000000 -C cache_size=100MB + # times: 5 + #- func: "checkpoint test predictable" + # vars: + # checkpoint_args: -t c -n 1000000 -k 5000000 -C cache_size=100MB + # times: 5 + #- func: "checkpoint test predictable" + # vars: + # checkpoint_args: -n 1000000 -k 5000000 -C cache_size=100MB + # times: 5 + - func: "checkpoint test predictable" + vars: + checkpoint_args: -t r -n 1000000 -k 5000000 -C cache_size=100MB + times: 5 + - name: many-collection-test commands: - command: timeout.update @@ -5346,6 +5419,8 @@ buildvariants: # FIXME-WT-10822 # - name: format-tiered-test - name: schema-abort-predictable-test + - name: checkpoint-filetypes-predictable-test + # When running the Python tests on this variant tcmalloc must be preloaded otherwise the wiredtiger library # fails to load and resolve its dependency. |