summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSulabh Mahajan <sulabh.mahajan@mongodb.com>2023-04-17 11:09:14 +1000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-04-17 01:57:50 +0000
commit044d9cfb46dfc8a9577ca9e84908aa251cf6e587 (patch)
tree56be102eb14ef79096e1cb52adbecfea1d87de9d
parentb38874f5754958cc44e07b03a6a651df0cdd0f89 (diff)
downloadmongo-044d9cfb46dfc8a9577ca9e84908aa251cf6e587.tar.gz
Import wiredtiger: 723fc71346d686d39d0eb3eac96e618c95eab928 from branch mongodb-master
ref: b3e1507ca3..723fc71346 for: 7.0.0-rc0 WT-9914 test tiered: Convert test/checkpoint to do predictable replay
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok2
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/test/checkpoint/checkpointer.c166
-rw-r--r--src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c93
-rw-r--r--src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h28
-rw-r--r--src/third_party/wiredtiger/test/checkpoint/workers.c82
-rwxr-xr-xsrc/third_party/wiredtiger/test/evergreen.yml75
7 files changed, 367 insertions, 81 deletions
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index ae85c26c330..10bd3eb55ed 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -100,7 +100,7 @@ Decrypt
DeleteFileW
Deterministically
Dk
-DmpvXx
+DmpRvXx
Dxxx
EACCES
EB
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index f4f7e4276d4..8c90c3bb1d2 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-master",
- "commit": "b3e1507ca31173e2d19ebfb647096231b7808f01"
+ "commit": "723fc71346d686d39d0eb3eac96e618c95eab928"
}
diff --git a/src/third_party/wiredtiger/test/checkpoint/checkpointer.c b/src/third_party/wiredtiger/test/checkpoint/checkpointer.c
index 686fcf1b12b..20dadce9bcd 100644
--- a/src/third_party/wiredtiger/test/checkpoint/checkpointer.c
+++ b/src/third_party/wiredtiger/test/checkpoint/checkpointer.c
@@ -32,22 +32,22 @@ static WT_THREAD_RET checkpointer(void *);
static WT_THREAD_RET clock_thread(void *);
static int compare_cursors(WT_CURSOR *, table_type, WT_CURSOR *, table_type);
static int diagnose_key_error(WT_CURSOR *, table_type, int, WT_CURSOR *, table_type, int);
-static int real_checkpointer(void);
+static int real_checkpointer(THREAD_DATA *);
/*
* set_stable --
- * Set the stable timestamp from g.ts_stable.
+ * Set the given timestamp as the stable timestamp.
*/
static void
-set_stable(void)
+set_stable(uint64_t stable_ts)
{
char buf[128];
if (g.race_timestamps)
testutil_check(__wt_snprintf(buf, sizeof(buf),
- "stable_timestamp=%" PRIx64 ",oldest_timestamp=%" PRIx64, g.ts_stable, g.ts_stable));
+ "stable_timestamp=%" PRIx64 ",oldest_timestamp=%" PRIx64, stable_ts, stable_ts));
else
- testutil_check(__wt_snprintf(buf, sizeof(buf), "stable_timestamp=%" PRIx64, g.ts_stable));
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "stable_timestamp=%" PRIx64, stable_ts));
testutil_check(g.conn->set_timestamp(g.conn, buf));
}
@@ -58,11 +58,16 @@ set_stable(void)
void
start_threads(void)
{
- set_stable();
- testutil_check(__wt_thread_create(NULL, &g.checkpoint_thread, checkpointer, NULL));
+ set_stable(1); /* Let's start with 1 as the stable as 0 is not a valid timestamp. */
+ /*
+ * If there are N worker threads (0 - N-1), the checkpoint thread has an ID of N and the clock
+ * thread an ID of N + 1.
+ */
+ testutil_check(__wt_thread_create(NULL, &g.checkpoint_thread, checkpointer, &g.td[g.nworkers]));
if (g.use_timestamps) {
testutil_check(__wt_rwlock_init(NULL, &g.clock_lock));
- testutil_check(__wt_thread_create(NULL, &g.clock_thread, clock_thread, NULL));
+ testutil_check(
+ __wt_thread_create(NULL, &g.clock_thread, clock_thread, &g.td[g.nworkers + 1]));
}
}
@@ -87,46 +92,92 @@ end_threads(void)
}
/*
+ * get_all_committed_ts --
+ * Returns the least of commit timestamps across all the threads. Returns UINT64_MAX if one of
+ * the threads has not yet started.
+ */
+static uint64_t
+get_all_committed_ts(void)
+{
+ uint64_t ret;
+ int i;
+
+ ret = UINT64_MAX;
+ for (i = 0; i < g.nworkers; ++i) {
+ if (g.td[i].ts < ret)
+ ret = g.td[i].ts;
+ if (ret == 0)
+ return (UINT64_MAX);
+ }
+
+ return (ret);
+}
+
+/*
* clock_thread --
* Clock thread: ticks up timestamps.
*/
static WT_THREAD_RET
clock_thread(void *arg)
{
- WT_RAND_STATE rnd;
WT_SESSION *wt_session;
WT_SESSION_IMPL *session;
- uint64_t delay;
+ THREAD_DATA *td;
+ uint64_t delay, last_ts, oldest_ts;
+ char tid[128];
+
+ testutil_check(__wt_thread_str(tid, sizeof(tid)));
+ printf("clock thread starting: tid: %s\n", tid);
+ fflush(stdout);
- WT_UNUSED(arg);
+ td = (THREAD_DATA *)arg;
+ last_ts = 0;
- __wt_random_init(&rnd);
testutil_check(g.conn->open_session(g.conn, NULL, NULL, &wt_session));
session = (WT_SESSION_IMPL *)wt_session;
while (g.opts.running) {
- __wt_writelock(session, &g.clock_lock);
- if (g.prepare)
- /*
- * Leave a gap between timestamps so prepared insert followed by remove don't overlap
- * with stable timestamp.
- */
- g.ts_stable += 5;
- else
- ++g.ts_stable;
- set_stable();
- if (g.ts_stable % 997 == 0) {
- /*
- * Random value between 6 and 10 seconds.
- */
- delay = __wt_random(&rnd) % 5;
- __wt_sleep(delay + 6, 0);
+ if (g.predictable_replay) {
+ oldest_ts = get_all_committed_ts();
+ if (oldest_ts != UINT64_MAX && oldest_ts - last_ts > PRED_REPLAY_STABLE_PERIOD) {
+ /*
+ * If we are doing a predictable rerun, don't go past the provided stop timestamp.
+ */
+ if (g.stop_ts > 0 && oldest_ts >= g.stop_ts) {
+ printf("Clock thread at %" PRIu64
+ " has reached the provided stop timestamp. "
+ "Stopping the clock.\n",
+ g.stop_ts);
+ set_stable(g.stop_ts);
+ break;
+ }
+ set_stable(oldest_ts);
+ last_ts = oldest_ts;
+ }
+ } else {
+ __wt_writelock(session, &g.clock_lock);
+ if (g.prepare)
+ /*
+ * Leave a gap between timestamps so prepared insert followed by remove don't
+ * overlap with stable timestamp.
+ */
+ g.ts_stable += 5;
+ else
+ ++g.ts_stable;
+ set_stable(g.ts_stable);
+ if (g.ts_stable % 997 == 0) {
+ /*
+ * Random value between 6 and 10 seconds.
+ */
+ delay = __wt_random(&td->extra_rnd) % 5;
+ __wt_sleep(delay + 6, 0);
+ }
+ __wt_writeunlock(session, &g.clock_lock);
}
- __wt_writeunlock(session, &g.clock_lock);
/*
* Random value between 5000 and 10000.
*/
- delay = __wt_random(&rnd) % 5001;
+ delay = __wt_random(&td->extra_rnd) % 5001;
__wt_sleep(0, delay + 5 * WT_THOUSAND);
}
@@ -144,13 +195,11 @@ checkpointer(void *arg)
{
char tid[128];
- WT_UNUSED(arg);
-
testutil_check(__wt_thread_str(tid, sizeof(tid)));
printf("checkpointer thread starting: tid: %s\n", tid);
fflush(stdout);
- (void)real_checkpointer();
+ (void)real_checkpointer((THREAD_DATA *)arg);
return (WT_THREAD_RET_VALUE);
}
@@ -179,12 +228,11 @@ set_flush_tier_delay(WT_RAND_STATE *rnd)
* in a timely fashion.
*/
static int
-real_checkpointer(void)
+real_checkpointer(THREAD_DATA *td)
{
- WT_RAND_STATE rnd;
WT_SESSION *session;
wt_timestamp_t stable_ts, oldest_ts, verify_ts;
- uint64_t delay;
+ uint64_t delay, tmp_ts;
int ret;
char buf[128], flush_tier_config[128], timestamp_buf[64];
const char *checkpoint_config, *ts_config;
@@ -197,7 +245,6 @@ real_checkpointer(void)
if (!g.opts.running)
return (log_print_err("Checkpoint thread started stopped\n", EINVAL, 1));
- __wt_random_init(&rnd);
while (g.ntables > g.ntables_created && g.opts.running)
__wt_yield();
@@ -216,7 +263,8 @@ real_checkpointer(void)
testutil_check(__wt_snprintf(
flush_tier_config, sizeof(flush_tier_config), "flush_tier=(enabled,force),%s", ts_config));
- set_flush_tier_delay(&rnd);
+ /* Use the extra random generator as the tier delay doesn't affect the actual data content. */
+ set_flush_tier_delay(&td->extra_rnd);
while (g.opts.running) {
/*
@@ -233,10 +281,25 @@ real_checkpointer(void)
if (stable_ts <= oldest_ts)
verify_ts = stable_ts;
else
- verify_ts = __wt_random(&rnd) % (stable_ts - oldest_ts + 1) + oldest_ts;
- __wt_writelock((WT_SESSION_IMPL *)session, &g.clock_lock);
- g.ts_oldest = g.ts_stable;
- __wt_writeunlock((WT_SESSION_IMPL *)session, &g.clock_lock);
+ /* Use the extra random generator as the data is not getting modified. */
+ verify_ts = __wt_random(&td->extra_rnd) % (stable_ts - oldest_ts + 1) + oldest_ts;
+ if (g.predictable_replay) {
+ tmp_ts = WT_MIN(get_all_committed_ts(), stable_ts);
+ /* Update the oldest timestamp, but do not go past the provided stop timestamp. */
+ if (tmp_ts != UINT64_MAX && (g.stop_ts == 0 || tmp_ts <= g.stop_ts))
+ g.ts_oldest = tmp_ts;
+ if (g.stop_ts > 0 && stable_ts >= g.stop_ts) {
+ printf(
+ "The checkpoint thread has reached the stop timestamp of "
+ "%" PRIu64 ". Finish the test run.\n",
+ g.stop_ts);
+ g.opts.running = false;
+ }
+ } else {
+ __wt_writelock((WT_SESSION_IMPL *)session, &g.clock_lock);
+ g.ts_oldest = g.ts_stable;
+ __wt_writeunlock((WT_SESSION_IMPL *)session, &g.clock_lock);
+ }
}
/* Execute a checkpoint */
@@ -255,7 +318,11 @@ real_checkpointer(void)
flush_tier = false;
printf("Finished a flush_tier\n");
- set_flush_tier_delay(&rnd);
+ /*
+ * Use the extra random generator as the tier delay doesn't affect the actual data
+ * content.
+ */
+ set_flush_tier_delay(&td->extra_rnd);
}
if (!g.opts.running)
@@ -277,8 +344,11 @@ real_checkpointer(void)
}
if (g.sweep_stress)
- /* Random value between 4 and 8 seconds. */
- delay = __wt_random(&rnd) % 5 + 4;
+ /*
+ * Random value between 4 and 8 seconds. Use the extra random generator as the tier
+ * sleep delay doesn't affect the actual data content.
+ */
+ delay = __wt_random(&td->extra_rnd) % 5 + 4;
else
/* Just find out if we should flush_tier. */
delay = 0;
@@ -286,6 +356,12 @@ real_checkpointer(void)
}
done:
+ /* To be able to replay, print the stable timestamp the test stopped at. */
+ if (g.predictable_replay && g.use_timestamps) {
+ testutil_check(g.conn->query_timestamp(g.conn, timestamp_buf, "get=stable_timestamp"));
+ stable_ts = testutil_timestamp_parse(timestamp_buf);
+ printf("Test stopped at a stable timestamp of %" PRIu64 ".\n", stable_ts);
+ }
if ((ret = session->close(session, NULL)) != 0)
return (log_print_err("session.close", ret, 1));
diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c
index 5272b60cbfa..01738ddfda8 100644
--- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c
+++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c
@@ -44,6 +44,36 @@ extern int __wt_optind;
extern char *__wt_optarg;
/*
+ * init_thread_data --
+ * Initialize the thread data struct.
+ */
+static void
+init_thread_data(THREAD_DATA *td, int info)
+{
+ td->info = info;
+ /*
+ * For a predictable replay have a non-overlapping key space for each thread. Also divide the
+ * key range between the threads. Otherwise, share the key space among all the threads.
+ */
+ if (g.predictable_replay) {
+ td->start_key = (u_int)info * WT_MILLION + 1;
+ td->key_range = g.nkeys / (u_int)g.nworkers;
+ } else {
+ td->start_key = 1;
+ td->key_range = g.nkeys;
+ }
+
+ /*
+ * For a predictable replay the worker threads use a predetermined set of timestamps. They
+ * publish their most recently used timestamps for the clock thread to read across the workers
+ * to base their decision on.
+ */
+ td->ts = 0;
+ testutil_random_from_random(&td->data_rnd, &g.opts.data_rnd);
+ testutil_random_from_random(&td->extra_rnd, &g.opts.extra_rnd);
+}
+
+/*
* main --
* TODO: Add a comment describing this function.
*/
@@ -51,8 +81,9 @@ int
main(int argc, char *argv[])
{
table_type ttype;
- int ch, cnt, i, ret, runs;
+ int base, ch, cnt, i, ret, runs;
const char *config_open;
+ char *end_number, *stop_arg;
bool verify_only;
(void)testutil_set_progname(argv);
@@ -65,6 +96,7 @@ main(int argc, char *argv[])
g.home = dmalloc(512);
g.nkeys = 10 * WT_THOUSAND;
g.nops = 100 * WT_THOUSAND;
+ g.stop_ts = 0;
g.ntables = 3;
g.nworkers = 1;
g.evict_reposition_timing_stress = false;
@@ -74,13 +106,14 @@ main(int argc, char *argv[])
g.hs_checkpoint_timing_stress = false;
g.checkpoint_slow_timing_stress = false;
g.no_ts_deletes = false;
+ g.predictable_replay = false;
runs = 1;
verify_only = false;
testutil_parse_begin_opt(argc, argv, SHARED_PARSE_OPTIONS, &g.opts);
while ((ch = __wt_getopt(
- progname, argc, argv, "C:c:Dk:l:mn:pr:s:T:t:vW:xX" SHARED_PARSE_OPTIONS)) != EOF)
+ progname, argc, argv, "C:c:Dk:l:mn:pr:Rs:S:T:t:vW:xX" SHARED_PARSE_OPTIONS)) != EOF)
switch (ch) {
case 'c':
g.checkpoint_name = __wt_optarg;
@@ -112,6 +145,9 @@ main(int argc, char *argv[])
case 'r': /* runs */
runs = atoi(__wt_optarg);
break;
+ case 'R': /* predictable replay */
+ g.predictable_replay = true;
+ break;
case 's':
switch (__wt_optarg[0]) {
case '1':
@@ -136,6 +172,17 @@ main(int argc, char *argv[])
return (usage());
}
break;
+ case 'S': /* run until this stable timestamp */
+ stop_arg = __wt_optarg;
+ if (WT_PREFIX_MATCH(stop_arg, "0x")) {
+ base = 16;
+ stop_arg += 2;
+ } else
+ base = 10;
+ g.stop_ts = (uint64_t)strtoll(stop_arg, &end_number, base);
+ if (*end_number)
+ return (usage());
+ break;
case 't':
switch (__wt_optarg[0]) {
case 'c':
@@ -182,6 +229,14 @@ main(int argc, char *argv[])
if (argc != 0)
return (usage());
+ if (g.stop_ts > 0 && (!g.predictable_replay || !g.use_timestamps)) {
+ fprintf(stderr, "-S is only valid if specified along with -X and -R.\n");
+ return (EXIT_FAILURE);
+ }
+
+ /*
+ * Among other things, this initializes the random number generators in the option structure.
+ */
testutil_parse_end_opt(&g.opts);
/* Clean up on signal. */
(void)signal(SIGINT, onint);
@@ -193,6 +248,10 @@ main(int argc, char *argv[])
g.ts_oldest = 1;
printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid());
+ if (g.predictable_replay)
+ printf("Config to seed for replay: " TESTUTIL_SEED_FORMAT "\n", g.opts.data_seed,
+ g.opts.extra_seed);
+
for (cnt = 1; (runs == 0 || cnt <= runs) && g.status == 0; ++cnt) {
cleanup(cnt == 1 && !verify_only); /* Clean up previous runs */
@@ -217,6 +276,21 @@ main(int argc, char *argv[])
g.cookies[i].uri, sizeof(g.cookies[i].uri), "%s%04d", URI_BASE, g.cookies[i].id));
}
+ /*
+ * Setup thread data. There are N worker threads, a checkpoint thread and possibly a clock
+ * thread. The workers have ID 0 to N-1, checkpoint thread has N, and the clock thread has N
+ * + 1.
+ */
+ if ((g.td = calloc((size_t)(g.nworkers + 2), sizeof(THREAD_DATA))) == NULL) {
+ (void)log_print_err("No memory", ENOMEM, 1);
+ break;
+ }
+ for (i = 0; i < g.nworkers; ++i)
+ init_thread_data(&g.td[i], i);
+ init_thread_data(&g.td[g.nworkers], g.nworkers); /* Checkpoint thread. */
+ if (g.use_timestamps)
+ init_thread_data(&g.td[g.nworkers + 1], g.nworkers + 1); /* Clock thread. */
+
g.opts.running = true;
wt_connect(config_open);
@@ -245,6 +319,8 @@ main(int argc, char *argv[])
run_complete:
free(g.cookies);
g.cookies = NULL;
+ free(g.td);
+ g.td = NULL;
if ((ret = wt_shutdown()) != 0) {
(void)log_print_err("Shutdown failed", ret, 1);
break;
@@ -271,7 +347,6 @@ static void
wt_connect(const char *config_open)
{
static WT_EVENT_HANDLER event_handler = {handle_error, handle_message, NULL, NULL, NULL};
- WT_RAND_STATE rnd;
char buf[512], config[1024];
bool fast_eviction;
@@ -280,8 +355,7 @@ wt_connect(const char *config_open)
/*
* Randomly decide on the eviction rate (fast or default).
*/
- __wt_random_init_seed(NULL, &rnd);
- if ((__wt_random(&rnd) % 15) % 2 == 0)
+ if ((__wt_random(&g.opts.extra_rnd) % 15) % 2 == 0)
fast_eviction = true;
/* Set up the basic configuration string first. */
@@ -331,7 +405,7 @@ wt_connect(const char *config_open)
g.opts.conn = g.conn;
/* Set up a random delay for the first flush. */
- set_flush_tier_delay(&rnd);
+ set_flush_tier_delay(&g.opts.extra_rnd);
testutil_tiered_begin(&g.opts);
}
}
@@ -601,8 +675,9 @@ usage(void)
{
fprintf(stderr,
"usage: %s\n"
- " [-DmpvXx] [-C wiredtiger-config] [-c checkpoint] [-h home] [-k keys] [-l log]\n"
- " [-n ops] [-r runs] [-s 1|2|3|4|5] [-T table-config] [-t f|r|v] [-W workers]\n",
+ " [-DmpRvXx] [-C wiredtiger-config] [-c checkpoint] [-h home] [-k keys] [-l log]\n"
+ " [-n ops] [-r runs] [-s 1|2|3|4|5] [-T table-config] [-t f|r|v]\n"
+ " [-W workers]\n",
progname);
fprintf(stderr, "%s",
"\t-C specify wiredtiger_open configuration arguments\n"
@@ -615,7 +690,9 @@ usage(void)
"\t-n set number of operations each thread does\n"
"\t-p use prepare\n"
"\t-r set number of runs (0 for continuous)\n"
+ "\t-R configure predictable replay\n"
"\t-s specify which timing stress configuration to use ( 1 | 2 | 3 | 4 | 5 )\n"
+ "\t-S set a stable timestamp to stop the test run\n"
"\t\t1: sweep_stress\n"
"\t\t2: failpoint_hs_delete_key_from_ts\n"
"\t\t3: hs_checkpoint_timing_stress\n"
diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h
index ccab3cdf4c2..babe569d0c3 100644
--- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h
+++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h
@@ -46,6 +46,18 @@
typedef enum { MIX = 0, COL, FIX, LSM, ROW } table_type; /* File type */
/*
+ * For a predictable run we reserve timestamps for each thread for the entire run. The timestamp for
+ * the i-th key that a thread writes is given by the macro below. In a given iteration for each
+ * thread, there are three timestamps available. We never use the second and only sometimes use the
+ * third. The first is used as the commit and optionally as the prepared timestamp. The third as the
+ * durable timestamp ahead of the commit timestamp.
+ */
+#define RESERVED_TIMESTAMPS_FOR_ITERATION(threadcount, td, iter) \
+ (((iter) * (uint64_t)(threadcount) + (uint64_t)((td)->info)) * 3 + 1)
+
+#define PRED_REPLAY_STABLE_PERIOD 100
+
+/*
* Per-table cookie structure.
*/
typedef struct {
@@ -55,6 +67,15 @@ typedef struct {
} COOKIE;
typedef struct {
+ int info;
+ u_int start_key;
+ u_int key_range;
+ uint64_t ts; /* Only used for runs with predictable replay. */
+ WT_RAND_STATE data_rnd;
+ WT_RAND_STATE extra_rnd;
+} THREAD_DATA;
+
+typedef struct {
TEST_OPTS opts; /* Shared test options */
char *home; /* Home directory */
const char *checkpoint_name; /* Checkpoint name */
@@ -82,9 +103,12 @@ typedef struct {
bool prepare; /* Use prepare transactions */
bool race_timestamps; /* Async update to oldest timestamp */
- bool use_timestamps; /* Use txn timestamps. Start clock thread */
+ bool use_timestamps; /* Use txn timestamps. Start clock thread */
+ bool predictable_replay; /* Run such that a predictable replay is possible. */
+ uint64_t stop_ts; /* Run a replay until the stable timestamp reaches this stop timestamp. */
- COOKIE *cookies; /* Per-thread info */
+ COOKIE *cookies; /* Per-table info */
+ THREAD_DATA *td; /* Per-thread info */
WT_RWLOCK clock_lock; /* Clock synchronization */
wt_thread_t checkpoint_thread; /* Checkpoint thread */
wt_thread_t clock_thread; /* Clock thread */
diff --git a/src/third_party/wiredtiger/test/checkpoint/workers.c b/src/third_party/wiredtiger/test/checkpoint/workers.c
index 69fa646616c..20a3427e855 100644
--- a/src/third_party/wiredtiger/test/checkpoint/workers.c
+++ b/src/third_party/wiredtiger/test/checkpoint/workers.c
@@ -31,7 +31,7 @@
#define MAX_MODIFY_ENTRIES 5
static char modify_repl[256];
-static int real_worker(void);
+static int real_worker(THREAD_DATA *);
static WT_THREAD_RET worker(void *);
/*
@@ -120,9 +120,9 @@ start_workers(void)
(void)gettimeofday(&start, NULL);
- /* Create threads. */
+ /* Create threads. The N workers have ID 0 to N - 1. */
for (i = 0; i < g.nworkers; ++i)
- testutil_check(__wt_thread_create(NULL, &tids[i], worker, &g.cookies[i]));
+ testutil_check(__wt_thread_create(NULL, &tids[i], worker, &g.td[i]));
/* Wait for the threads. */
for (i = 0; i < g.nworkers; ++i)
@@ -334,15 +334,17 @@ worker_op(WT_CURSOR *cursor, table_type type, uint64_t keyno, u_int new_val)
static WT_THREAD_RET
worker(void *arg)
{
+ THREAD_DATA *td;
char tid[128];
- WT_UNUSED(arg);
+ td = (THREAD_DATA *)arg;
testutil_check(__wt_thread_str(tid, sizeof(tid)));
- printf("worker thread starting: tid: %s\n", tid);
+ printf("worker thread starting: tid: %s key-range: %" PRIu32 " - %" PRIu32 "\n", tid,
+ td->start_key, td->start_key + td->key_range);
fflush(stdout);
- (void)real_worker();
+ (void)real_worker(td);
return (WT_THREAD_RET_VALUE);
}
@@ -351,11 +353,11 @@ worker(void *arg)
* A single worker thread that transactionally updates all tables with consistent values.
*/
static int
-real_worker(void)
+real_worker(THREAD_DATA *td)
{
WT_CURSOR **cursors;
- WT_RAND_STATE rnd;
WT_SESSION *session;
+ uint64_t base_ts;
u_int i, keyno, next_rnd;
int j, ret, t_ret;
char buf[128];
@@ -379,19 +381,31 @@ real_worker(void)
if (g.use_timestamps) {
if (g.no_ts_deletes)
begin_cfg = "no_timestamp=true,read_timestamp=1,roundup_timestamps=(read=true)";
- else
+ else if (!g.predictable_replay)
begin_cfg = "read_timestamp=1,roundup_timestamps=(read=true)";
+ /*
+ * Note: For predictable replays we do not specify a read timestamp, hence reading the
+ * latest committed values. This is important for a predictable outcome as reading at the
+ * oldest timestamp depends on where the clock and the checkpoint threads have placed the
+ * oldest at this moment.
+ */
}
- __wt_random_init_seed((WT_SESSION_IMPL *)session, &rnd);
-
for (j = 0; j < g.ntables; j++)
if ((ret = session->open_cursor(session, g.cookies[j].uri, NULL, NULL, &cursors[j])) != 0) {
(void)log_print_err("session.open_cursor", ret, 1);
goto err;
}
- for (i = 0; i < g.nops && g.opts.running; ++i, __wt_yield()) {
+ for (i = 0; g.opts.running; ++i, __wt_yield()) {
+ /*
+ * If a stop timestamp has been provided, the workers will continue to run until the clock
+ * thread reaches a stable equal to the stop timestamp. Ignore the provided operation count
+ * in such a case.
+ */
+ if (g.stop_ts == 0 && i >= g.nops)
+ break;
+
if (i > 0 && i % (5 * WT_THOUSAND) == 0)
printf("Worker %u of %u ops\n", i, g.nops);
if (start_txn) {
@@ -402,9 +416,10 @@ real_worker(void)
new_txn = true;
start_txn = false;
}
- keyno = __wt_random(&rnd) % g.nkeys + 1;
+ keyno = __wt_random(&td->data_rnd) % td->key_range + td->start_key;
/* If we have specified to run with mix mode deletes we need to do it in it's own txn. */
- if (g.use_timestamps && g.no_ts_deletes && new_txn && __wt_random(&rnd) % 72 == 0) {
+ if (g.use_timestamps && g.no_ts_deletes && new_txn &&
+ __wt_random(&td->data_rnd) % 72 == 0) {
new_txn = false;
for (j = 0; j < g.ntables; j++) {
ret = worker_no_ts_delete(cursors[j], keyno);
@@ -436,41 +451,60 @@ real_worker(void)
(void)log_print_err("worker op failed", ret, 1);
goto err;
} else if (ret == 0) {
- next_rnd = __wt_random(&rnd);
+ next_rnd = __wt_random(&td->data_rnd);
if (next_rnd % 7 == 0) {
if (g.use_timestamps) {
- if (__wt_try_readlock((WT_SESSION_IMPL *)session, &g.clock_lock) == 0) {
- next_rnd = __wt_random(&rnd);
+ /*
+ * For a predictable run, the timestamps for worker's operations are managed by
+ * reserving them across the threads and the iterations, such that they don't
+ * overlap. For a regular run, the timestamp thread manages the advance of the
+ * global clock. The workers synchronize with the clock using a reader - writer
+ * lock, and decide the operation timestamp based on the global clock.
+ */
+ if (g.predictable_replay ||
+ (__wt_try_readlock((WT_SESSION_IMPL *)session, &g.clock_lock) == 0)) {
+ if (g.predictable_replay)
+ /* i + 1 because we don't want a thread to start with commit-ts of 1 */
+ base_ts = RESERVED_TIMESTAMPS_FOR_ITERATION(g.nworkers, td, i + 1);
+ else
+ base_ts = g.ts_stable + 1;
+ next_rnd = __wt_random(&td->data_rnd);
if (g.prepare && next_rnd % 2 == 0) {
testutil_check(__wt_snprintf(
- buf, sizeof(buf), "prepare_timestamp=%" PRIx64, g.ts_stable + 1));
+ buf, sizeof(buf), "prepare_timestamp=%" PRIx64, base_ts));
if ((ret = session->prepare_transaction(session, buf)) != 0) {
- __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock);
+ if (!g.predictable_replay)
+ __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock);
(void)log_print_err("real_worker:prepare_transaction", ret, 1);
goto err;
}
testutil_check(__wt_snprintf(buf, sizeof(buf),
"durable_timestamp=%" PRIx64 ",commit_timestamp=%" PRIx64,
- g.ts_stable + 3, g.ts_stable + 1));
+ base_ts + 2, base_ts));
} else
testutil_check(__wt_snprintf(
- buf, sizeof(buf), "commit_timestamp=%" PRIx64, g.ts_stable + 1));
+ buf, sizeof(buf), "commit_timestamp=%" PRIx64, base_ts));
/* Commit majority of times. */
if (next_rnd % 49 != 0) {
if ((ret = session->commit_transaction(session, buf)) != 0) {
- __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock);
+ if (!g.predictable_replay)
+ __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock);
(void)log_print_err("real_worker:commit_transaction", ret, 1);
goto err;
}
+ if (g.predictable_replay)
+ WT_PUBLISH(td->ts, base_ts);
} else {
if ((ret = session->rollback_transaction(session, NULL)) != 0) {
- __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock);
+ if (!g.predictable_replay)
+ __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock);
(void)log_print_err("real_worker:rollback_transaction", ret, 1);
goto err;
}
}
- __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock);
+ if (!g.predictable_replay)
+ __wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock);
start_txn = true;
/* Occasionally reopen cursors after transaction finish. */
if (next_rnd % 13 == 0)
diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml
index 1a9e00ff108..3c7cd2d5220 100755
--- a/src/third_party/wiredtiger/test/evergreen.yml
+++ b/src/third_party/wiredtiger/test/evergreen.yml
@@ -776,6 +776,54 @@ functions:
set -o verbose
${test_env_vars|} ./test_checkpoint ${checkpoint_args} 2>&1
+ "checkpoint test predictable":
+ command: shell.exec
+ params:
+ working_dir: "wiredtiger/cmake_build/test/checkpoint"
+ script: |
+ # Get a random value with leading zeroes removed, /bin/sh version.
+ rando() {
+ tr -cd 0-9 </dev/urandom | head -c 5 | sed -e 's/0*\(.\)/\1/'
+ }
+
+ # Run test/checkpoint in a way that can test predictable replay.
+ set -o errexit
+ set -o verbose
+
+ toolsdir=../../../tools
+ wtutil=../../wt
+
+ r=$(rando)$(rando)
+ x0=$(rando)$(rando)
+
+ # Always run with timestamps and in the predictable mode
+ base_args="-x -R"
+
+ rm -rf RUNDIR_0
+ # The first run is for calibration only. We just want to run for the designated
+ # time and get an approriate stop timestamp that can be used in later runs.
+ calibration_run_args="-PSD$r,E$x0"
+ ${test_env_vars|} ./test_checkpoint -h RUNDIR_0 $base_args ${checkpoint_args} $calibration_run_args || exit 1
+ echo "Finished calibration run"
+ stable_hex=$($toolsdir/wt_timestamps RUNDIR_0 | sed -e '/stable=/!d' -e 's/.*=//')
+ stop_ts=$(echo $((0x$stable_hex)))
+ for i in $(seq ${times}); do
+ echo Iteration $i/${times}
+ x1=$(rando)$(rando)
+ x2=$(rando)$(rando)
+ rm -rf RUNDIR_1 RUNDIR_2
+ # Do two runs up to the stable timestamp, using the same data seed,
+ # but with a different extra seed. Compare it when done.
+ first_run_args="-PSD$r,E$x1 -S $stop_ts"
+ echo "First run with args $base_args ${checkpoint_args} $first_run_args"
+ ${test_env_vars|} ./test_checkpoint -h RUNDIR_1 $base_args ${checkpoint_args} $first_run_args || exit 1
+ second_run_args="-PSD$r,E$x2 -S $stop_ts"
+ echo "Second run with args $base_args ${checkpoint_args} $second_run_args"
+ ${test_env_vars|} ./test_checkpoint -h RUNDIR_2 $base_args ${checkpoint_args} $second_run_args || exit 1
+ # Compare the runs.
+ $toolsdir/wt_cmp_dir RUNDIR_1 RUNDIR_2 || exit 1
+ done
+
"checkpoint stress test":
command: shell.exec
params:
@@ -4115,6 +4163,31 @@ tasks:
vars:
times: 5
+ - name: checkpoint-filetypes-predictable-test
+ commands:
+ - func: "get project"
+ - func: "compile wiredtiger"
+ vars:
+ # Don't use diagnostic - this test looks for timing problems that are more likely to occur without it
+ HAVE_DIAGNOSTIC: -DHAVE_DIAGNOSTIC=0
+ # FIXME-WT-10936: Enable once predictable replay supports column store
+ #- func: "checkpoint test predictable"
+ # vars:
+ # checkpoint_args: -t m -n 1000000 -k 5000000 -C cache_size=100MB
+ # times: 5
+ #- func: "checkpoint test predictable"
+ # vars:
+ # checkpoint_args: -t c -n 1000000 -k 5000000 -C cache_size=100MB
+ # times: 5
+ #- func: "checkpoint test predictable"
+ # vars:
+ # checkpoint_args: -n 1000000 -k 5000000 -C cache_size=100MB
+ # times: 5
+ - func: "checkpoint test predictable"
+ vars:
+ checkpoint_args: -t r -n 1000000 -k 5000000 -C cache_size=100MB
+ times: 5
+
- name: many-collection-test
commands:
- command: timeout.update
@@ -5346,6 +5419,8 @@ buildvariants:
# FIXME-WT-10822
# - name: format-tiered-test
- name: schema-abort-predictable-test
+ - name: checkpoint-filetypes-predictable-test
+
# When running the Python tests on this variant tcmalloc must be preloaded otherwise the wiredtiger library
# fails to load and resolve its dependency.