diff options
author | Luke Chen <luke.chen@mongodb.com> | 2019-06-05 10:44:58 +1000 |
---|---|---|
committer | Luke Chen <luke.chen@mongodb.com> | 2019-06-05 10:44:58 +1000 |
commit | c939010fe98ba0a8affe7d0d30d4e8d57e68242b (patch) | |
tree | d26869393e4eb400fc7eb2b5bb3194014ba71956 /src | |
parent | ed173f027ae8940bb93c57c580993192dc534fb0 (diff) | |
download | mongo-c939010fe98ba0a8affe7d0d30d4e8d57e68242b.tar.gz |
Import wiredtiger: 9b85ad89688bd72b8a649d844a7e458832955764 from branch mongodb-4.2
ref: 57bd75fee9..9b85ad8968
for: 4.3.1
WT-4190 Decrease shutdown time by doing multi-threaded cache flush
WT-4608 Cache stuck with clean pages for LSM data format testing
WT-4775 Make the "bad file descriptor" test resilient against crashing
WT-4793 Extend test/checkpoint to use timestamps and more
WT-4825 Add warning in operation tracking if the open file limit is too small
WT-4828 Fix type-related exception in operation tracking after conversion to Python3
Diffstat (limited to 'src')
-rw-r--r-- | src/third_party/wiredtiger/import.data | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/conn/conn_api.c | 7 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/evict/evict_lru.c | 24 | ||||
-rw-r--r-- | src/third_party/wiredtiger/test/checkpoint/checkpointer.c | 70 | ||||
-rwxr-xr-x | src/third_party/wiredtiger/test/checkpoint/smoke.sh | 9 | ||||
-rw-r--r-- | src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c | 56 | ||||
-rw-r--r-- | src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h | 5 | ||||
-rw-r--r-- | src/third_party/wiredtiger/test/checkpoint/workers.c | 135 | ||||
-rwxr-xr-x | src/third_party/wiredtiger/test/suite/suite_subprocess.py | 55 | ||||
-rwxr-xr-x[-rw-r--r--] | src/third_party/wiredtiger/test/suite/test_bug018.py | 47 | ||||
-rwxr-xr-x | src/third_party/wiredtiger/test/suite/wttest.py | 3 | ||||
-rwxr-xr-x | src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py | 21 |
12 files changed, 369 insertions, 65 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 43ede9082a0..22114815861 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -1,5 +1,5 @@ { - "commit": "57bd75fee93b685c133281100719f886d0184589", + "commit": "9b85ad89688bd72b8a649d844a7e458832955764", "github": "wiredtiger/wiredtiger.git", "vendor": "wiredtiger", "branch": "mongodb-4.2" diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c index 6d414edaa42..7a854ee596f 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -1064,6 +1064,13 @@ __conn_close(WT_CONNECTION *wt_conn, const char *config) F_SET(conn, WT_CONN_CLOSING_TIMESTAMP); } + /* + * Ramp the eviction dirty target down to encourage eviction threads to + * clear dirty content out of cache. + */ + conn->cache->eviction_dirty_trigger = 1.0; + conn->cache->eviction_dirty_target = 0.1; + err: /* * Rollback all running transactions. * We do this as a separate pass because an active transaction in one diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index 836fcd8cb59..7d696a20831 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -2639,6 +2639,23 @@ __verbose_dump_cache_single(WT_SESSION_IMPL *session, leaf_bytes = leaf_bytes_max = leaf_dirty_bytes = 0; leaf_dirty_bytes_max = leaf_dirty_pages = leaf_pages = 0; + dhandle = session->dhandle; + btree = dhandle->handle; + WT_RET(__wt_msg(session, "%s(%s%s)%s%s:", + dhandle->name, dhandle->checkpoint != NULL ? "checkpoint=" : "", + dhandle->checkpoint != NULL ? dhandle->checkpoint : "<live>", + btree->evict_disabled != 0 ? " eviction disabled" : "", + btree->evict_disabled_open ? " at open" : "")); + + /* + * We cannot walk the tree of a dhandle held exclusively because + * the owning thread could be manipulating it in a way that causes + * us to dump core. So print out that we visited and skipped it. + */ + if (F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE)) + return (__wt_msg(session, + " Opened exclusively. Cannot walk tree, skipping.")); + next_walk = NULL; while (__wt_tree_walk(session, &next_walk, WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_WAIT) == 0 && @@ -2669,13 +2686,6 @@ __verbose_dump_cache_single(WT_SESSION_IMPL *session, } } - dhandle = session->dhandle; - btree = dhandle->handle; - WT_RET(__wt_msg(session, "%s(%s%s)%s%s:", - dhandle->name, dhandle->checkpoint != NULL ? "checkpoint=" : "", - dhandle->checkpoint != NULL ? dhandle->checkpoint : "<live>", - btree->evict_disabled != 0 ? "eviction disabled" : "", - btree->evict_disabled_open ? " at open" : "")); if (intl_pages == 0) WT_RET(__wt_msg(session, "internal: 0 pages")); else diff --git a/src/third_party/wiredtiger/test/checkpoint/checkpointer.c b/src/third_party/wiredtiger/test/checkpoint/checkpointer.c index 493cdaf5114..311c21eff5e 100644 --- a/src/third_party/wiredtiger/test/checkpoint/checkpointer.c +++ b/src/third_party/wiredtiger/test/checkpoint/checkpointer.c @@ -29,6 +29,7 @@ #include "test_checkpoint.h" static WT_THREAD_RET checkpointer(void *); +static WT_THREAD_RET clock_thread(void *); static int compare_cursors( WT_CURSOR *, const char *, WT_CURSOR *, const char *); static int diagnose_key_error(WT_CURSOR *, int, WT_CURSOR *, int); @@ -44,6 +45,11 @@ start_checkpoints(void) { testutil_check(__wt_thread_create(NULL, &g.checkpoint_thread, checkpointer, NULL)); + if (g.use_timestamps) { + testutil_check(__wt_rwlock_init(NULL, &g.clock_lock)); + testutil_check(__wt_thread_create(NULL, + &g.clock_thread, clock_thread, NULL)); + } } /* @@ -54,6 +60,57 @@ void end_checkpoints(void) { testutil_check(__wt_thread_join(NULL, &g.checkpoint_thread)); + if (g.use_timestamps) { + testutil_check(__wt_thread_join(NULL, &g.clock_thread)); + __wt_rwlock_destroy(NULL, &g.clock_lock); + } +} + +/* + * clock_thread -- + * Clock thread: ticks up timestamps. + */ +static WT_THREAD_RET +clock_thread(void *arg) +{ + WT_RAND_STATE rnd; + WT_SESSION *wt_session; + WT_SESSION_IMPL *session; + uint64_t delay; + char buf[128]; + + WT_UNUSED(arg); + + __wt_random_init(&rnd); + testutil_check(g.conn->open_session(g.conn, NULL, NULL, &wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + + g.ts = 0; + while (g.running) { + __wt_writelock(session, &g.clock_lock); + ++g.ts; + testutil_check(__wt_snprintf( + buf, sizeof(buf), + "oldest_timestamp=%x,stable_timestamp=%x", g.ts, g.ts)); + testutil_check(g.conn->set_timestamp(g.conn, buf)); + if (g.ts % 997 == 0) { + /* + * Random value between 6 and 10 seconds. + */ + delay = __wt_random(&rnd) % 5; + __wt_sleep(delay + 6, 0); + } + __wt_writeunlock(session, &g.clock_lock); + /* + * Random value between 5000 and 10000. + */ + delay = __wt_random(&rnd) % 5001; + __wt_sleep(0, delay + 5000); + } + + testutil_check(wt_session->close(wt_session, NULL)); + + return (WT_THREAD_RET_VALUE); } /* @@ -82,7 +139,9 @@ checkpointer(void *arg) static int real_checkpointer(void) { + WT_RAND_STATE rnd; WT_SESSION *session; + uint64_t delay; int ret; char buf[128], *checkpoint_config; @@ -90,6 +149,7 @@ real_checkpointer(void) return (log_print_err( "Checkpoint thread started stopped\n", EINVAL, 1)); + __wt_random_init(&rnd); while (g.ntables > g.ntables_created) __wt_yield(); @@ -115,6 +175,7 @@ real_checkpointer(void) session, checkpoint_config)) != 0) return (log_print_err("session.checkpoint", ret, 1)); printf("Finished a checkpoint\n"); + fflush(stdout); if (!g.running) goto done; @@ -123,6 +184,14 @@ real_checkpointer(void) if ((ret = verify_consistency(session, true)) != 0) return (log_print_err( "verify_consistency (offline)", ret, 1)); + + /* + * Random value between 4 and 8 seconds. + */ + if (g.sweep_stress) { + delay = __wt_random(&rnd) % 5; + __wt_sleep(delay + 4, 0); + } } done: if ((ret = session->close(session, NULL)) != 0) @@ -234,6 +303,7 @@ verify_consistency(WT_SESSION *session, bool use_checkpoint) printf("Finished verifying a %s with %d tables and %" PRIu64 " keys\n", use_checkpoint ? "checkpoint" : "snapshot", g.ntables, key_count); + fflush(stdout); err: for (i = 0; i < g.ntables; i++) { if (cursors[i] != NULL && diff --git a/src/third_party/wiredtiger/test/checkpoint/smoke.sh b/src/third_party/wiredtiger/test/checkpoint/smoke.sh index 8db6fc1ebc4..dba60babb92 100755 --- a/src/third_party/wiredtiger/test/checkpoint/smoke.sh +++ b/src/third_party/wiredtiger/test/checkpoint/smoke.sh @@ -23,3 +23,12 @@ $TEST_WRAPPER ./t -T 6 -t r echo "checkpoint: 6 row-store tables, named checkpoint" $TEST_WRAPPER ./t -c 'TeSt' -T 6 -t r + +echo "checkpoint: row-store tables, stress LAS. Sweep and timestamps" +$TEST_WRAPPER ./t -t r -W 3 -r 2 -s -x -n 100000 -k 100000 -C cache_size=100MB + +echo "checkpoint: 3 mixed tables, with sweep" +$TEST_WRAPPER ./t -T 3 -t m -W 3 -r 2 -s -n 100000 -k 100000 + +echo "checkpoint: 3 mixed tables, with timestamps" +$TEST_WRAPPER ./t -T 3 -t m -W 3 -r 2 -x -n 100000 -k 100000 diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c index f73ada611fe..461b6334b27 100644 --- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c +++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c @@ -34,7 +34,7 @@ static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *); static int handle_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *); static void onint(int) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); -static void cleanup(void); +static void cleanup(bool); static int usage(void); static int wt_connect(const char *); static int wt_shutdown(void); @@ -62,10 +62,11 @@ main(int argc, char *argv[]) g.nops = 100000; g.ntables = 3; g.nworkers = 1; + g.sweep_stress = g.use_timestamps = false; runs = 1; while ((ch = __wt_getopt( - progname, argc, argv, "C:c:h:k:l:n:r:T:t:W:")) != EOF) + progname, argc, argv, "C:c:h:k:l:n:r:sT:t:W:x")) != EOF) switch (ch) { case 'c': g.checkpoint_name = __wt_optarg; @@ -92,6 +93,9 @@ main(int argc, char *argv[]) case 'r': /* runs */ runs = atoi(__wt_optarg); break; + case 's': + g.sweep_stress = true; + break; case 't': switch (__wt_optarg[0]) { case 'c': @@ -116,6 +120,9 @@ main(int argc, char *argv[]) case 'W': g.nworkers = atoi(__wt_optarg); break; + case 'x': + g.use_timestamps = true; + break; default: return (usage()); } @@ -131,11 +138,11 @@ main(int argc, char *argv[]) printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid()); for (cnt = 1; (runs == 0 || cnt <= runs) && g.status == 0; ++cnt) { + cleanup(cnt == 1); /* Clean up previous runs */ + printf(" %d: %d workers, %d tables\n", cnt, g.nworkers, g.ntables); - cleanup(); /* Clean up previous runs */ - /* Setup a fresh set of cookies in the global array. */ if ((g.cookies = calloc( (size_t)(g.ntables), sizeof(COOKIE))) == NULL) { @@ -189,15 +196,30 @@ wt_connect(const char *config_open) NULL /* Close handler. */ }; int ret; - char config[128]; - - testutil_make_work_dir(g.home); - - testutil_check(__wt_snprintf(config, sizeof(config), - "create,statistics=(fast),error_prefix=\"%s\",cache_size=1GB%s%s", - progname, - config_open == NULL ? "" : ",", - config_open == NULL ? "" : config_open)); + char config[512]; + + /* + * If we want to stress sweep, we have a lot of additional + * configuration settings to set. + */ + if (g.sweep_stress) + testutil_check(__wt_snprintf(config, sizeof(config), + "create,cache_cursors=false,statistics=(fast)," \ + "statistics_log=(json,wait=1),error_prefix=\"%s\"," \ + "file_manager=(close_handle_minimum=1,close_idle_time=1,"\ + "close_scan_interval=1),log=(enabled),cache_size=1GB,"\ + "timing_stress_for_test=(aggressive_sweep)%s%s", + progname, + config_open == NULL ? "" : ",", + config_open == NULL ? "" : config_open)); + else + testutil_check(__wt_snprintf(config, sizeof(config), + "create,cache_cursors=false,statistics=(fast)," \ + "statistics_log=(json,wait=1),error_prefix=\"%s\"" \ + "%s%s", + progname, + config_open == NULL ? "" : ",", + config_open == NULL ? "" : config_open)); if ((ret = wiredtiger_open( g.home, &event_handler, config, &g.conn)) != 0) @@ -230,12 +252,14 @@ wt_shutdown(void) * Clean up from previous runs. */ static void -cleanup(void) +cleanup(bool remove_dir) { g.running = 0; g.ntables_created = 0; + g.ts = 0; - testutil_clean_work_dir(g.home); + if (remove_dir) + testutil_make_work_dir(g.home); } static int @@ -271,7 +295,7 @@ onint(int signo) { WT_UNUSED(signo); - cleanup(); + cleanup(false); fprintf(stderr, "\n"); exit(EXIT_FAILURE); diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h index 805864344cb..b579f5cf9b9 100644 --- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h +++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h @@ -63,8 +63,13 @@ typedef struct { int ntables_created; /* Number tables opened */ int running; /* Whether to stop */ int status; /* Exit status */ + bool sweep_stress; /* Sweep stress test */ + u_int ts; /* Current timestamp */ + bool use_timestamps; /* Use txn timestamps */ COOKIE *cookies; /* Per-thread info */ + WT_RWLOCK clock_lock; /* Clock synchronization */ wt_thread_t checkpoint_thread; /* Checkpoint thread */ + wt_thread_t clock_thread; /* Clock thread */ } GLOBAL; extern GLOBAL g; diff --git a/src/third_party/wiredtiger/test/checkpoint/workers.c b/src/third_party/wiredtiger/test/checkpoint/workers.c index 33836c67110..e9966cec145 100644 --- a/src/third_party/wiredtiger/test/checkpoint/workers.c +++ b/src/third_party/wiredtiger/test/checkpoint/workers.c @@ -39,12 +39,23 @@ static int create_table(WT_SESSION *session, COOKIE *cookie) { int ret; - char config[128]; + char config[256]; - testutil_check(__wt_snprintf(config, sizeof(config), - "key_format=%s,value_format=S,%s", - cookie->type == COL ? "r" : "q", - cookie->type == LSM ? ",type=lsm" : "")); + /* + * If we're using timestamps, turn off logging for the table. + */ + if (g.use_timestamps) + testutil_check(__wt_snprintf(config, sizeof(config), + "key_format=%s,value_format=S,allocation_size=512," \ + "leaf_page_max=1KB,internal_page_max=1KB," \ + "memory_page_max=64KB,log=(enabled=false),%s", + cookie->type == COL ? "r" : "q", + cookie->type == LSM ? ",type=lsm" : "")); + else + testutil_check(__wt_snprintf(config, sizeof(config), + "key_format=%s,value_format=S,%s", + cookie->type == COL ? "r" : "q", + cookie->type == LSM ? ",type=lsm" : "")); if ((ret = session->create(session, cookie->uri, config)) != 0) if (ret != EEXIST) @@ -94,6 +105,8 @@ start_workers(table_type type) goto err; } + testutil_check(session->close(session, NULL)); + (void)gettimeofday(&start, NULL); /* Create threads. */ @@ -122,20 +135,55 @@ err: free(tids); static inline int worker_op(WT_CURSOR *cursor, uint64_t keyno, u_int new_val) { - int ret; + int cmp, ret; char valuebuf[64]; cursor->set_key(cursor, keyno); - /* Roughly 5% removes. */ - if (new_val % 19 == 0) { - if ((ret = cursor->remove(cursor)) != 0) { + /* Roughly half inserts, then balanced inserts / range removes. */ + if (new_val > g.nops / 2 && new_val % 39 == 0) { + if ((ret = cursor->search_near(cursor, &cmp)) != 0) { + if (ret == WT_NOTFOUND) + return (0); + if (ret == WT_ROLLBACK) + return (WT_ROLLBACK); + return (log_print_err("cursor.search_near", ret, 1)); + } + if (cmp < 0) { + if ((ret = cursor->next(cursor)) != 0) { + if (ret == WT_NOTFOUND) + return (0); + if (ret == WT_ROLLBACK) + return (WT_ROLLBACK); + return (log_print_err("cursor.next", ret, 1)); + } + } + for (int i = 10; i > 0; i--) { + if ((ret = cursor->remove(cursor)) != 0) { + if (ret == WT_ROLLBACK) + return (WT_ROLLBACK); + return (log_print_err("cursor.remove", ret, 1)); + } + if ((ret = cursor->next(cursor)) != 0) { + if (ret == WT_NOTFOUND) + return (0); + if (ret == WT_ROLLBACK) + return (WT_ROLLBACK); + return (log_print_err("cursor.next", ret, 1)); + } + } + if (g.sweep_stress) + testutil_check(cursor->reset(cursor)); + } else if (new_val % 39 < 10) { + if ((ret = cursor->search(cursor)) != 0 && ret != WT_NOTFOUND) { if (ret == WT_ROLLBACK) return (WT_ROLLBACK); - return (log_print_err("cursor.remove", ret, 1)); + return (log_print_err("cursor.search", ret, 1)); } + if (g.sweep_stress) + testutil_check(cursor->reset(cursor)); } else { testutil_check(__wt_snprintf( - valuebuf, sizeof(valuebuf), "%037u", new_val)); + valuebuf, sizeof(valuebuf), "%052u", new_val)); cursor->set_value(cursor, valuebuf); if ((ret = cursor->insert(cursor)) != 0) { if (ret == WT_ROLLBACK) @@ -143,6 +191,7 @@ worker_op(WT_CURSOR *cursor, uint64_t keyno, u_int new_val) return (log_print_err("cursor.insert", ret, 1)); } } + return (0); } @@ -177,11 +226,12 @@ real_worker(void) WT_SESSION *session; u_int i, keyno; int j, ret, t_ret; + const char *begin_cfg; + char buf[128]; + bool has_cursors; ret = t_ret = 0; - __wt_random_init(&rnd); - if ((cursors = calloc( (size_t)(g.ntables), sizeof(WT_CURSOR *))) == NULL) return (log_print_err("malloc", ENOMEM, 1)); @@ -192,41 +242,80 @@ real_worker(void) goto err; } + __wt_random_init_seed((WT_SESSION_IMPL *)session, &rnd); + for (j = 0; j < g.ntables; j++) if ((ret = session->open_cursor(session, g.cookies[j].uri, NULL, NULL, &cursors[j])) != 0) { (void)log_print_err("session.open_cursor", ret, 1); goto err; } + has_cursors = true; + + if (g.use_timestamps) + begin_cfg = "read_timestamp=1,roundup_timestamps=(read=true)"; + else + begin_cfg = NULL; for (i = 0; i < g.nops && g.running; ++i, __wt_yield()) { - if ((ret = session->begin_transaction(session, NULL)) != 0) { + if ((ret = + session->begin_transaction(session, begin_cfg)) != 0) { (void)log_print_err( "real_worker:begin_transaction", ret, 1); goto err; } keyno = __wt_random(&rnd) % g.nkeys + 1; - for (j = 0; j < g.ntables; j++) { - if ((ret = worker_op(cursors[j], keyno, i)) != 0) - break; + if (g.use_timestamps && i % 23 == 0) { + if (__wt_try_readlock( + (WT_SESSION_IMPL *)session, &g.clock_lock) != 0) { + testutil_check( + session->commit_transaction(session, NULL)); + for (j = 0; j < g.ntables; j++) + testutil_check( + cursors[j]->close(cursors[j])); + has_cursors = false; + __wt_readlock( + (WT_SESSION_IMPL *)session, &g.clock_lock); + testutil_check(session->begin_transaction( + session, begin_cfg)); + } + testutil_check(__wt_snprintf( + buf, sizeof(buf), "commit_timestamp=%x", g.ts + 1)); + testutil_check( + session->timestamp_transaction(session, buf)); + __wt_readunlock( + (WT_SESSION_IMPL *)session, &g.clock_lock); + + for (j = 0; !has_cursors && j < g.ntables; j++) + if ((ret = session->open_cursor( + session, g.cookies[j].uri, + NULL, NULL, &cursors[j])) != 0) { + (void)log_print_err( + "session.open_cursor", ret, 1); + goto err; + } + has_cursors = true; + } + for (j = 0; ret == 0 && j < g.ntables; j++) { + ret = worker_op(cursors[j], keyno, i); } - if (ret == 0) { + if (ret != 0 && ret != WT_ROLLBACK) { + (void)log_print_err("worker op failed", ret, 1); + goto err; + } else if (ret == 0 && __wt_random(&rnd) % 7 != 0) { if ((ret = session->commit_transaction( session, NULL)) != 0) { (void)log_print_err( "real_worker:commit_transaction", ret, 1); goto err; } - } else if (ret == WT_ROLLBACK) { + } else { if ((ret = session->rollback_transaction( session, NULL)) != 0) { (void)log_print_err( "real_worker:rollback_transaction", ret, 1); goto err; - } - } else { - (void)log_print_err("worker op failed", ret, 1); - goto err; + } } } diff --git a/src/third_party/wiredtiger/test/suite/suite_subprocess.py b/src/third_party/wiredtiger/test/suite/suite_subprocess.py index d04a281807a..95a599090f5 100755 --- a/src/third_party/wiredtiger/test/suite/suite_subprocess.py +++ b/src/third_party/wiredtiger/test/suite/suite_subprocess.py @@ -151,7 +151,8 @@ class suite_subprocess: return envvar + '=' + str(os.environ.get(envvar)) + '\n' def show_outputs(self, procargs, message, filenames): - out = 'ERROR: wt command ' + message + ': ' + str(procargs) + '\n' + \ + out = message + ': ' + \ + str(procargs) + '\n' + \ self.verbose_env('PATH') + \ self.verbose_env('LD_LIBRARY_PATH') + \ self.verbose_env('DYLD_LIBRARY_PATH') + \ @@ -169,6 +170,48 @@ class suite_subprocess: out = sepline + filename + '\n' + sepline + contents WiredTigerTestCase.prout(out) + # Run a method as a subprocess using the run.py machinery. + # Return the process exit status and the the WiredTiger + # home directory used by the subprocess. + def run_subprocess_function(self, directory, funcname): + testparts = funcname.split('.') + if len(testparts) != 3: + raise ValueError('bad function name "' + funcname + + '", should be three part dotted name') + topdir = os.path.dirname(self.buildDirectory()) + runscript = os.path.join(topdir, 'test', 'suite', 'run.py') + procargs = [ sys.executable, runscript, '-p', '--dir', directory, + funcname] + + # scenario_number is only set if we are running in a scenario + try: + scennum = self.scenario_number + procargs.append('-s') + procargs.append(str(scennum)) + except: + scennum = 0 + + returncode = -1 + os.makedirs(directory) + + # We cannot put the output/error files in the subdirectory, as + # that will be cleared by the run.py script. + with open("subprocess.err", "w") as wterr: + with open("subprocess.out", "w") as wtout: + returncode = subprocess.call( + procargs, stdout=wtout, stderr=wterr) + if returncode != 0: + # This is not necessarily an error, the primary reason to + # run in a subprocess is that it may crash. + self.show_outputs(procargs, + "Warning: run_subprocess_function " + funcname + \ + " returned error code " + str(returncode), + [ "subprocess.out", "subprocess.err" ]) + + new_home_dir = os.path.join(directory, + testparts[1] + '.' + str(scennum)) + return [ returncode, new_home_dir ] + # Run the wt utility. def runWt(self, args, infilename=None, outfilename=None, errfilename=None, closeconn=True, @@ -230,15 +273,17 @@ class suite_subprocess: procargs, stdout=wtout, stderr=wterr) if failure: if returncode == 0: - self.show_outputs(procargs, "expected failure, got success", - [wtoutname, wterrname]) + self.show_outputs(procargs, + "ERROR: wt command expected failure, got success", + [wtoutname, wterrname]) self.assertNotEqual(returncode, 0, 'expected failure: "' + \ str(procargs) + '": exited ' + str(returncode)) else: if returncode != 0: - self.show_outputs(procargs, "expected success, got failure", - [wtoutname, wterrname]) + self.show_outputs(procargs, + "ERROR: wt command expected success, got failure", + [wtoutname, wterrname]) self.assertEqual(returncode, 0, 'expected success: "' + \ str(procargs) + '": exited ' + str(returncode)) diff --git a/src/third_party/wiredtiger/test/suite/test_bug018.py b/src/third_party/wiredtiger/test/suite/test_bug018.py index f85de89c09f..5e5472f6eef 100644..100755 --- a/src/third_party/wiredtiger/test/suite/test_bug018.py +++ b/src/third_party/wiredtiger/test/suite/test_bug018.py @@ -27,6 +27,7 @@ # OTHER DEALINGS IN THE SOFTWARE. from helper import copy_wiredtiger_home +from suite_subprocess import suite_subprocess import os import wiredtiger, wttest @@ -34,10 +35,14 @@ import wiredtiger, wttest # JIRA WT-3590: if writing table data fails during close then tables # that were updated within the same transaction could get out of sync with # each other. -class test_bug018(wttest.WiredTigerTestCase): +class test_bug018(wttest.WiredTigerTestCase, suite_subprocess): '''Test closing/reopening/recovering tables when writes fail''' conn_config = 'log=(enabled)' + basename = 'bug018.' + baseuri = 'file:' + basename + uri1 = baseuri + '01.wt' + uri2 = baseuri + '02.wt' def setUp(self): # This test uses Linux-specific code so skip on any other system. @@ -49,12 +54,10 @@ class test_bug018(wttest.WiredTigerTestCase): self.session.create(uri, 'key_format=S,value_format=S') return self.session.open_cursor(uri) - def test_bug018(self): + def subprocess_bug018(self): '''Test closing multiple tables''' - basename = 'bug018.' - baseuri = 'file:' + basename - c1 = self.create_table(baseuri + '01.wt') - c2 = self.create_table(baseuri + '02.wt') + c1 = self.create_table(self.uri1) + c2 = self.create_table(self.uri2) self.session.begin_transaction() c1['key'] = 'value' @@ -70,7 +73,7 @@ class test_bug018(wttest.WiredTigerTestCase): # This is Linux-specific code to figure out the file descriptor. for f in os.listdir('/proc/self/fd'): try: - if os.readlink('/proc/self/fd/' + f).endswith(basename + '02.wt'): + if os.readlink('/proc/self/fd/' + f).endswith(self.basename + '02.wt'): os.close(int(f)) except OSError: pass @@ -82,17 +85,37 @@ class test_bug018(wttest.WiredTigerTestCase): except wiredtiger.WiredTigerError: self.conn = None + def test_bug018(self): + '''Test closing multiple tables''' + + self.close_conn() + subdir = 'SUBPROCESS' + [ignore_result, new_home_dir] = self.run_subprocess_function(subdir, + 'test_bug018.test_bug018.subprocess_bug018') + # Make a backup for forensics in case something goes wrong. backup_dir = 'BACKUP' - copy_wiredtiger_home('.', backup_dir, True) + copy_wiredtiger_home(new_home_dir, backup_dir, True) # After reopening and running recovery both tables should be in # sync even though table 1 was successfully written and table 2 # had an error on close. - self.open_conn() - c1 = self.session.open_cursor(baseuri + '01.wt') - c2 = self.session.open_cursor(baseuri + '02.wt') - self.assertEqual(list(c1), list(c2)) + self.open_conn(new_home_dir) + + results1 = list(self.session.open_cursor(self.uri1)) + + # It's possible the second table can't even be opened. + # That can happen only if the root page was not pushed out. + # So if we get an error, make sure we're getting the right + # error message. + + self.captureerr.check(self) # check error messages until now + try: + results2 = list(self.session.open_cursor(self.uri2)) + except: + self.captureerr.checkAdditionalPattern(self, 'unable to read root page') + results2 = [] + self.assertEqual(results1, results2) if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/wttest.py b/src/third_party/wiredtiger/test/suite/wttest.py index 260daef7d02..de9514b7651 100755 --- a/src/third_party/wiredtiger/test/suite/wttest.py +++ b/src/third_party/wiredtiger/test/suite/wttest.py @@ -247,6 +247,9 @@ class WiredTigerTestCase(unittest.TestCase): return "%s.%s.%s" % (self.__module__, self.className(), self._testMethodName) + def buildDirectory(self): + return self._builddir + # Return the wiredtiger_open extension argument for # any needed shared library. def extensionsConfig(self): diff --git a/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py b/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py index ca2b2d814a8..7409ab62243 100755 --- a/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py +++ b/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py @@ -39,6 +39,7 @@ import multiprocessing import numpy as np import os import pandas as pd +import subprocess import sys import traceback import time @@ -462,7 +463,7 @@ def createLegendFigure(legendDict): p = figure(title="TRACKED FUNCTIONS", plot_width=plotWidth, - plot_height = max((max_ycoord + 2) * pixelsForLegendItem, 90), + plot_height = int(max((max_ycoord + 2) * pixelsForLegendItem, 90)), tools = [], toolbar_location="above", x_range = (0, (FUNCS_PER_ROW + 1)* HSPACE_BETWEEN_FUNCS), y_range = (0, max_ycoord + 2), @@ -1241,6 +1242,22 @@ def parseConfigFile(fname): return True; +# With Python3 this script fails if the number of open files +# is limited to 256, because the multiprocessing package does +# not appear to properly clean up processes that exited. +# +def checkOpenFileLimit(): + + targetLimit = 512; + openFileLimit = int(subprocess.check_output("ulimit -n", + shell=True).decode()); + + if (openFileLimit < targetLimit): + print(color.BOLD + color.RED + "Open file limit is " + + str(openFileLimit) + ". Please increase to " + str(targetLimit) + + " by running `ulimit -n " + str(targetLimit) + "`." + + color.END); + sys.exit(-1); def main(): @@ -1275,6 +1292,8 @@ def main(): parser.print_help(); sys.exit(1); + checkOpenFileLimit(); + # Determine the target job parallelism if (args.jobParallelism > 0): targetParallelism = args.jobParallelism; |