From 9c8c662a9213b16ae206f495c875594f5f0454f0 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Fri, 17 Feb 2017 11:22:16 +1100 Subject: Import wiredtiger: e1bcc30da91eedd0b17cebb725cc7e607ffa2340 from branch mongodb-3.6 ref: 48a3cbc17f..e1bcc30da9 for: 3.5.4 WT-2790 Fix a test case false positive in test_sweep01 WT-2909 Create automatable test verifying checkpoint integrity after errors WT-3088 bug: Don't evict a page with refs visible to readers after a split WT-3097 Race on reconfigure or shutdown can lead to waiting for statistics log server WT-3111 util_create() doesn't free memory assigned to "uri" WT-3113 Add a verbose mode to dump the cache when eviction is stuck WT-3115 Change the dhandle lock to a read/write lock WT-3120 Fix ordering problem in connection_close for filesystem loaded in an extension WT-3135 search_near() for index with custom collator WT-3137 Hang in __log_slot_join/__log_slot_switch_internal WT-3139 Enhance wtperf to support periodic table scans WT-3144 bug fix: random cursor returns not-found when descending to an empty page WT-3148 Improve eviction efficiency with many small trees WT-3149 Change eviction to start new walks from a random place in the tree WT-3150 Reduce impact of checkpoints on eviction server WT-3152 Convert table lock from a spinlock to a read write lock WT-3156 Assertion in log_write fires after write failure WT-3157 checkpoint/transaction integrity issue when writes fail. WT-3159 Incorrect key for index containing multiple variable sized entries WT-3161 checkpoint hang after write failure injection. 
WT-3164 Ensure all relevant btree fields are reset on checkpoint error WT-3170 Clear the eviction walk point while populating from a tree WT-3173 Add runtime detection for s390x CRC32 hardware support WT-3174 Coverity/lint cleanup WT-3175 New hang in internal page split WT-3179 Test bug: clang sanitizer failure in fail_fs WT-3180 Fault injection tests should only run as "long" tests and should not create core files WT-3184 Problem duplicating index cursor with custom collator WT-3186 Fix error path and panic detection in logging loops --- src/third_party/wiredtiger/bench/wtperf/config.c | 42 +- .../wiredtiger/bench/wtperf/idle_table_cycle.c | 2 + .../bench/wtperf/stress/btree-split-stress.wtperf | 3 +- src/third_party/wiredtiger/bench/wtperf/wtperf.c | 163 +++- src/third_party/wiredtiger/bench/wtperf/wtperf.h | 4 + .../wiredtiger/bench/wtperf/wtperf_opt.i | 10 +- .../wiredtiger/build_posix/Make.subdirs | 1 + src/third_party/wiredtiger/dist/api_data.py | 3 +- src/third_party/wiredtiger/dist/filelist | 1 + src/third_party/wiredtiger/dist/flags.py | 7 +- src/third_party/wiredtiger/dist/s_define.list | 2 + src/third_party/wiredtiger/dist/s_stat | 3 - src/third_party/wiredtiger/dist/s_string.ok | 7 + src/third_party/wiredtiger/dist/s_void | 10 + src/third_party/wiredtiger/dist/stat_data.py | 4 +- .../wiredtiger/examples/c/ex_file_system.c | 13 +- .../wiredtiger/ext/test/fail_fs/Makefile.am | 9 + .../wiredtiger/ext/test/fail_fs/fail_fs.c | 847 +++++++++++++++++++++ src/third_party/wiredtiger/import.data | 2 +- src/third_party/wiredtiger/src/async/async_api.c | 5 +- .../wiredtiger/src/async/async_worker.c | 2 +- src/third_party/wiredtiger/src/btree/bt_cursor.c | 136 +--- src/third_party/wiredtiger/src/btree/bt_debug.c | 4 +- src/third_party/wiredtiger/src/btree/bt_random.c | 427 +++++++++++ src/third_party/wiredtiger/src/btree/bt_split.c | 233 +++--- src/third_party/wiredtiger/src/btree/bt_walk.c | 4 +- src/third_party/wiredtiger/src/btree/row_srch.c | 212 ------ 
.../wiredtiger/src/checksum/power8/crc32_wrapper.c | 4 +- .../wiredtiger/src/checksum/zseries/crc32-s390x.c | 26 +- src/third_party/wiredtiger/src/config/config_def.c | 60 +- src/third_party/wiredtiger/src/conn/conn_api.c | 11 + src/third_party/wiredtiger/src/conn/conn_cache.c | 6 +- .../wiredtiger/src/conn/conn_cache_pool.c | 8 +- src/third_party/wiredtiger/src/conn/conn_ckpt.c | 26 +- src/third_party/wiredtiger/src/conn/conn_dhandle.c | 55 +- src/third_party/wiredtiger/src/conn/conn_handle.c | 21 +- src/third_party/wiredtiger/src/conn/conn_log.c | 50 +- src/third_party/wiredtiger/src/conn/conn_open.c | 42 +- src/third_party/wiredtiger/src/conn/conn_stat.c | 33 +- src/third_party/wiredtiger/src/conn/conn_sweep.c | 26 +- src/third_party/wiredtiger/src/cursor/cur_backup.c | 8 +- src/third_party/wiredtiger/src/cursor/cur_index.c | 26 +- src/third_party/wiredtiger/src/cursor/cur_std.c | 7 +- src/third_party/wiredtiger/src/cursor/cur_table.c | 2 +- .../wiredtiger/src/docs/cursor-random.dox | 5 - src/third_party/wiredtiger/src/docs/upgrading.dox | 6 + src/third_party/wiredtiger/src/docs/wtperf.dox | 6 +- src/third_party/wiredtiger/src/evict/evict_lru.c | 617 ++++++++------- src/third_party/wiredtiger/src/evict/evict_stat.c | 2 +- src/third_party/wiredtiger/src/include/btmem.h | 8 +- src/third_party/wiredtiger/src/include/btree.i | 28 +- src/third_party/wiredtiger/src/include/cache.h | 2 +- src/third_party/wiredtiger/src/include/cache.i | 2 +- .../wiredtiger/src/include/connection.h | 8 +- src/third_party/wiredtiger/src/include/dhandle.h | 18 + src/third_party/wiredtiger/src/include/extern.h | 25 +- .../wiredtiger/src/include/extern_posix.h | 4 +- .../wiredtiger/src/include/extern_win.h | 4 +- src/third_party/wiredtiger/src/include/flags.h | 79 +- src/third_party/wiredtiger/src/include/log.h | 3 +- src/third_party/wiredtiger/src/include/misc.i | 5 +- src/third_party/wiredtiger/src/include/mutex.h | 4 +- src/third_party/wiredtiger/src/include/packing.i | 7 +- 
src/third_party/wiredtiger/src/include/schema.h | 162 +++- src/third_party/wiredtiger/src/include/session.h | 2 - src/third_party/wiredtiger/src/include/stat.h | 4 +- .../wiredtiger/src/include/wiredtiger.in | 251 +++--- src/third_party/wiredtiger/src/log/log.c | 42 +- src/third_party/wiredtiger/src/log/log_slot.c | 206 +++-- src/third_party/wiredtiger/src/lsm/lsm_cursor.c | 4 +- src/third_party/wiredtiger/src/lsm/lsm_manager.c | 12 +- src/third_party/wiredtiger/src/lsm/lsm_stat.c | 4 +- src/third_party/wiredtiger/src/lsm/lsm_tree.c | 63 +- src/third_party/wiredtiger/src/lsm/lsm_work_unit.c | 4 +- src/third_party/wiredtiger/src/lsm/lsm_worker.c | 2 +- .../wiredtiger/src/os_posix/os_mtx_cond.c | 28 +- .../wiredtiger/src/os_win/os_mtx_cond.c | 43 +- .../wiredtiger/src/schema/schema_drop.c | 2 +- .../wiredtiger/src/schema/schema_list.c | 2 +- .../wiredtiger/src/schema/schema_rename.c | 2 +- .../wiredtiger/src/schema/schema_worker.c | 2 +- .../wiredtiger/src/session/session_api.c | 56 +- .../wiredtiger/src/session/session_dhandle.c | 43 +- src/third_party/wiredtiger/src/support/cond_auto.c | 80 +- src/third_party/wiredtiger/src/support/rand.c | 12 + src/third_party/wiredtiger/src/support/stat.c | 16 +- .../wiredtiger/src/support/thread_group.c | 2 +- src/third_party/wiredtiger/src/txn/txn.c | 95 +++ src/third_party/wiredtiger/src/txn/txn_ckpt.c | 39 +- src/third_party/wiredtiger/src/txn/txn_log.c | 4 +- src/third_party/wiredtiger/src/utilities/util.h | 2 +- .../wiredtiger/src/utilities/util_alter.c | 9 +- .../wiredtiger/src/utilities/util_compact.c | 14 +- .../wiredtiger/src/utilities/util_create.c | 12 +- .../wiredtiger/src/utilities/util_drop.c | 10 +- .../wiredtiger/src/utilities/util_dump.c | 26 +- .../wiredtiger/src/utilities/util_list.c | 21 +- .../wiredtiger/src/utilities/util_load.c | 2 +- .../wiredtiger/src/utilities/util_load_json.c | 2 +- .../wiredtiger/src/utilities/util_loadtext.c | 13 +- .../wiredtiger/src/utilities/util_main.c | 4 +- 
.../wiredtiger/src/utilities/util_printlog.c | 14 +- .../wiredtiger/src/utilities/util_read.c | 19 +- .../wiredtiger/src/utilities/util_rebalance.c | 30 +- .../wiredtiger/src/utilities/util_rename.c | 15 +- .../wiredtiger/src/utilities/util_salvage.c | 30 +- .../wiredtiger/src/utilities/util_stat.c | 6 +- .../wiredtiger/src/utilities/util_truncate.c | 11 +- .../wiredtiger/src/utilities/util_upgrade.c | 30 +- .../wiredtiger/src/utilities/util_verify.c | 34 +- .../wiredtiger/src/utilities/util_write.c | 20 +- src/third_party/wiredtiger/test/csuite/Makefile.am | 12 + .../test/csuite/wt2909_checkpoint_integrity/main.c | 666 ++++++++++++++++ .../wiredtiger/test/csuite/wt3120_filesys/main.c | 99 +++ .../test/csuite/wt3135_search_near_collator/main.c | 360 +++++++++ .../test/csuite/wt3184_dup_index_collator/main.c | 168 ++++ .../wiredtiger/test/suite/test_cursor_random.py | 49 ++ .../wiredtiger/test/suite/test_reconfig04.py | 2 - .../wiredtiger/test/suite/test_sweep01.py | 7 +- src/third_party/wiredtiger/test/utility/misc.c | 2 +- .../wiredtiger/test/utility/test_util.h | 2 +- 121 files changed, 4595 insertions(+), 1678 deletions(-) create mode 100644 src/third_party/wiredtiger/ext/test/fail_fs/Makefile.am create mode 100644 src/third_party/wiredtiger/ext/test/fail_fs/fail_fs.c create mode 100644 src/third_party/wiredtiger/src/btree/bt_random.c create mode 100644 src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c create mode 100644 src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c create mode 100644 src/third_party/wiredtiger/test/csuite/wt3135_search_near_collator/main.c create mode 100644 src/third_party/wiredtiger/test/csuite/wt3184_dup_index_collator/main.c (limited to 'src/third_party') diff --git a/src/third_party/wiredtiger/bench/wtperf/config.c b/src/third_party/wiredtiger/bench/wtperf/config.c index a15a3485dde..9eea99eeec4 100644 --- a/src/third_party/wiredtiger/bench/wtperf/config.c +++ 
b/src/third_party/wiredtiger/bench/wtperf/config.c @@ -215,6 +215,7 @@ config_threads(WTPERF *wtperf, const char *config, size_t len) return (EINVAL); } workp = &wtperf->workload[wtperf->workload_cnt++]; + workp->table_index = INT32_MAX; while ((ret = scan->next(scan, &k, &v)) == 0) { if (STRING_MATCH("count", k.str, k.len)) { @@ -233,12 +234,28 @@ config_threads(WTPERF *wtperf, const char *config, size_t len) goto err; continue; } + if (STRING_MATCH("pause", k.str, k.len)) { + if ((workp->pause = v.val) < 0) + goto err; + continue; + } if (STRING_MATCH("read", k.str, k.len) || STRING_MATCH("reads", k.str, k.len)) { if ((workp->read = v.val) < 0) goto err; continue; } + if (STRING_MATCH("read_range", k.str, k.len)) { + if ((workp->read_range = v.val) < 0) + goto err; + continue; + } + if (STRING_MATCH("table", k.str, k.len)) { + if (v.val <= 0) + goto err; + workp->table_index = (int32_t)v.val - 1; + continue; + } if (STRING_MATCH("throttle", k.str, k.len)) { workp->throttle = (uint64_t)v.val; continue; @@ -760,16 +777,33 @@ config_sanity(WTPERF *wtperf) opts->value_sz_min = opts->value_sz; } - if (opts->readonly && wtperf->workload != NULL) + if (wtperf->workload != NULL) for (i = 0, workp = wtperf->workload; - i < wtperf->workload_cnt; ++i, ++workp) - if (workp->insert != 0 || workp->update != 0 || - workp->truncate != 0) { + i < wtperf->workload_cnt; ++i, ++workp) { + if (opts->readonly && + (workp->insert != 0 || workp->update != 0 || + workp->truncate != 0)) { fprintf(stderr, "Invalid workload: insert, update or " "truncate specified with readonly\n"); return (EINVAL); } + if (workp->insert != 0 && + workp->table_index != INT32_MAX) { + fprintf(stderr, + "Invalid workload: Cannot insert into " + "specific table only\n"); + return (EINVAL); + } + if (workp->table_index != INT32_MAX && + workp->table_index >= (int32_t)opts->table_count) { + fprintf(stderr, + "Workload table index %" PRId32 + " is larger than table count %" PRId32, + workp->table_index, 
opts->table_count); + return (EINVAL); + } + } return (0); } diff --git a/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c b/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c index 13fa55e86f5..bb44cfbde59 100644 --- a/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c +++ b/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c @@ -120,6 +120,7 @@ cycle_idle_tables(void *arg) return (NULL); start = stop; +#if 1 /* * Drop the table. Keep retrying on EBUSY failure - it is an * expected return when checkpoints are happening. @@ -136,6 +137,7 @@ cycle_idle_tables(void *arg) } if (check_timing(wtperf, "drop", start, &stop) != 0) return (NULL); +#endif } return (NULL); diff --git a/src/third_party/wiredtiger/bench/wtperf/stress/btree-split-stress.wtperf b/src/third_party/wiredtiger/bench/wtperf/stress/btree-split-stress.wtperf index 86bb288fc6d..eb6ca1cfddc 100644 --- a/src/third_party/wiredtiger/bench/wtperf/stress/btree-split-stress.wtperf +++ b/src/third_party/wiredtiger/bench/wtperf/stress/btree-split-stress.wtperf @@ -6,5 +6,4 @@ run_time=300 reopen_connection=false populate_threads=2 value_sz=256 -read_range=100 -threads=((count=4,inserts=1,throttle=100000),(count=8,reads=1)) +threads=((count=4,inserts=1,throttle=100000),(count=8,reads=1,read_range=100)) diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.c b/src/third_party/wiredtiger/bench/wtperf/wtperf.c index baa259f8817..7f5e5ad3373 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf.c +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.c @@ -432,19 +432,17 @@ err: wtperf->error = wtperf->stop = true; * search do them. Ensuring the keys we see are always in order. 
*/ static int -do_range_reads(WTPERF *wtperf, WT_CURSOR *cursor) +do_range_reads(WTPERF *wtperf, WT_CURSOR *cursor, int64_t read_range) { - CONFIG_OPTS *opts; - size_t range; uint64_t next_val, prev_val; + int64_t range; char *range_key_buf; char buf[512]; int ret; - opts = wtperf->opts; ret = 0; - if (opts->read_range == 0) + if (read_range == 0) return (0); memset(&buf[0], 0, 512 * sizeof(char)); @@ -454,7 +452,7 @@ do_range_reads(WTPERF *wtperf, WT_CURSOR *cursor) testutil_check(cursor->get_key(cursor, &range_key_buf)); extract_key(range_key_buf, &next_val); - for (range = 0; range < opts->read_range; ++range) { + for (range = 0; range < read_range; ++range) { prev_val = next_val; ret = cursor->next(cursor); /* We are done if we reach the end. */ @@ -475,12 +473,56 @@ do_range_reads(WTPERF *wtperf, WT_CURSOR *cursor) return (0); } +/* pre_load_data -- + * Pull everything into cache before starting the workload phase. + */ +static int +pre_load_data(WTPERF *wtperf) +{ + CONFIG_OPTS *opts; + WT_CONNECTION *conn; + WT_CURSOR *cursor; + WT_SESSION *session; + char *key; + int ret; + size_t i; + + opts = wtperf->opts; + conn = wtperf->conn; + + if ((ret = conn->open_session( + conn, NULL, opts->sess_config, &session)) != 0) { + lprintf(wtperf, ret, 0, "worker: WT_CONNECTION.open_session"); + goto err; + } + for (i = 0; i < opts->table_count; i++) { + if ((ret = session->open_cursor(session, + wtperf->uris[i], NULL, NULL, &cursor)) != 0) { + lprintf(wtperf, ret, 0, + "worker: WT_SESSION.open_cursor: %s", + wtperf->uris[i]); + goto err; + } + while (cursor->next(cursor) == 0) + if ((ret = cursor->get_key(cursor, &key)) != 0) + goto err; + if ((ret = cursor->close(cursor)) != 0) + goto err; + } + if ((ret = session->close(session, NULL)) != 0) + goto err; + if (ret != 0) +err: lprintf(wtperf, ret, 0, "Pre-workload traverse error"); + return (ret); +} + static void * worker(void *arg) { struct timespec start, stop; CONFIG_OPTS *opts; TRACK *trk; + WORKLOAD *workload; 
WTPERF *wtperf; WTPERF_THREAD *thread; WT_CONNECTION *conn; @@ -495,13 +537,14 @@ worker(void *arg) char buf[512]; thread = (WTPERF_THREAD *)arg; + workload = thread->workload; wtperf = thread->wtperf; opts = wtperf->opts; conn = wtperf->conn; cursors = NULL; - log_table_cursor = NULL; /* -Wconditional-initialized */ + cursor = log_table_cursor = NULL; /* -Wconditional-initialized */ ops = 0; - ops_per_txn = thread->workload->ops_per_txn; + ops_per_txn = workload->ops_per_txn; session = NULL; trk = NULL; @@ -510,7 +553,6 @@ worker(void *arg) lprintf(wtperf, ret, 0, "worker: WT_CONNECTION.open_session"); goto err; } - cursors = dcalloc(opts->table_count, sizeof(WT_CURSOR *)); for (i = 0; i < opts->table_count_idle; i++) { snprintf(buf, 512, "%s_idle%05d", wtperf->uris[0], (int)i); if ((ret = session->open_cursor( @@ -525,14 +567,34 @@ worker(void *arg) goto err; } } - for (i = 0; i < opts->table_count; i++) { + if (workload->table_index != INT32_MAX) { if ((ret = session->open_cursor(session, - wtperf->uris[i], NULL, NULL, &cursors[i])) != 0) { + wtperf->uris[workload->table_index], + NULL, NULL, &cursor)) != 0) { lprintf(wtperf, ret, 0, "worker: WT_SESSION.open_cursor: %s", - wtperf->uris[i]); + wtperf->uris[workload->table_index]); + goto err; + } + if ((ret = session->open_cursor(session, + wtperf->uris[workload->table_index], + NULL, "next_random=true", &thread->rand_cursor)) != 0) { + lprintf(wtperf, ret, 0, + "worker: WT_SESSION.open_cursor: random %s", + wtperf->uris[workload->table_index]); goto err; } + } else { + cursors = dcalloc(opts->table_count, sizeof(WT_CURSOR *)); + for (i = 0; i < opts->table_count; i++) { + if ((ret = session->open_cursor(session, + wtperf->uris[i], NULL, NULL, &cursors[i])) != 0) { + lprintf(wtperf, ret, 0, + "worker: WT_SESSION.open_cursor: %s", + wtperf->uris[i]); + goto err; + } + } } if (opts->log_like_table && (ret = session->open_cursor(session, wtperf->log_table_uri, NULL, NULL, &log_table_cursor)) != 0) { @@ -543,19 
+605,19 @@ worker(void *arg) } /* Setup the timer for throttling. */ - if (thread->workload->throttle != 0) + if (workload->throttle != 0) setup_throttle(thread); /* Setup for truncate */ - if (thread->workload->truncate != 0) + if (workload->truncate != 0) if ((ret = setup_truncate(wtperf, thread, session)) != 0) goto err; key_buf = thread->key_buf; value_buf = thread->value_buf; - op = thread->workload->ops; - op_end = op + sizeof(thread->workload->ops); + op = workload->ops; + op_end = op + sizeof(workload->ops); if ((ops_per_txn != 0 || opts->log_like_table) && (ret = session->begin_transaction(session, NULL)) != 0) { @@ -564,6 +626,8 @@ worker(void *arg) } while (!wtperf->stop) { + if (workload->pause != 0) + (void)sleep((unsigned int)workload->pause); /* * Generate the next key and setup operation specific * statistics tracking objects. @@ -603,10 +667,12 @@ worker(void *arg) generate_key(opts, key_buf, next_val); - /* - * Spread the data out around the multiple databases. - */ - cursor = cursors[map_key_to_table(wtperf->opts, next_val)]; + if (workload->table_index == INT32_MAX) + /* + * Spread the data out around the multiple databases. + */ + cursor = cursors[ + map_key_to_table(wtperf->opts, next_val)]; /* * Skip the first time we do an operation, when trk->ops @@ -642,7 +708,8 @@ worker(void *arg) * for several operations, confirming that the * next key is in the correct order. 
*/ - ret = do_range_reads(wtperf, cursor); + ret = do_range_reads(wtperf, + cursor, workload->read_range); } if (ret == 0 || ret == WT_NOTFOUND) @@ -689,7 +756,7 @@ worker(void *arg) */ strncpy(value_buf, value, opts->value_sz_max - 1); - if (thread->workload->update_delta != 0) + if (workload->update_delta != 0) update_value_delta(thread); if (value_buf[0] == 'a') value_buf[0] = 'b'; @@ -806,7 +873,7 @@ op_err: if (ret == WT_ROLLBACK && ops_per_txn != 0) { /* Schedule the next operation */ if (++op == op_end) - op = thread->workload->ops; + op = workload->ops; /* * Decrement throttle ops and check if we should sleep @@ -843,7 +910,7 @@ run_mix_schedule_op(WORKLOAD *workp, int op, int64_t op_cnt) uint8_t *p, *end; /* Jump around the array to roughly spread out the operations. */ - jump = 100 / op_cnt; + jump = (int)(100 / op_cnt); /* * Find a read operation and replace it with another operation. This @@ -884,17 +951,6 @@ run_mix_schedule(WTPERF *wtperf, WORKLOAD *workp) opts = wtperf->opts; - /* Confirm reads, inserts, truncates and updates cannot all be zero. */ - if (workp->insert == 0 && workp->read == 0 && - workp->truncate == 0 && workp->update == 0) { - lprintf(wtperf, EINVAL, 0, "no operations scheduled"); - return (EINVAL); - } - - /* - * Handle truncate first - it's a special case that can't be used in - * a mixed workload. - */ if (workp->truncate != 0) { if (workp->insert != 0 || workp->read != 0 || workp->update != 0) { @@ -906,6 +962,12 @@ run_mix_schedule(WTPERF *wtperf, WORKLOAD *workp) return (0); } + /* Confirm reads, inserts and updates cannot all be zero. 
*/ + if (workp->insert == 0 && workp->read == 0 && workp->update == 0) { + lprintf(wtperf, EINVAL, 0, "no operations scheduled"); + return (EINVAL); + } + /* * Check for a simple case where the thread is only doing insert or * update operations (because the default operation for a @@ -2244,6 +2306,8 @@ start_run(WTPERF *wtperf) opts->checkpoint_threads, checkpoint_worker) != 0) goto err; } + if (opts->pre_load_data && (ret = pre_load_data(wtperf)) != 0) + goto err; /* Execute the workload. */ if ((ret = execute_workload(wtperf)) != 0) goto err; @@ -2827,13 +2891,42 @@ static uint64_t wtperf_rand(WTPERF_THREAD *thread) { CONFIG_OPTS *opts; + WT_CURSOR *rnd_cursor; WTPERF *wtperf; double S1, S2, U; uint64_t rval; + int ret; + char *key_buf; wtperf = thread->wtperf; opts = wtperf->opts; + /* + * If we have a random cursor set up then use it. + */ + if ((rnd_cursor = thread->rand_cursor) != NULL) { + if ((ret = rnd_cursor->next(rnd_cursor)) != 0) { + lprintf(wtperf, ret, 0, "worker: rand next failed"); + /* 0 is outside the expected range. */ + return (0); + } + if ((ret = rnd_cursor->get_key(rnd_cursor, &key_buf)) != 0) { + lprintf(wtperf, ret, 0, + "worker: rand next key retrieval"); + return (0); + } + /* + * Resetting the cursor is not fatal. We still return the + * value we retrieved above. We do it so that we don't + * leave a cursor positioned. + */ + if ((ret = rnd_cursor->reset(rnd_cursor)) != 0) + lprintf(wtperf, ret, 0, + "worker: rand cursor reset failed"); + extract_key(key_buf, &rval); + return (rval); + } + /* * Use WiredTiger's random number routine: it's lock-free and fairly * good. 
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.h b/src/third_party/wiredtiger/bench/wtperf/wtperf.h index 81d74e134f6..3efb8ab700e 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf.h +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.h @@ -66,6 +66,9 @@ typedef struct { uint64_t throttle; /* Maximum operations/second */ /* Number of operations per transaction. Zero for autocommit */ int64_t ops_per_txn; + int64_t pause; /* Time between scans */ + int64_t read_range; /* Range of reads */ + int32_t table_index; /* Table to focus ops on */ int64_t truncate; /* Truncate ratio */ uint64_t truncate_pct; /* Truncate Percent */ uint64_t truncate_count; /* Truncate Count */ @@ -225,6 +228,7 @@ typedef struct { struct __wtperf_thread { /* Per-thread structure */ WTPERF *wtperf; /* Enclosing configuration */ + WT_CURSOR *rand_cursor; /* Random key cursor */ WT_RAND_STATE rnd; /* Random number generation state */ diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i index 680eb53a90e..63cef4c28fb 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i @@ -145,12 +145,13 @@ DEF_OPT_AS_UINT32(populate_ops_per_txn, 0, "phase, zero for auto-commit") DEF_OPT_AS_UINT32(populate_threads, 1, "number of populate threads, 1 for bulk load") +DEF_OPT_AS_BOOL(pre_load_data, 0, + "Scan all data prior to starting the workload phase to warm the cache") DEF_OPT_AS_UINT32(random_range, 0, "if non zero choose a value from within this range as the key for " "insert operations") DEF_OPT_AS_BOOL(random_value, 0, "generate random content for the value") DEF_OPT_AS_BOOL(range_partition, 0, "partition data by range (vs hash)") -DEF_OPT_AS_UINT32(read_range, 0, "scan a range of keys after each search") DEF_OPT_AS_BOOL(readonly, 0, "reopen the connection between populate and workload phases in readonly " "mode. 
Requires reopen_connection turned on (default). Requires that " @@ -192,9 +193,10 @@ DEF_OPT_AS_STRING(threads, "", "workload configuration: each 'count' " "'threads=((count=2,reads=1)(count=8,reads=1,inserts=2,updates=1))' " "which would create 2 threads doing nothing but reads and 8 threads " "each doing 50% inserts and 25% reads and updates. Allowed configuration " - "values are 'count', 'throttle', 'update_delta', 'reads', 'inserts', " - "'updates', 'truncate', 'truncate_pct' and 'truncate_count'. There are " - "also behavior modifiers, supported modifiers are 'ops_per_txn'") + "values are 'count', 'throttle', 'update_delta', 'reads', 'read_range', " + "'inserts', 'updates', 'truncate', 'truncate_pct' and 'truncate_count'. " + "There are also behavior modifiers, supported modifiers are " + "'ops_per_txn'") DEF_OPT_AS_CONFIG_STRING(transaction_config, "", "WT_SESSION.begin_transaction configuration string, applied during the " "populate phase when populate_ops_per_txn is nonzero") diff --git a/src/third_party/wiredtiger/build_posix/Make.subdirs b/src/third_party/wiredtiger/build_posix/Make.subdirs index 01f23dcbbc1..4ecec37ca6c 100644 --- a/src/third_party/wiredtiger/build_posix/Make.subdirs +++ b/src/third_party/wiredtiger/build_posix/Make.subdirs @@ -17,6 +17,7 @@ ext/encryptors/nop ext/encryptors/rotn ext/extractors/csv ext/test/kvs_bdb HAVE_BERKELEY_DB +ext/test/fail_fs . api/leveldb LEVELDB examples/c diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index 324d1e4f281..1d669fa7fe0 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -524,6 +524,7 @@ connection_runtime_config = [ 'checkpoint', 'compact', 'evict', + 'evict_stuck', 'evictserver', 'fileops', 'handleops', @@ -717,7 +718,7 @@ wiredtiger_open_common =\ ]), Config('extensions', '', r''' list of shared library extensions to load (using dlopen). 
- Any values specified to an library extension are passed to + Any values specified to a library extension are passed to WT_CONNECTION::load_extension as the \c config parameter (for example, extensions=(/path/ext.so={entry=my_entry}))''', diff --git a/src/third_party/wiredtiger/dist/filelist b/src/third_party/wiredtiger/dist/filelist index 13d67ef961b..3886035eaa9 100644 --- a/src/third_party/wiredtiger/dist/filelist +++ b/src/third_party/wiredtiger/dist/filelist @@ -30,6 +30,7 @@ src/btree/bt_io.c src/btree/bt_misc.c src/btree/bt_ovfl.c src/btree/bt_page.c +src/btree/bt_random.c src/btree/bt_read.c src/btree/bt_rebalance.c src/btree/bt_ret.c diff --git a/src/third_party/wiredtiger/dist/flags.py b/src/third_party/wiredtiger/dist/flags.py index 70e18712839..b20a7181532 100644 --- a/src/third_party/wiredtiger/dist/flags.py +++ b/src/third_party/wiredtiger/dist/flags.py @@ -64,6 +64,7 @@ flags = { 'VERB_COMPACT', 'VERB_EVICT', 'VERB_EVICTSERVER', + 'VERB_EVICT_STUCK', 'VERB_FILEOPS', 'VERB_HANDLEOPS', 'VERB_LOG', @@ -116,12 +117,14 @@ flags = { 'SESSION_CAN_WAIT', 'SESSION_INTERNAL', 'SESSION_LOCKED_CHECKPOINT', - 'SESSION_LOCKED_HANDLE_LIST', + 'SESSION_LOCKED_HANDLE_LIST_READ', + 'SESSION_LOCKED_HANDLE_LIST_WRITE', 'SESSION_LOCKED_METADATA', 'SESSION_LOCKED_PASS', 'SESSION_LOCKED_SCHEMA', 'SESSION_LOCKED_SLOT', - 'SESSION_LOCKED_TABLE', + 'SESSION_LOCKED_TABLE_READ', + 'SESSION_LOCKED_TABLE_WRITE', 'SESSION_LOCKED_TURTLE', 'SESSION_LOGGING_INMEM', 'SESSION_LOOKASIDE_CURSOR', diff --git a/src/third_party/wiredtiger/dist/s_define.list b/src/third_party/wiredtiger/dist/s_define.list index 53a3df87615..8911d888077 100644 --- a/src/third_party/wiredtiger/dist/s_define.list +++ b/src/third_party/wiredtiger/dist/s_define.list @@ -39,6 +39,8 @@ WT_PADDING_CHECK WT_READ_BARRIER WT_REF_SIZE WT_SESSION_LOCKED_CHECKPOINT +WT_SESSION_LOCKED_TABLE_READ +WT_SESSION_LOCKED_TABLE_WRITE WT_SESSION_LOCKED_TURTLE WT_SIZE_CHECK WT_STATS_FIELD_TO_OFFSET diff --git 
a/src/third_party/wiredtiger/dist/s_stat b/src/third_party/wiredtiger/dist/s_stat index 5d5937e1833..6aeeca6faa6 100755 --- a/src/third_party/wiredtiger/dist/s_stat +++ b/src/third_party/wiredtiger/dist/s_stat @@ -25,9 +25,6 @@ cat << UNUSED_STAT_FIELDS lock_checkpoint_count lock_checkpoint_wait_application lock_checkpoint_wait_internal -lock_handle_list_count -lock_handle_list_wait_application -lock_handle_list_wait_internal lock_metadata_count lock_metadata_wait_application lock_metadata_wait_internal diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index 2b998c27813..e033f77327f 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -770,6 +770,7 @@ idx ifdef ifdef's iiSii +iiiS iiii iiu ikey @@ -1138,6 +1139,7 @@ subgetraw subgets subinit sublicense +subtest subtree sunique superset @@ -1182,6 +1184,7 @@ txt typedef uB uS +ui uint uintmax unbare @@ -1217,6 +1220,7 @@ upg uri uri's uris +usec usecs usedp userbad @@ -1247,6 +1251,9 @@ vunpack vw vxr waitpid +waker +wakeup +wakeups walk's warmup wb diff --git a/src/third_party/wiredtiger/dist/s_void b/src/third_party/wiredtiger/dist/s_void index 025f6d4c7eb..90425d5a718 100755 --- a/src/third_party/wiredtiger/dist/s_void +++ b/src/third_party/wiredtiger/dist/s_void @@ -78,10 +78,20 @@ func_ok() -e '/int demo_file_sync$/d' \ -e '/int demo_fs_directory_list_free$/d' \ -e '/int demo_fs_exist$/d' \ + -e '/int fail_file_lock$/d' \ + -e '/int fail_file_sync$/d' \ + -e '/int fail_fs_directory_list_free$/d' \ + -e '/int fail_fs_exist$/d' \ + -e '/int fail_fs_simulate_fail$/d' \ + -e '/int fail_fs_terminate$/d' \ -e '/int handle_message$/d' \ -e '/int handle_progress$/d' \ -e '/int helium_cursor_reset$/d' \ -e '/int helium_session_verify$/d' \ + -e '/int index_compare_primary$/d' \ + -e '/int index_compare_S$/d' \ + -e '/int index_compare_u$/d' \ + -e '/int index_extractor_u$/d' \ -e '/int log_print_err$/d' \ -e 
'/int lz4_error$/d' \ -e '/int lz4_pre_size$/d' \ diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index 0af5d6d017e..a4d92345f88 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -288,9 +288,7 @@ connection_stats = [ LockStat('lock_checkpoint_count', 'checkpoint lock acquisitions'), LockStat('lock_checkpoint_wait_application', 'checkpoint lock application thread wait time (usecs)'), LockStat('lock_checkpoint_wait_internal', 'checkpoint lock internal thread wait time (usecs)'), - LockStat('lock_handle_list_count', 'handle-list lock acquisitions'), - LockStat('lock_handle_list_wait_application', 'handle-list lock application thread wait time (usecs)'), - LockStat('lock_handle_list_wait_internal', 'handle-list lock internal thread wait time (usecs)'), + LockStat('lock_handle_list_wait_eviction', 'handle-list lock eviction thread wait time (usecs)'), LockStat('lock_metadata_count', 'metadata lock acquisitions'), LockStat('lock_metadata_wait_application', 'metadata lock application thread wait time (usecs)'), LockStat('lock_metadata_wait_internal', 'metadata lock internal thread wait time (usecs)'), diff --git a/src/third_party/wiredtiger/examples/c/ex_file_system.c b/src/third_party/wiredtiger/examples/c/ex_file_system.c index 56869171558..e807ac54d3b 100644 --- a/src/third_party/wiredtiger/examples/c/ex_file_system.c +++ b/src/third_party/wiredtiger/examples/c/ex_file_system.c @@ -399,6 +399,7 @@ demo_fs_directory_list(WT_FILE_SYSTEM *file_system, uint32_t allocated, count; int ret = 0; char *name, **entries; + void *p; (void)session; /* Unused */ @@ -424,14 +425,16 @@ demo_fs_directory_list(WT_FILE_SYSTEM *file_system, * matter if the list is a bit longer than necessary. 
*/ if (count >= allocated) { - entries = realloc( - entries, (allocated + 10) * sizeof(char *)); - if (entries == NULL) { + p = realloc( + entries, (allocated + 10) * sizeof(*entries)); + if (p == NULL) { ret = ENOMEM; goto err; } - memset(entries + allocated * sizeof(char *), - 0, 10 * sizeof(char *)); + + entries = p; + memset(entries + allocated * sizeof(*entries), + 0, 10 * sizeof(*entries)); allocated += 10; } entries[count++] = strdup(name); diff --git a/src/third_party/wiredtiger/ext/test/fail_fs/Makefile.am b/src/third_party/wiredtiger/ext/test/fail_fs/Makefile.am new file mode 100644 index 00000000000..f31f5395cd1 --- /dev/null +++ b/src/third_party/wiredtiger/ext/test/fail_fs/Makefile.am @@ -0,0 +1,9 @@ +AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include + +noinst_LTLIBRARIES = libwiredtiger_fail_fs.la +libwiredtiger_fail_fs_la_SOURCES = fail_fs.c + +# libtool hack: noinst_LTLIBRARIES turns off building shared libraries as well +# as installation, it will only build static libraries. As far as I can tell, +# the "approved" libtool way to turn them back on is by adding -rpath. +libwiredtiger_fail_fs_la_LDFLAGS = -avoid-version -module -rpath /nowhere diff --git a/src/third_party/wiredtiger/ext/test/fail_fs/fail_fs.c b/src/third_party/wiredtiger/ext/test/fail_fs/fail_fs.c new file mode 100644 index 00000000000..d0d8a14c8c2 --- /dev/null +++ b/src/third_party/wiredtiger/ext/test/fail_fs/fail_fs.c @@ -0,0 +1,847 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. 
+ * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "queue.h" + +#define FAIL_FS_GIGABYTE (1024 * 1024 * 1024) + +#define FAIL_FS_ENV_ENABLE "WT_FAIL_FS_ENABLE" +#define FAIL_FS_ENV_WRITE_ALLOW "WT_FAIL_FS_WRITE_ALLOW" +#define FAIL_FS_ENV_READ_ALLOW "WT_FAIL_FS_READ_ALLOW" + +/* + * A "fail file system", that is, a file system extension that fails when we + * want it to. This is only used in test frameworks, this fact allows us to + * simplify some error paths. This code is not portable to Windows, as it has + * direct knowledge of file descriptors, environment variables and stack + * traces. + * + * When the filesystem extension is configured, parameters can set how many + * reads or writes can be allowed before failure. If this is not fine-grained + * enough, an 'environment' configuration parameter can be specified. If that + * is used, then on every file system read or write, environment variables are + * checked that control when reading or writing should fail. 
+ */ +typedef struct { + WT_FILE_SYSTEM iface; + /* + * WiredTiger performs schema and I/O operations in parallel, all file + * system and file handle access must be thread-safe. This extension + * uses a single, global file system lock. + */ + pthread_rwlock_t lock; /* Lock */ + bool fail_enabled; + bool use_environment; + bool verbose; + int64_t read_ops; + int64_t write_ops; + int64_t allow_reads; + int64_t allow_writes; + /* Queue of file handles */ + TAILQ_HEAD(fail_file_handle_qh, fail_file_handle) fileq; + WT_EXTENSION_API *wtext; /* Extension functions */ +} FAIL_FILE_SYSTEM; + +typedef struct fail_file_handle { + WT_FILE_HANDLE iface; + + /* + * Track the system file descriptor for each file. + */ + FAIL_FILE_SYSTEM *fail_fs; /* Enclosing file system */ + TAILQ_ENTRY(fail_file_handle) q; /* Queue of handles */ + int fd; /* System file descriptor */ +} FAIL_FILE_HANDLE; + +static int fail_file_close(WT_FILE_HANDLE *, WT_SESSION *); +static void fail_file_handle_remove(WT_SESSION *, FAIL_FILE_HANDLE *); +static int fail_file_lock(WT_FILE_HANDLE *, WT_SESSION *, bool); +static int fail_file_read( + WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, void *); +static int fail_file_size(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t *); +static int fail_file_sync(WT_FILE_HANDLE *, WT_SESSION *); +static int fail_file_truncate(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t); +static int fail_file_write( + WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, const void *); +static bool fail_fs_arg( + const char *, WT_CONFIG_ITEM *, WT_CONFIG_ITEM *, int64_t *); +static int fail_fs_directory_list(WT_FILE_SYSTEM *, WT_SESSION *, + const char *, const char *, char ***, uint32_t *); +static int fail_fs_directory_list_free( + WT_FILE_SYSTEM *, WT_SESSION *, char **, uint32_t); +static void fail_fs_env(const char *, int64_t *); +static int fail_fs_exist(WT_FILE_SYSTEM *, WT_SESSION *, const char *, bool *); +static int fail_fs_open(WT_FILE_SYSTEM *, WT_SESSION *, + const char *, 
WT_FS_OPEN_FILE_TYPE, uint32_t, WT_FILE_HANDLE **); +static int fail_fs_remove( + WT_FILE_SYSTEM *, WT_SESSION *, const char *, uint32_t); +static int fail_fs_rename( + WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, uint32_t); +static int fail_fs_simulate_fail( + FAIL_FILE_HANDLE *, WT_SESSION *, int64_t, const char *); +static int fail_fs_size( + WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *); +static int fail_fs_terminate(WT_FILE_SYSTEM *, WT_SESSION *); + +/* + * We use pthread functions for portable locking. + * Assert on errors for simplicity. + */ +static void +fail_fs_allocate_lock(pthread_rwlock_t *lockp) +{ + assert(pthread_rwlock_init(lockp, NULL) == 0); +} + +static void +fail_fs_destroy_lock(pthread_rwlock_t *lockp) +{ + assert(pthread_rwlock_destroy(lockp) == 0); +} + +static void +fail_fs_lock(pthread_rwlock_t *lockp) +{ + assert(pthread_rwlock_wrlock(lockp) == 0); +} + +static void +fail_fs_unlock(pthread_rwlock_t *lockp) +{ + assert(pthread_rwlock_unlock(lockp) == 0); +} + +/* + * fail_file_close -- + * ANSI C close. + */ +static int +fail_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *session) +{ + FAIL_FILE_HANDLE *fail_fh; + FAIL_FILE_SYSTEM *fail_fs; + int ret; + + (void)session; /* Unused */ + + fail_fh = (FAIL_FILE_HANDLE *)file_handle; + fail_fs = fail_fh->fail_fs; + + /* + * We don't actually open an fd when opening directories for flushing, + * so ignore that case here. + */ + if (fail_fh->fd < 0) + return (0); + ret = close(fail_fh->fd); + fail_fh->fd = -1; + fail_fs_lock(&fail_fs->lock); + fail_file_handle_remove(session, fail_fh); + fail_fs_unlock(&fail_fs->lock); + return (ret); +} + +/* + * fail_file_handle_remove -- + * Destroy an in-memory file handle. Should only happen on remove or + * shutdown. The file system lock must be held during this call. 
+ */ +static void +fail_file_handle_remove(WT_SESSION *session, FAIL_FILE_HANDLE *fail_fh) +{ + FAIL_FILE_SYSTEM *fail_fs; + + (void)session; /* Unused */ + fail_fs = fail_fh->fail_fs; + + TAILQ_REMOVE(&fail_fs->fileq, fail_fh, q); + + free(fail_fh->iface.name); + free(fail_fh); +} + +/* + * fail_file_lock -- + * Lock/unlock a file. + */ +static int +fail_file_lock(WT_FILE_HANDLE *file_handle, WT_SESSION *session, bool lock) +{ + /* Locks are always granted. */ + (void)file_handle; /* Unused */ + (void)session; /* Unused */ + (void)lock; /* Unused */ + + return (0); +} + +/* + * fail_file_read -- + * POSIX pread. + */ +static int +fail_file_read(WT_FILE_HANDLE *file_handle, + WT_SESSION *session, wt_off_t offset, size_t len, void *buf) +{ + FAIL_FILE_HANDLE *fail_fh; + FAIL_FILE_SYSTEM *fail_fs; + WT_EXTENSION_API *wtext; + int64_t envint, read_ops; + int ret; + size_t chunk; + ssize_t nr; + uint8_t *addr; + + fail_fh = (FAIL_FILE_HANDLE *)file_handle; + fail_fs = fail_fh->fail_fs; + wtext = fail_fs->wtext; + read_ops = 0; + ret = 0; + + fail_fs_lock(&fail_fs->lock); + + if (fail_fs->use_environment) { + fail_fs_env(FAIL_FS_ENV_ENABLE, &envint); + if (envint != 0) { + if (!fail_fs->fail_enabled) { + fail_fs->fail_enabled = true; + fail_fs_env(FAIL_FS_ENV_READ_ALLOW, + &fail_fs->allow_reads); + fail_fs->read_ops = 0; + } + read_ops = ++fail_fs->read_ops; + } else + fail_fs->fail_enabled = false; + } else + read_ops = ++fail_fs->read_ops; + + fail_fs_unlock(&fail_fs->lock); + + if (fail_fs->fail_enabled && fail_fs->allow_reads != 0 && + read_ops % fail_fs->allow_reads == 0) + return (fail_fs_simulate_fail( + fail_fh, session, read_ops, "read")); + + /* Break reads larger than 1GB into 1GB chunks. */ + for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { + chunk = (len < FAIL_FS_GIGABYTE) ? 
len : FAIL_FS_GIGABYTE; + if ((nr = pread(fail_fh->fd, addr, chunk, offset)) <= 0) { + (void)wtext->err_printf(wtext, session, + "%s: handle-read: failed to read %" PRIuMAX + " bytes at offset %" PRIuMAX ": %s", + fail_fh->iface.name, + (uintmax_t)len, (uintmax_t)offset, + wtext->strerror(wtext, NULL, errno)); + ret = (nr == 0 ? WT_ERROR : errno); + break; + } + } + return (ret); +} + +/* + * fail_file_size -- + * Get the size of a file in bytes, by file handle. + */ +static int +fail_file_size( + WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t *sizep) +{ + FAIL_FILE_HANDLE *fail_fh; + struct stat statbuf; + int ret; + + (void)session; /* Unused */ + + fail_fh = (FAIL_FILE_HANDLE *)file_handle; + ret = 0; + + if ((ret = fstat(fail_fh->fd, &statbuf)) != 0) + return (ret); + *sizep = statbuf.st_size; + return (0); +} + +/* + * fail_file_sync -- + * Ensure the content of the file is stable. This is a no-op in our + * file system. + */ +static int +fail_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *session) +{ + (void)file_handle; /* Unused */ + (void)session; /* Unused */ + + return (0); +} + +/* + * fail_file_truncate -- + * POSIX ftruncate. + */ +static int +fail_file_truncate( + WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset) +{ + FAIL_FILE_HANDLE *fail_fh; + + (void)session; /* Unused */ + + fail_fh = (FAIL_FILE_HANDLE *)file_handle; + return (ftruncate(fail_fh->fd, offset)); +} + +/* + * fail_file_write -- + * POSIX pwrite. 
+ */ +static int +fail_file_write(WT_FILE_HANDLE *file_handle, WT_SESSION *session, + wt_off_t offset, size_t len, const void *buf) +{ + FAIL_FILE_HANDLE *fail_fh; + FAIL_FILE_SYSTEM *fail_fs; + WT_EXTENSION_API *wtext; + int64_t envint, write_ops; + int ret; + size_t chunk; + ssize_t nr; + const uint8_t *addr; + + fail_fh = (FAIL_FILE_HANDLE *)file_handle; + fail_fs = fail_fh->fail_fs; + wtext = fail_fs->wtext; + write_ops = 0; + ret = 0; + + fail_fs_lock(&fail_fs->lock); + + if (fail_fs->use_environment) { + fail_fs_env(FAIL_FS_ENV_ENABLE, &envint); + if (envint != 0) { + if (!fail_fs->fail_enabled) { + fail_fs->fail_enabled = true; + fail_fs_env(FAIL_FS_ENV_WRITE_ALLOW, + &fail_fs->allow_writes); + fail_fs->write_ops = 0; + } + write_ops = ++fail_fs->write_ops; + } else + fail_fs->fail_enabled = false; + } else + write_ops = ++fail_fs->write_ops; + + fail_fs_unlock(&fail_fs->lock); + + if (fail_fs->fail_enabled && fail_fs->allow_writes != 0 && + write_ops % fail_fs->allow_writes == 0) + return (fail_fs_simulate_fail( + fail_fh, session, write_ops, "write")); + + /* Break writes larger than 1GB into 1GB chunks. */ + for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { + chunk = (len < FAIL_FS_GIGABYTE) ? len : FAIL_FS_GIGABYTE; + if ((nr = pwrite(fail_fh->fd, addr, chunk, offset)) <= 0) { + (void)wtext->err_printf(wtext, session, + "%s: handle-write: failed to write %" PRIuMAX + " bytes at offset %" PRIuMAX ": %s", + fail_fh->iface.name, + (uintmax_t)len, (uintmax_t)offset, + wtext->strerror(wtext, NULL, errno)); + ret = (nr == 0 ? WT_ERROR : errno); + break; + } + } + return (ret); +} + +/* + * fail_fs_arg -- + * If the key matches, return the value interpreted as an integer. 
+ */ +static bool +fail_fs_arg(const char *match, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value, + int64_t *argp) +{ + if (strncmp(match, key->str, key->len) == 0 && + match[key->len] == '\0' && + (value->type == WT_CONFIG_ITEM_BOOL || + value->type == WT_CONFIG_ITEM_NUM)) { + *argp = value->val; + return (true); + } + return (false); +} + +/* + * fail_fs_directory_list -- + * Return a list of files in a given sub-directory. + */ +static int +fail_fs_directory_list(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, const char *directory, + const char *prefix, char ***dirlistp, uint32_t *countp) +{ + FAIL_FILE_HANDLE *fail_fh; + FAIL_FILE_SYSTEM *fail_fs; + size_t len, prefix_len; + uint32_t allocated, count; + int ret; + char *name, **entries; + void *p; + + (void)session; /* Unused */ + + fail_fs = (FAIL_FILE_SYSTEM *)file_system; + ret = 0; + *dirlistp = NULL; + *countp = 0; + + entries = NULL; + allocated = count = 0; + len = strlen(directory); + prefix_len = prefix == NULL ? 0 : strlen(prefix); + + fail_fs_lock(&fail_fs->lock); + TAILQ_FOREACH(fail_fh, &fail_fs->fileq, q) { + name = fail_fh->iface.name; + if (strncmp(name, directory, len) != 0 || + (prefix != NULL && strncmp(name, prefix, prefix_len) != 0)) + continue; + + /* + * Increase the list size in groups of 10, it doesn't + * matter if the list is a bit longer than necessary. + */ + if (count >= allocated) { + p = realloc( + entries, (allocated + 10) * sizeof(*entries)); + if (p == NULL) { + ret = ENOMEM; + goto err; + } + entries = p; + memset(entries + allocated * sizeof(*entries), + 0, 10 * sizeof(*entries)); + allocated += 10; + } + entries[count++] = strdup(name); + } + + *dirlistp = entries; + *countp = count; + +err: fail_fs_unlock(&fail_fs->lock); + if (ret == 0) + return (0); + + if (entries != NULL) { + while (count > 0) + free(entries[--count]); + free(entries); + } + + return (ret); +} + +/* + * fail_fs_directory_list_free -- + * Free memory allocated by fail_fs_directory_list. 
+ */ +static int +fail_fs_directory_list_free(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, char **dirlist, uint32_t count) +{ + (void)file_system; /* Unused */ + (void)session; /* Unused */ + + if (dirlist != NULL) { + while (count > 0) + free(dirlist[--count]); + free(dirlist); + } + return (0); +} + +/* + * fail_fs_env -- + * If the name is in the environment, return its integral value. + */ +static void +fail_fs_env(const char *name, int64_t *valp) +{ + int64_t result; + char *s, *value; + + result = 0; + if ((value = getenv(name)) != NULL) { + s = value; + if (strcmp(value, "true") == 0) + result = 1; + else if (strcmp(value, "false") != 0) { + result = strtoll(value, &s, 10); + if (*s != '\0') + result = 0; + } + } + *valp = result; +} + +/* + * fail_fs_exist -- + * Return if the file exists. + */ +static int +fail_fs_exist(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, const char *name, bool *existp) +{ + (void)file_system; /* Unused */ + (void)session; /* Unused */ + + *existp = (access(name, F_OK) == 0); + return (0); +} + +/* + * fail_fs_open -- + * fopen for the fail file system. 
+ */ +static int +fail_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session, + const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags, + WT_FILE_HANDLE **file_handlep) +{ + FAIL_FILE_HANDLE *fail_fh; + FAIL_FILE_SYSTEM *fail_fs; + WT_EXTENSION_API *wtext; + WT_FILE_HANDLE *file_handle; + int fd, open_flags, ret; + + (void)session; /* Unused */ + + *file_handlep = NULL; + + fail_fh = NULL; + fail_fs = (FAIL_FILE_SYSTEM *)file_system; + fd = -1; + ret = 0; + + if (fail_fs->verbose) { + wtext = fail_fs->wtext; + (void)wtext->msg_printf(wtext, session, "fail_fs: open: %s", + name); + } + + fail_fs_lock(&fail_fs->lock); + + open_flags = 0; + if ((flags & WT_FS_OPEN_CREATE) != 0) + open_flags |= O_CREAT; + if ((flags & WT_FS_OPEN_EXCLUSIVE) != 0) + open_flags |= O_EXCL; + if ((flags & WT_FS_OPEN_READONLY) != 0) + open_flags |= O_RDONLY; + else + open_flags |= O_RDWR; + + /* + * Opening a file handle on a directory is only to support filesystems + * that require a directory sync for durability. This is a no-op + * for this file system. + */ + if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY) + fd = -1; + else if ((fd = open(name, open_flags, 0666)) < 0) { + ret = errno; + goto err; + } + + /* We create a handle structure for each open. */ + if ((fail_fh = calloc(1, sizeof(FAIL_FILE_HANDLE))) == NULL) { + ret = ENOMEM; + goto err; + } + + /* Initialize private information. */ + fail_fh->fail_fs = fail_fs; + fail_fh->fd = fd; + + /* Initialize public information. */ + file_handle = (WT_FILE_HANDLE *)fail_fh; + if ((file_handle->name = strdup(name)) == NULL) { + ret = ENOMEM; + goto err; + } + + /* Setup the function call table. 
*/ + file_handle->close = fail_file_close; + file_handle->fh_advise = NULL; + file_handle->fh_extend = NULL; + file_handle->fh_extend_nolock = NULL; + file_handle->fh_lock = fail_file_lock; + file_handle->fh_map = NULL; + file_handle->fh_map_discard = NULL; + file_handle->fh_map_preload = NULL; + file_handle->fh_unmap = NULL; + file_handle->fh_read = fail_file_read; + file_handle->fh_size = fail_file_size; + file_handle->fh_sync = fail_file_sync; + file_handle->fh_sync_nowait = NULL; + file_handle->fh_truncate = fail_file_truncate; + file_handle->fh_write = fail_file_write; + + TAILQ_INSERT_HEAD(&fail_fs->fileq, fail_fh, q); + + *file_handlep = file_handle; + + if (0) { +err: if (fd != -1) + (void)close(fd); + free(fail_fh); + } + + fail_fs_unlock(&fail_fs->lock); + return (ret); +} + +/* + * fail_fs_remove -- + * POSIX remove. + */ +static int +fail_fs_remove(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, const char *name, uint32_t flags) +{ + (void)file_system; /* Unused */ + (void)session; /* Unused */ + (void)flags; /* Unused */ + + return (unlink(name)); +} + +/* + * fail_fs_rename -- + * POSIX rename. + */ +static int +fail_fs_rename(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, const char *from, const char *to, uint32_t flags) +{ + (void)file_system; /* Unused */ + (void)session; /* Unused */ + (void)flags; /* Unused */ + + return (rename(from, to)); +} + +/* + * fail_fs_simulate_fail -- + * Simulate a failure from this file system by reporting it + * and returning a non-zero return code. 
+ */ +static int +fail_fs_simulate_fail(FAIL_FILE_HANDLE *fail_fh, WT_SESSION *session, + int64_t nops, const char *opkind) +{ + FAIL_FILE_SYSTEM *fail_fs; + WT_EXTENSION_API *wtext; +#ifdef __FreeBSD__ + size_t btret, i; +#else + int btret, i; +#endif + void *bt[100]; + char **btstr; + + fail_fs = fail_fh->fail_fs; + if (fail_fs->verbose) { + wtext = fail_fs->wtext; + (void)wtext->msg_printf(wtext, session, + "fail_fs: %s: simulated failure after %" PRId64 + " %s operations", fail_fh->iface.name, nops, opkind); +#ifdef __FreeBSD__ + btret = backtrace(bt, sizeof(bt) / sizeof(bt[0])); +#else + btret = backtrace(bt, (int)(sizeof(bt) / sizeof(bt[0]))); +#endif + if ((btstr = backtrace_symbols(bt, btret)) != NULL) { + for (i = 0; i < btret; i++) + (void)wtext->msg_printf(wtext, session, " %s", + btstr[i]); + free(btstr); + } + } + return (EIO); +} + +/* + * fail_fs_size -- + * Get the size of a file in bytes, by file name. + */ +static int +fail_fs_size(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, const char *name, wt_off_t *sizep) +{ + struct stat statbuf; + int ret; + + (void)file_system; /* Unused */ + (void)session; /* Unused */ + + ret = 0; + if ((ret = stat(name, &statbuf)) != 0) + return (ret); + *sizep = statbuf.st_size; + return (0); +} + +/* + * fail_fs_terminate -- + * Discard any resources on termination + */ +static int +fail_fs_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *session) +{ + FAIL_FILE_HANDLE *fail_fh; + FAIL_FILE_SYSTEM *fail_fs; + + fail_fs = (FAIL_FILE_SYSTEM *)file_system; + + while ((fail_fh = TAILQ_FIRST(&fail_fs->fileq)) != NULL) + fail_file_handle_remove(session, fail_fh); + + fail_fs_destroy_lock(&fail_fs->lock); + free(fail_fs); + + return (0); +} + +/* + * wiredtiger_extension_init -- + * WiredTiger fail filesystem extension. 
+ */ +int +wiredtiger_extension_init(WT_CONNECTION *conn, WT_CONFIG_ARG *config) +{ + FAIL_FILE_SYSTEM *fail_fs; + WT_CONFIG_ITEM k, v; + WT_CONFIG_PARSER *config_parser; + WT_EXTENSION_API *wtext; + WT_FILE_SYSTEM *file_system; + int64_t argval; + int ret; + + ret = 0; + wtext = conn->get_extension_api(conn); + if ((fail_fs = calloc(1, sizeof(FAIL_FILE_SYSTEM))) == NULL) { + (void)wtext->err_printf(wtext, NULL, + "fail_file_system extension_init: %s", + wtext->strerror(wtext, NULL, ENOMEM)); + return (ENOMEM); + } + fail_fs->wtext = wtext; + file_system = (WT_FILE_SYSTEM *)fail_fs; + + /* Get any configuration values. */ + if ((ret = wtext->config_parser_open_arg( + wtext, NULL, config, &config_parser)) != 0) { + (void)wtext->err_printf(wtext, NULL, + "WT_EXTENSION_API.config_parser_open: config: %s", + wtext->strerror(wtext, NULL, ret)); + goto err; + } + while ((ret = config_parser->next(config_parser, &k, &v)) == 0) { + if (fail_fs_arg("environment", &k, &v, &argval)) { + fail_fs->use_environment = (argval != 0); + continue; + } else if (fail_fs_arg("verbose", &k, &v, &argval)) { + fail_fs->verbose = (argval != 0); + continue; + } else if (fail_fs_arg("allow_writes", &k, &v, + &fail_fs->allow_writes)) + continue; + else if (fail_fs_arg("allow_reads", &k, &v, + &fail_fs->allow_reads)) + continue; + + (void)wtext->err_printf(wtext, NULL, + "WT_CONFIG_PARSER.next: unexpected configuration " + "information: %.*s=%.*s: %s", + (int)k.len, k.str, (int)v.len, v.str, + wtext->strerror(wtext, NULL, ret)); + goto err; + } + if (ret != WT_NOTFOUND) { + (void)wtext->err_printf(wtext, NULL, + "WT_CONFIG_PARSER.next: config: %s", + wtext->strerror(wtext, NULL, ret)); + goto err; + } + if ((ret = config_parser->close(config_parser)) != 0) { + (void)wtext->err_printf(wtext, NULL, + "WT_CONFIG_PARSER.close: config: %s", + wtext->strerror(wtext, NULL, ret)); + goto err; + } + if (fail_fs->allow_writes != 0 || fail_fs->allow_reads != 0) + fail_fs->fail_enabled = true; + + 
fail_fs_allocate_lock(&fail_fs->lock); + /* Initialize the in-memory jump table. */ + file_system->fs_directory_list = fail_fs_directory_list; + file_system->fs_directory_list_free = fail_fs_directory_list_free; + file_system->fs_exist = fail_fs_exist; + file_system->fs_open_file = fail_fs_open; + file_system->fs_remove = fail_fs_remove; + file_system->fs_rename = fail_fs_rename; + file_system->fs_size = fail_fs_size; + file_system->terminate = fail_fs_terminate; + if ((ret = conn->set_file_system(conn, file_system, NULL)) != 0) { + (void)wtext->err_printf(wtext, NULL, + "WT_CONNECTION.set_file_system: %s", + wtext->strerror(wtext, NULL, ret)); + goto err; + } + return (0); + +err: free(fail_fs); + return (ret); +} diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index afb2305bbf9..d2d2afda061 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -1,5 +1,5 @@ { - "commit": "48a3cbc17fa902528217287fd075c87efb44aebc", + "commit": "e1bcc30da91eedd0b17cebb725cc7e607ffa2340", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-3.6" } diff --git a/src/third_party/wiredtiger/src/async/async_api.c b/src/third_party/wiredtiger/src/async/async_api.c index 54bcb7cd26c..026a008188c 100644 --- a/src/third_party/wiredtiger/src/async/async_api.c +++ b/src/third_party/wiredtiger/src/async/async_api.c @@ -240,8 +240,7 @@ __async_start(WT_SESSION_IMPL *session) async = conn->async; TAILQ_INIT(&async->formatqh); WT_RET(__wt_spin_init(session, &async->ops_lock, "ops")); - WT_RET(__wt_cond_alloc( - session, "async flush", false, &async->flush_cond)); + WT_RET(__wt_cond_alloc(session, "async flush", &async->flush_cond)); WT_RET(__wt_async_op_init(session)); /* @@ -541,7 +540,7 @@ retry: async->flush_op.state = WT_ASYNCOP_READY; WT_RET(__wt_async_op_enqueue(session, &async->flush_op)); while (async->flush_state != WT_ASYNC_FLUSH_COMPLETE) - __wt_cond_wait(session, async->flush_cond, 100000); + 
__wt_cond_wait(session, async->flush_cond, 100000, NULL); /* * Flush is done. Clear the flags. */ diff --git a/src/third_party/wiredtiger/src/async/async_worker.c b/src/third_party/wiredtiger/src/async/async_worker.c index b1bc3902f7c..11f59ed14f1 100644 --- a/src/third_party/wiredtiger/src/async/async_worker.c +++ b/src/third_party/wiredtiger/src/async/async_worker.c @@ -107,7 +107,7 @@ __async_flush_wait(WT_SESSION_IMPL *session, WT_ASYNC *async, uint64_t my_gen) { while (async->flush_state == WT_ASYNC_FLUSHING && async->flush_gen == my_gen) - __wt_cond_wait(session, async->flush_cond, 10000); + __wt_cond_wait(session, async->flush_cond, 10000, NULL); } /* diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c index d18b9b76992..5fde2237538 100644 --- a/src/third_party/wiredtiger/src/btree/bt_cursor.c +++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c @@ -76,11 +76,11 @@ __cursor_fix_implicit(WT_BTREE *btree, WT_CURSOR_BTREE *cbt) } /* - * __cursor_valid -- + * __wt_cursor_valid -- * Return if the cursor references an valid key/value pair. */ -static inline bool -__cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) +bool +__wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) { WT_BTREE *btree; WT_CELL *cell; @@ -330,7 +330,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(session, cbt, cbt->ref, false) : __cursor_col_search(session, cbt, cbt->ref)); - valid = cbt->compare == 0 && __cursor_valid(cbt, &upd); + valid = cbt->compare == 0 && __wt_cursor_valid(cbt, &upd); } if (!valid) { WT_ERR(__cursor_func_init(cbt, true)); @@ -338,7 +338,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_ERR(btree->type == BTREE_ROW ? 
__cursor_row_search(session, cbt, NULL, false) : __cursor_col_search(session, cbt, NULL)); - valid = cbt->compare == 0 && __cursor_valid(cbt, &upd); + valid = cbt->compare == 0 && __wt_cursor_valid(cbt, &upd); } if (valid) @@ -419,14 +419,14 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) * Ignore those cases, it makes things too complicated. */ if (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1) - valid = __cursor_valid(cbt, &upd); + valid = __wt_cursor_valid(cbt, &upd); } if (!valid) { WT_ERR(__cursor_func_init(cbt, true)); WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(session, cbt, NULL, true) : __cursor_col_search(session, cbt, NULL)); - valid = __cursor_valid(cbt, &upd); + valid = __wt_cursor_valid(cbt, &upd); } /* @@ -462,7 +462,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(session, cbt, NULL, true) : __cursor_col_search(session, cbt, NULL)); - if (__cursor_valid(cbt, &upd)) { + if (__wt_cursor_valid(cbt, &upd)) { exact = cbt->compare; ret = __wt_kv_return(session, cbt, upd); } else if ((ret = __wt_btcur_prev(cbt, false)) != WT_NOTFOUND) @@ -537,7 +537,7 @@ retry: WT_RET(__cursor_func_init(cbt, true)); * Fail in that case, the record exists. */ if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) && - ((cbt->compare == 0 && __cursor_valid(cbt, NULL)) || + ((cbt->compare == 0 && __wt_cursor_valid(cbt, NULL)) || (cbt->compare != 0 && __cursor_fix_implicit(btree, cbt)))) WT_ERR(WT_DUPLICATE_KEY); @@ -552,7 +552,7 @@ retry: WT_RET(__cursor_func_init(cbt, true)); * key/value pair. */ if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) && - cbt->compare == 0 && __cursor_valid(cbt, NULL)) + cbt->compare == 0 && __wt_cursor_valid(cbt, NULL)) WT_ERR(WT_DUPLICATE_KEY); ret = __cursor_row_modify(session, cbt, false); @@ -682,12 +682,12 @@ retry: WT_RET(__cursor_func_init(cbt, true)); /* * If we find a matching record, check whether an update would * conflict. 
Do this before checking if the update is visible - * in __cursor_valid, or we can miss conflict. + * in __wt_cursor_valid, or we can miss conflict. */ WT_ERR(__curfile_update_check(cbt)); /* Remove the record if it exists. */ - if (cbt->compare != 0 || !__cursor_valid(cbt, NULL)) { + if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) { if (!__cursor_fix_implicit(btree, cbt)) WT_ERR(WT_NOTFOUND); /* @@ -711,7 +711,7 @@ retry: WT_RET(__cursor_func_init(cbt, true)); /* Check whether an update would conflict. */ WT_ERR(__curfile_update_check(cbt)); - if (cbt->compare != 0 || !__cursor_valid(cbt, NULL)) + if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) WT_ERR(WT_NOTFOUND); ret = __cursor_row_modify(session, cbt, true); @@ -786,7 +786,8 @@ retry: WT_RET(__cursor_func_init(cbt, true)); */ if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) { WT_ERR(__curfile_update_check(cbt)); - if ((cbt->compare != 0 || !__cursor_valid(cbt, NULL)) && + if ((cbt->compare != 0 || + !__wt_cursor_valid(cbt, NULL)) && !__cursor_fix_implicit(btree, cbt)) WT_ERR(WT_NOTFOUND); } @@ -800,7 +801,7 @@ retry: WT_RET(__cursor_func_init(cbt, true)); */ if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) { WT_ERR(__curfile_update_check(cbt)); - if (cbt->compare != 0 || !__cursor_valid(cbt, NULL)) + if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) WT_ERR(WT_NOTFOUND); } ret = __cursor_row_modify(session, cbt, false); @@ -829,111 +830,6 @@ err: if (ret == WT_RESTART) { return (ret); } -/* - * __wt_btcur_next_random -- - * Move to a random record in the tree. There are two algorithms, one - * where we select a record at random from the whole tree on each - * retrieval and one where we first select a record at random from the - * whole tree, and then subsequently sample forward from that location. - * The sampling approach allows us to select reasonably uniform random - * points from unbalanced trees. 
- */ -int -__wt_btcur_next_random(WT_CURSOR_BTREE *cbt) -{ - WT_BTREE *btree; - WT_DECL_RET; - WT_SESSION_IMPL *session; - WT_UPDATE *upd; - wt_off_t size; - uint64_t skip; - - session = (WT_SESSION_IMPL *)cbt->iface.session; - btree = cbt->btree; - - /* - * Only supports row-store: applications can trivially select a random - * value from a column-store, if there were any reason to do so. - */ - if (btree->type != BTREE_ROW) - WT_RET_MSG(session, ENOTSUP, - "WT_CURSOR.next_random only supported by row-store tables"); - - WT_STAT_CONN_INCR(session, cursor_next); - WT_STAT_DATA_INCR(session, cursor_next); - - /* - * If retrieving random values without sampling, or we don't have a - * page reference, pick a roughly random leaf page in the tree. - */ - if (cbt->ref == NULL || cbt->next_random_sample_size == 0) { - /* - * Skip past the sample size of the leaf pages in the tree - * between each random key return to compensate for unbalanced - * trees. - * - * Use the underlying file size divided by its block allocation - * size as our guess of leaf pages in the file (this can be - * entirely wrong, as it depends on how many pages are in this - * particular checkpoint, how large the leaf and internal pages - * really are, and other factors). Then, divide that value by - * the configured sample size and increment the final result to - * make sure tiny files don't leave us with a skip value of 0. - * - * !!! - * Ideally, the number would be prime to avoid restart issues. - */ - if (cbt->next_random_sample_size != 0) { - WT_ERR(btree->bm->size(btree->bm, session, &size)); - cbt->next_random_leaf_skip = (uint64_t) - ((size / btree->allocsize) / - cbt->next_random_sample_size) + 1; - } - - /* - * Choose a leaf page from the tree. - */ - WT_ERR(__cursor_func_init(cbt, true)); - WT_WITH_PAGE_INDEX( - session, ret = __wt_row_random_descent(session, cbt)); - WT_ERR(ret); - } else { - /* - * Read through the tree, skipping leaf pages. 
Be cautious about - * the skip count: if the last leaf page skipped was also the - * last leaf page in the tree, it may be set to zero on return - * with the end-of-walk condition. - * - * Pages read for data sampling aren't "useful"; don't update - * the read generation of pages already in memory, and if a page - * is read, set its generation to a low value so it is evicted - * quickly. - */ - for (skip = - cbt->next_random_leaf_skip; cbt->ref == NULL || skip > 0;) - WT_ERR(__wt_tree_walk_skip(session, &cbt->ref, &skip, - WT_READ_NO_GEN | - WT_READ_SKIP_INTL | WT_READ_WONT_NEED)); - } - - /* - * Select a random entry from the leaf page. If it's not valid, move to - * the next entry, if that doesn't work, move to the previous entry. - */ - WT_ERR(__wt_row_random_leaf(session, cbt)); - if (__cursor_valid(cbt, &upd)) - WT_ERR(__wt_kv_return(session, cbt, upd)); - else { - if ((ret = __wt_btcur_next(cbt, false)) == WT_NOTFOUND) - ret = __wt_btcur_prev(cbt, false); - WT_ERR(ret); - } - return (0); - -err: WT_TRET(__cursor_reset(cbt)); - return (ret); -} - /* * __wt_btcur_compare -- * Return a comparison between two cursors. 
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c index b62125e069d..d664da2ebd3 100644 --- a/src/third_party/wiredtiger/src/btree/bt_debug.c +++ b/src/third_party/wiredtiger/src/btree/bt_debug.c @@ -652,7 +652,7 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref) page = ref->page; mod = page->modify; - WT_RET(ds->f(ds, "%p", (void *)page)); + WT_RET(ds->f(ds, "%p", (void *)ref)); switch (page->type) { case WT_PAGE_COL_INT: @@ -699,8 +699,6 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref) WT_RET(ds->f(ds, ", evict-lru")); if (F_ISSET_ATOMIC(page, WT_PAGE_OVERFLOW_KEYS)) WT_RET(ds->f(ds, ", overflow-keys")); - if (F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_BLOCK)) - WT_RET(ds->f(ds, ", split-block")); if (F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_INSERT)) WT_RET(ds->f(ds, ", split-insert")); if (F_ISSET_ATOMIC(page, WT_PAGE_UPDATE_IGNORE)) diff --git a/src/third_party/wiredtiger/src/btree/bt_random.c b/src/third_party/wiredtiger/src/btree/bt_random.c new file mode 100644 index 00000000000..44de511f787 --- /dev/null +++ b/src/third_party/wiredtiger/src/btree/bt_random.c @@ -0,0 +1,427 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_row_random_leaf -- + * Return a random key from a row-store leaf page. + */ +int +__wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) +{ + WT_INSERT *ins, **start, **stop; + WT_INSERT_HEAD *ins_head; + WT_PAGE *page; + uint64_t samples; + uint32_t choice, entries, i; + int level; + + page = cbt->ref->page; + start = stop = NULL; /* [-Wconditional-uninitialized] */ + entries = 0; /* [-Wconditional-uninitialized] */ + + __cursor_pos_clear(cbt); + + /* If the page has disk-based entries, select from them. 
*/ + if (page->entries != 0) { + cbt->compare = 0; + cbt->slot = __wt_random(&session->rnd) % page->entries; + + /* + * The real row-store search function builds the key, so we + * have to as well. + */ + return (__wt_row_leaf_key(session, + page, page->pg_row + cbt->slot, cbt->tmp, false)); + } + + /* + * If the tree is new (and not empty), it might have a large insert + * list. + * + * Walk down the list until we find a level with at least 50 entries, + * that's where we'll start rolling random numbers. The value 50 is + * used to ignore levels with only a few entries, that is, levels which + * are potentially badly skewed. + */ + F_SET(cbt, WT_CBT_SEARCH_SMALLEST); + if ((ins_head = WT_ROW_INSERT_SMALLEST(page)) == NULL) + return (WT_NOTFOUND); + for (level = WT_SKIP_MAXDEPTH - 1; level >= 0; --level) { + start = &ins_head->head[level]; + for (entries = 0, stop = start; + *stop != NULL; stop = &(*stop)->next[level]) + ++entries; + + if (entries > 50) + break; + } + + /* + * If it's a tiny list and we went all the way to level 0, correct the + * level; entries is correctly set. + */ + if (level < 0) + level = 0; + + /* + * Step down the skip list levels, selecting a random chunk of the name + * space at each level. + */ + for (samples = entries; level > 0; samples += entries) { + /* + * There are (entries) or (entries + 1) chunks of the name space + * considered at each level. They are: between start and the 1st + * element, between the 1st and 2nd elements, and so on to the + * last chunk which is the name space after the stop element on + * the current level. This last chunk of name space may or may + * not be there: as we descend the levels of the skip list, this + * chunk may appear, depending if the next level down has + * entries logically after the stop point in the current level. 
+ * We can't ignore those entries: because of the algorithm used + * to determine the depth of a skiplist, there may be a large + * number of entries "revealed" by descending a level. + * + * If the next level down has more items after the current stop + * point, there are (entries + 1) chunks to consider, else there + * are (entries) chunks. + */ + if (*(stop - 1) == NULL) + choice = __wt_random(&session->rnd) % entries; + else + choice = __wt_random(&session->rnd) % (entries + 1); + + if (choice == entries) { + /* + * We selected the name space after the stop element on + * this level. Set the start point to the current stop + * point, descend a level and move the stop element to + * the end of the list, that is, the end of the newly + * discovered name space, counting entries as we go. + */ + start = stop; + --start; + --level; + for (entries = 0, stop = start; + *stop != NULL; stop = &(*stop)->next[level]) + ++entries; + } else { + /* + * We selected another name space on the level. Move the + * start pointer the selected number of entries forward + * to the start of the selected chunk (if the selected + * number is 0, start won't move). Set the stop pointer + * to the next element in the list and drop both start + * and stop down a level. + */ + for (i = 0; i < choice; ++i) + start = &(*start)->next[level]; + stop = &(*start)->next[level]; + + --start; + --stop; + --level; + + /* Count the entries in the selected name space. */ + for (entries = 0, + ins = *start; ins != *stop; ins = ins->next[level]) + ++entries; + } + } + + /* + * When we reach the bottom level, entries will already be set. Select + * a random entry from the name space and return it. + * + * It should be impossible for the entries count to be 0 at this point, + * but check for it out of paranoia and to quiet static testing tools. 
+ */ + if (entries > 0) + entries = __wt_random(&session->rnd) % entries; + for (ins = *start; entries > 0; --entries) + ins = ins->next[0]; + + cbt->ins = ins; + cbt->ins_head = ins_head; + cbt->compare = 0; + + /* + * Random lookups in newly created collections can be slow if a page + * consists of a large skiplist. Schedule the page for eviction if we + * encounter a large skiplist. This worthwhile because applications + * that take a sample often take many samples, so the overhead of + * traversing the skip list each time accumulates to real time. + */ + if (samples > 5000) + __wt_page_evict_soon(session, cbt->ref); + + return (0); +} + +/* + * __wt_random_descent -- + * Find a random page in a tree for either sampling or eviction. + */ +int +__wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, bool eviction) +{ + WT_BTREE *btree; + WT_DECL_RET; + WT_PAGE *page; + WT_PAGE_INDEX *pindex; + WT_REF *current, *descent; + uint32_t flags, i, entries, retry; + + btree = S2BT(session); + current = NULL; + retry = 100; + + /* Eviction should not be tapped to do eviction. */ + if (eviction) + flags = WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | + WT_READ_NO_WAIT | WT_READ_NOTFOUND_OK | WT_READ_RESTART_OK; + else + flags = WT_READ_RESTART_OK; + + if (0) { +restart: /* + * Discard the currently held page and restart the search from + * the root. + */ + WT_RET(__wt_page_release(session, current, flags)); + } + + /* Search the internal pages of the tree. */ + current = &btree->root; + for (;;) { + page = current->page; + if (!WT_PAGE_IS_INTERNAL(page)) + break; + + WT_INTL_INDEX_GET(session, page, pindex); + entries = pindex->entries; + + /* Eviction just wants any random child. */ + if (eviction) { + descent = pindex->index[ + __wt_random(&session->rnd) % entries]; + goto descend; + } + + /* + * There may be empty pages in the tree, and they're useless to + * us. 
If we don't find a non-empty page in "entries" random + * guesses, take the first non-empty page in the tree. If the + * search page contains nothing other than empty pages, restart + * from the root some number of times before giving up. + * + * Random sampling is looking for a key/value pair on a random + * leaf page, and so will accept any page that contains a valid + * key/value pair, so on-disk is fine, but deleted is not. + */ + descent = NULL; + for (i = 0; i < entries; ++i) { + descent = + pindex->index[__wt_random(&session->rnd) % entries]; + if (descent->state == WT_REF_MEM || + descent->state == WT_REF_DISK) + break; + } + if (i == entries) + for (i = 0; i < entries; ++i) { + descent = pindex->index[i]; + if (descent->state == WT_REF_MEM || + descent->state == WT_REF_DISK) + break; + } + if (i == entries || descent == NULL) { + if (--retry > 0) + goto restart; + + WT_RET(__wt_page_release(session, current, flags)); + return (WT_NOTFOUND); + } + + /* + * Swap the current page for the child page. If the page splits + * while we're retrieving it, restart the search at the root. + * + * On other error, simply return, the swap call ensures we're + * holding nothing on failure. + */ +descend: if ((ret = + __wt_page_swap(session, current, descent, flags)) == 0) { + current = descent; + continue; + } + if (eviction && (ret == WT_NOTFOUND || ret == WT_RESTART)) + break; + if (ret == WT_RESTART) + goto restart; + return (ret); + } + + /* + * There is no point starting with the root page: the walk will exit + * immediately. In that case we aren't holding a hazard pointer so + * there is nothing to release. + */ + if (!eviction || !__wt_ref_is_root(current)) + *refp = current; + return (0); +} + +/* + * __wt_btcur_next_random -- + * Move to a random record in the tree. 
There are two algorithms, one + * where we select a record at random from the whole tree on each + * retrieval and one where we first select a record at random from the + * whole tree, and then subsequently sample forward from that location. + * The sampling approach allows us to select reasonably uniform random + * points from unbalanced trees. + */ +int +__wt_btcur_next_random(WT_CURSOR_BTREE *cbt) +{ + WT_BTREE *btree; + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UPDATE *upd; + wt_off_t size; + uint64_t n, skip; + + session = (WT_SESSION_IMPL *)cbt->iface.session; + btree = cbt->btree; + + /* + * Only supports row-store: applications can trivially select a random + * value from a column-store, if there were any reason to do so. + */ + if (btree->type != BTREE_ROW) + WT_RET_MSG(session, ENOTSUP, + "WT_CURSOR.next_random only supported by row-store tables"); + + WT_STAT_CONN_INCR(session, cursor_next); + WT_STAT_DATA_INCR(session, cursor_next); + +#ifdef HAVE_DIAGNOSTIC + /* + * Under some conditions we end up using the underlying cursor.next to + * walk through the object. Since there are multiple calls, we can hit + * the cursor-order checks, turn them off. + */ + __wt_cursor_key_order_reset(cbt); +#endif + + /* + * If we don't have a current position in the tree, or if retrieving + * random values without sampling, pick a roughly random leaf page in + * the tree and return an entry from it. + */ + if (cbt->ref == NULL || cbt->next_random_sample_size == 0) { + WT_ERR(__cursor_func_init(cbt, true)); + WT_WITH_PAGE_INDEX(session, + ret = __wt_random_descent(session, &cbt->ref, false)); + if (ret == 0) + goto random_page_entry; + + /* + * Random descent may return not-found: the tree might be empty + * or have so many deleted items we didn't find any valid pages. + * We can't return WT_NOTFOUND to the application unless a tree + * is really empty, fallback to skipping through tree pages. 
+ */ + WT_ERR_NOTFOUND_OK(ret); + } + + /* + * Cursor through the tree, skipping past the sample size of the leaf + * pages in the tree between each random key return to compensate for + * unbalanced trees. + * + * If the random descent attempt failed, we don't have a configured + * sample size, use 100 for no particular reason. + */ + if (cbt->next_random_sample_size == 0) + cbt->next_random_sample_size = 100; + + /* + * If the random descent attempt failed, or it's our first skip attempt, + * we haven't yet set the pages to skip, do it now. + * + * Use the underlying file size divided by its block allocation size as + * our guess of leaf pages in the file (this can be entirely wrong, as + * it depends on how many pages are in this particular checkpoint, how + * large the leaf and internal pages really are, and other factors). + * Then, divide that value by the configured sample size and increment + * the final result to make sure tiny files don't leave us with a skip + * value of 0. + * + * !!! + * Ideally, the number would be prime to avoid restart issues. + */ + if (cbt->next_random_leaf_skip == 0) { + WT_ERR(btree->bm->size(btree->bm, session, &size)); + cbt->next_random_leaf_skip = (uint64_t) + ((size / btree->allocsize) / + cbt->next_random_sample_size) + 1; + } + + /* + * Be paranoid about loop termination: first, if the last leaf page + * skipped was also the last leaf page in the tree, skip may be set to + * zero on return along with the NULL WT_REF end-of-walk condition. + * Second, if a tree has no valid pages at all (the condition after + * initial creation), we might make no progress at all, or finally, if + * a tree has only deleted pages, we'll make progress, but never get a + * useful WT_REF. And, of course, the tree can switch from one of these + * states to another without warning. Decrement skip regardless of what + * is happening in the search, guarantee we eventually quit. 
+ * + * Pages read for data sampling aren't "useful"; don't update the read + * generation of pages already in memory, and if a page is read, set + * its generation to a low value so it is evicted quickly. + */ + for (skip = cbt->next_random_leaf_skip; cbt->ref == NULL || skip > 0;) { + n = skip; + WT_ERR(__wt_tree_walk_skip(session, &cbt->ref, &skip, + WT_READ_NO_GEN | WT_READ_SKIP_INTL | WT_READ_WONT_NEED)); + if (n == skip) { + if (skip == 0) + break; + --skip; + } + } + + /* + * We can't return WT_NOTFOUND to the application unless a tree is + * really empty, fallback to a random entry from the first page in the + * tree that has anything at all. + */ + if (cbt->ref == NULL) + WT_ERR(__wt_btcur_next(cbt, false)); + +random_page_entry: + /* + * Select a random entry from the leaf page. If it's not valid, move to + * the next entry, if that doesn't work, move to the previous entry. + */ + WT_ERR(__wt_row_random_leaf(session, cbt)); + if (__wt_cursor_valid(cbt, &upd)) + WT_ERR(__wt_kv_return(session, cbt, upd)); + else { + if ((ret = __wt_btcur_next(cbt, false)) == WT_NOTFOUND) + ret = __wt_btcur_prev(cbt, false); + WT_ERR(ret); + } + return (0); + +err: WT_TRET(__cursor_reset(cbt)); + return (ret); +} diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index 6b0b8a08c02..45550ff627f 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -53,6 +53,16 @@ __split_oldest_gen(WT_SESSION_IMPL *session) return (oldest); } +/* + * __wt_split_obsolete -- + * Check if it is safe to free / evict based on split generation. + */ +bool +__wt_split_obsolete(WT_SESSION_IMPL *session, uint64_t split_gen) +{ + return (split_gen < __split_oldest_gen(session)); +} + /* * __split_stash_add -- * Add a new entry into the session's split stash list. 
@@ -187,7 +197,7 @@ __split_safe_free(WT_SESSION_IMPL *session, #ifdef HAVE_DIAGNOSTIC /* * __split_verify_intl_key_order -- - * Verify the key order on an internal page after a split, diagnostic only. + * Verify the key order on an internal page after a split. */ static void __split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page) @@ -239,6 +249,46 @@ __split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page) break; } } + +/* + * __split_verify_root -- + * Verify a root page involved in a split. + */ +static int +__split_verify_root(WT_SESSION_IMPL *session, WT_PAGE *page) +{ + WT_DECL_RET; + WT_REF *ref; + + /* The split is complete and live, verify all of the pages involved. */ + __split_verify_intl_key_order(session, page); + + WT_INTL_FOREACH_BEGIN(session, page, ref) { + /* + * An eviction thread might be attempting to evict the page + * (the WT_REF may be WT_REF_LOCKED), or it may be a disk based + * page (the WT_REF may be WT_REF_READING), or it may be in + * some other state. Acquire a hazard pointer for any + * in-memory pages so we know the state of the page. + * + * Ignore pages not in-memory (deleted, on-disk, being read), + * there's no in-memory structure to check. + */ + if ((ret = __wt_page_in(session, + ref, WT_READ_CACHE | WT_READ_NO_EVICT)) == WT_NOTFOUND) + continue; + WT_ERR(ret); + + __split_verify_intl_key_order(session, ref->page); + + WT_ERR(__wt_page_release(session, ref, WT_READ_NO_EVICT)); + } WT_INTL_FOREACH_END; + + return (0); + +err: /* Something really bad just happened. */ + WT_PANIC_RET(session, ret, "fatal error during page split"); +} #endif /* @@ -390,12 +440,12 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, } /* - * __split_ref_step1 -- + * __split_ref_prepare -- * Prepare a set of WT_REFs for a move. 
*/ static void -__split_ref_step1( - WT_SESSION_IMPL *session, WT_PAGE_INDEX *pindex, bool skip_first) +__split_ref_prepare(WT_SESSION_IMPL *session, + WT_PAGE_INDEX *pindex, uint64_t split_gen, bool skip_first) { WT_PAGE *child; WT_REF *child_ref, *ref; @@ -418,30 +468,25 @@ __split_ref_step1( child = ref->page; /* - * Block eviction and splits in newly created pages. + * Block eviction in newly created pages. * * Once the split is live, newly created internal pages might be * evicted and their WT_REF structures freed. If that happened * before all threads exit the index of the page that previously * "owned" the WT_REF, a thread might see a freed WT_REF. To - * ensure that doesn't happen, the newly created page's modify - * structure has a field with a transaction ID that's checked - * before any internal page is evicted. Unfortunately, we don't - * know the correct value until we update the original page's - * index (we need a transaction ID from after that update), but - * the act of updating the original page's index is what allows - * the eviction to happen. + * ensure that doesn't happen, the newly created page contains + * the current split generation and can't be evicted until + * all readers have left the old generation. * - * Split blocking was because historic versions of the split - * code didn't update the WT_REF.home field until after the - * split was live, so the WT_REF.home fields being updated could - * split again before the update, there's a race between splits - * as to which would update them first. The current code updates - * the WT_REF.home fields before going live (in this function), - * this shouldn't be an issue, but for now splits remain turned - * off. 
+ * Historic, we also blocked splits in newly created pages + * because we didn't update the WT_REF.home field until after + * the split was live, so the WT_REF.home fields being updated + * could split again before the update, there's a race between + * splits as to which would update them first. The current code + * updates the WT_REF.home fields before going live (in this + * function), this isn't an issue. */ - F_SET_ATOMIC(child, WT_PAGE_SPLIT_BLOCK); + child->pg_intl_split_gen = split_gen; /* * We use a page flag to prevent the child from splitting from @@ -464,64 +509,6 @@ __split_ref_step1( } } -/* - * __split_ref_step2 -- - * Allow the newly created children to be evicted or split. - */ -static int -__split_ref_step2( - WT_SESSION_IMPL *session, WT_PAGE_INDEX *pindex, bool skip_first) -{ - WT_DECL_RET; - WT_PAGE *child; - WT_REF *ref; - uint32_t i; - - /* - * The split has gone live, enable eviction and splits on the newly - * created internal pages. - */ - WT_WRITE_BARRIER(); - - for (i = skip_first ? 1 : 0; i < pindex->entries; ++i) { - ref = pindex->index[i]; - - /* - * We don't hold hazard pointers on created pages, they cannot - * be evicted because the page-modify transaction value set as - * they were created prevents eviction. (See above, we reset - * that value as part of fixing up the page.) But, an eviction - * thread might be attempting to evict the page (the WT_REF may - * be WT_REF_LOCKED), or it may be a disk based page (the WT_REF - * may be WT_REF_READING), or it may be in some other state. - * Acquire a hazard pointer for any in-memory pages so we know - * the state of the page. Ignore pages not in-memory (deleted, - * on-disk, being read), there's no in-memory structure to fix. - */ - if ((ret = __wt_page_in(session, - ref, WT_READ_CACHE | WT_READ_NO_EVICT)) == WT_NOTFOUND) - continue; - WT_ERR(ret); - - child = ref->page; - - /* The child can now be evicted or split. 
*/ - F_CLR_ATOMIC(child, WT_PAGE_SPLIT_BLOCK); - -#ifdef HAVE_DIAGNOSTIC - WT_WITH_PAGE_INDEX(session, - __split_verify_intl_key_order(session, child)); -#endif - - WT_ERR(__wt_hazard_clear(session, ref)); - } - - return (0); - -err: /* Something really bad just happened. */ - WT_PANIC_RET(session, ret, "fatal error resolving a split"); -} - /* * __split_root -- * Split the root page in-memory, deepening the tree. @@ -653,8 +640,12 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) /* Start making real changes to the tree, errors are fatal. */ complete = WT_ERR_PANIC; + /* Get a generation for this split, mark the root page. */ + split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); + root->pg_intl_split_gen = split_gen; + /* Prepare the WT_REFs for the move. */ - __split_ref_step1(session, alloc_index, false); + __split_ref_prepare(session, alloc_index, split_gen, false); /* * Confirm the root page's index hasn't moved, then update it, which @@ -662,20 +653,17 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) */ WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(root) == pindex); WT_INTL_INDEX_SET(root, alloc_index); + alloc_index = NULL; #ifdef HAVE_DIAGNOSTIC WT_WITH_PAGE_INDEX(session, - __split_verify_intl_key_order(session, root)); + ret = __split_verify_root(session, root)); + WT_ERR(ret); #endif - /* Finalize the WT_REFs we moved. */ - WT_ERR(__split_ref_step2(session, alloc_index, false)); - /* The split is complete and correct, ignore benign errors. */ + /* The split is complete and verified, ignore benign errors. */ complete = WT_ERR_IGNORE; - /* We've installed the allocated page-index, ensure error handling. */ - alloc_index = NULL; - /* * We can't free the previous root's index, there may be threads using * it. Add to the session's discard list, to be freed once we know no @@ -686,7 +674,6 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) * fails, we don't roll back that change, because threads may already * be using the new index. 
*/ - split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *); WT_TRET(__split_safe_free(session, split_gen, false, pindex, size)); root_decr += size; @@ -838,6 +825,10 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, /* Start making real changes to the tree, errors are fatal. */ complete = WT_ERR_PANIC; + /* Get a generation for this split, mark the parent page. */ + split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); + parent->pg_intl_split_gen = split_gen; + /* * Confirm the parent page's index hasn't moved then update it, which * makes the split visible to threads descending the tree. @@ -846,11 +837,6 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, WT_INTL_INDEX_SET(parent, alloc_index); alloc_index = NULL; -#ifdef HAVE_DIAGNOSTIC - WT_WITH_PAGE_INDEX(session, - __split_verify_intl_key_order(session, parent)); -#endif - /* * If discarding the page's original WT_REF field, reset it to split. * Threads cursoring through the tree were blocked because that WT_REF @@ -869,16 +855,25 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, __wt_free(session, ref->page_del); } + /* + * Set the discarded WT_REF state to split, ensuring we don't + * race with any discard of the WT_REF deleted fields. + */ WT_PUBLISH(ref->state, WT_REF_SPLIT); + + /* + * Push out the change: not required for correctness, but stops + * threads spinning on incorrect page references. + */ + WT_FULL_BARRIER(); } - /* - * Push out the changes: not required for correctness, but don't let - * threads spin on incorrect page references longer than necessary. - */ - WT_FULL_BARRIER(); +#ifdef HAVE_DIAGNOSTIC + WT_WITH_PAGE_INDEX(session, + __split_verify_intl_key_order(session, parent)); +#endif - /* The split is complete and correct, ignore benign errors. */ + /* The split is complete and verified, ignore benign errors. 
*/ complete = WT_ERR_IGNORE; /* @@ -908,7 +903,6 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, * * Acquire a new split generation. */ - split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); for (i = 0, deleted_refs = scr->mem; i < deleted_entries; ++i) { next_ref = pindex->index[deleted_refs[i]]; WT_ASSERT(session, next_ref->state == WT_REF_SPLIT); @@ -1160,14 +1154,21 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) /* Start making real changes to the tree, errors are fatal. */ complete = WT_ERR_PANIC; + /* Get a generation for this split, mark the page. */ + split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); + page->pg_intl_split_gen = split_gen; + /* Prepare the WT_REFs for the move. */ - __split_ref_step1(session, alloc_index, true); + __split_ref_prepare(session, alloc_index, split_gen, true); /* Split into the parent. */ WT_ERR(__split_parent(session, page_ref, alloc_index->index, alloc_index->entries, parent_incr, false, false)); - /* Confirm the page's index hasn't moved, then update it. */ + /* + * Confirm the page's index hasn't moved, then update it, which makes + * the split visible to threads descending the tree. + */ WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(page) == pindex); WT_INTL_INDEX_SET(page, replace_index); @@ -1178,18 +1179,9 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) __split_verify_intl_key_order(session, page)); #endif - /* Finalize the WT_REFs we moved. */ - WT_ERR(__split_ref_step2(session, alloc_index, true)); - - /* The split is complete and correct, ignore benign errors. */ + /* The split is complete and verified, ignore benign errors. */ complete = WT_ERR_IGNORE; - /* - * Push out the changes: not required for correctness, but no reason - * to wait. 
- */ - WT_FULL_BARRIER(); - /* * We don't care about the page-index we allocated, all we needed was * the array of WT_REF structures, which has now been split into the @@ -1207,7 +1199,6 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) * back that change, because threads may already be using the new parent * page. */ - split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *); WT_TRET(__split_safe_free(session, split_gen, false, pindex, size)); page_decr += size; @@ -1284,10 +1275,6 @@ __split_internal_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool trylock, for (;;) { parent = ref->home; - /* Skip pages that aren't ready to split. */ - if (F_ISSET_ATOMIC(parent, WT_PAGE_SPLIT_BLOCK)) - return (EBUSY); - if (trylock) WT_RET(__wt_try_writelock(session, &parent->page_lock)); else @@ -2086,8 +2073,7 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref) WT_PAGE *parent; bool hazard; - __wt_verbose( - session, WT_VERB_SPLIT, "%p: split-insert", (void *)ref->page); + __wt_verbose(session, WT_VERB_SPLIT, "%p: split-insert", (void *)ref); WT_RET(__split_internal_lock(session, ref, true, &parent, &hazard)); if ((ret = __split_insert(session, ref)) != 0) { @@ -2178,8 +2164,7 @@ __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing) WT_PAGE *parent; bool hazard; - __wt_verbose( - session, WT_VERB_SPLIT, "%p: split-multi", (void *)ref->page); + __wt_verbose(session, WT_VERB_SPLIT, "%p: split-multi", (void *)ref); WT_RET(__split_internal_lock(session, ref, false, &parent, &hazard)); if ((ret = __split_multi(session, ref, closing)) != 0 || closing) { @@ -2207,8 +2192,7 @@ __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref) WT_PAGE *parent; bool hazard; - __wt_verbose( - session, WT_VERB_SPLIT, "%p: reverse-split", (void *)ref->page); + __wt_verbose(session, WT_VERB_SPLIT, "%p: reverse-split", (void *)ref); WT_RET(__split_internal_lock(session, ref, false, 
&parent, &hazard)); ret = __split_parent(session, ref, NULL, 0, 0, false, true); @@ -2229,8 +2213,7 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi) page = ref->page; - __wt_verbose( - session, WT_VERB_SPLIT, "%p: split-rewrite", (void *)ref->page); + __wt_verbose(session, WT_VERB_SPLIT, "%p: split-rewrite", (void *)ref); /* * This isn't a split: a reconciliation failed because we couldn't write diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c index 049700952ee..ddaa2e5f70b 100644 --- a/src/third_party/wiredtiger/src/btree/bt_walk.c +++ b/src/third_party/wiredtiger/src/btree/bt_walk.c @@ -340,9 +340,7 @@ __tree_walk_internal(WT_SESSION_IMPL *session, * Take a copy of any held page and clear the return value. Remember * the hazard pointer we're currently holding. * - * We may be passed a pointer to btree->evict_page that we are clearing - * here. We check when discarding pages that we're not discarding that - * page, so this clear must be done before the page is released. + * Clear the returned value, it makes future error handling easier. */ couple = couple_orig = ref = *refp; *refp = NULL; diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c index aa299a161da..9c3d467340e 100644 --- a/src/third_party/wiredtiger/src/btree/row_srch.c +++ b/src/third_party/wiredtiger/src/btree/row_srch.c @@ -623,215 +623,3 @@ leaf_match: cbt->compare = 0; err: WT_TRET(__wt_page_release(session, current, 0)); return (ret); } - -/* - * __wt_row_random_leaf -- - * Return a random key from a row-store leaf page. 
- */ -int -__wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) -{ - WT_INSERT *ins, **start, **stop; - WT_INSERT_HEAD *ins_head; - WT_PAGE *page; - uint64_t samples; - uint32_t choice, entries, i; - int level; - - page = cbt->ref->page; - start = stop = NULL; /* [-Wconditional-uninitialized] */ - entries = 0; /* [-Wconditional-uninitialized] */ - - __cursor_pos_clear(cbt); - - /* If the page has disk-based entries, select from them. */ - if (page->entries != 0) { - cbt->compare = 0; - cbt->slot = __wt_random(&session->rnd) % page->entries; - - /* - * The real row-store search function builds the key, so we - * have to as well. - */ - return (__wt_row_leaf_key(session, - page, page->pg_row + cbt->slot, cbt->tmp, false)); - } - - /* - * If the tree is new (and not empty), it might have a large insert - * list. - * - * Walk down the list until we find a level with at least 50 entries, - * that's where we'll start rolling random numbers. The value 50 is - * used to ignore levels with only a few entries, that is, levels which - * are potentially badly skewed. - */ - F_SET(cbt, WT_CBT_SEARCH_SMALLEST); - if ((ins_head = WT_ROW_INSERT_SMALLEST(page)) == NULL) - return (WT_NOTFOUND); - for (level = WT_SKIP_MAXDEPTH - 1; level >= 0; --level) { - start = &ins_head->head[level]; - for (entries = 0, stop = start; - *stop != NULL; stop = &(*stop)->next[level]) - ++entries; - - if (entries > 50) - break; - } - - /* - * If it's a tiny list and we went all the way to level 0, correct the - * level; entries is correctly set. - */ - if (level < 0) - level = 0; - - /* - * Step down the skip list levels, selecting a random chunk of the name - * space at each level. - */ - for (samples = entries; level > 0; samples += entries) { - /* - * There are (entries) or (entries + 1) chunks of the name space - * considered at each level. 
They are: between start and the 1st - * element, between the 1st and 2nd elements, and so on to the - * last chunk which is the name space after the stop element on - * the current level. This last chunk of name space may or may - * not be there: as we descend the levels of the skip list, this - * chunk may appear, depending if the next level down has - * entries logically after the stop point in the current level. - * We can't ignore those entries: because of the algorithm used - * to determine the depth of a skiplist, there may be a large - * number of entries "revealed" by descending a level. - * - * If the next level down has more items after the current stop - * point, there are (entries + 1) chunks to consider, else there - * are (entries) chunks. - */ - if (*(stop - 1) == NULL) - choice = __wt_random(&session->rnd) % entries; - else - choice = __wt_random(&session->rnd) % (entries + 1); - - if (choice == entries) { - /* - * We selected the name space after the stop element on - * this level. Set the start point to the current stop - * point, descend a level and move the stop element to - * the end of the list, that is, the end of the newly - * discovered name space, counting entries as we go. - */ - start = stop; - --start; - --level; - for (entries = 0, stop = start; - *stop != NULL; stop = &(*stop)->next[level]) - ++entries; - } else { - /* - * We selected another name space on the level. Move the - * start pointer the selected number of entries forward - * to the start of the selected chunk (if the selected - * number is 0, start won't move). Set the stop pointer - * to the next element in the list and drop both start - * and stop down a level. - */ - for (i = 0; i < choice; ++i) - start = &(*start)->next[level]; - stop = &(*start)->next[level]; - - --start; - --stop; - --level; - - /* Count the entries in the selected name space. 
*/ - for (entries = 0, - ins = *start; ins != *stop; ins = ins->next[level]) - ++entries; - } - } - - /* - * When we reach the bottom level, entries will already be set. Select - * a random entry from the name space and return it. - * - * It should be impossible for the entries count to be 0 at this point, - * but check for it out of paranoia and to quiet static testing tools. - */ - if (entries > 0) - entries = __wt_random(&session->rnd) % entries; - for (ins = *start; entries > 0; --entries) - ins = ins->next[0]; - - cbt->ins = ins; - cbt->ins_head = ins_head; - cbt->compare = 0; - - /* - * Random lookups in newly created collections can be slow if a page - * consists of a large skiplist. Schedule the page for eviction if we - * encounter a large skiplist. This worthwhile because applications - * that take a sample often take many samples, so the overhead of - * traversing the skip list each time accumulates to real time. - */ - if (samples > 5000) - __wt_page_evict_soon(session, cbt->ref); - - return (0); -} - -/* - * __wt_row_random_descent -- - * Find a random leaf page in a row-store tree. - */ -int -__wt_row_random_descent(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) -{ - WT_BTREE *btree; - WT_DECL_RET; - WT_PAGE *page; - WT_PAGE_INDEX *pindex; - WT_REF *current, *descent; - - btree = S2BT(session); - current = NULL; - - if (0) { -restart: /* - * Discard the currently held page and restart the search from - * the root. - */ - WT_RET(__wt_page_release(session, current, 0)); - } - - /* Search the internal pages of the tree. */ - current = &btree->root; - for (;;) { - page = current->page; - if (page->type != WT_PAGE_ROW_INT) - break; - - WT_INTL_INDEX_GET(session, page, pindex); - descent = pindex->index[ - __wt_random(&session->rnd) % pindex->entries]; - - /* - * Swap the current page for the child page. If the page splits - * while we're retrieving it, restart the search at the root. 
- * - * On other error, simply return, the swap call ensures we're - * holding nothing on failure. - */ - if ((ret = __wt_page_swap( - session, current, descent, WT_READ_RESTART_OK)) == 0) { - current = descent; - continue; - } - if (ret == WT_RESTART) - goto restart; - return (ret); - } - - cbt->ref = current; - return (0); -} diff --git a/src/third_party/wiredtiger/src/checksum/power8/crc32_wrapper.c b/src/third_party/wiredtiger/src/checksum/power8/crc32_wrapper.c index ddfa2bdaeb8..a9be9ced1c6 100644 --- a/src/third_party/wiredtiger/src/checksum/power8/crc32_wrapper.c +++ b/src/third_party/wiredtiger/src/checksum/power8/crc32_wrapper.c @@ -1,4 +1,6 @@ #if defined(__powerpc64__) +#include "wt_internal.h" + #define CRC_TABLE #include "crc32_constants.h" @@ -68,8 +70,6 @@ out: } #endif -#include "wt_internal.h" - /* * __wt_checksum_hw -- * WiredTiger: return a checksum for a chunk of memory. diff --git a/src/third_party/wiredtiger/src/checksum/zseries/crc32-s390x.c b/src/third_party/wiredtiger/src/checksum/zseries/crc32-s390x.c index f77d6768d42..28b46594220 100644 --- a/src/third_party/wiredtiger/src/checksum/zseries/crc32-s390x.c +++ b/src/third_party/wiredtiger/src/checksum/zseries/crc32-s390x.c @@ -6,8 +6,20 @@ * Author(s): Hendrik Brueckner * */ +#include "wt_internal.h" + #include #include + +#if defined(HAVE_CRC32_HARDWARE) + +#include + +/* RHEL 7 has kernel support, but does not define this constant in the lib c headers. */ +#ifndef HWCAP_S390_VX +#define HWCAP_S390_VX 2048 +#endif + #include "crc32-s390x.h" #include "slicing-consts.h" @@ -69,8 +81,6 @@ unsigned int __wt_crc32c_le(unsigned int crc, const unsigned char *buf, size_t l /* Main CRC-32 functions */ DEFINE_CRC32_VX(__wt_crc32c_le_vx, __wt_crc32c_le_vgfm_16, __wt_crc32c_le) -#include "wt_internal.h" - /* * __wt_checksum_hw -- * WiredTiger: return a checksum for a chunk of memory. 
@@ -81,6 +91,8 @@ __wt_checksum_hw(const void *chunk, size_t len) return (~__wt_crc32c_le_vx(0xffffffff, chunk, len)); } +#endif + /* * __wt_checksum_init -- * WiredTiger: detect CRC hardware and set the checksum function. @@ -89,8 +101,14 @@ void __wt_checksum_init(void) { #if defined(HAVE_CRC32_HARDWARE) - __wt_process.checksum = __wt_checksum_hw; -#else + unsigned long caps = getauxval(AT_HWCAP); + + if (caps & HWCAP_S390_VX) + __wt_process.checksum = __wt_checksum_hw; + else + __wt_process.checksum = __wt_checksum_sw; + +#else /* !HAVE_CRC32_HARDWARE */ __wt_process.checksum = __wt_checksum_sw; #endif } diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index 6a93c1d05e2..b11a8d63fdb 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -147,12 +147,12 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { confchk_WT_CONNECTION_reconfigure_statistics_log_subconfigs, 5 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," - "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," - "\"read\",\"rebalance\",\"reconcile\",\"recovery\"," - "\"recovery_progress\",\"salvage\",\"shared_cache\",\"split\"," - "\"temporary\",\"thread_group\",\"transaction\",\"verify\"," - "\"version\",\"write\"]", + "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\"," + "\"handleops\",\"log\",\"lsm\",\"lsm_manager\",\"metadata\"," + "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," + "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"thread_group\",\"transaction\"," + "\"verify\",\"version\",\"write\"]", NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } }; @@ -750,12 +750,12 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { { "use_environment_priv", "boolean", NULL, 
NULL, NULL, 0 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," - "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," - "\"read\",\"rebalance\",\"reconcile\",\"recovery\"," - "\"recovery_progress\",\"salvage\",\"shared_cache\",\"split\"," - "\"temporary\",\"thread_group\",\"transaction\",\"verify\"," - "\"version\",\"write\"]", + "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\"," + "\"handleops\",\"log\",\"lsm\",\"lsm_manager\",\"metadata\"," + "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," + "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"thread_group\",\"transaction\"," + "\"verify\",\"version\",\"write\"]", NULL, 0 }, { "write_through", "list", NULL, "choices=[\"data\",\"log\"]", @@ -837,12 +837,12 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { { "use_environment_priv", "boolean", NULL, NULL, NULL, 0 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," - "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," - "\"read\",\"rebalance\",\"reconcile\",\"recovery\"," - "\"recovery_progress\",\"salvage\",\"shared_cache\",\"split\"," - "\"temporary\",\"thread_group\",\"transaction\",\"verify\"," - "\"version\",\"write\"]", + "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\"," + "\"handleops\",\"log\",\"lsm\",\"lsm_manager\",\"metadata\"," + "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," + "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"thread_group\",\"transaction\"," + "\"verify\",\"version\",\"write\"]", NULL, 0 }, { "version", "string", NULL, NULL, NULL, 0 }, { "write_through", "list", @@ -919,12 +919,12 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { 
confchk_wiredtiger_open_transaction_sync_subconfigs, 2 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," - "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," - "\"read\",\"rebalance\",\"reconcile\",\"recovery\"," - "\"recovery_progress\",\"salvage\",\"shared_cache\",\"split\"," - "\"temporary\",\"thread_group\",\"transaction\",\"verify\"," - "\"version\",\"write\"]", + "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\"," + "\"handleops\",\"log\",\"lsm\",\"lsm_manager\",\"metadata\"," + "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," + "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"thread_group\",\"transaction\"," + "\"verify\",\"version\",\"write\"]", NULL, 0 }, { "version", "string", NULL, NULL, NULL, 0 }, { "write_through", "list", @@ -1001,12 +1001,12 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { confchk_wiredtiger_open_transaction_sync_subconfigs, 2 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," - "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," - "\"read\",\"rebalance\",\"reconcile\",\"recovery\"," - "\"recovery_progress\",\"salvage\",\"shared_cache\",\"split\"," - "\"temporary\",\"thread_group\",\"transaction\",\"verify\"," - "\"version\",\"write\"]", + "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\"," + "\"handleops\",\"log\",\"lsm\",\"lsm_manager\",\"metadata\"," + "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," + "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"thread_group\",\"transaction\"," + "\"verify\",\"version\",\"write\"]", NULL, 0 }, { "write_through", "list", NULL, "choices=[\"data\",\"log\"]", diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c 
b/src/third_party/wiredtiger/src/conn/conn_api.c index f691a76b1f2..124250a7a7d 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -1798,6 +1798,7 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) { "checkpoint", WT_VERB_CHECKPOINT }, { "compact", WT_VERB_COMPACT }, { "evict", WT_VERB_EVICT }, + { "evict_stuck", WT_VERB_EVICT_STUCK }, { "evictserver", WT_VERB_EVICTSERVER }, { "fileops", WT_VERB_FILEOPS }, { "handleops", WT_VERB_HANDLEOPS }, @@ -1987,6 +1988,16 @@ __conn_set_file_system( CONNECTION_API_CALL(conn, session, set_file_system, config, cfg); WT_UNUSED(cfg); + /* + * You can only configure a file system once, and attempting to do it + * again probably means the extension argument didn't have early-load + * set and we've already configured the default file system. + */ + if (conn->file_system != NULL) + WT_ERR_MSG(session, EPERM, + "filesystem already configured; custom filesystems should " + "enable \"early_load\" configuration"); + conn->file_system = file_system; err: API_END_RET(session, ret); diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c index 2b0e5081f04..28dd06332e0 100644 --- a/src/third_party/wiredtiger/src/conn/conn_cache.c +++ b/src/third_party/wiredtiger/src/conn/conn_cache.c @@ -187,8 +187,8 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_RET_MSG(session, EINVAL, "eviction target must be lower than the eviction trigger"); - WT_RET(__wt_cond_auto_alloc(session, "cache eviction server", - false, 10000, WT_MILLION, &cache->evict_cond)); + WT_RET(__wt_cond_auto_alloc(session, + "cache eviction server", 10000, WT_MILLION, &cache->evict_cond)); WT_RET(__wt_spin_init(session, &cache->evict_pass_lock, "evict pass")); WT_RET(__wt_spin_init(session, &cache->evict_queue_lock, "cache eviction queue")); @@ -312,7 +312,7 @@ __wt_cache_destroy(WT_SESSION_IMPL *session) 
cache->bytes_dirty_intl + cache->bytes_dirty_leaf, cache->pages_dirty_intl + cache->pages_dirty_leaf); - WT_TRET(__wt_cond_auto_destroy(session, &cache->evict_cond)); + WT_TRET(__wt_cond_destroy(session, &cache->evict_cond)); __wt_spin_destroy(session, &cache->evict_pass_lock); __wt_spin_destroy(session, &cache->evict_queue_lock); __wt_spin_destroy(session, &cache->evict_walk_lock); diff --git a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c index 79c2fc23da5..49b766f4602 100644 --- a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c +++ b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c @@ -32,7 +32,7 @@ */ #define WT_CACHE_POOL_APP_EVICT_MULTIPLIER 3 #define WT_CACHE_POOL_APP_WAIT_MULTIPLIER 6 -#define WT_CACHE_POOL_READ_MULTIPLIER 1 +#define WT_CACHE_POOL_READ_MULTIPLIER 1 static void __cache_pool_adjust( WT_SESSION_IMPL *, uint64_t, uint64_t, bool, bool *); @@ -104,8 +104,8 @@ __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg) TAILQ_INIT(&cp->cache_pool_qh); WT_ERR(__wt_spin_init( session, &cp->cache_pool_lock, "cache shared pool")); - WT_ERR(__wt_cond_alloc(session, - "cache pool server", false, &cp->cache_pool_cond)); + WT_ERR(__wt_cond_alloc( + session, "cache pool server", &cp->cache_pool_cond)); __wt_process.cache_pool = cp; __wt_verbose(session, @@ -733,7 +733,7 @@ __wt_cache_pool_server(void *arg) F_ISSET(cache, WT_CACHE_POOL_RUN)) { if (cp->currently_used <= cp->size) __wt_cond_wait( - session, cp->cache_pool_cond, WT_MILLION); + session, cp->cache_pool_cond, WT_MILLION, NULL); /* * Re-check pool run flag - since we want to avoid getting the diff --git a/src/third_party/wiredtiger/src/conn/conn_ckpt.c b/src/third_party/wiredtiger/src/conn/conn_ckpt.c index faeef4e71a2..7797ed4421c 100644 --- a/src/third_party/wiredtiger/src/conn/conn_ckpt.c +++ b/src/third_party/wiredtiger/src/conn/conn_ckpt.c @@ -62,6 +62,16 @@ __ckpt_server_config(WT_SESSION_IMPL *session, const 
char **cfg, bool *startp) return (0); } +/* + * __ckpt_server_run_chk -- + * Check to decide if the checkpoint server should continue running. + */ +static bool +__ckpt_server_run_chk(WT_SESSION_IMPL *session) +{ + return (F_ISSET(S2C(session), WT_CONN_SERVER_CHECKPOINT)); +} + /* * __ckpt_server -- * The checkpoint server thread. @@ -78,14 +88,18 @@ __ckpt_server(void *arg) conn = S2C(session); wt_session = (WT_SESSION *)session; - while (F_ISSET(conn, WT_CONN_SERVER_RUN) && - F_ISSET(conn, WT_CONN_SERVER_CHECKPOINT)) { + for (;;) { /* * Wait... * NOTE: If the user only configured logsize, then usecs * will be 0 and this wait won't return until signalled. */ - __wt_cond_wait(session, conn->ckpt_cond, conn->ckpt_usecs); + __wt_cond_wait(session, + conn->ckpt_cond, conn->ckpt_usecs, __ckpt_server_run_chk); + + /* Check if we're quitting or being reconfigured. */ + if (!__ckpt_server_run_chk(session)) + break; /* * Checkpoint the database if the connection is marked dirty. @@ -113,7 +127,8 @@ __ckpt_server(void *arg) * it so we don't do another checkpoint * immediately. */ - __wt_cond_wait(session, conn->ckpt_cond, 1); + __wt_cond_wait( + session, conn->ckpt_cond, 1, NULL); } } else WT_STAT_CONN_INCR(session, txn_checkpoint_skipped); @@ -152,8 +167,7 @@ __ckpt_server_start(WT_CONNECTION_IMPL *conn) "checkpoint-server", true, session_flags, &conn->ckpt_session)); session = conn->ckpt_session; - WT_RET(__wt_cond_alloc( - session, "checkpoint server", false, &conn->ckpt_cond)); + WT_RET(__wt_cond_alloc(session, "checkpoint server", &conn->ckpt_cond)); /* * Start the thread. 
diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c index b2f4bb04ce4..866b8633f71 100644 --- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c +++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c @@ -25,21 +25,19 @@ __conn_dhandle_destroy(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle) } /* - * __conn_dhandle_alloc -- + * __wt_conn_dhandle_alloc -- * Allocate a new data handle and return it linked into the connection's * list. */ -static int -__conn_dhandle_alloc(WT_SESSION_IMPL *session, - const char *uri, const char *checkpoint, WT_DATA_HANDLE **dhandlep) +int +__wt_conn_dhandle_alloc( + WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) { WT_BTREE *btree; WT_DATA_HANDLE *dhandle; WT_DECL_RET; uint64_t bucket; - *dhandlep = NULL; - WT_RET(__wt_calloc_one(session, &dhandle)); __wt_rwlock_init(session, &dhandle->rwlock); @@ -75,7 +73,7 @@ __conn_dhandle_alloc(WT_SESSION_IMPL *session, bucket = dhandle->name_hash % WT_HASH_ARRAY_SIZE; WT_CONN_DHANDLE_INSERT(S2C(session), dhandle, bucket); - *dhandlep = dhandle; + session->dhandle = dhandle; return (0); err: __conn_dhandle_destroy(session, dhandle); @@ -122,10 +120,7 @@ __wt_conn_dhandle_find( } } - WT_RET(__conn_dhandle_alloc(session, uri, checkpoint, &dhandle)); - - session->dhandle = dhandle; - return (0); + return (WT_NOTFOUND); } /* @@ -419,12 +414,11 @@ __wt_conn_btree_apply(WT_SESSION_IMPL *session, const char *uri, { WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle; + WT_DECL_RET; uint64_t bucket; conn = S2C(session); - WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); - /* * If we're given a URI, then we walk only the hash list for that * name. If we don't have a URI we walk the entire dhandle list. 
@@ -432,29 +426,42 @@ __wt_conn_btree_apply(WT_SESSION_IMPL *session, const char *uri, if (uri != NULL) { bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE; - TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq) { + + for (dhandle = NULL;;) { + WT_WITH_HANDLE_LIST_READ_LOCK(session, + WT_DHANDLE_NEXT(session, dhandle, + &conn->dhhash[bucket], hashq)); + if (dhandle == NULL) + return (0); + if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) || F_ISSET(dhandle, WT_DHANDLE_DEAD) || dhandle->checkpoint != NULL || strcmp(uri, dhandle->name) != 0) continue; - WT_RET(__conn_btree_apply_internal( - session, dhandle, file_func, name_func, cfg)); + WT_ERR(__conn_btree_apply_internal(session, + dhandle, file_func, name_func, cfg)); } } else { - TAILQ_FOREACH(dhandle, &conn->dhqh, q) { + for (dhandle = NULL;;) { + WT_WITH_HANDLE_LIST_READ_LOCK(session, + WT_DHANDLE_NEXT(session, dhandle, &conn->dhqh, q)); + if (dhandle == NULL) + return (0); + if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) || F_ISSET(dhandle, WT_DHANDLE_DEAD) || dhandle->checkpoint != NULL || !WT_PREFIX_MATCH(dhandle->name, "file:") || WT_IS_METADATA(dhandle)) continue; - WT_RET(__conn_btree_apply_internal( - session, dhandle, file_func, name_func, cfg)); + WT_ERR(__conn_btree_apply_internal(session, + dhandle, file_func, name_func, cfg)); } } - return (0); +err: WT_DHANDLE_RELEASE(dhandle); + return (ret); } /* @@ -473,7 +480,8 @@ __wt_conn_dhandle_close_all( conn = S2C(session); - WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); + WT_ASSERT(session, + F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); WT_ASSERT(session, session->dhandle == NULL); bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE; @@ -534,7 +542,8 @@ __conn_dhandle_remove(WT_SESSION_IMPL *session, bool final) dhandle = session->dhandle; bucket = dhandle->name_hash % WT_HASH_ARRAY_SIZE; - WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); + WT_ASSERT(session, + F_ISSET(session, 
WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); WT_ASSERT(session, dhandle != conn->cache->evict_file_next); /* Check if the handle was reacquired by a session while we waited. */ @@ -583,7 +592,7 @@ __wt_conn_dhandle_discard_single( } /* Try to remove the handle, protected by the data handle lock. */ - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, tret = __conn_dhandle_remove(session, final)); if (set_pass_intr) (void)__wt_atomic_subv32(&S2C(session)->cache->pass_intr, 1); diff --git a/src/third_party/wiredtiger/src/conn/conn_handle.c b/src/third_party/wiredtiger/src/conn/conn_handle.c index 3f7fc9bb2a7..287e9ca7b99 100644 --- a/src/third_party/wiredtiger/src/conn/conn_handle.c +++ b/src/third_party/wiredtiger/src/conn/conn_handle.c @@ -53,18 +53,18 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) /* Spinlocks. */ WT_RET(__wt_spin_init(session, &conn->api_lock, "api")); WT_SPIN_INIT_TRACKED(session, &conn->checkpoint_lock, checkpoint); - WT_SPIN_INIT_TRACKED(session, &conn->dhandle_lock, handle_list); WT_RET(__wt_spin_init(session, &conn->encryptor_lock, "encryptor")); WT_RET(__wt_spin_init(session, &conn->fh_lock, "file list")); WT_RET(__wt_spin_init(session, &conn->las_lock, "lookaside table")); WT_SPIN_INIT_TRACKED(session, &conn->metadata_lock, metadata); WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure")); WT_SPIN_INIT_TRACKED(session, &conn->schema_lock, schema); - WT_SPIN_INIT_TRACKED(session, &conn->table_lock, table); WT_RET(__wt_spin_init(session, &conn->turtle_lock, "turtle file")); /* Read-write locks */ + __wt_rwlock_init(session, &conn->dhandle_lock); __wt_rwlock_init(session, &conn->hot_backup_lock); + __wt_rwlock_init(session, &conn->table_lock); WT_RET(__wt_calloc_def(session, WT_PAGE_LOCKS, &conn->page_lock)); for (i = 0; i < WT_PAGE_LOCKS; ++i) @@ -79,7 +79,7 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_RET(__wt_spin_init( session, &conn->lsm_manager.switch_lock, "LSM switch queue lock")); 
WT_RET(__wt_cond_alloc( - session, "LSM worker cond", false, &conn->lsm_manager.work_cond)); + session, "LSM worker cond", &conn->lsm_manager.work_cond)); /* * Generation numbers. @@ -109,16 +109,15 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) * __wt_connection_destroy -- * Destroy the connection's underlying WT_CONNECTION_IMPL structure. */ -int +void __wt_connection_destroy(WT_CONNECTION_IMPL *conn) { - WT_DECL_RET; WT_SESSION_IMPL *session; u_int i; /* Check there's something to destroy. */ if (conn == NULL) - return (0); + return; session = conn->default_session; @@ -135,7 +134,7 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn) __wt_spin_destroy(session, &conn->api_lock); __wt_spin_destroy(session, &conn->block_lock); __wt_spin_destroy(session, &conn->checkpoint_lock); - __wt_spin_destroy(session, &conn->dhandle_lock); + __wt_rwlock_destroy(session, &conn->dhandle_lock); __wt_spin_destroy(session, &conn->encryptor_lock); __wt_spin_destroy(session, &conn->fh_lock); __wt_rwlock_destroy(session, &conn->hot_backup_lock); @@ -143,17 +142,12 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn) __wt_spin_destroy(session, &conn->metadata_lock); __wt_spin_destroy(session, &conn->reconfig_lock); __wt_spin_destroy(session, &conn->schema_lock); - __wt_spin_destroy(session, &conn->table_lock); + __wt_rwlock_destroy(session, &conn->table_lock); __wt_spin_destroy(session, &conn->turtle_lock); for (i = 0; i < WT_PAGE_LOCKS; ++i) __wt_spin_destroy(session, &conn->page_lock[i]); __wt_free(session, conn->page_lock); - /* Destroy the file-system configuration. */ - if (conn->file_system != NULL && conn->file_system->terminate != NULL) - WT_TRET(conn->file_system->terminate( - conn->file_system, (WT_SESSION *)session)); - /* Free allocated memory. 
*/ __wt_free(session, conn->cfg); __wt_free(session, conn->home); @@ -162,5 +156,4 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn) __wt_stat_connection_discard(session, conn); __wt_free(NULL, conn); - return (ret); } diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c index 8f8f8614ba8..c6dd795389d 100644 --- a/src/third_party/wiredtiger/src/conn/conn_log.c +++ b/src/third_party/wiredtiger/src/conn/conn_log.c @@ -174,7 +174,7 @@ __logmgr_config( WT_RET(__logmgr_sync_cfg(session, cfg)); if (conn->log_cond != NULL) - __wt_cond_auto_signal(session, conn->log_cond); + __wt_cond_signal(session, conn->log_cond); return (0); } @@ -341,7 +341,7 @@ __wt_log_truncate_files( conn = S2C(session); if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) return (0); - if (F_ISSET(conn, WT_CONN_SERVER_RUN) && + if (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN) && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE)) WT_RET_MSG(session, EINVAL, "Attempt to archive manually while a server is running"); @@ -505,8 +505,7 @@ __log_file_server(void *arg) locked = false; __wt_spin_unlock(session, &log->log_sync_lock); } else { - __wt_cond_auto_signal( - session, conn->log_wrlsn_cond); + __wt_cond_signal(session, conn->log_wrlsn_cond); /* * We do not want to wait potentially a second * to process this. Yield to give the wrlsn @@ -517,8 +516,9 @@ __log_file_server(void *arg) continue; } } + /* Wait until the next event. */ - __wt_cond_wait(session, conn->log_file_cond, WT_MILLION / 10); + __wt_cond_wait(session, conn->log_file_cond, 100000, NULL); } if (0) { @@ -730,12 +730,8 @@ __log_wrlsn_server(void *arg) if (yield++ < WT_THOUSAND) __wt_yield(); else - /* - * Send in false because if we did any work we would - * not be on this path. 
- */ __wt_cond_auto_wait( - session, conn->log_wrlsn_cond, did_work); + session, conn->log_wrlsn_cond, did_work, NULL); } /* * On close we need to do this one more time because there could @@ -840,10 +836,9 @@ __log_server(void *arg) } /* Wait until the next event. */ - __wt_epoch(session, &start); - __wt_cond_auto_wait_signal(session, - conn->log_cond, did_work, &signalled); + __wt_cond_auto_wait_signal( + session, conn->log_cond, did_work, NULL, &signalled); __wt_epoch(session, &now); timediff = WT_TIMEDIFF_MS(now, start); } @@ -904,10 +899,8 @@ __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_INIT_LSN(&log->write_lsn); WT_INIT_LSN(&log->write_start_lsn); log->fileid = 0; - WT_RET(__wt_cond_alloc( - session, "log sync", false, &log->log_sync_cond)); - WT_RET(__wt_cond_alloc( - session, "log write", false, &log->log_write_cond)); + WT_RET(__wt_cond_alloc(session, "log sync", &log->log_sync_cond)); + WT_RET(__wt_cond_alloc(session, "log write", &log->log_write_cond)); WT_RET(__wt_log_open(session)); WT_RET(__wt_log_slot_init(session)); @@ -930,6 +923,8 @@ __wt_logmgr_open(WT_SESSION_IMPL *session) if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) return (0); + F_SET(conn, WT_CONN_LOG_SERVER_RUN); + /* * Start the log close thread. It is not configurable. * If logging is enabled, this thread runs. @@ -937,8 +932,8 @@ __wt_logmgr_open(WT_SESSION_IMPL *session) session_flags = WT_SESSION_NO_DATA_HANDLES; WT_RET(__wt_open_internal_session(conn, "log-close-server", false, session_flags, &conn->log_file_session)); - WT_RET(__wt_cond_alloc(conn->log_file_session, - "log close server", false, &conn->log_file_cond)); + WT_RET(__wt_cond_alloc( + conn->log_file_session, "log close server", &conn->log_file_cond)); /* * Start the log file close thread. 
@@ -954,8 +949,7 @@ __wt_logmgr_open(WT_SESSION_IMPL *session) WT_RET(__wt_open_internal_session(conn, "log-wrlsn-server", false, session_flags, &conn->log_wrlsn_session)); WT_RET(__wt_cond_auto_alloc(conn->log_wrlsn_session, - "log write lsn server", false, 10000, WT_MILLION, - &conn->log_wrlsn_cond)); + "log write lsn server", 10000, WT_MILLION, &conn->log_wrlsn_cond)); WT_RET(__wt_thread_create(conn->log_wrlsn_session, &conn->log_wrlsn_tid, __log_wrlsn_server, conn->log_wrlsn_session)); conn->log_wrlsn_tid_set = true; @@ -969,13 +963,13 @@ __wt_logmgr_open(WT_SESSION_IMPL *session) if (conn->log_session != NULL) { WT_ASSERT(session, conn->log_cond != NULL); WT_ASSERT(session, conn->log_tid_set == true); - __wt_cond_auto_signal(session, conn->log_cond); + __wt_cond_signal(session, conn->log_cond); } else { /* The log server gets its own session. */ WT_RET(__wt_open_internal_session(conn, "log-server", false, session_flags, &conn->log_session)); WT_RET(__wt_cond_auto_alloc(conn->log_session, - "log server", false, 50000, WT_MILLION, &conn->log_cond)); + "log server", 50000, WT_MILLION, &conn->log_cond)); /* * Start the thread. 
@@ -1001,6 +995,8 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session) conn = S2C(session); + F_CLR(conn, WT_CONN_LOG_SERVER_RUN); + if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) { /* * We always set up the log_path so printlog can work without @@ -1011,7 +1007,7 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session) return (0); } if (conn->log_tid_set) { - __wt_cond_auto_signal(session, conn->log_cond); + __wt_cond_signal(session, conn->log_cond); WT_TRET(__wt_thread_join(session, conn->log_tid)); conn->log_tid_set = false; } @@ -1026,7 +1022,7 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session) conn->log_file_session = NULL; } if (conn->log_wrlsn_tid_set) { - __wt_cond_auto_signal(session, conn->log_wrlsn_cond); + __wt_cond_signal(session, conn->log_wrlsn_cond); WT_TRET(__wt_thread_join(session, conn->log_wrlsn_tid)); conn->log_wrlsn_tid_set = false; } @@ -1047,9 +1043,9 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session) } /* Destroy the condition variables now that all threads are stopped */ - WT_TRET(__wt_cond_auto_destroy(session, &conn->log_cond)); + WT_TRET(__wt_cond_destroy(session, &conn->log_cond)); WT_TRET(__wt_cond_destroy(session, &conn->log_file_cond)); - WT_TRET(__wt_cond_auto_destroy(session, &conn->log_wrlsn_cond)); + WT_TRET(__wt_cond_destroy(session, &conn->log_wrlsn_cond)); WT_TRET(__wt_cond_destroy(session, &conn->log->log_sync_cond)); WT_TRET(__wt_cond_destroy(session, &conn->log->log_write_cond)); diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c index d4ace127bb2..5b20377d437 100644 --- a/src/third_party/wiredtiger/src/conn/conn_open.c +++ b/src/third_party/wiredtiger/src/conn/conn_open.c @@ -25,7 +25,7 @@ __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]) * Tell internal server threads to run: this must be set before opening * any sessions. */ - F_SET(conn, WT_CONN_SERVER_RUN | WT_CONN_LOG_SERVER_RUN); + F_SET(conn, WT_CONN_SERVER_RUN); /* WT_SESSION_IMPL array. 
*/ WT_RET(__wt_calloc(session, @@ -100,8 +100,12 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) __wt_yield(); } - /* Clear any pending async ops. */ + /* + * Clear any pending async operations and shut down the async worker + * threads and system before closing LSM. + */ WT_TRET(__wt_async_flush(session)); + WT_TRET(__wt_async_destroy(session)); /* * Shut down server threads other than the eviction server, which is @@ -110,14 +114,14 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) * exit before files are closed. */ F_CLR(conn, WT_CONN_SERVER_RUN); - WT_TRET(__wt_async_destroy(session)); WT_TRET(__wt_lsm_manager_destroy(session)); - WT_TRET(__wt_sweep_destroy(session)); F_SET(conn, WT_CONN_CLOSING); - WT_TRET(__wt_checkpoint_server_destroy(session)); WT_TRET(__wt_statlog_destroy(session, true)); + WT_TRET(__wt_sweep_destroy(session)); + + /* The eviction server is shut down last. */ WT_TRET(__wt_evict_destroy(session)); /* Shut down the lookaside table, after all eviction is complete. */ @@ -126,7 +130,7 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) /* Close open data handles. */ WT_TRET(__wt_conn_dhandle_discard(session)); - /* Shut down metadata tracking, required before creating tables. */ + /* Shut down metadata tracking. */ WT_TRET(__wt_meta_track_destroy(session)); /* @@ -140,7 +144,6 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE)) WT_TRET(__wt_txn_checkpoint_log( session, true, WT_TXN_LOG_CKPT_STOP, NULL)); - F_CLR(conn, WT_CONN_LOG_SERVER_RUN); WT_TRET(__wt_logmgr_destroy(session)); /* Free memory for collators, compressors, data sources. */ @@ -159,15 +162,6 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) /* Discard transaction state. */ __wt_txn_global_destroy(session); - /* Close extensions, first calling any unload entry point. 
*/ - while ((dlh = TAILQ_FIRST(&conn->dlhqh)) != NULL) { - TAILQ_REMOVE(&conn->dlhqh, dlh, q); - - if (dlh->terminate != NULL) - WT_TRET(dlh->terminate(wt_conn)); - WT_TRET(__wt_dlclose(session, dlh)); - } - /* Close the lock file, opening up the database to other connections. */ if (conn->lock_fh != NULL) WT_TRET(__wt_close(session, &conn->lock_fh)); @@ -199,8 +193,22 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) __wt_free(session, s->hazard); } + /* Destroy the file-system configuration. */ + if (conn->file_system != NULL && conn->file_system->terminate != NULL) + WT_TRET(conn->file_system->terminate( + conn->file_system, (WT_SESSION *)session)); + + /* Close extensions, first calling any unload entry point. */ + while ((dlh = TAILQ_FIRST(&conn->dlhqh)) != NULL) { + TAILQ_REMOVE(&conn->dlhqh, dlh, q); + + if (dlh->terminate != NULL) + WT_TRET(dlh->terminate(wt_conn)); + WT_TRET(__wt_dlclose(session, dlh)); + } + /* Destroy the handle. */ - WT_TRET(__wt_connection_destroy(conn)); + __wt_connection_destroy(conn); return (ret); } diff --git a/src/third_party/wiredtiger/src/conn/conn_stat.c b/src/third_party/wiredtiger/src/conn/conn_stat.c index 3bcdfd7ecb1..d89392b66c6 100644 --- a/src/third_party/wiredtiger/src/conn/conn_stat.c +++ b/src/third_party/wiredtiger/src/conn/conn_stat.c @@ -409,7 +409,6 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp) struct timespec ts; struct tm *tm, _tm; WT_CONNECTION_IMPL *conn; - WT_DECL_RET; WT_FSTREAM *log_stream; conn = S2C(session); @@ -446,12 +445,9 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp) * Lock the schema and walk the list of open handles, dumping * any that match the list of object sources. 
*/ - if (conn->stat_sources != NULL) { - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_conn_btree_apply( + if (conn->stat_sources != NULL) + WT_RET(__wt_conn_btree_apply( session, NULL, __statlog_apply, NULL, NULL)); - WT_RET(ret); - } /* * Walk the list of open LSM trees, dumping any that match the @@ -485,8 +481,7 @@ __statlog_on_close(WT_SESSION_IMPL *session) if (!FLD_ISSET(conn->stat_flags, WT_STAT_ON_CLOSE)) return (0); - if (F_ISSET(conn, WT_CONN_SERVER_RUN) && - F_ISSET(conn, WT_CONN_SERVER_STATISTICS)) + if (F_ISSET(conn, WT_CONN_SERVER_STATISTICS)) WT_RET_MSG(session, EINVAL, "Attempt to log statistics while a server is running"); @@ -497,6 +492,16 @@ err: __wt_scr_free(session, &tmp); return (ret); } +/* + * __statlog_server_run_chk -- + * Check to decide if the statistics log server should continue running. + */ +static bool +__statlog_server_run_chk(WT_SESSION_IMPL *session) +{ + return (F_ISSET(S2C(session), WT_CONN_SERVER_STATISTICS)); +} + /* * __statlog_server -- * The statistics server thread. @@ -525,10 +530,14 @@ __statlog_server(void *arg) WT_ERR(__wt_buf_init(session, &path, strlen(conn->stat_path) + 128)); WT_ERR(__wt_buf_init(session, &tmp, strlen(conn->stat_path) + 128)); - while (F_ISSET(conn, WT_CONN_SERVER_RUN) && - F_ISSET(conn, WT_CONN_SERVER_STATISTICS)) { + for (;;) { /* Wait until the next event. */ - __wt_cond_wait(session, conn->stat_cond, conn->stat_usecs); + __wt_cond_wait(session, conn->stat_cond, + conn->stat_usecs, __statlog_server_run_chk); + + /* Check if we're quitting or being reconfigured. */ + if (!__statlog_server_run_chk(session)) + break; if (WT_STAT_ENABLED(session)) WT_ERR(__statlog_log_one(session, &path, &tmp)); @@ -563,7 +572,7 @@ __statlog_start(WT_CONNECTION_IMPL *conn) session = conn->stat_session; WT_RET(__wt_cond_alloc( - session, "statistics log server", false, &conn->stat_cond)); + session, "statistics log server", &conn->stat_cond)); /* * Start the thread. 
diff --git a/src/third_party/wiredtiger/src/conn/conn_sweep.c b/src/third_party/wiredtiger/src/conn/conn_sweep.c index 7d5cb7d7c72..8c186c63939 100644 --- a/src/third_party/wiredtiger/src/conn/conn_sweep.c +++ b/src/third_party/wiredtiger/src/conn/conn_sweep.c @@ -233,7 +233,7 @@ __sweep_remove_handles(WT_SESSION_IMPL *session) if (!WT_DHANDLE_CAN_DISCARD(dhandle)) continue; - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, ret = __sweep_remove_one(session, dhandle)); if (ret == 0) WT_STAT_CONN_INCR(session, dh_sweep_remove); @@ -245,6 +245,16 @@ __sweep_remove_handles(WT_SESSION_IMPL *session) return (ret == EBUSY ? 0 : ret); } +/* + * __sweep_server_run_chk -- + * Check to decide if the sweep server should continue running. + */ +static bool +__sweep_server_run_chk(WT_SESSION_IMPL *session) +{ + return (F_ISSET(S2C(session), WT_CONN_SERVER_SWEEP)); +} + /* * __sweep_server -- * The handle sweep server thread. @@ -266,11 +276,15 @@ __sweep_server(void *arg) /* * Sweep for dead and excess handles. */ - while (F_ISSET(conn, WT_CONN_SERVER_RUN) && - F_ISSET(conn, WT_CONN_SERVER_SWEEP)) { + for (;;) { /* Wait until the next event. */ - __wt_cond_wait(session, - conn->sweep_cond, conn->sweep_interval * WT_MILLION); + __wt_cond_wait(session, conn->sweep_cond, + conn->sweep_interval * WT_MILLION, __sweep_server_run_chk); + + /* Check if we're quitting or being reconfigured.
*/ + if (!__sweep_server_run_chk(session)) + break; + __wt_seconds(session, &now); WT_STAT_CONN_INCR(session, dh_sweeps); @@ -390,7 +404,7 @@ __wt_sweep_create(WT_SESSION_IMPL *session) session = conn->sweep_session; WT_RET(__wt_cond_alloc( - session, "handle sweep server", false, &conn->sweep_cond)); + session, "handle sweep server", &conn->sweep_cond)); WT_RET(__wt_thread_create( session, &conn->sweep_tid, __sweep_server, session)); diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c index 08b15e6ca5e..61ced8d11e7 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_backup.c +++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c @@ -346,13 +346,9 @@ __backup_stop(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb) static int __backup_all(WT_SESSION_IMPL *session) { - WT_DECL_RET; - /* Build a list of the file objects that need to be copied. */ - WT_WITH_HANDLE_LIST_LOCK(session, ret = - __wt_meta_apply_all(session, NULL, __backup_list_uri_append, NULL)); - - return (ret); + return (__wt_meta_apply_all( + session, NULL, __backup_list_uri_append, NULL)); } /* diff --git a/src/third_party/wiredtiger/src/cursor/cur_index.c b/src/third_party/wiredtiger/src/cursor/cur_index.c index 4786b0524bc..6fc01c0421f 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_index.c +++ b/src/third_party/wiredtiger/src/cursor/cur_index.c @@ -240,7 +240,17 @@ __curindex_search(WT_CURSOR *cursor) found_key = child->key; if (found_key.size < cursor->key.size) WT_ERR(WT_NOTFOUND); - found_key.size = cursor->key.size; + + /* + * Custom collators expect to see complete keys, pass an item containing + * all the visible fields so it unpacks correctly. 
+ */ + if (cindex->index->collator != NULL && + !F_ISSET(cursor, WT_CURSTD_RAW_SEARCH)) + WT_ERR(__wt_struct_repack(session, child->key_format, + cindex->iface.key_format, &child->key, &found_key)); + else + found_key.size = cursor->key.size; WT_ERR(__wt_compare( session, cindex->index->collator, &cursor->key, &found_key, &cmp)); @@ -307,8 +317,18 @@ __curindex_search_near(WT_CURSOR *cursor, int *exact) * so we flip the sign of the result to match what callers expect. */ found_key = child->key; - if (found_key.size > cursor->key.size) - found_key.size = cursor->key.size; + if (found_key.size > cursor->key.size) { + /* + * Custom collators expect to see complete keys, pass an item + * containing all the visible fields so it unpacks correctly. + */ + if (cindex->index->collator != NULL) + WT_ERR(__wt_struct_repack(session, + cindex->child->key_format, cindex->iface.key_format, + &child->key, &found_key)); + else + found_key.size = cursor->key.size; + } WT_ERR(__wt_compare( session, cindex->index->collator, &cursor->key, &found_key, exact)); diff --git a/src/third_party/wiredtiger/src/cursor/cur_std.c b/src/third_party/wiredtiger/src/cursor/cur_std.c index 7ace6d49cf0..99a9e373354 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_std.c +++ b/src/third_party/wiredtiger/src/cursor/cur_std.c @@ -633,6 +633,7 @@ __wt_cursor_reconfigure(WT_CURSOR *cursor, const char *config) int __wt_cursor_dup_position(WT_CURSOR *to_dup, WT_CURSOR *cursor) { + WT_DECL_RET; WT_ITEM key; /* @@ -662,9 +663,11 @@ __wt_cursor_dup_position(WT_CURSOR *to_dup, WT_CURSOR *cursor) * cursors cannot reference application memory after cursor operations * and that requirement will save the day. 
*/ - WT_RET(cursor->search(cursor)); + F_SET(cursor, WT_CURSTD_RAW_SEARCH); + ret = cursor->search(cursor); + F_CLR(cursor, WT_CURSTD_RAW_SEARCH); - return (0); + return (ret); } /* diff --git a/src/third_party/wiredtiger/src/cursor/cur_table.c b/src/third_party/wiredtiger/src/cursor/cur_table.c index 76f7fc5865f..7e8cd153d2d 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_table.c +++ b/src/third_party/wiredtiger/src/cursor/cur_table.c @@ -769,7 +769,7 @@ __curtable_complete(WT_SESSION_IMPL *session, WT_TABLE *table) return (0); /* If the table is incomplete, wait on the table lock and recheck. */ - WT_WITH_TABLE_LOCK(session, complete = table->cg_complete); + WT_WITH_TABLE_READ_LOCK(session, complete = table->cg_complete); if (!complete) WT_RET_MSG(session, EINVAL, "'%s' not available until all column groups are created", diff --git a/src/third_party/wiredtiger/src/docs/cursor-random.dox b/src/third_party/wiredtiger/src/docs/cursor-random.dox index a0a3212be6d..b6434e3d161 100644 --- a/src/third_party/wiredtiger/src/docs/cursor-random.dox +++ b/src/third_party/wiredtiger/src/docs/cursor-random.dox @@ -20,9 +20,4 @@ cursor configured using \c next_random_sample_size divides the object into \c next_random_sample_size pieces, and each subsequent retrieval returns a record from the next one of those pieces. -For example, setting \c next_random_sample_percent to \c 10 would cause -the cursor to sequentially return records from each tenth part of the -object. Setting \c next_random_sample_percent to \c 1000 would cause the -cursor to sequentially return records from each .1% of the object. - */ diff --git a/src/third_party/wiredtiger/src/docs/upgrading.dox b/src/third_party/wiredtiger/src/docs/upgrading.dox index 4a356f7da61..f463e6bc615 100644 --- a/src/third_party/wiredtiger/src/docs/upgrading.dox +++ b/src/third_party/wiredtiger/src/docs/upgrading.dox @@ -7,6 +7,12 @@ The WiredTiger Utility can now \c truncate an object. 
Removing all contents from the specified object. +
Handle list lock statistics
+
+In the 2.9.1 release we added statistics tracking handle list lock timing. We +have since switched that lock from a spin lock to a read-write lock, and +consequently changed the statistics tracking lock-related wait time. +
@section version_291 Upgrading to Version 2.9.1 diff --git a/src/third_party/wiredtiger/src/docs/wtperf.dox b/src/third_party/wiredtiger/src/docs/wtperf.dox index 83aadf8a776..2eac0fef3f4 100644 --- a/src/third_party/wiredtiger/src/docs/wtperf.dox +++ b/src/third_party/wiredtiger/src/docs/wtperf.dox @@ -195,14 +195,14 @@ use pareto distribution for random numbers. Zero to disable, otherwise a percen number of operations to group into each transaction in the populate phase, zero for auto-commit @par populate_threads (unsigned int, default=1) number of populate threads, 1 for bulk load +@par pre_load_data (boolean, default=false) +Scan all data prior to starting the workload phase to warm the cache @par random_range (unsigned int, default=0) if non zero choose a value from within this range as the key for insert operations @par random_value (boolean, default=false) generate random content for the value @par range_partition (boolean, default=false) partition data by range (vs hash) -@par read_range (unsigned int, default=0) -scan a range of keys after each search @par readonly (boolean, default=false) reopen the connection between populate and workload phases in readonly mode. Requires reopen_connection turned on (default). Requires that read be the only workload specified @par reopen_connection (boolean, default=true) @@ -228,7 +228,7 @@ number of tables to run operations over. Keys are divided evenly over the table @par table_count_idle (unsigned int, default=0) number of tables to create, that won't be populated. Default 0. 
@par threads (string, default="") -workload configuration: each 'count' entry is the total number of threads, and the 'insert', 'read' and 'update' entries are the ratios of insert, read and update operations done by each worker thread; If a throttle value is provided each thread will do a maximum of that number of operations per second; multiple workload configurations may be specified per threads configuration; for example, a more complex threads configuration might be 'threads=((count=2,reads=1)(count=8,reads=1,inserts=2,updates=1))' which would create 2 threads doing nothing but reads and 8 threads each doing 50% inserts and 25% reads and updates. Allowed configuration values are 'count', 'throttle', 'update_delta', 'reads', 'inserts', 'updates', 'truncate', 'truncate_pct' and 'truncate_count'. There are also behavior modifiers, supported modifiers are 'ops_per_txn' +workload configuration: each 'count' entry is the total number of threads, and the 'insert', 'read' and 'update' entries are the ratios of insert, read and update operations done by each worker thread; If a throttle value is provided each thread will do a maximum of that number of operations per second; multiple workload configurations may be specified per threads configuration; for example, a more complex threads configuration might be 'threads=((count=2,reads=1)(count=8,reads=1,inserts=2,updates=1))' which would create 2 threads doing nothing but reads and 8 threads each doing 50% inserts and 25% reads and updates. Allowed configuration values are 'count', 'throttle', 'update_delta', 'reads', 'read_range', 'inserts', 'updates', 'truncate', 'truncate_pct' and 'truncate_count'. 
There are also behavior modifiers, supported modifiers are 'ops_per_txn' @par transaction_config (string, default="") WT_SESSION.begin_transaction configuration string, applied during the populate phase when populate_ops_per_txn is nonzero @par table_name (string, default="test") diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index 9b969de9a9e..42fe4d4608e 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -24,40 +24,40 @@ static int __evict_walk_file( (S2C(s)->evict_threads.current_threads > 1) /* - * __evict_lock_dhandle -- - * Try to get the dhandle lock, with yield and sleep back off. + * __evict_lock_handle_list -- + * Try to get the handle list lock, with yield and sleep back off. * Keep timing statistics overall. */ static int -__evict_lock_dhandle(WT_SESSION_IMPL *session) +__evict_lock_handle_list(WT_SESSION_IMPL *session) { struct timespec enter, leave; WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DECL_RET; - WT_SPINLOCK *dh_lock; - int64_t **stats; + WT_RWLOCK *dh_lock; u_int spins; bool dh_stats; conn = S2C(session); cache = conn->cache; dh_lock = &conn->dhandle_lock; - stats = (int64_t **)conn->stats; - dh_stats = WT_STAT_ENABLED(session) && dh_lock->stat_count_off != -1; /* - * Maintain lock acquisition timing statistics as if this were a - * regular lock acquisition. + * Setup tracking of handle lock acquisition wait time if statistics + * are enabled. */ + dh_stats = WT_STAT_ENABLED(session); + if (dh_stats) __wt_epoch(session, &enter); + /* * Use a custom lock acquisition back off loop so the eviction server * notices any interrupt quickly. 
*/ for (spins = 0; - (ret = __wt_spin_trylock_track(session, dh_lock)) == EBUSY && + (ret = __wt_try_readlock(session, dh_lock)) == EBUSY && cache->pass_intr == 0; spins++) { if (spins < WT_THOUSAND) __wt_yield(); @@ -70,8 +70,9 @@ __evict_lock_dhandle(WT_SESSION_IMPL *session) WT_RET(ret); if (dh_stats) { __wt_epoch(session, &leave); - stats[session->stat_bucket][dh_lock->stat_int_usecs_off] += - (int64_t)WT_TIMEDIFF_US(leave, enter); + WT_STAT_CONN_INCRV( + session, lock_handle_list_wait_eviction, + (int64_t)WT_TIMEDIFF_US(leave, enter)); } return (0); } @@ -197,8 +198,7 @@ __wt_evict_list_clear_page(WT_SESSION_IMPL *session, WT_REF *ref) } __wt_spin_unlock(session, &cache->evict_queues[q].evict_lock); } - WT_ASSERT(session, - !F_ISSET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU)); + WT_ASSERT(session, !F_ISSET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU)); __wt_spin_unlock(session, &cache->evict_queue_lock); } @@ -267,7 +267,7 @@ __wt_evict_server_wake(WT_SESSION_IMPL *session) } #endif - __wt_cond_auto_signal(session, cache->evict_cond); + __wt_cond_signal(session, cache->evict_cond); } /* @@ -280,12 +280,12 @@ __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread) WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DECL_RET; - bool did_work; + bool did_work, was_intr; conn = S2C(session); cache = conn->cache; -#ifdef HAVE_DIAGNOSTIC +#if defined(HAVE_DIAGNOSTIC) || defined(HAVE_VERBOSE) /* * Ensure the cache stuck timer is initialized when starting eviction. */ @@ -308,12 +308,28 @@ __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread) ret = __evict_server(session, &did_work); F_CLR(cache->walk_session, WT_SESSION_LOCKED_PASS); F_CLR(session, WT_SESSION_LOCKED_PASS); + was_intr = cache->pass_intr != 0; __wt_spin_unlock(session, &cache->evict_pass_lock); WT_ERR(ret); + + /* + * If the eviction server was interrupted, wait until + * requests have been processed: the system may + * otherwise be busy so don't go to sleep. 
+ */ + if (was_intr) { + while (cache->pass_intr != 0 && + F_ISSET(conn, WT_CONN_EVICTION_RUN) && + F_ISSET(thread, WT_THREAD_RUN)) + __wt_yield(); + continue; + } + __wt_verbose(session, WT_VERB_EVICTSERVER, "sleeping"); + /* Don't rely on signals: check periodically. */ __wt_cond_auto_wait( - session, cache->evict_cond, did_work); + session, cache->evict_cond, did_work, NULL); __wt_verbose(session, WT_VERB_EVICTSERVER, "waking"); } else WT_ERR(__evict_lru_pages(session, false)); @@ -353,12 +369,12 @@ err: WT_PANIC_MSG(session, ret, "cache eviction thread error"); static int __evict_server(WT_SESSION_IMPL *session, bool *did_work) { +#if defined(HAVE_DIAGNOSTIC) || defined(HAVE_VERBOSE) + struct timespec now; +#endif WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DECL_RET; -#ifdef HAVE_DIAGNOSTIC - struct timespec now; -#endif uint64_t orig_pages_evicted; conn = S2C(session); @@ -370,7 +386,8 @@ __evict_server(WT_SESSION_IMPL *session, bool *did_work) /* Evict pages from the cache as needed. */ WT_RET(__evict_pass(session)); - if (!F_ISSET(conn, WT_CONN_EVICTION_RUN)) + if (!F_ISSET(conn, WT_CONN_EVICTION_RUN) || + cache->pass_intr != 0) return (0); /* @@ -378,28 +395,31 @@ __evict_server(WT_SESSION_IMPL *session, bool *did_work) * otherwise we can block applications evicting large pages. */ if (!__wt_cache_stuck(session)) { - /* - * If we gave up acquiring the lock, that indicates a - * session is waiting for us to clear walks. Do that - * as part of a normal pass (without the handle list + * Try to get the handle list lock: if we give up, that + * indicates a session is waiting for us to clear walks. Do + * that as part of a normal pass (without the handle list * lock) to avoid deadlock. 
*/ - if ((ret = __evict_lock_dhandle(session)) == EBUSY) + if ((ret = __evict_lock_handle_list(session)) == EBUSY) return (0); WT_RET(ret); ret = __evict_clear_all_walks(session); - __wt_spin_unlock(session, &conn->dhandle_lock); + __wt_readunlock(session, &conn->dhandle_lock); WT_RET(ret); cache->pages_evicted = 0; } else if (cache->pages_evicted != cache->pages_evict) { cache->pages_evicted = cache->pages_evict; -#ifdef HAVE_DIAGNOSTIC +#if defined(HAVE_DIAGNOSTIC) || defined(HAVE_VERBOSE) __wt_epoch(session, &cache->stuck_ts); } else if (!F_ISSET(conn, WT_CONN_IN_MEMORY)) { /* - * After being stuck for 5 minutes, give up. + * If we're stuck for 5 minutes in diagnostic mode, or the + * verbose evict_stuck flag is configured, log the cache + * and transaction state. + * + * If we're stuck for 5 minutes in diagnostic mode, give up. * * We don't do this check for in-memory workloads because * application threads are not blocked by the cache being full. @@ -408,11 +428,22 @@ __evict_server(WT_SESSION_IMPL *session, bool *did_work) */ __wt_epoch(session, &now); if (WT_TIMEDIFF_SEC(now, cache->stuck_ts) > 300) { - ret = ETIMEDOUT; - __wt_err(session, ret, +#if defined(HAVE_DIAGNOSTIC) + __wt_err(session, ETIMEDOUT, "Cache stuck for too long, giving up"); - WT_TRET(__wt_dump_stuck_info(session, NULL)); + ret = ETIMEDOUT; + WT_TRET(__wt_verbose_dump_txn(session)); + WT_TRET(__wt_verbose_dump_cache(session)); return (ret); +#elif defined(HAVE_VERBOSE) + if (WT_VERBOSE_ISSET(session, WT_VERB_EVICT_STUCK)) { + WT_RET(__wt_verbose_dump_txn(session)); + WT_RET(__wt_verbose_dump_cache(session)); + + /* Reset the timer. 
*/ + __wt_epoch(session, &cache->stuck_ts); + } +#endif } #endif } @@ -697,8 +728,8 @@ __evict_pass(WT_SESSION_IMPL *session) */ WT_STAT_CONN_INCR(session, cache_eviction_server_slept); - __wt_cond_wait( - session, cache->evict_cond, WT_THOUSAND); + __wt_cond_wait(session, + cache->evict_cond, WT_THOUSAND, NULL); continue; } @@ -725,7 +756,7 @@ __evict_pass(WT_SESSION_IMPL *session) * Clear a single walk point. */ static int -__evict_clear_walk(WT_SESSION_IMPL *session, bool count_stat) +__evict_clear_walk(WT_SESSION_IMPL *session) { WT_BTREE *btree; WT_CACHE *cache; @@ -742,14 +773,14 @@ __evict_clear_walk(WT_SESSION_IMPL *session, bool count_stat) if ((ref = btree->evict_ref) == NULL) return (0); - if (count_stat) - WT_STAT_CONN_INCR(session, cache_eviction_walks_abandoned); + WT_STAT_CONN_INCR(session, cache_eviction_walks_abandoned); /* - * Clear evict_ref first, in case releasing it forces eviction (we - * assert we never try to evict the current eviction walk point). + * Clear evict_ref before releasing it in case that forces eviction (we + * assert that we never try to evict the current eviction walk point). */ btree->evict_ref = NULL; + WT_WITH_DHANDLE(cache->walk_session, session->dhandle, (ret = __wt_page_release(cache->walk_session, ref, WT_READ_NO_EVICT))); @@ -772,7 +803,7 @@ __evict_clear_all_walks(WT_SESSION_IMPL *session) TAILQ_FOREACH(dhandle, &conn->dhqh, q) if (WT_PREFIX_MATCH(dhandle->name, "file:")) WT_WITH_DHANDLE(session, dhandle, - WT_TRET(__evict_clear_walk(session, true))); + WT_TRET(__evict_clear_walk(session))); return (ret); } @@ -817,7 +848,7 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) /* Clear any existing LRU eviction walk for the file. 
*/ WT_WITH_PASS_LOCK(session, - ret = __evict_clear_walk(session, true)); + ret = __evict_clear_walk(session)); (void)__wt_atomic_subv32(&cache->pass_intr, 1); WT_ERR(ret); @@ -1087,7 +1118,8 @@ __evict_lru_pages(WT_SESSION_IMPL *session, bool is_server) /* If a worker thread found the queue empty, pause. */ if (ret == WT_NOTFOUND && !is_server && F_ISSET(S2C(session), WT_CONN_EVICTION_RUN)) - __wt_cond_wait(session, conn->evict_threads.wait_cond, 10000); + __wt_cond_wait( + session, conn->evict_threads.wait_cond, 10000, NULL); return (ret == WT_NOTFOUND ? 0 : ret); } @@ -1304,7 +1336,7 @@ retry: while (slot < max_entries) { * reference count to keep it alive while we sweep. */ if (!dhandle_locked) { - WT_ERR(__evict_lock_dhandle(session)); + WT_ERR(__evict_lock_handle_list(session)); dhandle_locked = true; } @@ -1383,7 +1415,7 @@ retry: while (slot < max_entries) { (void)__wt_atomic_addi32(&dhandle->session_inuse, 1); incr = true; - __wt_spin_unlock(session, &conn->dhandle_lock); + __wt_readunlock(session, &conn->dhandle_lock); dhandle_locked = false; /* @@ -1430,7 +1462,7 @@ retry: while (slot < max_entries) { } err: if (dhandle_locked) { - __wt_spin_unlock(session, &conn->dhandle_lock); + __wt_readunlock(session, &conn->dhandle_lock); dhandle_locked = false; } @@ -1526,6 +1558,19 @@ __evict_walk_file(WT_SESSION_IMPL *session, start = queue->evict_queue + *slotp; remaining_slots = max_entries - *slotp; total_slots = max_entries - queue->evict_entries; + btree_inuse = cache_inuse = 0; + target_pages_clean = target_pages_dirty = 0; + + /* + * The number of times we should fill the queue by the end of + * considering all trees. + */ +#define QUEUE_FILLS_PER_PASS 10 + + /* + * The minimum number of pages we should consider per tree. + */ +#define MIN_PAGES_PER_TREE 10 /* * The target number of pages for this tree is proportional to the @@ -1534,13 +1579,12 @@ __evict_walk_file(WT_SESSION_IMPL *session, * cache (and only have to walk it once). 
*/ if (F_ISSET(cache, WT_CACHE_EVICT_CLEAN)) { - btree_inuse = __wt_btree_bytes_inuse(session); + btree_inuse = __wt_btree_bytes_evictable(session); cache_inuse = __wt_cache_bytes_inuse(cache); bytes_per_slot = 1 + cache_inuse / total_slots; target_pages_clean = (uint32_t)( (btree_inuse + bytes_per_slot / 2) / bytes_per_slot); - } else - target_pages_clean = 0; + } if (F_ISSET(cache, WT_CACHE_EVICT_DIRTY)) { btree_inuse = __wt_btree_dirty_leaf_inuse(session); @@ -1548,35 +1592,58 @@ __evict_walk_file(WT_SESSION_IMPL *session, bytes_per_slot = 1 + cache_inuse / total_slots; target_pages_dirty = (uint32_t)( (btree_inuse + bytes_per_slot / 2) / bytes_per_slot); - } else - target_pages_dirty = 0; + } - target_pages = WT_MAX(target_pages_clean, target_pages_dirty); + /* + * Weight the number of target pages by the number of times we want to + * fill the cache per pass through all the trees. Note that we don't + * build this into the calculation above because we don't want to favor + * small trees, so round to a whole number of slots (zero for small + * trees) before multiplying. + */ + target_pages = WT_MAX(target_pages_clean, target_pages_dirty) * + QUEUE_FILLS_PER_PASS; + /* + * Randomly walk trees with a small fraction of the cache in case there + * are so many trees that none of them use enough of the cache to be + * allocated slots. + * + * The chance of walking a tree is equal to the chance that a random + * byte in cache belongs to the tree, weighted by how many times we + * want to fill queues during a pass through all the trees in cache. + */ if (target_pages == 0) { - /* - * Randomly walk trees with a tiny fraction of the cache in - * case there are so many trees that none of them use enough of - * the cache to be allocated slots. Walk small trees 1% of the - * time. 
- */ - if (__wt_random(&session->rnd) > UINT32_MAX / 100) + if (F_ISSET(cache, WT_CACHE_EVICT_CLEAN)) { + btree_inuse = __wt_btree_bytes_evictable(session); + cache_inuse = __wt_cache_bytes_inuse(cache); + } else { + btree_inuse = __wt_btree_dirty_leaf_inuse(session); + cache_inuse = __wt_cache_dirty_leaf_inuse(cache); + } + if (btree_inuse == 0 || cache_inuse == 0) + return (0); + if (__wt_random64(&session->rnd) % cache_inuse > + btree_inuse * QUEUE_FILLS_PER_PASS) return (0); - target_pages = 10; } + /* + * There is some cost associated with walking a tree. If we're going + * to visit this tree, always look for a minimum number of pages. + */ + if (target_pages < MIN_PAGES_PER_TREE) + target_pages = MIN_PAGES_PER_TREE; + + /* + * If the tree is dead or we're near the end of the queue, fill the + * remaining slots. + */ if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD) || target_pages > remaining_slots) target_pages = remaining_slots; end = start + target_pages; - walk_flags = - WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT; - - /* Randomize the walk direction. */ - if (btree->evict_walk_reverse) - FLD_SET(walk_flags, WT_READ_PREV); - /* * Examine at least a reasonable number of pages before deciding * whether to give up. When we are only looking for dirty pages, @@ -1588,8 +1655,41 @@ __evict_walk_file(WT_SESSION_IMPL *session, min_pages *= 10; /* - * Get some more eviction candidate pages. - * + * Choose a random point in the tree if looking for candidates in a + * tree with no starting point set. This is mostly aimed at ensuring + * eviction fairly visits all pages in trees with a lot of in-cache + * content. 
+ */ + if (btree->evict_ref == NULL) { + /* Ensure internal pages indexes remain valid for our walk */ + WT_WITH_PAGE_INDEX(session, ret = + __wt_random_descent(session, &btree->evict_ref, true)); + WT_RET_NOTFOUND_OK(ret); + + /* + * Reverse the direction of the walk each time we start at a + * random point so both ends of the tree are equally likely to + * be visited. + */ + btree->evict_walk_reverse = !btree->evict_walk_reverse; + } + + walk_flags = + WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT; + + if (btree->evict_walk_reverse) + FLD_SET(walk_flags, WT_READ_PREV); + + /* + * Get some more eviction candidate pages, starting at the last saved + * point. Clear the saved point immediately, we assert when discarding + * pages we're not discarding an eviction point, so this clear must be + * complete before the page is released. + */ + ref = btree->evict_ref; + btree->evict_ref = NULL; + + /* * !!! Take care terminating this loop. * * Don't make an extra call to __wt_tree_walk after we hit the end of a @@ -1602,7 +1702,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, for (evict = start, pages_queued = pages_seen = refs_walked = 0; evict < end && (ret == 0 || ret == WT_NOTFOUND); ret = __wt_tree_walk_count( - session, &btree->evict_ref, &refs_walked, walk_flags)) { + session, &ref, &refs_walked, walk_flags)) { /* * Check whether we're finding a good ratio of candidates vs * pages seen. Some workloads create "deserts" in trees where @@ -1616,7 +1716,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, if (give_up) break; - if ((ref = btree->evict_ref) == NULL) { + if (ref == NULL) { if (++restarts == 2) break; WT_STAT_CONN_INCR( @@ -1706,7 +1806,7 @@ fast: /* If the page can't be evicted, give up. */ ++pages_queued; if (WT_PAGE_IS_INTERNAL(page)) - ++internal_pages; + ++internal_pages; __wt_verbose(session, WT_VERB_EVICTSERVER, "select: %p, size %" WT_SIZET_FMT, @@ -1719,12 +1819,10 @@ fast: /* If the page can't be evicted, give up. 
*/ session, cache_eviction_pages_queued, (u_int)(evict - start)); /* - * If we didn't find any candidates in the file, reverse the direction - * of the walk and skip it next time. + * If we couldn't find the number of pages we were looking for, skip + * the tree next time. */ - if (give_up) - btree->evict_walk_reverse = !btree->evict_walk_reverse; - if (pages_queued == 0 && !urgent_queued) + if (pages_queued < target_pages / 2 && !urgent_queued) btree->evict_walk_period = WT_MIN( WT_MAX(1, 2 * btree->evict_walk_period), 100); else if (pages_queued == target_pages) @@ -1733,6 +1831,8 @@ fast: /* If the page can't be evicted, give up. */ btree->evict_walk_period /= 2; /* + * Give up the walk occasionally. + * * If we happen to end up on the root page or a page requiring urgent * eviction, clear it. We have to track hazard pointers, and the root * page complicates that calculation. @@ -1744,16 +1844,20 @@ fast: /* If the page can't be evicted, give up. */ * If we land on a page requiring forced eviction, move on to the next * page: we want this page evicted as quickly as possible. */ - if ((ref = btree->evict_ref) != NULL) { - /* Give up the walk occasionally. 
*/ + if (ref != NULL) { if (__wt_ref_is_root(ref) || evict == start || give_up || ref->page->read_gen == WT_READGEN_OLDEST || - ref->page->memory_footprint >= btree->splitmempage) - WT_RET(__evict_clear_walk(session, restarts == 0)); - else if (ref->page->read_gen == WT_READGEN_OLDEST) + ref->page->memory_footprint >= btree->splitmempage) { + if (restarts == 0) + WT_STAT_CONN_INCR( + session, cache_eviction_walks_abandoned); + WT_RET(__wt_page_release(cache->walk_session, + ref, WT_READ_NO_EVICT)); + ref = NULL; + } else if (ref->page->read_gen == WT_READGEN_OLDEST) WT_RET_NOTFOUND_OK(__wt_tree_walk_count( - session, &btree->evict_ref, - &refs_walked, walk_flags)); + session, &ref, &refs_walked, walk_flags)); + btree->evict_ref = ref; } WT_STAT_CONN_INCRV(session, cache_eviction_walk, refs_walked); @@ -2087,8 +2191,8 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) break; case WT_NOTFOUND: /* Allow the queue to re-populate before retrying. */ - __wt_cond_wait( - session, conn->evict_threads.wait_cond, 10000); + __wt_cond_wait(session, + conn->evict_threads.wait_cond, 10000, NULL); cache->app_waits++; break; default: @@ -2184,226 +2288,140 @@ __wt_evict_priority_clear(WT_SESSION_IMPL *session) S2BT(session)->evict_priority = 0; } -#ifdef HAVE_DIAGNOSTIC +#if defined(HAVE_DIAGNOSTIC) || defined(HAVE_VERBOSE) /* - * __dump_txn_state -- - * Output debugging information about the global transaction state. + * __verbose_dump_cache_single -- + * Output diagnostic information about a single file in the cache. 
*/ static int -__dump_txn_state(WT_SESSION_IMPL *session, FILE *fp) +__verbose_dump_cache_single(WT_SESSION_IMPL *session, + uint64_t *total_bytesp, uint64_t *total_dirty_bytesp) { - WT_CONNECTION_IMPL *conn; - WT_TXN_GLOBAL *txn_global; - WT_TXN *txn; - WT_TXN_STATE *s; - const char *iso_tag; - uint64_t id; - uint32_t i, session_cnt; - - conn = S2C(session); - txn_global = &conn->txn_global; - WT_ORDERED_READ(session_cnt, conn->session_cnt); - - /* Note: odd string concatenation avoids spelling errors. */ - if (fprintf(fp, "==========\n" "transaction state dump\n") < 0) - return (EIO); - - if (fprintf(fp, - "current ID: %" PRIu64 "\n" - "last running ID: %" PRIu64 "\n" - "oldest ID: %" PRIu64 "\n" - "oldest named snapshot ID: %" PRIu64 "\n", - txn_global->current, txn_global->last_running, - txn_global->oldest_id, txn_global->nsnap_oldest_id) < 0) - return (EIO); - - if (fprintf(fp, - "checkpoint running? %s\n" - "checkpoint generation: %" PRIu64 "\n" - "checkpoint pinned ID: %" PRIu64 "\n" - "checkpoint txn ID: %" PRIu64 "\n" - "session count: %" PRIu32 "\n", - txn_global->checkpoint_running ? "yes" : "no", - txn_global->checkpoint_gen, - txn_global->checkpoint_pinned, - txn_global->checkpoint_txnid, - session_cnt) < 0) - return (EIO); - - if (fprintf(fp, "Dumping transaction state of active sessions\n") < 0) - return (EIO); - - /* - * Walk each session transaction state and dump information. Accessing - * the content of session handles is not thread safe, so some - * information may change while traversing if other threads are active - * at the same time, which is OK since this is diagnostic code. 
- */ - for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) { - /* Skip sessions with no active transaction */ - if ((id = s->id) == WT_TXN_NONE && s->pinned_id == WT_TXN_NONE) - continue; + WT_DATA_HANDLE *dhandle; + WT_PAGE *page; + WT_REF *next_walk; + size_t size; + uint64_t intl_bytes, intl_bytes_max, intl_dirty_bytes; + uint64_t intl_dirty_bytes_max, intl_dirty_pages, intl_pages; + uint64_t leaf_bytes, leaf_bytes_max, leaf_dirty_bytes; + uint64_t leaf_dirty_bytes_max, leaf_dirty_pages, leaf_pages; - txn = &conn->sessions[i].txn; - iso_tag = "INVALID"; - switch (txn->isolation) { - case WT_ISO_READ_COMMITTED: - iso_tag = "WT_ISO_READ_COMMITTED"; - break; - case WT_ISO_READ_UNCOMMITTED: - iso_tag = "WT_ISO_READ_UNCOMMITTED"; - break; - case WT_ISO_SNAPSHOT: - iso_tag = "WT_ISO_SNAPSHOT"; - break; + intl_bytes = intl_bytes_max = intl_dirty_bytes = 0; + intl_dirty_bytes_max = intl_dirty_pages = intl_pages = 0; + leaf_bytes = leaf_bytes_max = leaf_dirty_bytes = 0; + leaf_dirty_bytes_max = leaf_dirty_pages = leaf_pages = 0; + + next_walk = NULL; + while (__wt_tree_walk(session, &next_walk, + WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_WAIT) == 0 && + next_walk != NULL) { + page = next_walk->page; + size = page->memory_footprint; + + if (WT_PAGE_IS_INTERNAL(page)) { + ++intl_pages; + intl_bytes += size; + intl_bytes_max = WT_MAX(intl_bytes_max, size); + if (__wt_page_is_modified(page)) { + ++intl_dirty_pages; + intl_dirty_bytes += size; + intl_dirty_bytes_max = + WT_MAX(intl_dirty_bytes_max, size); + } + } else { + ++leaf_pages; + leaf_bytes += size; + leaf_bytes_max = WT_MAX(leaf_bytes_max, size); + if (__wt_page_is_modified(page)) { + ++leaf_dirty_pages; + leaf_dirty_bytes += size; + leaf_dirty_bytes_max = + WT_MAX(leaf_dirty_bytes_max, size); + } } - - if (fprintf(fp, - "ID: %6" PRIu64 - ", mod count: %u" - ", pinned ID: %" PRIu64 - ", snap min: %" PRIu64 - ", snap max: %" PRIu64 - ", metadata pinned ID: %" PRIu64 - ", flags: 0x%08" PRIx32 - ", name: 
%s" - ", isolation: %s" "\n", - id, - txn->mod_count, - s->pinned_id, - txn->snap_min, - txn->snap_max, - s->metadata_pinned, - txn->flags, - conn->sessions[i].name == NULL ? - "EMPTY" : conn->sessions[i].name, - iso_tag) < 0) - return (EIO); } + dhandle = session->dhandle; + if (dhandle->checkpoint == NULL) + WT_RET(__wt_msg(session, "%s():", dhandle->name)); + else + WT_RET(__wt_msg(session, "%s(checkpoint=%s):", + dhandle->name, dhandle->checkpoint)); + if (intl_pages != 0) + WT_RET(__wt_msg(session, + "internal: " + "%" PRIu64 " pages, " + "%" PRIu64 "MB, " + "%" PRIu64 "/%" PRIu64 " clean/dirty pages, " + "%" PRIu64 "/%" PRIu64 " clean/dirty MB, " + "%" PRIu64 "MB max page, " + "%" PRIu64 "MB max dirty page", + intl_pages, + intl_bytes / WT_MEGABYTE, + intl_pages - intl_dirty_pages, + intl_dirty_pages, + (intl_bytes - intl_dirty_bytes) / WT_MEGABYTE, + intl_dirty_bytes / WT_MEGABYTE, + intl_bytes_max / WT_MEGABYTE, + intl_dirty_bytes_max / WT_MEGABYTE)); + if (leaf_pages != 0) + WT_RET(__wt_msg(session, + "leaf: " + "%" PRIu64 " pages, " + "%" PRIu64 "MB, " + "%" PRIu64 "/%" PRIu64 " clean/dirty pages, " + "%" PRIu64 "/%" PRIu64 " clean/dirty MB, " + "%" PRIu64 "MB max page, " + "%" PRIu64 "MB max dirty page", + leaf_pages, + leaf_bytes / WT_MEGABYTE, + leaf_pages - leaf_dirty_pages, + leaf_dirty_pages, + (leaf_bytes - leaf_dirty_bytes) / WT_MEGABYTE, + leaf_dirty_bytes / WT_MEGABYTE, + leaf_bytes_max / WT_MEGABYTE, + leaf_dirty_bytes_max / WT_MEGABYTE)); + + *total_bytesp += intl_bytes + leaf_bytes; + *total_dirty_bytesp += intl_dirty_bytes + leaf_dirty_bytes; + return (0); } /* - * __dump_cache -- - * Output debugging information about the size of the files in cache. + * __wt_verbose_dump_cache -- + * Output diagnostic information about the cache. 
*/ -static int -__dump_cache(WT_SESSION_IMPL *session, FILE *fp) +int +__wt_verbose_dump_cache(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; - WT_DATA_HANDLE *dhandle, *saved_dhandle; - WT_PAGE *page; - WT_REF *next_walk; - uint64_t intl_bytes, intl_bytes_max, intl_dirty_bytes; - uint64_t intl_dirty_bytes_max, intl_dirty_pages, intl_pages; - uint64_t leaf_bytes, leaf_bytes_max, leaf_dirty_bytes; - uint64_t leaf_dirty_bytes_max, leaf_dirty_pages, leaf_pages; + WT_DATA_HANDLE *dhandle; + WT_DECL_RET; uint64_t total_bytes, total_dirty_bytes; - size_t size; conn = S2C(session); total_bytes = total_dirty_bytes = 0; - /* Note: odd string concatenation avoids spelling errors. */ - if (fprintf(fp, "==========\n" "cache dump\n") < 0) - return (EIO); + WT_RET(__wt_msg(session, "%s", WT_DIVIDER)); + WT_RET(__wt_msg(session, "cache dump")); - saved_dhandle = session->dhandle; - TAILQ_FOREACH(dhandle, &conn->dhqh, q) { + for (dhandle = NULL;;) { + WT_WITH_HANDLE_LIST_READ_LOCK(session, + WT_DHANDLE_NEXT(session, dhandle, &conn->dhqh, q)); + if (dhandle == NULL) + break; if (!WT_PREFIX_MATCH(dhandle->name, "file:") || !F_ISSET(dhandle, WT_DHANDLE_OPEN)) continue; - intl_bytes = intl_bytes_max = intl_dirty_bytes = 0; - intl_dirty_bytes_max = intl_dirty_pages = intl_pages = 0; - leaf_bytes = leaf_bytes_max = leaf_dirty_bytes = 0; - leaf_dirty_bytes_max = leaf_dirty_pages = leaf_pages = 0; - - next_walk = NULL; - session->dhandle = dhandle; - while (__wt_tree_walk(session, &next_walk, - WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_WAIT) == 0 && - next_walk != NULL) { - page = next_walk->page; - size = page->memory_footprint; - - if (WT_PAGE_IS_INTERNAL(page)) { - ++intl_pages; - intl_bytes += size; - intl_bytes_max = WT_MAX(intl_bytes_max, size); - if (__wt_page_is_modified(page)) { - ++intl_dirty_pages; - intl_dirty_bytes += size; - intl_dirty_bytes_max = - WT_MAX(intl_dirty_bytes_max, size); - } - } else { - ++leaf_pages; - leaf_bytes += size; - leaf_bytes_max = 
WT_MAX(leaf_bytes_max, size); - if (__wt_page_is_modified(page)) { - ++leaf_dirty_pages; - leaf_dirty_bytes += size; - leaf_dirty_bytes_max = - WT_MAX(leaf_dirty_bytes_max, size); - } - } - } - session->dhandle = NULL; - - if (dhandle->checkpoint == NULL) { - if (fprintf(fp, - "%s(): \n", dhandle->name) < 0) - return (EIO); - } else { - if (fprintf(fp, "%s(checkpoint=%s): \n", - dhandle->name, dhandle->checkpoint) < 0) - return (EIO); - } - if (intl_pages != 0) { - if (fprintf(fp, - "\t" "internal: " - "%" PRIu64 " pages, " - "%" PRIu64 "MB, " - "%" PRIu64 "/%" PRIu64 " clean/dirty pages, " - "%" PRIu64 "/%" PRIu64 " clean/dirty MB, " - "%" PRIu64 "MB max page, " - "%" PRIu64 "MB max dirty page\n", - intl_pages, - intl_bytes >> 20, - intl_pages - intl_dirty_pages, - intl_dirty_pages, - (intl_bytes - intl_dirty_bytes) >> 20, - intl_dirty_bytes >> 20, - intl_bytes_max >> 20, - intl_dirty_bytes_max >> 20) < 0) - return (EIO); - } - if (leaf_pages != 0) { - if (fprintf(fp, - "\t" "leaf: " - "%" PRIu64 " pages, " - "%" PRIu64 "MB, " - "%" PRIu64 "/%" PRIu64 " clean/dirty pages, " - "%" PRIu64 "/%" PRIu64 " clean/dirty MB, " - "%" PRIu64 "MB max page, " - "%" PRIu64 "MB max dirty page\n", - leaf_pages, - leaf_bytes >> 20, - leaf_pages - leaf_dirty_pages, - leaf_dirty_pages, - (leaf_bytes - leaf_dirty_bytes) >> 20, - leaf_dirty_bytes >> 20, - leaf_bytes_max >> 20, - leaf_dirty_bytes_max >> 20) < 0) - return (EIO); - } - - total_bytes += intl_bytes + leaf_bytes; - total_dirty_bytes += intl_dirty_bytes + leaf_dirty_bytes; + WT_WITH_DHANDLE(session, dhandle, + ret = __verbose_dump_cache_single( + session, &total_bytes, &total_dirty_bytes)); + if (ret != 0) + break; } - session->dhandle = saved_dhandle; + WT_RET(ret); /* * Apply the overhead percentage so our total bytes are comparable with @@ -2411,39 +2429,16 @@ __dump_cache(WT_SESSION_IMPL *session, FILE *fp) */ total_bytes = __wt_cache_bytes_plus_overhead(conn->cache, total_bytes); - if (fprintf(fp, + 
WT_RET(__wt_msg(session, "cache dump: " - "total found: %" PRIu64 "MB vs tracked inuse %" PRIu64 "MB\n" - "total dirty bytes: %" PRIu64 "MB\n", - total_bytes >> 20, __wt_cache_bytes_inuse(conn->cache) >> 20, - total_dirty_bytes >> 20) < 0) - return (EIO); - if (fprintf(fp, "==========\n") < 0) - return (EIO); + "total found: %" PRIu64 "MB vs tracked inuse %" PRIu64 "MB", + total_bytes / WT_MEGABYTE, + __wt_cache_bytes_inuse(conn->cache) / WT_MEGABYTE)); + WT_RET(__wt_msg(session, + "total dirty bytes: %" PRIu64 "MB", + total_dirty_bytes / WT_MEGABYTE)); + WT_RET(__wt_msg(session, "%s", WT_DIVIDER)); return (0); } - -/* - * __wt_dump_stuck_info -- - * Dump debugging information to a file (default stderr) about the state - * of WiredTiger when we have determined that the cache is stuck full. - */ -int -__wt_dump_stuck_info(WT_SESSION_IMPL *session, const char *ofile) -{ - FILE *fp; - WT_DECL_RET; - - if (ofile == NULL) - fp = stderr; - else if ((fp = fopen(ofile, "w")) == NULL) - return (EIO); - - WT_ERR(__dump_txn_state(session, fp)); - WT_ERR(__dump_cache(session, fp)); -err: if (ofile != NULL && fclose(fp) != 0) - return (EIO); - return (ret); -} #endif diff --git a/src/third_party/wiredtiger/src/evict/evict_stat.c b/src/third_party/wiredtiger/src/evict/evict_stat.c index 2dd3b1e83a0..7c2d5722a63 100644 --- a/src/third_party/wiredtiger/src/evict/evict_stat.c +++ b/src/third_party/wiredtiger/src/evict/evict_stat.c @@ -134,5 +134,5 @@ __wt_curstat_cache_walk(WT_SESSION_IMPL *session) WT_STAT_DATA_SET(session, cache_state_root_size, btree->root.page->memory_footprint); - WT_WITH_HANDLE_LIST_LOCK(session, __evict_stat_walk(session)); + __evict_stat_walk(session); } diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index 43c1a309d52..39ca223aebf 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -483,6 +483,7 @@ struct __wt_page { */ struct { 
WT_REF *parent_ref; /* Parent reference */ + uint64_t split_gen; /* Generation of last split */ struct __wt_page_index { uint32_t entries; @@ -492,6 +493,8 @@ struct __wt_page { } intl; #undef pg_intl_parent_ref #define pg_intl_parent_ref u.intl.parent_ref +#undef pg_intl_split_gen +#define pg_intl_split_gen u.intl.split_gen /* * Macros to copy/set the index because the name is obscured to ensure @@ -593,9 +596,8 @@ struct __wt_page { #define WT_PAGE_DISK_MAPPED 0x04 /* Disk image in mapped memory */ #define WT_PAGE_EVICT_LRU 0x08 /* Page is on the LRU queue */ #define WT_PAGE_OVERFLOW_KEYS 0x10 /* Page has overflow keys */ -#define WT_PAGE_SPLIT_BLOCK 0x20 /* Split blocking eviction and splits */ -#define WT_PAGE_SPLIT_INSERT 0x40 /* A leaf page was split for append */ -#define WT_PAGE_UPDATE_IGNORE 0x80 /* Ignore updates on page discard */ +#define WT_PAGE_SPLIT_INSERT 0x20 /* A leaf page was split for append */ +#define WT_PAGE_UPDATE_IGNORE 0x40 /* Ignore updates on page discard */ uint8_t flags_atomic; /* Atomic flags, use F_*_ATOMIC */ uint8_t unused[2]; /* Unused padding */ diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index 09fa8df8c56..315efa86fa6 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -70,6 +70,30 @@ __wt_btree_bytes_inuse(WT_SESSION_IMPL *session) return (__wt_cache_bytes_plus_overhead(cache, btree->bytes_inmem)); } +/* + * __wt_btree_bytes_evictable -- + * Return the number of bytes that can be evicted (i.e. bytes apart from + * the pinned root page). + */ +static inline uint64_t +__wt_btree_bytes_evictable(WT_SESSION_IMPL *session) +{ + WT_BTREE *btree; + WT_CACHE *cache; + WT_PAGE *root_page; + uint64_t bytes_inmem, bytes_root; + + btree = S2BT(session); + cache = S2C(session)->cache; + root_page = btree->root.page; + + bytes_inmem = btree->bytes_inmem; + bytes_root = root_page == NULL ? 
0 : root_page->memory_footprint; + + return (bytes_inmem <= bytes_root ? 0 : + __wt_cache_bytes_plus_overhead(cache, bytes_inmem - bytes_root)); +} + /* * __wt_btree_dirty_inuse -- * Return the number of dirty bytes in use. @@ -1324,8 +1348,8 @@ __wt_page_can_evict( * discards its WT_REF array, and a thread traversing the original * parent page index might see a freed WT_REF. */ - if (WT_PAGE_IS_INTERNAL(page) && - F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_BLOCK)) + if (WT_PAGE_IS_INTERNAL(page) && !__wt_split_obsolete( + session, page->pg_intl_split_gen)) return (false); /* diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h index 70f6169200d..abd5a1901f7 100644 --- a/src/third_party/wiredtiger/src/include/cache.h +++ b/src/third_party/wiredtiger/src/include/cache.h @@ -83,7 +83,7 @@ struct __wt_cache { uint64_t worker_evicts; /* Pages evicted by worker threads */ uint64_t evict_max_page_size; /* Largest page seen at eviction */ -#ifdef HAVE_DIAGNOSTIC +#if defined(HAVE_DIAGNOSTIC) || defined(HAVE_VERBOSE) struct timespec stuck_ts; /* Stuck timestamp */ #endif diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i index 17ab39e97d2..d71978ccf35 100644 --- a/src/third_party/wiredtiger/src/include/cache.i +++ b/src/third_party/wiredtiger/src/include/cache.i @@ -364,7 +364,7 @@ __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool *didworkp) * block eviction), we don't want to highjack the thread for eviction. */ if (F_ISSET(session, WT_SESSION_NO_EVICTION | - WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_LOCKED_SCHEMA)) + WT_SESSION_LOCKED_HANDLE_LIST_WRITE | WT_SESSION_LOCKED_SCHEMA)) return (0); /* In memory configurations don't block when the cache is full. 
*/ diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index 64ac4271db1..ce483d3291a 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -123,12 +123,16 @@ struct __wt_named_extractor { * main queue and the hashed queue. */ #define WT_CONN_DHANDLE_INSERT(conn, dhandle, bucket) do { \ + WT_ASSERT(session, \ + F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \ TAILQ_INSERT_HEAD(&(conn)->dhqh, dhandle, q); \ TAILQ_INSERT_HEAD(&(conn)->dhhash[bucket], dhandle, hashq); \ ++conn->dhandle_count; \ } while (0) #define WT_CONN_DHANDLE_REMOVE(conn, dhandle, bucket) do { \ + WT_ASSERT(session, \ + F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \ TAILQ_REMOVE(&(conn)->dhqh, dhandle, q); \ TAILQ_REMOVE(&(conn)->dhhash[bucket], dhandle, hashq); \ --conn->dhandle_count; \ @@ -163,13 +167,13 @@ struct __wt_connection_impl { WT_SPINLOCK api_lock; /* Connection API spinlock */ WT_SPINLOCK checkpoint_lock; /* Checkpoint spinlock */ - WT_SPINLOCK dhandle_lock; /* Data handle list spinlock */ WT_SPINLOCK fh_lock; /* File handle queue spinlock */ WT_SPINLOCK metadata_lock; /* Metadata update spinlock */ WT_SPINLOCK reconfig_lock; /* Single thread reconfigure */ WT_SPINLOCK schema_lock; /* Schema operation spinlock */ - WT_SPINLOCK table_lock; /* Table creation spinlock */ + WT_RWLOCK table_lock; /* Table list lock */ WT_SPINLOCK turtle_lock; /* Turtle file spinlock */ + WT_RWLOCK dhandle_lock; /* Data handle list lock */ /* * We distribute the btree page locks across a set of spin locks. 
Don't diff --git a/src/third_party/wiredtiger/src/include/dhandle.h b/src/third_party/wiredtiger/src/include/dhandle.h index dcc788f0839..4f318e7bccf 100644 --- a/src/third_party/wiredtiger/src/include/dhandle.h +++ b/src/third_party/wiredtiger/src/include/dhandle.h @@ -37,6 +37,24 @@ #define WT_SESSION_META_DHANDLE(s) \ (((WT_CURSOR_BTREE *)((s)->meta_cursor))->btree->dhandle) +#define WT_DHANDLE_ACQUIRE(dhandle) \ + (void)__wt_atomic_add32(&dhandle->session_ref, 1) + +#define WT_DHANDLE_RELEASE(dhandle) \ + (void)__wt_atomic_sub32(&dhandle->session_ref, 1) + +#define WT_DHANDLE_NEXT(session, dhandle, head, field) do { \ + WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));\ + if (dhandle == NULL) \ + dhandle = TAILQ_FIRST(head); \ + else { \ + WT_DHANDLE_RELEASE(dhandle); \ + dhandle = TAILQ_NEXT(dhandle, field); \ + } \ + if (dhandle != NULL) \ + WT_DHANDLE_ACQUIRE(dhandle); \ +} while (0) + /* * WT_DATA_HANDLE -- * A handle for a generic named data source. diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 566eb386c29..19ad9a880df 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -98,6 +98,7 @@ extern void __wt_cursor_key_order_reset(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_A extern void __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern bool __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_reset(WT_CURSOR_BTREE *cbt) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -105,7 +106,6 @@ extern int __wt_btcur_insert(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((w extern int __wt_btcur_update_check(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_remove(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -150,6 +150,9 @@ extern int __wt_ovfl_cache(WT_SESSION_IMPL *session, WT_PAGE *page, void *cookie extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int 
__wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, const void *image, size_t memsize, uint32_t flags, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, bool eviction) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_las_remove_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags @@ -160,6 +163,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags extern int __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern bool __wt_split_obsolete(WT_SESSION_IMPL 
*session, uint64_t split_gen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_split_stash_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_split_stash_discard_all( WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -192,8 +196,6 @@ extern WT_UPDATE *__wt_update_obsolete_check( WT_SESSION_IMPL *session, WT_PAGE extern void __wt_update_obsolete_free( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool insert) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_row_random_descent(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_las_stats_update(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_las_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_las_destroy(WT_SESSION_IMPL *session) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -254,6 +256,7 @@ extern WT_THREAD_RET __wt_cache_pool_server(void *arg) WT_GCC_FUNC_DECL_ATTRIBUT extern int __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_checkpoint_signal(WT_SESSION_IMPL *session, wt_off_t logsize) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_conn_dhandle_alloc( WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_conn_dhandle_find( WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_conn_btree_open( WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -262,7 +265,7 @@ extern int __wt_conn_dhandle_close_all( WT_SESSION_IMPL *session, const char *ur extern int __wt_conn_dhandle_discard_single( WT_SESSION_IMPL *session, bool final, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_conn_dhandle_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_connection_init(WT_CONNECTION_IMPL 
*conn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_connection_destroy(WT_CONNECTION_IMPL *conn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_connection_destroy(WT_CONNECTION_IMPL *conn) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_logmgr_reconfig(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_truncate_files( WT_SESSION_IMPL *session, WT_CURSOR *cursor, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_log_wrlsn(WT_SESSION_IMPL *session, int *yield) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -352,7 +355,7 @@ extern int __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int extern bool __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_evict_priority_set(WT_SESSION_IMPL *session, uint64_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_evict_priority_clear(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_dump_stuck_info(WT_SESSION_IMPL *session, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_verbose_dump_cache(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_curstat_cache_walk(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -403,7 +406,7 @@ extern int __wt_log_slot_switch( WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bo extern int __wt_log_slot_new(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_slot_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_slot_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int64_t __wt_log_slot_release(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, int64_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_log_slot_free(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_clsm_request_switch(WT_CURSOR_LSM *clsm) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -613,11 +616,9 @@ extern void __wt_session_close_cache(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ extern int __wt_session_get_btree(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint, const char *cfg[], uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cond_auto_alloc( WT_SESSION_IMPL *session, const char *name, bool is_signalled, uint64_t min, uint64_t max, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cond_auto_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cond_auto_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool *signalled) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cond_auto_wait( WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cond_auto_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_cond_auto_alloc(WT_SESSION_IMPL *session, const char *name, uint64_t min, uint64_t max, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_cond_auto_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_cond_auto_wait(WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool (*run_func)(WT_SESSION_IMPL *)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_decrypt(WT_SESSION_IMPL *session, WT_ENCRYPTOR *encryptor, size_t skip, WT_ITEM *in, WT_ITEM *out) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int 
__wt_encrypt(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t skip, WT_ITEM *in, WT_ITEM *out) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_encrypt_size(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t incoming_size, size_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -689,6 +690,7 @@ extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2) WT_GCC_FUNC_DECL_ATTRIBUT extern void __wt_random_init(WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_random_init_seed( WT_SESSION_IMPL *session, WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern uint32_t __wt_random(WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern uint64_t __wt_random64(WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -741,6 +743,7 @@ extern void __wt_txn_stats_update(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATT extern void __wt_txn_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_txn_global_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_verbose_dump_txn(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[], bool waiting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/third_party/wiredtiger/src/include/extern_posix.h b/src/third_party/wiredtiger/src/include/extern_posix.h index 5acb7b0ed27..fed7835ada1 100644 --- a/src/third_party/wiredtiger/src/include/extern_posix.h +++ b/src/third_party/wiredtiger/src/include/extern_posix.h @@ -12,8 +12,8 @@ extern int __wt_posix_map(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapp extern int __wt_posix_map_preload(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, const void *map, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int 
__wt_posix_map_discard(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *map, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_posix_unmap(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_region, size_t len, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, bool is_signalled, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/third_party/wiredtiger/src/include/extern_win.h b/src/third_party/wiredtiger/src/include/extern_win.h index 11b45f11304..0bfc821c7a6 100644 --- a/src/third_party/wiredtiger/src/include/extern_win.h +++ b/src/third_party/wiredtiger/src/include/extern_win.h @@ -10,8 +10,8 @@ extern int __wt_os_win(WT_SESSION_IMPL 
*session) WT_GCC_FUNC_DECL_ATTRIBUTE((war extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_win_map(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_win_unmap(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_region, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, bool is_signalled, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/third_party/wiredtiger/src/include/flags.h 
b/src/third_party/wiredtiger/src/include/flags.h index 2f0c207078a..c1fff920e3b 100644 --- a/src/third_party/wiredtiger/src/include/flags.h +++ b/src/third_party/wiredtiger/src/include/flags.h @@ -53,22 +53,24 @@ #define WT_SESSION_CAN_WAIT 0x00000001 #define WT_SESSION_INTERNAL 0x00000002 #define WT_SESSION_LOCKED_CHECKPOINT 0x00000004 -#define WT_SESSION_LOCKED_HANDLE_LIST 0x00000008 -#define WT_SESSION_LOCKED_METADATA 0x00000010 -#define WT_SESSION_LOCKED_PASS 0x00000020 -#define WT_SESSION_LOCKED_SCHEMA 0x00000040 -#define WT_SESSION_LOCKED_SLOT 0x00000080 -#define WT_SESSION_LOCKED_TABLE 0x00000100 -#define WT_SESSION_LOCKED_TURTLE 0x00000200 -#define WT_SESSION_LOGGING_INMEM 0x00000400 -#define WT_SESSION_LOOKASIDE_CURSOR 0x00000800 -#define WT_SESSION_NO_CACHE 0x00001000 -#define WT_SESSION_NO_DATA_HANDLES 0x00002000 -#define WT_SESSION_NO_EVICTION 0x00004000 -#define WT_SESSION_NO_LOGGING 0x00008000 -#define WT_SESSION_NO_SCHEMA_LOCK 0x00010000 -#define WT_SESSION_QUIET_CORRUPT_FILE 0x00020000 -#define WT_SESSION_SERVER_ASYNC 0x00040000 +#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x00000008 +#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x00000010 +#define WT_SESSION_LOCKED_METADATA 0x00000020 +#define WT_SESSION_LOCKED_PASS 0x00000040 +#define WT_SESSION_LOCKED_SCHEMA 0x00000080 +#define WT_SESSION_LOCKED_SLOT 0x00000100 +#define WT_SESSION_LOCKED_TABLE_READ 0x00000200 +#define WT_SESSION_LOCKED_TABLE_WRITE 0x00000400 +#define WT_SESSION_LOCKED_TURTLE 0x00000800 +#define WT_SESSION_LOGGING_INMEM 0x00001000 +#define WT_SESSION_LOOKASIDE_CURSOR 0x00002000 +#define WT_SESSION_NO_CACHE 0x00004000 +#define WT_SESSION_NO_DATA_HANDLES 0x00008000 +#define WT_SESSION_NO_EVICTION 0x00010000 +#define WT_SESSION_NO_LOGGING 0x00020000 +#define WT_SESSION_NO_SCHEMA_LOCK 0x00040000 +#define WT_SESSION_QUIET_CORRUPT_FILE 0x00080000 +#define WT_SESSION_SERVER_ASYNC 0x00100000 #define WT_STAT_CLEAR 0x00000001 #define WT_STAT_JSON 0x00000002 #define WT_STAT_ON_CLOSE 0x00000004 
@@ -90,28 +92,29 @@ #define WT_VERB_COMPACT 0x00000008 #define WT_VERB_EVICT 0x00000010 #define WT_VERB_EVICTSERVER 0x00000020 -#define WT_VERB_FILEOPS 0x00000040 -#define WT_VERB_HANDLEOPS 0x00000080 -#define WT_VERB_LOG 0x00000100 -#define WT_VERB_LSM 0x00000200 -#define WT_VERB_LSM_MANAGER 0x00000400 -#define WT_VERB_METADATA 0x00000800 -#define WT_VERB_MUTEX 0x00001000 -#define WT_VERB_OVERFLOW 0x00002000 -#define WT_VERB_READ 0x00004000 -#define WT_VERB_REBALANCE 0x00008000 -#define WT_VERB_RECONCILE 0x00010000 -#define WT_VERB_RECOVERY 0x00020000 -#define WT_VERB_RECOVERY_PROGRESS 0x00040000 -#define WT_VERB_SALVAGE 0x00080000 -#define WT_VERB_SHARED_CACHE 0x00100000 -#define WT_VERB_SPLIT 0x00200000 -#define WT_VERB_TEMPORARY 0x00400000 -#define WT_VERB_THREAD_GROUP 0x00800000 -#define WT_VERB_TRANSACTION 0x01000000 -#define WT_VERB_VERIFY 0x02000000 -#define WT_VERB_VERSION 0x04000000 -#define WT_VERB_WRITE 0x08000000 +#define WT_VERB_EVICT_STUCK 0x00000040 +#define WT_VERB_FILEOPS 0x00000080 +#define WT_VERB_HANDLEOPS 0x00000100 +#define WT_VERB_LOG 0x00000200 +#define WT_VERB_LSM 0x00000400 +#define WT_VERB_LSM_MANAGER 0x00000800 +#define WT_VERB_METADATA 0x00001000 +#define WT_VERB_MUTEX 0x00002000 +#define WT_VERB_OVERFLOW 0x00004000 +#define WT_VERB_READ 0x00008000 +#define WT_VERB_REBALANCE 0x00010000 +#define WT_VERB_RECONCILE 0x00020000 +#define WT_VERB_RECOVERY 0x00040000 +#define WT_VERB_RECOVERY_PROGRESS 0x00080000 +#define WT_VERB_SALVAGE 0x00100000 +#define WT_VERB_SHARED_CACHE 0x00200000 +#define WT_VERB_SPLIT 0x00400000 +#define WT_VERB_TEMPORARY 0x00800000 +#define WT_VERB_THREAD_GROUP 0x01000000 +#define WT_VERB_TRANSACTION 0x02000000 +#define WT_VERB_VERIFY 0x04000000 +#define WT_VERB_VERSION 0x08000000 +#define WT_VERB_WRITE 0x10000000 #define WT_VISIBILITY_ERR 0x00000080 /* * flags section: END diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h index d9fea892c68..a6be3582b4d 100644 
--- a/src/third_party/wiredtiger/src/include/log.h +++ b/src/third_party/wiredtiger/src/include/log.h @@ -163,7 +163,7 @@ struct __wt_logslot { WT_CACHE_LINE_PAD_BEGIN volatile int64_t slot_state; /* Slot state */ int64_t slot_unbuffered; /* Unbuffered data in this slot */ - int32_t slot_error; /* Error value */ + int slot_error; /* Error value */ wt_off_t slot_start_offset; /* Starting file offset */ wt_off_t slot_last_offset; /* Last record offset */ WT_LSN slot_release_lsn; /* Slot release LSN */ @@ -254,6 +254,7 @@ struct __wt_log { #define WT_SLOT_POOL 128 WT_LOGSLOT *active_slot; /* Active slot */ WT_LOGSLOT slot_pool[WT_SLOT_POOL]; /* Pool of all slots */ + int32_t pool_index; /* Index into slot pool */ size_t slot_buf_size; /* Buffer size for slots */ #ifdef HAVE_DIAGNOSTIC uint64_t write_calls; /* Calls to log_write */ diff --git a/src/third_party/wiredtiger/src/include/misc.i b/src/third_party/wiredtiger/src/include/misc.i index f36be32d6a2..d5692a3f9cf 100644 --- a/src/third_party/wiredtiger/src/include/misc.i +++ b/src/third_party/wiredtiger/src/include/misc.i @@ -11,11 +11,12 @@ * Wait on a mutex, optionally timing out. */ static inline void -__wt_cond_wait(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs) +__wt_cond_wait(WT_SESSION_IMPL *session, + WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *)) { bool notused; - __wt_cond_wait_signal(session, cond, usecs, &notused); + __wt_cond_wait_signal(session, cond, usecs, run_func, &notused); } /* diff --git a/src/third_party/wiredtiger/src/include/mutex.h b/src/third_party/wiredtiger/src/include/mutex.h index 727a690bb1c..06b8c4a3304 100644 --- a/src/third_party/wiredtiger/src/include/mutex.h +++ b/src/third_party/wiredtiger/src/include/mutex.h @@ -21,8 +21,8 @@ struct __wt_condvar { int waiters; /* Numbers of waiters, or -1 if signalled with no waiters. */ /* - * The following fields are only used for automatically adjusting - * condition variables. 
They could be in a separate structure. + * The following fields are used for automatically adjusting condition + * variable wait times. */ uint64_t min_wait; /* Minimum wait duration */ uint64_t max_wait; /* Maximum wait duration */ diff --git a/src/third_party/wiredtiger/src/include/packing.i b/src/third_party/wiredtiger/src/include/packing.i index 17ca261bcfc..8ba3dd536ac 100644 --- a/src/third_party/wiredtiger/src/include/packing.i +++ b/src/third_party/wiredtiger/src/include/packing.i @@ -168,10 +168,15 @@ next: if (pack->cur == pack->end) (int)(pack->end - pack->orig), pack->orig); return (0); case 'u': - case 'U': /* Special case for items with a size prefix. */ pv->type = (!pv->havesize && *pack->cur != '\0') ? 'U' : 'u'; return (0); + case 'U': + /* + * Don't change the type. 'U' is used internally, so this type + * was already changed to explicitly include the size. + */ + return (0); case 'b': case 'h': case 'i': diff --git a/src/third_party/wiredtiger/src/include/schema.h b/src/third_party/wiredtiger/src/include/schema.h index bb116e5cf2f..9a6e1e54e80 100644 --- a/src/third_party/wiredtiger/src/include/schema.h +++ b/src/third_party/wiredtiger/src/include/schema.h @@ -78,6 +78,14 @@ struct __wt_table { */ #define WT_COLGROUPS(t) WT_MAX((t)->ncolgroups, 1) +/* Helpers for the locked state of the handle list and table locks. */ +#define WT_SESSION_LOCKED_HANDLE_LIST \ + (WT_SESSION_LOCKED_HANDLE_LIST_READ | \ + WT_SESSION_LOCKED_HANDLE_LIST_WRITE) +#define WT_SESSION_LOCKED_TABLE \ + (WT_SESSION_LOCKED_TABLE_READ | \ + WT_SESSION_LOCKED_TABLE_WRITE) + /* * WT_WITH_LOCK_WAIT -- * Wait for a lock, perform an operation, drop the lock. 
@@ -85,7 +93,7 @@ struct __wt_table { #define WT_WITH_LOCK_WAIT(session, lock, flag, op) do { \ if (F_ISSET(session, (flag))) { \ op; \ - } else { \ + } else { \ __wt_spin_lock_track(session, lock); \ F_SET(session, (flag)); \ op; \ @@ -122,16 +130,46 @@ struct __wt_table { &S2C(session)->checkpoint_lock, WT_SESSION_LOCKED_CHECKPOINT, op) /* - * WT_WITH_HANDLE_LIST_LOCK -- - * Acquire the data handle list lock, perform an operation, drop the lock. + * WT_WITH_HANDLE_LIST_READ_LOCK -- + * Acquire the data handle list lock in shared mode, perform an operation, + * drop the lock. The handle list lock is a read-write lock so the + * implementation is different to the other lock macros. * * Note: always waits because some operations need the handle list lock to * discard handles, and we only expect it to be held across short * operations. */ -#define WT_WITH_HANDLE_LIST_LOCK(session, op) \ - WT_WITH_LOCK_WAIT(session, \ - &S2C(session)->dhandle_lock, WT_SESSION_LOCKED_HANDLE_LIST, op) +#define WT_WITH_HANDLE_LIST_READ_LOCK(session, op) do { \ + if (F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) { \ + op; \ + } else { \ + __wt_readlock(session, &S2C(session)->dhandle_lock); \ + F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ + op; \ + F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ + __wt_readunlock(session, &S2C(session)->dhandle_lock); \ + } \ +} while (0) + +/* + * WT_WITH_HANDLE_LIST_WRITE_LOCK -- + * Acquire the data handle list lock in exclusive mode, perform an + * operation, drop the lock. The handle list lock is a read-write lock so + * the implementation is different to the other lock macros. 
+ */ +#define WT_WITH_HANDLE_LIST_WRITE_LOCK(session, op) do { \ + if (F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)) { \ + op; \ + } else { \ + WT_ASSERT(session, \ + !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ));\ + __wt_writelock(session, &S2C(session)->dhandle_lock); \ + F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ + op; \ + F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ + __wt_writeunlock(session, &S2C(session)->dhandle_lock); \ + } \ +} while (0) /* * WT_WITH_METADATA_LOCK -- @@ -165,22 +203,58 @@ struct __wt_table { } while (0) /* - * WT_WITH_TABLE_LOCK, WT_WITH_TABLE_LOCK_NOWAIT -- + * WT_WITH_TABLE_READ_LOCK, WT_WITH_TABLE_WRITE_LOCK, + * WT_WITH_TABLE_WRITE_LOCK_NOWAIT -- * Acquire the table lock, perform an operation, drop the lock. + * The table lock is a read-write lock so the implementation is different + * to most other lock macros. + * + * Note: readlock always waits because some operations need the table lock + * to discard handles, and we only expect it to be held across short + * operations. 
*/ -#define WT_WITH_TABLE_LOCK(session, op) do { \ - WT_ASSERT(session, \ - F_ISSET(session, WT_SESSION_LOCKED_TABLE) || \ - !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); \ - WT_WITH_LOCK_WAIT(session, \ - &S2C(session)->table_lock, WT_SESSION_LOCKED_TABLE, op); \ +#define WT_WITH_TABLE_READ_LOCK(session, op) do { \ + if (F_ISSET(session, WT_SESSION_LOCKED_TABLE)) { \ + op; \ + } else { \ + WT_ASSERT(session, \ + !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); \ + __wt_readlock(session, &S2C(session)->table_lock); \ + F_SET(session, WT_SESSION_LOCKED_TABLE_READ); \ + op; \ + F_CLR(session, WT_SESSION_LOCKED_TABLE_READ); \ + __wt_readunlock(session, &S2C(session)->table_lock); \ + } \ +} while (0) + +#define WT_WITH_TABLE_WRITE_LOCK(session, op) do { \ + if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE)) { \ + op; \ + } else { \ + WT_ASSERT(session, \ + !F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ | \ + WT_SESSION_LOCKED_HANDLE_LIST)); \ + __wt_writelock(session, &S2C(session)->table_lock); \ + F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \ + op; \ + F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \ + __wt_writeunlock(session, &S2C(session)->table_lock); \ + } \ } while (0) -#define WT_WITH_TABLE_LOCK_NOWAIT(session, ret, op) do { \ +#define WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret, op) do { \ WT_ASSERT(session, \ - F_ISSET(session, WT_SESSION_LOCKED_TABLE) || \ - !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); \ - WT_WITH_LOCK_NOWAIT(session, ret, \ - &S2C(session)->table_lock, WT_SESSION_LOCKED_TABLE, op); \ + F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE) || \ + !F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ | \ + WT_SESSION_LOCKED_HANDLE_LIST)); \ + if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE)) { \ + op; \ + } else if ((ret = __wt_try_writelock(session, \ + &S2C(session)->table_lock)) == 0) { \ + F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \ + op; \ + F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \ + __wt_writeunlock(session, 
&S2C(session)->table_lock); \ + } \ } while (0) /* @@ -192,19 +266,31 @@ struct __wt_table { WT_CONNECTION_IMPL *__conn = S2C(session); \ bool __checkpoint_locked = \ F_ISSET(session, WT_SESSION_LOCKED_CHECKPOINT); \ - bool __handle_locked = \ - F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST); \ - bool __table_locked = \ - F_ISSET(session, WT_SESSION_LOCKED_TABLE); \ + bool __handle_read_locked = \ + F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ + bool __handle_write_locked = \ + F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ + bool __table_read_locked = \ + F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ); \ + bool __table_write_locked = \ + F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE); \ bool __schema_locked = \ F_ISSET(session, WT_SESSION_LOCKED_SCHEMA); \ - if (__handle_locked) { \ - F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST); \ - __wt_spin_unlock(session, &__conn->dhandle_lock); \ + if (__handle_read_locked) { \ + F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ + __wt_readunlock(session, &__conn->dhandle_lock); \ } \ - if (__table_locked) { \ - F_CLR(session, WT_SESSION_LOCKED_TABLE); \ - __wt_spin_unlock(session, &__conn->table_lock); \ + if (__handle_write_locked) { \ + F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ + __wt_writeunlock(session, &__conn->dhandle_lock); \ + } \ + if (__table_read_locked) { \ + F_CLR(session, WT_SESSION_LOCKED_TABLE_READ); \ + __wt_readunlock(session, &__conn->table_lock); \ + } \ + if (__table_write_locked) { \ + F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \ + __wt_writeunlock(session, &__conn->table_lock); \ } \ if (__schema_locked) { \ F_CLR(session, WT_SESSION_LOCKED_SCHEMA); \ @@ -223,12 +309,20 @@ struct __wt_table { __wt_spin_lock(session, &__conn->schema_lock); \ F_SET(session, WT_SESSION_LOCKED_SCHEMA); \ } \ - if (__table_locked) { \ - __wt_spin_lock(session, &__conn->table_lock); \ - F_SET(session, WT_SESSION_LOCKED_TABLE); \ + if (__table_read_locked) { \ + 
__wt_readlock(session, &__conn->table_lock); \ + F_SET(session, WT_SESSION_LOCKED_TABLE_READ); \ + } \ + if (__table_write_locked) { \ + __wt_writelock(session, &__conn->table_lock); \ + F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \ + } \ + if (__handle_read_locked) { \ + __wt_readlock(session, &__conn->dhandle_lock); \ + F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ } \ - if (__handle_locked) { \ - __wt_spin_lock(session, &__conn->dhandle_lock); \ - F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST); \ + if (__handle_write_locked) { \ + __wt_writelock(session, &__conn->dhandle_lock); \ + F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ } \ } while (0) diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h index 7dd523aea26..085f871a34f 100644 --- a/src/third_party/wiredtiger/src/include/session.h +++ b/src/third_party/wiredtiger/src/include/session.h @@ -52,8 +52,6 @@ struct __wt_session_impl { const char *lastop; /* Last operation */ uint32_t id; /* UID, offset in session array */ - WT_CONDVAR *cond; /* Condition variable */ - WT_EVENT_HANDLER *event_handler;/* Application's event handlers */ WT_DATA_HANDLE *dhandle; /* Current data handle */ diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index fd3e3290d95..8b2e78a4ed5 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -392,9 +392,7 @@ struct __wt_connection_stats { int64_t lock_checkpoint_count; int64_t lock_checkpoint_wait_application; int64_t lock_checkpoint_wait_internal; - int64_t lock_handle_list_count; - int64_t lock_handle_list_wait_application; - int64_t lock_handle_list_wait_internal; + int64_t lock_handle_list_wait_eviction; int64_t lock_metadata_count; int64_t lock_metadata_wait_application; int64_t lock_metadata_wait_internal; diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in 
b/src/third_party/wiredtiger/src/include/wiredtiger.in index 90989cc679d..c148e759299 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -576,8 +576,9 @@ struct __wt_cursor { #define WT_CURSTD_OPEN 0x00200 #define WT_CURSTD_OVERWRITE 0x00400 #define WT_CURSTD_RAW 0x00800 -#define WT_CURSTD_VALUE_EXT 0x01000 /* Value points out of the tree. */ -#define WT_CURSTD_VALUE_INT 0x02000 /* Value points into the tree. */ +#define WT_CURSTD_RAW_SEARCH 0x01000 +#define WT_CURSTD_VALUE_EXT 0x02000 /* Value points out of the tree. */ +#define WT_CURSTD_VALUE_INT 0x04000 /* Value points into the tree. */ #define WT_CURSTD_VALUE_SET (WT_CURSTD_VALUE_EXT | WT_CURSTD_VALUE_INT) uint32_t flags; #endif @@ -1982,12 +1983,13 @@ struct __wt_connection { * as a list\, such as "verbose=[evictserver\,read]"., a * list\, with values chosen from the following options: \c "api"\, \c * "block"\, \c "checkpoint"\, \c "compact"\, \c "evict"\, \c - * "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\, \c "lsm"\, - * \c "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c - * "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c - * "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c "split"\, - * \c "temporary"\, \c "thread_group"\, \c "transaction"\, \c "verify"\, - * \c "version"\, \c "write"; default empty.} + * "evict_stuck"\, \c "evictserver"\, \c "fileops"\, \c "handleops"\, \c + * "log"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c + * "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c + * "recovery"\, \c "recovery_progress"\, \c "salvage"\, \c + * "shared_cache"\, \c "split"\, \c "temporary"\, \c "thread_group"\, \c + * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default + * empty.} * @configend * @errors */ @@ -2361,7 +2363,7 @@ struct __wt_connection { * @config{exclusive, fail if the database already exists\, generally used with * the 
\c create option., a boolean flag; default \c false.} * @config{extensions, list of shared library extensions to load (using dlopen). - * Any values specified to an library extension are passed to + * Any values specified to a library extension are passed to * WT_CONNECTION::load_extension as the \c config parameter (for example\, * extensions=(/path/ext.so={entry=my_entry}))., a list of strings; * default empty.} @@ -2513,12 +2515,13 @@ struct __wt_connection { * WiredTiger is configured with --enable-verbose. Options are given as a * list\, such as "verbose=[evictserver\,read]"., a list\, with * values chosen from the following options: \c "api"\, \c "block"\, \c - * "checkpoint"\, \c "compact"\, \c "evict"\, \c "evictserver"\, \c "fileops"\, - * \c "handleops"\, \c "log"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c - * "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c - * "recovery"\, \c "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c - * "split"\, \c "temporary"\, \c "thread_group"\, \c "transaction"\, \c - * "verify"\, \c "version"\, \c "write"; default empty.} + * "checkpoint"\, \c "compact"\, \c "evict"\, \c "evict_stuck"\, \c + * "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\, \c "lsm"\, \c + * "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c "read"\, \c + * "rebalance"\, \c "reconcile"\, \c "recovery"\, \c "recovery_progress"\, \c + * "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, \c + * "thread_group"\, \c "transaction"\, \c "verify"\, \c "version"\, \c "write"; + * default empty.} * @config{write_through, Use \c FILE_FLAG_WRITE_THROUGH on Windows to write to * files. Ignored on non-Windows systems. Options are given as a list\, such * as "write_through=[data]". Configuring \c write_through requires @@ -4593,240 +4596,236 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1133 /*! 
lock: checkpoint lock internal thread wait time (usecs) */ #define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1134 -/*! lock: handle-list lock acquisitions */ -#define WT_STAT_CONN_LOCK_HANDLE_LIST_COUNT 1135 -/*! lock: handle-list lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_APPLICATION 1136 -/*! lock: handle-list lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_INTERNAL 1137 +/*! lock: handle-list lock eviction thread wait time (usecs) */ +#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_EVICTION 1135 /*! lock: metadata lock acquisitions */ -#define WT_STAT_CONN_LOCK_METADATA_COUNT 1138 +#define WT_STAT_CONN_LOCK_METADATA_COUNT 1136 /*! lock: metadata lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1139 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1137 /*! lock: metadata lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1140 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1138 /*! lock: schema lock acquisitions */ -#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1141 +#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1139 /*! lock: schema lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1142 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1140 /*! lock: schema lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1143 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1141 /*! lock: table lock acquisitions */ -#define WT_STAT_CONN_LOCK_TABLE_COUNT 1144 +#define WT_STAT_CONN_LOCK_TABLE_COUNT 1142 /*! * lock: table lock application thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1145 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1143 /*! 
* lock: table lock internal thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1146 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1144 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1147 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1145 /*! log: consolidated slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1148 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1146 /*! log: consolidated slot join races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1149 +#define WT_STAT_CONN_LOG_SLOT_RACES 1147 /*! log: consolidated slot join transitions */ -#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1150 +#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1148 /*! log: consolidated slot joins */ -#define WT_STAT_CONN_LOG_SLOT_JOINS 1151 +#define WT_STAT_CONN_LOG_SLOT_JOINS 1149 /*! log: consolidated slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1152 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1150 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1153 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1151 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1154 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1152 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1155 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1153 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1156 +#define WT_STAT_CONN_LOG_FLUSH 1154 /*! log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1157 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1155 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1158 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1156 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1159 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1157 /*! 
log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1160 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1158 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1161 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1159 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1162 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1160 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1163 +#define WT_STAT_CONN_LOG_SCANS 1161 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1164 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1162 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1165 +#define WT_STAT_CONN_LOG_WRITE_LSN 1163 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1166 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1164 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1167 +#define WT_STAT_CONN_LOG_SYNC 1165 /*! log: log sync time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DURATION 1168 +#define WT_STAT_CONN_LOG_SYNC_DURATION 1166 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1169 +#define WT_STAT_CONN_LOG_SYNC_DIR 1167 /*! log: log sync_dir time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1170 +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1168 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1171 +#define WT_STAT_CONN_LOG_WRITES 1169 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1172 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1170 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1173 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1171 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1174 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1172 /*! 
log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1175 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1173 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1176 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1174 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1177 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1175 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1178 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1176 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1179 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1177 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1180 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1178 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1181 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1179 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1182 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1180 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1183 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1181 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1184 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1182 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1185 +#define WT_STAT_CONN_REC_PAGES 1183 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1186 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1184 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1187 +#define WT_STAT_CONN_REC_PAGE_DELETE 1185 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1188 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1186 /*! 
reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1189 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1187 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1190 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1188 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1191 +#define WT_STAT_CONN_SESSION_OPEN 1189 /*! session: table alter failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1192 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1190 /*! session: table alter successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1193 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1191 /*! session: table alter unchanged and skipped */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1194 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1192 /*! session: table compact failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1195 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1193 /*! session: table compact successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1196 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1194 /*! session: table create failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1197 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1195 /*! session: table create successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1198 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1196 /*! session: table drop failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1199 +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1197 /*! session: table drop successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1200 +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1198 /*! session: table rebalance failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1201 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1199 /*! 
session: table rebalance successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1202 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1200 /*! session: table rename failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1203 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1201 /*! session: table rename successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1204 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1202 /*! session: table salvage failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1205 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1203 /*! session: table salvage successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1206 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1204 /*! session: table truncate failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1207 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1205 /*! session: table truncate successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1208 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1206 /*! session: table verify failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1209 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1207 /*! session: table verify successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1210 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1208 /*! thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1211 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1209 /*! thread-state: active filesystem read calls */ -#define WT_STAT_CONN_THREAD_READ_ACTIVE 1212 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1210 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1213 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1211 /*! 
thread-yield: application thread time evicting (usecs) */ -#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1214 +#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1212 /*! thread-yield: application thread time waiting for cache (usecs) */ -#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1215 +#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1213 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1216 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1214 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1217 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1215 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1218 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1216 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1219 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1217 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1220 +#define WT_STAT_CONN_PAGE_SLEEP 1218 /*! transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1221 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1219 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1222 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1220 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1223 +#define WT_STAT_CONN_TXN_BEGIN 1221 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1224 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1222 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1225 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1223 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1226 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1224 /*! 
transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1227 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1225 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1228 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1226 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1229 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1227 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1230 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1228 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1231 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1229 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1232 +#define WT_STAT_CONN_TXN_CHECKPOINT 1230 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1233 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1231 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1234 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1232 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1235 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1233 /*! * transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1236 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1234 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1237 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1235 /*! 
transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1238 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1236 /*! * transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1239 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1237 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1240 +#define WT_STAT_CONN_TXN_SYNC 1238 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1241 +#define WT_STAT_CONN_TXN_COMMIT 1239 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1242 +#define WT_STAT_CONN_TXN_ROLLBACK 1240 /*! * @} diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c index da500a74e87..d6caa55f8c7 100644 --- a/src/third_party/wiredtiger/src/log/log.c +++ b/src/third_party/wiredtiger/src/log/log.c @@ -43,11 +43,11 @@ __log_wait_for_earlier_slot(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) */ if (F_ISSET(session, WT_SESSION_LOCKED_SLOT)) __wt_spin_unlock(session, &log->log_slot_lock); - __wt_cond_auto_signal(session, conn->log_wrlsn_cond); + __wt_cond_signal(session, conn->log_wrlsn_cond); if (++yield_count < WT_THOUSAND) __wt_yield(); else - __wt_cond_wait(session, log->log_write_cond, 200); + __wt_cond_wait(session, log->log_write_cond, 200, NULL); if (F_ISSET(session, WT_SESSION_LOCKED_SLOT)) __wt_spin_lock(session, &log->log_slot_lock); } @@ -62,6 +62,8 @@ static int __log_fs_write(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, wt_off_t offset, size_t len, const void *buf) { + WT_DECL_RET; + /* * If we're writing into a new log file, we have to wait for all * writes to the previous log file to complete otherwise there could @@ -71,7 +73,10 @@ __log_fs_write(WT_SESSION_IMPL *session, __log_wait_for_earlier_slot(session, slot); WT_RET(__wt_log_force_sync(session, &slot->slot_release_lsn)); } - return 
(__wt_write(session, slot->slot_fh, offset, len, buf)); + if ((ret = __wt_write(session, slot->slot_fh, offset, len, buf)) != 0) + WT_PANIC_MSG(session, ret, + "%s: fatal log failure", slot->slot_fh->name); + return (ret); } /* @@ -89,7 +94,7 @@ __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn) log = conn->log; log->ckpt_lsn = *ckp_lsn; if (conn->log_cond != NULL) - __wt_cond_auto_signal(session, conn->log_cond); + __wt_cond_signal(session, conn->log_cond); } /* @@ -170,7 +175,7 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) */ while (log->sync_lsn.l.file < min_lsn->l.file) { __wt_cond_signal(session, S2C(session)->log_file_cond); - __wt_cond_wait(session, log->log_sync_cond, 10000); + __wt_cond_wait(session, log->log_sync_cond, 10000, NULL); } __wt_spin_lock(session, &log->log_sync_lock); WT_ASSERT(session, log->log_dir_fh != NULL); @@ -915,7 +920,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) else { WT_STAT_CONN_INCR(session, log_prealloc_missed); if (conn->log_cond != NULL) - __wt_cond_auto_signal( + __wt_cond_signal( session, conn->log_cond); } } @@ -1490,7 +1495,8 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) */ if (log->sync_lsn.l.file < slot->slot_end_lsn.l.file || __wt_spin_trylock(session, &log->log_sync_lock) != 0) { - __wt_cond_wait(session, log->log_sync_cond, 10000); + __wt_cond_wait( + session, log->log_sync_cond, 10000, NULL); continue; } locked = true; @@ -2126,7 +2132,11 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, WT_STAT_CONN_INCR(session, log_writes); - __wt_log_slot_join(session, rdup_len, flags, &myslot); + /* + * The only time joining a slot should ever return an error is if it + * detects a panic. + */ + WT_ERR(__wt_log_slot_join(session, rdup_len, flags, &myslot)); /* * If the addition of this record crosses the buffer boundary, * switch in a new slot. 
@@ -2160,7 +2170,7 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, * XXX I've seen times when conditions are NULL. */ if (conn->log_cond != NULL) { - __wt_cond_auto_signal(session, conn->log_cond); + __wt_cond_signal(session, conn->log_cond); __wt_yield(); } else WT_ERR(__wt_log_force_write(session, 1, NULL)); @@ -2169,12 +2179,14 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, /* Wait for our writes to reach the OS */ while (__wt_log_cmp(&log->write_lsn, &lsn) <= 0 && myslot.slot->slot_error == 0) - __wt_cond_wait(session, log->log_write_cond, 10000); + __wt_cond_wait( + session, log->log_write_cond, 10000, NULL); } else if (LF_ISSET(WT_LOG_FSYNC)) { /* Wait for our writes to reach disk */ while (__wt_log_cmp(&log->sync_lsn, &lsn) <= 0 && myslot.slot->slot_error == 0) - __wt_cond_wait(session, log->log_sync_cond, 10000); + __wt_cond_wait( + session, log->log_sync_cond, 10000, NULL); } /* @@ -2199,12 +2211,12 @@ err: /* * If one of the sync flags is set, assert the proper LSN has moved to - * match. + * match on success. */ - WT_ASSERT(session, !LF_ISSET(WT_LOG_FLUSH) || + WT_ASSERT(session, ret != 0 || !LF_ISSET(WT_LOG_FLUSH) || __wt_log_cmp(&log->write_lsn, &lsn) >= 0); - WT_ASSERT(session, - !LF_ISSET(WT_LOG_FSYNC) || __wt_log_cmp(&log->sync_lsn, &lsn) >= 0); + WT_ASSERT(session, ret != 0 || !LF_ISSET(WT_LOG_FSYNC) || + __wt_log_cmp(&log->sync_lsn, &lsn) >= 0); return (ret); } diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c index a29a34e5652..542f010ea53 100644 --- a/src/third_party/wiredtiger/src/log/log_slot.c +++ b/src/third_party/wiredtiger/src/log/log_slot.c @@ -8,6 +8,49 @@ #include "wt_internal.h" +#ifdef HAVE_DIAGNOSTIC +/* + * __log_slot_dump -- + * Dump the entire slot state. 
+ */ +static void +__log_slot_dump(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + WT_LOG *log; + WT_LOGSLOT *slot; + int earliest, i; + + conn = S2C(session); + log = conn->log; + earliest = 0; + for (i = 0; i < WT_SLOT_POOL; i++) { + slot = &log->slot_pool[i]; + if (__wt_log_cmp(&slot->slot_release_lsn, + &log->slot_pool[earliest].slot_release_lsn) < 0) + earliest = i; + __wt_errx(session, "Slot %d:", i); + __wt_errx(session, " State: %" PRIx64 " Flags: %" PRIx32, + slot->slot_state, slot->flags); + __wt_errx(session, " Start LSN: %" PRIu32 "/%" PRIu32, + slot->slot_start_lsn.l.file, slot->slot_start_lsn.l.offset); + __wt_errx(session, " End LSN: %" PRIu32 "/%" PRIu32, + slot->slot_end_lsn.l.file, slot->slot_end_lsn.l.offset); + __wt_errx(session, " Release LSN: %" PRIu32 "/%" PRIu32, + slot->slot_release_lsn.l.file, + slot->slot_release_lsn.l.offset); + __wt_errx(session, " Offset: start: %" PRIuMAX + " last:%" PRIuMAX, (uintmax_t)slot->slot_start_offset, + (uintmax_t)slot->slot_last_offset); + __wt_errx(session, " Unbuffered: %" PRId64 + " error: %" PRId32, slot->slot_unbuffered, + slot->slot_error); + } + __wt_errx(session, "Earliest slot: %d", earliest); + +} +#endif + /* * __wt_log_slot_activate -- * Initialize a slot to become active. @@ -21,7 +64,6 @@ __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) conn = S2C(session); log = conn->log; - slot->slot_state = 0; /* * !!! slot_release_lsn must be set outside this function because * this function may be called after a log file switch and the @@ -30,12 +72,19 @@ __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) * set for closing the file handle on a log file switch. The flags * are reset when the slot is freed. See log_slot_free. 
*/ + slot->slot_unbuffered = 0; slot->slot_start_lsn = slot->slot_end_lsn = log->alloc_lsn; slot->slot_start_offset = log->alloc_lsn.l.offset; slot->slot_last_offset = log->alloc_lsn.l.offset; slot->slot_fh = log->log_fh; slot->slot_error = 0; - slot->slot_unbuffered = 0; + WT_DIAGNOSTIC_YIELD; + /* + * Set the slot state last. Other threads may have a stale pointer + * to this slot and could try to alter the state and other fields once + * they see the state cleared. + */ + WT_PUBLISH(slot->slot_state, 0); } /* @@ -50,6 +99,10 @@ __log_slot_close( WT_CONNECTION_IMPL *conn; WT_LOG *log; int64_t end_offset, new_state, old_state; +#ifdef HAVE_DIAGNOSTIC + struct timespec begin, now; + int count; +#endif WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT)); WT_ASSERT(session, releasep != NULL); @@ -101,9 +154,33 @@ retry: * that value. If the state is unbuffered, wait for the unbuffered * size to be set. */ - while (WT_LOG_SLOT_UNBUFFERED_ISSET(old_state) && - slot->slot_unbuffered == 0) - __wt_yield(); +#ifdef HAVE_DIAGNOSTIC + count = 0; + __wt_epoch(session, &begin); +#endif + if (WT_LOG_SLOT_UNBUFFERED_ISSET(old_state)) { + while (slot->slot_unbuffered == 0) { + WT_RET(WT_SESSION_CHECK_PANIC(session)); + __wt_yield(); +#ifdef HAVE_DIAGNOSTIC + ++count; + if (count > WT_MILLION) { + __wt_epoch(session, &now); + if (WT_TIMEDIFF_SEC(now, begin) > 10) { + __wt_errx(session, "SLOT_CLOSE: Slot %" + PRIu32 " Timeout unbuffered, state 0x%" + PRIx64 " unbuffered %" PRIu64, + (uint32_t)(slot - &log->slot_pool[0]), + slot->slot_state, + slot->slot_unbuffered); + __log_slot_dump(session); + __wt_abort(session); + } + count = 0; + } +#endif + } + } end_offset = WT_LOG_SLOT_JOINED_BUFFERED(old_state) + slot->slot_unbuffered; @@ -218,7 +295,11 @@ __wt_log_slot_new(WT_SESSION_IMPL *session) WT_CONNECTION_IMPL *conn; WT_LOG *log; WT_LOGSLOT *slot; - int32_t i; + int32_t i, pool_i; +#ifdef HAVE_DIAGNOSTIC + struct timespec begin, now; + int count; +#endif WT_ASSERT(session, 
F_ISSET(session, WT_SESSION_LOCKED_SLOT)); conn = S2C(session); @@ -232,16 +313,22 @@ __wt_log_slot_new(WT_SESSION_IMPL *session) WT_LOG_SLOT_OPEN(slot->slot_state)) return (0); +#ifdef HAVE_DIAGNOSTIC + count = 0; + __wt_epoch(session, &begin); +#endif /* * Keep trying until we can find a free slot. */ for (;;) { /* - * For now just restart at 0. We could use log->pool_index - * if that is inefficient. + * Rotate among the slots to lessen collisions. */ - for (i = 0; i < WT_SLOT_POOL; i++) { - slot = &log->slot_pool[i]; + for (i = 0, pool_i = log->pool_index; i < WT_SLOT_POOL; + i++, pool_i++) { + if (pool_i >= WT_SLOT_POOL) + pool_i = 0; + slot = &log->slot_pool[pool_i]; if (slot->slot_state == WT_LOG_SLOT_FREE) { /* * Acquire our starting position in the @@ -256,14 +343,28 @@ __wt_log_slot_new(WT_SESSION_IMPL *session) WT_STAT_CONN_INCR(session, log_slot_transitions); log->active_slot = slot; + log->pool_index = pool_i; return (0); } } /* * If we didn't find any free slots signal the worker thread. */ - __wt_cond_auto_signal(session, conn->log_wrlsn_cond); + __wt_cond_signal(session, conn->log_wrlsn_cond); __wt_yield(); +#ifdef HAVE_DIAGNOSTIC + ++count; + if (count > WT_MILLION) { + __wt_epoch(session, &now); + if (WT_TIMEDIFF_SEC(now, begin) > 10) { + __wt_errx(session, + "SLOT_NEW: Timeout free slot"); + __log_slot_dump(session); + __wt_abort(session); + } + count = 0; + } +#endif } /* NOTREACHED */ } @@ -311,10 +412,13 @@ __wt_log_slot_init(WT_SESSION_IMPL *session) /* * We cannot initialize the release LSN in the activate function * because that function can be called after a log file switch. + * The release LSN is usually the same as the slot_start_lsn except + * around a log file switch. 
*/ slot->slot_release_lsn = log->alloc_lsn; __wt_log_slot_activate(session, slot); log->active_slot = slot; + log->pool_index = 0; if (0) { err: while (--i >= 0) @@ -361,7 +465,7 @@ __wt_log_slot_destroy(WT_SESSION_IMPL *session) * __wt_log_slot_join -- * Join a consolidated logging slot. */ -void +int __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot) { @@ -370,53 +474,63 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, WT_LOGSLOT *slot; int64_t flag_state, new_state, old_state, released; int32_t join_offset, new_join; -#ifdef HAVE_DIAGNOSTIC - bool unbuf_force; -#endif + bool unbuffered, yld; conn = S2C(session); log = conn->log; WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SLOT)); + WT_ASSERT(session, mysize != 0); /* * There should almost always be a slot open. */ + unbuffered = false; #ifdef HAVE_DIAGNOSTIC - unbuf_force = (++log->write_calls % WT_THOUSAND) == 0; + yld = (++log->write_calls % 7) == 0; + if ((log->write_calls % WT_THOUSAND) == 0 || + mysize > WT_LOG_SLOT_BUF_MAX) { +#else + yld = false; + if (mysize > WT_LOG_SLOT_BUF_MAX) { #endif + unbuffered = true; + F_SET(myslot, WT_MYSLOT_UNBUFFERED); + } for (;;) { WT_BARRIER(); + WT_RET(WT_SESSION_CHECK_PANIC(session)); slot = log->active_slot; old_state = slot->slot_state; - /* - * Try to join our size into the existing size and - * atomically write it back into the state. 
- */ - flag_state = WT_LOG_SLOT_FLAGS(old_state); - released = WT_LOG_SLOT_RELEASED(old_state); - join_offset = WT_LOG_SLOT_JOINED(old_state); -#ifdef HAVE_DIAGNOSTIC - if (unbuf_force || mysize > WT_LOG_SLOT_BUF_MAX) { -#else - if (mysize > WT_LOG_SLOT_BUF_MAX) { -#endif - new_join = join_offset + WT_LOG_SLOT_UNBUFFERED; - F_SET(myslot, WT_MYSLOT_UNBUFFERED); - myslot->slot = slot; - } else - new_join = join_offset + (int32_t)mysize; - new_state = (int64_t)WT_LOG_SLOT_JOIN_REL( - (int64_t)new_join, (int64_t)released, (int64_t)flag_state); - - /* - * Check if the slot is open for joining and we are able to - * swap in our size into the state. - */ - if (WT_LOG_SLOT_OPEN(old_state) && - __wt_atomic_casiv64( - &slot->slot_state, old_state, new_state)) - break; + if (WT_LOG_SLOT_OPEN(old_state)) { + /* + * Try to join our size into the existing size and + * atomically write it back into the state. + */ + flag_state = WT_LOG_SLOT_FLAGS(old_state); + released = WT_LOG_SLOT_RELEASED(old_state); + join_offset = WT_LOG_SLOT_JOINED(old_state); + if (unbuffered) + new_join = join_offset + WT_LOG_SLOT_UNBUFFERED; + else + new_join = join_offset + (int32_t)mysize; + new_state = (int64_t)WT_LOG_SLOT_JOIN_REL( + (int64_t)new_join, (int64_t)released, + (int64_t)flag_state); + + /* + * Braces used due to potential empty body warning. + */ + if (yld) { + WT_DIAGNOSTIC_YIELD; + } + /* + * Attempt to swap our size into the state. + */ + if (__wt_atomic_casiv64( + &slot->slot_state, old_state, new_state)) + break; + } /* * The slot is no longer open or we lost the race to * update it. Yield and try again. @@ -428,8 +542,7 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, * We joined this slot. Fill in our information to return to * the caller. 
*/ - if (mysize != 0) - WT_STAT_CONN_INCR(session, log_slot_joins); + WT_STAT_CONN_INCR(session, log_slot_joins); if (LF_ISSET(WT_LOG_DSYNC | WT_LOG_FSYNC)) F_SET(slot, WT_SLOT_SYNC_DIR); if (LF_ISSET(WT_LOG_FLUSH)) @@ -444,6 +557,7 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, myslot->slot = slot; myslot->offset = join_offset; myslot->end_offset = (wt_off_t)((uint64_t)join_offset + mysize); + return (0); } /* diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c index a2511f48e2b..60afbc99ade 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c @@ -1692,8 +1692,8 @@ __wt_clsm_open(WT_SESSION_IMPL *session, bulk = cval.val != 0; /* Get the LSM tree. */ - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_lsm_tree_get(session, uri, bulk, &lsm_tree)); + ret = __wt_lsm_tree_get(session, uri, bulk, &lsm_tree); + /* * Check whether the exclusive open for a bulk load succeeded, and * if it did ensure that it's safe to bulk load into the tree. 
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_manager.c b/src/third_party/wiredtiger/src/lsm/lsm_manager.c index cbd83a5cd30..6dc06146179 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_manager.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_manager.c @@ -387,8 +387,8 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) __wt_sleep(0, 10000); if (TAILQ_EMPTY(&conn->lsmqh)) continue; - __wt_spin_lock(session, &conn->dhandle_lock); - F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST); + __wt_readlock(session, &conn->dhandle_lock); + F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); dhandle_locked = true; TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q) { if (!lsm_tree->active) @@ -448,14 +448,14 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) session, WT_LSM_WORK_MERGE, 0, lsm_tree)); } } - __wt_spin_unlock(session, &conn->dhandle_lock); - F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST); + __wt_readunlock(session, &conn->dhandle_lock); + F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); dhandle_locked = false; } err: if (dhandle_locked) { - __wt_spin_unlock(session, &conn->dhandle_lock); - F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST); + __wt_readunlock(session, &conn->dhandle_lock); + F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); } return (ret); } diff --git a/src/third_party/wiredtiger/src/lsm/lsm_stat.c b/src/third_party/wiredtiger/src/lsm/lsm_stat.c index 150de968722..21e8991be94 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_stat.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_stat.c @@ -33,9 +33,7 @@ __curstat_lsm_init( "checkpoint=" WT_CHECKPOINT, NULL, NULL }; locked = false; - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_lsm_tree_get(session, uri, false, &lsm_tree)); - WT_RET(ret); + WT_RET(__wt_lsm_tree_get(session, uri, false, &lsm_tree)); WT_ERR(__wt_scr_alloc(session, 0, &uribuf)); /* Propagate all, fast and/or clear to the cursors we open. 
*/ diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c index 71a981a6284..a9275976023 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c @@ -38,7 +38,7 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final) /* We may be destroying an lsm_tree before it was added. */ if (F_ISSET(lsm_tree, WT_LSM_TREE_OPEN)) { WT_ASSERT(session, final || - F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); + F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); TAILQ_REMOVE(&S2C(session)->lsmqh, lsm_tree, q); } @@ -321,9 +321,7 @@ __wt_lsm_tree_create(WT_SESSION_IMPL *session, metadata = NULL; /* If the tree can be opened, it already exists. */ - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_lsm_tree_get(session, uri, false, &lsm_tree)); - if (ret == 0) { + if ((ret = __wt_lsm_tree_get(session, uri, false, &lsm_tree)) == 0) { __wt_lsm_tree_release(session, lsm_tree); return (exclusive ? EEXIST : 0); } @@ -339,7 +337,7 @@ __wt_lsm_tree_create(WT_SESSION_IMPL *session, * error: the returned handle is NULL on error, and the metadata * tracking macros handle cleaning up on failure. */ - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, ret = __lsm_tree_open(session, uri, true, &lsm_tree)); if (ret == 0) __wt_lsm_tree_release(session, lsm_tree); @@ -404,6 +402,9 @@ __lsm_tree_find(WT_SESSION_IMPL *session, } *treep = lsm_tree; + + WT_ASSERT(session, lsm_tree->excl_session == + (exclusive ? session : NULL)); return (0); } @@ -456,7 +457,8 @@ __lsm_tree_open(WT_SESSION_IMPL *session, conn = S2C(session); lsm_tree = NULL; - WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); + WT_ASSERT(session, + F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); /* Start the LSM manager thread if it isn't running. 
*/ if (__wt_atomic_cas32(&conn->lsm_manager.lsm_workers, 0, 1)) @@ -520,14 +522,21 @@ __wt_lsm_tree_get(WT_SESSION_IMPL *session, { WT_DECL_RET; - WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); - - ret = __lsm_tree_find(session, uri, exclusive, treep); + /* + * Dropping and re-acquiring the lock is safe here, since the tree open + * call checks to see if another thread beat it to opening the tree + * before proceeding. + */ + if (exclusive) + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, + ret = __lsm_tree_find(session, uri, exclusive, treep)); + else + WT_WITH_HANDLE_LIST_READ_LOCK(session, + ret = __lsm_tree_find(session, uri, exclusive, treep)); if (ret == WT_NOTFOUND) - ret = __lsm_tree_open(session, uri, exclusive, treep); + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, + ret = __lsm_tree_open(session, uri, exclusive, treep)); - WT_ASSERT(session, ret != 0 || - (*treep)->excl_session == (exclusive ? session : NULL)); return (ret); } @@ -857,9 +866,7 @@ __wt_lsm_tree_alter( locked = false; /* Get the LSM tree. */ - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_lsm_tree_get(session, uri, false, &lsm_tree)); - WT_RET(ret); + WT_RET(__wt_lsm_tree_get(session, uri, false, &lsm_tree)); /* Prevent any new opens. */ __wt_lsm_tree_writelock(session, lsm_tree); @@ -899,9 +906,7 @@ __wt_lsm_tree_drop( locked = false; /* Get the LSM tree. */ - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_lsm_tree_get(session, name, true, &lsm_tree)); - WT_RET(ret); + WT_RET(__wt_lsm_tree_get(session, name, true, &lsm_tree)); WT_ASSERT(session, !lsm_tree->active); /* Prevent any new opens. 
*/ @@ -934,7 +939,7 @@ __wt_lsm_tree_drop( WT_ASSERT(session, !lsm_tree->active); err: if (locked) __wt_lsm_tree_writeunlock(session, lsm_tree); - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, tret = __lsm_tree_discard(session, lsm_tree, false)); WT_TRET(tret); return (ret); @@ -960,9 +965,7 @@ __wt_lsm_tree_rename(WT_SESSION_IMPL *session, locked = false; /* Get the LSM tree. */ - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_lsm_tree_get(session, olduri, true, &lsm_tree)); - WT_RET(ret); + WT_RET(__wt_lsm_tree_get(session, olduri, true, &lsm_tree)); /* Prevent any new opens. */ __wt_lsm_tree_writelock(session, lsm_tree); @@ -1007,7 +1010,7 @@ err: if (locked) * Discard this LSM tree structure. The first operation on the renamed * tree will create a new one. */ - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, tret = __lsm_tree_discard(session, lsm_tree, false)); WT_TRET(tret); return (ret); @@ -1032,9 +1035,7 @@ __wt_lsm_tree_truncate( locked = false; /* Get the LSM tree. */ - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_lsm_tree_get(session, name, true, &lsm_tree)); - WT_RET(ret); + WT_RET(__wt_lsm_tree_get(session, name, true, &lsm_tree)); /* Prevent any new opens. */ __wt_lsm_tree_writelock(session, lsm_tree); @@ -1068,7 +1069,7 @@ err: if (locked) * the last good version of the metadata will be used, resulting * in a valid (not truncated) tree. */ - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, tret = __lsm_tree_discard(session, lsm_tree, false)); WT_TRET(tret); } @@ -1157,9 +1158,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) /* Tell __wt_schema_worker not to look inside the LSM tree. 
*/ *skipp = true; - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_lsm_tree_get(session, name, false, &lsm_tree)); - WT_RET(ret); + WT_RET(__wt_lsm_tree_get(session, name, false, &lsm_tree)); if (!F_ISSET(S2C(session), WT_CONN_LSM_MERGE)) WT_ERR_MSG(session, EINVAL, @@ -1356,9 +1355,7 @@ __wt_lsm_tree_worker(WT_SESSION_IMPL *session, locked = false; exclusive = FLD_ISSET(open_flags, WT_DHANDLE_EXCLUSIVE); - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_lsm_tree_get(session, uri, exclusive, &lsm_tree)); - WT_RET(ret); + WT_RET(__wt_lsm_tree_get(session, uri, exclusive, &lsm_tree)); /* * We mark that we're busy using the tree to coordinate diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c index d9c185a3f58..4349acf7b55 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c @@ -276,7 +276,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && !F_ISSET(chunk, WT_LSM_CHUNK_STABLE) && !chunk->evicted) { - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, ret = __lsm_discard_handle(session, chunk->uri, NULL)); if (ret == 0) chunk->evicted = 1; @@ -517,7 +517,7 @@ __lsm_drop_file(WT_SESSION_IMPL *session, const char *uri) * * This will fail with EBUSY if the file is still in use. */ - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, ret = __lsm_discard_handle(session, uri, WT_CHECKPOINT)); WT_RET(ret); diff --git a/src/third_party/wiredtiger/src/lsm/lsm_worker.c b/src/third_party/wiredtiger/src/lsm/lsm_worker.c index b0d0758775d..ffa00c0a5e7 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_worker.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_worker.c @@ -154,7 +154,7 @@ __lsm_worker(void *arg) /* Don't busy wait if there was any work to do. 
*/ if (!progress) { - __wt_cond_wait(session, cookie->work_cond, 10000); + __wt_cond_wait(session, cookie->work_cond, 10000, NULL); continue; } } diff --git a/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c b/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c index be8b1abda31..a5ee78f9e3e 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c +++ b/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c @@ -13,8 +13,7 @@ * Allocate and initialize a condition variable. */ int -__wt_cond_alloc(WT_SESSION_IMPL *session, - const char *name, bool is_signalled, WT_CONDVAR **condp) +__wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) { WT_CONDVAR *cond; WT_DECL_RET; @@ -27,7 +26,7 @@ __wt_cond_alloc(WT_SESSION_IMPL *session, WT_ERR(pthread_cond_init(&cond->cond, NULL)); cond->name = name; - cond->waiters = is_signalled ? -1 : 0; + cond->waiters = 0; *condp = cond; return (0); @@ -42,8 +41,8 @@ err: __wt_free(session, cond); * out period expires, let the caller know. */ void -__wt_cond_wait_signal( - WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled) +__wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, + uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled) { struct timespec ts; WT_DECL_RET; @@ -62,6 +61,23 @@ __wt_cond_wait_signal( WT_ERR(pthread_mutex_lock(&cond->mtx)); locked = true; + /* + * It's possible to race with threads waking us up. That's not a problem + * if there are multiple wakeups because the next wakeup will get us, or + * if we're only pausing for a short period. It's a problem if there's + * only a single wakeup, our waker is likely waiting for us to exit. + * After acquiring the mutex (so we're guaranteed to be awakened by any + * future wakeup call), optionally check if we're OK to keep running. + * This won't ensure our caller won't just loop and call us again, but + * at least it's not our fault. 
+ * + * Assert we're not waiting longer than a second if not checking the + * run status. + */ + WT_ASSERT(session, run_func != NULL || usecs <= WT_MILLION); + if (run_func != NULL && !run_func(session)) + goto skipping; + if (usecs > 0) { __wt_epoch(session, &ts); ts.tv_sec += (time_t) @@ -81,7 +97,7 @@ __wt_cond_wait_signal( ret == ETIME || #endif ret == ETIMEDOUT) { - *signalled = false; +skipping: *signalled = false; ret = 0; } diff --git a/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c b/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c index 79c62ccd7f2..0001c6c2322 100644 --- a/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c +++ b/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c @@ -13,8 +13,7 @@ * Allocate and initialize a condition variable. */ int -__wt_cond_alloc(WT_SESSION_IMPL *session, - const char *name, bool is_signalled, WT_CONDVAR **condp) +__wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) { WT_CONDVAR *cond; @@ -26,7 +25,7 @@ __wt_cond_alloc(WT_SESSION_IMPL *session, InitializeConditionVariable(&cond->cond); cond->name = name; - cond->waiters = is_signalled ? -1 : 0; + cond->waiters = 0; *condp = cond; return (0); @@ -38,8 +37,8 @@ __wt_cond_alloc(WT_SESSION_IMPL *session, * out period expires, let the caller know. */ void -__wt_cond_wait_signal( - WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled) +__wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, + uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled) { BOOL sleepret; DWORD milliseconds, windows_error; @@ -59,8 +58,26 @@ __wt_cond_wait_signal( EnterCriticalSection(&cond->mtx); locked = true; + /* + * It's possible to race with threads waking us up. That's not a problem + * if there are multiple wakeups because the next wakeup will get us, or + * if we're only pausing for a short period. It's a problem if there's + * only a single wakeup, our waker is likely waiting for us to exit. 
+ * After acquiring the mutex (so we're guaranteed to be awakened by any + * future wakeup call), optionally check if we're OK to keep running. + * This won't ensure our caller won't just loop and call us again, but + * at least it's not our fault. + * + * Assert we're not waiting longer than a second if not checking the + * run status. + */ + WT_ASSERT(session, run_func != NULL || usecs <= WT_MILLION); + + if (run_func != NULL && !run_func(session)) + goto skipping; + if (usecs > 0) { - milliseconds64 = usecs / 1000; + milliseconds64 = usecs / WT_THOUSAND; /* * Check for 32-bit unsigned integer overflow @@ -90,7 +107,7 @@ __wt_cond_wait_signal( if (sleepret == 0) { windows_error = __wt_getlasterror(); if (windows_error == ERROR_TIMEOUT) { - *signalled = false; +skipping: *signalled = false; sleepret = 1; } } @@ -117,17 +134,17 @@ void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond) { WT_DECL_RET; - bool locked; - - locked = false; __wt_verbose(session, WT_VERB_MUTEX, "signal %s", cond->name); /* - * Our callers are often setting flags to cause a thread to exit. Add - * a barrier to ensure the flags are seen by the threads. + * Our callers often set flags to cause a thread to exit. Add a barrier + * to ensure exit flags are seen by the sleeping threads, otherwise we + * can wake up a thread, it immediately goes back to sleep, and we'll + * hang. Use a full barrier (we may not write before waiting on thread + * join). */ - WT_WRITE_BARRIER(); + WT_FULL_BARRIER(); /* * Fast path if we are in (or can enter), a state where the next waiter diff --git a/src/third_party/wiredtiger/src/schema/schema_drop.c b/src/third_party/wiredtiger/src/schema/schema_drop.c index c1a4f257648..49801e4e5f9 100644 --- a/src/third_party/wiredtiger/src/schema/schema_drop.c +++ b/src/third_party/wiredtiger/src/schema/schema_drop.c @@ -30,7 +30,7 @@ __drop_file( WT_RET(__wt_schema_backup_check(session, filename)); /* Close all btree handles associated with this file. 
*/ - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, ret = __wt_conn_dhandle_close_all(session, uri, force)); WT_RET(ret); diff --git a/src/third_party/wiredtiger/src/schema/schema_list.c b/src/third_party/wiredtiger/src/schema/schema_list.c index ea7374b7554..74ef5135a4a 100644 --- a/src/third_party/wiredtiger/src/schema/schema_list.c +++ b/src/third_party/wiredtiger/src/schema/schema_list.c @@ -25,7 +25,7 @@ __schema_add_table(WT_SESSION_IMPL *session, /* Make sure the metadata is open before getting other locks. */ WT_RET(__wt_metadata_cursor(session, NULL)); - WT_WITH_TABLE_LOCK(session, + WT_WITH_TABLE_READ_LOCK(session, ret = __wt_schema_open_table( session, name, namelen, ok_incomplete, &table)); WT_RET(ret); diff --git a/src/third_party/wiredtiger/src/schema/schema_rename.c b/src/third_party/wiredtiger/src/schema/schema_rename.c index f512482c162..a374f4c2831 100644 --- a/src/third_party/wiredtiger/src/schema/schema_rename.c +++ b/src/third_party/wiredtiger/src/schema/schema_rename.c @@ -33,7 +33,7 @@ __rename_file( WT_RET(__wt_schema_backup_check(session, filename)); WT_RET(__wt_schema_backup_check(session, newfile)); /* Close any btree handles in the file. */ - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, ret = __wt_conn_dhandle_close_all(session, uri, false)); WT_ERR(ret); diff --git a/src/third_party/wiredtiger/src/schema/schema_worker.c b/src/third_party/wiredtiger/src/schema/schema_worker.c index fb7f8cec074..e5f71b5d56f 100644 --- a/src/third_party/wiredtiger/src/schema/schema_worker.c +++ b/src/third_party/wiredtiger/src/schema/schema_worker.c @@ -49,7 +49,7 @@ __wt_schema_worker(WT_SESSION_IMPL *session, * any open file handles, including checkpoints. 
*/ if (FLD_ISSET(open_flags, WT_DHANDLE_EXCLUSIVE)) { - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, ret = __wt_conn_dhandle_close_all( session, uri, false)); WT_ERR(ret); diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c index fcbfa8809b3..d282c5d0c32 100644 --- a/src/third_party/wiredtiger/src/session/session_api.c +++ b/src/third_party/wiredtiger/src/session/session_api.c @@ -162,7 +162,7 @@ __session_alter(WT_SESSION *wt_session, const char *uri, const char *config) cfg[1] = NULL; WT_WITH_CHECKPOINT_LOCK(session, WT_WITH_SCHEMA_LOCK(session, - WT_WITH_TABLE_LOCK(session, + WT_WITH_TABLE_WRITE_LOCK(session, ret = __wt_schema_alter(session, uri, cfg)))); err: if (ret != 0) @@ -234,9 +234,6 @@ __session_close(WT_SESSION *wt_session, const char *config) /* Release common session resources. */ WT_TRET(__wt_session_release_resources(session)); - /* Destroy the thread's mutex. */ - WT_TRET(__wt_cond_destroy(session, &session->cond)); - /* The API lock protects opening and closing of sessions. 
*/ __wt_spin_lock(session, &conn->api_lock); @@ -521,7 +518,7 @@ __wt_session_create( WT_DECL_RET; WT_WITH_SCHEMA_LOCK(session, - WT_WITH_TABLE_LOCK(session, + WT_WITH_TABLE_WRITE_LOCK(session, ret = __wt_schema_create(session, uri, config))); return (ret); } @@ -769,7 +766,7 @@ __session_rename(WT_SESSION *wt_session, WT_WITH_CHECKPOINT_LOCK(session, WT_WITH_SCHEMA_LOCK(session, - WT_WITH_TABLE_LOCK(session, + WT_WITH_TABLE_WRITE_LOCK(session, ret = __wt_schema_rename(session, uri, newuri, cfg)))); err: if (ret != 0) @@ -858,21 +855,22 @@ __session_drop(WT_SESSION *wt_session, const char *uri, const char *config) if (lock_wait) WT_WITH_CHECKPOINT_LOCK(session, WT_WITH_SCHEMA_LOCK(session, - WT_WITH_TABLE_LOCK(session, ret = + WT_WITH_TABLE_WRITE_LOCK(session, ret = __wt_schema_drop(session, uri, cfg)))); else WT_WITH_CHECKPOINT_LOCK_NOWAIT(session, ret, WT_WITH_SCHEMA_LOCK_NOWAIT(session, ret, - WT_WITH_TABLE_LOCK_NOWAIT(session, ret, ret = + WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret, + ret = __wt_schema_drop(session, uri, cfg)))); } else { if (lock_wait) WT_WITH_SCHEMA_LOCK(session, - WT_WITH_TABLE_LOCK(session, + WT_WITH_TABLE_WRITE_LOCK(session, ret = __wt_schema_drop(session, uri, cfg))); else WT_WITH_SCHEMA_LOCK_NOWAIT(session, ret, - WT_WITH_TABLE_LOCK_NOWAIT(session, ret, + WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret, ret = __wt_schema_drop(session, uri, cfg))); } @@ -1488,6 +1486,20 @@ __session_transaction_pinned_range(WT_SESSION *wt_session, uint64_t *prange) err: API_END_RET(session, ret); } +/* + * __transaction_sync_run_chk -- + * Check to decide if the transaction sync call should continue running. + */ +static bool +__transaction_sync_run_chk(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + + conn = S2C(session); + + return (FLD_ISSET(conn->flags, WT_CONN_LOG_SERVER_RUN)); +} + /* * __session_transaction_sync -- * WT_SESSION->transaction_sync method. 
@@ -1502,7 +1514,7 @@ __session_transaction_sync(WT_SESSION *wt_session, const char *config) WT_SESSION_IMPL *session; WT_TXN *txn; struct timespec now, start; - uint64_t timeout_ms, waited_ms; + uint64_t remaining_usec, timeout_ms, waited_ms; bool forever; session = (WT_SESSION_IMPL *)wt_session; @@ -1555,22 +1567,20 @@ __session_transaction_sync(WT_SESSION *wt_session, const char *config) __wt_epoch(session, &start); /* * Keep checking the LSNs until we find it is stable or we reach - * our timeout. + * our timeout, or there's some other reason to quit. */ while (__wt_log_cmp(&session->bg_sync_lsn, &log->sync_lsn) > 0) { + if (!__transaction_sync_run_chk(session)) + WT_ERR(ETIMEDOUT); + __wt_cond_signal(session, conn->log_file_cond); __wt_epoch(session, &now); waited_ms = WT_TIMEDIFF_MS(now, start); - if (forever || waited_ms < timeout_ms) - /* - * Note, we will wait an increasing amount of time - * each iteration, likely doubling. Also note that - * the function timeout value is in usecs (we are - * computing the wait time in msecs and passing that - * in, unchanged, as the usecs to wait). 
- */ - __wt_cond_wait(session, log->log_sync_cond, waited_ms); - else + if (forever || waited_ms < timeout_ms) { + remaining_usec = (timeout_ms - waited_ms) * WT_THOUSAND; + __wt_cond_wait(session, log->log_sync_cond, + remaining_usec, __transaction_sync_run_chk); + } else WT_ERR(ETIMEDOUT); } @@ -1825,8 +1835,6 @@ __open_session(WT_CONNECTION_IMPL *conn, session_ret->name = NULL; session_ret->id = i; - WT_ERR(__wt_cond_alloc(session, "session", false, &session_ret->cond)); - if (WT_SESSION_FIRST_USE(session_ret)) __wt_random_init(&session_ret->rnd); diff --git a/src/third_party/wiredtiger/src/session/session_dhandle.c b/src/third_party/wiredtiger/src/session/session_dhandle.c index f1251794b89..ee9bddbfc19 100644 --- a/src/third_party/wiredtiger/src/session/session_dhandle.c +++ b/src/third_party/wiredtiger/src/session/session_dhandle.c @@ -44,8 +44,7 @@ __session_discard_dhandle( TAILQ_REMOVE(&session->dhandles, dhandle_cache, q); TAILQ_REMOVE(&session->dhhash[bucket], dhandle_cache, hashq); - (void)__wt_atomic_sub32(&dhandle_cache->dhandle->session_ref, 1); - + WT_DHANDLE_RELEASE(dhandle_cache->dhandle); __wt_overwrite_and_free(session, dhandle_cache); } @@ -412,17 +411,27 @@ __session_dhandle_sweep(WT_SESSION_IMPL *session) /* * __session_find_shared_dhandle -- * Search for a data handle in the connection and add it to a session's - * cache. Since the data handle isn't locked, this must be called holding - * the handle list lock, and we must increment the handle's reference - * count before releasing it. + * cache. We must increment the handle's reference count while holding + * the handle list lock. 
*/ static int __session_find_shared_dhandle( WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) { - WT_RET(__wt_conn_dhandle_find(session, uri, checkpoint)); - (void)__wt_atomic_add32(&session->dhandle->session_ref, 1); - return (0); + WT_DECL_RET; + + WT_WITH_HANDLE_LIST_READ_LOCK(session, + if ((ret = __wt_conn_dhandle_find(session, uri, checkpoint)) == 0) + WT_DHANDLE_ACQUIRE(session->dhandle)); + + if (ret != WT_NOTFOUND) + return (ret); + + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, + if ((ret = __wt_conn_dhandle_alloc(session, uri, checkpoint)) == 0) + WT_DHANDLE_ACQUIRE(session->dhandle)); + + return (ret); } /* @@ -450,16 +459,16 @@ __session_get_dhandle( * We didn't find a match in the session cache, search the shared * handle list and cache the handle we find. */ - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __session_find_shared_dhandle(session, uri, checkpoint)); - WT_RET(ret); + WT_RET(__session_find_shared_dhandle(session, uri, checkpoint)); /* * Fixup the reference count on failure (we incremented the reference * count while holding the handle-list lock). */ - if ((ret = __session_add_dhandle(session)) != 0) - (void)__wt_atomic_sub32(&session->dhandle->session_ref, 1); + if ((ret = __session_add_dhandle(session)) != 0) { + WT_DHANDLE_RELEASE(session->dhandle); + session->dhandle = NULL; + } return (ret); } @@ -505,17 +514,15 @@ __wt_session_get_btree(WT_SESSION_IMPL *session, * reopen handles in the meantime. A combination of the schema * and handle list locks are used to enforce this. 
*/ - if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA) || - !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) { + if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) { dhandle->excl_session = NULL; dhandle->excl_ref = 0; F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE); __wt_writeunlock(session, &dhandle->rwlock); WT_WITH_SCHEMA_LOCK(session, - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_session_get_btree( - session, uri, checkpoint, cfg, flags))); + ret = __wt_session_get_btree( + session, uri, checkpoint, cfg, flags)); return (ret); } diff --git a/src/third_party/wiredtiger/src/support/cond_auto.c b/src/third_party/wiredtiger/src/support/cond_auto.c index a3ae67f5baa..600e5eab0ff 100644 --- a/src/third_party/wiredtiger/src/support/cond_auto.c +++ b/src/third_party/wiredtiger/src/support/cond_auto.c @@ -1,29 +1,9 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. - * Public Domain 2008-2014 WiredTiger, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. * - * This is free and unencumbered software released into the public domain. - * - * Anyone is free to copy, modify, publish, use, compile, sell, or - * distribute this software, either in source code form or as a compiled - * binary, for any purpose, commercial or non-commercial, and by any - * means. - * - * In jurisdictions that recognize copyright laws, the author or authors - * of this software dedicate any and all copyright interest in the - * software to the public domain. We make this dedication for the benefit - * of the public at large and to the detriment of our heirs and - * successors. We intend this dedication to be an overt act of - * relinquishment in perpetuity of all present and future rights to this - * software under copyright law. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. + * See the file LICENSE for redistribution information. */ #include "wt_internal.h" @@ -38,13 +18,12 @@ * Allocate and initialize an automatically adjusting condition variable. */ int -__wt_cond_auto_alloc( - WT_SESSION_IMPL *session, const char *name, - bool is_signalled, uint64_t min, uint64_t max, WT_CONDVAR **condp) +__wt_cond_auto_alloc(WT_SESSION_IMPL *session, + const char *name, uint64_t min, uint64_t max, WT_CONDVAR **condp) { WT_CONDVAR *cond; - WT_RET(__wt_cond_alloc(session, name, is_signalled, condp)); + WT_RET(__wt_cond_alloc(session, name, condp)); cond = *condp; cond->min_wait = min; @@ -54,34 +33,20 @@ __wt_cond_auto_alloc( return (0); } -/* - * __wt_cond_auto_signal -- - * Signal a condition variable. - */ -void -__wt_cond_auto_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond) -{ - - WT_ASSERT(session, cond->min_wait != 0); - __wt_cond_signal(session, cond); -} - /* * __wt_cond_auto_wait_signal -- * Wait on a mutex, optionally timing out. If we get it before the time * out period expires, let the caller know. - * TODO: Can this version of the API be removed, now that we have the - * auto adjusting condition variables? */ void -__wt_cond_auto_wait_signal( - WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool *signalled) +__wt_cond_auto_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, + bool progress, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled) { uint64_t delta; /* * Catch cases where this function is called with a condition variable - * that was initialized non-auto. + * that wasn't initialized to do automatic adjustments. 
*/ WT_ASSERT(session, cond->min_wait != 0); @@ -94,7 +59,8 @@ __wt_cond_auto_wait_signal( cond->max_wait, cond->prev_wait + delta); } - __wt_cond_wait_signal(session, cond, cond->prev_wait, signalled); + __wt_cond_wait_signal( + session, cond, cond->prev_wait, run_func, signalled); if (progress || *signalled) WT_STAT_CONN_INCR(session, cond_auto_wait_reset); @@ -108,24 +74,10 @@ __wt_cond_auto_wait_signal( * out period expires, let the caller know. */ void -__wt_cond_auto_wait( - WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress) +__wt_cond_auto_wait(WT_SESSION_IMPL *session, + WT_CONDVAR *cond, bool progress, bool (*run_func)(WT_SESSION_IMPL *)) { - bool signalled; - - /* - * Call the signal version so the wait period is reset if the - * condition is woken explicitly. - */ - __wt_cond_auto_wait_signal(session, cond, progress, &signalled); -} + bool notused; -/* - * __wt_cond_auto_destroy -- - * Destroy a condition variable. - */ -int -__wt_cond_auto_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) -{ - return (__wt_cond_destroy(session, condp)); + __wt_cond_auto_wait_signal(session, cond, progress, run_func, &notused); } diff --git a/src/third_party/wiredtiger/src/support/rand.c b/src/third_party/wiredtiger/src/support/rand.c index a5b229b9abc..4fae43edc8e 100644 --- a/src/third_party/wiredtiger/src/support/rand.c +++ b/src/third_party/wiredtiger/src/support/rand.c @@ -120,3 +120,15 @@ __wt_random(WT_RAND_STATE volatile * rnd_state) return ((z << 16) + (w & 65535)); } + +/* + * __wt_random64 -- + * Return a 64-bit pseudo-random number.
+ */ +uint64_t +__wt_random64(WT_RAND_STATE volatile * rnd_state) + WT_GCC_FUNC_ATTRIBUTE((visibility("default"))) +{ + return (((uint64_t)__wt_random(rnd_state) << 32) + + __wt_random(rnd_state)); +} diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index 167d17137ce..fd38e1b79ee 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -759,9 +759,7 @@ static const char * const __stats_connection_desc[] = { "lock: checkpoint lock acquisitions", "lock: checkpoint lock application thread wait time (usecs)", "lock: checkpoint lock internal thread wait time (usecs)", - "lock: handle-list lock acquisitions", - "lock: handle-list lock application thread wait time (usecs)", - "lock: handle-list lock internal thread wait time (usecs)", + "lock: handle-list lock eviction thread wait time (usecs)", "lock: metadata lock acquisitions", "lock: metadata lock application thread wait time (usecs)", "lock: metadata lock internal thread wait time (usecs)", @@ -1044,9 +1042,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->lock_checkpoint_count = 0; stats->lock_checkpoint_wait_application = 0; stats->lock_checkpoint_wait_internal = 0; - stats->lock_handle_list_count = 0; - stats->lock_handle_list_wait_application = 0; - stats->lock_handle_list_wait_internal = 0; + stats->lock_handle_list_wait_eviction = 0; stats->lock_metadata_count = 0; stats->lock_metadata_wait_application = 0; stats->lock_metadata_wait_internal = 0; @@ -1351,12 +1347,8 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, lock_checkpoint_wait_application); to->lock_checkpoint_wait_internal += WT_STAT_READ(from, lock_checkpoint_wait_internal); - to->lock_handle_list_count += - WT_STAT_READ(from, lock_handle_list_count); - to->lock_handle_list_wait_application += - WT_STAT_READ(from, lock_handle_list_wait_application); - to->lock_handle_list_wait_internal += - WT_STAT_READ(from, 
lock_handle_list_wait_internal); + to->lock_handle_list_wait_eviction += + WT_STAT_READ(from, lock_handle_list_wait_eviction); to->lock_metadata_count += WT_STAT_READ(from, lock_metadata_count); to->lock_metadata_wait_application += WT_STAT_READ(from, lock_metadata_wait_application); diff --git a/src/third_party/wiredtiger/src/support/thread_group.c b/src/third_party/wiredtiger/src/support/thread_group.c index beb143e63e2..2b4b7ad4e61 100644 --- a/src/third_party/wiredtiger/src/support/thread_group.c +++ b/src/third_party/wiredtiger/src/support/thread_group.c @@ -259,7 +259,7 @@ __wt_thread_group_create( __wt_rwlock_init(session, &group->lock); WT_ERR(__wt_cond_alloc( - session, "Thread group cond", false, &group->wait_cond)); + session, "thread group cond", &group->wait_cond)); cond_alloced = true; __wt_writelock(session, &group->lock); diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index 660d37b17d5..e5e59c2b901 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -803,3 +803,98 @@ __wt_txn_global_destroy(WT_SESSION_IMPL *session) __wt_rwlock_destroy(session, &txn_global->nsnap_rwlock); __wt_free(session, txn_global->states); } + +#if defined(HAVE_DIAGNOSTIC) || defined(HAVE_VERBOSE) +/* + * __wt_verbose_dump_txn -- + * Output diagnostic information about the global transaction state. 
+ */ +int +__wt_verbose_dump_txn(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + WT_TXN_GLOBAL *txn_global; + WT_TXN *txn; + WT_TXN_STATE *s; + const char *iso_tag; + uint64_t id; + uint32_t i, session_cnt; + + conn = S2C(session); + txn_global = &conn->txn_global; + + WT_RET(__wt_msg(session, "%s", WT_DIVIDER)); + WT_RET(__wt_msg(session, "transaction state dump")); + + WT_RET(__wt_msg(session, "current ID: %" PRIu64, txn_global->current)); + WT_RET(__wt_msg(session, + "last running ID: %" PRIu64, txn_global->last_running)); + WT_RET(__wt_msg(session, "oldest ID: %" PRIu64, txn_global->oldest_id)); + WT_RET(__wt_msg(session, + "oldest named snapshot ID: %" PRIu64, txn_global->nsnap_oldest_id)); + + WT_RET(__wt_msg(session, "checkpoint running? %s", + txn_global->checkpoint_running ? "yes" : "no")); + WT_RET(__wt_msg(session, + "checkpoint generation: %" PRIu64, txn_global->checkpoint_gen)); + WT_RET(__wt_msg(session, + "checkpoint pinned ID: %" PRIu64, txn_global->checkpoint_pinned)); + WT_RET(__wt_msg(session, + "checkpoint txn ID: %" PRIu64, txn_global->checkpoint_txnid)); + + WT_ORDERED_READ(session_cnt, conn->session_cnt); + WT_RET(__wt_msg(session, "session count: %" PRIu32, session_cnt)); + + WT_RET(__wt_msg(session, "Transaction state of active sessions:")); + + /* + * Walk each session transaction state and dump information. Accessing + * the content of session handles is not thread safe, so some + * information may change while traversing if other threads are active + * at the same time, which is OK since this is diagnostic code. 
+ */ + for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) { + /* Skip sessions with no active transaction */ + if ((id = s->id) == WT_TXN_NONE && s->pinned_id == WT_TXN_NONE) + continue; + + txn = &conn->sessions[i].txn; + iso_tag = "INVALID"; + switch (txn->isolation) { + case WT_ISO_READ_COMMITTED: + iso_tag = "WT_ISO_READ_COMMITTED"; + break; + case WT_ISO_READ_UNCOMMITTED: + iso_tag = "WT_ISO_READ_UNCOMMITTED"; + break; + case WT_ISO_SNAPSHOT: + iso_tag = "WT_ISO_SNAPSHOT"; + break; + } + + WT_RET(__wt_msg(session, + "ID: %6" PRIu64 + ", mod count: %u" + ", pinned ID: %" PRIu64 + ", snap min: %" PRIu64 + ", snap max: %" PRIu64 + ", metadata pinned ID: %" PRIu64 + ", flags: 0x%08" PRIx32 + ", name: %s" + ", isolation: %s", + id, + txn->mod_count, + s->pinned_id, + txn->snap_min, + txn->snap_max, + s->metadata_pinned, + txn->flags, + conn->sessions[i].name == NULL ? + "EMPTY" : conn->sessions[i].name, + iso_tag)); + } + WT_RET(__wt_msg(session, "%s", WT_DIVIDER)); + + return (0); +} +#endif diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index 3b19162fd3d..3261c8089f4 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -524,6 +524,17 @@ __checkpoint_verbose_track(WT_SESSION_IMPL *session, #endif } +/* + * __checkpoint_fail_reset -- + * Reset fields when a failure occurs. + */ +static void +__checkpoint_fail_reset(WT_SESSION_IMPL *session) +{ + S2BT(session)->modified = true; + S2BT(session)->ckpt = NULL; +} + /* * __txn_checkpoint -- * Checkpoint a database or a list of objects in the database. 
@@ -543,7 +554,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) void *saved_meta_next; u_int i; uint64_t fsync_duration_usecs; - bool full, idle, logging, tracking; + bool failed, full, idle, logging, tracking; const char *txn_cfg[] = { WT_CONFIG_BASE(session, WT_SESSION_begin_transaction), "isolation=snapshot", NULL }; @@ -639,10 +650,9 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) */ WT_ASSERT(session, session->ckpt_handle_next == 0); WT_WITH_SCHEMA_LOCK(session, - WT_WITH_TABLE_LOCK(session, - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __checkpoint_apply_all( - session, cfg, __wt_checkpoint_get_handles, NULL)))); + WT_WITH_TABLE_READ_LOCK(session, + ret = __checkpoint_apply_all( + session, cfg, __wt_checkpoint_get_handles, NULL))); WT_ERR(ret); /* @@ -825,12 +835,13 @@ err: /* * overwritten the checkpoint, so what ends up on disk is not * consistent. */ - if (ret != 0 && !conn->modified) + failed = ret != 0; + if (failed) conn->modified = true; session->isolation = txn->isolation = WT_ISO_READ_UNCOMMITTED; if (tracking) - WT_TRET(__wt_meta_track_off(session, false, ret != 0)); + WT_TRET(__wt_meta_track_off(session, false, failed)); cache->eviction_scrub_limit = 0.0; WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target, 0); @@ -863,6 +874,13 @@ err: /* for (i = 0; i < session->ckpt_handle_next; ++i) { if (session->ckpt_handle[i] == NULL) continue; + /* + * If the operation failed, mark all trees dirty so they are + * included if a future checkpoint can succeed. 
+ */ + if (failed) + WT_WITH_DHANDLE(session, session->ckpt_handle[i], + __checkpoint_fail_reset(session)); WT_WITH_DHANDLE(session, session->ckpt_handle[i], WT_TRET(__wt_session_release_btree(session))); } @@ -1341,7 +1359,6 @@ __checkpoint_tree( WT_DATA_HANDLE *dhandle; WT_DECL_RET; WT_LSN ckptlsn; - int was_modified; bool fake_ckpt; WT_UNUSED(cfg); @@ -1352,7 +1369,6 @@ __checkpoint_tree( conn = S2C(session); dhandle = session->dhandle; fake_ckpt = false; - was_modified = btree->modified; /* * Set the checkpoint LSN to the maximum LSN so that if logging is @@ -1483,10 +1499,9 @@ err: /* * If the checkpoint didn't complete successfully, make sure the * tree is marked dirty. */ - if (ret != 0 && !btree->modified && was_modified) { + if (ret != 0) { btree->modified = true; - if (!S2C(session)->modified) - S2C(session)->modified = true; + S2C(session)->modified = true; } __wt_meta_ckptlist_free(session, ckptbase); diff --git a/src/third_party/wiredtiger/src/txn/txn_log.c b/src/third_party/wiredtiger/src/txn/txn_log.c index 7ad295f421b..2931dc1ce82 100644 --- a/src/third_party/wiredtiger/src/txn/txn_log.c +++ b/src/third_party/wiredtiger/src/txn/txn_log.c @@ -269,7 +269,7 @@ __wt_txn_checkpoint_logread(WT_SESSION_IMPL *session, WT_ITEM ckpt_snapshot_unused; uint32_t ckpt_file, ckpt_offset; u_int ckpt_nsnapshot_unused; - const char *fmt = WT_UNCHECKED_STRING(IIIU); + const char *fmt = WT_UNCHECKED_STRING(IIIu); if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt, &ckpt_file, &ckpt_offset, @@ -297,7 +297,7 @@ __wt_txn_checkpoint_log( uint8_t *end, *p; size_t recsize; uint32_t i, rectype = WT_LOGREC_CHECKPOINT; - const char *fmt = WT_UNCHECKED_STRING(IIIIU); + const char *fmt = WT_UNCHECKED_STRING(IIIIu); txn = &session->txn; ckpt_lsn = &txn->ckpt_lsn; diff --git a/src/third_party/wiredtiger/src/utilities/util.h b/src/third_party/wiredtiger/src/utilities/util.h index cf12d7d4aa6..93a96d44219 100644 --- a/src/third_party/wiredtiger/src/utilities/util.h 
+++ b/src/third_party/wiredtiger/src/utilities/util.h @@ -40,7 +40,6 @@ int util_flush(WT_SESSION *, const char *); int util_list(WT_SESSION *, int, char *[]); int util_load(WT_SESSION *, int, char *[]); int util_loadtext(WT_SESSION *, int, char *[]); -char *util_name(WT_SESSION *, const char *, const char *); int util_printlog(WT_SESSION *, int, char *[]); int util_read(WT_SESSION *, int, char *[]); int util_read_line(WT_SESSION *, ULINE *, bool, bool *); @@ -51,5 +50,6 @@ int util_stat(WT_SESSION *, int, char *[]); int util_str2recno(WT_SESSION *, const char *p, uint64_t *recnop); int util_truncate(WT_SESSION *, int, char *[]); int util_upgrade(WT_SESSION *, int, char *[]); +char *util_uri(WT_SESSION *, const char *, const char *); int util_verify(WT_SESSION *, int, char *[]); int util_write(WT_SESSION *, int, char *[]); diff --git a/src/third_party/wiredtiger/src/utilities/util_alter.c b/src/third_party/wiredtiger/src/utilities/util_alter.c index d228c15cd48..ef01a1ed826 100644 --- a/src/third_party/wiredtiger/src/utilities/util_alter.c +++ b/src/third_party/wiredtiger/src/utilities/util_alter.c @@ -34,9 +34,12 @@ util_alter(WT_SESSION *session, int argc, char *argv[]) for (configp = argv; configp != NULL && *configp != NULL; configp += 2) if ((ret = session->alter( - session, configp[0], configp[1])) != 0) - break; - return (ret); + session, configp[0], configp[1])) != 0) { + (void)util_err(session, ret, + "session.alter: %s, %s", configp[0], configp[1]); + return (1); + } + return (0); } static int diff --git a/src/third_party/wiredtiger/src/utilities/util_compact.c b/src/third_party/wiredtiger/src/utilities/util_compact.c index c114eb207fa..e469b4dce6e 100644 --- a/src/third_party/wiredtiger/src/utilities/util_compact.c +++ b/src/third_party/wiredtiger/src/utilities/util_compact.c @@ -30,21 +30,13 @@ util_compact(WT_SESSION *session, int argc, char *argv[]) /* The remaining argument is the table name. 
*/ if (argc != 1) return (usage()); - if ((uri = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); - if ((ret = session->compact(session, uri, NULL)) != 0) { - fprintf(stderr, "%s: compact(%s): %s\n", - progname, uri, session->strerror(session, ret)); - goto err; - } - - if (0) { -err: ret = 1; - } + if ((ret = session->compact(session, uri, NULL)) != 0) + (void)util_err(session, ret, "session.compact: %s", uri); free(uri); - return (ret); } diff --git a/src/third_party/wiredtiger/src/utilities/util_create.c b/src/third_party/wiredtiger/src/utilities/util_create.c index 4e609736f2d..7c22a67792b 100644 --- a/src/third_party/wiredtiger/src/utilities/util_create.c +++ b/src/third_party/wiredtiger/src/utilities/util_create.c @@ -15,9 +15,9 @@ util_create(WT_SESSION *session, int argc, char *argv[]) { WT_DECL_RET; int ch; - const char *config, *uri; + char *config, *uri; - config = NULL; + config = uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "c:")) != EOF) switch (ch) { case 'c': /* command-line configuration */ @@ -35,12 +35,14 @@ util_create(WT_SESSION *session, int argc, char *argv[]) if (argc != 1) return (usage()); - if ((uri = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); if ((ret = session->create(session, uri, config)) != 0) - return (util_err(session, ret, "%s: session.create", uri)); - return (0); + (void)util_err(session, ret, "session.create: %s", uri); + + free(uri); + return (ret); } static int diff --git a/src/third_party/wiredtiger/src/utilities/util_drop.c b/src/third_party/wiredtiger/src/utilities/util_drop.c index ba41445dfb6..456005d445d 100644 --- a/src/third_party/wiredtiger/src/utilities/util_drop.c +++ b/src/third_party/wiredtiger/src/utilities/util_drop.c @@ -15,8 +15,9 @@ util_drop(WT_SESSION *session, int argc, char *argv[]) { WT_DECL_RET; int ch; - char *name; + char *uri; + uri = NULL; while ((ch 
= __wt_getopt(progname, argc, argv, "")) != EOF) switch (ch) { case '?': @@ -30,12 +31,13 @@ util_drop(WT_SESSION *session, int argc, char *argv[]) /* The remaining argument is the uri. */ if (argc != 1) return (usage()); - if ((name = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); - ret = session->drop(session, name, "force"); + if ((ret = session->drop(session, uri, "force")) != 0) + (void)util_err(session, ret, "session.drop: %s", uri); - free(name); + free(uri); return (ret); } diff --git a/src/third_party/wiredtiger/src/utilities/util_dump.c b/src/third_party/wiredtiger/src/utilities/util_dump.c index 3f8b4a49dfe..cded40a8b45 100644 --- a/src/third_party/wiredtiger/src/utilities/util_dump.c +++ b/src/third_party/wiredtiger/src/utilities/util_dump.c @@ -37,10 +37,10 @@ util_dump(WT_SESSION *session, int argc, char *argv[]) size_t len; int ch, i; bool hex, json, reverse; - char *checkpoint, *config, *name, *p, *simplename; + char *checkpoint, *config, *p, *simpleuri, *uri; hex = json = reverse = false; - checkpoint = config = name = simplename = NULL; + checkpoint = config = simpleuri = uri = NULL; cursor = NULL; while ((ch = __wt_getopt(progname, argc, argv, "c:f:jrx")) != EOF) switch (ch) { @@ -89,11 +89,11 @@ util_dump(WT_SESSION *session, int argc, char *argv[]) if (json && i > 0) if (dump_json_separator(session) != 0) goto err; - free(name); - free(simplename); - name = simplename = NULL; + free(uri); + free(simpleuri); + uri = simpleuri = NULL; - if ((name = util_name(session, argv[i], "table")) == NULL) + if ((uri = util_uri(session, argv[i], "table")) == NULL) goto err; len = @@ -113,19 +113,19 @@ util_dump(WT_SESSION *session, int argc, char *argv[]) (void)strcat(config, json ? "dump=json" : (hex ? 
"dump=hex" : "dump=print")); if ((ret = session->open_cursor( - session, name, NULL, config, &cursor)) != 0) { + session, uri, NULL, config, &cursor)) != 0) { fprintf(stderr, "%s: cursor open(%s) failed: %s\n", - progname, name, session->strerror(session, ret)); + progname, uri, session->strerror(session, ret)); goto err; } - if ((simplename = strdup(name)) == NULL) { + if ((simpleuri = strdup(uri)) == NULL) { (void)util_err(session, errno, NULL); goto err; } - if ((p = strchr(simplename, '(')) != NULL) + if ((p = strchr(simpleuri, '(')) != NULL) *p = '\0'; - if (dump_config(session, simplename, cursor, hex, json) != 0) + if (dump_config(session, simpleuri, cursor, hex, json) != 0) goto err; if (dump_record(cursor, reverse, json) != 0) @@ -148,8 +148,8 @@ err: ret = 1; } free(config); - free(name); - free(simplename); + free(uri); + free(simpleuri); if (cursor != NULL && (ret = cursor->close(cursor)) != 0) { (void)util_err(session, ret, NULL); ret = 1; diff --git a/src/third_party/wiredtiger/src/utilities/util_list.c b/src/third_party/wiredtiger/src/utilities/util_list.c index e91dbfce05b..f19ba4d1f97 100644 --- a/src/third_party/wiredtiger/src/utilities/util_list.c +++ b/src/third_party/wiredtiger/src/utilities/util_list.c @@ -19,10 +19,10 @@ util_list(WT_SESSION *session, int argc, char *argv[]) WT_DECL_RET; int ch; bool cflag, vflag; - char *name; + char *uri; cflag = vflag = false; - name = NULL; + uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "cv")) != EOF) switch (ch) { case 'c': @@ -42,17 +42,16 @@ util_list(WT_SESSION *session, int argc, char *argv[]) case 0: break; case 1: - if ((name = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); break; default: return (usage()); } - ret = list_print(session, name, cflag, vflag); - - free(name); + ret = list_print(session, uri, cflag, vflag); + free(uri); return (ret); } @@ -99,7 +98,7 @@ list_get_allocsize(WT_SESSION *session, const char 
*key, size_t *allocsize) * List the high-level objects in the database. */ static int -list_print(WT_SESSION *session, const char *name, bool cflag, bool vflag) +list_print(WT_SESSION *session, const char *uri, bool cflag, bool vflag) { WT_CURSOR *cursor; WT_DECL_RET; @@ -120,7 +119,7 @@ list_print(WT_SESSION *session, const char *name, bool cflag, bool vflag) ret, "%s: WT_SESSION.open_cursor", WT_METADATA_URI)); } - found = name == NULL; + found = uri == NULL; while ((ret = cursor->next(cursor)) == 0) { /* Get the key. */ if ((ret = cursor->get_key(cursor, &key)) != 0) @@ -129,8 +128,8 @@ list_print(WT_SESSION *session, const char *name, bool cflag, bool vflag) /* * If a name is specified, only show objects that match. */ - if (name != NULL) { - if (!WT_PREFIX_MATCH(key, name)) + if (uri != NULL) { + if (!WT_PREFIX_MATCH(key, uri)) continue; found = true; } @@ -161,7 +160,7 @@ list_print(WT_SESSION *session, const char *name, bool cflag, bool vflag) if (ret != WT_NOTFOUND) return (util_cerr(cursor, "next", ret)); if (!found) { - fprintf(stderr, "%s: %s: not found\n", progname, name); + fprintf(stderr, "%s: %s: not found\n", progname, uri); return (1); } diff --git a/src/third_party/wiredtiger/src/utilities/util_load.c b/src/third_party/wiredtiger/src/utilities/util_load.c index ac18df80851..ca77643eb49 100644 --- a/src/third_party/wiredtiger/src/utilities/util_load.c +++ b/src/third_party/wiredtiger/src/utilities/util_load.c @@ -126,7 +126,7 @@ load_dump(WT_SESSION *session) append ? ",append" : "", no_overwrite ? 
",overwrite=false" : ""); if ((ret = session->open_cursor( session, uri, NULL, config, &cursor)) != 0) { - ret = util_err(session, ret, "%s: session.open", uri); + ret = util_err(session, ret, "%s: session.open_cursor", uri); goto err; } diff --git a/src/third_party/wiredtiger/src/utilities/util_load_json.c b/src/third_party/wiredtiger/src/utilities/util_load_json.c index 020a4ed9ba9..1189d49a483 100644 --- a/src/third_party/wiredtiger/src/utilities/util_load_json.c +++ b/src/third_party/wiredtiger/src/utilities/util_load_json.c @@ -242,7 +242,7 @@ json_data(WT_SESSION *session, LF_ISSET(LOAD_JSON_NO_OVERWRITE) ? ",overwrite=false" : ""); if ((ret = session->open_cursor( session, uri, NULL, config, &cursor)) != 0) { - ret = util_err(session, ret, "%s: session.open", uri); + ret = util_err(session, ret, "%s: session.open_cursor", uri); goto err; } keyformat = cursor->key_format; diff --git a/src/third_party/wiredtiger/src/utilities/util_loadtext.c b/src/third_party/wiredtiger/src/utilities/util_loadtext.c index f9c5b6e9a1f..7602d43f8c9 100644 --- a/src/third_party/wiredtiger/src/utilities/util_loadtext.c +++ b/src/third_party/wiredtiger/src/utilities/util_loadtext.c @@ -15,9 +15,11 @@ static int usage(void); int util_loadtext(WT_SESSION *session, int argc, char *argv[]) { + WT_DECL_RET; int ch; - const char *uri; + char *uri; + uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "f:")) != EOF) switch (ch) { case 'f': /* input file */ @@ -35,10 +37,13 @@ util_loadtext(WT_SESSION *session, int argc, char *argv[]) /* The remaining argument is the uri. 
*/ if (argc != 1) return (usage()); - if ((uri = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); - return (text(session, uri)); + ret = text(session, uri); + + free(uri); + return (ret); } /* @@ -61,7 +66,7 @@ text(WT_SESSION *session, const char *uri) */ if ((ret = session->open_cursor( session, uri, NULL, "append,overwrite", &cursor)) != 0) - return (util_err(session, ret, "%s: session.open", uri)); + return (util_err(session, ret, "%s: session.open_cursor", uri)); /* * We're about to load strings, make sure the formats match. diff --git a/src/third_party/wiredtiger/src/utilities/util_main.c b/src/third_party/wiredtiger/src/utilities/util_main.c index 001a66d6d9e..7157f0d90fe 100644 --- a/src/third_party/wiredtiger/src/utilities/util_main.c +++ b/src/third_party/wiredtiger/src/utilities/util_main.c @@ -285,11 +285,11 @@ usage(void) } /* - * util_name -- + * util_uri -- * Build a name. */ char * -util_name(WT_SESSION *session, const char *s, const char *type) +util_uri(WT_SESSION *session, const char *s, const char *type) { size_t len; char *name; diff --git a/src/third_party/wiredtiger/src/utilities/util_printlog.c b/src/third_party/wiredtiger/src/utilities/util_printlog.c index e7fa2134934..5f3ed43905b 100644 --- a/src/third_party/wiredtiger/src/utilities/util_printlog.c +++ b/src/third_party/wiredtiger/src/utilities/util_printlog.c @@ -14,8 +14,8 @@ int util_printlog(WT_SESSION *session, int argc, char *argv[]) { WT_DECL_RET; - int ch; uint32_t flags; + int ch; flags = 0; while ((ch = __wt_getopt(progname, argc, argv, "f:x")) != EOF) @@ -41,17 +41,9 @@ util_printlog(WT_SESSION *session, int argc, char *argv[]) if (argc != 0) return (usage()); - ret = __wt_txn_printlog(session, flags); - - if (ret != 0) { - fprintf(stderr, "%s: printlog failed: %s\n", - progname, session->strerror(session, ret)); - goto err; - } + if ((ret = __wt_txn_printlog(session, flags)) != 0) + (void)util_err(session, ret, 
"printlog"); - if (0) { -err: ret = 1; - } return (ret); } diff --git a/src/third_party/wiredtiger/src/utilities/util_read.c b/src/third_party/wiredtiger/src/utilities/util_read.c index 2e766377aa9..393949b6a1c 100644 --- a/src/third_party/wiredtiger/src/utilities/util_read.c +++ b/src/third_party/wiredtiger/src/utilities/util_read.c @@ -18,8 +18,9 @@ util_read(WT_SESSION *session, int argc, char *argv[]) uint64_t recno; int ch; bool rkey, rval; - const char *uri, *value; + char *uri, *value; + uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF) switch (ch) { case '?': @@ -32,13 +33,19 @@ util_read(WT_SESSION *session, int argc, char *argv[]) /* The remaining arguments are a uri followed by a list of keys. */ if (argc < 2) return (usage()); - if ((uri = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); - /* Open the object. */ - if ((ret = session->open_cursor( - session, uri, NULL, NULL, &cursor)) != 0) - return (util_err(session, ret, "%s: session.open", uri)); + /* + * Open the object; free allocated memory immediately to simplify + * future error handling. 
+ */ + if ((ret = + session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) + (void)util_err(session, ret, "%s: session.open_cursor", uri); + free(uri); + if (ret != 0) + return (ret); /* * A simple search only makes sense if the key format is a string or a diff --git a/src/third_party/wiredtiger/src/utilities/util_rebalance.c b/src/third_party/wiredtiger/src/utilities/util_rebalance.c index 45f161487e5..c188ea17d22 100644 --- a/src/third_party/wiredtiger/src/utilities/util_rebalance.c +++ b/src/third_party/wiredtiger/src/utilities/util_rebalance.c @@ -15,9 +15,9 @@ util_rebalance(WT_SESSION *session, int argc, char *argv[]) { WT_DECL_RET; int ch; - char *name; + char *uri; - name = NULL; + uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF) switch (ch) { case '?': @@ -30,25 +30,21 @@ util_rebalance(WT_SESSION *session, int argc, char *argv[]) /* The remaining argument is the table name. */ if (argc != 1) return (usage()); - if ((name = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); - if ((ret = session->rebalance(session, name, NULL)) != 0) { - fprintf(stderr, "%s: rebalance(%s): %s\n", - progname, name, session->strerror(session, ret)); - goto err; + if ((ret = session->rebalance(session, uri, NULL)) != 0) + (void)util_err(session, ret, "session.rebalance: %s", uri); + else { + /* + * Verbose configures a progress counter, move to the next + * line. + */ + if (verbose) + printf("\n"); } - /* Verbose configures a progress counter, move to the next line. 
*/ - if (verbose) - printf("\n"); - - if (0) { -err: ret = 1; - } - - free(name); - + free(uri); return (ret); } diff --git a/src/third_party/wiredtiger/src/utilities/util_rename.c b/src/third_party/wiredtiger/src/utilities/util_rename.c index aee299c6e63..bb2d40cd103 100644 --- a/src/third_party/wiredtiger/src/utilities/util_rename.c +++ b/src/third_party/wiredtiger/src/utilities/util_rename.c @@ -30,22 +30,15 @@ util_rename(WT_SESSION *session, int argc, char *argv[]) /* The remaining arguments are the object uri and new name. */ if (argc != 2) return (usage()); - if ((uri = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); newuri = argv[1]; - if ((ret = session->rename(session, uri, newuri, NULL)) != 0) { - fprintf(stderr, "%s: rename %s to %s: %s\n", - progname, uri, newuri, session->strerror(session, ret)); - goto err; - } - - if (0) { -err: ret = 1; - } + if ((ret = session->rename(session, uri, newuri, NULL)) != 0) + (void)util_err( + session, ret, "session.rename: %s, %s", uri, newuri); free(uri); - return (ret); } diff --git a/src/third_party/wiredtiger/src/utilities/util_salvage.c b/src/third_party/wiredtiger/src/utilities/util_salvage.c index 679d1074457..6cc2278b846 100644 --- a/src/third_party/wiredtiger/src/utilities/util_salvage.c +++ b/src/third_party/wiredtiger/src/utilities/util_salvage.c @@ -16,10 +16,10 @@ util_salvage(WT_SESSION *session, int argc, char *argv[]) WT_DECL_RET; int ch; const char *force; - char *name; + char *uri; force = NULL; - name = NULL; + uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "F")) != EOF) switch (ch) { case 'F': @@ -35,25 +35,21 @@ util_salvage(WT_SESSION *session, int argc, char *argv[]) /* The remaining argument is the file name. 
*/ if (argc != 1) return (usage()); - if ((name = util_name(session, *argv, "file")) == NULL) + if ((uri = util_uri(session, *argv, "file")) == NULL) return (1); - if ((ret = session->salvage(session, name, force)) != 0) { - fprintf(stderr, "%s: salvage(%s): %s\n", - progname, name, session->strerror(session, ret)); - goto err; + if ((ret = session->salvage(session, uri, force)) != 0) + (void)util_err(session, ret, "session.salvage: %s", uri); + else { + /* + * Verbose configures a progress counter, move to the next + * line. + */ + if (verbose) + printf("\n"); } - /* Verbose configures a progress counter, move to the next line. */ - if (verbose) - printf("\n"); - - if (0) { -err: ret = 1; - } - - free(name); - + free(uri); return (ret); } diff --git a/src/third_party/wiredtiger/src/utilities/util_stat.c b/src/third_party/wiredtiger/src/utilities/util_stat.c index 4376f559ceb..1b75d9ea8bf 100644 --- a/src/third_party/wiredtiger/src/utilities/util_stat.c +++ b/src/third_party/wiredtiger/src/utilities/util_stat.c @@ -55,7 +55,7 @@ util_stat(WT_SESSION *session, int argc, char *argv[]) objname = (char *)""; break; case 1: - if ((objname = util_name(session, *argv, "table")) == NULL) + if ((objname = util_uri(session, *argv, "table")) == NULL) return (1); objname_free = true; break; @@ -82,8 +82,8 @@ util_stat(WT_SESSION *session, int argc, char *argv[]) (ret = cursor->next(cursor)) == 0 && (ret = cursor->get_value(cursor, &desc, &pval, NULL)) == 0) if (printf("%s=%s\n", desc, pval) < 0) { - ret = errno; - break; + (void)util_err(session, errno, "printf"); + goto err; } if (ret == WT_NOTFOUND) ret = 0; diff --git a/src/third_party/wiredtiger/src/utilities/util_truncate.c b/src/third_party/wiredtiger/src/utilities/util_truncate.c index 9325c0d7e84..35de02345c8 100644 --- a/src/third_party/wiredtiger/src/utilities/util_truncate.c +++ b/src/third_party/wiredtiger/src/utilities/util_truncate.c @@ -15,8 +15,9 @@ util_truncate(WT_SESSION *session, int argc, char *argv[]) { 
WT_DECL_RET; int ch; - char *name; + char *uri; + uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF) switch (ch) { case '?': @@ -30,13 +31,13 @@ util_truncate(WT_SESSION *session, int argc, char *argv[]) /* The remaining argument is the uri. */ if (argc != 1) return (usage()); - if ((name = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); - if ((ret = session->truncate(session, name, NULL, NULL, NULL)) != 0) - return (util_err(session, ret, "%s: session.truncate", name)); + if ((ret = session->truncate(session, uri, NULL, NULL, NULL)) != 0) + (void)util_err(session, ret, "session.truncate: %s", uri); - free(name); + free(uri); return (ret); } diff --git a/src/third_party/wiredtiger/src/utilities/util_upgrade.c b/src/third_party/wiredtiger/src/utilities/util_upgrade.c index 63b23f28c16..f89bd46e133 100644 --- a/src/third_party/wiredtiger/src/utilities/util_upgrade.c +++ b/src/third_party/wiredtiger/src/utilities/util_upgrade.c @@ -15,9 +15,9 @@ util_upgrade(WT_SESSION *session, int argc, char *argv[]) { WT_DECL_RET; int ch; - char *name; + char *uri; - name = NULL; + uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF) switch (ch) { case '?': @@ -30,25 +30,21 @@ util_upgrade(WT_SESSION *session, int argc, char *argv[]) /* The remaining argument is the table name. */ if (argc != 1) return (usage()); - if ((name = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); - if ((ret = session->upgrade(session, name, NULL)) != 0) { - fprintf(stderr, "%s: upgrade(%s): %s\n", - progname, name, session->strerror(session, ret)); - goto err; + if ((ret = session->upgrade(session, uri, NULL)) != 0) + (void)util_err(session, ret, "session.upgrade: %s", uri); + else { + /* + * Verbose configures a progress counter, move to the next + * line. 
+ */ + if (verbose) + printf("\n"); } - /* Verbose configures a progress counter, move to the next line. */ - if (verbose) - printf("\n"); - - if (0) { -err: ret = 1; - } - - free(name); - + free(uri); return (ret); } diff --git a/src/third_party/wiredtiger/src/utilities/util_verify.c b/src/third_party/wiredtiger/src/utilities/util_verify.c index 82bdd780cd3..d0587fcfc8c 100644 --- a/src/third_party/wiredtiger/src/utilities/util_verify.c +++ b/src/third_party/wiredtiger/src/utilities/util_verify.c @@ -17,10 +17,10 @@ util_verify(WT_SESSION *session, int argc, char *argv[]) size_t size; int ch; bool dump_address, dump_blocks, dump_layout, dump_pages; - char *config, *dump_offsets, *name; + char *config, *dump_offsets, *uri; dump_address = dump_blocks = dump_layout = dump_pages = false; - config = dump_offsets = name = NULL; + config = dump_offsets = uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "d:")) != EOF) switch (ch) { case 'd': @@ -55,7 +55,7 @@ util_verify(WT_SESSION *session, int argc, char *argv[]) /* The remaining argument is the table name. */ if (argc != 1) return (usage()); - if ((name = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); /* Build the configuration string as necessary. */ @@ -69,7 +69,7 @@ util_verify(WT_SESSION *session, int argc, char *argv[]) strlen("dump_offsets[],") + (dump_offsets == NULL ? 0 : strlen(dump_offsets)) + 20; if ((config = malloc(size)) == NULL) { - (void)util_err(session, errno, NULL); + ret = util_err(session, errno, NULL); goto err; } snprintf(config, size, @@ -82,23 +82,19 @@ util_verify(WT_SESSION *session, int argc, char *argv[]) dump_offsets != NULL ? "]," : "", dump_pages ? 
"dump_pages," : ""); } - if ((ret = session->verify(session, name, config)) != 0) { - fprintf(stderr, "%s: verify(%s): %s\n", - progname, name, session->strerror(session, ret)); - goto err; + if ((ret = session->verify(session, uri, config)) != 0) + (void)util_err(session, ret, "session.verify: %s", uri); + else { + /* + * Verbose configures a progress counter, move to the next + * line. + */ + if (verbose) + printf("\n"); } - /* Verbose configures a progress counter, move to the next line. */ - if (verbose) - printf("\n"); - - if (0) { -err: ret = 1; - } - - free(config); - free(name); - +err: free(config); + free(uri); return (ret); } diff --git a/src/third_party/wiredtiger/src/utilities/util_write.c b/src/third_party/wiredtiger/src/utilities/util_write.c index 7d9bce02b36..b931fad064d 100644 --- a/src/third_party/wiredtiger/src/utilities/util_write.c +++ b/src/third_party/wiredtiger/src/utilities/util_write.c @@ -18,10 +18,10 @@ util_write(WT_SESSION *session, int argc, char *argv[]) uint64_t recno; int ch; bool append, overwrite, rkey; - const char *uri; - char config[100]; + char *uri, config[100]; append = overwrite = false; + uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "ao")) != EOF) switch (ch) { case 'a': @@ -47,15 +47,21 @@ util_write(WT_SESSION *session, int argc, char *argv[]) } else if (argc < 3 || ((argc - 1) % 2 != 0)) return (usage()); - if ((uri = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); - /* Open the object. */ + /* + * Open the object; free allocated memory immediately to simplify + * future error handling. + */ (void)snprintf(config, sizeof(config), "%s,%s", append ? "append=true" : "", overwrite ? 
"overwrite=true" : ""); - if ((ret = session->open_cursor( - session, uri, NULL, config, &cursor)) != 0) - return (util_err(session, ret, "%s: session.open", uri)); + if ((ret = + session->open_cursor(session, uri, NULL, config, &cursor)) != 0) + (void)util_err(session, ret, "%s: session.open_cursor", uri); + free(uri); + if (ret != 0) + return (ret); /* * A simple search only makes sense if the key format is a string or a diff --git a/src/third_party/wiredtiger/test/csuite/Makefile.am b/src/third_party/wiredtiger/test/csuite/Makefile.am index a96492c1e71..e2b72532703 100644 --- a/src/third_party/wiredtiger/test/csuite/Makefile.am +++ b/src/third_party/wiredtiger/test/csuite/Makefile.am @@ -37,9 +37,21 @@ noinst_PROGRAMS += test_wt2834_join_bloom_fix test_wt2853_perf_SOURCES = wt2853_perf/main.c noinst_PROGRAMS += test_wt2853_perf +test_wt2909_checkpoint_integrity_SOURCES = wt2909_checkpoint_integrity/main.c +noinst_PROGRAMS += test_wt2909_checkpoint_integrity + test_wt2999_join_extractor_SOURCES = wt2999_join_extractor/main.c noinst_PROGRAMS += test_wt2999_join_extractor +test_wt3120_filesys_SOURCES = wt3120_filesys/main.c +noinst_PROGRAMS += test_wt3120_filesys + +test_wt3135_search_near_collator_SOURCES = wt3135_search_near_collator/main.c +noinst_PROGRAMS += test_wt3135_search_near_collator + +test_wt3184_dup_index_collator_SOURCES = wt3184_dup_index_collator/main.c +noinst_PROGRAMS += test_wt3184_dup_index_collator + # Run this during a "make check" smoke test. TESTS = $(noinst_PROGRAMS) LOG_COMPILER = $(TEST_WRAPPER) diff --git a/src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c b/src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c new file mode 100644 index 00000000000..ddf249fb406 --- /dev/null +++ b/src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c @@ -0,0 +1,666 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. 
+ * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" + +#include +#include +#include + +/* + * JIRA ticket reference: WT-2909 + * Test case description: + * + * This test attempts to check the integrity of checkpoints by injecting + * failures (by means of a custom file system) and then trying to recover. To + * insulate the top level program from various crashes that may occur when + * injecting failures, the "populate" code runs in another process, and is + * expected to sometimes fail. Then the top level program runs recovery (with + * the normal file system) and checks the results. Any failure at the top level + * indicates a checkpoint integrity problem. + * + * Each subtest uses the same kind of schema and data, the only variance is + * when the faults are injected. 
At the moment, this test only injects during + * checkpoints, and only injects write failures. It varies in the number of + * successful writes that occur before an injected failure (during a checkpoint + * operation); this can be indicated with "-o N". When N is not specified, the + * test attempts to find the optimal range of N for testing. Clearly when N is + * large, then the checkpoint may be successfully written, and the data + * represented by the checkpoint will be fully present. When N is small, + * nothing of interest is written and no data is present. To find the sweet + * spot where interesting failures occur, the test does a binary search to find + * the approximate N that divides the "small" and "large" cases. This is not + * strictly deterministic; a given N may give different results on different + * runs. But an approximate optimal N can be determined, allowing a series of + * additional tests clustered around this N. + * + * The data is stored in two tables, one having indices. Both tables have + * the same keys and are updated with the same key in a single transaction. + * + * Failure mode: + * If one table is out of step with the other, that is detected as a failure at + * the top level. If an index is missing values (or has extra values), that is + * likewise a failure at the top level. If the tables or the home directory + * cannot be opened, that is a top level error. The tables must be present + * as an initial checkpoint is done without any injected fault. + */ + +/* + * This program does not run on Windows. The non-portable aspects at minimum + * are fork/exec, the use of environment variables (used by fail_fs), and file + * name and build locations of dynamically loaded libraries.
+ */ +#define BIG_SIZE (1024 * 10) +#define BIG_CONTENTS "<Big String Contents>" +#define MAX_ARGS 20 +#define MAX_OP_RANGE 1000 +#define STDERR_FILE "stderr.txt" +#define STDOUT_FILE "stdout.txt" +#define TESTS_PER_OP_VALUE 3 +#define VERBOSE_PRINT 10000 + +static int check_results(TEST_OPTS *, uint64_t *); +static void check_values(WT_CURSOR *, int, int, int, char *); +static int create_big_string(char **); +static void cursor_count_items(WT_CURSOR *, uint64_t *); +static void disable_failures(void); +static void enable_failures(uint64_t, uint64_t); +static void generate_key(uint64_t, int *); +static void generate_value(uint32_t, uint64_t, char *, int *, int *, int *, + char **); +static void run_check_subtest(TEST_OPTS *, const char *, uint64_t, bool, + uint64_t *); +static void run_check_subtest_range(TEST_OPTS *, const char *, bool); +static int run_process(TEST_OPTS *, const char *, char *[], int *); +static int subtest_main(int, char *[], bool); +static void subtest_populate(TEST_OPTS *, bool); +int main(int, char *[]); + +extern int __wt_optind; + +#define WT_FAIL_FS_LIB "../../ext/test/fail_fs/.libs/libwiredtiger_fail_fs.so" + +/* + * check_results -- + * Check all the tables and verify the results.
+ */ +static int +check_results(TEST_OPTS *opts, uint64_t *foundp) +{ + WT_CURSOR *maincur, *maincur2, *v0cur, *v1cur, *v2cur; + WT_SESSION *session; + uint64_t count, idxcount, nrecords; + uint32_t rndint; + int key, key_got, ret, v0, v1, v2; + char *bigref, *big; + + testutil_check(create_big_string(&bigref)); + nrecords = opts->nrecords; + testutil_check(wiredtiger_open(opts->home, NULL, + "create,log=(enabled)", &opts->conn)); + testutil_check( + opts->conn->open_session(opts->conn, NULL, NULL, &session)); + + testutil_check(session->open_cursor(session, "table:subtest", NULL, + NULL, &maincur)); + testutil_check(session->open_cursor(session, "table:subtest2", NULL, + NULL, &maincur2)); + testutil_check(session->open_cursor(session, "index:subtest:v0", NULL, + NULL, &v0cur)); + testutil_check(session->open_cursor(session, "index:subtest:v1", NULL, + NULL, &v1cur)); + testutil_check(session->open_cursor(session, "index:subtest:v2", NULL, + NULL, &v2cur)); + + count = 0; + while ((ret = maincur->next(maincur)) == 0) { + testutil_check(maincur2->next(maincur2)); + testutil_check(maincur2->get_key(maincur2, &key_got)); + testutil_check(maincur2->get_value(maincur2, &rndint)); + + generate_key(count, &key); + generate_value(rndint, count, bigref, &v0, &v1, &v2, &big); + testutil_assert(key == key_got); + + /* Check the key/values in main table. */ + testutil_check(maincur->get_key(maincur, &key_got)); + testutil_assert(key == key_got); + check_values(maincur, v0, v1, v2, big); + + /* Check the values in the indices. 
*/ + v0cur->set_key(v0cur, v0); + testutil_check(v0cur->search(v0cur)); + check_values(v0cur, v0, v1, v2, big); + v1cur->set_key(v1cur, v1); + testutil_check(v1cur->search(v1cur)); + check_values(v1cur, v0, v1, v2, big); + v2cur->set_key(v2cur, v2); + testutil_check(v2cur->search(v2cur)); + check_values(v2cur, v0, v1, v2, big); + + count++; + if (count % VERBOSE_PRINT == 0 && opts->verbose) + printf("checked %" PRIu64 "/%" PRIu64 "\n", count, + nrecords); + } + if (count % VERBOSE_PRINT != 0 && opts->verbose) + printf("checked %" PRIu64 "/%" PRIu64 "\n", count, nrecords); + + /* + * Always expect at least one entry, as populate does a + * checkpoint after the first insert. + */ + testutil_assert(count > 0); + testutil_assert(ret == WT_NOTFOUND); + testutil_assert(maincur2->next(maincur2) == WT_NOTFOUND); + cursor_count_items(v0cur, &idxcount); + testutil_assert(count == idxcount); + cursor_count_items(v1cur, &idxcount); + testutil_assert(count == idxcount); + cursor_count_items(v2cur, &idxcount); + testutil_assert(count == idxcount); + + testutil_check(opts->conn->close(opts->conn, NULL)); + opts->conn = NULL; + + free(bigref); + *foundp = count; + return (0); +} + +/* + * check_values -- + * Check that the values in the cursor match the given values. + */ +static void +check_values(WT_CURSOR *cursor, int v0, int v1, int v2, char *big) +{ + int v0_got, v1_got, v2_got; + char *big_got; + + testutil_check(cursor->get_value(cursor, &v0_got, &v1_got, &v2_got, + &big_got)); + testutil_assert(v0 == v0_got); + testutil_assert(v1 == v1_got); + testutil_assert(v2 == v2_got); + testutil_assert(strcmp(big, big_got) == 0); +} + +/* + * create_big_string -- + * Create and fill the "reference" big array. 
+ */ +static int create_big_string(char **bigp) +{ + size_t i, mod; + char *big; + + if ((big = malloc(BIG_SIZE + 1)) == NULL) + return (ENOMEM); + mod = strlen(BIG_CONTENTS); + for (i = 0; i < BIG_SIZE; i++) { + big[i] = BIG_CONTENTS[i % mod]; + } + big[BIG_SIZE] = '\0'; + *bigp = big; + return (0); +} + +/* + * cursor_count_items -- + * Count the number of items in the table by traversing + * through the cursor. + */ +static void +cursor_count_items(WT_CURSOR *cursor, uint64_t *countp) +{ + int ret; + + *countp = 0; + + testutil_check(cursor->reset(cursor)); + while ((ret = cursor->next(cursor)) == 0) + (*countp)++; + testutil_assert(ret == WT_NOTFOUND); +} + +/* + * disable_failures -- + * Disable failures in the fail file system. + */ +static void +disable_failures(void) +{ + testutil_check(setenv("WT_FAIL_FS_ENABLE", "0", 1)); +} + +/* + * enable_failures -- + * Enable failures in the fail file system. + */ +static void +enable_failures(uint64_t allow_writes, uint64_t allow_reads) +{ + char value[100]; + + testutil_check(setenv("WT_FAIL_FS_ENABLE", "1", 1)); + snprintf(value, sizeof(value), "%" PRIu64, allow_writes); + testutil_check(setenv("WT_FAIL_FS_WRITE_ALLOW", value, 1)); + snprintf(value, sizeof(value), "%" PRIu64, allow_reads); + testutil_check(setenv("WT_FAIL_FS_READ_ALLOW", value, 1)); +} + +/* + * generate_key -- + * Generate a key used by the "subtest" and "subtest2" tables. + */ +static void +generate_key(uint64_t i, int *keyp) +{ + *keyp = (int)i; +} + +/* + * generate_value -- + * Generate values for the "subtest" table. + */ +static void +generate_value(uint32_t rndint, uint64_t i, char *bigref, + int *v0p, int *v1p, int *v2p, char **bigp) +{ + *v0p = (int)(i * 7); + *v1p = (int)(i * 10007); + *v2p = (int)(i * 100000007); + *bigp = &bigref[rndint % BIG_SIZE]; +} + +/* + * run_check_subtest -- + * Run the subtest with the given parameters and check the results. 
+ */ +static void +run_check_subtest(TEST_OPTS *opts, const char *debugger, uint64_t nops, + bool close_test, uint64_t *nresultsp) +{ + int estatus, narg; + char rarg[20], sarg[20], *subtest_args[MAX_ARGS]; + + narg = 0; + if (debugger != NULL) { + subtest_args[narg++] = (char *)debugger; + subtest_args[narg++] = (char *)"--"; + } + + subtest_args[narg++] = (char *)opts->progname; + /* "subtest" must appear before arguments */ + if (close_test) + subtest_args[narg++] = (char *)"subtest_close"; + else + subtest_args[narg++] = (char *)"subtest"; + subtest_args[narg++] = (char *)"-h"; + subtest_args[narg++] = opts->home; + subtest_args[narg++] = (char *)"-v"; /* subtest is always verbose */ + subtest_args[narg++] = (char *)"-p"; + subtest_args[narg++] = (char *)"-o"; + snprintf(sarg, sizeof(sarg), "%" PRIu64, nops); + subtest_args[narg++] = sarg; /* number of operations */ + subtest_args[narg++] = (char *)"-n"; + snprintf(rarg, sizeof(rarg), "%" PRIu64, opts->nrecords); + subtest_args[narg++] = rarg; /* number of records */ + subtest_args[narg++] = NULL; + testutil_assert(narg <= MAX_ARGS); + if (opts->verbose) + printf("running a separate process with %" PRIu64 + " operations until fail...\n", nops); + testutil_clean_work_dir(opts->home); + testutil_check(run_process( + opts, debugger != NULL ? debugger : opts->progname, + subtest_args, &estatus)); + if (opts->verbose) + printf("process exited %d\n", estatus); + + /* + * Verify results in parent process. + */ + testutil_check(check_results(opts, nresultsp)); +} + +/* + * run_check_subtest_range -- + * + * Run successive tests via binary search that determines the approximate + * crossover point between when data is recoverable or not. Once that is + * determined, run the subtest in a range near that crossover point. + * + * The theory is that running at the crossover point will tend to trigger + * "interesting" failures at the borderline when the checkpoint is about to, + * or has, succeeded. 
If any of those failures creates a WT home directory + * that cannot be recovered, the top level test will fail. + */ +static void +run_check_subtest_range(TEST_OPTS *opts, const char *debugger, bool close_test) +{ + uint64_t cutoff, high, low, mid, nops, nresults; + int i; + bool got_failure, got_success; + + if (opts->verbose) + printf("Determining best range of operations until failure, " + "with close_test %s.\n", + (close_test ? "enabled" : "disabled")); + + run_check_subtest(opts, debugger, 1, close_test, &cutoff); + low = 0; + high = MAX_OP_RANGE; + mid = (low + high) / 2; + while (mid != low) { + run_check_subtest(opts, debugger, mid, close_test, + &nresults); + if (nresults > cutoff) + high = mid; + else + low = mid; + mid = (low + high) / 2; + } + /* + * mid is the number of ops that is the crossover point. + * Run some tests near that point to try to trigger weird + * failures. If mid is too low or too high, it indicates + * there is a fundamental problem with the test. + */ + testutil_assert(mid > 1 && mid < MAX_OP_RANGE - 1); + if (opts->verbose) + printf("Retesting around %" PRIu64 " operations.\n", + mid); + + got_failure = false; + got_success = false; + /* nops is unsigned: clamp so "mid - 10" cannot wrap when mid < 10. */ + for (nops = mid > 10 ? mid - 10 : 0; nops < mid + 10; nops++) { + for (i = 0; i < TESTS_PER_OP_VALUE; i++) { + run_check_subtest(opts, debugger, nops, + close_test, &nresults); + if (nresults > cutoff) + got_failure = true; + else + got_success = true; + } + } + /* + * Check that it really ran with a crossover point. + */ + testutil_assert(got_failure); + testutil_assert(got_success); +} + +/* + * run_process -- + * Run a program with arguments, wait until it completes.
+ */ +static int +run_process(TEST_OPTS *opts, const char *prog, char *argv[], int *status) +{ + int pid; + char **arg; + + if (opts->verbose) { + printf("running: "); + for (arg = argv; *arg != NULL; arg++) + printf("%s ", *arg); + printf("\n"); + } + if ((pid = fork()) == 0) { + (void)execv(prog, argv); + testutil_die(errno, "%s", prog); + } else if (pid < 0) + return (errno); + + (void)waitpid(pid, status, 0); + return (0); +} + +/* + * subtest_main -- + * The main program for the subtest + */ +static int +subtest_main(int argc, char *argv[], bool close_test) +{ + TEST_OPTS *opts, _opts; + WT_SESSION *session; + char config[1024], filename[1024]; + struct rlimit rlim; + + if (testutil_disable_long_tests()) + return (0); + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + memset(&rlim, 0, sizeof(rlim)); + + /* No core files during fault injection tests. */ + testutil_check(setrlimit(RLIMIT_CORE, &rlim)); + testutil_check(testutil_parse_opts(argc, argv, opts)); + testutil_make_work_dir(opts->home); + + /* Redirect stderr, stdout. 
*/ + snprintf(filename, sizeof(filename), "%s/%s", opts->home, STDERR_FILE); + testutil_assert(freopen(filename, "a", stderr) != NULL); + snprintf(filename, sizeof(filename), "%s/%s", opts->home, STDOUT_FILE); + testutil_assert(freopen(filename, "a", stdout) != NULL); + snprintf(config, sizeof(config), + "create,cache_size=250M,log=(enabled)," + "transaction_sync=(enabled,method=none),extensions=(" + WT_FAIL_FS_LIB + "=(early_load,config={environment=true,verbose=true}))"); + + testutil_check(wiredtiger_open(opts->home, NULL, config, &opts->conn)); + testutil_check( + opts->conn->open_session(opts->conn, NULL, NULL, &session)); + + testutil_check(session->create(session, "table:subtest", + "key_format=i,value_format=iiiS," + "columns=(id,v0,v1,v2,big)")); + + testutil_check(session->create(session, "table:subtest2", + "key_format=i,value_format=i")); + + testutil_check(session->create(session, "index:subtest:v0", + "columns=(v0)")); + testutil_check(session->create(session, "index:subtest:v1", + "columns=(v1)")); + testutil_check(session->create(session, "index:subtest:v2", + "columns=(v2)")); + + testutil_check(session->close(session, NULL)); + + subtest_populate(opts, close_test); + + testutil_cleanup(opts); + + return (0); +} + +/* + * This macro is used as a substitute for testutil_check, except that it is + * aware of when a failure may be expected due to the effects of the fail_fs. + * This macro is used only in subtest_populate(), it uses the local variables + * failmode and failed. It expands to a single statement, so it must be + * followed by a semicolon. + */ +#define CHECK(expr) do { \ + int _ret; \ + _ret = (expr); \ + if (_ret != 0) { \ + if (!failmode || \ + (_ret != WT_RUN_RECOVERY && _ret != EIO)) { \ + fprintf(stderr, " BAD RETURN %d for \"%s\"\n", \ + _ret, #expr); \ + testutil_check(_ret); \ + } else \ + failed = true; \ + } \ +} while (0) + +/* + * subtest_populate -- + * Populate the tables.
+ */ +static void +subtest_populate(TEST_OPTS *opts, bool close_test) +{ + WT_CURSOR *maincur, *maincur2; + WT_RAND_STATE rnd; + WT_SESSION *session; + uint64_t i, nrecords; + uint32_t rndint; + int key, v0, v1, v2; + char *big, *bigref; + bool failed, failmode; + + failmode = failed = false; + __wt_random_init_seed(NULL, &rnd); + CHECK(create_big_string(&bigref)); + nrecords = opts->nrecords; + + CHECK(opts->conn->open_session( + opts->conn, NULL, NULL, &session)); + + CHECK(session->open_cursor(session, "table:subtest", NULL, + NULL, &maincur)); + + CHECK(session->open_cursor(session, "table:subtest2", NULL, + NULL, &maincur2)); + + for (i = 0; i < nrecords && !failed; i++) { + rndint = __wt_random(&rnd); + generate_key(i, &key); + generate_value(rndint, i, bigref, &v0, &v1, &v2, &big); + CHECK(session->begin_transaction(session, NULL)); + maincur->set_key(maincur, key); + maincur->set_value(maincur, v0, v1, v2, big); + CHECK(maincur->insert(maincur)); + + maincur2->set_key(maincur2, key); + maincur2->set_value(maincur2, rndint); + CHECK(maincur2->insert(maincur2)); + CHECK(session->commit_transaction(session, NULL)); + + if (i == 0) + /* + * Force an initial checkpoint, that helps to + * distinguish a clear failure from just not running + * long enough. + */ + CHECK(session->checkpoint(session, NULL)); + + if ((i + 1) % VERBOSE_PRINT == 0 && opts->verbose) + printf(" %" PRIu64 "/%" PRIu64 "\n", + (i + 1), nrecords); + /* Attempt to isolate the failures to checkpointing. */ + if (i == (nrecords/100)) { + enable_failures(opts->nops, 1000000); + failmode = true; /* CHECK should expect failures. */ + CHECK(session->checkpoint(session, NULL)); + failmode = false; + disable_failures(); + if (failed && opts->verbose) + printf("checkpoint failed (expected).\n"); + } + } + + /* + * Closing handles after an extreme fail is likely to cause + * cascading failures (or crashes), so recommended practice is + * to immediately exit. 
We're interested in testing both with + * and without the recommended practice. + */ + if (failed) { + if (!close_test) { + fprintf(stderr, "exit early.\n"); + exit(0); + } else + fprintf(stderr, "closing after failure.\n"); + } + + free(bigref); + CHECK(maincur->close(maincur)); + CHECK(maincur2->close(maincur2)); + CHECK(session->close(session, NULL)); +} + +/* + * main -- + * The main program for the test. When invoked with a "subtest" or + * "subtest_close" argument, run the subtest. Otherwise, run a separate + * process for each needed subtest, and check the results. A "gdb" + * argument runs the subtests under /usr/bin/gdb. + */ +int +main(int argc, char *argv[]) +{ + TEST_OPTS *opts, _opts; + uint64_t nresults; + const char *debugger; + + if (testutil_disable_long_tests()) + return (0); + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + debugger = NULL; + + testutil_check(testutil_parse_opts(argc, argv, opts)); + argc -= __wt_optind; + argv += __wt_optind; + if (opts->nrecords == 0) + opts->nrecords = 50000; + + while (argc > 0) { + if (strcmp(argv[0], "subtest") == 0) + return (subtest_main(argc, argv, false)); + else if (strcmp(argv[0], "subtest_close") == 0) + return (subtest_main(argc, argv, true)); + else if (strcmp(argv[0], "gdb") == 0) + debugger = "/usr/bin/gdb"; + else + testutil_assert(false); + argc--; + argv++; + } + if (opts->verbose) { + printf("Number of operations until failure: %" PRIu64 + " (change with -o N)\n", opts->nops); + printf("Number of records: %" PRIu64 + " (change with -n N)\n", opts->nrecords); + } + if (opts->nops == 0) { + run_check_subtest_range(opts, debugger, false); + run_check_subtest_range(opts, debugger, true); + } else { + /* + * The fourth argument is the close_test flag, not a record + * count: a single explicit run closes its handles after the + * injected failure. + */ + run_check_subtest(opts, debugger, opts->nops, + true, &nresults); + } + + testutil_clean_work_dir(opts->home); + testutil_cleanup(opts); + + return (0); +} diff --git a/src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c b/src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c new file mode 100644 index 00000000000..09dce624066 --- /dev/null +++
b/src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c @@ -0,0 +1,99 @@ +/*- + * Public Domain 2014-2017 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" + +/* + * JIRA ticket reference: WT-3120 + * Test case description: A simple file system extension built into + * a shared library. + * Failure mode: Loading the file system and closing the connection + * is enough to evoke the failure. This test does slightly more + * than that. 
+ */ + +#define WT_FAIL_FS_LIB "../../ext/test/fail_fs/.libs/libwiredtiger_fail_fs.so" + +int +main(int argc, char *argv[]) +{ + TEST_OPTS *opts, _opts; + WT_CURSOR *cursor; + WT_SESSION *session; + char *kstr, *vstr; + char buf[1024]; + + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + testutil_check(testutil_parse_opts(argc, argv, opts)); + testutil_make_work_dir(opts->home); + + snprintf(buf, sizeof(buf), + "create,extensions=(" WT_FAIL_FS_LIB "=(early_load=true))"); + testutil_check(wiredtiger_open(opts->home, NULL, buf, &opts->conn)); + testutil_check( + opts->conn->open_session(opts->conn, NULL, NULL, &session)); + testutil_check(session->create(session, opts->uri, + "key_format=S,value_format=S")); + + testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, + &cursor)); + cursor->set_key(cursor, "a"); + cursor->set_value(cursor, "0"); + testutil_check(cursor->insert(cursor)); + cursor->set_key(cursor, "b"); + cursor->set_value(cursor, "1"); + testutil_check(cursor->insert(cursor)); + testutil_check(cursor->close(cursor)); + testutil_check(session->close(session, NULL)); + + /* Force to disk and re-open. 
*/ + testutil_check(opts->conn->close(opts->conn, NULL)); + testutil_check(wiredtiger_open(opts->home, NULL, NULL, &opts->conn)); + + testutil_check( + opts->conn->open_session(opts->conn, NULL, NULL, &session)); + testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, + &cursor)); + testutil_check(cursor->next(cursor)); + testutil_check(cursor->get_key(cursor, &kstr)); + testutil_check(cursor->get_value(cursor, &vstr)); + testutil_assert(strcmp(kstr, "a") == 0); + testutil_assert(strcmp(vstr, "0") == 0); + testutil_check(cursor->next(cursor)); + testutil_check(cursor->get_key(cursor, &kstr)); + testutil_check(cursor->get_value(cursor, &vstr)); + testutil_assert(strcmp(kstr, "b") == 0); + testutil_assert(strcmp(vstr, "1") == 0); + testutil_assert(cursor->next(cursor) == WT_NOTFOUND); + testutil_check(cursor->close(cursor)); + testutil_check(session->close(session, NULL)); + printf("Success\n"); + + testutil_cleanup(opts); + return (EXIT_SUCCESS); +} diff --git a/src/third_party/wiredtiger/test/csuite/wt3135_search_near_collator/main.c b/src/third_party/wiredtiger/test/csuite/wt3135_search_near_collator/main.c new file mode 100644 index 00000000000..8783034a7d8 --- /dev/null +++ b/src/third_party/wiredtiger/test/csuite/wt3135_search_near_collator/main.c @@ -0,0 +1,360 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. 
We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" + +/* + * JIRA ticket reference: WT-3135 + * Test case description: Each set of data is ordered and contains + * five elements (0-4). We insert elements 1 and 3, and then do + * search_near and search for each element. For each set of data, we perform + * these tests first using a custom collator, and second using a custom collator + * and extractor. In each case there are index keys having variable length. + * Failure mode: In the reported test case, the custom compare routine is + * given a truncated key to compare, and the unpack functions return errors + * because the truncation appeared in the middle of a key. + */ + +#define TEST_ENTRY_COUNT 5 +typedef const char *TEST_SET[TEST_ENTRY_COUNT]; +static TEST_SET test_sets[] = { + { "0", "01", "012", "0123", "01234" }, + { "A", "B", "C", "D", "E" }, + { "5", "54", "543", "5432", "54321" }, + { "54321", "5433", "544", "55", "6" } +}; +#define TEST_SET_COUNT (sizeof(test_sets) / sizeof(test_sets[0])) + +static bool +item_str_equal(WT_ITEM *item, const char *str) +{ + return (item->size == strlen(str) + 1 && strncmp((char *)item->data, + str, item->size) == 0); +} + +static int +compare_int(int64_t a, int64_t b) +{ + return (a < b ? -1 : (a > b ? 
1 : 0)); +} + +static int +index_compare_primary(WT_PACK_STREAM *s1, WT_PACK_STREAM *s2, int *cmp) +{ + int64_t pkey1, pkey2; + int rc1, rc2; + + rc1 = wiredtiger_unpack_int(s1, &pkey1); + rc2 = wiredtiger_unpack_int(s2, &pkey2); + + if (rc1 == 0 && rc2 == 0) + *cmp = compare_int(pkey1, pkey2); + else if (rc1 != 0 && rc2 != 0) + *cmp = 0; + else if (rc1 != 0) + *cmp = -1; + else + *cmp = 1; + return (0); +} + +static int +index_compare_S(WT_COLLATOR *collator, WT_SESSION *session, + const WT_ITEM *key1, const WT_ITEM *key2, int *cmp) +{ + WT_PACK_STREAM *s1, *s2; + const char *skey1, *skey2; + + (void)collator; + + testutil_check(wiredtiger_unpack_start(session, "Si", key1->data, + key1->size, &s1)); + testutil_check(wiredtiger_unpack_start(session, "Si", key2->data, + key2->size, &s2)); + + testutil_check(wiredtiger_unpack_str(s1, &skey1)); + testutil_check(wiredtiger_unpack_str(s2, &skey2)); + + if ((*cmp = strcmp(skey1, skey2)) == 0) + testutil_check(index_compare_primary(s1, s2, cmp)); + + testutil_check(wiredtiger_pack_close(s1, NULL)); + testutil_check(wiredtiger_pack_close(s2, NULL)); + + return (0); +} + +static int +index_compare_u(WT_COLLATOR *collator, WT_SESSION *session, + const WT_ITEM *key1, const WT_ITEM *key2, int *cmp) +{ + WT_ITEM skey1, skey2; + WT_PACK_STREAM *s1, *s2; + + (void)collator; + + testutil_check(wiredtiger_unpack_start(session, "ui", key1->data, + key1->size, &s1)); + testutil_check(wiredtiger_unpack_start(session, "ui", key2->data, + key2->size, &s2)); + + testutil_check(wiredtiger_unpack_item(s1, &skey1)); + testutil_check(wiredtiger_unpack_item(s2, &skey2)); + + if ((*cmp = strcmp(skey1.data, skey2.data)) == 0) + testutil_check(index_compare_primary(s1, s2, cmp)); + + testutil_check(wiredtiger_pack_close(s1, NULL)); + testutil_check(wiredtiger_pack_close(s2, NULL)); + + return (0); +} + +static int +index_extractor_u(WT_EXTRACTOR *extractor, WT_SESSION *session, + const WT_ITEM *key, const WT_ITEM *value, WT_CURSOR 
*result_cursor) +{ + (void)extractor; + (void)session; + (void)key; + + result_cursor->set_key(result_cursor, value); + return result_cursor->insert(result_cursor); +} + +static WT_COLLATOR collator_S = { index_compare_S, NULL, NULL }; +static WT_COLLATOR collator_u = { index_compare_u, NULL, NULL }; +static WT_EXTRACTOR extractor_u = { index_extractor_u, NULL, NULL }; + +/* + * Check search() and search_near() using the test string indicated + * by test_index. + */ +static void +search_using_str(WT_CURSOR *cursor, TEST_SET test_set, int test_index) +{ + int exact, ret; + const char *result; + const char *str_01, *str_0123, *test_str; + + testutil_assert(test_index >= 0 && test_index <= 4); + str_01 = test_set[1]; + str_0123 = test_set[3]; + test_str = test_set[test_index]; + + cursor->set_key(cursor, test_str); + testutil_check(cursor->search_near(cursor, &exact)); + testutil_check(cursor->get_key(cursor, &result)); + + if (test_index == 0) + testutil_assert(strcmp(result, str_01) == 0 && exact > 0); + else if (test_index == 1) + testutil_assert(strcmp(result, str_01) == 0 && exact == 0); + else if (test_index == 2) + testutil_assert((strcmp(result, str_0123) == 0 && exact > 0) || + (strcmp(result, str_01) == 0 && exact < 0)); + else if (test_index == 3) + testutil_assert(strcmp(result, str_0123) == 0 && exact == 0); + else if (test_index == 4) + testutil_assert(strcmp(result, str_0123) == 0 && exact < 0); + + cursor->set_key(cursor, test_str); + ret = cursor->search(cursor); + + if (test_index == 0 || test_index == 2 || test_index == 4) + testutil_assert(ret == WT_NOTFOUND); + else if (test_index == 1 || test_index == 3) + testutil_assert(ret == 0); +} + +/* + * Check search() and search_near() using the test string indicated + * by test_index against a table containing a variable sized item. 
+ */ +static void +search_using_item(WT_CURSOR *cursor, TEST_SET test_set, int test_index) +{ + WT_ITEM item; + size_t testlen; + int exact, ret; + const char *str_01, *str_0123, *test_str; + + testutil_assert(test_index >= 0 && test_index <= 4); + str_01 = test_set[1]; + str_0123 = test_set[3]; + test_str = test_set[test_index]; + + testlen = strlen(test_str) + 1; + item.data = test_str; + item.size = testlen; + cursor->set_key(cursor, &item); + testutil_check(cursor->search_near(cursor, &exact)); + testutil_check(cursor->get_key(cursor, &item)); + + if (test_index == 0) + testutil_assert(item_str_equal(&item, str_01) && exact > 0); + else if (test_index == 1) + testutil_assert(item_str_equal(&item, str_01) && exact == 0); + else if (test_index == 2) + testutil_assert((item_str_equal(&item, str_0123) && exact > 0) + || (item_str_equal(&item, str_01) && exact < 0)); + else if (test_index == 3) + testutil_assert(item_str_equal(&item, str_0123) && exact == 0); + else if (test_index == 4) + testutil_assert(item_str_equal(&item, str_0123) && exact < 0); + + item.data = test_str; + item.size = testlen; + cursor->set_key(cursor, &item); + ret = cursor->search(cursor); + + if (test_index == 0 || test_index == 2 || test_index == 4) + testutil_assert(ret == WT_NOTFOUND); + else if (test_index == 1 || test_index == 3) + testutil_assert(ret == 0); +} + +/* + * For each set of data, perform tests. + */ +static void +test_one_set(WT_SESSION *session, TEST_SET set) +{ + WT_CURSOR *cursor; + WT_ITEM item; + int32_t i; + + /* + * Part 1: Using a custom collator, insert some elements + * and verify results from search_near. + */ + + testutil_check(session->create(session, + "table:main", "key_format=i,value_format=S,columns=(k,v)")); + testutil_check(session->create(session, + "index:main:def_collator", "columns=(v)")); + testutil_check(session->create(session, + "index:main:custom_collator", + "columns=(v),collator=collator_S")); + + /* Insert only elements #1 and #3. 
*/ + testutil_check(session->open_cursor(session, + "table:main", NULL, NULL, &cursor)); + cursor->set_key(cursor, 0); + cursor->set_value(cursor, set[1]); + testutil_check(cursor->insert(cursor)); + cursor->set_key(cursor, 1); + cursor->set_value(cursor, set[3]); + testutil_check(cursor->insert(cursor)); + testutil_check(cursor->close(cursor)); + + /* Check all elements in def_collator index. */ + testutil_check(session->open_cursor(session, + "index:main:def_collator", NULL, NULL, &cursor)); + for (i = 0; i < (int32_t)TEST_ENTRY_COUNT; i++) + search_using_str(cursor, set, i); + testutil_check(cursor->close(cursor)); + + /* Check all elements in custom_collator index */ + testutil_check(session->open_cursor(session, + "index:main:custom_collator", NULL, NULL, &cursor)); + for (i = 0; i < (int32_t)TEST_ENTRY_COUNT; i++) + search_using_str(cursor, set, i); + testutil_check(cursor->close(cursor)); + + /* + * Part 2: perform the same checks using a custom collator and + * extractor. + */ + testutil_check(session->create(session, + "table:main2", "key_format=i,value_format=u,columns=(k,v)")); + + testutil_check(session->create(session, "index:main2:idx_w_coll", + "key_format=u,collator=collator_u,extractor=extractor_u")); + + testutil_check(session->open_cursor(session, + "table:main2", NULL, NULL, &cursor)); + + memset(&item, 0, sizeof(item)); + item.size = strlen(set[1]) + 1; + item.data = set[1]; + cursor->set_key(cursor, 1); + cursor->set_value(cursor, &item); + testutil_check(cursor->insert(cursor)); + + item.size = strlen(set[3]) + 1; + item.data = set[3]; + cursor->set_key(cursor, 3); + cursor->set_value(cursor, &item); + testutil_check(cursor->insert(cursor)); + + testutil_check(cursor->close(cursor)); + + testutil_check(session->open_cursor(session, + "index:main2:idx_w_coll", NULL, NULL, &cursor)); + for (i = 0; i < (int32_t)TEST_ENTRY_COUNT; i++) + search_using_item(cursor, set, i); + testutil_check(cursor->close(cursor)); + + 
testutil_check(session->drop(session, "table:main", NULL)); + testutil_check(session->drop(session, "table:main2", NULL)); +} + +int +main(int argc, char *argv[]) +{ + TEST_OPTS *opts, _opts; + WT_SESSION *session; + size_t i; + + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + testutil_check(testutil_parse_opts(argc, argv, opts)); + testutil_make_work_dir(opts->home); + + testutil_check(wiredtiger_open(opts->home, NULL, "create", + &opts->conn)); + testutil_check( + opts->conn->open_session(opts->conn, NULL, NULL, &session)); + + /* Add any collators and extractors used by tests */ + testutil_check(opts->conn->add_collator(opts->conn, "collator_S", + &collator_S, NULL)); + testutil_check(opts->conn->add_collator(opts->conn, "collator_u", + &collator_u, NULL)); + testutil_check(opts->conn->add_extractor(opts->conn, "extractor_u", + &extractor_u, NULL)); + + for (i = 0; i < TEST_SET_COUNT; i++) { + printf("test set %" WT_SIZET_FMT "\n", i); + test_one_set(session, test_sets[i]); + } + + testutil_check(session->close(session, NULL)); + testutil_cleanup(opts); + return (EXIT_SUCCESS); +} diff --git a/src/third_party/wiredtiger/test/csuite/wt3184_dup_index_collator/main.c b/src/third_party/wiredtiger/test/csuite/wt3184_dup_index_collator/main.c new file mode 100644 index 00000000000..bcefd2f1a3b --- /dev/null +++ b/src/third_party/wiredtiger/test/csuite/wt3184_dup_index_collator/main.c @@ -0,0 +1,168 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. 
+ * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" + +/* + * JIRA ticket reference: WT-3184 + * Test case description: Create a table whose key and value are raw + * items holding an int32_t, with an index on the value that uses a + * custom collator. Insert a single record, position a cursor on the + * index with search, then duplicate that cursor and check that both + * the original and the duplicate return the inserted value. + * Failure mode: Per the ticket title, duplicating an index cursor that + * uses a custom collator; the collator below accepts keys whose primary + * key part is empty and asserts that every non-empty item is an int32_t. + */ + +static int +compare_int(int32_t a, int32_t b) +{ + return (a < b ? -1 : (a > b ?
1 : 0)); +} + +static int32_t +item_to_int(WT_ITEM *item) +{ + testutil_assert(item->size == sizeof(int32_t)); + return (*(int32_t *)item->data); +} + +static int +compare_int_items(WT_ITEM *itema, WT_ITEM *itemb) +{ + testutil_assert(itema->size == sizeof(int32_t)); + testutil_assert(itemb->size == sizeof(int32_t)); + return (compare_int(item_to_int(itema), item_to_int(itemb))); +} + +static void +print_int_item(const char *str, const WT_ITEM *item) +{ + if (item->size > 0) { + testutil_assert(item->size == sizeof(int32_t)); + printf("%s%" PRId32, str, *(int32_t *)item->data); + } else + printf("%s", str); +} + +static int +index_compare(WT_COLLATOR *collator, WT_SESSION *session, + const WT_ITEM *key1, const WT_ITEM *key2, int *cmp) +{ + WT_ITEM ikey1, pkey1, ikey2, pkey2; + + (void)collator; + testutil_check(wiredtiger_struct_unpack(session, + key1->data, key1->size, "uu", &ikey1, &pkey1)); + testutil_check(wiredtiger_struct_unpack(session, + key2->data, key2->size, "uu", &ikey2, &pkey2)); + + print_int_item("index_compare: index key1 = ", &ikey1); + print_int_item(", primary key1 = ", &pkey1); + print_int_item(", index key2 = ", &ikey2); + print_int_item(", primary key2 = ", &pkey2); + printf("\n"); + + if ((*cmp = compare_int_items(&ikey1, &ikey2)) != 0) + return (0); + + if (pkey1.size != 0 && pkey2.size != 0) + *cmp = compare_int_items(&pkey1, &pkey2); + else if (pkey1.size != 0) + *cmp = 1; + else if (pkey2.size != 0) + *cmp = -1; + else + *cmp = 0; + + return (0); +} + +static WT_COLLATOR index_coll = { index_compare, NULL, NULL }; + +int +main(int argc, char *argv[]) +{ + TEST_OPTS *opts, _opts; + WT_CURSOR *cursor, *cursor1; + WT_ITEM got, k, v; + WT_SESSION *session; + int32_t ki, vi; + + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + testutil_check(testutil_parse_opts(argc, argv, opts)); + testutil_make_work_dir(opts->home); + + testutil_check(wiredtiger_open(opts->home, NULL, "create", + &opts->conn)); + testutil_check( + 
opts->conn->open_session(opts->conn, NULL, NULL, &session)); + + testutil_check(opts->conn->add_collator(opts->conn, "index_coll", + &index_coll, NULL)); + + testutil_check(session->create(session, + "table:main", "key_format=u,value_format=u,columns=(k,v)")); + testutil_check(session->create(session, + "index:main:index", "columns=(v),collator=index_coll")); + + printf("adding new record\n"); + testutil_check(session->open_cursor(session, "table:main", NULL, NULL, + &cursor)); + + ki = 13; + vi = 17; + + k.data = &ki; k.size = sizeof(ki); + v.data = &vi; v.size = sizeof(vi); + + cursor->set_key(cursor, &k); + cursor->set_value(cursor, &v); + testutil_check(cursor->insert(cursor)); + testutil_check(cursor->close(cursor)); + + printf("positioning index cursor\n"); + + testutil_check(session->open_cursor(session, "index:main:index", NULL, + NULL, &cursor)); + cursor->set_key(cursor, &v); + testutil_check(cursor->search(cursor)); + + printf("duplicating cursor\n"); + testutil_check(session->open_cursor(session, NULL, cursor, NULL, + &cursor1)); + testutil_check(cursor->get_value(cursor, &got)); + testutil_assert(item_to_int(&got) == 17); + testutil_check(cursor1->get_value(cursor1, &got)); + testutil_assert(item_to_int(&got) == 17); + + testutil_check(session->close(session, NULL)); + testutil_cleanup(opts); + return (EXIT_SUCCESS); +} diff --git a/src/third_party/wiredtiger/test/suite/test_cursor_random.py b/src/third_party/wiredtiger/test/suite/test_cursor_random.py index 3bda6dc9946..ee0f85a29ee 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor_random.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor_random.py @@ -71,6 +71,15 @@ class test_cursor_random(wttest.WiredTigerTestCase): self.assertEquals(cursor.reset(), 0) cursor.close() + # Check that next_random fails with an empty tree, repeatedly.
+ def test_cursor_random_empty(self): + uri = self.type + self.session.create(uri, 'key_format=S,value_format=S') + cursor = self.session.open_cursor(uri, None, self.config) + for i in range(1,5): + self.assertTrue(cursor.next(), wiredtiger.WT_NOTFOUND) + cursor.close + # Check that next_random works with a single value, repeatedly. def test_cursor_random_single_record(self): uri = self.type @@ -127,6 +136,46 @@ class test_cursor_random(wttest.WiredTigerTestCase): def test_cursor_random_multiple_page_records(self): self.cursor_random_multiple_page_records(0) + # Check that next_random fails in the presence of a set of values, some of + # which are deleted. + def test_cursor_random_deleted_partial(self): + uri = self.type + ds = self.dataset(self, uri, 10000, + config='allocation_size=512,leaf_page_max=512') + ds.populate() + + # Close the connection so everything is forced to disk. + self.reopen_conn() + + start = self.session.open_cursor(uri, None) + start.set_key(ds.key(10)) + end = self.session.open_cursor(uri, None) + end.set_key(ds.key(10000-10)) + self.session.truncate(None, start, end, None) + self.assertEqual(start.close(), 0) + self.assertEqual(end.close(), 0) + + cursor = self.session.open_cursor(uri, None, self.config) + for i in range(1,10): + self.assertEqual(cursor.next(), 0) + + # Check that next_random fails in the presence of a set of values, all of + # which are deleted. + def test_cursor_random_deleted_all(self): + uri = self.type + ds = self.dataset(self, uri, 10000, + config='allocation_size=512,leaf_page_max=512') + ds.populate() + + # Close the connection so everything is forced to disk. + self.reopen_conn() + + self.session.truncate(uri, None, None, None) + + cursor = self.session.open_cursor(uri, None, self.config) + for i in range(1,10): + self.assertTrue(cursor.next(), wiredtiger.WT_NOTFOUND) + # Check that opening a random cursor on column-store returns not-supported. 
class test_cursor_random_column(wttest.WiredTigerTestCase): scenarios = make_scenarios([ diff --git a/src/third_party/wiredtiger/test/suite/test_reconfig04.py b/src/third_party/wiredtiger/test/suite/test_reconfig04.py index be5e6d3729e..51d9b91c1f4 100644 --- a/src/third_party/wiredtiger/test/suite/test_reconfig04.py +++ b/src/third_party/wiredtiger/test/suite/test_reconfig04.py @@ -26,9 +26,7 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. -import fnmatch, os, time import wiredtiger, wttest -from wtdataset import SimpleDataSet # test_reconfig04.py # Test WT_SESSION::reconfigure diff --git a/src/third_party/wiredtiger/test/suite/test_sweep01.py b/src/third_party/wiredtiger/test/suite/test_sweep01.py index 71f8fcb180e..5559190caca 100644 --- a/src/third_party/wiredtiger/test/suite/test_sweep01.py +++ b/src/third_party/wiredtiger/test/suite/test_sweep01.py @@ -116,10 +116,15 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess): # Give slow machines time to process files. stat_cursor = self.session.open_cursor('statistics:', None, None) this_nfile = stat_cursor[stat.conn.file_open][2] + removed = stat_cursor[stat.conn.dh_sweep_remove][2] stat_cursor.close() self.pr("==== loop " + str(sleep)) self.pr("this_nfile " + str(this_nfile)) - if this_nfile == final_nfile: + self.pr("removed " + str(removed)) + # On slow machines there can be a lag where files get closed but + # the sweep server cannot yet remove the handles. So wait for the + # removed statistic to indicate forward progress too. 
+ if this_nfile == final_nfile and removed != remove1: break c.close() self.pr("Sweep loop took " + str(sleep)) diff --git a/src/third_party/wiredtiger/test/utility/misc.c b/src/third_party/wiredtiger/test/utility/misc.c index 1491c9a6938..1ba08ddd77f 100644 --- a/src/third_party/wiredtiger/test/utility/misc.c +++ b/src/third_party/wiredtiger/test/utility/misc.c @@ -78,7 +78,7 @@ testutil_work_dir_from_path(char *buffer, size_t len, const char *dir) * Remove the work directory. */ void -testutil_clean_work_dir(char *dir) +testutil_clean_work_dir(const char *dir) { size_t len; int ret; diff --git a/src/third_party/wiredtiger/test/utility/test_util.h b/src/third_party/wiredtiger/test/utility/test_util.h index f6a9cd68e02..489bbe18d87 100644 --- a/src/third_party/wiredtiger/test/utility/test_util.h +++ b/src/third_party/wiredtiger/test/utility/test_util.h @@ -183,7 +183,7 @@ void *dmalloc(size_t); void *drealloc(void *, size_t); void *dstrdup(const void *); void *dstrndup(const char *, size_t); -void testutil_clean_work_dir(char *); +void testutil_clean_work_dir(const char *); void testutil_cleanup(TEST_OPTS *); bool testutil_disable_long_tests(void); void testutil_make_work_dir(char *); -- cgit v1.2.1