diff options
| field | value | date |
|---|---|---|
| author | Alex Gorrod <alexander.gorrod@mongodb.com> | 2017-04-13 06:57:48 +1000 |
| committer | Alex Gorrod <alexander.gorrod@mongodb.com> | 2017-04-13 06:57:48 +1000 |
| commit | 9c2e3c5396adb6bbaaf6a19e6c017b051f943ebf (patch) | |
| tree | 4b5aaf900b05a60721fe7b8bdf4a395f7695ea99 /src/third_party | |
| parent | 0458da0a52fe8b89f475888bca1848cb3b1a9813 (diff) | |
| download | mongo-9c2e3c5396adb6bbaaf6a19e6c017b051f943ebf.tar.gz | |
Import wiredtiger: cb16839cfbdf338af95bed43ca40979ae6e32f54 from branch mongodb-3.4
ref: cc2f15f595..cb16839cfb
for: 3.4.4
SERVER-28168 Cannot start or repair mongodb after unexpected shutdown.
SERVER-28194 Missing WiredTiger.turtle file loses data
WT-2439 Enhance reconciliation page layout
WT-2978 Make WiredTiger python binding pip-compatible
WT-2990 checkpoint load live_open assertion failure
WT-3136 bug fix: WiredTiger doesn't check sprintf calls for error return
WT-3155 Remove WT_CONN_SERVER_RUN flag
WT-3182 Switch make-check to run the short test suite by default
WT-3190 Enhance eviction thread auto-tuning algorithm
WT-3191 lint
WT-3193 Close a race between verify opening a handle and eviction visiting it
WT-3196 Race with LSM and eviction when switching chunks
WT-3199 bug: eviction assertion failure
WT-3202 wtperf report an error on in_memory=true mode : No such file or directory
WT-3203 bulk-load state changes can race
WT-3204 eviction changes cost LSM performance
WT-3206 bug: core dump on NULL page index
WT-3207 Drops with checkpoint_wait=false should not wait for checkpoints
WT-3208 test format hung with 9mb cache
WT-3211 WT_CURSOR.remove cannot always retain its position.
WT-3212 'wt dump' crashes when given table with unknown collator
WT-3213 generated test/format CONFIG invalid on next run
WT-3216 add support for clang-tidy
WT-3218 unexpected checkpoint ordering failures
WT-3224 LSM assertion failure pindex->entries == 1
WT-3225 WiredTiger won't build with clang on CentOS 7.3.1611
WT-3227 Python test suite inserts unnecessary whitespace in error output.
WT-3228 Remove with overwrite shouldn't return WT_NOTFOUND
WT-3234 Update WiredTiger build for clang 4.0.
WT-3238 Java: Cursor.compare and Cursor.equals throw Exceptions for valid return values
WT-3240 Coverity reports
WT-3243 Reorder log slot release so joins don't wait on IO
WT-3244 metadata operations failing in in-memory configurations
WT-3249 Unit test test_readonly fails as it is unable to open WiredTiger.lock
WT-3250 Incorrect statistics incremented on Windows
WT-3254 test_reconfig02 uses incorrect configuration string
WT-3262 Schema operations shouldn't wait for cache
WT-3265 Verify hits assertion in eviction when transiting handle to exclusive mode
WT-3271 Eviction tuning stuck in a loop
WT-98 Update the current cursor value without a search
Diffstat (limited to 'src/third_party')
217 files changed, 5489 insertions, 3466 deletions
diff --git a/src/third_party/wiredtiger/.gitignore b/src/third_party/wiredtiger/.gitignore index c7b3ade9e87..4611f2aa98c 100644 --- a/src/third_party/wiredtiger/.gitignore +++ b/src/third_party/wiredtiger/.gitignore @@ -90,24 +90,28 @@ _wiredtiger.pyd **/examples/c/ex_pack **/examples/c/ex_process **/examples/c/ex_schema -**/examples/c/ex_scope **/examples/c/ex_stat **/examples/c/ex_sync **/examples/c/ex_thread **/test/bloom/t **/test/checkpoint/t -**/test/csuite/test_wt1965_col_efficiency -**/test/csuite/test_wt2246_col_append -**/test/csuite/test_wt2323_join_visibility -**/test/csuite/test_wt2403_lsm_workload -**/test/csuite/test_wt2447_join_main_table -**/test/csuite/test_wt2535_insert_race -**/test/csuite/test_wt2592_join_schema -**/test/csuite/test_wt2695_checksum -**/test/csuite/test_wt2719_reconfig -**/test/csuite/test_wt2834_join_bloom_fix -**/test/csuite/test_wt2853_perf -**/test/csuite/test_wt2999_join_extractor +**/test_scope +**/test_wt1965_col_efficiency +**/test_wt2246_col_append +**/test_wt2323_join_visibility +**/test_wt2403_lsm_workload +**/test_wt2447_join_main_table +**/test_wt2535_insert_race +**/test_wt2592_join_schema +**/test_wt2695_checksum +**/test_wt2719_reconfig +**/test_wt2834_join_bloom_fix +**/test_wt2853_perf +**/test_wt2909_checkpoint_integrity +**/test_wt2999_join_extractor +**/test_wt3120_filesys +**/test_wt3135_search_near_collator +**/test_wt3184_dup_index_collator **/test/cursor_order/cursor_order **/test/fops/t **/test/format/s_dumpcmp diff --git a/src/third_party/wiredtiger/SConstruct b/src/third_party/wiredtiger/SConstruct index e9e72630b11..b397f662be7 100644 --- a/src/third_party/wiredtiger/SConstruct +++ b/src/third_party/wiredtiger/SConstruct @@ -376,7 +376,6 @@ examples = [ "ex_pack", "ex_process", "ex_schema", - "ex_scope", "ex_stat", "ex_thread", ] diff --git a/src/third_party/wiredtiger/bench/wtperf/config.c b/src/third_party/wiredtiger/bench/wtperf/config.c index 9eea99eeec4..e4eee66e4cb 100644 --- 
a/src/third_party/wiredtiger/bench/wtperf/config.c +++ b/src/third_party/wiredtiger/bench/wtperf/config.c @@ -438,14 +438,13 @@ config_opt(WTPERF *wtperf, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v) return (EINVAL); } strp = (char **)valueloc; - newlen = v->len + 1; if (*strp == NULL) begin = newstr = dstrdup(v->str); else { - newlen += strlen(*strp) + 1; - newstr = dcalloc(newlen, sizeof(char)); - snprintf(newstr, newlen, - "%s,%*s", *strp, (int)v->len, v->str); + newlen = strlen(*strp) + v->len + strlen(",") + 1; + newstr = dmalloc(newlen); + testutil_check(__wt_snprintf(newstr, newlen, + "%s,%.*s", *strp, (int)v->len, v->str)); /* Free the old value now we've copied it. */ free(*strp); begin = &newstr[(newlen - 1) - v->len]; @@ -712,7 +711,7 @@ config_opt_name_value(WTPERF *wtperf, const char *name, const char *value) /* name="value" */ len = strlen(name) + strlen(value) + 4; optstr = dmalloc(len); - snprintf(optstr, len, "%s=\"%s\"", name, value); + testutil_check(__wt_snprintf(optstr, len, "%s=\"%s\"", name, value)); ret = config_opt_str(wtperf, optstr); free(optstr); return (ret); diff --git a/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c b/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c index bb44cfbde59..4387860cfb2 100644 --- a/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c +++ b/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c @@ -80,8 +80,8 @@ cycle_idle_tables(void *arg) } for (cycle_count = 0; wtperf->idle_cycle_run; ++cycle_count) { - snprintf(uri, sizeof(uri), - "%s_cycle%07d", wtperf->uris[0], cycle_count); + testutil_check(__wt_snprintf(uri, sizeof(uri), + "%s_cycle%07d", wtperf->uris[0], cycle_count)); /* Don't busy cycle in this loop. 
*/ __wt_sleep(1, 0); diff --git a/src/third_party/wiredtiger/bench/wtperf/misc.c b/src/third_party/wiredtiger/bench/wtperf/misc.c index 24b3323a49a..0874794e01e 100644 --- a/src/third_party/wiredtiger/bench/wtperf/misc.c +++ b/src/third_party/wiredtiger/bench/wtperf/misc.c @@ -46,8 +46,8 @@ setup_log_file(WTPERF *wtperf) len = strlen(wtperf->monitor_dir) + strlen(opts->table_name) + strlen(".stat") + 2; fname = dmalloc(len); - snprintf(fname, len, - "%s/%s.stat", wtperf->monitor_dir, opts->table_name); + testutil_check(__wt_snprintf(fname, len, + "%s/%s.stat", wtperf->monitor_dir, opts->table_name)); if ((wtperf->logf = fopen(fname, "w")) == NULL) { ret = errno; fprintf(stderr, "%s: %s\n", fname, strerror(ret)); diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/many-table-stress.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/many-table-stress.wtperf index 51d0bb0dd9d..6cf1d5d2696 100644 --- a/src/third_party/wiredtiger/bench/wtperf/runners/many-table-stress.wtperf +++ b/src/third_party/wiredtiger/bench/wtperf/runners/many-table-stress.wtperf @@ -1,7 +1,7 @@ # Create a set of tables with uneven distribution of data conn_config="cache_size=1G,eviction=(threads_max=8),file_manager=(close_idle_time=100000),checkpoint=(wait=20,log_size=2GB),statistics=(fast),statistics_log=(wait=5,json),session_max=1000" table_config="type=file" -table_count=5000 +table_count=2000 icount=0 random_range=1000000000 pareto=10 diff --git a/src/third_party/wiredtiger/bench/wtperf/track.c b/src/third_party/wiredtiger/bench/wtperf/track.c index 822bdaa4b4a..86a26120a6a 100644 --- a/src/third_party/wiredtiger/bench/wtperf/track.c +++ b/src/third_party/wiredtiger/bench/wtperf/track.c @@ -288,8 +288,8 @@ latency_print_single(WTPERF *wtperf, TRACK *total, const char *name) uint64_t cumops; char path[1024]; - snprintf(path, sizeof(path), - "%s/latency.%s", wtperf->monitor_dir, name); + testutil_check(__wt_snprintf(path, sizeof(path), + "%s/latency.%s", wtperf->monitor_dir, 
name)); if ((fp = fopen(path, "w")) == NULL) { lprintf(wtperf, errno, 0, "%s", path); return; diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.c b/src/third_party/wiredtiger/bench/wtperf/wtperf.c index 7f5e5ad3373..6d79eebe8b2 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf.c +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.c @@ -32,9 +32,6 @@ #define DEFAULT_HOME "WT_TEST" #define DEFAULT_MONITOR_DIR "WT_TEST" -static const char * const debug_cconfig = ""; -static const char * const debug_tconfig = ""; - static void *checkpoint_worker(void *); static int drop_all_tables(WTPERF *); static int execute_populate(WTPERF *); @@ -554,7 +551,8 @@ worker(void *arg) goto err; } for (i = 0; i < opts->table_count_idle; i++) { - snprintf(buf, 512, "%s_idle%05d", wtperf->uris[0], (int)i); + testutil_check(__wt_snprintf( + buf, 512, "%s_idle%05d", wtperf->uris[0], (int)i)); if ((ret = session->open_cursor( session, buf, NULL, NULL, &tmp_cursor)) != 0) { lprintf(wtperf, ret, 0, @@ -1300,7 +1298,8 @@ monitor(void *arg) /* Open the logging file. */ len = strlen(wtperf->monitor_dir) + 100; path = dmalloc(len); - snprintf(path, len, "%s/monitor", wtperf->monitor_dir); + testutil_check(__wt_snprintf( + path, len, "%s/monitor", wtperf->monitor_dir)); if ((fp = fopen(path, "w")) == NULL) { lprintf(wtperf, errno, 0, "%s", path); goto err; @@ -1655,6 +1654,9 @@ close_reopen(WTPERF *wtperf) opts = wtperf->opts; + if (opts->in_memory) + return (0); + if (!opts->readonly && !opts->reopen_connection) return (0); /* @@ -1937,19 +1939,19 @@ create_uris(WTPERF *wtperf) /* If there is only one table, just use the base name. 
*/ wtperf->uris[i] = dmalloc(len); if (opts->table_count == 1) - snprintf(wtperf->uris[i], - len, "table:%s", opts->table_name); + testutil_check(__wt_snprintf(wtperf->uris[i], + len, "table:%s", opts->table_name)); else - snprintf(wtperf->uris[i], - len, "table:%s%05d", opts->table_name, i); + testutil_check(__wt_snprintf(wtperf->uris[i], + len, "table:%s%05d", opts->table_name, i)); } /* Create the log-like-table URI. */ len = strlen("table:") + strlen(opts->table_name) + strlen("_log_table") + 1; wtperf->log_table_uri = dmalloc(len); - snprintf( - wtperf->log_table_uri, len, "table:%s_log_table", opts->table_name); + testutil_check(__wt_snprintf(wtperf->log_table_uri, + len, "table:%s_log_table", opts->table_name)); } static int @@ -1971,7 +1973,8 @@ create_tables(WTPERF *wtperf) } for (i = 0; i < opts->table_count_idle; i++) { - snprintf(buf, 512, "%s_idle%05d", wtperf->uris[0], (int)i); + testutil_check(__wt_snprintf( + buf, 512, "%s_idle%05d", wtperf->uris[0], (int)i)); if ((ret = session->create( session, buf, opts->table_config)) != 0) { lprintf(wtperf, ret, 0, @@ -2000,8 +2003,9 @@ create_tables(WTPERF *wtperf) return (ret); } if (opts->index) { - snprintf(buf, 512, "index:%s:val_idx", - wtperf->uris[i] + strlen("table:")); + testutil_check(__wt_snprintf(buf, 512, + "index:%s:val_idx", + wtperf->uris[i] + strlen("table:"))); if ((ret = session->create( session, buf, "columns=(val)")) != 0) { lprintf(wtperf, ret, 0, @@ -2186,15 +2190,15 @@ start_all_runs(WTPERF *wtperf) */ len = strlen(wtperf->home) + 5; next_wtperf->home = dmalloc(len); - snprintf( - next_wtperf->home, len, "%s/D%02d", wtperf->home, (int)i); + testutil_check(__wt_snprintf( + next_wtperf->home, len, "%s/D%02d", wtperf->home, (int)i)); if (opts->create != 0) recreate_dir(next_wtperf->home); len = strlen(wtperf->monitor_dir) + 5; next_wtperf->monitor_dir = dmalloc(len); - snprintf(next_wtperf->monitor_dir, - len, "%s/D%02d", wtperf->monitor_dir, (int)i); + 
testutil_check(__wt_snprintf(next_wtperf->monitor_dir, + len, "%s/D%02d", wtperf->monitor_dir, (int)i)); if (opts->create != 0 && strcmp(next_wtperf->home, next_wtperf->monitor_dir) != 0) recreate_dir(next_wtperf->monitor_dir); @@ -2543,9 +2547,9 @@ main(int argc, char *argv[]) */ req_len = strlen(",async=(enabled=true,threads=)") + 4; wtperf->async_config = dmalloc(req_len); - snprintf(wtperf->async_config, req_len, + testutil_check(__wt_snprintf(wtperf->async_config, req_len, ",async=(enabled=true,threads=%" PRIu32 ")", - opts->async_threads); + opts->async_threads)); } if ((ret = config_compress(wtperf)) != 0) goto err; @@ -2566,9 +2570,9 @@ main(int argc, char *argv[]) __wt_stream_set_line_buffer(stdout); /* Concatenate non-default configuration strings. */ - if ((opts->verbose > 1 && strlen(debug_cconfig) != 0) || - user_cconfig != NULL || opts->session_count_idle > 0 || - wtperf->compress_ext != NULL || wtperf->async_config != NULL) { + if (user_cconfig != NULL || opts->session_count_idle > 0 || + wtperf->compress_ext != NULL || wtperf->async_config != NULL || + opts->in_memory) { req_len = 20; req_len += wtperf->async_config != NULL ? strlen(wtperf->async_config) : 0; @@ -2578,87 +2582,82 @@ main(int argc, char *argv[]) sreq_len = strlen("session_max=") + 6; req_len += sreq_len; sess_cfg = dmalloc(sreq_len); - snprintf(sess_cfg, sreq_len, + testutil_check(__wt_snprintf(sess_cfg, sreq_len, "session_max=%" PRIu32, opts->session_count_idle + - wtperf->workers_cnt + opts->populate_threads + 10); + wtperf->workers_cnt + opts->populate_threads + 10)); } + req_len += opts->in_memory ? strlen("in_memory=true") : 0; req_len += user_cconfig != NULL ? strlen(user_cconfig) : 0; - req_len += debug_cconfig != NULL ? 
strlen(debug_cconfig) : 0; cc_buf = dmalloc(req_len); pos = 0; append_comma = ""; if (wtperf->async_config != NULL && strlen(wtperf->async_config) != 0) { - pos += (size_t)snprintf( - cc_buf + pos, req_len - pos, "%s%s", - append_comma, wtperf->async_config); + testutil_check(__wt_snprintf_len_incr( + cc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, wtperf->async_config)); append_comma = ","; } if (wtperf->compress_ext != NULL && strlen(wtperf->compress_ext) != 0) { - pos += (size_t)snprintf( - cc_buf + pos, req_len - pos, "%s%s", - append_comma, wtperf->compress_ext); + testutil_check(__wt_snprintf_len_incr( + cc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, wtperf->compress_ext)); + append_comma = ","; + } + if (opts->in_memory) { + testutil_check(__wt_snprintf_len_incr( + cc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, "in_memory=true")); append_comma = ","; } if (sess_cfg != NULL && strlen(sess_cfg) != 0) { - pos += (size_t)snprintf( - cc_buf + pos, req_len - pos, "%s%s", - append_comma, sess_cfg); + testutil_check(__wt_snprintf_len_incr( + cc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, sess_cfg)); append_comma = ","; } if (user_cconfig != NULL && strlen(user_cconfig) != 0) { - pos += (size_t)snprintf( - cc_buf + pos, req_len - pos, "%s%s", - append_comma, user_cconfig); - append_comma = ","; + testutil_check(__wt_snprintf_len_incr( + cc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, user_cconfig)); } - if (opts->verbose > 1 && strlen(debug_cconfig) != 0) - pos += (size_t)snprintf( - cc_buf + pos, req_len - pos, "%s%s", - append_comma, debug_cconfig); if (strlen(cc_buf) != 0 && (ret = config_opt_name_value(wtperf, "conn_config", cc_buf)) != 0) goto err; } - if ((opts->verbose > 1 && strlen(debug_tconfig) != 0) || opts->index || + if (opts->index || user_tconfig != NULL || wtperf->compress_table != NULL) { req_len = 20; req_len += wtperf->compress_table != NULL ? 
strlen(wtperf->compress_table) : 0; req_len += opts->index ? strlen(INDEX_COL_NAMES) : 0; req_len += user_tconfig != NULL ? strlen(user_tconfig) : 0; - req_len += debug_tconfig != NULL ? strlen(debug_tconfig) : 0; tc_buf = dmalloc(req_len); pos = 0; append_comma = ""; if (wtperf->compress_table != NULL && strlen(wtperf->compress_table) != 0) { - pos += (size_t)snprintf( - tc_buf + pos, req_len - pos, "%s%s", - append_comma, wtperf->compress_table); + testutil_check(__wt_snprintf_len_incr( + tc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, wtperf->compress_table)); append_comma = ","; } if (opts->index) { - pos += (size_t)snprintf( - tc_buf + pos, req_len - pos, "%s%s", - append_comma, INDEX_COL_NAMES); + testutil_check(__wt_snprintf_len_incr( + tc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, INDEX_COL_NAMES)); append_comma = ","; } if (user_tconfig != NULL && strlen(user_tconfig) != 0) { - pos += (size_t)snprintf( - tc_buf + pos, req_len - pos, "%s%s", - append_comma, user_tconfig); - append_comma = ","; + testutil_check(__wt_snprintf_len_incr( + tc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, user_tconfig)); } - if (opts->verbose > 1 && strlen(debug_tconfig) != 0) - pos += (size_t)snprintf( - tc_buf + pos, req_len - pos, "%s%s", - append_comma, debug_tconfig); if (strlen(tc_buf) != 0 && (ret = config_opt_name_value(wtperf, "table_config", tc_buf)) != 0) @@ -2668,8 +2667,9 @@ main(int argc, char *argv[]) req_len = strlen(opts->table_config) + strlen(LOG_PARTIAL_CONFIG) + 1; wtperf->partial_config = dmalloc(req_len); - snprintf(wtperf->partial_config, req_len, "%s%s", - opts->table_config, LOG_PARTIAL_CONFIG); + testutil_check(__wt_snprintf( + wtperf->partial_config, req_len, "%s%s", + opts->table_config, LOG_PARTIAL_CONFIG)); } /* * Set the config for reopen. If readonly add in that string. 
@@ -2682,11 +2682,12 @@ main(int argc, char *argv[]) req_len = strlen(opts->conn_config) + 1; wtperf->reopen_config = dmalloc(req_len); if (opts->readonly) - snprintf(wtperf->reopen_config, req_len, "%s%s", - opts->conn_config, READONLY_CONFIG); + testutil_check(__wt_snprintf( + wtperf->reopen_config, req_len, "%s%s", + opts->conn_config, READONLY_CONFIG)); else - snprintf(wtperf->reopen_config, - req_len, "%s", opts->conn_config); + testutil_check(__wt_snprintf( + wtperf->reopen_config, req_len, "%s", opts->conn_config)); /* Sanity-check the configuration. */ if ((ret = config_sanity(wtperf)) != 0) @@ -2699,7 +2700,8 @@ main(int argc, char *argv[]) /* Write a copy of the config. */ req_len = strlen(wtperf->home) + strlen("/CONFIG.wtperf") + 1; path = dmalloc(req_len); - snprintf(path, req_len, "%s/CONFIG.wtperf", wtperf->home); + testutil_check(__wt_snprintf( + path, req_len, "%s/CONFIG.wtperf", wtperf->home)); config_opt_log(opts, path); free(path); @@ -2824,7 +2826,8 @@ recreate_dir(const char *name) len = strlen(name) * 2 + 100; buf = dmalloc(len); - (void)snprintf(buf, len, "rm -rf %s && mkdir %s", name, name); + testutil_check(__wt_snprintf( + buf, len, "rm -rf %s && mkdir %s", name, name)); testutil_checkfmt(system(buf), "system: %s", buf); free(buf); } diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i index 63cef4c28fb..90f70457407 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i @@ -110,6 +110,8 @@ DEF_OPT_AS_UINT32(database_count, 1, DEF_OPT_AS_BOOL(drop_tables, 0, "Whether to drop all tables at the end of the run, and report time taken" " to do the drop.") +DEF_OPT_AS_BOOL(in_memory, 0, + "Whether to create the database in-memory.") DEF_OPT_AS_UINT32(icount, 5000, "number of records to initially populate. 
If multiple tables are " "configured the count is spread evenly across all tables.") diff --git a/src/third_party/wiredtiger/build_posix/aclocal/strict.m4 b/src/third_party/wiredtiger/build_posix/aclocal/strict.m4 index c107dd017d7..659867fa69e 100644 --- a/src/third_party/wiredtiger/build_posix/aclocal/strict.m4 +++ b/src/third_party/wiredtiger/build_posix/aclocal/strict.m4 @@ -31,6 +31,7 @@ AC_DEFUN([AM_GCC_WARNINGS], [ w="$w -Wstrict-prototypes" w="$w -Wswitch-enum" w="$w -Wundef" + w="$w -Wuninitialized" w="$w -Wunreachable-code" w="$w -Wunsafe-loop-optimizations" w="$w -Wunused" @@ -66,6 +67,10 @@ AC_DEFUN([AM_CLANG_WARNINGS], [ # w="$w -Wno-error=cast-qual" w="$w -Wno-cast-qual" + # On Centos 7.3.1611, system header files aren't compatible with + # -Wdisabled-macro-expansion. + w="$w -Wno-disabled-macro-expansion" + case "$1" in *Apple*clang*version*4.1*) # Apple clang has its own numbering system, and older OS X diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index 1d669fa7fe0..22600dd5e29 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -295,12 +295,12 @@ file_config = format_meta + file_runtime_config + [ Config('split_deepen_per_child', '0', r''' entries allocated per child when deepening the tree''', type='int', undoc=True), - Config('split_pct', '75', r''' + Config('split_pct', '90', r''' the Btree page split size as a percentage of the maximum Btree page size, that is, when a Btree page is split, it will be split into smaller pages, where each page is the specified percentage of the maximum Btree page size''', - min='25', max='100'), + min='50', max='100'), ] # File metadata, including both configurable and non-configurable (internal) diff --git a/src/third_party/wiredtiger/dist/api_err.py b/src/third_party/wiredtiger/dist/api_err.py index 82f961a4ac9..bd379ac8d70 100644 --- a/src/third_party/wiredtiger/dist/api_err.py +++ 
b/src/third_party/wiredtiger/dist/api_err.py @@ -82,7 +82,7 @@ for line in open('../src/include/wiredtiger.in', 'r'): ''.join('\n * ' + l for l in textwrap.wrap( textwrap.dedent(err.long_desc).strip(), 77)) + '\n' if err.long_desc else '')) - tfile.write('#define\t%s\t%d\n' % (err.name, err.value)) + tfile.write('#define\t%s\t(%d)\n' % (err.name, err.value)) if 'undoc' in err.flags: tfile.write('/*! @endcond */\n') tfile.write('/*\n') diff --git a/src/third_party/wiredtiger/dist/filelist b/src/third_party/wiredtiger/dist/filelist index 3886035eaa9..5a3348b940a 100644 --- a/src/third_party/wiredtiger/dist/filelist +++ b/src/third_party/wiredtiger/dist/filelist @@ -133,6 +133,7 @@ src/os_posix/os_path.c POSIX_HOST src/os_posix/os_priv.c POSIX_HOST src/os_posix/os_setvbuf.c POSIX_HOST src/os_posix/os_sleep.c POSIX_HOST +src/os_posix/os_snprintf.c POSIX_HOST src/os_posix/os_thread.c POSIX_HOST src/os_posix/os_time.c POSIX_HOST src/os_posix/os_yield.c POSIX_HOST @@ -152,7 +153,6 @@ src/os_win/os_snprintf.c WINDOWS_HOST src/os_win/os_thread.c WINDOWS_HOST src/os_win/os_time.c WINDOWS_HOST src/os_win/os_utf8.c WINDOWS_HOST -src/os_win/os_vsnprintf.c WINDOWS_HOST src/os_win/os_winerr.c WINDOWS_HOST src/os_win/os_yield.c WINDOWS_HOST src/packing/pack_api.c diff --git a/src/third_party/wiredtiger/dist/flags.py b/src/third_party/wiredtiger/dist/flags.py index b20a7181532..64b5d789e72 100644 --- a/src/third_party/wiredtiger/dist/flags.py +++ b/src/third_party/wiredtiger/dist/flags.py @@ -96,19 +96,19 @@ flags = { 'CONN_CACHE_POOL', 'CONN_CKPT_SYNC', 'CONN_CLOSING', + 'CONN_CLOSING_NO_MORE_OPENS', 'CONN_EVICTION_RUN', 'CONN_IN_MEMORY', 'CONN_LAS_OPEN', 'CONN_LEAK_MEMORY', - 'CONN_LOG_SERVER_RUN', 'CONN_LSM_MERGE', 'CONN_PANIC', 'CONN_READONLY', 'CONN_RECOVERING', 'CONN_SERVER_ASYNC', 'CONN_SERVER_CHECKPOINT', + 'CONN_SERVER_LOG', 'CONN_SERVER_LSM', - 'CONN_SERVER_RUN', 'CONN_SERVER_STATISTICS', 'CONN_SERVER_SWEEP', 'CONN_WAS_BACKUP', diff --git 
a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index e033f77327f..1f7f7d9fd3a 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -63,10 +63,12 @@ CPUs CRC CSV CStream +CURFILE CURSORs CURSTD CallsCustDate Castagnoli +CentOS Checkpointing Checksum Checksums @@ -223,8 +225,10 @@ MEMALIGN MERCHANTABILITY METADATA MONGODB +MOVEFILE MRXB MRXBOPC +MSDN MSVC MULTI MULTIBLOCK @@ -238,8 +242,7 @@ Metadata Mewhort Mitzenmacher MongoDB -MoveFile -MoveFileW +MoveFileExW Multi MultiByteToWideChar Multithreaded @@ -1147,6 +1150,7 @@ sw sx sy sys +syscall sz t's tV diff --git a/src/third_party/wiredtiger/dist/s_style b/src/third_party/wiredtiger/dist/s_style index 8e755224ee2..388a481ef56 100755 --- a/src/third_party/wiredtiger/dist/s_style +++ b/src/third_party/wiredtiger/dist/s_style @@ -93,6 +93,14 @@ else cat $t fi + if ! expr "$f" : 'examples/c/*' > /dev/null && + ! expr "$f" : 'ext/*' > /dev/null && + ! expr "$f" : 'src/os_posix/os_snprintf.c' > /dev/null && + egrep '[^a-z_]snprintf\(|[^a-z_]vsnprintf\(' $f > $t; then + echo "$f: snprintf call, use WiredTiger library replacements" + cat $t + fi + # Alignment directive before "struct". egrep 'WT_COMPILER_TYPE_ALIGN.*struct' $f > $t test -s $t && { diff --git a/src/third_party/wiredtiger/dist/s_void b/src/third_party/wiredtiger/dist/s_void index 90425d5a718..249f043d029 100755 --- a/src/third_party/wiredtiger/dist/s_void +++ b/src/third_party/wiredtiger/dist/s_void @@ -137,7 +137,7 @@ for f in `find bench ext src test -name '*.[ci]'`; do # form of return assignment or call. 
file_parse $f | sed -e 's/return ([^)]*); }$//' \ - -e '/[A-Z]*_API_CALL[A-Z_]*(/d' \ + -e '/[_A-Z]*_API_CALL[_A-Z]*(/d' \ -e '/WT_CURSOR_NEEDKEY(/d' \ -e '/WT_CURSOR_NEEDVALUE(/d' \ -e '/WT_ERR[A-Z_]*(/d' \ @@ -166,7 +166,7 @@ for f in `find bench ext src test -name '*.[ci]'`; do file_parse $f | grep 'WT_DECL_RET' | sed -e '/ret =/d' \ - -e '/API_END_RET/d' \ + -e '/[_A-Z]*_API_CALL[_A-Z]*(/d' \ -e '/WT_CURSOR_NEEDKEY/d' \ -e '/WT_CURSOR_NEEDVALUE/d' \ -e '/WT_ERR/d' \ diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index a4d92345f88..ac79ffd029a 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -206,6 +206,7 @@ connection_stats = [ CacheStat('cache_eviction_force', 'pages evicted because they exceeded the in-memory maximum'), CacheStat('cache_eviction_force_delete', 'pages evicted because they had chains of deleted items'), CacheStat('cache_eviction_force_fail', 'failed eviction of pages that exceeded the in-memory maximum'), + CacheStat('cache_eviction_force_retune', 'force re-tuning of eviction workers once in a while'), CacheStat('cache_eviction_get_ref', 'eviction calls to get a page'), CacheStat('cache_eviction_get_ref_empty', 'eviction calls to get a page found queue empty'), CacheStat('cache_eviction_get_ref_empty2', 'eviction calls to get a page found queue empty after locking'), @@ -323,10 +324,12 @@ connection_stats = [ LogStat('log_scan_records', 'records processed by log scan'), LogStat('log_scan_rereads', 'log scan records requiring two reads'), LogStat('log_scans', 'log scan operations'), + LogStat('log_slot_active_closed', 'consolidated slot join active slot closed'), LogStat('log_slot_closes', 'consolidated slot closures'), LogStat('log_slot_coalesced', 'written slots coalesced'), LogStat('log_slot_consolidated', 'logging bytes consolidated', 'size'), LogStat('log_slot_joins', 'consolidated slot joins'), + 
LogStat('log_slot_no_free_slots', 'consolidated slot transitions unable to find free slot'), LogStat('log_slot_races', 'consolidated slot join races'), LogStat('log_slot_switch_busy', 'busy returns attempting to switch slots'), LogStat('log_slot_transitions', 'consolidated slot join transitions'), diff --git a/src/third_party/wiredtiger/examples/c/Makefile.am b/src/third_party/wiredtiger/examples/c/Makefile.am index d5305eec5c8..20936661e06 100644 --- a/src/third_party/wiredtiger/examples/c/Makefile.am +++ b/src/third_party/wiredtiger/examples/c/Makefile.am @@ -20,7 +20,6 @@ noinst_PROGRAMS = \ ex_pack \ ex_process \ ex_schema \ - ex_scope \ ex_stat \ ex_sync \ ex_thread diff --git a/src/third_party/wiredtiger/examples/c/ex_all.c b/src/third_party/wiredtiger/examples/c/ex_all.c index 8a1533011b2..82620673fe1 100644 --- a/src/third_party/wiredtiger/examples/c/ex_all.c +++ b/src/third_party/wiredtiger/examples/c/ex_all.c @@ -848,8 +848,8 @@ my_compare(WT_COLLATOR *collator, WT_SESSION *session, p1 = (const char *)value1->data; p2 = (const char *)value2->data; - while (*p1 != '\0' && *p1 == *p2) - p1++, p2++; + for (; *p1 != '\0' && *p1 == *p2; ++p1, ++p2) + ; *cmp = (int)*p2 - (int)*p1; return (0); diff --git a/src/third_party/wiredtiger/examples/c/ex_async.c b/src/third_party/wiredtiger/examples/c/ex_async.c index f7531a5c3d8..5cfafca0418 100644 --- a/src/third_party/wiredtiger/examples/c/ex_async.c +++ b/src/third_party/wiredtiger/examples/c/ex_async.c @@ -170,12 +170,12 @@ main(void) * an asynchronous insert. */ /*! [async set the operation's string key] */ - snprintf(k[i], sizeof(k), "key%d", i); + (void)snprintf(k[i], sizeof(k), "key%d", i); op->set_key(op, k[i]); /*! [async set the operation's string key] */ /*! [async set the operation's string value] */ - snprintf(v[i], sizeof(v), "value%d", i); + (void)snprintf(v[i], sizeof(v), "value%d", i); op->set_value(op, v[i]); /*! 
[async set the operation's string value] */ @@ -218,7 +218,7 @@ main(void) * Set the operation's string key and value, and then do * an asynchronous search. */ - snprintf(k[i], sizeof(k), "key%d", i); + (void)snprintf(k[i], sizeof(k), "key%d", i); op->set_key(op, k[i]); ret = op->search(op); /*! [async search] */ diff --git a/src/third_party/wiredtiger/examples/c/ex_backup.c b/src/third_party/wiredtiger/examples/c/ex_backup.c index 0697cbb3458..83cc9b22ecc 100644 --- a/src/third_party/wiredtiger/examples/c/ex_backup.c +++ b/src/third_party/wiredtiger/examples/c/ex_backup.c @@ -96,7 +96,7 @@ compare_backups(int i) if (i == 0) (void)strncpy(msg, "MAIN", sizeof(msg)); else - snprintf(msg, sizeof(msg), "%d", i); + (void)snprintf(msg, sizeof(msg), "%d", i); printf( "Iteration %s: Tables %s.%d and %s.%d %s\n", msg, full_out, i, incr_out, i, ret == 0 ? "identical" : "differ"); @@ -131,8 +131,8 @@ setup_directories(void) * For incremental backups we need 0-N. The 0 incremental * directory will compare with the original at the end. */ - snprintf(buf, sizeof(buf), "rm -rf %s.%d && mkdir %s.%d", - home_incr, i, home_incr, i); + (void)snprintf(buf, sizeof(buf), + "rm -rf %s.%d && mkdir %s.%d", home_incr, i, home_incr, i); if ((ret = system(buf)) != 0) { fprintf(stderr, "%s: failed ret %d\n", buf, ret); return (ret); @@ -142,8 +142,8 @@ setup_directories(void) /* * For full backups we need 1-N. */ - snprintf(buf, sizeof(buf), "rm -rf %s.%d && mkdir %s.%d", - home_full, i, home_full, i); + (void)snprintf(buf, sizeof(buf), + "rm -rf %s.%d && mkdir %s.%d", home_full, i, home_full, i); if ((ret = system(buf)) != 0) { fprintf(stderr, "%s: failed ret %d\n", buf, ret); return (ret); @@ -164,8 +164,8 @@ add_work(WT_SESSION *session, int iter) * Perform some operations with individual auto-commit transactions. 
*/ for (i = 0; i < MAX_KEYS; i++) { - snprintf(k, sizeof(k), "key.%d.%d", iter, i); - snprintf(v, sizeof(v), "value.%d.%d", iter, i); + (void)snprintf(k, sizeof(k), "key.%d.%d", iter, i); + (void)snprintf(v, sizeof(v), "value.%d.%d", iter, i); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); @@ -187,7 +187,7 @@ take_full_backup(WT_SESSION *session, int i) * directories. Otherwise only into the appropriate full directory. */ if (i != 0) { - snprintf(h, sizeof(h), "%s.%d", home_full, i); + (void)snprintf(h, sizeof(h), "%s.%d", home_full, i); hdir = h; } else hdir = home_incr; @@ -200,14 +200,15 @@ take_full_backup(WT_SESSION *session, int i) * Take a full backup into each incremental directory. */ for (j = 0; j < MAX_ITERATIONS; j++) { - snprintf(h, sizeof(h), "%s.%d", home_incr, j); + (void)snprintf(h, sizeof(h), + "%s.%d", home_incr, j); (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, h, filename); ret = system(buf); } else { - snprintf(h, sizeof(h), "%s.%d", home_full, i); + (void)snprintf(h, sizeof(h), "%s.%d", home_full, i); (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, hdir, filename); ret = system(buf); @@ -237,12 +238,12 @@ take_incr_backup(WT_SESSION *session, int i) * Copy into the 0 incremental directory and then each of the * incremental directories for this iteration and later. 
*/ - snprintf(h, sizeof(h), "%s.0", home_incr); + (void)snprintf(h, sizeof(h), "%s.0", home_incr); (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, h, filename); ret = system(buf); for (j = i; j < MAX_ITERATIONS; j++) { - snprintf(h, sizeof(h), "%s.%d", home_incr, j); + (void)snprintf(h, sizeof(h), "%s.%d", home_incr, j); (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, h, filename); ret = system(buf); @@ -270,7 +271,8 @@ main(void) int i, ret; char cmd_buf[256]; - snprintf(cmd_buf, sizeof(cmd_buf), "rm -rf %s && mkdir %s", home, home); + (void)snprintf(cmd_buf, sizeof(cmd_buf), + "rm -rf %s && mkdir %s", home, home); if ((ret = system(cmd_buf)) != 0) { fprintf(stderr, "%s: failed ret %d\n", cmd_buf, ret); return (EXIT_FAILURE); diff --git a/src/third_party/wiredtiger/examples/c/ex_encrypt.c b/src/third_party/wiredtiger/examples/c/ex_encrypt.c index 00dc66fc24d..1520bd286cd 100644 --- a/src/third_party/wiredtiger/examples/c/ex_encrypt.c +++ b/src/third_party/wiredtiger/examples/c/ex_encrypt.c @@ -507,12 +507,12 @@ main(void) * we decrypt on read. 
*/ for (i = 0; i < MAX_KEYS; i++) { - snprintf(keybuf, sizeof(keybuf), "key%d", i); + (void)snprintf(keybuf, sizeof(keybuf), "key%d", i); c1->set_key(c1, keybuf); c2->set_key(c2, keybuf); nc->set_key(nc, keybuf); - snprintf(valbuf, sizeof(valbuf), "value%d", i); + (void)snprintf(valbuf, sizeof(valbuf), "value%d", i); c1->set_value(c1, valbuf); c2->set_value(c2, valbuf); nc->set_value(nc, valbuf); diff --git a/src/third_party/wiredtiger/examples/c/ex_log.c b/src/third_party/wiredtiger/examples/c/ex_log.c index fdbc39412ae..0d8fbf97233 100644 --- a/src/third_party/wiredtiger/examples/c/ex_log.c +++ b/src/third_party/wiredtiger/examples/c/ex_log.c @@ -291,8 +291,8 @@ main(void) char cmd_buf[256], k[16], v[16]; count_min = 0; - snprintf(cmd_buf, sizeof(cmd_buf), "rm -rf %s %s && mkdir %s %s", - home1, home2, home1, home2); + (void)snprintf(cmd_buf, sizeof(cmd_buf), + "rm -rf %s %s && mkdir %s %s", home1, home2, home1, home2); if ((ret = system(cmd_buf)) != 0) { fprintf(stderr, "%s: failed ret %d\n", cmd_buf, ret); return (EXIT_FAILURE); @@ -312,8 +312,8 @@ main(void) * Perform some operations with individual auto-commit transactions. */ for (record_count = 0, i = 0; i < MAX_KEYS; i++, record_count++) { - snprintf(k, sizeof(k), "key%d", i); - snprintf(v, sizeof(v), "value%d", i); + (void)snprintf(k, sizeof(k), "key%d", i); + (void)snprintf(v, sizeof(v), "value%d", i); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); @@ -324,8 +324,8 @@ main(void) * Perform some operations within a single transaction. 
*/ for (i = MAX_KEYS; i < MAX_KEYS+5; i++, record_count++) { - snprintf(k, sizeof(k), "key%d", i); - snprintf(v, sizeof(v), "value%d", i); + (void)snprintf(k, sizeof(k), "key%d", i); + (void)snprintf(v, sizeof(v), "value%d", i); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); diff --git a/src/third_party/wiredtiger/examples/c/ex_scope.c b/src/third_party/wiredtiger/examples/c/ex_scope.c deleted file mode 100644 index 795ad85d57b..00000000000 --- a/src/third_party/wiredtiger/examples/c/ex_scope.c +++ /dev/null @@ -1,217 +0,0 @@ -/*- - * Public Domain 2014-2016 MongoDB, Inc. - * Public Domain 2008-2014 WiredTiger, Inc. - * - * This is free and unencumbered software released into the public domain. - * - * Anyone is free to copy, modify, publish, use, compile, sell, or - * distribute this software, either in source code form or as a compiled - * binary, for any purpose, commercial or non-commercial, and by any - * means. - * - * In jurisdictions that recognize copyright laws, the author or authors - * of this software dedicate any and all copyright interest in the - * software to the public domain. We make this dedication for the benefit - * of the public at large and to the detriment of our heirs and - * successors. We intend this dedication to be an overt act of - * relinquishment in perpetuity of all present and future rights to this - * software under copyright law. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * ex_scope.c - * demonstrates the scope of buffers holding cursor keys and values. 
- */ -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <wiredtiger.h> - -#ifdef _WIN32 -/* snprintf is not supported on <= VS2013 */ -#define snprintf _snprintf -#endif - -static const char *home; - -static int -cursor_scope_ops(WT_CURSOR *cursor) -{ - struct { - const char *op; - const char *key; - const char *value; - int (*apply)(WT_CURSOR *); - } *op, ops[] = { - { "insert", "key1", "value1", cursor->insert, }, - { "update", "key1", "value2", cursor->update, }, - { "search", "key1", "value2", cursor->search, }, - { "remove", "key1", "value2", cursor->remove, }, - { NULL, NULL, NULL, NULL } - }; - WT_SESSION *session; - const char *key, *value; - char keybuf[10], valuebuf[10]; - int ret; - - session = cursor->session; - - for (op = ops; op->key != NULL; op++) { - key = value = NULL; - - /*! [cursor scope operation] */ - (void)snprintf(keybuf, sizeof(keybuf), "%s", op->key); - cursor->set_key(cursor, keybuf); - (void)snprintf(valuebuf, sizeof(valuebuf), "%s", op->value); - cursor->set_value(cursor, valuebuf); - - /* - * The application must keep key and value memory valid until - * the next operation that positions the cursor, modifies the - * data, or resets or closes the cursor. - * - * Modifying either the key or value buffers is not permitted. - */ - - /* Apply the operation (insert, update, search or remove). */ - if ((ret = op->apply(cursor)) != 0) { - fprintf(stderr, - "%s: error performing the operation: %s\n", - op->op, session->strerror(session, ret)); - return (ret); - } - - /* - * The cursor no longer references application memory, so - * application buffers can be safely overwritten. - */ - strcpy(keybuf, "no key"); - strcpy(valuebuf, "no value"); - - /* - * Check that get_key/value behave as expected after the - * operation. 
- */ - if (op->apply == cursor->insert) { - /* - * WT_CURSOR::insert no longer references application - * memory, but as it does not position the cursor, it - * doesn't reference memory owned by the cursor, either. - */ - printf("ex_scope: " - "expect two WiredTiger error messages:\n"); - if ((ret = cursor->get_key(cursor, &key)) == 0 || - (ret = cursor->get_value(cursor, &value)) == 0) { - fprintf(stderr, - "%s: error in get_key/value: %s\n", - op->op, session->strerror(session, ret)); - return (ret); - } - continue; - } - if (op->apply == cursor->remove) { - /* - * WT_CURSOR::remove no longer references application - * memory; as it does not position the cursor, it will - * reference key memory owned by the cursor, but has no - * value. - */ - printf("ex_scope: " - "expect one WiredTiger error message:\n"); - if ((ret = cursor->get_key(cursor, &key)) != 0 || - (ret = cursor->get_value(cursor, &value)) == 0) { - fprintf(stderr, - "%s: error in get_key/value: %s\n", - op->op, session->strerror(session, ret)); - return (ret); - } - } else /* search, update */{ - /* - * WT_CURSOR::search and WT_CURSOR::update no longer - * reference application memory; as they position the - * cursor, they will reference key/value memory owned - * by the cursor. - */ - if ((ret = cursor->get_key(cursor, &key)) != 0 || - (ret = cursor->get_value(cursor, &value)) != 0) { - fprintf(stderr, - "%s: error in get_key/value: %s\n", - op->op, session->strerror(session, ret)); - return (ret); - } - } - - /* - * Modifying the memory referenced by either key or value is - * not permitted. - * - * Check that the cursor's key and value are what we expect. 
- */ - if (key == keybuf || - (op->apply != cursor->remove && value == valuebuf)) { - fprintf(stderr, - "%s: cursor points at application memory!\n", - op->op); - return (EINVAL); - } - - if (strcmp(key, op->key) != 0 || - (op->apply != cursor->remove && - strcmp(value, op->value) != 0)) { - fprintf(stderr, - "%s: unexpected key / value!\n", op->op); - return (EINVAL); - } - /*! [cursor scope operation] */ - } - - return (0); -} - -int -main(void) -{ - WT_CONNECTION *conn; - WT_CURSOR *cursor; - WT_SESSION *session; - int ret; - - /* - * Create a clean test directory for this run of the test program if the - * environment variable isn't already set (as is done by make check). - */ - if (getenv("WIREDTIGER_HOME") == NULL) { - home = "WT_HOME"; - ret = system("rm -rf WT_HOME && mkdir WT_HOME"); - } else - home = NULL; - - /* Open a connection, create a simple table, open a cursor. */ - if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0 || - (ret = conn->open_session(conn, NULL, NULL, &session)) != 0) { - fprintf(stderr, "Error connecting to %s: %s\n", - home == NULL ? "." : home, wiredtiger_strerror(ret)); - return (EXIT_FAILURE); - } - - ret = session->create(session, - "table:scope", "key_format=S,value_format=S,columns=(k,v)"); - - ret = session->open_cursor(session, - "table:scope", NULL, NULL, &cursor); - - ret = cursor_scope_ops(cursor); - - /* Close the connection and clean up. */ - ret = conn->close(conn, NULL); - - return (ret == 0 ? 
EXIT_SUCCESS : EXIT_FAILURE); -} diff --git a/src/third_party/wiredtiger/examples/c/ex_sync.c b/src/third_party/wiredtiger/examples/c/ex_sync.c index 2c610b1e570..b2d74b52f7f 100644 --- a/src/third_party/wiredtiger/examples/c/ex_sync.c +++ b/src/third_party/wiredtiger/examples/c/ex_sync.c @@ -59,8 +59,8 @@ main(void) char cmd_buf[256], k[16], v[16]; const char *conf; - snprintf(cmd_buf, sizeof(cmd_buf), "rm -rf %s && mkdir %s", - home, home); + (void)snprintf(cmd_buf, sizeof(cmd_buf), + "rm -rf %s && mkdir %s", home, home); if ((ret = system(cmd_buf)) != 0) { fprintf(stderr, "%s: failed ret %d\n", cmd_buf, ret); return (EXIT_FAILURE); @@ -98,8 +98,8 @@ main(void) ret = session->commit_transaction(session, conf); ret = session->begin_transaction(session, NULL); } - snprintf(k, sizeof(k), "key%d", i); - snprintf(v, sizeof(v), "value%d", i); + (void)snprintf(k, sizeof(k), "key%d", i); + (void)snprintf(v, sizeof(v), "value%d", i); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); @@ -113,8 +113,8 @@ main(void) * Perform some operations within a single transaction. */ for (i = MAX_KEYS; i < MAX_KEYS+5; i++, record_count++) { - snprintf(k, sizeof(k), "key%d", i); - snprintf(v, sizeof(v), "value%d", i); + (void)snprintf(k, sizeof(k), "key%d", i); + (void)snprintf(v, sizeof(v), "value%d", i); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); @@ -129,8 +129,8 @@ main(void) * Demonstrate using log_flush to force the log to disk. 
*/ for (i = 0; i < MAX_KEYS; i++, record_count++) { - snprintf(k, sizeof(k), "key%d", record_count); - snprintf(v, sizeof(v), "value%d", record_count); + (void)snprintf(k, sizeof(k), "key%d", record_count); + (void)snprintf(v, sizeof(v), "value%d", record_count); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); @@ -138,8 +138,8 @@ main(void) ret = session->log_flush(session, "sync=on"); for (i = 0; i < MAX_KEYS; i++, record_count++) { - snprintf(k, sizeof(k), "key%d", record_count); - snprintf(v, sizeof(v), "value%d", record_count); + (void)snprintf(k, sizeof(k), "key%d", record_count); + (void)snprintf(v, sizeof(v), "value%d", record_count); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index a4f6891fd33..16d592dd33e 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -1,5 +1,5 @@ { - "commit": "cc2f15f595b16479affd73791c207da334453bcc", + "commit": "cb16839cfbdf338af95bed43ca40979ae6e32f54", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-3.4" } diff --git a/src/third_party/wiredtiger/lang/java/Makefile.am b/src/third_party/wiredtiger/lang/java/Makefile.am index 7184fe610dc..2ff822a5d08 100644 --- a/src/third_party/wiredtiger/lang/java/Makefile.am +++ b/src/third_party/wiredtiger/lang/java/Makefile.am @@ -49,6 +49,7 @@ JAVA_JUNIT = \ $(JAVATEST)/ConcurrentCloseTest.java \ $(JAVATEST)/CursorTest.java \ $(JAVATEST)/CursorTest02.java \ + $(JAVATEST)/CursorTest03.java \ $(JAVATEST)/ExceptionTest.java \ $(JAVATEST)/PackTest.java \ $(JAVATEST)/PackTest02.java \ diff --git a/src/third_party/wiredtiger/lang/java/wiredtiger.i b/src/third_party/wiredtiger/lang/java/wiredtiger.i index efc512f2f5a..275b708090c 100644 --- a/src/third_party/wiredtiger/lang/java/wiredtiger.i +++ b/src/third_party/wiredtiger/lang/java/wiredtiger.i @@ -319,6 +319,15 @@ 
WT_CLASS(struct __wt_async_op, WT_ASYNC_OP, op) %rename (getValueFormat) __wt_async_op::getValue_format; %rename (getType) __wt_async_op::get_type; +/* + * Special cases: override the out typemap, return checking is done in the + * wrapper. + */ +%typemap(out) int __wt_cursor::compare_wrap, + int __wt_cursor::equals_wrap %{ + $result = $1; +%} + /* SWIG magic to turn Java byte strings into data / size. */ %apply (char *STRING, int LENGTH) { (char *data, int size) }; @@ -529,7 +538,6 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; %} %extend __wt_async_op { - %javamethodmodifiers get_key_wrap "protected"; WT_ITEM get_key_wrap(JNIEnv *jenv) { WT_ITEM k; diff --git a/src/third_party/wiredtiger/lang/python/setup_pip.py b/src/third_party/wiredtiger/lang/python/setup_pip.py new file mode 100644 index 00000000000..636eecab80a --- /dev/null +++ b/src/third_party/wiredtiger/lang/python/setup_pip.py @@ -0,0 +1,408 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + +# This script builds a Python source distribution that can be built and installed +# via pip install. This must be run in a git repository to determine the files +# to package. Also as a prerequisite, SWIG must be run as the generated files +# are part of the package. To create the distribution, in this directory, run +# "python setup_pip.py sdist", this creates a tar.gz file under ./dist . +from __future__ import print_function +import os, os.path, re, shutil, site, sys +from setuptools import setup, Distribution +from distutils.extension import Extension +import distutils.sysconfig +import distutils.ccompiler +from distutils.errors import CompileError, LinkError +import subprocess +from subprocess import call +import setuptools.command.install +import setuptools.command.build_ext + +# msg -- +# Print a message to stderr. +def msg(s): + print(os.path.basename(__file__) + ": " + s, file=sys.stderr) + +# die -- +# For failures, show a message and exit. +def die(s): + msg(s) + sys.exit(1) + +# build_commands -- +# Run a sequence of commands, and die if any fail. 
+def build_commands(commands, build_dir, build_env): + for command in commands: + callargs = [ 'sh', '-c', command ] + verbose_command = '"' + '" "'.join(callargs) + '"' + print('running: ' + verbose_command) + if call(callargs, cwd=build_dir, env=build_env) != 0: + die('build command failed: ' + verbose_command) + +# check_needed_dependencies -- +# Make a quick check of any needed library dependencies, and +# add to the library path and include path as needed. If a library +# is not found, it is not definitive. +def check_needed_dependencies(builtins, inc_paths, lib_paths): + library_dirs = get_library_dirs() + compiler = distutils.ccompiler.new_compiler() + distutils.sysconfig.customize_compiler(compiler) + compiler.set_library_dirs(library_dirs) + missing = [] + for name, libname, instructions in builtins: + found = compiler.find_library_file(library_dirs, libname) + if found is None: + msg(libname + ": missing") + msg(instructions) + msg("after installing it, set LD_LIBRARY_PATH or DYLD_LIBRARY_PATH") + missing.append(libname) + else: + package_top = os.path.dirname(os.path.dirname(found)) + inc_paths.append(os.path.join(package_top, 'include')) + lib_paths.append(os.path.join(package_top, 'lib')) + + # XXX: we are not accounting for other directories that might be + # discoverable via /sbin/ldconfig. It might be better to write a tiny + # compile using -lsnappy, -lz... + # + #if len(missing) > 0: + # die("install packages for: " + str(missing)) + +# find_executable -- +# Locate an executable in the PATH. 
+def find_executable(exename, path): + p = subprocess.Popen(['which', exename ], stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + out, err = p.communicate('') + out = str(out) # needed for Python3 + if out == '': + if err != '': + err = ': "' + err + '"' + die('"' + exename + '": not found in path' + err) + dirname = os.path.dirname(out) + if not dirname in path: + path.append(dirname) + +# get_build_path -- +# Create a PATH that can be used for installation. Apparently, +# installation commands are run with a restricted PATH, and +# autoreconf/aclocal will not normally be found. +def get_build_path(): + build_paths = [] + find_executable('autoreconf', build_paths) + find_executable('aclocal', build_paths) + build_path = os.environ['PATH'] + ':' + ':'.join(build_paths) + return build_path + +# get_compile_flags -- +# Get system specific compile flags. Return a triple: C preprocessor +# flags, C compilation flags and linker flags. +def get_compile_flags(inc_paths, lib_paths): + # Suppress warnings building SWIG generated code + if sys.platform == 'win32' and cc == 'msvc': + cflags = ['/arch:SSE2', '/EHsc'] + cppflags = [] + ldflags = [] + # Windows untested and incomplete, don't claim that it works. 
+ die('Windows is not supported by this setup script') + else: + cflags = [ '-w', '-Wno-sign-conversion', '-std=c11' ] + cppflags = ['-I' + path for path in inc_paths] + cppflags.append('-DHAVE_CONFIG_H') + ldflags = ['-L' + path for path in lib_paths] + if sys.platform == 'darwin': + cflags.extend([ '-arch', 'x86_64' ]) + return (cppflags, cflags, ldflags) + +# get_sources_curdir -- +# Get a list of sources from the current directory +def get_sources_curdir(): + DEVNULL = open(os.devnull, 'w') + gitproc = subprocess.Popen( + ['git', 'ls-tree', '-r', '--name-only', 'HEAD^{tree}'], + stdin=DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + sources = [line.rstrip() for line in gitproc.stdout.readlines()] + err = gitproc.stderr.read() + gitproc.wait() + subret = gitproc.returncode + if subret != 0 or err: + msg("git command to get sources returned " + str(subret) + + ", error=" + str(err)) + die("this command must be run in a git repository") + return sources + +# get_wiredtiger_versions -- +# Read the version information from the RELEASE_INFO file. +def get_wiredtiger_versions(wt_dir): + v = {} + for l in open(os.path.join(wt_dir, 'RELEASE_INFO')): + if re.match(r'WIREDTIGER_VERSION_(?:MAJOR|MINOR|PATCH)=', l): + exec(l, v) + wt_ver = '%d.%d' % (v['WIREDTIGER_VERSION_MAJOR'], + v['WIREDTIGER_VERSION_MINOR']) + wt_full_ver = wt_ver + '.%d' % (v['WIREDTIGER_VERSION_PATCH']) + return (wt_ver, wt_full_ver) + +# get_library_dirs +# Build a plausible set of library directories. +def get_library_dirs(): + dirs = [] + dirs.append("/usr/local/lib") + dirs.append("/usr/local/lib64") + dirs.append("/lib/x86_64-linux-gnu") + dirs.append("/opt/local/lib") + dirs.append("/usr/lib") + dirs.append("/usr/lib64") + for path in ['LD_LIBRARY_PATH', 'DYLD_LIBRARY_PATH', 'LIBRARY_PATH']: + if path in os.environ: + dirs.extend(os.environ[path].split(':')) + dirs = list(set(filter(os.path.isdir, dirs))) + return dirs + +# source_filter +# Make any needed changes to the sources list. 
Any entry that +# needs to be moved is returned in a dictionary. +def source_filter(sources): + result = [] + movers = dict() + py_dir = os.path.join('lang', 'python') + pywt_dir = os.path.join(py_dir, 'wiredtiger') + pywt_prefix = pywt_dir + os.path.sep + for f in sources: + if not re.match(source_regex, f): + continue + src = f + dest = f + # move all lang/python files to the top level. + if dest.startswith(pywt_prefix): + dest = os.path.basename(dest) + if dest == 'pip_init.py': + dest = '__init__.py' + if dest != src: + movers[dest] = src + result.append(dest) + # Add SWIG generated files + result.append('wiredtiger.py') + movers['wiredtiger.py'] = os.path.join(pywt_dir, '__init__.py') + result.append(os.path.join(py_dir, 'wiredtiger_wrap.c')) + return result, movers + +################################################################ +# Do some initial setup and checks. +this_abs_script = os.path.abspath(__file__) +this_dir = os.path.dirname(this_abs_script) +pip_command = None +for arg in sys.argv[1:]: + if arg[0] != '-' and pip_command == None: + pip_command = arg + break + +if this_dir.endswith(os.sep + os.path.join('lang', 'python')): + wt_dir = os.path.dirname(os.path.dirname(this_dir)) + os.chdir(wt_dir) +elif os.path.isfile(os.path.join(this_dir, 'LICENSE')): + wt_dir = this_dir +else: + die('running from an unknown directory') + +python3 = (sys.version_info[0] > 2) +if python3: + die('Python3 is not yet supported') + +# Ensure that Extensions won't be built for 32 bit, +# that won't work with WiredTiger. 
+if sys.maxsize < 2**32: + die('need to be running on a 64 bit system, and have a 64 bit Python') + +python_rel_dir = os.path.join('lang', 'python') +build_dir = os.path.join(wt_dir, 'build_posix') +makefile = os.path.join(build_dir, 'Makefile') +built_sentinal = os.path.join(build_dir, 'built.txt') +conf_make_dir = 'build_posix' +wt_swig_lib_name = os.path.join(python_rel_dir, '_wiredtiger.so') + +################################################################ +# Put together build options for the WiredTiger extension. +short_description = 'high performance, scalable, production quality, ' + \ + 'NoSQL, Open Source extensible platform for data management' +long_description = 'WiredTiger is a ' + short_description + '.\n\n' + \ + open(os.path.join(wt_dir, 'README')).read() + +wt_ver, wt_full_ver = get_wiredtiger_versions(wt_dir) +build_path = get_build_path() + +# We only need a small set of directories to build a WT library, +# we also include any files at the top level. +source_regex = r'^(?:(?:api|build_posix|ext|lang/python|src|dist)/|[^/]*$)' + +# The builtins that we include in this distribution. +builtins = [ + # [ name, libname, instructions ] + [ 'snappy', 'snappy', + 'Note: a suitable version of snappy can be found at\n' + \ + ' https://github.com/google/snappy/releases/download/' + \ + '1.1.3/snappy-1.1.3.tar.gz\n' + \ + 'It can be installed via: yum install snappy snappy-devel' + \ + 'or via: apt-get install libsnappy-dev' ], + [ 'zlib', 'z', + 'Need to install zlib\n' + \ + 'It can be installed via: apt-get install zlib1g' ] +] +builtin_names = [b[0] for b in builtins] +builtin_libraries = [b[1] for b in builtins] + +# Here's the configure/make operations we perform before the python extension +# is linked. +configure_cmds = [ + './makemake --clean-and-make', + './reconf', + # force building a position independent library; it will be linked + # into a single shared library with the SWIG interface code. 
+ 'CFLAGS="${CFLAGS:-} -fPIC -DPIC" ' + \ + '../configure --enable-python --with-builtins=' + ','.join(builtin_names) +] + +# build all the builtins, at the moment they are all compressors. +make_cmds = [] +for name in builtin_names: + make_cmds.append('(cd ext/compressors/' + name + '/; make)') +make_cmds.append('make libwiredtiger.la') + +inc_paths = [ os.path.join(build_dir, 'src', 'include'), build_dir, '.' ] +lib_paths = [ '.' ] # wiredtiger.so is moved into the top level directory + +check_needed_dependencies(builtins, inc_paths, lib_paths) + +cppflags, cflags, ldflags = get_compile_flags(inc_paths, lib_paths) + +# If we are creating a source distribution, create a staging directory +# with just the right sources. Put the result in the python dist directory. +if pip_command == 'sdist': + sources, movers = source_filter(get_sources_curdir()) + stage_dir = os.path.join(python_rel_dir, 'stage') + shutil.rmtree(stage_dir, True) + os.makedirs(stage_dir) + shutil.copy2(this_abs_script, os.path.join(stage_dir, 'setup.py')) + for f in sources: + d = os.path.join(stage_dir, os.path.dirname(f)) + if not os.path.isdir(d): + os.makedirs(d) + if f in movers: + src = movers[f] + else: + src = f + # Symlinks are not followed in setup, we need to use real files. 
+ shutil.copy2(src, os.path.join(stage_dir, f)) + os.chdir(stage_dir) + sys.argv.append('--dist-dir=' + os.path.join('..', 'dist')) +else: + sources = [ os.path.join(python_rel_dir, 'wiredtiger_wrap.c') ] + +wt_ext = Extension('_wiredtiger', + sources = sources, + extra_compile_args = cflags + cppflags, + extra_link_args = ldflags, + libraries = builtin_libraries, + extra_objects = [ os.path.join(build_dir, '.libs', 'libwiredtiger.a') ], + include_dirs = inc_paths, + library_dirs = lib_paths, +) +extensions = [ wt_ext ] +env = { "CFLAGS" : ' '.join(cflags), + "CPPFLAGS" : ' '.join(cppflags), + "LDFLAGS" : ' '.join(ldflags), + "PATH" : build_path } + +class BinaryDistribution(Distribution): + def is_pure(self): + return False + +class WTInstall(setuptools.command.install.install): + def run(self): + self.run_command("build_ext") + return setuptools.command.install.install.run(self) + +class WTBuildExt(setuptools.command.build_ext.build_ext): + def __init__(self, *args, **kwargs): + setuptools.command.build_ext.build_ext.__init__(self, *args, **kwargs) + + def run(self): + # only run this once + if not os.path.isfile(built_sentinal): + try: + os.remove(makefile) + except OSError: + pass + self.execute( + lambda: build_commands(configure_cmds, conf_make_dir, env), [], + 'wiredtiger configure') + if not os.path.isfile(makefile): + die('configure failed, file does not exist: ' + makefile) + self.execute( + lambda: build_commands(make_cmds, conf_make_dir, env), [], + 'wiredtiger make') + open(built_sentinal, 'a').close() + return setuptools.command.build_ext.build_ext.run(self) + +setup( + name = 'wiredtiger', + version = wt_full_ver, + author = 'The WiredTiger Development Team, part of MongoDB', + author_email = 'info@wiredtiger.com', + description = short_description, + license='GPL2,GPL3,Commercial', + long_description = long_description, + url = 'http://source.wiredtiger.com/', + keywords = 'scalable NoSQL database datastore engine open source', + packages = 
['wiredtiger'], + ext_package = 'wiredtiger', + ext_modules = extensions, + include_package_data = True, + distclass = BinaryDistribution, + package_dir = { 'wiredtiger' : '.' }, + cmdclass = { 'install': WTInstall, 'build_ext': WTBuildExt }, + package_data = { + 'wiredtiger' : [ wt_swig_lib_name, '*.py' ] + }, + classifiers=[ + 'Intended Audience :: Developers', + 'Programming Language :: C', + 'Programming Language :: C++', + 'Programming Language :: Python', + 'Programming Language :: Java', + 'Operating System :: MacOS :: MacOS X', + 'Operating System :: POSIX', + 'Operating System :: POSIX :: BSD', + 'Operating System :: POSIX :: Linux', + 'Operating System :: POSIX :: SunOS/Solaris', + ] +) + +if pip_command == 'sdist': + shutil.rmtree(os.path.join(this_dir, 'stage')) diff --git a/src/third_party/wiredtiger/lang/python/wiredtiger/pip_init.py b/src/third_party/wiredtiger/lang/python/wiredtiger/pip_init.py new file mode 100644 index 00000000000..d59c8218976 --- /dev/null +++ b/src/third_party/wiredtiger/lang/python/wiredtiger/pip_init.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + +# pip_init.py +# This is installed as __init__.py, and imports the file created by SWIG. +# This is needed because SWIG's import helper code created by certain SWIG +# versions may be broken, see: https://github.com/swig/swig/issues/769 . +# Importing indirectly seems to avoid these issues. +import os, sys +fname = os.path.basename(__file__) +if fname != '__init__.py' and fname != '__init__.pyc': + print(__file__ + ': this file is not yet installed') + sys.exit(1) + +# After importing the SWIG-generated file, copy all symbols from it +# to this module so they will appear in the wiredtiger namespace. +me = sys.modules[__name__] +sys.path.append(os.path.dirname(__file__)) # needed for Python3 +import wiredtiger +for name in dir(wiredtiger): + value = getattr(wiredtiger, name) + setattr(me, name, value) diff --git a/src/third_party/wiredtiger/src/async/async_api.c b/src/third_party/wiredtiger/src/async/async_api.c index 026a008188c..b9cc995f5a5 100644 --- a/src/third_party/wiredtiger/src/async/async_api.c +++ b/src/third_party/wiredtiger/src/async/async_api.c @@ -338,17 +338,15 @@ __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]) * 2. If async is off, and the user wants it on, start it. * 3. If not a toggle and async is off, we're done. 
*/ - if (conn->async_cfg && !run) { - /* Case 1 */ + if (conn->async_cfg && !run) { /* Case 1 */ WT_TRET(__wt_async_flush(session)); ret = __wt_async_destroy(session); conn->async_cfg = false; return (ret); - } else if (!conn->async_cfg && run) - /* Case 2 */ + } + if (!conn->async_cfg && run) /* Case 2 */ return (__async_start(session)); - else if (!conn->async_cfg) - /* Case 3 */ + if (!conn->async_cfg) /* Case 3 */ return (0); /* diff --git a/src/third_party/wiredtiger/src/block/block_addr.c b/src/third_party/wiredtiger/src/block/block_addr.c index 580316bdfc6..a67efca62a3 100644 --- a/src/third_party/wiredtiger/src/block/block_addr.c +++ b/src/third_party/wiredtiger/src/block/block_addr.c @@ -226,7 +226,7 @@ __wt_block_ckpt_to_buffer(WT_SESSION_IMPL *session, ci->discard.offset, ci->discard.size, ci->discard.checksum)); a = (uint64_t)ci->file_size; WT_RET(__wt_vpack_uint(pp, 0, a)); - a = (uint64_t)ci->ckpt_size; + a = ci->ckpt_size; WT_RET(__wt_vpack_uint(pp, 0, a)); return (0); diff --git a/src/third_party/wiredtiger/src/block/block_ext.c b/src/third_party/wiredtiger/src/block/block_ext.c index 26acc8c560f..da7a06d873d 100644 --- a/src/third_party/wiredtiger/src/block/block_ext.c +++ b/src/third_party/wiredtiger/src/block/block_ext.c @@ -634,11 +634,11 @@ __wt_block_off_free( */ if ((ret = __wt_block_off_remove_overlap( session, block, &block->live.alloc, offset, size)) == 0) - ret = __block_merge(session, block, - &block->live.avail, offset, (wt_off_t)size); + ret = __block_merge( + session, block, &block->live.avail, offset, size); else if (ret == WT_NOTFOUND) - ret = __block_merge(session, block, - &block->live.discard, offset, (wt_off_t)size); + ret = __block_merge( + session, block, &block->live.discard, offset, size); return (ret); } @@ -1247,7 +1247,8 @@ __wt_block_extlist_write(WT_SESSION_IMPL *session, WT_DECL_RET; WT_EXT *ext; WT_PAGE_HEADER *dsk; - size_t entries, size; + uint32_t entries; + size_t size; uint8_t *p; 
WT_RET(__block_extlist_dump(session, block, el, "write")); @@ -1377,8 +1378,8 @@ __wt_block_extlist_init(WT_SESSION_IMPL *session, size = (name == NULL ? 0 : strlen(name)) + strlen(".") + (extname == NULL ? 0 : strlen(extname) + 1); WT_RET(__wt_calloc_def(session, size, &el->name)); - (void)snprintf(el->name, size, "%s.%s", - name == NULL ? "" : name, extname == NULL ? "" : extname); + WT_RET(__wt_snprintf(el->name, size, "%s.%s", + name == NULL ? "" : name, extname == NULL ? "" : extname)); el->offset = WT_BLOCK_INVALID_OFFSET; el->track_size = track_size; diff --git a/src/third_party/wiredtiger/src/block/block_read.c b/src/third_party/wiredtiger/src/block/block_read.c index 869a92b6ae1..8d4aec7df75 100644 --- a/src/third_party/wiredtiger/src/block/block_read.c +++ b/src/third_party/wiredtiger/src/block/block_read.c @@ -39,7 +39,7 @@ __wt_bm_preload( (uint8_t *)bm->map + offset, size, bm->mapped_cookie); if (!mapped && handle->fh_advise != NULL) ret = handle->fh_advise(handle, (WT_SESSION *)session, - (wt_off_t)offset, (wt_off_t)size, WT_FILE_HANDLE_WILLNEED); + offset, (wt_off_t)size, WT_FILE_HANDLE_WILLNEED); if (ret != EBUSY && ret != ENOTSUP) return (ret); diff --git a/src/third_party/wiredtiger/src/block/block_vrfy.c b/src/third_party/wiredtiger/src/block/block_vrfy.c index 94824ad19f8..154765ed079 100644 --- a/src/third_party/wiredtiger/src/block/block_vrfy.c +++ b/src/third_party/wiredtiger/src/block/block_vrfy.c @@ -22,7 +22,7 @@ static int __verify_set_file_size(WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *); ((off) / (block)->allocsize - 1) #ifdef HAVE_VERBOSE #define WT_FRAG_TO_OFF(block, frag) \ - (((wt_off_t)(frag + 1)) * (block)->allocsize) + (((wt_off_t)((frag) + 1)) * (block)->allocsize) #endif /* diff --git a/src/third_party/wiredtiger/src/bloom/bloom.c b/src/third_party/wiredtiger/src/bloom/bloom.c index be3230437d3..b8d75678835 100644 --- a/src/third_party/wiredtiger/src/bloom/bloom.c +++ b/src/third_party/wiredtiger/src/bloom/bloom.c @@ -37,8 +37,8 
@@ __bloom_init(WT_SESSION_IMPL *session, len += strlen(config); WT_ERR(__wt_calloc_def(session, len, &bloom->config)); /* Add the standard config at the end, so it overrides user settings. */ - (void)snprintf(bloom->config, len, - "%s,%s", config == NULL ? "" : config, WT_BLOOM_TABLE_CONFIG); + WT_ERR(__wt_snprintf(bloom->config, len, + "%s,%s", config == NULL ? "" : config, WT_BLOOM_TABLE_CONFIG)); bloom->session = session; diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c index ba5fceae7c7..21e575ffca9 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curnext.c +++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c @@ -579,20 +579,20 @@ __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt) int __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) { + WT_CURSOR *cursor; WT_DECL_RET; WT_PAGE *page; WT_SESSION_IMPL *session; uint32_t flags; bool newpage; + cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cbt->iface.session; WT_STAT_CONN_INCR(session, cursor_next); WT_STAT_DATA_INCR(session, cursor_next); - flags = WT_READ_SKIP_INTL; /* Tree walk flags. */ - if (truncating) - LF_SET(WT_READ_TRUNCATE); + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); WT_RET(__cursor_func_init(cbt, false)); @@ -608,6 +608,9 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) * found. Then, move to the next page, until we reach the end of the * file. */ + flags = WT_READ_SKIP_INTL; /* tree walk flags */ + if (truncating) + LF_SET(WT_READ_TRUNCATE); for (newpage = false;; newpage = true) { page = cbt->ref == NULL ? 
NULL : cbt->ref->page; @@ -676,6 +679,8 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) if (ret == 0) WT_ERR(__wt_cursor_key_order_check(session, cbt, true)); #endif + if (ret == 0) + F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); err: if (ret != 0) WT_TRET(__cursor_reset(cbt)); diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c index 602c01b60eb..bf4bdad6529 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curprev.c +++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c @@ -535,20 +535,20 @@ new_insert: if ((ins = cbt->ins) != NULL) { int __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) { + WT_CURSOR *cursor; WT_DECL_RET; WT_PAGE *page; WT_SESSION_IMPL *session; uint32_t flags; bool newpage; + cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cbt->iface.session; WT_STAT_CONN_INCR(session, cursor_prev); WT_STAT_DATA_INCR(session, cursor_prev); - flags = WT_READ_PREV | WT_READ_SKIP_INTL; /* Tree walk flags. */ - if (truncating) - LF_SET(WT_READ_TRUNCATE); + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); WT_RET(__cursor_func_init(cbt, false)); @@ -564,6 +564,9 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) * found. Then, move to the previous page, until we reach the start * of the file. */ + flags = WT_READ_PREV | WT_READ_SKIP_INTL; /* tree walk flags */ + if (truncating) + LF_SET(WT_READ_TRUNCATE); for (newpage = false;; newpage = true) { page = cbt->ref == NULL ? 
NULL : cbt->ref->page; @@ -631,6 +634,8 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) if (ret == 0) WT_ERR(__wt_cursor_key_order_check(session, cbt, false)); #endif + if (ret == 0) + F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); err: if (ret != 0) WT_TRET(__cursor_reset(cbt)); diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c index 5fde2237538..944e276fc01 100644 --- a/src/third_party/wiredtiger/src/btree/bt_cursor.c +++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c @@ -9,6 +9,84 @@ #include "wt_internal.h" /* + * When returning an error, we need to restore the cursor to a valid state, the + * upper-level cursor code is likely to retry. This structure and the associated + * functions are used save and restore the cursor state. + */ +typedef struct { + WT_ITEM key; + WT_ITEM value; + uint64_t recno; + uint32_t flags; +} WT_CURFILE_STATE; + +/* + * __cursor_state_save -- + * Save the cursor's external state. + */ +static inline void +__cursor_state_save(WT_CURSOR *cursor, WT_CURFILE_STATE *state) +{ + WT_ITEM_SET(state->key, cursor->key); + WT_ITEM_SET(state->value, cursor->value); + state->recno = cursor->recno; + state->flags = cursor->flags; +} + +/* + * __cursor_state_restore -- + * Restore the cursor's external state. + */ +static inline void +__cursor_state_restore(WT_CURSOR *cursor, WT_CURFILE_STATE *state) +{ + if (F_ISSET(state, WT_CURSTD_KEY_EXT)) + WT_ITEM_SET(cursor->key, state->key); + if (F_ISSET(state, WT_CURSTD_VALUE_EXT)) + WT_ITEM_SET(cursor->value, state->value); + cursor->recno = state->recno; + F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); + F_SET(cursor, F_MASK(state, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT)); + +} + +/* + * __cursor_page_pinned -- + * Return if we have a page pinned and it's not been flagged for forced + * eviction (the forced eviction test is so we periodically release pages + * grown too large). 
+ */ +static inline bool +__cursor_page_pinned(WT_CURSOR_BTREE *cbt) +{ + return (F_ISSET(cbt, WT_CBT_ACTIVE) && + cbt->ref->page->read_gen != WT_READGEN_OLDEST); +} + +/* + * __cursor_copy_int_key -- + * If we're pointing into the tree, save the key into local memory. + */ +static inline int +__cursor_copy_int_key(WT_CURSOR *cursor) +{ + /* + * We're about to discard the cursor's position and the cursor layer + * might retry the operation. We discard pinned pages on error, which + * will invalidate pinned keys. Clear WT_CURSTD_KEY_INT in all cases, + * the underlying page is gone whether we can allocate memory or not. + */ + if (F_ISSET(cursor, WT_CURSTD_KEY_INT)) { + F_CLR(cursor, WT_CURSTD_KEY_INT); + if (!WT_DATA_IN_ITEM(&cursor->key)) + WT_RET(__wt_buf_set((WT_SESSION_IMPL *)cursor->session, + &cursor->key, cursor->key.data, cursor->key.size)); + F_SET(cursor, WT_CURSTD_KEY_EXT); + } + return (0); +} + +/* * __cursor_size_chk -- * Return if an inserted item is too large. */ @@ -55,6 +133,34 @@ __cursor_size_chk(WT_SESSION_IMPL *session, WT_ITEM *kv) } /* + * __cursor_disable_bulk -- + * Disable bulk loads into a tree. + */ +static inline void +__cursor_disable_bulk(WT_SESSION_IMPL *session, WT_BTREE *btree) +{ + /* + * Once a tree (other than the LSM primary) is no longer empty, eviction + * should pay attention to it, and it's no longer possible to bulk-load + * into it. + */ + if (!btree->original) + return; + if (btree->lsm_primary) { + btree->original = 0; /* Make the next test faster. */ + return; + } + + /* + * We use a compare-and-swap here to avoid races among the first inserts + * into a tree. Eviction is disabled when an empty tree is opened, and + * it must only be enabled once. + */ + if (__wt_atomic_cas8(&btree->original, 1, 0)) + __wt_evict_file_exclusive_off(session); +} + +/* * __cursor_fix_implicit -- * Return if search went past the end of the tree. 
*/ @@ -285,13 +391,17 @@ __cursor_row_modify( int __wt_btcur_reset(WT_CURSOR_BTREE *cbt) { + WT_CURSOR *cursor; WT_SESSION_IMPL *session; + cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cbt->iface.session; WT_STAT_CONN_INCR(session, cursor_reset); WT_STAT_DATA_INCR(session, cursor_reset); + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + return (__cursor_reset(cbt)); } @@ -303,6 +413,7 @@ int __wt_btcur_search(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; + WT_CURFILE_STATE state; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; @@ -317,14 +428,22 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_STAT_CONN_INCR(session, cursor_search); WT_STAT_DATA_INCR(session, cursor_search); + __cursor_state_save(cursor, &state); + + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key, then re-save the cursor state. + */ + WT_ERR(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + /* * If we have a page pinned, search it; if we don't have a page pinned, * or the search of the pinned page doesn't find an exact match, search * from the root. */ valid = false; - if (F_ISSET(cbt, WT_CBT_ACTIVE) && - cbt->ref->page->read_gen != WT_READGEN_OLDEST) { + if (__cursor_page_pinned(cbt)) { __wt_txn_cursor_op(session); WT_ERR(btree->type == BTREE_ROW ? 
@@ -352,6 +471,8 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) cbt->v = 0; cursor->value.data = &cbt->v; cursor->value.size = 1; + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); } else ret = WT_NOTFOUND; @@ -360,8 +481,10 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_ERR(__wt_cursor_key_order_init(session, cbt)); #endif -err: if (ret != 0) +err: if (ret != 0) { WT_TRET(__cursor_reset(cbt)); + __cursor_state_restore(cursor, &state); + } return (ret); } @@ -373,6 +496,7 @@ int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) { WT_BTREE *btree; + WT_CURFILE_STATE state; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; @@ -389,6 +513,15 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_STAT_CONN_INCR(session, cursor_search_near); WT_STAT_DATA_INCR(session, cursor_search_near); + __cursor_state_save(cursor, &state); + + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key, then re-save the cursor state. + */ + WT_ERR(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + /* * If we have a row-store page pinned, search it; if we don't have a * page pinned, or the search of the pinned page doesn't find an exact @@ -402,9 +535,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) * existing record. 
*/ valid = false; - if (btree->type == BTREE_ROW && - F_ISSET(cbt, WT_CBT_ACTIVE) && - cbt->ref->page->read_gen != WT_READGEN_OLDEST) { + if (btree->type == BTREE_ROW && __cursor_page_pinned(cbt)) { __wt_txn_cursor_op(session); WT_ERR(__cursor_row_search(session, cbt, cbt->ref, true)); @@ -455,6 +586,8 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) cursor->value.data = &cbt->v; cursor->value.size = 1; exact = 0; + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); } else if ((ret = __wt_btcur_next(cbt, false)) != WT_NOTFOUND) exact = 1; else { @@ -469,15 +602,18 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) exact = -1; } +err: if (ret == 0 && exactp != NULL) + *exactp = exact; + #ifdef HAVE_DIAGNOSTIC if (ret == 0) - WT_ERR(__wt_cursor_key_order_init(session, cbt)); + WT_TRET(__wt_cursor_key_order_init(session, cbt)); #endif -err: if (ret != 0) + if (ret != 0) { WT_TRET(__cursor_reset(cbt)); - if (exactp != NULL && (ret == 0 || ret == WT_NOTFOUND)) - *exactp = exact; + __cursor_state_restore(cursor, &state); + } return (ret); } @@ -489,9 +625,11 @@ int __wt_btcur_insert(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; + WT_CURFILE_STATE state; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; + bool append_key; btree = cbt->btree; cursor = &cbt->iface; @@ -502,30 +640,86 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) WT_STAT_DATA_INCRV(session, cursor_insert_bytes, cursor->key.size + cursor->value.size); + __cursor_state_save(cursor, &state); + if (btree->type == BTREE_ROW) WT_RET(__cursor_size_chk(session, &cursor->key)); WT_RET(__cursor_size_chk(session, &cursor->value)); + /* It's no longer possible to bulk-load into the tree. */ + __cursor_disable_bulk(session, btree); + + /* + * Insert a new record if WT_CURSTD_APPEND configured, (ignoring any + * application set record number). 
Although append can't be configured + * for a row-store, this code would break if it were, and that's owned + * by the upper cursor layer, be cautious. + */ + append_key = + F_ISSET(cursor, WT_CURSTD_APPEND) && btree->type != BTREE_ROW; + /* - * The tree is no longer empty: eviction should pay attention to it, - * and it's no longer possible to bulk-load into it. + * If inserting with overwrite configured, and positioned to an on-page + * key, the update doesn't require another search. The cursor won't be + * positioned on a page with an external key set, but be sure. Cursors + * configured for append aren't included, regardless of whether or not + * they meet all other criteria. */ - if (btree->bulk_load_ok) { - btree->bulk_load_ok = false; - __wt_btree_evictable(session, true); + if (__cursor_page_pinned(cbt) && + F_ISSET_ALL(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_OVERWRITE) && + !append_key) { + WT_ERR(__wt_txn_autocommit_check(session)); + /* + * The cursor position may not be exact (the cursor's comparison + * value not equal to zero). Correct to an exact match so we can + * update whatever we're pointing at. + */ + cbt->compare = 0; + ret = btree->type == BTREE_ROW ? + __cursor_row_modify(session, cbt, false) : + __cursor_col_modify(session, cbt, false); + if (ret == 0) + goto done; + + /* + * The pinned page goes away if we fail for any reason, make + * sure there's a local copy of any key. (Restart could still + * use the pinned page, but that's an unlikely path.) Re-save + * the cursor state: we may retry but eventually fail. + */ + WT_TRET(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + goto err; } -retry: WT_RET(__cursor_func_init(cbt, true)); + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key. Re-save the cursor state: we may retry but + * eventually fail. 
+ */ + WT_ERR(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); - switch (btree->type) { - case BTREE_COL_FIX: - case BTREE_COL_VAR: +retry: WT_ERR(__cursor_func_init(cbt, true)); + + if (btree->type == BTREE_ROW) { + WT_ERR(__cursor_row_search(session, cbt, NULL, true)); /* - * If WT_CURSTD_APPEND is set, insert a new record (ignoring - * the application's record number). The real record number - * is assigned by the serialized append operation. + * If not overwriting, fail if the key exists, else insert the + * key/value pair. */ - if (F_ISSET(cursor, WT_CURSTD_APPEND)) + if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) && + cbt->compare == 0 && __wt_cursor_valid(cbt, NULL)) + WT_ERR(WT_DUPLICATE_KEY); + + ret = __cursor_row_modify(session, cbt, false); + } else { + /* + * Optionally insert a new record (ignoring the application's + * record number). The real record number is allocated by the + * serialized append operation. + */ + if (append_key) cbt->iface.recno = WT_RECNO_OOB; WT_ERR(__cursor_col_search(session, cbt, NULL)); @@ -542,21 +736,9 @@ retry: WT_RET(__cursor_func_init(cbt, true)); WT_ERR(WT_DUPLICATE_KEY); WT_ERR(__cursor_col_modify(session, cbt, false)); - if (F_ISSET(cursor, WT_CURSTD_APPEND)) - cbt->iface.recno = cbt->recno; - break; - case BTREE_ROW: - WT_ERR(__cursor_row_search(session, cbt, NULL, true)); - /* - * If not overwriting, fail if the key exists, else insert the - * key/value pair. - */ - if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) && - cbt->compare == 0 && __wt_cursor_valid(cbt, NULL)) - WT_ERR(WT_DUPLICATE_KEY); - ret = __cursor_row_modify(session, cbt, false); - break; + if (append_key) + cbt->iface.recno = cbt->recno; } err: if (ret == WT_RESTART) { @@ -564,11 +746,17 @@ err: if (ret == WT_RESTART) { WT_STAT_DATA_INCR(session, cursor_restart); goto retry; } - /* Insert doesn't maintain a position across calls, clear resources. 
*/ - if (ret == 0) - WT_TRET(__curfile_leave(cbt)); + +done: /* Insert doesn't maintain a position across calls, clear resources. */ + if (ret == 0) { + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + if (append_key) + F_SET(cursor, WT_CURSTD_KEY_INT); + } + WT_TRET(__cursor_reset(cbt)); if (ret != 0) - WT_TRET(__cursor_reset(cbt)); + __cursor_state_restore(cursor, &state); + return (ret); } @@ -604,16 +792,15 @@ __curfile_update_check(WT_CURSOR_BTREE *cbt) } /* - * __wt_btcur_update_check -- + * __wt_btcur_insert_check -- * Check whether an update would conflict. * - * This can be used to replace WT_CURSOR::insert or WT_CURSOR::update, so - * they only check for conflicts without updating the tree. It is used to - * maintain snapshot isolation for transactions that span multiple chunks - * in an LSM tree. + * This can replace WT_CURSOR::insert, so it only checks for conflicts without + * updating the tree. It is used to maintain snapshot isolation for transactions + * that span multiple chunks in an LSM tree. */ int -__wt_btcur_update_check(WT_CURSOR_BTREE *cbt) +__wt_btcur_insert_check(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; WT_CURSOR *cursor; @@ -624,31 +811,35 @@ __wt_btcur_update_check(WT_CURSOR_BTREE *cbt) btree = cbt->btree; session = (WT_SESSION_IMPL *)cursor->session; -retry: WT_RET(__cursor_func_init(cbt, true)); + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key. Unlike most of the btree cursor routines, + * we don't have to save/restore the cursor key state, none of the + * work done here changes the key state. + */ + WT_ERR(__cursor_copy_int_key(cursor)); - switch (btree->type) { - case BTREE_ROW: +retry: WT_ERR(__cursor_func_init(cbt, true)); + + if (btree->type == BTREE_ROW) { WT_ERR(__cursor_row_search(session, cbt, NULL, true)); - /* - * Just check for conflicts. - */ + /* Just check for conflicts. 
*/ ret = __curfile_update_check(cbt); - break; - case BTREE_COL_FIX: - case BTREE_COL_VAR: + } else WT_ERR(__wt_illegal_value(session, NULL)); - break; - } err: if (ret == WT_RESTART) { WT_STAT_CONN_INCR(session, cursor_restart); WT_STAT_DATA_INCR(session, cursor_restart); goto retry; } - WT_TRET(__curfile_leave(cbt)); - if (ret != 0) - WT_TRET(__cursor_reset(cbt)); + + /* Insert doesn't maintain a position across calls, clear resources. */ + if (ret == 0) + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + WT_TRET(__cursor_reset(cbt)); + return (ret); } @@ -660,9 +851,11 @@ int __wt_btcur_remove(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; + WT_CURFILE_STATE state; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; + bool positioned; btree = cbt->btree; cursor = &cbt->iface; @@ -672,11 +865,69 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt) WT_STAT_DATA_INCR(session, cursor_remove); WT_STAT_DATA_INCRV(session, cursor_remove_bytes, cursor->key.size); -retry: WT_RET(__cursor_func_init(cbt, true)); + __cursor_state_save(cursor, &state); - switch (btree->type) { - case BTREE_COL_FIX: - case BTREE_COL_VAR: + /* + * WT_CURSOR.remove has a unique semantic, the cursor stays positioned + * if it starts positioned, otherwise clear the cursor on completion. + */ + positioned = F_ISSET(cursor, WT_CURSTD_KEY_INT); + + /* + * If remove positioned to an on-page key, the remove doesn't require + * another search. We don't care about the "overwrite" configuration + * because regardless of the overwrite setting, any existing record is + * removed, and the record must exist with a positioned cursor. The + * cursor won't be positioned on a page with an external key set, but + * be sure. + */ + if (__cursor_page_pinned(cbt) && F_ISSET(cursor, WT_CURSTD_KEY_INT)) { + WT_ERR(__wt_txn_autocommit_check(session)); + + /* + * The cursor position may not be exact (the cursor's comparison + * value not equal to zero). 
Correct to an exact match so we can + * remove whatever we're pointing at. + */ + cbt->compare = 0; + ret = btree->type == BTREE_ROW ? + __cursor_row_modify(session, cbt, true) : + __cursor_col_modify(session, cbt, true); + if (ret == 0) + goto done; + + /* + * The pinned page goes away if we fail for any reason, make + * sure there's a local copy of any key. (Restart could still + * use the pinned page, but that's an unlikely path.) Re-save + * the cursor state: we may retry but eventually fail. + */ + WT_TRET(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + goto err; + } + + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key. Re-save the cursor state: we may retry but + * eventually fail. + */ + WT_ERR(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + +retry: WT_ERR(__cursor_func_init(cbt, true)); + + if (btree->type == BTREE_ROW) { + WT_ERR(__cursor_row_search(session, cbt, NULL, false)); + + /* Check whether an update would conflict. */ + WT_ERR(__curfile_update_check(cbt)); + + if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) + WT_ERR(WT_NOTFOUND); + + ret = __cursor_row_modify(session, cbt, true); + } else { WT_ERR(__cursor_col_search(session, cbt, NULL)); /* @@ -703,19 +954,6 @@ retry: WT_RET(__cursor_func_init(cbt, true)); cbt->recno = cursor->recno; } else ret = __cursor_col_modify(session, cbt, true); - break; - case BTREE_ROW: - /* Remove the record if it exists. */ - WT_ERR(__cursor_row_search(session, cbt, NULL, false)); - - /* Check whether an update would conflict. 
*/ - WT_ERR(__curfile_update_check(cbt)); - - if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) - WT_ERR(WT_NOTFOUND); - - ret = __cursor_row_modify(session, cbt, true); - break; } err: if (ret == WT_RESTART) { @@ -723,15 +961,27 @@ err: if (ret == WT_RESTART) { WT_STAT_DATA_INCR(session, cursor_restart); goto retry; } + /* - * If the cursor is configured to overwrite and the record is not - * found, that is exactly what we want. + * If the cursor is configured to overwrite and the record is not found, + * that is exactly what we want, return success. */ if (F_ISSET(cursor, WT_CURSTD_OVERWRITE) && ret == WT_NOTFOUND) ret = 0; - if (ret != 0) +done: /* + * If the cursor was positioned, it stays positioned, point the cursor + * at an internal copy of the key. Otherwise, there's no position or + * key/value. + */ + if (ret == 0) + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + if (ret == 0 && positioned) + WT_TRET(__wt_key_return(session, cbt)); + else WT_TRET(__cursor_reset(cbt)); + if (ret != 0) + __cursor_state_restore(cursor, &state); return (ret); } @@ -744,6 +994,7 @@ int __wt_btcur_update(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; + WT_CURFILE_STATE state; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; @@ -756,24 +1007,71 @@ __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_STAT_DATA_INCR(session, cursor_update); WT_STAT_DATA_INCRV(session, cursor_update_bytes, cursor->value.size); + __cursor_state_save(cursor, &state); + if (btree->type == BTREE_ROW) WT_RET(__cursor_size_chk(session, &cursor->key)); WT_RET(__cursor_size_chk(session, &cursor->value)); + /* It's no longer possible to bulk-load into the tree. */ + __cursor_disable_bulk(session, btree); + /* - * The tree is no longer empty: eviction should pay attention to it, - * and it's no longer possible to bulk-load into it. + * If update positioned to an on-page key, the update doesn't require + * another search. 
We don't care about the "overwrite" configuration + * because regardless of the overwrite setting, any existing record is + * updated, and the record must exist with a positioned cursor. The + * cursor won't be positioned on a page with an external key set, but + * be sure. */ - if (btree->bulk_load_ok) { - btree->bulk_load_ok = false; - __wt_btree_evictable(session, true); + if (__cursor_page_pinned(cbt) && F_ISSET(cursor, WT_CURSTD_KEY_INT)) { + WT_ERR(__wt_txn_autocommit_check(session)); + /* + * The cursor position may not be exact (the cursor's comparison + * value not equal to zero). Correct to an exact match so we can + * update whatever we're pointing at. + */ + cbt->compare = 0; + ret = btree->type == BTREE_ROW ? + __cursor_row_modify(session, cbt, false) : + __cursor_col_modify(session, cbt, false); + if (ret == 0) + goto done; + + /* + * The pinned page goes away if we fail for any reason, make + * sure there's a local copy of any key. (Restart could still + * use the pinned page, but that's an unlikely path.) Re-save + * the cursor state: we may retry but eventually fail. + */ + WT_TRET(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + goto err; } -retry: WT_RET(__cursor_func_init(cbt, true)); + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key. Re-save the cursor state: we may retry but + * eventually fail. + */ + WT_ERR(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); - switch (btree->type) { - case BTREE_COL_FIX: - case BTREE_COL_VAR: +retry: WT_ERR(__cursor_func_init(cbt, true)); + + if (btree->type == BTREE_ROW) { + WT_ERR(__cursor_row_search(session, cbt, NULL, true)); + /* + * If not overwriting, check for conflicts and fail if the key + * does not exist. 
+ */ + if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) { + WT_ERR(__curfile_update_check(cbt)); + if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) + WT_ERR(WT_NOTFOUND); + } + ret = __cursor_row_modify(session, cbt, false); + } else { WT_ERR(__cursor_col_search(session, cbt, NULL)); /* @@ -792,20 +1090,6 @@ retry: WT_RET(__cursor_func_init(cbt, true)); WT_ERR(WT_NOTFOUND); } ret = __cursor_col_modify(session, cbt, false); - break; - case BTREE_ROW: - WT_ERR(__cursor_row_search(session, cbt, NULL, true)); - /* - * If not overwriting, check for conflicts and fail if the key - * does not exist. - */ - if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) { - WT_ERR(__curfile_update_check(cbt)); - if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) - WT_ERR(WT_NOTFOUND); - } - ret = __cursor_row_modify(session, cbt, false); - break; } err: if (ret == WT_RESTART) { @@ -822,11 +1106,14 @@ err: if (ret == WT_RESTART) { * To make this work, we add a field to the btree cursor to pass back a * pointer to the modify function's allocated update structure. */ - if (ret == 0) +done: if (ret == 0) WT_TRET(__wt_kv_return(session, cbt, cbt->modify_update)); - if (ret != 0) + if (ret != 0) { WT_TRET(__cursor_reset(cbt)); + __cursor_state_restore(cursor, &state); + } + return (ret); } @@ -955,9 +1242,12 @@ __cursor_truncate(WT_SESSION_IMPL *session, WT_DECL_RET; /* - * First, call the standard cursor remove method to do a full search and - * re-position the cursor because we don't have a saved copy of the - * page's write generation information, which we need to remove records. + * First, call the cursor search method to re-position the cursor: we + * may not have a cursor position (if the higher-level truncate code + * switched the cursors to have an "external" cursor key, and because + * we don't save a copy of the page's write generation information, + * which we need to remove records. 
+ * * Once that's done, we can delete records without a full search, unless * we encounter a restart error because the page was modified by some * other thread of control; in that case, repeat the full search to @@ -970,20 +1260,19 @@ __cursor_truncate(WT_SESSION_IMPL *session, * instantiated the end cursor, so we know that page is pinned in memory * and we can proceed without concern. */ -retry: WT_RET(__wt_btcur_remove(start)); +retry: WT_RET(__wt_btcur_search(start)); + WT_ASSERT(session, + F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); + + for (;;) { + if ((ret = rmfunc(session, start, 1)) != 0) + break; - /* - * Reset ret each time through so that we don't loop forever in - * the cursor equals case. - */ - for (ret = 0;;) { if (stop != NULL && __cursor_equals(start, stop)) break; if ((ret = __wt_btcur_next(start, true)) != 0) break; - start->compare = 0; /* Exact match */ - if ((ret = rmfunc(session, start, 1)) != 0) - break; + start->compare = 0; /* Exact match */ } if (ret == WT_RESTART) { @@ -1016,29 +1305,32 @@ __cursor_truncate_fix(WT_SESSION_IMPL *session, * record 37, records 1-36 magically appear. Those records can't be * deleted, which means we have to ignore already "deleted" records. * - * First, call the standard cursor remove method to do a full search and - * re-position the cursor because we don't have a saved copy of the - * page's write generation information, which we need to remove records. + * First, call the cursor search method to re-position the cursor: we + * may not have a cursor position (if the higher-level truncate code + * switched the cursors to have an "external" cursor key, and because + * we don't save a copy of the page's write generation information, + * which we need to remove records. 
+ * * Once that's done, we can delete records without a full search, unless * we encounter a restart error because the page was modified by some * other thread of control; in that case, repeat the full search to * refresh the page's modification information. */ -retry: WT_RET(__wt_btcur_remove(start)); - /* - * Reset ret each time through so that we don't loop forever in - * the cursor equals case. - */ - for (ret = 0;;) { +retry: WT_RET(__wt_btcur_search(start)); + WT_ASSERT(session, + F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); + + for (;;) { + value = (const uint8_t *)start->iface.value.data; + if (*value != 0 && + (ret = rmfunc(session, start, 1)) != 0) + break; + if (stop != NULL && __cursor_equals(start, stop)) break; if ((ret = __wt_btcur_next(start, true)) != 0) break; start->compare = 0; /* Exact match */ - value = (const uint8_t *)start->iface.value.data; - if (*value != 0 && - (ret = rmfunc(session, start, 1)) != 0) - break; } if (ret == WT_RESTART) { @@ -1158,7 +1450,7 @@ __wt_btcur_close(WT_CURSOR_BTREE *cbt, bool lowlevel) * Skip the usual cursor tear-down in that case. 
*/ if (!lowlevel) - ret = __curfile_leave(cbt); + ret = __cursor_reset(cbt); __wt_buf_free(session, &cbt->_row_key); __wt_buf_free(session, &cbt->_tmp); diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c index d664da2ebd3..d3f02e29b90 100644 --- a/src/third_party/wiredtiger/src/btree/bt_debug.c +++ b/src/third_party/wiredtiger/src/btree/bt_debug.c @@ -34,7 +34,7 @@ static const /* Output separator */ static int __debug_cell(WT_DBG *, const WT_PAGE_HEADER *, WT_CELL_UNPACK *); static int __debug_cell_data( - WT_DBG *, WT_PAGE *, int type, const char *, WT_CELL_UNPACK *); + WT_DBG *, WT_PAGE *, int, const char *, WT_CELL_UNPACK *); static int __debug_col_skip(WT_DBG *, WT_INSERT_HEAD *, const char *, bool); static int __debug_config(WT_SESSION_IMPL *, WT_DBG *, const char *); static int __debug_dsk_cell(WT_DBG *, const WT_PAGE_HEADER *); @@ -64,7 +64,7 @@ __wt_debug_set_verbose(WT_SESSION_IMPL *session, const char *v) const char *cfg[2] = { NULL, NULL }; char buf[256]; - snprintf(buf, sizeof(buf), "verbose=[%s]", v); + WT_RET(__wt_snprintf(buf, sizeof(buf), "verbose=[%s]", v)); cfg[0] = buf; return (__wt_verbose_config(session, cfg)); } @@ -87,6 +87,7 @@ __debug_hex_byte(WT_DBG *ds, uint8_t v) static int __dmsg_event(WT_DBG *ds, const char *fmt, ...) { + WT_DECL_RET; WT_ITEM *msg; WT_SESSION_IMPL *session; size_t len, space; @@ -107,8 +108,9 @@ __dmsg_event(WT_DBG *ds, const char *fmt, ...) p = (char *)msg->mem + msg->size; space = msg->memsize - msg->size; va_start(ap, fmt); - len = (size_t)vsnprintf(p, space, fmt, ap); + ret = __wt_vsnprintf_len_set(p, space, &len, fmt, ap); va_end(ap); + WT_RET(ret); /* Check if there was enough space. 
*/ if (len < space) { @@ -447,13 +449,14 @@ __debug_tree_shape_info(WT_PAGE *page) v = page->memory_footprint; if (v >= WT_GIGABYTE) - snprintf(buf, sizeof(buf), + (void)__wt_snprintf(buf, sizeof(buf), "(%p %" PRIu64 "G)", (void *)page, v / WT_GIGABYTE); else if (v >= WT_MEGABYTE) - snprintf(buf, sizeof(buf), + (void)__wt_snprintf(buf, sizeof(buf), "(%p %" PRIu64 "M)", (void *)page, v / WT_MEGABYTE); else - snprintf(buf, sizeof(buf), "(%p %" PRIu64 ")", (void *)page, v); + (void)__wt_snprintf(buf, sizeof(buf), + "(%p %" PRIu64 ")", (void *)page, v); return (buf); } @@ -838,7 +841,8 @@ __debug_page_col_var(WT_DBG *ds, WT_REF *ref) __wt_cell_unpack(cell, unpack); rle = __wt_cell_rle(unpack); } - snprintf(tag, sizeof(tag), "%" PRIu64 " %" PRIu64, recno, rle); + WT_RET(__wt_snprintf( + tag, sizeof(tag), "%" PRIu64 " %" PRIu64, recno, rle)); WT_RET( __debug_cell_data(ds, page, WT_PAGE_COL_VAR, tag, unpack)); diff --git a/src/third_party/wiredtiger/src/btree/bt_discard.c b/src/third_party/wiredtiger/src/btree/bt_discard.c index d2beb84fee9..bab7b8145d6 100644 --- a/src/third_party/wiredtiger/src/btree/bt_discard.c +++ b/src/third_party/wiredtiger/src/btree/bt_discard.c @@ -16,13 +16,14 @@ static void __free_skip_array( WT_SESSION_IMPL *, WT_INSERT_HEAD **, uint32_t, bool); static void __free_skip_list(WT_SESSION_IMPL *, WT_INSERT *, bool); static void __free_update(WT_SESSION_IMPL *, WT_UPDATE **, uint32_t, bool); +static void __page_out_int(WT_SESSION_IMPL *, WT_PAGE **, bool); /* - * __wt_ref_out -- + * __wt_ref_out_int -- * Discard an in-memory page, freeing all memory associated with it. 
*/ void -__wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref) +__wt_ref_out_int(WT_SESSION_IMPL *session, WT_REF *ref, bool rewrite) { /* * A version of the page-out function that allows us to make additional @@ -56,15 +57,25 @@ __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref) } #endif - __wt_page_out(session, &ref->page); + __page_out_int(session, &ref->page, rewrite); } /* - * __wt_page_out -- + * __wt_ref_out -- * Discard an in-memory page, freeing all memory associated with it. */ void -__wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) +__wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref) +{ + __wt_ref_out_int(session, ref, false); +} + +/* + * __page_out_int -- + * Discard an in-memory page, freeing all memory associated with it. + */ +static void +__page_out_int(WT_SESSION_IMPL *session, WT_PAGE **pagep, bool rewrite) { WT_PAGE *page; WT_PAGE_HEADER *dsk; @@ -103,7 +114,7 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) } /* Update the cache's information. */ - __wt_cache_page_evict(session, page); + __wt_cache_page_evict(session, page, rewrite); dsk = (WT_PAGE_HEADER *)page->dsk; if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC)) @@ -148,6 +159,16 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) } /* + * __wt_page_out -- + * Discard an in-memory page, freeing all memory associated with it. + */ +void +__wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) +{ + __page_out_int(session, pagep, false); +} + +/* * __free_page_modify -- * Discard the page's associated modification structures. 
*/ diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c index 6ed70788759..d76720b19ae 100644 --- a/src/third_party/wiredtiger/src/btree/bt_handle.c +++ b/src/third_party/wiredtiger/src/btree/bt_handle.c @@ -15,6 +15,44 @@ static int __btree_preload(WT_SESSION_IMPL *); static int __btree_tree_open_empty(WT_SESSION_IMPL *, bool); /* + * __btree_clear -- + * Clear a Btree, either on handle discard or re-open. + */ +static int +__btree_clear(WT_SESSION_IMPL *session) +{ + WT_BTREE *btree; + WT_DECL_RET; + + btree = S2BT(session); + + /* + * If the tree hasn't gone through an open/close cycle, there's no + * cleanup to be done. + */ + if (!F_ISSET(btree, WT_BTREE_CLOSED)) + return (0); + + /* Close the Huffman tree. */ + __wt_btree_huffman_close(session); + + /* Terminate any associated collator. */ + if (btree->collator_owned && btree->collator->terminate != NULL) + WT_TRET(btree->collator->terminate( + btree->collator, &session->iface)); + + /* Destroy locks. */ + __wt_rwlock_destroy(session, &btree->ovfl_lock); + __wt_spin_destroy(session, &btree->flush_lock); + + /* Free allocated memory. */ + __wt_free(session, btree->key_format); + __wt_free(session, btree->value_format); + + return (ret); +} + +/* * __wt_btree_open -- * Open a Btree. */ @@ -28,12 +66,27 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) WT_DATA_HANDLE *dhandle; WT_DECL_RET; size_t root_addr_size; + uint32_t mask; uint8_t root_addr[WT_BTREE_MAX_ADDR_COOKIE]; const char *filename; bool creation, forced_salvage, readonly; - dhandle = session->dhandle; btree = S2BT(session); + dhandle = session->dhandle; + + /* + * This may be a re-open of an underlying object and we have to clean + * up. We can't clear the operation flags, however, they're set by the + * connection handle software that called us. 
+ */ + WT_RET(__btree_clear(session)); + + mask = F_MASK(btree, WT_BTREE_SPECIAL_FLAGS); + memset(btree, 0, sizeof(*btree)); + btree->flags = mask; + + /* Set the data handle first, our called functions reasonably use it. */ + btree->dhandle = dhandle; /* Checkpoint files are readonly. */ readonly = dhandle->checkpoint != NULL || @@ -126,6 +179,20 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) } } + /* + * Eviction ignores trees until the handle's open flag is set, configure + * eviction before that happens. + * + * Files that can still be bulk-loaded cannot be evicted. + * Permanently cache-resident files can never be evicted. + * Special operations don't enable eviction. (The underlying commands + * may turn on eviction, but it's their decision.) + */ + if (btree->original || + F_ISSET(btree, WT_BTREE_IN_MEMORY | WT_BTREE_REBALANCE | + WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) + WT_ERR(__wt_evict_file_exclusive_on(session)); + if (0) { err: WT_TRET(__wt_btree_close(session)); } @@ -147,7 +214,24 @@ __wt_btree_close(WT_SESSION_IMPL *session) btree = S2BT(session); + /* + * The close process isn't the same as discarding the handle: we might + * re-open the handle, which isn't a big deal, but the backing blocks + * for the handle may not yet have been discarded from the cache, and + * eviction uses WT_BTREE structure elements. Free backing resources + * but leave the rest alone, and we'll discard the structure when we + * discard the data handle. + * + * Handles can be closed multiple times, ignore all but the first. + */ + if (F_ISSET(btree, WT_BTREE_CLOSED)) + return (0); + F_SET(btree, WT_BTREE_CLOSED); + + /* Discard any underlying block manager resources. */ if ((bm = btree->bm) != NULL) { + btree->bm = NULL; + /* Unload the checkpoint, unless it's a special command. 
*/ if (!F_ISSET(btree, WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) @@ -155,33 +239,26 @@ __wt_btree_close(WT_SESSION_IMPL *session) /* Close the underlying block manager reference. */ WT_TRET(bm->close(bm, session)); - - btree->bm = NULL; } - /* Close the Huffman tree. */ - __wt_btree_huffman_close(session); - - /* Destroy locks. */ - __wt_rwlock_destroy(session, &btree->ovfl_lock); - __wt_spin_destroy(session, &btree->flush_lock); - - /* Free allocated memory. */ - __wt_free(session, btree->key_format); - __wt_free(session, btree->value_format); + return (ret); +} - if (btree->collator_owned) { - if (btree->collator->terminate != NULL) - WT_TRET(btree->collator->terminate( - btree->collator, &session->iface)); - btree->collator_owned = 0; - } - btree->collator = NULL; - btree->kencryptor = NULL; +/* + * __wt_btree_discard -- + * Discard a Btree. + */ +int +__wt_btree_discard(WT_SESSION_IMPL *session) +{ + WT_BTREE *btree; + WT_DECL_RET; - btree->bulk_load_ok = false; + ret = __btree_clear(session); - F_CLR(btree, WT_BTREE_SPECIAL_FLAGS); + btree = S2BT(session); + __wt_overwrite_and_free(session, btree); + session->dhandle->handle = NULL; return (ret); } @@ -267,9 +344,9 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) WT_RET(__wt_config_gets(session, cfg, "cache_resident", &cval)); if (cval.val) - F_SET(btree, WT_BTREE_IN_MEMORY | WT_BTREE_NO_EVICTION); + F_SET(btree, WT_BTREE_IN_MEMORY); else - F_CLR(btree, WT_BTREE_IN_MEMORY | WT_BTREE_NO_EVICTION); + F_CLR(btree, WT_BTREE_IN_MEMORY); WT_RET(__wt_config_gets(session, cfg, "ignore_in_memory_cache_size", &cval)); @@ -282,6 +359,14 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) } else F_CLR(btree, WT_BTREE_IGNORE_CACHE); + /* + * The metadata isn't blocked by in-memory cache limits because metadata + * "unroll" is performed by updates that are potentially blocked by the + * cache-full checks. 
+ */ + if (WT_IS_METADATA(btree->dhandle)) + F_SET(btree, WT_BTREE_IGNORE_CACHE); + WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval)); if (cval.val) F_CLR(btree, WT_BTREE_NO_LOGGING); @@ -482,13 +567,10 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, bool creation) /* * Newly created objects can be used for cursor inserts or for bulk * loads; set a flag that's cleared when a row is inserted into the - * tree. Objects being bulk-loaded cannot be evicted, we set it - * globally, there's no point in searching empty trees for eviction. + * tree. */ - if (creation) { - btree->bulk_load_ok = true; - __wt_btree_evictable(session, false); - } + if (creation) + btree->original = 1; /* * A note about empty trees: the initial tree is a single root page. @@ -581,27 +663,6 @@ __wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep) } /* - * __wt_btree_evictable -- - * Setup or release a cache-resident tree. - */ -void -__wt_btree_evictable(WT_SESSION_IMPL *session, bool on) -{ - WT_BTREE *btree; - - btree = S2BT(session); - - /* Permanently cache-resident files can never be evicted. */ - if (F_ISSET(btree, WT_BTREE_IN_MEMORY)) - return; - - if (on) - F_CLR(btree, WT_BTREE_NO_EVICTION); - else - F_SET(btree, WT_BTREE_NO_EVICTION); -} - -/* * __btree_preload -- * Pre-load internal pages. */ @@ -727,9 +788,16 @@ __btree_page_sizes(WT_SESSION_IMPL *session) * Get the split percentage (reconciliation splits pages into smaller * than the maximum page size chunks so we don't split every time a * new entry is added). Determine how large newly split pages will be. + * Set to the minimum, if the read value is less than that. 
*/ WT_RET(__wt_config_gets(session, cfg, "split_pct", &cval)); - btree->split_pct = (int)cval.val; + if (cval.val < WT_BTREE_MIN_SPLIT_PCT) { + btree->split_pct = WT_BTREE_MIN_SPLIT_PCT; + WT_RET(__wt_msg(session, + "Re-setting split_pct for %s to the minimum allowed of " + "%d%%.", session->dhandle->name, WT_BTREE_MIN_SPLIT_PCT)); + } else + btree->split_pct = (int)cval.val; intl_split_size = __wt_split_page_size(btree, btree->maxintlpage); leaf_split_size = __wt_split_page_size(btree, btree->maxleafpage); diff --git a/src/third_party/wiredtiger/src/btree/bt_io.c b/src/third_party/wiredtiger/src/btree/bt_io.c index a8645f79dbe..b5e4d52394a 100644 --- a/src/third_party/wiredtiger/src/btree/bt_io.c +++ b/src/third_party/wiredtiger/src/btree/bt_io.c @@ -183,7 +183,7 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t dst_len, len, result_len, size, src_len; int compression_failed; /* Extension API, so not a bool. */ uint8_t *dst, *src; - bool data_checksum, encrypted; + bool data_checksum, encrypted, timer; btree = S2BT(session); bm = btree->bm; @@ -216,7 +216,7 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, &result_len)); WT_ASSERT(session, dsk->mem_size == result_len + WT_BLOCK_COMPRESS_SKIP); - ctmp->size = (uint32_t)result_len + WT_BLOCK_COMPRESS_SKIP; + ctmp->size = result_len + WT_BLOCK_COMPRESS_SKIP; ip = ctmp; } else { WT_ASSERT(session, dsk->mem_size == buf->size); @@ -357,7 +357,8 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, data_checksum = !compressed; break; } - if (!F_ISSET(session, WT_SESSION_INTERNAL)) + timer = !F_ISSET(session, WT_SESSION_INTERNAL); + if (timer) __wt_epoch(session, &start); /* Call the block manager to write the block. 
*/ @@ -367,7 +368,7 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, bm, session, ip, addr, addr_sizep, data_checksum, checkpoint_io)); /* Update some statistics now that the write is done */ - if (!F_ISSET(session, WT_SESSION_INTERNAL)) { + if (timer) { __wt_epoch(session, &stop); WT_STAT_CONN_INCR(session, cache_write_app_count); WT_STAT_CONN_INCRV(session, cache_write_app_time, diff --git a/src/third_party/wiredtiger/src/btree/bt_random.c b/src/third_party/wiredtiger/src/btree/bt_random.c index 4c7ff861d26..c5948ec4ab5 100644 --- a/src/third_party/wiredtiger/src/btree/bt_random.c +++ b/src/third_party/wiredtiger/src/btree/bt_random.c @@ -178,6 +178,8 @@ __wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, bool eviction) WT_REF *current, *descent; uint32_t flags, i, entries, retry; + *refp = NULL; + btree = S2BT(session); current = NULL; retry = 100; @@ -201,16 +203,6 @@ restart: /* current = &btree->root; for (;;) { page = current->page; - /* - * When walking a tree for eviction, an exclusive operation may - * be in progress leaving the root page is not valid. Just give - * up in that case. 
- */ - if (page == NULL) { - WT_ASSERT(session, eviction); - break; - } - if (!WT_PAGE_IS_INTERNAL(page)) break; @@ -300,14 +292,16 @@ int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; + WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; WT_UPDATE *upd; wt_off_t size; uint64_t n, skip; - session = (WT_SESSION_IMPL *)cbt->iface.session; btree = cbt->btree; + cursor = &cbt->iface; + session = (WT_SESSION_IMPL *)cbt->iface.session; /* * Only supports row-store: applications can trivially select a random @@ -320,6 +314,8 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_STAT_CONN_INCR(session, cursor_next); WT_STAT_DATA_INCR(session, cursor_next); + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + #ifdef HAVE_DIAGNOSTIC /* * Under some conditions we end up using the underlying cursor.next to @@ -328,7 +324,6 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) */ __wt_cursor_key_order_reset(cbt); #endif - /* * If we don't have a current position in the tree, or if retrieving * random values without sampling, pick a roughly random leaf page in diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c index 39f9e1159cb..64874547b9c 100644 --- a/src/third_party/wiredtiger/src/btree/bt_read.c +++ b/src/third_party/wiredtiger/src/btree/bt_read.c @@ -369,6 +369,7 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref) size_t addr_size; uint32_t previous_state; const uint8_t *addr; + bool timer; btree = S2BT(session); page = NULL; @@ -408,10 +409,11 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref) * There's an address, read or map the backing disk page and build an * in-memory version of the page. 
*/ - if (!F_ISSET(session, WT_SESSION_INTERNAL)) + timer = !F_ISSET(session, WT_SESSION_INTERNAL); + if (timer) __wt_epoch(session, &start); WT_ERR(__wt_bt_read(session, &tmp, addr, addr_size)); - if (!F_ISSET(session, WT_SESSION_INTERNAL)) { + if (timer) { __wt_epoch(session, &stop); WT_STAT_CONN_INCR(session, cache_read_app_count); WT_STAT_CONN_INCRV(session, cache_read_app_time, @@ -590,8 +592,9 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags */ if (LF_ISSET(WT_READ_NO_EVICT) || F_ISSET(session, WT_SESSION_NO_EVICTION) || - (F_ISSET(btree, WT_BTREE_NO_EVICTION) && - !F_ISSET(btree, WT_BTREE_NO_RECONCILE))) + btree->lsm_primary || + (btree->evict_disabled > 0 && + !F_ISSET(btree, WT_BTREE_ALLOW_SPLITS))) goto skip_evict; /* diff --git a/src/third_party/wiredtiger/src/btree/bt_rebalance.c b/src/third_party/wiredtiger/src/btree/bt_rebalance.c index 24b4f7bb33d..68848c7c8f5 100644 --- a/src/third_party/wiredtiger/src/btree/bt_rebalance.c +++ b/src/third_party/wiredtiger/src/btree/bt_rebalance.c @@ -406,12 +406,10 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) WT_BTREE *btree; WT_DECL_RET; WT_REBALANCE_STUFF *rs, _rstuff; - bool evict_reset; WT_UNUSED(cfg); btree = S2BT(session); - evict_reset = false; /* * If the tree has never been written to disk, we're done, rebalance @@ -433,14 +431,6 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) /* Set the internal page tree type. */ rs->type = btree->root.page->type; - /* - * Get exclusive access to the file. (Not required, the only page in the - * cache is the root page, and that cannot be evicted; however, this way - * eviction ignores the tree entirely.) - */ - WT_ERR(__wt_evict_file_exclusive_on(session)); - evict_reset = true; - /* Recursively walk the tree. 
*/ switch (rs->type) { case WT_PAGE_ROW_INT: @@ -471,10 +461,7 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) btree->root.page = rs->root; rs->root = NULL; -err: if (evict_reset) - __wt_evict_file_exclusive_off(session); - - /* Discard any leftover root page we created. */ +err: /* Discard any leftover root page we created. */ if (rs->root != NULL) { __wt_page_modify_clear(session, rs->root); __wt_page_out(session, &rs->root); diff --git a/src/third_party/wiredtiger/src/btree/bt_ret.c b/src/third_party/wiredtiger/src/btree/bt_ret.c index 6409a1a180c..f17fa1b85d1 100644 --- a/src/third_party/wiredtiger/src/btree/bt_ret.c +++ b/src/third_party/wiredtiger/src/btree/bt_ret.c @@ -9,64 +9,21 @@ #include "wt_internal.h" /* - * __wt_kv_return -- - * Return a page referenced key/value pair to the application. + * __key_return -- + * Change the cursor to reference an internal return key. */ -int -__wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) +static inline int +__key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) { - WT_BTREE *btree; - WT_CELL *cell; - WT_CELL_UNPACK unpack; WT_CURSOR *cursor; WT_ITEM *tmp; WT_PAGE *page; WT_ROW *rip; - uint8_t v; - - btree = S2BT(session); page = cbt->ref->page; cursor = &cbt->iface; - switch (page->type) { - case WT_PAGE_COL_FIX: - /* - * The interface cursor's record has usually been set, but that - * isn't universally true, specifically, cursor.search_near may - * call here without first setting the interface cursor. - */ - cursor->recno = cbt->recno; - - /* If the cursor references a WT_UPDATE item, return it. */ - if (upd != NULL) { - cursor->value.data = WT_UPDATE_DATA(upd); - cursor->value.size = upd->size; - return (0); - } - - /* Take the value from the original page. 
*/ - v = __bit_getv_recno(cbt->ref, cursor->recno, btree->bitcnt); - return (__wt_buf_set(session, &cursor->value, &v, 1)); - case WT_PAGE_COL_VAR: - /* - * The interface cursor's record has usually been set, but that - * isn't universally true, specifically, cursor.search_near may - * call here without first setting the interface cursor. - */ - cursor->recno = cbt->recno; - - /* If the cursor references a WT_UPDATE item, return it. */ - if (upd != NULL) { - cursor->value.data = WT_UPDATE_DATA(upd); - cursor->value.size = upd->size; - return (0); - } - - /* Take the value from the original page cell. */ - cell = WT_COL_PTR(page, &page->pg_var[cbt->slot]); - break; - case WT_PAGE_ROW_LEAF: + if (page->type == WT_PAGE_ROW_LEAF) { rip = &page->pg_row[cbt->slot]; /* @@ -79,7 +36,10 @@ __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) if (cbt->ins != NULL) { cursor->key.data = WT_INSERT_KEY(cbt->ins); cursor->key.size = WT_INSERT_KEY_SIZE(cbt->ins); - } else if (cbt->compare == 0) { + return (0); + } + + if (cbt->compare == 0) { /* * If not in an insert list and there's an exact match, * the row-store search function built the key we want @@ -97,16 +57,51 @@ __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) cursor->key.data = cbt->row_key->data; cursor->key.size = cbt->row_key->size; - } else - WT_RET(__wt_row_leaf_key( - session, page, rip, &cursor->key, false)); - - /* If the cursor references a WT_UPDATE item, return it. */ - if (upd != NULL) { - cursor->value.data = WT_UPDATE_DATA(upd); - cursor->value.size = upd->size; return (0); } + return (__wt_row_leaf_key( + session, page, rip, &cursor->key, false)); + } + + /* + * WT_PAGE_COL_FIX, WT_PAGE_COL_VAR: + * The interface cursor's record has usually been set, but that + * isn't universally true, specifically, cursor.search_near may call + * here without first setting the interface cursor. 
+ */ + cursor->recno = cbt->recno; + return (0); +} + +/* + * __value_return -- + * Change the cursor to reference an internal return value. + */ +static inline int +__value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) +{ + WT_BTREE *btree; + WT_CELL *cell; + WT_CELL_UNPACK unpack; + WT_CURSOR *cursor; + WT_PAGE *page; + WT_ROW *rip; + uint8_t v; + + btree = S2BT(session); + + page = cbt->ref->page; + cursor = &cbt->iface; + + /* If the cursor references a WT_UPDATE item, return it. */ + if (upd != NULL) { + cursor->value.data = WT_UPDATE_DATA(upd); + cursor->value.size = upd->size; + return (0); + } + + if (page->type == WT_PAGE_ROW_LEAF) { + rip = &page->pg_row[cbt->slot]; /* Simple values have their location encoded in the WT_ROW. */ if (__wt_row_leaf_value(page, rip, &cursor->value)) @@ -121,13 +116,65 @@ __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) cursor->value.size = 0; return (0); } - break; - WT_ILLEGAL_VALUE(session); + __wt_cell_unpack(cell, &unpack); + return (__wt_page_cell_data_ref( + session, page, &unpack, &cursor->value)); + } - /* The value is an on-page cell, unpack and expand it as necessary. */ - __wt_cell_unpack(cell, &unpack); - WT_RET(__wt_page_cell_data_ref(session, page, &unpack, &cursor->value)); + if (page->type == WT_PAGE_COL_VAR) { + /* Take the value from the original page cell. */ + cell = WT_COL_PTR(page, &page->pg_var[cbt->slot]); + __wt_cell_unpack(cell, &unpack); + return (__wt_page_cell_data_ref( + session, page, &unpack, &cursor->value)); + } + + /* WT_PAGE_COL_FIX: Take the value from the original page. */ + v = __bit_getv_recno(cbt->ref, cursor->recno, btree->bitcnt); + return (__wt_buf_set(session, &cursor->value, &v, 1)); +} + +/* + * __wt_key_return -- + * Change the cursor to reference an internal return key. 
+ */ +int +__wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) +{ + WT_CURSOR *cursor; + + cursor = &cbt->iface; + + /* + * We may already have an internal key, in which case the cursor may + * not be set up to get another copy (for example, when we rely on a + * search-function result). + */ + F_CLR(cursor, WT_CURSTD_KEY_EXT); + if (!F_ISSET(cursor, WT_CURSTD_KEY_INT)) { + WT_RET(__key_return(session, cbt)); + F_SET(cursor, WT_CURSTD_KEY_INT); + } + return (0); +} + +/* + * __wt_kv_return -- + * Return a page referenced key/value pair to the application. + */ +int +__wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) +{ + WT_CURSOR *cursor; + + cursor = &cbt->iface; + + WT_RET(__wt_key_return(session, cbt)); + + F_CLR(cursor, WT_CURSTD_VALUE_EXT); + WT_RET(__value_return(session, cbt, upd)); + F_SET(cursor, WT_CURSTD_VALUE_INT); return (0); } diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c index fea979cac6e..165f932afb2 100644 --- a/src/third_party/wiredtiger/src/btree/bt_slvg.c +++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c @@ -166,13 +166,11 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) WT_DECL_RET; WT_STUFF *ss, stuff; uint32_t i, leaf_cnt; - bool evict_reset; WT_UNUSED(cfg); btree = S2BT(session); bm = btree->bm; - evict_reset = false; WT_CLEAR(stuff); ss = &stuff; @@ -184,13 +182,6 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) WT_ERR(__wt_scr_alloc(session, 0, &ss->tmp2)); /* - * Salvage handles its own page eviction; get exclusive access to the - * file, have eviction ignore the tree entirely. - */ - WT_ERR(__wt_evict_file_exclusive_on(session)); - evict_reset = true; - - /* * Step 1: * Inform the underlying block manager that we're salvaging the file. 
*/ @@ -350,9 +341,6 @@ err: WT_TRET(bm->salvage_end(bm, session)); if (ss->root_ref.page != NULL) __wt_ref_out(session, &ss->root_ref); - if (evict_reset) - __wt_evict_file_exclusive_off(session); - /* Discard the leaf and overflow page memory. */ WT_TRET(__slvg_cleanup(session, ss)); diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index 6b2100ec7e3..49043c8bab4 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -10,8 +10,8 @@ #define WT_MEM_TRANSFER(from_decr, to_incr, len) do { \ size_t __len = (len); \ - from_decr += __len; \ - to_incr += __len; \ + (from_decr) += __len; \ + (to_incr) += __len; \ } while (0) /* @@ -119,7 +119,7 @@ __wt_split_stash_discard(WT_SESSION_IMPL *session) ++i, ++stash) { if (stash->p == NULL) continue; - else if (stash->split_gen >= oldest) + if (stash->split_gen >= oldest) break; /* * It's a bad thing if another thread is in this memory after @@ -2274,7 +2274,7 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi) * reconciliation, do it now. */ __wt_page_modify_clear(session, page); - __wt_ref_out(session, ref); + __wt_ref_out_int(session, ref, true); /* Swap the new page into place. 
*/ ref->page = new->page; diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c index 7bf15baa67f..ead6ccc4ac0 100644 --- a/src/third_party/wiredtiger/src/btree/bt_sync.c +++ b/src/third_party/wiredtiger/src/btree/bt_sync.c @@ -78,6 +78,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) uint64_t internal_bytes, internal_pages, leaf_bytes, leaf_pages; uint64_t oldest_id, saved_pinned_id; uint32_t flags; + bool timer; conn = S2C(session); btree = S2BT(session); @@ -88,7 +89,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) internal_bytes = leaf_bytes = 0; internal_pages = leaf_pages = 0; - if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) + timer = WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT); + if (timer) __wt_epoch(session, &start); switch (syncop) { @@ -186,9 +188,9 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) * to grow significantly larger than the configured maximum * size. */ - F_SET(btree, WT_BTREE_NO_RECONCILE); + F_SET(btree, WT_BTREE_ALLOW_SPLITS); ret = __wt_evict_file_exclusive_on(session); - F_CLR(btree, WT_BTREE_NO_RECONCILE); + F_CLR(btree, WT_BTREE_ALLOW_SPLITS); WT_ERR(ret); __wt_evict_file_exclusive_off(session); @@ -242,7 +244,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) break; } - if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) { + if (timer) { __wt_epoch(session, &end); __wt_verbose(session, WT_VERB_CHECKPOINT, "__sync_file WT_SYNC_%s wrote: %" PRIu64 diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c index 05990918215..7475811adc5 100644 --- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c +++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c @@ -216,13 +216,11 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]) ckpt->raw.data, ckpt->raw.size, root_addr, &root_addr_size, true)); - /* - * Ignore trees with no root page. 
- * Verify, then discard the checkpoint from the cache. - */ - if (root_addr_size != 0 && - (ret = __wt_btree_tree_open( - session, root_addr, root_addr_size)) == 0) { + /* Skip trees with no root page. */ + if (root_addr_size != 0) { + WT_ERR(__wt_btree_tree_open( + session, root_addr, root_addr_size)); + if (WT_VRFY_DUMP(vs)) WT_ERR(__wt_msg(session, "Root: %s %s", __wt_addr_string(session, @@ -230,14 +228,38 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]) __wt_page_type_string( btree->root.page->type))); + __wt_evict_file_exclusive_off(session); + + /* Verify the tree. */ WT_WITH_PAGE_INDEX(session, ret = __verify_tree(session, &btree->root, vs)); + /* + * We have an exclusive lock on the handle, but we're + * swapping root pages in-and-out of that handle, and + * there's a race with eviction entering the tree and + * seeing an invalid root page. Eviction must work on + * trees being verified (else we'd have to do our own + * eviction), lock eviction out whenever we're loading + * a new root page. This loops works because we are + * called with eviction locked out, so we release the + * lock at the top of the loop and re-acquire it here. + */ + WT_TRET(__wt_evict_file_exclusive_on(session)); WT_TRET(__wt_cache_op(session, WT_SYNC_DISCARD)); } /* Unload the checkpoint. */ WT_TRET(bm->checkpoint_unload(bm, session)); + + /* + * We've finished one checkpoint's verification (verification, + * then cache eviction and checkpoint unload): if any errors + * occurred, quit. Done this way because otherwise we'd need + * at least two more state variables on error, one to know if + * we need to discard the tree from the cache and one to know + * if we need to unload the checkpoint. + */ WT_ERR(ret); /* Display the tree shape. */ @@ -252,7 +274,7 @@ err: /* Inform the underlying block manager we're done. */ /* Discard the list of checkpoints. 
*/ if (ckptbase != NULL) - __wt_meta_ckptlist_free(session, ckptbase); + __wt_meta_ckptlist_free(session, &ckptbase); /* Free allocated memory. */ __wt_scr_free(session, &vs->max_key); diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c index 3a6fd8261ba..a4071c44aee 100644 --- a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c +++ b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c @@ -203,7 +203,8 @@ __verify_dsk_row( WT_ITEM *last; enum { FIRST, WAS_KEY, WAS_VALUE } last_cell_type; void *huffman; - uint32_t cell_num, cell_type, i, key_cnt, prefix; + size_t prefix; + uint32_t cell_num, cell_type, i, key_cnt; uint8_t *end; int cmp; @@ -343,8 +344,9 @@ __verify_dsk_row( if (cell_num > 1 && prefix > last->size) WT_ERR_VRFY(session, "key %" PRIu32 " on page at %s has a prefix " - "compression count of %" PRIu32 ", larger than " - "the length of the previous key, %" WT_SIZET_FMT, + "compression count of %" WT_SIZET_FMT + ", larger than the length of the previous key, %" + WT_SIZET_FMT, cell_num, tag, prefix, last->size); /* diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c index ddaa2e5f70b..86484feb7c9 100644 --- a/src/third_party/wiredtiger/src/btree/bt_walk.c +++ b/src/third_party/wiredtiger/src/btree/bt_walk.c @@ -348,16 +348,19 @@ __tree_walk_internal(WT_SESSION_IMPL *session, /* If no page is active, begin a walk from the start/end of the tree. */ if (ref == NULL) { restart: /* - * We can reach here with a NULL or root reference; the release + * We can be here with a NULL or root WT_REF; the page release * function handles them internally, don't complicate this code * by calling them out. */ WT_ERR(__wt_page_release(session, couple, flags)); - couple = couple_orig = ref = &btree->root; - if (ref->page == NULL) - goto done; + /* + * We're not supposed to walk trees without root pages. 
As this + * has not always been the case, assert to debug that change. + */ + WT_ASSERT(session, btree->root.page != NULL); + couple = couple_orig = ref = &btree->root; initial_descent = true; goto descend; } diff --git a/src/third_party/wiredtiger/src/config/config_api.c b/src/third_party/wiredtiger/src/config/config_api.c index 05c5c1287a7..c1299baaafe 100644 --- a/src/third_party/wiredtiger/src/config/config_api.c +++ b/src/third_party/wiredtiger/src/config/config_api.c @@ -215,7 +215,7 @@ __wt_configure_method(WT_SESSION_IMPL *session, WT_CONFIG_ENTRY *entry; WT_CONNECTION_IMPL *conn; WT_DECL_RET; - size_t cnt; + size_t cnt, len; char *newcheck_name, *p; /* @@ -276,12 +276,10 @@ __wt_configure_method(WT_SESSION_IMPL *session, */ WT_ERR(__wt_calloc_one(session, &entry)); entry->method = (*epp)->method; - WT_ERR(__wt_calloc_def(session, - strlen((*epp)->base) + strlen(",") + strlen(config) + 1, &p)); - (void)strcpy(p, (*epp)->base); - (void)strcat(p, ","); - (void)strcat(p, config); + len = strlen((*epp)->base) + strlen(",") + strlen(config) + 1; + WT_ERR(__wt_calloc_def(session, len, &p)); entry->base = p; + WT_ERR(__wt_snprintf(p, len, "%s,%s", (*epp)->base, config)); /* * There may be a default value in the config argument passed in (for diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index b11a8d63fdb..f152fbacad4 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -294,7 +294,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_create[] = { { "source", "string", NULL, NULL, NULL, 0 }, { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, - { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, + { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 }, { "type", "string", NULL, NULL, NULL, 0 }, { "value_format", "format", __wt_struct_confchk, NULL, @@ -466,7 
+466,7 @@ static const WT_CONFIG_CHECK confchk_file_config[] = { { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 }, { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, - { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, + { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 }, { "value_format", "format", __wt_struct_confchk, NULL, NULL, 0 }, @@ -530,7 +530,7 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = { { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 }, { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, - { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, + { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 }, { "value_format", "format", __wt_struct_confchk, NULL, NULL, 0 }, @@ -614,7 +614,7 @@ static const WT_CONFIG_CHECK confchk_lsm_meta[] = { { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 }, { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, - { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, + { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 }, { "value_format", "format", __wt_struct_confchk, NULL, NULL, 0 }, @@ -1119,7 +1119,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "chunk_size=10MB,merge_max=15,merge_min=0),memory_page_max=5MB," "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," "prefix_compression_min=4,source=,split_deepen_min_child=0," - "split_deepen_per_child=0,split_pct=75,type=file,value_format=u", + "split_deepen_per_child=0,split_pct=90,type=file,value_format=u", confchk_WT_SESSION_create, 42 }, { "WT_SESSION.drop", @@ -1213,7 +1213,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "leaf_value_max=0,log=(enabled=true),memory_page_max=5MB," "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," "prefix_compression_min=4,split_deepen_min_child=0," - 
"split_deepen_per_child=0,split_pct=75,value_format=u", + "split_deepen_per_child=0,split_pct=90,value_format=u", confchk_file_config, 35 }, { "file.meta", @@ -1228,7 +1228,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=0," "log=(enabled=true),memory_page_max=5MB,os_cache_dirty_max=0," "os_cache_max=0,prefix_compression=false,prefix_compression_min=4" - ",split_deepen_min_child=0,split_deepen_per_child=0,split_pct=75," + ",split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90," "value_format=u,version=(major=0,minor=0)", confchk_file_meta, 39 }, @@ -1253,7 +1253,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "merge_min=0),memory_page_max=5MB,old_chunks=," "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," "prefix_compression_min=4,split_deepen_min_child=0," - "split_deepen_per_child=0,split_pct=75,value_format=u", + "split_deepen_per_child=0,split_pct=90,value_format=u", confchk_lsm_meta, 39 }, { "table.meta", diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c index 124250a7a7d..68d45678965 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -1662,8 +1662,8 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR_MSG(session, EINVAL, "Creating a new database is incompatible with " "read-only configuration"); - len = (size_t)snprintf(buf, sizeof(buf), - "%s\n%s\n", WT_WIREDTIGER, WIREDTIGER_VERSION_STRING); + WT_ERR(__wt_snprintf_len_set(buf, sizeof(buf), &len, + "%s\n%s\n", WT_WIREDTIGER, WIREDTIGER_VERSION_STRING)); WT_ERR(__wt_write(session, fh, (wt_off_t)0, len, buf)); WT_ERR(__wt_fsync(session, fh, true)); } else { @@ -2250,10 +2250,9 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, WT_ERR(__wt_scr_alloc(session, 0, &i3)); cfg[0] = WT_CONFIG_BASE(session, wiredtiger_open_all); cfg[1] = NULL; - WT_ERR_TEST(snprintf(version, 
sizeof(version), + WT_ERR(__wt_snprintf(version, sizeof(version), "version=(major=%d,minor=%d)", - WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR) >= - (int)sizeof(version), ENOMEM); + WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR)); __conn_config_append(cfg, version); /* Ignore the base_config file if config_base_set is false. */ diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c index 99213c5b557..657cdebf7ee 100644 --- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c +++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c @@ -12,16 +12,20 @@ * __conn_dhandle_destroy -- * Destroy a data handle. */ -static void +static int __conn_dhandle_destroy(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle) { + WT_DECL_RET; + + WT_WITH_DHANDLE(session, dhandle, ret = __wt_btree_discard(session)); + __wt_rwlock_destroy(session, &dhandle->rwlock); __wt_free(session, dhandle->name); __wt_free(session, dhandle->checkpoint); - __wt_free(session, dhandle->handle); __wt_spin_destroy(session, &dhandle->close_lock); __wt_stat_dsrc_discard(session, dhandle); __wt_overwrite_and_free(session, dhandle); + return (ret); } /* @@ -84,7 +88,7 @@ __wt_conn_dhandle_alloc( session->dhandle = dhandle; return (0); -err: __conn_dhandle_destroy(session, dhandle); +err: WT_TRET(__conn_dhandle_destroy(session, dhandle)); return (ret); } @@ -156,11 +160,11 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) WT_RET(__wt_evict_file_exclusive_on(session)); /* - * If we don't already have the schema lock, make it an error to try - * to acquire it. The problem is that we are holding an exclusive - * lock on the handle, and if we attempt to acquire the schema lock - * we might deadlock with a thread that has the schema lock and wants - * a handle lock (specifically, checkpoint). + * If we don't already have the schema lock, make it an error to try to + * acquire it. 
The problem is that we are holding an exclusive lock on + * the handle, and if we attempt to acquire the schema lock we might + * deadlock with a thread that has the schema lock and wants a handle + * lock. */ no_schema_lock = false; if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) { @@ -200,6 +204,7 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) } WT_TRET(__wt_btree_close(session)); + F_CLR(btree, WT_BTREE_SPECIAL_FLAGS); /* * If we marked a handle dead it will be closed by sweep, via @@ -309,7 +314,8 @@ __wt_conn_btree_open( F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) && !LF_ISSET(WT_DHANDLE_LOCK_ONLY)); - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_CLOSING)); + WT_ASSERT(session, + !F_ISSET(S2C(session), WT_CONN_CLOSING_NO_MORE_OPENS)); /* * If the handle is already open, it has to be closed so it can be @@ -403,10 +409,7 @@ __conn_btree_apply_internal(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle, return (ret == EBUSY ? 0 : ret); WT_SAVE_DHANDLE(session, ret = file_func(session, cfg)); - if (WT_META_TRACKING(session)) - WT_TRET(__wt_meta_track_handle_lock(session, false)); - else - WT_TRET(__wt_session_release_btree(session)); + WT_TRET(__wt_session_release_btree(session)); return (ret); } @@ -500,7 +503,12 @@ __wt_conn_dhandle_close_all( session->dhandle = dhandle; - /* Lock the handle exclusively. */ + /* + * Lock the handle exclusively. If this is part of + * schema-changing operation (indicated by metadata tracking + * being enabled), hold the lock for the duration of the + * operation. 
+ */ WT_ERR(__wt_session_get_btree(session, dhandle->name, dhandle->checkpoint, NULL, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY)); @@ -611,7 +619,7 @@ __wt_conn_dhandle_discard_single( */ if (ret == 0 || final) { __conn_btree_config_clear(session); - __conn_dhandle_destroy(session, dhandle); + WT_TRET(__conn_dhandle_destroy(session, dhandle)); session->dhandle = NULL; } diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c index c6dd795389d..b8b5bd2a908 100644 --- a/src/third_party/wiredtiger/src/conn/conn_log.c +++ b/src/third_party/wiredtiger/src/conn/conn_log.c @@ -341,7 +341,7 @@ __wt_log_truncate_files( conn = S2C(session); if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) return (0); - if (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN) && + if (F_ISSET(conn, WT_CONN_SERVER_LOG) && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE)) WT_RET_MSG(session, EINVAL, "Attempt to archive manually while a server is running"); @@ -382,7 +382,7 @@ __log_file_server(void *arg) conn = S2C(session); log = conn->log; locked = false; - while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) { + while (F_ISSET(conn, WT_CONN_SERVER_LOG)) { /* * If there is a log file to close, make sure any outstanding * write operations have completed, then fsync and close it. @@ -708,7 +708,7 @@ __log_wrlsn_server(void *arg) log = conn->log; yield = 0; WT_INIT_LSN(&prev); - while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) { + while (F_ISSET(conn, WT_CONN_SERVER_LOG)) { /* * Write out any log record buffers if anything was done * since last time. Only call the function to walk the @@ -783,7 +783,7 @@ __log_server(void *arg) * takes to sync out an earlier file. */ did_work = true; - while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) { + while (F_ISSET(conn, WT_CONN_SERVER_LOG)) { /* * Slots depend on future activity. Force out buffered * writes in case we are idle. 
This cannot be part of the @@ -923,7 +923,7 @@ __wt_logmgr_open(WT_SESSION_IMPL *session) if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) return (0); - F_SET(conn, WT_CONN_LOG_SERVER_RUN); + F_SET(conn, WT_CONN_SERVER_LOG); /* * Start the log close thread. It is not configurable. @@ -995,7 +995,7 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session) conn = S2C(session); - F_CLR(conn, WT_CONN_LOG_SERVER_RUN); + F_CLR(conn, WT_CONN_SERVER_LOG); if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) { /* diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c index 5b20377d437..eb3c79422a0 100644 --- a/src/third_party/wiredtiger/src/conn/conn_open.c +++ b/src/third_party/wiredtiger/src/conn/conn_open.c @@ -21,12 +21,6 @@ __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]) session = conn->default_session; WT_ASSERT(session, session->iface.connection == &conn->iface); - /* - * Tell internal server threads to run: this must be set before opening - * any sessions. - */ - F_SET(conn, WT_CONN_SERVER_RUN); - /* WT_SESSION_IMPL array. */ WT_RET(__wt_calloc(session, conn->session_size, sizeof(WT_SESSION_IMPL), &conn->sessions)); @@ -100,6 +94,10 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) __wt_yield(); } + /* Shut down the subsystems, ensuring workers see the state change. */ + F_SET(conn, WT_CONN_CLOSING); + WT_FULL_BARRIER(); + /* * Clear any pending async operations and shut down the async worker * threads and system before closing LSM. @@ -113,10 +111,15 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) * btree handles, so take care in ordering shutdown to make sure they * exit before files are closed. */ - F_CLR(conn, WT_CONN_SERVER_RUN); WT_TRET(__wt_lsm_manager_destroy(session)); - F_SET(conn, WT_CONN_CLOSING); + /* + * Once the async and LSM threads exit, we shouldn't be opening any + * more files. 
+ */ + F_SET(conn, WT_CONN_CLOSING_NO_MORE_OPENS); + WT_FULL_BARRIER(); + WT_TRET(__wt_checkpoint_server_destroy(session)); WT_TRET(__wt_statlog_destroy(session, true)); WT_TRET(__wt_sweep_destroy(session)); diff --git a/src/third_party/wiredtiger/src/conn/conn_sweep.c b/src/third_party/wiredtiger/src/conn/conn_sweep.c index 8c186c63939..22d90b08438 100644 --- a/src/third_party/wiredtiger/src/conn/conn_sweep.c +++ b/src/third_party/wiredtiger/src/conn/conn_sweep.c @@ -10,7 +10,7 @@ #define WT_DHANDLE_CAN_DISCARD(dhandle) \ (!F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_OPEN) && \ - dhandle->session_inuse == 0 && dhandle->session_ref == 0) + (dhandle)->session_inuse == 0 && (dhandle)->session_ref == 0) /* * __sweep_mark -- diff --git a/src/third_party/wiredtiger/src/cursor/cur_file.c b/src/third_party/wiredtiger/src/cursor/cur_file.c index 0ec917fbf95..205afb607c3 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_file.c +++ b/src/third_party/wiredtiger/src/cursor/cur_file.c @@ -9,29 +9,6 @@ #include "wt_internal.h" /* - * WT_BTREE_CURSOR_SAVE_AND_RESTORE - * Save the cursor's key/value data/size fields, call an underlying btree - * function, and then consistently handle failure and success. - */ -#define WT_BTREE_CURSOR_SAVE_AND_RESTORE(cursor, f, ret) do { \ - WT_ITEM __key_copy = (cursor)->key; \ - uint64_t __recno = (cursor)->recno; \ - WT_ITEM __value_copy = (cursor)->value; \ - if (((ret) = (f)) == 0) { \ - F_CLR(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT); \ - F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); \ - } else { \ - if (F_ISSET(cursor, WT_CURSTD_KEY_EXT)) { \ - (cursor)->recno = __recno; \ - WT_ITEM_SET((cursor)->key, __key_copy); \ - } \ - if (F_ISSET(cursor, WT_CURSTD_VALUE_EXT)) \ - WT_ITEM_SET((cursor)->value, __value_copy); \ - F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); \ - } \ -} while (0) - -/* * __curfile_compare -- * WT_CURSOR->compare method for the btree cursor type. 
*/ @@ -109,9 +86,12 @@ __curfile_next(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_API_CALL(cursor, session, next, cbt->btree); - F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); - if ((ret = __wt_btcur_next(cbt, false)) == 0) - F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); + WT_ERR(__wt_btcur_next(cbt, false)); + + /* Next maintains a position, key and value. */ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); err: API_END_RET(session, ret); } @@ -131,9 +111,12 @@ __wt_curfile_next_random(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_API_CALL(cursor, session, next, cbt->btree); - F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); - if ((ret = __wt_btcur_next_random(cbt)) == 0) - F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); + WT_ERR(__wt_btcur_next_random(cbt)); + + /* Next-random maintains a position, key and value. */ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); err: API_END_RET(session, ret); } @@ -152,9 +135,12 @@ __curfile_prev(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_API_CALL(cursor, session, prev, cbt->btree); - F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); - if ((ret = __wt_btcur_prev(cbt, false)) == 0) - F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); + WT_ERR(__wt_btcur_prev(cbt, false)); + + /* Prev maintains a position, key and value. */ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); err: API_END_RET(session, ret); } @@ -175,7 +161,10 @@ __curfile_reset(WT_CURSOR *cursor) ret = __wt_btcur_reset(cbt); - F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + /* Reset maintains no position, key or value. 
*/ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == 0 && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == 0); err: API_END_RET(session, ret); } @@ -194,10 +183,15 @@ __curfile_search(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_API_CALL(cursor, session, search, cbt->btree); - WT_CURSOR_NEEDKEY(cursor); + WT_CURSOR_CHECKKEY(cursor); WT_CURSOR_NOVALUE(cursor); - WT_BTREE_CURSOR_SAVE_AND_RESTORE(cursor, __wt_btcur_search(cbt), ret); + WT_ERR(__wt_btcur_search(cbt)); + + /* Search maintains a position, key and value. */ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); err: API_END_RET(session, ret); } @@ -216,11 +210,15 @@ __curfile_search_near(WT_CURSOR *cursor, int *exact) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_API_CALL(cursor, session, search_near, cbt->btree); - WT_CURSOR_NEEDKEY(cursor); + WT_CURSOR_CHECKKEY(cursor); WT_CURSOR_NOVALUE(cursor); - WT_BTREE_CURSOR_SAVE_AND_RESTORE( - cursor, __wt_btcur_search_near(cbt, exact), ret); + WT_ERR(__wt_btcur_search_near(cbt, exact)); + + /* Search-near maintains a position, key and value. */ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); err: API_END_RET(session, ret); } @@ -238,38 +236,33 @@ __curfile_insert(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_UPDATE_API_CALL(cursor, session, insert, cbt->btree); + if (!F_ISSET(cursor, WT_CURSTD_APPEND)) - WT_CURSOR_NEEDKEY(cursor); - WT_CURSOR_NEEDVALUE(cursor); + WT_CURSOR_CHECKKEY(cursor); + WT_CURSOR_CHECKVALUE(cursor); - WT_BTREE_CURSOR_SAVE_AND_RESTORE(cursor, __wt_btcur_insert(cbt), ret); + WT_ERR(__wt_btcur_insert(cbt)); /* - * Insert is the one cursor operation that doesn't end with the cursor - * pointing to an on-page item (except for column-store appends, where - * we are returning a key). 
That is, the application's cursor continues - * to reference the application's memory after a successful cursor call, - * which isn't true anywhere else. We don't want to have to explain that - * scoping corner case, so we reset the application's cursor so it can - * free the referenced memory and continue on without risking subsequent - * core dumps. + * Insert maintains no position, key or value (except for column-store + * appends, where we are returning a key). */ - if (ret == 0) { - if (!F_ISSET(cursor, WT_CURSTD_APPEND)) - F_CLR(cursor, WT_CURSTD_KEY_INT); - F_CLR(cursor, WT_CURSTD_VALUE_INT); - } + WT_ASSERT(session, + (F_ISSET(cursor, WT_CURSTD_APPEND) && + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT) || + (!F_ISSET(cursor, WT_CURSTD_APPEND) && + F_MASK(cursor, WT_CURSTD_KEY_SET) == 0)); err: CURSOR_UPDATE_API_END(session, ret); return (ret); } /* - * __curfile_update -- - * WT_CURSOR->update method for the btree cursor type. + * __wt_curfile_insert_check -- + * WT_CURSOR->insert_check method for the btree cursor type. */ -static int -__curfile_update(WT_CURSOR *cursor) +int +__wt_curfile_insert_check(WT_CURSOR *cursor) { WT_CURSOR_BTREE *cbt; WT_DECL_RET; @@ -278,21 +271,21 @@ __curfile_update(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_UPDATE_API_CALL(cursor, session, update, cbt->btree); - WT_CURSOR_NEEDKEY(cursor); - WT_CURSOR_NEEDVALUE(cursor); + WT_CURSOR_CHECKKEY(cursor); + WT_CURSOR_NOVALUE(cursor); - WT_BTREE_CURSOR_SAVE_AND_RESTORE(cursor, __wt_btcur_update(cbt), ret); + ret = __wt_btcur_insert_check(cbt); err: CURSOR_UPDATE_API_END(session, ret); return (ret); } /* - * __wt_curfile_update_check -- - * WT_CURSOR->update_check method for the btree cursor type. + * __curfile_update -- + * WT_CURSOR->update method for the btree cursor type. 
*/ -int -__wt_curfile_update_check(WT_CURSOR *cursor) +static int +__curfile_update(WT_CURSOR *cursor) { WT_CURSOR_BTREE *cbt; WT_DECL_RET; @@ -301,11 +294,15 @@ __wt_curfile_update_check(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_UPDATE_API_CALL(cursor, session, update, cbt->btree); - WT_CURSOR_NEEDKEY(cursor); - WT_CURSOR_NOVALUE(cursor); + WT_CURSOR_CHECKKEY(cursor); + WT_CURSOR_CHECKVALUE(cursor); - WT_BTREE_CURSOR_SAVE_AND_RESTORE( - cursor, __wt_btcur_update_check(cbt), ret); + WT_ERR(__wt_btcur_update(cbt)); + + /* Update maintains a position, key and value. */ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); err: CURSOR_UPDATE_API_END(session, ret); return (ret); @@ -325,24 +322,21 @@ __curfile_remove(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_REMOVE_API_CALL(cursor, session, cbt->btree); - WT_CURSOR_NEEDKEY(cursor); + WT_CURSOR_CHECKKEY(cursor); WT_CURSOR_NOVALUE(cursor); - WT_BTREE_CURSOR_SAVE_AND_RESTORE(cursor, __wt_btcur_remove(cbt), ret); + WT_ERR(__wt_btcur_remove(cbt)); /* - * After a successful remove, copy the key: the value is not available. + * Remove with a search-key is fire-and-forget, no position and no key. + * Remove starting from a position maintains the position and a key. + * We don't know which it was at this layer, so can only assert the key + * is not set at all, or internal. There's never a value. 
*/ - if (ret == 0) { - if (F_ISSET(cursor, WT_CURSTD_KEY_INT) && - !WT_DATA_IN_ITEM(&(cursor)->key)) { - WT_ERR(__wt_buf_set(session, &cursor->key, - cursor->key.data, cursor->key.size)); - F_CLR(cursor, WT_CURSTD_KEY_INT); - F_SET(cursor, WT_CURSTD_KEY_EXT); - } - F_CLR(cursor, WT_CURSTD_VALUE_SET); - } + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == 0 || + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); + WT_ASSERT(session, F_MASK(cursor, WT_CURSTD_VALUE_SET) == 0); err: CURSOR_UPDATE_API_END(session, ret); return (ret); diff --git a/src/third_party/wiredtiger/src/cursor/cur_join.c b/src/third_party/wiredtiger/src/cursor/cur_join.c index 013a64ef2d5..80afaf798dc 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_join.c +++ b/src/third_party/wiredtiger/src/cursor/cur_join.c @@ -185,7 +185,7 @@ __curjoin_iter_set_entry(WT_CURSOR_JOIN_ITER *iter, u_int entry_pos) size = strlen(to_dup->internal_uri) + 3; WT_ERR(__wt_calloc(session, size, 1, &uri)); - snprintf(uri, size, "%s()", to_dup->internal_uri); + WT_ERR(__wt_snprintf(uri, size, "%s()", to_dup->internal_uri)); if ((c = iter->cursor) == NULL || !WT_STREQ(c->uri, uri)) { iter->cursor = NULL; if (c != NULL) @@ -270,7 +270,7 @@ again: iter->positioned = true; return (ret); } - else if (ret == WT_NOTFOUND) { + if (ret == WT_NOTFOUND) { WT_RET(__curjoin_iter_close_all(iter->child)); entry->subjoin->iter = NULL; iter->child = NULL; @@ -518,8 +518,7 @@ __curjoin_entry_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, } if (disjunction && end == endmax) return (WT_NOTFOUND); - else - return (0); + return (0); } typedef struct { @@ -930,7 +929,7 @@ __curjoin_init_next(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, if ((proj = cjoin->projection) != NULL) { size = strlen(urimain) + strlen(proj) + 1; WT_ERR(__wt_calloc(session, size, 1, &mainbuf)); - snprintf(mainbuf, size, "%s%s", urimain, proj); + WT_ERR(__wt_snprintf(mainbuf, size, "%s%s", urimain, proj)); urimain = mainbuf; } 
WT_ERR(__wt_open_cursor(session, urimain, (WT_CURSOR *)cjoin, config, @@ -975,8 +974,8 @@ __curjoin_init_next(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, if (!iterable && F_ISSET(je, WT_CURJOIN_ENTRY_BLOOM)) { if (session->txn.isolation == WT_ISO_READ_UNCOMMITTED) WT_ERR_MSG(session, EINVAL, - "join cursors with Bloom filters cannot be " - "used with read-uncommitted isolation"); + "join cursors with Bloom filters cannot be " + "used with read-uncommitted isolation"); if (je->bloom == NULL) { /* * Look for compatible filters to be shared, @@ -1149,8 +1148,8 @@ __curjoin_open_main(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, newsize = strlen(cjoin->table->name) + idx->colconf.len + 1; WT_ERR(__wt_calloc(session, 1, newsize, &main_uri)); - snprintf(main_uri, newsize, "%s%.*s", - cjoin->table->name, (int)idx->colconf.len, idx->colconf.str); + WT_ERR(__wt_snprintf(main_uri, newsize, "%s%.*s", + cjoin->table->name, (int)idx->colconf.len, idx->colconf.str)); WT_ERR(__wt_open_cursor(session, main_uri, (WT_CURSOR *)cjoin, raw_cfg, &entry->main)); if (idx->extractor == NULL) { @@ -1163,7 +1162,8 @@ __curjoin_open_main(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, */ len = strlen(entry->main->value_format) + 3; WT_ERR(__wt_calloc(session, len, 1, &newformat)); - snprintf(newformat, len, "%s0x", entry->main->value_format); + WT_ERR(__wt_snprintf( + newformat, len, "%s0x", entry->main->value_format)); __wt_free(session, entry->main->value_format); entry->main->value_format = newformat; } @@ -1532,8 +1532,8 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, len = strlen(cindex->iface.key_format) + 3; WT_RET(__wt_calloc(session, len, 1, &entry->repack_format)); - snprintf(entry->repack_format, len, "%s0x", - cindex->iface.key_format); + WT_RET(__wt_snprintf(entry->repack_format, + len, "%s0x", cindex->iface.key_format)); } } return (0); diff --git a/src/third_party/wiredtiger/src/cursor/cur_json.c b/src/third_party/wiredtiger/src/cursor/cur_json.c 
index 5870d14273e..e8ddb767863 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_json.c +++ b/src/third_party/wiredtiger/src/cursor/cur_json.c @@ -8,8 +8,8 @@ #include "wt_internal.h" -static size_t __json_unpack_put(WT_SESSION_IMPL *, void *, u_char *, size_t, - WT_CONFIG_ITEM *); +static int __json_unpack_put( + WT_SESSION_IMPL *, void *, u_char *, size_t, WT_CONFIG_ITEM *, size_t *); static inline int __json_struct_size(WT_SESSION_IMPL *, const void *, size_t, const char *, WT_CONFIG_ITEM *, bool, size_t *); static inline int __json_struct_unpackv(WT_SESSION_IMPL *, const void *, size_t, @@ -23,20 +23,20 @@ static int __json_pack_size(WT_SESSION_IMPL *, const char *, WT_CONFIG_ITEM *, bool, const char *, size_t *); #define WT_PACK_JSON_GET(session, pv, jstr) do { \ - switch (pv.type) { \ + switch ((pv).type) { \ case 'x': \ break; \ case 's': \ case 'S': \ - WT_RET(json_string_arg(session, &jstr, &pv.u.item)); \ - pv.type = pv.type == 's' ? 'j' : 'J'; \ + WT_RET(json_string_arg(session, &(jstr), &(pv).u.item));\ + (pv).type = (pv).type == 's' ? 'j' : 'J'; \ break; \ case 'b': \ case 'h': \ case 'i': \ case 'l': \ case 'q': \ - WT_RET(json_int_arg(session, &jstr, &pv.u.i)); \ + WT_RET(json_int_arg(session, &(jstr), &(pv).u.i)); \ break; \ case 'B': \ case 'H': \ @@ -46,11 +46,11 @@ static int __json_pack_size(WT_SESSION_IMPL *, const char *, WT_CONFIG_ITEM *, case 'r': \ case 'R': \ case 't': \ - WT_RET(json_uint_arg(session, &jstr, &pv.u.u)); \ + WT_RET(json_uint_arg(session, &(jstr), &(pv).u.u)); \ break; \ case 'u': \ - WT_RET(json_string_arg(session, &jstr, &pv.u.item)); \ - pv.type = 'K'; \ + WT_RET(json_string_arg(session, &(jstr), &(pv).u.item));\ + (pv).type = 'K'; \ break; \ /* User format strings have already been validated. */ \ WT_ILLEGAL_VALUE(session); \ @@ -61,22 +61,22 @@ static int __json_pack_size(WT_SESSION_IMPL *, const char *, WT_CONFIG_ITEM *, * __json_unpack_put -- * Calculate the size of a packed byte string as formatted for JSON. 
*/ -static size_t +static int __json_unpack_put(WT_SESSION_IMPL *session, void *voidpv, - u_char *buf, size_t bufsz, WT_CONFIG_ITEM *name) + u_char *buf, size_t bufsz, WT_CONFIG_ITEM *name, size_t *retsizep) { WT_PACK_VALUE *pv; const u_char *p, *end; size_t s, n; pv = (WT_PACK_VALUE *)voidpv; - s = (size_t)snprintf((char *)buf, bufsz, "\"%.*s\" : ", - (int)name->len, name->str); + + WT_RET(__wt_snprintf_len_set( + (char *)buf, bufsz, &s, "\"%.*s\" : ", (int)name->len, name->str)); if (s <= bufsz) { bufsz -= s; buf += s; - } - else + } else bufsz = 0; switch (pv->type) { @@ -118,7 +118,8 @@ __json_unpack_put(WT_SESSION_IMPL *session, void *voidpv, } if (bufsz > 0) *buf++ = '"'; - return (s); + *retsizep += s; + return (0); case 'U': case 'u': s += 2; @@ -140,14 +141,17 @@ __json_unpack_put(WT_SESSION_IMPL *session, void *voidpv, } if (bufsz > 0) *buf++ = '"'; - return (s); + *retsizep += s; + return (0); case 'b': case 'h': case 'i': case 'l': case 'q': - return (s + - (size_t)snprintf((char *)buf, bufsz, "%" PRId64, pv->u.i)); + WT_RET(__wt_snprintf_len_incr( + (char *)buf, bufsz, &s, "%" PRId64, pv->u.i)); + *retsizep += s; + return (0); case 'B': case 't': case 'H': @@ -156,11 +160,14 @@ __json_unpack_put(WT_SESSION_IMPL *session, void *voidpv, case 'Q': case 'r': case 'R': - return (s + - (size_t)snprintf((char *)buf, bufsz, "%" PRId64, pv->u.u)); + WT_RET(__wt_snprintf_len_incr( + (char *)buf, bufsz, &s, "%" PRId64, pv->u.u)); + *retsizep += s; + return (0); } - __wt_err(session, EINVAL, "unknown pack-value type: %c", (int)pv->type); - return ((size_t)-1); + + WT_RET_MSG(session, EINVAL, + "unknown pack-value type: %c", (int)pv->type); } /* @@ -194,7 +201,8 @@ __json_struct_size(WT_SESSION_IMPL *session, const void *buffer, needcr = true; WT_RET(__unpack_read(session, &pv, &p, (size_t)(end - p))); WT_RET(__pack_name_next(&packname, &name)); - result += __json_unpack_put(session, &pv, NULL, 0, &name); + WT_RET( + __json_unpack_put(session, &pv, NULL, 0, &name, 
&result)); } if (ret == WT_NOTFOUND) ret = 0; @@ -243,8 +251,9 @@ __json_struct_unpackv(WT_SESSION_IMPL *session, needcr = true; WT_RET(__unpack_read(session, &pv, &p, (size_t)(end - p))); WT_RET(__pack_name_next(&packname, &name)); - jsize = __json_unpack_put(session, - (u_char *)&pv, jbuf, jbufsize, &name); + jsize = 0; + WT_RET(__json_unpack_put(session, + (u_char *)&pv, jbuf, jbufsize, &name, &jsize)); WT_ASSERT(session, jsize <= jbufsize); jbuf += jsize; jbufsize -= jsize; @@ -304,7 +313,6 @@ __wt_json_close(WT_SESSION_IMPL *session, WT_CURSOR *cursor) __wt_free(session, json->value_buf); __wt_free(session, json); } - return; } /* @@ -323,33 +331,32 @@ __wt_json_unpack_char(u_char ch, u_char *buf, size_t bufsz, bool force_unicode) if (bufsz >= 1) *buf = ch; return (1); - } else { - abbrev = '\0'; - switch (ch) { - case '\\': - case '"': - abbrev = ch; - break; - case '\f': - abbrev = 'f'; - break; - case '\n': - abbrev = 'n'; - break; - case '\r': - abbrev = 'r'; - break; - case '\t': - abbrev = 't'; - break; - } - if (abbrev != '\0') { - if (bufsz >= 2) { - *buf++ = '\\'; - *buf = abbrev; - } - return (2); + } + abbrev = '\0'; + switch (ch) { + case '\\': + case '"': + abbrev = ch; + break; + case '\f': + abbrev = 'f'; + break; + case '\n': + abbrev = 'n'; + break; + case '\r': + abbrev = 'r'; + break; + case '\t': + abbrev = 't'; + break; + } + if (abbrev != '\0') { + if (bufsz >= 2) { + *buf++ = '\\'; + *buf = abbrev; } + return (2); } } if (bufsz >= 6) { @@ -421,16 +428,16 @@ __wt_json_column_init(WT_CURSOR *cursor, const char *uri, const char *keyformat, #define MATCH_KEYWORD(session, in, result, keyword, matchval) do { \ size_t _kwlen = strlen(keyword); \ if (strncmp(in, keyword, _kwlen) == 0 && \ - !__wt_isalnum((u_char)in[_kwlen])) { \ - in += _kwlen; \ - result = matchval; \ + !__wt_isalnum((u_char)(in)[_kwlen])) { \ + (in) += _kwlen; \ + (result) = matchval; \ } else { \ - const char *_bad = in; \ - while (__wt_isalnum((u_char)*in)) \ - in++; \ + 
const char *_bad = (in); \ + while (__wt_isalnum((u_char)*(in))) \ + (in)++; \ WT_RET_MSG(session, EINVAL, \ "unknown keyword \"%.*s\" in JSON", \ - (int)(in - _bad), _bad); \ + (int)((in) - _bad), _bad); \ } \ } while (0) @@ -692,12 +699,13 @@ json_uint_arg(WT_SESSION_IMPL *session, const char **jstr, uint64_t *up) #define JSON_EXPECT_TOKEN_GET(session, jstr, tokval, start, sz) do { \ int __tok; \ - WT_RET(__wt_json_token((WT_SESSION *)session, jstr, &__tok, &start, &sz));\ - if (__tok != tokval) \ + WT_RET(__wt_json_token( \ + (WT_SESSION *)(session), jstr, &__tok, &(start), &(sz))); \ + if (__tok != (tokval)) \ WT_RET_MSG(session, EINVAL, \ "expected JSON %s, got %s", \ __wt_json_tokname(tokval), __wt_json_tokname(__tok)); \ - jstr = start + sz; \ + (jstr) = (start) + (sz); \ } while (0) #define JSON_EXPECT_TOKEN(session, jstr, tokval) do { \ diff --git a/src/third_party/wiredtiger/src/cursor/cur_metadata.c b/src/third_party/wiredtiger/src/cursor/cur_metadata.c index 10e2fdf28be..fbfc73956e2 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_metadata.c +++ b/src/third_party/wiredtiger/src/cursor/cur_metadata.c @@ -16,7 +16,7 @@ WT_CURSOR_NEEDKEY(cursor); \ WT_ERR(__wt_buf_set(session, \ &((WT_CURSOR_METADATA *)(cursor))->file_cursor->key, \ - cursor->key.data, cursor->key.size)); \ + (cursor)->key.data, (cursor)->key.size)); \ F_SET(((WT_CURSOR_METADATA *)(cursor))->file_cursor, \ WT_CURSTD_KEY_EXT); \ } while (0) @@ -25,7 +25,7 @@ WT_CURSOR_NEEDVALUE(cursor); \ WT_ERR(__wt_buf_set(session, \ &((WT_CURSOR_METADATA *)(cursor))->file_cursor->value, \ - cursor->value.data, cursor->value.size)); \ + (cursor)->value.data, (cursor)->value.size)); \ F_SET(((WT_CURSOR_METADATA *)(cursor))->file_cursor, \ WT_CURSTD_VALUE_EXT); \ } while (0) diff --git a/src/third_party/wiredtiger/src/cursor/cur_stat.c b/src/third_party/wiredtiger/src/cursor/cur_stat.c index 5fde64c74ca..0bff642370d 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_stat.c +++ 
b/src/third_party/wiredtiger/src/cursor/cur_stat.c @@ -163,7 +163,6 @@ static void __curstat_set_value(WT_CURSOR *cursor, ...) { WT_UNUSED(cursor); - return; } /* @@ -478,8 +477,8 @@ __curstat_join_desc(WT_CURSOR_STAT *cst, int slot, const char **resultp) len = strlen("join: ") + strlen(sgrp->desc_prefix) + strlen(static_desc) + 1; WT_RET(__wt_realloc(session, NULL, len, &cst->desc_buf)); - snprintf(cst->desc_buf, len, "join: %s%s", sgrp->desc_prefix, - static_desc); + WT_RET(__wt_snprintf( + cst->desc_buf, len, "join: %s%s", sgrp->desc_prefix, static_desc)); *resultp = cst->desc_buf; return (0); } diff --git a/src/third_party/wiredtiger/src/cursor/cur_table.c b/src/third_party/wiredtiger/src/cursor/cur_table.c index 7e8cd153d2d..3b72bb0730f 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_table.c +++ b/src/third_party/wiredtiger/src/cursor/cur_table.c @@ -14,8 +14,8 @@ static int __curtable_update(WT_CURSOR *cursor); #define APPLY_CG(ctable, f) do { \ WT_CURSOR **__cp; \ u_int __i; \ - for (__i = 0, __cp = ctable->cg_cursors; \ - __i < WT_COLGROUPS(ctable->table); \ + for (__i = 0, __cp = (ctable)->cg_cursors; \ + __i < WT_COLGROUPS((ctable)->table); \ __i++, __cp++) \ WT_TRET((*__cp)->f(*__cp)); \ } while (0) @@ -511,9 +511,16 @@ __curtable_insert(WT_CURSOR *cursor) */ F_SET(primary, flag_orig | WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT); - if (ret == WT_DUPLICATE_KEY && F_ISSET(cursor, WT_CURSTD_OVERWRITE)) + if (ret == WT_DUPLICATE_KEY && F_ISSET(cursor, WT_CURSTD_OVERWRITE)) { WT_ERR(__curtable_update(cursor)); - else { + + /* + * The cursor is no longer positioned. This isn't just cosmetic, + * without a reset, iteration on this cursor won't start at the + * beginning/end of the table. 
+ */ + APPLY_CG(ctable, reset); + } else { WT_ERR(ret); for (i = 1; i < WT_COLGROUPS(ctable->table); i++, cp++) { @@ -601,22 +608,53 @@ err: CURSOR_UPDATE_API_END(session, ret); static int __curtable_remove(WT_CURSOR *cursor) { + WT_CURSOR *primary; WT_CURSOR_TABLE *ctable; WT_DECL_RET; WT_SESSION_IMPL *session; + bool positioned; ctable = (WT_CURSOR_TABLE *)cursor; JOINABLE_CURSOR_REMOVE_API_CALL(cursor, session, NULL); WT_ERR(__curtable_open_indices(ctable)); + /* Check if the cursor was positioned. */ + primary = *ctable->cg_cursors; + positioned = F_ISSET(primary, WT_CURSTD_KEY_INT); + /* Find the old record so it can be removed from indices */ if (ctable->table->nindices > 0) { APPLY_CG(ctable, search); + if (ret == WT_NOTFOUND) + goto notfound; WT_ERR(ret); WT_ERR(__apply_idx(ctable, offsetof(WT_CURSOR, remove), false)); } APPLY_CG(ctable, remove); + if (ret == WT_NOTFOUND) + goto notfound; + WT_ERR(ret); + +notfound: + /* + * If the cursor is configured to overwrite and the record is not found, + * that is exactly what we want. + */ + if (ret == WT_NOTFOUND && F_ISSET(primary, WT_CURSTD_OVERWRITE)) + ret = 0; + + /* + * If the cursor was positioned, it stays positioned with a key but no + * value, otherwise, there's no position, key or value. This isn't + * just cosmetic, without a reset, iteration on this cursor won't start + * at the beginning/end of the table. + */ + F_CLR(primary, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + if (positioned) + F_SET(primary, WT_CURSTD_KEY_INT); + else + APPLY_CG(ctable, reset); err: CURSOR_UPDATE_API_END(session, ret); return (ret); @@ -989,11 +1027,15 @@ __wt_curtable_open(WT_SESSION_IMPL *session, if (0) { err: if (*cursorp != NULL) { - if (*cursorp != cursor) - WT_TRET(__wt_cursor_close(*cursorp)); + /* + * When a dump cursor is opened, then *cursorp, not + * cursor, is the dump cursor. Close the dump cursor, + * and the table cursor will be closed as its child. 
+ */ + cursor = *cursorp; *cursorp = NULL; } - WT_TRET(__curtable_close(cursor)); + WT_TRET(cursor->close(cursor)); } __wt_scr_free(session, &tmp); diff --git a/src/third_party/wiredtiger/src/docs/cursor-ops.dox b/src/third_party/wiredtiger/src/docs/cursor-ops.dox index b743d81db57..e479ff29191 100644 --- a/src/third_party/wiredtiger/src/docs/cursor-ops.dox +++ b/src/third_party/wiredtiger/src/docs/cursor-ops.dox @@ -145,9 +145,5 @@ that may not be modified or freed by the application. If a longer scope is required, the application must make a copy of the memory before the cursor is re-used, closed or reset. -The comments in this example code explain when the application can safely -modify memory passed to WT_CURSOR::set_key or WT_CURSOR::set_value: - -@snippet ex_scope.c cursor scope operation @m_endif */ diff --git a/src/third_party/wiredtiger/src/docs/upgrading.dox b/src/third_party/wiredtiger/src/docs/upgrading.dox index f463e6bc615..e5fce3d0d5d 100644 --- a/src/third_party/wiredtiger/src/docs/upgrading.dox +++ b/src/third_party/wiredtiger/src/docs/upgrading.dox @@ -2,22 +2,45 @@ @section version_292 Upgrading to Version 2.9.2 <dl> -<dt>WiredTiger Utility now supports truncate</dt> + +<dt>WiredTiger utility now supports truncate</dt> <dd> -The WiredTiger Utility can now \c truncate an object. Removing all contents -from the specified object. +The WiredTiger utility \c wt can now \c truncate objects, removing all +contents from the specified object. </dd> + <dt>Handle list lock statistics</dt> <dd> In the 2.9.1 release we added statistics tracking handle list lock timing, we have switched that lock from a spin lock to a read-write lock, and consequently changed the statistics tracking lock related wait time. 
</dd> -</dl> -@section version_291 Upgrading to Version 2.9.1 +<dt>Forced and named checkpoint error conditions changed</dt> +<dd> +There are new cases where checkpoints created with an explicit name or the +"force" configuration option can return an EBUSY error. This can happen if +the checkpoint overlaps with other schema operations, for example table create. +</dd> + +<dt>WT_CURSOR::remove may not return a positioned cursor</dt> +<dd> +The WT_CURSOR::remove method was previously documented to always return a +positioned cursor on success, which is not possible when \c overwrite=true +and the record does not exist. +The documentation has been updated, and the method has been changed to +never return a cursor position unless called with an existing cursor +position. In other words, if the cursor is positioned and the +WT_CURSOR::remove method is called, the cursor will remain positioned; if the +cursor is not positioned and the WT_CURSOR::remove method is called, the +cursor will not be positioned on return. +</dd> + +</dl><hr> +@section version_291 Upgrading to Version 2.9.1 <dl> + <dt>Changes to hazard pointer configuration</dt> <dd> The \c hazard_max parameter to ::wiredtiger_open is now ignored. Memory is @@ -33,10 +56,11 @@ have added a new \c access_pattern_hint configuration option available for WT_SESSION::create that can be used to restore the old default by setting the value to "random". </dd> -</dl> +</dl><hr> @section version_290 Upgrading to Version 2.9.0 <dl> + <dt>Changes to cursor behavior after WT_CURSOR::insert</dt> <dd> After a successful call to WT_CURSOR::insert, unless a cursor has record diff --git a/src/third_party/wiredtiger/src/docs/wtperf.dox b/src/third_party/wiredtiger/src/docs/wtperf.dox index 2eac0fef3f4..6bdcf5f4f8d 100644 --- a/src/third_party/wiredtiger/src/docs/wtperf.dox +++ b/src/third_party/wiredtiger/src/docs/wtperf.dox @@ -167,6 +167,8 @@ do population phase; false to use existing database number of WiredTiger databases to use. 
Each database will execute the workload using a separate home directory and complete set of worker threads @par drop_tables (boolean, default=false) Whether to drop all tables at the end of the run, and report time taken to do the drop. +@par in_memory (boolean, default=false) +Whether to create the database in-memory. @par icount (unsigned int, default=5000) number of records to initially populate. If multiple tables are configured the count is spread evenly across all tables. @par idle_table_cycle (unsigned int, default=0) diff --git a/src/third_party/wiredtiger/src/evict/evict_file.c b/src/third_party/wiredtiger/src/evict/evict_file.c index 17b038fb003..3d8f4a61ca7 100644 --- a/src/third_party/wiredtiger/src/evict/evict_file.c +++ b/src/third_party/wiredtiger/src/evict/evict_file.c @@ -15,15 +15,27 @@ int __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) { + WT_BTREE *btree; WT_DECL_RET; WT_PAGE *page; WT_REF *next_ref, *ref; + btree = S2BT(session); + /* - * We need exclusive access to the file -- disable ordinary eviction - * and drain any blocks already queued. + * We need exclusive access to the file, we're about to discard the root + * page. Assert eviction has been locked out. */ - WT_RET(__wt_evict_file_exclusive_on(session)); + WT_ASSERT(session, + btree->evict_disabled > 0 || + !F_ISSET(session->dhandle, WT_DHANDLE_OPEN)); + + /* + * We do discard objects without pages in memory. If that's the case, + * we're done. + */ + if (btree->root.page == NULL) + return (0); /* Make sure the oldest transaction ID is up-to-date. */ WT_RET(__wt_txn_update_oldest( @@ -102,7 +114,5 @@ err: /* On error, clear any left-over tree walk. 
*/ session, next_ref, WT_READ_NO_EVICT)); } - __wt_evict_file_exclusive_off(session); - return (ret); } diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index f1949a7c320..26bbf9f679b 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -824,31 +824,19 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) btree = S2BT(session); cache = S2C(session)->cache; - /* - * Hold the walk lock to set the no-eviction flag. - * - * The no-eviction flag can be set permanently, in which case we never - * increment the no-eviction count. - */ + /* Hold the walk lock to turn off eviction. */ __wt_spin_lock(session, &cache->evict_walk_lock); - if (F_ISSET(btree, WT_BTREE_NO_EVICTION)) { - if (btree->evict_disabled != 0) - ++btree->evict_disabled; + if (++btree->evict_disabled > 1) { __wt_spin_unlock(session, &cache->evict_walk_lock); return (0); } - ++btree->evict_disabled; /* * Ensure no new pages from the file will be queued for eviction after - * this point. + * this point, then clear any existing LRU eviction walk for the file. */ - F_SET(btree, WT_BTREE_NO_EVICTION); (void)__wt_atomic_addv32(&cache->pass_intr, 1); - - /* Clear any existing LRU eviction walk for the file. 
*/ - WT_WITH_PASS_LOCK(session, - ret = __evict_clear_walk(session)); + WT_WITH_PASS_LOCK(session, ret = __evict_clear_walk(session)); (void)__wt_atomic_subv32(&cache->pass_intr, 1); WT_ERR(ret); @@ -879,7 +867,6 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) if (0) { err: --btree->evict_disabled; - F_CLR(btree, WT_BTREE_NO_EVICTION); } __wt_spin_unlock(session, &cache->evict_walk_lock); return (ret); @@ -904,38 +891,41 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) */ WT_DIAGNOSTIC_YIELD; - WT_ASSERT(session, - btree->evict_ref == NULL && F_ISSET(btree, WT_BTREE_NO_EVICTION)); - - /* - * The no-eviction flag can be set permanently, in which case we never - * increment the no-eviction count. - */ + /* Hold the walk lock to turn on eviction. */ __wt_spin_lock(session, &cache->evict_walk_lock); - if (btree->evict_disabled > 0 && --btree->evict_disabled == 0) - F_CLR(btree, WT_BTREE_NO_EVICTION); + WT_ASSERT(session, + btree->evict_ref == NULL && btree->evict_disabled > 0); + --btree->evict_disabled; __wt_spin_unlock(session, &cache->evict_walk_lock); } #define EVICT_TUNE_BATCH 1 /* Max workers to add each period */ -#define EVICT_TUNE_DATAPT_MIN 3 /* Data points needed before deciding - if we should keep adding workers or - settle on an earlier value. */ -#define EVICT_TUNE_PERIOD 2 /* Tune period in seconds */ +/* + * Data points needed before deciding if we should keep adding workers or settle + * on an earlier value. + */ +#define EVICT_TUNE_DATAPT_MIN 3 +#define EVICT_TUNE_PERIOD 1 /* Tune period in seconds */ + +/* + * We will do a fresh re-tune every that many seconds to adjust to + * significant phase changes. + */ +#define EVICT_FORCE_RETUNE 30 /* * __evict_tune_workers -- * Find the right number of eviction workers. Gradually ramp up the number of * workers increasing the number in batches indicated by the setting above. 
- * Store the number of workers that gave us the best throughput so far and - * the number of data points we have tried. + * Store the number of workers that gave us the best throughput so far and the + * number of data points we have tried. * - * Every once in a while when we have the minimum number of data points - * we check whether the eviction throughput achieved with the current number - * of workers is the best we have seen so far. If so, we will keep increasing - * the number of workers. If not, we are past the inflection point on the - * eviction throughput curve. In that case, we will set the number of workers - * to the best observed so far and settle into a stable state. + * Every once in a while when we have the minimum number of data points we check + * whether the eviction throughput achieved with the current number of workers + * is the best we have seen so far. If so, we will keep increasing the number of + * workers. If not, we are past the inflection point on the eviction throughput + * curve. In that case, we will set the number of workers to the best observed + * so far and settle into a stable state. 
*/ static int __evict_tune_workers(WT_SESSION_IMPL *session) @@ -944,29 +934,63 @@ __evict_tune_workers(WT_SESSION_IMPL *session) WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DECL_RET; - uint64_t cur_threads, delta_msec, delta_pages, i, target_threads; - uint64_t pgs_evicted_cur, pgs_evicted_persec_cur; - uint32_t thread_surplus; + uint64_t delta_msec, delta_pages; + uint64_t pgs_evicted_cur, pgs_evicted_persec_cur, time_diff; + int32_t cur_threads, i, target_threads, thread_surplus; conn = S2C(session); cache = conn->cache; WT_ASSERT(session, conn->evict_threads.threads[0]->session == session); - pgs_evicted_persec_cur = 0; - - if (conn->evict_tune_stable) - return (0); + pgs_evicted_cur = pgs_evicted_persec_cur = 0; __wt_epoch(session, &current_time); + time_diff = WT_TIMEDIFF_SEC(current_time, conn->evict_tune_last_time); /* - * Every EVICT_TUNE_PERIOD seconds record the number of - * pages evicted per second observed in the previous period. + * If we have reached the stable state and have not run long enough to + * surpass the forced re-tuning threshold, return. */ - if (WT_TIMEDIFF_SEC( - current_time, conn->evict_tune_last_time) < EVICT_TUNE_PERIOD) - return (0); + if (conn->evict_tune_stable) { + if (time_diff < EVICT_FORCE_RETUNE) + return (0); + /* + * Stable state was reached a long time ago. Let's re-tune. + * Reset all the state. 
+ */ + conn->evict_tune_stable = 0; + conn->evict_tune_last_action_time.tv_sec = 0; + conn->evict_tune_pgs_last = 0; + conn->evict_tune_num_points = 0; + conn->evict_tune_pg_sec_max = 0; + conn->evict_tune_workers_best = 0; + + /* Reduce the number of eviction workers to the minimum */ + thread_surplus = + (int32_t)conn->evict_threads.current_threads - + (int32_t)conn->evict_threads_min; + + for (i = 0; i < thread_surplus; i++) { + WT_ERR(__wt_thread_group_stop_one( + session, &conn->evict_threads, false)); + WT_STAT_CONN_INCR(session, + cache_eviction_worker_removed); + } + WT_STAT_CONN_INCR(session, cache_eviction_force_retune); + } else + if (time_diff < EVICT_TUNE_PERIOD) + /* + * If we have not reached stable state, don't do + * anything unless enough time has passed since the last + * time we have taken any action in this function. + */ + return (0); + + /* + * Measure the number of evicted pages so far. Eviction rate correlates + * to performance, so this is our metric of success. + */ pgs_evicted_cur = cache->pages_evict; /* @@ -984,7 +1008,8 @@ __evict_tune_workers(WT_SESSION_IMPL *session) pgs_evicted_persec_cur = (delta_pages * WT_THOUSAND) / delta_msec; conn->evict_tune_num_points++; - /* Keep track of the maximum eviction throughput seen and the number + /* + * Keep track of the maximum eviction throughput seen and the number * of workers corresponding to that throughput. */ if (pgs_evicted_persec_cur > conn->evict_tune_pg_sec_max) { @@ -1003,18 +1028,18 @@ __evict_tune_workers(WT_SESSION_IMPL *session) * settle into a stable state. */ if (conn->evict_tune_num_points >= conn->evict_tune_datapts_needed) { - if ((conn->evict_tune_workers_best == - conn->evict_threads.current_threads) && - (conn->evict_threads.current_threads < - conn->evict_threads_max)) { + if (conn->evict_tune_workers_best == + conn->evict_threads.current_threads && + conn->evict_threads.current_threads < + conn->evict_threads_max) { /* * Keep adding workers. 
We will check again * at the next check point. */ - conn->evict_tune_datapts_needed += - WT_MIN(EVICT_TUNE_DATAPT_MIN, - (conn->evict_threads_max - - conn->evict_threads.current_threads)/ + conn->evict_tune_datapts_needed += WT_MIN( + EVICT_TUNE_DATAPT_MIN, + (conn->evict_threads_max - + conn->evict_threads.current_threads) / EVICT_TUNE_BATCH); } else { /* @@ -1023,8 +1048,8 @@ __evict_tune_workers(WT_SESSION_IMPL *session) * settle into a stable state. */ thread_surplus = - conn->evict_threads.current_threads - - conn->evict_tune_workers_best; + (int32_t)conn->evict_threads.current_threads - + (int32_t)conn->evict_tune_workers_best; for (i = 0; i < thread_surplus; i++) { /* @@ -1043,7 +1068,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) conn->evict_tune_stable = true; WT_STAT_CONN_SET(session, cache_eviction_active_workers, conn->evict_threads.current_threads); - return (0); + goto err; } } @@ -1059,13 +1084,13 @@ __evict_tune_workers(WT_SESSION_IMPL *session) conn->evict_threads.current_threads) / EVICT_TUNE_BATCH); if (F_ISSET(cache, WT_CACHE_EVICT_ALL)) { - cur_threads = conn->evict_threads.current_threads; + cur_threads = (int32_t)conn->evict_threads.current_threads; target_threads = WT_MIN(cur_threads + EVICT_TUNE_BATCH, - conn->evict_threads_max); + (int32_t)conn->evict_threads_max); /* * Start the new threads. */ - for (i = 0; i < (target_threads - cur_threads); ++i) { + for (i = cur_threads; i < target_threads; ++i) { /* * If we get an error, it should be because we were * unable to acquire the thread group lock. Break out @@ -1372,7 +1397,7 @@ retry: while (slot < max_entries) { /* Skip files that don't allow eviction. */ btree = dhandle->handle; - if (F_ISSET(btree, WT_BTREE_NO_EVICTION)) + if (btree->evict_disabled > 0) continue; /* @@ -1428,13 +1453,23 @@ retry: while (slot < max_entries) { * the tree's current eviction point, and part of the process is * waiting on this thread to acknowledge that action. 
*/ - if (!F_ISSET(btree, WT_BTREE_NO_EVICTION) && + if (btree->evict_disabled == 0 && !__wt_spin_trylock(session, &cache->evict_walk_lock)) { - if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) { + if (btree->evict_disabled == 0) { + /* + * Assert the handle has a root page: eviction + * should have been locked out if the tree is + * being discarded or the root page is changing. + * As this has not always been the case, assert + * to debug that change. + */ + WT_ASSERT(session, btree->root.page != NULL); + cache->evict_file_next = dhandle; - WT_WITH_DHANDLE(session, dhandle, ret = - __evict_walk_file(session, queue, - max_entries, &slot)); + WT_WITH_DHANDLE(session, dhandle, + ret = __evict_walk_file( + session, queue, max_entries, &slot)); + WT_ASSERT(session, session->split_gen == 0); } __wt_spin_unlock(session, &cache->evict_walk_lock); @@ -1663,7 +1698,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, * eviction fairly visits all pages in trees with a lot of in-cache * content. */ - switch (btree->evict_walk_state) { + switch ((WT_EVICT_WALK_START)btree->evict_start_type) { case WT_EVICT_WALK_NEXT: break; case WT_EVICT_WALK_PREV: @@ -1720,9 +1755,9 @@ __evict_walk_file(WT_SESSION_IMPL *session, * Try a different walk start point next time if a * walk gave up. */ - btree->evict_walk_state = - (btree->evict_walk_state + 1) % - WT_EVICT_WALK_MAX_LEGAL_VALUE; + btree->evict_start_type = + (btree->evict_start_type + 1) % + WT_EVICT_WALK_START_NUM; break; } @@ -2124,6 +2159,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) WT_TXN_GLOBAL *txn_global; WT_TXN_STATE *txn_state; uint64_t init_evict_count, max_pages_evicted; + bool timer; conn = S2C(session); cache = conn->cache; @@ -2144,7 +2180,9 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) __wt_evict_server_wake(session); /* Track how long application threads spend doing eviction. 
*/ - if (WT_STAT_ENABLED(session) && !F_ISSET(session, WT_SESSION_INTERNAL)) + timer = + WT_STAT_ENABLED(session) && !F_ISSET(session, WT_SESSION_INTERNAL); + if (timer) __wt_epoch(session, &enter); for (init_evict_count = cache->pages_evict;; ret = 0) { @@ -2210,8 +2248,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) } } -err: if (WT_STAT_ENABLED(session) && - !F_ISSET(session, WT_SESSION_INTERNAL)) { +err: if (timer) { __wt_epoch(session, &leave); WT_STAT_CONN_INCRV(session, application_cache_time, WT_TIMEDIFF_US(leave, enter)); @@ -2239,7 +2276,7 @@ __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref) page = ref->page; if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU) || - F_ISSET(S2BT(session), WT_BTREE_NO_EVICTION)) + S2BT(session)->evict_disabled > 0) return (false); /* Append to the urgent queue if we can. */ @@ -2249,7 +2286,7 @@ __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref) __wt_spin_lock(session, &cache->evict_queue_lock); if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU) || - F_ISSET(S2BT(session), WT_BTREE_NO_EVICTION)) + S2BT(session)->evict_disabled > 0) goto done; __wt_spin_lock(session, &urgent_queue->evict_lock); diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c index 5b17a78a4dd..85689efd0b1 100644 --- a/src/third_party/wiredtiger/src/evict/evict_page.c +++ b/src/third_party/wiredtiger/src/evict/evict_page.c @@ -480,8 +480,8 @@ __evict_review( if (LF_ISSET(WT_EVICT_INMEM_SPLIT)) return (__wt_split_insert(session, ref)); - /* We are done if reconciliation is disabled. */ - if (F_ISSET(S2BT(session), WT_BTREE_NO_RECONCILE)) + /* If splits are the only permitted operation, we're done. 
*/ + if (F_ISSET(S2BT(session), WT_BTREE_ALLOW_SPLITS)) return (EBUSY); } diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h index 2783d17f825..a3636eb8040 100644 --- a/src/third_party/wiredtiger/src/include/api.h +++ b/src/third_party/wiredtiger/src/include/api.h @@ -7,22 +7,21 @@ */ /* Standard entry points to the API: declares/initializes local variables. */ -#define API_SESSION_INIT(s, h, n, cur, dh) \ +#define API_SESSION_INIT(s, h, n, dh) \ WT_DATA_HANDLE *__olddh = (s)->dhandle; \ const char *__oldname = (s)->name; \ - (s)->cursor = (cur); \ (s)->dhandle = (dh); \ (s)->name = (s)->lastop = #h "." #n; \ -#define API_CALL_NOCONF(s, h, n, cur, dh) do { \ - API_SESSION_INIT(s, h, n, cur, dh); \ +#define API_CALL_NOCONF(s, h, n, dh) do { \ + API_SESSION_INIT(s, h, n, dh); \ WT_ERR(WT_SESSION_CHECK_PANIC(s)); \ __wt_verbose((s), WT_VERB_API, "CALL: " #h ":" #n) -#define API_CALL(s, h, n, cur, dh, config, cfg) do { \ - const char *cfg[] = \ +#define API_CALL(s, h, n, dh, config, cfg) do { \ + const char *(cfg)[] = \ { WT_CONFIG_BASE(s, h##_##n), config, NULL }; \ - API_SESSION_INIT(s, h, n, cur, dh); \ + API_SESSION_INIT(s, h, n, dh); \ WT_ERR(WT_SESSION_CHECK_PANIC(s)); \ if ((config) != NULL) \ WT_ERR(__wt_config_check((s), \ @@ -42,17 +41,17 @@ } while (0) /* An API call wrapped in a transaction if necessary. */ -#define TXN_API_CALL(s, h, n, cur, bt, config, cfg) do { \ +#define TXN_API_CALL(s, h, n, bt, config, cfg) do { \ bool __autotxn = false; \ - API_CALL(s, h, n, bt, cur, config, cfg); \ + API_CALL(s, h, n, bt, config, cfg); \ __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\ if (__autotxn) \ F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT) /* An API call wrapped in a transaction if necessary. 
*/ -#define TXN_API_CALL_NOCONF(s, h, n, cur, bt) do { \ +#define TXN_API_CALL_NOCONF(s, h, n, bt) do { \ bool __autotxn = false; \ - API_CALL_NOCONF(s, h, n, cur, bt); \ + API_CALL_NOCONF(s, h, n, bt); \ __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\ if (__autotxn) \ F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT) @@ -63,15 +62,16 @@ if (__autotxn) { \ if (F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT)) \ F_CLR(&(s)->txn, WT_TXN_AUTOCOMMIT); \ - else if (ret == 0 && !F_ISSET(&(s)->txn, WT_TXN_ERROR)) \ - ret = __wt_txn_commit((s), NULL); \ + else if ((ret) == 0 && \ + !F_ISSET(&(s)->txn, WT_TXN_ERROR)) \ + (ret) = __wt_txn_commit((s), NULL); \ else { \ if (retry) \ WT_TRET(__wt_session_copy_values(s)); \ WT_TRET(__wt_txn_rollback((s), NULL)); \ - if ((ret == 0 || ret == WT_ROLLBACK) && \ + if (((ret) == 0 || (ret) == WT_ROLLBACK) && \ (retry)) { \ - ret = 0; \ + (ret) = 0; \ continue; \ } \ WT_TRET(__wt_session_reset_cursors(s, false)); \ @@ -98,24 +98,24 @@ #define CONNECTION_API_CALL(conn, s, n, config, cfg) \ s = (conn)->default_session; \ - API_CALL(s, WT_CONNECTION, n, NULL, NULL, config, cfg) + API_CALL(s, WT_CONNECTION, n, NULL, config, cfg) #define CONNECTION_API_CALL_NOCONF(conn, s, n) \ s = (conn)->default_session; \ - API_CALL_NOCONF(s, WT_CONNECTION, n, NULL, NULL) + API_CALL_NOCONF(s, WT_CONNECTION, n, NULL) #define SESSION_API_CALL(s, n, config, cfg) \ - API_CALL(s, WT_SESSION, n, NULL, NULL, config, cfg) + API_CALL(s, WT_SESSION, n, NULL, config, cfg) #define SESSION_API_CALL_NOCONF(s, n) \ - API_CALL_NOCONF(s, WT_SESSION, n, NULL, NULL) + API_CALL_NOCONF(s, WT_SESSION, n, NULL) #define SESSION_TXN_API_CALL(s, n, config, cfg) \ - TXN_API_CALL(s, WT_SESSION, n, NULL, NULL, config, cfg) + TXN_API_CALL(s, WT_SESSION, n, NULL, config, cfg) #define CURSOR_API_CALL(cur, s, n, bt) \ (s) = (WT_SESSION_IMPL *)(cur)->session; \ - API_CALL_NOCONF(s, WT_CURSOR, n, cur, \ + API_CALL_NOCONF(s, WT_CURSOR, n, \ ((bt) == NULL) ? 
NULL : ((WT_BTREE *)(bt))->dhandle) #define JOINABLE_CURSOR_CALL_CHECK(cur) \ @@ -128,7 +128,7 @@ #define CURSOR_REMOVE_API_CALL(cur, s, bt) \ (s) = (WT_SESSION_IMPL *)(cur)->session; \ - TXN_API_CALL_NOCONF(s, WT_CURSOR, remove, cur, \ + TXN_API_CALL_NOCONF(s, WT_CURSOR, remove, \ ((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle); #define JOINABLE_CURSOR_REMOVE_API_CALL(cur, s, bt) \ @@ -137,7 +137,7 @@ #define CURSOR_UPDATE_API_CALL(cur, s, n, bt) \ (s) = (WT_SESSION_IMPL *)(cur)->session; \ - TXN_API_CALL_NOCONF(s, WT_CURSOR, n, cur, \ + TXN_API_CALL_NOCONF(s, WT_CURSOR, n, \ ((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle); \ if (F_ISSET(S2C(s), WT_CONN_IN_MEMORY) && \ !F_ISSET((WT_BTREE *)(bt), WT_BTREE_IGNORE_CACHE) && \ @@ -153,4 +153,4 @@ #define ASYNCOP_API_CALL(conn, s, n) \ s = (conn)->default_session; \ - API_CALL_NOCONF(s, asyncop, n, NULL, NULL) + API_CALL_NOCONF(s, asyncop, n, NULL) diff --git a/src/third_party/wiredtiger/src/include/bitstring.i b/src/third_party/wiredtiger/src/include/bitstring.i index 08746beb9b9..118dc0bba01 100644 --- a/src/third_party/wiredtiger/src/include/bitstring.i +++ b/src/third_party/wiredtiger/src/include/bitstring.i @@ -230,7 +230,7 @@ __bit_getv(uint8_t *bitf, uint64_t entry, uint8_t width) #define __BIT_GET(len, mask) \ case len: \ if (__bit_test(bitf, bit)) \ - value |= mask; \ + value |= (mask); \ ++bit \ /* FALLTHROUGH */ diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index 39ca223aebf..f1bb08d2699 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -507,7 +507,7 @@ struct __wt_page { #define WT_INTL_INDEX_GET_SAFE(page) \ ((page)->u.intl.__index) #define WT_INTL_INDEX_GET(session, page, pindex) do { \ - WT_ASSERT(session, session->split_gen != 0); \ + WT_ASSERT(session, (session)->split_gen != 0); \ (pindex) = WT_INTL_INDEX_GET_SAFE(page); \ } while (0) #define 
WT_INTL_INDEX_SET(page, v) do { \ @@ -868,7 +868,7 @@ struct __wt_col { * Return the 0-based array offset based on a WT_COL reference. */ #define WT_COL_SLOT(page, cip) \ - ((uint32_t)(((WT_COL *)cip) - (page)->pg_var)) + ((uint32_t)(((WT_COL *)(cip)) - (page)->pg_var)) /* * WT_IKEY -- @@ -977,10 +977,10 @@ struct __wt_insert { } key; } u; -#define WT_INSERT_KEY_SIZE(ins) (((WT_INSERT *)ins)->u.key.size) +#define WT_INSERT_KEY_SIZE(ins) (((WT_INSERT *)(ins))->u.key.size) #define WT_INSERT_KEY(ins) \ - ((void *)((uint8_t *)(ins) + ((WT_INSERT *)ins)->u.key.offset)) -#define WT_INSERT_RECNO(ins) (((WT_INSERT *)ins)->u.recno) + ((void *)((uint8_t *)(ins) + ((WT_INSERT *)(ins))->u.key.offset)) +#define WT_INSERT_RECNO(ins) (((WT_INSERT *)(ins))->u.recno) WT_INSERT *next[0]; /* forward-linked skip list */ }; @@ -989,9 +989,9 @@ struct __wt_insert { * Skiplist helper macros. */ #define WT_SKIP_FIRST(ins_head) \ - (((ins_head) == NULL) ? NULL : ((WT_INSERT_HEAD *)ins_head)->head[0]) + (((ins_head) == NULL) ? NULL : ((WT_INSERT_HEAD *)(ins_head))->head[0]) #define WT_SKIP_LAST(ins_head) \ - (((ins_head) == NULL) ? NULL : ((WT_INSERT_HEAD *)ins_head)->tail[0]) + (((ins_head) == NULL) ? 
NULL : ((WT_INSERT_HEAD *)(ins_head))->tail[0]) #define WT_SKIP_NEXT(ins) ((ins)->next[0]) #define WT_SKIP_FOREACH(ins, ins_head) \ for ((ins) = WT_SKIP_FIRST(ins_head); \ @@ -1004,7 +1004,7 @@ struct __wt_insert { #define WT_PAGE_ALLOC_AND_SWAP(s, page, dest, v, count) do { \ if (((v) = (dest)) == NULL) { \ WT_ERR(__wt_calloc_def(s, count, &(v))); \ - if (__wt_atomic_cas_ptr(&dest, NULL, v)) \ + if (__wt_atomic_cas_ptr(&(dest), NULL, v)) \ __wt_cache_page_inmem_incr( \ s, page, (count) * sizeof(*(v))); \ else \ diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h index 976c1d2110c..28fe1b94b23 100644 --- a/src/third_party/wiredtiger/src/include/btree.h +++ b/src/third_party/wiredtiger/src/include/btree.h @@ -58,6 +58,12 @@ #define WT_BTREE_DELETE_THRESHOLD 1000 /* + * Minimum size of the chunks (in percentage of the page size) a page gets split + * into during reconciliation. + */ +#define WT_BTREE_MIN_SPLIT_PCT 50 + +/* * WT_BTREE -- * A btree handle. 
*/ @@ -118,15 +124,17 @@ struct __wt_btree { uint64_t last_recno; /* Column-store last record number */ - WT_REF root; /* Root page reference */ - bool modified; /* If the tree ever modified */ - bool bulk_load_ok; /* Bulk-load is a possibility */ + WT_REF root; /* Root page reference */ + bool modified; /* If the tree ever modified */ + uint8_t original; /* Newly created: bulk-load possible + (want a bool but needs atomic cas) */ + + bool lsm_primary; /* Handle is/was the LSM primary */ WT_BM *bm; /* Block manager reference */ u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */ uint64_t checkpoint_gen; /* Checkpoint generation */ - bool include_checkpoint_txn;/* ID checks include checkpoint */ uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */ uint64_t write_gen; /* Write generation */ @@ -139,14 +147,10 @@ struct __wt_btree { u_int evict_walk_period; /* Skip this many LRU walks */ u_int evict_walk_saved; /* Saved walk skips for checkpoints */ u_int evict_walk_skips; /* Number of walks skipped */ - u_int evict_disabled; /* Eviction disabled count */ + int evict_disabled; /* Eviction disabled count */ volatile uint32_t evict_busy; /* Count of threads in eviction */ - enum { - WT_EVICT_WALK_NEXT, WT_EVICT_WALK_PREV, - WT_EVICT_WALK_RAND_NEXT, WT_EVICT_WALK_RAND_PREV - } evict_walk_state; /* Eviction walk state */ -#define WT_EVICT_WALK_MAX_LEGAL_VALUE WT_EVICT_WALK_RAND_PREV + 1 - + int evict_start_type; /* Start position for eviction walk + (see WT_EVICT_WALK_START). 
*/ enum { WT_CKPT_OFF, WT_CKPT_PREPARE, WT_CKPT_RUNNING } checkpointing; /* Checkpoint in progress */ @@ -159,15 +163,14 @@ struct __wt_btree { WT_SPINLOCK flush_lock; /* Lock to flush the tree's pages */ /* Flags values up to 0xff are reserved for WT_DHANDLE_* */ -#define WT_BTREE_BULK 0x000100 /* Bulk-load handle */ -#define WT_BTREE_IGNORE_CACHE 0x000200 /* Cache-resident object */ -#define WT_BTREE_IN_MEMORY 0x000400 /* Cache-resident object */ -#define WT_BTREE_LOOKASIDE 0x000800 /* Look-aside table */ -#define WT_BTREE_LSM_PRIMARY 0x001000 /* Handle is current LSM primary */ -#define WT_BTREE_NO_CHECKPOINT 0x002000 /* Disable checkpoints */ -#define WT_BTREE_NO_EVICTION 0x004000 /* Disable eviction */ +#define WT_BTREE_ALLOW_SPLITS 0x000100 /* Allow splits, even with no evict */ +#define WT_BTREE_BULK 0x000200 /* Bulk-load handle */ +#define WT_BTREE_CLOSED 0x000400 /* Handle closed */ +#define WT_BTREE_IGNORE_CACHE 0x000800 /* Cache-resident object */ +#define WT_BTREE_IN_MEMORY 0x001000 /* Cache-resident object */ +#define WT_BTREE_LOOKASIDE 0x002000 /* Look-aside table */ +#define WT_BTREE_NO_CHECKPOINT 0x004000 /* Disable checkpoints */ #define WT_BTREE_NO_LOGGING 0x008000 /* Disable logging */ -#define WT_BTREE_NO_RECONCILE 0x010000 /* Allow splits, even with no evict */ #define WT_BTREE_REBALANCE 0x020000 /* Handle is for rebalance */ #define WT_BTREE_SALVAGE 0x040000 /* Handle is for salvage */ #define WT_BTREE_SKIP_CKPT 0x080000 /* Handle skipped checkpoint */ diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index 315efa86fa6..1d6fcd6272c 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -149,7 +149,7 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) if (WT_PAGE_IS_INTERNAL(page)) { (void)__wt_atomic_add64(&btree->bytes_dirty_intl, size); (void)__wt_atomic_add64(&cache->bytes_dirty_intl, size); 
- } else if (!F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) { + } else if (!btree->lsm_primary) { (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size); (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size); } @@ -285,7 +285,7 @@ __wt_cache_page_byte_dirty_decr( decr, "WT_BTREE.bytes_dirty_intl"); __wt_cache_decr_check_uint64(session, &cache->bytes_dirty_intl, decr, "WT_CACHE.bytes_dirty_intl"); - } else if (!F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) { + } else if (!btree->lsm_primary) { __wt_cache_decr_check_uint64(session, &btree->bytes_dirty_leaf, decr, "WT_BTREE.bytes_dirty_leaf"); __wt_cache_decr_check_uint64(session, &cache->bytes_dirty_leaf, @@ -345,7 +345,7 @@ __wt_cache_dirty_incr(WT_SESSION_IMPL *session, WT_PAGE *page) (void)__wt_atomic_add64(&cache->bytes_dirty_intl, size); (void)__wt_atomic_add64(&cache->pages_dirty_intl, 1); } else { - if (!F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) { + if (!btree->lsm_primary) { (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size); (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size); } @@ -413,7 +413,7 @@ __wt_cache_page_image_incr(WT_SESSION_IMPL *session, uint32_t size) * Evict pages from the cache. */ static inline void -__wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) +__wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page, bool rewrite) { WT_BTREE *btree; WT_CACHE *cache; @@ -444,7 +444,7 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) __wt_cache_decr_zero_uint64(session, &cache->bytes_dirty_intl, modify->bytes_dirty, "WT_CACHE.bytes_dirty_intl"); - } else if (!F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) { + } else if (!btree->lsm_primary) { __wt_cache_decr_zero_uint64(session, &btree->bytes_dirty_leaf, modify->bytes_dirty, "WT_BTREE.bytes_dirty_leaf"); @@ -456,7 +456,15 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) /* Update pages and bytes evicted. 
*/ (void)__wt_atomic_add64(&cache->bytes_evict, page->memory_footprint); - (void)__wt_atomic_addv64(&cache->pages_evict, 1); + + /* + * Don't count rewrites as eviction: there's no guarantee we are making + * real progress. + */ + if (rewrite) + (void)__wt_atomic_subv64(&cache->pages_inmem, 1); + else + (void)__wt_atomic_addv64(&cache->pages_evict, 1); } /* @@ -1229,7 +1237,6 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page) * data in the last skiplist on the page. Split if there are enough * items and the skiplist does not fit within a single disk page. */ - ins_head = page->type == WT_PAGE_ROW_LEAF ? (page->entries == 0 ? WT_ROW_INSERT_SMALLEST(page) : @@ -1347,8 +1354,13 @@ __wt_page_can_evict( * the original parent page's index, because evicting an internal page * discards its WT_REF array, and a thread traversing the original * parent page index might see a freed WT_REF. + * + * One special case where we know this is safe is if the handle is + * locked exclusive (e.g., when the whole tree is being evicted). In + * that case, no readers can be looking at an old index. */ - if (WT_PAGE_IS_INTERNAL(page) && !__wt_split_obsolete( + if (!F_ISSET(session->dhandle, WT_DHANDLE_EXCLUSIVE) && + WT_PAGE_IS_INTERNAL(page) && !__wt_split_obsolete( session, page->pg_intl_split_gen)) return (false); @@ -1401,7 +1413,7 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) if (page->read_gen != WT_READGEN_OLDEST || LF_ISSET(WT_READ_NO_EVICT) || F_ISSET(session, WT_SESSION_NO_EVICTION) || - F_ISSET(btree, WT_BTREE_NO_EVICTION) || + btree->evict_disabled > 0 || !__wt_page_can_evict(session, ref, NULL)) return (__wt_hazard_clear(session, ref)); @@ -1521,7 +1533,7 @@ __wt_btree_lsm_over_size(WT_SESSION_IMPL *session, uint64_t maxsize) return (false); /* A tree that can be evicted always requires a switch. 
*/ - if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) + if (btree->evict_disabled == 0) return (true); /* Check for a tree with a single leaf page. */ @@ -1546,55 +1558,6 @@ __wt_btree_lsm_over_size(WT_SESSION_IMPL *session, uint64_t maxsize) } /* - * __wt_btree_lsm_switch_primary -- - * Switch a btree handle to/from the current primary chunk of an LSM tree. - */ -static inline void -__wt_btree_lsm_switch_primary(WT_SESSION_IMPL *session, bool on) -{ - WT_BTREE *btree; - WT_CACHE *cache; - WT_PAGE *child, *root; - WT_PAGE_INDEX *pindex; - WT_REF *first; - size_t size; - - btree = S2BT(session); - cache = S2C(session)->cache; - root = btree->root.page; - - if (!F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) - F_SET(btree, WT_BTREE_LSM_PRIMARY | WT_BTREE_NO_EVICTION); - if (!on && F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) { - pindex = WT_INTL_INDEX_GET_SAFE(root); - if (!F_ISSET(btree, WT_BTREE_NO_EVICTION) || - pindex->entries != 1) - return; - first = pindex->index[0]; - - /* - * We're reaching down into the page without a hazard pointer, - * but that's OK because we know that no-eviction is set so the - * page can't disappear. - * - * While this tree was the primary, its dirty bytes were not - * included in the cache accounting. Fix that now before we - * open it up for eviction. - */ - child = first->page; - if (first->state == WT_REF_MEM && - child->type == WT_PAGE_ROW_LEAF && - __wt_page_is_modified(child)) { - size = child->modify->bytes_dirty; - (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size); - (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size); - } - - F_CLR(btree, WT_BTREE_LSM_PRIMARY | WT_BTREE_NO_EVICTION); - } -} - -/* * __wt_split_descent_race -- * Return if we raced with an internal page split when descending the tree. 
*/ diff --git a/src/third_party/wiredtiger/src/include/buf.i b/src/third_party/wiredtiger/src/include/buf.i index ebbee6b4633..d192e292dcf 100644 --- a/src/third_party/wiredtiger/src/include/buf.i +++ b/src/third_party/wiredtiger/src/include/buf.i @@ -37,28 +37,30 @@ __wt_buf_extend(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) /* * __wt_buf_init -- - * Initialize a buffer at a specific size. + * Create an empty buffer at a specific size. */ static inline int __wt_buf_init(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) { + /* + * The buffer grow function does what we need, but anticipates data + * referenced by the buffer. Avoid any data copy by setting data to + * reference the buffer's allocated memory, and clearing it. + */ buf->data = buf->mem; - buf->size = 0; /* Clear existing data length */ - WT_RET(__wt_buf_grow(session, buf, size)); - - return (0); + buf->size = 0; + return (__wt_buf_grow(session, buf, size)); } /* * __wt_buf_initsize -- - * Initialize a buffer at a specific size, and set the data length. + * Create an empty buffer at a specific size, and set the data length. */ static inline int __wt_buf_initsize(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) { - buf->data = buf->mem; - buf->size = 0; /* Clear existing data length */ - WT_RET(__wt_buf_grow(session, buf, size)); + WT_RET(__wt_buf_init(session, buf, size)); + buf->size = size; /* Set the data length. */ return (0); @@ -72,14 +74,15 @@ static inline int __wt_buf_set( WT_SESSION_IMPL *session, WT_ITEM *buf, const void *data, size_t size) { - /* Ensure the buffer is large enough. */ - WT_RET(__wt_buf_initsize(session, buf, size)); - - /* Copy the data, allowing for overlapping strings. */ - if (size != 0) - memmove(buf->mem, data, size); - - return (0); + /* + * The buffer grow function does what we need, but expects the data to + * be referenced by the buffer. If we're copying data from outside the + * buffer, set it up so it makes sense to the buffer grow function. 
(No + * test needed, this works if WT_ITEM.data is already set to "data".) + */ + buf->data = data; + buf->size = size; + return (__wt_buf_grow(session, buf, size)); } /* diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h index abd5a1901f7..04920c3585a 100644 --- a/src/third_party/wiredtiger/src/include/cache.h +++ b/src/third_party/wiredtiger/src/include/cache.h @@ -18,6 +18,15 @@ #define WT_EVICT_MAX_TREES 1000 /* Maximum walk points */ +/* Ways to position when starting an eviction walk. */ +typedef enum { + WT_EVICT_WALK_NEXT, + WT_EVICT_WALK_PREV, + WT_EVICT_WALK_RAND_NEXT, + WT_EVICT_WALK_RAND_PREV +} WT_EVICT_WALK_START; +#define WT_EVICT_WALK_START_NUM (WT_EVICT_WALK_RAND_PREV + 1) + /* * WT_EVICT_ENTRY -- * Encapsulation of an eviction candidate. diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i index d71978ccf35..90dd1bcdda8 100644 --- a/src/third_party/wiredtiger/src/include/cache.i +++ b/src/third_party/wiredtiger/src/include/cache.i @@ -360,11 +360,13 @@ __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool *didworkp) /* * LSM sets the no-cache-check flag when holding the LSM tree lock, in - * that case, or when holding the schema or handle list locks (which - * block eviction), we don't want to highjack the thread for eviction. + * that case, or when holding the handle list, schema or table locks + * (which can block checkpoints and eviction), don't block the thread + * for eviction. */ if (F_ISSET(session, WT_SESSION_NO_EVICTION | - WT_SESSION_LOCKED_HANDLE_LIST_WRITE | WT_SESSION_LOCKED_SCHEMA)) + WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_LOCKED_SCHEMA | + WT_SESSION_LOCKED_TABLE)) return (0); /* In memory configurations don't block when the cache is full. 
*/ @@ -372,11 +374,14 @@ __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool *didworkp) return (0); /* - * Threads operating on cache-resident trees are ignored because they're - * not contributing to the problem. + * Threads operating on cache-resident trees are ignored because + * they're not contributing to the problem. We also don't block while + * reading metadata because we're likely to be holding some other + * resources that could block checkpoints or eviction. */ btree = S2BT_SAFE(session); - if (btree != NULL && F_ISSET(btree, WT_BTREE_IN_MEMORY)) + if (btree != NULL && (F_ISSET(btree, WT_BTREE_IN_MEMORY) || + WT_IS_METADATA(session->dhandle))) return (0); /* Check if eviction is needed. */ diff --git a/src/third_party/wiredtiger/src/include/cell.i b/src/third_party/wiredtiger/src/include/cell.i index c130768e595..71c2515daf0 100644 --- a/src/third_party/wiredtiger/src/include/cell.i +++ b/src/third_party/wiredtiger/src/include/cell.i @@ -361,14 +361,12 @@ __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size) cell->__chunk[0] = (uint8_t) ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT); return (1); - } else { - byte = (uint8_t)size; /* Type + length */ - cell->__chunk[0] = (uint8_t) - ((byte << WT_CELL_SHORT_SHIFT) | - WT_CELL_KEY_SHORT_PFX); - cell->__chunk[1] = prefix; /* Prefix */ - return (2); } + byte = (uint8_t)size; /* Type + length */ + cell->__chunk[0] = (uint8_t) + ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT_PFX); + cell->__chunk[1] = prefix; /* Prefix */ + return (2); } if (prefix == 0) { @@ -569,8 +567,8 @@ __wt_cell_unpack_safe( */ #define WT_CELL_LEN_CHK(t, len) do { \ if (start != NULL && \ - ((uint8_t *)t < (uint8_t *)start || \ - (((uint8_t *)t) + (len)) > (uint8_t *)end)) \ + ((uint8_t *)(t) < (uint8_t *)start || \ + (((uint8_t *)(t)) + (len)) > (uint8_t *)end)) \ return (WT_ERROR); \ } while (0) diff --git a/src/third_party/wiredtiger/src/include/column.i 
b/src/third_party/wiredtiger/src/include/column.i index c1b45a1f4e0..07b627315e6 100644 --- a/src/third_party/wiredtiger/src/include/column.i +++ b/src/third_party/wiredtiger/src/include/column.i @@ -108,7 +108,7 @@ __col_insert_search_match(WT_INSERT_HEAD *ins_head, uint64_t recno) /* Fast path the check for values at the end of the skiplist. */ if (recno > WT_INSERT_RECNO(ret_ins)) return (NULL); - else if (recno == WT_INSERT_RECNO(ret_ins)) + if (recno == WT_INSERT_RECNO(ret_ins)) return (ret_ins); /* @@ -127,7 +127,7 @@ __col_insert_search_match(WT_INSERT_HEAD *ins_head, uint64_t recno) if (cmp == 0) /* Exact match: return */ return (*insp); - else if (cmp > 0) /* Keep going at this level */ + if (cmp > 0) /* Keep going at this level */ insp = &(*insp)->next[i]; else { /* Drop down a level */ --i; diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index ce483d3291a..6c23492e926 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -127,7 +127,7 @@ struct __wt_named_extractor { F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \ TAILQ_INSERT_HEAD(&(conn)->dhqh, dhandle, q); \ TAILQ_INSERT_HEAD(&(conn)->dhhash[bucket], dhandle, hashq); \ - ++conn->dhandle_count; \ + ++(conn)->dhandle_count; \ } while (0) #define WT_CONN_DHANDLE_REMOVE(conn, dhandle, bucket) do { \ @@ -135,7 +135,7 @@ struct __wt_named_extractor { F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \ TAILQ_REMOVE(&(conn)->dhqh, dhandle, q); \ TAILQ_REMOVE(&(conn)->dhhash[bucket], dhandle, hashq); \ - --conn->dhandle_count; \ + --(conn)->dhandle_count; \ } while (0) /* diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h index 31c8963a486..f32b4250d30 100644 --- a/src/third_party/wiredtiger/src/include/cursor.h +++ b/src/third_party/wiredtiger/src/include/cursor.h @@ -73,7 +73,7 @@ struct 
__wt_cursor_backup { #define WT_CURBACKUP_LOCKER 0x01 /* Hot-backup started */ uint8_t flags; }; -#define WT_CURSOR_BACKUP_ID(cursor) (((WT_CURSOR_BACKUP *)cursor)->maxid) +#define WT_CURSOR_BACKUP_ID(cursor) (((WT_CURSOR_BACKUP *)(cursor))->maxid) struct __wt_cursor_btree { WT_CURSOR iface; @@ -474,7 +474,7 @@ struct __wt_cursor_stat { * Return a reference to a statistic cursor's stats structures. */ #define WT_CURSOR_STATS(cursor) \ - (((WT_CURSOR_STAT *)cursor)->stats) + (((WT_CURSOR_STAT *)(cursor))->stats) struct __wt_cursor_table { WT_CURSOR iface; @@ -493,7 +493,7 @@ struct __wt_cursor_table { }; #define WT_CURSOR_PRIMARY(cursor) \ - (((WT_CURSOR_TABLE *)cursor)->cg_cursors[0]) + (((WT_CURSOR_TABLE *)(cursor))->cg_cursors[0]) #define WT_CURSOR_RECNO(cursor) WT_STREQ((cursor)->key_format, "r") @@ -550,4 +550,4 @@ struct __wt_cursor_table { } while (0) #define WT_CURSOR_RAW_OK \ - WT_CURSTD_DUMP_HEX | WT_CURSTD_DUMP_PRINT | WT_CURSTD_RAW + (WT_CURSTD_DUMP_HEX | WT_CURSTD_DUMP_PRINT | WT_CURSTD_RAW) diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i index c3fcef9a13d..12044e0e228 100644 --- a/src/third_party/wiredtiger/src/include/cursor.i +++ b/src/third_party/wiredtiger/src/include/cursor.i @@ -76,34 +76,19 @@ __cursor_leave(WT_SESSION_IMPL *session) } /* - * __curfile_enter -- - * Activate a file cursor. - */ -static inline int -__curfile_enter(WT_CURSOR_BTREE *cbt) -{ - WT_SESSION_IMPL *session; - - session = (WT_SESSION_IMPL *)cbt->iface.session; - - if (!F_ISSET(cbt, WT_CBT_NO_TXN)) - WT_RET(__cursor_enter(session)); - F_SET(cbt, WT_CBT_ACTIVE); - return (0); -} - -/* - * __curfile_leave -- - * Clear a file cursor's position. + * __cursor_reset -- + * Reset the cursor, it no longer holds any position. 
*/ static inline int -__curfile_leave(WT_CURSOR_BTREE *cbt) +__cursor_reset(WT_CURSOR_BTREE *cbt) { WT_DECL_RET; WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)cbt->iface.session; + __cursor_pos_clear(cbt); + /* If the cursor was active, deactivate it. */ if (F_ISSET(cbt, WT_CBT_ACTIVE)) { if (!F_ISSET(cbt, WT_CBT_NO_TXN)) @@ -111,12 +96,15 @@ __curfile_leave(WT_CURSOR_BTREE *cbt) F_CLR(cbt, WT_CBT_ACTIVE); } + /* If we're not holding a cursor reference, we're done. */ + if (cbt->ref == NULL) + return (0); + /* * If we were scanning and saw a lot of deleted records on this page, * try to evict the page when we release it. */ - if (cbt->ref != NULL && - cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD) + if (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD) __wt_page_evict_soon(session, cbt->ref); cbt->page_deleted_count = 0; @@ -247,7 +235,7 @@ __cursor_func_init(WT_CURSOR_BTREE *cbt, bool reenter) #ifdef HAVE_DIAGNOSTIC __wt_cursor_key_order_reset(cbt); #endif - WT_RET(__curfile_leave(cbt)); + WT_RET(__cursor_reset(cbt)); } /* @@ -259,8 +247,12 @@ __cursor_func_init(WT_CURSOR_BTREE *cbt, bool reenter) /* If the transaction is idle, check that the cache isn't full. */ WT_RET(__wt_txn_idle_cache_check(session)); - if (!F_ISSET(cbt, WT_CBT_ACTIVE)) - WT_RET(__curfile_enter(cbt)); + /* Activate the file cursor. */ + if (!F_ISSET(cbt, WT_CBT_ACTIVE)) { + if (!F_ISSET(cbt, WT_CBT_NO_TXN)) + WT_RET(__cursor_enter(session)); + F_SET(cbt, WT_CBT_ACTIVE); + } /* * If this is an ordinary transactional cursor, make sure we are set up @@ -272,24 +264,6 @@ __cursor_func_init(WT_CURSOR_BTREE *cbt, bool reenter) } /* - * __cursor_reset -- - * Reset the cursor. - */ -static inline int -__cursor_reset(WT_CURSOR_BTREE *cbt) -{ - WT_DECL_RET; - - /* - * The cursor is leaving the API, and no longer holds any position, - * generally called to clean up the cursor after an error. 
- */ - ret = __curfile_leave(cbt); - __cursor_pos_clear(cbt); - return (ret); -} - -/* * __cursor_row_slot_return -- * Return a row-store leaf page slot's K/V pair. */ diff --git a/src/third_party/wiredtiger/src/include/dhandle.h b/src/third_party/wiredtiger/src/include/dhandle.h index 4f318e7bccf..8861e96112b 100644 --- a/src/third_party/wiredtiger/src/include/dhandle.h +++ b/src/third_party/wiredtiger/src/include/dhandle.h @@ -38,20 +38,20 @@ (((WT_CURSOR_BTREE *)((s)->meta_cursor))->btree->dhandle) #define WT_DHANDLE_ACQUIRE(dhandle) \ - (void)__wt_atomic_add32(&dhandle->session_ref, 1) + (void)__wt_atomic_add32(&(dhandle)->session_ref, 1) #define WT_DHANDLE_RELEASE(dhandle) \ - (void)__wt_atomic_sub32(&dhandle->session_ref, 1) + (void)__wt_atomic_sub32(&(dhandle)->session_ref, 1) #define WT_DHANDLE_NEXT(session, dhandle, head, field) do { \ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));\ - if (dhandle == NULL) \ - dhandle = TAILQ_FIRST(head); \ + if ((dhandle) == NULL) \ + (dhandle) = TAILQ_FIRST(head); \ else { \ WT_DHANDLE_RELEASE(dhandle); \ - dhandle = TAILQ_NEXT(dhandle, field); \ + (dhandle) = TAILQ_NEXT(dhandle, field); \ } \ - if (dhandle != NULL) \ + if ((dhandle) != NULL) \ WT_DHANDLE_ACQUIRE(dhandle); \ } while (0) diff --git a/src/third_party/wiredtiger/src/include/error.h b/src/third_party/wiredtiger/src/include/error.h index bbb7f989332..c338acb370f 100644 --- a/src/third_party/wiredtiger/src/include/error.h +++ b/src/third_party/wiredtiger/src/include/error.h @@ -67,14 +67,16 @@ int __ret; \ if ((__ret = (a)) != 0 && \ (__ret == WT_PANIC || \ - ret == 0 || ret == WT_DUPLICATE_KEY || ret == WT_NOTFOUND)) \ + ret == 0 || ret == WT_DUPLICATE_KEY || \ + ret == WT_NOTFOUND || ret == WT_RESTART)) \ ret = __ret; \ } while (0) #define WT_TRET_ERROR_OK(a, e) do { \ int __ret; \ if ((__ret = (a)) != 0 && __ret != (e) && \ (__ret == WT_PANIC || \ - ret == 0 || ret == WT_DUPLICATE_KEY || ret == WT_NOTFOUND)) \ + ret == 0 || ret == 
WT_DUPLICATE_KEY || \ + ret == WT_NOTFOUND || ret == WT_RESTART)) \ ret = __ret; \ } while (0) #define WT_TRET_NOTFOUND_OK(a) WT_TRET_ERROR_OK(a, WT_NOTFOUND) diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 19ad9a880df..55ba1bada7c 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -103,7 +103,7 @@ extern int __wt_btcur_reset(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((wa extern int __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_insert(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btcur_update_check(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_remove(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -126,6 +126,7 @@ extern int __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) extern void __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern bool __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_ref_out_int(WT_SESSION_IMPL *session, WT_REF *ref, bool rewrite) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_free_ref( WT_SESSION_IMPL *session, WT_REF *ref, int page_type, bool free_pages) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -133,10 +134,10 @@ extern void __wt_free_ref_index(WT_SESSION_IMPL *session, WT_PAGE *page, WT_PAGE extern void __wt_free_update_list(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_btree_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_root_ref_init(WT_REF *root_ref, WT_PAGE *root, bool is_recno) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_tree_open( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_new_leaf_page(WT_SESSION_IMPL 
*session, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_btree_evictable(WT_SESSION_IMPL *session, bool on) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_huffman_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_btree_huffman_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -161,6 +162,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags #endif ) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern bool __wt_split_obsolete(WT_SESSION_IMPL *session, uint64_t split_gen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -288,7 +290,7 @@ extern int __wt_curconfig_open(WT_SESSION_IMPL *session, const char *uri, const extern int __wt_curds_open( WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_DATA_SOURCE *dsrc, WT_CURSOR **cursorp) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_curfile_next_random(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curfile_update_check(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_curfile_insert_check(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_curjoin_joined(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -402,8 +404,7 @@ extern int __wt_logop_row_truncate_unpack( WT_SESSION_IMPL *session, const uint8 extern int __wt_logop_row_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_txn_op_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_slot_switch( WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_slot_new(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_log_slot_switch(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced, bool *did_work) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_slot_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_slot_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -456,13 +457,14 @@ extern int __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) extern int __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_lsm_worker_stop(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_meta_apply_all(WT_SESSION_IMPL *session, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_meta_checkpoint(WT_SESSION_IMPL *session, const char *fname, const char *checkpoint, WT_CKPT *ckpt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_meta_checkpoint_last_name( WT_SESSION_IMPL *session, const char *fname, const char **namep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_meta_checkpoint_clear(WT_SESSION_IMPL *session, const char *fname) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_meta_ckptlist_get( WT_SESSION_IMPL *session, const char *fname, WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_meta_ckptlist_set(WT_SESSION_IMPL *session, const char *fname, WT_CKPT *ckptbase, WT_LSN *ckptlsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT *ckptbase) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_meta_checkpoint_free(WT_SESSION_IMPL *session, WT_CKPT *ckpt) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_ext_metadata_insert(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern 
int __wt_ext_metadata_remove( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/third_party/wiredtiger/src/include/extern_posix.h b/src/third_party/wiredtiger/src/include/extern_posix.h index fed7835ada1..57d94e392d1 100644 --- a/src/third_party/wiredtiger/src/include/extern_posix.h +++ b/src/third_party/wiredtiger/src/include/extern_posix.h @@ -24,8 +24,9 @@ extern bool __wt_has_priv(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden") extern void __wt_stream_set_line_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_stream_set_no_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_yield(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); diff --git 
a/src/third_party/wiredtiger/src/include/extern_win.h b/src/third_party/wiredtiger/src/include/extern_win.h index 0bfc821c7a6..43127a0c79f 100644 --- a/src/third_party/wiredtiger/src/include/extern_win.h +++ b/src/third_party/wiredtiger/src/include/extern_win.h @@ -22,9 +22,10 @@ extern bool __wt_has_priv(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden") extern void __wt_stream_set_line_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_stream_set_no_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_to_utf16_string( WT_SESSION_IMPL *session, const char*utf8, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_to_utf8_string( WT_SESSION_IMPL *session, const wchar_t*wide, WT_ITEM **outbuf) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/third_party/wiredtiger/src/include/flags.h b/src/third_party/wiredtiger/src/include/flags.h index c1fff920e3b..f26a45c68f5 100644 --- a/src/third_party/wiredtiger/src/include/flags.h +++ b/src/third_party/wiredtiger/src/include/flags.h @@ -6,19 +6,19 @@ #define WT_CONN_CACHE_POOL 0x00000001 #define WT_CONN_CKPT_SYNC 0x00000002 #define WT_CONN_CLOSING 0x00000004 -#define WT_CONN_EVICTION_RUN 0x00000008 -#define WT_CONN_IN_MEMORY 0x00000010 -#define WT_CONN_LAS_OPEN 0x00000020 -#define WT_CONN_LEAK_MEMORY 0x00000040 -#define WT_CONN_LOG_SERVER_RUN 0x00000080 +#define WT_CONN_CLOSING_NO_MORE_OPENS 0x00000008 +#define WT_CONN_EVICTION_RUN 0x00000010 +#define WT_CONN_IN_MEMORY 0x00000020 +#define WT_CONN_LAS_OPEN 0x00000040 +#define WT_CONN_LEAK_MEMORY 0x00000080 #define WT_CONN_LSM_MERGE 0x00000100 #define WT_CONN_PANIC 0x00000200 #define WT_CONN_READONLY 0x00000400 #define WT_CONN_RECOVERING 0x00000800 #define WT_CONN_SERVER_ASYNC 0x00001000 #define WT_CONN_SERVER_CHECKPOINT 0x00002000 -#define WT_CONN_SERVER_LSM 0x00004000 -#define WT_CONN_SERVER_RUN 0x00008000 +#define WT_CONN_SERVER_LOG 0x00004000 +#define WT_CONN_SERVER_LSM 0x00008000 #define WT_CONN_SERVER_STATISTICS 0x00010000 #define WT_CONN_SERVER_SWEEP 0x00020000 #define WT_CONN_WAS_BACKUP 0x00040000 diff --git a/src/third_party/wiredtiger/src/include/intpack.i b/src/third_party/wiredtiger/src/include/intpack.i index e8bea58cede..a534de9d9a8 100644 --- a/src/third_party/wiredtiger/src/include/intpack.i +++ b/src/third_party/wiredtiger/src/include/intpack.i @@ -59,21 +59,21 @@ /* Count the leading zero bytes. */ #if defined(__GNUC__) #define WT_LEADING_ZEROS(x, i) \ - (i = (x == 0) ? (int)sizeof(x) : __builtin_clzll(x) >> 3) + ((i) = ((x) == 0) ? 
(int)sizeof(x) : __builtin_clzll(x) >> 3) #elif defined(_MSC_VER) #define WT_LEADING_ZEROS(x, i) do { \ - if (x == 0) i = (int)sizeof(x); \ + if ((x) == 0) (i) = (int)sizeof(x); \ else { \ unsigned long __index; \ _BitScanReverse64(&__index, x); \ __index = 63 ^ __index; \ - i = (int)(__index >> 3); } \ + (i) = (int)(__index >> 3); } \ } while (0) #else #define WT_LEADING_ZEROS(x, i) do { \ uint64_t __x = (x); \ uint64_t __m = (uint64_t)0xff << 56; \ - for (i = 0; !(__x & __m) && i != 8; i++) \ + for ((i) = 0; !(__x & __m) && (i) != 8; (i)++) \ __m >>= 8; \ } while (0) #endif @@ -231,7 +231,8 @@ __wt_vpack_int(uint8_t **pp, size_t maxlen, int64_t x) if (x < NEG_2BYTE_MIN) { *p = NEG_MULTI_MARKER; return (__wt_vpack_negint(pp, maxlen, (uint64_t)x)); - } else if (x < NEG_1BYTE_MIN) { + } + if (x < NEG_1BYTE_MIN) { WT_SIZE_CHECK_PACK(2, maxlen); x -= NEG_2BYTE_MIN; *p++ = NEG_2BYTE_MARKER | GET_BITS(x, 13, 8); @@ -358,12 +359,10 @@ __wt_vsize_uint(uint64_t x) { if (x <= POS_1BYTE_MAX) return (1); - else if (x <= POS_2BYTE_MAX + 1) { + if (x <= POS_2BYTE_MAX + 1) return (2); - } else { - x -= POS_2BYTE_MAX + 1; - return (__wt_vsize_posint(x)); - } + x -= POS_2BYTE_MAX + 1; + return (__wt_vsize_posint(x)); } /* @@ -373,13 +372,12 @@ __wt_vsize_uint(uint64_t x) static inline size_t __wt_vsize_int(int64_t x) { - if (x < NEG_2BYTE_MIN) { + if (x < NEG_2BYTE_MIN) return (__wt_vsize_negint((uint64_t)x)); - } else if (x < NEG_1BYTE_MIN) { + if (x < NEG_1BYTE_MIN) return (2); - } else if (x < 0) { + if (x < 0) return (1); - } else - /* For non-negative values, use the unsigned code above. */ - return (__wt_vsize_uint((uint64_t)x)); + /* For non-negative values, use the unsigned code above. 
*/ + return (__wt_vsize_uint((uint64_t)x)); } diff --git a/src/third_party/wiredtiger/src/include/lint.h b/src/third_party/wiredtiger/src/include/lint.h index e20a83144ee..2d0f47988b7 100644 --- a/src/third_party/wiredtiger/src/include/lint.h +++ b/src/third_party/wiredtiger/src/include/lint.h @@ -29,9 +29,9 @@ __wt_atomic_fetch_add##name(type *vp, type v) \ { \ type orig; \ \ - old = *vp; \ + orig = *vp; \ *vp += v; \ - return (old); \ + return (orig); \ } \ static inline ret \ __wt_atomic_store##name(type *vp, type v) \ @@ -40,7 +40,7 @@ __wt_atomic_store##name(type *vp, type v) \ \ orig = *vp; \ *vp = v; \ - return (old); \ + return (orig); \ } \ static inline ret \ __wt_atomic_sub##name(type *vp, type v) \ @@ -49,9 +49,9 @@ __wt_atomic_sub##name(type *vp, type v) \ return (*vp); \ } \ static inline bool \ -__wt_atomic_cas##name(type *vp, type old, type new) \ +__wt_atomic_cas##name(type *vp, type orig, type new) \ { \ - if (*vp == old) { \ + if (*vp == orig) { \ *vp = new; \ return (true); \ } \ @@ -75,8 +75,8 @@ WT_ATOMIC_FUNC(size, size_t, size_t) * Pointer compare and swap. */ static inline bool -__wt_atomic_cas_ptr(void *vp, void *old, void *new) { - if (*(void **)vp == old) { +__wt_atomic_cas_ptr(void *vp, void *orig, void *new) { + if (*(void **)vp == orig) { *(void **)vp = new; return (true); } diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h index a6be3582b4d..fb3c961417f 100644 --- a/src/third_party/wiredtiger/src/include/log.h +++ b/src/third_party/wiredtiger/src/include/log.h @@ -86,8 +86,8 @@ union __wt_lsn { * The high bit is reserved for the special states. If the high bit is * set (WT_LOG_SLOT_RESERVED) then we are guaranteed to be in a special state. 
*/ -#define WT_LOG_SLOT_FREE -1 /* Not in use */ -#define WT_LOG_SLOT_WRITTEN -2 /* Slot data written, not processed */ +#define WT_LOG_SLOT_FREE (-1) /* Not in use */ +#define WT_LOG_SLOT_WRITTEN (-2) /* Slot data written, not processed */ /* * We allocate the buffer size, but trigger a slot switch when we cross @@ -144,8 +144,8 @@ union __wt_lsn { /* Slot is in use, but closed to new joins */ #define WT_LOG_SLOT_CLOSED(state) \ (WT_LOG_SLOT_ACTIVE(state) && \ - (FLD64_ISSET((uint64_t)state, WT_LOG_SLOT_CLOSE) && \ - !FLD64_ISSET((uint64_t)state, WT_LOG_SLOT_RESERVED))) + (FLD64_ISSET((uint64_t)(state), WT_LOG_SLOT_CLOSE) && \ + !FLD64_ISSET((uint64_t)(state), WT_LOG_SLOT_RESERVED))) /* Slot is in use, all data copied into buffer */ #define WT_LOG_SLOT_INPROGRESS(state) \ (WT_LOG_SLOT_RELEASED(state) != WT_LOG_SLOT_JOINED(state)) @@ -185,7 +185,7 @@ struct __wt_logslot { #define WT_WITH_SLOT_LOCK(session, log, op) do { \ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SLOT)); \ WT_WITH_LOCK_WAIT(session, \ - &log->log_slot_lock, WT_SESSION_LOCKED_SLOT, op); \ + &(log)->log_slot_lock, WT_SESSION_LOCKED_SLOT, op); \ } while (0) struct __wt_myslot { @@ -193,7 +193,8 @@ struct __wt_myslot { wt_off_t end_offset; /* My end offset in buffer */ wt_off_t offset; /* Slot buffer offset */ #define WT_MYSLOT_CLOSE 0x01 /* This thread is closing the slot */ -#define WT_MYSLOT_UNBUFFERED 0x02 /* Write directly */ +#define WT_MYSLOT_NEEDS_RELEASE 0x02 /* This thread is releasing the slot */ +#define WT_MYSLOT_UNBUFFERED 0x04 /* Write directly */ uint32_t flags; /* Flags */ }; diff --git a/src/third_party/wiredtiger/src/include/lsm.h b/src/third_party/wiredtiger/src/include/lsm.h index 2bbb813bad2..e3f6897ef9d 100644 --- a/src/third_party/wiredtiger/src/include/lsm.h +++ b/src/third_party/wiredtiger/src/include/lsm.h @@ -23,11 +23,14 @@ struct __wt_lsm_worker_cookie { struct __wt_lsm_worker_args { WT_SESSION_IMPL *session; /* Session */ WT_CONDVAR *work_cond; /* Owned by 
the manager */ + wt_thread_t tid; /* Thread id */ + bool tid_set; /* Thread id set */ + u_int id; /* My manager slot id */ uint32_t type; /* Types of operations handled */ -#define WT_LSM_WORKER_RUN 0x01 - uint32_t flags; /* Worker flags */ + + volatile bool running; /* Worker is running */ }; /* @@ -162,6 +165,9 @@ struct __wt_lsm_manager { #define WT_LSM_MAX_WORKERS 20 #define WT_LSM_MIN_WORKERS 3 WT_LSM_WORKER_ARGS lsm_worker_cookies[WT_LSM_MAX_WORKERS]; + +#define WT_LSM_MANAGER_SHUTDOWN 0x01 /* Manager has shut down */ + uint32_t flags; }; /* diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h index 66d43496e93..9161a215fdc 100644 --- a/src/third_party/wiredtiger/src/include/misc.h +++ b/src/third_party/wiredtiger/src/include/misc.h @@ -63,7 +63,7 @@ #define WT_MAX(a, b) ((a) < (b) ? (b) : (a)) /* Elements in an array. */ -#define WT_ELEMENTS(a) (sizeof(a) / sizeof(a[0])) +#define WT_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) /* 10 level skip lists, 1/4 have a link to the next element. */ #define WT_SKIP_MAXDEPTH 10 @@ -140,6 +140,7 @@ #define F_CLR(p, mask) FLD_CLR((p)->flags, mask) #define F_ISSET(p, mask) FLD_ISSET((p)->flags, mask) +#define F_ISSET_ALL(p, mask) (FLD_MASK((p)->flags, mask) == (mask)) #define F_MASK(p, mask) FLD_MASK((p)->flags, mask) #define F_SET(p, mask) FLD_SET((p)->flags, mask) @@ -180,14 +181,14 @@ */ #define WT_BINARY_SEARCH(key, arrayp, n, found) do { \ uint32_t __base, __indx, __limit; \ - found = false; \ + (found) = false; \ for (__base = 0, __limit = (n); __limit != 0; __limit >>= 1) { \ __indx = __base + (__limit >> 1); \ - if ((arrayp)[__indx] < key) { \ + if ((arrayp)[__indx] < (key)) { \ __base = __indx + 1; \ --__limit; \ - } else if ((arrayp)[__indx] == key) { \ - found = true; \ + } else if ((arrayp)[__indx] == (key)) { \ + (found) = true; \ break; \ } \ } \ @@ -206,8 +207,8 @@ /* Check if a string matches a prefix. 
*/ #define WT_PREFIX_MATCH(str, pfx) \ - (((const char *)(str))[0] == ((const char *)pfx)[0] && \ - strncmp((str), (pfx), strlen(pfx)) == 0) + (((const char *)(str))[0] == ((const char *)(pfx))[0] && \ + strncmp(str, pfx, strlen(pfx)) == 0) /* Check if a string matches a prefix, and move past it. */ #define WT_PREFIX_SKIP(str, pfx) \ @@ -224,8 +225,8 @@ /* Check if a string matches a byte string of len bytes. */ #define WT_STRING_MATCH(str, bytes, len) \ - (((const char *)str)[0] == ((const char *)bytes)[0] && \ - strncmp(str, bytes, len) == 0 && (str)[(len)] == '\0') + (((const char *)(str))[0] == ((const char *)(bytes))[0] && \ + strncmp(str, bytes, len) == 0 && (str)[len] == '\0') /* * Macro that produces a string literal that isn't wrapped in quotes, to avoid diff --git a/src/third_party/wiredtiger/src/include/misc.i b/src/third_party/wiredtiger/src/include/misc.i index d5692a3f9cf..7040886cf82 100644 --- a/src/third_party/wiredtiger/src/include/misc.i +++ b/src/third_party/wiredtiger/src/include/misc.i @@ -86,3 +86,94 @@ __wt_verbose(WT_SESSION_IMPL *session, int flag, const char *fmt, ...) WT_UNUSED(fmt); #endif } + +/* + * __wt_snprintf -- + * snprintf convenience function, ignoring the returned size. + */ +static inline int +__wt_snprintf(char *buf, size_t size, const char *fmt, ...) + WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4))) +{ + WT_DECL_RET; + size_t len; + va_list ap; + + len = 0; + + va_start(ap, fmt); + ret = __wt_vsnprintf_len_incr(buf, size, &len, fmt, ap); + va_end(ap); + WT_RET(ret); + + /* It's an error if the buffer couldn't hold everything. */ + return (len >= size ? ERANGE : 0); +} + +/* + * __wt_vsnprintf -- + * vsnprintf convenience function, ignoring the returned size. + */ +static inline int +__wt_vsnprintf(char *buf, size_t size, const char *fmt, va_list ap) +{ + size_t len; + + len = 0; + + WT_RET(__wt_vsnprintf_len_incr(buf, size, &len, fmt, ap)); + + /* It's an error if the buffer couldn't hold everything. 
*/ + return (len >= size ? ERANGE : 0); +} + +/* + * __wt_snprintf_len_set -- + * snprintf convenience function, setting the returned size. + */ +static inline int +__wt_snprintf_len_set( + char *buf, size_t size, size_t *retsizep, const char *fmt, ...) + WT_GCC_FUNC_ATTRIBUTE((format (printf, 4, 5))) +{ + WT_DECL_RET; + va_list ap; + + *retsizep = 0; + + va_start(ap, fmt); + ret = __wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap); + va_end(ap); + return (ret); +} + +/* + * __wt_vsnprintf_len_set -- + * vsnprintf convenience function, setting the returned size. + */ +static inline int +__wt_vsnprintf_len_set( + char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) +{ + *retsizep = 0; + + return (__wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap)); +} + +/* + * __wt_snprintf_len_incr -- + * snprintf convenience function, incrementing the returned size. + */ +static inline int +__wt_snprintf_len_incr( + char *buf, size_t size, size_t *retsizep, const char *fmt, ...) + WT_GCC_FUNC_ATTRIBUTE((format (printf, 4, 5))) +{ + WT_DECL_RET; + va_list ap; + + va_start(ap, fmt); + ret = __wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap); + va_end(ap); + return (ret); +} diff --git a/src/third_party/wiredtiger/src/include/mutex.h b/src/third_party/wiredtiger/src/include/mutex.h index 06b8c4a3304..910eb7af5b9 100644 --- a/src/third_party/wiredtiger/src/include/mutex.h +++ b/src/third_party/wiredtiger/src/include/mutex.h @@ -62,31 +62,17 @@ union __wt_rwlock { /* Read/write lock */ #define SPINLOCK_PTHREAD_MUTEX 2 #define SPINLOCK_PTHREAD_MUTEX_ADAPTIVE 3 -#if SPINLOCK_TYPE == SPINLOCK_GCC - struct __wt_spinlock { WT_CACHE_LINE_PAD_BEGIN +#if SPINLOCK_TYPE == SPINLOCK_GCC volatile int lock; - - /* - * We track acquisitions and time spent waiting for some locks. 
For - * performance reasons and to make it possible to write generic code - * that tracks statistics for different locks, we store the offset - * of the statistics fields to be updated during lock acquisition. - */ - int16_t stat_count_off; /* acquisitions offset */ - int16_t stat_app_usecs_off; /* waiting application threads offset */ - int16_t stat_int_usecs_off; /* waiting server threads offset */ - WT_CACHE_LINE_PAD_END -}; - #elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\ SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE ||\ SPINLOCK_TYPE == SPINLOCK_MSVC - -struct __wt_spinlock { - WT_CACHE_LINE_PAD_BEGIN wt_mutex_t lock; +#else +#error Unknown spinlock type +#endif const char *name; /* Mutex name */ @@ -103,9 +89,3 @@ struct __wt_spinlock { int8_t initialized; /* Lock initialized, for cleanup */ WT_CACHE_LINE_PAD_END }; - -#else - -#error Unknown spinlock type - -#endif diff --git a/src/third_party/wiredtiger/src/include/mutex.i b/src/third_party/wiredtiger/src/include/mutex.i index 6b83cb280d3..2d483972ed2 100644 --- a/src/third_party/wiredtiger/src/include/mutex.i +++ b/src/third_party/wiredtiger/src/include/mutex.i @@ -14,6 +14,18 @@ * of instructions. */ +/* + * __spin_init_internal -- + * Initialize the WT portion of a spinlock. + */ +static inline void +__spin_init_internal(WT_SPINLOCK *t, const char *name) +{ + t->name = name; + t->stat_count_off = t->stat_app_usecs_off = t->stat_int_usecs_off = -1; + t->initialized = 1; +} + #if SPINLOCK_TYPE == SPINLOCK_GCC /* Default to spinning 1000 times before yielding. 
*/ @@ -29,10 +41,9 @@ static inline int __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name) { WT_UNUSED(session); - WT_UNUSED(name); t->lock = 0; - t->stat_count_off = t->stat_app_usecs_off = t->stat_int_usecs_off = -1; + __spin_init_internal(t, name); return (0); } @@ -110,10 +121,7 @@ __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name) #else WT_RET(pthread_mutex_init(&t->lock, NULL)); #endif - - t->name = name; - t->stat_count_off = t->stat_app_usecs_off = t->stat_int_usecs_off = -1; - t->initialized = 1; + __spin_init_internal(t, name); WT_UNUSED(session); return (0); @@ -195,8 +203,7 @@ __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name) return (__wt_map_windows_error(windows_error)); } - t->name = name; - t->initialized = 1; + __spin_init_internal(t, name); return (0); } @@ -316,6 +323,6 @@ __wt_spin_trylock_track(WT_SESSION_IMPL *session, WT_SPINLOCK *t) stats = (int64_t **)S2C(session)->stats; stats[session->stat_bucket][t->stat_count_off]++; return (0); - } else - return (__wt_spin_trylock(session, t)); + } + return (__wt_spin_trylock(session, t)); } diff --git a/src/third_party/wiredtiger/src/include/os.h b/src/third_party/wiredtiger/src/include/os.h index 7a8e47ed81f..73d89268392 100644 --- a/src/third_party/wiredtiger/src/include/os.h +++ b/src/third_party/wiredtiger/src/include/os.h @@ -11,8 +11,14 @@ * A call returning 0 indicates success; any call where \ * 0 is not the only successful return must provide an \ * expression evaluating to 0 in all successful cases. \ + * \ + * XXX \ + * Casting the call's return to int is because CentOS 7.3.1611 \ + * complains about syscall returning a long and the loss of \ + * integer precision in the assignment to ret. The cast should \ + * be a no-op everywhere. 
\ */ \ - if (((ret) = (call)) == 0) \ + if (((ret) = (int)(call)) == 0) \ break; \ /* \ * The call's error was either returned by the call or \ @@ -61,7 +67,7 @@ #define WT_TIMECMP(t1, t2) \ ((t1).tv_sec < (t2).tv_sec ? -1 : \ - (t1).tv_sec == (t2.tv_sec) ? \ + (t1).tv_sec == (t2).tv_sec ? \ (t1).tv_nsec < (t2).tv_nsec ? -1 : \ (t1).tv_nsec == (t2).tv_nsec ? 0 : 1 : 1) diff --git a/src/third_party/wiredtiger/src/include/os_windows.h b/src/third_party/wiredtiger/src/include/os_windows.h index 65938ac9f17..c1e5f788dc6 100644 --- a/src/third_party/wiredtiger/src/include/os_windows.h +++ b/src/third_party/wiredtiger/src/include/os_windows.h @@ -43,16 +43,6 @@ typedef uint32_t u_int; typedef unsigned char u_char; typedef uint64_t u_long; -/* <= VS 2013 is not C99 compat */ -#if _MSC_VER < 1900 -#define snprintf _wt_snprintf - -_Check_return_opt_ int __cdecl _wt_snprintf( - _Out_writes_(_MaxCount) char * _DstBuf, - _In_ size_t _MaxCount, - _In_z_ _Printf_format_string_ const char * _Format, ...); -#endif - /* * Windows does have ssize_t * Python headers declare also though so we need to guard it @@ -61,18 +51,6 @@ _Check_return_opt_ int __cdecl _wt_snprintf( typedef int ssize_t; #endif -/* - * Provide a custom version of vsnprintf that returns the - * needed buffer length instead of -1 on truncation - */ -#define vsnprintf _wt_vsnprintf - -_Check_return_opt_ int __cdecl _wt_vsnprintf( - _Out_writes_(_MaxCount) char * _DstBuf, - _In_ size_t _MaxCount, - _In_z_ _Printf_format_string_ const char * _Format, - va_list _ArgList); - /* Provide a custom version of localtime_r */ struct tm *localtime_r(const time_t* timer, struct tm* result); diff --git a/src/third_party/wiredtiger/src/include/packing.i b/src/third_party/wiredtiger/src/include/packing.i index 8ba3dd536ac..0eadb2f2027 100644 --- a/src/third_party/wiredtiger/src/include/packing.i +++ b/src/third_party/wiredtiger/src/include/packing.i @@ -104,8 +104,8 @@ __pack_name_next(WT_PACK_NAME *pn, WT_CONFIG_ITEM *name) 
WT_CONFIG_ITEM ignore; if (pn->genname) { - (void)snprintf(pn->buf, sizeof(pn->buf), - (pn->iskey ? "key%d" : "value%d"), pn->count); + WT_RET(__wt_snprintf(pn->buf, sizeof(pn->buf), + (pn->iskey ? "key%d" : "value%d"), pn->count)); WT_CLEAR(*name); name->str = pn->buf; name->len = strlen(pn->buf); @@ -198,7 +198,7 @@ next: if (pack->cur == pack->end) return (0); default: WT_RET_MSG(pack->session, EINVAL, - "Invalid type '%c' found in format '%.*s'", + "Invalid type '%c' found in format '%.*s'", pv->type, (int)(pack->end - pack->orig), pack->orig); } @@ -206,43 +206,43 @@ next: if (pack->cur == pack->end) #define WT_PACK_GET(session, pv, ap) do { \ WT_ITEM *__item; \ - switch (pv.type) { \ + switch ((pv).type) { \ case 'x': \ break; \ case 's': \ case 'S': \ - pv.u.s = va_arg(ap, const char *); \ + (pv).u.s = va_arg(ap, const char *); \ break; \ case 'U': \ case 'u': \ __item = va_arg(ap, WT_ITEM *); \ - pv.u.item.data = __item->data; \ - pv.u.item.size = __item->size; \ + (pv).u.item.data = __item->data; \ + (pv).u.item.size = __item->size; \ break; \ case 'b': \ case 'h': \ case 'i': \ - pv.u.i = va_arg(ap, int); \ + (pv).u.i = va_arg(ap, int); \ break; \ case 'B': \ case 'H': \ case 'I': \ case 't': \ - pv.u.u = va_arg(ap, unsigned int); \ + (pv).u.u = va_arg(ap, unsigned int); \ break; \ case 'l': \ - pv.u.i = va_arg(ap, long); \ + (pv).u.i = va_arg(ap, long); \ break; \ case 'L': \ - pv.u.u = va_arg(ap, unsigned long); \ + (pv).u.u = va_arg(ap, unsigned long); \ break; \ case 'q': \ - pv.u.i = va_arg(ap, int64_t); \ + (pv).u.i = va_arg(ap, int64_t); \ break; \ case 'Q': \ case 'r': \ case 'R': \ - pv.u.u = va_arg(ap, uint64_t); \ + (pv).u.u = va_arg(ap, uint64_t); \ break; \ /* User format strings have already been validated. 
*/ \ WT_ILLEGAL_VALUE(session); \ @@ -556,47 +556,47 @@ __unpack_read(WT_SESSION_IMPL *session, #define WT_UNPACK_PUT(session, pv, ap) do { \ WT_ITEM *__item; \ - switch (pv.type) { \ + switch ((pv).type) { \ case 'x': \ break; \ case 's': \ case 'S': \ - *va_arg(ap, const char **) = pv.u.s; \ + *va_arg(ap, const char **) = (pv).u.s; \ break; \ case 'U': \ case 'u': \ __item = va_arg(ap, WT_ITEM *); \ - __item->data = pv.u.item.data; \ - __item->size = pv.u.item.size; \ + __item->data = (pv).u.item.data; \ + __item->size = (pv).u.item.size; \ break; \ case 'b': \ - *va_arg(ap, int8_t *) = (int8_t)pv.u.i; \ + *va_arg(ap, int8_t *) = (int8_t)(pv).u.i; \ break; \ case 'h': \ - *va_arg(ap, int16_t *) = (short)pv.u.i; \ + *va_arg(ap, int16_t *) = (short)(pv).u.i; \ break; \ case 'i': \ case 'l': \ - *va_arg(ap, int32_t *) = (int32_t)pv.u.i; \ + *va_arg(ap, int32_t *) = (int32_t)(pv).u.i; \ break; \ case 'q': \ - *va_arg(ap, int64_t *) = pv.u.i; \ + *va_arg(ap, int64_t *) = (pv).u.i; \ break; \ case 'B': \ case 't': \ - *va_arg(ap, uint8_t *) = (uint8_t)pv.u.u; \ + *va_arg(ap, uint8_t *) = (uint8_t)(pv).u.u; \ break; \ case 'H': \ - *va_arg(ap, uint16_t *) = (uint16_t)pv.u.u; \ + *va_arg(ap, uint16_t *) = (uint16_t)(pv).u.u; \ break; \ case 'I': \ case 'L': \ - *va_arg(ap, uint32_t *) = (uint32_t)pv.u.u; \ + *va_arg(ap, uint32_t *) = (uint32_t)(pv).u.u; \ break; \ case 'Q': \ case 'r': \ case 'R': \ - *va_arg(ap, uint64_t *) = pv.u.u; \ + *va_arg(ap, uint64_t *) = (pv).u.u; \ break; \ /* User format strings have already been validated. */ \ WT_ILLEGAL_VALUE(session); \ diff --git a/src/third_party/wiredtiger/src/include/schema.h b/src/third_party/wiredtiger/src/include/schema.h index 9a6e1e54e80..50e141d9921 100644 --- a/src/third_party/wiredtiger/src/include/schema.h +++ b/src/third_party/wiredtiger/src/include/schema.h @@ -107,10 +107,11 @@ struct __wt_table { * Acquire a lock if available, perform an operation, drop the lock. 
*/ #define WT_WITH_LOCK_NOWAIT(session, ret, lock, flag, op) do { \ - ret = 0; \ + (ret) = 0; \ if (F_ISSET(session, (flag))) { \ op; \ - } else if ((ret = __wt_spin_trylock_track(session, lock)) == 0) {\ + } else if (((ret) = \ + __wt_spin_trylock_track(session, lock)) == 0) { \ F_SET(session, (flag)); \ op; \ F_CLR(session, (flag)); \ @@ -248,7 +249,7 @@ struct __wt_table { WT_SESSION_LOCKED_HANDLE_LIST)); \ if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE)) { \ op; \ - } else if ((ret = __wt_try_writelock(session, \ + } else if (((ret) = __wt_try_writelock(session, \ &S2C(session)->table_lock)) == 0) { \ F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \ op; \ diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h index 085f871a34f..674e92671b1 100644 --- a/src/third_party/wiredtiger/src/include/session.h +++ b/src/third_party/wiredtiger/src/include/session.h @@ -67,7 +67,6 @@ struct __wt_session_impl { TAILQ_HEAD(__dhandles, __wt_data_handle_cache) dhandles; time_t last_sweep; /* Last sweep for dead handles */ - WT_CURSOR *cursor; /* Current cursor */ /* Cursors closed with the session */ TAILQ_HEAD(__cursors, __wt_cursor) cursors; @@ -88,7 +87,7 @@ struct __wt_session_impl { void *meta_track_sub; /* Child transaction / save point */ size_t meta_track_alloc; /* Currently allocated */ int meta_track_nest; /* Nesting level of meta transaction */ -#define WT_META_TRACKING(session) (session->meta_track_next != NULL) +#define WT_META_TRACKING(session) ((session)->meta_track_next != NULL) /* * Each session keeps a cache of table handles. The set of handles @@ -151,20 +150,16 @@ struct __wt_session_impl { uint32_t flags; /* - * The split stash memory and hazard information persist past session - * close because they are accessed by threads of control other than the - * thread owning the session. 
- * + * All of the following fields live at the end of the structure so it's + * easier to clear everything but the fields that persist. + */ +#define WT_SESSION_CLEAR_SIZE (offsetof(WT_SESSION_IMPL, rnd)) + + /* * The random number state persists past session close because we don't - * want to repeatedly allocate repeated values for skiplist depth if the + * want to repeatedly use the same values for skiplist depth when the * application isn't caching sessions. - * - * All of these fields live at the end of the structure so it's easier - * to clear everything but the fields that persist. */ -#define WT_SESSION_CLEAR_SIZE(s) \ - (WT_PTRDIFF(&(s)->rnd, s)) - WT_RAND_STATE rnd; /* Random number generation state */ /* Hashed handle reference list array */ @@ -173,6 +168,9 @@ struct __wt_session_impl { TAILQ_HEAD(__tables_hash, __wt_table) *tablehash; /* + * Split stash memory persists past session close because it's accessed + * by threads of control other than the thread owning the session. + * * Splits can "free" memory that may still be in use, and we use a * split generation number to track it, that is, the session stores a * reference to the memory and allocates a split generation; when no @@ -192,6 +190,9 @@ struct __wt_session_impl { /* * Hazard pointers. * + * Hazard information persists past session close because it's accessed + * by threads of control other than the thread owning the session. + * * Use the non-NULL state of the hazard field to know if the session has * previously been initialized. */ diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index 8b2e78a4ed5..6c274484bcb 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -72,7 +72,7 @@ * and the session ID is a small, monotonically increasing number. 
*/ #define WT_STATS_SLOT_ID(session) \ - ((session)->id) % WT_COUNTER_SLOTS + (((session)->id) % WT_COUNTER_SLOTS) /* * Statistic structures are arrays of int64_t's. We have functions to read/write @@ -318,6 +318,7 @@ struct __wt_connection_stats { int64_t cache_eviction_force_fail; int64_t cache_eviction_walks_active; int64_t cache_eviction_walks_started; + int64_t cache_eviction_force_retune; int64_t cache_eviction_hazard; int64_t cache_hazard_checks; int64_t cache_hazard_walks; @@ -404,9 +405,11 @@ struct __wt_connection_stats { int64_t lock_table_wait_internal; int64_t log_slot_switch_busy; int64_t log_slot_closes; + int64_t log_slot_active_closed; int64_t log_slot_races; int64_t log_slot_transitions; int64_t log_slot_joins; + int64_t log_slot_no_free_slots; int64_t log_slot_unbuffered; int64_t log_bytes_payload; int64_t log_bytes_written; diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index 0cc4a6f8439..314c948e4d1 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -125,7 +125,8 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session) * minimum of it with the oldest ID, which is what we want. 
*/ oldest_id = txn_global->oldest_id; - include_checkpoint_txn = btree == NULL || btree->include_checkpoint_txn; + include_checkpoint_txn = btree == NULL || + btree->checkpoint_gen != txn_global->checkpoint_gen; WT_READ_BARRIER(); checkpoint_pinned = txn_global->checkpoint_pinned; diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index c148e759299..ddecb2ac765 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -36,7 +36,7 @@ extern "C" { #if defined(DOXYGEN) || defined(SWIG) #define __F(func) func #else -#define __F(func) (*func) +#define __F(func) (*(func)) #endif #ifdef SWIG @@ -427,7 +427,7 @@ struct __wt_cursor { * * @param cursor the cursor handle * @errors - * In particular, if \c overwrite is not configured and a record with + * In particular, if \c overwrite=false is configured and a record with * the specified key already exists, ::WT_DUPLICATE_KEY is returned. * Also, if \c in_memory is configured for the database and the insert * requires more than the configured cache size to complete, @@ -452,7 +452,9 @@ struct __wt_cursor { * * On success, the cursor ends positioned at the modified record; to * minimize cursor resources, the WT_CURSOR::reset method should be - * called as soon as the cursor no longer needs that position. + * called as soon as the cursor no longer needs that position. (The + * WT_CURSOR::insert method never keeps a cursor position and may be + * more efficient for that reason.) 
* * The maximum length of a single column stored in a table is not fixed * (as it partially depends on the underlying file configuration), but @@ -460,7 +462,7 @@ struct __wt_cursor { * * @param cursor the cursor handle * @errors - * In particular, if \c overwrite is not configured and no record with + * In particular, if \c overwrite=false is configured and no record with * the specified key exists, ::WT_NOTFOUND is returned. * Also, if \c in_memory is configured for the database and the insert * requires more than the configured cache size to complete, @@ -477,8 +479,18 @@ struct __wt_cursor { * * @snippet ex_all.c Remove a record * - * If the cursor was not configured with "overwrite=true", the key must - * be set and the key's record must exist; the record will be removed. + * If the cursor was configured with "overwrite=false" (not the + * default), the key must be set and the key's record must exist; the + * record will be removed. + * + * Any cursor position does not change: if the cursor was positioned + * before the WT_CURSOR::remove call, the cursor remains positioned + * at the removed record; to minimize cursor resources, the + * WT_CURSOR::reset method should be called as soon as the cursor no + * longer needs that position. If the cursor was not positioned before + * the WT_CURSOR::remove call, the cursor ends with no position, and a + * subsequent call to the WT_CURSOR::next (WT_CURSOR::prev) method will + * iterate from the beginning (end) of the table. * * @snippet ex_all.c Remove a record and fail if DNE * @@ -486,14 +498,10 @@ struct __wt_cursor { * (that is, a store with an 'r' type key and 't' type value) is * identical to setting the record's value to 0. * - * On success, the cursor ends positioned at the removed record; to - * minimize cursor resources, the WT_CURSOR::reset method should be - * called as soon as the cursor no longer needs that position. 
- * * @param cursor the cursor handle * @errors - * In particular, if \c overwrite is not configured and no record with - * the specified key exists, ::WT_NOTFOUND is returned. + * In particular, if \c overwrite=false is configured and no record + * with the specified key exists, ::WT_NOTFOUND is returned. */ int __F(remove)(WT_CURSOR *cursor); /*! @} */ @@ -1234,8 +1242,8 @@ struct __wt_session { * @config{split_pct, the Btree page split size as a percentage of the * maximum Btree page size\, that is\, when a Btree page is split\, it * will be split into smaller pages\, where each page is the specified - * percentage of the maximum Btree page size., an integer between 25 and - * 100; default \c 75.} + * percentage of the maximum Btree page size., an integer between 50 and + * 100; default \c 90.} * @config{type, set the type of data source used to store a column * group\, index or simple table. By default\, a \c "file:" URI is * derived from the object name. The \c type configuration can be used @@ -1472,6 +1480,10 @@ struct __wt_session { * contains. * @snippet ex_all.c Truncate a range * + * Any specified cursors end with no position, and subsequent calls to + * the WT_CURSOR::next (WT_CURSOR::prev) method will iterate from the + * beginning (end) of the table. + * * @param session the session handle * @param name the URI of the file or table to truncate * @param start optional cursor marking the first record discarded; @@ -3065,27 +3077,27 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp); * transaction is in progress, it should be rolled back and the operation * retried in a new transaction. */ -#define WT_ROLLBACK -31800 +#define WT_ROLLBACK (-31800) /*! * Attempt to insert an existing key. * This error is generated when the application attempts to insert a record with * the same key as an existing record without the 'overwrite' configuration to * WT_SESSION::open_cursor. 
*/ -#define WT_DUPLICATE_KEY -31801 +#define WT_DUPLICATE_KEY (-31801) /*! * Non-specific WiredTiger error. * This error is returned when an error is not covered by a specific error * return. */ -#define WT_ERROR -31802 +#define WT_ERROR (-31802) /*! * Item not found. * This error indicates an operation did not find a value to return. This * includes cursor search and other operations where no record matched the * cursor's search key such as WT_CURSOR::update or WT_CURSOR::remove. */ -#define WT_NOTFOUND -31803 +#define WT_NOTFOUND (-31803) /*! * WiredTiger library panic. * This error indicates an underlying problem that requires the application exit @@ -3093,17 +3105,17 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp); * returned from a WiredTiger interface, no further WiredTiger calls are * required. */ -#define WT_PANIC -31804 +#define WT_PANIC (-31804) /*! @cond internal */ /*! Restart the operation (internal). */ -#define WT_RESTART -31805 +#define WT_RESTART (-31805) /*! @endcond */ /*! * Recovery must be run to continue. * This error is generated when wiredtiger_open is configured to return an error * if recovery is required to use the database. */ -#define WT_RUN_RECOVERY -31806 +#define WT_RUN_RECOVERY (-31806) /*! * Operation would overflow cache. * This error is only generated when wiredtiger_open is configured to run in- @@ -3112,7 +3124,7 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp); * progress, it should be rolled back and the operation retried in a new * transaction. */ -#define WT_CACHE_FULL -31807 +#define WT_CACHE_FULL (-31807) /* * Error return section: END * DO NOT EDIT: automatically built by dist/api_err.py. @@ -4448,384 +4460,390 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1059 /*! cache: files with new eviction walks started */ #define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1060 +/*! 
cache: force re-tuning of eviction workers once in a while */ +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_RETUNE 1061 /*! cache: hazard pointer blocked page eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1061 +#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1062 /*! cache: hazard pointer check calls */ -#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1062 +#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1063 /*! cache: hazard pointer check entries walked */ -#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1063 +#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1064 /*! cache: hazard pointer maximum array length */ -#define WT_STAT_CONN_CACHE_HAZARD_MAX 1064 +#define WT_STAT_CONN_CACHE_HAZARD_MAX 1065 /*! cache: in-memory page passed criteria to be split */ -#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1065 +#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1066 /*! cache: in-memory page splits */ -#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1066 +#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1067 /*! cache: internal pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1067 +#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1068 /*! cache: internal pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1068 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1069 /*! cache: leaf pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1069 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1070 /*! cache: lookaside table insert calls */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1070 +#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1071 /*! cache: lookaside table remove calls */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1071 +#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1072 /*! cache: maximum bytes configured */ -#define WT_STAT_CONN_CACHE_BYTES_MAX 1072 +#define WT_STAT_CONN_CACHE_BYTES_MAX 1073 /*! cache: maximum page size at eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1073 +#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1074 /*! 
cache: modified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1074 +#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1075 /*! cache: modified pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1075 +#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1076 /*! cache: overflow pages read into cache */ -#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1076 +#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1077 /*! cache: overflow values cached in memory */ -#define WT_STAT_CONN_CACHE_OVERFLOW_VALUE 1077 +#define WT_STAT_CONN_CACHE_OVERFLOW_VALUE 1078 /*! cache: page split during eviction deepened the tree */ -#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1078 +#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1079 /*! cache: page written requiring lookaside records */ -#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1079 +#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1080 /*! cache: pages currently held in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_INUSE 1080 +#define WT_STAT_CONN_CACHE_PAGES_INUSE 1081 /*! cache: pages evicted because they exceeded the in-memory maximum */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1081 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1082 /*! cache: pages evicted because they had chains of deleted items */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1082 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1083 /*! cache: pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP 1083 +#define WT_STAT_CONN_CACHE_EVICTION_APP 1084 /*! cache: pages queued for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1084 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1085 /*! cache: pages queued for urgent eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1085 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1086 /*! 
cache: pages queued for urgent eviction during walk */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1086 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1087 /*! cache: pages read into cache */ -#define WT_STAT_CONN_CACHE_READ 1087 +#define WT_STAT_CONN_CACHE_READ 1088 /*! cache: pages read into cache requiring lookaside entries */ -#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1088 +#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1089 /*! cache: pages requested from the cache */ -#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1089 +#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1090 /*! cache: pages seen by eviction walk */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1090 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1091 /*! cache: pages selected for eviction unable to be evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1091 +#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1092 /*! cache: pages walked for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_WALK 1092 +#define WT_STAT_CONN_CACHE_EVICTION_WALK 1093 /*! cache: pages written from cache */ -#define WT_STAT_CONN_CACHE_WRITE 1093 +#define WT_STAT_CONN_CACHE_WRITE 1094 /*! cache: pages written requiring in-memory restoration */ -#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1094 +#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1095 /*! cache: percentage overhead */ -#define WT_STAT_CONN_CACHE_OVERHEAD 1095 +#define WT_STAT_CONN_CACHE_OVERHEAD 1096 /*! cache: tracked bytes belonging to internal pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1096 +#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1097 /*! cache: tracked bytes belonging to leaf pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_LEAF 1097 +#define WT_STAT_CONN_CACHE_BYTES_LEAF 1098 /*! cache: tracked dirty bytes in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1098 +#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1099 /*! 
cache: tracked dirty pages in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1099 +#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1100 /*! cache: unmodified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1100 +#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1101 /*! connection: auto adjusting condition resets */ -#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1101 +#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1102 /*! connection: auto adjusting condition wait calls */ -#define WT_STAT_CONN_COND_AUTO_WAIT 1102 +#define WT_STAT_CONN_COND_AUTO_WAIT 1103 /*! connection: files currently open */ -#define WT_STAT_CONN_FILE_OPEN 1103 +#define WT_STAT_CONN_FILE_OPEN 1104 /*! connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1104 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1105 /*! connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1105 +#define WT_STAT_CONN_MEMORY_FREE 1106 /*! connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1106 +#define WT_STAT_CONN_MEMORY_GROW 1107 /*! connection: pthread mutex condition wait calls */ -#define WT_STAT_CONN_COND_WAIT 1107 +#define WT_STAT_CONN_COND_WAIT 1108 /*! connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1108 +#define WT_STAT_CONN_RWLOCK_READ 1109 /*! connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1109 +#define WT_STAT_CONN_RWLOCK_WRITE 1110 /*! connection: total fsync I/Os */ -#define WT_STAT_CONN_FSYNC_IO 1110 +#define WT_STAT_CONN_FSYNC_IO 1111 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1111 +#define WT_STAT_CONN_READ_IO 1112 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1112 +#define WT_STAT_CONN_WRITE_IO 1113 /*! cursor: cursor create calls */ -#define WT_STAT_CONN_CURSOR_CREATE 1113 +#define WT_STAT_CONN_CURSOR_CREATE 1114 /*! cursor: cursor insert calls */ -#define WT_STAT_CONN_CURSOR_INSERT 1114 +#define WT_STAT_CONN_CURSOR_INSERT 1115 /*! 
cursor: cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT 1115 +#define WT_STAT_CONN_CURSOR_NEXT 1116 /*! cursor: cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV 1116 +#define WT_STAT_CONN_CURSOR_PREV 1117 /*! cursor: cursor remove calls */ -#define WT_STAT_CONN_CURSOR_REMOVE 1117 +#define WT_STAT_CONN_CURSOR_REMOVE 1118 /*! cursor: cursor reset calls */ -#define WT_STAT_CONN_CURSOR_RESET 1118 +#define WT_STAT_CONN_CURSOR_RESET 1119 /*! cursor: cursor restarted searches */ -#define WT_STAT_CONN_CURSOR_RESTART 1119 +#define WT_STAT_CONN_CURSOR_RESTART 1120 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1120 +#define WT_STAT_CONN_CURSOR_SEARCH 1121 /*! cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1121 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1122 /*! cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1122 +#define WT_STAT_CONN_CURSOR_UPDATE 1123 /*! cursor: truncate calls */ -#define WT_STAT_CONN_CURSOR_TRUNCATE 1123 +#define WT_STAT_CONN_CURSOR_TRUNCATE 1124 /*! data-handle: connection data handles currently active */ -#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1124 +#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1125 /*! data-handle: connection sweep candidate became referenced */ -#define WT_STAT_CONN_DH_SWEEP_REF 1125 +#define WT_STAT_CONN_DH_SWEEP_REF 1126 /*! data-handle: connection sweep dhandles closed */ -#define WT_STAT_CONN_DH_SWEEP_CLOSE 1126 +#define WT_STAT_CONN_DH_SWEEP_CLOSE 1127 /*! data-handle: connection sweep dhandles removed from hash list */ -#define WT_STAT_CONN_DH_SWEEP_REMOVE 1127 +#define WT_STAT_CONN_DH_SWEEP_REMOVE 1128 /*! data-handle: connection sweep time-of-death sets */ -#define WT_STAT_CONN_DH_SWEEP_TOD 1128 +#define WT_STAT_CONN_DH_SWEEP_TOD 1129 /*! data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_SWEEPS 1129 +#define WT_STAT_CONN_DH_SWEEPS 1130 /*! 
data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1130 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1131 /*! data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1131 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1132 /*! lock: checkpoint lock acquisitions */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1132 +#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1133 /*! lock: checkpoint lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1133 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1134 /*! lock: checkpoint lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1134 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1135 /*! lock: handle-list lock eviction thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_EVICTION 1135 +#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_EVICTION 1136 /*! lock: metadata lock acquisitions */ -#define WT_STAT_CONN_LOCK_METADATA_COUNT 1136 +#define WT_STAT_CONN_LOCK_METADATA_COUNT 1137 /*! lock: metadata lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1137 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1138 /*! lock: metadata lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1138 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1139 /*! lock: schema lock acquisitions */ -#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1139 +#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1140 /*! lock: schema lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1140 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1141 /*! lock: schema lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1141 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1142 /*! 
lock: table lock acquisitions */ -#define WT_STAT_CONN_LOCK_TABLE_COUNT 1142 +#define WT_STAT_CONN_LOCK_TABLE_COUNT 1143 /*! * lock: table lock application thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1143 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1144 /*! * lock: table lock internal thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1144 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1145 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1145 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1146 /*! log: consolidated slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1146 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1147 +/*! log: consolidated slot join active slot closed */ +#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1148 /*! log: consolidated slot join races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1147 +#define WT_STAT_CONN_LOG_SLOT_RACES 1149 /*! log: consolidated slot join transitions */ -#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1148 +#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1150 /*! log: consolidated slot joins */ -#define WT_STAT_CONN_LOG_SLOT_JOINS 1149 +#define WT_STAT_CONN_LOG_SLOT_JOINS 1151 +/*! log: consolidated slot transitions unable to find free slot */ +#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1152 /*! log: consolidated slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1150 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1153 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1151 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1154 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1152 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1155 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1153 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1156 /*! 
log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1154 +#define WT_STAT_CONN_LOG_FLUSH 1157 /*! log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1155 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1158 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1156 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1159 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1157 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1160 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1158 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1161 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1159 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1162 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1160 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1163 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1161 +#define WT_STAT_CONN_LOG_SCANS 1164 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1162 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1165 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1163 +#define WT_STAT_CONN_LOG_WRITE_LSN 1166 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1164 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1167 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1165 +#define WT_STAT_CONN_LOG_SYNC 1168 /*! log: log sync time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DURATION 1166 +#define WT_STAT_CONN_LOG_SYNC_DURATION 1169 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1167 +#define WT_STAT_CONN_LOG_SYNC_DIR 1170 /*! log: log sync_dir time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1168 +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1171 /*! 
log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1169 +#define WT_STAT_CONN_LOG_WRITES 1172 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1170 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1173 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1171 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1174 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1172 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1175 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1173 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1176 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1174 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1177 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1175 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1178 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1176 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1179 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1177 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1180 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1178 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1181 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1179 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1182 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1180 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1183 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1181 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1184 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1182 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1185 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1183 +#define WT_STAT_CONN_REC_PAGES 1186 /*! 
reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1184 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1187 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1185 +#define WT_STAT_CONN_REC_PAGE_DELETE 1188 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1186 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1189 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1187 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1190 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1188 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1191 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1189 +#define WT_STAT_CONN_SESSION_OPEN 1192 /*! session: table alter failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1190 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1193 /*! session: table alter successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1191 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1194 /*! session: table alter unchanged and skipped */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1192 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1195 /*! session: table compact failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1193 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1196 /*! session: table compact successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1194 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1197 /*! session: table create failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1195 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1198 /*! session: table create successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1196 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1199 /*! 
session: table drop failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1197 +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1200 /*! session: table drop successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1198 +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1201 /*! session: table rebalance failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1199 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1202 /*! session: table rebalance successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1200 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1203 /*! session: table rename failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1201 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1204 /*! session: table rename successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1202 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1205 /*! session: table salvage failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1203 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1206 /*! session: table salvage successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1204 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1207 /*! session: table truncate failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1205 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1208 /*! session: table truncate successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1206 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1209 /*! session: table verify failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1207 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1210 /*! session: table verify successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1208 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1211 /*! 
thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1209 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1212 /*! thread-state: active filesystem read calls */ -#define WT_STAT_CONN_THREAD_READ_ACTIVE 1210 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1213 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1211 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1214 /*! thread-yield: application thread time evicting (usecs) */ -#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1212 +#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1215 /*! thread-yield: application thread time waiting for cache (usecs) */ -#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1213 +#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1216 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1214 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1217 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1215 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1218 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1216 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1219 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1217 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1220 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1218 +#define WT_STAT_CONN_PAGE_SLEEP 1221 /*! transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1219 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1222 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1220 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1223 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1221 +#define WT_STAT_CONN_TXN_BEGIN 1224 /*! 
transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1222 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1225 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1223 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1226 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1224 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1227 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1225 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1228 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1226 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1229 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1227 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1230 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1228 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1231 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1229 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1232 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1230 +#define WT_STAT_CONN_TXN_CHECKPOINT 1233 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1231 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1234 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1232 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1235 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1233 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1236 /*! 
* transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1234 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1237 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1235 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1238 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1236 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1239 /*! * transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1237 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1240 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1238 +#define WT_STAT_CONN_TXN_SYNC 1241 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1239 +#define WT_STAT_CONN_TXN_COMMIT 1242 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1240 +#define WT_STAT_CONN_TXN_ROLLBACK 1243 /*! 
* @} diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c index 3477ca52502..803d3e8dfab 100644 --- a/src/third_party/wiredtiger/src/log/log.c +++ b/src/third_party/wiredtiger/src/log/log.c @@ -309,14 +309,11 @@ void __wt_log_written_reset(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; - WT_LOG *log; conn = S2C(session); - if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) - return; - log = conn->log; - log->log_written = 0; - return; + + if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) + conn->log->log_written = 0; } /* @@ -786,8 +783,8 @@ __log_openfile(WT_SESSION_IMPL *session, __wt_log_desc_byteswap(desc); if (desc->log_magic != WT_LOG_MAGIC) WT_PANIC_RET(session, WT_ERROR, - "log file %s corrupted: Bad magic number %" PRIu32, - (*fhp)->name, desc->log_magic); + "log file %s corrupted: Bad magic number %" PRIu32, + (*fhp)->name, desc->log_magic); if (desc->majorv > WT_LOG_MAJOR_VERSION || (desc->majorv == WT_LOG_MAJOR_VERSION && desc->minorv > WT_LOG_MINOR_VERSION)) @@ -1775,9 +1772,8 @@ advance: if (eol) /* Found a hole. This LSN is the end. */ break; - else - /* Last record in log. Look for more. */ - goto advance; + /* Last record in log. Look for more. 
*/ + goto advance; } rdup_len = __wt_rduppo2(reclen, allocsize); if (reclen > allocsize) { @@ -1923,7 +1919,6 @@ __wt_log_force_write(WT_SESSION_IMPL *session, bool retry, bool *did_work) { WT_LOG *log; WT_MYSLOT myslot; - uint32_t joined; log = S2C(session)->log; memset(&myslot, 0, sizeof(myslot)); @@ -1931,14 +1926,7 @@ __wt_log_force_write(WT_SESSION_IMPL *session, bool retry, bool *did_work) if (did_work != NULL) *did_work = true; myslot.slot = log->active_slot; - joined = WT_LOG_SLOT_JOINED(log->active_slot->slot_state); - if (joined == 0) { - WT_STAT_CONN_INCR(session, log_force_write_skip); - if (did_work != NULL) - *did_work = false; - return (0); - } - return (__wt_log_slot_switch(session, &myslot, retry, true)); + return (__wt_log_slot_switch(session, &myslot, retry, true, did_work)); } /* @@ -2150,7 +2138,7 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, ret = 0; if (myslot.end_offset >= WT_LOG_SLOT_BUF_MAX || F_ISSET(&myslot, WT_MYSLOT_UNBUFFERED) || force) - ret = __wt_log_slot_switch(session, &myslot, true, false); + ret = __wt_log_slot_switch(session, &myslot, true, false, NULL); if (ret == 0) ret = __log_fill(session, &myslot, false, record, &lsn); release_size = __wt_log_slot_release( @@ -2250,8 +2238,10 @@ __wt_log_vprintf(WT_SESSION_IMPL *session, const char *fmt, va_list ap) return (0); va_copy(ap_copy, ap); - len = (size_t)vsnprintf(NULL, 0, fmt, ap_copy) + 1; + len = 1; + ret = __wt_vsnprintf_len_incr(NULL, 0, &len, fmt, ap_copy); va_end(ap_copy); + WT_RET(ret); WT_RET( __wt_logrec_alloc(session, sizeof(WT_LOG_RECORD) + len, &logrec)); @@ -2268,7 +2258,8 @@ __wt_log_vprintf(WT_SESSION_IMPL *session, const char *fmt, va_list ap) rec_fmt, rectype)); logrec->size += (uint32_t)header_size; - (void)vsnprintf((char *)logrec->data + logrec->size, len, fmt, ap); + WT_ERR(__wt_vsnprintf( + (char *)logrec->data + logrec->size, len, fmt, ap)); __wt_verbose(session, WT_VERB_LOG, "log_printf: %s", (char *)logrec->data + 
logrec->size); diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c index b4655ff6c1a..97e317ce68c 100644 --- a/src/third_party/wiredtiger/src/log/log_slot.c +++ b/src/third_party/wiredtiger/src/log/log_slot.c @@ -195,103 +195,12 @@ retry: } /* - * __log_slot_switch_internal -- - * Switch out the current slot and set up a new one. - */ -static int -__log_slot_switch_internal( - WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool forced) -{ - WT_DECL_RET; - WT_LOG *log; - WT_LOGSLOT *slot; - bool free_slot, release; - - log = S2C(session)->log; - release = false; - slot = myslot->slot; - - WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT)); - - /* - * If someone else raced us to closing this specific slot, we're - * done here. - */ - if (slot != log->active_slot) - return (0); - - WT_RET(WT_SESSION_CHECK_PANIC(session)); - /* - * We may come through here multiple times if we were able to close - * a slot but could not set up a new one. If we closed it already, - * don't try to do it again but still set up the new slot. - */ - if (!F_ISSET(myslot, WT_MYSLOT_CLOSE)) { - ret = __log_slot_close(session, slot, &release, forced); - /* - * If close returns WT_NOTFOUND it means that someone else - * is processing the slot change. - */ - if (ret == WT_NOTFOUND) - return (0); - WT_RET(ret); - if (release) { - WT_RET(__wt_log_release(session, slot, &free_slot)); - if (free_slot) - __wt_log_slot_free(session, slot); - } - } - /* - * Set that we have closed this slot because we may call in here - * multiple times if we retry creating a new slot. - */ - F_SET(myslot, WT_MYSLOT_CLOSE); - WT_RET(__wt_log_slot_new(session)); - F_CLR(myslot, WT_MYSLOT_CLOSE); - return (0); -} - -/* - * __wt_log_slot_switch -- - * Switch out the current slot and set up a new one. 
- */ -int -__wt_log_slot_switch( - WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced) -{ - WT_DECL_RET; - WT_LOG *log; - - log = S2C(session)->log; - /* - * !!! Since the WT_WITH_SLOT_LOCK macro is a do-while loop, the - * compiler does not like it combined directly with the while loop - * here. - * - * The loop conditional is a bit complex. We have to retry if we - * closed the slot but were unable to set up a new slot. In that - * case the flag indicating we have closed the slot will still be set. - * We have to retry in that case regardless of the retry setting - * because we are responsible for setting up the new slot. - */ - do { - WT_WITH_SLOT_LOCK(session, log, - ret = __log_slot_switch_internal(session, myslot, forced)); - if (ret == EBUSY) { - WT_STAT_CONN_INCR(session, log_slot_switch_busy); - __wt_yield(); - } - } while (F_ISSET(myslot, WT_MYSLOT_CLOSE) || (retry && ret == EBUSY)); - return (ret); -} - -/* - * __wt_log_slot_new -- + * __log_slot_new -- * Find a free slot and switch it as the new active slot. * Must be called holding the slot lock. */ -int -__wt_log_slot_new(WT_SESSION_IMPL *session) +static int +__log_slot_new(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; WT_LOG *log; @@ -351,6 +260,7 @@ __wt_log_slot_new(WT_SESSION_IMPL *session) /* * If we didn't find any free slots signal the worker thread. */ + WT_STAT_CONN_INCR(session, log_slot_no_free_slots); __wt_cond_signal(session, conn->log_wrlsn_cond); __wt_yield(); #ifdef HAVE_DIAGNOSTIC @@ -371,6 +281,122 @@ __wt_log_slot_new(WT_SESSION_IMPL *session) } /* + * __log_slot_switch_internal -- + * Switch out the current slot and set up a new one. 
+ */ +static int +__log_slot_switch_internal( + WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool forced, bool *did_work) +{ + WT_DECL_RET; + WT_LOG *log; + WT_LOGSLOT *slot; + bool free_slot, release; + uint32_t joined; + + log = S2C(session)->log; + release = false; + slot = myslot->slot; + + WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT)); + + /* + * If someone else raced us to closing this specific slot, we're + * done here. + */ + if (slot != log->active_slot) + return (0); + /* + * If the current active slot is unused and this is a forced switch, + * we're done. If this is a non-forced switch we always switch + * because the slot could be part of an unbuffered operation. + */ + joined = WT_LOG_SLOT_JOINED(slot->slot_state); + if (joined == 0 && forced) { + WT_STAT_CONN_INCR(session, log_force_write_skip); + if (did_work != NULL) + *did_work = false; + return (0); + } + WT_RET(WT_SESSION_CHECK_PANIC(session)); + + /* + * We may come through here multiple times if we were not able to + * set up a new one. If we closed it already, + * don't try to do it again but still set up the new slot. + */ + if (!F_ISSET(myslot, WT_MYSLOT_CLOSE)) { + ret = __log_slot_close(session, slot, &release, forced); + /* + * If close returns WT_NOTFOUND it means that someone else + * is processing the slot change. + */ + if (ret == WT_NOTFOUND) + return (0); + WT_RET(ret); + /* + * Set that we have closed this slot because we may call in here + * multiple times if we retry creating a new slot. Similarly + * set retain whether this slot needs releasing so that we don't + * lose that information if we retry. + */ + F_SET(myslot, WT_MYSLOT_CLOSE); + if (release) + F_SET(myslot, WT_MYSLOT_NEEDS_RELEASE); + } + /* + * Now that the slot is closed, set up a new one so that joining + * threads don't have to wait on writing the previous slot if we + * release it. Release after setting a new one. 
+ */ + WT_RET(__log_slot_new(session)); + F_CLR(myslot, WT_MYSLOT_CLOSE); + if (F_ISSET(myslot, WT_MYSLOT_NEEDS_RELEASE)) { + WT_RET(__wt_log_release(session, slot, &free_slot)); + F_CLR(myslot, WT_MYSLOT_NEEDS_RELEASE); + if (free_slot) + __wt_log_slot_free(session, slot); + } + return (ret); +} + +/* + * __wt_log_slot_switch -- + * Switch out the current slot and set up a new one. + */ +int +__wt_log_slot_switch(WT_SESSION_IMPL *session, + WT_MYSLOT *myslot, bool retry, bool forced, bool *did_work) +{ + WT_DECL_RET; + WT_LOG *log; + + log = S2C(session)->log; + + /* + * !!! Since the WT_WITH_SLOT_LOCK macro is a do-while loop, the + * compiler does not like it combined directly with the while loop + * here. + * + * The loop conditional is a bit complex. We have to retry if we + * closed the slot but were unable to set up a new slot. In that + * case the flag indicating we have closed the slot will still be set. + * We have to retry in that case regardless of the retry setting + * because we are responsible for setting up the new slot. + */ + do { + WT_WITH_SLOT_LOCK(session, log, + ret = __log_slot_switch_internal( + session, myslot, forced, did_work)); + if (ret == EBUSY) { + WT_STAT_CONN_INCR(session, log_slot_switch_busy); + __wt_yield(); + } + } while (F_ISSET(myslot, WT_MYSLOT_CLOSE) || (retry && ret == EBUSY)); + return (ret); +} + +/* * __wt_log_slot_init -- * Initialize the slot array. */ @@ -531,12 +557,13 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, if (__wt_atomic_casiv64( &slot->slot_state, old_state, new_state)) break; - } + WT_STAT_CONN_INCR(session, log_slot_races); + } else + WT_STAT_CONN_INCR(session, log_slot_active_closed); /* * The slot is no longer open or we lost the race to * update it. Yield and try again. 
*/ - WT_STAT_CONN_INCR(session, log_slot_races); __wt_yield(); } /* @@ -574,7 +601,6 @@ __wt_log_slot_release(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, int64_t size) wt_off_t cur_offset, my_start; int64_t my_size, rel_size; - WT_UNUSED(session); slot = myslot->slot; my_start = slot->slot_start_offset + myslot->offset; /* diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c index 60afbc99ade..52265f02e62 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c @@ -10,10 +10,10 @@ #define WT_FORALL_CURSORS(clsm, c, i) \ for ((i) = (clsm)->nchunks; (i) > 0;) \ - if (((c) = (clsm)->chunks[--i]->cursor) != NULL) + if (((c) = (clsm)->chunks[--(i)]->cursor) != NULL) #define WT_LSM_CURCMP(s, lsm_tree, c1, c2, cmp) \ - __wt_compare(s, (lsm_tree)->collator, &(c1)->key, &(c2)->key, &cmp) + __wt_compare(s, (lsm_tree)->collator, &(c1)->key, &(c2)->key, &(cmp)) static int __clsm_lookup(WT_CURSOR_LSM *, WT_ITEM *); static int __clsm_open_cursors(WT_CURSOR_LSM *, bool, u_int, uint32_t); @@ -178,20 +178,12 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update) if (reset) { WT_ASSERT(session, !F_ISSET(&clsm->iface, - WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT)); + WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT)); WT_RET(__clsm_reset_cursors(clsm, NULL)); } for (;;) { - /* - * If the cursor looks up-to-date, check if the cache is full. - * In case this call blocks, the check will be repeated before - * proceeding. - */ - if (clsm->dsk_gen != lsm_tree->dsk_gen && - lsm_tree->nchunks != 0) - goto open; - + /* Check if the cursor looks up-to-date. 
*/ if (clsm->dsk_gen != lsm_tree->dsk_gen && lsm_tree->nchunks != 0) goto open; @@ -666,7 +658,7 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) { */ if (i != nchunks - 1) clsm->chunks[i]->cursor->insert = - __wt_curfile_update_check; + __wt_curfile_insert_check; if (!F_ISSET(clsm, WT_CLSM_MERGE) && F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) @@ -688,19 +680,29 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) { if (chunk != NULL && !F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && chunk->switch_txn == WT_TXN_NONE) { - clsm->primary_chunk = chunk; primary = clsm->chunks[clsm->nchunks - 1]->cursor; + btree = ((WT_CURSOR_BTREE *)primary)->btree; + /* - * Disable eviction for the in-memory chunk. Also clear the - * bulk load flag here, otherwise eviction will be enabled by - * the first update. + * If the primary is not yet set as the primary, do that now. + * Note that eviction was configured off when the underlying + * object was created, which is what we want, leave it alone. + * + * We don't have to worry about races here: every thread that + * modifies the tree will have to come through here, at worse + * we set the flag repeatedly. We don't use a WT_BTREE handle + * flag, however, we could race doing the read-modify-write of + * the flags field. + * + * If something caused the chunk to be closed and reopened + * since it was created, we can no longer use it as a primary + * chunk and we need to force a switch. We detect the tree was + * created when it was opened by checking the "original" flag. 
*/ - btree = ((WT_CURSOR_BTREE *)(primary))->btree; - if (btree->bulk_load_ok) { - btree->bulk_load_ok = false; - WT_WITH_BTREE(session, btree, - __wt_btree_lsm_switch_primary(session, true)); - } + if (!btree->lsm_primary && btree->original) + btree->lsm_primary = true; + if (btree->lsm_primary) + clsm->primary_chunk = chunk; } clsm->dsk_gen = lsm_tree->dsk_gen; @@ -1213,7 +1215,8 @@ __clsm_lookup(WT_CURSOR_LSM *clsm, WT_ITEM *value) WT_LSM_TREE_STAT_INCR( session, clsm->lsm_tree->bloom_miss); continue; - } else if (ret == 0) + } + if (ret == 0) WT_LSM_TREE_STAT_INCR( session, clsm->lsm_tree->bloom_hit); WT_ERR(ret); @@ -1239,10 +1242,10 @@ __clsm_lookup(WT_CURSOR_LSM *clsm, WT_ITEM *value) WT_ERR(WT_NOTFOUND); done: -err: F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); - if (ret == 0) { - clsm->current = c; +err: if (ret == 0) { + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); F_SET(cursor, WT_CURSTD_KEY_INT); + clsm->current = c; if (value == &cursor->value) F_SET(cursor, WT_CURSTD_VALUE_INT); } else if (c != NULL) @@ -1318,7 +1321,8 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp) if ((ret = c->search_near(c, &cmp)) == WT_NOTFOUND) { ret = 0; continue; - } else if (ret != 0) + } + if (ret != 0) goto err; /* Do we have an exact match? 
*/ @@ -1338,7 +1342,8 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp) if ((ret = c->next(c)) == WT_NOTFOUND) { ret = 0; continue; - } else if (ret != 0) + } + if (ret != 0) goto err; } @@ -1564,12 +1569,23 @@ __clsm_update(WT_CURSOR *cursor) WT_CURSOR_NEEDVALUE(cursor); WT_ERR(__clsm_enter(clsm, false, true)); - if (F_ISSET(cursor, WT_CURSTD_OVERWRITE) || - (ret = __clsm_lookup(clsm, &value)) == 0) { - WT_ERR(__clsm_deleted_encode( - session, &cursor->value, &value, &buf)); - ret = __clsm_put(session, clsm, &cursor->key, &value, true); - } + if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) + WT_ERR(__clsm_lookup(clsm, &value)); + WT_ERR(__clsm_deleted_encode(session, &cursor->value, &value, &buf)); + WT_ERR(__clsm_put(session, clsm, &cursor->key, &value, true)); + + /* + * Set the cursor to reference the internal key/value of the positioned + * cursor. + */ + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + WT_ITEM_SET(cursor->key, clsm->current->key); + WT_ITEM_SET(cursor->value, clsm->current->value); + WT_ASSERT(session, + F_MASK(clsm->current, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); + WT_ASSERT(session, + F_MASK(clsm->current, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); + F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); err: __wt_scr_free(session, &buf); __clsm_leave(clsm); @@ -1588,18 +1604,34 @@ __clsm_remove(WT_CURSOR *cursor) WT_DECL_RET; WT_ITEM value; WT_SESSION_IMPL *session; + bool positioned; clsm = (WT_CURSOR_LSM *)cursor; + /* Check if the cursor is positioned. 
*/ + positioned = F_ISSET(cursor, WT_CURSTD_KEY_INT); + CURSOR_REMOVE_API_CALL(cursor, session, NULL); WT_CURSOR_NEEDKEY(cursor); WT_CURSOR_NOVALUE(cursor); WT_ERR(__clsm_enter(clsm, false, true)); - if (F_ISSET(cursor, WT_CURSTD_OVERWRITE) || - (ret = __clsm_lookup(clsm, &value)) == 0) - ret = __clsm_put( - session, clsm, &cursor->key, &__tombstone, true); + if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) + WT_ERR(__clsm_lookup(clsm, &value)); + WT_ERR(__clsm_put( + session, clsm, &cursor->key, &__tombstone, positioned)); + + /* + * If the cursor was positioned, it stays positioned with a key but no + * no value, otherwise, there's no position, key or value. This isn't + * just cosmetic, without a reset, iteration on this cursor won't start + * at the beginning/end of the table. + */ + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + if (positioned) + F_SET(cursor, WT_CURSTD_KEY_INT); + else + WT_TRET(cursor->reset(cursor)); err: __clsm_leave(clsm); CURSOR_UPDATE_API_END(session, ret); diff --git a/src/third_party/wiredtiger/src/lsm/lsm_manager.c b/src/third_party/wiredtiger/src/lsm/lsm_manager.c index 6dc06146179..e33e119aa41 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_manager.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_manager.c @@ -89,7 +89,6 @@ __lsm_general_worker_start(WT_SESSION_IMPL *session) if (manager->lsm_workers % 2 == 0) FLD_SET(worker_args->type, WT_LSM_WORK_MERGE); } - F_SET(worker_args, WT_LSM_WORKER_RUN); WT_RET(__wt_lsm_worker_start(session, worker_args)); } @@ -129,17 +128,13 @@ __lsm_stop_workers(WT_SESSION_IMPL *session) manager->lsm_workers--) { worker_args = &manager->lsm_worker_cookies[manager->lsm_workers - 1]; - /* - * Clear this worker's flag so it stops. 
- */ - F_CLR(worker_args, WT_LSM_WORKER_RUN); - WT_ASSERT(session, worker_args->tid != 0); - WT_RET(__wt_thread_join(session, worker_args->tid)); - worker_args->tid = 0; + WT_ASSERT(session, worker_args->tid_set); + + WT_RET(__wt_lsm_worker_stop(session, worker_args)); worker_args->type = 0; - worker_args->flags = 0; + /* - * We do not clear the session because they are allocated + * We do not clear the other fields because they are allocated * statically when the connection was opened. */ } @@ -237,12 +232,12 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session) manager->lsm_worker_cookies[i].session = worker_session; } + F_SET(conn, WT_CONN_SERVER_LSM); + /* Start the LSM manager thread. */ WT_ERR(__wt_thread_create(session, &manager->lsm_worker_cookies[0].tid, __lsm_worker_manager, &manager->lsm_worker_cookies[0])); - F_SET(conn, WT_CONN_SERVER_LSM); - if (0) { err: for (i = 0; (worker_session = @@ -289,13 +284,18 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) manager = &conn->lsm_manager; removed = 0; + /* + * Clear the LSM server flag and flush to ensure running threads see + * the state change. + */ + F_CLR(conn, WT_CONN_SERVER_LSM); + WT_FULL_BARRIER(); + WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY) || manager->lsm_workers == 0); if (manager->lsm_workers > 0) { - /* - * Stop the main LSM manager thread first. - */ - while (F_ISSET(conn, WT_CONN_SERVER_LSM)) + /* Wait for the main LSM manager thread to finish. */ + while (!F_ISSET(manager, WT_LSM_MANAGER_SHUTDOWN)) __wt_yield(); /* Clean up open LSM handles. */ @@ -303,7 +303,6 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) WT_TRET(__wt_thread_join( session, manager->lsm_worker_cookies[0].tid)); - manager->lsm_worker_cookies[0].tid = 0; /* Release memory from any operations left on the queue. 
*/ while ((current = TAILQ_FIRST(&manager->switchqh)) != NULL) { @@ -342,7 +341,7 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) /* * __lsm_manager_worker_shutdown -- - * Shutdown the LSM manager and worker threads. + * Shutdown the LSM worker threads. */ static int __lsm_manager_worker_shutdown(WT_SESSION_IMPL *session) @@ -354,14 +353,13 @@ __lsm_manager_worker_shutdown(WT_SESSION_IMPL *session) manager = &S2C(session)->lsm_manager; /* - * Wait for the rest of the LSM workers to shutdown. Stop at index + * Wait for the rest of the LSM workers to shutdown. Start at index * one - since we (the manager) are at index 0. */ for (i = 1; i < manager->lsm_workers; i++) { - WT_ASSERT(session, manager->lsm_worker_cookies[i].tid != 0); - __wt_cond_signal(session, manager->work_cond); - WT_TRET(__wt_thread_join( - session, manager->lsm_worker_cookies[i].tid)); + WT_ASSERT(session, manager->lsm_worker_cookies[i].tid_set); + WT_TRET(__wt_lsm_worker_stop( + session, &manager->lsm_worker_cookies[i])); } return (ret); } @@ -383,7 +381,7 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) conn = S2C(session); dhandle_locked = false; - while (F_ISSET(conn, WT_CONN_SERVER_RUN)) { + while (F_ISSET(conn, WT_CONN_SERVER_LSM)) { __wt_sleep(0, 10000); if (TAILQ_EMPTY(&conn->lsmqh)) continue; @@ -469,11 +467,13 @@ static WT_THREAD_RET __lsm_worker_manager(void *arg) { WT_DECL_RET; + WT_LSM_MANAGER *manager; WT_LSM_WORKER_ARGS *cookie; WT_SESSION_IMPL *session; cookie = (WT_LSM_WORKER_ARGS *)arg; session = cookie->session; + manager = &S2C(session)->lsm_manager; WT_ERR(__lsm_general_worker_start(session)); WT_ERR(__lsm_manager_run_server(session)); @@ -482,7 +482,11 @@ __lsm_worker_manager(void *arg) if (ret != 0) { err: WT_PANIC_MSG(session, ret, "LSM worker manager thread error"); } - F_CLR(S2C(session), WT_CONN_SERVER_LSM); + + /* Connection close waits on us to shutdown, let it know we're done. 
*/ + F_SET(manager, WT_LSM_MANAGER_SHUTDOWN); + WT_FULL_BARRIER(); + return (WT_THREAD_RET_VALUE); } diff --git a/src/third_party/wiredtiger/src/lsm/lsm_merge.c b/src/third_party/wiredtiger/src/lsm/lsm_merge.c index ceb5f03a2f5..8838638f388 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_merge.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_merge.c @@ -187,7 +187,7 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, continue; if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM) || chunk->generation > 0) break; - else if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) && + if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) && F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) break; } @@ -625,7 +625,7 @@ err: if (locked) else __wt_verbose(session, WT_VERB_LSM, "Merge failed with %s", - __wt_strerror(session, ret, NULL, 0)); + __wt_strerror(session, ret, NULL, 0)); } F_CLR(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION); return (ret); diff --git a/src/third_party/wiredtiger/src/lsm/lsm_meta.c b/src/third_party/wiredtiger/src/lsm/lsm_meta.c index 46ead6d6ac4..fc4dde82470 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_meta.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_meta.c @@ -229,7 +229,7 @@ __lsm_meta_read_v1( cv.len -= 2; } WT_ERR(__wt_config_check(session, - WT_CONFIG_REF(session, WT_SESSION_create), cv.str, cv.len)); + WT_CONFIG_REF(session, WT_SESSION_create), cv.str, cv.len)); WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->bloom_config)); WT_ERR(__wt_config_getones( session, lsmconf, "lsm.bloom_hash_count", &cv)); diff --git a/src/third_party/wiredtiger/src/lsm/lsm_stat.c b/src/third_party/wiredtiger/src/lsm/lsm_stat.c index 21e8991be94..411655878af 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_stat.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_stat.c @@ -29,8 +29,8 @@ __curstat_lsm_init( const char *cfg[] = { WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL, NULL }; const char *disk_cfg[] = { - WT_CONFIG_BASE(session, 
WT_SESSION_open_cursor), - "checkpoint=" WT_CHECKPOINT, NULL, NULL }; + WT_CONFIG_BASE(session, WT_SESSION_open_cursor), + "checkpoint=" WT_CHECKPOINT, NULL, NULL }; locked = false; WT_RET(__wt_lsm_tree_get(session, uri, false, &lsm_tree)); @@ -38,13 +38,13 @@ __curstat_lsm_init( /* Propagate all, fast and/or clear to the cursors we open. */ if (cst->flags != 0) { - (void)snprintf(config, sizeof(config), + WT_ERR(__wt_snprintf(config, sizeof(config), "statistics=(%s%s%s%s)", F_ISSET(cst, WT_STAT_TYPE_ALL) ? "all," : "", F_ISSET(cst, WT_STAT_CLEAR) ? "clear," : "", !F_ISSET(cst, WT_STAT_TYPE_ALL) && F_ISSET(cst, WT_STAT_TYPE_FAST) ? "fast," : "", - F_ISSET(cst, WT_STAT_TYPE_SIZE) ? "size," : ""); + F_ISSET(cst, WT_STAT_TYPE_SIZE) ? "size," : "")); cfg[1] = disk_cfg[1] = config; } diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c index 4349acf7b55..e6a29666094 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c @@ -230,7 +230,7 @@ __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) if (__wt_atomic_cas32(&chunk->bloom_busy, 0, 1)) { if (!F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) { ret = __lsm_bloom_create( - session, lsm_tree, chunk, (u_int)i); + session, lsm_tree, chunk, i); /* * Record if we were successful so that we can * later push a merge work unit. @@ -265,9 +265,9 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, { WT_DECL_RET; WT_TXN_ISOLATION saved_isolation; - bool flush_set; + bool flush_set, release_btree; - flush_set = false; + flush_set = release_btree = false; /* * If the chunk is already checkpointed, make sure it is also evicted. @@ -318,20 +318,18 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, * We can wait here for checkpoints and fsyncs to complete, which can * take a long time. 
*/ - if ((ret = __wt_session_get_btree( - session, chunk->uri, NULL, NULL, 0)) == 0) { - /* - * Set read-uncommitted: we have already checked that all of the - * updates in this chunk are globally visible, use the cheapest - * possible check in reconciliation. - */ - saved_isolation = session->txn.isolation; - session->txn.isolation = WT_ISO_READ_UNCOMMITTED; - ret = __wt_cache_op(session, WT_SYNC_WRITE_LEAVES); - session->txn.isolation = saved_isolation; - WT_TRET(__wt_session_release_btree(session)); - } - WT_ERR(ret); + WT_ERR(__wt_session_get_btree(session, chunk->uri, NULL, NULL, 0)); + release_btree = true; + + /* + * Set read-uncommitted: we have already checked that all of the updates + * in this chunk are globally visible, use the cheapest possible check + * in reconciliation. + */ + saved_isolation = session->txn.isolation; + session->txn.isolation = WT_ISO_READ_UNCOMMITTED; + WT_ERR(__wt_cache_op(session, WT_SYNC_WRITE_LEAVES)); + session->txn.isolation = saved_isolation; __wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointing %s", chunk->uri); @@ -348,12 +346,14 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_ERR(__wt_meta_track_on(session)); WT_WITH_CHECKPOINT_LOCK(session, WT_WITH_SCHEMA_LOCK(session, - ret = __wt_schema_worker( - session, chunk->uri, __wt_checkpoint, NULL, NULL, 0))); + ret = __wt_checkpoint(session, NULL))); WT_TRET(__wt_meta_track_off(session, false, ret != 0)); if (ret != 0) WT_ERR_MSG(session, ret, "LSM checkpoint"); + release_btree = false; + WT_ERR(__wt_session_release_btree(session)); + /* Now the file is written, get the chunk size. */ WT_ERR(__wt_lsm_tree_set_chunk_size(session, chunk)); @@ -376,16 +376,6 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_PUBLISH(chunk->flushing, 0); flush_set = false; - /* - * Clear the no-eviction flag so the primary can be evicted and - * eventually closed. 
Only do this once the checkpoint has succeeded: - * otherwise, accessing the leaf page during the checkpoint can trigger - * forced eviction. - */ - WT_ERR(__wt_session_get_btree(session, chunk->uri, NULL, NULL, 0)); - __wt_btree_lsm_switch_primary(session, false); - WT_ERR(__wt_session_release_btree(session)); - /* Make sure we aren't pinning a transaction ID. */ __wt_txn_release_snapshot(session); @@ -402,6 +392,8 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, err: if (flush_set) WT_PUBLISH(chunk->flushing, 0); + if (release_btree) + WT_TRET(__wt_session_release_btree(session)); return (ret); } @@ -518,7 +510,7 @@ __lsm_drop_file(WT_SESSION_IMPL *session, const char *uri) * This will fail with EBUSY if the file is still in use. */ WT_WITH_HANDLE_LIST_WRITE_LOCK(session, - ret = __lsm_discard_handle(session, uri, WT_CHECKPOINT)); + ret = __lsm_discard_handle(session, uri, WT_CHECKPOINT)); WT_RET(ret); /* @@ -610,7 +602,8 @@ __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) if (drop_ret == EBUSY) { ++skipped; continue; - } else if (drop_ret != ENOENT) + } + if (drop_ret != ENOENT) WT_ERR(drop_ret); flush_metadata = true; @@ -621,7 +614,8 @@ __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) if (drop_ret == EBUSY) { ++skipped; continue; - } else if (drop_ret != ENOENT) + } + if (drop_ret != ENOENT) WT_ERR(drop_ret); flush_metadata = true; } diff --git a/src/third_party/wiredtiger/src/lsm/lsm_worker.c b/src/third_party/wiredtiger/src/lsm/lsm_worker.c index ffa00c0a5e7..1cabbd4888d 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_worker.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_worker.c @@ -21,7 +21,23 @@ __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) { __wt_verbose(session, WT_VERB_LSM_MANAGER, "Start LSM worker %u type %#" PRIx32, args->id, args->type); - return (__wt_thread_create(session, &args->tid, __lsm_worker, args)); + + args->running = true; + WT_RET(__wt_thread_create(session, 
&args->tid, __lsm_worker, args)); + args->tid_set = true; + return (0); +} + +/* + * __wt_lsm_worker_stop -- + * A wrapper around the LSM worker thread stop. + */ +int +__wt_lsm_worker_stop(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) +{ + args->running = false; + args->tid_set = false; + return (__wt_thread_join(session, args->tid)); } /* @@ -84,7 +100,6 @@ err: __wt_lsm_manager_free_work_unit(session, entry); static WT_THREAD_RET __lsm_worker(void *arg) { - WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_LSM_WORK_UNIT *entry; WT_LSM_WORKER_ARGS *cookie; @@ -93,11 +108,9 @@ __lsm_worker(void *arg) cookie = (WT_LSM_WORKER_ARGS *)arg; session = cookie->session; - conn = S2C(session); entry = NULL; - while (F_ISSET(conn, WT_CONN_SERVER_RUN) && - F_ISSET(cookie, WT_LSM_WORKER_RUN)) { + while (cookie->running) { progress = false; /* diff --git a/src/third_party/wiredtiger/src/meta/meta_apply.c b/src/third_party/wiredtiger/src/meta/meta_apply.c index fb483c21dd9..dc93180a5e5 100644 --- a/src/third_party/wiredtiger/src/meta/meta_apply.c +++ b/src/third_party/wiredtiger/src/meta/meta_apply.c @@ -45,11 +45,7 @@ __meta_btree_apply(WT_SESSION_IMPL *session, WT_CURSOR *cursor, session, uri, NULL, NULL, 0)) != 0) return (ret == EBUSY ? 
0 : ret); WT_SAVE_DHANDLE(session, ret = file_func(session, cfg)); - if (WT_META_TRACKING(session)) - WT_TRET(__wt_meta_track_handle_lock( - session, false)); - else - WT_TRET(__wt_session_release_btree(session)); + WT_TRET(__wt_session_release_btree(session)); WT_RET(ret); } WT_RET_NOTFOUND_OK(ret); diff --git a/src/third_party/wiredtiger/src/meta/meta_ckpt.c b/src/third_party/wiredtiger/src/meta/meta_ckpt.c index b985104c2eb..151bbe0e081 100644 --- a/src/third_party/wiredtiger/src/meta/meta_ckpt.c +++ b/src/third_party/wiredtiger/src/meta/meta_ckpt.c @@ -297,7 +297,7 @@ __wt_meta_ckptlist_get( *ckptbasep = ckptbase; if (0) { -err: __wt_meta_ckptlist_free(session, ckptbase); +err: __wt_meta_ckptlist_free(session, &ckptbase); } __wt_free(session, config); __wt_scr_free(session, &buf); @@ -463,16 +463,16 @@ err: __wt_scr_free(session, &buf); * Discard the checkpoint array. */ void -__wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT *ckptbase) +__wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT **ckptbasep) { - WT_CKPT *ckpt; + WT_CKPT *ckpt, *ckptbase; - if (ckptbase == NULL) + if ((ckptbase = *ckptbasep) == NULL) return; WT_CKPT_FOREACH(ckptbase, ckpt) __wt_meta_checkpoint_free(session, ckpt); - __wt_free(session, ckptbase); + __wt_free(session, *ckptbasep); } /* diff --git a/src/third_party/wiredtiger/src/meta/meta_ext.c b/src/third_party/wiredtiger/src/meta/meta_ext.c index 50e7568fe77..aa1ea8b974d 100644 --- a/src/third_party/wiredtiger/src/meta/meta_ext.c +++ b/src/third_party/wiredtiger/src/meta/meta_ext.c @@ -102,5 +102,5 @@ void __wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase) WT_GCC_FUNC_ATTRIBUTE((visibility("default"))) { - __wt_meta_ckptlist_free((WT_SESSION_IMPL *)session, ckptbase); + __wt_meta_ckptlist_free((WT_SESSION_IMPL *)session, &ckptbase); } diff --git a/src/third_party/wiredtiger/src/meta/meta_table.c b/src/third_party/wiredtiger/src/meta/meta_table.c index 4f60728b2d2..aca69d0e6a2 100644 --- 
a/src/third_party/wiredtiger/src/meta/meta_table.c +++ b/src/third_party/wiredtiger/src/meta/meta_table.c @@ -68,9 +68,6 @@ __wt_metadata_cursor_open( if (F_ISSET(btree, WT_BTREE_NO_LOGGING)) F_CLR(btree, WT_BTREE_NO_LOGGING); - /* The metadata file always uses checkpoint IDs in visibility checks. */ - btree->include_checkpoint_txn = true; - return (0); } diff --git a/src/third_party/wiredtiger/src/meta/meta_turtle.c b/src/third_party/wiredtiger/src/meta/meta_turtle.c index 66e34c728f2..5a089471059 100644 --- a/src/third_party/wiredtiger/src/meta/meta_turtle.c +++ b/src/third_party/wiredtiger/src/meta/meta_turtle.c @@ -242,7 +242,7 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) WT_DECL_ITEM(buf); WT_DECL_RET; WT_FSTREAM *fs; - bool exist, match; + bool exist; *valuep = NULL; @@ -258,22 +258,19 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) __metadata_config(session, valuep) : WT_NOTFOUND); WT_RET(__wt_fopen(session, WT_METADATA_TURTLE, 0, WT_STREAM_READ, &fs)); - /* Search for the key. */ WT_ERR(__wt_scr_alloc(session, 512, &buf)); - for (match = false;;) { + + /* Search for the key. */ + do { WT_ERR(__wt_getline(session, fs, buf)); if (buf->size == 0) WT_ERR(WT_NOTFOUND); - if (strcmp(key, buf->data) == 0) - match = true; + } while (strcmp(key, buf->data) != 0); - /* Key matched: read the subsequent line for the value. */ - WT_ERR(__wt_getline(session, fs, buf)); - if (buf->size == 0) - WT_ERR(__wt_illegal_value(session, WT_METADATA_TURTLE)); - if (match) - break; - } + /* Key matched: read the subsequent line for the value. */ + WT_ERR(__wt_getline(session, fs, buf)); + if (buf->size == 0) + WT_ERR(WT_NOTFOUND); /* Copy the value for the caller. 
*/ WT_ERR(__wt_strdup(session, buf->data, valuep)); @@ -283,7 +280,12 @@ err: WT_TRET(__wt_fclose(session, &fs)); if (ret != 0) __wt_free(session, *valuep); - return (ret); + + /* + * A file error or a missing key/value pair in the turtle file means + * something has gone horribly wrong -- we're done. + */ + return (ret == 0 ? 0 : __wt_illegal_value(session, WT_METADATA_TURTLE)); } /* @@ -322,5 +324,9 @@ __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) err: WT_TRET(__wt_fclose(session, &fs)); WT_TRET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET, false)); - return (ret); + /* + * An error updating the turtle file means something has gone horribly + * wrong -- we're done. + */ + return (ret == 0 ? 0 : __wt_illegal_value(session, WT_METADATA_TURTLE)); } diff --git a/src/third_party/wiredtiger/src/os_common/filename.c b/src/third_party/wiredtiger/src/os_common/filename.c index 5aeb64bb51e..d5695f63d91 100644 --- a/src/third_party/wiredtiger/src/os_common/filename.c +++ b/src/third_party/wiredtiger/src/os_common/filename.c @@ -29,6 +29,7 @@ int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path) { + WT_DECL_RET; size_t len; char *buf; @@ -39,16 +40,17 @@ __wt_nfilename( * the exists API which is used by the test utilities. 
*/ if (session == NULL || __wt_absolute_path(name)) - WT_RET(__wt_strndup(session, name, namelen, path)); - else { - len = strlen(S2C(session)->home) + 1 + namelen + 1; - WT_RET(__wt_calloc(session, 1, len, &buf)); - snprintf(buf, len, "%s%s%.*s", S2C(session)->home, - __wt_path_separator(), (int)namelen, name); - *path = buf; - } + return (__wt_strndup(session, name, namelen, path)); + len = strlen(S2C(session)->home) + 1 + namelen + 1; + WT_RET(__wt_calloc(session, 1, len, &buf)); + WT_ERR(__wt_snprintf(buf, len, "%s%s%.*s", + S2C(session)->home, __wt_path_separator(), (int)namelen, name)); + *path = buf; return (0); + +err: __wt_free(session, buf); + return (ret); } /* diff --git a/src/third_party/wiredtiger/src/os_common/os_errno.c b/src/third_party/wiredtiger/src/os_common/os_errno.c index a8e56b7f1aa..7ac89536e79 100644 --- a/src/third_party/wiredtiger/src/os_common/os_errno.c +++ b/src/third_party/wiredtiger/src/os_common/os_errno.c @@ -44,7 +44,7 @@ __wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen) * Fallback to a generic message. 
*/ if (session == NULL && - snprintf(errbuf, errlen, "error return: %d", error) > 0) + __wt_snprintf(errbuf, errlen, "error return: %d", error) == 0) return (errbuf); if (session != NULL && __wt_buf_fmt( session, &session->err, "error return: %d", error) == 0) diff --git a/src/third_party/wiredtiger/src/os_common/os_fstream.c b/src/third_party/wiredtiger/src/os_common/os_fstream.c index 5a368ea75e6..744da732d84 100644 --- a/src/third_party/wiredtiger/src/os_common/os_fstream.c +++ b/src/third_party/wiredtiger/src/os_common/os_fstream.c @@ -144,7 +144,7 @@ __fstream_printf( p = (char *)((uint8_t *)buf->mem + buf->size); WT_ASSERT(session, buf->memsize >= buf->size); space = buf->memsize - buf->size; - len = (size_t)vsnprintf(p, space, fmt, ap_copy); + WT_RET(__wt_vsnprintf_len_set(p, space, &len, fmt, ap_copy)); va_end(ap_copy); if (len < space) { diff --git a/src/third_party/wiredtiger/src/os_posix/os_snprintf.c b/src/third_party/wiredtiger/src/os_posix/os_snprintf.c new file mode 100644 index 00000000000..390e2e0334a --- /dev/null +++ b/src/third_party/wiredtiger/src/os_posix/os_snprintf.c @@ -0,0 +1,27 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_vsnprintf_len_incr -- + * POSIX vsnprintf convenience function, incrementing the returned size. 
+ */ +int +__wt_vsnprintf_len_incr( + char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) + WT_GCC_FUNC_ATTRIBUTE((visibility("default"))) +{ + WT_DECL_RET; + + if ((ret = vsnprintf(buf, size, fmt, ap)) >= 0) { + *retsizep += (size_t)ret; + return (0); + } + return (__wt_errno()); +} diff --git a/src/third_party/wiredtiger/src/os_posix/os_thread.c b/src/third_party/wiredtiger/src/os_posix/os_thread.c index 9bf36cc2686..18e4c347436 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_thread.c +++ b/src/third_party/wiredtiger/src/os_posix/os_thread.c @@ -18,6 +18,13 @@ __wt_thread_create(WT_SESSION_IMPL *session, { WT_DECL_RET; + /* + * Creating a thread isn't a memory barrier, but WiredTiger commonly + * sets flags and or state and then expects worker threads to start. + * Include a barrier to ensure safety in those cases. + */ + WT_FULL_BARRIER(); + /* Spawn a new thread of control. */ WT_SYSCALL_RETRY(pthread_create(tidret, NULL, func, arg), ret); if (ret == 0) @@ -34,6 +41,13 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) { WT_DECL_RET; + /* + * Joining a thread isn't a memory barrier, but WiredTiger commonly + * sets flags and or state and then expects worker threads to halt. + * Include a barrier to ensure safety in those cases. + */ + WT_FULL_BARRIER(); + WT_SYSCALL(pthread_join(tid, NULL), ret); if (ret == 0) return (0); @@ -45,7 +59,7 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) * __wt_thread_id -- * Fill in a printable version of the process and thread IDs. 
*/ -void +int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_ATTRIBUTE((visibility("default"))) { @@ -57,10 +71,10 @@ __wt_thread_id(char *buf, size_t buflen) */ self = pthread_self(); #ifdef __sun - (void)snprintf(buf, buflen, - "%" PRIuMAX ":%u", (uintmax_t)getpid(), self); + return (__wt_snprintf(buf, buflen, + "%" PRIuMAX ":%u", (uintmax_t)getpid(), self)); #else - (void)snprintf(buf, buflen, - "%" PRIuMAX ":%p", (uintmax_t)getpid(), (void *)self); + return (__wt_snprintf(buf, buflen, + "%" PRIuMAX ":%p", (uintmax_t)getpid(), (void *)self)); #endif } diff --git a/src/third_party/wiredtiger/src/os_win/os_fs.c b/src/third_party/wiredtiger/src/os_win/os_fs.c index 2f76fff04a5..5cf47ea5763 100644 --- a/src/third_party/wiredtiger/src/os_win/os_fs.c +++ b/src/third_party/wiredtiger/src/os_win/os_fs.c @@ -87,22 +87,19 @@ __win_fs_rename(WT_FILE_SYSTEM *file_system, WT_ERR(__wt_to_utf16_string(session, to, &to_wide)); /* - * Check if file exists since Windows does not override the file if - * it exists. + * We want an atomic rename, but that's not guaranteed by MoveFileExW + * (or by any MSDN API). Don't set the MOVEFILE_COPY_ALLOWED flag to + * prevent the system from falling back to a copy and delete process. + * Do set the MOVEFILE_WRITE_THROUGH flag so the window is as small + * as possible, just in case. WiredTiger renames are done in a single + * directory and we expect that to be an atomic metadata update on any + * modern filesystem. 
*/ - if (GetFileAttributesW(to_wide->data) != INVALID_FILE_ATTRIBUTES) - if (DeleteFileW(to_wide->data) == FALSE) { - windows_error = __wt_getlasterror(); - __wt_errx(session, - "%s: file-rename: DeleteFileW: %s", - to, __wt_formatmessage(session, windows_error)); - WT_ERR(__wt_map_windows_error(windows_error)); - } - - if (MoveFileW(from_wide->data, to_wide->data) == FALSE) { + if (MoveFileExW(from_wide->data, to_wide->data, + MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH) == FALSE) { windows_error = __wt_getlasterror(); __wt_errx(session, - "%s to %s: file-rename: MoveFileW: %s", + "%s to %s: file-rename: MoveFileExW: %s", from, to, __wt_formatmessage(session, windows_error)); WT_ERR(__wt_map_windows_error(windows_error)); } diff --git a/src/third_party/wiredtiger/src/os_win/os_snprintf.c b/src/third_party/wiredtiger/src/os_win/os_snprintf.c index a6056ff9342..f3025b12a60 100644 --- a/src/third_party/wiredtiger/src/os_win/os_snprintf.c +++ b/src/third_party/wiredtiger/src/os_win/os_snprintf.c @@ -8,17 +8,47 @@ #include "wt_internal.h" -_Check_return_opt_ int __cdecl _wt_snprintf( - _Out_writes_(_MaxCount) char * _DstBuf, - _In_ size_t _MaxCount, - _In_z_ _Printf_format_string_ const char * _Format, ...) +/* + * __wt_vsnprintf_len_incr -- + * POSIX vsnprintf convenience function, incrementing the returned size. + */ +int +__wt_vsnprintf_len_incr( + char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) { - va_list args; - WT_DECL_RET; + int len; + + /* + * WiredTiger calls with length 0 to get the needed buffer size. Call + * the count only version in this case, _vsnprintf_s will invoke the + * invalid parameter handler if count is less than or equal to zero. + */ + if (size == 0) { + *retsizep += (size_t)_vscprintf(fmt, ap); + return (0); + } + + /* + * Additionally, the invalid parameter handler is invoked if buffer or + * format is a NULL pointer. 
+ */ + if (buf == NULL || fmt == NULL) + return (EINVAL); + + /* + * If the storage required to store the data and a terminating null + * exceeds size, the invalid parameter handler is invoked, unless + * count is _TRUNCATE, in which case as much of the string as will + * fit in the buffer is written and -1 returned. + */ + if ((len = _vsnprintf_s(buf, size, _TRUNCATE, fmt, ap)) >= 0) { + *retsizep += (size_t)len; + return (0); + } - va_start(args, _Format); - ret = _wt_vsnprintf(_DstBuf, _MaxCount, _Format, args); - va_end(args); + /* Return the buffer size required. */ + if (len == -1) + *retsizep += (size_t)_vscprintf(fmt, ap); - return (ret); + return (0); } diff --git a/src/third_party/wiredtiger/src/os_win/os_thread.c b/src/third_party/wiredtiger/src/os_win/os_thread.c index a34dff776b6..4c8f212bb4f 100644 --- a/src/third_party/wiredtiger/src/os_win/os_thread.c +++ b/src/third_party/wiredtiger/src/os_win/os_thread.c @@ -16,6 +16,13 @@ int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) { + /* + * Creating a thread isn't a memory barrier, but WiredTiger commonly + * sets flags and or state and then expects worker threads to start. + * Include a barrier to ensure safety in those cases. + */ + WT_FULL_BARRIER(); + /* Spawn a new thread of control. */ *tidret = (HANDLE)_beginthreadex(NULL, 0, func, arg, 0, NULL); if (*tidret != 0) @@ -33,6 +40,13 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) { DWORD windows_error; + /* + * Joining a thread isn't a memory barrier, but WiredTiger commonly + * sets flags and or state and then expects worker threads to halt. + * Include a barrier to ensure safety in those cases. 
+ */ + WT_FULL_BARRIER(); + if ((windows_error = WaitForSingleObject(tid, INFINITE)) != WAIT_OBJECT_0) { if (windows_error == WAIT_FAILED) @@ -58,10 +72,10 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) * __wt_thread_id -- * Fill in a printable version of the process and thread IDs. */ -void +int __wt_thread_id(char *buf, size_t buflen) { - (void)snprintf(buf, buflen, + return (__wt_snprintf(buf, buflen, "%" PRIu64 ":%" PRIu64, - (uint64_t)GetCurrentProcessId(), (uint64_t)GetCurrentThreadId); + (uint64_t)GetCurrentProcessId(), (uint64_t)GetCurrentThreadId)); } diff --git a/src/third_party/wiredtiger/src/os_win/os_vsnprintf.c b/src/third_party/wiredtiger/src/os_win/os_vsnprintf.c deleted file mode 100644 index 63f96e79d5b..00000000000 --- a/src/third_party/wiredtiger/src/os_win/os_vsnprintf.c +++ /dev/null @@ -1,41 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -_Check_return_opt_ int __cdecl _wt_vsnprintf( - _Out_writes_(_MaxCount) char * _DstBuf, - _In_ size_t _MaxCount, - _In_z_ _Printf_format_string_ const char * _Format, - va_list _ArgList) -{ - int len; - - /* - * WiredTiger will call with length 0 to get the needed buffer size - * We call the count only version in this case since vsnprintf_s assumes - * length is greater than zero or else it triggers the invalid_parameter - * handler. - */ - if (_MaxCount == 0) { - return _vscprintf(_Format, _ArgList); - } - - len = (size_t)_vsnprintf_s( - _DstBuf, _MaxCount, _TRUNCATE, _Format, _ArgList); - - /* - * The MSVC implementation returns -1 on truncation instead of what - * it would have written. We could let callers iteratively grow the - * buffer, or just ask us how big a buffer they would like. 
- */ - if (len == -1) - len = _vscprintf(_Format, _ArgList) + 1; - - return (len); -} diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index a667a288187..6f95b84d292 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -26,6 +26,11 @@ typedef struct { uint32_t flags; /* Caller's configuration */ WT_ITEM disk_image; /* Temporary disk-image buffer */ + /* + * Temporary buffer used to write out a disk image when managing two + * chunks worth of data in memory + */ + WT_ITEM *interim_buf; /* * Track start/stop write generation to decide if all changes to the @@ -127,6 +132,7 @@ typedef struct { * repeatedly split a packed page. */ uint32_t split_size; /* Split page size */ + uint32_t min_split_size; /* Minimum split page size */ /* * The problem with splits is we've done a lot of work by the time we @@ -151,16 +157,6 @@ typedef struct { */ size_t offset; /* Split's first byte */ - /* - * The recno and entries fields are the starting record number - * of the split chunk (for column-store splits), and the number - * of entries in the split chunk. These fields are used both - * to write the split chunk, and to create a new internal page - * to reference the split pages. - */ - uint64_t recno; /* Split's starting record */ - uint32_t entries; /* Split's entries */ - WT_ADDR addr; /* Split's written location */ uint32_t size; /* Split's size */ uint32_t checksum; /* Split's checksum */ @@ -182,11 +178,36 @@ typedef struct { size_t supd_allocated; /* + * While reconciling pages, at any given time, we maintain two + * split chunks in the memory to be written out as pages. As we + * get to the last two chunks, if the last one turns out to be + * smaller than the minimum split size, we go back into the + * penultimate chunk and split at this minimum split size + * boundary. 
This moves some data from the penultimate chunk to + * the last chunk, hence increasing the size of the last page + * written without decreasing the penultimate page size beyond + * the minimum split size. For this reason, we maintain both a + * maximum split percentage boundary and a minimum split + * percentage boundary. + * + * The recno and entries fields are the starting record number + * of the split chunk (for column-store splits), and the number + * of entries in the split chunk. These fields are used both to + * write the split chunk, and to create a new internal page to + * reference the split pages. + * * The key for a row-store page; no column-store key is needed * because the page's recno, stored in the recno field, is the * column-store key. */ - WT_ITEM key; /* Promoted row-store key */ + uint32_t max_bnd_entries; + uint64_t max_bnd_recno; + WT_ITEM max_bnd_key; + + size_t min_bnd_offset; + uint32_t min_bnd_entries; + uint64_t min_bnd_recno; + WT_ITEM min_bnd_key; } *bnd; /* Saved boundaries */ uint32_t bnd_next; /* Next boundary slot */ uint32_t bnd_next_max; /* Maximum boundary slots used */ @@ -194,28 +215,6 @@ typedef struct { size_t bnd_allocated; /* Bytes allocated */ /* - * We track the total number of page entries copied into split chunks - * so we can easily figure out how many entries in the current split - * chunk. - */ - uint32_t total_entries; /* Total entries in splits */ - - /* - * And there's state information as to where in this process we are: - * (1) tracking split boundaries because we can still fit more split - * chunks into the maximum page size, (2) tracking the maximum page - * size boundary because we can't fit any more split chunks into the - * maximum page size, (3) not performing boundary checks because it's - * either not useful with the current page size configuration, or - * because we've already been forced to split. 
- */ - enum { SPLIT_BOUNDARY=0, /* Next: a split page boundary */ - SPLIT_MAX=1, /* Next: the maximum page boundary */ - SPLIT_TRACKING_OFF=2, /* No boundary checks */ - SPLIT_TRACKING_RAW=3 } /* Underlying compression decides */ - bnd_state; - - /* * We track current information about the current record number, the * number of entries copied into the temporary buffer, where we are * in the temporary buffer, and how much memory remains. Those items @@ -226,6 +225,8 @@ typedef struct { uint32_t entries; /* Current number of entries */ uint8_t *first_free; /* Current first free byte */ size_t space_avail; /* Remaining space in this chunk */ + /* Remaining space in this chunk to put a minimum size boundary */ + size_t min_space_avail; /* * Saved update list, supporting the WT_EVICT_UPDATE_RESTORE and @@ -247,15 +248,14 @@ typedef struct { /* * WT_DICTIONARY -- - * We optionally build a dictionary of row-store values for leaf - * pages. Where two value cells are identical, only write the value - * once, the second and subsequent copies point to the original cell. - * The dictionary is fixed size, but organized in a skip-list to make - * searches faster. + * We optionally build a dictionary of values for leaf pages. Where + * two value cells are identical, only write the value once, the second + * and subsequent copies point to the original cell. The dictionary is + * fixed size, but organized in a skip-list to make searches faster. 
*/ struct __rec_dictionary { uint64_t hash; /* Hash value */ - void *cell; /* Matching cell */ + uint32_t offset; /* Matching cell */ u_int depth; /* Skiplist */ WT_DICTIONARY *next[0]; @@ -293,6 +293,13 @@ typedef struct { uint32_t tested_ref_state; /* Debugging information */ } WT_RECONCILE; +#define WT_CROSSING_MIN_BND(r, next_len) \ + ((r)->bnd[(r)->bnd_next].min_bnd_offset == 0 && \ + (next_len) > (r)->min_space_avail) +#define WT_CROSSING_SPLIT_BND(r, next_len) ((next_len) > (r)->space_avail) +#define WT_CHECK_CROSSING_BND(r, next_len) \ + (WT_CROSSING_MIN_BND(r, next_len) || WT_CROSSING_SPLIT_BND(r, next_len)) + static void __rec_bnd_cleanup(WT_SESSION_IMPL *, WT_RECONCILE *, bool); static void __rec_cell_build_addr(WT_SESSION_IMPL *, WT_RECONCILE *, const void *, size_t, u_int, uint64_t); @@ -314,6 +321,7 @@ static int __rec_col_var(WT_SESSION_IMPL *, static int __rec_col_var_helper(WT_SESSION_IMPL *, WT_RECONCILE *, WT_SALVAGE_COOKIE *, WT_ITEM *, bool, uint8_t, uint64_t); static int __rec_destroy_session(WT_SESSION_IMPL *); +static uint32_t __rec_min_split_page_size(WT_BTREE *, uint32_t); static int __rec_root_write(WT_SESSION_IMPL *, WT_PAGE *, uint32_t); static int __rec_row_int(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_row_leaf(WT_SESSION_IMPL *, @@ -323,7 +331,6 @@ static int __rec_row_leaf_insert( static int __rec_row_merge(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_split_col(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_split_discard(WT_SESSION_IMPL *, WT_PAGE *); -static int __rec_split_fixup(WT_SESSION_IMPL *, WT_RECONCILE *); static int __rec_split_row(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_split_row_promote( WT_SESSION_IMPL *, WT_RECONCILE *, WT_ITEM *, uint8_t); @@ -968,6 +975,7 @@ __rec_destroy(WT_SESSION_IMPL *session, void *reconcilep) *(WT_RECONCILE **)reconcilep = NULL; __wt_buf_free(session, &r->disk_image); + __wt_scr_free(session, &r->interim_buf); 
__wt_free(session, r->raw_entries); __wt_free(session, r->raw_offsets); @@ -1032,7 +1040,8 @@ __rec_bnd_cleanup(WT_SESSION_IMPL *session, WT_RECONCILE *r, bool destroy) __wt_free(session, bnd->addr.addr); __wt_free(session, bnd->disk_image); __wt_free(session, bnd->supd); - __wt_buf_free(session, &bnd->key); + __wt_buf_free(session, &bnd->max_bnd_key); + __wt_buf_free(session, &bnd->min_bnd_key); } __wt_free(session, r->bnd); r->bnd_next = 0; @@ -1395,7 +1404,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, */ #define WT_CHILD_RELEASE(session, hazard, ref) do { \ if (hazard) { \ - hazard = false; \ + (hazard) = false; \ WT_TRET( \ __wt_page_release(session, ref, WT_READ_NO_EVICT)); \ } \ @@ -1717,6 +1726,17 @@ __rec_incr(WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size) r->entries += v; r->space_avail -= size; r->first_free += size; + + /* + * If offset for the minimum split size boundary is not set, we have not + * yet reached the minimum boundary, reduce the space available for it. + */ + if (r->bnd[r->bnd_next].min_bnd_offset == 0) { + if (r->min_space_avail >= size) + r->min_space_avail -= size; + else + r->min_space_avail = 0; + } } /* @@ -1737,7 +1757,7 @@ __rec_copy_incr(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_KV *kv) * WT_CELLs are typically small, 1 or 2 bytes -- don't call memcpy, do * the copy in-line. */ - for (p = (uint8_t *)r->first_free, + for (p = r->first_free, t = (uint8_t *)&kv->cell, len = kv->cell_len; len > 0; --len) *p++ = *t++; @@ -1781,16 +1801,22 @@ __rec_dict_replace( return (0); /* - * If the dictionary cell reference is not set, we're creating a new - * entry in the dictionary, update its location. + * If the dictionary offset isn't set, we're creating a new entry in the + * dictionary, set its location. * - * If the dictionary cell reference is set, we have a matching value. - * Create a copy cell instead. + * If the dictionary offset is set, we have a matching value. 
Create a + * copy cell instead. */ - if (dp->cell == NULL) - dp->cell = r->first_free; + if (dp->offset == 0) + dp->offset = WT_PTRDIFF32(r->first_free, r->disk_image.mem); else { - offset = WT_PTRDIFF(r->first_free, dp->cell); + /* + * The offset is the byte offset from this cell to the previous, + * matching cell, NOT the byte offset from the beginning of the + * page. + */ + offset = (uint64_t)WT_PTRDIFF(r->first_free, + (uint8_t *)r->disk_image.mem + dp->offset); val->len = val->cell_len = __wt_cell_pack_copy(&val->cell, rle, offset); val->buf.data = NULL; @@ -1927,8 +1953,8 @@ static void __rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd) { bnd->offset = 0; - bnd->recno = WT_RECNO_OOB; - bnd->entries = 0; + bnd->max_bnd_recno = WT_RECNO_OOB; + bnd->max_bnd_entries = 0; __wt_free(session, bnd->addr.addr); WT_CLEAR(bnd->addr); @@ -1943,6 +1969,10 @@ __rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd) bnd->already_compressed = false; + bnd->min_bnd_offset = 0; + bnd->min_bnd_entries = 0; + bnd->min_bnd_recno = WT_RECNO_OOB; + /* * Don't touch the key, we re-use that memory in each new * reconciliation. @@ -1974,40 +2004,64 @@ __rec_split_bnd_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r) } /* - * __wt_split_page_size -- - * Split page size calculation: we don't want to repeatedly split every - * time a new entry is added, so we split to a smaller-than-maximum page size. + * __rec_split_page_size_from_pct -- + * Given a split percentage, calculate split page size in bytes. */ -uint32_t -__wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize) -{ +static uint32_t +__rec_split_page_size_from_pct( + int split_pct, uint32_t maxpagesize, uint32_t allocsize) { uintmax_t a; uint32_t split_size; /* * Ideally, the split page size is some percentage of the maximum page - * size rounded to an allocation unit (round to an allocation unit so - * we don't waste space when we write). 
+ * size rounded to an allocation unit (round to an allocation unit so we + * don't waste space when we write). */ a = maxpagesize; /* Don't overflow. */ split_size = (uint32_t)WT_ALIGN_NEAREST( - (a * (u_int)btree->split_pct) / 100, btree->allocsize); + (a * (u_int)split_pct) / 100, allocsize); /* - * Respect the configured split percentage if the calculated split - * size is either zero or a full page. The user has either configured - * an allocation size that matches the page size, or a split - * percentage that is close to zero or one hundred. Rounding is going - * to provide a worse outcome than having a split point that doesn't - * fall on an allocation size boundary in those cases. + * Respect the configured split percentage if the calculated split size + * is either zero or a full page. The user has either configured an + * allocation size that matches the page size, or a split percentage + * that is close to zero or one hundred. Rounding is going to provide a + * worse outcome than having a split point that doesn't fall on an + * allocation size boundary in those cases. */ if (split_size == 0 || split_size == maxpagesize) - split_size = (uint32_t)((a * (u_int)btree->split_pct) / 100); + split_size = (uint32_t)((a * (u_int)split_pct) / 100); return (split_size); } /* + * __wt_split_page_size -- + * Split page size calculation: we don't want to repeatedly split every + * time a new entry is added, so we split to a smaller-than-maximum page size. + */ +uint32_t +__wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize) +{ + return (__rec_split_page_size_from_pct( + btree->split_pct, maxpagesize, btree->allocsize)); +} + +/* + * __rec_min_split_page_size -- + * Minimum split size boundary calculation: To track a boundary at the + * minimum split size that we could have split at instead of splitting at + * the split page size. 
+ */ +static uint32_t +__rec_min_split_page_size(WT_BTREE *btree, uint32_t maxpagesize) +{ + return (__rec_split_page_size_from_pct( + WT_BTREE_MIN_SPLIT_PCT, maxpagesize, btree->allocsize)); +} + +/* * __rec_split_init -- * Initialization for the reconciliation split functions. */ @@ -2018,7 +2072,7 @@ __rec_split_init(WT_SESSION_IMPL *session, WT_BM *bm; WT_BTREE *btree; WT_PAGE_HEADER *dsk; - size_t corrected_page_size; + size_t corrected_page_size, disk_img_buf_size; btree = S2BT(session); bm = btree->bm; @@ -2053,33 +2107,6 @@ __rec_split_init(WT_SESSION_IMPL *session, r->max_raw_page_size = r->page_size = (uint32_t)WT_MIN(r->page_size * 10, WT_MAX(r->page_size, btree->maxmempage / 2)); - - /* - * Ensure the disk image buffer is large enough for the max object, as - * corrected by the underlying block manager. - */ - corrected_page_size = r->page_size; - WT_RET(bm->write_size(bm, session, &corrected_page_size)); - WT_RET(__wt_buf_init(session, &r->disk_image, corrected_page_size)); - - /* - * Clear the disk page header to ensure all of it is initialized, even - * the unused fields. - * - * In the case of fixed-length column-store, clear the entire buffer: - * fixed-length column-store sets bits in bytes, where the bytes are - * assumed to initially be 0. - */ - memset(r->disk_image.mem, 0, page->type == WT_PAGE_COL_FIX ? - corrected_page_size : WT_PAGE_HEADER_SIZE); - - /* - * Set the page type (the type doesn't change, and setting it later - * would require additional code in a few different places). - */ - dsk = r->disk_image.mem; - dsk->type = page->type; - /* * If we have to split, we want to choose a smaller page size for the * split pages, because otherwise we could end up splitting one large @@ -2099,22 +2126,28 @@ __rec_split_init(WT_SESSION_IMPL *session, * creating overflow items and compacted data, for example, as those * items have already been written to disk). 
So, the loop calls the * helper functions when approaching a split boundary, and we save the - * information at that point. That allows us to go back and split the - * page at the boundary points if we eventually overflow the maximum - * page size. + * information at that point. We also save the boundary information at + * the minimum split size. We maintain two chunks (each boundary + * represents a chunk that gets written as a page) in the memory, + * writing out the older one to the disk as a page when we need to make + * space for a new chunk. On reaching the last chunk, if it turns out to + * be smaller than the minimum split size, we go back into the + * penultimate chunk and split at this minimum split size boundary. This + * moves some data from the penultimate chunk to the last chunk, hence + * increasing the size of the last page written without decreasing the + * penultimate page size beyond the minimum split size. * * Finally, all this doesn't matter for fixed-size column-store pages, * raw compression, and salvage. Fixed-size column store pages can * split under (very) rare circumstances, but they're allocated at a * fixed page size, never anything smaller. In raw compression, the - * underlying compression routine decides when we split, so it's not - * our problem. In salvage, as noted above, we can't split at all. + * underlying compression routine decides when we split, so it's not our + * problem. In salvage, as noted above, we can't split at all. 
*/ if (r->raw_compression || r->salvage != NULL) { r->split_size = 0; r->space_avail = r->page_size - WT_PAGE_HEADER_BYTE_SIZE(btree); - } - else if (page->type == WT_PAGE_COL_FIX) { + } else if (page->type == WT_PAGE_COL_FIX) { r->split_size = r->page_size; r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); @@ -2122,32 +2155,55 @@ __rec_split_init(WT_SESSION_IMPL *session, r->split_size = __wt_split_page_size(btree, r->page_size); r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); + r->min_split_size = + __rec_min_split_page_size(btree, r->page_size); + r->min_space_avail = + r->min_split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); } + + /* + * Ensure the disk image buffer is large enough for the max object, as + * corrected by the underlying block manager. + * + * The buffer that we build disk image in, needs to hold two chunks + * worth of data. Since we want to support split_size more than the page + * size (to allow for adjustments based on the compression), this buffer + * should be greater of twice of split_size and page_size. + */ + corrected_page_size = r->page_size; + disk_img_buf_size = 2 * WT_MAX(corrected_page_size, r->split_size); + WT_RET(bm->write_size(bm, session, &corrected_page_size)); + WT_RET(__wt_buf_init(session, &r->disk_image, disk_img_buf_size)); + + /* + * Clear the disk page header to ensure all of it is initialized, even + * the unused fields. + * + * In the case of fixed-length column-store, clear the entire buffer: + * fixed-length column-store sets bits in bytes, where the bytes are + * assumed to initially be 0. + */ + memset(r->disk_image.mem, 0, page->type == WT_PAGE_COL_FIX ? + disk_img_buf_size : WT_PAGE_HEADER_SIZE); + + /* + * Set the page type (the type doesn't change, and setting it later + * would require additional code in a few different places). + */ + dsk = r->disk_image.mem; + dsk->type = page->type; + r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); /* Initialize the first boundary. 
*/ r->bnd_next = 0; WT_RET(__rec_split_bnd_grow(session, r)); __rec_split_bnd_init(session, &r->bnd[0]); - r->bnd[0].recno = recno; + r->bnd[0].max_bnd_recno = recno; r->bnd[0].offset = WT_PAGE_HEADER_BYTE_SIZE(btree); - /* - * If the maximum page size is the same as the split page size, either - * because of the object type or application configuration, there isn't - * any need to maintain split boundaries within a larger page. - * - * No configuration for salvage here, because salvage can't split. - */ - if (r->raw_compression) - r->bnd_state = SPLIT_TRACKING_RAW; - else if (max == r->split_size) - r->bnd_state = SPLIT_TRACKING_OFF; - else - r->bnd_state = SPLIT_BOUNDARY; - - /* Initialize the entry counters. */ - r->entries = r->total_entries = 0; + /* Initialize the entry counter. */ + r->entries = 0; /* Initialize the starting record number. */ r->recno = recno; @@ -2350,19 +2406,112 @@ __rec_split_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t add_len) { WT_BM *bm; WT_BTREE *btree; - size_t corrected_page_size, len; + size_t corrected_page_size, inuse, len; btree = S2BT(session); bm = btree->bm; len = WT_PTRDIFF(r->first_free, r->disk_image.mem); - corrected_page_size = len + add_len; + inuse = (len - r->bnd[r->bnd_next].offset) + + WT_PAGE_HEADER_BYTE_SIZE(btree); + corrected_page_size = inuse + add_len; + WT_RET(bm->write_size(bm, session, &corrected_page_size)); - WT_RET(__wt_buf_grow(session, &r->disk_image, corrected_page_size)); + /* Need to account for buffer carrying two chunks worth of data */ + WT_RET(__wt_buf_grow(session, &r->disk_image, 2 * corrected_page_size)); + r->first_free = (uint8_t *)r->disk_image.mem + len; - WT_ASSERT(session, corrected_page_size >= len); - r->space_avail = corrected_page_size - len; + WT_ASSERT(session, corrected_page_size >= inuse); + r->space_avail = corrected_page_size - inuse; WT_ASSERT(session, r->space_avail >= add_len); + + return (0); +} + +/* + * __rec_split_write_prev_and_shift_cur -- + * Write the 
previous split chunk to the disk as a page. Shift the contents + * of the current chunk to the start of the buffer, making space for a new + * chunk to be written. + * If the caller asks for a chunk resizing, the boundary between the two + * chunks is readjusted to the minimum split size boundary details stored + * in the previous chunk, letting the current chunk grow at the cost of the + * previous chunk. + */ +static int +__rec_split_write_prev_and_shift_cur( + WT_SESSION_IMPL *session, WT_RECONCILE *r, bool resize_chunks) +{ + WT_BM *bm; + WT_BOUNDARY *bnd_cur, *bnd_prev; + WT_BTREE *btree; + WT_PAGE_HEADER *dsk, *dsk_tmp; + size_t cur_len, len; + uint8_t *dsk_start; + + WT_ASSERT(session, r->bnd_next != 0); + + btree = S2BT(session); + bm = btree->bm; + bnd_cur = &r->bnd[r->bnd_next]; + bnd_prev = bnd_cur - 1; + dsk = r->disk_image.mem; + cur_len = WT_PTRDIFF(r->first_free, dsk) - bnd_cur->offset; + + /* + * Resize chunks if the current is smaller than the minimum, and there + * are details on the minimum split size boundary available in the + * previous boundary details. + * + * There is a possibility that we do not have a minimum boundary set, in + * such a case we skip chunk resizing. Such a condition is possible for + * instance when we are building the image in the buffer and the first + * K/V pair is large enough that it surpasses both the minimum split + * size and the split size the application has set. In such a case we + * split the chunk without saving any minimum boundary. 
+ */ + if (resize_chunks && + cur_len < r->min_split_size && bnd_prev->min_bnd_offset != 0) { + bnd_cur->offset = bnd_prev->min_bnd_offset; + bnd_cur->max_bnd_entries += + bnd_prev->max_bnd_entries - bnd_prev->min_bnd_entries; + bnd_prev->max_bnd_entries = bnd_prev->min_bnd_entries; + bnd_cur->max_bnd_recno = bnd_prev->min_bnd_recno; + + WT_RET(__wt_buf_set(session, &bnd_cur->max_bnd_key, + bnd_prev->min_bnd_key.data, bnd_prev->min_bnd_key.size)); + + /* Update current chunk's length */ + cur_len = WT_PTRDIFF(r->first_free, dsk) - bnd_cur->offset; + } + + /* + * Create an interim buffer if not already done to prepare the previous + * chunk's disk image. + */ + len = bnd_cur->offset; + WT_RET(bm->write_size(bm, session, &len)); + if (r->interim_buf == NULL) + WT_RET(__wt_scr_alloc(session, len, &r->interim_buf)); + else + WT_RET(__wt_buf_init(session, r->interim_buf, len)); + + dsk_tmp = r->interim_buf->mem; + memcpy(dsk_tmp, dsk, bnd_cur->offset); + dsk_tmp->recno = bnd_prev->max_bnd_recno; + dsk_tmp->u.entries = bnd_prev->max_bnd_entries; + dsk_tmp->mem_size = WT_STORE_SIZE(bnd_cur->offset); + r->interim_buf->size = dsk_tmp->mem_size; + WT_RET(__rec_split_write(session, r, bnd_prev, r->interim_buf, false)); + + /* Shift the current chunk to the start of the buffer */ + dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); + (void)memmove(dsk_start, (uint8_t *)dsk + bnd_cur->offset, cur_len); + + /* Fix boundary offset */ + bnd_cur->offset = WT_PAGE_HEADER_BYTE_SIZE(btree); + /* Fix where free points */ + r->first_free = dsk_start + cur_len; return (0); } @@ -2382,6 +2531,9 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) btree = S2BT(session); dsk = r->disk_image.mem; + /* Fixed length col store can call with next_len 0 */ + WT_ASSERT(session, next_len == 0 || r->space_avail < next_len); + /* * We should never split during salvage, and we're about to drop core * because there's no parent page. 
@@ -2391,147 +2543,63 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) "%s page too large, attempted split during salvage", __wt_page_type_string(r->page->type)); - /* Hitting a page boundary resets the dictionary, in all cases. */ - __rec_dictionary_reset(r); - - inuse = WT_PTRDIFF(r->first_free, dsk); - switch (r->bnd_state) { - case SPLIT_BOUNDARY: - /* - * We can get here if the first key/value pair won't fit. - * Additionally, grow the buffer to contain the current item if - * we haven't already consumed a reasonable portion of a split - * chunk. - */ - if (inuse < r->split_size / 2) - break; - - /* - * About to cross a split boundary but not yet forced to split - * into multiple pages. If we have to split, this is one of the - * split points, save information about where we are when the - * split would have happened. - */ - WT_RET(__rec_split_bnd_grow(session, r)); - last = &r->bnd[r->bnd_next++]; - next = last + 1; - - /* Set the number of entries for the just finished chunk. */ - last->entries = r->entries - r->total_entries; - r->total_entries = r->entries; - - /* Set the key for the next chunk. */ - next->recno = r->recno; - if (dsk->type == WT_PAGE_ROW_INT || - dsk->type == WT_PAGE_ROW_LEAF) - WT_RET(__rec_split_row_promote( - session, r, &next->key, dsk->type)); - - /* - * Set the starting buffer offset and clear the entries (the - * latter not required, but cleaner). - */ - next->offset = WT_PTRDIFF(r->first_free, dsk); - next->entries = 0; - - /* Set the space available to another split-size chunk. */ - r->space_avail = - r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); - - /* - * Adjust the space available to handle two cases: - * - We don't have enough room for another full split-size - * chunk on the page. - * - We chose to fill past a page boundary because of a - * large item. - */ - if (inuse + r->space_avail > r->page_size) { - r->space_avail = - r->page_size > inuse ? 
(r->page_size - inuse) : 0; - - /* There are no further boundary points. */ - r->bnd_state = SPLIT_MAX; - } - - /* - * Return if the next object fits into this page, else we have - * to split the page. - */ - if (r->space_avail >= next_len) - return (0); - - /* FALLTHROUGH */ - case SPLIT_MAX: - /* - * We're going to have to split and create multiple pages. - * - * Cycle through the saved split-point information, writing the - * split chunks we have tracked. The underlying fixup function - * sets the space available and other information, and copied - * any unwritten chunk of data to the beginning of the buffer. - */ - WT_RET(__rec_split_fixup(session, r)); - - /* We're done saving split chunks. */ - r->bnd_state = SPLIT_TRACKING_OFF; - break; - case SPLIT_TRACKING_OFF: - /* - * We can get here if the first key/value pair won't fit. - * Additionally, grow the buffer to contain the current item if - * we haven't already consumed a reasonable portion of a split - * chunk. - */ - if (inuse < r->split_size / 2) - break; + last = &r->bnd[r->bnd_next]; + inuse = (WT_PTRDIFF(r->first_free, dsk) - last->offset) + + WT_PAGE_HEADER_BYTE_SIZE(btree); - /* - * The key/value pairs didn't fit into a single page, but either - * we've already noticed that and are now processing the rest of - * the pairs at split size boundaries, or the split size was the - * same as the page size, and we never bothered with split point - * information at all. - */ - WT_RET(__rec_split_bnd_grow(session, r)); - last = &r->bnd[r->bnd_next++]; - next = last + 1; + /* + * We can get here if the first key/value pair won't fit. + * Additionally, grow the buffer to contain the current item if we + * haven't already consumed a reasonable portion of a split chunk. + */ + if (inuse < r->split_size / 2) + goto done; - /* - * Set the key for the next chunk (before writing the block, a - * key range is needed in that code). 
- */ - next->recno = r->recno; - if (dsk->type == WT_PAGE_ROW_INT || - dsk->type == WT_PAGE_ROW_LEAF) - WT_RET(__rec_split_row_promote( - session, r, &next->key, dsk->type)); + /* All page boundaries reset the dictionary. */ + __rec_dictionary_reset(r); - /* Clear the entries (not required, but cleaner). */ - next->entries = 0; + /* Set the number of entries for the just finished chunk. */ + last->max_bnd_entries = r->entries; - /* Finalize the header information and write the page. */ - dsk->recno = last->recno; - dsk->u.entries = r->entries; - dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); + /* + * In case of bulk load, write out chunks as we get them. Otherwise we + * keep two chunks in memory at a given time. So, if there is a previous + * chunk, write it out, making space in the buffer for the next chunk to + * be written. + */ + if (r->is_bulk_load) { + dsk->recno = last->max_bnd_recno; + dsk->u.entries = last->max_bnd_entries; + dsk->mem_size = (uint32_t)inuse; r->disk_image.size = dsk->mem_size; - WT_RET( - __rec_split_write(session, r, last, &r->disk_image, false)); - - /* - * Set the caller's entry count and buffer information for the - * next chunk. We only get here if we're not splitting or have - * already split, so it's split-size chunks from here on out. - */ - r->entries = 0; + WT_RET(__rec_split_write( + session, r, last, &r->disk_image, false)); + /* Fix where free points */ r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); - r->space_avail = - r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); - break; - case SPLIT_TRACKING_RAW: - return (__wt_illegal_value(session, NULL)); - } + } else if (r->bnd_next != 0) + WT_RET(__rec_split_write_prev_and_shift_cur(session, r, false)); - /* + /* Prepare the next boundary */ + WT_RET(__rec_split_bnd_grow(session, r)); + r->bnd_next++; + next = &r->bnd[r->bnd_next]; + next->offset = WT_PTRDIFF(r->first_free, dsk); + /* Set the key for the next chunk. 
*/ + next->max_bnd_recno = r->recno; + if (dsk->type == WT_PAGE_ROW_INT || dsk->type == WT_PAGE_ROW_LEAF) + WT_RET(__rec_split_row_promote( + session, r, &next->max_bnd_key, dsk->type)); + + r->entries = 0; + /* + * Set the space available to another split-size and minimum split-size + * chunk. + */ + r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); + r->min_space_avail = + r->min_split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); + +done: /* * Overflow values can be larger than the maximum page size but still be * "on-page". If the next key/value pair is larger than space available * after a split has happened (in other words, larger than the maximum @@ -2549,6 +2617,64 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) } /* + * __rec_split_crossing_bnd -- + * Save the details for the minimum split size boundary or call for a + * split. + */ +static inline int +__rec_split_crossing_bnd( + WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) +{ + WT_BOUNDARY *bnd; + WT_BTREE *btree; + WT_PAGE_HEADER *dsk; + size_t min_bnd_offset; + + WT_ASSERT(session, WT_CHECK_CROSSING_BND(r, next_len)); + + /* + * If crossing the minimum split size boundary, store the boundary + * details at the current location in the buffer. If we are crossing the + * split boundary at the same time, possible when the next record is + * large enough, just split at this point. + */ + if (WT_CROSSING_MIN_BND(r, next_len) && + !WT_CROSSING_SPLIT_BND(r, next_len)) { + btree = S2BT(session); + bnd = &r->bnd[r->bnd_next]; + dsk = r->disk_image.mem; + min_bnd_offset = (WT_PTRDIFF(r->first_free, dsk) - + bnd->offset) + WT_PAGE_HEADER_BYTE_SIZE(btree); + if (min_bnd_offset == WT_PAGE_HEADER_BYTE_SIZE(btree)) + /* + * This is possible if the first record doesn't fit in + * the minimum split size, we write this record without + * setting up any boundary here. We will get the + * opportunity to setup a boundary before writing out + * the next record. 
+ */ + return (0); + + WT_ASSERT(session, bnd->min_bnd_offset == 0); + + /* All page boundaries reset the dictionary. */ + __rec_dictionary_reset(r); + + bnd->min_bnd_offset = min_bnd_offset; + bnd->min_bnd_entries = r->entries; + bnd->min_bnd_recno = r->recno; + if (dsk->type == WT_PAGE_ROW_INT || + dsk->type == WT_PAGE_ROW_LEAF) + WT_RET(__rec_split_row_promote( + session, r, &bnd->min_bnd_key, dsk->type)); + return (0); + } + + /* We are crossing a split boundary */ + return (__rec_split(session, r, next_len)); +} + +/* * __rec_split_raw_worker -- * Handle the raw compression page reconciliation bookkeeping. */ @@ -2626,7 +2752,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session, */ recno = WT_RECNO_OOB; if (dsk->type == WT_PAGE_COL_VAR) - recno = last->recno; + recno = last->max_bnd_recno; entry = max_image_slot = slots = 0; WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { @@ -2853,7 +2979,7 @@ no_slots: */ dst->size = result_len + WT_BLOCK_COMPRESS_SKIP; dsk_dst = dst->mem; - dsk_dst->recno = last->recno; + dsk_dst->recno = last->max_bnd_recno; dsk_dst->mem_size = r->raw_offsets[result_slots] + WT_BLOCK_COMPRESS_SKIP; dsk_dst->u.entries = r->raw_entries[result_slots - 1]; @@ -2873,7 +2999,7 @@ no_slots: WT_RET(__wt_strndup(session, dsk, dsk_dst->mem_size, &last->disk_image)); disk_image = last->disk_image; - disk_image->recno = last->recno; + disk_image->recno = last->max_bnd_recno; disk_image->mem_size = dsk_dst->mem_size; disk_image->u.entries = dsk_dst->u.entries; } @@ -2889,7 +3015,7 @@ no_slots: len = WT_PTRDIFF( r->first_free, (uint8_t *)dsk + dsk_dst->mem_size); dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); - (void)memmove(dsk_start, (uint8_t *)r->first_free - len, len); + (void)memmove(dsk_start, r->first_free - len, len); r->entries -= r->raw_entries[result_slots - 1]; r->first_free = dsk_start + len; @@ -2903,14 +3029,14 @@ no_slots: */ switch (dsk->type) { case WT_PAGE_COL_INT: - next->recno = r->raw_recnos[result_slots]; + next->max_bnd_recno = 
r->raw_recnos[result_slots]; break; case WT_PAGE_COL_VAR: - next->recno = r->raw_recnos[result_slots - 1]; + next->max_bnd_recno = r->raw_recnos[result_slots - 1]; break; case WT_PAGE_ROW_INT: case WT_PAGE_ROW_LEAF: - next->recno = WT_RECNO_OOB; + next->max_bnd_recno = WT_RECNO_OOB; if (!last_block) { /* * Confirm there was uncompressed data remaining @@ -2919,7 +3045,7 @@ no_slots: */ WT_ASSERT(session, len > 0); WT_RET(__rec_split_row_promote_cell( - session, dsk, &next->key)); + session, dsk, &next->max_bnd_key)); } break; } @@ -2931,7 +3057,7 @@ no_slots: */ WT_STAT_DATA_INCR(session, compress_raw_fail); - dsk->recno = last->recno; + dsk->recno = last->max_bnd_recno; dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); dsk->u.entries = r->entries; r->disk_image.size = dsk->mem_size; @@ -3008,35 +3134,9 @@ __rec_split_raw(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) static int __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) { - WT_BOUNDARY *bnd; + WT_BOUNDARY *bnd_cur, *bnd_prev; WT_PAGE_HEADER *dsk; - - /* Adjust the boundary information based on our split status. */ - switch (r->bnd_state) { - case SPLIT_BOUNDARY: - case SPLIT_MAX: - /* - * We never split, the reconciled page fit into a maximum page - * size. Change the first boundary slot to represent the full - * page (the first boundary slot is largely correct, just update - * the number of entries). - */ - r->bnd_next = 0; - break; - case SPLIT_TRACKING_OFF: - /* - * If we have already split, or aren't tracking boundaries, put - * the remaining data in the next boundary slot. - */ - WT_RET(__rec_split_bnd_grow(session, r)); - break; - case SPLIT_TRACKING_RAW: - /* - * We were configured for raw compression, and either we never - * wrote anything, or there's a remaindered block of data. 
- */ - break; - } + bool grow_bnd; /* * We may arrive here with no entries to write if the page was entirely @@ -3063,20 +3163,66 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) return (EBUSY); } - /* Set the boundary reference and increment the count. */ - bnd = &r->bnd[r->bnd_next++]; - bnd->entries = r->entries; - - /* Finalize the header information. */ dsk = r->disk_image.mem; - dsk->recno = bnd->recno; - dsk->u.entries = r->entries; + + /* Set the number of entries for the just finished chunk. */ + bnd_cur = &r->bnd[r->bnd_next]; + bnd_cur->max_bnd_entries = r->entries; + + grow_bnd = true; + /* + * We can reach here even with raw_compression when the last split chunk + * is too small to be sent for raw compression. + */ + if (!r->is_bulk_load && !r->raw_compression) { + if (WT_PTRDIFF(r->first_free, dsk) > r->page_size && + r->bnd_next != 0) { + /* + * We hold two boundaries worth of data in the buffer, + * and this data doesn't fit in a single page. If the + * last chunk is too small, readjust the boundary to a + * pre-computed minimum. + * Write out the penultimate chunk to the disk as a page + */ + WT_RET(__rec_split_write_prev_and_shift_cur( + session, r, true)); + } else + if (r->bnd_next != 0) { + /* + * We have two boundaries, but the data in the + * buffer can fit a single page. Merge the + * boundaries to create a single chunk. + */ + bnd_prev = bnd_cur - 1; + bnd_prev->max_bnd_entries += + bnd_cur->max_bnd_entries; + r->bnd_next--; + grow_bnd = false; + } + } + + /* + * We already have space for an extra boundary if we merged two + * boundaries above, in that case we do not need to grow the boundary + * structure. + */ + if (grow_bnd) + WT_RET(__rec_split_bnd_grow(session, r)); + bnd_cur = &r->bnd[r->bnd_next]; + r->bnd_next++; + + /* + * Current boundary now has all the remaining data/last page now. 
+ * Let's write it to the disk + */ + dsk->recno = bnd_cur->max_bnd_recno; + dsk->u.entries = bnd_cur->max_bnd_entries; dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); r->disk_image.size = dsk->mem_size; /* If this is a checkpoint, we're done, otherwise write the page. */ - return (__rec_is_checkpoint(session, r, bnd) ? - 0 : __rec_split_write(session, r, bnd, &r->disk_image, true)); + return (__rec_is_checkpoint(session, r, bnd_cur) ? + 0 : __rec_split_write(session, r, bnd_cur, &r->disk_image, true)); } /* @@ -3110,98 +3256,6 @@ __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r) } /* - * __rec_split_fixup -- - * Fix up after crossing the maximum page boundary. - */ -static int -__rec_split_fixup(WT_SESSION_IMPL *session, WT_RECONCILE *r) -{ - WT_BOUNDARY *bnd; - WT_BTREE *btree; - WT_DECL_ITEM(tmp); - WT_DECL_RET; - WT_PAGE_HEADER *dsk; - size_t i, len; - uint8_t *dsk_start, *p; - - /* - * When we overflow physical limits of the page, we walk the list of - * split chunks we've created and write those pages out, then update - * the caller's information. - */ - btree = S2BT(session); - - /* - * The data isn't laid out on a page boundary or nul padded; copy it to - * a clean, aligned, padded buffer before writing it. - * - * Allocate a scratch buffer to hold the new disk image. Copy the disk - * page's header and block-manager space into the scratch buffer, most - * of the header information remains unchanged between the pages. - */ - WT_RET(__wt_scr_alloc(session, r->disk_image.memsize, &tmp)); - dsk = tmp->mem; - memcpy(dsk, r->disk_image.mem, WT_PAGE_HEADER_BYTE_SIZE(btree)); - - /* - * For each split chunk we've created, update the disk image and copy - * it into place. - */ - dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); - for (i = 0, bnd = r->bnd; i < r->bnd_next; ++i, ++bnd) { - /* Copy the page contents to the temporary buffer. 
*/ - len = (bnd + 1)->offset - bnd->offset; - memcpy(dsk_start, - (uint8_t *)r->disk_image.mem + bnd->offset, len); - - /* Finalize the header information and write the page. */ - dsk->recno = bnd->recno; - dsk->u.entries = bnd->entries; - tmp->size = WT_PAGE_HEADER_BYTE_SIZE(btree) + len; - dsk->mem_size = WT_STORE_SIZE(tmp->size); - WT_ERR(__rec_split_write(session, r, bnd, tmp, false)); - } - - /* - * There is probably a remnant in the working buffer that didn't get - * written, copy it down to the beginning of the working buffer. - * - * Confirm the remnant is no larger than a split-sized chunk, including - * header. We know that's the maximum sized remnant because we only have - * remnants if split switches from accumulating to a split boundary to - * accumulating to the end of the page (the other path here is when we - * hit a split boundary, there was room for another split chunk in the - * page, and the next item still wouldn't fit, in which case there is no - * remnant). So: we were accumulating to the end of the page and created - * a remnant. We know the remnant cannot be as large as a split-sized - * chunk, including header, because if there was room for that large a - * remnant, we wouldn't have switched from accumulating to a page end. - */ - p = (uint8_t *)r->disk_image.mem + bnd->offset; - len = WT_PTRDIFF(r->first_free, p); - if (len >= r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree)) - WT_PANIC_ERR(session, EINVAL, - "Reconciliation remnant too large for the split buffer"); - dsk = r->disk_image.mem; - dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); - (void)memmove(dsk_start, p, len); - - /* - * Fix up our caller's information, including updating the starting - * record number. 
- */ - r->entries -= r->total_entries; - r->first_free = dsk_start + len; - WT_ASSERT(session, - r->page_size >= (WT_PAGE_HEADER_BYTE_SIZE(btree) + len)); - r->space_avail = - r->split_size - (WT_PAGE_HEADER_BYTE_SIZE(btree) + len); - -err: __wt_scr_free(session, &tmp); - return (ret); -} - -/* * __rec_split_write -- * Write a disk block out for the split helper functions. */ @@ -3222,11 +3276,17 @@ __rec_split_write(WT_SESSION_IMPL *session, int cmp; uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE]; bool need_image; +#ifdef HAVE_DIAGNOSTIC + bool verify_image; +#endif btree = S2BT(session); dsk = buf->mem; page = r->page; mod = page->modify; +#ifdef HAVE_DIAGNOSTIC + verify_image = true; +#endif /* Set the zero-length value flag in the page header. */ if (dsk->type == WT_PAGE_ROW_LEAF) { @@ -3238,8 +3298,6 @@ __rec_split_write(WT_SESSION_IMPL *session, F_SET(dsk, WT_PAGE_EMPTY_V_NONE); } - bnd->entries = r->entries; - /* Initialize the address (set the page type for the parent). */ switch (dsk->type) { case WT_PAGE_COL_FIX: @@ -3285,7 +3343,8 @@ __rec_split_write(WT_SESSION_IMPL *session, switch (page->type) { case WT_PAGE_COL_FIX: case WT_PAGE_COL_VAR: - if (WT_INSERT_RECNO(supd->ins) >= (bnd + 1)->recno) + if (WT_INSERT_RECNO(supd->ins) >= + (bnd + 1)->max_bnd_recno) goto supd_check_complete; break; case WT_PAGE_ROW_LEAF: @@ -3296,8 +3355,8 @@ __rec_split_write(WT_SESSION_IMPL *session, key->data = WT_INSERT_KEY(supd->ins); key->size = WT_INSERT_KEY_SIZE(supd->ins); } - WT_ERR(__wt_compare(session, - btree->collator, key, &(bnd + 1)->key, &cmp)); + WT_ERR(__wt_compare(session, btree->collator, + key, &(bnd + 1)->max_bnd_key, &cmp)); if (cmp >= 0) goto supd_check_complete; break; @@ -3387,18 +3446,21 @@ supd_check_complete: #ifdef HAVE_VERBOSE /* Output a verbose message if we create a page without many entries */ - if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT) && r->entries < 6) + if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT) && + bnd->max_bnd_entries < 6) 
__wt_verbose(session, WT_VERB_SPLIT, "Reconciliation creating a page with %" PRIu32 " entries, memory footprint %" WT_SIZET_FMT - ", page count %" PRIu32 ", %s, split state: %d", - r->entries, r->page->memory_footprint, r->bnd_next, - F_ISSET(r, WT_EVICTING) ? "evict" : "checkpoint", - r->bnd_state); + ", page count %" PRIu32 ", %s", bnd->max_bnd_entries, + r->page->memory_footprint, r->bnd_next, + F_ISSET(r, WT_EVICTING) ? "evict" : "checkpoint"); #endif WT_ERR(__wt_bt_write(session, buf, addr, &addr_size, false, F_ISSET(r, WT_CHECKPOINTING), bnd->already_compressed)); +#ifdef HAVE_DIAGNOSTIC + verify_image = false; +#endif WT_ERR(__wt_strndup(session, addr, addr_size, &bnd->addr.addr)); bnd->addr.size = (uint8_t)addr_size; @@ -3425,9 +3487,20 @@ copy_image: */ need_image = F_ISSET(r, WT_EVICT_SCRUB) || (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL); - if (need_image && bnd->disk_image == NULL) + if (need_image && bnd->disk_image == NULL) { +#ifdef HAVE_DIAGNOSTIC + /* + * The I/O routines verify all disk images we write, but there + * are paths in reconciliation that don't do I/O. Verify those + * images, too. + */ + WT_ASSERT(session, verify_image == false || + __wt_verify_dsk_image( + session, "[reconcile-image]", buf->data, 0, true) == 0); +#endif WT_ERR(__wt_strndup( session, buf->data, buf->size, &bnd->disk_image)); + } if (!need_image) __wt_free(session, bnd->disk_image); @@ -3583,11 +3656,12 @@ __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) uint64_t recno; btree = S2BT(session); + /* * Bulk-load is only permitted on newly created files, not any empty * file -- see the checkpoint code for a discussion. 
*/ - if (!btree->bulk_load_ok) + if (!btree->original) WT_RET_MSG(session, EINVAL, "bulk-load is only possible for newly created trees"); @@ -3604,16 +3678,7 @@ __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) r = cbulk->reconcile; r->is_bulk_load = true; - recno = WT_RECNO_OOB; /* -Werror=maybe-uninitialized */ - switch (btree->type) { - case BTREE_COL_FIX: - case BTREE_COL_VAR: - recno = 1; - break; - case BTREE_ROW: - recno = WT_RECNO_OOB; - break; - } + recno = btree->type == BTREE_ROW ? WT_RECNO_OOB : 1; return (__rec_split_init( session, r, cbulk->leaf, recno, btree->maxleafpage)); @@ -3688,11 +3753,12 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) cursor->value.data, cursor->value.size, (uint64_t)0)); /* Boundary: split or write the page. */ - if (key->len + val->len > r->space_avail) { - if (r->raw_compression) - WT_RET( - __rec_split_raw(session, r, key->len + val->len)); - else { + if (r->raw_compression) { + if (key->len + val->len > r->space_avail) + WT_RET(__rec_split_raw( + session, r, key->len + val->len)); + } else + if (WT_CROSSING_SPLIT_BND(r, key->len + val->len)) { /* * Turn off prefix compression until a full key written * to the new page, and (unless already working with an @@ -3704,10 +3770,9 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_RET(__rec_cell_build_leaf_key( session, r, NULL, 0, &ovfl_key)); } - - WT_RET(__rec_split(session, r, key->len + val->len)); + WT_RET(__rec_split_crossing_bnd( + session, r, key->len + val->len)); } - } /* Copy the key/value pair onto the page. */ __rec_copy_incr(session, r, key); @@ -3748,6 +3813,10 @@ __rec_col_fix_bulk_insert_split_check(WT_CURSOR_BULK *cbulk) * split. * * Boundary: split or write the page. + * + * No need to have a minimum split size boundary, all + * pages are filled 100% except the last, allowing it to + * grow in the future. 
*/ __rec_incr(session, r, cbulk->entry, __bitstr_size( @@ -3852,10 +3921,12 @@ __wt_bulk_insert_var( r, cbulk->last.data, cbulk->last.size, cbulk->rle)); /* Boundary: split or write the page. */ - if (val->len > r->space_avail) - WT_RET(r->raw_compression ? - __rec_split_raw(session, r, val->len) : - __rec_split(session, r, val->len)); + if (r->raw_compression) { + if (val->len > r->space_avail) + WT_RET(__rec_split_raw(session, r, val->len)); + } else + if (WT_CROSSING_SPLIT_BND(r, val->len)) + WT_RET(__rec_split_crossing_bnd(session, r, val->len)); /* Copy the value onto the page. */ if (btree->dictionary) @@ -3991,10 +4062,13 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) WT_CHILD_RELEASE_ERR(session, hazard, ref); /* Boundary: split or write the page. */ - if (val->len > r->space_avail) - WT_ERR(r->raw_compression ? - __rec_split_raw(session, r, val->len) : - __rec_split(session, r, val->len)); + if (r->raw_compression) { + if (val->len > r->space_avail) + WT_ERR(__rec_split_raw(session, r, val->len)); + } else + if (WT_CHECK_CROSSING_BND(r, val->len)) + WT_ERR(__rec_split_crossing_bnd( + session, r, val->len)); /* Copy the value onto the page. */ __rec_copy_incr(session, r, val); @@ -4036,10 +4110,13 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) addr->addr, addr->size, __rec_vtype(addr), r->recno); /* Boundary: split or write the page. */ - if (val->len > r->space_avail) - WT_RET(r->raw_compression ? - __rec_split_raw(session, r, val->len) : - __rec_split(session, r, val->len)); + if (r->raw_compression) { + if (val->len > r->space_avail) + WT_RET(__rec_split_raw(session, r, val->len)); + } else + if (WT_CHECK_CROSSING_BND(r, val->len)) + WT_RET(__rec_split_crossing_bnd( + session, r, val->len)); /* Copy the value onto the page. */ __rec_copy_incr(session, r, val); @@ -4147,6 +4224,10 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) * split. 
* * Boundary: split or write the page. + * + * No need to have a minimum split size boundary, all + * pages are filled 100% except the last, allowing it to + * grow in the future. */ __rec_incr(session, r, entry, __bitstr_size((size_t)entry * btree->bitcnt)); @@ -4303,10 +4384,13 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, session, r, value->data, value->size, rle)); /* Boundary: split or write the page. */ - if (val->len > r->space_avail) - WT_RET(r->raw_compression ? - __rec_split_raw(session, r, val->len) : - __rec_split(session, r, val->len)); + if (r->raw_compression) { + if (val->len > r->space_avail) + WT_RET(__rec_split_raw(session, r, val->len)); + } else + if (WT_CHECK_CROSSING_BND(r, val->len)) + WT_RET(__rec_split_crossing_bnd( + session, r, val->len)); /* Copy the value onto the page. */ if (!deleted && !overflow_type && btree->dictionary) @@ -4969,11 +5053,12 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) r->cell_zero = false; /* Boundary: split or write the page. */ - if (key->len + val->len > r->space_avail) { - if (r->raw_compression) + if (r->raw_compression) { + if (key->len + val->len > r->space_avail) WT_ERR(__rec_split_raw( session, r, key->len + val->len)); - else { + } else + if (WT_CHECK_CROSSING_BND(r, key->len + val->len)) { /* * In one path above, we copied address blocks * from the page rather than building the actual @@ -4985,10 +5070,10 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_IKEY_DATA(ikey), ikey->size)); key_onpage_ovfl = false; } - WT_ERR(__rec_split( + + WT_ERR(__rec_split_crossing_bnd( session, r, key->len + val->len)); } - } /* Copy the key and value onto the page. */ __rec_copy_incr(session, r, key); @@ -5038,10 +5123,14 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) addr->addr, addr->size, __rec_vtype(addr), WT_RECNO_OOB); /* Boundary: split or write the page. 
*/ - if (key->len + val->len > r->space_avail) - WT_RET(r->raw_compression ? - __rec_split_raw(session, r, key->len + val->len) : - __rec_split(session, r, key->len + val->len)); + if (r->raw_compression) { + if (key->len + val->len > r->space_avail) + WT_RET(__rec_split_raw( + session, r, key->len + val->len)); + } else + if (WT_CHECK_CROSSING_BND(r, key->len + val->len)) + WT_RET(__rec_split_crossing_bnd( + session, r, key->len + val->len)); /* Copy the key and value onto the page. */ __rec_copy_incr(session, r, key); @@ -5370,16 +5459,17 @@ build: } /* Boundary: split or write the page. */ - if (key->len + val->len > r->space_avail) { - if (r->raw_compression) + if (r->raw_compression) { + if (key->len + val->len > r->space_avail) WT_ERR(__rec_split_raw( session, r, key->len + val->len)); - else { + } else + if (WT_CHECK_CROSSING_BND(r, key->len + val->len)) { /* - * In one path above, we copied address blocks - * from the page rather than building the actual - * key. In that case, we have to build the key - * now because we are about to promote it. + * If we copied address blocks from the page + * rather than building the actual key, we have + * to build the key now because we are about to + * promote it. */ if (key_onpage_ovfl) { WT_ERR(__wt_dsk_cell_data_ref(session, @@ -5398,14 +5488,13 @@ build: if (!ovfl_key) WT_ERR( __rec_cell_build_leaf_key( - session, - r, NULL, 0, &ovfl_key)); + session, r, NULL, 0, + &ovfl_key)); } - WT_ERR(__rec_split( + WT_ERR(__rec_split_crossing_bnd( session, r, key->len + val->len)); } - } /* Copy the key/value pair onto the page. */ __rec_copy_incr(session, r, key); @@ -5468,11 +5557,12 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins), &ovfl_key)); /* Boundary: split or write the page. 
*/ - if (key->len + val->len > r->space_avail) { - if (r->raw_compression) + if (r->raw_compression) { + if (key->len + val->len > r->space_avail) WT_RET(__rec_split_raw( session, r, key->len + val->len)); - else { + } else + if (WT_CHECK_CROSSING_BND(r, key->len + val->len)) { /* * Turn off prefix compression until a full key * written to the new page, and (unless already @@ -5484,14 +5574,13 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) if (!ovfl_key) WT_RET( __rec_cell_build_leaf_key( - session, - r, NULL, 0, &ovfl_key)); + session, r, NULL, 0, + &ovfl_key)); } - WT_RET(__rec_split( + WT_RET(__rec_split_crossing_bnd( session, r, key->len + val->len)); } - } /* Copy the key/value pair onto the page. */ __rec_copy_incr(session, r, key); @@ -5603,13 +5692,14 @@ __rec_split_dump_keys(WT_SESSION_IMPL *session, WT_PAGE *page, WT_RECONCILE *r) __wt_verbose(session, WT_VERB_SPLIT, "starting key %s", __wt_buf_set_printable( - session, bnd->key.data, bnd->key.size, tkey)); + session, bnd->max_bnd_key.data, + bnd->max_bnd_key.size, tkey)); break; case WT_PAGE_COL_FIX: case WT_PAGE_COL_INT: case WT_PAGE_COL_VAR: __wt_verbose(session, WT_VERB_SPLIT, - "starting recno %" PRIu64, bnd->recno); + "starting recno %" PRIu64, bnd->max_bnd_recno); break; WT_ILLEGAL_VALUE_ERR(session); } @@ -5871,10 +5961,10 @@ __rec_split_row(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) /* We never set the first page's key, grab it from the original page. */ ref = r->ref; if (__wt_ref_is_root(ref)) - WT_RET(__wt_buf_set(session, &r->bnd[0].key, "", 1)); + WT_RET(__wt_buf_set(session, &r->bnd[0].max_bnd_key, "", 1)); else { __wt_ref_key(ref->home, ref, &p, &size); - WT_RET(__wt_buf_set(session, &r->bnd[0].key, p, size)); + WT_RET(__wt_buf_set(session, &r->bnd[0].max_bnd_key, p, size)); } /* Allocate, then initialize the array of replacement blocks. 
*/ @@ -5882,8 +5972,8 @@ __rec_split_row(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) for (multi = mod->mod_multi, bnd = r->bnd, i = 0; i < r->bnd_next; ++multi, ++bnd, ++i) { - WT_RET(__wt_row_ikey_alloc(session, 0, - bnd->key.data, bnd->key.size, &multi->key.ikey)); + WT_RET(__wt_row_ikey_alloc(session, 0, bnd->max_bnd_key.data, + bnd->max_bnd_key.size, &multi->key.ikey)); /* * Copy any disk image. Don't take saved updates without a @@ -5930,7 +6020,7 @@ __rec_split_col(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) for (multi = mod->mod_multi, bnd = r->bnd, i = 0; i < r->bnd_next; ++multi, ++bnd, ++i) { - multi->key.recno = bnd->recno; + multi->key.recno = bnd->max_bnd_recno; /* * Copy any disk image. Don't take saved updates without a @@ -6407,7 +6497,8 @@ __rec_dictionary_lookup( for (dp = __rec_dictionary_skip_search(r->dictionary_head, hash); dp != NULL && dp->hash == hash; dp = dp->next[0]) { WT_RET(__wt_cell_pack_data_match( - dp->cell, &val->cell, val->buf.data, &match)); + (WT_CELL *)((uint8_t *)r->disk_image.mem + dp->offset), + &val->cell, val->buf.data, &match)); if (match) { WT_STAT_DATA_INCR(session, rec_dictionary); *dpp = dp; @@ -6433,7 +6524,7 @@ __rec_dictionary_lookup( * know where on the page it will be written). */ next = r->dictionary[r->dictionary_next++]; - next->cell = NULL; /* Not necessary, just cautious. */ + next->offset = 0; /* Not necessary, just cautious. */ next->hash = hash; __rec_dictionary_skip_insert(r->dictionary_head, next, hash); *dpp = next; diff --git a/src/third_party/wiredtiger/src/schema/schema_create.c b/src/third_party/wiredtiger/src/schema/schema_create.c index 020d5e72c13..0677fa711a5 100644 --- a/src/third_party/wiredtiger/src/schema/schema_create.c +++ b/src/third_party/wiredtiger/src/schema/schema_create.c @@ -35,7 +35,7 @@ __wt_direct_io_size_check(WT_SESSION_IMPL *session, * units of its happy place. 
*/ if (FLD_ISSET(conn->direct_io, - WT_DIRECT_IO_CHECKPOINT | WT_DIRECT_IO_DATA)) { + WT_DIRECT_IO_CHECKPOINT | WT_DIRECT_IO_DATA)) { align = (int64_t)conn->buffer_alignment; if (align != 0 && (cval.val < align || cval.val % align != 0)) WT_RET_MSG(session, EINVAL, @@ -601,7 +601,8 @@ __create_table(WT_SESSION_IMPL *session, if (ncolgroups == 0) { cgsize = strlen("colgroup:") + strlen(tablename) + 1; WT_ERR(__wt_calloc_def(session, cgsize, &cgname)); - snprintf(cgname, cgsize, "colgroup:%s", tablename); + WT_ERR(__wt_snprintf( + cgname, cgsize, "colgroup:%s", tablename)); WT_ERR(__create_colgroup( session, cgname, exclusive, config)); } diff --git a/src/third_party/wiredtiger/src/schema/schema_worker.c b/src/third_party/wiredtiger/src/schema/schema_worker.c index e5f71b5d56f..62cdd7d367b 100644 --- a/src/third_party/wiredtiger/src/schema/schema_worker.c +++ b/src/third_party/wiredtiger/src/schema/schema_worker.c @@ -112,10 +112,10 @@ __wt_schema_worker(WT_SESSION_IMPL *session, wt_session = (WT_SESSION *)session; if (file_func == __wt_salvage && dsrc->salvage != NULL) WT_ERR(dsrc->salvage( - dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg)); + dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg)); else if (file_func == __wt_verify && dsrc->verify != NULL) WT_ERR(dsrc->verify( - dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg)); + dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg)); else if (file_func == __wt_checkpoint) ; else if (file_func == __wt_checkpoint_get_handles) diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c index d282c5d0c32..b7daf0e2e02 100644 --- a/src/third_party/wiredtiger/src/session/session_api.c +++ b/src/third_party/wiredtiger/src/session/session_api.c @@ -128,7 +128,7 @@ __session_clear(WT_SESSION_IMPL *session) * * For these reasons, be careful when clearing the session structure. 
*/ - memset(session, 0, WT_SESSION_CLEAR_SIZE(session)); + memset(session, 0, WT_SESSION_CLEAR_SIZE); WT_INIT_LSN(&session->bg_sync_lsn); @@ -1206,10 +1206,15 @@ __wt_session_range_truncate(WT_SESSION_IMPL *session, done: err: /* - * Close any locally-opened start cursor. + * Close any locally-opened start cursor. Reset application cursors, + * they've possibly moved and the application cannot use them. */ if (local_start) WT_TRET(start->close(start)); + else + WT_TRET(start->reset(start)); + if (stop != NULL) + WT_TRET(stop->reset(stop)); return (ret); } @@ -1497,7 +1502,7 @@ __transaction_sync_run_chk(WT_SESSION_IMPL *session) conn = S2C(session); - return (FLD_ISSET(conn->flags, WT_CONN_LOG_SERVER_RUN)); + return (FLD_ISSET(conn->flags, WT_CONN_SERVER_LOG)); } /* @@ -1807,7 +1812,7 @@ __open_session(WT_CONNECTION_IMPL *conn, * closes the connection. This is particularly intended to catch * cases where server threads open sessions. */ - WT_ASSERT(session, F_ISSET(conn, WT_CONN_SERVER_RUN)); + WT_ASSERT(session, !F_ISSET(conn, WT_CONN_CLOSING)); /* Find the first inactive session slot. */ for (session_ret = conn->sessions, diff --git a/src/third_party/wiredtiger/src/session/session_compact.c b/src/third_party/wiredtiger/src/session/session_compact.c index 85214ae6d98..72c072e0fb8 100644 --- a/src/third_party/wiredtiger/src/session/session_compact.c +++ b/src/third_party/wiredtiger/src/session/session_compact.c @@ -210,7 +210,7 @@ __compact_checkpoint(WT_SESSION_IMPL *session) * work we need to have done is done in the underlying block manager. */ const char *checkpoint_cfg[] = { - WT_CONFIG_BASE(session, WT_SESSION_checkpoint), "force=1", NULL }; + WT_CONFIG_BASE(session, WT_SESSION_checkpoint), "force=1", NULL }; /* Checkpoints take a lot of time, check if we've run out. 
*/ WT_RET(__wt_session_compact_check_timeout(session)); diff --git a/src/third_party/wiredtiger/src/session/session_dhandle.c b/src/third_party/wiredtiger/src/session/session_dhandle.c index ee9bddbfc19..95fb6a6f90e 100644 --- a/src/third_party/wiredtiger/src/session/session_dhandle.c +++ b/src/third_party/wiredtiger/src/session/session_dhandle.c @@ -270,6 +270,16 @@ __wt_session_release_btree(WT_SESSION_IMPL *session) if (F_ISSET(dhandle, WT_DHANDLE_DISCARD_FORCE)) { ret = __wt_conn_btree_sync_and_close(session, false, true); F_CLR(dhandle, WT_DHANDLE_DISCARD_FORCE); + } else if (F_ISSET(btree, WT_BTREE_BULK)) { + WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) && + !F_ISSET(dhandle, WT_DHANDLE_DISCARD)); + /* + * Acquire the schema lock while completing a bulk load. This + * avoids racing with a checkpoint while it gathers a set + * of handles. + */ + WT_WITH_SCHEMA_LOCK(session, ret = + __wt_conn_btree_sync_and_close(session, false, false)); } else if (F_ISSET(dhandle, WT_DHANDLE_DISCARD) || F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS)) { WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE)); @@ -560,7 +570,7 @@ __wt_session_get_btree(WT_SESSION_IMPL *session, int __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint) { - WT_DATA_HANDLE *dhandle, *saved_dhandle; + WT_DATA_HANDLE *saved_dhandle; WT_DECL_RET; WT_ASSERT(session, WT_META_TRACKING(session)); @@ -568,31 +578,33 @@ __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint) /* * Get the checkpoint handle exclusive, so no one else can access it - * while we are creating the new checkpoint. + * while we are creating the new checkpoint. Hold the lock until the + * checkpoint completes. 
*/ WT_ERR(__wt_session_get_btree(session, saved_dhandle->name, checkpoint, NULL, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY)); + if ((ret = __wt_meta_track_handle_lock(session, false)) != 0) { + WT_TRET(__wt_session_release_btree(session)); + goto err; + } /* - * Flush any pages in this checkpoint from the cache (we are about to - * re-write the checkpoint which will mean cached pages no longer have - * valid contents). This is especially noticeable with memory mapped - * files, since changes to the underlying file are visible to the in - * memory pages. + * Get exclusive access to the handle and then flush any pages in this + * checkpoint from the cache (we are about to re-write the checkpoint + * which will mean cached pages no longer have valid contents). This + * is especially noticeable with memory mapped files, since changes to + * the underlying file are visible to the in-memory pages. */ + WT_ERR(__wt_evict_file_exclusive_on(session)); WT_ERR(__wt_cache_op(session, WT_SYNC_DISCARD)); /* * We lock checkpoint handles that we are overwriting, so the handle * must be closed when we release it. */ - dhandle = session->dhandle; - F_SET(dhandle, WT_DHANDLE_DISCARD); - - WT_ERR(__wt_meta_track_handle_lock(session, false)); + F_SET(session->dhandle, WT_DHANDLE_DISCARD); - /* Restore the original btree in the session. */ + /* Restore the original data handle in the session. 
*/ err: session->dhandle = saved_dhandle; - return (ret); } diff --git a/src/third_party/wiredtiger/src/session/session_salvage.c b/src/third_party/wiredtiger/src/session/session_salvage.c index 983b28dd8ea..12ce71cdbb0 100644 --- a/src/third_party/wiredtiger/src/session/session_salvage.c +++ b/src/third_party/wiredtiger/src/session/session_salvage.c @@ -54,6 +54,6 @@ __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(__wt_meta_ckptlist_set( session, dhandle->name, ckptbase, NULL)); -err: __wt_meta_ckptlist_free(session, ckptbase); +err: __wt_meta_ckptlist_free(session, &ckptbase); return (ret); } diff --git a/src/third_party/wiredtiger/src/support/crypto.c b/src/third_party/wiredtiger/src/support/crypto.c index ab94ec2c829..cce0d228832 100644 --- a/src/third_party/wiredtiger/src/support/crypto.c +++ b/src/third_party/wiredtiger/src/support/crypto.c @@ -133,5 +133,4 @@ __wt_encrypt_size(WT_SESSION_IMPL *session, return; *sizep = incoming_size + kencryptor->size_const + WT_ENCRYPT_LEN_SIZE; - return; } diff --git a/src/third_party/wiredtiger/src/support/err.c b/src/third_party/wiredtiger/src/support/err.c index 369997d38c0..57efde72b23 100644 --- a/src/third_party/wiredtiger/src/support/err.c +++ b/src/third_party/wiredtiger/src/support/err.c @@ -102,9 +102,10 @@ __handler_failure(WT_SESSION_IMPL *session, */ char s[256]; - (void)snprintf(s, sizeof(s), + if (__wt_snprintf(s, sizeof(s), "application %s event handler failed: %s", - which, __wt_strerror(session, error, NULL, 0)); + which, __wt_strerror(session, error, NULL, 0)) != 0) + return; /* * Use the error handler to report the failure, unless it was the error @@ -148,6 +149,23 @@ __wt_event_handler_set(WT_SESSION_IMPL *session, WT_EVENT_HANDLER *handler) session->event_handler = handler; } +#define WT_ERROR_APPEND(p, remain, ...) 
do { \ + size_t __len; \ + WT_ERR(__wt_snprintf_len_set(p, remain, &__len, __VA_ARGS__)); \ + if (__len > remain) \ + __len = remain; \ + p += __len; \ + remain -= __len; \ +} while (0) +#define WT_ERROR_APPEND_AP(p, remain, ...) do { \ + size_t __len; \ + WT_ERR(__wt_vsnprintf_len_set(p, remain, &__len, __VA_ARGS__)); \ + if (__len > remain) \ + __len = remain; \ + p += __len; \ + remain -= __len; \ +} while (0) + /* * __wt_eventv -- * Report a message to an event handler. @@ -161,9 +179,9 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, WT_DECL_RET; WT_SESSION *wt_session; struct timespec ts; - size_t len, remain, wlen; + size_t len, remain; const char *err, *prefix; - char *end, *p, tid[128]; + char *p, tid[128]; /* * We're using a stack buffer because we want error messages no matter @@ -174,6 +192,8 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, * Buffer placed at the end of the stack in case snprintf overflows. */ char s[2048]; + p = s; + remain = sizeof(s); /* * !!! @@ -185,24 +205,8 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, * first session, but if the allocation of the first session fails, for * example, we can end up here without a session.) */ - if (session == NULL) { - if (fprintf(stderr, - "WiredTiger Error%s%s: ", - error == 0 ? "" : ": ", - error == 0 ? "" : - __wt_strerror(session, error, NULL, 0)) < 0) - ret = EIO; - if (vfprintf(stderr, fmt, ap) < 0) - ret = EIO; - if (fprintf(stderr, "\n") < 0) - ret = EIO; - if (fflush(stderr) != 0) - ret = EIO; - return (ret); - } - - p = s; - end = s + sizeof(s); + if (session == NULL) + goto err; /* * We have several prefixes for the error message: a timestamp and the @@ -211,42 +215,24 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, * followed by a colon. 
*/ __wt_epoch(session, &ts); - __wt_thread_id(tid, sizeof(tid)); - remain = WT_PTRDIFF(end, p); - wlen = (size_t)snprintf(p, remain, "[%" PRIuMAX ":%" PRIuMAX "][%s]", + WT_ERR(__wt_thread_id(tid, sizeof(tid))); + WT_ERROR_APPEND(p, remain, + "[%" PRIuMAX ":%" PRIuMAX "][%s]", (uintmax_t)ts.tv_sec, (uintmax_t)ts.tv_nsec / WT_THOUSAND, tid); - p = wlen >= remain ? end : p + wlen; - if ((prefix = S2C(session)->error_prefix) != NULL) { - remain = WT_PTRDIFF(end, p); - wlen = (size_t)snprintf(p, remain, ", %s", prefix); - p = wlen >= remain ? end : p + wlen; - } + if ((prefix = S2C(session)->error_prefix) != NULL) + WT_ERROR_APPEND(p, remain, ", %s", prefix); prefix = session->dhandle == NULL ? NULL : session->dhandle->name; - if (prefix != NULL) { - remain = WT_PTRDIFF(end, p); - wlen = (size_t)snprintf(p, remain, ", %s", prefix); - p = wlen >= remain ? end : p + wlen; - } - if ((prefix = session->name) != NULL) { - remain = WT_PTRDIFF(end, p); - wlen = (size_t)snprintf(p, remain, ", %s", prefix); - p = wlen >= remain ? end : p + wlen; - } - remain = WT_PTRDIFF(end, p); - wlen = (size_t)snprintf(p, remain, ": "); - p = wlen >= remain ? end : p + wlen; - - if (file_name != NULL) { - remain = WT_PTRDIFF(end, p); - wlen = (size_t) - snprintf(p, remain, "%s, %d: ", file_name, line_number); - p = wlen >= remain ? end : p + wlen; - } + if (prefix != NULL) + WT_ERROR_APPEND(p, remain, ", %s", prefix); + if ((prefix = session->name) != NULL) + WT_ERROR_APPEND(p, remain, ", %s", prefix); + WT_ERROR_APPEND(p, remain, ": "); + + if (file_name != NULL) + WT_ERROR_APPEND(p, remain, "%s, %d: ", file_name, line_number); - remain = WT_PTRDIFF(end, p); - wlen = (size_t)vsnprintf(p, remain, fmt, ap); - p = wlen >= remain ? 
end : p + wlen; + WT_ERROR_APPEND_AP(p, remain, fmt, ap); if (error != 0) { /* @@ -261,10 +247,8 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, */ err = __wt_strerror(session, error, NULL, 0); len = strlen(err); - if (WT_PTRDIFF(p, s) < len || strcmp(p - len, err) != 0) { - remain = WT_PTRDIFF(end, p); - (void)snprintf(p, remain, ": %s", err); - } + if (WT_PTRDIFF(p, s) < len || strcmp(p - len, err) != 0) + WT_ERROR_APPEND(p, remain, ": %s", err); } /* @@ -279,7 +263,7 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, * * If an application-specified error message handler fails, complain * using the default error handler. If the default error handler fails, - * there's nothing to do. + * fallback to stderr. */ wt_session = (WT_SESSION *)session; handler = session->event_handler; @@ -293,6 +277,21 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, __handler_failure(session, ret, "error", true); } + if (ret != 0) { +err: if (fprintf(stderr, + "WiredTiger Error%s%s: ", + error == 0 ? "" : ": ", + error == 0 ? "" : + __wt_strerror(session, error, NULL, 0)) < 0) + WT_TRET(EIO); + if (vfprintf(stderr, fmt, ap) < 0) + WT_TRET(EIO); + if (fprintf(stderr, "\n") < 0) + WT_TRET(EIO); + if (fflush(stderr) != 0) + WT_TRET(EIO); + } + return (ret); } @@ -376,7 +375,7 @@ info_msg(WT_SESSION_IMPL *session, const char *fmt, va_list ap) */ char s[2048]; - (void)vsnprintf(s, sizeof(s), fmt, ap); + WT_RET(__wt_vsnprintf(s, sizeof(s), fmt, ap)); wt_session = (WT_SESSION *)session; handler = session->event_handler; diff --git a/src/third_party/wiredtiger/src/support/scratch.c b/src/third_party/wiredtiger/src/support/scratch.c index 69987ebc852..485cea90e89 100644 --- a/src/third_party/wiredtiger/src/support/scratch.c +++ b/src/third_party/wiredtiger/src/support/scratch.c @@ -69,13 +69,16 @@ int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) 
WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4))) { + WT_DECL_RET; va_list ap; size_t len; for (;;) { va_start(ap, fmt); - len = (size_t)vsnprintf(buf->mem, buf->memsize, fmt, ap); + ret = __wt_vsnprintf_len_set( + buf->mem, buf->memsize, &len, fmt, ap); va_end(ap); + WT_RET(ret); /* Check if there was enough space. */ if (len < buf->memsize) { @@ -100,6 +103,7 @@ int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4))) { + WT_DECL_RET; va_list ap; size_t len, space; char *p; @@ -117,8 +121,9 @@ __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) p = (char *)((uint8_t *)buf->mem + buf->size); WT_ASSERT(session, buf->memsize >= buf->size); space = buf->memsize - buf->size; - len = (size_t)vsnprintf(p, space, fmt, ap); + ret = __wt_vsnprintf_len_set(p, space, &len, fmt, ap); va_end(ap); + WT_RET(ret); /* Check if there was enough space. */ if (len < space) { diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index fd38e1b79ee..2c2217f8c20 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -685,6 +685,7 @@ static const char * const __stats_connection_desc[] = { "cache: failed eviction of pages that exceeded the in-memory maximum", "cache: files with active eviction walks", "cache: files with new eviction walks started", + "cache: force re-tuning of eviction workers once in a while", "cache: hazard pointer blocked page eviction", "cache: hazard pointer check calls", "cache: hazard pointer check entries walked", @@ -771,9 +772,11 @@ static const char * const __stats_connection_desc[] = { "lock: table lock internal thread time waiting for the table lock (usecs)", "log: busy returns attempting to switch slots", "log: consolidated slot closures", + "log: consolidated slot join active slot closed", "log: consolidated slot join races", "log: consolidated slot join 
transitions", "log: consolidated slot joins", + "log: consolidated slot transitions unable to find free slot", "log: consolidated slot unbuffered writes", "log: log bytes of payload data", "log: log bytes written", @@ -968,6 +971,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cache_eviction_force_fail = 0; /* not clearing cache_eviction_walks_active */ stats->cache_eviction_walks_started = 0; + stats->cache_eviction_force_retune = 0; stats->cache_eviction_hazard = 0; stats->cache_hazard_checks = 0; stats->cache_hazard_walks = 0; @@ -1054,9 +1058,11 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->lock_table_wait_internal = 0; stats->log_slot_switch_busy = 0; stats->log_slot_closes = 0; + stats->log_slot_active_closed = 0; stats->log_slot_races = 0; stats->log_slot_transitions = 0; stats->log_slot_joins = 0; + stats->log_slot_no_free_slots = 0; stats->log_slot_unbuffered = 0; stats->log_bytes_payload = 0; stats->log_bytes_written = 0; @@ -1252,6 +1258,8 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, cache_eviction_walks_active); to->cache_eviction_walks_started += WT_STAT_READ(from, cache_eviction_walks_started); + to->cache_eviction_force_retune += + WT_STAT_READ(from, cache_eviction_force_retune); to->cache_eviction_hazard += WT_STAT_READ(from, cache_eviction_hazard); to->cache_hazard_checks += WT_STAT_READ(from, cache_hazard_checks); @@ -1366,9 +1374,13 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, lock_table_wait_internal); to->log_slot_switch_busy += WT_STAT_READ(from, log_slot_switch_busy); to->log_slot_closes += WT_STAT_READ(from, log_slot_closes); + to->log_slot_active_closed += + WT_STAT_READ(from, log_slot_active_closed); to->log_slot_races += WT_STAT_READ(from, log_slot_races); to->log_slot_transitions += WT_STAT_READ(from, log_slot_transitions); to->log_slot_joins += WT_STAT_READ(from, log_slot_joins); + to->log_slot_no_free_slots += + WT_STAT_READ(from, log_slot_no_free_slots); 
to->log_slot_unbuffered += WT_STAT_READ(from, log_slot_unbuffered); to->log_bytes_payload += WT_STAT_READ(from, log_bytes_payload); to->log_bytes_written += WT_STAT_READ(from, log_bytes_written); diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index e5e59c2b901..6eebf5ecf9f 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -713,7 +713,7 @@ __wt_txn_stats_update(WT_SESSION_IMPL *session) snapshot_pinned = txn_global->nsnap_oldest_id; WT_STAT_SET(session, stats, txn_pinned_range, - txn_global->current - txn_global->oldest_id); + txn_global->current - txn_global->oldest_id); WT_STAT_SET(session, stats, txn_pinned_snapshot_range, snapshot_pinned == WT_TXN_NONE ? diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index 3261c8089f4..f4ccf5eacd0 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -8,9 +8,9 @@ #include "wt_internal.h" -static int __checkpoint_lock_tree( - WT_SESSION_IMPL *, bool, bool, const char *[]); -static int __checkpoint_mark_deletes(WT_SESSION_IMPL *, const char *[]); +static int __checkpoint_lock_dirty_tree( + WT_SESSION_IMPL *, bool, bool, bool, const char *[]); +static int __checkpoint_mark_skip(WT_SESSION_IMPL *, WT_CKPT *, bool); static int __checkpoint_presync(WT_SESSION_IMPL *, const char *[]); static int __checkpoint_tree_helper(WT_SESSION_IMPL *, const char *[]); @@ -90,6 +90,33 @@ err: WT_TRET(__wt_metadata_cursor_release(session, &cursor)); } /* + * __checkpoint_update_generation -- + * Update the checkpoint generation of the current tree. + * + * This indicates that the tree will not be visited again by the current + * checkpoint. 
+ */ +static void +__checkpoint_update_generation(WT_SESSION_IMPL *session) +{ + WT_BTREE *btree; + + btree = S2BT(session); + + /* + * Updates to the metadata are made by the checkpoint transaction, so + * the metadata tree's checkpoint generation should never be updated. + */ + if (WT_IS_METADATA(session->dhandle)) + return; + + WT_PUBLISH(btree->checkpoint_gen, + S2C(session)->txn_global.checkpoint_gen); + WT_STAT_DATA_SET(session, + btree_checkpoint_generation, btree->checkpoint_gen); +} + +/* * __checkpoint_apply_all -- * Apply an operation to all files involved in a checkpoint. */ @@ -239,22 +266,82 @@ int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) { WT_BTREE *btree; + WT_CONFIG_ITEM cval; WT_DECL_RET; const char *name; + bool force; + + btree = S2BT(session); + + /* Find out if we have to force a checkpoint. */ + WT_RET(__wt_config_gets_def(session, cfg, "force", 0, &cval)); + force = cval.val != 0; + if (!force) { + WT_RET(__wt_config_gets_def(session, cfg, "name", 0, &cval)); + force = cval.len != 0; + } /* Should not be called with anything other than a file object. */ WT_ASSERT(session, session->dhandle->checkpoint == NULL); WT_ASSERT(session, WT_PREFIX_MATCH(session->dhandle->name, "file:")); /* Skip files that are never involved in a checkpoint. */ - if (F_ISSET(S2BT(session), WT_BTREE_NO_CHECKPOINT)) + if (F_ISSET(btree, WT_BTREE_NO_CHECKPOINT)) return (0); - /* Make sure there is space for the next entry. */ +#ifdef HAVE_DIAGNOSTIC + /* + * We may have raced between starting the checkpoint transaction and + * some operation completing on the handle that updated the metadata + * (e.g., closing a bulk load cursor). All such operations either have + * exclusive access to the handle or hold the schema lock. We are now + * holding the schema lock and have an open btree handle, so if we + * can't update the metadata, then there has been some state change + * invisible to the checkpoint transaction. 
+ */ + if (!WT_IS_METADATA(session->dhandle)) { + WT_CURSOR *meta_cursor; + bool metadata_race; + + WT_ASSERT(session, !F_ISSET(&session->txn, WT_TXN_ERROR)); + WT_RET(__wt_metadata_cursor(session, &meta_cursor)); + meta_cursor->set_key(meta_cursor, session->dhandle->name); + ret = __wt_curfile_insert_check(meta_cursor); + if (ret == WT_ROLLBACK) { + metadata_race = true; + ret = 0; + } else + metadata_race = false; + WT_TRET(__wt_metadata_cursor_release(session, &meta_cursor)); + WT_RET(ret); + WT_ASSERT(session, !metadata_race); + } +#endif + + /* + * Decide whether the tree needs to be included in the checkpoint and + * if so, acquire the necessary locks. + */ + WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_dirty_tree( + session, true, force, true, cfg)); + WT_RET(ret); + if (F_ISSET(btree, WT_BTREE_SKIP_CKPT)) { + WT_ASSERT(session, btree->ckpt == NULL); + __checkpoint_update_generation(session); + return (0); + } + + /* + * Make sure there is space for the new entry: do this before getting + * the handle to avoid cleanup if we can't allocate the memory. + */ WT_RET(__wt_realloc_def(session, &session->ckpt_handle_allocated, session->ckpt_handle_next + 1, &session->ckpt_handle)); - /* Not strictly necessary, but cleaner to clear the current handle. */ + /* + * The current tree will be included: get it again because the handle + * we have is only valid for the duration of this function. + */ name = session->dhandle->name; session->dhandle = NULL; @@ -266,49 +353,13 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) * with eviction and we don't want to unfairly penalize (or promote) * eviction in trees due to checkpoints. 
*/ - btree = S2BT(session); btree->evict_walk_saved = btree->evict_walk_period; - WT_SAVE_DHANDLE(session, - ret = __checkpoint_lock_tree(session, true, true, cfg)); - if (ret != 0) { - WT_TRET(__wt_session_release_btree(session)); - return (ret); - } - - /* - * Flag that the handle is part of a checkpoint for the purposes - * of transaction visibility checks. - */ - WT_PUBLISH(btree->include_checkpoint_txn, true); - session->ckpt_handle[session->ckpt_handle_next++] = session->dhandle; return (0); } /* - * __checkpoint_update_generation -- - * Update the checkpoint generation of the current tree. - * - * This indicates that the tree will not be visited again by the current - * checkpoint. - */ -static void -__checkpoint_update_generation(WT_SESSION_IMPL *session) -{ - WT_BTREE *btree; - - btree = S2BT(session); - if (!WT_IS_METADATA(session->dhandle)) - WT_PUBLISH(btree->include_checkpoint_txn, false); - - WT_PUBLISH(btree->checkpoint_gen, - S2C(session)->txn_global.checkpoint_gen); - WT_STAT_DATA_SET(session, - btree_checkpoint_generation, btree->checkpoint_gen); -} - -/* * __checkpoint_reduce_dirty_cache -- * Release clean trees from the list cached for checkpoints. */ @@ -371,7 +422,6 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session) __wt_sleep(0, stepdown_us / 10); __wt_epoch(session, &stop); current_us = WT_TIMEDIFF_US(stop, last); - total_ms = WT_TIMEDIFF_MS(stop, start); bytes_written_total = cache->bytes_written - bytes_written_start; @@ -434,36 +484,6 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session) } /* - * __checkpoint_release_clean_trees -- - * Release clean trees from the list cached for checkpoints. 
- */ -static int -__checkpoint_release_clean_trees(WT_SESSION_IMPL *session) -{ - WT_BTREE *btree; - WT_DATA_HANDLE *dhandle; - WT_DECL_RET; - u_int i; - - for (i = 0; i < session->ckpt_handle_next; i++) { - dhandle = session->ckpt_handle[i]; - btree = dhandle->handle; - if (!F_ISSET(btree, WT_BTREE_SKIP_CKPT)) - continue; - __wt_meta_ckptlist_free(session, btree->ckpt); - btree->ckpt = NULL; - WT_WITH_DHANDLE(session, dhandle, - __checkpoint_update_generation(session)); - session->ckpt_handle[i] = NULL; - WT_WITH_DHANDLE(session, dhandle, - ret = __wt_session_release_btree(session)); - WT_RET(ret); - } - - return (0); -} - -/* * __checkpoint_stats -- * Update checkpoint timer stats. */ @@ -531,8 +551,103 @@ __checkpoint_verbose_track(WT_SESSION_IMPL *session, static void __checkpoint_fail_reset(WT_SESSION_IMPL *session) { - S2BT(session)->modified = true; - S2BT(session)->ckpt = NULL; + WT_BTREE *btree; + + btree = S2BT(session); + btree->modified = true; + __wt_meta_ckptlist_free(session, &btree->ckpt); +} + +/* + * __checkpoint_prepare -- + * Start the transaction for a checkpoint and gather handles. + */ +static int +__checkpoint_prepare(WT_SESSION_IMPL *session, const char *cfg[]) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_TXN *txn; + WT_TXN_GLOBAL *txn_global; + WT_TXN_STATE *txn_state; + const char *txn_cfg[] = { WT_CONFIG_BASE(session, + WT_SESSION_begin_transaction), "isolation=snapshot", NULL }; + + conn = S2C(session); + txn = &session->txn; + txn_global = &conn->txn_global; + txn_state = WT_SESSION_TXN_STATE(session); + + /* + * Start a snapshot transaction for the checkpoint. + * + * Note: we don't go through the public API calls because they have + * side effects on cursors, which applications can hold open across + * calls to checkpoint. 
+ */ + WT_RET(__wt_txn_begin(session, txn_cfg)); + + WT_DIAGNOSTIC_YIELD; + + /* Ensure a transaction ID is allocated prior to sharing it globally */ + WT_RET(__wt_txn_id_check(session)); + + /* + * Mark the connection as clean. If some data gets modified after + * generating checkpoint transaction id, connection will be reset to + * dirty when reconciliation marks the btree dirty on encountering the + * dirty page. + */ + conn->modified = false; + + /* + * Save the checkpoint session ID. + * + * We never do checkpoints in the default session (with id zero). + */ + WT_ASSERT(session, session->id != 0 && txn_global->checkpoint_id == 0); + txn_global->checkpoint_id = session->id; + + /* + * Remove the checkpoint transaction from the global table. + * + * This allows ordinary visibility checks to move forward because + * checkpoints often take a long time and only write to the metadata. + */ + __wt_writelock(session, &txn_global->scan_rwlock); + txn_global->checkpoint_txnid = txn->id; + txn_global->checkpoint_pinned = WT_MIN(txn->id, txn->snap_min); + + /* + * Sanity check that the oldest ID hasn't moved on before we have + * cleared our entry. + */ + WT_ASSERT(session, + WT_TXNID_LE(txn_global->oldest_id, txn_state->id) && + WT_TXNID_LE(txn_global->oldest_id, txn_state->pinned_id)); + + /* + * Clear our entry from the global transaction session table. Any + * operation that needs to know about the ID for this checkpoint will + * consider the checkpoint ID in the global structure. Most operations + * can safely ignore the checkpoint ID (see the visible all check for + * details). + */ + txn_state->id = txn_state->pinned_id = + txn_state->metadata_pinned = WT_TXN_NONE; + __wt_writeunlock(session, &txn_global->scan_rwlock); + + /* + * Get a list of handles we want to flush; for named checkpoints this + * may pull closed objects into the session cache. + * + * First, gather all handles, then start the checkpoint transaction, + * then release any clean handles. 
+ */ + WT_ASSERT(session, session->ckpt_handle_next == 0); + WT_WITH_TABLE_READ_LOCK(session, ret = __checkpoint_apply_all( + session, cfg, __wt_checkpoint_get_handles, NULL)); + return (ret); } /* @@ -550,19 +665,15 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_TXN *txn; WT_TXN_GLOBAL *txn_global; WT_TXN_ISOLATION saved_isolation; - WT_TXN_STATE *txn_state; void *saved_meta_next; u_int i; uint64_t fsync_duration_usecs; bool failed, full, idle, logging, tracking; - const char *txn_cfg[] = { WT_CONFIG_BASE(session, - WT_SESSION_begin_transaction), "isolation=snapshot", NULL }; conn = S2C(session); cache = conn->cache; txn = &session->txn; txn_global = &conn->txn_global; - txn_state = WT_SESSION_TXN_STATE(session); saved_isolation = session->isolation; full = idle = logging = tracking = false; @@ -631,86 +742,24 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) tracking = true; /* - * Get a list of handles we want to flush; for named checkpoints this - * may pull closed objects into the session cache. - * * We want to skip checkpointing clean handles whenever possible. That * is, when the checkpoint is not named or forced. However, we need to * take care about ordering with respect to the checkpoint transaction. * - * If we skip clean handles before starting the transaction, the + * We can't skip clean handles before starting the transaction or the * checkpoint can miss updates in trees that become dirty as the * checkpoint is starting. If we wait until the transaction has * started before locking a handle, there could be a metadata-changing * operation in between (e.g., salvage) that will cause a write * conflict when the checkpoint goes to write the metadata. * - * First, gather all handles, then start the checkpoint transaction, - * then release any clean handles. + * Hold the schema lock while starting the transaction and gathering + * handles so the set we get is complete and correct. 
*/ - WT_ASSERT(session, session->ckpt_handle_next == 0); - WT_WITH_SCHEMA_LOCK(session, - WT_WITH_TABLE_READ_LOCK(session, - ret = __checkpoint_apply_all( - session, cfg, __wt_checkpoint_get_handles, NULL))); + WT_WITH_SCHEMA_LOCK(session, ret = __checkpoint_prepare(session, cfg)); WT_ERR(ret); - /* - * Start a snapshot transaction for the checkpoint. - * - * Note: we don't go through the public API calls because they have - * side effects on cursors, which applications can hold open across - * calls to checkpoint. - */ - WT_ERR(__wt_txn_begin(session, txn_cfg)); - - /* Ensure a transaction ID is allocated prior to sharing it globally */ - WT_ERR(__wt_txn_id_check(session)); - - /* - * Mark the connection as clean. If some data gets modified after - * generating checkpoint transaction id, connection will be reset to - * dirty when reconciliation marks the btree dirty on encountering the - * dirty page. - */ - conn->modified = false; - - /* - * Save the checkpoint session ID. - * - * We never do checkpoints in the default session (with id zero). - */ - WT_ASSERT(session, session->id != 0 && txn_global->checkpoint_id == 0); - txn_global->checkpoint_id = session->id; - - /* - * Remove the checkpoint transaction from the global table. - * - * This allows ordinary visibility checks to move forward because - * checkpoints often take a long time and only write to the metadata. - */ - __wt_writelock(session, &txn_global->scan_rwlock); - txn_global->checkpoint_txnid = txn->id; - txn_global->checkpoint_pinned = WT_MIN(txn->id, txn->snap_min); - - /* - * Sanity check that the oldest ID hasn't moved on before we have - * cleared our entry. - */ - WT_ASSERT(session, - WT_TXNID_LE(txn_global->oldest_id, txn_state->id) && - WT_TXNID_LE(txn_global->oldest_id, txn_state->pinned_id)); - - /* - * Clear our entry from the global transaction session table. Any - * operation that needs to know about the ID for this checkpoint will - * consider the checkpoint ID in the global structure. 
Most operations - * can safely ignore the checkpoint ID (see the visible all check for - * details). - */ - txn_state->id = txn_state->pinned_id = - txn_state->metadata_pinned = WT_TXN_NONE; - __wt_writeunlock(session, &txn_global->scan_rwlock); + WT_ASSERT(session, txn->isolation == WT_ISO_SNAPSHOT); /* * Unblock updates -- we can figure out that any updates to clean pages @@ -719,16 +768,6 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) cache->eviction_scrub_limit = 0.0; WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target, 0); - /* - * Mark old checkpoints that are being deleted and figure out which - * trees we can skip in this checkpoint. - * - * Release clean trees. Any updates made after this point will not - * visible to the checkpoint transaction. - */ - WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_mark_deletes)); - WT_ERR(__checkpoint_release_clean_trees(session)); - /* Tell logging that we have started a database checkpoint. */ if (full && logging) WT_ERR(__wt_txn_checkpoint_log( @@ -1065,12 +1104,13 @@ __drop_to(WT_CKPT *ckptbase, const char *name, size_t len) } /* - * __checkpoint_lock_tree -- - * Acquire the locks required to checkpoint a tree. + * __checkpoint_lock_dirty_tree -- + * Decide whether the tree needs to be included in the checkpoint and if + * so, acquire the necessary locks. */ static int -__checkpoint_lock_tree(WT_SESSION_IMPL *session, - bool is_checkpoint, bool need_tracking, const char *cfg[]) +__checkpoint_lock_dirty_tree(WT_SESSION_IMPL *session, + bool is_checkpoint, bool force, bool need_tracking, const char *cfg[]) { WT_BTREE *btree; WT_CKPT *ckpt, *ckptbase; @@ -1195,6 +1235,14 @@ __checkpoint_lock_tree(WT_SESSION_IMPL *session, } /* + * Mark old checkpoints that are being deleted and figure out which + * trees we can skip in this checkpoint. 
+ */ + WT_ERR(__checkpoint_mark_skip(session, ckptbase, force)); + if (F_ISSET(btree, WT_BTREE_SKIP_CKPT)) + goto err; + + /* * Lock the checkpoints that will be deleted. * * Checkpoints are only locked when tracking is enabled, which covers @@ -1227,33 +1275,20 @@ __checkpoint_lock_tree(WT_SESSION_IMPL *session, } /* - * There are special files: those being bulk-loaded, salvaged, upgraded - * or verified during the checkpoint. We have to do something for those - * objects because a checkpoint is an external name the application can - * reference and the name must exist no matter what's happening during - * the checkpoint. For bulk-loaded files, we could block until the load - * completes, checkpoint the partial load, or magic up an empty-file - * checkpoint. The first is too slow, the second is insane, so do the - * third. - * Salvage, upgrade and verify don't currently require any work, all - * three hold the schema lock, blocking checkpoints. If we ever want to - * fix that (and I bet we eventually will, at least for verify), we can - * copy the last checkpoint the file has. That works if we guarantee - * salvage, upgrade and verify act on objects with previous checkpoints - * (true if handles are closed/re-opened between object creation and a - * subsequent salvage, upgrade or verify operation). Presumably, - * salvage and upgrade will discard all previous checkpoints when they - * complete, which is fine with us. This change will require reference - * counting checkpoints, and once that's done, we should use checkpoint - * copy instead of forcing checkpoints on clean objects to associate - * names with checkpoints. + * There are special tree: those being bulk-loaded, salvaged, upgraded + * or verified during the checkpoint. They should never be part of a + * checkpoint: we will fail to lock them because the operations have + * exclusive access to the handles. 
Named checkpoints will fail in that + * case, ordinary checkpoints will skip files that cannot be opened + * normally. */ WT_ASSERT(session, !is_checkpoint || !F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS)); __wt_readunlock(session, &conn->hot_backup_lock); - WT_ASSERT(session, btree->ckpt == NULL); + WT_ASSERT(session, btree->ckpt == NULL && + !F_ISSET(btree, WT_BTREE_SKIP_CKPT)); btree->ckpt = ckptbase; return (0); @@ -1261,30 +1296,26 @@ __checkpoint_lock_tree(WT_SESSION_IMPL *session, err: if (hot_backup_locked) __wt_readunlock(session, &conn->hot_backup_lock); - __wt_meta_ckptlist_free(session, ckptbase); + __wt_meta_ckptlist_free(session, &ckptbase); __wt_free(session, name_alloc); return (ret); } /* - * __checkpoint_mark_deletes -- - * Figure out what old checkpoints will be deleted, and whether the - * checkpoint can be skipped entirely. + * __checkpoint_mark_skip -- + * Figure out whether the checkpoint can be skipped for a tree. */ static int -__checkpoint_mark_deletes( - WT_SESSION_IMPL *session, const char *cfg[]) +__checkpoint_mark_skip( + WT_SESSION_IMPL *session, WT_CKPT *ckptbase, bool force) { WT_BTREE *btree; - WT_CKPT *ckpt, *ckptbase; - WT_CONFIG_ITEM cval; + WT_CKPT *ckpt; const char *name; int deleted; - bool force; btree = S2BT(session); - ckptbase = btree->ckpt; /* * Check for clean objects not requiring a checkpoint. @@ -1310,12 +1341,7 @@ __checkpoint_mark_deletes( * to open the checkpoint in a cursor after taking any checkpoint, which * means it must exist. */ - force = false; F_CLR(btree, WT_BTREE_SKIP_CKPT); - if (!btree->modified && cfg != NULL) { - WT_RET(__wt_config_gets(session, cfg, "force", &cval)); - force = cval.val != 0; - } if (!btree->modified && !force) { deleted = 0; WT_CKPT_FOREACH(ckptbase, ckpt) @@ -1393,7 +1419,7 @@ __checkpoint_tree( * delete a physical checkpoint, and that will end in tears. 
*/ if (is_checkpoint) - if (btree->bulk_load_ok) { + if (btree->original) { fake_ckpt = true; goto fake; } @@ -1504,8 +1530,7 @@ err: /* S2C(session)->modified = true; } - __wt_meta_ckptlist_free(session, ckptbase); - btree->ckpt = NULL; + __wt_meta_ckptlist_free(session, &btree->ckpt); return (ret); } @@ -1524,7 +1549,8 @@ __checkpoint_presync(WT_SESSION_IMPL *session, const char *cfg[]) WT_UNUSED(cfg); btree = S2BT(session); - WT_ASSERT(session, !btree->include_checkpoint_txn); + WT_ASSERT(session, btree->checkpoint_gen == + S2C(session)->txn_global.checkpoint_gen); btree->evict_walk_period = btree->evict_walk_saved; return (0); } @@ -1573,7 +1599,9 @@ __checkpoint_tree_helper(WT_SESSION_IMPL *session, const char *cfg[]) int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) { + WT_CONFIG_ITEM cval; WT_DECL_RET; + bool force; /* Should not be called with a checkpoint handle. */ WT_ASSERT(session, session->dhandle->checkpoint == NULL); @@ -1582,12 +1610,13 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_ASSERT(session, !WT_IS_METADATA(session->dhandle) || F_ISSET(session, WT_SESSION_LOCKED_METADATA)); - WT_SAVE_DHANDLE(session, - ret = __checkpoint_lock_tree(session, true, true, cfg)); - WT_RET(ret); - WT_SAVE_DHANDLE(session, - ret = __checkpoint_mark_deletes(session, cfg)); + WT_RET(__wt_config_gets_def(session, cfg, "force", 0, &cval)); + force = cval.val != 0; + WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_dirty_tree( + session, true, force, true, cfg)); WT_RET(ret); + if (F_ISSET(S2BT(session), WT_BTREE_SKIP_CKPT)) + return (0); return (__checkpoint_tree(session, true, cfg)); } @@ -1662,15 +1691,10 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) if (need_tracking) WT_RET(__wt_meta_track_on(session)); - WT_SAVE_DHANDLE(session, - ret = __checkpoint_lock_tree(session, false, need_tracking, NULL)); + WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_dirty_tree( + session, false, false, need_tracking, NULL)); 
WT_ASSERT(session, ret == 0); - if (ret == 0) { - WT_SAVE_DHANDLE(session, - ret = __checkpoint_mark_deletes(session, NULL)); - WT_ASSERT(session, ret == 0); - } - if (ret == 0) + if (ret == 0 && !F_ISSET(btree, WT_BTREE_SKIP_CKPT)) ret = __checkpoint_tree(session, false, NULL); if (need_tracking) diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c index 2d8a77a69e6..30932195b1e 100644 --- a/src/third_party/wiredtiger/src/txn/txn_recover.c +++ b/src/third_party/wiredtiger/src/txn/txn_recover.c @@ -93,7 +93,7 @@ __recovery_cursor(WT_SESSION_IMPL *session, WT_RECOVERY *r, "%s op %" PRIu32 " to file %" PRIu32 " at LSN %" PRIu32 \ "/%" PRIu32, \ cursor == NULL ? "Skipping" : "Applying", \ - optype, fileid, lsnp->l.file, lsnp->l.offset); \ + optype, fileid, (lsnp)->l.file, (lsnp)->l.offset); \ if (cursor == NULL) \ break diff --git a/src/third_party/wiredtiger/src/utilities/util_backup.c b/src/third_party/wiredtiger/src/utilities/util_backup.c index 5dc9671fb45..f1b31f7621a 100644 --- a/src/third_party/wiredtiger/src/utilities/util_backup.c +++ b/src/third_party/wiredtiger/src/utilities/util_backup.c @@ -109,9 +109,14 @@ copy(WT_SESSION *session, const char *directory, const char *name) /* Build the target pathname. 
*/ len = strlen(directory) + strlen(name) + 2; - if ((to = malloc(len)) == NULL) - goto memerr; - (void)snprintf(to, len, "%s/%s", directory, name); + if ((to = malloc(len)) == NULL) { + fprintf(stderr, "%s: %s\n", progname, strerror(errno)); + return (1); + } + if ((ret = __wt_snprintf(to, len, "%s/%s", directory, name)) != 0) { + fprintf(stderr, "%s: %s\n", progname, strerror(ret)); + goto err; + } if (verbose && printf("Backing up %s/%s to %s\n", home, name, to) < 0) { fprintf(stderr, "%s: %s\n", progname, strerror(EIO)); @@ -126,11 +131,7 @@ copy(WT_SESSION *session, const char *directory, const char *name) fprintf(stderr, "%s/%s to %s: backup copy: %s\n", home, name, to, session->strerror(session, ret)); - if (0) { -memerr: fprintf(stderr, "%s: %s\n", progname, strerror(errno)); - } err: free(to); - return (ret); } diff --git a/src/third_party/wiredtiger/src/utilities/util_dump.c b/src/third_party/wiredtiger/src/utilities/util_dump.c index cded40a8b45..955148b7d46 100644 --- a/src/third_party/wiredtiger/src/utilities/util_dump.c +++ b/src/third_party/wiredtiger/src/utilities/util_dump.c @@ -259,14 +259,15 @@ dump_add_config(WT_SESSION *session, char **bufp, size_t *leftp, const char *fmt, ...) 
WT_GCC_FUNC_ATTRIBUTE((format (printf, 4, 5))) { - int n; + WT_DECL_RET; + size_t n; va_list ap; va_start(ap, fmt); - n = vsnprintf(*bufp, *leftp, fmt, ap); + ret = __wt_vsnprintf_len_set(*bufp, *leftp, &n, fmt, ap); va_end(ap); - if (n < 0) - return (util_err(session, EINVAL, NULL)); + if (ret != 0) + return (util_err(session, ret, NULL)); *bufp += n; *leftp -= (size_t)n; return (0); @@ -435,9 +436,11 @@ dump_table_parts_config(WT_SESSION *session, WT_CURSOR *cursor, len = strlen(entry) + strlen(name) + 1; if ((uriprefix = malloc(len)) == NULL) - return util_err(session, errno, NULL); - - snprintf(uriprefix, len, "%s%s", entry, name); + return (util_err(session, errno, NULL)); + if ((ret = __wt_snprintf(uriprefix, len, "%s%s", entry, name)) != 0) { + free(uriprefix); + return (util_err(session, ret, NULL)); + } /* * Search the file looking for column group and index key/value pairs: @@ -504,17 +507,18 @@ dump_prefix(WT_SESSION *session, bool hex, bool json) (void)wiredtiger_version(&vmajor, &vminor, &vpatch); + if (json && printf( + " \"%s\" : \"%d (%d.%d.%d)\",\n", + DUMP_JSON_VERSION_MARKER, DUMP_JSON_CURRENT_VERSION, + vmajor, vminor, vpatch) < 0) + return (util_err(session, EIO, NULL)); + if (!json && (printf( "WiredTiger Dump (WiredTiger Version %d.%d.%d)\n", vmajor, vminor, vpatch) < 0 || printf("Format=%s\n", hex ? 
"hex" : "print") < 0 || printf("Header\n") < 0)) return (util_err(session, EIO, NULL)); - else if (json && printf( - " \"%s\" : \"%d (%d.%d.%d)\",\n", - DUMP_JSON_VERSION_MARKER, DUMP_JSON_CURRENT_VERSION, - vmajor, vminor, vpatch) < 0) - return (util_err(session, EIO, NULL)); return (0); } diff --git a/src/third_party/wiredtiger/src/utilities/util_load.c b/src/third_party/wiredtiger/src/utilities/util_load.c index ca77643eb49..d2f00402217 100644 --- a/src/third_party/wiredtiger/src/utilities/util_load.c +++ b/src/third_party/wiredtiger/src/utilities/util_load.c @@ -80,8 +80,8 @@ util_load(WT_SESSION *session, int argc, char *argv[]) if (no_overwrite) flags |= LOAD_JSON_NO_OVERWRITE; return (util_load_json(session, filename, flags)); - } else - return (load_dump(session)); + } + return (load_dump(session)); } /* @@ -120,10 +120,12 @@ load_dump(WT_SESSION *session) goto err; /* Open the insert cursor. */ - (void)snprintf(config, sizeof(config), + if ((ret = __wt_snprintf(config, sizeof(config), "dump=%s%s%s", hex ? "hex" : "print", - append ? ",append" : "", no_overwrite ? ",overwrite=false" : ""); + append ? ",append" : "", + no_overwrite ? ",overwrite=false" : "")) != 0) + return (util_err(session, ret, NULL)); if ((ret = session->open_cursor( session, uri, NULL, config, &cursor)) != 0) { ret = util_err(session, ret, "%s: session.open_cursor", uri); @@ -472,6 +474,7 @@ config_update(WT_SESSION *session, char **list) static int config_rename(WT_SESSION *session, char **urip, const char *name) { + WT_DECL_RET; size_t len; char *buf, *p; @@ -490,7 +493,9 @@ config_rename(WT_SESSION *session, char **urip, const char *name) } *p = '\0'; p = strchr(p + 1, ':'); - snprintf(buf, len, "%s:%s%s", *urip, name, p == NULL ? "" : p); + if ((ret = __wt_snprintf( + buf, len, "%s:%s%s", *urip, name, p == NULL ? 
"" : p)) != 0) + return (util_err(session, ret, NULL)); *urip = buf; return (0); diff --git a/src/third_party/wiredtiger/src/utilities/util_load_json.c b/src/third_party/wiredtiger/src/utilities/util_load_json.c index 1189d49a483..c693e2b7651 100644 --- a/src/third_party/wiredtiger/src/utilities/util_load_json.c +++ b/src/third_party/wiredtiger/src/utilities/util_load_json.c @@ -145,6 +145,7 @@ static int json_kvraw_append(WT_SESSION *session, JSON_INPUT_STATE *ins, const char *str, size_t len) { + WT_DECL_RET; size_t needsize; char *tmp; @@ -152,11 +153,15 @@ json_kvraw_append(WT_SESSION *session, needsize = strlen(ins->kvraw) + len + 2; if ((tmp = malloc(needsize)) == NULL) return (util_err(session, errno, NULL)); - snprintf(tmp, needsize, "%s %.*s", ins->kvraw, (int)len, str); + WT_ERR(__wt_snprintf( + tmp, needsize, "%s %.*s", ins->kvraw, (int)len, str)); free(ins->kvraw); ins->kvraw = tmp; } return (0); + +err: free(tmp); + return (util_err(session, ret, NULL)); } /* @@ -181,7 +186,7 @@ json_strdup(WT_SESSION *session, JSON_INPUT_STATE *ins, char **resultp) goto err; } resultlen += 1; - if ((result = (char *)malloc((size_t)resultlen)) == NULL) { + if ((result = malloc((size_t)resultlen)) == NULL) { ret = util_err(session, errno, NULL); goto err; } @@ -236,10 +241,13 @@ json_data(WT_SESSION *session, goto err; uri = clp->list[0]; - (void)snprintf(config, sizeof(config), + if ((ret = __wt_snprintf(config, sizeof(config), "dump=json%s%s", LF_ISSET(LOAD_JSON_APPEND) ? ",append" : "", - LF_ISSET(LOAD_JSON_NO_OVERWRITE) ? ",overwrite=false" : ""); + LF_ISSET(LOAD_JSON_NO_OVERWRITE) ? 
",overwrite=false" : "")) != 0) { + ret = util_err(session, ret, NULL); + goto err; + } if ((ret = session->open_cursor( session, uri, NULL, config, &cursor)) != 0) { ret = util_err(session, ret, "%s: session.open_cursor", uri); @@ -256,7 +264,7 @@ json_data(WT_SESSION *session, nfield = 0; JSON_EXPECT(session, ins, '{'); if (ins->kvraw == NULL) { - if ((ins->kvraw = (char *)malloc(1)) == NULL) { + if ((ins->kvraw = malloc(1)) == NULL) { ret = util_err(session, errno, NULL); goto err; } @@ -358,8 +366,11 @@ json_top_level(WT_SESSION *session, JSON_INPUT_STATE *ins, uint32_t flags) while (json_peek(session, ins) == 's') { JSON_EXPECT(session, ins, 's'); tableuri = realloc(tableuri, ins->toklen); - snprintf(tableuri, ins->toklen, "%.*s", - (int)(ins->toklen - 2), ins->tokstart + 1); + if ((ret = __wt_snprintf(tableuri, ins->toklen, + "%.*s", (int)(ins->toklen - 2), ins->tokstart + 1)) != 0) { + ret = util_err(session, ret, NULL); + goto err; + } JSON_EXPECT(session, ins, ':'); if (!hasversion) { if (strcmp(tableuri, DUMP_JSON_VERSION_MARKER) != 0) { diff --git a/src/third_party/wiredtiger/src/utilities/util_main.c b/src/third_party/wiredtiger/src/utilities/util_main.c index 7157f0d90fe..c6f225bb667 100644 --- a/src/third_party/wiredtiger/src/utilities/util_main.c +++ b/src/third_party/wiredtiger/src/utilities/util_main.c @@ -20,7 +20,43 @@ static const char *command; /* Command name */ #define REC_LOGOFF "log=(enabled=false)" #define REC_RECOVER "log=(recover=on)" -static int usage(void); +static void +usage(void) +{ + fprintf(stderr, + "WiredTiger Data Engine (version %d.%d)\n", + WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR); + fprintf(stderr, + "global options:\n" + "\t" "-C\t" "wiredtiger_open configuration\n" + "\t" "-h\t" "database directory\n" + "\t" "-L\t" "turn logging off for debug-mode\n" + "\t" "-R\t" "run recovery if configured\n" + "\t" "-V\t" "display library version and exit\n" + "\t" "-v\t" "verbose\n"); + fprintf(stderr, + "commands:\n" + "\t" 
"alter\t alter an object\n" + "\t" "backup\t database backup\n" + "\t" "compact\t compact an object\n" + "\t" "copyright copyright information\n" + "\t" "create\t create an object\n" + "\t" "drop\t drop an object\n" + "\t" "dump\t dump an object\n" + "\t" "list\t list database objects\n" + "\t" "load\t load an object\n" + "\t" "loadtext load an object from a text file\n" + "\t" "printlog display the database log\n" + "\t" "read\t read values from an object\n" + "\t" "rebalance rebalance an object\n" + "\t" "rename\t rename an object\n" + "\t" "salvage\t salvage a file\n" + "\t" "stat\t display statistics for an object\n" + "\t" "truncate truncate an object, removing all content\n" + "\t" "upgrade\t upgrade an object\n" + "\t" "verify\t verify an object\n" + "\t" "write\t write values to an object\n"); +} int main(int argc, char *argv[]) @@ -73,8 +109,9 @@ main(int argc, char *argv[]) cmd_config = __wt_optarg; break; case 'E': /* secret key */ + free(secretkey); /* lint: set more than once */ if ((secretkey = strdup(__wt_optarg)) == NULL) { - ret = util_err(NULL, errno, NULL); + (void)util_err(NULL, errno, NULL); goto err; } memset(__wt_optarg, 0, strlen(__wt_optarg)); @@ -92,24 +129,27 @@ main(int argc, char *argv[]) break; case 'V': /* version */ printf("%s\n", wiredtiger_version(NULL, NULL, NULL)); - return (EXIT_SUCCESS); + goto done; case 'v': /* verbose */ verbose = true; break; case '?': default: - return (usage()); + usage(); + goto err; } if (logoff && recover) { fprintf(stderr, "Only one of -L and -R is allowed.\n"); - return (EXIT_FAILURE); + goto err; } argc -= __wt_optind; argv += __wt_optind; /* The next argument is the command name. */ - if (argc < 1) - return (usage()); + if (argc < 1) { + usage(); + goto err; + } command = argv[0]; /* Reset getopt. 
*/ @@ -130,7 +170,7 @@ main(int argc, char *argv[]) func = util_compact; else if (strcmp(command, "copyright") == 0) { util_copyright(); - return (EXIT_SUCCESS); + goto done; } else if (strcmp(command, "create") == 0) { func = util_create; config = "create"; @@ -194,8 +234,10 @@ main(int argc, char *argv[]) default: break; } - if (func == NULL) - return (usage()); + if (func == NULL) { + usage(); + goto err; + } /* Build the configuration string. */ len = 10; /* some slop */ @@ -212,30 +254,39 @@ main(int argc, char *argv[]) } len += strlen(rec_config); if ((p = malloc(len)) == NULL) { - ret = util_err(NULL, errno, NULL); + (void)util_err(NULL, errno, NULL); goto err; } - (void)snprintf(p, len, "%s,%s,%s%s%s%s", + if ((ret = __wt_snprintf(p, len, "%s,%s,%s%s%s%s", config == NULL ? "" : config, - cmd_config == NULL ? "" : cmd_config, rec_config, p1, p2, p3); + cmd_config == NULL ? "" : cmd_config, + rec_config, p1, p2, p3)) != 0) { + (void)util_err(NULL, ret, NULL); + goto err; + } config = p; /* Open the database and a session. */ if ((ret = wiredtiger_open(home, verbose ? verbose_handler : NULL, config, &conn)) != 0) { - ret = util_err(NULL, ret, NULL); + (void)util_err(NULL, ret, NULL); goto err; } if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) { - ret = util_err(NULL, ret, NULL); + (void)util_err(NULL, ret, NULL); goto err; } /* Call the function. */ ret = func(session, argc, argv); + if (0) { +err: ret = 1; + } +done: + /* Close the database. */ -err: if (conn != NULL && (tret = conn->close(conn, NULL)) != 0 && ret == 0) + if (conn != NULL && (tret = conn->close(conn, NULL)) != 0 && ret == 0) ret = tret; free(p); @@ -244,46 +295,6 @@ err: if (conn != NULL && (tret = conn->close(conn, NULL)) != 0 && ret == 0) return (ret == 0 ? 
EXIT_SUCCESS : EXIT_FAILURE); } -static int -usage(void) -{ - fprintf(stderr, - "WiredTiger Data Engine (version %d.%d)\n", - WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR); - fprintf(stderr, - "global options:\n" - "\t" "-C\t" "wiredtiger_open configuration\n" - "\t" "-h\t" "database directory\n" - "\t" "-L\t" "turn logging off for debug-mode\n" - "\t" "-R\t" "run recovery if configured\n" - "\t" "-V\t" "display library version and exit\n" - "\t" "-v\t" "verbose\n"); - fprintf(stderr, - "commands:\n" - "\t" "alter\t alter an object\n" - "\t" "backup\t database backup\n" - "\t" "compact\t compact an object\n" - "\t" "copyright copyright information\n" - "\t" "create\t create an object\n" - "\t" "drop\t drop an object\n" - "\t" "dump\t dump an object\n" - "\t" "list\t list database objects\n" - "\t" "load\t load an object\n" - "\t" "loadtext load an object from a text file\n" - "\t" "printlog display the database log\n" - "\t" "read\t read values from an object\n" - "\t" "rebalance rebalance an object\n" - "\t" "rename\t rename an object\n" - "\t" "salvage\t salvage a file\n" - "\t" "stat\t display statistics for an object\n" - "\t" "truncate truncate an object, removing all content\n" - "\t" "upgrade\t upgrade an object\n" - "\t" "verify\t verify an object\n" - "\t" "write\t write values to an object\n"); - - return (EXIT_FAILURE); -} - /* * util_uri -- * Build a name. @@ -291,6 +302,7 @@ usage(void) char * util_uri(WT_SESSION *session, const char *s, const char *type) { + WT_DECL_RET; size_t len; char *name; @@ -314,8 +326,12 @@ util_uri(WT_SESSION *session, const char *s, const char *type) * the default type for the operation. 
*/ if (strchr(s, ':') != NULL) - strcpy(name, s); + WT_ERR(__wt_snprintf(name, len, "%s", s)); else - snprintf(name, len, "%s:%s", type, s); + WT_ERR(__wt_snprintf(name, len, "%s:%s", type, s)); return (name); + +err: free(name); + (void)util_err(session, ret, NULL); + return (NULL); } diff --git a/src/third_party/wiredtiger/src/utilities/util_misc.c b/src/third_party/wiredtiger/src/utilities/util_misc.c index 0905bfa97be..e26185a0096 100644 --- a/src/third_party/wiredtiger/src/utilities/util_misc.c +++ b/src/third_party/wiredtiger/src/utilities/util_misc.c @@ -140,7 +140,10 @@ util_flush(WT_SESSION *session, const char *uri) if ((buf = malloc(len)) == NULL) return (util_err(session, errno, NULL)); - (void)snprintf(buf, len, "target=(\"%s\")", uri); + if ((ret = __wt_snprintf(buf, len, "target=(\"%s\")", uri)) != 0) { + free(buf); + return (util_err(session, ret, NULL)); + } ret = session->checkpoint(session, buf); free(buf); diff --git a/src/third_party/wiredtiger/src/utilities/util_stat.c b/src/third_party/wiredtiger/src/utilities/util_stat.c index 1b75d9ea8bf..0692afe2819 100644 --- a/src/third_party/wiredtiger/src/utilities/util_stat.c +++ b/src/third_party/wiredtiger/src/utilities/util_stat.c @@ -68,7 +68,10 @@ util_stat(WT_SESSION *session, int argc, char *argv[]) fprintf(stderr, "%s: %s\n", progname, strerror(errno)); goto err; } - snprintf(uri, urilen, "statistics:%s", objname); + if ((ret = __wt_snprintf(uri, urilen, "statistics:%s", objname)) != 0) { + fprintf(stderr, "%s: %s\n", progname, strerror(ret)); + goto err; + } if ((ret = session->open_cursor(session, uri, NULL, config, &cursor)) != 0) { diff --git a/src/third_party/wiredtiger/src/utilities/util_verify.c b/src/third_party/wiredtiger/src/utilities/util_verify.c index d0587fcfc8c..ace1be7a5de 100644 --- a/src/third_party/wiredtiger/src/utilities/util_verify.c +++ b/src/third_party/wiredtiger/src/utilities/util_verify.c @@ -72,7 +72,7 @@ util_verify(WT_SESSION *session, int argc, char *argv[]) ret 
= util_err(session, errno, NULL); goto err; } - snprintf(config, size, + if ((ret = __wt_snprintf(config, size, "%s%s%s%s%s%s%s", dump_address ? "dump_address," : "", dump_blocks ? "dump_blocks," : "", @@ -80,7 +80,10 @@ util_verify(WT_SESSION *session, int argc, char *argv[]) dump_offsets != NULL ? "dump_offsets=[" : "", dump_offsets != NULL ? dump_offsets : "", dump_offsets != NULL ? "]," : "", - dump_pages ? "dump_pages," : ""); + dump_pages ? "dump_pages," : "")) != 0) { + (void)util_err(session, ret, NULL); + goto err; + } } if ((ret = session->verify(session, uri, config)) != 0) (void)util_err(session, ret, "session.verify: %s", uri); diff --git a/src/third_party/wiredtiger/src/utilities/util_write.c b/src/third_party/wiredtiger/src/utilities/util_write.c index b931fad064d..1d3e6937f8d 100644 --- a/src/third_party/wiredtiger/src/utilities/util_write.c +++ b/src/third_party/wiredtiger/src/utilities/util_write.c @@ -54,8 +54,12 @@ util_write(WT_SESSION *session, int argc, char *argv[]) * Open the object; free allocated memory immediately to simplify * future error handling. */ - (void)snprintf(config, sizeof(config), "%s,%s", - append ? "append=true" : "", overwrite ? "overwrite=true" : ""); + if ((ret = __wt_snprintf(config, sizeof(config), "%s,%s", + append ? "append=true" : "", + overwrite ? 
"overwrite=true" : "")) != 0) { + free(uri); + return (util_err(session, ret, NULL)); + } if ((ret = session->open_cursor(session, uri, NULL, config, &cursor)) != 0) (void)util_err(session, ret, "%s: session.open_cursor", uri); diff --git a/src/third_party/wiredtiger/test/bloom/test_bloom.c b/src/third_party/wiredtiger/test/bloom/test_bloom.c index 67249ff887e..b6299bbbadc 100644 --- a/src/third_party/wiredtiger/test/bloom/test_bloom.c +++ b/src/third_party/wiredtiger/test/bloom/test_bloom.c @@ -29,8 +29,6 @@ #include "test_util.h" static struct { - char *progname; /* Program name */ - WT_CONNECTION *wt_conn; /* WT_CONNECTION handle */ WT_SESSION *wt_session; /* WT_SESSION handle */ @@ -61,10 +59,7 @@ main(int argc, char *argv[]) { int ch; - if ((g.progname = strrchr(argv[0], DIR_DELIM)) == NULL) - g.progname = argv[0]; - else - ++g.progname; + (void)testutil_set_progname(argv); /* Set default configuration values. */ g.c_cache = 10; @@ -75,7 +70,7 @@ main(int argc, char *argv[]) g.c_srand = 3233456; /* Set values from the command line. */ - while ((ch = __wt_getopt(g.progname, argc, argv, "c:f:k:o:s:")) != EOF) + while ((ch = __wt_getopt(progname, argc, argv, "c:f:k:o:s:")) != EOF) switch (ch) { case 'c': /* Cache size */ g.c_cache = (u_int)atoi(__wt_optarg); @@ -126,9 +121,9 @@ setup(void) * Open configuration -- put command line configuration options at the * end so they can override "standard" configuration. */ - snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "create,error_prefix=\"%s\",cache_size=%" PRIu32 "MB,%s", - g.progname, g.c_cache, g.config_open == NULL ? "" : g.config_open); + progname, g.c_cache, g.config_open == NULL ? 
"" : g.config_open)); testutil_check(wiredtiger_open(NULL, NULL, config, &conn)); @@ -246,7 +241,7 @@ populate_entries(void) void usage(void) { - fprintf(stderr, "usage: %s [-cfkos]\n", g.progname); + fprintf(stderr, "usage: %s [-cfkos]\n", progname); fprintf(stderr, "%s", "\t-c cache size\n" "\t-f number of bits per item\n" diff --git a/src/third_party/wiredtiger/test/checkpoint/checkpointer.c b/src/third_party/wiredtiger/test/checkpoint/checkpointer.c index ef49a9492ce..84d2765843a 100644 --- a/src/third_party/wiredtiger/test/checkpoint/checkpointer.c +++ b/src/third_party/wiredtiger/test/checkpoint/checkpointer.c @@ -74,7 +74,7 @@ checkpointer(void *arg) WT_UNUSED(arg); - __wt_thread_id(tid, sizeof(tid)); + testutil_check(__wt_thread_id(tid, sizeof(tid))); printf("checkpointer thread starting: tid: %s\n", tid); (void)real_checkpointer(); @@ -107,8 +107,9 @@ real_checkpointer(void) "WiredTigerCheckpoint", strlen("WiredTigerCheckpoint")) == 0) checkpoint_config = NULL; else { + testutil_check(__wt_snprintf( + _buf, sizeof(_buf), "name=%s", g.checkpoint_name)); checkpoint_config = _buf; - snprintf(checkpoint_config, 128, "name=%s", g.checkpoint_name); } while (g.running) { /* Execute a checkpoint */ @@ -147,7 +148,8 @@ verify_checkpoint(WT_SESSION *session) ret = t_ret = 0; key_count = 0; - snprintf(ckpt, 128, "checkpoint=%s", g.checkpoint_name); + testutil_check(__wt_snprintf( + ckpt, sizeof(ckpt), "checkpoint=%s", g.checkpoint_name)); cursors = calloc((size_t)g.ntables, sizeof(*cursors)); if (cursors == NULL) return (log_print_err("verify_checkpoint", ENOMEM, 1)); @@ -159,7 +161,8 @@ verify_checkpoint(WT_SESSION *session) */ if (g.cookies[i].type == LSM) continue; - snprintf(next_uri, 128, "table:__wt%04d", i); + testutil_check(__wt_snprintf( + next_uri, sizeof(next_uri), "table:__wt%04d", i)); if ((ret = session->open_cursor( session, next_uri, NULL, ckpt, &cursors[i])) != 0) { (void)log_print_err( @@ -296,7 +299,8 @@ diagnose_key_error( session = 
cursor1->session; key1_orig = key2_orig = 0; - snprintf(ckpt, 128, "checkpoint=%s", g.checkpoint_name); + testutil_check(__wt_snprintf( + ckpt, sizeof(ckpt), "checkpoint=%s", g.checkpoint_name)); /* Save the failed keys. */ if (cursor1->get_key(cursor1, &key1_orig) != 0 || @@ -338,7 +342,8 @@ diagnose_key_error( * Now try opening new cursors on the checkpoints and see if we * get the same missing key via searching. */ - snprintf(next_uri, 128, "table:__wt%04d", index1); + testutil_check(__wt_snprintf( + next_uri, sizeof(next_uri), "table:__wt%04d", index1)); if (session->open_cursor(session, next_uri, NULL, ckpt, &c) != 0) return (1); c->set_key(c, key1_orig); @@ -350,7 +355,8 @@ diagnose_key_error( if (c->close(c) != 0) return (1); - snprintf(next_uri, 128, "table:__wt%04d", index2); + testutil_check(__wt_snprintf( + next_uri, sizeof(next_uri), "table:__wt%04d", index2)); if (session->open_cursor(session, next_uri, NULL, ckpt, &c) != 0) return (1); c->set_key(c, key1_orig); @@ -367,7 +373,8 @@ live_check: * Now try opening cursors on the live checkpoint to see if we get the * same missing key via searching. 
*/ - snprintf(next_uri, 128, "table:__wt%04d", index1); + testutil_check(__wt_snprintf( + next_uri, sizeof(next_uri), "table:__wt%04d", index1)); if (session->open_cursor(session, next_uri, NULL, NULL, &c) != 0) return (1); c->set_key(c, key1_orig); @@ -376,7 +383,8 @@ live_check: if (c->close(c) != 0) return (1); - snprintf(next_uri, 128, "table:__wt%04d", index2); + testutil_check(__wt_snprintf( + next_uri, sizeof(next_uri), "table:__wt%04d", index2)); if (session->open_cursor(session, next_uri, NULL, NULL, &c) != 0) return (1); c->set_key(c, key2_orig); diff --git a/src/third_party/wiredtiger/test/checkpoint/smoke.sh b/src/third_party/wiredtiger/test/checkpoint/smoke.sh index 123d4e00df5..39b1f428c2c 100755 --- a/src/third_party/wiredtiger/test/checkpoint/smoke.sh +++ b/src/third_party/wiredtiger/test/checkpoint/smoke.sh @@ -6,8 +6,8 @@ set -e echo "checkpoint: 3 mixed tables" $TEST_WRAPPER ./t -T 3 -t m -# We are done if short tests are requested -test -z "$TESTUTIL_DISABLE_LONG_TESTS" || exit 0 +# We are done unless long tests are enabled. 
+test "$TESTUTIL_ENABLE_LONG_TESTS" = "1" || exit 0 echo "checkpoint: 6 column-store tables" $TEST_WRAPPER ./t -T 6 -t c diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c index 4998019ad8e..e7e1a0b81a5 100644 --- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c +++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c @@ -50,10 +50,7 @@ main(int argc, char *argv[]) char *working_dir; const char *config_open; - if ((g.progname = strrchr(argv[0], DIR_DELIM)) == NULL) - g.progname = argv[0]; - else - ++g.progname; + (void)testutil_set_progname(argv); config_open = NULL; ret = 0; @@ -68,7 +65,7 @@ main(int argc, char *argv[]) runs = 1; while ((ch = __wt_getopt( - g.progname, argc, argv, "c:C:h:k:l:n:r:t:T:W:")) != EOF) + progname, argc, argv, "c:C:h:k:l:n:r:t:T:W:")) != EOF) switch (ch) { case 'c': g.checkpoint_name = __wt_optarg; @@ -132,7 +129,7 @@ main(int argc, char *argv[]) testutil_work_dir_from_path(g.home, 512, working_dir); - printf("%s: process %" PRIu64 "\n", g.progname, (uint64_t)getpid()); + printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid()); for (cnt = 1; (runs == 0 || cnt <= runs) && g.status == 0; ++cnt) { printf(" %d: %d workers, %d tables\n", cnt, g.nworkers, g.ntables); @@ -202,11 +199,11 @@ wt_connect(const char *config_open) testutil_make_work_dir(g.home); - snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "create,statistics=(fast),error_prefix=\"%s\",cache_size=1GB%s%s", - g.progname, + progname, config_open == NULL ? "" : ",", - config_open == NULL ? "" : config_open); + config_open == NULL ? 
"" : config_open)); if ((ret = wiredtiger_open( g.home, &event_handler, config, &g.conn)) != 0) @@ -297,10 +294,10 @@ log_print_err(const char *m, int e, int fatal) g.running = 0; g.status = e; } - fprintf(stderr, "%s: %s: %s\n", g.progname, m, wiredtiger_strerror(e)); + fprintf(stderr, "%s: %s: %s\n", progname, m, wiredtiger_strerror(e)); if (g.logfp != NULL) fprintf(g.logfp, "%s: %s: %s\n", - g.progname, m, wiredtiger_strerror(e)); + progname, m, wiredtiger_strerror(e)); return (e); } @@ -333,7 +330,7 @@ usage(void) "usage: %s " "[-S] [-C wiredtiger-config] [-k keys] [-l log]\n\t" "[-n ops] [-c checkpoint] [-r runs] [-t f|r|v] [-W workers]\n", - g.progname); + progname); fprintf(stderr, "%s", "\t-C specify wiredtiger_open configuration arguments\n" "\t-c checkpoint name to used named checkpoints\n" diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h index 0d0d02447d5..347bd2c6e89 100644 --- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h +++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h @@ -58,7 +58,6 @@ typedef struct { u_int nkeys; /* Keys to load */ u_int nops; /* Operations per thread */ FILE *logfp; /* Message log file. */ - char *progname; /* Program name */ int nworkers; /* Number workers configured */ int ntables; /* Number tables configured */ int ntables_created; /* Number tables opened */ diff --git a/src/third_party/wiredtiger/test/checkpoint/workers.c b/src/third_party/wiredtiger/test/checkpoint/workers.c index e4fe7bd1b29..82d1b8685c4 100644 --- a/src/third_party/wiredtiger/test/checkpoint/workers.c +++ b/src/third_party/wiredtiger/test/checkpoint/workers.c @@ -39,14 +39,12 @@ static int create_table(WT_SESSION *session, COOKIE *cookie) { int ret; - char *p, *end, config[128]; + char config[128]; - p = config; - end = config + sizeof(config); - p += snprintf(p, (size_t)(end - p), - "key_format=%s,value_format=S", cookie->type == COL ? 
"r" : "q"); - if (cookie->type == LSM) - (void)snprintf(p, (size_t)(end - p), ",type=lsm"); + testutil_check(__wt_snprintf(config, sizeof(config), + "key_format=%s,value_format=S,%s", + cookie->type == COL ? "r" : "q", + cookie->type == LSM ? ",type=lsm" : "")); if ((ret = session->create(session, cookie->uri, config)) != 0) if (ret != EEXIST) @@ -88,8 +86,9 @@ start_workers(table_type type) (table_type)((i % MAX_TABLE_TYPE) + 1); else g.cookies[i].type = type; - (void)snprintf(g.cookies[i].uri, 128, - "%s%04d", URI_BASE, g.cookies[i].id); + testutil_check(__wt_snprintf( + g.cookies[i].uri, sizeof(g.cookies[i].uri), + "%s%04d", URI_BASE, g.cookies[i].id)); /* Should probably be atomic to avoid races. */ if ((ret = create_table(session, &g.cookies[i])) != 0) @@ -132,7 +131,8 @@ worker_op(WT_CURSOR *cursor, uint64_t keyno, u_int new_val) char valuebuf[64]; cursor->set_key(cursor, keyno); - (void)snprintf(valuebuf, sizeof(valuebuf), "%037u", new_val); + testutil_check(__wt_snprintf( + valuebuf, sizeof(valuebuf), "%037u", new_val)); cursor->set_value(cursor, valuebuf); if ((ret = cursor->insert(cursor)) != 0) { if (ret == WT_ROLLBACK) @@ -153,7 +153,7 @@ worker(void *arg) WT_UNUSED(arg); - __wt_thread_id(tid, sizeof(tid)); + testutil_check(__wt_thread_id(tid, sizeof(tid))); printf("worker thread starting: tid: %s\n", tid); (void)real_worker(); diff --git a/src/third_party/wiredtiger/test/csuite/Makefile.am b/src/third_party/wiredtiger/test/csuite/Makefile.am index e2b72532703..10ab890f2f5 100644 --- a/src/third_party/wiredtiger/test/csuite/Makefile.am +++ b/src/third_party/wiredtiger/test/csuite/Makefile.am @@ -4,8 +4,13 @@ LDADD = $(top_builddir)/test/utility/libtest_util.la \ $(top_builddir)/libwiredtiger.la AM_LDFLAGS = -static +noinst_PROGRAMS= + +test_scope_SOURCES = scope/main.c +noinst_PROGRAMS += test_scope + test_wt1965_col_efficiency_SOURCES = wt1965_col_efficiency/main.c -noinst_PROGRAMS = test_wt1965_col_efficiency +noinst_PROGRAMS += 
test_wt1965_col_efficiency test_wt2403_lsm_workload_SOURCES = wt2403_lsm_workload/main.c noinst_PROGRAMS += test_wt2403_lsm_workload diff --git a/src/third_party/wiredtiger/test/csuite/scope/main.c b/src/third_party/wiredtiger/test/csuite/scope/main.c new file mode 100644 index 00000000000..15dabd97c40 --- /dev/null +++ b/src/third_party/wiredtiger/test/csuite/scope/main.c @@ -0,0 +1,288 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "test_util.h" + +#define KEY "key" +#define VALUE "value" + +static int ignore_errors; + +static int +handle_error(WT_EVENT_HANDLER *handler, + WT_SESSION *session, int error, const char *message) +{ + (void)(handler); + + /* Skip the error messages we're expecting to see. */ + if (ignore_errors > 0 && + (strstr(message, "requires key be set") != NULL || + strstr(message, "requires value be set") != NULL)) { + --ignore_errors; + return (0); + } + + (void)fprintf(stderr, "%s: %s\n", + message, session->strerror(session, error)); + return (0); +} + +static WT_EVENT_HANDLER event_handler = { + handle_error, + NULL, + NULL, + NULL +}; + +static void +cursor_scope_ops(WT_SESSION *session, const char *uri) +{ + struct { + const char *op; + enum { INSERT, SEARCH, SEARCH_NEAR, + REMOVE, REMOVE_POS, RESERVE, UPDATE } func; + const char *config; + } *op, ops[] = { + /* + * The ops order is fixed and shouldn't change, that is, insert + * has to happen first so search, update and remove operations + * are possible, and remove has to be last. + */ + { "insert", INSERT, NULL, }, + { "search", SEARCH, NULL, }, + { "search", SEARCH_NEAR, NULL, }, +#if 0 + { "reserve", RESERVE, NULL, }, +#endif + { "update", UPDATE, NULL, }, + { "remove", REMOVE, NULL, }, + { "remove", REMOVE_POS, NULL, }, + { NULL, INSERT, NULL } + }; + WT_CURSOR *cursor; + uint64_t keyr; + const char *key, *value; + char keybuf[100], valuebuf[100]; + int exact; + bool recno; + + /* Reserve requires a running transaction. */ + testutil_check(session->begin_transaction(session, NULL)); + + cursor = NULL; + for (op = ops; op->op != NULL; op++) { + key = value = NULL; + + /* Open a cursor. 
*/ + if (cursor != NULL) + testutil_check(cursor->close(cursor)); + testutil_check(session->open_cursor( + session, uri, NULL, op->config, &cursor)); + recno = strcmp(cursor->key_format, "r") == 0; + + /* + * Set up application buffers so we can detect overwrites + * or failure to copy application information into library + * memory. + */ + if (recno) + cursor->set_key(cursor, (uint64_t)1); + else { + strcpy(keybuf, KEY); + cursor->set_key(cursor, keybuf); + } + strcpy(valuebuf, VALUE); + cursor->set_value(cursor, valuebuf); + + /* + * The application must keep key and value memory valid until + * the next operation that positions the cursor, modifies the + * data, or resets or closes the cursor. + * + * Modifying either the key or value buffers is not permitted. + */ + switch (op->func) { + case INSERT: + testutil_check(cursor->insert(cursor)); + break; + case SEARCH: + testutil_check(cursor->search(cursor)); + break; + case SEARCH_NEAR: + testutil_check(cursor->search_near(cursor, &exact)); + break; + case REMOVE_POS: + /* + * Remove has two modes, one where the remove is based + * on a cursor position, the other where it's based on + * a set key. The results are different, so test them + * separately. + */ + testutil_check(cursor->search(cursor)); + /* FALLTHROUGH */ + case REMOVE: + testutil_check(cursor->remove(cursor)); + break; + case RESERVE: +#if 0 + testutil_check(cursor->reserve(cursor)); +#endif + break; + case UPDATE: + testutil_check(cursor->update(cursor)); + break; + } + + /* + * The cursor should no longer reference application memory, + * and application buffers can be safely overwritten. + */ + memset(keybuf, 'K', sizeof(keybuf)); + memset(valuebuf, 'V', sizeof(valuebuf)); + + /* + * Check that get_key/get_value behave as expected after the + * operation. + */ + switch (op->func) { + case INSERT: + case REMOVE: + /* + * Insert and remove configured with a search key do + * not position the cursor and have no key or value. 
+ * + * There should be two error messages, ignore them. + */ + ignore_errors = 2; + if (recno) + testutil_assert( + cursor->get_key(cursor, &keyr) != 0); + else + testutil_assert( + cursor->get_key(cursor, &key) != 0); + testutil_assert(cursor->get_value(cursor, &value) != 0); + testutil_assert(ignore_errors == 0); + break; + case REMOVE_POS: + /* + * Remove configured with a cursor position has a key, + * but no value. + * + * There should be one error message, ignore it. + */ + if (recno) { + testutil_assert( + cursor->get_key(cursor, &keyr) == 0); + testutil_assert(keyr == 1); + } else { + testutil_assert( + cursor->get_key(cursor, &key) == 0); + testutil_assert(key != keybuf); + testutil_assert(strcmp(key, KEY) == 0); + } + ignore_errors = 1; + testutil_assert(cursor->get_value(cursor, &value) != 0); + testutil_assert(ignore_errors == 0); + break; + case RESERVE: + case SEARCH: + case SEARCH_NEAR: + case UPDATE: + /* + * Reserve, search, search-near and update position the + * cursor and have both a key and value. + * + * Any key/value should not reference application + * memory. + */ + if (recno) { + testutil_assert( + cursor->get_key(cursor, &keyr) == 0); + testutil_assert(keyr == 1); + } else { + testutil_assert( + cursor->get_key(cursor, &key) == 0); + testutil_assert(key != keybuf); + testutil_assert(strcmp(key, KEY) == 0); + } + testutil_assert(cursor->get_value(cursor, &value) == 0); + testutil_assert(value != valuebuf); + testutil_assert(strcmp(value, VALUE) == 0); + break; + } + + /* + * We have more than one remove operation, add the key back + * in. 
+ */ + if (op->func == REMOVE || op->func == REMOVE_POS) { + if (recno) + cursor->set_key(cursor, (uint64_t)1); + else { + cursor->set_key(cursor, KEY); + } + cursor->set_value(cursor, VALUE); + testutil_check(cursor->insert(cursor)); + } + } +} + +static void +run(WT_CONNECTION *conn, const char *uri, const char *config) +{ + WT_SESSION *session; + + testutil_check(conn->open_session(conn, NULL, NULL, &session)); + testutil_check(session->create(session, uri, config)); + cursor_scope_ops(session, uri); + testutil_check(session->close(session, NULL)); +} + +int +main(int argc, char *argv[]) +{ + TEST_OPTS *opts, _opts; + + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + testutil_check(testutil_parse_opts(argc, argv, opts)); + testutil_make_work_dir(opts->home); + + testutil_check( + wiredtiger_open(opts->home, &event_handler, "create", &opts->conn)); + + run(opts->conn, "file:file.SS", "key_format=S,value_format=S"); + run(opts->conn, "file:file.rS", "key_format=r,value_format=S"); + run(opts->conn, "lsm:lsm.SS", "key_format=S,value_format=S"); + run(opts->conn, "lsm:lsm.rS", "key_format=r,value_format=S"); + run(opts->conn, "table:table.SS", "key_format=S,value_format=S"); + run(opts->conn, "table:table.rS", "key_format=r,value_format=S"); + + testutil_cleanup(opts); + + return (EXIT_SUCCESS); +} diff --git a/src/third_party/wiredtiger/test/csuite/wt1965_col_efficiency/main.c b/src/third_party/wiredtiger/test/csuite/wt1965_col_efficiency/main.c index a7235d81b31..e5b73d5e642 100644 --- a/src/third_party/wiredtiger/test/csuite/wt1965_col_efficiency/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt1965_col_efficiency/main.c @@ -132,7 +132,8 @@ main(int argc, char *argv[]) testutil_check(opts->conn->open_session( opts->conn, NULL, NULL, &session)); - sprintf(table_format, "key_format=r,value_format="); + testutil_check(__wt_snprintf( + table_format, sizeof(table_format), "key_format=r,value_format=")); for (i = 0; i < NR_FIELDS; i++) strcat(table_format, 
"Q"); diff --git a/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c b/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c index 4b352b26051..9876582fffa 100644 --- a/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c @@ -68,8 +68,8 @@ page_init(uint64_t n) else { if (recno % 3 == 0) ++vrecno; - snprintf(buf, - sizeof(buf), "%" PRIu64 " VALUE ------", vrecno); + testutil_check(__wt_snprintf(buf, + sizeof(buf), "%" PRIu64 " VALUE ------", vrecno)); cursor->set_value(cursor, buf); } testutil_check(cursor->insert(cursor)); @@ -101,9 +101,10 @@ main(int argc, char *argv[]) uint64_t i, id; char buf[100]; + if (!testutil_enable_long_tests()) /* Ignore unless requested */ + return (EXIT_SUCCESS); + opts = &_opts; - if (testutil_disable_long_tests()) - return (0); memset(opts, 0, sizeof(*opts)); opts->table_type = TABLE_ROW; opts->n_append_threads = N_APPEND_THREADS; @@ -111,19 +112,19 @@ main(int argc, char *argv[]) testutil_check(testutil_parse_opts(argc, argv, opts)); testutil_make_work_dir(opts->home); - snprintf(buf, sizeof(buf), + testutil_check(__wt_snprintf(buf, sizeof(buf), "create," "cache_size=%s," "eviction=(threads_max=5)," "statistics=(fast)", - opts->table_type == TABLE_FIX ? "500MB" : "2GB"); + opts->table_type == TABLE_FIX ? "500MB" : "2GB")); testutil_check(wiredtiger_open(opts->home, NULL, buf, &opts->conn)); testutil_check( opts->conn->open_session(opts->conn, NULL, NULL, &session)); - snprintf(buf, sizeof(buf), + testutil_check(__wt_snprintf(buf, sizeof(buf), "key_format=r,value_format=%s," "allocation_size=4K,leaf_page_max=64K", - opts->table_type == TABLE_FIX ? "8t" : "S"); + opts->table_type == TABLE_FIX ? 
"8t" : "S")); testutil_check(session->create(session, opts->uri, buf)); testutil_check(session->close(session, NULL)); diff --git a/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c b/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c index 239a3f300d0..617490fec4d 100644 --- a/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c @@ -92,10 +92,11 @@ main(int argc, char *argv[]) TEST_OPTS *opts, _opts; const char *tablename; + if (!testutil_enable_long_tests()) /* Ignore unless requested */ + return (EXIT_SUCCESS); + opts = &_opts; sharedopts = &_sharedopts; - if (testutil_disable_long_tests()) - return (0); memset(opts, 0, sizeof(*opts)); memset(sharedopts, 0, sizeof(*sharedopts)); @@ -105,14 +106,18 @@ main(int argc, char *argv[]) tablename = strchr(opts->uri, ':'); testutil_assert(tablename != NULL); tablename++; - snprintf(sharedopts->posturi, sizeof(sharedopts->posturi), - "index:%s:post", tablename); - snprintf(sharedopts->baluri, sizeof(sharedopts->baluri), - "index:%s:bal", tablename); - snprintf(sharedopts->flaguri, sizeof(sharedopts->flaguri), - "index:%s:flag", tablename); - snprintf(sharedopts->joinuri, sizeof(sharedopts->joinuri), - "join:%s", opts->uri); + testutil_check(__wt_snprintf( + sharedopts->posturi, sizeof(sharedopts->posturi), + "index:%s:post", tablename)); + testutil_check(__wt_snprintf( + sharedopts->baluri, sizeof(sharedopts->baluri), + "index:%s:bal", tablename)); + testutil_check(__wt_snprintf( + sharedopts->flaguri, sizeof(sharedopts->flaguri), + "index:%s:flag", tablename)); + testutil_check(__wt_snprintf( + sharedopts->joinuri, sizeof(sharedopts->joinuri), + "join:%s", opts->uri)); testutil_check(wiredtiger_open(opts->home, NULL, "create,cache_size=1G", &opts->conn)); @@ -349,19 +354,21 @@ static void *thread_join(void *arg) balcur->set_key(balcur, 0); testutil_check(balcur->search(balcur)); if 
(sharedopts->bloom) - sprintf(cfg, "compare=lt,strategy=bloom,count=%d", - N_RECORDS); + testutil_check(__wt_snprintf(cfg, sizeof(cfg), + "compare=lt,strategy=bloom,count=%d", N_RECORDS)); else - sprintf(cfg, "compare=lt"); + testutil_check(__wt_snprintf( + cfg, sizeof(cfg), "compare=lt")); testutil_check(session->join(session, joincur, balcur, cfg)); flagcur->set_key(flagcur, 0); testutil_check(flagcur->search(flagcur)); if (sharedopts->bloom) - sprintf(cfg, "compare=eq,strategy=bloom,count=%d", - N_RECORDS); + testutil_check(__wt_snprintf(cfg, sizeof(cfg), + "compare=eq,strategy=bloom,count=%d", N_RECORDS)); else - sprintf(cfg, "compare=eq"); + testutil_check(__wt_snprintf( + cfg, sizeof(cfg), "compare=eq")); testutil_check(session->join(session, joincur, flagcur, cfg)); /* Expect no values returned */ diff --git a/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c b/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c index 1368e7c8c09..656cea04145 100644 --- a/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c @@ -102,9 +102,12 @@ main(int argc, char *argv[]) tablename = strchr(opts->uri, ':'); testutil_assert(tablename != NULL); tablename++; - snprintf(index1uri, sizeof(index1uri), "index:%s:index1", tablename); - snprintf(index2uri, sizeof(index2uri), "index:%s:index2", tablename); - snprintf(joinuri, sizeof(joinuri), "join:%s", opts->uri); + testutil_check(__wt_snprintf( + index1uri, sizeof(index1uri), "index:%s:index1", tablename)); + testutil_check(__wt_snprintf( + index2uri, sizeof(index2uri), "index:%s:index2", tablename)); + testutil_check(__wt_snprintf( + joinuri, sizeof(joinuri), "join:%s", opts->uri)); testutil_check(wiredtiger_open(opts->home, NULL, "statistics=(all),create", &opts->conn)); @@ -150,7 +153,8 @@ main(int argc, char *argv[]) cursor2->set_key(cursor2, half + 1); testutil_check(cursor2->search(cursor2)); - 
sprintf(bloom_cfg, "compare=lt,strategy=bloom,count=%d", half); + testutil_check(__wt_snprintf(bloom_cfg, sizeof(bloom_cfg), + "compare=lt,strategy=bloom,count=%d", half)); testutil_check(session->open_cursor(session, joinuri, NULL, NULL, &jcursor)); diff --git a/src/third_party/wiredtiger/test/csuite/wt2535_insert_race/main.c b/src/third_party/wiredtiger/test/csuite/wt2535_insert_race/main.c index ae18760a829..ba17d485e07 100644 --- a/src/third_party/wiredtiger/test/csuite/wt2535_insert_race/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt2535_insert_race/main.c @@ -49,9 +49,10 @@ main(int argc, char *argv[]) uint64_t current_value; int i; + if (!testutil_enable_long_tests()) /* Ignore unless requested */ + return (EXIT_SUCCESS); + opts = &_opts; - if (testutil_disable_long_tests()) - return (0); memset(opts, 0, sizeof(*opts)); opts->nthreads = 10; opts->nrecords = 1000; diff --git a/src/third_party/wiredtiger/test/csuite/wt2592_join_schema/main.c b/src/third_party/wiredtiger/test/csuite/wt2592_join_schema/main.c index 0ec1c765d99..be3eff6136c 100644 --- a/src/third_party/wiredtiger/test/csuite/wt2592_join_schema/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt2592_join_schema/main.c @@ -82,9 +82,12 @@ main(int argc, char *argv[]) tablename = strchr(opts->uri, ':'); testutil_assert(tablename != NULL); tablename++; - snprintf(countryuri, sizeof(countryuri), "index:%s:country", tablename); - snprintf(yearuri, sizeof(yearuri), "index:%s:year", tablename); - snprintf(joinuri, sizeof(joinuri), "join:%s", opts->uri); + testutil_check(__wt_snprintf( + countryuri, sizeof(countryuri), "index:%s:country", tablename)); + testutil_check(__wt_snprintf( + yearuri, sizeof(yearuri), "index:%s:year", tablename)); + testutil_check(__wt_snprintf( + joinuri, sizeof(joinuri), "join:%s", opts->uri)); testutil_check(wiredtiger_open(opts->home, NULL, "create,cache_size=200M", &opts->conn)); diff --git a/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c 
b/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c index 7c80496f1b6..e128df29f41 100644 --- a/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c @@ -59,11 +59,11 @@ main(int argc, char *argv[]) char flaguri[256]; char joinuri[256]; + if (!testutil_enable_long_tests()) /* Ignore unless requested */ + return (EXIT_SUCCESS); + opts = &_opts; - if (testutil_disable_long_tests()) - return (0); memset(opts, 0, sizeof(*opts)); - testutil_check(testutil_parse_opts(argc, argv, opts)); testutil_make_work_dir(opts->home); @@ -83,10 +83,14 @@ main(int argc, char *argv[]) tablename = strchr(opts->uri, ':'); testutil_assert(tablename != NULL); tablename++; - snprintf(posturi, sizeof(posturi), "index:%s:post", tablename); - snprintf(balanceuri, sizeof(balanceuri), "index:%s:balance", tablename); - snprintf(flaguri, sizeof(flaguri), "index:%s:flag", tablename); - snprintf(joinuri, sizeof(joinuri), "join:%s", opts->uri); + testutil_check(__wt_snprintf( + posturi, sizeof(posturi), "index:%s:post", tablename)); + testutil_check(__wt_snprintf( + balanceuri, sizeof(balanceuri), "index:%s:balance", tablename)); + testutil_check(__wt_snprintf( + flaguri, sizeof(flaguri), "index:%s:flag", tablename)); + testutil_check(__wt_snprintf( + joinuri, sizeof(joinuri), "join:%s", opts->uri)); testutil_check(session->create(session, posturi, "columns=(post)")); testutil_check(session->create(session, balanceuri, @@ -126,14 +130,14 @@ main(int argc, char *argv[]) balancecur->set_key(balancecur, 0); testutil_check(balancecur->search(balancecur)); - sprintf(cfg, "compare=lt,strategy=bloom,count=%d", - N_RECORDS / 100); + testutil_check(__wt_snprintf(cfg, sizeof(cfg), + "compare=lt,strategy=bloom,count=%d", N_RECORDS / 100)); testutil_check(session->join(session, joincur, balancecur, cfg)); flagcur->set_key(flagcur, 0); testutil_check(flagcur->search(flagcur)); - sprintf(cfg, 
"compare=eq,strategy=bloom,count=%d", - N_RECORDS / 100); + testutil_check(__wt_snprintf(cfg, sizeof(cfg), + "compare=eq,strategy=bloom,count=%d", N_RECORDS / 100)); testutil_check(session->join(session, joincur, flagcur, cfg)); /* Expect no values returned */ diff --git a/src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c b/src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c index 6cec9634cd1..46ba71372e5 100644 --- a/src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c @@ -82,11 +82,11 @@ main(int argc, char *argv[]) int i, nfail; const char *tablename; + if (!testutil_enable_long_tests()) /* Ignore unless requested */ + return (EXIT_SUCCESS); + opts = &_opts; sharedopts = &_sharedopts; - - if (testutil_disable_long_tests()) - return (0); memset(opts, 0, sizeof(*opts)); memset(sharedopts, 0, sizeof(*sharedopts)); memset(insert_args, 0, sizeof(insert_args)); @@ -114,12 +114,15 @@ main(int argc, char *argv[]) tablename = strchr(opts->uri, ':'); testutil_assert(tablename != NULL); tablename++; - snprintf(sharedopts->posturi, sizeof(sharedopts->posturi), - "index:%s:post", tablename); - snprintf(sharedopts->baluri, sizeof(sharedopts->baluri), - "index:%s:bal", tablename); - snprintf(sharedopts->flaguri, sizeof(sharedopts->flaguri), - "index:%s:flag", tablename); + testutil_check(__wt_snprintf( + sharedopts->posturi, sizeof(sharedopts->posturi), + "index:%s:post", tablename)); + testutil_check(__wt_snprintf( + sharedopts->baluri, sizeof(sharedopts->baluri), + "index:%s:bal", tablename)); + testutil_check(__wt_snprintf( + sharedopts->flaguri, sizeof(sharedopts->flaguri), + "index:%s:flag", tablename)); testutil_check(session->create(session, sharedopts->posturi, "columns=(post)")); diff --git a/src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c b/src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c index ddf249fb406..ce7bd72fa3f 100644 --- 
a/src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c @@ -96,9 +96,8 @@ static void run_check_subtest(TEST_OPTS *, const char *, uint64_t, bool, uint64_t *); static void run_check_subtest_range(TEST_OPTS *, const char *, bool); static int run_process(TEST_OPTS *, const char *, char *[], int *); -static int subtest_main(int, char *[], bool); +static void subtest_main(int, char *[], bool); static void subtest_populate(TEST_OPTS *, bool); -int main(int, char *[]); extern int __wt_optind; @@ -268,9 +267,11 @@ enable_failures(uint64_t allow_writes, uint64_t allow_reads) char value[100]; testutil_check(setenv("WT_FAIL_FS_ENABLE", "1", 1)); - snprintf(value, sizeof(value), "%" PRIu64, allow_writes); + testutil_check(__wt_snprintf( + value, sizeof(value), "%" PRIu64, allow_writes)); testutil_check(setenv("WT_FAIL_FS_WRITE_ALLOW", value, 1)); - snprintf(value, sizeof(value), "%" PRIu64, allow_reads); + testutil_check(__wt_snprintf( + value, sizeof(value), "%" PRIu64, allow_reads)); testutil_check(setenv("WT_FAIL_FS_READ_ALLOW", value, 1)); } @@ -326,10 +327,11 @@ run_check_subtest(TEST_OPTS *opts, const char *debugger, uint64_t nops, subtest_args[narg++] = (char *)"-v"; /* subtest is always verbose */ subtest_args[narg++] = (char *)"-p"; subtest_args[narg++] = (char *)"-o"; - snprintf(sarg, sizeof(sarg), "%" PRIu64, nops); + testutil_check(__wt_snprintf(sarg, sizeof(sarg), "%" PRIu64, nops)); subtest_args[narg++] = sarg; /* number of operations */ subtest_args[narg++] = (char *)"-n"; - snprintf(rarg, sizeof(rarg), "%" PRIu64, opts->nrecords); + testutil_check(__wt_snprintf( + rarg, sizeof(rarg), "%" PRIu64, opts->nrecords)); subtest_args[narg++] = rarg; /* number of records */ subtest_args[narg++] = NULL; testutil_assert(narg <= MAX_ARGS); @@ -446,7 +448,7 @@ run_process(TEST_OPTS *opts, const char *prog, char *argv[], int *status) * subtest_main -- * The main program for 
the subtest */ -static int +static void subtest_main(int argc, char *argv[], bool close_test) { TEST_OPTS *opts, _opts; @@ -454,8 +456,6 @@ subtest_main(int argc, char *argv[], bool close_test) char config[1024], filename[1024]; struct rlimit rlim; - if (testutil_disable_long_tests()) - return (0); opts = &_opts; memset(opts, 0, sizeof(*opts)); memset(&rlim, 0, sizeof(rlim)); @@ -466,15 +466,17 @@ subtest_main(int argc, char *argv[], bool close_test) testutil_make_work_dir(opts->home); /* Redirect stderr, stdout. */ - sprintf(filename, "%s/%s", opts->home, STDERR_FILE); + testutil_check(__wt_snprintf( + filename, sizeof(filename), "%s/%s", opts->home, STDERR_FILE)); testutil_assert(freopen(filename, "a", stderr) != NULL); - sprintf(filename, "%s/%s", opts->home, STDOUT_FILE); + testutil_check(__wt_snprintf( + filename, sizeof(filename), "%s/%s", opts->home, STDOUT_FILE)); testutil_assert(freopen(filename, "a", stdout) != NULL); - snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "create,cache_size=250M,log=(enabled)," "transaction_sync=(enabled,method=none),extensions=(" WT_FAIL_FS_LIB - "=(early_load,config={environment=true,verbose=true})]"); + "=(early_load,config={environment=true,verbose=true})]")); testutil_check(wiredtiger_open(opts->home, NULL, config, &opts->conn)); testutil_check( @@ -499,8 +501,6 @@ subtest_main(int argc, char *argv[], bool close_test) subtest_populate(opts, close_test); testutil_cleanup(opts); - - return (0); } /* @@ -622,8 +622,9 @@ main(int argc, char *argv[]) uint64_t nresults; const char *debugger; - if (testutil_disable_long_tests()) - return (0); + if (!testutil_enable_long_tests()) /* Ignore unless requested */ + return (EXIT_SUCCESS); + opts = &_opts; memset(opts, 0, sizeof(*opts)); debugger = NULL; @@ -635,11 +636,13 @@ main(int argc, char *argv[]) opts->nrecords = 50000; while (argc > 0) { - if (strcmp(argv[0], "subtest") == 0) - return (subtest_main(argc, argv, false)); - else if 
(strcmp(argv[0], "subtest_close") == 0) - return (subtest_main(argc, argv, true)); - else if (strcmp(argv[0], "gdb") == 0) + if (strcmp(argv[0], "subtest") == 0) { + subtest_main(argc, argv, false); + return (0); + } else if (strcmp(argv[0], "subtest_close") == 0) { + subtest_main(argc, argv, true); + return (0); + } else if (strcmp(argv[0], "gdb") == 0) debugger = "/usr/bin/gdb"; else testutil_assert(false); diff --git a/src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c b/src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c index 09dce624066..2fae85017d4 100644 --- a/src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c @@ -52,8 +52,8 @@ main(int argc, char *argv[]) testutil_check(testutil_parse_opts(argc, argv, opts)); testutil_make_work_dir(opts->home); - snprintf(buf, sizeof(buf), - "create,extensions=(" WT_FAIL_FS_LIB "=(early_load=true))"); + testutil_check(__wt_snprintf(buf, sizeof(buf), + "create,extensions=(" WT_FAIL_FS_LIB "=(early_load=true))")); testutil_check(wiredtiger_open(opts->home, NULL, buf, &opts->conn)); testutil_check( opts->conn->open_session(opts->conn, NULL, NULL, &session)); diff --git a/src/third_party/wiredtiger/test/csuite/wt3184_dup_index_collator/main.c b/src/third_party/wiredtiger/test/csuite/wt3184_dup_index_collator/main.c index bcefd2f1a3b..c969e7a1d7e 100644 --- a/src/third_party/wiredtiger/test/csuite/wt3184_dup_index_collator/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt3184_dup_index_collator/main.c @@ -157,9 +157,9 @@ main(int argc, char *argv[]) printf("duplicating cursor\n"); testutil_check(session->open_cursor(session, NULL, cursor, NULL, &cursor1)); - cursor->get_value(cursor, &got); + testutil_check(cursor->get_value(cursor, &got)); testutil_assert(item_to_int(&got) == 17); - cursor1->get_value(cursor1, &got); + testutil_check(cursor1->get_value(cursor1, &got)); testutil_assert(item_to_int(&got) == 17); 
testutil_check(session->close(session, NULL)); diff --git a/src/third_party/wiredtiger/test/cursor_order/cursor_order.c b/src/third_party/wiredtiger/test/cursor_order/cursor_order.c index 85b8c68e545..d3c64b54ab5 100644 --- a/src/third_party/wiredtiger/test/cursor_order/cursor_order.c +++ b/src/third_party/wiredtiger/test/cursor_order/cursor_order.c @@ -29,7 +29,6 @@ #include "cursor_order.h" static char home[512]; /* Program working dir */ -static char *progname; /* Program name */ static FILE *logfp; /* Log file */ static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *); @@ -51,10 +50,7 @@ main(int argc, char *argv[]) int ch, cnt, runs; char *config_open, *working_dir; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); cfg = &_cfg; config_open = NULL; @@ -185,19 +181,15 @@ wt_connect(SHARED_CONFIG *cfg, char *config_open) }; int ret; char config[512]; - size_t print_count; testutil_clean_work_dir(home); testutil_make_work_dir(home); - print_count = (size_t)snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "create,statistics=(all),error_prefix=\"%s\",%s%s", progname, config_open == NULL ? "" : ",", - config_open == NULL ? "" : config_open); - - if (print_count >= sizeof(config)) - testutil_die(EINVAL, "Config string too long"); + config_open == NULL ? 
"" : config_open)); if ((ret = wiredtiger_open( home, &event_handler, config, &cfg->conn)) != 0) diff --git a/src/third_party/wiredtiger/test/cursor_order/cursor_order_file.c b/src/third_party/wiredtiger/test/cursor_order/cursor_order_file.c index 5dc7194b5fb..42d7af54de4 100644 --- a/src/third_party/wiredtiger/test/cursor_order/cursor_order_file.c +++ b/src/third_party/wiredtiger/test/cursor_order/cursor_order_file.c @@ -34,23 +34,21 @@ file_create(SHARED_CONFIG *cfg, const char *name) WT_CONNECTION *conn; WT_SESSION *session; int ret; - char *p, *end, config[128]; + char config[128]; conn = cfg->conn; if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) testutil_die(ret, "conn.session"); - p = config; - end = config + sizeof(config); - p += snprintf(p, (size_t)(end - p), + testutil_check(__wt_snprintf(config, sizeof(config), "key_format=%s," "internal_page_max=%d," "split_deepen_min_child=200," - "leaf_page_max=%d,", - cfg->ftype == ROW ? "S" : "r", 16 * 1024, 128 * 1024); - if (cfg->ftype == FIX) - (void)snprintf(p, (size_t)(end - p), ",value_format=3t"); + "leaf_page_max=%d," + "%s", + cfg->ftype == ROW ? "S" : "r", 16 * 1024, 128 * 1024, + cfg->ftype == FIX ? 
",value_format=3t" : "")); if ((ret = session->create(session, name, config)) != 0) if (ret != EEXIST) @@ -67,9 +65,10 @@ load(SHARED_CONFIG *cfg, const char *name) WT_CURSOR *cursor; WT_ITEM *value, _value; WT_SESSION *session; - char keybuf[64], valuebuf[64]; - int64_t keyno; + size_t len; + uint64_t keyno; int ret; + char keybuf[64], valuebuf[64]; conn = cfg->conn; @@ -83,9 +82,10 @@ load(SHARED_CONFIG *cfg, const char *name) testutil_die(ret, "cursor.open"); value = &_value; - for (keyno = 1; keyno <= (int64_t)cfg->nkeys; ++keyno) { + for (keyno = 1; keyno <= cfg->nkeys; ++keyno) { if (cfg->ftype == ROW) { - snprintf(keybuf, sizeof(keybuf), "%016u", (u_int)keyno); + testutil_check(__wt_snprintf( + keybuf, sizeof(keybuf), "%016" PRIu64, keyno)); cursor->set_key(cursor, keybuf); } else cursor->set_key(cursor, (uint32_t)keyno); @@ -93,8 +93,10 @@ load(SHARED_CONFIG *cfg, const char *name) if (cfg->ftype == FIX) cursor->set_value(cursor, 0x01); else { - value->size = (uint32_t)snprintf( - valuebuf, sizeof(valuebuf), "%37u", (u_int)keyno); + testutil_check(__wt_snprintf_len_set( + valuebuf, sizeof(valuebuf), + &len, "%37" PRIu64, keyno)); + value->size = (uint32_t)len; cursor->set_value(cursor, value); } if ((ret = cursor->insert(cursor)) != 0) diff --git a/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c b/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c index 58da49b2991..299f22684c9 100644 --- a/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c +++ b/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c @@ -69,7 +69,8 @@ ops_start(SHARED_CONFIG *cfg) run_info[i].cfg = cfg; if (i == 0 || cfg->multiple_files) { run_info[i].name = dmalloc(64); - snprintf(run_info[i].name, 64, FNAME, (int)i); + testutil_check(__wt_snprintf( + run_info[i].name, 64, FNAME, (int)i)); /* Vary by orders of magnitude */ if (cfg->vary_nops) @@ -93,8 +94,8 @@ ops_start(SHARED_CONFIG *cfg) run_info[offset].name = dmalloc(64); /* Have 
reverse scans read from tables with writes. */ name_index = i % cfg->append_inserters; - snprintf( - run_info[offset].name, 64, FNAME, (int)name_index); + testutil_check(__wt_snprintf( + run_info[offset].name, 64, FNAME, (int)name_index)); /* Vary by orders of magnitude */ if (cfg->vary_nops) @@ -231,7 +232,7 @@ reverse_scan(void *arg) id = (uintmax_t)arg; s = &run_info[id]; cfg = s->cfg; - __wt_thread_id(tid, sizeof(tid)); + testutil_check(__wt_thread_id(tid, sizeof(tid))); __wt_random_init(&s->rnd); printf(" reverse scan thread %2" PRIuMAX @@ -272,6 +273,7 @@ append_insert_op( { WT_ITEM *value, _value; uint64_t keyno; + size_t len; int ret; char keybuf[64], valuebuf[64]; @@ -281,7 +283,8 @@ append_insert_op( keyno = __wt_atomic_add64(&cfg->key_range, 1); if (cfg->ftype == ROW) { - snprintf(keybuf, sizeof(keybuf), "%016u", (u_int)keyno); + testutil_check(__wt_snprintf( + keybuf, sizeof(keybuf), "%016" PRIu64, keyno)); cursor->set_key(cursor, keybuf); } else cursor->set_key(cursor, (uint32_t)keyno); @@ -291,8 +294,9 @@ append_insert_op( if (cfg->ftype == FIX) cursor->set_value(cursor, 0x10); else { - value->size = (uint32_t)snprintf( - valuebuf, sizeof(valuebuf), "XXX %37u", (u_int)keyno); + testutil_check(__wt_snprintf_len_set( + valuebuf, sizeof(valuebuf), &len, "XXX %37" PRIu64, keyno)); + value->size = (uint32_t)len; cursor->set_value(cursor, value); } if ((ret = cursor->insert(cursor)) != 0) @@ -318,7 +322,7 @@ append_insert(void *arg) id = (uintmax_t)arg; s = &run_info[id]; cfg = s->cfg; - __wt_thread_id(tid, sizeof(tid)); + testutil_check(__wt_thread_id(tid, sizeof(tid))); __wt_random_init(&s->rnd); printf("write thread %2" PRIuMAX " starting: tid: %s, file: %s\n", diff --git a/src/third_party/wiredtiger/test/fops/file.c b/src/third_party/wiredtiger/test/fops/file.c index ea15f1ee80d..d1cd22ab391 100644 --- a/src/third_party/wiredtiger/test/fops/file.c +++ b/src/third_party/wiredtiger/test/fops/file.c @@ -51,7 +51,7 @@ obj_bulk(void) if ((ret = c->close(c)) 
!= 0) testutil_die(ret, "cursor.close"); } else if (ret != ENOENT && ret != EBUSY && ret != EINVAL) - testutil_die(ret, "session.open_cursor"); + testutil_die(ret, "session.open_cursor bulk"); } if ((ret = session->close(session, NULL)) != 0) testutil_die(ret, "session.close"); @@ -71,7 +71,8 @@ obj_bulk_unique(int force) /* Generate a unique object name. */ if ((ret = pthread_rwlock_wrlock(&single)) != 0) testutil_die(ret, "pthread_rwlock_wrlock single"); - (void)snprintf(new_uri, sizeof(new_uri), "%s.%u", uri, ++uid); + testutil_check(__wt_snprintf( + new_uri, sizeof(new_uri), "%s.%u", uri, ++uid)); if ((ret = pthread_rwlock_unlock(&single)) != 0) testutil_die(ret, "pthread_rwlock_unlock single"); @@ -79,12 +80,17 @@ obj_bulk_unique(int force) testutil_die(ret, "session.create: %s", new_uri); __wt_yield(); - if ((ret = - session->open_cursor(session, new_uri, NULL, "bulk", &c)) != 0) - testutil_die(ret, "session.open_cursor: %s", new_uri); - - if ((ret = c->close(c)) != 0) - testutil_die(ret, "cursor.close"); + /* + * Opening a bulk cursor may have raced with a forced checkpoint + * which created a checkpoint of the empty file, and triggers an EINVAL + */ + if ((ret = session->open_cursor( + session, new_uri, NULL, "bulk", &c)) == 0) { + if ((ret = c->close(c)) != 0) + testutil_die(ret, "cursor.close"); + } else if (ret != EINVAL) + testutil_die(ret, + "session.open_cursor bulk unique: %s, new_uri"); while ((ret = session->drop( session, new_uri, force ? "force" : NULL)) != 0) @@ -147,7 +153,8 @@ obj_create_unique(int force) /* Generate a unique object name. 
*/ if ((ret = pthread_rwlock_wrlock(&single)) != 0) testutil_die(ret, "pthread_rwlock_wrlock single"); - (void)snprintf(new_uri, sizeof(new_uri), "%s.%u", uri, ++uid); + testutil_check(__wt_snprintf( + new_uri, sizeof(new_uri), "%s.%u", uri, ++uid)); if ((ret = pthread_rwlock_unlock(&single)) != 0) testutil_die(ret, "pthread_rwlock_unlock single"); @@ -190,9 +197,13 @@ obj_checkpoint(void) if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) testutil_die(ret, "conn.session"); - /* Force the checkpoint so it has to be taken. */ + /* + * Force the checkpoint so it has to be taken. Forced checkpoints can + * race with other metadata operations and return EBUSY - we'd expect + * applications using forced checkpoints to retry on EBUSY. + */ if ((ret = session->checkpoint(session, "force")) != 0) - if (ret != ENOENT) + if (ret != EBUSY && ret != ENOENT) testutil_die(ret, "session.checkpoint"); if ((ret = session->close(session, NULL)) != 0) diff --git a/src/third_party/wiredtiger/test/fops/t.c b/src/third_party/wiredtiger/test/fops/t.c index 7b4a7cf8fca..07ac07349e3 100644 --- a/src/third_party/wiredtiger/test/fops/t.c +++ b/src/third_party/wiredtiger/test/fops/t.c @@ -34,7 +34,6 @@ u_int nops; /* Operations */ const char *uri; /* Object */ const char *config; /* Object config */ -static char *progname; /* Program name */ static FILE *logfp; /* Log file */ static char home[512]; @@ -71,22 +70,15 @@ main(int argc, char *argv[]) int ch, cnt, ret, runs; char *config_open, *working_dir; - working_dir = NULL; - - /* Remove directories */ - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); if ((ret = pthread_rwlock_init(&single, NULL)) != 0) testutil_die(ret, "pthread_rwlock_init: single"); - config_open = NULL; nops = 1000; nthreads = 10; runs = 1; - + config_open = working_dir = NULL; while ((ch = __wt_getopt(progname, argc, argv, "C:h:l:n:r:t:")) != EOF) switch (ch) { case 'C': 
/* wiredtiger_open config */ @@ -165,11 +157,11 @@ wt_startup(char *config_open) testutil_make_work_dir(home); - snprintf(config_buf, sizeof(config_buf), + testutil_check(__wt_snprintf(config_buf, sizeof(config_buf), "create,error_prefix=\"%s\",cache_size=5MB%s%s", progname, config_open == NULL ? "" : ",", - config_open == NULL ? "" : config_open); + config_open == NULL ? "" : config_open)); if ((ret = wiredtiger_open( home, &event_handler, config_buf, &conn)) != 0) testutil_die(ret, "wiredtiger_open"); @@ -225,6 +217,11 @@ handle_message(WT_EVENT_HANDLER *handler, (void)(handler); (void)(session); + /* Ignore messages about failing to create forced checkpoints. */ + if (strstr( + message, "forced or named checkpoint") != NULL) + return (0); + if (logfp != NULL) return (fprintf(logfp, "%s\n", message) < 0 ? -1 : 0); diff --git a/src/third_party/wiredtiger/test/format/backup.c b/src/third_party/wiredtiger/test/format/backup.c index 69fdf771de9..8aa614fa970 100644 --- a/src/third_party/wiredtiger/test/format/backup.c +++ b/src/third_party/wiredtiger/test/format/backup.c @@ -63,7 +63,7 @@ copy_file(WT_SESSION *session, const char *name) len = strlen("BACKUP") + strlen(name) + 10; first = dmalloc(len); - (void)snprintf(first, len, "BACKUP/%s", name); + testutil_check(__wt_snprintf(first, len, "BACKUP/%s", name)); testutil_check(__wt_copy_and_sync(session, name, first)); /* @@ -72,7 +72,7 @@ copy_file(WT_SESSION *session, const char *name) */ len = strlen("BACKUP_COPY") + strlen(name) + 10; second = dmalloc(len); - (void)snprintf(second, len, "BACKUP_COPY/%s", name); + testutil_check(__wt_snprintf(second, len, "BACKUP_COPY/%s", name)); testutil_check(__wt_copy_and_sync(session, first, second)); free(first); diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c index 50430fe073e..22b40f7164d 100644 --- a/src/third_party/wiredtiger/test/format/config.c +++ b/src/third_party/wiredtiger/test/format/config.c @@ -63,39 
+63,42 @@ config_setup(void) config_in_memory(); /* - * Choose a data source type and a file type: they're interrelated (LSM - * trees are only compatible with row-store) and other items depend on - * them. + * Choose a file format and a data source: they're interrelated (LSM is + * only compatible with row-store) and other items depend on them. */ + if (!config_is_perm("file_type")) { + if (config_is_perm("data_source") && DATASOURCE("lsm")) + config_single("file_type=row", 0); + else + switch (mmrand(NULL, 1, 10)) { + case 1: /* 10% */ + config_single("file_type=fix", 0); + break; + case 2: case 3: case 4: /* 30% */ + config_single("file_type=var", 0); + break; /* 60% */ + case 5: case 6: case 7: case 8: case 9: case 10: + config_single("file_type=row", 0); + break; + } + } + config_map_file_type(g.c_file_type, &g.type); + if (!config_is_perm("data_source")) switch (mmrand(NULL, 1, 3)) { case 1: config_single("data_source=file", 0); break; case 2: - if (!g.c_in_memory) { - config_single("data_source=lsm", 0); - break; - } - /* FALLTHROUGH */ - case 3: config_single("data_source=table", 0); break; - } - - if (!config_is_perm("file_type")) - switch (DATASOURCE("lsm") ? 
5 : mmrand(NULL, 1, 10)) { - case 1: - config_single("file_type=fix", 0); - break; - case 2: case 3: case 4: - config_single("file_type=var", 0); - break; - case 5: case 6: case 7: case 8: case 9: case 10: - config_single("file_type=row", 0); + case 3: + if (g.c_in_memory || g.type != ROW) + config_single("data_source=table", 0); + else + config_single("data_source=lsm", 0); break; } - config_map_file_type(g.c_file_type, &g.type); /* * If data_source and file_type were both "permanent", we may still @@ -104,7 +107,7 @@ config_setup(void) if (DATASOURCE("lsm") && g.type != ROW) { fprintf(stderr, "%s: lsm data_source is only compatible with row file_type\n", - g.progname); + progname); exit(EXIT_FAILURE); } @@ -254,8 +257,8 @@ config_compression(const char *conf_name) */ cstr = "none"; if (strcmp(conf_name, "logging_compression") == 0 && g.c_logging == 0) { - (void)snprintf( - confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr); + testutil_check(__wt_snprintf( + confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr)); config_single(confbuf, 0); return; } @@ -299,7 +302,8 @@ config_compression(const char *conf_name) break; } - (void)snprintf(confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr); + testutil_check(__wt_snprintf( + confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr)); config_single(confbuf, 0); } @@ -462,7 +466,7 @@ config_lrt(void) * stores. 
*/ if (g.type == FIX) { - if (config_is_perm("long_running_txn")) + if (config_is_perm("long_running_txn") && g.c_long_running_txn) testutil_die(EINVAL, "long_running_txn not supported with fixed-length " "column store"); @@ -675,13 +679,14 @@ void config_single(const char *s, int perm) { CONFIG *cp; - long v; + long vlong; + uint32_t v; char *p; const char *ep; if ((ep = strchr(s, '=')) == NULL) { fprintf(stderr, - "%s: %s: illegal configuration value\n", g.progname, s); + "%s: %s: illegal configuration value\n", progname, s); exit(EXIT_FAILURE); } @@ -740,34 +745,35 @@ config_single(const char *s, int perm) return; } - v = -1; + vlong = -1; if (F_ISSET(cp, C_BOOL)) { if (strncmp(ep, "off", strlen("off")) == 0) - v = 0; + vlong = 0; else if (strncmp(ep, "on", strlen("on")) == 0) - v = 1; + vlong = 1; } - if (v == -1) { - v = strtol(ep, &p, 10); + if (vlong == -1) { + vlong = strtol(ep, &p, 10); if (*p != '\0') { fprintf(stderr, "%s: %s: illegal numeric value\n", - g.progname, s); + progname, s); exit(EXIT_FAILURE); } } + v = (uint32_t)vlong; if (F_ISSET(cp, C_BOOL)) { if (v != 0 && v != 1) { fprintf(stderr, "%s: %s: value of boolean not 0 or 1\n", - g.progname, s); + progname, s); exit(EXIT_FAILURE); } } else if (v < cp->min || v > cp->maxset) { fprintf(stderr, "%s: %s: value outside min/max values of %" PRIu32 "-%" PRIu32 "\n", - g.progname, s, cp->min, cp->maxset); + progname, s, cp->min, cp->maxset); exit(EXIT_FAILURE); } - *cp->v = (uint32_t)v; + *cp->v = v; } /* @@ -883,7 +889,7 @@ config_find(const char *s, size_t len) return (cp); fprintf(stderr, - "%s: %s: unknown configuration keyword\n", g.progname, s); + "%s: %s: unknown configuration keyword\n", progname, s); config_error(); exit(EXIT_FAILURE); } diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h index e3e1e73a786..b5feb7a5321 100644 --- a/src/third_party/wiredtiger/test/format/config.h +++ b/src/third_party/wiredtiger/test/format/config.h @@ 
-284,7 +284,7 @@ static CONFIG c[] = { { "split_pct", "page split size as a percentage of the maximum page size", - 0x0, 40, 85, 85, &g.c_split_pct, NULL }, + 0x0, 50, 100, 100, &g.c_split_pct, NULL }, { "statistics", "maintain statistics", /* 20% */ diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h index 6bb44410acc..41cc48c4278 100644 --- a/src/third_party/wiredtiger/test/format/format.h +++ b/src/third_party/wiredtiger/test/format/format.h @@ -79,8 +79,6 @@ #define FORMAT_OPERATION_REPS 3 /* 3 thread operations sets */ typedef struct { - char *progname; /* Program name */ - char *home; /* Home directory */ char *home_backup; /* Hot-backup directory */ char *home_backup_init; /* Initialize backup command */ diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c index 940318c87a9..72e885bd0d6 100644 --- a/src/third_party/wiredtiger/test/format/ops.c +++ b/src/third_party/wiredtiger/test/format/ops.c @@ -28,14 +28,17 @@ #include "format.h" -static int col_insert(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *); -static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t); -static int col_update(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); +static int col_insert(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *); +static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t, bool); +static int col_update( + TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); static int nextprev(WT_CURSOR *, int); static void *ops(void *); -static int row_insert(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); -static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t); -static int row_update(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); +static int row_insert( + TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); +static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t, bool); +static int row_update( + TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); static 
void table_append_init(void); #ifdef HAVE_BERKELEY_DB @@ -243,6 +246,9 @@ typedef struct { bool insert; /* Insert operation */ } SNAP_OPS; +#define SNAP_TRACK \ + (snap != NULL && (size_t)(snap - snap_list) < WT_ELEMENTS(snap_list)) + /* * snap_track -- * Add a single snapshot isolation returned value to the list. @@ -395,15 +401,16 @@ snap_check(WT_CURSOR *cursor, static void * ops(void *arg) { + enum { INSERT, READ, REMOVE, UPDATE } op; SNAP_OPS *snap, snap_list[64]; TINFO *tinfo; WT_CONNECTION *conn; - WT_CURSOR *cursor, *cursor_insert; + WT_CURSOR *cursor; WT_DECL_RET; WT_ITEM *key, _key, *value, _value; WT_SESSION *session; uint64_t keyno, ckpt_op, reset_op, session_op; - uint32_t op, rnd; + uint32_t rnd; u_int i; int dir; char *ckpt_config, ckpt_name[64]; @@ -429,9 +436,9 @@ ops(void *arg) val_gen_setup(&tinfo->rnd, value); /* Set the first operation where we'll create sessions and cursors. */ - session_op = 0; + cursor = NULL; session = NULL; - cursor = cursor_insert = NULL; + session_op = 0; /* Set the first operation where we'll perform checkpoint operations. */ ckpt_op = g.c_checkpoints ? mmrand(&tinfo->rnd, 100, 10000) : 0; @@ -485,24 +492,11 @@ ops(void *arg) readonly = true; } else { /* - * Open two cursors: one for overwriting and one - * for append (if it's a column-store). - * - * The reason is when testing with existing - * records, we don't track if a record was - * deleted or not, which means we must use - * cursor->insert with overwriting configured. - * But, in column-store files where we're - * testing with new, appended records, we don't - * want to have to specify the record number, - * which requires an append configuration. + * Configure "append", in the case of column + * stores, we append when inserting new rows. 
*/ - testutil_check(session->open_cursor(session, - g.uri, NULL, "overwrite", &cursor)); - if (g.type == FIX || g.type == VAR) - testutil_check(session->open_cursor( - session, g.uri, - NULL, "append", &cursor_insert)); + testutil_check(session->open_cursor( + session, g.uri, NULL, "append", &cursor)); /* Pick the next session/cursor close/open. */ session_op += mmrand(&tinfo->rnd, 100, 5000); @@ -536,8 +530,9 @@ ops(void *arg) pthread_rwlock_trywrlock(&g.backup_lock) == EBUSY) ckpt_config = NULL; else { - (void)snprintf(ckpt_name, sizeof(ckpt_name), - "name=thread-%d", tinfo->id); + testutil_check(__wt_snprintf( + ckpt_name, sizeof(ckpt_name), + "name=thread-%d", tinfo->id)); ckpt_config = ckpt_name; } @@ -563,8 +558,9 @@ ops(void *arg) strcpy(ckpt_name, "checkpoint=WiredTigerCheckpoint"); else - (void)snprintf(ckpt_name, sizeof(ckpt_name), - "checkpoint=thread-%d", tinfo->id); + testutil_check(__wt_snprintf( + ckpt_name, sizeof(ckpt_name), + "checkpoint=thread-%d", tinfo->id)); ckpt_available = true; skip_checkpoint: /* Pick the next checkpoint operation. */ @@ -600,111 +596,174 @@ skip_checkpoint: /* Pick the next checkpoint operation. */ intxn = true; } + /* Select a row. */ keyno = mmrand(&tinfo->rnd, 1, (u_int)g.rows); positioned = false; + /* Select an operation. */ + op = READ; + if (!readonly) { + i = mmrand(&tinfo->rnd, 1, 100); + if (i < g.c_delete_pct) + op = REMOVE; + else if (i < g.c_delete_pct + g.c_insert_pct) + op = INSERT; + else if (i < + g.c_delete_pct + g.c_insert_pct + g.c_write_pct) + op = UPDATE; + else + op = READ; + } + /* - * Perform some number of operations: the percentage of deletes, - * inserts and writes are specified, reads are the rest. The - * percentages don't have to add up to 100, a high percentage - * of deletes will mean fewer inserts and writes. Modifications - * are always followed by a read to confirm it worked. 
+ * Inserts, removes and updates can be done following a cursor + * set-key, or based on a cursor position taken from a previous + * search. If not already doing a read, position the cursor at + * an existing point in the tree 20% of the time. */ - op = readonly ? UINT32_MAX : mmrand(&tinfo->rnd, 1, 100); - if (op < g.c_delete_pct) { - ++tinfo->remove; + positioned = false; + if (op != READ && mmrand(&tinfo->rnd, 1, 5) == 1) { + ++tinfo->search; + ret = read_row(cursor, key, value, keyno); + if (ret == 0) { + positioned = true; + if (SNAP_TRACK) + snap_track(snap++, keyno, NULL, value); + } else { + positioned = false; + if (ret == WT_ROLLBACK && intxn) + goto deadlock; + testutil_assert(ret == WT_NOTFOUND); + } + } +#if 0 + /* Optionally reserve a row. */ + if (!readonly && intxn && mmrand(&tinfo->rnd, 0, 20) == 1) { switch (g.type) { case ROW: - ret = row_remove(cursor, key, keyno); + ret = + row_reserve(cursor, key, keyno, positioned); break; case FIX: case VAR: - ret = col_remove(cursor, key, keyno); + ret = col_reserve(cursor, keyno, positioned); break; } if (ret == 0) { positioned = true; - if (snap != NULL && (size_t) - (snap - snap_list) < WT_ELEMENTS(snap_list)) - snap_track(snap++, keyno, NULL, NULL); + __wt_yield(); } else { positioned = false; if (ret == WT_ROLLBACK && intxn) goto deadlock; + testutil_assert(ret == WT_NOTFOUND); } - } else if (op < g.c_delete_pct + g.c_insert_pct) { - ++tinfo->insert; + } +#endif + /* Perform the operation. */ + switch (op) { + case INSERT: switch (g.type) { case ROW: - key_gen_insert(&tinfo->rnd, key, keyno); - val_gen(&tinfo->rnd, value, keyno); - ret = row_insert(cursor, key, value, keyno); + ret = row_insert(tinfo, + cursor, key, value, keyno, positioned); break; case FIX: case VAR: /* - * We can only append so many new records, if - * we've reached that limit, update a record - * instead of doing an insert. 
+ * We can only append so many new records, once + * we reach that limit, update a record instead + * of inserting. */ if (g.append_cnt >= g.append_max) - goto skip_insert; + goto update_instead_of_insert; - /* Insert, then reset the insert cursor. */ - val_gen(&tinfo->rnd, value, g.rows + 1); ret = col_insert( - cursor_insert, key, value, &keyno); - testutil_check( - cursor_insert->reset(cursor_insert)); + tinfo, cursor, key, value, &keyno); break; } + + /* Insert never leaves the cursor positioned. */ positioned = false; if (ret == 0) { - if (snap != NULL && (size_t) - (snap - snap_list) < WT_ELEMENTS(snap_list)) + ++tinfo->insert; + if (SNAP_TRACK) snap_track(snap++, keyno, g.type == ROW ? key : NULL, value); - } else + } else { if (ret == WT_ROLLBACK && intxn) goto deadlock; - } else if ( - op < g.c_delete_pct + g.c_insert_pct + g.c_write_pct) { - ++tinfo->update; + testutil_assert(ret == 0 || ret == WT_ROLLBACK); + } + break; + case READ: + ++tinfo->search; + ret = read_row(cursor, key, value, keyno); + if (ret == 0) { + positioned = true; + if (SNAP_TRACK) + snap_track(snap++, keyno, NULL, value); + } else { + positioned = false; + if (ret == WT_ROLLBACK && intxn) + goto deadlock; + testutil_assert(ret == WT_NOTFOUND); + } + break; + case REMOVE: switch (g.type) { case ROW: - key_gen(key, keyno); - val_gen(&tinfo->rnd, value, keyno); - ret = row_update(cursor, key, value, keyno); + ret = + row_remove(cursor, key, keyno, positioned); break; case FIX: case VAR: -skip_insert: val_gen(&tinfo->rnd, value, keyno); - ret = col_update(cursor, key, value, keyno); + ret = + col_remove(cursor, key, keyno, positioned); break; } if (ret == 0) { - positioned = true; - if (snap != NULL && (size_t) - (snap - snap_list) < WT_ELEMENTS(snap_list)) - snap_track(snap++, keyno, NULL, value); + ++tinfo->remove; + /* + * Don't set positioned: it's unchanged from the + * previous state, but not necessarily set. 
+ */ + if (SNAP_TRACK) + snap_track(snap++, keyno, NULL, NULL); } else { positioned = false; if (ret == WT_ROLLBACK && intxn) goto deadlock; + testutil_assert(ret == WT_NOTFOUND); + } + break; + case UPDATE: +update_instead_of_insert: + ++tinfo->update; + + /* Update the row. */ + switch (g.type) { + case ROW: + ret = row_update(tinfo, + cursor, key, value, keyno, positioned); + break; + case FIX: + case VAR: + ret = col_update(tinfo, + cursor, key, value, keyno, positioned); + break; } - } else { - ++tinfo->search; - ret = read_row(cursor, key, value, keyno); if (ret == 0) { positioned = true; - if (snap != NULL && (size_t) - (snap - snap_list) < WT_ELEMENTS(snap_list)) + if (SNAP_TRACK) snap_track(snap++, keyno, NULL, value); } else { positioned = false; if (ret == WT_ROLLBACK && intxn) goto deadlock; + testutil_assert(ret == 0 || ret == WT_ROLLBACK); } + break; } /* @@ -727,8 +786,8 @@ skip_insert: val_gen(&tinfo->rnd, value, keyno); testutil_check(cursor->reset(cursor)); /* - * If we're in a transaction, commit 40% of the time and - * rollback 10% of the time. + * Continue if not in a transaction, else add more operations + * to the transaction half the time. */ if (!intxn || (rnd = mmrand(&tinfo->rnd, 1, 10)) > 5) continue; @@ -741,6 +800,10 @@ skip_insert: val_gen(&tinfo->rnd, value, keyno); cursor, snap_list, snap, key, value)) == WT_ROLLBACK) goto deadlock; + /* + * If we're in a transaction, commit 40% of the time and + * rollback 10% of the time. + */ switch (rnd) { case 1: case 2: case 3: case 4: /* 40% */ testutil_check( @@ -1040,27 +1103,94 @@ nextprev(WT_CURSOR *cursor, int next) return (ret); } +#if 0 +/* + * row_reserve -- + * Reserve a row in a row-store file. 
+ */ +static int +row_reserve(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned) +{ + WT_DECL_RET; + + if (!positioned) { + key_gen(key, keyno); + cursor->set_key(cursor, key); + } + + if (g.logging == LOG_OPS) + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, + "%-10s{%.*s}", "reserve", (int)key->size, key->data); + + switch (ret = cursor->reserve(cursor)) { + case 0: + break; + case WT_CACHE_FULL: + case WT_ROLLBACK: + return (WT_ROLLBACK); + case WT_NOTFOUND: + return (WT_NOTFOUND); + default: + testutil_die(ret, + "row_reserve: reserve row %" PRIu64 " by key", keyno); + } + return (0); +} + +/* + * col_reserve -- + * Reserve a row in a column-store file. + */ +static int +col_reserve(WT_CURSOR *cursor, uint64_t keyno, bool positioned) +{ + WT_DECL_RET; + + if (!positioned) + cursor->set_key(cursor, keyno); + + if (g.logging == LOG_OPS) + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, + "%-10s%" PRIu64, "reserve", keyno); + + switch (ret = cursor->reserve(cursor)) { + case 0: + break; + case WT_CACHE_FULL: + case WT_ROLLBACK: + return (WT_ROLLBACK); + case WT_NOTFOUND: + return (WT_NOTFOUND); + default: + testutil_die(ret, "col_reserve: %" PRIu64, keyno); + } + return (0); +} +#endif + /* * row_update -- * Update a row in a row-store file. 
*/ static int -row_update(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) +row_update(TINFO *tinfo, WT_CURSOR *cursor, + WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned) { WT_DECL_RET; - WT_SESSION *session; - session = cursor->session; + if (!positioned) { + key_gen(key, keyno); + cursor->set_key(cursor, key); + } + val_gen(&tinfo->rnd, value, keyno); + cursor->set_value(cursor, value); - /* Log the operation */ if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, session, + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s{%.*s}, {%.*s}", "put", (int)key->size, key->data, (int)value->size, value->data); - cursor->set_key(cursor, key); - cursor->set_value(cursor, value); switch (ret = cursor->update(cursor)) { case 0: break; @@ -1086,32 +1216,32 @@ row_update(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) * Update a row in a column-store file. */ static int -col_update(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) +col_update(TINFO *tinfo, WT_CURSOR *cursor, + WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned) { WT_DECL_RET; - WT_SESSION *session; - session = cursor->session; + if (!positioned) + cursor->set_key(cursor, keyno); + val_gen(&tinfo->rnd, value, keyno); + if (g.type == FIX) + cursor->set_value(cursor, *(uint8_t *)value->data); + else + cursor->set_value(cursor, value); - /* Log the operation */ if (g.logging == LOG_OPS) { if (g.type == FIX) - (void)g.wt_api->msg_printf(g.wt_api, session, + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s%" PRIu64 " {0x%02" PRIx8 "}", "update", keyno, ((uint8_t *)value->data)[0]); else - (void)g.wt_api->msg_printf(g.wt_api, session, + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s%" PRIu64 " {%.*s}", "update", keyno, (int)value->size, (char *)value->data); } - cursor->set_key(cursor, keyno); - if (g.type == FIX) - cursor->set_value(cursor, *(uint8_t *)value->data); - else - 
cursor->set_value(cursor, value); switch (ret = cursor->update(cursor)) { case 0: break; @@ -1238,22 +1368,29 @@ table_append(uint64_t keyno) * Insert a row in a row-store file. */ static int -row_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) +row_insert(TINFO *tinfo, WT_CURSOR *cursor, + WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned) { WT_DECL_RET; - WT_SESSION *session; - session = cursor->session; + /* + * If we positioned the cursor already, it's a test of an update using + * the insert method. Otherwise, generate a unique key and insert. + */ + if (!positioned) { + key_gen_insert(&tinfo->rnd, key, keyno); + cursor->set_key(cursor, key); + } + val_gen(&tinfo->rnd, value, keyno); + cursor->set_value(cursor, value); /* Log the operation */ if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, session, + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s{%.*s}, {%.*s}", "insert", (int)key->size, key->data, (int)value->size, value->data); - cursor->set_key(cursor, key); - cursor->set_value(cursor, value); switch (ret = cursor->insert(cursor)) { case 0: break; @@ -1279,14 +1416,13 @@ row_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) * Insert an element in a column-store file. 
*/ static int -col_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) +col_insert(TINFO *tinfo, + WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) { WT_DECL_RET; - WT_SESSION *session; uint64_t keyno; - session = cursor->session; - + val_gen(&tinfo->rnd, value, g.rows + 1); if (g.type == FIX) cursor->set_value(cursor, *(uint8_t *)value->data); else @@ -1307,12 +1443,12 @@ col_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) if (g.logging == LOG_OPS) { if (g.type == FIX) - (void)g.wt_api->msg_printf(g.wt_api, session, + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s%" PRIu64 " {0x%02" PRIx8 "}", "insert", keyno, ((uint8_t *)value->data)[0]); else - (void)g.wt_api->msg_printf(g.wt_api, session, + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s%" PRIu64 " {%.*s}", "insert", keyno, (int)value->size, (char *)value->data); @@ -1335,21 +1471,19 @@ col_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) * Remove an row from a row-store file. */ static int -row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno) +row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned) { WT_DECL_RET; - WT_SESSION *session; - session = cursor->session; - - key_gen(key, keyno); + if (!positioned) { + key_gen(key, keyno); + cursor->set_key(cursor, key); + } - /* Log the operation */ if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf( - g.wt_api, session, "%-10s%" PRIu64, "remove", keyno); + (void)g.wt_api->msg_printf(g.wt_api, + cursor->session, "%-10s%" PRIu64, "remove", keyno); - cursor->set_key(cursor, key); /* We use the cursor in overwrite mode, check for existence. */ if ((ret = cursor->search(cursor)) == 0) ret = cursor->remove(cursor); @@ -1385,19 +1519,17 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno) * Remove a row from a column-store file. 
*/ static int -col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno) +col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned) { WT_DECL_RET; - WT_SESSION *session; - session = cursor->session; + if (!positioned) + cursor->set_key(cursor, keyno); - /* Log the operation */ if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf( - g.wt_api, session, "%-10s%" PRIu64, "remove", keyno); + (void)g.wt_api->msg_printf(g.wt_api, + cursor->session, "%-10s%" PRIu64, "remove", keyno); - cursor->set_key(cursor, keyno); /* We use the cursor in overwrite mode, check for existence. */ if ((ret = cursor->search(cursor)) == 0) ret = cursor->remove(cursor); @@ -1448,7 +1580,7 @@ notfound_chk(const char *f, int wt_ret, int bdb_notfound, uint64_t keyno) return (1); if (bdb_notfound) { - fprintf(stderr, "%s: %s:", g.progname, f); + fprintf(stderr, "%s: %s:", progname, f); if (keyno != 0) fprintf(stderr, " row %" PRIu64 ":", keyno); fprintf(stderr, @@ -1456,7 +1588,7 @@ notfound_chk(const char *f, int wt_ret, int bdb_notfound, uint64_t keyno) testutil_die(0, NULL); } if (wt_ret == WT_NOTFOUND) { - fprintf(stderr, "%s: %s:", g.progname, f); + fprintf(stderr, "%s: %s:", progname, f); if (keyno != 0) fprintf(stderr, " row %" PRIu64 ":", keyno); fprintf(stderr, diff --git a/src/third_party/wiredtiger/test/format/rebalance.c b/src/third_party/wiredtiger/test/format/rebalance.c index 9849b7df82b..e35c62e7255 100644 --- a/src/third_party/wiredtiger/test/format/rebalance.c +++ b/src/third_party/wiredtiger/test/format/rebalance.c @@ -41,10 +41,10 @@ wts_rebalance(void) track("rebalance", 0ULL, NULL); /* Dump the current object. */ - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), ".." DIR_DELIM_STR ".." DIR_DELIM_STR "wt" " -h %s dump -f %s/rebalance.orig %s", - g.home, g.home, g.uri); + g.home, g.home, g.uri)); testutil_checkfmt(system(cmd), "command failed: %s", cmd); /* Rebalance, then verify the object. 
*/ @@ -66,21 +66,21 @@ wts_rebalance(void) wts_verify("post-rebalance verify"); wts_close(); - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), ".." DIR_DELIM_STR ".." DIR_DELIM_STR "wt" " -h %s dump -f %s/rebalance.new %s", - g.home, g.home, g.uri); + g.home, g.home, g.uri)); testutil_checkfmt(system(cmd), "command failed: %s", cmd); /* Compare the old/new versions of the object. */ #ifdef _WIN32 - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), "fc /b %s\\rebalance.orig %s\\rebalance.new > NUL", - g.home, g.home); + g.home, g.home)); #else - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), "cmp %s/rebalance.orig %s/rebalance.new > /dev/null", - g.home, g.home); + g.home, g.home)); #endif testutil_checkfmt(system(cmd), "command failed: %s", cmd); } diff --git a/src/third_party/wiredtiger/test/format/salvage.c b/src/third_party/wiredtiger/test/format/salvage.c index 69805fb1018..f82dc34dd5f 100644 --- a/src/third_party/wiredtiger/test/format/salvage.c +++ b/src/third_party/wiredtiger/test/format/salvage.c @@ -70,29 +70,31 @@ corrupt(void) * It's a little tricky: if the data source is a file, we're looking * for "wt", if the data source is a table, we're looking for "wt.wt". 
*/ - (void)snprintf(buf, sizeof(buf), "%s/%s", g.home, WT_NAME); + testutil_check(__wt_snprintf( + buf, sizeof(buf), "%s/%s", g.home, WT_NAME)); if ((fd = open(buf, O_RDWR)) != -1) { #ifdef _WIN32 - (void)snprintf(copycmd, sizeof(copycmd), + testutil_check(__wt_snprintf(copycmd, sizeof(copycmd), "copy %s\\%s %s\\slvg.copy\\%s.corrupted", - g.home, WT_NAME, g.home, WT_NAME); + g.home, WT_NAME, g.home, WT_NAME)); #else - (void)snprintf(copycmd, sizeof(copycmd), + testutil_check(__wt_snprintf(copycmd, sizeof(copycmd), "cp %s/%s %s/slvg.copy/%s.corrupted", - g.home, WT_NAME, g.home, WT_NAME); + g.home, WT_NAME, g.home, WT_NAME)); #endif goto found; } - (void)snprintf(buf, sizeof(buf), "%s/%s.wt", g.home, WT_NAME); + testutil_check(__wt_snprintf( + buf, sizeof(buf), "%s/%s.wt", g.home, WT_NAME)); if ((fd = open(buf, O_RDWR)) != -1) { #ifdef _WIN32 - (void)snprintf(copycmd, sizeof(copycmd), + testutil_check(__wt_snprintf(copycmd, sizeof(copycmd), "copy %s\\%s.wt %s\\slvg.copy\\%s.wt.corrupted", - g.home, WT_NAME, g.home, WT_NAME); + g.home, WT_NAME, g.home, WT_NAME)); #else - (void)snprintf(copycmd, sizeof(copycmd), + testutil_check(__wt_snprintf(copycmd, sizeof(copycmd), "cp %s/%s.wt %s/slvg.copy/%s.wt.corrupted", - g.home, WT_NAME, g.home, WT_NAME); + g.home, WT_NAME, g.home, WT_NAME)); #endif goto found; } @@ -103,7 +105,8 @@ found: if (fstat(fd, &sb) == -1) offset = mmrand(NULL, 0, (u_int)sb.st_size); len = (size_t)(20 + (sb.st_size / 100) * 2); - (void)snprintf(buf, sizeof(buf), "%s/slvg.corrupt", g.home); + testutil_check(__wt_snprintf( + buf, sizeof(buf), "%s/slvg.corrupt", g.home)); if ((fp = fopen(buf, "w")) == NULL) testutil_die(errno, "salvage-corrupt: open: %s", buf); (void)fprintf(fp, diff --git a/src/third_party/wiredtiger/test/format/t.c b/src/third_party/wiredtiger/test/format/t.c index 7701595776c..c6686ae8b91 100644 --- a/src/third_party/wiredtiger/test/format/t.c +++ b/src/third_party/wiredtiger/test/format/t.c @@ -49,14 +49,7 @@ main(int argc, char 
*argv[]) config = NULL; -#ifdef _WIN32 - g.progname = "t_format.exe"; -#else - if ((g.progname = strrchr(argv[0], DIR_DELIM)) == NULL) - g.progname = argv[0]; - else - ++g.progname; -#endif + (void)testutil_set_progname(argv); #if 0 /* Configure the GNU malloc for debugging. */ @@ -74,7 +67,7 @@ main(int argc, char *argv[]) home = NULL; onerun = 0; while ((ch = __wt_getopt( - g.progname, argc, argv, "1C:c:H:h:Llqrt:")) != EOF) + progname, argc, argv, "1C:c:H:h:Llqrt:")) != EOF) switch (ch) { case '1': /* One run */ onerun = 1; @@ -179,7 +172,7 @@ main(int argc, char *argv[]) testutil_check(pthread_rwlock_init(&g.checkpoint_lock, NULL)); testutil_check(pthread_rwlock_init(&g.death_lock, NULL)); - printf("%s: process %" PRIdMAX "\n", g.progname, (intmax_t)getpid()); + printf("%s: process %" PRIdMAX "\n", progname, (intmax_t)getpid()); while (++g.run_cnt <= g.c_runs || g.c_runs == 0 ) { startup(); /* Start a run */ @@ -344,7 +337,7 @@ usage(void) "usage: %s [-1Llqr] [-C wiredtiger-config]\n " "[-c config-file] [-H mount] [-h home] " "[name=value ...]\n", - g.progname); + progname); fprintf(stderr, "%s", "\t-1 run once\n" "\t-C specify wiredtiger_open configuration arguments\n" diff --git a/src/third_party/wiredtiger/test/format/util.c b/src/third_party/wiredtiger/test/format/util.c index b9788f1ac75..983d03e2525 100644 --- a/src/third_party/wiredtiger/test/format/util.c +++ b/src/third_party/wiredtiger/test/format/util.c @@ -241,20 +241,23 @@ val_gen(WT_RAND_STATE *rnd, WT_ITEM *value, uint64_t keyno) void track(const char *tag, uint64_t cnt, TINFO *tinfo) { - static int lastlen = 0; - int len; + static size_t lastlen = 0; + size_t len; char msg[128]; if (g.c_quiet || tag == NULL) return; if (tinfo == NULL && cnt == 0) - len = snprintf(msg, sizeof(msg), "%4d: %s", g.run_cnt, tag); + testutil_check(__wt_snprintf_len_set( + msg, sizeof(msg), &len, "%4d: %s", g.run_cnt, tag)); else if (tinfo == NULL) - len = snprintf( - msg, sizeof(msg), "%4d: %s: %" PRIu64, g.run_cnt, 
tag, cnt); + testutil_check(__wt_snprintf_len_set( + msg, sizeof(msg), &len, + "%4d: %s: %" PRIu64, g.run_cnt, tag, cnt)); else - len = snprintf(msg, sizeof(msg), + testutil_check(__wt_snprintf_len_set( + msg, sizeof(msg), &len, "%4d: %s: " "search %" PRIu64 "%s, " "insert %" PRIu64 "%s, " @@ -268,7 +271,7 @@ track(const char *tag, uint64_t cnt, TINFO *tinfo) tinfo->update > M(9) ? tinfo->update / M(1) : tinfo->update, tinfo->update > M(9) ? "M" : "", tinfo->remove > M(9) ? tinfo->remove / M(1) : tinfo->remove, - tinfo->remove > M(9) ? "M" : ""); + tinfo->remove > M(9) ? "M" : "")); if (lastlen > len) { memset(msg + len, ' ', (size_t)(lastlen - len)); @@ -297,27 +300,30 @@ path_setup(const char *home) /* Log file. */ len = strlen(g.home) + strlen("log") + 2; g.home_log = dmalloc(len); - snprintf(g.home_log, len, "%s/%s", g.home, "log"); + testutil_check(__wt_snprintf(g.home_log, len, "%s/%s", g.home, "log")); /* RNG log file. */ len = strlen(g.home) + strlen("rand") + 2; g.home_rand = dmalloc(len); - snprintf(g.home_rand, len, "%s/%s", g.home, "rand"); + testutil_check(__wt_snprintf( + g.home_rand, len, "%s/%s", g.home, "rand")); /* Run file. */ len = strlen(g.home) + strlen("CONFIG") + 2; g.home_config = dmalloc(len); - snprintf(g.home_config, len, "%s/%s", g.home, "CONFIG"); + testutil_check(__wt_snprintf( + g.home_config, len, "%s/%s", g.home, "CONFIG")); /* Statistics file. */ len = strlen(g.home) + strlen("stats") + 2; g.home_stats = dmalloc(len); - snprintf(g.home_stats, len, "%s/%s", g.home, "stats"); + testutil_check(__wt_snprintf( + g.home_stats, len, "%s/%s", g.home, "stats")); /* BDB directory. 
*/ len = strlen(g.home) + strlen("bdb") + 2; g.home_bdb = dmalloc(len); - snprintf(g.home_bdb, len, "%s/%s", g.home, "bdb"); + testutil_check(__wt_snprintf(g.home_bdb, len, "%s/%s", g.home, "bdb")); /* * Home directory initialize command: create the directory if it doesn't @@ -336,21 +342,23 @@ path_setup(const char *home) "cd %s & mkdir KVS" len = strlen(g.home) * 7 + strlen(CMD) + 1; g.home_init = dmalloc(len); - snprintf(g.home_init, len, CMD, - g.home, g.home, g.home, g.home, g.home, g.home, g.home); + testutil_check(__wt_snprintf(g.home_init, len, CMD, + g.home, g.home, g.home, g.home, g.home, g.home, g.home)); #else #define CMD "test -e %s || mkdir %s; " \ "cd %s > /dev/null && rm -rf `ls | sed /rand/d`; " \ "mkdir KVS" len = strlen(g.home) * 3 + strlen(CMD) + 1; g.home_init = dmalloc(len); - snprintf(g.home_init, len, CMD, g.home, g.home, g.home); + testutil_check(__wt_snprintf( + g.home_init, len, CMD, g.home, g.home, g.home)); #endif /* Primary backup directory. */ len = strlen(g.home) + strlen("BACKUP") + 2; g.home_backup = dmalloc(len); - snprintf(g.home_backup, len, "%s/%s", g.home, "BACKUP"); + testutil_check(__wt_snprintf( + g.home_backup, len, "%s/%s", g.home, "BACKUP")); /* * Backup directory initialize command, remove and re-create the primary @@ -365,9 +373,9 @@ path_setup(const char *home) len = strlen(g.home) * 4 + strlen("BACKUP") * 2 + strlen("BACKUP_COPY") * 2 + strlen(CMD) + 1; g.home_backup_init = dmalloc(len); - snprintf(g.home_backup_init, len, CMD, + testutil_check(__wt_snprintf(g.home_backup_init, len, CMD, g.home, "BACKUP", g.home, "BACKUP_COPY", - g.home, "BACKUP", g.home, "BACKUP_COPY"); + g.home, "BACKUP", g.home, "BACKUP_COPY")); /* * Salvage command, save the interesting files so we can replay the @@ -390,7 +398,7 @@ path_setup(const char *home) #endif len = strlen(g.home) + strlen(CMD) + 1; g.home_salvage_copy = dmalloc(len); - snprintf(g.home_salvage_copy, len, CMD, g.home); + testutil_check(__wt_snprintf(g.home_salvage_copy, 
len, CMD, g.home)); } /* @@ -489,8 +497,9 @@ alter(void *arg) while (!g.workers_finished) { period = mmrand(NULL, 1, 10); - snprintf(buf, sizeof(buf), - "access_pattern_hint=%s", access_value ? "random" : "none"); + testutil_check(__wt_snprintf(buf, sizeof(buf), + "access_pattern_hint=%s", + access_value ? "random" : "none")); access_value = !access_value; if (session->alter(session, g.uri, buf) != 0) break; diff --git a/src/third_party/wiredtiger/test/format/wts.c b/src/third_party/wiredtiger/test/format/wts.c index da234ce53c7..6aa4784d1c1 100644 --- a/src/third_party/wiredtiger/test/format/wts.c +++ b/src/third_party/wiredtiger/test/format/wts.c @@ -120,8 +120,15 @@ static WT_EVENT_HANDLER event_handler = { NULL /* Close handler. */ }; -#undef REMAIN -#define REMAIN(p, end) (size_t)((p) >= (end) ? 0 : (end) - (p)) +#define CONFIG_APPEND(p, ...) do { \ + size_t __len; \ + testutil_check( \ + __wt_snprintf_len_set(p, max, &__len, __VA_ARGS__)); \ + if (__len > max) \ + __len = max; \ + p += __len; \ + max -= __len; \ +} while (0) /* * wts_open -- @@ -132,42 +139,42 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) { WT_CONNECTION *conn; WT_DECL_RET; - char *config, *end, *p, helium_config[1024]; + size_t max; + char *config, *p, helium_config[1024]; *connp = NULL; config = p = g.wiredtiger_open_config; - end = config + sizeof(g.wiredtiger_open_config); + max = sizeof(g.wiredtiger_open_config); - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, "create=true," "cache_size=%" PRIu32 "MB," "checkpoint_sync=false," "error_prefix=\"%s\"", - g.c_cache, g.progname); + g.c_cache, progname); /* In-memory configuration. */ if (g.c_in_memory != 0) - p += snprintf(p, REMAIN(p, end), ",in_memory=1"); + CONFIG_APPEND(p, ",in_memory=1"); /* LSM configuration. 
*/ if (DATASOURCE("lsm")) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",lsm_manager=(worker_thread_max=%" PRIu32 "),", g.c_lsm_worker_threads); - if (DATASOURCE("lsm") || g.c_cache < 20) { - p += snprintf(p, REMAIN(p, end), ",eviction_dirty_trigger=95"); - } + if (DATASOURCE("lsm") || g.c_cache < 20) + CONFIG_APPEND(p, ",eviction_dirty_trigger=95"); /* Eviction worker configuration. */ if (g.c_evict_max != 0) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",eviction=(threads_max=%" PRIu32 ")", g.c_evict_max); /* Logging configuration. */ if (g.c_logging) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",log=(enabled=true,archive=%d,prealloc=%d" ",compressor=\"%s\")", g.c_logging_archive ? 1 : 0, @@ -175,21 +182,21 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) compressor(g.c_logging_compression_flag)); if (g.c_encryption) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",encryption=(name=%s)", encryptor(g.c_encryption_flag)); /* Miscellaneous. */ #ifdef HAVE_POSIX_MEMALIGN - p += snprintf(p, REMAIN(p, end), ",buffer_alignment=512"); + CONFIG_APPEND(p, ",buffer_alignment=512"); #endif - p += snprintf(p, REMAIN(p, end), ",mmap=%d", g.c_mmap ? 1 : 0); + CONFIG_APPEND(p, ",mmap=%d", g.c_mmap ? 1 : 0); if (g.c_direct_io) - p += snprintf(p, REMAIN(p, end), ",direct_io=(data)"); + CONFIG_APPEND(p, ",direct_io=(data)"); if (g.c_data_extend) - p += snprintf(p, REMAIN(p, end), ",file_extend=(data=8MB)"); + CONFIG_APPEND(p, ",file_extend=(data=8MB)"); /* * Run the statistics server and/or maintain statistics in the engine. 
@@ -198,18 +205,18 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) if (g.c_statistics_server) { if (mmrand(NULL, 0, 5) == 1 && memcmp(g.uri, "file:", strlen("file:")) == 0) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",statistics=(fast)" ",statistics_log=(wait=5,sources=(\"file:\"))"); else - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",statistics=(fast),statistics_log=(wait=5)"); } else - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",statistics=(%s)", g.c_statistics ? "fast" : "none"); /* Extensions. */ - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",extensions=[" "\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"],", g.c_reverse ? REVERSE_PATH : "", @@ -227,11 +234,11 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) * override the standard configuration. */ if (g.c_config_open != NULL) - p += snprintf(p, REMAIN(p, end), ",%s", g.c_config_open); + CONFIG_APPEND(p, ",%s", g.c_config_open); if (g.config_open != NULL) - p += snprintf(p, REMAIN(p, end), ",%s", g.config_open); + CONFIG_APPEND(p, ",%s", g.config_open); - if (REMAIN(p, end) == 0) + if (max == 0) testutil_die(ENOMEM, "wiredtiger_open configuration buffer too small"); @@ -259,12 +266,13 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) if (DATASOURCE("helium")) { if (g.helium_mount == NULL) testutil_die(EINVAL, "no Helium mount point specified"); - (void)snprintf(helium_config, sizeof(helium_config), + testutil_check( + __wt_snprintf(helium_config, sizeof(helium_config), "entry=wiredtiger_extension_init,config=[" "helium_verbose=0," "dev1=[helium_devices=\"he://./%s\"," "helium_o_volume_truncate=1]]", - g.helium_mount); + g.helium_mount)); if ((ret = conn->load_extension( conn, HELIUM_PATH, helium_config)) != 0) testutil_die(ret, @@ -299,13 +307,13 @@ wts_init(void) { WT_CONNECTION *conn; WT_SESSION *session; + size_t max; uint32_t maxintlpage, maxintlkey, maxleafpage, maxleafkey, maxleafvalue; - char 
config[4096], *end, *p; + char config[4096], *p; conn = g.wts_conn; - p = config; - end = config + sizeof(config); + max = sizeof(config); /* * Ensure that we can service at least one operation per-thread @@ -326,7 +334,7 @@ wts_init(void) if (maxleafpage > 512) maxleafpage >>= 1; } - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, "key_format=%s," "allocation_size=512,%s" "internal_page_max=%" PRIu32 ",leaf_page_max=%" PRIu32, @@ -340,43 +348,35 @@ wts_init(void) */ maxintlkey = mmrand(NULL, maxintlpage / 50, maxintlpage / 40); if (maxintlkey > 20) - p += snprintf(p, REMAIN(p, end), - ",internal_key_max=%" PRIu32, maxintlkey); + CONFIG_APPEND(p, ",internal_key_max=%" PRIu32, maxintlkey); maxleafkey = mmrand(NULL, maxleafpage / 50, maxleafpage / 40); if (maxleafkey > 20) - p += snprintf(p, REMAIN(p, end), - ",leaf_key_max=%" PRIu32, maxleafkey); + CONFIG_APPEND(p, ",leaf_key_max=%" PRIu32, maxleafkey); maxleafvalue = mmrand(NULL, maxleafpage * 10, maxleafpage / 40); if (maxleafvalue > 40 && maxleafvalue < 100 * 1024) - p += snprintf(p, REMAIN(p, end), - ",leaf_value_max=%" PRIu32, maxleafvalue); + CONFIG_APPEND(p, ",leaf_value_max=%" PRIu32, maxleafvalue); switch (g.type) { case FIX: - p += snprintf(p, REMAIN(p, end), - ",value_format=%" PRIu32 "t", g.c_bitcnt); + CONFIG_APPEND(p, ",value_format=%" PRIu32 "t", g.c_bitcnt); break; case ROW: if (g.c_huffman_key) - p += snprintf(p, REMAIN(p, end), - ",huffman_key=english"); + CONFIG_APPEND(p, ",huffman_key=english"); if (g.c_prefix_compression) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",prefix_compression_min=%" PRIu32, g.c_prefix_compression_min); else - p += snprintf(p, REMAIN(p, end), - ",prefix_compression=false"); + CONFIG_APPEND(p, ",prefix_compression=false"); if (g.c_reverse) - p += snprintf(p, REMAIN(p, end), - ",collator=reverse"); + CONFIG_APPEND(p, ",collator=reverse"); /* FALLTHROUGH */ case VAR: if (g.c_huffman_value) - p += snprintf(p, REMAIN(p, end), - ",huffman_value=english"); + 
CONFIG_APPEND(p, ",huffman_value=english"); if (g.c_dictionary) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",dictionary=%" PRIu32, mmrand(NULL, 123, 517)); break; } @@ -384,66 +384,63 @@ wts_init(void) /* Configure checksums. */ switch (g.c_checksum_flag) { case CHECKSUM_OFF: - p += snprintf(p, REMAIN(p, end), ",checksum=\"off\""); + CONFIG_APPEND(p, ",checksum=\"off\""); break; case CHECKSUM_ON: - p += snprintf(p, REMAIN(p, end), ",checksum=\"on\""); + CONFIG_APPEND(p, ",checksum=\"on\""); break; case CHECKSUM_UNCOMPRESSED: - p += snprintf(p, REMAIN(p, end), ",checksum=\"uncompressed\""); + CONFIG_APPEND(p, ",checksum=\"uncompressed\""); break; } /* Configure compression. */ if (g.c_compression_flag != COMPRESS_NONE) - p += snprintf(p, REMAIN(p, end), ",block_compressor=\"%s\"", + CONFIG_APPEND(p, ",block_compressor=\"%s\"", compressor(g.c_compression_flag)); /* Configure Btree internal key truncation. */ - p += snprintf(p, REMAIN(p, end), ",internal_key_truncate=%s", + CONFIG_APPEND(p, ",internal_key_truncate=%s", g.c_internal_key_truncation ? "true" : "false"); /* Configure Btree page key gap. */ - p += snprintf(p, REMAIN(p, end), ",key_gap=%" PRIu32, g.c_key_gap); + CONFIG_APPEND(p, ",key_gap=%" PRIu32, g.c_key_gap); /* Configure Btree split page percentage. */ - p += snprintf(p, REMAIN(p, end), ",split_pct=%" PRIu32, g.c_split_pct); + CONFIG_APPEND(p, ",split_pct=%" PRIu32, g.c_split_pct); /* Configure LSM and data-sources. */ if (DATASOURCE("helium")) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",type=helium,helium_o_compress=%d,helium_o_truncate=1", g.c_compression_flag == COMPRESS_NONE ? 0 : 1); if (DATASOURCE("kvsbdb")) - p += snprintf(p, REMAIN(p, end), ",type=kvsbdb"); + CONFIG_APPEND(p, ",type=kvsbdb"); if (DATASOURCE("lsm")) { - p += snprintf(p, REMAIN(p, end), ",type=lsm,lsm=("); - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",type=lsm,lsm=("); + CONFIG_APPEND(p, "auto_throttle=%s,", g.c_auto_throttle ? 
"true" : "false"); - p += snprintf(p, REMAIN(p, end), - "chunk_size=%" PRIu32 "MB,", g.c_chunk_size); + CONFIG_APPEND(p, "chunk_size=%" PRIu32 "MB,", g.c_chunk_size); /* * We can't set bloom_oldest without bloom, and we want to test * with Bloom filters on most of the time anyway. */ if (g.c_bloom_oldest) g.c_bloom = 1; - p += snprintf(p, REMAIN(p, end), - "bloom=%s,", g.c_bloom ? "true" : "false"); - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, "bloom=%s,", g.c_bloom ? "true" : "false"); + CONFIG_APPEND(p, "bloom_bit_count=%" PRIu32 ",", g.c_bloom_bit_count); - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, "bloom_hash_count=%" PRIu32 ",", g.c_bloom_hash_count); - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, "bloom_oldest=%s,", g.c_bloom_oldest ? "true" : "false"); - p += snprintf(p, REMAIN(p, end), - "merge_max=%" PRIu32 ",", g.c_merge_max); - p += snprintf(p, REMAIN(p, end), ",)"); + CONFIG_APPEND(p, "merge_max=%" PRIu32 ",", g.c_merge_max); + CONFIG_APPEND(p, ",)"); } - if (REMAIN(p, end) == 0) + if (max == 0) testutil_die(ENOMEM, "WT_SESSION.create configuration buffer too small"); @@ -490,14 +487,14 @@ wts_dump(const char *tag, int dump_bdb) len = strlen(g.home) + strlen(BERKELEY_DB_PATH) + strlen(g.uri) + 100; cmd = dmalloc(len); - (void)snprintf(cmd, len, + testutil_check(__wt_snprintf(cmd, len, "sh s_dumpcmp -h %s %s %s %s %s %s", g.home, dump_bdb ? "-b " : "", dump_bdb ? BERKELEY_DB_PATH : "", g.type == FIX || g.type == VAR ? "-c" : "", g.uri == NULL ? "" : "-n", - g.uri == NULL ? "" : g.uri); + g.uri == NULL ? 
"" : g.uri)); testutil_checkfmt(system(cmd), "%s: dump comparison failed", tag); free(cmd); @@ -587,7 +584,7 @@ wts_stats(void) fprintf(fp, "\n\n====== Data source statistics:\n"); len = strlen("statistics:") + strlen(g.uri) + 1; stat_name = dmalloc(len); - snprintf(stat_name, len, "statistics:%s", g.uri); + testutil_check(__wt_snprintf(stat_name, len, "statistics:%s", g.uri)); testutil_check(session->open_cursor( session, stat_name, NULL, NULL, &cursor)); free(stat_name); diff --git a/src/third_party/wiredtiger/test/huge/huge.c b/src/third_party/wiredtiger/test/huge/huge.c index 17e2db353d5..2b0d5f498e3 100644 --- a/src/third_party/wiredtiger/test/huge/huge.c +++ b/src/third_party/wiredtiger/test/huge/huge.c @@ -29,7 +29,6 @@ #include "test_util.h" static char home[512]; /* Program working dir */ -static const char *progname; /* Program name */ static uint8_t *big; /* Big key/value buffer */ #define GIGABYTE (1073741824) @@ -167,14 +166,10 @@ main(int argc, char *argv[]) int ch, small; char *working_dir; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); small = 0; working_dir = NULL; - while ((ch = __wt_getopt(progname, argc, argv, "h:s")) != EOF) switch (ch) { case 'h': diff --git a/src/third_party/wiredtiger/test/java/com/wiredtiger/test/CursorTest03.java b/src/third_party/wiredtiger/test/java/com/wiredtiger/test/CursorTest03.java new file mode 100644 index 00000000000..64f33f4d7b6 --- /dev/null +++ b/src/third_party/wiredtiger/test/java/com/wiredtiger/test/CursorTest03.java @@ -0,0 +1,175 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. 
+ * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +package com.wiredtiger.test; + +import com.wiredtiger.db.Connection; +import com.wiredtiger.db.Cursor; +import com.wiredtiger.db.SearchStatus; +import com.wiredtiger.db.Session; +import com.wiredtiger.db.WiredTigerPackingException; +import com.wiredtiger.db.WiredTigerException; +import com.wiredtiger.db.wiredtiger; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; +import org.junit.Assert; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/* + * Test cases for WT-3238. + * + * Most WiredTiger methods return int, and our SWIG typemaps for Java add + * checking that throws exceptions for non-zero returns. Certain methods + * (Cursor.compare, Cursor.equals) are declared as returning int in Java, + * but should not throw exceptions for normal returns (which may be + * non-zero). 
+ */ +public class CursorTest03 { + Connection conn; + Session s; + static String values[] = { "key0", "key1" }; + + @Test + public void cursor_int_methods() + throws WiredTigerPackingException { + setup(); + + Cursor c1 = s.open_cursor("table:t", null, null); + Cursor c2 = s.open_cursor("table:t", null, null); + for (String s : values) { + c1.putKeyString(s); + c1.putValueString(s); + c1.insert(); + } + c1.reset(); + + // "key1" compared to "key1" + c1.putKeyString(values[1]); + Assert.assertEquals(c1.search_near(), SearchStatus.FOUND); + c2.putKeyString(values[1]); + Assert.assertEquals(c2.search_near(), SearchStatus.FOUND); + Assert.assertEquals(c1.compare(c2), 0); + Assert.assertEquals(c2.compare(c1), 0); + Assert.assertEquals(c1.compare(c1), 0); + Assert.assertEquals(c1.equals(c2), 1); + Assert.assertEquals(c2.equals(c1), 1); + Assert.assertEquals(c1.equals(c1), 1); + + // "key0" compared to "key1" + c1.putKeyString(values[0]); + Assert.assertEquals(c1.search_near(), SearchStatus.FOUND); + Assert.assertEquals(c1.compare(c2), -1); + Assert.assertEquals(c2.compare(c1), 1); + Assert.assertEquals(c1.equals(c2), 0); + Assert.assertEquals(c2.equals(c1), 0); + + c1.close(); + c2.close(); + teardown(); + } + + public void expectException(Cursor c1, Cursor c2) + { + boolean caught = false; + try { + c1.compare(c2); + } + catch (WiredTigerException wte) { + caught = true; + } + Assert.assertTrue(caught); + + caught = false; + try { + c1.equals(c2); + } + catch (WiredTigerException wte) { + caught = true; + } + Assert.assertTrue(caught); + } + + @Test + public void cursor_int_methods_errors() + throws WiredTigerPackingException { + setup(); + + Cursor c1 = s.open_cursor("table:t", null, null); + Cursor c2 = s.open_cursor("table:t", null, null); + Cursor cx = s.open_cursor("table:t2", null, null); + for (String s : values) { + c1.putKeyString(s); + c1.putValueString(s); + c1.insert(); + cx.putKeyString(s); + cx.putValueString(s); + cx.insert(); + } + c1.reset(); + 
cx.reset(); + + // With both cursors not set, should be an exception. + expectException(c1, c2); + expectException(c1, c2); + + // With any one cursor not set, should be an exception. + c1.putKeyString(values[1]); + Assert.assertEquals(c1.search_near(), SearchStatus.FOUND); + expectException(c1, c2); + expectException(c1, c2); + + // With two cursors from different tables, should be an exception. + cx.putKeyString(values[1]); + Assert.assertEquals(cx.search_near(), SearchStatus.FOUND); + expectException(c1, cx); + expectException(c1, cx); + + c1.close(); + c2.close(); + cx.close(); + teardown(); + } + + private void setup() { + conn = wiredtiger.open("WT_HOME", "create"); + s = conn.open_session(null); + s.create("table:t", "key_format=S,value_format=S"); + s.create("table:t2", "key_format=S,value_format=S"); + } + + private void teardown() { + s.drop("table:t", ""); + s.drop("table:t2", ""); + s.close(""); + conn.close(""); + } + +} + diff --git a/src/third_party/wiredtiger/test/java/com/wiredtiger/test/WiredTigerSuite.java b/src/third_party/wiredtiger/test/java/com/wiredtiger/test/WiredTigerSuite.java index 5bd98d53fac..9322d30671a 100644 --- a/src/third_party/wiredtiger/test/java/com/wiredtiger/test/WiredTigerSuite.java +++ b/src/third_party/wiredtiger/test/java/com/wiredtiger/test/WiredTigerSuite.java @@ -38,6 +38,7 @@ import org.junit.runners.Suite; ConfigTest.class, CursorTest.class, CursorTest02.class, + CursorTest03.class, ExceptionTest.class, PackTest.class, PackTest02.class, diff --git a/src/third_party/wiredtiger/test/manydbs/manydbs.c b/src/third_party/wiredtiger/test/manydbs/manydbs.c index 7e986d47af3..42020d6ce9a 100644 --- a/src/third_party/wiredtiger/test/manydbs/manydbs.c +++ b/src/third_party/wiredtiger/test/manydbs/manydbs.c @@ -32,7 +32,6 @@ #define HOME_BASE "WT_TEST" static char home[HOME_SIZE]; /* Base home directory */ static char hometmp[HOME_SIZE]; /* Each conn home directory */ -static const char *progname; /* Program name */ static 
const char * const uri = "table:main"; #define WTOPEN_CFG_COMMON \ @@ -129,10 +128,8 @@ main(int argc, char *argv[]) const char *working_dir, *wt_cfg; char cmd[128]; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); + dbs = MAX_DBS; working_dir = HOME_BASE; idle = false; @@ -171,7 +168,8 @@ main(int argc, char *argv[]) testutil_make_work_dir(home); __wt_random_init(&rnd); for (i = 0; i < dbs; ++i) { - snprintf(hometmp, HOME_SIZE, "%s/%s.%d", home, HOME_BASE, i); + testutil_check(__wt_snprintf( + hometmp, HOME_SIZE, "%s/%s.%d", home, HOME_BASE, i)); testutil_make_work_dir(hometmp); /* * Open each database. Rotate different configurations diff --git a/src/third_party/wiredtiger/test/mciproject.yml b/src/third_party/wiredtiger/test/mciproject.yml index eb74914eb46..6456475aa00 100644 --- a/src/third_party/wiredtiger/test/mciproject.yml +++ b/src/third_party/wiredtiger/test/mciproject.yml @@ -65,7 +65,7 @@ tasks: ./build_posix/reconf ${configure_env_vars|} ./configure --enable-diagnostic --enable-python --enable-zlib --enable-strict --enable-verbose ${make_command|make} ${smp_command|} 2>&1 - ${make_command|make} VERBOSE=1 check 2>&1 + TESTUTIL_ENABLE_LONG_TESTS=1 ${make_command|make} VERBOSE=1 check 2>&1 fi - command: archive.targz_pack params: diff --git a/src/third_party/wiredtiger/test/readonly/readonly.c b/src/third_party/wiredtiger/test/readonly/readonly.c index a4b79f5859f..66c7a0ca692 100644 --- a/src/third_party/wiredtiger/test/readonly/readonly.c +++ b/src/third_party/wiredtiger/test/readonly/readonly.c @@ -39,7 +39,6 @@ static char home_rd[HOME_SIZE + sizeof(HOME_RD_SUFFIX)]; #define HOME_RD2_SUFFIX ".RDNOLOCK" /* Read-only dir no lock file */ static char home_rd2[HOME_SIZE + sizeof(HOME_RD2_SUFFIX)]; -static const char *progname; /* Program name */ static const char *saved_argv0; /* Program command */ static const char * const uri = "table:main"; @@ -172,10 +171,8 @@ 
main(int argc, char *argv[]) char cmd[512]; uint8_t buf[MAX_VAL]; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); + /* * Needed unaltered for system command later. */ @@ -209,10 +206,12 @@ main(int argc, char *argv[]) * Set up all the directory names. */ testutil_work_dir_from_path(home, sizeof(home), working_dir); - (void)snprintf(home_wr, sizeof(home_wr), "%s%s", home, HOME_WR_SUFFIX); - (void)snprintf(home_rd, sizeof(home_rd), "%s%s", home, HOME_RD_SUFFIX); - (void)snprintf( - home_rd2, sizeof(home_rd2), "%s%s", home, HOME_RD2_SUFFIX); + testutil_check(__wt_snprintf( + home_wr, sizeof(home_wr), "%s%s", home, HOME_WR_SUFFIX)); + testutil_check(__wt_snprintf( + home_rd, sizeof(home_rd), "%s%s", home, HOME_RD_SUFFIX)); + testutil_check(__wt_snprintf( + home_rd2, sizeof(home_rd2), "%s%s", home, HOME_RD2_SUFFIX)); if (!child) { testutil_make_work_dir(home); testutil_make_work_dir(home_wr); @@ -271,22 +270,22 @@ main(int argc, char *argv[]) * Copy the database. Remove any lock file from one copy * and chmod the copies to be read-only permissions. 
*/ - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), "cp -rp %s/* %s; rm -f %s/WiredTiger.lock", - home, home_wr, home_wr); + home, home_wr, home_wr)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), "cp -rp %s/* %s; chmod 0555 %s; chmod -R 0444 %s/*", - home, home_rd, home_rd, home_rd); + home, home_rd, home_rd, home_rd)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), "cp -rp %s/* %s; rm -f %s/WiredTiger.lock; " "chmod 0555 %s; chmod -R 0444 %s/*", - home, home_rd2, home_rd2, home_rd2, home_rd2); + home, home_rd2, home_rd2, home_rd2, home_rd2)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); @@ -330,8 +329,8 @@ main(int argc, char *argv[]) * * The child will exit with success if its test passes. */ - (void)snprintf( - cmd, sizeof(cmd), "%s -h %s -R", saved_argv0, working_dir); + testutil_check(__wt_snprintf( + cmd, sizeof(cmd), "%s -h %s -R", saved_argv0, working_dir)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); if (WEXITSTATUS(status) != 0) @@ -340,8 +339,8 @@ main(int argc, char *argv[]) /* * Scenario 2. Run child with writable config. */ - (void)snprintf( - cmd, sizeof(cmd), "%s -h %s -W", saved_argv0, working_dir); + testutil_check(__wt_snprintf( + cmd, sizeof(cmd), "%s -h %s -W", saved_argv0, working_dir)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); if (WEXITSTATUS(status) != 0) @@ -361,8 +360,8 @@ main(int argc, char *argv[]) /* * Scenario 3. Child read-only. 
*/ - (void)snprintf( - cmd, sizeof(cmd), "%s -h %s -R", saved_argv0, working_dir); + testutil_check(__wt_snprintf( + cmd, sizeof(cmd), "%s -h %s -R", saved_argv0, working_dir)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); if (WEXITSTATUS(status) != 0) @@ -371,8 +370,8 @@ main(int argc, char *argv[]) /* * Scenario 4. Run child with writable config. */ - (void)snprintf( - cmd, sizeof(cmd), "%s -h %s -W", saved_argv0, working_dir); + testutil_check(__wt_snprintf( + cmd, sizeof(cmd), "%s -h %s -W", saved_argv0, working_dir)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); if (WEXITSTATUS(status) != 0) @@ -393,11 +392,12 @@ main(int argc, char *argv[]) * We need to chmod the read-only databases back so that they can * be removed by scripts. */ - (void)snprintf(cmd, sizeof(cmd), "chmod 0777 %s %s", home_rd, home_rd2); + testutil_check(__wt_snprintf( + cmd, sizeof(cmd), "chmod 0777 %s %s", home_rd, home_rd2)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); - (void)snprintf(cmd, sizeof(cmd), "chmod -R 0666 %s/* %s/*", - home_rd, home_rd2); + testutil_check(__wt_snprintf( + cmd, sizeof(cmd), "chmod -R 0666 %s/* %s/*", home_rd, home_rd2)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); printf(" *** Readonly test successful ***\n"); diff --git a/src/third_party/wiredtiger/test/recovery/random-abort.c b/src/third_party/wiredtiger/test/recovery/random-abort.c index 660ef0cca67..febe6530534 100644 --- a/src/third_party/wiredtiger/test/recovery/random-abort.c +++ b/src/third_party/wiredtiger/test/recovery/random-abort.c @@ -32,7 +32,7 @@ #include <signal.h> static char home[1024]; /* Program working dir */ -static const char *progname; /* Program name */ + /* * These two names for the URI and file system must be maintained in tandem. */ @@ -94,14 +94,16 @@ thread_run(void *arg) /* * The value is the name of the record file with our id appended. 
*/ - snprintf(buf, sizeof(buf), RECORDS_FILE, td->id); + testutil_check(__wt_snprintf(buf, sizeof(buf), RECORDS_FILE, td->id)); /* * Set up a large value putting our id in it. Write it in there a * bunch of times, but the rest of the buffer can just be zero. */ - snprintf(lgbuf, sizeof(lgbuf), "th-%" PRIu32, td->id); + testutil_check(__wt_snprintf( + lgbuf, sizeof(lgbuf), "th-%" PRIu32, td->id)); for (i = 0; i < 128; i += strlen(lgbuf)) - snprintf(&large[i], lsize - i, "%s", lgbuf); + testutil_check(__wt_snprintf( + &large[i], lsize - i, "%s", lgbuf)); /* * Keep a separate file with the records we wrote for checking. */ @@ -124,7 +126,8 @@ thread_run(void *arg) * Write our portion of the key space until we're killed. */ for (i = td->start; ; ++i) { - snprintf(kname, sizeof(kname), "%" PRIu64, i); + testutil_check(__wt_snprintf( + kname, sizeof(kname), "%" PRIu64, i)); cursor->set_key(cursor, kname); /* * Every 30th record write a very large record that exceeds the @@ -229,10 +232,7 @@ main(int argc, char *argv[]) const char *working_dir; char fname[64], kname[64], statname[1024]; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); inmem = false; nth = MIN_TH; @@ -316,7 +316,8 @@ main(int argc, char *argv[]) * still exists in case the child aborts for some reason we * don't stay in this loop forever. 
*/ - snprintf(statname, sizeof(statname), "%s/%s", home, fs_main); + testutil_check(__wt_snprintf( + statname, sizeof(statname), "%s/%s", home, fs_main)); while (stat(statname, &sb) != 0 && kill(pid, 0) == 0) sleep(1); sleep(timeout); @@ -351,7 +352,8 @@ main(int argc, char *argv[]) fatal = false; for (i = 0; i < nth; ++i) { middle = 0; - snprintf(fname, sizeof(fname), RECORDS_FILE, i); + testutil_check(__wt_snprintf( + fname, sizeof(fname), RECORDS_FILE, i)); if ((fp = fopen(fname, "r")) == NULL) testutil_die(errno, "fopen: %s", fname); @@ -379,7 +381,8 @@ main(int argc, char *argv[]) fname, key, last_key); break; } - snprintf(kname, sizeof(kname), "%" PRIu64, key); + testutil_check(__wt_snprintf( + kname, sizeof(kname), "%" PRIu64, key)); cursor->set_key(cursor, kname); if ((ret = cursor->search(cursor)) != 0) { if (ret != WT_NOTFOUND) diff --git a/src/third_party/wiredtiger/test/recovery/truncated-log.c b/src/third_party/wiredtiger/test/recovery/truncated-log.c index 6a142b8e710..a127d8c1c63 100644 --- a/src/third_party/wiredtiger/test/recovery/truncated-log.c +++ b/src/third_party/wiredtiger/test/recovery/truncated-log.c @@ -30,13 +30,7 @@ #include <sys/wait.h> -#ifdef _WIN32 -/* snprintf is not supported on <= VS2013 */ -#define snprintf _snprintf -#endif - static char home[1024]; /* Program working dir */ -static const char *progname; /* Program name */ static const char * const uri = "table:main"; #define RECORDS_FILE "records" @@ -138,7 +132,8 @@ usage(void) * Child process creates the database and table, and then writes data into * the table until it is killed by the parent. 
*/ -static void fill_db(void)WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); +static void fill_db(void) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); static void fill_db(void) { @@ -194,9 +189,9 @@ fill_db(void) max_key = min_key * 2; first = true; for (i = 0; i < max_key; ++i) { - snprintf(k, sizeof(k), "key%03d", (int)i); - snprintf(v, sizeof(v), "value%0*d", - (int)(V_SIZE - strlen("value")), (int)i); + testutil_check(__wt_snprintf(k, sizeof(k), "key%03d", (int)i)); + testutil_check(__wt_snprintf(v, sizeof(v), "value%0*d", + (int)(V_SIZE - (strlen("value") + 1)), (int)i)); cursor->set_key(cursor, k); cursor->set_value(cursor, v); if ((ret = cursor->insert(cursor)) != 0) @@ -271,10 +266,7 @@ main(int argc, char *argv[]) pid_t pid; const char *working_dir; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); working_dir = "WT_TEST.truncated-log"; while ((ch = __wt_getopt(progname, argc, argv, "h:")) != EOF) diff --git a/src/third_party/wiredtiger/test/salvage/salvage.c b/src/third_party/wiredtiger/test/salvage/salvage.c index b8553bbd72d..83f9c6349bc 100644 --- a/src/third_party/wiredtiger/test/salvage/salvage.c +++ b/src/third_party/wiredtiger/test/salvage/salvage.c @@ -54,8 +54,6 @@ void run(int); void t(int, u_int, int); int usage(void); -static const char *progname; /* Program name */ - static FILE *res_fp; /* Results file */ static u_int page_type; /* File types */ static int value_unique; /* Values are unique */ @@ -70,10 +68,7 @@ main(int argc, char *argv[]) u_int ptype; int ch, r; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); r = 0; ptype = 0; @@ -445,7 +440,8 @@ run(int r) process(); - snprintf(buf, sizeof(buf), "cmp %s %s > /dev/null", DUMP, RSLT); + testutil_check(__wt_snprintf( + buf, sizeof(buf), "cmp %s %s > /dev/null", DUMP, RSLT)); if (system(buf)) { fprintf(stderr, "check failed, salvage 
results were incorrect\n"); @@ -490,28 +486,28 @@ build(int ikey, int ivalue, int cnt) switch (page_type) { case WT_PAGE_COL_FIX: - (void)snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "key_format=r,value_format=7t," "allocation_size=%d," "internal_page_max=%d,internal_item_max=%d," "leaf_page_max=%d,leaf_item_max=%d", - PSIZE, PSIZE, OSIZE, PSIZE, OSIZE); + PSIZE, PSIZE, OSIZE, PSIZE, OSIZE)); break; case WT_PAGE_COL_VAR: - (void)snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "key_format=r," "allocation_size=%d," "internal_page_max=%d,internal_item_max=%d," "leaf_page_max=%d,leaf_item_max=%d", - PSIZE, PSIZE, OSIZE, PSIZE, OSIZE); + PSIZE, PSIZE, OSIZE, PSIZE, OSIZE)); break; case WT_PAGE_ROW_LEAF: - (void)snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "key_format=u," "allocation_size=%d," "internal_page_max=%d,internal_item_max=%d," "leaf_page_max=%d,leaf_item_max=%d", - PSIZE, PSIZE, OSIZE, PSIZE, OSIZE); + PSIZE, PSIZE, OSIZE, PSIZE, OSIZE)); break; default: assert(0); @@ -525,7 +521,8 @@ build(int ikey, int ivalue, int cnt) case WT_PAGE_COL_VAR: break; case WT_PAGE_ROW_LEAF: - snprintf(kbuf, sizeof(kbuf), "%010d KEY------", ikey); + testutil_check(__wt_snprintf( + kbuf, sizeof(kbuf), "%010d KEY------", ikey)); key.data = kbuf; key.size = 20; cursor->set_key(cursor, &key); @@ -538,8 +535,8 @@ build(int ikey, int ivalue, int cnt) break; case WT_PAGE_COL_VAR: case WT_PAGE_ROW_LEAF: - snprintf(vbuf, sizeof(vbuf), - "%010d VALUE----", value_unique ? ivalue : 37); + testutil_check(__wt_snprintf(vbuf, sizeof(vbuf), + "%010d VALUE----", value_unique ? ivalue : 37)); value.data = vbuf; value.size = 20; cursor->set_value(cursor, &value); @@ -626,9 +623,9 @@ process(void) /* Salvage. 
*/ config[0] = '\0'; if (verbose) - snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "error_prefix=\"%s\",verbose=[salvage,verify],", - progname); + progname)); strcat(config, "log=(enabled=false),"); CHECK(wiredtiger_open(NULL, NULL, config, &conn) == 0); diff --git a/src/third_party/wiredtiger/test/suite/test_compact02.py b/src/third_party/wiredtiger/test/suite/test_compact02.py index 7af76b5fd58..803600eea14 100644 --- a/src/third_party/wiredtiger/test/suite/test_compact02.py +++ b/src/third_party/wiredtiger/test/suite/test_compact02.py @@ -99,7 +99,7 @@ class test_compact02(wttest.WiredTigerTestCase): def ConnectionOpen(self, cacheSize): self.home = '.' conn_params = 'create,' + \ - cacheSize + ',error_prefix="%s: ",' % self.shortid() + \ + cacheSize + ',error_prefix="%s",' % self.shortid() + \ 'statistics=(all),' + \ 'eviction_dirty_target=99,eviction_dirty_trigger=99' try: diff --git a/src/third_party/wiredtiger/test/suite/test_cursor10.py b/src/third_party/wiredtiger/test/suite/test_cursor10.py index b3cffeab4e9..6cabfde9f1f 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor10.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor10.py @@ -31,11 +31,11 @@ from wtscenario import make_scenarios # test_cursor10.py # Cursors with projections. -class test_cursor04(wttest.WiredTigerTestCase): +class test_cursor10(wttest.WiredTigerTestCase): """ Test cursor search and search_near """ - table_name1 = 'test_cursor04' + table_name1 = 'test_cursor10' nentries = 20 scenarios = make_scenarios([ diff --git a/src/third_party/wiredtiger/test/suite/test_cursor11.py b/src/third_party/wiredtiger/test/suite/test_cursor11.py new file mode 100644 index 00000000000..e159ec499e6 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_cursor11.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. 
+# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import wiredtiger, wttest +from wtdataset import SimpleDataSet, SimpleIndexDataSet +from wtdataset import SimpleLSMDataSet, ComplexDataSet, ComplexLSMDataSet +from wtscenario import make_scenarios + +# test_cursor11.py +# WT_CURSOR position tests: remove (if not already positioned), and insert +# leave the cursor without position or information. 
+class test_cursor11(wttest.WiredTigerTestCase): + + keyfmt = [ + ('integer', dict(keyfmt='i')), + ('recno', dict(keyfmt='r')), + ('string', dict(keyfmt='S')), + ] + types = [ + ('file', dict(uri='file', ds=SimpleDataSet)), + ('lsm', dict(uri='lsm', ds=SimpleDataSet)), + ('table-complex', dict(uri='table', ds=ComplexDataSet)), + ('table-complex-lsm', dict(uri='table', ds=ComplexLSMDataSet)), + ('table-index', dict(uri='table', ds=SimpleIndexDataSet)), + ('table-simple', dict(uri='table', ds=SimpleDataSet)), + ('table-simple-lsm', dict(uri='table', ds=SimpleLSMDataSet)), + ] + scenarios = make_scenarios(types, keyfmt) + + def skip(self): + return self.keyfmt == 'r' and \ + (self.ds.is_lsm() or self.uri == 'lsm') + + # Do a remove using the cursor after setting a position, and confirm + # the key and position remain set but no value. + def test_cursor_remove_with_position(self): + if self.skip(): + return + + # Build an object. + uri = self.uri + ':test_cursor11' + ds = self.ds(self, uri, 50, key_format=self.keyfmt) + ds.populate() + s = self.conn.open_session() + c = s.open_cursor(uri, None) + + c.set_key(ds.key(25)) + self.assertEquals(c.search(), 0) + self.assertEquals(c.next(), 0) + self.assertEquals(c.get_key(), ds.key(26)) + c.remove() + self.assertEquals(c.get_key(), ds.key(26)) + msg = '/requires value be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_value, msg) + self.assertEquals(c.next(), 0) + self.assertEquals(c.get_key(), ds.key(27)) + + # Do a remove using the cursor without setting a position, and confirm + # no key, value or position remains. + def test_cursor_remove_without_position(self): + if self.skip(): + return + + # Build an object. 
+ uri = self.uri + ':test_cursor11' + ds = self.ds(self, uri, 50, key_format=self.keyfmt) + ds.populate() + s = self.conn.open_session() + c = s.open_cursor(uri, None) + + c.set_key(ds.key(25)) + c.remove() + msg = '/requires key be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_key, msg) + msg = '/requires value be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_value, msg) + self.assertEquals(c.next(), 0) + self.assertEquals(c.get_key(), ds.key(1)) + + # Do a remove using the key after also setting a position, and confirm + # no key, value or position remains. + def test_cursor_remove_with_key_and_position(self): + if self.skip(): + return + + # Build an object. + uri = self.uri + ':test_cursor11' + ds = self.ds(self, uri, 50, key_format=self.keyfmt) + ds.populate() + s = self.conn.open_session() + c = s.open_cursor(uri, None) + + c.set_key(ds.key(25)) + self.assertEquals(c.search(), 0) + c.set_key(ds.key(25)) + c.remove() + msg = '/requires key be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_key, msg) + msg = '/requires value be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_value, msg) + self.assertEquals(c.next(), 0) + self.assertEquals(c.get_key(), ds.key(1)) + + # Do an insert and confirm no key, value or position remains. + def test_cursor_insert(self): + if self.skip(): + return + + # Build an object. 
+ uri = self.uri + ':test_cursor11' + ds = self.ds(self, uri, 50, key_format=self.keyfmt) + ds.populate() + s = self.conn.open_session() + c = s.open_cursor(uri, None) + + c.set_key(ds.key(25)) + c.set_value(ds.value(300)) + c.insert() + msg = '/requires key be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_key, msg) + msg = '/requires value be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_value, msg) + self.assertEquals(c.next(), 0) + self.assertEquals(c.get_key(), ds.key(1)) + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_encrypt04.py b/src/third_party/wiredtiger/test/suite/test_encrypt04.py index 17777fc9564..19c0b85d427 100644 --- a/src/third_party/wiredtiger/test/suite/test_encrypt04.py +++ b/src/third_party/wiredtiger/test/suite/test_encrypt04.py @@ -113,7 +113,7 @@ class test_encrypt04(wttest.WiredTigerTestCase, suite_subprocess): completed = False try: conn = self.wiredtiger_open(dir, - 'create,error_prefix="{0}: ",{1}{2}'.format( + 'create,error_prefix="{0}",{1}{2}'.format( self.shortid(), encarg, extarg)) except (BaseException) as err: # Capture the recognizable error created by rotn diff --git a/src/third_party/wiredtiger/test/suite/test_overwrite.py b/src/third_party/wiredtiger/test/suite/test_overwrite.py index 4739abaa578..c894de99bd0 100644 --- a/src/third_party/wiredtiger/test/suite/test_overwrite.py +++ b/src/third_party/wiredtiger/test/suite/test_overwrite.py @@ -27,32 +27,47 @@ # OTHER DEALINGS IN THE SOFTWARE. 
import wiredtiger, wttest -from wtdataset import SimpleDataSet +from wtdataset import SimpleDataSet, SimpleIndexDataSet +from wtdataset import SimpleLSMDataSet, ComplexDataSet, ComplexLSMDataSet from wtscenario import make_scenarios # test_overwrite.py # cursor overwrite configuration method class test_overwrite(wttest.WiredTigerTestCase): name = 'overwrite' - scenarios = make_scenarios([ - ('file-r', dict(type='file:', keyfmt='r', dataset=SimpleDataSet)), - ('file-S', dict(type='file:', keyfmt='S', dataset=SimpleDataSet)), - ('lsm-S', dict(type='lsm:', keyfmt='S', dataset=SimpleDataSet)), - ('table-r', dict(type='table:', keyfmt='r', dataset=SimpleDataSet)), - ('table-S', dict(type='table:', keyfmt='S', dataset=SimpleDataSet)), - ]) + keyfmt = [ + ('integer', dict(keyfmt='i')), + ('recno', dict(keyfmt='r')), + ('string', dict(keyfmt='S')), + ] + types = [ + ('file', dict(uri='file:', ds=SimpleDataSet)), + ('lsm', dict(uri='lsm:', ds=SimpleDataSet)), + ('table-complex', dict(uri='table:', ds=ComplexDataSet)), + ('table-complex-lsm', dict(uri='table:', ds=ComplexLSMDataSet)), + ('table-index', dict(uri='table:', ds=SimpleIndexDataSet)), + ('table-simple', dict(uri='table:', ds=SimpleDataSet)), + ('table-simple-lsm', dict(uri='table:', ds=SimpleLSMDataSet)), + ] + scenarios = make_scenarios(types, keyfmt) + def skip(self): + return self.keyfmt == 'r' and \ + (self.ds.is_lsm() or self.uri == 'lsm') # Confirm a cursor configured with/without overwrite correctly handles # non-existent records during insert, remove and update operations. def test_overwrite_insert(self): - uri = self.type + self.name - ds = self.dataset(self, uri, 100, key_format=self.keyfmt) + if self.skip(): + return + + uri = self.uri + self.name + ds = self.ds(self, uri, 100, key_format=self.keyfmt) ds.populate() # Insert of an existing record with overwrite off fails. 
cursor = self.session.open_cursor(uri, None, "overwrite=false") cursor.set_key(ds.key(5)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1000)) self.assertRaises(wiredtiger.WiredTigerError, lambda: cursor.insert()) # One additional test for the insert method: duplicate the cursor with @@ -63,30 +78,33 @@ class test_overwrite(wttest.WiredTigerTestCase): cursor = self.session.open_cursor(uri, None, "overwrite=false") cursor.set_key(ds.key(5)) dupc = self.session.open_cursor(None, cursor, "overwrite=true") - dupc.set_value('XXXXXXXXXX') + dupc.set_value(ds.value(1001)) self.assertEquals(dupc.insert(), 0) # Insert of an existing record with overwrite on succeeds. cursor = self.session.open_cursor(uri, None) cursor.set_key(ds.key(6)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1002)) self.assertEquals(cursor.insert(), 0) # Insert of a non-existent record with overwrite off succeeds. cursor = self.session.open_cursor(uri, None, "overwrite=false") cursor.set_key(ds.key(200)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1003)) self.assertEquals(cursor.insert(), 0) # Insert of a non-existent record with overwrite on succeeds. cursor = self.session.open_cursor(uri, None) cursor.set_key(ds.key(201)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1004)) self.assertEquals(cursor.insert(), 0) def test_overwrite_remove(self): - uri = self.type + self.name - ds = self.dataset(self, uri, 100, key_format=self.keyfmt) + if self.skip(): + return + + uri = self.uri + self.name + ds = self.ds(self, uri, 100, key_format=self.keyfmt) ds.populate() # Remove of an existing record with overwrite off succeeds. 
@@ -110,32 +128,35 @@ class test_overwrite(wttest.WiredTigerTestCase): self.assertEquals(cursor.remove(), 0) def test_overwrite_update(self): - uri = self.type + self.name - ds = self.dataset(self, uri, 100, key_format=self.keyfmt) + if self.skip(): + return + + uri = self.uri + self.name + ds = self.ds(self, uri, 100, key_format=self.keyfmt) ds.populate() # Update of an existing record with overwrite off succeeds. cursor = self.session.open_cursor(uri, None, "overwrite=false") cursor.set_key(ds.key(5)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1005)) self.assertEquals(cursor.update(), 0) # Update of an existing record with overwrite on succeeds. cursor = self.session.open_cursor(uri, None) cursor.set_key(ds.key(6)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1006)) self.assertEquals(cursor.update(), 0) # Update of a non-existent record with overwrite off fails. cursor = self.session.open_cursor(uri, None, "overwrite=false") cursor.set_key(ds.key(200)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1007)) self.assertEquals(cursor.update(), wiredtiger.WT_NOTFOUND) # Update of a non-existent record with overwrite on succeeds. 
cursor = self.session.open_cursor(uri, None) cursor.set_key(ds.key(201)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1008)) self.assertEquals(cursor.update(), 0) if __name__ == '__main__': diff --git a/src/third_party/wiredtiger/test/suite/test_reconfig02.py b/src/third_party/wiredtiger/test/suite/test_reconfig02.py index 8054b2a6ab5..042d3bbe71f 100644 --- a/src/third_party/wiredtiger/test/suite/test_reconfig02.py +++ b/src/third_party/wiredtiger/test/suite/test_reconfig02.py @@ -62,7 +62,7 @@ class test_reconfig02(wttest.WiredTigerTestCase): self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.conn.reconfigure("log=(path=foo)"), msg) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.conn.reconfigure("log=(recovery=true)"), msg) + lambda: self.conn.reconfigure("log=(recover=true)"), msg) # Logging starts on, but prealloc is off. Verify it is off. # Reconfigure it on and run again, making sure that log files diff --git a/src/third_party/wiredtiger/test/suite/test_shared_cache01.py b/src/third_party/wiredtiger/test/suite/test_shared_cache01.py index 70560a625ee..c3bd946cc4b 100644 --- a/src/third_party/wiredtiger/test/suite/test_shared_cache01.py +++ b/src/third_party/wiredtiger/test/suite/test_shared_cache01.py @@ -73,7 +73,7 @@ class test_shared_cache01(wttest.WiredTigerTestCase): os.mkdir(name) next_conn = self.wiredtiger_open( name, - 'create,error_prefix="' + self.shortid() + ': "' + + 'create,error_prefix="%s",' % self.shortid() + pool_opts + extra_opts) self.conns.append(next_conn) self.sessions.append(next_conn.open_session(None)) diff --git a/src/third_party/wiredtiger/test/suite/test_shared_cache02.py b/src/third_party/wiredtiger/test/suite/test_shared_cache02.py index 7cde6c86695..67f9bf7c6b7 100644 --- a/src/third_party/wiredtiger/test/suite/test_shared_cache02.py +++ b/src/third_party/wiredtiger/test/suite/test_shared_cache02.py @@ -73,7 +73,7 @@ class 
test_shared_cache02(wttest.WiredTigerTestCase): os.mkdir(name) next_conn = self.wiredtiger_open( name, - 'create,error_prefix="' + self.shortid() + ': "' + + 'create,error_prefix="%s",' % self.shortid() + pool_opts + extra_opts) self.conns.append(next_conn) self.sessions.append(next_conn.open_session(None)) diff --git a/src/third_party/wiredtiger/test/suite/test_truncate01.py b/src/third_party/wiredtiger/test/suite/test_truncate01.py index 7d2b3862568..98b741ba6a4 100644 --- a/src/third_party/wiredtiger/test/suite/test_truncate01.py +++ b/src/third_party/wiredtiger/test/suite/test_truncate01.py @@ -128,6 +128,7 @@ class test_truncate_cursor_order(wttest.WiredTigerTestCase): msg = '/the start cursor position is after the stop cursor position/' self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.truncate(None, c1, c2, None), msg) + c1.set_key(ds.key(10)) c2.set_key(ds.key(20)) self.session.truncate(None, c1, c2, None) diff --git a/src/third_party/wiredtiger/test/suite/test_txn07.py b/src/third_party/wiredtiger/test/suite/test_txn07.py index e2986fb999a..e26cf5aaaea 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn07.py +++ b/src/third_party/wiredtiger/test/suite/test_txn07.py @@ -76,7 +76,7 @@ class test_txn07(wttest.WiredTigerTestCase, suite_subprocess): def conn_config(self): return 'log=(archive=false,enabled,file_max=%s,' % self.logmax + \ 'compressor=%s)' % self.compress + \ - ',create,error_prefix="%s: ",' % self.shortid() + \ + ',create,error_prefix="%s",' % self.shortid() + \ "statistics=(fast)," + \ 'transaction_sync="%s",' % \ self.sync_list[self.scenario_number % len(self.sync_list)] diff --git a/src/third_party/wiredtiger/test/suite/wttest.py b/src/third_party/wiredtiger/test/suite/wttest.py index 0dce51f07d5..e91838544b9 100644 --- a/src/third_party/wiredtiger/test/suite/wttest.py +++ b/src/third_party/wiredtiger/test/suite/wttest.py @@ -302,7 +302,7 @@ class WiredTigerTestCase(unittest.TestCase): # In case the open 
starts additional threads, flush first to # avoid confusion. sys.stdout.flush() - conn_param = 'create,error_prefix="%s: ",%s' % (self.shortid(), config) + conn_param = 'create,error_prefix="%s",%s' % (self.shortid(), config) try: conn = self.wiredtiger_open(home, conn_param) except wiredtiger.WiredTigerError as e: diff --git a/src/third_party/wiredtiger/test/thread/file.c b/src/third_party/wiredtiger/test/thread/file.c index 81ec6ad44f8..7a7d16c4cd6 100644 --- a/src/third_party/wiredtiger/test/thread/file.c +++ b/src/third_party/wiredtiger/test/thread/file.c @@ -33,20 +33,18 @@ file_create(const char *name) { WT_SESSION *session; int ret; - char *p, *end, config[128]; + char config[128]; if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) testutil_die(ret, "conn.session"); - p = config; - end = config + sizeof(config); - p += snprintf(p, (size_t)(end - p), + testutil_check(__wt_snprintf(config, sizeof(config), "key_format=%s," "internal_page_max=%d," - "leaf_page_max=%d,", - ftype == ROW ? "u" : "r", 16 * 1024, 128 * 1024); - if (ftype == FIX) - (void)snprintf(p, (size_t)(end - p), ",value_format=3t"); + "leaf_page_max=%d," + "%s", + ftype == ROW ? "u" : "r", 16 * 1024, 128 * 1024, + ftype == FIX ? 
",value_format=3t" : "")); if ((ret = session->create(session, name, config)) != 0) if (ret != EEXIST) @@ -62,9 +60,10 @@ load(const char *name) WT_CURSOR *cursor; WT_ITEM *key, _key, *value, _value; WT_SESSION *session; - char keybuf[64], valuebuf[64]; - u_int keyno; + uint64_t keyno; + size_t len; int ret; + char keybuf[64], valuebuf[64]; file_create(name); @@ -79,18 +78,22 @@ load(const char *name) value = &_value; for (keyno = 1; keyno <= nkeys; ++keyno) { if (ftype == ROW) { + testutil_check(__wt_snprintf_len_set( + keybuf, sizeof(keybuf), + &len, "%017" PRIu64, keyno)); key->data = keybuf; - key->size = (uint32_t) - snprintf(keybuf, sizeof(keybuf), "%017u", keyno); + key->size = (uint32_t)len; cursor->set_key(cursor, key); } else - cursor->set_key(cursor, (uint32_t)keyno); - value->data = valuebuf; + cursor->set_key(cursor, keyno); if (ftype == FIX) cursor->set_value(cursor, 0x01); else { - value->size = (uint32_t) - snprintf(valuebuf, sizeof(valuebuf), "%37u", keyno); + testutil_check(__wt_snprintf_len_set( + valuebuf, sizeof(valuebuf), + &len, "%37" PRIu64, keyno)); + value->data = valuebuf; + value->size = (uint32_t)len; cursor->set_value(cursor, value); } if ((ret = cursor->insert(cursor)) != 0) diff --git a/src/third_party/wiredtiger/test/thread/rw.c b/src/third_party/wiredtiger/test/thread/rw.c index c6107a06c49..e8a2650ca51 100644 --- a/src/third_party/wiredtiger/test/thread/rw.c +++ b/src/third_party/wiredtiger/test/thread/rw.c @@ -66,7 +66,8 @@ rw_start(u_int readers, u_int writers) for (i = 0; i < writers; ++i) { if (i == 0 || multiple_files) { run_info[i].name = dmalloc(64); - snprintf(run_info[i].name, 64, FNAME, i); + testutil_check(__wt_snprintf( + run_info[i].name, 64, FNAME, i)); /* Vary by orders of magnitude */ if (vary_nops) @@ -88,8 +89,8 @@ rw_start(u_int readers, u_int writers) run_info[offset].name = dmalloc(64); /* Have readers read from tables with writes. 
*/ name_index = i % writers; - snprintf( - run_info[offset].name, 64, FNAME, name_index); + testutil_check(__wt_snprintf( + run_info[offset].name, 64, FNAME, name_index)); /* Vary by orders of magnitude */ if (vary_nops) @@ -158,7 +159,8 @@ static inline void reader_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) { WT_ITEM *key, _key; - u_int keyno; + size_t len; + uint64_t keyno; int ret; char keybuf[64]; @@ -166,17 +168,18 @@ reader_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) keyno = __wt_random(&s->rnd) % nkeys + 1; if (ftype == ROW) { + testutil_check(__wt_snprintf_len_set( + keybuf, sizeof(keybuf), &len, "%017" PRIu64, keyno)); key->data = keybuf; - key->size = (uint32_t) - snprintf(keybuf, sizeof(keybuf), "%017u", keyno); + key->size = (uint32_t)len; cursor->set_key(cursor, key); } else - cursor->set_key(cursor, (uint32_t)keyno); + cursor->set_key(cursor, keyno); if ((ret = cursor->search(cursor)) != 0 && ret != WT_NOTFOUND) testutil_die(ret, "cursor.search"); if (log_print) testutil_check(session->log_printf(session, - "Reader Thread %p key %017u", pthread_self(), keyno)); + "Reader Thread %p key %017" PRIu64, pthread_self(), keyno)); } /* @@ -195,7 +198,7 @@ reader(void *arg) id = (int)(uintptr_t)arg; s = &run_info[id]; - __wt_thread_id(tid, sizeof(tid)); + testutil_check(__wt_thread_id(tid, sizeof(tid))); __wt_random_init(&s->rnd); printf(" read thread %2d starting: tid: %s, file: %s\n", @@ -242,7 +245,8 @@ static inline void writer_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) { WT_ITEM *key, _key, *value, _value; - u_int keyno; + uint64_t keyno; + size_t len; int ret; char keybuf[64], valuebuf[64]; @@ -251,12 +255,13 @@ writer_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) keyno = __wt_random(&s->rnd) % nkeys + 1; if (ftype == ROW) { + testutil_check(__wt_snprintf_len_set( + keybuf, sizeof(keybuf), &len, "%017" PRIu64, keyno)); key->data = keybuf; - key->size = (uint32_t) - snprintf(keybuf, sizeof(keybuf), "%017u", keyno); + 
key->size = (uint32_t)len; cursor->set_key(cursor, key); } else - cursor->set_key(cursor, (uint32_t)keyno); + cursor->set_key(cursor, keyno); if (keyno % 5 == 0) { ++s->remove; if ((ret = @@ -268,8 +273,10 @@ writer_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) if (ftype == FIX) cursor->set_value(cursor, 0x10); else { - value->size = (uint32_t)snprintf( - valuebuf, sizeof(valuebuf), "XXX %37u", keyno); + testutil_check(__wt_snprintf_len_set( + valuebuf, sizeof(valuebuf), + &len, "XXX %37" PRIu64, keyno)); + value->size = (uint32_t)len; cursor->set_value(cursor, value); } if ((ret = cursor->update(cursor)) != 0) @@ -277,7 +284,7 @@ writer_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) } if (log_print) testutil_check(session->log_printf(session, - "Writer Thread %p key %017u", pthread_self(), keyno)); + "Writer Thread %p key %017" PRIu64, pthread_self(), keyno)); } /* @@ -296,7 +303,7 @@ writer(void *arg) id = (int)(uintptr_t)arg; s = &run_info[id]; - __wt_thread_id(tid, sizeof(tid)); + testutil_check(__wt_thread_id(tid, sizeof(tid))); __wt_random_init(&s->rnd); printf("write thread %2d starting: tid: %s, file: %s\n", diff --git a/src/third_party/wiredtiger/test/thread/stats.c b/src/third_party/wiredtiger/test/thread/stats.c index 67a2c02719b..839d65e8a4d 100644 --- a/src/third_party/wiredtiger/test/thread/stats.c +++ b/src/third_party/wiredtiger/test/thread/stats.c @@ -65,7 +65,8 @@ stats(void) /* File statistics. 
*/ if (!multiple_files) { - (void)snprintf(name, sizeof(name), "statistics:" FNAME, 0); + testutil_check(__wt_snprintf( + name, sizeof(name), "statistics:" FNAME, 0)); if ((ret = session->open_cursor( session, name, NULL, NULL, &cursor)) != 0) testutil_die(ret, "session.open_cursor"); diff --git a/src/third_party/wiredtiger/test/thread/t.c b/src/third_party/wiredtiger/test/thread/t.c index baadbf2adb9..d2ed4c74bb7 100644 --- a/src/third_party/wiredtiger/test/thread/t.c +++ b/src/third_party/wiredtiger/test/thread/t.c @@ -37,7 +37,6 @@ int multiple_files; /* File per thread */ int session_per_op; /* New session per operation */ static char home[512]; /* Program working dir */ -static char *progname; /* Program name */ static FILE *logfp; /* Log file */ static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *); @@ -59,10 +58,7 @@ main(int argc, char *argv[]) int ch, cnt, runs; char *config_open, *working_dir; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); config_open = NULL; working_dir = NULL; @@ -189,19 +185,15 @@ wt_connect(char *config_open) }; int ret; char config[512]; - size_t print_count; testutil_clean_work_dir(home); testutil_make_work_dir(home); - print_count = (size_t)snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "create,statistics=(all),error_prefix=\"%s\",%s%s", progname, config_open == NULL ? "" : ",", - config_open == NULL ? "" : config_open); - - if (print_count >= sizeof(config)) - testutil_die(EINVAL, "Config string too long"); + config_open == NULL ? 
"" : config_open)); if ((ret = wiredtiger_open(home, &event_handler, config, &conn)) != 0) testutil_die(ret, "wiredtiger_open"); diff --git a/src/third_party/wiredtiger/test/utility/misc.c b/src/third_party/wiredtiger/test/utility/misc.c index 1ba08ddd77f..934dac86a7b 100644 --- a/src/third_party/wiredtiger/test/utility/misc.c +++ b/src/third_party/wiredtiger/test/utility/misc.c @@ -28,6 +28,7 @@ #include "test_util.h" void (*custom_die)(void) = NULL; +const char *progname = "program name not set"; /* * die -- @@ -42,7 +43,9 @@ testutil_die(int e, const char *fmt, ...) if (custom_die != NULL) (*custom_die)(); + fprintf(stderr, "%s: FAILED", progname); if (fmt != NULL) { + fprintf(stderr, ": "); va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); @@ -55,6 +58,20 @@ testutil_die(int e, const char *fmt, ...) } /* + * testutil_set_progname -- + * Set the global program name for error handling. + */ +const char * +testutil_set_progname(char * const *argv) +{ + if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) + progname = argv[0]; + else + ++progname; + return (progname); +} + +/* * testutil_work_dir_from_path -- * Takes a buffer, its size and the intended work directory. * Creates the full intended work directory in buffer. 
@@ -91,14 +108,14 @@ testutil_clean_work_dir(const char *dir) if ((buf = malloc(len)) == NULL) testutil_die(ENOMEM, "Failed to allocate memory"); - snprintf(buf, len, "%s %s %s %s", DIR_EXISTS_COMMAND, dir, - RM_COMMAND, dir); + testutil_check(__wt_snprintf( + buf, len, "%s %s %s %s", DIR_EXISTS_COMMAND, dir, RM_COMMAND, dir)); #else len = strlen(dir) + strlen(RM_COMMAND) + 1; if ((buf = malloc(len)) == NULL) testutil_die(ENOMEM, "Failed to allocate memory"); - snprintf(buf, len, "%s%s", RM_COMMAND, dir); + testutil_check(__wt_snprintf(buf, len, "%s%s", RM_COMMAND, dir)); #endif if ((ret = system(buf)) != 0 && ret != ENOENT) @@ -125,7 +142,7 @@ testutil_make_work_dir(char *dir) testutil_die(ENOMEM, "Failed to allocate memory"); /* mkdir shares syntax between Windows and Linux */ - snprintf(buf, len, "%s%s", MKDIR_COMMAND, dir); + testutil_check(__wt_snprintf(buf, len, "%s%s", MKDIR_COMMAND, dir)); if ((ret = system(buf)) != 0) testutil_die(ret, "%s", buf); free(buf); @@ -149,20 +166,25 @@ testutil_cleanup(TEST_OPTS *opts) } /* - * testutil_disable_long_tests -- - * Return if TESTUTIL_DISABLE_LONG_TESTS is set. + * testutil_enable_long_tests -- + * Return if TESTUTIL_ENABLE_LONG_TESTS is set. */ bool -testutil_disable_long_tests(void) +testutil_enable_long_tests(void) { const char *res; + bool enable_long_tests; if (__wt_getenv(NULL, - "TESTUTIL_DISABLE_LONG_TESTS", &res) == WT_NOTFOUND) + "TESTUTIL_ENABLE_LONG_TESTS", &res) == WT_NOTFOUND) return (false); + /* Accept anything other than "TESTUTIL_ENABLE_LONG_TESTS=0". 
*/ + enable_long_tests = res[0] != '0'; + free((void *)res); - return (true); + + return (enable_long_tests); } /* diff --git a/src/third_party/wiredtiger/test/utility/parse_opts.c b/src/third_party/wiredtiger/test/utility/parse_opts.c index 74a1c021d5d..c3eff3360de 100644 --- a/src/third_party/wiredtiger/test/utility/parse_opts.c +++ b/src/third_party/wiredtiger/test/utility/parse_opts.c @@ -43,10 +43,7 @@ testutil_parse_opts(int argc, char * const *argv, TEST_OPTS *opts) opts->running = true; opts->verbose = false; - if ((opts->progname = strrchr(argv[0], DIR_DELIM)) == NULL) - opts->progname = argv[0]; - else - ++opts->progname; + opts->progname = testutil_set_progname(argv); while ((ch = __wt_getopt(opts->progname, argc, argv, "A:h:n:o:pR:T:t:vW:")) != EOF) @@ -118,13 +115,15 @@ testutil_parse_opts(int argc, char * const *argv, TEST_OPTS *opts) if (opts->home == NULL) { len = strlen("WT_TEST.") + strlen(opts->progname) + 10; opts->home = dmalloc(len); - snprintf(opts->home, len, "WT_TEST.%s", opts->progname); + testutil_check(__wt_snprintf( + opts->home, len, "WT_TEST.%s", opts->progname)); } /* Setup the default URI string */ len = strlen("table:") + strlen(opts->progname) + 10; opts->uri = dmalloc(len); - snprintf(opts->uri, len, "table:%s", opts->progname); + testutil_check(__wt_snprintf( + opts->uri, len, "table:%s", opts->progname)); return (0); } diff --git a/src/third_party/wiredtiger/test/utility/test_util.h b/src/third_party/wiredtiger/test/utility/test_util.h index 489bbe18d87..406ed2c4961 100644 --- a/src/third_party/wiredtiger/test/utility/test_util.h +++ b/src/third_party/wiredtiger/test/utility/test_util.h @@ -48,7 +48,7 @@ /* Generic option parsing structure shared by all test cases. 
*/ typedef struct { char *home; - char *progname; + const char *progname; enum { TABLE_COL=1, /* Fixed-length column store */ TABLE_FIX=2, /* Variable-length column store */ TABLE_ROW=3 /* Row-store */ @@ -185,10 +185,13 @@ void *dstrdup(const void *); void *dstrndup(const char *, size_t); void testutil_clean_work_dir(const char *); void testutil_cleanup(TEST_OPTS *); -bool testutil_disable_long_tests(void); +bool testutil_enable_long_tests(void); void testutil_make_work_dir(char *); int testutil_parse_opts(int, char * const *, TEST_OPTS *); void testutil_work_dir_from_path(char *, size_t, const char *); void *thread_append(void *); void *thread_insert_append(void *); void *thread_prev(void *); + +extern const char *progname; +const char *testutil_set_progname(char * const *); diff --git a/src/third_party/wiredtiger/test/utility/thread.c b/src/third_party/wiredtiger/test/utility/thread.c index 38465b2f02b..122ad554442 100644 --- a/src/third_party/wiredtiger/test/utility/thread.c +++ b/src/third_party/wiredtiger/test/utility/thread.c @@ -57,8 +57,8 @@ thread_append(void *arg) if (opts->table_type == TABLE_FIX) cursor->set_value(cursor, buf[0]); else { - snprintf(buf, sizeof(buf), - "%" PRIu64 " VALUE ------", recno); + testutil_check(__wt_snprintf(buf, sizeof(buf), + "%" PRIu64 " VALUE ------", recno)); cursor->set_value(cursor, buf); } testutil_check(cursor->insert(cursor)); @@ -94,7 +94,8 @@ thread_insert_append(void *arg) session, opts->uri, NULL, NULL, &cursor)); for (i = 0; i < opts->nrecords; ++i) { - snprintf(kbuf, sizeof(kbuf), "%010d KEY------", (int)i); + testutil_check(__wt_snprintf( + kbuf, sizeof(kbuf), "%010d KEY------", (int)i)); cursor->set_key(cursor, kbuf); cursor->set_value(cursor, "========== VALUE ======="); testutil_check(cursor->insert(cursor)); diff --git a/src/third_party/wiredtiger/test/windows/windows_shim.h b/src/third_party/wiredtiger/test/windows/windows_shim.h index 648b991b1a2..8985904fb19 100644 --- 
a/src/third_party/wiredtiger/test/windows/windows_shim.h +++ b/src/third_party/wiredtiger/test/windows/windows_shim.h @@ -36,6 +36,8 @@ #include <io.h> #include <process.h> +#include "wt_internal.h" + #define inline __inline /* Define some POSIX types */ @@ -52,12 +54,7 @@ typedef int u_int; /* snprintf does not exist on <= VS 2013 */ #if _MSC_VER < 1900 -#define snprintf _wt_snprintf - -_Check_return_opt_ int __cdecl _wt_snprintf( - _Out_writes_(_MaxCount) char * _DstBuf, - _In_ size_t _MaxCount, - _In_z_ _Printf_format_string_ const char * _Format, ...); +#define snprintf __wt_snprintf #endif /* |