diff options
author | Alex Gorrod <alexander.gorrod@mongodb.com> | 2017-04-01 07:11:40 +1100 |
---|---|---|
committer | Alex Gorrod <alexander.gorrod@mongodb.com> | 2017-04-01 07:11:40 +1100 |
commit | af735d14a603a6ef6256a6685f09ec13755a5024 (patch) | |
tree | eaa614fc686c4681694fff78442027d390503288 | |
parent | 16be8160ea3682f18df9bcc6d5addda0c0114137 (diff) | |
parent | 423f4e11050f7644b1a8d2b6b1cc60c35ef915c8 (diff) | |
download | mongo-af735d14a603a6ef6256a6685f09ec13755a5024.tar.gz |
Merge branch 'develop' into mongodb-3.6
131 files changed, 2920 insertions, 1909 deletions
diff --git a/bench/wtperf/config.c b/bench/wtperf/config.c index 9eea99eeec4..e4eee66e4cb 100644 --- a/bench/wtperf/config.c +++ b/bench/wtperf/config.c @@ -438,14 +438,13 @@ config_opt(WTPERF *wtperf, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v) return (EINVAL); } strp = (char **)valueloc; - newlen = v->len + 1; if (*strp == NULL) begin = newstr = dstrdup(v->str); else { - newlen += strlen(*strp) + 1; - newstr = dcalloc(newlen, sizeof(char)); - snprintf(newstr, newlen, - "%s,%*s", *strp, (int)v->len, v->str); + newlen = strlen(*strp) + v->len + strlen(",") + 1; + newstr = dmalloc(newlen); + testutil_check(__wt_snprintf(newstr, newlen, + "%s,%.*s", *strp, (int)v->len, v->str)); /* Free the old value now we've copied it. */ free(*strp); begin = &newstr[(newlen - 1) - v->len]; @@ -712,7 +711,7 @@ config_opt_name_value(WTPERF *wtperf, const char *name, const char *value) /* name="value" */ len = strlen(name) + strlen(value) + 4; optstr = dmalloc(len); - snprintf(optstr, len, "%s=\"%s\"", name, value); + testutil_check(__wt_snprintf(optstr, len, "%s=\"%s\"", name, value)); ret = config_opt_str(wtperf, optstr); free(optstr); return (ret); diff --git a/bench/wtperf/idle_table_cycle.c b/bench/wtperf/idle_table_cycle.c index bb44cfbde59..4387860cfb2 100644 --- a/bench/wtperf/idle_table_cycle.c +++ b/bench/wtperf/idle_table_cycle.c @@ -80,8 +80,8 @@ cycle_idle_tables(void *arg) } for (cycle_count = 0; wtperf->idle_cycle_run; ++cycle_count) { - snprintf(uri, sizeof(uri), - "%s_cycle%07d", wtperf->uris[0], cycle_count); + testutil_check(__wt_snprintf(uri, sizeof(uri), + "%s_cycle%07d", wtperf->uris[0], cycle_count)); /* Don't busy cycle in this loop. */ __wt_sleep(1, 0); diff --git a/bench/wtperf/misc.c b/bench/wtperf/misc.c index 24b3323a49a..0874794e01e 100644 --- a/bench/wtperf/misc.c +++ b/bench/wtperf/misc.c @@ -46,8 +46,8 @@ setup_log_file(WTPERF *wtperf) len = strlen(wtperf->monitor_dir) + strlen(opts->table_name) + strlen(".stat") + 2; fname = dmalloc(len); - snprintf(fname, len, - "%s/%s.stat", wtperf->monitor_dir, opts->table_name); + testutil_check(__wt_snprintf(fname, len, + "%s/%s.stat", wtperf->monitor_dir, opts->table_name)); if ((wtperf->logf = fopen(fname, "w")) == NULL) { ret = errno; fprintf(stderr, "%s: %s\n", fname, strerror(ret)); diff --git a/bench/wtperf/track.c b/bench/wtperf/track.c index 822bdaa4b4a..86a26120a6a 100644 --- a/bench/wtperf/track.c +++ b/bench/wtperf/track.c @@ -288,8 +288,8 @@ latency_print_single(WTPERF *wtperf, TRACK *total, const char *name) uint64_t cumops; char path[1024]; - snprintf(path, sizeof(path), - "%s/latency.%s", wtperf->monitor_dir, name); + testutil_check(__wt_snprintf(path, sizeof(path), + "%s/latency.%s", wtperf->monitor_dir, name)); if ((fp = fopen(path, "w")) == NULL) { lprintf(wtperf, errno, 0, "%s", path); return; diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 772dedac8c8..6d79eebe8b2 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -551,7 +551,8 @@ worker(void *arg) goto err; } for (i = 0; i < opts->table_count_idle; i++) { - snprintf(buf, 512, "%s_idle%05d", wtperf->uris[0], (int)i); + testutil_check(__wt_snprintf( + buf, 512, "%s_idle%05d", wtperf->uris[0], (int)i)); if ((ret = session->open_cursor( session, buf, NULL, NULL, &tmp_cursor)) != 0) { lprintf(wtperf, ret, 0, @@ -1297,7 +1298,8 @@ monitor(void *arg) /* Open the logging file. */ len = strlen(wtperf->monitor_dir) + 100; path = dmalloc(len); - snprintf(path, len, "%s/monitor", wtperf->monitor_dir); + testutil_check(__wt_snprintf( + path, len, "%s/monitor", wtperf->monitor_dir)); if ((fp = fopen(path, "w")) == NULL) { lprintf(wtperf, errno, 0, "%s", path); goto err; @@ -1937,19 +1939,19 @@ create_uris(WTPERF *wtperf) /* If there is only one table, just use the base name. */ wtperf->uris[i] = dmalloc(len); if (opts->table_count == 1) - snprintf(wtperf->uris[i], - len, "table:%s", opts->table_name); + testutil_check(__wt_snprintf(wtperf->uris[i], + len, "table:%s", opts->table_name)); else - snprintf(wtperf->uris[i], - len, "table:%s%05d", opts->table_name, i); + testutil_check(__wt_snprintf(wtperf->uris[i], + len, "table:%s%05d", opts->table_name, i)); } /* Create the log-like-table URI. */ len = strlen("table:") + strlen(opts->table_name) + strlen("_log_table") + 1; wtperf->log_table_uri = dmalloc(len); - snprintf( - wtperf->log_table_uri, len, "table:%s_log_table", opts->table_name); + testutil_check(__wt_snprintf(wtperf->log_table_uri, + len, "table:%s_log_table", opts->table_name)); } static int @@ -1971,7 +1973,8 @@ create_tables(WTPERF *wtperf) } for (i = 0; i < opts->table_count_idle; i++) { - snprintf(buf, 512, "%s_idle%05d", wtperf->uris[0], (int)i); + testutil_check(__wt_snprintf( + buf, 512, "%s_idle%05d", wtperf->uris[0], (int)i)); if ((ret = session->create( session, buf, opts->table_config)) != 0) { lprintf(wtperf, ret, 0, @@ -2000,8 +2003,9 @@ create_tables(WTPERF *wtperf) return (ret); } if (opts->index) { - snprintf(buf, 512, "index:%s:val_idx", - wtperf->uris[i] + strlen("table:")); + testutil_check(__wt_snprintf(buf, 512, + "index:%s:val_idx", + wtperf->uris[i] + strlen("table:"))); if ((ret = session->create( session, buf, "columns=(val)")) != 0) { lprintf(wtperf, ret, 0, @@ -2186,15 +2190,15 @@ start_all_runs(WTPERF *wtperf) */ len = strlen(wtperf->home) + 5; next_wtperf->home = dmalloc(len); - snprintf( - next_wtperf->home, len, "%s/D%02d", wtperf->home, (int)i); + testutil_check(__wt_snprintf( + next_wtperf->home, len, "%s/D%02d", wtperf->home, (int)i)); if (opts->create != 0) recreate_dir(next_wtperf->home); len = strlen(wtperf->monitor_dir) + 5; next_wtperf->monitor_dir = dmalloc(len); - snprintf(next_wtperf->monitor_dir, - len, "%s/D%02d", wtperf->monitor_dir, (int)i); + testutil_check(__wt_snprintf(next_wtperf->monitor_dir, + len, "%s/D%02d", wtperf->monitor_dir, (int)i)); if (opts->create != 0 && strcmp(next_wtperf->home, next_wtperf->monitor_dir) != 0) recreate_dir(next_wtperf->monitor_dir); @@ -2543,9 +2547,9 @@ main(int argc, char *argv[]) */ req_len = strlen(",async=(enabled=true,threads=)") + 4; wtperf->async_config = dmalloc(req_len); - snprintf(wtperf->async_config, req_len, + testutil_check(__wt_snprintf(wtperf->async_config, req_len, ",async=(enabled=true,threads=%" PRIu32 ")", - opts->async_threads); + opts->async_threads)); } if ((ret = config_compress(wtperf)) != 0) goto err; @@ -2578,10 +2582,10 @@ main(int argc, char *argv[]) sreq_len = strlen("session_max=") + 6; req_len += sreq_len; sess_cfg = dmalloc(sreq_len); - snprintf(sess_cfg, sreq_len, + testutil_check(__wt_snprintf(sess_cfg, sreq_len, "session_max=%" PRIu32, opts->session_count_idle + - wtperf->workers_cnt + opts->populate_threads + 10); + wtperf->workers_cnt + opts->populate_threads + 10)); } req_len += opts->in_memory ? strlen("in_memory=true") : 0; req_len += user_cconfig != NULL ? strlen(user_cconfig) : 0; @@ -2591,35 +2595,34 @@ main(int argc, char *argv[]) append_comma = ""; if (wtperf->async_config != NULL && strlen(wtperf->async_config) != 0) { - pos += (size_t)snprintf( - cc_buf + pos, req_len - pos, "%s%s", - append_comma, wtperf->async_config); + testutil_check(__wt_snprintf_len_incr( + cc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, wtperf->async_config)); append_comma = ","; } if (wtperf->compress_ext != NULL && strlen(wtperf->compress_ext) != 0) { - pos += (size_t)snprintf( - cc_buf + pos, req_len - pos, "%s%s", - append_comma, wtperf->compress_ext); + testutil_check(__wt_snprintf_len_incr( + cc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, wtperf->compress_ext)); append_comma = ","; } if (opts->in_memory) { - pos += (size_t)snprintf( - cc_buf + pos, req_len - pos, "%s%s", - append_comma, "in_memory=true"); + testutil_check(__wt_snprintf_len_incr( + cc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, "in_memory=true")); append_comma = ","; } if (sess_cfg != NULL && strlen(sess_cfg) != 0) { - pos += (size_t)snprintf( - cc_buf + pos, req_len - pos, "%s%s", - append_comma, sess_cfg); + testutil_check(__wt_snprintf_len_incr( + cc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, sess_cfg)); append_comma = ","; } if (user_cconfig != NULL && strlen(user_cconfig) != 0) { - pos += (size_t)snprintf( - cc_buf + pos, req_len - pos, "%s%s", - append_comma, user_cconfig); - append_comma = ","; + testutil_check(__wt_snprintf_len_incr( + cc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, user_cconfig)); } if (strlen(cc_buf) != 0 && (ret = @@ -2639,22 +2642,21 @@ main(int argc, char *argv[]) append_comma = ""; if (wtperf->compress_table != NULL && strlen(wtperf->compress_table) != 0) { - pos += (size_t)snprintf( - tc_buf + pos, req_len - pos, "%s%s", - append_comma, wtperf->compress_table); + testutil_check(__wt_snprintf_len_incr( + tc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, wtperf->compress_table)); append_comma = ","; } if (opts->index) { - pos += (size_t)snprintf( - tc_buf + pos, req_len - pos, "%s%s", - append_comma, INDEX_COL_NAMES); + testutil_check(__wt_snprintf_len_incr( + tc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, INDEX_COL_NAMES)); append_comma = ","; } if (user_tconfig != NULL && strlen(user_tconfig) != 0) { - pos += (size_t)snprintf( - tc_buf + pos, req_len - pos, "%s%s", - append_comma, user_tconfig); - append_comma = ","; + testutil_check(__wt_snprintf_len_incr( + tc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, user_tconfig)); } if (strlen(tc_buf) != 0 && (ret = @@ -2665,8 +2667,9 @@ main(int argc, char *argv[]) req_len = strlen(opts->table_config) + strlen(LOG_PARTIAL_CONFIG) + 1; wtperf->partial_config = dmalloc(req_len); - snprintf(wtperf->partial_config, req_len, "%s%s", - opts->table_config, LOG_PARTIAL_CONFIG); + testutil_check(__wt_snprintf( + wtperf->partial_config, req_len, "%s%s", + opts->table_config, LOG_PARTIAL_CONFIG)); } /* * Set the config for reopen. If readonly add in that string. @@ -2679,11 +2682,12 @@ main(int argc, char *argv[]) req_len = strlen(opts->conn_config) + 1; wtperf->reopen_config = dmalloc(req_len); if (opts->readonly) - snprintf(wtperf->reopen_config, req_len, "%s%s", - opts->conn_config, READONLY_CONFIG); + testutil_check(__wt_snprintf( + wtperf->reopen_config, req_len, "%s%s", + opts->conn_config, READONLY_CONFIG)); else - snprintf(wtperf->reopen_config, - req_len, "%s", opts->conn_config); + testutil_check(__wt_snprintf( + wtperf->reopen_config, req_len, "%s", opts->conn_config)); /* Sanity-check the configuration. */ if ((ret = config_sanity(wtperf)) != 0) @@ -2696,7 +2700,8 @@ main(int argc, char *argv[]) /* Write a copy of the config. */ req_len = strlen(wtperf->home) + strlen("/CONFIG.wtperf") + 1; path = dmalloc(req_len); - snprintf(path, req_len, "%s/CONFIG.wtperf", wtperf->home); + testutil_check(__wt_snprintf( + path, req_len, "%s/CONFIG.wtperf", wtperf->home)); config_opt_log(opts, path); free(path); @@ -2821,7 +2826,8 @@ recreate_dir(const char *name) len = strlen(name) * 2 + 100; buf = dmalloc(len); - (void)snprintf(buf, len, "rm -rf %s && mkdir %s", name, name); + testutil_check(__wt_snprintf( + buf, len, "rm -rf %s && mkdir %s", name, name)); testutil_checkfmt(system(buf), "system: %s", buf); free(buf); } diff --git a/dist/api_data.py b/dist/api_data.py index 1d669fa7fe0..22600dd5e29 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -295,12 +295,12 @@ file_config = format_meta + file_runtime_config + [ Config('split_deepen_per_child', '0', r''' entries allocated per child when deepening the tree''', type='int', undoc=True), - Config('split_pct', '75', r''' + Config('split_pct', '90', r''' the Btree page split size as a percentage of the maximum Btree page size, that is, when a Btree page is split, it will be split into smaller pages, where each page is the specified percentage of the maximum Btree page size''', - min='25', max='100'), + min='50', max='100'), ] # File metadata, including both configurable and non-configurable (internal) diff --git a/dist/filelist b/dist/filelist index 3886035eaa9..5a3348b940a 100644 --- a/dist/filelist +++ b/dist/filelist @@ -133,6 +133,7 @@ src/os_posix/os_path.c POSIX_HOST src/os_posix/os_priv.c POSIX_HOST src/os_posix/os_setvbuf.c POSIX_HOST src/os_posix/os_sleep.c POSIX_HOST +src/os_posix/os_snprintf.c POSIX_HOST src/os_posix/os_thread.c POSIX_HOST src/os_posix/os_time.c POSIX_HOST src/os_posix/os_yield.c POSIX_HOST @@ -152,7 +153,6 @@ src/os_win/os_snprintf.c WINDOWS_HOST src/os_win/os_thread.c WINDOWS_HOST src/os_win/os_time.c WINDOWS_HOST src/os_win/os_utf8.c WINDOWS_HOST -src/os_win/os_vsnprintf.c WINDOWS_HOST src/os_win/os_winerr.c WINDOWS_HOST src/os_win/os_yield.c WINDOWS_HOST src/packing/pack_api.c diff --git a/dist/flags.py b/dist/flags.py index b20a7181532..64b5d789e72 100644 --- a/dist/flags.py +++ b/dist/flags.py @@ -96,19 +96,19 @@ flags = { 'CONN_CACHE_POOL', 'CONN_CKPT_SYNC', 'CONN_CLOSING', + 'CONN_CLOSING_NO_MORE_OPENS', 'CONN_EVICTION_RUN', 'CONN_IN_MEMORY', 'CONN_LAS_OPEN', 'CONN_LEAK_MEMORY', - 'CONN_LOG_SERVER_RUN', 'CONN_LSM_MERGE', 'CONN_PANIC', 'CONN_READONLY', 'CONN_RECOVERING', 'CONN_SERVER_ASYNC', 'CONN_SERVER_CHECKPOINT', + 'CONN_SERVER_LOG', 'CONN_SERVER_LSM', - 'CONN_SERVER_RUN', 'CONN_SERVER_STATISTICS', 'CONN_SERVER_SWEEP', 'CONN_WAS_BACKUP', diff --git a/dist/s_style b/dist/s_style index 8e755224ee2..388a481ef56 100755 --- a/dist/s_style +++ b/dist/s_style @@ -93,6 +93,14 @@ else cat $t fi + if ! expr "$f" : 'examples/c/*' > /dev/null && + ! expr "$f" : 'ext/*' > /dev/null && + ! expr "$f" : 'src/os_posix/os_snprintf.c' > /dev/null && + egrep '[^a-z_]snprintf\(|[^a-z_]vsnprintf\(' $f > $t; then + echo "$f: snprintf call, use WiredTiger library replacements" + cat $t + fi + # Alignment directive before "struct". egrep 'WT_COMPILER_TYPE_ALIGN.*struct' $f > $t test -s $t && { diff --git a/dist/stat_data.py b/dist/stat_data.py index a4d92345f88..ac79ffd029a 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -206,6 +206,7 @@ connection_stats = [ CacheStat('cache_eviction_force', 'pages evicted because they exceeded the in-memory maximum'), CacheStat('cache_eviction_force_delete', 'pages evicted because they had chains of deleted items'), CacheStat('cache_eviction_force_fail', 'failed eviction of pages that exceeded the in-memory maximum'), + CacheStat('cache_eviction_force_retune', 'force re-tuning of eviction workers once in a while'), CacheStat('cache_eviction_get_ref', 'eviction calls to get a page'), CacheStat('cache_eviction_get_ref_empty', 'eviction calls to get a page found queue empty'), CacheStat('cache_eviction_get_ref_empty2', 'eviction calls to get a page found queue empty after locking'), @@ -323,10 +324,12 @@ connection_stats = [ LogStat('log_scan_records', 'records processed by log scan'), LogStat('log_scan_rereads', 'log scan records requiring two reads'), LogStat('log_scans', 'log scan operations'), + LogStat('log_slot_active_closed', 'consolidated slot join active slot closed'), LogStat('log_slot_closes', 'consolidated slot closures'), LogStat('log_slot_coalesced', 'written slots coalesced'), LogStat('log_slot_consolidated', 'logging bytes consolidated', 'size'), LogStat('log_slot_joins', 'consolidated slot joins'), + LogStat('log_slot_no_free_slots', 'consolidated slot transitions unable to find free slot'), LogStat('log_slot_races', 'consolidated slot join races'), LogStat('log_slot_switch_busy', 'busy returns attempting to switch slots'), LogStat('log_slot_transitions', 'consolidated slot join transitions'), diff --git a/examples/c/ex_async.c b/examples/c/ex_async.c index f7531a5c3d8..5cfafca0418 100644 --- a/examples/c/ex_async.c +++ b/examples/c/ex_async.c @@ -170,12 +170,12 @@ main(void) * an asynchronous insert. */ /*! [async set the operation's string key] */ - snprintf(k[i], sizeof(k), "key%d", i); + (void)snprintf(k[i], sizeof(k), "key%d", i); op->set_key(op, k[i]); /*! [async set the operation's string key] */ /*! [async set the operation's string value] */ - snprintf(v[i], sizeof(v), "value%d", i); + (void)snprintf(v[i], sizeof(v), "value%d", i); op->set_value(op, v[i]); /*! [async set the operation's string value] */ @@ -218,7 +218,7 @@ main(void) * Set the operation's string key and value, and then do * an asynchronous search. */ - snprintf(k[i], sizeof(k), "key%d", i); + (void)snprintf(k[i], sizeof(k), "key%d", i); op->set_key(op, k[i]); ret = op->search(op); /*! [async search] */ diff --git a/examples/c/ex_backup.c b/examples/c/ex_backup.c index 0697cbb3458..83cc9b22ecc 100644 --- a/examples/c/ex_backup.c +++ b/examples/c/ex_backup.c @@ -96,7 +96,7 @@ compare_backups(int i) if (i == 0) (void)strncpy(msg, "MAIN", sizeof(msg)); else - snprintf(msg, sizeof(msg), "%d", i); + (void)snprintf(msg, sizeof(msg), "%d", i); printf( "Iteration %s: Tables %s.%d and %s.%d %s\n", msg, full_out, i, incr_out, i, ret == 0 ? "identical" : "differ"); @@ -131,8 +131,8 @@ setup_directories(void) * For incremental backups we need 0-N. The 0 incremental * directory will compare with the original at the end. */ - snprintf(buf, sizeof(buf), "rm -rf %s.%d && mkdir %s.%d", - home_incr, i, home_incr, i); + (void)snprintf(buf, sizeof(buf), + "rm -rf %s.%d && mkdir %s.%d", home_incr, i, home_incr, i); if ((ret = system(buf)) != 0) { fprintf(stderr, "%s: failed ret %d\n", buf, ret); return (ret); @@ -142,8 +142,8 @@ setup_directories(void) /* * For full backups we need 1-N. */ - snprintf(buf, sizeof(buf), "rm -rf %s.%d && mkdir %s.%d", - home_full, i, home_full, i); + (void)snprintf(buf, sizeof(buf), + "rm -rf %s.%d && mkdir %s.%d", home_full, i, home_full, i); if ((ret = system(buf)) != 0) { fprintf(stderr, "%s: failed ret %d\n", buf, ret); return (ret); @@ -164,8 +164,8 @@ add_work(WT_SESSION *session, int iter) * Perform some operations with individual auto-commit transactions. */ for (i = 0; i < MAX_KEYS; i++) { - snprintf(k, sizeof(k), "key.%d.%d", iter, i); - snprintf(v, sizeof(v), "value.%d.%d", iter, i); + (void)snprintf(k, sizeof(k), "key.%d.%d", iter, i); + (void)snprintf(v, sizeof(v), "value.%d.%d", iter, i); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); @@ -187,7 +187,7 @@ take_full_backup(WT_SESSION *session, int i) * directories. Otherwise only into the appropriate full directory. */ if (i != 0) { - snprintf(h, sizeof(h), "%s.%d", home_full, i); + (void)snprintf(h, sizeof(h), "%s.%d", home_full, i); hdir = h; } else hdir = home_incr; @@ -200,14 +200,15 @@ take_full_backup(WT_SESSION *session, int i) * Take a full backup into each incremental directory. */ for (j = 0; j < MAX_ITERATIONS; j++) { - snprintf(h, sizeof(h), "%s.%d", home_incr, j); + (void)snprintf(h, sizeof(h), + "%s.%d", home_incr, j); (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, h, filename); ret = system(buf); } else { - snprintf(h, sizeof(h), "%s.%d", home_full, i); + (void)snprintf(h, sizeof(h), "%s.%d", home_full, i); (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, hdir, filename); ret = system(buf); @@ -237,12 +238,12 @@ take_incr_backup(WT_SESSION *session, int i) * Copy into the 0 incremental directory and then each of the * incremental directories for this iteration and later. */ - snprintf(h, sizeof(h), "%s.0", home_incr); + (void)snprintf(h, sizeof(h), "%s.0", home_incr); (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, h, filename); ret = system(buf); for (j = i; j < MAX_ITERATIONS; j++) { - snprintf(h, sizeof(h), "%s.%d", home_incr, j); + (void)snprintf(h, sizeof(h), "%s.%d", home_incr, j); (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, h, filename); ret = system(buf); @@ -270,7 +271,8 @@ main(void) int i, ret; char cmd_buf[256]; - snprintf(cmd_buf, sizeof(cmd_buf), "rm -rf %s && mkdir %s", home, home); + (void)snprintf(cmd_buf, sizeof(cmd_buf), + "rm -rf %s && mkdir %s", home, home); if ((ret = system(cmd_buf)) != 0) { fprintf(stderr, "%s: failed ret %d\n", cmd_buf, ret); return (EXIT_FAILURE); diff --git a/examples/c/ex_encrypt.c b/examples/c/ex_encrypt.c index 00dc66fc24d..1520bd286cd 100644 --- a/examples/c/ex_encrypt.c +++ b/examples/c/ex_encrypt.c @@ -507,12 +507,12 @@ main(void) * we decrypt on read. */ for (i = 0; i < MAX_KEYS; i++) { - snprintf(keybuf, sizeof(keybuf), "key%d", i); + (void)snprintf(keybuf, sizeof(keybuf), "key%d", i); c1->set_key(c1, keybuf); c2->set_key(c2, keybuf); nc->set_key(nc, keybuf); - snprintf(valbuf, sizeof(valbuf), "value%d", i); + (void)snprintf(valbuf, sizeof(valbuf), "value%d", i); c1->set_value(c1, valbuf); c2->set_value(c2, valbuf); nc->set_value(nc, valbuf); diff --git a/examples/c/ex_log.c b/examples/c/ex_log.c index fdbc39412ae..0d8fbf97233 100644 --- a/examples/c/ex_log.c +++ b/examples/c/ex_log.c @@ -291,8 +291,8 @@ main(void) char cmd_buf[256], k[16], v[16]; count_min = 0; - snprintf(cmd_buf, sizeof(cmd_buf), "rm -rf %s %s && mkdir %s %s", - home1, home2, home1, home2); + (void)snprintf(cmd_buf, sizeof(cmd_buf), + "rm -rf %s %s && mkdir %s %s", home1, home2, home1, home2); if ((ret = system(cmd_buf)) != 0) { fprintf(stderr, "%s: failed ret %d\n", cmd_buf, ret); return (EXIT_FAILURE); @@ -312,8 +312,8 @@ main(void) * Perform some operations with individual auto-commit transactions. */ for (record_count = 0, i = 0; i < MAX_KEYS; i++, record_count++) { - snprintf(k, sizeof(k), "key%d", i); - snprintf(v, sizeof(v), "value%d", i); + (void)snprintf(k, sizeof(k), "key%d", i); + (void)snprintf(v, sizeof(v), "value%d", i); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); @@ -324,8 +324,8 @@ main(void) * Perform some operations within a single transaction. */ for (i = MAX_KEYS; i < MAX_KEYS+5; i++, record_count++) { - snprintf(k, sizeof(k), "key%d", i); - snprintf(v, sizeof(v), "value%d", i); + (void)snprintf(k, sizeof(k), "key%d", i); + (void)snprintf(v, sizeof(v), "value%d", i); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); diff --git a/examples/c/ex_sync.c b/examples/c/ex_sync.c index 2c610b1e570..b2d74b52f7f 100644 --- a/examples/c/ex_sync.c +++ b/examples/c/ex_sync.c @@ -59,8 +59,8 @@ main(void) char cmd_buf[256], k[16], v[16]; const char *conf; - snprintf(cmd_buf, sizeof(cmd_buf), "rm -rf %s && mkdir %s", - home, home); + (void)snprintf(cmd_buf, sizeof(cmd_buf), + "rm -rf %s && mkdir %s", home, home); if ((ret = system(cmd_buf)) != 0) { fprintf(stderr, "%s: failed ret %d\n", cmd_buf, ret); return (EXIT_FAILURE); @@ -98,8 +98,8 @@ main(void) ret = session->commit_transaction(session, conf); ret = session->begin_transaction(session, NULL); } - snprintf(k, sizeof(k), "key%d", i); - snprintf(v, sizeof(v), "value%d", i); + (void)snprintf(k, sizeof(k), "key%d", i); + (void)snprintf(v, sizeof(v), "value%d", i); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); @@ -113,8 +113,8 @@ main(void) * Perform some operations within a single transaction. */ for (i = MAX_KEYS; i < MAX_KEYS+5; i++, record_count++) { - snprintf(k, sizeof(k), "key%d", i); - snprintf(v, sizeof(v), "value%d", i); + (void)snprintf(k, sizeof(k), "key%d", i); + (void)snprintf(v, sizeof(v), "value%d", i); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); @@ -129,8 +129,8 @@ main(void) * Demonstrate using log_flush to force the log to disk. */ for (i = 0; i < MAX_KEYS; i++, record_count++) { - snprintf(k, sizeof(k), "key%d", record_count); - snprintf(v, sizeof(v), "value%d", record_count); + (void)snprintf(k, sizeof(k), "key%d", record_count); + (void)snprintf(v, sizeof(v), "value%d", record_count); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); @@ -138,8 +138,8 @@ main(void) ret = session->log_flush(session, "sync=on"); for (i = 0; i < MAX_KEYS; i++, record_count++) { - snprintf(k, sizeof(k), "key%d", record_count); - snprintf(v, sizeof(v), "value%d", record_count); + (void)snprintf(k, sizeof(k), "key%d", record_count); + (void)snprintf(v, sizeof(v), "value%d", record_count); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); diff --git a/lang/java/Makefile.am b/lang/java/Makefile.am index 7184fe610dc..2ff822a5d08 100644 --- a/lang/java/Makefile.am +++ b/lang/java/Makefile.am @@ -49,6 +49,7 @@ JAVA_JUNIT = \ $(JAVATEST)/ConcurrentCloseTest.java \ $(JAVATEST)/CursorTest.java \ $(JAVATEST)/CursorTest02.java \ + $(JAVATEST)/CursorTest03.java \ $(JAVATEST)/ExceptionTest.java \ $(JAVATEST)/PackTest.java \ $(JAVATEST)/PackTest02.java \ diff --git a/lang/java/wiredtiger.i b/lang/java/wiredtiger.i index efc512f2f5a..275b708090c 100644 --- a/lang/java/wiredtiger.i +++ b/lang/java/wiredtiger.i @@ -319,6 +319,15 @@ WT_CLASS(struct __wt_async_op, WT_ASYNC_OP, op) %rename (getValueFormat) __wt_async_op::getValue_format; %rename (getType) __wt_async_op::get_type; +/* + * Special cases: override the out typemap, return checking is done in the + * wrapper. + */ +%typemap(out) int __wt_cursor::compare_wrap, + int __wt_cursor::equals_wrap %{ + $result = $1; +%} + /* SWIG magic to turn Java byte strings into data / size. */ %apply (char *STRING, int LENGTH) { (char *data, int size) }; @@ -529,7 +538,6 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; %} %extend __wt_async_op { - %javamethodmodifiers get_key_wrap "protected"; WT_ITEM get_key_wrap(JNIEnv *jenv) { WT_ITEM k; diff --git a/src/block/block_ext.c b/src/block/block_ext.c index e9357d73d1d..da7a06d873d 100644 --- a/src/block/block_ext.c +++ b/src/block/block_ext.c @@ -1378,8 +1378,8 @@ __wt_block_extlist_init(WT_SESSION_IMPL *session, size = (name == NULL ? 0 : strlen(name)) + strlen(".") + (extname == NULL ? 0 : strlen(extname) + 1); WT_RET(__wt_calloc_def(session, size, &el->name)); - (void)snprintf(el->name, size, "%s.%s", - name == NULL ? "" : name, extname == NULL ? "" : extname); + WT_RET(__wt_snprintf(el->name, size, "%s.%s", + name == NULL ? "" : name, extname == NULL ? "" : extname)); el->offset = WT_BLOCK_INVALID_OFFSET; el->track_size = track_size; diff --git a/src/bloom/bloom.c b/src/bloom/bloom.c index be3230437d3..b8d75678835 100644 --- a/src/bloom/bloom.c +++ b/src/bloom/bloom.c @@ -37,8 +37,8 @@ __bloom_init(WT_SESSION_IMPL *session, len += strlen(config); WT_ERR(__wt_calloc_def(session, len, &bloom->config)); /* Add the standard config at the end, so it overrides user settings. */ - (void)snprintf(bloom->config, len, - "%s,%s", config == NULL ? "" : config, WT_BLOOM_TABLE_CONFIG); + WT_ERR(__wt_snprintf(bloom->config, len, + "%s,%s", config == NULL ? "" : config, WT_BLOOM_TABLE_CONFIG)); bloom->session = session; diff --git a/src/btree/bt_curnext.c b/src/btree/bt_curnext.c index ba5fceae7c7..21e575ffca9 100644 --- a/src/btree/bt_curnext.c +++ b/src/btree/bt_curnext.c @@ -579,20 +579,20 @@ __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt) int __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) { + WT_CURSOR *cursor; WT_DECL_RET; WT_PAGE *page; WT_SESSION_IMPL *session; uint32_t flags; bool newpage; + cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cbt->iface.session; WT_STAT_CONN_INCR(session, cursor_next); WT_STAT_DATA_INCR(session, cursor_next); - flags = WT_READ_SKIP_INTL; /* Tree walk flags. */ - if (truncating) - LF_SET(WT_READ_TRUNCATE); + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); WT_RET(__cursor_func_init(cbt, false)); @@ -608,6 +608,9 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) * found. Then, move to the next page, until we reach the end of the * file. */ + flags = WT_READ_SKIP_INTL; /* tree walk flags */ + if (truncating) + LF_SET(WT_READ_TRUNCATE); for (newpage = false;; newpage = true) { page = cbt->ref == NULL ? NULL : cbt->ref->page; @@ -676,6 +679,8 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) if (ret == 0) WT_ERR(__wt_cursor_key_order_check(session, cbt, true)); #endif + if (ret == 0) + F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); err: if (ret != 0) WT_TRET(__cursor_reset(cbt)); diff --git a/src/btree/bt_curprev.c b/src/btree/bt_curprev.c index 602c01b60eb..bf4bdad6529 100644 --- a/src/btree/bt_curprev.c +++ b/src/btree/bt_curprev.c @@ -535,20 +535,20 @@ new_insert: if ((ins = cbt->ins) != NULL) { int __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) { + WT_CURSOR *cursor; WT_DECL_RET; WT_PAGE *page; WT_SESSION_IMPL *session; uint32_t flags; bool newpage; + cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cbt->iface.session; WT_STAT_CONN_INCR(session, cursor_prev); WT_STAT_DATA_INCR(session, cursor_prev); - flags = WT_READ_PREV | WT_READ_SKIP_INTL; /* Tree walk flags. */ - if (truncating) - LF_SET(WT_READ_TRUNCATE); + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); WT_RET(__cursor_func_init(cbt, false)); @@ -564,6 +564,9 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) * found. Then, move to the previous page, until we reach the start * of the file. */ + flags = WT_READ_PREV | WT_READ_SKIP_INTL; /* tree walk flags */ + if (truncating) + LF_SET(WT_READ_TRUNCATE); for (newpage = false;; newpage = true) { page = cbt->ref == NULL ? NULL : cbt->ref->page; @@ -631,6 +634,8 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) if (ret == 0) WT_ERR(__wt_cursor_key_order_check(session, cbt, false)); #endif + if (ret == 0) + F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); err: if (ret != 0) WT_TRET(__cursor_reset(cbt)); diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index 48ae1ad6d76..944e276fc01 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -9,32 +9,46 @@ #include "wt_internal.h" /* - * WT_CURFILE_OP_XXX - * If we're going to return an error, we need to restore the cursor to - * a valid state, the upper-level cursor code is likely to retry. The macros - * here are called to save and restore that state. + * When returning an error, we need to restore the cursor to a valid state, the + * upper-level cursor code is likely to retry. This structure and the associated + * functions are used save and restore the cursor state. */ -#define WT_CURFILE_OP_DECL \ - WT_ITEM __key_copy; \ - WT_ITEM __value_copy; \ - uint64_t __recno; \ - uint32_t __flags -#define WT_CURFILE_OP_PUSH do { \ - WT_ITEM_SET(__key_copy, cursor->key); \ - WT_ITEM_SET(__value_copy, cursor->value); \ - __recno = cursor->recno; \ - __flags = cursor->flags; \ -} while (0) -#define WT_CURFILE_OP_POP do { \ - cursor->recno = __recno; \ - if (FLD_ISSET(__flags, WT_CURSTD_KEY_EXT)) \ - WT_ITEM_SET(cursor->key, __key_copy); \ - if (FLD_ISSET(__flags, WT_CURSTD_VALUE_EXT)) \ - WT_ITEM_SET(cursor->value, __value_copy); \ - F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); \ - F_SET(cursor, \ - FLD_MASK(__flags, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT));\ -} while (0) +typedef struct { + WT_ITEM key; + WT_ITEM value; + uint64_t recno; + uint32_t flags; +} WT_CURFILE_STATE; + +/* + * __cursor_state_save -- + * Save the cursor's external state. + */ +static inline void +__cursor_state_save(WT_CURSOR *cursor, WT_CURFILE_STATE *state) +{ + WT_ITEM_SET(state->key, cursor->key); + WT_ITEM_SET(state->value, cursor->value); + state->recno = cursor->recno; + state->flags = cursor->flags; +} + +/* + * __cursor_state_restore -- + * Restore the cursor's external state. + */ +static inline void +__cursor_state_restore(WT_CURSOR *cursor, WT_CURFILE_STATE *state) +{ + if (F_ISSET(state, WT_CURSTD_KEY_EXT)) + WT_ITEM_SET(cursor->key, state->key); + if (F_ISSET(state, WT_CURSTD_VALUE_EXT)) + WT_ITEM_SET(cursor->value, state->value); + cursor->recno = state->recno; + F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); + F_SET(cursor, F_MASK(state, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT)); + +} /* * __cursor_page_pinned -- @@ -377,13 +391,17 @@ __cursor_row_modify( int __wt_btcur_reset(WT_CURSOR_BTREE *cbt) { + WT_CURSOR *cursor; WT_SESSION_IMPL *session; + cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cbt->iface.session; WT_STAT_CONN_INCR(session, cursor_reset); WT_STAT_DATA_INCR(session, cursor_reset); + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + return (__cursor_reset(cbt)); } @@ -395,6 +413,7 @@ int __wt_btcur_search(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; + WT_CURFILE_STATE state; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; @@ -409,6 +428,15 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_STAT_CONN_INCR(session, cursor_search); WT_STAT_DATA_INCR(session, cursor_search); + __cursor_state_save(cursor, &state); + + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key, then re-save the cursor state. + */ + WT_ERR(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + /* * If we have a page pinned, search it; if we don't have a page pinned, * or the search of the pinned page doesn't find an exact match, search @@ -443,6 +471,8 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) cbt->v = 0; cursor->value.data = &cbt->v; cursor->value.size = 1; + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); } else ret = WT_NOTFOUND; @@ -451,8 +481,10 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_ERR(__wt_cursor_key_order_init(session, cbt)); #endif -err: if (ret != 0) +err: if (ret != 0) { WT_TRET(__cursor_reset(cbt)); + __cursor_state_restore(cursor, &state); + } return (ret); } @@ -464,6 +496,7 @@ int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) { WT_BTREE *btree; + WT_CURFILE_STATE state; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; @@ -480,6 +513,15 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_STAT_CONN_INCR(session, cursor_search_near); WT_STAT_DATA_INCR(session, cursor_search_near); + __cursor_state_save(cursor, &state); + + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key, then re-save the cursor state. + */ + WT_ERR(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + /* * If we have a row-store page pinned, search it; if we don't have a * page pinned, or the search of the pinned page doesn't find an exact @@ -544,6 +586,8 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) cursor->value.data = &cbt->v; cursor->value.size = 1; exact = 0; + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); } else if ((ret = __wt_btcur_next(cbt, false)) != WT_NOTFOUND) exact = 1; else { @@ -558,15 +602,18 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) exact = -1; } +err: if (ret == 0 && exactp != NULL) + *exactp = exact; + #ifdef HAVE_DIAGNOSTIC if (ret == 0) - WT_ERR(__wt_cursor_key_order_init(session, cbt)); + WT_TRET(__wt_cursor_key_order_init(session, cbt)); #endif -err: if (ret != 0) + if (ret != 0) { WT_TRET(__cursor_reset(cbt)); - if (exactp != NULL && (ret == 0 || ret == WT_NOTFOUND)) - *exactp = exact; + __cursor_state_restore(cursor, &state); + } return (ret); } @@ -578,9 +625,11 @@ int __wt_btcur_insert(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; + WT_CURFILE_STATE state; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; + bool append_key; btree = cbt->btree; cursor = &cbt->iface; @@ -591,6 +640,8 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) WT_STAT_DATA_INCRV(session, cursor_insert_bytes, cursor->key.size + cursor->value.size); + __cursor_state_save(cursor, &state); + if (btree->type == BTREE_ROW) WT_RET(__cursor_size_chk(session, &cursor->key)); WT_RET(__cursor_size_chk(session, &cursor->value)); @@ -598,7 +649,58 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) /* It's no longer possible to bulk-load into the tree. */ __cursor_disable_bulk(session, btree); -retry: WT_RET(__cursor_func_init(cbt, true)); + /* + * Insert a new record if WT_CURSTD_APPEND configured, (ignoring any + * application set record number). Although append can't be configured + * for a row-store, this code would break if it were, and that's owned + * by the upper cursor layer, be cautious. + */ + append_key = + F_ISSET(cursor, WT_CURSTD_APPEND) && btree->type != BTREE_ROW; + + /* + * If inserting with overwrite configured, and positioned to an on-page + * key, the update doesn't require another search. The cursor won't be + * positioned on a page with an external key set, but be sure. Cursors + * configured for append aren't included, regardless of whether or not + * they meet all other criteria. + */ + if (__cursor_page_pinned(cbt) && + F_ISSET_ALL(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_OVERWRITE) && + !append_key) { + WT_ERR(__wt_txn_autocommit_check(session)); + /* + * The cursor position may not be exact (the cursor's comparison + * value not equal to zero). Correct to an exact match so we can + * update whatever we're pointing at. + */ + cbt->compare = 0; + ret = btree->type == BTREE_ROW ? + __cursor_row_modify(session, cbt, false) : + __cursor_col_modify(session, cbt, false); + if (ret == 0) + goto done; + + /* + * The pinned page goes away if we fail for any reason, make + * sure there's a local copy of any key. (Restart could still + * use the pinned page, but that's an unlikely path.) Re-save + * the cursor state: we may retry but eventually fail. + */ + WT_TRET(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + goto err; + } + + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key. Re-save the cursor state: we may retry but + * eventually fail. + */ + WT_ERR(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + +retry: WT_ERR(__cursor_func_init(cbt, true)); if (btree->type == BTREE_ROW) { WT_ERR(__cursor_row_search(session, cbt, NULL, true)); @@ -613,11 +715,11 @@ retry: WT_RET(__cursor_func_init(cbt, true)); ret = __cursor_row_modify(session, cbt, false); } else { /* - * If WT_CURSTD_APPEND is set, insert a new record (ignoring - * the application's record number). The real record number - * is assigned by the serialized append operation. + * Optionally insert a new record (ignoring the application's + * record number). The real record number is allocated by the + * serialized append operation. */ - if (F_ISSET(cursor, WT_CURSTD_APPEND)) + if (append_key) cbt->iface.recno = WT_RECNO_OOB; WT_ERR(__cursor_col_search(session, cbt, NULL)); @@ -634,7 +736,8 @@ retry: WT_RET(__cursor_func_init(cbt, true)); WT_ERR(WT_DUPLICATE_KEY); WT_ERR(__cursor_col_modify(session, cbt, false)); - if (F_ISSET(cursor, WT_CURSTD_APPEND)) + + if (append_key) cbt->iface.recno = cbt->recno; } @@ -644,8 +747,16 @@ err: if (ret == WT_RESTART) { goto retry; } - /* Insert doesn't maintain a position across calls, clear resources. */ +done: /* Insert doesn't maintain a position across calls, clear resources. */ + if (ret == 0) { + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + if (append_key) + F_SET(cursor, WT_CURSTD_KEY_INT); + } WT_TRET(__cursor_reset(cbt)); + if (ret != 0) + __cursor_state_restore(cursor, &state); + return (ret); } @@ -681,16 +792,15 @@ __curfile_update_check(WT_CURSOR_BTREE *cbt) } /* - * __wt_btcur_update_check -- + * __wt_btcur_insert_check -- * Check whether an update would conflict. * - * This can be used to replace WT_CURSOR::insert or WT_CURSOR::update, so - * they only check for conflicts without updating the tree. It is used to - * maintain snapshot isolation for transactions that span multiple chunks - * in an LSM tree. + * This can replace WT_CURSOR::insert, so it only checks for conflicts without + * updating the tree. It is used to maintain snapshot isolation for transactions + * that span multiple chunks in an LSM tree. */ int -__wt_btcur_update_check(WT_CURSOR_BTREE *cbt) +__wt_btcur_insert_check(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; WT_CURSOR *cursor; @@ -701,14 +811,20 @@ __wt_btcur_update_check(WT_CURSOR_BTREE *cbt) btree = cbt->btree; session = (WT_SESSION_IMPL *)cursor->session; -retry: WT_RET(__cursor_func_init(cbt, true)); + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key. Unlike most of the btree cursor routines, + * we don't have to save/restore the cursor key state, none of the + * work done here changes the key state. + */ + WT_ERR(__cursor_copy_int_key(cursor)); + +retry: WT_ERR(__cursor_func_init(cbt, true)); if (btree->type == BTREE_ROW) { WT_ERR(__cursor_row_search(session, cbt, NULL, true)); - /* - * Just check for conflicts. - */ + /* Just check for conflicts. */ ret = __curfile_update_check(cbt); } else WT_ERR(__wt_illegal_value(session, NULL)); @@ -720,7 +836,10 @@ err: if (ret == WT_RESTART) { } /* Insert doesn't maintain a position across calls, clear resources. */ + if (ret == 0) + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); WT_TRET(__cursor_reset(cbt)); + return (ret); } @@ -732,7 +851,7 @@ int __wt_btcur_remove(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; - WT_CURFILE_OP_DECL; + WT_CURFILE_STATE state; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; @@ -742,26 +861,27 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt) cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; - WT_CURFILE_OP_PUSH; - WT_STAT_CONN_INCR(session, cursor_remove); WT_STAT_DATA_INCR(session, cursor_remove); WT_STAT_DATA_INCRV(session, cursor_remove_bytes, cursor->key.size); + __cursor_state_save(cursor, &state); + /* * WT_CURSOR.remove has a unique semantic, the cursor stays positioned * if it starts positioned, otherwise clear the cursor on completion. */ positioned = F_ISSET(cursor, WT_CURSTD_KEY_INT); -retry: /* - * If removing with overwrite configured, and positioned to an on-page - * key, the update doesn't require another search. The cursor won't be - * positioned on a page with an external key set, but be sure. + * If remove positioned to an on-page key, the remove doesn't require + * another search. We don't care about the "overwrite" configuration + * because regardless of the overwrite setting, any existing record is + * removed, and the record must exist with a positioned cursor. The + * cursor won't be positioned on a page with an external key set, but + * be sure. */ - if (__cursor_page_pinned(cbt) && - F_ISSET_ALL(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_OVERWRITE)) { + if (__cursor_page_pinned(cbt) && F_ISSET(cursor, WT_CURSTD_KEY_INT)) { WT_ERR(__wt_txn_autocommit_check(session)); /* @@ -773,6 +893,8 @@ retry: ret = btree->type == BTREE_ROW ? __cursor_row_modify(session, cbt, true) : __cursor_col_modify(session, cbt, true); + if (ret == 0) + goto done; /* * The pinned page goes away if we fail for any reason, make @@ -780,12 +902,9 @@ retry: * use the pinned page, but that's an unlikely path.) Re-save * the cursor state: we may retry but eventually fail. */ - if (ret != 0) { - WT_TRET(__cursor_copy_int_key(cursor)); - WT_CURFILE_OP_PUSH; - goto err; - } - goto done; + WT_TRET(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + goto err; } /* @@ -794,9 +913,9 @@ retry: * eventually fail. */ WT_ERR(__cursor_copy_int_key(cursor)); - WT_CURFILE_OP_PUSH; + __cursor_state_save(cursor, &state); - WT_ERR(__cursor_func_init(cbt, true)); +retry: WT_ERR(__cursor_func_init(cbt, true)); if (btree->type == BTREE_ROW) { WT_ERR(__cursor_row_search(session, cbt, NULL, false)); @@ -857,14 +976,12 @@ done: /* */ if (ret == 0) F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); - if (ret == 0 && positioned) { + if (ret == 0 && positioned) WT_TRET(__wt_key_return(session, cbt)); - if (ret == 0) - F_SET(cursor, WT_CURSTD_KEY_INT); - } else + else WT_TRET(__cursor_reset(cbt)); if (ret != 0) - WT_CURFILE_OP_POP; + __cursor_state_restore(cursor, &state); return (ret); } @@ -877,6 +994,7 @@ int __wt_btcur_update(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; + WT_CURFILE_STATE state; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; @@ -889,6 +1007,8 @@ __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_STAT_DATA_INCR(session, cursor_update); WT_STAT_DATA_INCRV(session, cursor_update_bytes, cursor->value.size); + __cursor_state_save(cursor, &state); + if (btree->type == BTREE_ROW) WT_RET(__cursor_size_chk(session, &cursor->key)); WT_RET(__cursor_size_chk(session, &cursor->value)); @@ -896,7 +1016,48 @@ __wt_btcur_update(WT_CURSOR_BTREE *cbt) /* It's no longer possible to bulk-load into the tree. */ __cursor_disable_bulk(session, btree); -retry: WT_RET(__cursor_func_init(cbt, true)); + /* + * If update positioned to an on-page key, the update doesn't require + * another search. We don't care about the "overwrite" configuration + * because regardless of the overwrite setting, any existing record is + * updated, and the record must exist with a positioned cursor. The + * cursor won't be positioned on a page with an external key set, but + * be sure. + */ + if (__cursor_page_pinned(cbt) && F_ISSET(cursor, WT_CURSTD_KEY_INT)) { + WT_ERR(__wt_txn_autocommit_check(session)); + /* + * The cursor position may not be exact (the cursor's comparison + * value not equal to zero). Correct to an exact match so we can + * update whatever we're pointing at. + */ + cbt->compare = 0; + ret = btree->type == BTREE_ROW ? + __cursor_row_modify(session, cbt, false) : + __cursor_col_modify(session, cbt, false); + if (ret == 0) + goto done; + + /* + * The pinned page goes away if we fail for any reason, make + * sure there's a local copy of any key. (Restart could still + * use the pinned page, but that's an unlikely path.) Re-save + * the cursor state: we may retry but eventually fail. + */ + WT_TRET(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + goto err; + } + + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key. Re-save the cursor state: we may retry but + * eventually fail. + */ + WT_ERR(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + +retry: WT_ERR(__cursor_func_init(cbt, true)); if (btree->type == BTREE_ROW) { WT_ERR(__cursor_row_search(session, cbt, NULL, true)); @@ -945,11 +1106,14 @@ err: if (ret == WT_RESTART) { * To make this work, we add a field to the btree cursor to pass back a * pointer to the modify function's allocated update structure. */ - if (ret == 0) +done: if (ret == 0) WT_TRET(__wt_kv_return(session, cbt, cbt->modify_update)); - if (ret != 0) + if (ret != 0) { WT_TRET(__cursor_reset(cbt)); + __cursor_state_restore(cursor, &state); + } + return (ret); } @@ -1097,14 +1261,6 @@ __cursor_truncate(WT_SESSION_IMPL *session, * and we can proceed without concern. */ retry: WT_RET(__wt_btcur_search(start)); - - /* - * XXX KEITH - * When the btree cursor code sets/clears the cursor flags (rather than - * the cursor layer), the set/clear goes away, only the assert remains. - */ - F_CLR((WT_CURSOR *)start, WT_CURSTD_KEY_SET); - F_SET((WT_CURSOR *)start, WT_CURSTD_KEY_INT); WT_ASSERT(session, F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); @@ -1161,14 +1317,6 @@ __cursor_truncate_fix(WT_SESSION_IMPL *session, * refresh the page's modification information. */ retry: WT_RET(__wt_btcur_search(start)); - - /* - * XXX KEITH - * When the btree cursor code sets/clears the cursor flags (rather than - * the cursor layer), the set/clear goes away, only the assert remains. - */ - F_CLR((WT_CURSOR *)start, WT_CURSTD_KEY_SET); - F_SET((WT_CURSOR *)start, WT_CURSTD_KEY_INT); WT_ASSERT(session, F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index 4989301468f..d3f02e29b90 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -64,7 +64,7 @@ __wt_debug_set_verbose(WT_SESSION_IMPL *session, const char *v) const char *cfg[2] = { NULL, NULL }; char buf[256]; - snprintf(buf, sizeof(buf), "verbose=[%s]", v); + WT_RET(__wt_snprintf(buf, sizeof(buf), "verbose=[%s]", v)); cfg[0] = buf; return (__wt_verbose_config(session, cfg)); } @@ -87,6 +87,7 @@ __debug_hex_byte(WT_DBG *ds, uint8_t v) static int __dmsg_event(WT_DBG *ds, const char *fmt, ...) { + WT_DECL_RET; WT_ITEM *msg; WT_SESSION_IMPL *session; size_t len, space; @@ -107,8 +108,9 @@ __dmsg_event(WT_DBG *ds, const char *fmt, ...) p = (char *)msg->mem + msg->size; space = msg->memsize - msg->size; va_start(ap, fmt); - len = (size_t)vsnprintf(p, space, fmt, ap); + ret = __wt_vsnprintf_len_set(p, space, &len, fmt, ap); va_end(ap); + WT_RET(ret); /* Check if there was enough space. */ if (len < space) { @@ -447,13 +449,14 @@ __debug_tree_shape_info(WT_PAGE *page) v = page->memory_footprint; if (v >= WT_GIGABYTE) - snprintf(buf, sizeof(buf), + (void)__wt_snprintf(buf, sizeof(buf), "(%p %" PRIu64 "G)", (void *)page, v / WT_GIGABYTE); else if (v >= WT_MEGABYTE) - snprintf(buf, sizeof(buf), + (void)__wt_snprintf(buf, sizeof(buf), "(%p %" PRIu64 "M)", (void *)page, v / WT_MEGABYTE); else - snprintf(buf, sizeof(buf), "(%p %" PRIu64 ")", (void *)page, v); + (void)__wt_snprintf(buf, sizeof(buf), + "(%p %" PRIu64 ")", (void *)page, v); return (buf); } @@ -838,7 +841,8 @@ __debug_page_col_var(WT_DBG *ds, WT_REF *ref) __wt_cell_unpack(cell, unpack); rle = __wt_cell_rle(unpack); } - snprintf(tag, sizeof(tag), "%" PRIu64 " %" PRIu64, recno, rle); + WT_RET(__wt_snprintf( + tag, sizeof(tag), "%" PRIu64 " %" PRIu64, recno, rle)); WT_RET( __debug_cell_data(ds, page, WT_PAGE_COL_VAR, tag, unpack)); diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c index d2beb84fee9..bab7b8145d6 100644 --- a/src/btree/bt_discard.c +++ b/src/btree/bt_discard.c @@ -16,13 +16,14 @@ static void __free_skip_array( WT_SESSION_IMPL *, WT_INSERT_HEAD **, uint32_t, bool); static void __free_skip_list(WT_SESSION_IMPL *, WT_INSERT *, bool); static void __free_update(WT_SESSION_IMPL *, WT_UPDATE **, uint32_t, bool); +static void __page_out_int(WT_SESSION_IMPL *, WT_PAGE **, bool); /* - * __wt_ref_out -- + * __wt_ref_out_int -- * Discard an in-memory page, freeing all memory associated with it. */ void -__wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref) +__wt_ref_out_int(WT_SESSION_IMPL *session, WT_REF *ref, bool rewrite) { /* * A version of the page-out function that allows us to make additional @@ -56,15 +57,25 @@ __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref) } #endif - __wt_page_out(session, &ref->page); + __page_out_int(session, &ref->page, rewrite); } /* - * __wt_page_out -- + * __wt_ref_out -- * Discard an in-memory page, freeing all memory associated with it. */ void -__wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) +__wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref) +{ + __wt_ref_out_int(session, ref, false); +} + +/* + * __page_out_int -- + * Discard an in-memory page, freeing all memory associated with it. + */ +static void +__page_out_int(WT_SESSION_IMPL *session, WT_PAGE **pagep, bool rewrite) { WT_PAGE *page; WT_PAGE_HEADER *dsk; @@ -103,7 +114,7 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) } /* Update the cache's information. */ - __wt_cache_page_evict(session, page); + __wt_cache_page_evict(session, page, rewrite); dsk = (WT_PAGE_HEADER *)page->dsk; if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC)) @@ -148,6 +159,16 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) } /* + * __wt_page_out -- + * Discard an in-memory page, freeing all memory associated with it. + */ +void +__wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) +{ + __page_out_int(session, pagep, false); +} + +/* * __free_page_modify -- * Discard the page's associated modification structures. */ diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index f2bffee06da..d76720b19ae 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -359,6 +359,14 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) } else F_CLR(btree, WT_BTREE_IGNORE_CACHE); + /* + * The metadata isn't blocked by in-memory cache limits because metadata + * "unroll" is performed by updates that are potentially blocked by the + * cache-full checks. + */ + if (WT_IS_METADATA(btree->dhandle)) + F_SET(btree, WT_BTREE_IGNORE_CACHE); + WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval)); if (cval.val) F_CLR(btree, WT_BTREE_NO_LOGGING); @@ -780,9 +788,16 @@ __btree_page_sizes(WT_SESSION_IMPL *session) * Get the split percentage (reconciliation splits pages into smaller * than the maximum page size chunks so we don't split every time a * new entry is added). Determine how large newly split pages will be. + * Set to the minimum, if the read value is less than that. */ WT_RET(__wt_config_gets(session, cfg, "split_pct", &cval)); - btree->split_pct = (int)cval.val; + if (cval.val < WT_BTREE_MIN_SPLIT_PCT) { + btree->split_pct = WT_BTREE_MIN_SPLIT_PCT; + WT_RET(__wt_msg(session, + "Re-setting split_pct for %s to the minimum allowed of " + "%d%%.", session->dhandle->name, WT_BTREE_MIN_SPLIT_PCT)); + } else + btree->split_pct = (int)cval.val; intl_split_size = __wt_split_page_size(btree, btree->maxintlpage); leaf_split_size = __wt_split_page_size(btree, btree->maxleafpage); diff --git a/src/btree/bt_random.c b/src/btree/bt_random.c index 25ede0a09ac..c5948ec4ab5 100644 --- a/src/btree/bt_random.c +++ b/src/btree/bt_random.c @@ -292,14 +292,16 @@ int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; + WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; WT_UPDATE *upd; wt_off_t size; uint64_t n, skip; - session = (WT_SESSION_IMPL *)cbt->iface.session; btree = cbt->btree; + cursor = &cbt->iface; + session = (WT_SESSION_IMPL *)cbt->iface.session; /* * Only supports row-store: applications can trivially select a random @@ -312,6 +314,8 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_STAT_CONN_INCR(session, cursor_next); WT_STAT_DATA_INCR(session, cursor_next); + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + #ifdef HAVE_DIAGNOSTIC /* * Under some conditions we end up using the underlying cursor.next to @@ -320,7 +324,6 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) */ __wt_cursor_key_order_reset(cbt); #endif - /* * If we don't have a current position in the tree, or if retrieving * random values without sampling, pick a roughly random leaf page in diff --git a/src/btree/bt_ret.c b/src/btree/bt_ret.c index 9fc457e2297..f17fa1b85d1 100644 --- a/src/btree/bt_ret.c +++ b/src/btree/bt_ret.c @@ -142,8 +142,20 @@ __value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) int __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) { - WT_RET(__key_return(session, cbt)); + WT_CURSOR *cursor; + + cursor = &cbt->iface; + /* + * We may already have an internal key, in which case the cursor may + * not be set up to get another copy (for example, when we rely on a + * search-function result). + */ + F_CLR(cursor, WT_CURSTD_KEY_EXT); + if (!F_ISSET(cursor, WT_CURSTD_KEY_INT)) { + WT_RET(__key_return(session, cbt)); + F_SET(cursor, WT_CURSTD_KEY_INT); + } return (0); } @@ -154,8 +166,15 @@ __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) int __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) { + WT_CURSOR *cursor; + + cursor = &cbt->iface; + WT_RET(__wt_key_return(session, cbt)); + + F_CLR(cursor, WT_CURSTD_VALUE_EXT); WT_RET(__value_return(session, cbt, upd)); + F_SET(cursor, WT_CURSTD_VALUE_INT); return (0); } diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index b1bad760826..49043c8bab4 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -2274,7 +2274,7 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi) * reconciliation, do it now. */ __wt_page_modify_clear(session, page); - __wt_ref_out(session, ref); + __wt_ref_out_int(session, ref, true); /* Swap the new page into place. */ ref->page = new->page; diff --git a/src/config/config_api.c b/src/config/config_api.c index 9f70ba65e9b..c1299baaafe 100644 --- a/src/config/config_api.c +++ b/src/config/config_api.c @@ -278,8 +278,8 @@ __wt_configure_method(WT_SESSION_IMPL *session, entry->method = (*epp)->method; len = strlen((*epp)->base) + strlen(",") + strlen(config) + 1; WT_ERR(__wt_calloc_def(session, len, &p)); - snprintf(p, len, "%s,%s", (*epp)->base, config); entry->base = p; + WT_ERR(__wt_snprintf(p, len, "%s,%s", (*epp)->base, config)); /* * There may be a default value in the config argument passed in (for diff --git a/src/config/config_def.c b/src/config/config_def.c index b11a8d63fdb..f152fbacad4 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -294,7 +294,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_create[] = { { "source", "string", NULL, NULL, NULL, 0 }, { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, - { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, + { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 }, { "type", "string", NULL, NULL, NULL, 0 }, { "value_format", "format", __wt_struct_confchk, NULL, @@ -466,7 +466,7 @@ static const WT_CONFIG_CHECK confchk_file_config[] = { { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 }, { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, - { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, + { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 }, { "value_format", "format", __wt_struct_confchk, NULL, NULL, 0 }, @@ -530,7 +530,7 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = { { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 }, { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, - { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, + { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 }, { "value_format", "format", __wt_struct_confchk, NULL, NULL, 0 }, @@ -614,7 +614,7 @@ static const WT_CONFIG_CHECK confchk_lsm_meta[] = { { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 }, { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, - { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, + { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 }, { "value_format", "format", __wt_struct_confchk, NULL, NULL, 0 }, @@ -1119,7 +1119,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "chunk_size=10MB,merge_max=15,merge_min=0),memory_page_max=5MB," "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," "prefix_compression_min=4,source=,split_deepen_min_child=0," - "split_deepen_per_child=0,split_pct=75,type=file,value_format=u", + "split_deepen_per_child=0,split_pct=90,type=file,value_format=u", confchk_WT_SESSION_create, 42 }, { "WT_SESSION.drop", @@ -1213,7 +1213,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "leaf_value_max=0,log=(enabled=true),memory_page_max=5MB," "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," "prefix_compression_min=4,split_deepen_min_child=0," - "split_deepen_per_child=0,split_pct=75,value_format=u", + "split_deepen_per_child=0,split_pct=90,value_format=u", confchk_file_config, 35 }, { "file.meta", @@ -1228,7 +1228,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=0," "log=(enabled=true),memory_page_max=5MB,os_cache_dirty_max=0," "os_cache_max=0,prefix_compression=false,prefix_compression_min=4" - ",split_deepen_min_child=0,split_deepen_per_child=0,split_pct=75," + ",split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90," "value_format=u,version=(major=0,minor=0)", confchk_file_meta, 39 }, @@ -1253,7 +1253,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "merge_min=0),memory_page_max=5MB,old_chunks=," "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," "prefix_compression_min=4,split_deepen_min_child=0," - "split_deepen_per_child=0,split_pct=75,value_format=u", + "split_deepen_per_child=0,split_pct=90,value_format=u", confchk_lsm_meta, 39 }, { "table.meta", diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 124250a7a7d..68d45678965 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1662,8 +1662,8 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR_MSG(session, EINVAL, "Creating a new database is incompatible with " "read-only configuration"); - len = (size_t)snprintf(buf, sizeof(buf), - "%s\n%s\n", WT_WIREDTIGER, WIREDTIGER_VERSION_STRING); + WT_ERR(__wt_snprintf_len_set(buf, sizeof(buf), &len, + "%s\n%s\n", WT_WIREDTIGER, WIREDTIGER_VERSION_STRING)); WT_ERR(__wt_write(session, fh, (wt_off_t)0, len, buf)); WT_ERR(__wt_fsync(session, fh, true)); } else { @@ -2250,10 +2250,9 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, WT_ERR(__wt_scr_alloc(session, 0, &i3)); cfg[0] = WT_CONFIG_BASE(session, wiredtiger_open_all); cfg[1] = NULL; - WT_ERR_TEST(snprintf(version, sizeof(version), + WT_ERR(__wt_snprintf(version, sizeof(version), "version=(major=%d,minor=%d)", - WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR) >= - (int)sizeof(version), ENOMEM); + WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR)); __conn_config_append(cfg, version); /* Ignore the base_config file if config_base_set is false. */ diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index c5480897494..657cdebf7ee 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -314,7 +314,8 @@ __wt_conn_btree_open( F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) && !LF_ISSET(WT_DHANDLE_LOCK_ONLY)); - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_CLOSING)); + WT_ASSERT(session, + !F_ISSET(S2C(session), WT_CONN_CLOSING_NO_MORE_OPENS)); /* * If the handle is already open, it has to be closed so it can be diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index c6dd795389d..b8b5bd2a908 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -341,7 +341,7 @@ __wt_log_truncate_files( conn = S2C(session); if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) return (0); - if (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN) && + if (F_ISSET(conn, WT_CONN_SERVER_LOG) && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE)) WT_RET_MSG(session, EINVAL, "Attempt to archive manually while a server is running"); @@ -382,7 +382,7 @@ __log_file_server(void *arg) conn = S2C(session); log = conn->log; locked = false; - while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) { + while (F_ISSET(conn, WT_CONN_SERVER_LOG)) { /* * If there is a log file to close, make sure any outstanding * write operations have completed, then fsync and close it. @@ -708,7 +708,7 @@ __log_wrlsn_server(void *arg) log = conn->log; yield = 0; WT_INIT_LSN(&prev); - while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) { + while (F_ISSET(conn, WT_CONN_SERVER_LOG)) { /* * Write out any log record buffers if anything was done * since last time. Only call the function to walk the @@ -783,7 +783,7 @@ __log_server(void *arg) * takes to sync out an earlier file. */ did_work = true; - while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) { + while (F_ISSET(conn, WT_CONN_SERVER_LOG)) { /* * Slots depend on future activity. Force out buffered * writes in case we are idle. This cannot be part of the @@ -923,7 +923,7 @@ __wt_logmgr_open(WT_SESSION_IMPL *session) if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) return (0); - F_SET(conn, WT_CONN_LOG_SERVER_RUN); + F_SET(conn, WT_CONN_SERVER_LOG); /* * Start the log close thread. It is not configurable. @@ -995,7 +995,7 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session) conn = S2C(session); - F_CLR(conn, WT_CONN_LOG_SERVER_RUN); + F_CLR(conn, WT_CONN_SERVER_LOG); if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) { /* diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c index 5b20377d437..eb3c79422a0 100644 --- a/src/conn/conn_open.c +++ b/src/conn/conn_open.c @@ -21,12 +21,6 @@ __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]) session = conn->default_session; WT_ASSERT(session, session->iface.connection == &conn->iface); - /* - * Tell internal server threads to run: this must be set before opening - * any sessions. - */ - F_SET(conn, WT_CONN_SERVER_RUN); - /* WT_SESSION_IMPL array. */ WT_RET(__wt_calloc(session, conn->session_size, sizeof(WT_SESSION_IMPL), &conn->sessions)); @@ -100,6 +94,10 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) __wt_yield(); } + /* Shut down the subsystems, ensuring workers see the state change. */ + F_SET(conn, WT_CONN_CLOSING); + WT_FULL_BARRIER(); + /* * Clear any pending async operations and shut down the async worker * threads and system before closing LSM. @@ -113,10 +111,15 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) * btree handles, so take care in ordering shutdown to make sure they * exit before files are closed. */ - F_CLR(conn, WT_CONN_SERVER_RUN); WT_TRET(__wt_lsm_manager_destroy(session)); - F_SET(conn, WT_CONN_CLOSING); + /* + * Once the async and LSM threads exit, we shouldn't be opening any + * more files. + */ + F_SET(conn, WT_CONN_CLOSING_NO_MORE_OPENS); + WT_FULL_BARRIER(); + WT_TRET(__wt_checkpoint_server_destroy(session)); WT_TRET(__wt_statlog_destroy(session, true)); WT_TRET(__wt_sweep_destroy(session)); diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c index 274dc1e8f62..205afb607c3 100644 --- a/src/cursor/cur_file.c +++ b/src/cursor/cur_file.c @@ -9,29 +9,6 @@ #include "wt_internal.h" /* - * WT_BTREE_CURSOR_SAVE_AND_RESTORE - * Save the cursor's key/value data/size fields, call an underlying btree - * function, and then consistently handle failure and success. - */ -#define WT_BTREE_CURSOR_SAVE_AND_RESTORE(cursor, f, ret) do { \ - WT_ITEM __key_copy = (cursor)->key; \ - uint64_t __recno = (cursor)->recno; \ - WT_ITEM __value_copy = (cursor)->value; \ - if (((ret) = (f)) == 0) { \ - F_CLR(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT); \ - F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); \ - } else { \ - if (F_ISSET(cursor, WT_CURSTD_KEY_EXT)) { \ - (cursor)->recno = __recno; \ - WT_ITEM_SET((cursor)->key, __key_copy); \ - } \ - if (F_ISSET(cursor, WT_CURSTD_VALUE_EXT)) \ - WT_ITEM_SET((cursor)->value, __value_copy); \ - F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); \ - } \ -} while (0) - -/* * __curfile_compare -- * WT_CURSOR->compare method for the btree cursor type. */ @@ -109,9 +86,12 @@ __curfile_next(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_API_CALL(cursor, session, next, cbt->btree); - F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); - if ((ret = __wt_btcur_next(cbt, false)) == 0) - F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); + WT_ERR(__wt_btcur_next(cbt, false)); + + /* Next maintains a position, key and value. */ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); err: API_END_RET(session, ret); } @@ -131,9 +111,12 @@ __wt_curfile_next_random(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_API_CALL(cursor, session, next, cbt->btree); - F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); - if ((ret = __wt_btcur_next_random(cbt)) == 0) - F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); + WT_ERR(__wt_btcur_next_random(cbt)); + + /* Next-random maintains a position, key and value. */ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); err: API_END_RET(session, ret); } @@ -152,9 +135,12 @@ __curfile_prev(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_API_CALL(cursor, session, prev, cbt->btree); - F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); - if ((ret = __wt_btcur_prev(cbt, false)) == 0) - F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); + WT_ERR(__wt_btcur_prev(cbt, false)); + + /* Prev maintains a position, key and value. */ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); err: API_END_RET(session, ret); } @@ -175,7 +161,10 @@ __curfile_reset(WT_CURSOR *cursor) ret = __wt_btcur_reset(cbt); - F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + /* Reset maintains no position, key or value. */ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == 0 && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == 0); err: API_END_RET(session, ret); } @@ -194,10 +183,15 @@ __curfile_search(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_API_CALL(cursor, session, search, cbt->btree); - WT_CURSOR_NEEDKEY(cursor); + WT_CURSOR_CHECKKEY(cursor); WT_CURSOR_NOVALUE(cursor); - WT_BTREE_CURSOR_SAVE_AND_RESTORE(cursor, __wt_btcur_search(cbt), ret); + WT_ERR(__wt_btcur_search(cbt)); + + /* Search maintains a position, key and value. */ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); err: API_END_RET(session, ret); } @@ -216,11 +210,15 @@ __curfile_search_near(WT_CURSOR *cursor, int *exact) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_API_CALL(cursor, session, search_near, cbt->btree); - WT_CURSOR_NEEDKEY(cursor); + WT_CURSOR_CHECKKEY(cursor); WT_CURSOR_NOVALUE(cursor); - WT_BTREE_CURSOR_SAVE_AND_RESTORE( - cursor, __wt_btcur_search_near(cbt, exact), ret); + WT_ERR(__wt_btcur_search_near(cbt, exact)); + + /* Search-near maintains a position, key and value. */ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); err: API_END_RET(session, ret); } @@ -238,38 +236,33 @@ __curfile_insert(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_UPDATE_API_CALL(cursor, session, insert, cbt->btree); + if (!F_ISSET(cursor, WT_CURSTD_APPEND)) - WT_CURSOR_NEEDKEY(cursor); - WT_CURSOR_NEEDVALUE(cursor); + WT_CURSOR_CHECKKEY(cursor); + WT_CURSOR_CHECKVALUE(cursor); - WT_BTREE_CURSOR_SAVE_AND_RESTORE(cursor, __wt_btcur_insert(cbt), ret); + WT_ERR(__wt_btcur_insert(cbt)); /* - * Insert is the one cursor operation that doesn't end with the cursor - * pointing to an on-page item (except for column-store appends, where - * we are returning a key). That is, the application's cursor continues - * to reference the application's memory after a successful cursor call, - * which isn't true anywhere else. We don't want to have to explain that - * scoping corner case, so we reset the application's cursor so it can - * free the referenced memory and continue on without risking subsequent - * core dumps. + * Insert maintains no position, key or value (except for column-store + * appends, where we are returning a key). */ - if (ret == 0) { - if (!F_ISSET(cursor, WT_CURSTD_APPEND)) - F_CLR(cursor, WT_CURSTD_KEY_INT); - F_CLR(cursor, WT_CURSTD_VALUE_INT); - } + WT_ASSERT(session, + (F_ISSET(cursor, WT_CURSTD_APPEND) && + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT) || + (!F_ISSET(cursor, WT_CURSTD_APPEND) && + F_MASK(cursor, WT_CURSTD_KEY_SET) == 0)); err: CURSOR_UPDATE_API_END(session, ret); return (ret); } /* - * __curfile_update -- - * WT_CURSOR->update method for the btree cursor type. + * __wt_curfile_insert_check -- + * WT_CURSOR->insert_check method for the btree cursor type. */ -static int -__curfile_update(WT_CURSOR *cursor) +int +__wt_curfile_insert_check(WT_CURSOR *cursor) { WT_CURSOR_BTREE *cbt; WT_DECL_RET; @@ -278,21 +271,21 @@ __curfile_update(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_UPDATE_API_CALL(cursor, session, update, cbt->btree); - WT_CURSOR_NEEDKEY(cursor); - WT_CURSOR_NEEDVALUE(cursor); + WT_CURSOR_CHECKKEY(cursor); + WT_CURSOR_NOVALUE(cursor); - WT_BTREE_CURSOR_SAVE_AND_RESTORE(cursor, __wt_btcur_update(cbt), ret); + ret = __wt_btcur_insert_check(cbt); err: CURSOR_UPDATE_API_END(session, ret); return (ret); } /* - * __wt_curfile_update_check -- - * WT_CURSOR->update_check method for the btree cursor type. + * __curfile_update -- + * WT_CURSOR->update method for the btree cursor type. */ -int -__wt_curfile_update_check(WT_CURSOR *cursor) +static int +__curfile_update(WT_CURSOR *cursor) { WT_CURSOR_BTREE *cbt; WT_DECL_RET; @@ -301,11 +294,15 @@ __wt_curfile_update_check(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_UPDATE_API_CALL(cursor, session, update, cbt->btree); - WT_CURSOR_NEEDKEY(cursor); - WT_CURSOR_NOVALUE(cursor); + WT_CURSOR_CHECKKEY(cursor); + WT_CURSOR_CHECKVALUE(cursor); - WT_BTREE_CURSOR_SAVE_AND_RESTORE( - cursor, __wt_btcur_update_check(cbt), ret); + WT_ERR(__wt_btcur_update(cbt)); + + /* Update maintains a position, key and value. */ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); err: CURSOR_UPDATE_API_END(session, ret); return (ret); diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index 8df8e201173..80afaf798dc 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -185,7 +185,7 @@ __curjoin_iter_set_entry(WT_CURSOR_JOIN_ITER *iter, u_int entry_pos) size = strlen(to_dup->internal_uri) + 3; WT_ERR(__wt_calloc(session, size, 1, &uri)); - snprintf(uri, size, "%s()", to_dup->internal_uri); + WT_ERR(__wt_snprintf(uri, size, "%s()", to_dup->internal_uri)); if ((c = iter->cursor) == NULL || !WT_STREQ(c->uri, uri)) { iter->cursor = NULL; if (c != NULL) @@ -929,7 +929,7 @@ __curjoin_init_next(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, if ((proj = cjoin->projection) != NULL) { size = strlen(urimain) + strlen(proj) + 1; WT_ERR(__wt_calloc(session, size, 1, &mainbuf)); - snprintf(mainbuf, size, "%s%s", urimain, proj); + WT_ERR(__wt_snprintf(mainbuf, size, "%s%s", urimain, proj)); urimain = mainbuf; } WT_ERR(__wt_open_cursor(session, urimain, (WT_CURSOR *)cjoin, config, @@ -974,8 +974,8 @@ __curjoin_init_next(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, if (!iterable && F_ISSET(je, WT_CURJOIN_ENTRY_BLOOM)) { if (session->txn.isolation == WT_ISO_READ_UNCOMMITTED) WT_ERR_MSG(session, EINVAL, - "join cursors with Bloom filters cannot be " - "used with read-uncommitted isolation"); + "join cursors with Bloom filters cannot be " + "used with read-uncommitted isolation"); if (je->bloom == NULL) { /* * Look for compatible filters to be shared, @@ -1148,8 +1148,8 @@ __curjoin_open_main(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, newsize = strlen(cjoin->table->name) + idx->colconf.len + 1; WT_ERR(__wt_calloc(session, 1, newsize, &main_uri)); - snprintf(main_uri, newsize, "%s%.*s", - cjoin->table->name, (int)idx->colconf.len, idx->colconf.str); + WT_ERR(__wt_snprintf(main_uri, newsize, "%s%.*s", + cjoin->table->name, (int)idx->colconf.len, idx->colconf.str)); WT_ERR(__wt_open_cursor(session, main_uri, (WT_CURSOR *)cjoin, raw_cfg, &entry->main)); if (idx->extractor == NULL) { @@ -1162,7 +1162,8 @@ __curjoin_open_main(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, */ len = strlen(entry->main->value_format) + 3; WT_ERR(__wt_calloc(session, len, 1, &newformat)); - snprintf(newformat, len, "%s0x", entry->main->value_format); + WT_ERR(__wt_snprintf( + newformat, len, "%s0x", entry->main->value_format)); __wt_free(session, entry->main->value_format); entry->main->value_format = newformat; } @@ -1531,8 +1532,8 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, len = strlen(cindex->iface.key_format) + 3; WT_RET(__wt_calloc(session, len, 1, &entry->repack_format)); - snprintf(entry->repack_format, len, "%s0x", - cindex->iface.key_format); + WT_RET(__wt_snprintf(entry->repack_format, + len, "%s0x", cindex->iface.key_format)); } } return (0); diff --git a/src/cursor/cur_json.c b/src/cursor/cur_json.c index 0ad3c4f4201..e8ddb767863 100644 --- a/src/cursor/cur_json.c +++ b/src/cursor/cur_json.c @@ -8,8 +8,8 @@ #include "wt_internal.h" -static size_t __json_unpack_put(WT_SESSION_IMPL *, void *, u_char *, size_t, - WT_CONFIG_ITEM *); +static int __json_unpack_put( + WT_SESSION_IMPL *, void *, u_char *, size_t, WT_CONFIG_ITEM *, size_t *); static inline int __json_struct_size(WT_SESSION_IMPL *, const void *, size_t, const char *, WT_CONFIG_ITEM *, bool, size_t *); static inline int __json_struct_unpackv(WT_SESSION_IMPL *, const void *, size_t, @@ -61,22 +61,22 @@ static int __json_pack_size(WT_SESSION_IMPL *, const char *, WT_CONFIG_ITEM *, * __json_unpack_put -- * Calculate the size of a packed byte string as formatted for JSON. */ -static size_t +static int __json_unpack_put(WT_SESSION_IMPL *session, void *voidpv, - u_char *buf, size_t bufsz, WT_CONFIG_ITEM *name) + u_char *buf, size_t bufsz, WT_CONFIG_ITEM *name, size_t *retsizep) { WT_PACK_VALUE *pv; const u_char *p, *end; size_t s, n; pv = (WT_PACK_VALUE *)voidpv; - s = (size_t)snprintf((char *)buf, bufsz, "\"%.*s\" : ", - (int)name->len, name->str); + + WT_RET(__wt_snprintf_len_set( + (char *)buf, bufsz, &s, "\"%.*s\" : ", (int)name->len, name->str)); if (s <= bufsz) { bufsz -= s; buf += s; - } - else + } else bufsz = 0; switch (pv->type) { @@ -118,7 +118,8 @@ __json_unpack_put(WT_SESSION_IMPL *session, void *voidpv, } if (bufsz > 0) *buf++ = '"'; - return (s); + *retsizep += s; + return (0); case 'U': case 'u': s += 2; @@ -140,14 +141,17 @@ __json_unpack_put(WT_SESSION_IMPL *session, void *voidpv, } if (bufsz > 0) *buf++ = '"'; - return (s); + *retsizep += s; + return (0); case 'b': case 'h': case 'i': case 'l': case 'q': - return (s + - (size_t)snprintf((char *)buf, bufsz, "%" PRId64, pv->u.i)); + WT_RET(__wt_snprintf_len_incr( + (char *)buf, bufsz, &s, "%" PRId64, pv->u.i)); + *retsizep += s; + return (0); case 'B': case 't': case 'H': @@ -156,11 +160,14 @@ __json_unpack_put(WT_SESSION_IMPL *session, void *voidpv, case 'Q': case 'r': case 'R': - return (s + - (size_t)snprintf((char *)buf, bufsz, "%" PRId64, pv->u.u)); + WT_RET(__wt_snprintf_len_incr( + (char *)buf, bufsz, &s, "%" PRId64, pv->u.u)); + *retsizep += s; + return (0); } - __wt_err(session, EINVAL, "unknown pack-value type: %c", (int)pv->type); - return ((size_t)-1); + + WT_RET_MSG(session, EINVAL, + "unknown pack-value type: %c", (int)pv->type); } /* @@ -194,7 +201,8 @@ __json_struct_size(WT_SESSION_IMPL *session, const void *buffer, needcr = true; WT_RET(__unpack_read(session, &pv, &p, (size_t)(end - p))); WT_RET(__pack_name_next(&packname, &name)); - result += __json_unpack_put(session, &pv, NULL, 0, &name); + WT_RET( + __json_unpack_put(session, &pv, NULL, 0, &name, &result)); } if (ret == WT_NOTFOUND) ret = 0; @@ -243,8 +251,9 @@ __json_struct_unpackv(WT_SESSION_IMPL *session, needcr = true; WT_RET(__unpack_read(session, &pv, &p, (size_t)(end - p))); WT_RET(__pack_name_next(&packname, &name)); - jsize = __json_unpack_put(session, - (u_char *)&pv, jbuf, jbufsize, &name); + jsize = 0; + WT_RET(__json_unpack_put(session, + (u_char *)&pv, jbuf, jbufsize, &name, &jsize)); WT_ASSERT(session, jsize <= jbufsize); jbuf += jsize; jbufsize -= jsize; diff --git a/src/cursor/cur_stat.c b/src/cursor/cur_stat.c index c5ccdb1b649..0bff642370d 100644 --- a/src/cursor/cur_stat.c +++ b/src/cursor/cur_stat.c @@ -477,8 +477,8 @@ __curstat_join_desc(WT_CURSOR_STAT *cst, int slot, const char **resultp) len = strlen("join: ") + strlen(sgrp->desc_prefix) + strlen(static_desc) + 1; WT_RET(__wt_realloc(session, NULL, len, &cst->desc_buf)); - snprintf(cst->desc_buf, len, "join: %s%s", sgrp->desc_prefix, - static_desc); + WT_RET(__wt_snprintf( + cst->desc_buf, len, "join: %s%s", sgrp->desc_prefix, static_desc)); *resultp = cst->desc_buf; return (0); } diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 84c9990832d..3ce35c60f2e 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -900,24 +900,32 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) } #define EVICT_TUNE_BATCH 1 /* Max workers to add each period */ -#define EVICT_TUNE_DATAPT_MIN 3 /* Data points needed before deciding - if we should keep adding workers or - settle on an earlier value. */ +/* + * Data points needed before deciding if we should keep adding workers or settle + * on an earlier value. + */ +#define EVICT_TUNE_DATAPT_MIN 3 #define EVICT_TUNE_PERIOD 1 /* Tune period in seconds */ /* + * We will do a fresh re-tune every that many seconds to adjust to + * significant phase changes. + */ +#define EVICT_FORCE_RETUNE 30 + +/* * __evict_tune_workers -- * Find the right number of eviction workers. Gradually ramp up the number of * workers increasing the number in batches indicated by the setting above. - * Store the number of workers that gave us the best throughput so far and - * the number of data points we have tried. + * Store the number of workers that gave us the best throughput so far and the + * number of data points we have tried. * - * Every once in a while when we have the minimum number of data points - * we check whether the eviction throughput achieved with the current number - * of workers is the best we have seen so far. If so, we will keep increasing - * the number of workers. If not, we are past the infliction point on the - * eviction throughput curve. In that case, we will set the number of workers - * to the best observed so far and settle into a stable state. + * Every once in a while when we have the minimum number of data points we check + * whether the eviction throughput achieved with the current number of workers + * is the best we have seen so far. If so, we will keep increasing the number of + * workers. If not, we are past the infliction point on the eviction throughput + * curve. In that case, we will set the number of workers to the best observed + * so far and settle into a stable state. */ static int __evict_tune_workers(WT_SESSION_IMPL *session) @@ -927,27 +935,60 @@ __evict_tune_workers(WT_SESSION_IMPL *session) WT_CONNECTION_IMPL *conn; WT_DECL_RET; uint64_t cur_threads, delta_msec, delta_pages, i, target_threads; - uint64_t pgs_evicted_cur, pgs_evicted_persec_cur; + uint64_t pgs_evicted_cur, pgs_evicted_persec_cur, time_diff; uint32_t thread_surplus; conn = S2C(session); cache = conn->cache; WT_ASSERT(session, conn->evict_threads.threads[0]->session == session); - - if (conn->evict_tune_stable) - return (0); + pgs_evicted_cur = pgs_evicted_persec_cur = 0; __wt_epoch(session, ¤t_time); + time_diff = WT_TIMEDIFF_SEC(current_time, conn->evict_tune_last_time); /* - * Every EVICT_TUNE_PERIOD seconds record the number of - * pages evicted per second observed in the previous period. + * If we have reached the stable state and have not run long enough to + * surpass the forced re-tuning threshold, return. */ - if (WT_TIMEDIFF_SEC( - current_time, conn->evict_tune_last_time) < EVICT_TUNE_PERIOD) - return (0); + if (conn->evict_tune_stable) { + if (time_diff < EVICT_FORCE_RETUNE) + return (0); + + /* + * Stable state was reached a long time ago. Let's re-tune. + * Reset all the state. + */ + conn->evict_tune_stable = 0; + conn->evict_tune_last_action_time.tv_sec = 0; + conn->evict_tune_pgs_last = 0; + conn->evict_tune_num_points = 0; + conn->evict_tune_pg_sec_max = 0; + conn->evict_tune_workers_best = 0; + + /* Reduce the number of eviction workers to the minimum */ + thread_surplus = conn->evict_threads.current_threads - + conn->evict_threads_min; + for (i = 0; i < thread_surplus; i++) { + WT_ERR(__wt_thread_group_stop_one( + session, &conn->evict_threads, false)); + WT_STAT_CONN_INCR(session, + cache_eviction_worker_removed); + } + WT_STAT_CONN_INCR(session, cache_eviction_force_retune); + } else + if (time_diff < EVICT_TUNE_PERIOD) + /* + * If we have not reached stable state, don't do + * anything unless enough time has passed since the last + * time we have taken any action in this function. + */ + return (0); + /* + * Measure the number of evicted pages so far. Eviction rate correlates + * to performance, so this is our metric of success. + */ pgs_evicted_cur = cache->pages_evict; /* @@ -987,7 +1028,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) if (conn->evict_tune_num_points >= conn->evict_tune_datapts_needed) { if ((conn->evict_tune_workers_best == conn->evict_threads.current_threads) && - (conn->evict_threads.current_threads < + (conn->evict_threads.current_threads < conn->evict_threads_max)) { /* * Keep adding workers. We will check again @@ -996,7 +1037,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) conn->evict_tune_datapts_needed += WT_MIN(EVICT_TUNE_DATAPT_MIN, (conn->evict_threads_max - - conn->evict_threads.current_threads)/ + - conn->evict_threads.current_threads) / EVICT_TUNE_BATCH); } else { /* @@ -1025,7 +1066,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) conn->evict_tune_stable = true; WT_STAT_CONN_SET(session, cache_eviction_active_workers, conn->evict_threads.current_threads); - return (0); + goto err; } } diff --git a/src/include/btree.h b/src/include/btree.h index 88312f408cc..28fe1b94b23 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -58,6 +58,12 @@ #define WT_BTREE_DELETE_THRESHOLD 1000 /* + * Minimum size of the chunks (in percentage of the page size) a page gets split + * into during reconciliation. + */ +#define WT_BTREE_MIN_SPLIT_PCT 50 + +/* * WT_BTREE -- * A btree handle. */ diff --git a/src/include/btree.i b/src/include/btree.i index eefc2db075d..a4d88d5fda1 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -413,7 +413,7 @@ __wt_cache_page_image_incr(WT_SESSION_IMPL *session, uint32_t size) * Evict pages from the cache. */ static inline void -__wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) +__wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page, bool rewrite) { WT_BTREE *btree; WT_CACHE *cache; @@ -456,7 +456,15 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) /* Update pages and bytes evicted. */ (void)__wt_atomic_add64(&cache->bytes_evict, page->memory_footprint); - (void)__wt_atomic_addv64(&cache->pages_evict, 1); + + /* + * Don't count rewrites as eviction: there's no guarantee we are making + * real progress. + */ + if (rewrite) + (void)__wt_atomic_subv64(&cache->pages_inmem, 1); + else + (void)__wt_atomic_addv64(&cache->pages_evict, 1); } /* diff --git a/src/include/cursor.i b/src/include/cursor.i index 9cb9f5e7189..12044e0e228 100644 --- a/src/include/cursor.i +++ b/src/include/cursor.i @@ -76,23 +76,6 @@ __cursor_leave(WT_SESSION_IMPL *session) } /* - * __curfile_enter -- - * Activate a file cursor. - */ -static inline int -__curfile_enter(WT_CURSOR_BTREE *cbt) -{ - WT_SESSION_IMPL *session; - - session = (WT_SESSION_IMPL *)cbt->iface.session; - - if (!F_ISSET(cbt, WT_CBT_NO_TXN)) - WT_RET(__cursor_enter(session)); - F_SET(cbt, WT_CBT_ACTIVE); - return (0); -} - -/* * __cursor_reset -- * Reset the cursor, it no longer holds any position. */ @@ -264,8 +247,12 @@ __cursor_func_init(WT_CURSOR_BTREE *cbt, bool reenter) /* If the transaction is idle, check that the cache isn't full. */ WT_RET(__wt_txn_idle_cache_check(session)); - if (!F_ISSET(cbt, WT_CBT_ACTIVE)) - WT_RET(__curfile_enter(cbt)); + /* Activate the file cursor. */ + if (!F_ISSET(cbt, WT_CBT_ACTIVE)) { + if (!F_ISSET(cbt, WT_CBT_NO_TXN)) + WT_RET(__cursor_enter(session)); + F_SET(cbt, WT_CBT_ACTIVE); + } /* * If this is an ordinary transactional cursor, make sure we are set up diff --git a/src/include/extern.h b/src/include/extern.h index c0aa21b7f4c..c0a6087e9b1 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -103,7 +103,7 @@ extern int __wt_btcur_reset(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((wa extern int __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_insert(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btcur_update_check(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_remove(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -126,6 +126,7 @@ extern int __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) extern void __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern bool __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_ref_out_int(WT_SESSION_IMPL *session, WT_REF *ref, bool rewrite) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_free_ref( WT_SESSION_IMPL *session, WT_REF *ref, int page_type, bool free_pages) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -289,7 +290,7 @@ extern int __wt_curconfig_open(WT_SESSION_IMPL *session, const char *uri, const extern int __wt_curds_open( WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_DATA_SOURCE *dsrc, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_curfile_next_random(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curfile_update_check(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_curfile_insert_check(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_curjoin_joined(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -404,7 +405,6 @@ extern int __wt_logop_row_truncate_print(WT_SESSION_IMPL *session, const uint8_t extern int __wt_txn_op_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_slot_switch( WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_slot_new(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_slot_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_slot_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -457,6 +457,7 @@ extern int __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) extern int __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_lsm_worker_stop(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_meta_apply_all(WT_SESSION_IMPL *session, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_meta_checkpoint(WT_SESSION_IMPL *session, const char *fname, const char *checkpoint, WT_CKPT *ckpt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_meta_checkpoint_last_name( WT_SESSION_IMPL *session, const char *fname, const char **namep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/extern_posix.h b/src/include/extern_posix.h index fed7835ada1..57d94e392d1 100644 --- a/src/include/extern_posix.h +++ b/src/include/extern_posix.h @@ -24,8 +24,9 @@ extern bool __wt_has_priv(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden") extern void __wt_stream_set_line_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_stream_set_no_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_yield(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); diff --git a/src/include/extern_win.h b/src/include/extern_win.h index 0bfc821c7a6..43127a0c79f 100644 --- a/src/include/extern_win.h +++ b/src/include/extern_win.h @@ -22,9 +22,10 @@ extern bool __wt_has_priv(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden") extern void __wt_stream_set_line_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_stream_set_no_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_to_utf16_string( WT_SESSION_IMPL *session, const char*utf8, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_to_utf8_string( WT_SESSION_IMPL *session, const wchar_t*wide, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/flags.h b/src/include/flags.h index c1fff920e3b..f26a45c68f5 100644 --- a/src/include/flags.h +++ b/src/include/flags.h @@ -6,19 +6,19 @@ #define WT_CONN_CACHE_POOL 0x00000001 #define WT_CONN_CKPT_SYNC 0x00000002 #define WT_CONN_CLOSING 0x00000004 -#define WT_CONN_EVICTION_RUN 0x00000008 -#define WT_CONN_IN_MEMORY 0x00000010 -#define WT_CONN_LAS_OPEN 0x00000020 -#define WT_CONN_LEAK_MEMORY 0x00000040 -#define WT_CONN_LOG_SERVER_RUN 0x00000080 +#define WT_CONN_CLOSING_NO_MORE_OPENS 0x00000008 +#define WT_CONN_EVICTION_RUN 0x00000010 +#define WT_CONN_IN_MEMORY 0x00000020 +#define WT_CONN_LAS_OPEN 0x00000040 +#define WT_CONN_LEAK_MEMORY 0x00000080 #define WT_CONN_LSM_MERGE 0x00000100 #define WT_CONN_PANIC 0x00000200 #define WT_CONN_READONLY 0x00000400 #define WT_CONN_RECOVERING 0x00000800 #define WT_CONN_SERVER_ASYNC 0x00001000 #define WT_CONN_SERVER_CHECKPOINT 0x00002000 -#define WT_CONN_SERVER_LSM 0x00004000 -#define WT_CONN_SERVER_RUN 0x00008000 +#define WT_CONN_SERVER_LOG 0x00004000 +#define WT_CONN_SERVER_LSM 0x00008000 #define WT_CONN_SERVER_STATISTICS 0x00010000 #define WT_CONN_SERVER_SWEEP 0x00020000 #define WT_CONN_WAS_BACKUP 0x00040000 diff --git a/src/include/log.h b/src/include/log.h index f0999ba316b..fb3c961417f 100644 --- a/src/include/log.h +++ b/src/include/log.h @@ -193,7 +193,8 @@ struct __wt_myslot { wt_off_t end_offset; /* My end offset in buffer */ wt_off_t offset; /* Slot buffer offset */ #define WT_MYSLOT_CLOSE 0x01 /* This thread is closing the slot */ -#define WT_MYSLOT_UNBUFFERED 0x02 /* Write directly */ +#define WT_MYSLOT_NEEDS_RELEASE 0x02 /* This thread is releasing the slot */ +#define WT_MYSLOT_UNBUFFERED 0x04 /* Write directly */ uint32_t flags; /* Flags */ }; diff --git a/src/include/lsm.h b/src/include/lsm.h index 2bbb813bad2..e3f6897ef9d 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -23,11 +23,14 @@ struct __wt_lsm_worker_cookie { struct __wt_lsm_worker_args { WT_SESSION_IMPL *session; /* Session */ WT_CONDVAR *work_cond; /* Owned by the manager */ + wt_thread_t tid; /* Thread id */ + bool tid_set; /* Thread id set */ + u_int id; /* My manager slot id */ uint32_t type; /* Types of operations handled */ -#define WT_LSM_WORKER_RUN 0x01 - uint32_t flags; /* Worker flags */ + + volatile bool running; /* Worker is running */ }; /* @@ -162,6 +165,9 @@ struct __wt_lsm_manager { #define WT_LSM_MAX_WORKERS 20 #define WT_LSM_MIN_WORKERS 3 WT_LSM_WORKER_ARGS lsm_worker_cookies[WT_LSM_MAX_WORKERS]; + +#define WT_LSM_MANAGER_SHUTDOWN 0x01 /* Manager has shut down */ + uint32_t flags; }; /* diff --git a/src/include/misc.i b/src/include/misc.i index d5692a3f9cf..7040886cf82 100644 --- a/src/include/misc.i +++ b/src/include/misc.i @@ -86,3 +86,94 @@ __wt_verbose(WT_SESSION_IMPL *session, int flag, const char *fmt, ...) WT_UNUSED(fmt); #endif } + +/* + * __wt_snprintf -- + * snprintf convenience function, ignoring the returned size. + */ +static inline int +__wt_snprintf(char *buf, size_t size, const char *fmt, ...) + WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4))) +{ + WT_DECL_RET; + size_t len; + va_list ap; + + len = 0; + + va_start(ap, fmt); + ret = __wt_vsnprintf_len_incr(buf, size, &len, fmt, ap); + va_end(ap); + WT_RET(ret); + + /* It's an error if the buffer couldn't hold everything. */ + return (len >= size ? ERANGE : 0); +} + +/* + * __wt_vsnprintf -- + * vsnprintf convenience function, ignoring the returned size. + */ +static inline int +__wt_vsnprintf(char *buf, size_t size, const char *fmt, va_list ap) +{ + size_t len; + + len = 0; + + WT_RET(__wt_vsnprintf_len_incr(buf, size, &len, fmt, ap)); + + /* It's an error if the buffer couldn't hold everything. */ + return (len >= size ? ERANGE : 0); +} + +/* + * __wt_snprintf_len_set -- + * snprintf convenience function, setting the returned size. + */ +static inline int +__wt_snprintf_len_set( + char *buf, size_t size, size_t *retsizep, const char *fmt, ...) + WT_GCC_FUNC_ATTRIBUTE((format (printf, 4, 5))) +{ + WT_DECL_RET; + va_list ap; + + *retsizep = 0; + + va_start(ap, fmt); + ret = __wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap); + va_end(ap); + return (ret); +} + +/* + * __wt_vsnprintf_len_set -- + * vsnprintf convenience function, setting the returned size. + */ +static inline int +__wt_vsnprintf_len_set( + char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) +{ + *retsizep = 0; + + return (__wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap)); +} + +/* + * __wt_snprintf_len_incr -- + * snprintf convenience function, incrementing the returned size. + */ +static inline int +__wt_snprintf_len_incr( + char *buf, size_t size, size_t *retsizep, const char *fmt, ...) + WT_GCC_FUNC_ATTRIBUTE((format (printf, 4, 5))) +{ + WT_DECL_RET; + va_list ap; + + va_start(ap, fmt); + ret = __wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap); + va_end(ap); + return (ret); +} diff --git a/src/include/os_windows.h b/src/include/os_windows.h index 65938ac9f17..c1e5f788dc6 100644 --- a/src/include/os_windows.h +++ b/src/include/os_windows.h @@ -43,16 +43,6 @@ typedef uint32_t u_int; typedef unsigned char u_char; typedef uint64_t u_long; -/* <= VS 2013 is not C99 compat */ -#if _MSC_VER < 1900 -#define snprintf _wt_snprintf - -_Check_return_opt_ int __cdecl _wt_snprintf( - _Out_writes_(_MaxCount) char * _DstBuf, - _In_ size_t _MaxCount, - _In_z_ _Printf_format_string_ const char * _Format, ...); -#endif - /* * Windows does have ssize_t * Python headers declare also though so we need to guard it @@ -61,18 +51,6 @@ _Check_return_opt_ int __cdecl _wt_snprintf( typedef int ssize_t; #endif -/* - * Provide a custom version of vsnprintf that returns the - * needed buffer length instead of -1 on truncation - */ -#define vsnprintf _wt_vsnprintf - -_Check_return_opt_ int __cdecl _wt_vsnprintf( - _Out_writes_(_MaxCount) char * _DstBuf, - _In_ size_t _MaxCount, - _In_z_ _Printf_format_string_ const char * _Format, - va_list _ArgList); - /* Provide a custom version of localtime_r */ struct tm *localtime_r(const time_t* timer, struct tm* result); diff --git a/src/include/packing.i b/src/include/packing.i index d79afe6d4a2..0eadb2f2027 100644 --- a/src/include/packing.i +++ b/src/include/packing.i @@ -104,8 +104,8 @@ __pack_name_next(WT_PACK_NAME *pn, WT_CONFIG_ITEM *name) WT_CONFIG_ITEM ignore; if (pn->genname) { - (void)snprintf(pn->buf, sizeof(pn->buf), - (pn->iskey ? "key%d" : "value%d"), pn->count); + WT_RET(__wt_snprintf(pn->buf, sizeof(pn->buf), + (pn->iskey ? "key%d" : "value%d"), pn->count)); WT_CLEAR(*name); name->str = pn->buf; name->len = strlen(pn->buf); @@ -198,7 +198,7 @@ next: if (pack->cur == pack->end) return (0); default: WT_RET_MSG(pack->session, EINVAL, - "Invalid type '%c' found in format '%.*s'", + "Invalid type '%c' found in format '%.*s'", pv->type, (int)(pack->end - pack->orig), pack->orig); } diff --git a/src/include/stat.h b/src/include/stat.h index ed3d588b7d3..6c274484bcb 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -318,6 +318,7 @@ struct __wt_connection_stats { int64_t cache_eviction_force_fail; int64_t cache_eviction_walks_active; int64_t cache_eviction_walks_started; + int64_t cache_eviction_force_retune; int64_t cache_eviction_hazard; int64_t cache_hazard_checks; int64_t cache_hazard_walks; @@ -404,9 +405,11 @@ struct __wt_connection_stats { int64_t lock_table_wait_internal; int64_t log_slot_switch_busy; int64_t log_slot_closes; + int64_t log_slot_active_closed; int64_t log_slot_races; int64_t log_slot_transitions; int64_t log_slot_joins; + int64_t log_slot_no_free_slots; int64_t log_slot_unbuffered; int64_t log_bytes_payload; int64_t log_bytes_written; diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 7223aeae0f6..ddecb2ac765 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -1242,8 +1242,8 @@ struct __wt_session { * @config{split_pct, the Btree page split size as a percentage of the * maximum Btree page size\, that is\, when a Btree page is split\, it * will be split into smaller pages\, where each page is the specified - * percentage of the maximum Btree page size., an integer between 25 and - * 100; default \c 75.} + * percentage of the maximum Btree page size., an integer between 50 and + * 100; default \c 90.} * @config{type, set the type of data source used to store a column * group\, index or simple table. By default\, a \c "file:" URI is * derived from the object name. The \c type configuration can be used @@ -1480,6 +1480,10 @@ struct __wt_session { * contains. * @snippet ex_all.c Truncate a range * + * Any specified cursors end with no position, and subsequent calls to + * the WT_CURSOR::next (WT_CURSOR::prev) method will iterate from the + * beginning (end) of the table. + * * @param session the session handle * @param name the URI of the file or table to truncate * @param start optional cursor marking the first record discarded; @@ -4456,384 +4460,390 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1059 /*! cache: files with new eviction walks started */ #define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1060 +/*! cache: force re-tuning of eviction workers once in a while */ +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_RETUNE 1061 /*! cache: hazard pointer blocked page eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1061 +#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1062 /*! cache: hazard pointer check calls */ -#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1062 +#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1063 /*! cache: hazard pointer check entries walked */ -#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1063 +#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1064 /*! cache: hazard pointer maximum array length */ -#define WT_STAT_CONN_CACHE_HAZARD_MAX 1064 +#define WT_STAT_CONN_CACHE_HAZARD_MAX 1065 /*! cache: in-memory page passed criteria to be split */ -#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1065 +#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1066 /*! cache: in-memory page splits */ -#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1066 +#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1067 /*! cache: internal pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1067 +#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1068 /*! cache: internal pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1068 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1069 /*! cache: leaf pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1069 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1070 /*! cache: lookaside table insert calls */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1070 +#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1071 /*! cache: lookaside table remove calls */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1071 +#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1072 /*! cache: maximum bytes configured */ -#define WT_STAT_CONN_CACHE_BYTES_MAX 1072 +#define WT_STAT_CONN_CACHE_BYTES_MAX 1073 /*! cache: maximum page size at eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1073 +#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1074 /*! cache: modified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1074 +#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1075 /*! cache: modified pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1075 +#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1076 /*! cache: overflow pages read into cache */ -#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1076 +#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1077 /*! cache: overflow values cached in memory */ -#define WT_STAT_CONN_CACHE_OVERFLOW_VALUE 1077 +#define WT_STAT_CONN_CACHE_OVERFLOW_VALUE 1078 /*! cache: page split during eviction deepened the tree */ -#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1078 +#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1079 /*! cache: page written requiring lookaside records */ -#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1079 +#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1080 /*! cache: pages currently held in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_INUSE 1080 +#define WT_STAT_CONN_CACHE_PAGES_INUSE 1081 /*! cache: pages evicted because they exceeded the in-memory maximum */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1081 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1082 /*! cache: pages evicted because they had chains of deleted items */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1082 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1083 /*! cache: pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP 1083 +#define WT_STAT_CONN_CACHE_EVICTION_APP 1084 /*! cache: pages queued for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1084 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1085 /*! cache: pages queued for urgent eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1085 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1086 /*! cache: pages queued for urgent eviction during walk */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1086 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1087 /*! cache: pages read into cache */ -#define WT_STAT_CONN_CACHE_READ 1087 +#define WT_STAT_CONN_CACHE_READ 1088 /*! cache: pages read into cache requiring lookaside entries */ -#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1088 +#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1089 /*! cache: pages requested from the cache */ -#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1089 +#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1090 /*! cache: pages seen by eviction walk */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1090 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1091 /*! cache: pages selected for eviction unable to be evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1091 +#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1092 /*! cache: pages walked for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_WALK 1092 +#define WT_STAT_CONN_CACHE_EVICTION_WALK 1093 /*! cache: pages written from cache */ -#define WT_STAT_CONN_CACHE_WRITE 1093 +#define WT_STAT_CONN_CACHE_WRITE 1094 /*! cache: pages written requiring in-memory restoration */ -#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1094 +#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1095 /*! cache: percentage overhead */ -#define WT_STAT_CONN_CACHE_OVERHEAD 1095 +#define WT_STAT_CONN_CACHE_OVERHEAD 1096 /*! cache: tracked bytes belonging to internal pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1096 +#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1097 /*! cache: tracked bytes belonging to leaf pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_LEAF 1097 +#define WT_STAT_CONN_CACHE_BYTES_LEAF 1098 /*! cache: tracked dirty bytes in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1098 +#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1099 /*! cache: tracked dirty pages in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1099 +#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1100 /*! cache: unmodified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1100 +#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1101 /*! connection: auto adjusting condition resets */ -#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1101 +#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1102 /*! connection: auto adjusting condition wait calls */ -#define WT_STAT_CONN_COND_AUTO_WAIT 1102 +#define WT_STAT_CONN_COND_AUTO_WAIT 1103 /*! connection: files currently open */ -#define WT_STAT_CONN_FILE_OPEN 1103 +#define WT_STAT_CONN_FILE_OPEN 1104 /*! connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1104 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1105 /*! connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1105 +#define WT_STAT_CONN_MEMORY_FREE 1106 /*! connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1106 +#define WT_STAT_CONN_MEMORY_GROW 1107 /*! connection: pthread mutex condition wait calls */ -#define WT_STAT_CONN_COND_WAIT 1107 +#define WT_STAT_CONN_COND_WAIT 1108 /*! connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1108 +#define WT_STAT_CONN_RWLOCK_READ 1109 /*! connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1109 +#define WT_STAT_CONN_RWLOCK_WRITE 1110 /*! connection: total fsync I/Os */ -#define WT_STAT_CONN_FSYNC_IO 1110 +#define WT_STAT_CONN_FSYNC_IO 1111 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1111 +#define WT_STAT_CONN_READ_IO 1112 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1112 +#define WT_STAT_CONN_WRITE_IO 1113 /*! cursor: cursor create calls */ -#define WT_STAT_CONN_CURSOR_CREATE 1113 +#define WT_STAT_CONN_CURSOR_CREATE 1114 /*! cursor: cursor insert calls */ -#define WT_STAT_CONN_CURSOR_INSERT 1114 +#define WT_STAT_CONN_CURSOR_INSERT 1115 /*! cursor: cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT 1115 +#define WT_STAT_CONN_CURSOR_NEXT 1116 /*! cursor: cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV 1116 +#define WT_STAT_CONN_CURSOR_PREV 1117 /*! cursor: cursor remove calls */ -#define WT_STAT_CONN_CURSOR_REMOVE 1117 +#define WT_STAT_CONN_CURSOR_REMOVE 1118 /*! cursor: cursor reset calls */ -#define WT_STAT_CONN_CURSOR_RESET 1118 +#define WT_STAT_CONN_CURSOR_RESET 1119 /*! cursor: cursor restarted searches */ -#define WT_STAT_CONN_CURSOR_RESTART 1119 +#define WT_STAT_CONN_CURSOR_RESTART 1120 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1120 +#define WT_STAT_CONN_CURSOR_SEARCH 1121 /*! cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1121 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1122 /*! cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1122 +#define WT_STAT_CONN_CURSOR_UPDATE 1123 /*! cursor: truncate calls */ -#define WT_STAT_CONN_CURSOR_TRUNCATE 1123 +#define WT_STAT_CONN_CURSOR_TRUNCATE 1124 /*! data-handle: connection data handles currently active */ -#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1124 +#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1125 /*! data-handle: connection sweep candidate became referenced */ -#define WT_STAT_CONN_DH_SWEEP_REF 1125 +#define WT_STAT_CONN_DH_SWEEP_REF 1126 /*! data-handle: connection sweep dhandles closed */ -#define WT_STAT_CONN_DH_SWEEP_CLOSE 1126 +#define WT_STAT_CONN_DH_SWEEP_CLOSE 1127 /*! data-handle: connection sweep dhandles removed from hash list */ -#define WT_STAT_CONN_DH_SWEEP_REMOVE 1127 +#define WT_STAT_CONN_DH_SWEEP_REMOVE 1128 /*! data-handle: connection sweep time-of-death sets */ -#define WT_STAT_CONN_DH_SWEEP_TOD 1128 +#define WT_STAT_CONN_DH_SWEEP_TOD 1129 /*! data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_SWEEPS 1129 +#define WT_STAT_CONN_DH_SWEEPS 1130 /*! data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1130 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1131 /*! data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1131 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1132 /*! lock: checkpoint lock acquisitions */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1132 +#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1133 /*! lock: checkpoint lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1133 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1134 /*! lock: checkpoint lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1134 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1135 /*! lock: handle-list lock eviction thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_EVICTION 1135 +#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_EVICTION 1136 /*! lock: metadata lock acquisitions */ -#define WT_STAT_CONN_LOCK_METADATA_COUNT 1136 +#define WT_STAT_CONN_LOCK_METADATA_COUNT 1137 /*! lock: metadata lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1137 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1138 /*! lock: metadata lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1138 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1139 /*! lock: schema lock acquisitions */ -#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1139 +#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1140 /*! lock: schema lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1140 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1141 /*! lock: schema lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1141 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1142 /*! lock: table lock acquisitions */ -#define WT_STAT_CONN_LOCK_TABLE_COUNT 1142 +#define WT_STAT_CONN_LOCK_TABLE_COUNT 1143 /*! * lock: table lock application thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1143 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1144 /*! * lock: table lock internal thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1144 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1145 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1145 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1146 /*! log: consolidated slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1146 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1147 +/*! log: consolidated slot join active slot closed */ +#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1148 /*! log: consolidated slot join races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1147 +#define WT_STAT_CONN_LOG_SLOT_RACES 1149 /*! log: consolidated slot join transitions */ -#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1148 +#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1150 /*! log: consolidated slot joins */ -#define WT_STAT_CONN_LOG_SLOT_JOINS 1149 +#define WT_STAT_CONN_LOG_SLOT_JOINS 1151 +/*! log: consolidated slot transitions unable to find free slot */ +#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1152 /*! log: consolidated slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1150 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1153 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1151 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1154 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1152 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1155 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1153 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1156 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1154 +#define WT_STAT_CONN_LOG_FLUSH 1157 /*! log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1155 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1158 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1156 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1159 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1157 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1160 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1158 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1161 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1159 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1162 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1160 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1163 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1161 +#define WT_STAT_CONN_LOG_SCANS 1164 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1162 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1165 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1163 +#define WT_STAT_CONN_LOG_WRITE_LSN 1166 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1164 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1167 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1165 +#define WT_STAT_CONN_LOG_SYNC 1168 /*! log: log sync time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DURATION 1166 +#define WT_STAT_CONN_LOG_SYNC_DURATION 1169 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1167 +#define WT_STAT_CONN_LOG_SYNC_DIR 1170 /*! log: log sync_dir time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1168 +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1171 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1169 +#define WT_STAT_CONN_LOG_WRITES 1172 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1170 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1173 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1171 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1174 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1172 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1175 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1173 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1176 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1174 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1177 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1175 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1178 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1176 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1179 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1177 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1180 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1178 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1181 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1179 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1182 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1180 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1183 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1181 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1184 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1182 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1185 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1183 +#define WT_STAT_CONN_REC_PAGES 1186 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1184 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1187 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1185 +#define WT_STAT_CONN_REC_PAGE_DELETE 1188 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1186 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1189 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1187 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1190 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1188 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1191 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1189 +#define WT_STAT_CONN_SESSION_OPEN 1192 /*! session: table alter failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1190 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1193 /*! session: table alter successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1191 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1194 /*! session: table alter unchanged and skipped */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1192 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1195 /*! session: table compact failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1193 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1196 /*! session: table compact successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1194 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1197 /*! session: table create failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1195 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1198 /*! session: table create successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1196 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1199 /*! session: table drop failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1197 +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1200 /*! session: table drop successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1198 +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1201 /*! session: table rebalance failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1199 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1202 /*! session: table rebalance successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1200 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1203 /*! session: table rename failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1201 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1204 /*! session: table rename successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1202 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1205 /*! session: table salvage failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1203 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1206 /*! session: table salvage successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1204 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1207 /*! session: table truncate failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1205 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1208 /*! session: table truncate successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1206 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1209 /*! session: table verify failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1207 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1210 /*! session: table verify successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1208 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1211 /*! thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1209 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1212 /*! thread-state: active filesystem read calls */ -#define WT_STAT_CONN_THREAD_READ_ACTIVE 1210 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1213 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1211 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1214 /*! thread-yield: application thread time evicting (usecs) */ -#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1212 +#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1215 /*! thread-yield: application thread time waiting for cache (usecs) */ -#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1213 +#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1216 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1214 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1217 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1215 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1218 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1216 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1219 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1217 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1220 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1218 +#define WT_STAT_CONN_PAGE_SLEEP 1221 /*! transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1219 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1222 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1220 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1223 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1221 +#define WT_STAT_CONN_TXN_BEGIN 1224 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1222 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1225 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1223 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1226 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1224 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1227 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1225 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1228 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1226 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1229 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1227 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1230 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1228 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1231 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1229 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1232 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1230 +#define WT_STAT_CONN_TXN_CHECKPOINT 1233 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1231 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1234 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1232 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1235 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1233 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1236 /*! * transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1234 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1237 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1235 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1238 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1236 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1239 /*! * transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1237 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1240 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1238 +#define WT_STAT_CONN_TXN_SYNC 1241 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1239 +#define WT_STAT_CONN_TXN_COMMIT 1242 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1240 +#define WT_STAT_CONN_TXN_ROLLBACK 1243 /*! * @} diff --git a/src/log/log.c b/src/log/log.c index 05234619d32..5b24250fffc 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -783,8 +783,8 @@ __log_openfile(WT_SESSION_IMPL *session, __wt_log_desc_byteswap(desc); if (desc->log_magic != WT_LOG_MAGIC) WT_PANIC_RET(session, WT_ERROR, - "log file %s corrupted: Bad magic number %" PRIu32, - (*fhp)->name, desc->log_magic); + "log file %s corrupted: Bad magic number %" PRIu32, + (*fhp)->name, desc->log_magic); if (desc->majorv > WT_LOG_MAJOR_VERSION || (desc->majorv == WT_LOG_MAJOR_VERSION && desc->minorv > WT_LOG_MINOR_VERSION)) @@ -2246,8 +2246,10 @@ __wt_log_vprintf(WT_SESSION_IMPL *session, const char *fmt, va_list ap) return (0); va_copy(ap_copy, ap); - len = (size_t)vsnprintf(NULL, 0, fmt, ap_copy) + 1; + len = 1; + ret = __wt_vsnprintf_len_incr(NULL, 0, &len, fmt, ap_copy); va_end(ap_copy); + WT_RET(ret); WT_RET( __wt_logrec_alloc(session, sizeof(WT_LOG_RECORD) + len, &logrec)); @@ -2264,7 +2266,8 @@ __wt_log_vprintf(WT_SESSION_IMPL *session, const char *fmt, va_list ap) rec_fmt, rectype)); logrec->size += (uint32_t)header_size; - (void)vsnprintf((char *)logrec->data + logrec->size, len, fmt, ap); + WT_ERR(__wt_vsnprintf( + (char *)logrec->data + logrec->size, len, fmt, ap)); __wt_verbose(session, WT_VERB_LOG, "log_printf: %s", (char *)logrec->data + logrec->size); diff --git a/src/log/log_slot.c b/src/log/log_slot.c index c685aec3ffc..512a84dbd13 100644 --- a/src/log/log_slot.c +++ b/src/log/log_slot.c @@ -195,103 +195,12 @@ retry: } /* - * __log_slot_switch_internal -- - * Switch out the current slot and set up a new one. - */ -static int -__log_slot_switch_internal( - WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool forced) -{ - WT_DECL_RET; - WT_LOG *log; - WT_LOGSLOT *slot; - bool free_slot, release; - - log = S2C(session)->log; - release = false; - slot = myslot->slot; - - WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT)); - - /* - * If someone else raced us to closing this specific slot, we're - * done here. - */ - if (slot != log->active_slot) - return (0); - - WT_RET(WT_SESSION_CHECK_PANIC(session)); - /* - * We may come through here multiple times if we were able to close - * a slot but could not set up a new one. If we closed it already, - * don't try to do it again but still set up the new slot. - */ - if (!F_ISSET(myslot, WT_MYSLOT_CLOSE)) { - ret = __log_slot_close(session, slot, &release, forced); - /* - * If close returns WT_NOTFOUND it means that someone else - * is processing the slot change. - */ - if (ret == WT_NOTFOUND) - return (0); - WT_RET(ret); - if (release) { - WT_RET(__wt_log_release(session, slot, &free_slot)); - if (free_slot) - __wt_log_slot_free(session, slot); - } - } - /* - * Set that we have closed this slot because we may call in here - * multiple times if we retry creating a new slot. - */ - F_SET(myslot, WT_MYSLOT_CLOSE); - WT_RET(__wt_log_slot_new(session)); - F_CLR(myslot, WT_MYSLOT_CLOSE); - return (0); -} - -/* - * __wt_log_slot_switch -- - * Switch out the current slot and set up a new one. - */ -int -__wt_log_slot_switch( - WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced) -{ - WT_DECL_RET; - WT_LOG *log; - - log = S2C(session)->log; - /* - * !!! Since the WT_WITH_SLOT_LOCK macro is a do-while loop, the - * compiler does not like it combined directly with the while loop - * here. - * - * The loop conditional is a bit complex. We have to retry if we - * closed the slot but were unable to set up a new slot. In that - * case the flag indicating we have closed the slot will still be set. - * We have to retry in that case regardless of the retry setting - * because we are responsible for setting up the new slot. - */ - do { - WT_WITH_SLOT_LOCK(session, log, - ret = __log_slot_switch_internal(session, myslot, forced)); - if (ret == EBUSY) { - WT_STAT_CONN_INCR(session, log_slot_switch_busy); - __wt_yield(); - } - } while (F_ISSET(myslot, WT_MYSLOT_CLOSE) || (retry && ret == EBUSY)); - return (ret); -} - -/* - * __wt_log_slot_new -- + * __log_slot_new -- * Find a free slot and switch it as the new active slot. * Must be called holding the slot lock. */ -int -__wt_log_slot_new(WT_SESSION_IMPL *session) +static int +__log_slot_new(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; WT_LOG *log; @@ -351,6 +260,7 @@ __wt_log_slot_new(WT_SESSION_IMPL *session) /* * If we didn't find any free slots signal the worker thread. */ + WT_STAT_CONN_INCR(session, log_slot_no_free_slots); __wt_cond_signal(session, conn->log_wrlsn_cond); __wt_yield(); #ifdef HAVE_DIAGNOSTIC @@ -371,6 +281,108 @@ __wt_log_slot_new(WT_SESSION_IMPL *session) } /* + * __log_slot_switch_internal -- + * Switch out the current slot and set up a new one. + */ +static int +__log_slot_switch_internal( + WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool forced) +{ + WT_DECL_RET; + WT_LOG *log; + WT_LOGSLOT *slot; + bool free_slot, release; + + log = S2C(session)->log; + release = false; + slot = myslot->slot; + + WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT)); + + /* + * If someone else raced us to closing this specific slot, we're + * done here. + */ + if (slot != log->active_slot) + return (0); + WT_RET(WT_SESSION_CHECK_PANIC(session)); + + /* + * We may come through here multiple times if we were not able to + * set up a new one. If we closed it already, + * don't try to do it again but still set up the new slot. + */ + if (!F_ISSET(myslot, WT_MYSLOT_CLOSE)) { + ret = __log_slot_close(session, slot, &release, forced); + /* + * If close returns WT_NOTFOUND it means that someone else + * is processing the slot change. + */ + if (ret == WT_NOTFOUND) + return (0); + WT_RET(ret); + /* + * Set that we have closed this slot because we may call in here + * multiple times if we retry creating a new slot. Similarly + * set retain whether this slot needs releasing so that we don't + * lose that information if we retry. + */ + F_SET(myslot, WT_MYSLOT_CLOSE); + if (release) + F_SET(myslot, WT_MYSLOT_NEEDS_RELEASE); + } + /* + * Now that the slot is closed, set up a new one so that joining + * threads don't have to wait on writing the previous slot if we + * release it. Release after setting a new one. + */ + WT_RET(__log_slot_new(session)); + F_CLR(myslot, WT_MYSLOT_CLOSE); + if (F_ISSET(myslot, WT_MYSLOT_NEEDS_RELEASE)) { + WT_RET(__wt_log_release(session, slot, &free_slot)); + F_CLR(myslot, WT_MYSLOT_NEEDS_RELEASE); + if (free_slot) + __wt_log_slot_free(session, slot); + } + return (ret); +} + +/* + * __wt_log_slot_switch -- + * Switch out the current slot and set up a new one. + */ +int +__wt_log_slot_switch( + WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced) +{ + WT_DECL_RET; + WT_LOG *log; + + log = S2C(session)->log; + + /* + * !!! Since the WT_WITH_SLOT_LOCK macro is a do-while loop, the + * compiler does not like it combined directly with the while loop + * here. + * + * The loop conditional is a bit complex. We have to retry if we + * closed the slot but were unable to set up a new slot. In that + * case the flag indicating we have closed the slot will still be set. + * We have to retry in that case regardless of the retry setting + * because we are responsible for setting up the new slot. + */ + do { + WT_WITH_SLOT_LOCK(session, log, + ret = __log_slot_switch_internal(session, myslot, forced)); + if (ret == EBUSY) { + WT_STAT_CONN_INCR(session, log_slot_switch_busy); + __wt_yield(); + } + } while (F_ISSET(myslot, WT_MYSLOT_CLOSE) || (retry && ret == EBUSY)); + return (ret); +} + +/* * __wt_log_slot_init -- * Initialize the slot array. */ @@ -531,12 +543,13 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, if (__wt_atomic_casiv64( &slot->slot_state, old_state, new_state)) break; - } + WT_STAT_CONN_INCR(session, log_slot_races); + } else + WT_STAT_CONN_INCR(session, log_slot_active_closed); /* * The slot is no longer open or we lost the race to * update it. Yield and try again. */ - WT_STAT_CONN_INCR(session, log_slot_races); __wt_yield(); } /* diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index 3f0b6df8eb0..52265f02e62 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -178,20 +178,12 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update) if (reset) { WT_ASSERT(session, !F_ISSET(&clsm->iface, - WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT)); + WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT)); WT_RET(__clsm_reset_cursors(clsm, NULL)); } for (;;) { - /* - * If the cursor looks up-to-date, check if the cache is full. - * In case this call blocks, the check will be repeated before - * proceeding. - */ - if (clsm->dsk_gen != lsm_tree->dsk_gen && - lsm_tree->nchunks != 0) - goto open; - + /* Check if the cursor looks up-to-date. */ if (clsm->dsk_gen != lsm_tree->dsk_gen && lsm_tree->nchunks != 0) goto open; @@ -666,7 +658,7 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) { */ if (i != nchunks - 1) clsm->chunks[i]->cursor->insert = - __wt_curfile_update_check; + __wt_curfile_insert_check; if (!F_ISSET(clsm, WT_CLSM_MERGE) && F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) @@ -1624,12 +1616,10 @@ __clsm_remove(WT_CURSOR *cursor) WT_CURSOR_NOVALUE(cursor); WT_ERR(__clsm_enter(clsm, false, true)); - if (F_ISSET(cursor, WT_CURSTD_OVERWRITE) || - (ret = __clsm_lookup(clsm, &value)) == 0) - ret = __clsm_put( - session, clsm, &cursor->key, &__tombstone, positioned); - -err: __clsm_leave(clsm); + if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) + WT_ERR(__clsm_lookup(clsm, &value)); + WT_ERR(__clsm_put( + session, clsm, &cursor->key, &__tombstone, positioned)); /* * If the cursor was positioned, it stays positioned with a key but no @@ -1643,6 +1633,7 @@ err: __clsm_leave(clsm); else WT_TRET(cursor->reset(cursor)); +err: __clsm_leave(clsm); CURSOR_UPDATE_API_END(session, ret); return (ret); } diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 6dc06146179..e33e119aa41 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -89,7 +89,6 @@ __lsm_general_worker_start(WT_SESSION_IMPL *session) if (manager->lsm_workers % 2 == 0) FLD_SET(worker_args->type, WT_LSM_WORK_MERGE); } - F_SET(worker_args, WT_LSM_WORKER_RUN); WT_RET(__wt_lsm_worker_start(session, worker_args)); } @@ -129,17 +128,13 @@ __lsm_stop_workers(WT_SESSION_IMPL *session) manager->lsm_workers--) { worker_args = &manager->lsm_worker_cookies[manager->lsm_workers - 1]; - /* - * Clear this worker's flag so it stops. - */ - F_CLR(worker_args, WT_LSM_WORKER_RUN); - WT_ASSERT(session, worker_args->tid != 0); - WT_RET(__wt_thread_join(session, worker_args->tid)); - worker_args->tid = 0; + WT_ASSERT(session, worker_args->tid_set); + + WT_RET(__wt_lsm_worker_stop(session, worker_args)); worker_args->type = 0; - worker_args->flags = 0; + /* - * We do not clear the session because they are allocated + * We do not clear the other fields because they are allocated * statically when the connection was opened. */ } @@ -237,12 +232,12 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session) manager->lsm_worker_cookies[i].session = worker_session; } + F_SET(conn, WT_CONN_SERVER_LSM); + /* Start the LSM manager thread. */ WT_ERR(__wt_thread_create(session, &manager->lsm_worker_cookies[0].tid, __lsm_worker_manager, &manager->lsm_worker_cookies[0])); - F_SET(conn, WT_CONN_SERVER_LSM); - if (0) { err: for (i = 0; (worker_session = @@ -289,13 +284,18 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) manager = &conn->lsm_manager; removed = 0; + /* + * Clear the LSM server flag and flush to ensure running threads see + * the state change. + */ + F_CLR(conn, WT_CONN_SERVER_LSM); + WT_FULL_BARRIER(); + WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY) || manager->lsm_workers == 0); if (manager->lsm_workers > 0) { - /* - * Stop the main LSM manager thread first. - */ - while (F_ISSET(conn, WT_CONN_SERVER_LSM)) + /* Wait for the main LSM manager thread to finish. */ + while (!F_ISSET(manager, WT_LSM_MANAGER_SHUTDOWN)) __wt_yield(); /* Clean up open LSM handles. */ @@ -303,7 +303,6 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) WT_TRET(__wt_thread_join( session, manager->lsm_worker_cookies[0].tid)); - manager->lsm_worker_cookies[0].tid = 0; /* Release memory from any operations left on the queue. */ while ((current = TAILQ_FIRST(&manager->switchqh)) != NULL) { @@ -342,7 +341,7 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) /* * __lsm_manager_worker_shutdown -- - * Shutdown the LSM manager and worker threads. + * Shutdown the LSM worker threads. */ static int __lsm_manager_worker_shutdown(WT_SESSION_IMPL *session) @@ -354,14 +353,13 @@ __lsm_manager_worker_shutdown(WT_SESSION_IMPL *session) manager = &S2C(session)->lsm_manager; /* - * Wait for the rest of the LSM workers to shutdown. Stop at index + * Wait for the rest of the LSM workers to shutdown. Start at index * one - since we (the manager) are at index 0. */ for (i = 1; i < manager->lsm_workers; i++) { - WT_ASSERT(session, manager->lsm_worker_cookies[i].tid != 0); - __wt_cond_signal(session, manager->work_cond); - WT_TRET(__wt_thread_join( - session, manager->lsm_worker_cookies[i].tid)); + WT_ASSERT(session, manager->lsm_worker_cookies[i].tid_set); + WT_TRET(__wt_lsm_worker_stop( + session, &manager->lsm_worker_cookies[i])); } return (ret); } @@ -383,7 +381,7 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) conn = S2C(session); dhandle_locked = false; - while (F_ISSET(conn, WT_CONN_SERVER_RUN)) { + while (F_ISSET(conn, WT_CONN_SERVER_LSM)) { __wt_sleep(0, 10000); if (TAILQ_EMPTY(&conn->lsmqh)) continue; @@ -469,11 +467,13 @@ static WT_THREAD_RET __lsm_worker_manager(void *arg) { WT_DECL_RET; + WT_LSM_MANAGER *manager; WT_LSM_WORKER_ARGS *cookie; WT_SESSION_IMPL *session; cookie = (WT_LSM_WORKER_ARGS *)arg; session = cookie->session; + manager = &S2C(session)->lsm_manager; WT_ERR(__lsm_general_worker_start(session)); WT_ERR(__lsm_manager_run_server(session)); @@ -482,7 +482,11 @@ __lsm_worker_manager(void *arg) if (ret != 0) { err: WT_PANIC_MSG(session, ret, "LSM worker manager thread error"); } - F_CLR(S2C(session), WT_CONN_SERVER_LSM); + + /* Connection close waits on us to shutdown, let it know we're done. */ + F_SET(manager, WT_LSM_MANAGER_SHUTDOWN); + WT_FULL_BARRIER(); + return (WT_THREAD_RET_VALUE); } diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c index a06b736bf0a..8838638f388 100644 --- a/src/lsm/lsm_merge.c +++ b/src/lsm/lsm_merge.c @@ -625,7 +625,7 @@ err: if (locked) else __wt_verbose(session, WT_VERB_LSM, "Merge failed with %s", - __wt_strerror(session, ret, NULL, 0)); + __wt_strerror(session, ret, NULL, 0)); } F_CLR(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION); return (ret); diff --git a/src/lsm/lsm_meta.c b/src/lsm/lsm_meta.c index 46ead6d6ac4..fc4dde82470 100644 --- a/src/lsm/lsm_meta.c +++ b/src/lsm/lsm_meta.c @@ -229,7 +229,7 @@ __lsm_meta_read_v1( cv.len -= 2; } WT_ERR(__wt_config_check(session, - WT_CONFIG_REF(session, WT_SESSION_create), cv.str, cv.len)); + WT_CONFIG_REF(session, WT_SESSION_create), cv.str, cv.len)); WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->bloom_config)); WT_ERR(__wt_config_getones( session, lsmconf, "lsm.bloom_hash_count", &cv)); diff --git a/src/lsm/lsm_stat.c b/src/lsm/lsm_stat.c index 21e8991be94..411655878af 100644 --- a/src/lsm/lsm_stat.c +++ b/src/lsm/lsm_stat.c @@ -29,8 +29,8 @@ __curstat_lsm_init( const char *cfg[] = { WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL, NULL }; const char *disk_cfg[] = { - WT_CONFIG_BASE(session, WT_SESSION_open_cursor), - "checkpoint=" WT_CHECKPOINT, NULL, NULL }; + WT_CONFIG_BASE(session, WT_SESSION_open_cursor), + "checkpoint=" WT_CHECKPOINT, NULL, NULL }; locked = false; WT_RET(__wt_lsm_tree_get(session, uri, false, &lsm_tree)); @@ -38,13 +38,13 @@ __curstat_lsm_init( /* Propagate all, fast and/or clear to the cursors we open. */ if (cst->flags != 0) { - (void)snprintf(config, sizeof(config), + WT_ERR(__wt_snprintf(config, sizeof(config), "statistics=(%s%s%s%s)", F_ISSET(cst, WT_STAT_TYPE_ALL) ? "all," : "", F_ISSET(cst, WT_STAT_CLEAR) ? "clear," : "", !F_ISSET(cst, WT_STAT_TYPE_ALL) && F_ISSET(cst, WT_STAT_TYPE_FAST) ? "fast," : "", - F_ISSET(cst, WT_STAT_TYPE_SIZE) ? "size," : ""); + F_ISSET(cst, WT_STAT_TYPE_SIZE) ? "size," : "")); cfg[1] = disk_cfg[1] = config; } diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index ffa00c0a5e7..1cabbd4888d 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -21,7 +21,23 @@ __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) { __wt_verbose(session, WT_VERB_LSM_MANAGER, "Start LSM worker %u type %#" PRIx32, args->id, args->type); - return (__wt_thread_create(session, &args->tid, __lsm_worker, args)); + + args->running = true; + WT_RET(__wt_thread_create(session, &args->tid, __lsm_worker, args)); + args->tid_set = true; + return (0); +} + +/* + * __wt_lsm_worker_stop -- + * A wrapper around the LSM worker thread stop. + */ +int +__wt_lsm_worker_stop(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) +{ + args->running = false; + args->tid_set = false; + return (__wt_thread_join(session, args->tid)); } /* @@ -84,7 +100,6 @@ err: __wt_lsm_manager_free_work_unit(session, entry); static WT_THREAD_RET __lsm_worker(void *arg) { - WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_LSM_WORK_UNIT *entry; WT_LSM_WORKER_ARGS *cookie; @@ -93,11 +108,9 @@ __lsm_worker(void *arg) cookie = (WT_LSM_WORKER_ARGS *)arg; session = cookie->session; - conn = S2C(session); entry = NULL; - while (F_ISSET(conn, WT_CONN_SERVER_RUN) && - F_ISSET(cookie, WT_LSM_WORKER_RUN)) { + while (cookie->running) { progress = false; /* diff --git a/src/meta/meta_turtle.c b/src/meta/meta_turtle.c index 66e34c728f2..5a089471059 100644 --- a/src/meta/meta_turtle.c +++ b/src/meta/meta_turtle.c @@ -242,7 +242,7 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) WT_DECL_ITEM(buf); WT_DECL_RET; WT_FSTREAM *fs; - bool exist, match; + bool exist; *valuep = NULL; @@ -258,22 +258,19 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) __metadata_config(session, valuep) : WT_NOTFOUND); WT_RET(__wt_fopen(session, WT_METADATA_TURTLE, 0, WT_STREAM_READ, &fs)); - /* Search for the key. */ WT_ERR(__wt_scr_alloc(session, 512, &buf)); - for (match = false;;) { + + /* Search for the key. */ + do { WT_ERR(__wt_getline(session, fs, buf)); if (buf->size == 0) WT_ERR(WT_NOTFOUND); - if (strcmp(key, buf->data) == 0) - match = true; + } while (strcmp(key, buf->data) != 0); - /* Key matched: read the subsequent line for the value. */ - WT_ERR(__wt_getline(session, fs, buf)); - if (buf->size == 0) - WT_ERR(__wt_illegal_value(session, WT_METADATA_TURTLE)); - if (match) - break; - } + /* Key matched: read the subsequent line for the value. */ + WT_ERR(__wt_getline(session, fs, buf)); + if (buf->size == 0) + WT_ERR(WT_NOTFOUND); /* Copy the value for the caller. */ WT_ERR(__wt_strdup(session, buf->data, valuep)); @@ -283,7 +280,12 @@ err: WT_TRET(__wt_fclose(session, &fs)); if (ret != 0) __wt_free(session, *valuep); - return (ret); + + /* + * A file error or a missing key/value pair in the turtle file means + * something has gone horribly wrong -- we're done. + */ + return (ret == 0 ? 0 : __wt_illegal_value(session, WT_METADATA_TURTLE)); } /* @@ -322,5 +324,9 @@ __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) err: WT_TRET(__wt_fclose(session, &fs)); WT_TRET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET, false)); - return (ret); + /* + * An error updating the turtle file means something has gone horribly + * wrong -- we're done. + */ + return (ret == 0 ? 0 : __wt_illegal_value(session, WT_METADATA_TURTLE)); } diff --git a/src/os_common/filename.c b/src/os_common/filename.c index 5aeb64bb51e..d5695f63d91 100644 --- a/src/os_common/filename.c +++ b/src/os_common/filename.c @@ -29,6 +29,7 @@ int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path) { + WT_DECL_RET; size_t len; char *buf; @@ -39,16 +40,17 @@ __wt_nfilename( * the exists API which is used by the test utilities. */ if (session == NULL || __wt_absolute_path(name)) - WT_RET(__wt_strndup(session, name, namelen, path)); - else { - len = strlen(S2C(session)->home) + 1 + namelen + 1; - WT_RET(__wt_calloc(session, 1, len, &buf)); - snprintf(buf, len, "%s%s%.*s", S2C(session)->home, - __wt_path_separator(), (int)namelen, name); - *path = buf; - } + return (__wt_strndup(session, name, namelen, path)); + len = strlen(S2C(session)->home) + 1 + namelen + 1; + WT_RET(__wt_calloc(session, 1, len, &buf)); + WT_ERR(__wt_snprintf(buf, len, "%s%s%.*s", + S2C(session)->home, __wt_path_separator(), (int)namelen, name)); + *path = buf; return (0); + +err: __wt_free(session, buf); + return (ret); } /* diff --git a/src/os_common/os_errno.c b/src/os_common/os_errno.c index a8e56b7f1aa..7ac89536e79 100644 --- a/src/os_common/os_errno.c +++ b/src/os_common/os_errno.c @@ -44,7 +44,7 @@ __wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen) * Fallback to a generic message. */ if (session == NULL && - snprintf(errbuf, errlen, "error return: %d", error) > 0) + __wt_snprintf(errbuf, errlen, "error return: %d", error) == 0) return (errbuf); if (session != NULL && __wt_buf_fmt( session, &session->err, "error return: %d", error) == 0) diff --git a/src/os_common/os_fstream.c b/src/os_common/os_fstream.c index 5a368ea75e6..744da732d84 100644 --- a/src/os_common/os_fstream.c +++ b/src/os_common/os_fstream.c @@ -144,7 +144,7 @@ __fstream_printf( p = (char *)((uint8_t *)buf->mem + buf->size); WT_ASSERT(session, buf->memsize >= buf->size); space = buf->memsize - buf->size; - len = (size_t)vsnprintf(p, space, fmt, ap_copy); + WT_RET(__wt_vsnprintf_len_set(p, space, &len, fmt, ap_copy)); va_end(ap_copy); if (len < space) { diff --git a/src/os_posix/os_snprintf.c b/src/os_posix/os_snprintf.c new file mode 100644 index 00000000000..390e2e0334a --- /dev/null +++ b/src/os_posix/os_snprintf.c @@ -0,0 +1,27 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_vsnprintf_len_incr -- + * POSIX vsnprintf convenience function, incrementing the returned size. + */ +int +__wt_vsnprintf_len_incr( + char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) + WT_GCC_FUNC_ATTRIBUTE((visibility("default"))) +{ + WT_DECL_RET; + + if ((ret = vsnprintf(buf, size, fmt, ap)) >= 0) { + *retsizep += (size_t)ret; + return (0); + } + return (__wt_errno()); +} diff --git a/src/os_posix/os_thread.c b/src/os_posix/os_thread.c index 9bf36cc2686..18e4c347436 100644 --- a/src/os_posix/os_thread.c +++ b/src/os_posix/os_thread.c @@ -18,6 +18,13 @@ __wt_thread_create(WT_SESSION_IMPL *session, { WT_DECL_RET; + /* + * Creating a thread isn't a memory barrier, but WiredTiger commonly + * sets flags and or state and then expects worker threads to start. + * Include a barrier to ensure safety in those cases. + */ + WT_FULL_BARRIER(); + /* Spawn a new thread of control. */ WT_SYSCALL_RETRY(pthread_create(tidret, NULL, func, arg), ret); if (ret == 0) @@ -34,6 +41,13 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) { WT_DECL_RET; + /* + * Joining a thread isn't a memory barrier, but WiredTiger commonly + * sets flags and or state and then expects worker threads to halt. + * Include a barrier to ensure safety in those cases. + */ + WT_FULL_BARRIER(); + WT_SYSCALL(pthread_join(tid, NULL), ret); if (ret == 0) return (0); @@ -45,7 +59,7 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) * __wt_thread_id -- * Fill in a printable version of the process and thread IDs. */ -void +int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_ATTRIBUTE((visibility("default"))) { @@ -57,10 +71,10 @@ __wt_thread_id(char *buf, size_t buflen) */ self = pthread_self(); #ifdef __sun - (void)snprintf(buf, buflen, - "%" PRIuMAX ":%u", (uintmax_t)getpid(), self); + return (__wt_snprintf(buf, buflen, + "%" PRIuMAX ":%u", (uintmax_t)getpid(), self)); #else - (void)snprintf(buf, buflen, - "%" PRIuMAX ":%p", (uintmax_t)getpid(), (void *)self); + return (__wt_snprintf(buf, buflen, + "%" PRIuMAX ":%p", (uintmax_t)getpid(), (void *)self)); #endif } diff --git a/src/os_win/os_snprintf.c b/src/os_win/os_snprintf.c index a6056ff9342..f3025b12a60 100644 --- a/src/os_win/os_snprintf.c +++ b/src/os_win/os_snprintf.c @@ -8,17 +8,47 @@ #include "wt_internal.h" -_Check_return_opt_ int __cdecl _wt_snprintf( - _Out_writes_(_MaxCount) char * _DstBuf, - _In_ size_t _MaxCount, - _In_z_ _Printf_format_string_ const char * _Format, ...) +/* + * __wt_vsnprintf_len_incr -- + * POSIX vsnprintf convenience function, incrementing the returned size. + */ +int +__wt_vsnprintf_len_incr( + char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) { - va_list args; - WT_DECL_RET; + int len; + + /* + * WiredTiger calls with length 0 to get the needed buffer size. Call + * the count only version in this case, _vsnprintf_s will invoke the + * invalid parameter handler if count is less than or equal to zero. + */ + if (size == 0) { + *retsizep += (size_t)_vscprintf(fmt, ap); + return (0); + } + + /* + * Additionally, the invalid parameter handler is invoked if buffer or + * format is a NULL pointer. + */ + if (buf == NULL || fmt == NULL) + return (EINVAL); + + /* + * If the storage required to store the data and a terminating null + * exceeds size, the invalid parameter handler is invoked, unless + * count is _TRUNCATE, in which case as much of the string as will + * fit in the buffer is written and -1 returned. + */ + if ((len = _vsnprintf_s(buf, size, _TRUNCATE, fmt, ap)) >= 0) { + *retsizep += (size_t)len; + return (0); + } - va_start(args, _Format); - ret = _wt_vsnprintf(_DstBuf, _MaxCount, _Format, args); - va_end(args); + /* Return the buffer size required. */ + if (len == -1) + *retsizep += (size_t)_vscprintf(fmt, ap); - return (ret); + return (0); } diff --git a/src/os_win/os_thread.c b/src/os_win/os_thread.c index a34dff776b6..4c8f212bb4f 100644 --- a/src/os_win/os_thread.c +++ b/src/os_win/os_thread.c @@ -16,6 +16,13 @@ int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) { + /* + * Creating a thread isn't a memory barrier, but WiredTiger commonly + * sets flags and or state and then expects worker threads to start. + * Include a barrier to ensure safety in those cases. + */ + WT_FULL_BARRIER(); + /* Spawn a new thread of control. */ *tidret = (HANDLE)_beginthreadex(NULL, 0, func, arg, 0, NULL); if (*tidret != 0) @@ -33,6 +40,13 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) { DWORD windows_error; + /* + * Joining a thread isn't a memory barrier, but WiredTiger commonly + * sets flags and or state and then expects worker threads to halt. + * Include a barrier to ensure safety in those cases. + */ + WT_FULL_BARRIER(); + if ((windows_error = WaitForSingleObject(tid, INFINITE)) != WAIT_OBJECT_0) { if (windows_error == WAIT_FAILED) @@ -58,10 +72,10 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) * __wt_thread_id -- * Fill in a printable version of the process and thread IDs. */ -void +int __wt_thread_id(char *buf, size_t buflen) { - (void)snprintf(buf, buflen, + return (__wt_snprintf(buf, buflen, "%" PRIu64 ":%" PRIu64, - (uint64_t)GetCurrentProcessId(), (uint64_t)GetCurrentThreadId); + (uint64_t)GetCurrentProcessId(), (uint64_t)GetCurrentThreadId)); } diff --git a/src/os_win/os_vsnprintf.c b/src/os_win/os_vsnprintf.c deleted file mode 100644 index 63f96e79d5b..00000000000 --- a/src/os_win/os_vsnprintf.c +++ /dev/null @@ -1,41 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -_Check_return_opt_ int __cdecl _wt_vsnprintf( - _Out_writes_(_MaxCount) char * _DstBuf, - _In_ size_t _MaxCount, - _In_z_ _Printf_format_string_ const char * _Format, - va_list _ArgList) -{ - int len; - - /* - * WiredTiger will call with length 0 to get the needed buffer size - * We call the count only version in this case since vsnprintf_s assumes - * length is greater than zero or else it triggers the invalid_parameter - * handler. - */ - if (_MaxCount == 0) { - return _vscprintf(_Format, _ArgList); - } - - len = (size_t)_vsnprintf_s( - _DstBuf, _MaxCount, _TRUNCATE, _Format, _ArgList); - - /* - * The MSVC implementation returns -1 on truncation instead of what - * it would have written. We could let callers iteratively grow the - * buffer, or just ask us how big a buffer they would like. - */ - if (len == -1) - len = _vscprintf(_Format, _ArgList) + 1; - - return (len); -} diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 23f654caa70..6f95b84d292 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -26,6 +26,11 @@ typedef struct { uint32_t flags; /* Caller's configuration */ WT_ITEM disk_image; /* Temporary disk-image buffer */ + /* + * Temporary buffer used to write out a disk image when managing two + * chunks worth of data in memory + */ + WT_ITEM *interim_buf; /* * Track start/stop write generation to decide if all changes to the @@ -127,6 +132,7 @@ typedef struct { * repeatedly split a packed page. */ uint32_t split_size; /* Split page size */ + uint32_t min_split_size; /* Minimum split page size */ /* * The problem with splits is we've done a lot of work by the time we @@ -151,16 +157,6 @@ typedef struct { */ size_t offset; /* Split's first byte */ - /* - * The recno and entries fields are the starting record number - * of the split chunk (for column-store splits), and the number - * of entries in the split chunk. These fields are used both - * to write the split chunk, and to create a new internal page - * to reference the split pages. - */ - uint64_t recno; /* Split's starting record */ - uint32_t entries; /* Split's entries */ - WT_ADDR addr; /* Split's written location */ uint32_t size; /* Split's size */ uint32_t checksum; /* Split's checksum */ @@ -182,11 +178,36 @@ typedef struct { size_t supd_allocated; /* + * While reconciling pages, at any given time, we maintain two + * split chunks in the memory to be written out as pages. As we + * get to the last two chunks, if the last one turns out to be + * smaller than the minimum split size, we go back into the + * penultimate chunk and split at this minimum split size + * boundary. This moves some data from the penultimate chunk to + * the last chunk, hence increasing the size of the last page + * written without decreasing the penultimate page size beyond + * the minimum split size. For this reason, we maintain both a + * maximum split percentage boundary and a minimum split + * percentage boundary. + * + * The recno and entries fields are the starting record number + * of the split chunk (for column-store splits), and the number + * of entries in the split chunk. These fields are used both to + * write the split chunk, and to create a new internal page to + * reference the split pages. + * * The key for a row-store page; no column-store key is needed * because the page's recno, stored in the recno field, is the * column-store key. */ - WT_ITEM key; /* Promoted row-store key */ + uint32_t max_bnd_entries; + uint64_t max_bnd_recno; + WT_ITEM max_bnd_key; + + size_t min_bnd_offset; + uint32_t min_bnd_entries; + uint64_t min_bnd_recno; + WT_ITEM min_bnd_key; } *bnd; /* Saved boundaries */ uint32_t bnd_next; /* Next boundary slot */ uint32_t bnd_next_max; /* Maximum boundary slots used */ @@ -194,28 +215,6 @@ typedef struct { size_t bnd_allocated; /* Bytes allocated */ /* - * We track the total number of page entries copied into split chunks - * so we can easily figure out how many entries in the current split - * chunk. - */ - uint32_t total_entries; /* Total entries in splits */ - - /* - * And there's state information as to where in this process we are: - * (1) tracking split boundaries because we can still fit more split - * chunks into the maximum page size, (2) tracking the maximum page - * size boundary because we can't fit any more split chunks into the - * maximum page size, (3) not performing boundary checks because it's - * either not useful with the current page size configuration, or - * because we've already been forced to split. - */ - enum { SPLIT_BOUNDARY=0, /* Next: a split page boundary */ - SPLIT_MAX=1, /* Next: the maximum page boundary */ - SPLIT_TRACKING_OFF=2, /* No boundary checks */ - SPLIT_TRACKING_RAW=3 } /* Underlying compression decides */ - bnd_state; - - /* * We track current information about the current record number, the * number of entries copied into the temporary buffer, where we are * in the temporary buffer, and how much memory remains. Those items @@ -226,6 +225,8 @@ typedef struct { uint32_t entries; /* Current number of entries */ uint8_t *first_free; /* Current first free byte */ size_t space_avail; /* Remaining space in this chunk */ + /* Remaining space in this chunk to put a minimum size boundary */ + size_t min_space_avail; /* * Saved update list, supporting the WT_EVICT_UPDATE_RESTORE and @@ -247,15 +248,14 @@ typedef struct { /* * WT_DICTIONARY -- - * We optionally build a dictionary of row-store values for leaf - * pages. Where two value cells are identical, only write the value - * once, the second and subsequent copies point to the original cell. - * The dictionary is fixed size, but organized in a skip-list to make - * searches faster. + * We optionally build a dictionary of values for leaf pages. Where + * two value cells are identical, only write the value once, the second + * and subsequent copies point to the original cell. The dictionary is + * fixed size, but organized in a skip-list to make searches faster. */ struct __rec_dictionary { uint64_t hash; /* Hash value */ - void *cell; /* Matching cell */ + uint32_t offset; /* Matching cell */ u_int depth; /* Skiplist */ WT_DICTIONARY *next[0]; @@ -293,6 +293,13 @@ typedef struct { uint32_t tested_ref_state; /* Debugging information */ } WT_RECONCILE; +#define WT_CROSSING_MIN_BND(r, next_len) \ + ((r)->bnd[(r)->bnd_next].min_bnd_offset == 0 && \ + (next_len) > (r)->min_space_avail) +#define WT_CROSSING_SPLIT_BND(r, next_len) ((next_len) > (r)->space_avail) +#define WT_CHECK_CROSSING_BND(r, next_len) \ + (WT_CROSSING_MIN_BND(r, next_len) || WT_CROSSING_SPLIT_BND(r, next_len)) + static void __rec_bnd_cleanup(WT_SESSION_IMPL *, WT_RECONCILE *, bool); static void __rec_cell_build_addr(WT_SESSION_IMPL *, WT_RECONCILE *, const void *, size_t, u_int, uint64_t); @@ -314,6 +321,7 @@ static int __rec_col_var(WT_SESSION_IMPL *, static int __rec_col_var_helper(WT_SESSION_IMPL *, WT_RECONCILE *, WT_SALVAGE_COOKIE *, WT_ITEM *, bool, uint8_t, uint64_t); static int __rec_destroy_session(WT_SESSION_IMPL *); +static uint32_t __rec_min_split_page_size(WT_BTREE *, uint32_t); static int __rec_root_write(WT_SESSION_IMPL *, WT_PAGE *, uint32_t); static int __rec_row_int(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_row_leaf(WT_SESSION_IMPL *, @@ -323,7 +331,6 @@ static int __rec_row_leaf_insert( static int __rec_row_merge(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_split_col(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_split_discard(WT_SESSION_IMPL *, WT_PAGE *); -static int __rec_split_fixup(WT_SESSION_IMPL *, WT_RECONCILE *); static int __rec_split_row(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_split_row_promote( WT_SESSION_IMPL *, WT_RECONCILE *, WT_ITEM *, uint8_t); @@ -968,6 +975,7 @@ __rec_destroy(WT_SESSION_IMPL *session, void *reconcilep) *(WT_RECONCILE **)reconcilep = NULL; __wt_buf_free(session, &r->disk_image); + __wt_scr_free(session, &r->interim_buf); __wt_free(session, r->raw_entries); __wt_free(session, r->raw_offsets); @@ -1032,7 +1040,8 @@ __rec_bnd_cleanup(WT_SESSION_IMPL *session, WT_RECONCILE *r, bool destroy) __wt_free(session, bnd->addr.addr); __wt_free(session, bnd->disk_image); __wt_free(session, bnd->supd); - __wt_buf_free(session, &bnd->key); + __wt_buf_free(session, &bnd->max_bnd_key); + __wt_buf_free(session, &bnd->min_bnd_key); } __wt_free(session, r->bnd); r->bnd_next = 0; @@ -1717,6 +1726,17 @@ __rec_incr(WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size) r->entries += v; r->space_avail -= size; r->first_free += size; + + /* + * If offset for the minimum split size boundary is not set, we have not + * yet reached the minimum boundary, reduce the space available for it. + */ + if (r->bnd[r->bnd_next].min_bnd_offset == 0) { + if (r->min_space_avail >= size) + r->min_space_avail -= size; + else + r->min_space_avail = 0; + } } /* @@ -1781,16 +1801,22 @@ __rec_dict_replace( return (0); /* - * If the dictionary cell reference is not set, we're creating a new - * entry in the dictionary, update its location. + * If the dictionary offset isn't set, we're creating a new entry in the + * dictionary, set its location. * - * If the dictionary cell reference is set, we have a matching value. - * Create a copy cell instead. + * If the dictionary offset is set, we have a matching value. Create a + * copy cell instead. */ - if (dp->cell == NULL) - dp->cell = r->first_free; + if (dp->offset == 0) + dp->offset = WT_PTRDIFF32(r->first_free, r->disk_image.mem); else { - offset = WT_PTRDIFF(r->first_free, dp->cell); + /* + * The offset is the byte offset from this cell to the previous, + * matching cell, NOT the byte offset from the beginning of the + * page. + */ + offset = (uint64_t)WT_PTRDIFF(r->first_free, + (uint8_t *)r->disk_image.mem + dp->offset); val->len = val->cell_len = __wt_cell_pack_copy(&val->cell, rle, offset); val->buf.data = NULL; @@ -1927,8 +1953,8 @@ static void __rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd) { bnd->offset = 0; - bnd->recno = WT_RECNO_OOB; - bnd->entries = 0; + bnd->max_bnd_recno = WT_RECNO_OOB; + bnd->max_bnd_entries = 0; __wt_free(session, bnd->addr.addr); WT_CLEAR(bnd->addr); @@ -1943,6 +1969,10 @@ __rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd) bnd->already_compressed = false; + bnd->min_bnd_offset = 0; + bnd->min_bnd_entries = 0; + bnd->min_bnd_recno = WT_RECNO_OOB; + /* * Don't touch the key, we re-use that memory in each new * reconciliation. @@ -1974,40 +2004,64 @@ __rec_split_bnd_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r) } /* - * __wt_split_page_size -- - * Split page size calculation: we don't want to repeatedly split every - * time a new entry is added, so we split to a smaller-than-maximum page size. + * __rec_split_page_size_from_pct -- + * Given a split percentage, calculate split page size in bytes. */ -uint32_t -__wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize) -{ +static uint32_t +__rec_split_page_size_from_pct( + int split_pct, uint32_t maxpagesize, uint32_t allocsize) { uintmax_t a; uint32_t split_size; /* * Ideally, the split page size is some percentage of the maximum page - * size rounded to an allocation unit (round to an allocation unit so - * we don't waste space when we write). + * size rounded to an allocation unit (round to an allocation unit so we + * don't waste space when we write). */ a = maxpagesize; /* Don't overflow. */ split_size = (uint32_t)WT_ALIGN_NEAREST( - (a * (u_int)btree->split_pct) / 100, btree->allocsize); + (a * (u_int)split_pct) / 100, allocsize); /* - * Respect the configured split percentage if the calculated split - * size is either zero or a full page. The user has either configured - * an allocation size that matches the page size, or a split - * percentage that is close to zero or one hundred. Rounding is going - * to provide a worse outcome than having a split point that doesn't - * fall on an allocation size boundary in those cases. + * Respect the configured split percentage if the calculated split size + * is either zero or a full page. The user has either configured an + * allocation size that matches the page size, or a split percentage + * that is close to zero or one hundred. Rounding is going to provide a + * worse outcome than having a split point that doesn't fall on an + * allocation size boundary in those cases. */ if (split_size == 0 || split_size == maxpagesize) - split_size = (uint32_t)((a * (u_int)btree->split_pct) / 100); + split_size = (uint32_t)((a * (u_int)split_pct) / 100); return (split_size); } /* + * __wt_split_page_size -- + * Split page size calculation: we don't want to repeatedly split every + * time a new entry is added, so we split to a smaller-than-maximum page size. + */ +uint32_t +__wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize) +{ + return (__rec_split_page_size_from_pct( + btree->split_pct, maxpagesize, btree->allocsize)); +} + +/* + * __rec_min_split_page_size -- + * Minimum split size boundary calculation: To track a boundary at the + * minimum split size that we could have split at instead of splitting at + * the split page size. + */ +static uint32_t +__rec_min_split_page_size(WT_BTREE *btree, uint32_t maxpagesize) +{ + return (__rec_split_page_size_from_pct( + WT_BTREE_MIN_SPLIT_PCT, maxpagesize, btree->allocsize)); +} + +/* * __rec_split_init -- * Initialization for the reconciliation split functions. */ @@ -2018,7 +2072,7 @@ __rec_split_init(WT_SESSION_IMPL *session, WT_BM *bm; WT_BTREE *btree; WT_PAGE_HEADER *dsk; - size_t corrected_page_size; + size_t corrected_page_size, disk_img_buf_size; btree = S2BT(session); bm = btree->bm; @@ -2053,33 +2107,6 @@ __rec_split_init(WT_SESSION_IMPL *session, r->max_raw_page_size = r->page_size = (uint32_t)WT_MIN(r->page_size * 10, WT_MAX(r->page_size, btree->maxmempage / 2)); - - /* - * Ensure the disk image buffer is large enough for the max object, as - * corrected by the underlying block manager. - */ - corrected_page_size = r->page_size; - WT_RET(bm->write_size(bm, session, &corrected_page_size)); - WT_RET(__wt_buf_init(session, &r->disk_image, corrected_page_size)); - - /* - * Clear the disk page header to ensure all of it is initialized, even - * the unused fields. - * - * In the case of fixed-length column-store, clear the entire buffer: - * fixed-length column-store sets bits in bytes, where the bytes are - * assumed to initially be 0. - */ - memset(r->disk_image.mem, 0, page->type == WT_PAGE_COL_FIX ? - corrected_page_size : WT_PAGE_HEADER_SIZE); - - /* - * Set the page type (the type doesn't change, and setting it later - * would require additional code in a few different places). - */ - dsk = r->disk_image.mem; - dsk->type = page->type; - /* * If we have to split, we want to choose a smaller page size for the * split pages, because otherwise we could end up splitting one large @@ -2099,22 +2126,28 @@ __rec_split_init(WT_SESSION_IMPL *session, * creating overflow items and compacted data, for example, as those * items have already been written to disk). So, the loop calls the * helper functions when approaching a split boundary, and we save the - * information at that point. That allows us to go back and split the - * page at the boundary points if we eventually overflow the maximum - * page size. + * information at that point. We also save the boundary information at + * the minimum split size. We maintain two chunks (each boundary + * represents a chunk that gets written as a page) in the memory, + * writing out the older one to the disk as a page when we need to make + * space for a new chunk. On reaching the last chunk, if it turns out to + * be smaller than the minimum split size, we go back into the + * penultimate chunk and split at this minimum split size boundary. This + * moves some data from the penultimate chunk to the last chunk, hence + * increasing the size of the last page written without decreasing the + * penultimate page size beyond the minimum split size. * * Finally, all this doesn't matter for fixed-size column-store pages, * raw compression, and salvage. Fixed-size column store pages can * split under (very) rare circumstances, but they're allocated at a * fixed page size, never anything smaller. In raw compression, the - * underlying compression routine decides when we split, so it's not - * our problem. In salvage, as noted above, we can't split at all. + * underlying compression routine decides when we split, so it's not our + * problem. In salvage, as noted above, we can't split at all. */ if (r->raw_compression || r->salvage != NULL) { r->split_size = 0; r->space_avail = r->page_size - WT_PAGE_HEADER_BYTE_SIZE(btree); - } - else if (page->type == WT_PAGE_COL_FIX) { + } else if (page->type == WT_PAGE_COL_FIX) { r->split_size = r->page_size; r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); @@ -2122,32 +2155,55 @@ __rec_split_init(WT_SESSION_IMPL *session, r->split_size = __wt_split_page_size(btree, r->page_size); r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); + r->min_split_size = + __rec_min_split_page_size(btree, r->page_size); + r->min_space_avail = + r->min_split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); } + + /* + * Ensure the disk image buffer is large enough for the max object, as + * corrected by the underlying block manager. + * + * The buffer that we build disk image in, needs to hold two chunks + * worth of data. Since we want to support split_size more than the page + * size (to allow for adjustments based on the compression), this buffer + * should be greater of twice of split_size and page_size. + */ + corrected_page_size = r->page_size; + disk_img_buf_size = 2 * WT_MAX(corrected_page_size, r->split_size); + WT_RET(bm->write_size(bm, session, &corrected_page_size)); + WT_RET(__wt_buf_init(session, &r->disk_image, disk_img_buf_size)); + + /* + * Clear the disk page header to ensure all of it is initialized, even + * the unused fields. + * + * In the case of fixed-length column-store, clear the entire buffer: + * fixed-length column-store sets bits in bytes, where the bytes are + * assumed to initially be 0. + */ + memset(r->disk_image.mem, 0, page->type == WT_PAGE_COL_FIX ? + disk_img_buf_size : WT_PAGE_HEADER_SIZE); + + /* + * Set the page type (the type doesn't change, and setting it later + * would require additional code in a few different places). + */ + dsk = r->disk_image.mem; + dsk->type = page->type; + r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); /* Initialize the first boundary. */ r->bnd_next = 0; WT_RET(__rec_split_bnd_grow(session, r)); __rec_split_bnd_init(session, &r->bnd[0]); - r->bnd[0].recno = recno; + r->bnd[0].max_bnd_recno = recno; r->bnd[0].offset = WT_PAGE_HEADER_BYTE_SIZE(btree); - /* - * If the maximum page size is the same as the split page size, either - * because of the object type or application configuration, there isn't - * any need to maintain split boundaries within a larger page. - * - * No configuration for salvage here, because salvage can't split. - */ - if (r->raw_compression) - r->bnd_state = SPLIT_TRACKING_RAW; - else if (max == r->split_size) - r->bnd_state = SPLIT_TRACKING_OFF; - else - r->bnd_state = SPLIT_BOUNDARY; - - /* Initialize the entry counters. */ - r->entries = r->total_entries = 0; + /* Initialize the entry counter. */ + r->entries = 0; /* Initialize the starting record number. */ r->recno = recno; @@ -2350,19 +2406,112 @@ __rec_split_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t add_len) { WT_BM *bm; WT_BTREE *btree; - size_t corrected_page_size, len; + size_t corrected_page_size, inuse, len; btree = S2BT(session); bm = btree->bm; len = WT_PTRDIFF(r->first_free, r->disk_image.mem); - corrected_page_size = len + add_len; + inuse = (len - r->bnd[r->bnd_next].offset) + + WT_PAGE_HEADER_BYTE_SIZE(btree); + corrected_page_size = inuse + add_len; + WT_RET(bm->write_size(bm, session, &corrected_page_size)); - WT_RET(__wt_buf_grow(session, &r->disk_image, corrected_page_size)); + /* Need to account for buffer carrying two chunks worth of data */ + WT_RET(__wt_buf_grow(session, &r->disk_image, 2 * corrected_page_size)); + r->first_free = (uint8_t *)r->disk_image.mem + len; - WT_ASSERT(session, corrected_page_size >= len); - r->space_avail = corrected_page_size - len; + WT_ASSERT(session, corrected_page_size >= inuse); + r->space_avail = corrected_page_size - inuse; WT_ASSERT(session, r->space_avail >= add_len); + + return (0); +} + +/* + * __rec_split_write_prev_and_shift_cur -- + * Write the previous split chunk to the disk as a page. Shift the contents + * of the current chunk to the start of the buffer, making space for a new + * chunk to be written. + * If the caller asks for a chunk resizing, the boundary between the two + * chunks is readjusted to the minimum split size boundary details stored + * in the previous chunk, letting the current chunk grow at the cost of the + * previous chunk. + */ +static int +__rec_split_write_prev_and_shift_cur( + WT_SESSION_IMPL *session, WT_RECONCILE *r, bool resize_chunks) +{ + WT_BM *bm; + WT_BOUNDARY *bnd_cur, *bnd_prev; + WT_BTREE *btree; + WT_PAGE_HEADER *dsk, *dsk_tmp; + size_t cur_len, len; + uint8_t *dsk_start; + + WT_ASSERT(session, r->bnd_next != 0); + + btree = S2BT(session); + bm = btree->bm; + bnd_cur = &r->bnd[r->bnd_next]; + bnd_prev = bnd_cur - 1; + dsk = r->disk_image.mem; + cur_len = WT_PTRDIFF(r->first_free, dsk) - bnd_cur->offset; + + /* + * Resize chunks if the current is smaller than the minimum, and there + * are details on the minimum split size boundary available in the + * previous boundary details. + * + * There is a possibility that we do not have a minimum boundary set, in + * such a case we skip chunk resizing. Such a condition is possible for + * instance when we are building the image in the buffer and the first + * K/V pair is large enough that it surpasses both the minimum split + * size and the split size the application has set. In such a case we + * split the chunk without saving any minimum boundary. + */ + if (resize_chunks && + cur_len < r->min_split_size && bnd_prev->min_bnd_offset != 0) { + bnd_cur->offset = bnd_prev->min_bnd_offset; + bnd_cur->max_bnd_entries += + bnd_prev->max_bnd_entries - bnd_prev->min_bnd_entries; + bnd_prev->max_bnd_entries = bnd_prev->min_bnd_entries; + bnd_cur->max_bnd_recno = bnd_prev->min_bnd_recno; + + WT_RET(__wt_buf_set(session, &bnd_cur->max_bnd_key, + bnd_prev->min_bnd_key.data, bnd_prev->min_bnd_key.size)); + + /* Update current chunk's length */ + cur_len = WT_PTRDIFF(r->first_free, dsk) - bnd_cur->offset; + } + + /* + * Create an interim buffer if not already done to prepare the previous + * chunk's disk image. + */ + len = bnd_cur->offset; + WT_RET(bm->write_size(bm, session, &len)); + if (r->interim_buf == NULL) + WT_RET(__wt_scr_alloc(session, len, &r->interim_buf)); + else + WT_RET(__wt_buf_init(session, r->interim_buf, len)); + + dsk_tmp = r->interim_buf->mem; + memcpy(dsk_tmp, dsk, bnd_cur->offset); + dsk_tmp->recno = bnd_prev->max_bnd_recno; + dsk_tmp->u.entries = bnd_prev->max_bnd_entries; + dsk_tmp->mem_size = WT_STORE_SIZE(bnd_cur->offset); + r->interim_buf->size = dsk_tmp->mem_size; + WT_RET(__rec_split_write(session, r, bnd_prev, r->interim_buf, false)); + + /* Shift the current chunk to the start of the buffer */ + dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); + (void)memmove(dsk_start, (uint8_t *)dsk + bnd_cur->offset, cur_len); + + /* Fix boundary offset */ + bnd_cur->offset = WT_PAGE_HEADER_BYTE_SIZE(btree); + /* Fix where free points */ + r->first_free = dsk_start + cur_len; return (0); } @@ -2382,6 +2531,9 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) btree = S2BT(session); dsk = r->disk_image.mem; + /* Fixed length col store can call with next_len 0 */ + WT_ASSERT(session, next_len == 0 || r->space_avail < next_len); + /* * We should never split during salvage, and we're about to drop core * because there's no parent page. @@ -2391,147 +2543,63 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) "%s page too large, attempted split during salvage", __wt_page_type_string(r->page->type)); - /* Hitting a page boundary resets the dictionary, in all cases. */ - __rec_dictionary_reset(r); - - inuse = WT_PTRDIFF(r->first_free, dsk); - switch (r->bnd_state) { - case SPLIT_BOUNDARY: - /* - * We can get here if the first key/value pair won't fit. - * Additionally, grow the buffer to contain the current item if - * we haven't already consumed a reasonable portion of a split - * chunk. - */ - if (inuse < r->split_size / 2) - break; - - /* - * About to cross a split boundary but not yet forced to split - * into multiple pages. If we have to split, this is one of the - * split points, save information about where we are when the - * split would have happened. - */ - WT_RET(__rec_split_bnd_grow(session, r)); - last = &r->bnd[r->bnd_next++]; - next = last + 1; - - /* Set the number of entries for the just finished chunk. */ - last->entries = r->entries - r->total_entries; - r->total_entries = r->entries; - - /* Set the key for the next chunk. */ - next->recno = r->recno; - if (dsk->type == WT_PAGE_ROW_INT || - dsk->type == WT_PAGE_ROW_LEAF) - WT_RET(__rec_split_row_promote( - session, r, &next->key, dsk->type)); - - /* - * Set the starting buffer offset and clear the entries (the - * latter not required, but cleaner). - */ - next->offset = WT_PTRDIFF(r->first_free, dsk); - next->entries = 0; - - /* Set the space available to another split-size chunk. */ - r->space_avail = - r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); - - /* - * Adjust the space available to handle two cases: - * - We don't have enough room for another full split-size - * chunk on the page. - * - We chose to fill past a page boundary because of a - * large item. - */ - if (inuse + r->space_avail > r->page_size) { - r->space_avail = - r->page_size > inuse ? (r->page_size - inuse) : 0; - - /* There are no further boundary points. */ - r->bnd_state = SPLIT_MAX; - } - - /* - * Return if the next object fits into this page, else we have - * to split the page. - */ - if (r->space_avail >= next_len) - return (0); - - /* FALLTHROUGH */ - case SPLIT_MAX: - /* - * We're going to have to split and create multiple pages. - * - * Cycle through the saved split-point information, writing the - * split chunks we have tracked. The underlying fixup function - * sets the space available and other information, and copied - * any unwritten chunk of data to the beginning of the buffer. - */ - WT_RET(__rec_split_fixup(session, r)); - - /* We're done saving split chunks. */ - r->bnd_state = SPLIT_TRACKING_OFF; - break; - case SPLIT_TRACKING_OFF: - /* - * We can get here if the first key/value pair won't fit. - * Additionally, grow the buffer to contain the current item if - * we haven't already consumed a reasonable portion of a split - * chunk. - */ - if (inuse < r->split_size / 2) - break; + last = &r->bnd[r->bnd_next]; + inuse = (WT_PTRDIFF(r->first_free, dsk) - last->offset) + + WT_PAGE_HEADER_BYTE_SIZE(btree); - /* - * The key/value pairs didn't fit into a single page, but either - * we've already noticed that and are now processing the rest of - * the pairs at split size boundaries, or the split size was the - * same as the page size, and we never bothered with split point - * information at all. - */ - WT_RET(__rec_split_bnd_grow(session, r)); - last = &r->bnd[r->bnd_next++]; - next = last + 1; + /* + * We can get here if the first key/value pair won't fit. + * Additionally, grow the buffer to contain the current item if we + * haven't already consumed a reasonable portion of a split chunk. + */ + if (inuse < r->split_size / 2) + goto done; - /* - * Set the key for the next chunk (before writing the block, a - * key range is needed in that code). - */ - next->recno = r->recno; - if (dsk->type == WT_PAGE_ROW_INT || - dsk->type == WT_PAGE_ROW_LEAF) - WT_RET(__rec_split_row_promote( - session, r, &next->key, dsk->type)); + /* All page boundaries reset the dictionary. */ + __rec_dictionary_reset(r); - /* Clear the entries (not required, but cleaner). */ - next->entries = 0; + /* Set the number of entries for the just finished chunk. */ + last->max_bnd_entries = r->entries; - /* Finalize the header information and write the page. */ - dsk->recno = last->recno; - dsk->u.entries = r->entries; - dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); + /* + * In case of bulk load, write out chunks as we get them. Otherwise we + * keep two chunks in memory at a given time. So, if there is a previous + * chunk, write it out, making space in the buffer for the next chunk to + * be written. + */ + if (r->is_bulk_load) { + dsk->recno = last->max_bnd_recno; + dsk->u.entries = last->max_bnd_entries; + dsk->mem_size = (uint32_t)inuse; r->disk_image.size = dsk->mem_size; - WT_RET( - __rec_split_write(session, r, last, &r->disk_image, false)); - - /* - * Set the caller's entry count and buffer information for the - * next chunk. We only get here if we're not splitting or have - * already split, so it's split-size chunks from here on out. - */ - r->entries = 0; + WT_RET(__rec_split_write( + session, r, last, &r->disk_image, false)); + /* Fix where free points */ r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); - r->space_avail = - r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); - break; - case SPLIT_TRACKING_RAW: - return (__wt_illegal_value(session, NULL)); - } + } else if (r->bnd_next != 0) + WT_RET(__rec_split_write_prev_and_shift_cur(session, r, false)); - /* + /* Prepare the next boundary */ + WT_RET(__rec_split_bnd_grow(session, r)); + r->bnd_next++; + next = &r->bnd[r->bnd_next]; + next->offset = WT_PTRDIFF(r->first_free, dsk); + /* Set the key for the next chunk. */ + next->max_bnd_recno = r->recno; + if (dsk->type == WT_PAGE_ROW_INT || dsk->type == WT_PAGE_ROW_LEAF) + WT_RET(__rec_split_row_promote( + session, r, &next->max_bnd_key, dsk->type)); + + r->entries = 0; + /* + * Set the space available to another split-size and minimum split-size + * chunk. + */ + r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); + r->min_space_avail = + r->min_split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); + +done: /* * Overflow values can be larger than the maximum page size but still be * "on-page". If the next key/value pair is larger than space available * after a split has happened (in other words, larger than the maximum @@ -2549,6 +2617,64 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) } /* + * __rec_split_crossing_bnd -- + * Save the details for the minimum split size boundary or call for a + * split. + */ +static inline int +__rec_split_crossing_bnd( + WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) +{ + WT_BOUNDARY *bnd; + WT_BTREE *btree; + WT_PAGE_HEADER *dsk; + size_t min_bnd_offset; + + WT_ASSERT(session, WT_CHECK_CROSSING_BND(r, next_len)); + + /* + * If crossing the minimum split size boundary, store the boundary + * details at the current location in the buffer. If we are crossing the + * split boundary at the same time, possible when the next record is + * large enough, just split at this point. + */ + if (WT_CROSSING_MIN_BND(r, next_len) && + !WT_CROSSING_SPLIT_BND(r, next_len)) { + btree = S2BT(session); + bnd = &r->bnd[r->bnd_next]; + dsk = r->disk_image.mem; + min_bnd_offset = (WT_PTRDIFF(r->first_free, dsk) - + bnd->offset) + WT_PAGE_HEADER_BYTE_SIZE(btree); + if (min_bnd_offset == WT_PAGE_HEADER_BYTE_SIZE(btree)) + /* + * This is possible if the first record doesn't fit in + * the minimum split size, we write this record without + * setting up any boundary here. We will get the + * opportunity to setup a boundary before writing out + * the next record. + */ + return (0); + + WT_ASSERT(session, bnd->min_bnd_offset == 0); + + /* All page boundaries reset the dictionary. */ + __rec_dictionary_reset(r); + + bnd->min_bnd_offset = min_bnd_offset; + bnd->min_bnd_entries = r->entries; + bnd->min_bnd_recno = r->recno; + if (dsk->type == WT_PAGE_ROW_INT || + dsk->type == WT_PAGE_ROW_LEAF) + WT_RET(__rec_split_row_promote( + session, r, &bnd->min_bnd_key, dsk->type)); + return (0); + } + + /* We are crossing a split boundary */ + return (__rec_split(session, r, next_len)); +} + +/* * __rec_split_raw_worker -- * Handle the raw compression page reconciliation bookkeeping. */ @@ -2626,7 +2752,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session, */ recno = WT_RECNO_OOB; if (dsk->type == WT_PAGE_COL_VAR) - recno = last->recno; + recno = last->max_bnd_recno; entry = max_image_slot = slots = 0; WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { @@ -2853,7 +2979,7 @@ no_slots: */ dst->size = result_len + WT_BLOCK_COMPRESS_SKIP; dsk_dst = dst->mem; - dsk_dst->recno = last->recno; + dsk_dst->recno = last->max_bnd_recno; dsk_dst->mem_size = r->raw_offsets[result_slots] + WT_BLOCK_COMPRESS_SKIP; dsk_dst->u.entries = r->raw_entries[result_slots - 1]; @@ -2873,7 +2999,7 @@ no_slots: WT_RET(__wt_strndup(session, dsk, dsk_dst->mem_size, &last->disk_image)); disk_image = last->disk_image; - disk_image->recno = last->recno; + disk_image->recno = last->max_bnd_recno; disk_image->mem_size = dsk_dst->mem_size; disk_image->u.entries = dsk_dst->u.entries; } @@ -2903,14 +3029,14 @@ no_slots: */ switch (dsk->type) { case WT_PAGE_COL_INT: - next->recno = r->raw_recnos[result_slots]; + next->max_bnd_recno = r->raw_recnos[result_slots]; break; case WT_PAGE_COL_VAR: - next->recno = r->raw_recnos[result_slots - 1]; + next->max_bnd_recno = r->raw_recnos[result_slots - 1]; break; case WT_PAGE_ROW_INT: case WT_PAGE_ROW_LEAF: - next->recno = WT_RECNO_OOB; + next->max_bnd_recno = WT_RECNO_OOB; if (!last_block) { /* * Confirm there was uncompressed data remaining @@ -2919,7 +3045,7 @@ no_slots: */ WT_ASSERT(session, len > 0); WT_RET(__rec_split_row_promote_cell( - session, dsk, &next->key)); + session, dsk, &next->max_bnd_key)); } break; } @@ -2931,7 +3057,7 @@ no_slots: */ WT_STAT_DATA_INCR(session, compress_raw_fail); - dsk->recno = last->recno; + dsk->recno = last->max_bnd_recno; dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); dsk->u.entries = r->entries; r->disk_image.size = dsk->mem_size; @@ -3008,35 +3134,9 @@ __rec_split_raw(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) static int __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) { - WT_BOUNDARY *bnd; + WT_BOUNDARY *bnd_cur, *bnd_prev; WT_PAGE_HEADER *dsk; - - /* Adjust the boundary information based on our split status. */ - switch (r->bnd_state) { - case SPLIT_BOUNDARY: - case SPLIT_MAX: - /* - * We never split, the reconciled page fit into a maximum page - * size. Change the first boundary slot to represent the full - * page (the first boundary slot is largely correct, just update - * the number of entries). - */ - r->bnd_next = 0; - break; - case SPLIT_TRACKING_OFF: - /* - * If we have already split, or aren't tracking boundaries, put - * the remaining data in the next boundary slot. - */ - WT_RET(__rec_split_bnd_grow(session, r)); - break; - case SPLIT_TRACKING_RAW: - /* - * We were configured for raw compression, and either we never - * wrote anything, or there's a remaindered block of data. - */ - break; - } + bool grow_bnd; /* * We may arrive here with no entries to write if the page was entirely @@ -3063,20 +3163,66 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) return (EBUSY); } - /* Set the boundary reference and increment the count. */ - bnd = &r->bnd[r->bnd_next++]; - bnd->entries = r->entries; - - /* Finalize the header information. */ dsk = r->disk_image.mem; - dsk->recno = bnd->recno; - dsk->u.entries = r->entries; + + /* Set the number of entries for the just finished chunk. */ + bnd_cur = &r->bnd[r->bnd_next]; + bnd_cur->max_bnd_entries = r->entries; + + grow_bnd = true; + /* + * We can reach here even with raw_compression when the last split chunk + * is too small to be sent for raw compression. + */ + if (!r->is_bulk_load && !r->raw_compression) { + if (WT_PTRDIFF(r->first_free, dsk) > r->page_size && + r->bnd_next != 0) { + /* + * We hold two boundaries worth of data in the buffer, + * and this data doesn't fit in a single page. If the + * last chunk is too small, readjust the boundary to a + * pre-computed minimum. + * Write out the penultimate chunk to the disk as a page + */ + WT_RET(__rec_split_write_prev_and_shift_cur( + session, r, true)); + } else + if (r->bnd_next != 0) { + /* + * We have two boundaries, but the data in the + * buffer can fit a single page. Merge the + * boundaries to create a single chunk. + */ + bnd_prev = bnd_cur - 1; + bnd_prev->max_bnd_entries += + bnd_cur->max_bnd_entries; + r->bnd_next--; + grow_bnd = false; + } + } + + /* + * We already have space for an extra boundary if we merged two + * boundaries above, in that case we do not need to grow the boundary + * structure. + */ + if (grow_bnd) + WT_RET(__rec_split_bnd_grow(session, r)); + bnd_cur = &r->bnd[r->bnd_next]; + r->bnd_next++; + + /* + * Current boundary now has all the remaining data/last page now. + * Let's write it to the disk + */ + dsk->recno = bnd_cur->max_bnd_recno; + dsk->u.entries = bnd_cur->max_bnd_entries; dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); r->disk_image.size = dsk->mem_size; /* If this is a checkpoint, we're done, otherwise write the page. */ - return (__rec_is_checkpoint(session, r, bnd) ? - 0 : __rec_split_write(session, r, bnd, &r->disk_image, true)); + return (__rec_is_checkpoint(session, r, bnd_cur) ? + 0 : __rec_split_write(session, r, bnd_cur, &r->disk_image, true)); } /* @@ -3110,98 +3256,6 @@ __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r) } /* - * __rec_split_fixup -- - * Fix up after crossing the maximum page boundary. - */ -static int -__rec_split_fixup(WT_SESSION_IMPL *session, WT_RECONCILE *r) -{ - WT_BOUNDARY *bnd; - WT_BTREE *btree; - WT_DECL_ITEM(tmp); - WT_DECL_RET; - WT_PAGE_HEADER *dsk; - size_t i, len; - uint8_t *dsk_start, *p; - - /* - * When we overflow physical limits of the page, we walk the list of - * split chunks we've created and write those pages out, then update - * the caller's information. - */ - btree = S2BT(session); - - /* - * The data isn't laid out on a page boundary or nul padded; copy it to - * a clean, aligned, padded buffer before writing it. - * - * Allocate a scratch buffer to hold the new disk image. Copy the disk - * page's header and block-manager space into the scratch buffer, most - * of the header information remains unchanged between the pages. - */ - WT_RET(__wt_scr_alloc(session, r->disk_image.memsize, &tmp)); - dsk = tmp->mem; - memcpy(dsk, r->disk_image.mem, WT_PAGE_HEADER_BYTE_SIZE(btree)); - - /* - * For each split chunk we've created, update the disk image and copy - * it into place. - */ - dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); - for (i = 0, bnd = r->bnd; i < r->bnd_next; ++i, ++bnd) { - /* Copy the page contents to the temporary buffer. */ - len = (bnd + 1)->offset - bnd->offset; - memcpy(dsk_start, - (uint8_t *)r->disk_image.mem + bnd->offset, len); - - /* Finalize the header information and write the page. */ - dsk->recno = bnd->recno; - dsk->u.entries = bnd->entries; - tmp->size = WT_PAGE_HEADER_BYTE_SIZE(btree) + len; - dsk->mem_size = WT_STORE_SIZE(tmp->size); - WT_ERR(__rec_split_write(session, r, bnd, tmp, false)); - } - - /* - * There is probably a remnant in the working buffer that didn't get - * written, copy it down to the beginning of the working buffer. - * - * Confirm the remnant is no larger than a split-sized chunk, including - * header. We know that's the maximum sized remnant because we only have - * remnants if split switches from accumulating to a split boundary to - * accumulating to the end of the page (the other path here is when we - * hit a split boundary, there was room for another split chunk in the - * page, and the next item still wouldn't fit, in which case there is no - * remnant). So: we were accumulating to the end of the page and created - * a remnant. We know the remnant cannot be as large as a split-sized - * chunk, including header, because if there was room for that large a - * remnant, we wouldn't have switched from accumulating to a page end. - */ - p = (uint8_t *)r->disk_image.mem + bnd->offset; - len = WT_PTRDIFF(r->first_free, p); - if (len >= r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree)) - WT_PANIC_ERR(session, EINVAL, - "Reconciliation remnant too large for the split buffer"); - dsk = r->disk_image.mem; - dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); - (void)memmove(dsk_start, p, len); - - /* - * Fix up our caller's information, including updating the starting - * record number. - */ - r->entries -= r->total_entries; - r->first_free = dsk_start + len; - WT_ASSERT(session, - r->page_size >= (WT_PAGE_HEADER_BYTE_SIZE(btree) + len)); - r->space_avail = - r->split_size - (WT_PAGE_HEADER_BYTE_SIZE(btree) + len); - -err: __wt_scr_free(session, &tmp); - return (ret); -} - -/* * __rec_split_write -- * Write a disk block out for the split helper functions. */ @@ -3222,11 +3276,17 @@ __rec_split_write(WT_SESSION_IMPL *session, int cmp; uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE]; bool need_image; +#ifdef HAVE_DIAGNOSTIC + bool verify_image; +#endif btree = S2BT(session); dsk = buf->mem; page = r->page; mod = page->modify; +#ifdef HAVE_DIAGNOSTIC + verify_image = true; +#endif /* Set the zero-length value flag in the page header. */ if (dsk->type == WT_PAGE_ROW_LEAF) { @@ -3238,8 +3298,6 @@ __rec_split_write(WT_SESSION_IMPL *session, F_SET(dsk, WT_PAGE_EMPTY_V_NONE); } - bnd->entries = r->entries; - /* Initialize the address (set the page type for the parent). */ switch (dsk->type) { case WT_PAGE_COL_FIX: @@ -3285,7 +3343,8 @@ __rec_split_write(WT_SESSION_IMPL *session, switch (page->type) { case WT_PAGE_COL_FIX: case WT_PAGE_COL_VAR: - if (WT_INSERT_RECNO(supd->ins) >= (bnd + 1)->recno) + if (WT_INSERT_RECNO(supd->ins) >= + (bnd + 1)->max_bnd_recno) goto supd_check_complete; break; case WT_PAGE_ROW_LEAF: @@ -3296,8 +3355,8 @@ __rec_split_write(WT_SESSION_IMPL *session, key->data = WT_INSERT_KEY(supd->ins); key->size = WT_INSERT_KEY_SIZE(supd->ins); } - WT_ERR(__wt_compare(session, - btree->collator, key, &(bnd + 1)->key, &cmp)); + WT_ERR(__wt_compare(session, btree->collator, + key, &(bnd + 1)->max_bnd_key, &cmp)); if (cmp >= 0) goto supd_check_complete; break; @@ -3387,18 +3446,21 @@ supd_check_complete: #ifdef HAVE_VERBOSE /* Output a verbose message if we create a page without many entries */ - if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT) && r->entries < 6) + if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT) && + bnd->max_bnd_entries < 6) __wt_verbose(session, WT_VERB_SPLIT, "Reconciliation creating a page with %" PRIu32 " entries, memory footprint %" WT_SIZET_FMT - ", page count %" PRIu32 ", %s, split state: %d", - r->entries, r->page->memory_footprint, r->bnd_next, - F_ISSET(r, WT_EVICTING) ? "evict" : "checkpoint", - r->bnd_state); + ", page count %" PRIu32 ", %s", bnd->max_bnd_entries, + r->page->memory_footprint, r->bnd_next, + F_ISSET(r, WT_EVICTING) ? "evict" : "checkpoint"); #endif WT_ERR(__wt_bt_write(session, buf, addr, &addr_size, false, F_ISSET(r, WT_CHECKPOINTING), bnd->already_compressed)); +#ifdef HAVE_DIAGNOSTIC + verify_image = false; +#endif WT_ERR(__wt_strndup(session, addr, addr_size, &bnd->addr.addr)); bnd->addr.size = (uint8_t)addr_size; @@ -3425,9 +3487,20 @@ copy_image: */ need_image = F_ISSET(r, WT_EVICT_SCRUB) || (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL); - if (need_image && bnd->disk_image == NULL) + if (need_image && bnd->disk_image == NULL) { +#ifdef HAVE_DIAGNOSTIC + /* + * The I/O routines verify all disk images we write, but there + * are paths in reconciliation that don't do I/O. Verify those + * images, too. + */ + WT_ASSERT(session, verify_image == false || + __wt_verify_dsk_image( + session, "[reconcile-image]", buf->data, 0, true) == 0); +#endif WT_ERR(__wt_strndup( session, buf->data, buf->size, &bnd->disk_image)); + } if (!need_image) __wt_free(session, bnd->disk_image); @@ -3680,11 +3753,12 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) cursor->value.data, cursor->value.size, (uint64_t)0)); /* Boundary: split or write the page. */ - if (key->len + val->len > r->space_avail) { - if (r->raw_compression) - WT_RET( - __rec_split_raw(session, r, key->len + val->len)); - else { + if (r->raw_compression) { + if (key->len + val->len > r->space_avail) + WT_RET(__rec_split_raw( + session, r, key->len + val->len)); + } else + if (WT_CROSSING_SPLIT_BND(r, key->len + val->len)) { /* * Turn off prefix compression until a full key written * to the new page, and (unless already working with an @@ -3696,10 +3770,9 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_RET(__rec_cell_build_leaf_key( session, r, NULL, 0, &ovfl_key)); } - - WT_RET(__rec_split(session, r, key->len + val->len)); + WT_RET(__rec_split_crossing_bnd( + session, r, key->len + val->len)); } - } /* Copy the key/value pair onto the page. */ __rec_copy_incr(session, r, key); @@ -3740,6 +3813,10 @@ __rec_col_fix_bulk_insert_split_check(WT_CURSOR_BULK *cbulk) * split. * * Boundary: split or write the page. + * + * No need to have a minimum split size boundary, all + * pages are filled 100% except the last, allowing it to + * grow in the future. */ __rec_incr(session, r, cbulk->entry, __bitstr_size( @@ -3844,10 +3921,12 @@ __wt_bulk_insert_var( r, cbulk->last.data, cbulk->last.size, cbulk->rle)); /* Boundary: split or write the page. */ - if (val->len > r->space_avail) - WT_RET(r->raw_compression ? - __rec_split_raw(session, r, val->len) : - __rec_split(session, r, val->len)); + if (r->raw_compression) { + if (val->len > r->space_avail) + WT_RET(__rec_split_raw(session, r, val->len)); + } else + if (WT_CROSSING_SPLIT_BND(r, val->len)) + WT_RET(__rec_split_crossing_bnd(session, r, val->len)); /* Copy the value onto the page. */ if (btree->dictionary) @@ -3983,10 +4062,13 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) WT_CHILD_RELEASE_ERR(session, hazard, ref); /* Boundary: split or write the page. */ - if (val->len > r->space_avail) - WT_ERR(r->raw_compression ? - __rec_split_raw(session, r, val->len) : - __rec_split(session, r, val->len)); + if (r->raw_compression) { + if (val->len > r->space_avail) + WT_ERR(__rec_split_raw(session, r, val->len)); + } else + if (WT_CHECK_CROSSING_BND(r, val->len)) + WT_ERR(__rec_split_crossing_bnd( + session, r, val->len)); /* Copy the value onto the page. */ __rec_copy_incr(session, r, val); @@ -4028,10 +4110,13 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) addr->addr, addr->size, __rec_vtype(addr), r->recno); /* Boundary: split or write the page. */ - if (val->len > r->space_avail) - WT_RET(r->raw_compression ? - __rec_split_raw(session, r, val->len) : - __rec_split(session, r, val->len)); + if (r->raw_compression) { + if (val->len > r->space_avail) + WT_RET(__rec_split_raw(session, r, val->len)); + } else + if (WT_CHECK_CROSSING_BND(r, val->len)) + WT_RET(__rec_split_crossing_bnd( + session, r, val->len)); /* Copy the value onto the page. */ __rec_copy_incr(session, r, val); @@ -4139,6 +4224,10 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) * split. * * Boundary: split or write the page. + * + * No need to have a minimum split size boundary, all + * pages are filled 100% except the last, allowing it to + * grow in the future. */ __rec_incr(session, r, entry, __bitstr_size((size_t)entry * btree->bitcnt)); @@ -4295,10 +4384,13 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, session, r, value->data, value->size, rle)); /* Boundary: split or write the page. */ - if (val->len > r->space_avail) - WT_RET(r->raw_compression ? - __rec_split_raw(session, r, val->len) : - __rec_split(session, r, val->len)); + if (r->raw_compression) { + if (val->len > r->space_avail) + WT_RET(__rec_split_raw(session, r, val->len)); + } else + if (WT_CHECK_CROSSING_BND(r, val->len)) + WT_RET(__rec_split_crossing_bnd( + session, r, val->len)); /* Copy the value onto the page. */ if (!deleted && !overflow_type && btree->dictionary) @@ -4961,11 +5053,12 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) r->cell_zero = false; /* Boundary: split or write the page. */ - if (key->len + val->len > r->space_avail) { - if (r->raw_compression) + if (r->raw_compression) { + if (key->len + val->len > r->space_avail) WT_ERR(__rec_split_raw( session, r, key->len + val->len)); - else { + } else + if (WT_CHECK_CROSSING_BND(r, key->len + val->len)) { /* * In one path above, we copied address blocks * from the page rather than building the actual @@ -4977,10 +5070,10 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_IKEY_DATA(ikey), ikey->size)); key_onpage_ovfl = false; } - WT_ERR(__rec_split( + + WT_ERR(__rec_split_crossing_bnd( session, r, key->len + val->len)); } - } /* Copy the key and value onto the page. */ __rec_copy_incr(session, r, key); @@ -5030,10 +5123,14 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) addr->addr, addr->size, __rec_vtype(addr), WT_RECNO_OOB); /* Boundary: split or write the page. */ - if (key->len + val->len > r->space_avail) - WT_RET(r->raw_compression ? - __rec_split_raw(session, r, key->len + val->len) : - __rec_split(session, r, key->len + val->len)); + if (r->raw_compression) { + if (key->len + val->len > r->space_avail) + WT_RET(__rec_split_raw( + session, r, key->len + val->len)); + } else + if (WT_CHECK_CROSSING_BND(r, key->len + val->len)) + WT_RET(__rec_split_crossing_bnd( + session, r, key->len + val->len)); /* Copy the key and value onto the page. */ __rec_copy_incr(session, r, key); @@ -5362,16 +5459,17 @@ build: } /* Boundary: split or write the page. */ - if (key->len + val->len > r->space_avail) { - if (r->raw_compression) + if (r->raw_compression) { + if (key->len + val->len > r->space_avail) WT_ERR(__rec_split_raw( session, r, key->len + val->len)); - else { + } else + if (WT_CHECK_CROSSING_BND(r, key->len + val->len)) { /* - * In one path above, we copied address blocks - * from the page rather than building the actual - * key. In that case, we have to build the key - * now because we are about to promote it. + * If we copied address blocks from the page + * rather than building the actual key, we have + * to build the key now because we are about to + * promote it. */ if (key_onpage_ovfl) { WT_ERR(__wt_dsk_cell_data_ref(session, @@ -5390,14 +5488,13 @@ build: if (!ovfl_key) WT_ERR( __rec_cell_build_leaf_key( - session, - r, NULL, 0, &ovfl_key)); + session, r, NULL, 0, + &ovfl_key)); } - WT_ERR(__rec_split( + WT_ERR(__rec_split_crossing_bnd( session, r, key->len + val->len)); } - } /* Copy the key/value pair onto the page. */ __rec_copy_incr(session, r, key); @@ -5460,11 +5557,12 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins), &ovfl_key)); /* Boundary: split or write the page. */ - if (key->len + val->len > r->space_avail) { - if (r->raw_compression) + if (r->raw_compression) { + if (key->len + val->len > r->space_avail) WT_RET(__rec_split_raw( session, r, key->len + val->len)); - else { + } else + if (WT_CHECK_CROSSING_BND(r, key->len + val->len)) { /* * Turn off prefix compression until a full key * written to the new page, and (unless already @@ -5476,14 +5574,13 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) if (!ovfl_key) WT_RET( __rec_cell_build_leaf_key( - session, - r, NULL, 0, &ovfl_key)); + session, r, NULL, 0, + &ovfl_key)); } - WT_RET(__rec_split( + WT_RET(__rec_split_crossing_bnd( session, r, key->len + val->len)); } - } /* Copy the key/value pair onto the page. */ __rec_copy_incr(session, r, key); @@ -5595,13 +5692,14 @@ __rec_split_dump_keys(WT_SESSION_IMPL *session, WT_PAGE *page, WT_RECONCILE *r) __wt_verbose(session, WT_VERB_SPLIT, "starting key %s", __wt_buf_set_printable( - session, bnd->key.data, bnd->key.size, tkey)); + session, bnd->max_bnd_key.data, + bnd->max_bnd_key.size, tkey)); break; case WT_PAGE_COL_FIX: case WT_PAGE_COL_INT: case WT_PAGE_COL_VAR: __wt_verbose(session, WT_VERB_SPLIT, - "starting recno %" PRIu64, bnd->recno); + "starting recno %" PRIu64, bnd->max_bnd_recno); break; WT_ILLEGAL_VALUE_ERR(session); } @@ -5863,10 +5961,10 @@ __rec_split_row(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) /* We never set the first page's key, grab it from the original page. */ ref = r->ref; if (__wt_ref_is_root(ref)) - WT_RET(__wt_buf_set(session, &r->bnd[0].key, "", 1)); + WT_RET(__wt_buf_set(session, &r->bnd[0].max_bnd_key, "", 1)); else { __wt_ref_key(ref->home, ref, &p, &size); - WT_RET(__wt_buf_set(session, &r->bnd[0].key, p, size)); + WT_RET(__wt_buf_set(session, &r->bnd[0].max_bnd_key, p, size)); } /* Allocate, then initialize the array of replacement blocks. */ @@ -5874,8 +5972,8 @@ __rec_split_row(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) for (multi = mod->mod_multi, bnd = r->bnd, i = 0; i < r->bnd_next; ++multi, ++bnd, ++i) { - WT_RET(__wt_row_ikey_alloc(session, 0, - bnd->key.data, bnd->key.size, &multi->key.ikey)); + WT_RET(__wt_row_ikey_alloc(session, 0, bnd->max_bnd_key.data, + bnd->max_bnd_key.size, &multi->key.ikey)); /* * Copy any disk image. Don't take saved updates without a @@ -5922,7 +6020,7 @@ __rec_split_col(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) for (multi = mod->mod_multi, bnd = r->bnd, i = 0; i < r->bnd_next; ++multi, ++bnd, ++i) { - multi->key.recno = bnd->recno; + multi->key.recno = bnd->max_bnd_recno; /* * Copy any disk image. Don't take saved updates without a @@ -6399,7 +6497,8 @@ __rec_dictionary_lookup( for (dp = __rec_dictionary_skip_search(r->dictionary_head, hash); dp != NULL && dp->hash == hash; dp = dp->next[0]) { WT_RET(__wt_cell_pack_data_match( - dp->cell, &val->cell, val->buf.data, &match)); + (WT_CELL *)((uint8_t *)r->disk_image.mem + dp->offset), + &val->cell, val->buf.data, &match)); if (match) { WT_STAT_DATA_INCR(session, rec_dictionary); *dpp = dp; @@ -6425,7 +6524,7 @@ __rec_dictionary_lookup( * know where on the page it will be written). */ next = r->dictionary[r->dictionary_next++]; - next->cell = NULL; /* Not necessary, just cautious. */ + next->offset = 0; /* Not necessary, just cautious. */ next->hash = hash; __rec_dictionary_skip_insert(r->dictionary_head, next, hash); *dpp = next; diff --git a/src/schema/schema_create.c b/src/schema/schema_create.c index 020d5e72c13..0677fa711a5 100644 --- a/src/schema/schema_create.c +++ b/src/schema/schema_create.c @@ -35,7 +35,7 @@ __wt_direct_io_size_check(WT_SESSION_IMPL *session, * units of its happy place. */ if (FLD_ISSET(conn->direct_io, - WT_DIRECT_IO_CHECKPOINT | WT_DIRECT_IO_DATA)) { + WT_DIRECT_IO_CHECKPOINT | WT_DIRECT_IO_DATA)) { align = (int64_t)conn->buffer_alignment; if (align != 0 && (cval.val < align || cval.val % align != 0)) WT_RET_MSG(session, EINVAL, @@ -601,7 +601,8 @@ __create_table(WT_SESSION_IMPL *session, if (ncolgroups == 0) { cgsize = strlen("colgroup:") + strlen(tablename) + 1; WT_ERR(__wt_calloc_def(session, cgsize, &cgname)); - snprintf(cgname, cgsize, "colgroup:%s", tablename); + WT_ERR(__wt_snprintf( + cgname, cgsize, "colgroup:%s", tablename)); WT_ERR(__create_colgroup( session, cgname, exclusive, config)); } diff --git a/src/schema/schema_worker.c b/src/schema/schema_worker.c index e5f71b5d56f..62cdd7d367b 100644 --- a/src/schema/schema_worker.c +++ b/src/schema/schema_worker.c @@ -112,10 +112,10 @@ __wt_schema_worker(WT_SESSION_IMPL *session, wt_session = (WT_SESSION *)session; if (file_func == __wt_salvage && dsrc->salvage != NULL) WT_ERR(dsrc->salvage( - dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg)); + dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg)); else if (file_func == __wt_verify && dsrc->verify != NULL) WT_ERR(dsrc->verify( - dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg)); + dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg)); else if (file_func == __wt_checkpoint) ; else if (file_func == __wt_checkpoint_get_handles) diff --git a/src/session/session_api.c b/src/session/session_api.c index 3d13287fbe6..b7daf0e2e02 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -1206,10 +1206,15 @@ __wt_session_range_truncate(WT_SESSION_IMPL *session, done: err: /* - * Close any locally-opened start cursor. + * Close any locally-opened start cursor. Reset application cursors, + * they've possibly moved and the application cannot use them. */ if (local_start) WT_TRET(start->close(start)); + else + WT_TRET(start->reset(start)); + if (stop != NULL) + WT_TRET(stop->reset(stop)); return (ret); } @@ -1497,7 +1502,7 @@ __transaction_sync_run_chk(WT_SESSION_IMPL *session) conn = S2C(session); - return (FLD_ISSET(conn->flags, WT_CONN_LOG_SERVER_RUN)); + return (FLD_ISSET(conn->flags, WT_CONN_SERVER_LOG)); } /* @@ -1807,7 +1812,7 @@ __open_session(WT_CONNECTION_IMPL *conn, * closes the connection. This is particularly intended to catch * cases where server threads open sessions. */ - WT_ASSERT(session, F_ISSET(conn, WT_CONN_SERVER_RUN)); + WT_ASSERT(session, !F_ISSET(conn, WT_CONN_CLOSING)); /* Find the first inactive session slot. */ for (session_ret = conn->sessions, diff --git a/src/session/session_compact.c b/src/session/session_compact.c index 85214ae6d98..72c072e0fb8 100644 --- a/src/session/session_compact.c +++ b/src/session/session_compact.c @@ -210,7 +210,7 @@ __compact_checkpoint(WT_SESSION_IMPL *session) * work we need to have done is done in the underlying block manager. */ const char *checkpoint_cfg[] = { - WT_CONFIG_BASE(session, WT_SESSION_checkpoint), "force=1", NULL }; + WT_CONFIG_BASE(session, WT_SESSION_checkpoint), "force=1", NULL }; /* Checkpoints take a lot of time, check if we've run out. */ WT_RET(__wt_session_compact_check_timeout(session)); diff --git a/src/support/err.c b/src/support/err.c index 369997d38c0..57efde72b23 100644 --- a/src/support/err.c +++ b/src/support/err.c @@ -102,9 +102,10 @@ __handler_failure(WT_SESSION_IMPL *session, */ char s[256]; - (void)snprintf(s, sizeof(s), + if (__wt_snprintf(s, sizeof(s), "application %s event handler failed: %s", - which, __wt_strerror(session, error, NULL, 0)); + which, __wt_strerror(session, error, NULL, 0)) != 0) + return; /* * Use the error handler to report the failure, unless it was the error @@ -148,6 +149,23 @@ __wt_event_handler_set(WT_SESSION_IMPL *session, WT_EVENT_HANDLER *handler) session->event_handler = handler; } +#define WT_ERROR_APPEND(p, remain, ...) do { \ + size_t __len; \ + WT_ERR(__wt_snprintf_len_set(p, remain, &__len, __VA_ARGS__)); \ + if (__len > remain) \ + __len = remain; \ + p += __len; \ + remain -= __len; \ +} while (0) +#define WT_ERROR_APPEND_AP(p, remain, ...) do { \ + size_t __len; \ + WT_ERR(__wt_vsnprintf_len_set(p, remain, &__len, __VA_ARGS__)); \ + if (__len > remain) \ + __len = remain; \ + p += __len; \ + remain -= __len; \ +} while (0) + /* * __wt_eventv -- * Report a message to an event handler. @@ -161,9 +179,9 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, WT_DECL_RET; WT_SESSION *wt_session; struct timespec ts; - size_t len, remain, wlen; + size_t len, remain; const char *err, *prefix; - char *end, *p, tid[128]; + char *p, tid[128]; /* * We're using a stack buffer because we want error messages no matter @@ -174,6 +192,8 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, * Buffer placed at the end of the stack in case snprintf overflows. */ char s[2048]; + p = s; + remain = sizeof(s); /* * !!! @@ -185,24 +205,8 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, * first session, but if the allocation of the first session fails, for * example, we can end up here without a session.) */ - if (session == NULL) { - if (fprintf(stderr, - "WiredTiger Error%s%s: ", - error == 0 ? "" : ": ", - error == 0 ? "" : - __wt_strerror(session, error, NULL, 0)) < 0) - ret = EIO; - if (vfprintf(stderr, fmt, ap) < 0) - ret = EIO; - if (fprintf(stderr, "\n") < 0) - ret = EIO; - if (fflush(stderr) != 0) - ret = EIO; - return (ret); - } - - p = s; - end = s + sizeof(s); + if (session == NULL) + goto err; /* * We have several prefixes for the error message: a timestamp and the @@ -211,42 +215,24 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, * followed by a colon. */ __wt_epoch(session, &ts); - __wt_thread_id(tid, sizeof(tid)); - remain = WT_PTRDIFF(end, p); - wlen = (size_t)snprintf(p, remain, "[%" PRIuMAX ":%" PRIuMAX "][%s]", + WT_ERR(__wt_thread_id(tid, sizeof(tid))); + WT_ERROR_APPEND(p, remain, + "[%" PRIuMAX ":%" PRIuMAX "][%s]", (uintmax_t)ts.tv_sec, (uintmax_t)ts.tv_nsec / WT_THOUSAND, tid); - p = wlen >= remain ? end : p + wlen; - if ((prefix = S2C(session)->error_prefix) != NULL) { - remain = WT_PTRDIFF(end, p); - wlen = (size_t)snprintf(p, remain, ", %s", prefix); - p = wlen >= remain ? end : p + wlen; - } + if ((prefix = S2C(session)->error_prefix) != NULL) + WT_ERROR_APPEND(p, remain, ", %s", prefix); prefix = session->dhandle == NULL ? NULL : session->dhandle->name; - if (prefix != NULL) { - remain = WT_PTRDIFF(end, p); - wlen = (size_t)snprintf(p, remain, ", %s", prefix); - p = wlen >= remain ? end : p + wlen; - } - if ((prefix = session->name) != NULL) { - remain = WT_PTRDIFF(end, p); - wlen = (size_t)snprintf(p, remain, ", %s", prefix); - p = wlen >= remain ? end : p + wlen; - } - remain = WT_PTRDIFF(end, p); - wlen = (size_t)snprintf(p, remain, ": "); - p = wlen >= remain ? end : p + wlen; - - if (file_name != NULL) { - remain = WT_PTRDIFF(end, p); - wlen = (size_t) - snprintf(p, remain, "%s, %d: ", file_name, line_number); - p = wlen >= remain ? end : p + wlen; - } + if (prefix != NULL) + WT_ERROR_APPEND(p, remain, ", %s", prefix); + if ((prefix = session->name) != NULL) + WT_ERROR_APPEND(p, remain, ", %s", prefix); + WT_ERROR_APPEND(p, remain, ": "); + + if (file_name != NULL) + WT_ERROR_APPEND(p, remain, "%s, %d: ", file_name, line_number); - remain = WT_PTRDIFF(end, p); - wlen = (size_t)vsnprintf(p, remain, fmt, ap); - p = wlen >= remain ? end : p + wlen; + WT_ERROR_APPEND_AP(p, remain, fmt, ap); if (error != 0) { /* @@ -261,10 +247,8 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, */ err = __wt_strerror(session, error, NULL, 0); len = strlen(err); - if (WT_PTRDIFF(p, s) < len || strcmp(p - len, err) != 0) { - remain = WT_PTRDIFF(end, p); - (void)snprintf(p, remain, ": %s", err); - } + if (WT_PTRDIFF(p, s) < len || strcmp(p - len, err) != 0) + WT_ERROR_APPEND(p, remain, ": %s", err); } /* @@ -279,7 +263,7 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, * * If an application-specified error message handler fails, complain * using the default error handler. If the default error handler fails, - * there's nothing to do. + * fallback to stderr. */ wt_session = (WT_SESSION *)session; handler = session->event_handler; @@ -293,6 +277,21 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, __handler_failure(session, ret, "error", true); } + if (ret != 0) { +err: if (fprintf(stderr, + "WiredTiger Error%s%s: ", + error == 0 ? "" : ": ", + error == 0 ? "" : + __wt_strerror(session, error, NULL, 0)) < 0) + WT_TRET(EIO); + if (vfprintf(stderr, fmt, ap) < 0) + WT_TRET(EIO); + if (fprintf(stderr, "\n") < 0) + WT_TRET(EIO); + if (fflush(stderr) != 0) + WT_TRET(EIO); + } + return (ret); } @@ -376,7 +375,7 @@ info_msg(WT_SESSION_IMPL *session, const char *fmt, va_list ap) */ char s[2048]; - (void)vsnprintf(s, sizeof(s), fmt, ap); + WT_RET(__wt_vsnprintf(s, sizeof(s), fmt, ap)); wt_session = (WT_SESSION *)session; handler = session->event_handler; diff --git a/src/support/scratch.c b/src/support/scratch.c index 69987ebc852..485cea90e89 100644 --- a/src/support/scratch.c +++ b/src/support/scratch.c @@ -69,13 +69,16 @@ int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4))) { + WT_DECL_RET; va_list ap; size_t len; for (;;) { va_start(ap, fmt); - len = (size_t)vsnprintf(buf->mem, buf->memsize, fmt, ap); + ret = __wt_vsnprintf_len_set( + buf->mem, buf->memsize, &len, fmt, ap); va_end(ap); + WT_RET(ret); /* Check if there was enough space. */ if (len < buf->memsize) { @@ -100,6 +103,7 @@ int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4))) { + WT_DECL_RET; va_list ap; size_t len, space; char *p; @@ -117,8 +121,9 @@ __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) p = (char *)((uint8_t *)buf->mem + buf->size); WT_ASSERT(session, buf->memsize >= buf->size); space = buf->memsize - buf->size; - len = (size_t)vsnprintf(p, space, fmt, ap); + ret = __wt_vsnprintf_len_set(p, space, &len, fmt, ap); va_end(ap); + WT_RET(ret); /* Check if there was enough space. */ if (len < space) { diff --git a/src/support/stat.c b/src/support/stat.c index fd38e1b79ee..2c2217f8c20 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -685,6 +685,7 @@ static const char * const __stats_connection_desc[] = { "cache: failed eviction of pages that exceeded the in-memory maximum", "cache: files with active eviction walks", "cache: files with new eviction walks started", + "cache: force re-tuning of eviction workers once in a while", "cache: hazard pointer blocked page eviction", "cache: hazard pointer check calls", "cache: hazard pointer check entries walked", @@ -771,9 +772,11 @@ static const char * const __stats_connection_desc[] = { "lock: table lock internal thread time waiting for the table lock (usecs)", "log: busy returns attempting to switch slots", "log: consolidated slot closures", + "log: consolidated slot join active slot closed", "log: consolidated slot join races", "log: consolidated slot join transitions", "log: consolidated slot joins", + "log: consolidated slot transitions unable to find free slot", "log: consolidated slot unbuffered writes", "log: log bytes of payload data", "log: log bytes written", @@ -968,6 +971,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cache_eviction_force_fail = 0; /* not clearing cache_eviction_walks_active */ stats->cache_eviction_walks_started = 0; + stats->cache_eviction_force_retune = 0; stats->cache_eviction_hazard = 0; stats->cache_hazard_checks = 0; stats->cache_hazard_walks = 0; @@ -1054,9 +1058,11 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->lock_table_wait_internal = 0; stats->log_slot_switch_busy = 0; stats->log_slot_closes = 0; + stats->log_slot_active_closed = 0; stats->log_slot_races = 0; stats->log_slot_transitions = 0; stats->log_slot_joins = 0; + stats->log_slot_no_free_slots = 0; stats->log_slot_unbuffered = 0; stats->log_bytes_payload = 0; stats->log_bytes_written = 0; @@ -1252,6 +1258,8 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, cache_eviction_walks_active); to->cache_eviction_walks_started += WT_STAT_READ(from, cache_eviction_walks_started); + to->cache_eviction_force_retune += + WT_STAT_READ(from, cache_eviction_force_retune); to->cache_eviction_hazard += WT_STAT_READ(from, cache_eviction_hazard); to->cache_hazard_checks += WT_STAT_READ(from, cache_hazard_checks); @@ -1366,9 +1374,13 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, lock_table_wait_internal); to->log_slot_switch_busy += WT_STAT_READ(from, log_slot_switch_busy); to->log_slot_closes += WT_STAT_READ(from, log_slot_closes); + to->log_slot_active_closed += + WT_STAT_READ(from, log_slot_active_closed); to->log_slot_races += WT_STAT_READ(from, log_slot_races); to->log_slot_transitions += WT_STAT_READ(from, log_slot_transitions); to->log_slot_joins += WT_STAT_READ(from, log_slot_joins); + to->log_slot_no_free_slots += + WT_STAT_READ(from, log_slot_no_free_slots); to->log_slot_unbuffered += WT_STAT_READ(from, log_slot_unbuffered); to->log_bytes_payload += WT_STAT_READ(from, log_bytes_payload); to->log_bytes_written += WT_STAT_READ(from, log_bytes_written); diff --git a/src/txn/txn.c b/src/txn/txn.c index e5e59c2b901..6eebf5ecf9f 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -713,7 +713,7 @@ __wt_txn_stats_update(WT_SESSION_IMPL *session) snapshot_pinned = txn_global->nsnap_oldest_id; WT_STAT_SET(session, stats, txn_pinned_range, - txn_global->current - txn_global->oldest_id); + txn_global->current - txn_global->oldest_id); WT_STAT_SET(session, stats, txn_pinned_snapshot_range, snapshot_pinned == WT_TXN_NONE ? diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 6c97922f7e1..f4ccf5eacd0 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -306,7 +306,7 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) WT_ASSERT(session, !F_ISSET(&session->txn, WT_TXN_ERROR)); WT_RET(__wt_metadata_cursor(session, &meta_cursor)); meta_cursor->set_key(meta_cursor, session->dhandle->name); - ret = __wt_curfile_update_check(meta_cursor); + ret = __wt_curfile_insert_check(meta_cursor); if (ret == WT_ROLLBACK) { metadata_race = true; ret = 0; @@ -1599,7 +1599,9 @@ __checkpoint_tree_helper(WT_SESSION_IMPL *session, const char *cfg[]) int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) { + WT_CONFIG_ITEM cval; WT_DECL_RET; + bool force; /* Should not be called with a checkpoint handle. */ WT_ASSERT(session, session->dhandle->checkpoint == NULL); @@ -1608,8 +1610,10 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_ASSERT(session, !WT_IS_METADATA(session->dhandle) || F_ISSET(session, WT_SESSION_LOCKED_METADATA)); + WT_RET(__wt_config_gets_def(session, cfg, "force", 0, &cval)); + force = cval.val != 0; WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_dirty_tree( - session, true, false, true, cfg)); + session, true, force, true, cfg)); WT_RET(ret); if (F_ISSET(S2BT(session), WT_BTREE_SKIP_CKPT)) return (0); diff --git a/src/utilities/util_backup.c b/src/utilities/util_backup.c index 5dc9671fb45..f1b31f7621a 100644 --- a/src/utilities/util_backup.c +++ b/src/utilities/util_backup.c @@ -109,9 +109,14 @@ copy(WT_SESSION *session, const char *directory, const char *name) /* Build the target pathname. */ len = strlen(directory) + strlen(name) + 2; - if ((to = malloc(len)) == NULL) - goto memerr; - (void)snprintf(to, len, "%s/%s", directory, name); + if ((to = malloc(len)) == NULL) { + fprintf(stderr, "%s: %s\n", progname, strerror(errno)); + return (1); + } + if ((ret = __wt_snprintf(to, len, "%s/%s", directory, name)) != 0) { + fprintf(stderr, "%s: %s\n", progname, strerror(ret)); + goto err; + } if (verbose && printf("Backing up %s/%s to %s\n", home, name, to) < 0) { fprintf(stderr, "%s: %s\n", progname, strerror(EIO)); @@ -126,11 +131,7 @@ copy(WT_SESSION *session, const char *directory, const char *name) fprintf(stderr, "%s/%s to %s: backup copy: %s\n", home, name, to, session->strerror(session, ret)); - if (0) { -memerr: fprintf(stderr, "%s: %s\n", progname, strerror(errno)); - } err: free(to); - return (ret); } diff --git a/src/utilities/util_dump.c b/src/utilities/util_dump.c index 947fa7bf9ef..955148b7d46 100644 --- a/src/utilities/util_dump.c +++ b/src/utilities/util_dump.c @@ -259,14 +259,15 @@ dump_add_config(WT_SESSION *session, char **bufp, size_t *leftp, const char *fmt, ...) WT_GCC_FUNC_ATTRIBUTE((format (printf, 4, 5))) { - int n; + WT_DECL_RET; + size_t n; va_list ap; va_start(ap, fmt); - n = vsnprintf(*bufp, *leftp, fmt, ap); + ret = __wt_vsnprintf_len_set(*bufp, *leftp, &n, fmt, ap); va_end(ap); - if (n < 0) - return (util_err(session, EINVAL, NULL)); + if (ret != 0) + return (util_err(session, ret, NULL)); *bufp += n; *leftp -= (size_t)n; return (0); @@ -435,9 +436,11 @@ dump_table_parts_config(WT_SESSION *session, WT_CURSOR *cursor, len = strlen(entry) + strlen(name) + 1; if ((uriprefix = malloc(len)) == NULL) - return util_err(session, errno, NULL); - - snprintf(uriprefix, len, "%s%s", entry, name); + return (util_err(session, errno, NULL)); + if ((ret = __wt_snprintf(uriprefix, len, "%s%s", entry, name)) != 0) { + free(uriprefix); + return (util_err(session, ret, NULL)); + } /* * Search the file looking for column group and index key/value pairs: diff --git a/src/utilities/util_load.c b/src/utilities/util_load.c index d31fa4c9d08..d2f00402217 100644 --- a/src/utilities/util_load.c +++ b/src/utilities/util_load.c @@ -120,10 +120,12 @@ load_dump(WT_SESSION *session) goto err; /* Open the insert cursor. */ - (void)snprintf(config, sizeof(config), + if ((ret = __wt_snprintf(config, sizeof(config), "dump=%s%s%s", hex ? "hex" : "print", - append ? ",append" : "", no_overwrite ? ",overwrite=false" : ""); + append ? ",append" : "", + no_overwrite ? ",overwrite=false" : "")) != 0) + return (util_err(session, ret, NULL)); if ((ret = session->open_cursor( session, uri, NULL, config, &cursor)) != 0) { ret = util_err(session, ret, "%s: session.open_cursor", uri); @@ -472,6 +474,7 @@ config_update(WT_SESSION *session, char **list) static int config_rename(WT_SESSION *session, char **urip, const char *name) { + WT_DECL_RET; size_t len; char *buf, *p; @@ -490,7 +493,9 @@ config_rename(WT_SESSION *session, char **urip, const char *name) } *p = '\0'; p = strchr(p + 1, ':'); - snprintf(buf, len, "%s:%s%s", *urip, name, p == NULL ? "" : p); + if ((ret = __wt_snprintf( + buf, len, "%s:%s%s", *urip, name, p == NULL ? "" : p)) != 0) + return (util_err(session, ret, NULL)); *urip = buf; return (0); diff --git a/src/utilities/util_load_json.c b/src/utilities/util_load_json.c index 1189d49a483..c693e2b7651 100644 --- a/src/utilities/util_load_json.c +++ b/src/utilities/util_load_json.c @@ -145,6 +145,7 @@ static int json_kvraw_append(WT_SESSION *session, JSON_INPUT_STATE *ins, const char *str, size_t len) { + WT_DECL_RET; size_t needsize; char *tmp; @@ -152,11 +153,15 @@ json_kvraw_append(WT_SESSION *session, needsize = strlen(ins->kvraw) + len + 2; if ((tmp = malloc(needsize)) == NULL) return (util_err(session, errno, NULL)); - snprintf(tmp, needsize, "%s %.*s", ins->kvraw, (int)len, str); + WT_ERR(__wt_snprintf( + tmp, needsize, "%s %.*s", ins->kvraw, (int)len, str)); free(ins->kvraw); ins->kvraw = tmp; } return (0); + +err: free(tmp); + return (util_err(session, ret, NULL)); } /* @@ -181,7 +186,7 @@ json_strdup(WT_SESSION *session, JSON_INPUT_STATE *ins, char **resultp) goto err; } resultlen += 1; - if ((result = (char *)malloc((size_t)resultlen)) == NULL) { + if ((result = malloc((size_t)resultlen)) == NULL) { ret = util_err(session, errno, NULL); goto err; } @@ -236,10 +241,13 @@ json_data(WT_SESSION *session, goto err; uri = clp->list[0]; - (void)snprintf(config, sizeof(config), + if ((ret = __wt_snprintf(config, sizeof(config), "dump=json%s%s", LF_ISSET(LOAD_JSON_APPEND) ? ",append" : "", - LF_ISSET(LOAD_JSON_NO_OVERWRITE) ? ",overwrite=false" : ""); + LF_ISSET(LOAD_JSON_NO_OVERWRITE) ? ",overwrite=false" : "")) != 0) { + ret = util_err(session, ret, NULL); + goto err; + } if ((ret = session->open_cursor( session, uri, NULL, config, &cursor)) != 0) { ret = util_err(session, ret, "%s: session.open_cursor", uri); @@ -256,7 +264,7 @@ json_data(WT_SESSION *session, nfield = 0; JSON_EXPECT(session, ins, '{'); if (ins->kvraw == NULL) { - if ((ins->kvraw = (char *)malloc(1)) == NULL) { + if ((ins->kvraw = malloc(1)) == NULL) { ret = util_err(session, errno, NULL); goto err; } @@ -358,8 +366,11 @@ json_top_level(WT_SESSION *session, JSON_INPUT_STATE *ins, uint32_t flags) while (json_peek(session, ins) == 's') { JSON_EXPECT(session, ins, 's'); tableuri = realloc(tableuri, ins->toklen); - snprintf(tableuri, ins->toklen, "%.*s", - (int)(ins->toklen - 2), ins->tokstart + 1); + if ((ret = __wt_snprintf(tableuri, ins->toklen, + "%.*s", (int)(ins->toklen - 2), ins->tokstart + 1)) != 0) { + ret = util_err(session, ret, NULL); + goto err; + } JSON_EXPECT(session, ins, ':'); if (!hasversion) { if (strcmp(tableuri, DUMP_JSON_VERSION_MARKER) != 0) { diff --git a/src/utilities/util_main.c b/src/utilities/util_main.c index 68e3b0f1bc5..c6f225bb667 100644 --- a/src/utilities/util_main.c +++ b/src/utilities/util_main.c @@ -257,9 +257,13 @@ main(int argc, char *argv[]) (void)util_err(NULL, errno, NULL); goto err; } - (void)snprintf(p, len, "%s,%s,%s%s%s%s", + if ((ret = __wt_snprintf(p, len, "%s,%s,%s%s%s%s", config == NULL ? "" : config, - cmd_config == NULL ? "" : cmd_config, rec_config, p1, p2, p3); + cmd_config == NULL ? "" : cmd_config, + rec_config, p1, p2, p3)) != 0) { + (void)util_err(NULL, ret, NULL); + goto err; + } config = p; /* Open the database and a session. */ @@ -298,6 +302,7 @@ done: char * util_uri(WT_SESSION *session, const char *s, const char *type) { + WT_DECL_RET; size_t len; char *name; @@ -321,8 +326,12 @@ util_uri(WT_SESSION *session, const char *s, const char *type) * the default type for the operation. */ if (strchr(s, ':') != NULL) - snprintf(name, len, "%s", s); + WT_ERR(__wt_snprintf(name, len, "%s", s)); else - snprintf(name, len, "%s:%s", type, s); + WT_ERR(__wt_snprintf(name, len, "%s:%s", type, s)); return (name); + +err: free(name); + (void)util_err(session, ret, NULL); + return (NULL); } diff --git a/src/utilities/util_misc.c b/src/utilities/util_misc.c index 0905bfa97be..e26185a0096 100644 --- a/src/utilities/util_misc.c +++ b/src/utilities/util_misc.c @@ -140,7 +140,10 @@ util_flush(WT_SESSION *session, const char *uri) if ((buf = malloc(len)) == NULL) return (util_err(session, errno, NULL)); - (void)snprintf(buf, len, "target=(\"%s\")", uri); + if ((ret = __wt_snprintf(buf, len, "target=(\"%s\")", uri)) != 0) { + free(buf); + return (util_err(session, ret, NULL)); + } ret = session->checkpoint(session, buf); free(buf); diff --git a/src/utilities/util_stat.c b/src/utilities/util_stat.c index 1b75d9ea8bf..0692afe2819 100644 --- a/src/utilities/util_stat.c +++ b/src/utilities/util_stat.c @@ -68,7 +68,10 @@ util_stat(WT_SESSION *session, int argc, char *argv[]) fprintf(stderr, "%s: %s\n", progname, strerror(errno)); goto err; } - snprintf(uri, urilen, "statistics:%s", objname); + if ((ret = __wt_snprintf(uri, urilen, "statistics:%s", objname)) != 0) { + fprintf(stderr, "%s: %s\n", progname, strerror(ret)); + goto err; + } if ((ret = session->open_cursor(session, uri, NULL, config, &cursor)) != 0) { diff --git a/src/utilities/util_verify.c b/src/utilities/util_verify.c index d0587fcfc8c..ace1be7a5de 100644 --- a/src/utilities/util_verify.c +++ b/src/utilities/util_verify.c @@ -72,7 +72,7 @@ util_verify(WT_SESSION *session, int argc, char *argv[]) ret = util_err(session, errno, NULL); goto err; } - snprintf(config, size, + if ((ret = __wt_snprintf(config, size, "%s%s%s%s%s%s%s", dump_address ? "dump_address," : "", dump_blocks ? "dump_blocks," : "", @@ -80,7 +80,10 @@ util_verify(WT_SESSION *session, int argc, char *argv[]) dump_offsets != NULL ? "dump_offsets=[" : "", dump_offsets != NULL ? dump_offsets : "", dump_offsets != NULL ? "]," : "", - dump_pages ? "dump_pages," : ""); + dump_pages ? "dump_pages," : "")) != 0) { + (void)util_err(session, ret, NULL); + goto err; + } } if ((ret = session->verify(session, uri, config)) != 0) (void)util_err(session, ret, "session.verify: %s", uri); diff --git a/src/utilities/util_write.c b/src/utilities/util_write.c index b931fad064d..1d3e6937f8d 100644 --- a/src/utilities/util_write.c +++ b/src/utilities/util_write.c @@ -54,8 +54,12 @@ util_write(WT_SESSION *session, int argc, char *argv[]) * Open the object; free allocated memory immediately to simplify * future error handling. */ - (void)snprintf(config, sizeof(config), "%s,%s", - append ? "append=true" : "", overwrite ? "overwrite=true" : ""); + if ((ret = __wt_snprintf(config, sizeof(config), "%s,%s", + append ? "append=true" : "", + overwrite ? "overwrite=true" : "")) != 0) { + free(uri); + return (util_err(session, ret, NULL)); + } if ((ret = session->open_cursor(session, uri, NULL, config, &cursor)) != 0) (void)util_err(session, ret, "%s: session.open_cursor", uri); diff --git a/test/bloom/test_bloom.c b/test/bloom/test_bloom.c index bef509e01d8..b6299bbbadc 100644 --- a/test/bloom/test_bloom.c +++ b/test/bloom/test_bloom.c @@ -121,9 +121,9 @@ setup(void) * Open configuration -- put command line configuration options at the * end so they can override "standard" configuration. */ - snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "create,error_prefix=\"%s\",cache_size=%" PRIu32 "MB,%s", - progname, g.c_cache, g.config_open == NULL ? "" : g.config_open); + progname, g.c_cache, g.config_open == NULL ? "" : g.config_open)); testutil_check(wiredtiger_open(NULL, NULL, config, &conn)); diff --git a/test/checkpoint/checkpointer.c b/test/checkpoint/checkpointer.c index ef49a9492ce..84d2765843a 100644 --- a/test/checkpoint/checkpointer.c +++ b/test/checkpoint/checkpointer.c @@ -74,7 +74,7 @@ checkpointer(void *arg) WT_UNUSED(arg); - __wt_thread_id(tid, sizeof(tid)); + testutil_check(__wt_thread_id(tid, sizeof(tid))); printf("checkpointer thread starting: tid: %s\n", tid); (void)real_checkpointer(); @@ -107,8 +107,9 @@ real_checkpointer(void) "WiredTigerCheckpoint", strlen("WiredTigerCheckpoint")) == 0) checkpoint_config = NULL; else { + testutil_check(__wt_snprintf( + _buf, sizeof(_buf), "name=%s", g.checkpoint_name)); checkpoint_config = _buf; - snprintf(checkpoint_config, 128, "name=%s", g.checkpoint_name); } while (g.running) { /* Execute a checkpoint */ @@ -147,7 +148,8 @@ verify_checkpoint(WT_SESSION *session) ret = t_ret = 0; key_count = 0; - snprintf(ckpt, 128, "checkpoint=%s", g.checkpoint_name); + testutil_check(__wt_snprintf( + ckpt, sizeof(ckpt), "checkpoint=%s", g.checkpoint_name)); cursors = calloc((size_t)g.ntables, sizeof(*cursors)); if (cursors == NULL) return (log_print_err("verify_checkpoint", ENOMEM, 1)); @@ -159,7 +161,8 @@ verify_checkpoint(WT_SESSION *session) */ if (g.cookies[i].type == LSM) continue; - snprintf(next_uri, 128, "table:__wt%04d", i); + testutil_check(__wt_snprintf( + next_uri, sizeof(next_uri), "table:__wt%04d", i)); if ((ret = session->open_cursor( session, next_uri, NULL, ckpt, &cursors[i])) != 0) { (void)log_print_err( @@ -296,7 +299,8 @@ diagnose_key_error( session = cursor1->session; key1_orig = key2_orig = 0; - snprintf(ckpt, 128, "checkpoint=%s", g.checkpoint_name); + testutil_check(__wt_snprintf( + ckpt, sizeof(ckpt), "checkpoint=%s", g.checkpoint_name)); /* Save the failed keys. */ if (cursor1->get_key(cursor1, &key1_orig) != 0 || @@ -338,7 +342,8 @@ diagnose_key_error( * Now try opening new cursors on the checkpoints and see if we * get the same missing key via searching. */ - snprintf(next_uri, 128, "table:__wt%04d", index1); + testutil_check(__wt_snprintf( + next_uri, sizeof(next_uri), "table:__wt%04d", index1)); if (session->open_cursor(session, next_uri, NULL, ckpt, &c) != 0) return (1); c->set_key(c, key1_orig); @@ -350,7 +355,8 @@ diagnose_key_error( if (c->close(c) != 0) return (1); - snprintf(next_uri, 128, "table:__wt%04d", index2); + testutil_check(__wt_snprintf( + next_uri, sizeof(next_uri), "table:__wt%04d", index2)); if (session->open_cursor(session, next_uri, NULL, ckpt, &c) != 0) return (1); c->set_key(c, key1_orig); @@ -367,7 +373,8 @@ live_check: * Now try opening cursors on the live checkpoint to see if we get the * same missing key via searching. */ - snprintf(next_uri, 128, "table:__wt%04d", index1); + testutil_check(__wt_snprintf( + next_uri, sizeof(next_uri), "table:__wt%04d", index1)); if (session->open_cursor(session, next_uri, NULL, NULL, &c) != 0) return (1); c->set_key(c, key1_orig); @@ -376,7 +383,8 @@ live_check: if (c->close(c) != 0) return (1); - snprintf(next_uri, 128, "table:__wt%04d", index2); + testutil_check(__wt_snprintf( + next_uri, sizeof(next_uri), "table:__wt%04d", index2)); if (session->open_cursor(session, next_uri, NULL, NULL, &c) != 0) return (1); c->set_key(c, key2_orig); diff --git a/test/checkpoint/test_checkpoint.c b/test/checkpoint/test_checkpoint.c index c7132b433d2..e7e1a0b81a5 100644 --- a/test/checkpoint/test_checkpoint.c +++ b/test/checkpoint/test_checkpoint.c @@ -199,11 +199,11 @@ wt_connect(const char *config_open) testutil_make_work_dir(g.home); - snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "create,statistics=(fast),error_prefix=\"%s\",cache_size=1GB%s%s", progname, config_open == NULL ? "" : ",", - config_open == NULL ? "" : config_open); + config_open == NULL ? "" : config_open)); if ((ret = wiredtiger_open( g.home, &event_handler, config, &g.conn)) != 0) diff --git a/test/checkpoint/workers.c b/test/checkpoint/workers.c index e4fe7bd1b29..82d1b8685c4 100644 --- a/test/checkpoint/workers.c +++ b/test/checkpoint/workers.c @@ -39,14 +39,12 @@ static int create_table(WT_SESSION *session, COOKIE *cookie) { int ret; - char *p, *end, config[128]; + char config[128]; - p = config; - end = config + sizeof(config); - p += snprintf(p, (size_t)(end - p), - "key_format=%s,value_format=S", cookie->type == COL ? "r" : "q"); - if (cookie->type == LSM) - (void)snprintf(p, (size_t)(end - p), ",type=lsm"); + testutil_check(__wt_snprintf(config, sizeof(config), + "key_format=%s,value_format=S,%s", + cookie->type == COL ? "r" : "q", + cookie->type == LSM ? ",type=lsm" : "")); if ((ret = session->create(session, cookie->uri, config)) != 0) if (ret != EEXIST) @@ -88,8 +86,9 @@ start_workers(table_type type) (table_type)((i % MAX_TABLE_TYPE) + 1); else g.cookies[i].type = type; - (void)snprintf(g.cookies[i].uri, 128, - "%s%04d", URI_BASE, g.cookies[i].id); + testutil_check(__wt_snprintf( + g.cookies[i].uri, sizeof(g.cookies[i].uri), + "%s%04d", URI_BASE, g.cookies[i].id)); /* Should probably be atomic to avoid races. */ if ((ret = create_table(session, &g.cookies[i])) != 0) @@ -132,7 +131,8 @@ worker_op(WT_CURSOR *cursor, uint64_t keyno, u_int new_val) char valuebuf[64]; cursor->set_key(cursor, keyno); - (void)snprintf(valuebuf, sizeof(valuebuf), "%037u", new_val); + testutil_check(__wt_snprintf( + valuebuf, sizeof(valuebuf), "%037u", new_val)); cursor->set_value(cursor, valuebuf); if ((ret = cursor->insert(cursor)) != 0) { if (ret == WT_ROLLBACK) @@ -153,7 +153,7 @@ worker(void *arg) WT_UNUSED(arg); - __wt_thread_id(tid, sizeof(tid)); + testutil_check(__wt_thread_id(tid, sizeof(tid))); printf("worker thread starting: tid: %s\n", tid); (void)real_worker(); diff --git a/test/csuite/wt1965_col_efficiency/main.c b/test/csuite/wt1965_col_efficiency/main.c index a7235d81b31..e5b73d5e642 100644 --- a/test/csuite/wt1965_col_efficiency/main.c +++ b/test/csuite/wt1965_col_efficiency/main.c @@ -132,7 +132,8 @@ main(int argc, char *argv[]) testutil_check(opts->conn->open_session( opts->conn, NULL, NULL, &session)); - sprintf(table_format, "key_format=r,value_format="); + testutil_check(__wt_snprintf( + table_format, sizeof(table_format), "key_format=r,value_format=")); for (i = 0; i < NR_FIELDS; i++) strcat(table_format, "Q"); diff --git a/test/csuite/wt2246_col_append/main.c b/test/csuite/wt2246_col_append/main.c index 976e2269da6..9876582fffa 100644 --- a/test/csuite/wt2246_col_append/main.c +++ b/test/csuite/wt2246_col_append/main.c @@ -68,8 +68,8 @@ page_init(uint64_t n) else { if (recno % 3 == 0) ++vrecno; - snprintf(buf, - sizeof(buf), "%" PRIu64 " VALUE ------", vrecno); + testutil_check(__wt_snprintf(buf, + sizeof(buf), "%" PRIu64 " VALUE ------", vrecno)); cursor->set_value(cursor, buf); } testutil_check(cursor->insert(cursor)); @@ -112,19 +112,19 @@ main(int argc, char *argv[]) testutil_check(testutil_parse_opts(argc, argv, opts)); testutil_make_work_dir(opts->home); - snprintf(buf, sizeof(buf), + testutil_check(__wt_snprintf(buf, sizeof(buf), "create," "cache_size=%s," "eviction=(threads_max=5)," "statistics=(fast)", - opts->table_type == TABLE_FIX ? "500MB" : "2GB"); + opts->table_type == TABLE_FIX ? "500MB" : "2GB")); testutil_check(wiredtiger_open(opts->home, NULL, buf, &opts->conn)); testutil_check( opts->conn->open_session(opts->conn, NULL, NULL, &session)); - snprintf(buf, sizeof(buf), + testutil_check(__wt_snprintf(buf, sizeof(buf), "key_format=r,value_format=%s," "allocation_size=4K,leaf_page_max=64K", - opts->table_type == TABLE_FIX ? "8t" : "S"); + opts->table_type == TABLE_FIX ? "8t" : "S")); testutil_check(session->create(session, opts->uri, buf)); testutil_check(session->close(session, NULL)); diff --git a/test/csuite/wt2323_join_visibility/main.c b/test/csuite/wt2323_join_visibility/main.c index a61f707e008..617490fec4d 100644 --- a/test/csuite/wt2323_join_visibility/main.c +++ b/test/csuite/wt2323_join_visibility/main.c @@ -106,14 +106,18 @@ main(int argc, char *argv[]) tablename = strchr(opts->uri, ':'); testutil_assert(tablename != NULL); tablename++; - snprintf(sharedopts->posturi, sizeof(sharedopts->posturi), - "index:%s:post", tablename); - snprintf(sharedopts->baluri, sizeof(sharedopts->baluri), - "index:%s:bal", tablename); - snprintf(sharedopts->flaguri, sizeof(sharedopts->flaguri), - "index:%s:flag", tablename); - snprintf(sharedopts->joinuri, sizeof(sharedopts->joinuri), - "join:%s", opts->uri); + testutil_check(__wt_snprintf( + sharedopts->posturi, sizeof(sharedopts->posturi), + "index:%s:post", tablename)); + testutil_check(__wt_snprintf( + sharedopts->baluri, sizeof(sharedopts->baluri), + "index:%s:bal", tablename)); + testutil_check(__wt_snprintf( + sharedopts->flaguri, sizeof(sharedopts->flaguri), + "index:%s:flag", tablename)); + testutil_check(__wt_snprintf( + sharedopts->joinuri, sizeof(sharedopts->joinuri), + "join:%s", opts->uri)); testutil_check(wiredtiger_open(opts->home, NULL, "create,cache_size=1G", &opts->conn)); @@ -350,19 +354,21 @@ static void *thread_join(void *arg) balcur->set_key(balcur, 0); testutil_check(balcur->search(balcur)); if (sharedopts->bloom) - sprintf(cfg, "compare=lt,strategy=bloom,count=%d", - N_RECORDS); + testutil_check(__wt_snprintf(cfg, sizeof(cfg), + "compare=lt,strategy=bloom,count=%d", N_RECORDS)); else - sprintf(cfg, "compare=lt"); + testutil_check(__wt_snprintf( + cfg, sizeof(cfg), "compare=lt")); testutil_check(session->join(session, joincur, balcur, cfg)); flagcur->set_key(flagcur, 0); testutil_check(flagcur->search(flagcur)); if (sharedopts->bloom) - sprintf(cfg, "compare=eq,strategy=bloom,count=%d", - N_RECORDS); + testutil_check(__wt_snprintf(cfg, sizeof(cfg), + "compare=eq,strategy=bloom,count=%d", N_RECORDS)); else - sprintf(cfg, "compare=eq"); + testutil_check(__wt_snprintf( + cfg, sizeof(cfg), "compare=eq")); testutil_check(session->join(session, joincur, flagcur, cfg)); /* Expect no values returned */ diff --git a/test/csuite/wt2447_join_main_table/main.c b/test/csuite/wt2447_join_main_table/main.c index 1368e7c8c09..656cea04145 100644 --- a/test/csuite/wt2447_join_main_table/main.c +++ b/test/csuite/wt2447_join_main_table/main.c @@ -102,9 +102,12 @@ main(int argc, char *argv[]) tablename = strchr(opts->uri, ':'); testutil_assert(tablename != NULL); tablename++; - snprintf(index1uri, sizeof(index1uri), "index:%s:index1", tablename); - snprintf(index2uri, sizeof(index2uri), "index:%s:index2", tablename); - snprintf(joinuri, sizeof(joinuri), "join:%s", opts->uri); + testutil_check(__wt_snprintf( + index1uri, sizeof(index1uri), "index:%s:index1", tablename)); + testutil_check(__wt_snprintf( + index2uri, sizeof(index2uri), "index:%s:index2", tablename)); + testutil_check(__wt_snprintf( + joinuri, sizeof(joinuri), "join:%s", opts->uri)); testutil_check(wiredtiger_open(opts->home, NULL, "statistics=(all),create", &opts->conn)); @@ -150,7 +153,8 @@ main(int argc, char *argv[]) cursor2->set_key(cursor2, half + 1); testutil_check(cursor2->search(cursor2)); - sprintf(bloom_cfg, "compare=lt,strategy=bloom,count=%d", half); + testutil_check(__wt_snprintf(bloom_cfg, sizeof(bloom_cfg), + "compare=lt,strategy=bloom,count=%d", half)); testutil_check(session->open_cursor(session, joinuri, NULL, NULL, &jcursor)); diff --git a/test/csuite/wt2592_join_schema/main.c b/test/csuite/wt2592_join_schema/main.c index 0ec1c765d99..be3eff6136c 100644 --- a/test/csuite/wt2592_join_schema/main.c +++ b/test/csuite/wt2592_join_schema/main.c @@ -82,9 +82,12 @@ main(int argc, char *argv[]) tablename = strchr(opts->uri, ':'); testutil_assert(tablename != NULL); tablename++; - snprintf(countryuri, sizeof(countryuri), "index:%s:country", tablename); - snprintf(yearuri, sizeof(yearuri), "index:%s:year", tablename); - snprintf(joinuri, sizeof(joinuri), "join:%s", opts->uri); + testutil_check(__wt_snprintf( + countryuri, sizeof(countryuri), "index:%s:country", tablename)); + testutil_check(__wt_snprintf( + yearuri, sizeof(yearuri), "index:%s:year", tablename)); + testutil_check(__wt_snprintf( + joinuri, sizeof(joinuri), "join:%s", opts->uri)); testutil_check(wiredtiger_open(opts->home, NULL, "create,cache_size=200M", &opts->conn)); diff --git a/test/csuite/wt2834_join_bloom_fix/main.c b/test/csuite/wt2834_join_bloom_fix/main.c index f2c54b942be..e128df29f41 100644 --- a/test/csuite/wt2834_join_bloom_fix/main.c +++ b/test/csuite/wt2834_join_bloom_fix/main.c @@ -83,10 +83,14 @@ main(int argc, char *argv[]) tablename = strchr(opts->uri, ':'); testutil_assert(tablename != NULL); tablename++; - snprintf(posturi, sizeof(posturi), "index:%s:post", tablename); - snprintf(balanceuri, sizeof(balanceuri), "index:%s:balance", tablename); - snprintf(flaguri, sizeof(flaguri), "index:%s:flag", tablename); - snprintf(joinuri, sizeof(joinuri), "join:%s", opts->uri); + testutil_check(__wt_snprintf( + posturi, sizeof(posturi), "index:%s:post", tablename)); + testutil_check(__wt_snprintf( + balanceuri, sizeof(balanceuri), "index:%s:balance", tablename)); + testutil_check(__wt_snprintf( + flaguri, sizeof(flaguri), "index:%s:flag", tablename)); + testutil_check(__wt_snprintf( + joinuri, sizeof(joinuri), "join:%s", opts->uri)); testutil_check(session->create(session, posturi, "columns=(post)")); testutil_check(session->create(session, balanceuri, @@ -126,14 +130,14 @@ main(int argc, char *argv[]) balancecur->set_key(balancecur, 0); testutil_check(balancecur->search(balancecur)); - sprintf(cfg, "compare=lt,strategy=bloom,count=%d", - N_RECORDS / 100); + testutil_check(__wt_snprintf(cfg, sizeof(cfg), + "compare=lt,strategy=bloom,count=%d", N_RECORDS / 100)); testutil_check(session->join(session, joincur, balancecur, cfg)); flagcur->set_key(flagcur, 0); testutil_check(flagcur->search(flagcur)); - sprintf(cfg, "compare=eq,strategy=bloom,count=%d", - N_RECORDS / 100); + testutil_check(__wt_snprintf(cfg, sizeof(cfg), + "compare=eq,strategy=bloom,count=%d", N_RECORDS / 100)); testutil_check(session->join(session, joincur, flagcur, cfg)); /* Expect no values returned */ diff --git a/test/csuite/wt2853_perf/main.c b/test/csuite/wt2853_perf/main.c index b365b03493a..46ba71372e5 100644 --- a/test/csuite/wt2853_perf/main.c +++ b/test/csuite/wt2853_perf/main.c @@ -114,12 +114,15 @@ main(int argc, char *argv[]) tablename = strchr(opts->uri, ':'); testutil_assert(tablename != NULL); tablename++; - snprintf(sharedopts->posturi, sizeof(sharedopts->posturi), - "index:%s:post", tablename); - snprintf(sharedopts->baluri, sizeof(sharedopts->baluri), - "index:%s:bal", tablename); - snprintf(sharedopts->flaguri, sizeof(sharedopts->flaguri), - "index:%s:flag", tablename); + testutil_check(__wt_snprintf( + sharedopts->posturi, sizeof(sharedopts->posturi), + "index:%s:post", tablename)); + testutil_check(__wt_snprintf( + sharedopts->baluri, sizeof(sharedopts->baluri), + "index:%s:bal", tablename)); + testutil_check(__wt_snprintf( + sharedopts->flaguri, sizeof(sharedopts->flaguri), + "index:%s:flag", tablename)); testutil_check(session->create(session, sharedopts->posturi, "columns=(post)")); diff --git a/test/csuite/wt2909_checkpoint_integrity/main.c b/test/csuite/wt2909_checkpoint_integrity/main.c index 0ae81543050..ce7bd72fa3f 100644 --- a/test/csuite/wt2909_checkpoint_integrity/main.c +++ b/test/csuite/wt2909_checkpoint_integrity/main.c @@ -267,9 +267,11 @@ enable_failures(uint64_t allow_writes, uint64_t allow_reads) char value[100]; testutil_check(setenv("WT_FAIL_FS_ENABLE", "1", 1)); - snprintf(value, sizeof(value), "%" PRIu64, allow_writes); + testutil_check(__wt_snprintf( + value, sizeof(value), "%" PRIu64, allow_writes)); testutil_check(setenv("WT_FAIL_FS_WRITE_ALLOW", value, 1)); - snprintf(value, sizeof(value), "%" PRIu64, allow_reads); + testutil_check(__wt_snprintf( + value, sizeof(value), "%" PRIu64, allow_reads)); testutil_check(setenv("WT_FAIL_FS_READ_ALLOW", value, 1)); } @@ -325,10 +327,11 @@ run_check_subtest(TEST_OPTS *opts, const char *debugger, uint64_t nops, subtest_args[narg++] = (char *)"-v"; /* subtest is always verbose */ subtest_args[narg++] = (char *)"-p"; subtest_args[narg++] = (char *)"-o"; - snprintf(sarg, sizeof(sarg), "%" PRIu64, nops); + testutil_check(__wt_snprintf(sarg, sizeof(sarg), "%" PRIu64, nops)); subtest_args[narg++] = sarg; /* number of operations */ subtest_args[narg++] = (char *)"-n"; - snprintf(rarg, sizeof(rarg), "%" PRIu64, opts->nrecords); + testutil_check(__wt_snprintf( + rarg, sizeof(rarg), "%" PRIu64, opts->nrecords)); subtest_args[narg++] = rarg; /* number of records */ subtest_args[narg++] = NULL; testutil_assert(narg <= MAX_ARGS); @@ -463,15 +466,17 @@ subtest_main(int argc, char *argv[], bool close_test) testutil_make_work_dir(opts->home); /* Redirect stderr, stdout. */ - sprintf(filename, "%s/%s", opts->home, STDERR_FILE); + testutil_check(__wt_snprintf( + filename, sizeof(filename), "%s/%s", opts->home, STDERR_FILE)); testutil_assert(freopen(filename, "a", stderr) != NULL); - sprintf(filename, "%s/%s", opts->home, STDOUT_FILE); + testutil_check(__wt_snprintf( + filename, sizeof(filename), "%s/%s", opts->home, STDOUT_FILE)); testutil_assert(freopen(filename, "a", stdout) != NULL); - snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "create,cache_size=250M,log=(enabled)," "transaction_sync=(enabled,method=none),extensions=(" WT_FAIL_FS_LIB - "=(early_load,config={environment=true,verbose=true})]"); + "=(early_load,config={environment=true,verbose=true})]")); testutil_check(wiredtiger_open(opts->home, NULL, config, &opts->conn)); testutil_check( diff --git a/test/csuite/wt3120_filesys/main.c b/test/csuite/wt3120_filesys/main.c index 09dce624066..2fae85017d4 100644 --- a/test/csuite/wt3120_filesys/main.c +++ b/test/csuite/wt3120_filesys/main.c @@ -52,8 +52,8 @@ main(int argc, char *argv[]) testutil_check(testutil_parse_opts(argc, argv, opts)); testutil_make_work_dir(opts->home); - snprintf(buf, sizeof(buf), - "create,extensions=(" WT_FAIL_FS_LIB "=(early_load=true))"); + testutil_check(__wt_snprintf(buf, sizeof(buf), + "create,extensions=(" WT_FAIL_FS_LIB "=(early_load=true))")); testutil_check(wiredtiger_open(opts->home, NULL, buf, &opts->conn)); testutil_check( opts->conn->open_session(opts->conn, NULL, NULL, &session)); diff --git a/test/cursor_order/cursor_order.c b/test/cursor_order/cursor_order.c index 62777f552bf..d3c64b54ab5 100644 --- a/test/cursor_order/cursor_order.c +++ b/test/cursor_order/cursor_order.c @@ -181,19 +181,15 @@ wt_connect(SHARED_CONFIG *cfg, char *config_open) }; int ret; char config[512]; - size_t print_count; testutil_clean_work_dir(home); testutil_make_work_dir(home); - print_count = (size_t)snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "create,statistics=(all),error_prefix=\"%s\",%s%s", progname, config_open == NULL ? "" : ",", - config_open == NULL ? "" : config_open); - - if (print_count >= sizeof(config)) - testutil_die(EINVAL, "Config string too long"); + config_open == NULL ? "" : config_open)); if ((ret = wiredtiger_open( home, &event_handler, config, &cfg->conn)) != 0) diff --git a/test/cursor_order/cursor_order_file.c b/test/cursor_order/cursor_order_file.c index 5dc7194b5fb..42d7af54de4 100644 --- a/test/cursor_order/cursor_order_file.c +++ b/test/cursor_order/cursor_order_file.c @@ -34,23 +34,21 @@ file_create(SHARED_CONFIG *cfg, const char *name) WT_CONNECTION *conn; WT_SESSION *session; int ret; - char *p, *end, config[128]; + char config[128]; conn = cfg->conn; if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) testutil_die(ret, "conn.session"); - p = config; - end = config + sizeof(config); - p += snprintf(p, (size_t)(end - p), + testutil_check(__wt_snprintf(config, sizeof(config), "key_format=%s," "internal_page_max=%d," "split_deepen_min_child=200," - "leaf_page_max=%d,", - cfg->ftype == ROW ? "S" : "r", 16 * 1024, 128 * 1024); - if (cfg->ftype == FIX) - (void)snprintf(p, (size_t)(end - p), ",value_format=3t"); + "leaf_page_max=%d," + "%s", + cfg->ftype == ROW ? "S" : "r", 16 * 1024, 128 * 1024, + cfg->ftype == FIX ? ",value_format=3t" : "")); if ((ret = session->create(session, name, config)) != 0) if (ret != EEXIST) @@ -67,9 +65,10 @@ load(SHARED_CONFIG *cfg, const char *name) WT_CURSOR *cursor; WT_ITEM *value, _value; WT_SESSION *session; - char keybuf[64], valuebuf[64]; - int64_t keyno; + size_t len; + uint64_t keyno; int ret; + char keybuf[64], valuebuf[64]; conn = cfg->conn; @@ -83,9 +82,10 @@ load(SHARED_CONFIG *cfg, const char *name) testutil_die(ret, "cursor.open"); value = &_value; - for (keyno = 1; keyno <= (int64_t)cfg->nkeys; ++keyno) { + for (keyno = 1; keyno <= cfg->nkeys; ++keyno) { if (cfg->ftype == ROW) { - snprintf(keybuf, sizeof(keybuf), "%016u", (u_int)keyno); + testutil_check(__wt_snprintf( + keybuf, sizeof(keybuf), "%016" PRIu64, keyno)); cursor->set_key(cursor, keybuf); } else cursor->set_key(cursor, (uint32_t)keyno); @@ -93,8 +93,10 @@ load(SHARED_CONFIG *cfg, const char *name) if (cfg->ftype == FIX) cursor->set_value(cursor, 0x01); else { - value->size = (uint32_t)snprintf( - valuebuf, sizeof(valuebuf), "%37u", (u_int)keyno); + testutil_check(__wt_snprintf_len_set( + valuebuf, sizeof(valuebuf), + &len, "%37" PRIu64, keyno)); + value->size = (uint32_t)len; cursor->set_value(cursor, value); } if ((ret = cursor->insert(cursor)) != 0) diff --git a/test/cursor_order/cursor_order_ops.c b/test/cursor_order/cursor_order_ops.c index 58da49b2991..299f22684c9 100644 --- a/test/cursor_order/cursor_order_ops.c +++ b/test/cursor_order/cursor_order_ops.c @@ -69,7 +69,8 @@ ops_start(SHARED_CONFIG *cfg) run_info[i].cfg = cfg; if (i == 0 || cfg->multiple_files) { run_info[i].name = dmalloc(64); - snprintf(run_info[i].name, 64, FNAME, (int)i); + testutil_check(__wt_snprintf( + run_info[i].name, 64, FNAME, (int)i)); /* Vary by orders of magnitude */ if (cfg->vary_nops) @@ -93,8 +94,8 @@ ops_start(SHARED_CONFIG *cfg) run_info[offset].name = dmalloc(64); /* Have reverse scans read from tables with writes. */ name_index = i % cfg->append_inserters; - snprintf( - run_info[offset].name, 64, FNAME, (int)name_index); + testutil_check(__wt_snprintf( + run_info[offset].name, 64, FNAME, (int)name_index)); /* Vary by orders of magnitude */ if (cfg->vary_nops) @@ -231,7 +232,7 @@ reverse_scan(void *arg) id = (uintmax_t)arg; s = &run_info[id]; cfg = s->cfg; - __wt_thread_id(tid, sizeof(tid)); + testutil_check(__wt_thread_id(tid, sizeof(tid))); __wt_random_init(&s->rnd); printf(" reverse scan thread %2" PRIuMAX @@ -272,6 +273,7 @@ append_insert_op( { WT_ITEM *value, _value; uint64_t keyno; + size_t len; int ret; char keybuf[64], valuebuf[64]; @@ -281,7 +283,8 @@ append_insert_op( keyno = __wt_atomic_add64(&cfg->key_range, 1); if (cfg->ftype == ROW) { - snprintf(keybuf, sizeof(keybuf), "%016u", (u_int)keyno); + testutil_check(__wt_snprintf( + keybuf, sizeof(keybuf), "%016" PRIu64, keyno)); cursor->set_key(cursor, keybuf); } else cursor->set_key(cursor, (uint32_t)keyno); @@ -291,8 +294,9 @@ append_insert_op( if (cfg->ftype == FIX) cursor->set_value(cursor, 0x10); else { - value->size = (uint32_t)snprintf( - valuebuf, sizeof(valuebuf), "XXX %37u", (u_int)keyno); + testutil_check(__wt_snprintf_len_set( + valuebuf, sizeof(valuebuf), &len, "XXX %37" PRIu64, keyno)); + value->size = (uint32_t)len; cursor->set_value(cursor, value); } if ((ret = cursor->insert(cursor)) != 0) @@ -318,7 +322,7 @@ append_insert(void *arg) id = (uintmax_t)arg; s = &run_info[id]; cfg = s->cfg; - __wt_thread_id(tid, sizeof(tid)); + testutil_check(__wt_thread_id(tid, sizeof(tid))); __wt_random_init(&s->rnd); printf("write thread %2" PRIuMAX " starting: tid: %s, file: %s\n", diff --git a/test/fops/file.c b/test/fops/file.c index 66c23dfed3c..d1cd22ab391 100644 --- a/test/fops/file.c +++ b/test/fops/file.c @@ -71,7 +71,8 @@ obj_bulk_unique(int force) /* Generate a unique object name. */ if ((ret = pthread_rwlock_wrlock(&single)) != 0) testutil_die(ret, "pthread_rwlock_wrlock single"); - (void)snprintf(new_uri, sizeof(new_uri), "%s.%u", uri, ++uid); + testutil_check(__wt_snprintf( + new_uri, sizeof(new_uri), "%s.%u", uri, ++uid)); if ((ret = pthread_rwlock_unlock(&single)) != 0) testutil_die(ret, "pthread_rwlock_unlock single"); @@ -152,7 +153,8 @@ obj_create_unique(int force) /* Generate a unique object name. */ if ((ret = pthread_rwlock_wrlock(&single)) != 0) testutil_die(ret, "pthread_rwlock_wrlock single"); - (void)snprintf(new_uri, sizeof(new_uri), "%s.%u", uri, ++uid); + testutil_check(__wt_snprintf( + new_uri, sizeof(new_uri), "%s.%u", uri, ++uid)); if ((ret = pthread_rwlock_unlock(&single)) != 0) testutil_die(ret, "pthread_rwlock_unlock single"); diff --git a/test/fops/t.c b/test/fops/t.c index 469d5acd33a..07ac07349e3 100644 --- a/test/fops/t.c +++ b/test/fops/t.c @@ -157,11 +157,11 @@ wt_startup(char *config_open) testutil_make_work_dir(home); - snprintf(config_buf, sizeof(config_buf), + testutil_check(__wt_snprintf(config_buf, sizeof(config_buf), "create,error_prefix=\"%s\",cache_size=5MB%s%s", progname, config_open == NULL ? "" : ",", - config_open == NULL ? "" : config_open); + config_open == NULL ? "" : config_open)); if ((ret = wiredtiger_open( home, &event_handler, config_buf, &conn)) != 0) testutil_die(ret, "wiredtiger_open"); diff --git a/test/format/backup.c b/test/format/backup.c index 69fdf771de9..8aa614fa970 100644 --- a/test/format/backup.c +++ b/test/format/backup.c @@ -63,7 +63,7 @@ copy_file(WT_SESSION *session, const char *name) len = strlen("BACKUP") + strlen(name) + 10; first = dmalloc(len); - (void)snprintf(first, len, "BACKUP/%s", name); + testutil_check(__wt_snprintf(first, len, "BACKUP/%s", name)); testutil_check(__wt_copy_and_sync(session, name, first)); /* @@ -72,7 +72,7 @@ copy_file(WT_SESSION *session, const char *name) */ len = strlen("BACKUP_COPY") + strlen(name) + 10; second = dmalloc(len); - (void)snprintf(second, len, "BACKUP_COPY/%s", name); + testutil_check(__wt_snprintf(second, len, "BACKUP_COPY/%s", name)); testutil_check(__wt_copy_and_sync(session, first, second)); free(first); diff --git a/test/format/config.c b/test/format/config.c index cd9856d641e..22b40f7164d 100644 --- a/test/format/config.c +++ b/test/format/config.c @@ -63,39 +63,42 @@ config_setup(void) config_in_memory(); /* - * Choose a data source type and a file type: they're interrelated (LSM - * trees are only compatible with row-store) and other items depend on - * them. + * Choose a file format and a data source: they're interrelated (LSM is + * only compatible with row-store) and other items depend on them. */ + if (!config_is_perm("file_type")) { + if (config_is_perm("data_source") && DATASOURCE("lsm")) + config_single("file_type=row", 0); + else + switch (mmrand(NULL, 1, 10)) { + case 1: /* 10% */ + config_single("file_type=fix", 0); + break; + case 2: case 3: case 4: /* 30% */ + config_single("file_type=var", 0); + break; /* 60% */ + case 5: case 6: case 7: case 8: case 9: case 10: + config_single("file_type=row", 0); + break; + } + } + config_map_file_type(g.c_file_type, &g.type); + if (!config_is_perm("data_source")) switch (mmrand(NULL, 1, 3)) { case 1: config_single("data_source=file", 0); break; case 2: - if (!g.c_in_memory) { - config_single("data_source=lsm", 0); - break; - } - /* FALLTHROUGH */ - case 3: config_single("data_source=table", 0); break; - } - - if (!config_is_perm("file_type")) - switch (DATASOURCE("lsm") ? 5 : mmrand(NULL, 1, 10)) { - case 1: - config_single("file_type=fix", 0); - break; - case 2: case 3: case 4: - config_single("file_type=var", 0); - break; - case 5: case 6: case 7: case 8: case 9: case 10: - config_single("file_type=row", 0); + case 3: + if (g.c_in_memory || g.type != ROW) + config_single("data_source=table", 0); + else + config_single("data_source=lsm", 0); break; } - config_map_file_type(g.c_file_type, &g.type); /* * If data_source and file_type were both "permanent", we may still @@ -254,8 +257,8 @@ config_compression(const char *conf_name) */ cstr = "none"; if (strcmp(conf_name, "logging_compression") == 0 && g.c_logging == 0) { - (void)snprintf( - confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr); + testutil_check(__wt_snprintf( + confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr)); config_single(confbuf, 0); return; } @@ -299,7 +302,8 @@ config_compression(const char *conf_name) break; } - (void)snprintf(confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr); + testutil_check(__wt_snprintf( + confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr)); config_single(confbuf, 0); } @@ -675,7 +679,8 @@ void config_single(const char *s, int perm) { CONFIG *cp; - long v; + long vlong; + uint32_t v; char *p; const char *ep; @@ -740,21 +745,22 @@ config_single(const char *s, int perm) return; } - v = -1; + vlong = -1; if (F_ISSET(cp, C_BOOL)) { if (strncmp(ep, "off", strlen("off")) == 0) - v = 0; + vlong = 0; else if (strncmp(ep, "on", strlen("on")) == 0) - v = 1; + vlong = 1; } - if (v == -1) { - v = strtol(ep, &p, 10); + if (vlong == -1) { + vlong = strtol(ep, &p, 10); if (*p != '\0') { fprintf(stderr, "%s: %s: illegal numeric value\n", progname, s); exit(EXIT_FAILURE); } } + v = (uint32_t)vlong; if (F_ISSET(cp, C_BOOL)) { if (v != 0 && v != 1) { fprintf(stderr, "%s: %s: value of boolean not 0 or 1\n", @@ -767,7 +773,7 @@ config_single(const char *s, int perm) progname, s, cp->min, cp->maxset); exit(EXIT_FAILURE); } - *cp->v = (uint32_t)v; + *cp->v = v; } /* diff --git a/test/format/config.h b/test/format/config.h index e3e1e73a786..b5feb7a5321 100644 --- a/test/format/config.h +++ b/test/format/config.h @@ -284,7 +284,7 @@ static CONFIG c[] = { { "split_pct", "page split size as a percentage of the maximum page size", - 0x0, 40, 85, 85, &g.c_split_pct, NULL }, + 0x0, 50, 100, 100, &g.c_split_pct, NULL }, { "statistics", "maintain statistics", /* 20% */ diff --git a/test/format/ops.c b/test/format/ops.c index 1013d1da30b..72e885bd0d6 100644 --- a/test/format/ops.c +++ b/test/format/ops.c @@ -28,14 +28,17 @@ #include "format.h" -static int col_insert(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *); -static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t); -static int col_update(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); +static int col_insert(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *); +static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t, bool); +static int col_update( + TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); static int nextprev(WT_CURSOR *, int); static void *ops(void *); -static int row_insert(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); -static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t); -static int row_update(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); +static int row_insert( + TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); +static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t, bool); +static int row_update( + TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); static void table_append_init(void); #ifdef HAVE_BERKELEY_DB @@ -243,6 +246,9 @@ typedef struct { bool insert; /* Insert operation */ } SNAP_OPS; +#define SNAP_TRACK \ + (snap != NULL && (size_t)(snap - snap_list) < WT_ELEMENTS(snap_list)) + /* * snap_track -- * Add a single snapshot isolation returned value to the list. @@ -395,15 +401,16 @@ snap_check(WT_CURSOR *cursor, static void * ops(void *arg) { + enum { INSERT, READ, REMOVE, UPDATE } op; SNAP_OPS *snap, snap_list[64]; TINFO *tinfo; WT_CONNECTION *conn; - WT_CURSOR *cursor, *cursor_insert; + WT_CURSOR *cursor; WT_DECL_RET; WT_ITEM *key, _key, *value, _value; WT_SESSION *session; uint64_t keyno, ckpt_op, reset_op, session_op; - uint32_t op, rnd; + uint32_t rnd; u_int i; int dir; char *ckpt_config, ckpt_name[64]; @@ -429,9 +436,9 @@ ops(void *arg) val_gen_setup(&tinfo->rnd, value); /* Set the first operation where we'll create sessions and cursors. */ - session_op = 0; + cursor = NULL; session = NULL; - cursor = cursor_insert = NULL; + session_op = 0; /* Set the first operation where we'll perform checkpoint operations. */ ckpt_op = g.c_checkpoints ? mmrand(&tinfo->rnd, 100, 10000) : 0; @@ -485,24 +492,11 @@ ops(void *arg) readonly = true; } else { /* - * Open two cursors: one for overwriting and one - * for append (if it's a column-store). - * - * The reason is when testing with existing - * records, we don't track if a record was - * deleted or not, which means we must use - * cursor->insert with overwriting configured. - * But, in column-store files where we're - * testing with new, appended records, we don't - * want to have to specify the record number, - * which requires an append configuration. + * Configure "append", in the case of column + * stores, we append when inserting new rows. */ - testutil_check(session->open_cursor(session, - g.uri, NULL, "overwrite", &cursor)); - if (g.type == FIX || g.type == VAR) - testutil_check(session->open_cursor( - session, g.uri, - NULL, "append", &cursor_insert)); + testutil_check(session->open_cursor( + session, g.uri, NULL, "append", &cursor)); /* Pick the next session/cursor close/open. */ session_op += mmrand(&tinfo->rnd, 100, 5000); @@ -536,8 +530,9 @@ ops(void *arg) pthread_rwlock_trywrlock(&g.backup_lock) == EBUSY) ckpt_config = NULL; else { - (void)snprintf(ckpt_name, sizeof(ckpt_name), - "name=thread-%d", tinfo->id); + testutil_check(__wt_snprintf( + ckpt_name, sizeof(ckpt_name), + "name=thread-%d", tinfo->id)); ckpt_config = ckpt_name; } @@ -563,8 +558,9 @@ ops(void *arg) strcpy(ckpt_name, "checkpoint=WiredTigerCheckpoint"); else - (void)snprintf(ckpt_name, sizeof(ckpt_name), - "checkpoint=thread-%d", tinfo->id); + testutil_check(__wt_snprintf( + ckpt_name, sizeof(ckpt_name), + "checkpoint=thread-%d", tinfo->id)); ckpt_available = true; skip_checkpoint: /* Pick the next checkpoint operation. */ @@ -600,111 +596,174 @@ skip_checkpoint: /* Pick the next checkpoint operation. */ intxn = true; } + /* Select a row. */ keyno = mmrand(&tinfo->rnd, 1, (u_int)g.rows); positioned = false; + /* Select an operation. */ + op = READ; + if (!readonly) { + i = mmrand(&tinfo->rnd, 1, 100); + if (i < g.c_delete_pct) + op = REMOVE; + else if (i < g.c_delete_pct + g.c_insert_pct) + op = INSERT; + else if (i < + g.c_delete_pct + g.c_insert_pct + g.c_write_pct) + op = UPDATE; + else + op = READ; + } + /* - * Perform some number of operations: the percentage of deletes, - * inserts and writes are specified, reads are the rest. The - * percentages don't have to add up to 100, a high percentage - * of deletes will mean fewer inserts and writes. Modifications - * are always followed by a read to confirm it worked. + * Inserts, removes and updates can be done following a cursor + * set-key, or based on a cursor position taken from a previous + * search. If not already doing a read, position the cursor at + * an existing point in the tree 20% of the time. */ - op = readonly ? UINT32_MAX : mmrand(&tinfo->rnd, 1, 100); - if (op < g.c_delete_pct) { - ++tinfo->remove; + positioned = false; + if (op != READ && mmrand(&tinfo->rnd, 1, 5) == 1) { + ++tinfo->search; + ret = read_row(cursor, key, value, keyno); + if (ret == 0) { + positioned = true; + if (SNAP_TRACK) + snap_track(snap++, keyno, NULL, value); + } else { + positioned = false; + if (ret == WT_ROLLBACK && intxn) + goto deadlock; + testutil_assert(ret == WT_NOTFOUND); + } + } +#if 0 + /* Optionally reserve a row. */ + if (!readonly && intxn && mmrand(&tinfo->rnd, 0, 20) == 1) { switch (g.type) { case ROW: - ret = row_remove(cursor, key, keyno); + ret = + row_reserve(cursor, key, keyno, positioned); break; case FIX: case VAR: - ret = col_remove(cursor, key, keyno); + ret = col_reserve(cursor, keyno, positioned); break; } if (ret == 0) { positioned = true; - if (snap != NULL && (size_t) - (snap - snap_list) < WT_ELEMENTS(snap_list)) - snap_track(snap++, keyno, NULL, NULL); + __wt_yield(); } else { positioned = false; if (ret == WT_ROLLBACK && intxn) goto deadlock; + testutil_assert(ret == WT_NOTFOUND); } - } else if (op < g.c_delete_pct + g.c_insert_pct) { - ++tinfo->insert; + } +#endif + /* Perform the operation. */ + switch (op) { + case INSERT: switch (g.type) { case ROW: - key_gen_insert(&tinfo->rnd, key, keyno); - val_gen(&tinfo->rnd, value, keyno); - ret = row_insert(cursor, key, value, keyno); + ret = row_insert(tinfo, + cursor, key, value, keyno, positioned); break; case FIX: case VAR: /* - * We can only append so many new records, if - * we've reached that limit, update a record - * instead of doing an insert. + * We can only append so many new records, once + * we reach that limit, update a record instead + * of inserting. */ if (g.append_cnt >= g.append_max) - goto skip_insert; + goto update_instead_of_insert; - /* Insert, then reset the insert cursor. */ - val_gen(&tinfo->rnd, value, g.rows + 1); ret = col_insert( - cursor_insert, key, value, &keyno); - testutil_check( - cursor_insert->reset(cursor_insert)); + tinfo, cursor, key, value, &keyno); break; } + + /* Insert never leaves the cursor positioned. */ positioned = false; if (ret == 0) { - if (snap != NULL && (size_t) - (snap - snap_list) < WT_ELEMENTS(snap_list)) + ++tinfo->insert; + if (SNAP_TRACK) snap_track(snap++, keyno, g.type == ROW ? key : NULL, value); - } else + } else { if (ret == WT_ROLLBACK && intxn) goto deadlock; - } else if ( - op < g.c_delete_pct + g.c_insert_pct + g.c_write_pct) { - ++tinfo->update; + testutil_assert(ret == 0 || ret == WT_ROLLBACK); + } + break; + case READ: + ++tinfo->search; + ret = read_row(cursor, key, value, keyno); + if (ret == 0) { + positioned = true; + if (SNAP_TRACK) + snap_track(snap++, keyno, NULL, value); + } else { + positioned = false; + if (ret == WT_ROLLBACK && intxn) + goto deadlock; + testutil_assert(ret == WT_NOTFOUND); + } + break; + case REMOVE: switch (g.type) { case ROW: - key_gen(key, keyno); - val_gen(&tinfo->rnd, value, keyno); - ret = row_update(cursor, key, value, keyno); + ret = + row_remove(cursor, key, keyno, positioned); break; case FIX: case VAR: -skip_insert: val_gen(&tinfo->rnd, value, keyno); - ret = col_update(cursor, key, value, keyno); + ret = + col_remove(cursor, key, keyno, positioned); break; } if (ret == 0) { - positioned = true; - if (snap != NULL && (size_t) - (snap - snap_list) < WT_ELEMENTS(snap_list)) - snap_track(snap++, keyno, NULL, value); + ++tinfo->remove; + /* + * Don't set positioned: it's unchanged from the + * previous state, but not necessarily set. + */ + if (SNAP_TRACK) + snap_track(snap++, keyno, NULL, NULL); } else { positioned = false; if (ret == WT_ROLLBACK && intxn) goto deadlock; + testutil_assert(ret == WT_NOTFOUND); + } + break; + case UPDATE: +update_instead_of_insert: + ++tinfo->update; + + /* Update the row. */ + switch (g.type) { + case ROW: + ret = row_update(tinfo, + cursor, key, value, keyno, positioned); + break; + case FIX: + case VAR: + ret = col_update(tinfo, + cursor, key, value, keyno, positioned); + break; } - } else { - ++tinfo->search; - ret = read_row(cursor, key, value, keyno); if (ret == 0) { positioned = true; - if (snap != NULL && (size_t) - (snap - snap_list) < WT_ELEMENTS(snap_list)) + if (SNAP_TRACK) snap_track(snap++, keyno, NULL, value); } else { positioned = false; if (ret == WT_ROLLBACK && intxn) goto deadlock; + testutil_assert(ret == 0 || ret == WT_ROLLBACK); } + break; } /* @@ -727,8 +786,8 @@ skip_insert: val_gen(&tinfo->rnd, value, keyno); testutil_check(cursor->reset(cursor)); /* - * If we're in a transaction, commit 40% of the time and - * rollback 10% of the time. + * Continue if not in a transaction, else add more operations + * to the transaction half the time. */ if (!intxn || (rnd = mmrand(&tinfo->rnd, 1, 10)) > 5) continue; @@ -741,6 +800,10 @@ skip_insert: val_gen(&tinfo->rnd, value, keyno); cursor, snap_list, snap, key, value)) == WT_ROLLBACK) goto deadlock; + /* + * If we're in a transaction, commit 40% of the time and + * rollback 10% of the time. + */ switch (rnd) { case 1: case 2: case 3: case 4: /* 40% */ testutil_check( @@ -1040,27 +1103,94 @@ nextprev(WT_CURSOR *cursor, int next) return (ret); } +#if 0 +/* + * row_reserve -- + * Reserve a row in a row-store file. + */ +static int +row_reserve(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned) +{ + WT_DECL_RET; + + if (!positioned) { + key_gen(key, keyno); + cursor->set_key(cursor, key); + } + + if (g.logging == LOG_OPS) + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, + "%-10s{%.*s}", "reserve", (int)key->size, key->data); + + switch (ret = cursor->reserve(cursor)) { + case 0: + break; + case WT_CACHE_FULL: + case WT_ROLLBACK: + return (WT_ROLLBACK); + case WT_NOTFOUND: + return (WT_NOTFOUND); + default: + testutil_die(ret, + "row_reserve: reserve row %" PRIu64 " by key", keyno); + } + return (0); +} + +/* + * col_reserve -- + * Reserve a row in a column-store file. + */ +static int +col_reserve(WT_CURSOR *cursor, uint64_t keyno, bool positioned) +{ + WT_DECL_RET; + + if (!positioned) + cursor->set_key(cursor, keyno); + + if (g.logging == LOG_OPS) + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, + "%-10s%" PRIu64, "reserve", keyno); + + switch (ret = cursor->reserve(cursor)) { + case 0: + break; + case WT_CACHE_FULL: + case WT_ROLLBACK: + return (WT_ROLLBACK); + case WT_NOTFOUND: + return (WT_NOTFOUND); + default: + testutil_die(ret, "col_reserve: %" PRIu64, keyno); + } + return (0); +} +#endif + /* * row_update -- * Update a row in a row-store file. */ static int -row_update(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) +row_update(TINFO *tinfo, WT_CURSOR *cursor, + WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned) { WT_DECL_RET; - WT_SESSION *session; - session = cursor->session; + if (!positioned) { + key_gen(key, keyno); + cursor->set_key(cursor, key); + } + val_gen(&tinfo->rnd, value, keyno); + cursor->set_value(cursor, value); - /* Log the operation */ if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, session, + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s{%.*s}, {%.*s}", "put", (int)key->size, key->data, (int)value->size, value->data); - cursor->set_key(cursor, key); - cursor->set_value(cursor, value); switch (ret = cursor->update(cursor)) { case 0: break; @@ -1086,32 +1216,32 @@ row_update(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) * Update a row in a column-store file. */ static int -col_update(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) +col_update(TINFO *tinfo, WT_CURSOR *cursor, + WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned) { WT_DECL_RET; - WT_SESSION *session; - session = cursor->session; + if (!positioned) + cursor->set_key(cursor, keyno); + val_gen(&tinfo->rnd, value, keyno); + if (g.type == FIX) + cursor->set_value(cursor, *(uint8_t *)value->data); + else + cursor->set_value(cursor, value); - /* Log the operation */ if (g.logging == LOG_OPS) { if (g.type == FIX) - (void)g.wt_api->msg_printf(g.wt_api, session, + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s%" PRIu64 " {0x%02" PRIx8 "}", "update", keyno, ((uint8_t *)value->data)[0]); else - (void)g.wt_api->msg_printf(g.wt_api, session, + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s%" PRIu64 " {%.*s}", "update", keyno, (int)value->size, (char *)value->data); } - cursor->set_key(cursor, keyno); - if (g.type == FIX) - cursor->set_value(cursor, *(uint8_t *)value->data); - else - cursor->set_value(cursor, value); switch (ret = cursor->update(cursor)) { case 0: break; @@ -1238,22 +1368,29 @@ table_append(uint64_t keyno) * Insert a row in a row-store file. */ static int -row_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) +row_insert(TINFO *tinfo, WT_CURSOR *cursor, + WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned) { WT_DECL_RET; - WT_SESSION *session; - session = cursor->session; + /* + * If we positioned the cursor already, it's a test of an update using + * the insert method. Otherwise, generate a unique key and insert. + */ + if (!positioned) { + key_gen_insert(&tinfo->rnd, key, keyno); + cursor->set_key(cursor, key); + } + val_gen(&tinfo->rnd, value, keyno); + cursor->set_value(cursor, value); /* Log the operation */ if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, session, + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s{%.*s}, {%.*s}", "insert", (int)key->size, key->data, (int)value->size, value->data); - cursor->set_key(cursor, key); - cursor->set_value(cursor, value); switch (ret = cursor->insert(cursor)) { case 0: break; @@ -1279,14 +1416,13 @@ row_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) * Insert an element in a column-store file. */ static int -col_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) +col_insert(TINFO *tinfo, + WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) { WT_DECL_RET; - WT_SESSION *session; uint64_t keyno; - session = cursor->session; - + val_gen(&tinfo->rnd, value, g.rows + 1); if (g.type == FIX) cursor->set_value(cursor, *(uint8_t *)value->data); else @@ -1307,12 +1443,12 @@ col_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) if (g.logging == LOG_OPS) { if (g.type == FIX) - (void)g.wt_api->msg_printf(g.wt_api, session, + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s%" PRIu64 " {0x%02" PRIx8 "}", "insert", keyno, ((uint8_t *)value->data)[0]); else - (void)g.wt_api->msg_printf(g.wt_api, session, + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s%" PRIu64 " {%.*s}", "insert", keyno, (int)value->size, (char *)value->data); @@ -1335,21 +1471,19 @@ col_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) * Remove an row from a row-store file. */ static int -row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno) +row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned) { WT_DECL_RET; - WT_SESSION *session; - session = cursor->session; - - key_gen(key, keyno); + if (!positioned) { + key_gen(key, keyno); + cursor->set_key(cursor, key); + } - /* Log the operation */ if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf( - g.wt_api, session, "%-10s%" PRIu64, "remove", keyno); + (void)g.wt_api->msg_printf(g.wt_api, + cursor->session, "%-10s%" PRIu64, "remove", keyno); - cursor->set_key(cursor, key); /* We use the cursor in overwrite mode, check for existence. */ if ((ret = cursor->search(cursor)) == 0) ret = cursor->remove(cursor); @@ -1385,19 +1519,17 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno) * Remove a row from a column-store file. */ static int -col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno) +col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned) { WT_DECL_RET; - WT_SESSION *session; - session = cursor->session; + if (!positioned) + cursor->set_key(cursor, keyno); - /* Log the operation */ if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf( - g.wt_api, session, "%-10s%" PRIu64, "remove", keyno); + (void)g.wt_api->msg_printf(g.wt_api, + cursor->session, "%-10s%" PRIu64, "remove", keyno); - cursor->set_key(cursor, keyno); /* We use the cursor in overwrite mode, check for existence. */ if ((ret = cursor->search(cursor)) == 0) ret = cursor->remove(cursor); diff --git a/test/format/rebalance.c b/test/format/rebalance.c index 9849b7df82b..e35c62e7255 100644 --- a/test/format/rebalance.c +++ b/test/format/rebalance.c @@ -41,10 +41,10 @@ wts_rebalance(void) track("rebalance", 0ULL, NULL); /* Dump the current object. */ - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), ".." DIR_DELIM_STR ".." DIR_DELIM_STR "wt" " -h %s dump -f %s/rebalance.orig %s", - g.home, g.home, g.uri); + g.home, g.home, g.uri)); testutil_checkfmt(system(cmd), "command failed: %s", cmd); /* Rebalance, then verify the object. */ @@ -66,21 +66,21 @@ wts_rebalance(void) wts_verify("post-rebalance verify"); wts_close(); - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), ".." DIR_DELIM_STR ".." DIR_DELIM_STR "wt" " -h %s dump -f %s/rebalance.new %s", - g.home, g.home, g.uri); + g.home, g.home, g.uri)); testutil_checkfmt(system(cmd), "command failed: %s", cmd); /* Compare the old/new versions of the object. */ #ifdef _WIN32 - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), "fc /b %s\\rebalance.orig %s\\rebalance.new > NUL", - g.home, g.home); + g.home, g.home)); #else - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), "cmp %s/rebalance.orig %s/rebalance.new > /dev/null", - g.home, g.home); + g.home, g.home)); #endif testutil_checkfmt(system(cmd), "command failed: %s", cmd); } diff --git a/test/format/salvage.c b/test/format/salvage.c index 69805fb1018..f82dc34dd5f 100644 --- a/test/format/salvage.c +++ b/test/format/salvage.c @@ -70,29 +70,31 @@ corrupt(void) * It's a little tricky: if the data source is a file, we're looking * for "wt", if the data source is a table, we're looking for "wt.wt". */ - (void)snprintf(buf, sizeof(buf), "%s/%s", g.home, WT_NAME); + testutil_check(__wt_snprintf( + buf, sizeof(buf), "%s/%s", g.home, WT_NAME)); if ((fd = open(buf, O_RDWR)) != -1) { #ifdef _WIN32 - (void)snprintf(copycmd, sizeof(copycmd), + testutil_check(__wt_snprintf(copycmd, sizeof(copycmd), "copy %s\\%s %s\\slvg.copy\\%s.corrupted", - g.home, WT_NAME, g.home, WT_NAME); + g.home, WT_NAME, g.home, WT_NAME)); #else - (void)snprintf(copycmd, sizeof(copycmd), + testutil_check(__wt_snprintf(copycmd, sizeof(copycmd), "cp %s/%s %s/slvg.copy/%s.corrupted", - g.home, WT_NAME, g.home, WT_NAME); + g.home, WT_NAME, g.home, WT_NAME)); #endif goto found; } - (void)snprintf(buf, sizeof(buf), "%s/%s.wt", g.home, WT_NAME); + testutil_check(__wt_snprintf( + buf, sizeof(buf), "%s/%s.wt", g.home, WT_NAME)); if ((fd = open(buf, O_RDWR)) != -1) { #ifdef _WIN32 - (void)snprintf(copycmd, sizeof(copycmd), + testutil_check(__wt_snprintf(copycmd, sizeof(copycmd), "copy %s\\%s.wt %s\\slvg.copy\\%s.wt.corrupted", - g.home, WT_NAME, g.home, WT_NAME); + g.home, WT_NAME, g.home, WT_NAME)); #else - (void)snprintf(copycmd, sizeof(copycmd), + testutil_check(__wt_snprintf(copycmd, sizeof(copycmd), "cp %s/%s.wt %s/slvg.copy/%s.wt.corrupted", - g.home, WT_NAME, g.home, WT_NAME); + g.home, WT_NAME, g.home, WT_NAME)); #endif goto found; } @@ -103,7 +105,8 @@ found: if (fstat(fd, &sb) == -1) offset = mmrand(NULL, 0, (u_int)sb.st_size); len = (size_t)(20 + (sb.st_size / 100) * 2); - (void)snprintf(buf, sizeof(buf), "%s/slvg.corrupt", g.home); + testutil_check(__wt_snprintf( + buf, sizeof(buf), "%s/slvg.corrupt", g.home)); if ((fp = fopen(buf, "w")) == NULL) testutil_die(errno, "salvage-corrupt: open: %s", buf); (void)fprintf(fp, diff --git a/test/format/util.c b/test/format/util.c index b9788f1ac75..983d03e2525 100644 --- a/test/format/util.c +++ b/test/format/util.c @@ -241,20 +241,23 @@ val_gen(WT_RAND_STATE *rnd, WT_ITEM *value, uint64_t keyno) void track(const char *tag, uint64_t cnt, TINFO *tinfo) { - static int lastlen = 0; - int len; + static size_t lastlen = 0; + size_t len; char msg[128]; if (g.c_quiet || tag == NULL) return; if (tinfo == NULL && cnt == 0) - len = snprintf(msg, sizeof(msg), "%4d: %s", g.run_cnt, tag); + testutil_check(__wt_snprintf_len_set( + msg, sizeof(msg), &len, "%4d: %s", g.run_cnt, tag)); else if (tinfo == NULL) - len = snprintf( - msg, sizeof(msg), "%4d: %s: %" PRIu64, g.run_cnt, tag, cnt); + testutil_check(__wt_snprintf_len_set( + msg, sizeof(msg), &len, + "%4d: %s: %" PRIu64, g.run_cnt, tag, cnt)); else - len = snprintf(msg, sizeof(msg), + testutil_check(__wt_snprintf_len_set( + msg, sizeof(msg), &len, "%4d: %s: " "search %" PRIu64 "%s, " "insert %" PRIu64 "%s, " @@ -268,7 +271,7 @@ track(const char *tag, uint64_t cnt, TINFO *tinfo) tinfo->update > M(9) ? tinfo->update / M(1) : tinfo->update, tinfo->update > M(9) ? "M" : "", tinfo->remove > M(9) ? tinfo->remove / M(1) : tinfo->remove, - tinfo->remove > M(9) ? "M" : ""); + tinfo->remove > M(9) ? "M" : "")); if (lastlen > len) { memset(msg + len, ' ', (size_t)(lastlen - len)); @@ -297,27 +300,30 @@ path_setup(const char *home) /* Log file. */ len = strlen(g.home) + strlen("log") + 2; g.home_log = dmalloc(len); - snprintf(g.home_log, len, "%s/%s", g.home, "log"); + testutil_check(__wt_snprintf(g.home_log, len, "%s/%s", g.home, "log")); /* RNG log file. */ len = strlen(g.home) + strlen("rand") + 2; g.home_rand = dmalloc(len); - snprintf(g.home_rand, len, "%s/%s", g.home, "rand"); + testutil_check(__wt_snprintf( + g.home_rand, len, "%s/%s", g.home, "rand")); /* Run file. */ len = strlen(g.home) + strlen("CONFIG") + 2; g.home_config = dmalloc(len); - snprintf(g.home_config, len, "%s/%s", g.home, "CONFIG"); + testutil_check(__wt_snprintf( + g.home_config, len, "%s/%s", g.home, "CONFIG")); /* Statistics file. */ len = strlen(g.home) + strlen("stats") + 2; g.home_stats = dmalloc(len); - snprintf(g.home_stats, len, "%s/%s", g.home, "stats"); + testutil_check(__wt_snprintf( + g.home_stats, len, "%s/%s", g.home, "stats")); /* BDB directory. */ len = strlen(g.home) + strlen("bdb") + 2; g.home_bdb = dmalloc(len); - snprintf(g.home_bdb, len, "%s/%s", g.home, "bdb"); + testutil_check(__wt_snprintf(g.home_bdb, len, "%s/%s", g.home, "bdb")); /* * Home directory initialize command: create the directory if it doesn't @@ -336,21 +342,23 @@ path_setup(const char *home) "cd %s & mkdir KVS" len = strlen(g.home) * 7 + strlen(CMD) + 1; g.home_init = dmalloc(len); - snprintf(g.home_init, len, CMD, - g.home, g.home, g.home, g.home, g.home, g.home, g.home); + testutil_check(__wt_snprintf(g.home_init, len, CMD, + g.home, g.home, g.home, g.home, g.home, g.home, g.home)); #else #define CMD "test -e %s || mkdir %s; " \ "cd %s > /dev/null && rm -rf `ls | sed /rand/d`; " \ "mkdir KVS" len = strlen(g.home) * 3 + strlen(CMD) + 1; g.home_init = dmalloc(len); - snprintf(g.home_init, len, CMD, g.home, g.home, g.home); + testutil_check(__wt_snprintf( + g.home_init, len, CMD, g.home, g.home, g.home)); #endif /* Primary backup directory. */ len = strlen(g.home) + strlen("BACKUP") + 2; g.home_backup = dmalloc(len); - snprintf(g.home_backup, len, "%s/%s", g.home, "BACKUP"); + testutil_check(__wt_snprintf( + g.home_backup, len, "%s/%s", g.home, "BACKUP")); /* * Backup directory initialize command, remove and re-create the primary @@ -365,9 +373,9 @@ path_setup(const char *home) len = strlen(g.home) * 4 + strlen("BACKUP") * 2 + strlen("BACKUP_COPY") * 2 + strlen(CMD) + 1; g.home_backup_init = dmalloc(len); - snprintf(g.home_backup_init, len, CMD, + testutil_check(__wt_snprintf(g.home_backup_init, len, CMD, g.home, "BACKUP", g.home, "BACKUP_COPY", - g.home, "BACKUP", g.home, "BACKUP_COPY"); + g.home, "BACKUP", g.home, "BACKUP_COPY")); /* * Salvage command, save the interesting files so we can replay the @@ -390,7 +398,7 @@ path_setup(const char *home) #endif len = strlen(g.home) + strlen(CMD) + 1; g.home_salvage_copy = dmalloc(len); - snprintf(g.home_salvage_copy, len, CMD, g.home); + testutil_check(__wt_snprintf(g.home_salvage_copy, len, CMD, g.home)); } /* @@ -489,8 +497,9 @@ alter(void *arg) while (!g.workers_finished) { period = mmrand(NULL, 1, 10); - snprintf(buf, sizeof(buf), - "access_pattern_hint=%s", access_value ? "random" : "none"); + testutil_check(__wt_snprintf(buf, sizeof(buf), + "access_pattern_hint=%s", + access_value ? "random" : "none")); access_value = !access_value; if (session->alter(session, g.uri, buf) != 0) break; diff --git a/test/format/wts.c b/test/format/wts.c index a87aa5b9f88..6aa4784d1c1 100644 --- a/test/format/wts.c +++ b/test/format/wts.c @@ -120,8 +120,15 @@ static WT_EVENT_HANDLER event_handler = { NULL /* Close handler. */ }; -#undef REMAIN -#define REMAIN(p, end) (size_t)((p) >= (end) ? 0 : (end) - (p)) +#define CONFIG_APPEND(p, ...) do { \ + size_t __len; \ + testutil_check( \ + __wt_snprintf_len_set(p, max, &__len, __VA_ARGS__)); \ + if (__len > max) \ + __len = max; \ + p += __len; \ + max -= __len; \ +} while (0) /* * wts_open -- @@ -132,14 +139,15 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) { WT_CONNECTION *conn; WT_DECL_RET; - char *config, *end, *p, helium_config[1024]; + size_t max; + char *config, *p, helium_config[1024]; *connp = NULL; config = p = g.wiredtiger_open_config; - end = config + sizeof(g.wiredtiger_open_config); + max = sizeof(g.wiredtiger_open_config); - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, "create=true," "cache_size=%" PRIu32 "MB," "checkpoint_sync=false," @@ -148,26 +156,25 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) /* In-memory configuration. */ if (g.c_in_memory != 0) - p += snprintf(p, REMAIN(p, end), ",in_memory=1"); + CONFIG_APPEND(p, ",in_memory=1"); /* LSM configuration. */ if (DATASOURCE("lsm")) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",lsm_manager=(worker_thread_max=%" PRIu32 "),", g.c_lsm_worker_threads); - if (DATASOURCE("lsm") || g.c_cache < 20) { - p += snprintf(p, REMAIN(p, end), ",eviction_dirty_trigger=95"); - } + if (DATASOURCE("lsm") || g.c_cache < 20) + CONFIG_APPEND(p, ",eviction_dirty_trigger=95"); /* Eviction worker configuration. */ if (g.c_evict_max != 0) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",eviction=(threads_max=%" PRIu32 ")", g.c_evict_max); /* Logging configuration. */ if (g.c_logging) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",log=(enabled=true,archive=%d,prealloc=%d" ",compressor=\"%s\")", g.c_logging_archive ? 1 : 0, @@ -175,21 +182,21 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) compressor(g.c_logging_compression_flag)); if (g.c_encryption) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",encryption=(name=%s)", encryptor(g.c_encryption_flag)); /* Miscellaneous. */ #ifdef HAVE_POSIX_MEMALIGN - p += snprintf(p, REMAIN(p, end), ",buffer_alignment=512"); + CONFIG_APPEND(p, ",buffer_alignment=512"); #endif - p += snprintf(p, REMAIN(p, end), ",mmap=%d", g.c_mmap ? 1 : 0); + CONFIG_APPEND(p, ",mmap=%d", g.c_mmap ? 1 : 0); if (g.c_direct_io) - p += snprintf(p, REMAIN(p, end), ",direct_io=(data)"); + CONFIG_APPEND(p, ",direct_io=(data)"); if (g.c_data_extend) - p += snprintf(p, REMAIN(p, end), ",file_extend=(data=8MB)"); + CONFIG_APPEND(p, ",file_extend=(data=8MB)"); /* * Run the statistics server and/or maintain statistics in the engine. @@ -198,18 +205,18 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) if (g.c_statistics_server) { if (mmrand(NULL, 0, 5) == 1 && memcmp(g.uri, "file:", strlen("file:")) == 0) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",statistics=(fast)" ",statistics_log=(wait=5,sources=(\"file:\"))"); else - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",statistics=(fast),statistics_log=(wait=5)"); } else - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",statistics=(%s)", g.c_statistics ? "fast" : "none"); /* Extensions. */ - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",extensions=[" "\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"],", g.c_reverse ? REVERSE_PATH : "", @@ -227,11 +234,11 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) * override the standard configuration. */ if (g.c_config_open != NULL) - p += snprintf(p, REMAIN(p, end), ",%s", g.c_config_open); + CONFIG_APPEND(p, ",%s", g.c_config_open); if (g.config_open != NULL) - p += snprintf(p, REMAIN(p, end), ",%s", g.config_open); + CONFIG_APPEND(p, ",%s", g.config_open); - if (REMAIN(p, end) == 0) + if (max == 0) testutil_die(ENOMEM, "wiredtiger_open configuration buffer too small"); @@ -259,12 +266,13 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) if (DATASOURCE("helium")) { if (g.helium_mount == NULL) testutil_die(EINVAL, "no Helium mount point specified"); - (void)snprintf(helium_config, sizeof(helium_config), + testutil_check( + __wt_snprintf(helium_config, sizeof(helium_config), "entry=wiredtiger_extension_init,config=[" "helium_verbose=0," "dev1=[helium_devices=\"he://./%s\"," "helium_o_volume_truncate=1]]", - g.helium_mount); + g.helium_mount)); if ((ret = conn->load_extension( conn, HELIUM_PATH, helium_config)) != 0) testutil_die(ret, @@ -299,13 +307,13 @@ wts_init(void) { WT_CONNECTION *conn; WT_SESSION *session; + size_t max; uint32_t maxintlpage, maxintlkey, maxleafpage, maxleafkey, maxleafvalue; - char config[4096], *end, *p; + char config[4096], *p; conn = g.wts_conn; - p = config; - end = config + sizeof(config); + max = sizeof(config); /* * Ensure that we can service at least one operation per-thread @@ -326,7 +334,7 @@ wts_init(void) if (maxleafpage > 512) maxleafpage >>= 1; } - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, "key_format=%s," "allocation_size=512,%s" "internal_page_max=%" PRIu32 ",leaf_page_max=%" PRIu32, @@ -340,43 +348,35 @@ wts_init(void) */ maxintlkey = mmrand(NULL, maxintlpage / 50, maxintlpage / 40); if (maxintlkey > 20) - p += snprintf(p, REMAIN(p, end), - ",internal_key_max=%" PRIu32, maxintlkey); + CONFIG_APPEND(p, ",internal_key_max=%" PRIu32, maxintlkey); maxleafkey = mmrand(NULL, maxleafpage / 50, maxleafpage / 40); if (maxleafkey > 20) - p += snprintf(p, REMAIN(p, end), - ",leaf_key_max=%" PRIu32, maxleafkey); + CONFIG_APPEND(p, ",leaf_key_max=%" PRIu32, maxleafkey); maxleafvalue = mmrand(NULL, maxleafpage * 10, maxleafpage / 40); if (maxleafvalue > 40 && maxleafvalue < 100 * 1024) - p += snprintf(p, REMAIN(p, end), - ",leaf_value_max=%" PRIu32, maxleafvalue); + CONFIG_APPEND(p, ",leaf_value_max=%" PRIu32, maxleafvalue); switch (g.type) { case FIX: - p += snprintf(p, REMAIN(p, end), - ",value_format=%" PRIu32 "t", g.c_bitcnt); + CONFIG_APPEND(p, ",value_format=%" PRIu32 "t", g.c_bitcnt); break; case ROW: if (g.c_huffman_key) - p += snprintf(p, REMAIN(p, end), - ",huffman_key=english"); + CONFIG_APPEND(p, ",huffman_key=english"); if (g.c_prefix_compression) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",prefix_compression_min=%" PRIu32, g.c_prefix_compression_min); else - p += snprintf(p, REMAIN(p, end), - ",prefix_compression=false"); + CONFIG_APPEND(p, ",prefix_compression=false"); if (g.c_reverse) - p += snprintf(p, REMAIN(p, end), - ",collator=reverse"); + CONFIG_APPEND(p, ",collator=reverse"); /* FALLTHROUGH */ case VAR: if (g.c_huffman_value) - p += snprintf(p, REMAIN(p, end), - ",huffman_value=english"); + CONFIG_APPEND(p, ",huffman_value=english"); if (g.c_dictionary) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",dictionary=%" PRIu32, mmrand(NULL, 123, 517)); break; } @@ -384,66 +384,63 @@ wts_init(void) /* Configure checksums. */ switch (g.c_checksum_flag) { case CHECKSUM_OFF: - p += snprintf(p, REMAIN(p, end), ",checksum=\"off\""); + CONFIG_APPEND(p, ",checksum=\"off\""); break; case CHECKSUM_ON: - p += snprintf(p, REMAIN(p, end), ",checksum=\"on\""); + CONFIG_APPEND(p, ",checksum=\"on\""); break; case CHECKSUM_UNCOMPRESSED: - p += snprintf(p, REMAIN(p, end), ",checksum=\"uncompressed\""); + CONFIG_APPEND(p, ",checksum=\"uncompressed\""); break; } /* Configure compression. */ if (g.c_compression_flag != COMPRESS_NONE) - p += snprintf(p, REMAIN(p, end), ",block_compressor=\"%s\"", + CONFIG_APPEND(p, ",block_compressor=\"%s\"", compressor(g.c_compression_flag)); /* Configure Btree internal key truncation. */ - p += snprintf(p, REMAIN(p, end), ",internal_key_truncate=%s", + CONFIG_APPEND(p, ",internal_key_truncate=%s", g.c_internal_key_truncation ? "true" : "false"); /* Configure Btree page key gap. */ - p += snprintf(p, REMAIN(p, end), ",key_gap=%" PRIu32, g.c_key_gap); + CONFIG_APPEND(p, ",key_gap=%" PRIu32, g.c_key_gap); /* Configure Btree split page percentage. */ - p += snprintf(p, REMAIN(p, end), ",split_pct=%" PRIu32, g.c_split_pct); + CONFIG_APPEND(p, ",split_pct=%" PRIu32, g.c_split_pct); /* Configure LSM and data-sources. */ if (DATASOURCE("helium")) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",type=helium,helium_o_compress=%d,helium_o_truncate=1", g.c_compression_flag == COMPRESS_NONE ? 0 : 1); if (DATASOURCE("kvsbdb")) - p += snprintf(p, REMAIN(p, end), ",type=kvsbdb"); + CONFIG_APPEND(p, ",type=kvsbdb"); if (DATASOURCE("lsm")) { - p += snprintf(p, REMAIN(p, end), ",type=lsm,lsm=("); - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",type=lsm,lsm=("); + CONFIG_APPEND(p, "auto_throttle=%s,", g.c_auto_throttle ? "true" : "false"); - p += snprintf(p, REMAIN(p, end), - "chunk_size=%" PRIu32 "MB,", g.c_chunk_size); + CONFIG_APPEND(p, "chunk_size=%" PRIu32 "MB,", g.c_chunk_size); /* * We can't set bloom_oldest without bloom, and we want to test * with Bloom filters on most of the time anyway. */ if (g.c_bloom_oldest) g.c_bloom = 1; - p += snprintf(p, REMAIN(p, end), - "bloom=%s,", g.c_bloom ? "true" : "false"); - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, "bloom=%s,", g.c_bloom ? "true" : "false"); + CONFIG_APPEND(p, "bloom_bit_count=%" PRIu32 ",", g.c_bloom_bit_count); - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, "bloom_hash_count=%" PRIu32 ",", g.c_bloom_hash_count); - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, "bloom_oldest=%s,", g.c_bloom_oldest ? "true" : "false"); - p += snprintf(p, REMAIN(p, end), - "merge_max=%" PRIu32 ",", g.c_merge_max); - p += snprintf(p, REMAIN(p, end), ",)"); + CONFIG_APPEND(p, "merge_max=%" PRIu32 ",", g.c_merge_max); + CONFIG_APPEND(p, ",)"); } - if (REMAIN(p, end) == 0) + if (max == 0) testutil_die(ENOMEM, "WT_SESSION.create configuration buffer too small"); @@ -490,14 +487,14 @@ wts_dump(const char *tag, int dump_bdb) len = strlen(g.home) + strlen(BERKELEY_DB_PATH) + strlen(g.uri) + 100; cmd = dmalloc(len); - (void)snprintf(cmd, len, + testutil_check(__wt_snprintf(cmd, len, "sh s_dumpcmp -h %s %s %s %s %s %s", g.home, dump_bdb ? "-b " : "", dump_bdb ? BERKELEY_DB_PATH : "", g.type == FIX || g.type == VAR ? "-c" : "", g.uri == NULL ? "" : "-n", - g.uri == NULL ? "" : g.uri); + g.uri == NULL ? "" : g.uri)); testutil_checkfmt(system(cmd), "%s: dump comparison failed", tag); free(cmd); @@ -587,7 +584,7 @@ wts_stats(void) fprintf(fp, "\n\n====== Data source statistics:\n"); len = strlen("statistics:") + strlen(g.uri) + 1; stat_name = dmalloc(len); - snprintf(stat_name, len, "statistics:%s", g.uri); + testutil_check(__wt_snprintf(stat_name, len, "statistics:%s", g.uri)); testutil_check(session->open_cursor( session, stat_name, NULL, NULL, &cursor)); free(stat_name); diff --git a/test/java/com/wiredtiger/test/CursorTest03.java b/test/java/com/wiredtiger/test/CursorTest03.java new file mode 100644 index 00000000000..64f33f4d7b6 --- /dev/null +++ b/test/java/com/wiredtiger/test/CursorTest03.java @@ -0,0 +1,175 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +package com.wiredtiger.test; + +import com.wiredtiger.db.Connection; +import com.wiredtiger.db.Cursor; +import com.wiredtiger.db.SearchStatus; +import com.wiredtiger.db.Session; +import com.wiredtiger.db.WiredTigerPackingException; +import com.wiredtiger.db.WiredTigerException; +import com.wiredtiger.db.wiredtiger; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; +import org.junit.Assert; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/* + * Test cases for WT-3238. + * + * Most WiredTiger methods return int, and our SWIG typemaps for Java add + * checking that throws exceptions for non-zero returns. Certain methods + * (Cursor.compare, Cursor.equals) are declared as returning int in Java, + * but should not throw exceptions for normal returns (which may be + * non-zero). + */ +public class CursorTest03 { + Connection conn; + Session s; + static String values[] = { "key0", "key1" }; + + @Test + public void cursor_int_methods() + throws WiredTigerPackingException { + setup(); + + Cursor c1 = s.open_cursor("table:t", null, null); + Cursor c2 = s.open_cursor("table:t", null, null); + for (String s : values) { + c1.putKeyString(s); + c1.putValueString(s); + c1.insert(); + } + c1.reset(); + + // "key1" compared to "key1" + c1.putKeyString(values[1]); + Assert.assertEquals(c1.search_near(), SearchStatus.FOUND); + c2.putKeyString(values[1]); + Assert.assertEquals(c2.search_near(), SearchStatus.FOUND); + Assert.assertEquals(c1.compare(c2), 0); + Assert.assertEquals(c2.compare(c1), 0); + Assert.assertEquals(c1.compare(c1), 0); + Assert.assertEquals(c1.equals(c2), 1); + Assert.assertEquals(c2.equals(c1), 1); + Assert.assertEquals(c1.equals(c1), 1); + + // "key0" compared to "key1" + c1.putKeyString(values[0]); + Assert.assertEquals(c1.search_near(), SearchStatus.FOUND); + Assert.assertEquals(c1.compare(c2), -1); + Assert.assertEquals(c2.compare(c1), 1); + Assert.assertEquals(c1.equals(c2), 0); + Assert.assertEquals(c2.equals(c1), 0); + + c1.close(); + c2.close(); + teardown(); + } + + public void expectException(Cursor c1, Cursor c2) + { + boolean caught = false; + try { + c1.compare(c2); + } + catch (WiredTigerException wte) { + caught = true; + } + Assert.assertTrue(caught); + + caught = false; + try { + c1.equals(c2); + } + catch (WiredTigerException wte) { + caught = true; + } + Assert.assertTrue(caught); + } + + @Test + public void cursor_int_methods_errors() + throws WiredTigerPackingException { + setup(); + + Cursor c1 = s.open_cursor("table:t", null, null); + Cursor c2 = s.open_cursor("table:t", null, null); + Cursor cx = s.open_cursor("table:t2", null, null); + for (String s : values) { + c1.putKeyString(s); + c1.putValueString(s); + c1.insert(); + cx.putKeyString(s); + cx.putValueString(s); + cx.insert(); + } + c1.reset(); + cx.reset(); + + // With both cursors not set, should be an exception. + expectException(c1, c2); + expectException(c1, c2); + + // With any one cursor not set, should be an exception. + c1.putKeyString(values[1]); + Assert.assertEquals(c1.search_near(), SearchStatus.FOUND); + expectException(c1, c2); + expectException(c1, c2); + + // With two cursors from different tables, should be an exception. + cx.putKeyString(values[1]); + Assert.assertEquals(cx.search_near(), SearchStatus.FOUND); + expectException(c1, cx); + expectException(c1, cx); + + c1.close(); + c2.close(); + cx.close(); + teardown(); + } + + private void setup() { + conn = wiredtiger.open("WT_HOME", "create"); + s = conn.open_session(null); + s.create("table:t", "key_format=S,value_format=S"); + s.create("table:t2", "key_format=S,value_format=S"); + } + + private void teardown() { + s.drop("table:t", ""); + s.drop("table:t2", ""); + s.close(""); + conn.close(""); + } + +} + diff --git a/test/java/com/wiredtiger/test/WiredTigerSuite.java b/test/java/com/wiredtiger/test/WiredTigerSuite.java index 5bd98d53fac..9322d30671a 100644 --- a/test/java/com/wiredtiger/test/WiredTigerSuite.java +++ b/test/java/com/wiredtiger/test/WiredTigerSuite.java @@ -38,6 +38,7 @@ import org.junit.runners.Suite; ConfigTest.class, CursorTest.class, CursorTest02.class, + CursorTest03.class, ExceptionTest.class, PackTest.class, PackTest02.class, diff --git a/test/manydbs/manydbs.c b/test/manydbs/manydbs.c index 345c470ba90..42020d6ce9a 100644 --- a/test/manydbs/manydbs.c +++ b/test/manydbs/manydbs.c @@ -168,7 +168,8 @@ main(int argc, char *argv[]) testutil_make_work_dir(home); __wt_random_init(&rnd); for (i = 0; i < dbs; ++i) { - snprintf(hometmp, HOME_SIZE, "%s/%s.%d", home, HOME_BASE, i); + testutil_check(__wt_snprintf( + hometmp, HOME_SIZE, "%s/%s.%d", home, HOME_BASE, i)); testutil_make_work_dir(hometmp); /* * Open each database. Rotate different configurations diff --git a/test/readonly/readonly.c b/test/readonly/readonly.c index 746aecbf6c5..66c7a0ca692 100644 --- a/test/readonly/readonly.c +++ b/test/readonly/readonly.c @@ -206,10 +206,12 @@ main(int argc, char *argv[]) * Set up all the directory names. */ testutil_work_dir_from_path(home, sizeof(home), working_dir); - (void)snprintf(home_wr, sizeof(home_wr), "%s%s", home, HOME_WR_SUFFIX); - (void)snprintf(home_rd, sizeof(home_rd), "%s%s", home, HOME_RD_SUFFIX); - (void)snprintf( - home_rd2, sizeof(home_rd2), "%s%s", home, HOME_RD2_SUFFIX); + testutil_check(__wt_snprintf( + home_wr, sizeof(home_wr), "%s%s", home, HOME_WR_SUFFIX)); + testutil_check(__wt_snprintf( + home_rd, sizeof(home_rd), "%s%s", home, HOME_RD_SUFFIX)); + testutil_check(__wt_snprintf( + home_rd2, sizeof(home_rd2), "%s%s", home, HOME_RD2_SUFFIX)); if (!child) { testutil_make_work_dir(home); testutil_make_work_dir(home_wr); @@ -268,22 +270,22 @@ main(int argc, char *argv[]) * Copy the database. Remove any lock file from one copy * and chmod the copies to be read-only permissions. */ - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), "cp -rp %s/* %s; rm -f %s/WiredTiger.lock", - home, home_wr, home_wr); + home, home_wr, home_wr)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), "cp -rp %s/* %s; chmod 0555 %s; chmod -R 0444 %s/*", - home, home_rd, home_rd, home_rd); + home, home_rd, home_rd, home_rd)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), "cp -rp %s/* %s; rm -f %s/WiredTiger.lock; " "chmod 0555 %s; chmod -R 0444 %s/*", - home, home_rd2, home_rd2, home_rd2, home_rd2); + home, home_rd2, home_rd2, home_rd2, home_rd2)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); @@ -327,8 +329,8 @@ main(int argc, char *argv[]) * * The child will exit with success if its test passes. */ - (void)snprintf( - cmd, sizeof(cmd), "%s -h %s -R", saved_argv0, working_dir); + testutil_check(__wt_snprintf( + cmd, sizeof(cmd), "%s -h %s -R", saved_argv0, working_dir)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); if (WEXITSTATUS(status) != 0) @@ -337,8 +339,8 @@ main(int argc, char *argv[]) /* * Scenario 2. Run child with writable config. */ - (void)snprintf( - cmd, sizeof(cmd), "%s -h %s -W", saved_argv0, working_dir); + testutil_check(__wt_snprintf( + cmd, sizeof(cmd), "%s -h %s -W", saved_argv0, working_dir)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); if (WEXITSTATUS(status) != 0) @@ -358,8 +360,8 @@ main(int argc, char *argv[]) /* * Scenario 3. Child read-only. */ - (void)snprintf( - cmd, sizeof(cmd), "%s -h %s -R", saved_argv0, working_dir); + testutil_check(__wt_snprintf( + cmd, sizeof(cmd), "%s -h %s -R", saved_argv0, working_dir)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); if (WEXITSTATUS(status) != 0) @@ -368,8 +370,8 @@ main(int argc, char *argv[]) /* * Scenario 4. Run child with writable config. */ - (void)snprintf( - cmd, sizeof(cmd), "%s -h %s -W", saved_argv0, working_dir); + testutil_check(__wt_snprintf( + cmd, sizeof(cmd), "%s -h %s -W", saved_argv0, working_dir)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); if (WEXITSTATUS(status) != 0) @@ -390,11 +392,12 @@ main(int argc, char *argv[]) * We need to chmod the read-only databases back so that they can * be removed by scripts. */ - (void)snprintf(cmd, sizeof(cmd), "chmod 0777 %s %s", home_rd, home_rd2); + testutil_check(__wt_snprintf( + cmd, sizeof(cmd), "chmod 0777 %s %s", home_rd, home_rd2)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); - (void)snprintf(cmd, sizeof(cmd), "chmod -R 0666 %s/* %s/*", - home_rd, home_rd2); + testutil_check(__wt_snprintf( + cmd, sizeof(cmd), "chmod -R 0666 %s/* %s/*", home_rd, home_rd2)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); printf(" *** Readonly test successful ***\n"); diff --git a/test/recovery/random-abort.c b/test/recovery/random-abort.c index 1d6599ce1b3..febe6530534 100644 --- a/test/recovery/random-abort.c +++ b/test/recovery/random-abort.c @@ -94,14 +94,16 @@ thread_run(void *arg) /* * The value is the name of the record file with our id appended. */ - snprintf(buf, sizeof(buf), RECORDS_FILE, td->id); + testutil_check(__wt_snprintf(buf, sizeof(buf), RECORDS_FILE, td->id)); /* * Set up a large value putting our id in it. Write it in there a * bunch of times, but the rest of the buffer can just be zero. */ - snprintf(lgbuf, sizeof(lgbuf), "th-%" PRIu32, td->id); + testutil_check(__wt_snprintf( + lgbuf, sizeof(lgbuf), "th-%" PRIu32, td->id)); for (i = 0; i < 128; i += strlen(lgbuf)) - snprintf(&large[i], lsize - i, "%s", lgbuf); + testutil_check(__wt_snprintf( + &large[i], lsize - i, "%s", lgbuf)); /* * Keep a separate file with the records we wrote for checking. */ @@ -124,7 +126,8 @@ thread_run(void *arg) * Write our portion of the key space until we're killed. */ for (i = td->start; ; ++i) { - snprintf(kname, sizeof(kname), "%" PRIu64, i); + testutil_check(__wt_snprintf( + kname, sizeof(kname), "%" PRIu64, i)); cursor->set_key(cursor, kname); /* * Every 30th record write a very large record that exceeds the @@ -313,7 +316,8 @@ main(int argc, char *argv[]) * still exists in case the child aborts for some reason we * don't stay in this loop forever. */ - snprintf(statname, sizeof(statname), "%s/%s", home, fs_main); + testutil_check(__wt_snprintf( + statname, sizeof(statname), "%s/%s", home, fs_main)); while (stat(statname, &sb) != 0 && kill(pid, 0) == 0) sleep(1); sleep(timeout); @@ -348,7 +352,8 @@ main(int argc, char *argv[]) fatal = false; for (i = 0; i < nth; ++i) { middle = 0; - snprintf(fname, sizeof(fname), RECORDS_FILE, i); + testutil_check(__wt_snprintf( + fname, sizeof(fname), RECORDS_FILE, i)); if ((fp = fopen(fname, "r")) == NULL) testutil_die(errno, "fopen: %s", fname); @@ -376,7 +381,8 @@ main(int argc, char *argv[]) fname, key, last_key); break; } - snprintf(kname, sizeof(kname), "%" PRIu64, key); + testutil_check(__wt_snprintf( + kname, sizeof(kname), "%" PRIu64, key)); cursor->set_key(cursor, kname); if ((ret = cursor->search(cursor)) != 0) { if (ret != WT_NOTFOUND) diff --git a/test/recovery/truncated-log.c b/test/recovery/truncated-log.c index 1f0a0f7a7bd..a127d8c1c63 100644 --- a/test/recovery/truncated-log.c +++ b/test/recovery/truncated-log.c @@ -30,11 +30,6 @@ #include <sys/wait.h> -#ifdef _WIN32 -/* snprintf is not supported on <= VS2013 */ -#define snprintf _snprintf -#endif - static char home[1024]; /* Program working dir */ static const char * const uri = "table:main"; @@ -137,7 +132,8 @@ usage(void) * Child process creates the database and table, and then writes data into * the table until it is killed by the parent. */ -static void fill_db(void)WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); +static void fill_db(void) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); static void fill_db(void) { @@ -193,9 +189,9 @@ fill_db(void) max_key = min_key * 2; first = true; for (i = 0; i < max_key; ++i) { - snprintf(k, sizeof(k), "key%03d", (int)i); - snprintf(v, sizeof(v), "value%0*d", - (int)(V_SIZE - strlen("value")), (int)i); + testutil_check(__wt_snprintf(k, sizeof(k), "key%03d", (int)i)); + testutil_check(__wt_snprintf(v, sizeof(v), "value%0*d", + (int)(V_SIZE - (strlen("value") + 1)), (int)i)); cursor->set_key(cursor, k); cursor->set_value(cursor, v); if ((ret = cursor->insert(cursor)) != 0) diff --git a/test/salvage/salvage.c b/test/salvage/salvage.c index 942f7faba03..83f9c6349bc 100644 --- a/test/salvage/salvage.c +++ b/test/salvage/salvage.c @@ -440,7 +440,8 @@ run(int r) process(); - snprintf(buf, sizeof(buf), "cmp %s %s > /dev/null", DUMP, RSLT); + testutil_check(__wt_snprintf( + buf, sizeof(buf), "cmp %s %s > /dev/null", DUMP, RSLT)); if (system(buf)) { fprintf(stderr, "check failed, salvage results were incorrect\n"); @@ -485,28 +486,28 @@ build(int ikey, int ivalue, int cnt) switch (page_type) { case WT_PAGE_COL_FIX: - (void)snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "key_format=r,value_format=7t," "allocation_size=%d," "internal_page_max=%d,internal_item_max=%d," "leaf_page_max=%d,leaf_item_max=%d", - PSIZE, PSIZE, OSIZE, PSIZE, OSIZE); + PSIZE, PSIZE, OSIZE, PSIZE, OSIZE)); break; case WT_PAGE_COL_VAR: - (void)snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "key_format=r," "allocation_size=%d," "internal_page_max=%d,internal_item_max=%d," "leaf_page_max=%d,leaf_item_max=%d", - PSIZE, PSIZE, OSIZE, PSIZE, OSIZE); + PSIZE, PSIZE, OSIZE, PSIZE, OSIZE)); break; case WT_PAGE_ROW_LEAF: - (void)snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "key_format=u," "allocation_size=%d," "internal_page_max=%d,internal_item_max=%d," "leaf_page_max=%d,leaf_item_max=%d", - PSIZE, PSIZE, OSIZE, PSIZE, OSIZE); + PSIZE, PSIZE, OSIZE, PSIZE, OSIZE)); break; default: assert(0); @@ -520,7 +521,8 @@ build(int ikey, int ivalue, int cnt) case WT_PAGE_COL_VAR: break; case WT_PAGE_ROW_LEAF: - snprintf(kbuf, sizeof(kbuf), "%010d KEY------", ikey); + testutil_check(__wt_snprintf( + kbuf, sizeof(kbuf), "%010d KEY------", ikey)); key.data = kbuf; key.size = 20; cursor->set_key(cursor, &key); @@ -533,8 +535,8 @@ build(int ikey, int ivalue, int cnt) break; case WT_PAGE_COL_VAR: case WT_PAGE_ROW_LEAF: - snprintf(vbuf, sizeof(vbuf), - "%010d VALUE----", value_unique ? ivalue : 37); + testutil_check(__wt_snprintf(vbuf, sizeof(vbuf), + "%010d VALUE----", value_unique ? ivalue : 37)); value.data = vbuf; value.size = 20; cursor->set_value(cursor, &value); @@ -621,9 +623,9 @@ process(void) /* Salvage. */ config[0] = '\0'; if (verbose) - snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "error_prefix=\"%s\",verbose=[salvage,verify],", - progname); + progname)); strcat(config, "log=(enabled=false),"); CHECK(wiredtiger_open(NULL, NULL, config, &conn) == 0); diff --git a/test/suite/test_truncate01.py b/test/suite/test_truncate01.py index 7d2b3862568..98b741ba6a4 100644 --- a/test/suite/test_truncate01.py +++ b/test/suite/test_truncate01.py @@ -128,6 +128,7 @@ class test_truncate_cursor_order(wttest.WiredTigerTestCase): msg = '/the start cursor position is after the stop cursor position/' self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.truncate(None, c1, c2, None), msg) + c1.set_key(ds.key(10)) c2.set_key(ds.key(20)) self.session.truncate(None, c1, c2, None) diff --git a/test/thread/file.c b/test/thread/file.c index 81ec6ad44f8..7a7d16c4cd6 100644 --- a/test/thread/file.c +++ b/test/thread/file.c @@ -33,20 +33,18 @@ file_create(const char *name) { WT_SESSION *session; int ret; - char *p, *end, config[128]; + char config[128]; if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) testutil_die(ret, "conn.session"); - p = config; - end = config + sizeof(config); - p += snprintf(p, (size_t)(end - p), + testutil_check(__wt_snprintf(config, sizeof(config), "key_format=%s," "internal_page_max=%d," - "leaf_page_max=%d,", - ftype == ROW ? "u" : "r", 16 * 1024, 128 * 1024); - if (ftype == FIX) - (void)snprintf(p, (size_t)(end - p), ",value_format=3t"); + "leaf_page_max=%d," + "%s", + ftype == ROW ? "u" : "r", 16 * 1024, 128 * 1024, + ftype == FIX ? ",value_format=3t" : "")); if ((ret = session->create(session, name, config)) != 0) if (ret != EEXIST) @@ -62,9 +60,10 @@ load(const char *name) WT_CURSOR *cursor; WT_ITEM *key, _key, *value, _value; WT_SESSION *session; - char keybuf[64], valuebuf[64]; - u_int keyno; + uint64_t keyno; + size_t len; int ret; + char keybuf[64], valuebuf[64]; file_create(name); @@ -79,18 +78,22 @@ load(const char *name) value = &_value; for (keyno = 1; keyno <= nkeys; ++keyno) { if (ftype == ROW) { + testutil_check(__wt_snprintf_len_set( + keybuf, sizeof(keybuf), + &len, "%017" PRIu64, keyno)); key->data = keybuf; - key->size = (uint32_t) - snprintf(keybuf, sizeof(keybuf), "%017u", keyno); + key->size = (uint32_t)len; cursor->set_key(cursor, key); } else - cursor->set_key(cursor, (uint32_t)keyno); - value->data = valuebuf; + cursor->set_key(cursor, keyno); if (ftype == FIX) cursor->set_value(cursor, 0x01); else { - value->size = (uint32_t) - snprintf(valuebuf, sizeof(valuebuf), "%37u", keyno); + testutil_check(__wt_snprintf_len_set( + valuebuf, sizeof(valuebuf), + &len, "%37" PRIu64, keyno)); + value->data = valuebuf; + value->size = (uint32_t)len; cursor->set_value(cursor, value); } if ((ret = cursor->insert(cursor)) != 0) diff --git a/test/thread/rw.c b/test/thread/rw.c index c6107a06c49..e8a2650ca51 100644 --- a/test/thread/rw.c +++ b/test/thread/rw.c @@ -66,7 +66,8 @@ rw_start(u_int readers, u_int writers) for (i = 0; i < writers; ++i) { if (i == 0 || multiple_files) { run_info[i].name = dmalloc(64); - snprintf(run_info[i].name, 64, FNAME, i); + testutil_check(__wt_snprintf( + run_info[i].name, 64, FNAME, i)); /* Vary by orders of magnitude */ if (vary_nops) @@ -88,8 +89,8 @@ rw_start(u_int readers, u_int writers) run_info[offset].name = dmalloc(64); /* Have readers read from tables with writes. */ name_index = i % writers; - snprintf( - run_info[offset].name, 64, FNAME, name_index); + testutil_check(__wt_snprintf( + run_info[offset].name, 64, FNAME, name_index)); /* Vary by orders of magnitude */ if (vary_nops) @@ -158,7 +159,8 @@ static inline void reader_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) { WT_ITEM *key, _key; - u_int keyno; + size_t len; + uint64_t keyno; int ret; char keybuf[64]; @@ -166,17 +168,18 @@ reader_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) keyno = __wt_random(&s->rnd) % nkeys + 1; if (ftype == ROW) { + testutil_check(__wt_snprintf_len_set( + keybuf, sizeof(keybuf), &len, "%017" PRIu64, keyno)); key->data = keybuf; - key->size = (uint32_t) - snprintf(keybuf, sizeof(keybuf), "%017u", keyno); + key->size = (uint32_t)len; cursor->set_key(cursor, key); } else - cursor->set_key(cursor, (uint32_t)keyno); + cursor->set_key(cursor, keyno); if ((ret = cursor->search(cursor)) != 0 && ret != WT_NOTFOUND) testutil_die(ret, "cursor.search"); if (log_print) testutil_check(session->log_printf(session, - "Reader Thread %p key %017u", pthread_self(), keyno)); + "Reader Thread %p key %017" PRIu64, pthread_self(), keyno)); } /* @@ -195,7 +198,7 @@ reader(void *arg) id = (int)(uintptr_t)arg; s = &run_info[id]; - __wt_thread_id(tid, sizeof(tid)); + testutil_check(__wt_thread_id(tid, sizeof(tid))); __wt_random_init(&s->rnd); printf(" read thread %2d starting: tid: %s, file: %s\n", @@ -242,7 +245,8 @@ static inline void writer_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) { WT_ITEM *key, _key, *value, _value; - u_int keyno; + uint64_t keyno; + size_t len; int ret; char keybuf[64], valuebuf[64]; @@ -251,12 +255,13 @@ writer_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) keyno = __wt_random(&s->rnd) % nkeys + 1; if (ftype == ROW) { + testutil_check(__wt_snprintf_len_set( + keybuf, sizeof(keybuf), &len, "%017" PRIu64, keyno)); key->data = keybuf; - key->size = (uint32_t) - snprintf(keybuf, sizeof(keybuf), "%017u", keyno); + key->size = (uint32_t)len; cursor->set_key(cursor, key); } else - cursor->set_key(cursor, (uint32_t)keyno); + cursor->set_key(cursor, keyno); if (keyno % 5 == 0) { ++s->remove; if ((ret = @@ -268,8 +273,10 @@ writer_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) if (ftype == FIX) cursor->set_value(cursor, 0x10); else { - value->size = (uint32_t)snprintf( - valuebuf, sizeof(valuebuf), "XXX %37u", keyno); + testutil_check(__wt_snprintf_len_set( + valuebuf, sizeof(valuebuf), + &len, "XXX %37" PRIu64, keyno)); + value->size = (uint32_t)len; cursor->set_value(cursor, value); } if ((ret = cursor->update(cursor)) != 0) @@ -277,7 +284,7 @@ writer_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) } if (log_print) testutil_check(session->log_printf(session, - "Writer Thread %p key %017u", pthread_self(), keyno)); + "Writer Thread %p key %017" PRIu64, pthread_self(), keyno)); } /* @@ -296,7 +303,7 @@ writer(void *arg) id = (int)(uintptr_t)arg; s = &run_info[id]; - __wt_thread_id(tid, sizeof(tid)); + testutil_check(__wt_thread_id(tid, sizeof(tid))); __wt_random_init(&s->rnd); printf("write thread %2d starting: tid: %s, file: %s\n", diff --git a/test/thread/stats.c b/test/thread/stats.c index 67a2c02719b..839d65e8a4d 100644 --- a/test/thread/stats.c +++ b/test/thread/stats.c @@ -65,7 +65,8 @@ stats(void) /* File statistics. */ if (!multiple_files) { - (void)snprintf(name, sizeof(name), "statistics:" FNAME, 0); + testutil_check(__wt_snprintf( + name, sizeof(name), "statistics:" FNAME, 0)); if ((ret = session->open_cursor( session, name, NULL, NULL, &cursor)) != 0) testutil_die(ret, "session.open_cursor"); diff --git a/test/thread/t.c b/test/thread/t.c index 9dfd02bdad2..d2ed4c74bb7 100644 --- a/test/thread/t.c +++ b/test/thread/t.c @@ -185,19 +185,15 @@ wt_connect(char *config_open) }; int ret; char config[512]; - size_t print_count; testutil_clean_work_dir(home); testutil_make_work_dir(home); - print_count = (size_t)snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "create,statistics=(all),error_prefix=\"%s\",%s%s", progname, config_open == NULL ? "" : ",", - config_open == NULL ? "" : config_open); - - if (print_count >= sizeof(config)) - testutil_die(EINVAL, "Config string too long"); + config_open == NULL ? "" : config_open)); if ((ret = wiredtiger_open(home, &event_handler, config, &conn)) != 0) testutil_die(ret, "wiredtiger_open"); diff --git a/test/utility/misc.c b/test/utility/misc.c index 61dad3d76c2..934dac86a7b 100644 --- a/test/utility/misc.c +++ b/test/utility/misc.c @@ -108,14 +108,14 @@ testutil_clean_work_dir(const char *dir) if ((buf = malloc(len)) == NULL) testutil_die(ENOMEM, "Failed to allocate memory"); - snprintf(buf, len, "%s %s %s %s", DIR_EXISTS_COMMAND, dir, - RM_COMMAND, dir); + testutil_check(__wt_snprintf( + buf, len, "%s %s %s %s", DIR_EXISTS_COMMAND, dir, RM_COMMAND, dir)); #else len = strlen(dir) + strlen(RM_COMMAND) + 1; if ((buf = malloc(len)) == NULL) testutil_die(ENOMEM, "Failed to allocate memory"); - snprintf(buf, len, "%s%s", RM_COMMAND, dir); + testutil_check(__wt_snprintf(buf, len, "%s%s", RM_COMMAND, dir)); #endif if ((ret = system(buf)) != 0 && ret != ENOENT) @@ -142,7 +142,7 @@ testutil_make_work_dir(char *dir) testutil_die(ENOMEM, "Failed to allocate memory"); /* mkdir shares syntax between Windows and Linux */ - snprintf(buf, len, "%s%s", MKDIR_COMMAND, dir); + testutil_check(__wt_snprintf(buf, len, "%s%s", MKDIR_COMMAND, dir)); if ((ret = system(buf)) != 0) testutil_die(ret, "%s", buf); free(buf); diff --git a/test/utility/parse_opts.c b/test/utility/parse_opts.c index af9256b199a..c3eff3360de 100644 --- a/test/utility/parse_opts.c +++ b/test/utility/parse_opts.c @@ -115,13 +115,15 @@ testutil_parse_opts(int argc, char * const *argv, TEST_OPTS *opts) if (opts->home == NULL) { len = strlen("WT_TEST.") + strlen(opts->progname) + 10; opts->home = dmalloc(len); - snprintf(opts->home, len, "WT_TEST.%s", opts->progname); + testutil_check(__wt_snprintf( + opts->home, len, "WT_TEST.%s", opts->progname)); } /* Setup the default URI string */ len = strlen("table:") + strlen(opts->progname) + 10; opts->uri = dmalloc(len); - snprintf(opts->uri, len, "table:%s", opts->progname); + testutil_check(__wt_snprintf( + opts->uri, len, "table:%s", opts->progname)); return (0); } diff --git a/test/utility/thread.c b/test/utility/thread.c index 38465b2f02b..122ad554442 100644 --- a/test/utility/thread.c +++ b/test/utility/thread.c @@ -57,8 +57,8 @@ thread_append(void *arg) if (opts->table_type == TABLE_FIX) cursor->set_value(cursor, buf[0]); else { - snprintf(buf, sizeof(buf), - "%" PRIu64 " VALUE ------", recno); + testutil_check(__wt_snprintf(buf, sizeof(buf), + "%" PRIu64 " VALUE ------", recno)); cursor->set_value(cursor, buf); } testutil_check(cursor->insert(cursor)); @@ -94,7 +94,8 @@ thread_insert_append(void *arg) session, opts->uri, NULL, NULL, &cursor)); for (i = 0; i < opts->nrecords; ++i) { - snprintf(kbuf, sizeof(kbuf), "%010d KEY------", (int)i); + testutil_check(__wt_snprintf( + kbuf, sizeof(kbuf), "%010d KEY------", (int)i)); cursor->set_key(cursor, kbuf); cursor->set_value(cursor, "========== VALUE ======="); testutil_check(cursor->insert(cursor)); diff --git a/test/windows/windows_shim.h b/test/windows/windows_shim.h index 648b991b1a2..8985904fb19 100644 --- a/test/windows/windows_shim.h +++ b/test/windows/windows_shim.h @@ -36,6 +36,8 @@ #include <io.h> #include <process.h> +#include "wt_internal.h" + #define inline __inline /* Define some POSIX types */ @@ -52,12 +54,7 @@ typedef int u_int; /* snprintf does not exist on <= VS 2013 */ #if _MSC_VER < 1900 -#define snprintf _wt_snprintf - -_Check_return_opt_ int __cdecl _wt_snprintf( - _Out_writes_(_MaxCount) char * _DstBuf, - _In_ size_t _MaxCount, - _In_z_ _Printf_format_string_ const char * _Format, ...); +#define snprintf __wt_snprintf #endif /* |