summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Alex Gorrod <alexander.gorrod@mongodb.com> 2016-10-26 11:52:07 +1100
committer: Alex Gorrod <alexander.gorrod@mongodb.com> 2016-10-26 11:52:07 +1100
commit: ebbb4eb0b091fa185b06a060d24b68eb6761ba4a (patch)
tree: c42aefac8438be5e9c15ea34f6e56063204762d4
parent: 87909c906cd911464c48015919b5d846e2b4f4e4 (diff)
download: mongo-ebbb4eb0b091fa185b06a060d24b68eb6761ba4a.tar.gz
Import wiredtiger: b11ed312cedb905dec49dd2c9c262fabf64d13cd from branch mongodb-3.2 r3.2.10
ref: 9cf2f89d6d..b11ed312ce
for: 3.2.11
WT-1592 Dump detailed cache information via statistics
WT-2403 Enhance random cursor implementation for LSM trees
WT-2831 Skip creating a checkpoint if there have been no changes
WT-2858 rename wtperf's CONFIG structure
WT-2880 Add support for Zstandard compression
WT-2895 Reduce the runtime of make check testing with disable long
WT-2904 Fix a bug where the reported checkpoint size could be many times data size
WT-2907 Bug in Java ConcurrentCloseTest case
WT-2917 split wtperf's configuration into per-database and per-run parts
WT-2920 Add statistic tracking application thread cache maintenance time
WT-2931 Configure default in-memory dirty cache usage lower
WT-2932 Allow applications to selectively ignore cache limit with in-memory configuration
WT-2933 Fix a race between named snapshots and checkpoints
WT-2937 test_inmem01 aborts due to stuck cache
WT-2938 Assembly files should end in .sx, not .S
WT-2941 Improve test/format to use faster key-generation functions
WT-2942 verbose strings don't need newline
WT-2946 dist/s_docs incompatible with OS X Xcode installation
WT-2948 simplify error handling by making epoch time return never fail
WT-2949 Add an option to wtperf to not close connection on shutdown
WT-2950 Inserting multi-megabyte values can cause large in-memory pages
WT-2954 Inserting multi-megabyte values can cause large in-memory pages
WT-2955 Add statistics tracking the amount of time threads spend waiting for high level locks
WT-2956 utility tests -h option is always overridden by the default setup
WT-2959 Ensure WT_SESSION_IMPL is never used before it's initialized
WT-2963 Race setting max_entries during eviction
WT-2965 test_wt2323_join_visibility can hang on OSX
WT-2974 lint
WT-2976 Add a statistic tracking how long application threads spend doing I/O
WT-2977 Csuite LSM Random test can occasionally fail
WT-2985 Race during checkpoint can cause a core dump
WT-2987 Fix a bug where opening a cursor on an incomplete table drops core
WT-2988 __wt_epoch potentially returns garbage values.
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/config.c504
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/config_opt.h14
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/doxy.c111
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c101
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/misc.c42
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/runners/500m-btree-50r50u.wtperf3
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/runners/500m-btree-80r20u.wtperf3
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/runners/checkpoint-stress.wtperf1
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-stress-multi.wtperf1
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/track.c122
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/wtperf.c1636
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/wtperf.h121
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i27
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c10
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/wtperf_truncate.c63
-rw-r--r--src/third_party/wiredtiger/build_posix/Make.base3
-rw-r--r--src/third_party/wiredtiger/build_posix/Make.subdirs1
-rw-r--r--src/third_party/wiredtiger/build_posix/aclocal/options.m434
-rw-r--r--src/third_party/wiredtiger/build_posix/configure.ac.in2
-rw-r--r--src/third_party/wiredtiger/build_win/wiredtiger_config.h6
-rw-r--r--src/third_party/wiredtiger/dist/api_data.py25
-rw-r--r--src/third_party/wiredtiger/dist/filelist5
-rw-r--r--src/third_party/wiredtiger/dist/flags.py10
-rw-r--r--src/third_party/wiredtiger/dist/s_define.list6
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_docs14
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_export2
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_stat19
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok26
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_style2
-rw-r--r--src/third_party/wiredtiger/dist/s_void9
-rw-r--r--src/third_party/wiredtiger/dist/stat.py7
-rw-r--r--src/third_party/wiredtiger/dist/stat_data.py289
-rw-r--r--src/third_party/wiredtiger/dist/wtperf_config.py25
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_all.c33
-rw-r--r--src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_all.java29
-rw-r--r--src/third_party/wiredtiger/ext/compressors/lz4/lz4_compress.c41
-rw-r--r--src/third_party/wiredtiger/ext/compressors/snappy/snappy_compress.c119
-rw-r--r--src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c291
-rw-r--r--src/third_party/wiredtiger/ext/compressors/zstd/Makefile.am11
-rw-r--r--src/third_party/wiredtiger/ext/compressors/zstd/zstd_compress.c358
-rw-r--r--src/third_party/wiredtiger/src/async/async_worker.c2
-rw-r--r--src/third_party/wiredtiger/src/block/block_ckpt.c28
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_cursor.c2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_handle.c13
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_io.c11
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_read.c33
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_split.c15
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_stat.c29
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_sync.c99
-rw-r--r--src/third_party/wiredtiger/src/cache/cache_las.c2
-rw-r--r--src/third_party/wiredtiger/src/checksum/power8/README.md2
-rw-r--r--src/third_party/wiredtiger/src/checksum/power8/crc32.sx (renamed from src/third_party/wiredtiger/src/checksum/power8/crc32.S)0
-rw-r--r--src/third_party/wiredtiger/src/checksum/zseries/crc32le-vx.sx (renamed from src/third_party/wiredtiger/src/checksum/zseries/crc32le-vx.S)0
-rw-r--r--src/third_party/wiredtiger/src/config/config_collapse.c4
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c79
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_api.c114
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_cache.c82
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_ckpt.c46
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_handle.c18
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_log.c4
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_stat.c12
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_sweep.c2
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_file.c10
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_stat.c77
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_table.c39
-rw-r--r--src/third_party/wiredtiger/src/docs/build-posix.dox6
-rw-r--r--src/third_party/wiredtiger/src/docs/compression.dox44
-rw-r--r--src/third_party/wiredtiger/src/docs/spell.ok2
-rw-r--r--src/third_party/wiredtiger/src/docs/wtperf.dox85
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_lru.c130
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_page.c13
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_stat.c138
-rw-r--r--src/third_party/wiredtiger/src/include/api.h4
-rw-r--r--src/third_party/wiredtiger/src/include/btmem.h4
-rw-r--r--src/third_party/wiredtiger/src/include/btree.h27
-rw-r--r--src/third_party/wiredtiger/src/include/btree.i91
-rw-r--r--src/third_party/wiredtiger/src/include/cache.h1
-rw-r--r--src/third_party/wiredtiger/src/include/cache.i2
-rw-r--r--src/third_party/wiredtiger/src/include/connection.h14
-rw-r--r--src/third_party/wiredtiger/src/include/cursor.h2
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h6
-rw-r--r--src/third_party/wiredtiger/src/include/extern_posix.h2
-rw-r--r--src/third_party/wiredtiger/src/include/extern_win.h2
-rw-r--r--src/third_party/wiredtiger/src/include/flags.h8
-rw-r--r--src/third_party/wiredtiger/src/include/lsm.h24
-rw-r--r--src/third_party/wiredtiger/src/include/misc.i6
-rw-r--r--src/third_party/wiredtiger/src/include/mutex.h22
-rw-r--r--src/third_party/wiredtiger/src/include/mutex.i45
-rw-r--r--src/third_party/wiredtiger/src/include/schema.h8
-rw-r--r--src/third_party/wiredtiger/src/include/session.h3
-rw-r--r--src/third_party/wiredtiger/src/include/stat.h110
-rw-r--r--src/third_party/wiredtiger/src/include/txn.h10
-rw-r--r--src/third_party/wiredtiger/src/include/txn.i6
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in673
-rw-r--r--src/third_party/wiredtiger/src/include/wt_internal.h2
-rw-r--r--src/third_party/wiredtiger/src/log/log.c16
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_cursor.c275
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_cursor_bulk.c16
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_manager.c4
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_merge.c15
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_stat.c22
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_tree.c8
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_work_unit.c2
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_ckpt.c2
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c2
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_time.c23
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_time.c6
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_write.c29
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_stat.c2
-rw-r--r--src/third_party/wiredtiger/src/session/session_api.c30
-rw-r--r--src/third_party/wiredtiger/src/session/session_compact.c9
-rw-r--r--src/third_party/wiredtiger/src/session/session_dhandle.c11
-rw-r--r--src/third_party/wiredtiger/src/support/err.c39
-rw-r--r--src/third_party/wiredtiger/src/support/rand.c6
-rw-r--r--src/third_party/wiredtiger/src/support/stat.c172
-rw-r--r--src/third_party/wiredtiger/src/support/thread_group.c10
-rw-r--r--src/third_party/wiredtiger/src/txn/txn.c52
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_ckpt.c80
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_nsnap.c55
-rw-r--r--src/third_party/wiredtiger/test/bloom/test_bloom.c2
-rwxr-xr-xsrc/third_party/wiredtiger/test/checkpoint/smoke.sh3
-rw-r--r--src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c2
-rw-r--r--src/third_party/wiredtiger/test/csuite/Makefile.am3
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt1965_col_efficiency/main.c2
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c4
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c9
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2403_lsm_workload/main.c241
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c2
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2535_insert_race/main.c4
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2592_join_schema/main.c8
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2695_checksum/main.c4
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2719_reconfig/main.c6
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c18
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c4
-rw-r--r--src/third_party/wiredtiger/test/cursor_order/cursor_order.c2
-rw-r--r--src/third_party/wiredtiger/test/fops/t.c2
-rw-r--r--src/third_party/wiredtiger/test/format/config.c34
-rw-r--r--src/third_party/wiredtiger/test/format/config.h2
-rw-r--r--src/third_party/wiredtiger/test/format/format.h3
-rw-r--r--src/third_party/wiredtiger/test/format/lrt.c16
-rw-r--r--src/third_party/wiredtiger/test/format/t.c6
-rw-r--r--src/third_party/wiredtiger/test/format/util.c22
-rw-r--r--src/third_party/wiredtiger/test/format/wts.c11
-rw-r--r--src/third_party/wiredtiger/test/huge/huge.c2
-rw-r--r--src/third_party/wiredtiger/test/java/com/wiredtiger/test/ConcurrentCloseTest.java21
-rw-r--r--src/third_party/wiredtiger/test/manydbs/manydbs.c2
-rw-r--r--src/third_party/wiredtiger/test/packing/intpack-test.c2
-rw-r--r--src/third_party/wiredtiger/test/packing/intpack-test2.c2
-rw-r--r--src/third_party/wiredtiger/test/packing/intpack-test3.c2
-rw-r--r--src/third_party/wiredtiger/test/packing/packing-test.c2
-rw-r--r--src/third_party/wiredtiger/test/readonly/readonly.c2
-rw-r--r--src/third_party/wiredtiger/test/recovery/random-abort.c4
-rw-r--r--src/third_party/wiredtiger/test/recovery/truncated-log.c2
-rw-r--r--src/third_party/wiredtiger/test/salvage/salvage.c2
-rw-r--r--src/third_party/wiredtiger/test/suite/test_bug017.py46
-rw-r--r--src/third_party/wiredtiger/test/suite/test_cursor_random02.py25
-rw-r--r--src/third_party/wiredtiger/test/suite/test_encrypt01.py1
-rw-r--r--src/third_party/wiredtiger/test/suite/test_inmem02.py64
-rw-r--r--src/third_party/wiredtiger/test/suite/test_stat02.py70
-rw-r--r--src/third_party/wiredtiger/test/thread/t.c2
-rw-r--r--src/third_party/wiredtiger/test/utility/misc.c4
-rw-r--r--src/third_party/wiredtiger/test/utility/parse_opts.c18
-rw-r--r--src/third_party/wiredtiger/test/utility/test_util.h68
-rw-r--r--src/third_party/wiredtiger/tools/wtstats/stat_data.py40
-rwxr-xr-xsrc/third_party/wiredtiger/tools/wtstats/wtstats.py3
165 files changed, 5215 insertions, 2804 deletions
diff --git a/src/third_party/wiredtiger/bench/wtperf/config.c b/src/third_party/wiredtiger/bench/wtperf/config.c
index 4c7b17f102a..5b14a4cdf68 100644
--- a/src/third_party/wiredtiger/bench/wtperf/config.c
+++ b/src/third_party/wiredtiger/bench/wtperf/config.c
@@ -28,15 +28,19 @@
#include "wtperf.h"
-/* All options changeable on command line using -o or -O are listed here. */
-static CONFIG_OPT config_opts[] = {
+static CONFIG_OPT config_opts_desc[] = { /* Option descriptions */
#define OPT_DEFINE_DESC
#include "wtperf_opt.i"
#undef OPT_DEFINE_DESC
};
-static int config_opt(CONFIG *, WT_CONFIG_ITEM *, WT_CONFIG_ITEM *);
-static void config_opt_usage(void);
+static CONFIG_OPTS config_opts_default = { /* Option defaults */
+#define OPT_DEFINE_DEFAULT
+#include "wtperf_opt.i"
+#undef OPT_DEFINE_DEFAULT
+
+ { NULL, NULL } /* config_head */
+};
/*
* STRING_MATCH --
@@ -47,6 +51,72 @@ static void config_opt_usage(void);
(strncmp(str, bytes, len) == 0 && (str)[(len)] == '\0')
/*
+ * config_opt_init --
+ * Initialize the global configuration options.
+ */
+void
+config_opt_init(CONFIG_OPTS **retp)
+{
+ CONFIG_OPT *desc;
+ CONFIG_OPTS *opts;
+ size_t i;
+ char **strp;
+ void *valueloc;
+
+ opts = dmalloc(sizeof(CONFIG_OPTS));
+ *opts = config_opts_default;
+
+ TAILQ_INIT(&opts->config_head);
+
+ /*
+ * Option strings come-and-go as we configure them, so allocate copies
+ * of the default strings now so that we can always free the string as
+ * we allocate new versions.
+ */
+ for (i = 0, desc = config_opts_desc;
+ i < WT_ELEMENTS(config_opts_desc); i++, ++desc)
+ if (desc->type == CONFIG_STRING_TYPE ||
+ desc->type == STRING_TYPE) {
+ valueloc = ((uint8_t *)opts + desc->offset);
+ strp = (char **)valueloc;
+ *strp = dstrdup(*strp);
+ }
+
+ *retp = opts;
+}
+
+/*
+ * config_opt_cleanup --
+ * Clean up the global configuration options.
+ */
+void
+config_opt_cleanup(CONFIG_OPTS *opts)
+{
+ CONFIG_OPT *desc;
+ CONFIG_QUEUE_ENTRY *config_line;
+ size_t i;
+ char **strp;
+ void *valueloc;
+
+ for (i = 0, desc = config_opts_desc;
+ i < WT_ELEMENTS(config_opts_desc); i++, ++desc)
+ if (desc->type == CONFIG_STRING_TYPE ||
+ desc->type == STRING_TYPE) {
+ valueloc = ((uint8_t *)opts + desc->offset);
+ strp = (char **)valueloc;
+ free(*strp);
+ }
+
+ while ((config_line = TAILQ_FIRST(&opts->config_head)) != NULL) {
+ TAILQ_REMOVE(&opts->config_head, config_line, q);
+ free(config_line->string);
+ free(config_line);
+ }
+
+ free(opts);
+}
+
+/*
* config_unescape --
* Modify a string in place, replacing any backslash escape sequences.
* The modified string is always shorter.
@@ -94,168 +164,11 @@ config_unescape(char *orig)
}
/*
- * config_copy --
- * CONFIG structure initialization, based on a source configuration.
- */
-void
-config_copy(CONFIG *dest, const CONFIG *src)
-{
- CONFIG_QUEUE_ENTRY *conf_line, *tmp_line;
- size_t i;
- char *newstr, **pstr;
-
- memcpy(dest, src, sizeof(CONFIG));
-
- if (src->home != NULL)
- dest->home = dstrdup(src->home);
- if (src->monitor_dir != NULL)
- dest->monitor_dir = dstrdup(src->monitor_dir);
- if (src->partial_config != NULL)
- dest->partial_config = dstrdup(src->partial_config);
- if (src->reopen_config != NULL)
- dest->reopen_config = dstrdup(src->reopen_config);
- if (src->base_uri != NULL)
- dest->base_uri = dstrdup(src->base_uri);
-
- if (src->uris != NULL) {
- dest->uris = dcalloc(src->table_count, sizeof(char *));
- for (i = 0; i < src->table_count; i++)
- dest->uris[i] = dstrdup(src->uris[i]);
- }
-
- if (src->async_config != NULL)
- dest->async_config = dstrdup(src->async_config);
-
- dest->ckptthreads = NULL;
- dest->popthreads = NULL;
- dest->workers = NULL;
-
- if (src->workload != NULL) {
- dest->workload = dcalloc(WORKLOAD_MAX, sizeof(WORKLOAD));
- memcpy(dest->workload,
- src->workload, WORKLOAD_MAX * sizeof(WORKLOAD));
- }
-
- for (i = 0; i < sizeof(config_opts) / sizeof(config_opts[0]); i++)
- if (config_opts[i].type == STRING_TYPE ||
- config_opts[i].type == CONFIG_STRING_TYPE) {
- pstr = (char **)
- ((u_char *)dest + config_opts[i].offset);
- if (*pstr != NULL) {
- newstr = dstrdup(*pstr);
- *pstr = newstr;
- }
- }
-
- TAILQ_INIT(&dest->stone_head);
- TAILQ_INIT(&dest->config_head);
-
- /* Clone the config string information into the new cfg object */
- TAILQ_FOREACH(conf_line, &src->config_head, c) {
- tmp_line = dcalloc(sizeof(CONFIG_QUEUE_ENTRY), 1);
- tmp_line->string = dstrdup(conf_line->string);
- TAILQ_INSERT_TAIL(&dest->config_head, tmp_line, c);
- }
-}
-
-/*
- * config_free --
- * Free any storage allocated in the config struct.
- */
-void
-config_free(CONFIG *cfg)
-{
- CONFIG_QUEUE_ENTRY *config_line;
- size_t i;
- char **pstr;
-
- free(cfg->home);
- free(cfg->monitor_dir);
- free(cfg->partial_config);
- free(cfg->reopen_config);
-
- /* Free the various URIs */
- free(cfg->base_uri);
- free(cfg->log_table_uri);
-
- if (cfg->uris != NULL) {
- for (i = 0; i < cfg->table_count; i++)
- free(cfg->uris[i]);
- free(cfg->uris);
- }
-
- free(cfg->async_config);
-
- free(cfg->ckptthreads);
- free(cfg->popthreads);
-
- free(cfg->workers);
- free(cfg->workload);
-
- cleanup_truncate_config(cfg);
-
- while (!TAILQ_EMPTY(&cfg->config_head)) {
- config_line = TAILQ_FIRST(&cfg->config_head);
- TAILQ_REMOVE(&cfg->config_head, config_line, c);
- free(config_line->string);
- free(config_line);
- }
-
- for (i = 0; i < sizeof(config_opts) / sizeof(config_opts[0]); i++)
- if (config_opts[i].type == STRING_TYPE ||
- config_opts[i].type == CONFIG_STRING_TYPE) {
- pstr = (char **)
- ((u_char *)cfg + config_opts[i].offset);
- free(*pstr);
- *pstr = NULL;
- }
-}
-
-/*
- * config_compress --
- * Parse the compression configuration.
- */
-int
-config_compress(CONFIG *cfg)
-{
- int ret;
- const char *s;
-
- ret = 0;
- s = cfg->compression;
- if (strcmp(s, "none") == 0) {
- cfg->compress_ext = NULL;
- cfg->compress_table = NULL;
- } else if (strcmp(s, "lz4") == 0) {
-#ifndef HAVE_BUILTIN_EXTENSION_LZ4
- cfg->compress_ext = LZ4_EXT;
-#endif
- cfg->compress_table = LZ4_BLK;
- } else if (strcmp(s, "snappy") == 0) {
-#ifndef HAVE_BUILTIN_EXTENSION_SNAPPY
- cfg->compress_ext = SNAPPY_EXT;
-#endif
- cfg->compress_table = SNAPPY_BLK;
- } else if (strcmp(s, "zlib") == 0) {
-#ifndef HAVE_BUILTIN_EXTENSION_ZLIB
- cfg->compress_ext = ZLIB_EXT;
-#endif
- cfg->compress_table = ZLIB_BLK;
- } else {
- fprintf(stderr,
- "invalid compression configuration: %s\n", s);
- ret = EINVAL;
- }
- return (ret);
-
-}
-
-/*
* config_threads --
* Parse the thread configuration.
*/
static int
-config_threads(CONFIG *cfg, const char *config, size_t len)
+config_threads(WTPERF *wtperf, const char *config, size_t len)
{
WORKLOAD *workp;
WT_CONFIG_ITEM groupk, groupv, k, v;
@@ -263,19 +176,19 @@ config_threads(CONFIG *cfg, const char *config, size_t len)
int ret;
group = scan = NULL;
- if (cfg->workload != NULL) {
+ if (wtperf->workload != NULL) {
/*
* This call overrides an earlier call. Free and
* reset everything.
*/
- free(cfg->workload);
- cfg->workload = NULL;
- cfg->workload_cnt = 0;
- cfg->workers_cnt = 0;
+ free(wtperf->workload);
+ wtperf->workload = NULL;
+ wtperf->workload_cnt = 0;
+ wtperf->workers_cnt = 0;
}
/* Allocate the workload array. */
- cfg->workload = dcalloc(WORKLOAD_MAX, sizeof(WORKLOAD));
- cfg->workload_cnt = 0;
+ wtperf->workload = dcalloc(WORKLOAD_MAX, sizeof(WORKLOAD));
+ wtperf->workload_cnt = 0;
/*
* The thread configuration may be in multiple groups, that is, we have
@@ -294,14 +207,14 @@ config_threads(CONFIG *cfg, const char *config, size_t len)
goto err;
/* Move to the next workload slot. */
- if (cfg->workload_cnt == WORKLOAD_MAX) {
+ if (wtperf->workload_cnt == WORKLOAD_MAX) {
fprintf(stderr,
"too many workloads configured, only %d workloads "
"supported\n",
WORKLOAD_MAX);
return (EINVAL);
}
- workp = &cfg->workload[cfg->workload_cnt++];
+ workp = &wtperf->workload[wtperf->workload_cnt++];
while ((ret = scan->next(scan, &k, &v)) == 0) {
if (STRING_MATCH("count", k.str, k.len)) {
@@ -334,9 +247,9 @@ config_threads(CONFIG *cfg, const char *config, size_t len)
if ((workp->truncate = v.val) != 1)
goto err;
/* There can only be one Truncate thread. */
- if (F_ISSET(cfg, CFG_TRUNCATE))
+ if (F_ISSET(wtperf, CFG_TRUNCATE))
goto err;
- F_SET(cfg, CFG_TRUNCATE);
+ F_SET(wtperf, CFG_TRUNCATE);
continue;
}
if (STRING_MATCH("truncate_pct", k.str, k.len)) {
@@ -364,13 +277,13 @@ config_threads(CONFIG *cfg, const char *config, size_t len)
goto err;
/* Special random value */
workp->update_delta = INT64_MAX;
- F_SET(cfg, CFG_GROW);
+ F_SET(wtperf, CFG_GROW);
} else {
workp->update_delta = v.val;
if (v.val > 0)
- F_SET(cfg, CFG_GROW);
+ F_SET(wtperf, CFG_GROW);
if (v.val < 0)
- F_SET(cfg, CFG_SHRINK);
+ F_SET(wtperf, CFG_SHRINK);
}
continue;
}
@@ -400,7 +313,7 @@ config_threads(CONFIG *cfg, const char *config, size_t len)
if (workp->truncate != 0 &&
(workp->insert > 0 || workp->read > 0 || workp->update > 0))
goto err;
- cfg->workers_cnt += (u_int)workp->threads;
+ wtperf->workers_cnt += (u_int)workp->threads;
}
ret = group->close(group);
@@ -428,32 +341,34 @@ err: if (group != NULL)
* value.
*/
static int
-config_opt(CONFIG *cfg, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v)
+config_opt(WTPERF *wtperf, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v)
{
- CONFIG_OPT *popt;
+ CONFIG_OPTS *opts;
+ CONFIG_OPT *desc;
char *begin, *newstr, **strp;
int ret;
- size_t i, newlen, nopt;
+ size_t i, newlen;
void *valueloc;
- popt = NULL;
- nopt = sizeof(config_opts)/sizeof(config_opts[0]);
- for (i = 0; i < nopt; i++)
- if (strlen(config_opts[i].name) == k->len &&
- strncmp(config_opts[i].name, k->str, k->len) == 0) {
- popt = &config_opts[i];
+ opts = wtperf->opts;
+
+ desc = NULL;
+ for (i = 0; i < WT_ELEMENTS(config_opts_desc); i++)
+ if (strlen(config_opts_desc[i].name) == k->len &&
+ strncmp(config_opts_desc[i].name, k->str, k->len) == 0) {
+ desc = &config_opts_desc[i];
break;
}
- if (popt == NULL) {
+ if (desc == NULL) {
fprintf(stderr, "wtperf: Error: "
"unknown option \'%.*s\'\n", (int)k->len, k->str);
fprintf(stderr, "Options:\n");
- for (i = 0; i < nopt; i++)
- fprintf(stderr, "\t%s\n", config_opts[i].name);
+ for (i = 0; i < WT_ELEMENTS(config_opts_desc); i++)
+ fprintf(stderr, "\t%s\n", config_opts_desc[i].name);
return (EINVAL);
}
- valueloc = ((u_char *)cfg + popt->offset);
- switch (popt->type) {
+ valueloc = ((uint8_t *)opts + desc->offset);
+ switch (desc->type) {
case BOOL_TYPE:
if (v->type != WT_CONFIG_ITEM_BOOL) {
fprintf(stderr, "wtperf: Error: "
@@ -531,7 +446,7 @@ config_opt(CONFIG *cfg, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v)
*/
if (v->type == WT_CONFIG_ITEM_STRUCT &&
STRING_MATCH("threads", k->str, k->len))
- return (config_threads(cfg, v->str, v->len));
+ return (config_threads(wtperf, v->str, v->len));
if (v->type != WT_CONFIG_ITEM_STRING &&
v->type != WT_CONFIG_ITEM_ID) {
@@ -559,7 +474,7 @@ config_opt(CONFIG *cfg, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v)
* via lines ending in '\'.
*/
int
-config_opt_file(CONFIG *cfg, const char *filename)
+config_opt_file(WTPERF *wtperf, const char *filename)
{
FILE *fp;
size_t linelen, optionpos;
@@ -659,7 +574,7 @@ config_opt_file(CONFIG *cfg, const char *filename)
if (contline)
optionpos += linelen;
else {
- if ((ret = config_opt_line(cfg, option)) != 0) {
+ if ((ret = config_opt_str(wtperf, option)) != 0) {
fprintf(stderr, "wtperf: %s: %d: parse error\n",
filename, linenum);
break;
@@ -684,23 +599,26 @@ config_opt_file(CONFIG *cfg, const char *filename)
}
/*
- * config_opt_line --
+ * config_opt_str --
* Parse a single line of config options. Continued lines have already
* been joined.
*/
int
-config_opt_line(CONFIG *cfg, const char *optstr)
+config_opt_str(WTPERF *wtperf, const char *optstr)
{
+ CONFIG_OPTS *opts;
CONFIG_QUEUE_ENTRY *config_line;
WT_CONFIG_ITEM k, v;
WT_CONFIG_PARSER *scan;
size_t len;
int ret, t_ret;
+ opts = wtperf->opts;
+
len = strlen(optstr);
if ((ret = wiredtiger_config_parser_open(
NULL, optstr, len, &scan)) != 0) {
- lprintf(cfg, ret, 0, "Error in config_scan_begin");
+ lprintf(wtperf, ret, 0, "Error in config_scan_begin");
return (ret);
}
@@ -712,7 +630,7 @@ config_opt_line(CONFIG *cfg, const char *optstr)
*/
config_line = dcalloc(sizeof(CONFIG_QUEUE_ENTRY), 1);
config_line->string = dstrdup(optstr);
- TAILQ_INSERT_TAIL(&cfg->config_head, config_line, c);
+ TAILQ_INSERT_TAIL(&opts->config_head, config_line, q);
while (ret == 0) {
if ((ret = scan->next(scan, &k, &v)) != 0) {
@@ -721,10 +639,10 @@ config_opt_line(CONFIG *cfg, const char *optstr)
ret = 0;
break;
}
- ret = config_opt(cfg, &k, &v);
+ ret = config_opt(wtperf, &k, &v);
}
if ((t_ret = scan->close(scan)) != 0) {
- lprintf(cfg, ret, 0, "Error in config_scan_end");
+ lprintf(wtperf, ret, 0, "Error in config_scan_end");
if (ret == 0)
ret = t_ret;
}
@@ -733,19 +651,20 @@ config_opt_line(CONFIG *cfg, const char *optstr)
}
/*
- * config_opt_str --
- * Set a single string config option.
+ * config_opt_name_value --
+ * Set a name/value configuration pair.
*/
int
-config_opt_str(CONFIG *cfg, const char *name, const char *value)
+config_opt_name_value(WTPERF *wtperf, const char *name, const char *value)
{
+ size_t len;
int ret;
char *optstr;
-
/* name="value" */
- optstr = dmalloc(strlen(name) + strlen(value) + 4);
- sprintf(optstr, "%s=\"%s\"", name, value);
- ret = config_opt_line(cfg, optstr);
+ len = strlen(name) + strlen(value) + 4;
+ optstr = dmalloc(len);
+ snprintf(optstr, len, "%s=\"%s\"", name, value);
+ ret = config_opt_str(wtperf, optstr);
free(optstr);
return (ret);
}
@@ -755,60 +674,63 @@ config_opt_str(CONFIG *cfg, const char *name, const char *value)
* Configuration sanity checks.
*/
int
-config_sanity(CONFIG *cfg)
+config_sanity(WTPERF *wtperf)
{
+ CONFIG_OPTS *opts;
WORKLOAD *workp;
u_int i;
+ opts = wtperf->opts;
+
/* Various intervals should be less than the run-time. */
- if (cfg->run_time > 0 &&
- ((cfg->checkpoint_threads != 0 &&
- cfg->checkpoint_interval > cfg->run_time) ||
- cfg->report_interval > cfg->run_time ||
- cfg->sample_interval > cfg->run_time)) {
+ if (opts->run_time > 0 &&
+ ((opts->checkpoint_threads != 0 &&
+ opts->checkpoint_interval > opts->run_time) ||
+ opts->report_interval > opts->run_time ||
+ opts->sample_interval > opts->run_time)) {
fprintf(stderr, "interval value longer than the run-time\n");
return (EINVAL);
}
/* The maximum is here to keep file name construction simple. */
- if (cfg->table_count < 1 || cfg->table_count > 99999) {
+ if (opts->table_count < 1 || opts->table_count > 99999) {
fprintf(stderr,
"invalid table count, less than 1 or greater than 99999\n");
return (EINVAL);
}
- if (cfg->database_count < 1 || cfg->database_count > 99) {
+ if (opts->database_count < 1 || opts->database_count > 99) {
fprintf(stderr,
"invalid database count, less than 1 or greater than 99\n");
return (EINVAL);
}
- if (cfg->pareto > 100) {
+ if (opts->pareto > 100) {
fprintf(stderr,
"Invalid pareto distribution - should be a percentage\n");
return (EINVAL);
}
- if (cfg->value_sz_max < cfg->value_sz) {
- if (F_ISSET(cfg, CFG_GROW)) {
+ if (opts->value_sz_max < opts->value_sz) {
+ if (F_ISSET(wtperf, CFG_GROW)) {
fprintf(stderr, "value_sz_max %" PRIu32
" must be greater than or equal to value_sz %"
- PRIu32 "\n", cfg->value_sz_max, cfg->value_sz);
+ PRIu32 "\n", opts->value_sz_max, opts->value_sz);
return (EINVAL);
} else
- cfg->value_sz_max = cfg->value_sz;
+ opts->value_sz_max = opts->value_sz;
}
- if (cfg->value_sz_min > cfg->value_sz) {
- if (F_ISSET(cfg, CFG_SHRINK)) {
+ if (opts->value_sz_min > opts->value_sz) {
+ if (F_ISSET(wtperf, CFG_SHRINK)) {
fprintf(stderr, "value_sz_min %" PRIu32
" must be less than or equal to value_sz %"
- PRIu32 "\n", cfg->value_sz_min, cfg->value_sz);
+ PRIu32 "\n", opts->value_sz_min, opts->value_sz);
return (EINVAL);
} else
- cfg->value_sz_min = cfg->value_sz;
+ opts->value_sz_min = opts->value_sz;
}
- if (cfg->readonly && cfg->workload != NULL)
- for (i = 0, workp = cfg->workload;
- i < cfg->workload_cnt; ++i, ++workp)
+ if (opts->readonly && wtperf->workload != NULL)
+ for (i = 0, workp = wtperf->workload;
+ i < wtperf->workload_cnt; ++i, ++workp)
if (workp->insert != 0 || workp->update != 0 ||
workp->truncate != 0) {
fprintf(stderr,
@@ -824,21 +746,21 @@ config_sanity(CONFIG *cfg)
* Consolidate repeated configuration settings so that it only appears
* once in the configuration output file.
*/
-void
-config_consolidate(CONFIG *cfg)
+static void
+config_consolidate(CONFIG_OPTS *opts)
{
CONFIG_QUEUE_ENTRY *conf_line, *test_line, *tmp;
char *string_key;
/*
- * This loop iterates over the config queue and for entry checks if an
- * entry later in the queue has the same key. If a match is found then
- * the current queue entry is removed and we continue.
+ * This loop iterates over the config queue and for each entry checks if
+ * a later queue entry has the same key. If there's a match, the current
+ * queue entry is removed and we continue.
*/
- conf_line = TAILQ_FIRST(&cfg->config_head);
+ conf_line = TAILQ_FIRST(&opts->config_head);
while (conf_line != NULL) {
string_key = strchr(conf_line->string, '=');
- tmp = test_line = TAILQ_NEXT(conf_line, c);
+ tmp = test_line = TAILQ_NEXT(conf_line, q);
while (test_line != NULL) {
/*
* The + 1 here forces the '=' sign to be matched
@@ -849,89 +771,75 @@ config_consolidate(CONFIG *cfg)
if (strncmp(conf_line->string, test_line->string,
(size_t)((string_key - conf_line->string) + 1))
== 0) {
- TAILQ_REMOVE(&cfg->config_head, conf_line, c);
+ TAILQ_REMOVE(&opts->config_head, conf_line, q);
free(conf_line->string);
free(conf_line);
break;
}
- test_line = TAILQ_NEXT(test_line, c);
+ test_line = TAILQ_NEXT(test_line, q);
}
conf_line = tmp;
}
}
/*
- * config_to_file --
+ * config_opt_log --
* Write the final config used in this execution to a file.
*/
void
-config_to_file(CONFIG *cfg)
+config_opt_log(CONFIG_OPTS *opts, const char *path)
{
CONFIG_QUEUE_ENTRY *config_line;
FILE *fp;
- size_t req_len;
- char *path;
- fp = NULL;
+ testutil_checkfmt(((fp = fopen(path, "w")) == NULL), "%s", path);
- /* Backup the config */
- req_len = strlen(cfg->home) + strlen("/CONFIG.wtperf") + 1;
- path = dcalloc(req_len, 1);
- snprintf(path, req_len, "%s/CONFIG.wtperf", cfg->home);
- if ((fp = fopen(path, "w")) == NULL) {
- lprintf(cfg, errno, 0, "%s", path);
- goto err;
- }
+ config_consolidate(opts);
- /* Print the config dump */
- fprintf(fp,"# Warning. This config includes "
+ fprintf(fp,"# Warning: This config includes "
"unwritten, implicit configuration defaults.\n"
"# Changes to those values may cause differences in behavior.\n");
- config_consolidate(cfg);
- config_line = TAILQ_FIRST(&cfg->config_head);
- while (config_line != NULL) {
+ TAILQ_FOREACH(config_line, &opts->config_head, q)
fprintf(fp, "%s\n", config_line->string);
- config_line = TAILQ_NEXT(config_line, c);
- }
-
-err: free(path);
- if (fp != NULL)
- (void)fclose(fp);
+ testutil_check(fclose(fp));
}
/*
- * config_print --
+ * config_opt_print --
* Print out the configuration in verbose mode.
*/
void
-config_print(CONFIG *cfg)
+config_opt_print(WTPERF *wtperf)
{
+ CONFIG_OPTS *opts;
WORKLOAD *workp;
u_int i;
+ opts = wtperf->opts;
+
printf("Workload configuration:\n");
- printf("\t" "Home: %s\n", cfg->home);
- printf("\t" "Table name: %s\n", cfg->table_name);
- printf("\t" "Connection configuration: %s\n", cfg->conn_config);
- if (cfg->sess_config != NULL)
- printf("\t" "Session configuration: %s\n", cfg->sess_config);
+ printf("\t" "Home: %s\n", wtperf->home);
+ printf("\t" "Table name: %s\n", opts->table_name);
+ printf("\t" "Connection configuration: %s\n", opts->conn_config);
+ if (opts->sess_config != NULL)
+ printf("\t" "Session configuration: %s\n", opts->sess_config);
printf("\t%s table: %s\n",
- cfg->create ? "Creating new" : "Using existing",
- cfg->table_config);
+ opts->create ? "Creating new" : "Using existing",
+ opts->table_config);
printf("\t" "Key size: %" PRIu32 ", value size: %" PRIu32 "\n",
- cfg->key_sz, cfg->value_sz);
- if (cfg->create)
+ opts->key_sz, opts->value_sz);
+ if (opts->create)
printf("\t" "Populate threads: %" PRIu32 ", inserting %" PRIu32
" rows\n",
- cfg->populate_threads, cfg->icount);
+ opts->populate_threads, opts->icount);
printf("\t" "Workload seconds, operations: %" PRIu32 ", %" PRIu32 "\n",
- cfg->run_time, cfg->run_ops);
- if (cfg->workload != NULL) {
+ opts->run_time, opts->run_ops);
+ if (wtperf->workload != NULL) {
printf("\t" "Workload configuration(s):\n");
- for (i = 0, workp = cfg->workload;
- i < cfg->workload_cnt; ++i, ++workp)
+ for (i = 0, workp = wtperf->workload;
+ i < wtperf->workload_cnt; ++i, ++workp)
printf("\t\t%" PRId64 " threads (inserts=%" PRId64
", reads=%" PRId64 ", updates=%" PRId64
", truncates=% " PRId64 ")\n",
@@ -941,11 +849,11 @@ config_print(CONFIG *cfg)
}
printf("\t" "Checkpoint threads, interval: %" PRIu32 ", %" PRIu32 "\n",
- cfg->checkpoint_threads, cfg->checkpoint_interval);
- printf("\t" "Reporting interval: %" PRIu32 "\n", cfg->report_interval);
- printf("\t" "Sampling interval: %" PRIu32 "\n", cfg->sample_interval);
+ opts->checkpoint_threads, opts->checkpoint_interval);
+ printf("\t" "Reporting interval: %" PRIu32 "\n", opts->report_interval);
+ printf("\t" "Sampling interval: %" PRIu32 "\n", opts->sample_interval);
- printf("\t" "Verbosity: %" PRIu32 "\n", cfg->verbose);
+ printf("\t" "Verbosity: %" PRIu32 "\n", opts->verbose);
}
/*
@@ -975,10 +883,10 @@ pretty_print(const char *p, const char *indent)
* config_opt_usage --
* Configuration usage error message.
*/
-static void
+void
config_opt_usage(void)
{
- size_t i, nopt;
+ size_t i;
const char *defaultval, *typestr;
pretty_print(
@@ -988,11 +896,10 @@ config_opt_usage(void)
"String values must be enclosed in \" quotes, boolean values must "
"be either true or false.\n", NULL);
- nopt = sizeof(config_opts)/sizeof(config_opts[0]);
- for (i = 0; i < nopt; i++) {
- defaultval = config_opts[i].defaultval;
+ for (i = 0; i < WT_ELEMENTS(config_opts_desc); i++) {
+ defaultval = config_opts_desc[i].defaultval;
typestr = "string";
- switch (config_opts[i].type) {
+ switch (config_opts_desc[i].type) {
case BOOL_TYPE:
typestr = "boolean";
if (strcmp(defaultval, "0") == 0)
@@ -1011,28 +918,7 @@ config_opt_usage(void)
break;
}
printf("%s (%s, default=%s)\n",
- config_opts[i].name, typestr, defaultval);
- pretty_print(config_opts[i].description, "\t");
+ config_opts_desc[i].name, typestr, defaultval);
+ pretty_print(config_opts_desc[i].description, "\t");
}
}
-
-/*
- * usage --
- * wtperf usage print, no error.
- */
-void
-usage(void)
-{
- printf("wtperf [-C config] "
- "[-H mount] [-h home] [-O file] [-o option] [-T config]\n");
- printf("\t-C <string> additional connection configuration\n");
- printf("\t (added to option conn_config)\n");
- printf("\t-H <mount> configure Helium volume mount point\n");
- printf("\t-h <string> Wired Tiger home must exist, default WT_TEST\n");
- printf("\t-O <file> file contains options as listed below\n");
- printf("\t-o option=val[,option=val,...] set options listed below\n");
- printf("\t-T <string> additional table configuration\n");
- printf("\t (added to option table_config)\n");
- printf("\n");
- config_opt_usage();
-}
diff --git a/src/third_party/wiredtiger/bench/wtperf/config_opt.h b/src/third_party/wiredtiger/bench/wtperf/config_opt.h
index b7eff8e143f..3f1ab642227 100644
--- a/src/third_party/wiredtiger/bench/wtperf/config_opt.h
+++ b/src/third_party/wiredtiger/bench/wtperf/config_opt.h
@@ -37,3 +37,17 @@ typedef struct {
CONFIG_OPT_TYPE type;
size_t offset;
} CONFIG_OPT;
+
+typedef struct __config_queue_entry {
+ char *string;
+ TAILQ_ENTRY(__config_queue_entry) q;
+} CONFIG_QUEUE_ENTRY;
+
+typedef struct { /* Option structure */
+#define OPT_DECLARE_STRUCT
+#include "wtperf_opt.i"
+#undef OPT_DECLARE_STRUCT
+
+ /* Queue head to save a copy of the config to be output */
+ TAILQ_HEAD(__config_qh, __config_queue_entry) config_head;
+} CONFIG_OPTS;
diff --git a/src/third_party/wiredtiger/bench/wtperf/doxy.c b/src/third_party/wiredtiger/bench/wtperf/doxy.c
deleted file mode 100644
index 26d73168ef2..00000000000
--- a/src/third_party/wiredtiger/bench/wtperf/doxy.c
+++ /dev/null
@@ -1,111 +0,0 @@
-/*-
- * Public Domain 2014-2016 MongoDB, Inc.
- * Public Domain 2008-2014 WiredTiger, Inc.
- *
- * This is free and unencumbered software released into the public domain.
- *
- * Anyone is free to copy, modify, publish, use, compile, sell, or
- * distribute this software, either in source code form or as a compiled
- * binary, for any purpose, commercial or non-commercial, and by any
- * means.
- *
- * In jurisdictions that recognize copyright laws, the author or authors
- * of this software dedicate any and all copyright interest in the
- * software to the public domain. We make this dedication for the benefit
- * of the public at large and to the detriment of our heirs and
- * successors. We intend this dedication to be an overt act of
- * relinquishment in perpetuity of all present and future rights to this
- * software under copyright law.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <string.h>
-#include <stdio.h>
-
-#include "config_opt.h"
-
-static const CONFIG_OPT config_opts[] = {
-#define OPT_DEFINE_DOXYGEN
-#include "wtperf_opt.i"
-#undef OPT_DEFINE_DOXYGEN
-};
-
-/*
- * pretty_print --
- * Print out lines of text for a 80 character window.
- */
-static void
-pretty_print(const char *p, const char *indent)
-{
- const char *t;
-
- for (;; p = t + 1) {
- if (strlen(p) <= 70)
- break;
- for (t = p + 70; t > p && *t != ' '; --t)
- ;
- if (t == p) /* No spaces? */
- break;
- printf("%s%.*s\n",
- indent == NULL ? "" : indent, (int)(t - p), p);
- }
- if (*p != '\0')
- printf("%s%s\n", indent == NULL ? "" : indent, p);
-}
-
-/*
- * config_doxygen --
- * Output the configuration information for doxgen.
- */
-static void
-config_doxygen(void)
-{
- size_t i, nopt;
- const char *defaultval, *typestr;
-
- nopt = sizeof(config_opts)/sizeof(config_opts[0]);
- for (i = 0; i < nopt; i++) {
- defaultval = config_opts[i].defaultval;
- typestr = "string";
- switch (config_opts[i].type) {
- case BOOL_TYPE:
- typestr = "boolean";
- if (strcmp(defaultval, "0") == 0)
- defaultval = "false";
- else
- defaultval = "true";
- break;
- case CONFIG_STRING_TYPE:
- case STRING_TYPE:
- break;
- case INT_TYPE:
- typestr = "int";
- break;
- case UINT32_TYPE:
- typestr = "unsigned int";
- break;
- }
- printf("@par %s (%s, default=%s)\n",
- config_opts[i].name, typestr, defaultval);
- pretty_print(config_opts[i].description, NULL);
- }
-}
-
-/*
- * config_doxygen --
- * A standalone program to output the configuration options in a doxygen
- * format.
- */
-int
-main()
-{
- config_doxygen();
- return (0);
-}
diff --git a/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c b/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c
index 3c079bb560f..13fa55e86f5 100644
--- a/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c
+++ b/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c
@@ -29,28 +29,25 @@
#include "wtperf.h"
static int
-check_timing(CONFIG *cfg,
+check_timing(WTPERF *wtperf,
const char *name, struct timespec start, struct timespec *stop)
{
+ CONFIG_OPTS *opts;
uint64_t last_interval;
- int ret;
- if ((ret = __wt_epoch(NULL, stop)) != 0) {
- lprintf(cfg, ret, 0,
- "Get time failed in cycle_idle_tables.");
- cfg->error = ret;
- return (ret);
- }
+ opts = wtperf->opts;
+
+ __wt_epoch(NULL, stop);
last_interval = (uint64_t)(WT_TIMEDIFF_SEC(*stop, start));
- if (last_interval > cfg->idle_table_cycle) {
- lprintf(cfg, ret, 0,
+ if (last_interval > opts->idle_table_cycle) {
+ lprintf(wtperf, ETIMEDOUT, 0,
"Cycling idle table failed because %s took %" PRIu64
" seconds which is longer than configured acceptable"
" maximum of %" PRIu32 ".",
- name, last_interval, cfg->idle_table_cycle);
- cfg->error = ETIMEDOUT;
+ name, last_interval, opts->idle_table_cycle);
+ wtperf->error = true;
return (ETIMEDOUT);
}
return (0);
@@ -64,64 +61,62 @@ static void *
cycle_idle_tables(void *arg)
{
struct timespec start, stop;
- CONFIG *cfg;
- WT_SESSION *session;
+ CONFIG_OPTS *opts;
+ WTPERF *wtperf;
WT_CURSOR *cursor;
+ WT_SESSION *session;
int cycle_count, ret;
char uri[512];
- cfg = (CONFIG *)arg;
+ wtperf = (WTPERF *)arg;
+ opts = wtperf->opts;
cycle_count = 0;
- if ((ret = cfg->conn->open_session(
- cfg->conn, NULL, cfg->sess_config, &session)) != 0) {
- lprintf(cfg, ret, 0,
- "Error opening a session on %s", cfg->home);
+ if ((ret = wtperf->conn->open_session(
+ wtperf->conn, NULL, opts->sess_config, &session)) != 0) {
+ lprintf(wtperf, ret, 0,
+ "Error opening a session on %s", wtperf->home);
return (NULL);
}
- for (cycle_count = 0; cfg->idle_cycle_run; ++cycle_count) {
- snprintf(uri, 512, "%s_cycle%07d", cfg->uris[0], cycle_count);
+ for (cycle_count = 0; wtperf->idle_cycle_run; ++cycle_count) {
+ snprintf(uri, sizeof(uri),
+ "%s_cycle%07d", wtperf->uris[0], cycle_count);
/* Don't busy cycle in this loop. */
__wt_sleep(1, 0);
/* Setup a start timer. */
- if ((ret = __wt_epoch(NULL, &start)) != 0) {
- lprintf(cfg, ret, 0,
- "Get time failed in cycle_idle_tables.");
- cfg->error = ret;
- return (NULL);
- }
+ __wt_epoch(NULL, &start);
/* Create a table. */
if ((ret = session->create(
- session, uri, cfg->table_config)) != 0) {
+ session, uri, opts->table_config)) != 0) {
if (ret == EBUSY)
continue;
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"Table create failed in cycle_idle_tables.");
- cfg->error = ret;
+ wtperf->error = true;
return (NULL);
}
- if (check_timing(cfg, "create", start, &stop) != 0)
+ if (check_timing(wtperf, "create", start, &stop) != 0)
return (NULL);
start = stop;
/* Open and close cursor. */
if ((ret = session->open_cursor(
session, uri, NULL, NULL, &cursor)) != 0) {
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"Cursor open failed in cycle_idle_tables.");
- cfg->error = ret;
+ wtperf->error = true;
return (NULL);
}
if ((ret = cursor->close(cursor)) != 0) {
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"Cursor close failed in cycle_idle_tables.");
- cfg->error = ret;
+ wtperf->error = true;
return (NULL);
}
- if (check_timing(cfg, "cursor", start, &stop) != 0)
+ if (check_timing(wtperf, "cursor", start, &stop) != 0)
return (NULL);
start = stop;
@@ -134,12 +129,12 @@ cycle_idle_tables(void *arg)
__wt_sleep(1, 0);
if (ret != 0 && ret != EBUSY) {
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"Table drop failed in cycle_idle_tables.");
- cfg->error = ret;
+ wtperf->error = true;
return (NULL);
}
- if (check_timing(cfg, "drop", start, &stop) != 0)
+ if (check_timing(wtperf, "drop", start, &stop) != 0)
return (NULL);
}
@@ -154,20 +149,23 @@ cycle_idle_tables(void *arg)
* initialization isn't necessary.
*/
int
-start_idle_table_cycle(CONFIG *cfg, pthread_t *idle_table_cycle_thread)
+start_idle_table_cycle(WTPERF *wtperf, pthread_t *idle_table_cycle_thread)
{
+ CONFIG_OPTS *opts;
pthread_t thread_id;
int ret;
- if (cfg->idle_table_cycle == 0)
+ opts = wtperf->opts;
+
+ if (opts->idle_table_cycle == 0)
return (0);
- cfg->idle_cycle_run = true;
+ wtperf->idle_cycle_run = true;
if ((ret = pthread_create(
- &thread_id, NULL, cycle_idle_tables, cfg)) != 0) {
- lprintf(
- cfg, ret, 0, "Error creating idle table cycle thread.");
- cfg->idle_cycle_run = false;
+ &thread_id, NULL, cycle_idle_tables, wtperf)) != 0) {
+ lprintf(wtperf,
+ ret, 0, "Error creating idle table cycle thread.");
+ wtperf->idle_cycle_run = false;
return (ret);
}
*idle_table_cycle_thread = thread_id;
@@ -176,17 +174,20 @@ start_idle_table_cycle(CONFIG *cfg, pthread_t *idle_table_cycle_thread)
}
int
-stop_idle_table_cycle(CONFIG *cfg, pthread_t idle_table_cycle_thread)
+stop_idle_table_cycle(WTPERF *wtperf, pthread_t idle_table_cycle_thread)
{
+ CONFIG_OPTS *opts;
int ret;
- if (cfg->idle_table_cycle == 0 || !cfg->idle_cycle_run)
+ opts = wtperf->opts;
+
+ if (opts->idle_table_cycle == 0 || !wtperf->idle_cycle_run)
return (0);
- cfg->idle_cycle_run = false;
+ wtperf->idle_cycle_run = false;
if ((ret = pthread_join(idle_table_cycle_thread, NULL)) != 0) {
lprintf(
- cfg, ret, 0, "Error joining idle table cycle thread.");
+ wtperf, ret, 0, "Error joining idle table cycle thread.");
return (ret);
}
return (0);
diff --git a/src/third_party/wiredtiger/bench/wtperf/misc.c b/src/third_party/wiredtiger/bench/wtperf/misc.c
index 2821216f240..24b3323a49a 100644
--- a/src/third_party/wiredtiger/bench/wtperf/misc.c
+++ b/src/third_party/wiredtiger/bench/wtperf/misc.c
@@ -30,31 +30,34 @@
/* Setup the logging output mechanism. */
int
-setup_log_file(CONFIG *cfg)
+setup_log_file(WTPERF *wtperf)
{
+ CONFIG_OPTS *opts;
+ size_t len;
int ret;
char *fname;
+ opts = wtperf->opts;
ret = 0;
- if (cfg->verbose < 1)
+ if (opts->verbose < 1)
return (0);
- fname = dcalloc(strlen(cfg->monitor_dir) +
- strlen(cfg->table_name) + strlen(".stat") + 2, 1);
-
- sprintf(fname, "%s/%s.stat", cfg->monitor_dir, cfg->table_name);
- cfg->logf = fopen(fname, "w");
- if (cfg->logf == NULL) {
+ len = strlen(wtperf->monitor_dir) +
+ strlen(opts->table_name) + strlen(".stat") + 2;
+ fname = dmalloc(len);
+ snprintf(fname, len,
+ "%s/%s.stat", wtperf->monitor_dir, opts->table_name);
+ if ((wtperf->logf = fopen(fname, "w")) == NULL) {
ret = errno;
fprintf(stderr, "%s: %s\n", fname, strerror(ret));
}
free(fname);
- if (cfg->logf == NULL)
+ if (wtperf->logf == NULL)
return (ret);
/* Use line buffering for the log file. */
- __wt_stream_set_line_buffer(cfg->logf);
+ __wt_stream_set_line_buffer(wtperf->logf);
return (0);
}
@@ -62,17 +65,20 @@ setup_log_file(CONFIG *cfg)
* Log printf - output a log message.
*/
void
-lprintf(const CONFIG *cfg, int err, uint32_t level, const char *fmt, ...)
+lprintf(const WTPERF *wtperf, int err, uint32_t level, const char *fmt, ...)
{
+ CONFIG_OPTS *opts;
va_list ap;
- if (err == 0 && level <= cfg->verbose) {
+ opts = wtperf->opts;
+
+ if (err == 0 && level <= opts->verbose) {
va_start(ap, fmt);
- vfprintf(cfg->logf, fmt, ap);
+ vfprintf(wtperf->logf, fmt, ap);
va_end(ap);
- fprintf(cfg->logf, "\n");
+ fprintf(wtperf->logf, "\n");
- if (level < cfg->verbose) {
+ if (level < opts->verbose) {
va_start(ap, fmt);
vprintf(fmt, ap);
va_end(ap);
@@ -87,11 +93,11 @@ lprintf(const CONFIG *cfg, int err, uint32_t level, const char *fmt, ...)
vfprintf(stderr, fmt, ap);
va_end(ap);
fprintf(stderr, " Error: %s\n", wiredtiger_strerror(err));
- if (cfg->logf != NULL) {
+ if (wtperf->logf != NULL) {
va_start(ap, fmt);
- vfprintf(cfg->logf, fmt, ap);
+ vfprintf(wtperf->logf, fmt, ap);
va_end(ap);
- fprintf(cfg->logf, " Error: %s\n", wiredtiger_strerror(err));
+ fprintf(wtperf->logf, " Error: %s\n", wiredtiger_strerror(err));
}
/* Never attempt to continue if we got a panic from WiredTiger. */
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/500m-btree-50r50u.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/500m-btree-50r50u.wtperf
index 06745bf7cca..536127f0dd8 100644
--- a/src/third_party/wiredtiger/bench/wtperf/runners/500m-btree-50r50u.wtperf
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/500m-btree-50r50u.wtperf
@@ -10,6 +10,9 @@ create=false
compression="snappy"
sess_config="isolation=snapshot"
table_count=2
+# Setting close_conn to false allows this test to close/finish faster, but if
+# running as part of a set, the next test will need to run recovery.
+close_conn=false
key_sz=40
value_sz=120
max_latency=2000
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/500m-btree-80r20u.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/500m-btree-80r20u.wtperf
index 77edbfb4941..d6218c44af0 100644
--- a/src/third_party/wiredtiger/bench/wtperf/runners/500m-btree-80r20u.wtperf
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/500m-btree-80r20u.wtperf
@@ -8,6 +8,9 @@
conn_config="cache_size=16G,checkpoint=(wait=60,log_size=2GB),session_max=20000,log=(enabled),statistics=(fast),statistics_log=(wait=30,json),eviction=(threads_max=4)"
create=false
compression="snappy"
+# Setting close_conn to false allows this test to close/finish faster, but if
+# running as part of a set, the next test will need to run recovery.
+close_conn=false
sess_config="isolation=snapshot"
table_count=2
key_sz=40
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/checkpoint-stress.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/checkpoint-stress.wtperf
index 0c98a0c2db0..bbd3a3ba5ed 100644
--- a/src/third_party/wiredtiger/bench/wtperf/runners/checkpoint-stress.wtperf
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/checkpoint-stress.wtperf
@@ -4,6 +4,7 @@ conn_config="cache_size=16GB,eviction=(threads_max=4),log=(enabled=false)"
table_config="leaf_page_max=32k,internal_page_max=16k,allocation_size=4k,split_pct=90,type=file"
# Enough data to fill the cache. 150 million 1k records results in two ~11GB
# tables
+close_conn=false
icount=150000000
create=true
compression="snappy"
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-stress-multi.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-stress-multi.wtperf
index 9699b9ae3bb..a5a29f66fa0 100644
--- a/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-stress-multi.wtperf
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-stress-multi.wtperf
@@ -1,6 +1,7 @@
conn_config="cache_size=1G,eviction=(threads_max=4),session_max=2000"
table_config="type=file"
table_count=100
+close_conn=false
icount=100000000
report_interval=5
run_time=600
diff --git a/src/third_party/wiredtiger/bench/wtperf/track.c b/src/third_party/wiredtiger/bench/wtperf/track.c
index b3f4847d9d0..822bdaa4b4a 100644
--- a/src/third_party/wiredtiger/bench/wtperf/track.c
+++ b/src/third_party/wiredtiger/bench/wtperf/track.c
@@ -32,16 +32,18 @@
* Return total insert operations for the populate phase.
*/
uint64_t
-sum_pop_ops(CONFIG *cfg)
+sum_pop_ops(WTPERF *wtperf)
{
- CONFIG_THREAD *thread;
+ CONFIG_OPTS *opts;
+ WTPERF_THREAD *thread;
uint64_t total;
u_int i;
+ opts = wtperf->opts;
total = 0;
- for (i = 0, thread = cfg->popthreads;
- thread != NULL && i < cfg->populate_threads; ++i, ++thread)
+ for (i = 0, thread = wtperf->popthreads;
+ thread != NULL && i < opts->populate_threads; ++i, ++thread)
total += thread->insert.ops;
return (total);
}
@@ -50,16 +52,18 @@ sum_pop_ops(CONFIG *cfg)
* Return total checkpoint operations.
*/
uint64_t
-sum_ckpt_ops(CONFIG *cfg)
+sum_ckpt_ops(WTPERF *wtperf)
{
- CONFIG_THREAD *thread;
+ CONFIG_OPTS *opts;
+ WTPERF_THREAD *thread;
uint64_t total;
u_int i;
+ opts = wtperf->opts;
total = 0;
- for (i = 0, thread = cfg->ckptthreads;
- thread != NULL && i < cfg->checkpoint_threads; ++i, ++thread)
+ for (i = 0, thread = wtperf->ckptthreads;
+ thread != NULL && i < opts->checkpoint_threads; ++i, ++thread)
total += thread->ckpt.ops;
return (total);
}
@@ -68,19 +72,22 @@ sum_ckpt_ops(CONFIG *cfg)
* Return total operations count for the worker threads.
*/
static uint64_t
-sum_ops(CONFIG *cfg, size_t field_offset)
+sum_ops(WTPERF *wtperf, size_t field_offset)
{
- CONFIG_THREAD *thread;
+ CONFIG_OPTS *opts;
+ WTPERF_THREAD *thread;
uint64_t total;
int64_t i, th_cnt;
+ opts = wtperf->opts;
total = 0;
- if (cfg->popthreads == NULL) {
- thread = cfg->workers;
- th_cnt = cfg->workers_cnt;
+
+ if (wtperf->popthreads == NULL) {
+ thread = wtperf->workers;
+ th_cnt = wtperf->workers_cnt;
} else {
- thread = cfg->popthreads;
- th_cnt = cfg->populate_threads;
+ thread = wtperf->popthreads;
+ th_cnt = opts->populate_threads;
}
for (i = 0; thread != NULL && i < th_cnt; ++i, ++thread)
total += ((TRACK *)((uint8_t *)thread + field_offset))->ops;
@@ -88,24 +95,24 @@ sum_ops(CONFIG *cfg, size_t field_offset)
return (total);
}
uint64_t
-sum_insert_ops(CONFIG *cfg)
+sum_insert_ops(WTPERF *wtperf)
{
- return (sum_ops(cfg, offsetof(CONFIG_THREAD, insert)));
+ return (sum_ops(wtperf, offsetof(WTPERF_THREAD, insert)));
}
uint64_t
-sum_read_ops(CONFIG *cfg)
+sum_read_ops(WTPERF *wtperf)
{
- return (sum_ops(cfg, offsetof(CONFIG_THREAD, read)));
+ return (sum_ops(wtperf, offsetof(WTPERF_THREAD, read)));
}
uint64_t
-sum_truncate_ops(CONFIG *cfg)
+sum_truncate_ops(WTPERF *wtperf)
{
- return (sum_ops(cfg, offsetof(CONFIG_THREAD, truncate)));
+ return (sum_ops(wtperf, offsetof(WTPERF_THREAD, truncate)));
}
uint64_t
-sum_update_ops(CONFIG *cfg)
+sum_update_ops(WTPERF *wtperf)
{
- return (sum_ops(cfg, offsetof(CONFIG_THREAD, update)));
+ return (sum_ops(wtperf, offsetof(WTPERF_THREAD, update)));
}
/*
@@ -114,25 +121,27 @@ sum_update_ops(CONFIG *cfg)
* particular operation.
*/
static void
-latency_op(CONFIG *cfg,
+latency_op(WTPERF *wtperf,
size_t field_offset, uint32_t *avgp, uint32_t *minp, uint32_t *maxp)
{
- CONFIG_THREAD *thread;
+ CONFIG_OPTS *opts;
TRACK *track;
+ WTPERF_THREAD *thread;
uint64_t ops, latency, tmp;
int64_t i, th_cnt;
uint32_t max, min;
+ opts = wtperf->opts;
ops = latency = 0;
max = 0;
min = UINT32_MAX;
- if (cfg->popthreads == NULL) {
- thread = cfg->workers;
- th_cnt = cfg->workers_cnt;
+ if (wtperf->popthreads == NULL) {
+ thread = wtperf->workers;
+ th_cnt = wtperf->workers_cnt;
} else {
- thread = cfg->popthreads;
- th_cnt = cfg->populate_threads;
+ thread = wtperf->popthreads;
+ th_cnt = opts->populate_threads;
}
for (i = 0; thread != NULL && i < th_cnt; ++i, ++thread) {
track = (TRACK *)((uint8_t *)thread + field_offset);
@@ -160,11 +169,11 @@ latency_op(CONFIG *cfg,
}
}
void
-latency_read(CONFIG *cfg, uint32_t *avgp, uint32_t *minp, uint32_t *maxp)
+latency_read(WTPERF *wtperf, uint32_t *avgp, uint32_t *minp, uint32_t *maxp)
{
static uint32_t last_avg = 0, last_max = 0, last_min = 0;
- latency_op(cfg, offsetof(CONFIG_THREAD, read), avgp, minp, maxp);
+ latency_op(wtperf, offsetof(WTPERF_THREAD, read), avgp, minp, maxp);
/*
* If nothing happened, graph the average, minimum and maximum as they
@@ -181,11 +190,11 @@ latency_read(CONFIG *cfg, uint32_t *avgp, uint32_t *minp, uint32_t *maxp)
}
}
void
-latency_insert(CONFIG *cfg, uint32_t *avgp, uint32_t *minp, uint32_t *maxp)
+latency_insert(WTPERF *wtperf, uint32_t *avgp, uint32_t *minp, uint32_t *maxp)
{
static uint32_t last_avg = 0, last_max = 0, last_min = 0;
- latency_op(cfg, offsetof(CONFIG_THREAD, insert), avgp, minp, maxp);
+ latency_op(wtperf, offsetof(WTPERF_THREAD, insert), avgp, minp, maxp);
/*
* If nothing happened, graph the average, minimum and maximum as they
@@ -202,11 +211,11 @@ latency_insert(CONFIG *cfg, uint32_t *avgp, uint32_t *minp, uint32_t *maxp)
}
}
void
-latency_update(CONFIG *cfg, uint32_t *avgp, uint32_t *minp, uint32_t *maxp)
+latency_update(WTPERF *wtperf, uint32_t *avgp, uint32_t *minp, uint32_t *maxp)
{
static uint32_t last_avg = 0, last_max = 0, last_min = 0;
- latency_op(cfg, offsetof(CONFIG_THREAD, update), avgp, minp, maxp);
+ latency_op(wtperf, offsetof(WTPERF_THREAD, update), avgp, minp, maxp);
/*
* If nothing happened, graph the average, minimum and maximum as they
@@ -228,17 +237,17 @@ latency_update(CONFIG *cfg, uint32_t *avgp, uint32_t *minp, uint32_t *maxp)
* Sum latency for a set of threads.
*/
static void
-sum_latency(CONFIG *cfg, size_t field_offset, TRACK *total)
+sum_latency(WTPERF *wtperf, size_t field_offset, TRACK *total)
{
- CONFIG_THREAD *thread;
+ WTPERF_THREAD *thread;
TRACK *trk;
int64_t i;
u_int j;
memset(total, 0, sizeof(*total));
- for (i = 0, thread = cfg->workers;
- thread != NULL && i < cfg->workers_cnt; ++i, ++thread) {
+ for (i = 0, thread = wtperf->workers;
+ thread != NULL && i < wtperf->workers_cnt; ++i, ++thread) {
trk = (TRACK *)((uint8_t *)thread + field_offset);
for (j = 0; j < ELEMENTS(trk->us); ++j) {
@@ -256,32 +265,33 @@ sum_latency(CONFIG *cfg, size_t field_offset, TRACK *total)
}
}
static void
-sum_insert_latency(CONFIG *cfg, TRACK *total)
+sum_insert_latency(WTPERF *wtperf, TRACK *total)
{
- sum_latency(cfg, offsetof(CONFIG_THREAD, insert), total);
+ sum_latency(wtperf, offsetof(WTPERF_THREAD, insert), total);
}
static void
-sum_read_latency(CONFIG *cfg, TRACK *total)
+sum_read_latency(WTPERF *wtperf, TRACK *total)
{
- sum_latency(cfg, offsetof(CONFIG_THREAD, read), total);
+ sum_latency(wtperf, offsetof(WTPERF_THREAD, read), total);
}
static void
-sum_update_latency(CONFIG *cfg, TRACK *total)
+sum_update_latency(WTPERF *wtperf, TRACK *total)
{
- sum_latency(cfg, offsetof(CONFIG_THREAD, update), total);
+ sum_latency(wtperf, offsetof(WTPERF_THREAD, update), total);
}
static void
-latency_print_single(CONFIG *cfg, TRACK *total, const char *name)
+latency_print_single(WTPERF *wtperf, TRACK *total, const char *name)
{
FILE *fp;
u_int i;
uint64_t cumops;
char path[1024];
- snprintf(path, sizeof(path), "%s/latency.%s", cfg->monitor_dir, name);
+ snprintf(path, sizeof(path),
+ "%s/latency.%s", wtperf->monitor_dir, name);
if ((fp = fopen(path, "w")) == NULL) {
- lprintf(cfg, errno, 0, "%s", path);
+ lprintf(wtperf, errno, 0, "%s", path);
return;
}
@@ -317,14 +327,14 @@ latency_print_single(CONFIG *cfg, TRACK *total, const char *name)
}
void
-latency_print(CONFIG *cfg)
+latency_print(WTPERF *wtperf)
{
TRACK total;
- sum_insert_latency(cfg, &total);
- latency_print_single(cfg, &total, "insert");
- sum_read_latency(cfg, &total);
- latency_print_single(cfg, &total, "read");
- sum_update_latency(cfg, &total);
- latency_print_single(cfg, &total, "update");
+ sum_insert_latency(wtperf, &total);
+ latency_print_single(wtperf, &total, "insert");
+ sum_read_latency(wtperf, &total);
+ latency_print_single(wtperf, &total, "read");
+ sum_update_latency(wtperf, &total);
+ latency_print_single(wtperf, &total, "update");
}
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.c b/src/third_party/wiredtiger/bench/wtperf/wtperf.c
index bf6b156bb69..8c7f0053388 100644
--- a/src/third_party/wiredtiger/bench/wtperf/wtperf.c
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.c
@@ -31,78 +31,38 @@
/* Default values. */
#define DEFAULT_HOME "WT_TEST"
#define DEFAULT_MONITOR_DIR "WT_TEST"
-static const CONFIG default_cfg = {
- NULL, /* home */
- NULL, /* monitor dir */
- NULL, /* partial logging */
- NULL, /* reopen config */
- NULL, /* base_uri */
- NULL, /* log_table_uri */
- NULL, /* uris */
- NULL, /* conn */
- NULL, /* logf */
- NULL, /* async */
- NULL, NULL, /* compressor ext, blk */
- NULL, NULL, /* populate, checkpoint threads */
-
- NULL, /* worker threads */
- 0, /* worker thread count */
- NULL, /* workloads */
- 0, /* workload count */
- 0, /* use_asyncops */
- 0, /* checkpoint operations */
- 0, /* insert operations */
- 0, /* read operations */
- 0, /* truncate operations */
- 0, /* update operations */
- 0, /* insert key */
- 0, /* log like table key */
- 0, /* checkpoint in progress */
- 0, /* thread error */
- 0, /* notify threads to stop */
- 0, /* in warmup phase */
- false, /* Signal for idle cycle thread */
- 0, /* total seconds running */
- 0, /* flags */
- {NULL, NULL}, /* the truncate queue */
- {NULL, NULL}, /* the config queue */
-
-#define OPT_DEFINE_DEFAULT
-#include "wtperf_opt.i"
-#undef OPT_DEFINE_DEFAULT
-};
static const char * const debug_cconfig = "";
static const char * const debug_tconfig = "";
static void *checkpoint_worker(void *);
-static int drop_all_tables(CONFIG *);
-static int execute_populate(CONFIG *);
-static int execute_workload(CONFIG *);
-static int find_table_count(CONFIG *);
+static int drop_all_tables(WTPERF *);
+static int execute_populate(WTPERF *);
+static int execute_workload(WTPERF *);
+static int find_table_count(WTPERF *);
static void *monitor(void *);
static void *populate_thread(void *);
-static void randomize_value(CONFIG_THREAD *, char *);
+static void randomize_value(WTPERF_THREAD *, char *);
static void recreate_dir(const char *);
-static int start_all_runs(CONFIG *);
-static int start_run(CONFIG *);
-static int start_threads(CONFIG *,
- WORKLOAD *, CONFIG_THREAD *, u_int, void *(*)(void *));
-static int stop_threads(CONFIG *, u_int, CONFIG_THREAD *);
+static int start_all_runs(WTPERF *);
+static int start_run(WTPERF *);
+static int start_threads(WTPERF *,
+ WORKLOAD *, WTPERF_THREAD *, u_int, void *(*)(void *));
+static int stop_threads(WTPERF *, u_int, WTPERF_THREAD *);
static void *thread_run_wtperf(void *);
-static void update_value_delta(CONFIG_THREAD *);
+static void update_value_delta(WTPERF_THREAD *);
static void *worker(void *);
-static uint64_t wtperf_rand(CONFIG_THREAD *);
-static uint64_t wtperf_value_range(CONFIG *);
+static uint64_t wtperf_rand(WTPERF_THREAD *);
+static uint64_t wtperf_value_range(WTPERF *);
-#define INDEX_COL_NAMES ",columns=(key,val)"
+#define INDEX_COL_NAMES "columns=(key,val)"
/* Retrieve an ID for the next insert operation. */
static inline uint64_t
-get_next_incr(CONFIG *cfg)
+get_next_incr(WTPERF *wtperf)
{
- return (__wt_atomic_add64(&cfg->insert_key, 1));
+ return (__wt_atomic_add64(&wtperf->insert_key, 1));
}
/*
@@ -110,11 +70,14 @@ get_next_incr(CONFIG *cfg)
* other element in the value buffer.
*/
static void
-randomize_value(CONFIG_THREAD *thread, char *value_buf)
+randomize_value(WTPERF_THREAD *thread, char *value_buf)
{
+ CONFIG_OPTS *opts;
uint8_t *vb;
uint32_t i, max_range, rand_val;
+ opts = thread->wtperf->opts;
+
/*
* Limit how much of the buffer we validate for length, this means
* that only threads that do growing updates will ever make changes to
@@ -123,11 +86,11 @@ randomize_value(CONFIG_THREAD *thread, char *value_buf)
* in this performance sensitive function.
*/
if (thread->workload == NULL || thread->workload->update_delta == 0)
- max_range = thread->cfg->value_sz;
+ max_range = opts->value_sz;
else if (thread->workload->update_delta > 0)
- max_range = thread->cfg->value_sz_max;
+ max_range = opts->value_sz_max;
else
- max_range = thread->cfg->value_sz_min;
+ max_range = opts->value_sz_min;
/*
* Generate a single random value and re-use it. We generally only
@@ -157,17 +120,17 @@ randomize_value(CONFIG_THREAD *thread, char *value_buf)
* Partition data by key ranges.
*/
static uint32_t
-map_key_to_table(CONFIG *cfg, uint64_t k)
+map_key_to_table(CONFIG_OPTS *opts, uint64_t k)
{
- if (cfg->range_partition) {
+ if (opts->range_partition) {
/* Take care to return a result in [0..table_count-1]. */
- if (k > cfg->icount + cfg->random_range)
+ if (k > opts->icount + opts->random_range)
return (0);
return ((uint32_t)((k - 1) /
- ((cfg->icount + cfg->random_range + cfg->table_count - 1) /
- cfg->table_count)));
+ ((opts->icount + opts->random_range +
+ opts->table_count - 1) / opts->table_count)));
} else
- return ((uint32_t)(k % cfg->table_count));
+ return ((uint32_t)(k % opts->table_count));
}
/*
@@ -176,26 +139,28 @@ map_key_to_table(CONFIG *cfg, uint64_t k)
* scratch buffer.
*/
static inline void
-update_value_delta(CONFIG_THREAD *thread)
+update_value_delta(WTPERF_THREAD *thread)
{
- CONFIG *cfg;
+ CONFIG_OPTS *opts;
+ WTPERF *wtperf;
char * value;
int64_t delta, len, new_len;
- cfg = thread->cfg;
+ wtperf = thread->wtperf;
+ opts = wtperf->opts;
value = thread->value_buf;
delta = thread->workload->update_delta;
len = (int64_t)strlen(value);
if (delta == INT64_MAX)
delta = __wt_random(&thread->rnd) %
- (cfg->value_sz_max - cfg->value_sz);
+ (opts->value_sz_max - opts->value_sz);
/* Ensure we aren't changing across boundaries */
- if (delta > 0 && len + delta > cfg->value_sz_max)
- delta = cfg->value_sz_max - len;
- else if (delta < 0 && len + delta < cfg->value_sz_min)
- delta = cfg->value_sz_min - len;
+ if (delta > 0 && len + delta > opts->value_sz_max)
+ delta = opts->value_sz_max - len;
+ else if (delta < 0 && len + delta < opts->value_sz_min)
+ delta = opts->value_sz_min - len;
/* Bail if there isn't anything to do */
if (delta == 0)
@@ -206,7 +171,7 @@ update_value_delta(CONFIG_THREAD *thread)
else {
/* Extend the value by the configured amount. */
for (new_len = len;
- new_len < cfg->value_sz_max && new_len - len < delta;
+ new_len < opts->value_sz_max && new_len - len < delta;
new_len++)
value[new_len] = 'a';
}
@@ -215,24 +180,24 @@ update_value_delta(CONFIG_THREAD *thread)
static int
cb_asyncop(WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP *op, int ret, uint32_t flags)
{
- CONFIG *cfg;
- CONFIG_THREAD *thread;
TRACK *trk;
+ WTPERF *wtperf;
+ WTPERF_THREAD *thread;
WT_ASYNC_OPTYPE type;
- char *value;
uint32_t *tables;
int t_ret;
+ char *value;
(void)cb;
(void)flags;
- cfg = NULL; /* -Wconditional-uninitialized */
+ wtperf = NULL; /* -Wconditional-uninitialized */
thread = NULL; /* -Wconditional-uninitialized */
type = op->get_type(op);
if (type != WT_AOP_COMPACT) {
- thread = (CONFIG_THREAD *)op->app_private;
- cfg = thread->cfg;
+ thread = (WTPERF_THREAD *)op->app_private;
+ wtperf = thread->wtperf;
}
trk = NULL;
@@ -249,7 +214,7 @@ cb_asyncop(WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP *op, int ret, uint32_t flags)
if (ret == 0 &&
(t_ret = op->get_value(op, &value)) != 0) {
ret = t_ret;
- lprintf(cfg, ret, 0, "get_value in read.");
+ lprintf(wtperf, ret, 0, "get_value in read.");
goto err;
}
break;
@@ -259,7 +224,8 @@ cb_asyncop(WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP *op, int ret, uint32_t flags)
case WT_AOP_NONE:
case WT_AOP_REMOVE:
/* We never expect this type. */
- lprintf(cfg, ret, 0, "No type in op %" PRIu64, op->get_id(op));
+ lprintf(wtperf,
+ ret, 0, "No type in op %" PRIu64, op->get_id(op));
goto err;
}
@@ -273,15 +239,14 @@ cb_asyncop(WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP *op, int ret, uint32_t flags)
if (type == WT_AOP_COMPACT)
return (0);
if (ret == 0 || (ret == WT_NOTFOUND && type != WT_AOP_INSERT)) {
- if (!cfg->in_warmup)
+ if (!wtperf->in_warmup)
(void)__wt_atomic_add64(&trk->ops, 1);
return (0);
}
err:
/* Panic if error */
- lprintf(cfg, ret, 0, "Error in op %" PRIu64,
- op->get_id(op));
- cfg->error = cfg->stop = 1;
+ lprintf(wtperf, ret, 0, "Error in op %" PRIu64, op->get_id(op));
+ wtperf->error = wtperf->stop = true;
return (1);
}
@@ -353,8 +318,9 @@ op_name(uint8_t *op)
static void *
worker_async(void *arg)
{
- CONFIG *cfg;
- CONFIG_THREAD *thread;
+ CONFIG_OPTS *opts;
+ WTPERF *wtperf;
+ WTPERF_THREAD *thread;
WT_ASYNC_OP *asyncop;
WT_CONNECTION *conn;
uint64_t next_val;
@@ -362,9 +328,10 @@ worker_async(void *arg)
int ret;
char *key_buf, *value_buf;
- thread = (CONFIG_THREAD *)arg;
- cfg = thread->cfg;
- conn = cfg->conn;
+ thread = (WTPERF_THREAD *)arg;
+ wtperf = thread->wtperf;
+ opts = wtperf->opts;
+ conn = wtperf->conn;
key_buf = thread->key_buf;
value_buf = thread->value_buf;
@@ -372,7 +339,7 @@ worker_async(void *arg)
op = thread->workload->ops;
op_end = op + sizeof(thread->workload->ops);
- while (!cfg->stop) {
+ while (!wtperf->stop) {
/*
* Generate the next key and setup operation specific
* statistics tracking objects.
@@ -380,10 +347,10 @@ worker_async(void *arg)
switch (*op) {
case WORKER_INSERT:
case WORKER_INSERT_RMW:
- if (cfg->random_range)
+ if (opts->random_range)
next_val = wtperf_rand(thread);
else
- next_val = cfg->icount + get_next_incr(cfg);
+ next_val = opts->icount + get_next_incr(wtperf);
break;
case WORKER_READ:
case WORKER_UPDATE:
@@ -394,22 +361,22 @@ worker_async(void *arg)
* we rely on at least one insert to get a valid item
* id.
*/
- if (wtperf_value_range(cfg) < next_val)
+ if (wtperf_value_range(wtperf) < next_val)
continue;
break;
default:
goto err; /* can't happen */
}
- generate_key(cfg, key_buf, next_val);
+ generate_key(opts, key_buf, next_val);
/*
* Spread the data out around the multiple databases.
* Sleep to allow workers a chance to run and process async ops.
* Then retry to get an async op.
*/
- while ((ret = conn->async_new_op(
- conn, cfg->uris[map_key_to_table(cfg, next_val)],
+ while ((ret = conn->async_new_op(conn,
+ wtperf->uris[map_key_to_table(wtperf->opts, next_val)],
NULL, &cb, &asyncop)) == EBUSY)
(void)usleep(10000);
if (ret != 0)
@@ -424,23 +391,23 @@ worker_async(void *arg)
break;
goto op_err;
case WORKER_INSERT:
- if (cfg->random_value)
+ if (opts->random_value)
randomize_value(thread, value_buf);
asyncop->set_value(asyncop, value_buf);
if ((ret = asyncop->insert(asyncop)) == 0)
break;
goto op_err;
case WORKER_UPDATE:
- if (cfg->random_value)
+ if (opts->random_value)
randomize_value(thread, value_buf);
asyncop->set_value(asyncop, value_buf);
if ((ret = asyncop->update(asyncop)) == 0)
break;
goto op_err;
default:
-op_err: lprintf(cfg, ret, 0,
+op_err: lprintf(wtperf, ret, 0,
"%s failed for: %s, range: %"PRIu64,
- op_name(op), key_buf, wtperf_value_range(cfg));
+ op_name(op), key_buf, wtperf_value_range(wtperf));
goto err; /* can't happen */
}
@@ -454,7 +421,7 @@ op_err: lprintf(cfg, ret, 0,
/* Notify our caller we failed and shut the system down. */
if (0) {
-err: cfg->error = cfg->stop = 1;
+err: wtperf->error = wtperf->stop = true;
}
return (NULL);
}
@@ -465,17 +432,19 @@ err: cfg->error = cfg->stop = 1;
* search do them. Ensuring the keys we see are always in order.
*/
static int
-do_range_reads(CONFIG *cfg, WT_CURSOR *cursor)
+do_range_reads(WTPERF *wtperf, WT_CURSOR *cursor)
{
+ CONFIG_OPTS *opts;
size_t range;
uint64_t next_val, prev_val;
char *range_key_buf;
char buf[512];
int ret;
+ opts = wtperf->opts;
ret = 0;
- if (cfg->read_range == 0)
+ if (opts->read_range == 0)
return (0);
memset(&buf[0], 0, 512 * sizeof(char));
@@ -485,7 +454,7 @@ do_range_reads(CONFIG *cfg, WT_CURSOR *cursor)
testutil_check(cursor->get_key(cursor, &range_key_buf));
extract_key(range_key_buf, &next_val);
- for (range = 0; range < cfg->read_range; ++range) {
+ for (range = 0; range < opts->read_range; ++range) {
prev_val = next_val;
ret = cursor->next(cursor);
/* We are done if we reach the end. */
@@ -496,7 +465,7 @@ do_range_reads(CONFIG *cfg, WT_CURSOR *cursor)
testutil_check(cursor->get_key(cursor, &range_key_buf));
extract_key(range_key_buf, &next_val);
if (next_val < prev_val) {
- lprintf(cfg, EINVAL, 0,
+ lprintf(wtperf, EINVAL, 0,
"Out of order keys %" PRIu64
" came before %" PRIu64,
prev_val, next_val);
@@ -510,9 +479,10 @@ static void *
worker(void *arg)
{
struct timespec start, stop;
- CONFIG *cfg;
- CONFIG_THREAD *thread;
+ CONFIG_OPTS *opts;
TRACK *trk;
+ WTPERF *wtperf;
+ WTPERF_THREAD *thread;
WT_CONNECTION *conn;
WT_CURSOR **cursors, *cursor, *log_table_cursor, *tmp_cursor;
WT_SESSION *session;
@@ -524,9 +494,10 @@ worker(void *arg)
char *value_buf, *key_buf, *value;
char buf[512];
- thread = (CONFIG_THREAD *)arg;
- cfg = thread->cfg;
- conn = cfg->conn;
+ thread = (WTPERF_THREAD *)arg;
+ wtperf = thread->wtperf;
+ opts = wtperf->opts;
+ conn = wtperf->conn;
cursors = NULL;
log_table_cursor = NULL; /* -Wconditional-initialized */
ops = 0;
@@ -535,42 +506,40 @@ worker(void *arg)
trk = NULL;
if ((ret = conn->open_session(
- conn, NULL, cfg->sess_config, &session)) != 0) {
- lprintf(cfg, ret, 0, "worker: WT_CONNECTION.open_session");
+ conn, NULL, opts->sess_config, &session)) != 0) {
+ lprintf(wtperf, ret, 0, "worker: WT_CONNECTION.open_session");
goto err;
}
- cursors = dcalloc(cfg->table_count, sizeof(WT_CURSOR *));
- for (i = 0; i < cfg->table_count_idle; i++) {
- snprintf(buf, 512, "%s_idle%05d", cfg->uris[0], (int)i);
+ cursors = dcalloc(opts->table_count, sizeof(WT_CURSOR *));
+ for (i = 0; i < opts->table_count_idle; i++) {
+ snprintf(buf, 512, "%s_idle%05d", wtperf->uris[0], (int)i);
if ((ret = session->open_cursor(
session, buf, NULL, NULL, &tmp_cursor)) != 0) {
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"Error opening idle table %s", buf);
goto err;
}
if ((ret = tmp_cursor->close(tmp_cursor)) != 0) {
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"Error closing idle table %s", buf);
goto err;
}
}
- for (i = 0; i < cfg->table_count; i++) {
+ for (i = 0; i < opts->table_count; i++) {
if ((ret = session->open_cursor(session,
- cfg->uris[i], NULL, NULL, &cursors[i])) != 0) {
- lprintf(cfg, ret, 0,
+ wtperf->uris[i], NULL, NULL, &cursors[i])) != 0) {
+ lprintf(wtperf, ret, 0,
"worker: WT_SESSION.open_cursor: %s",
- cfg->uris[i]);
+ wtperf->uris[i]);
goto err;
}
}
- if (cfg->log_like_table) {
- if ((ret = session->open_cursor(session,
- cfg->log_table_uri, NULL, NULL, &log_table_cursor)) != 0) {
- lprintf(cfg, ret, 0,
- "worker: WT_SESSION.open_cursor: %s",
- cfg->log_table_uri);
- goto err;
- }
+ if (opts->log_like_table && (ret = session->open_cursor(session,
+ wtperf->log_table_uri, NULL, NULL, &log_table_cursor)) != 0) {
+ lprintf(wtperf, ret, 0,
+ "worker: WT_SESSION.open_cursor: %s",
+ wtperf->log_table_uri);
+ goto err;
}
/* Setup the timer for throttling. */
@@ -579,7 +548,7 @@ worker(void *arg)
/* Setup for truncate */
if (thread->workload->truncate != 0)
- if ((ret = setup_truncate(cfg, thread, session)) != 0)
+ if ((ret = setup_truncate(wtperf, thread, session)) != 0)
goto err;
key_buf = thread->key_buf;
@@ -588,13 +557,13 @@ worker(void *arg)
op = thread->workload->ops;
op_end = op + sizeof(thread->workload->ops);
- if ((ops_per_txn != 0 || cfg->log_like_table) &&
+ if ((ops_per_txn != 0 || opts->log_like_table) &&
(ret = session->begin_transaction(session, NULL)) != 0) {
- lprintf(cfg, ret, 0, "First transaction begin failed");
+ lprintf(wtperf, ret, 0, "First transaction begin failed");
goto err;
}
- while (!cfg->stop) {
+ while (!wtperf->stop) {
/*
* Generate the next key and setup operation specific
* statistics tracking objects.
@@ -603,10 +572,10 @@ worker(void *arg)
case WORKER_INSERT:
case WORKER_INSERT_RMW:
trk = &thread->insert;
- if (cfg->random_range)
+ if (opts->random_range)
next_val = wtperf_rand(thread);
else
- next_val = cfg->icount + get_next_incr(cfg);
+ next_val = opts->icount + get_next_incr(wtperf);
break;
case WORKER_READ:
trk = &thread->read;
@@ -621,7 +590,7 @@ worker(void *arg)
* we rely on at least one insert to get a valid item
* id.
*/
- if (wtperf_value_range(cfg) < next_val)
+ if (wtperf_value_range(wtperf) < next_val)
continue;
break;
case WORKER_TRUNCATE:
@@ -632,24 +601,22 @@ worker(void *arg)
goto err; /* can't happen */
}
- generate_key(cfg, key_buf, next_val);
+ generate_key(opts, key_buf, next_val);
/*
* Spread the data out around the multiple databases.
*/
- cursor = cursors[map_key_to_table(cfg, next_val)];
+ cursor = cursors[map_key_to_table(wtperf->opts, next_val)];
/*
* Skip the first time we do an operation, when trk->ops
* is 0, to avoid first time latency spikes.
*/
measure_latency =
- cfg->sample_interval != 0 && trk != NULL &&
- trk->ops != 0 && (trk->ops % cfg->sample_rate == 0);
- if (measure_latency && (ret = __wt_epoch(NULL, &start)) != 0) {
- lprintf(cfg, ret, 0, "Get time call failed");
- goto err;
- }
+ opts->sample_interval != 0 && trk != NULL &&
+ trk->ops != 0 && (trk->ops % opts->sample_rate == 0);
+ if (measure_latency)
+ __wt_epoch(NULL, &start);
cursor->set_key(cursor, key_buf);
@@ -666,7 +633,7 @@ worker(void *arg)
if (ret == 0) {
if ((ret = cursor->get_value(
cursor, &value)) != 0) {
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"get_value in read.");
goto err;
}
@@ -675,7 +642,7 @@ worker(void *arg)
* for several operations, confirming that the
* next key is in the correct order.
*/
- ret = do_range_reads(cfg, cursor);
+ ret = do_range_reads(wtperf, cursor);
}
if (ret == 0 || ret == WT_NOTFOUND)
@@ -690,15 +657,15 @@ worker(void *arg)
/* FALLTHROUGH */
case WORKER_INSERT:
- if (cfg->random_value)
+ if (opts->random_value)
randomize_value(thread, value_buf);
cursor->set_value(cursor, value_buf);
if ((ret = cursor->insert(cursor)) == 0)
break;
goto op_err;
case WORKER_TRUNCATE:
- if ((ret = run_truncate(
- cfg, thread, cursor, session, &truncated)) == 0) {
+ if ((ret = run_truncate(wtperf,
+ thread, cursor, session, &truncated)) == 0) {
if (truncated)
trk = &thread->truncate;
else
@@ -712,7 +679,7 @@ worker(void *arg)
if ((ret = cursor->search(cursor)) == 0) {
if ((ret = cursor->get_value(
cursor, &value)) != 0) {
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"get_value in update.");
goto err;
}
@@ -721,14 +688,14 @@ worker(void *arg)
* safe, and be sure to NUL-terminate.
*/
strncpy(value_buf,
- value, cfg->value_sz_max - 1);
+ value, opts->value_sz_max - 1);
if (thread->workload->update_delta != 0)
update_value_delta(thread);
if (value_buf[0] == 'a')
value_buf[0] = 'b';
else
value_buf[0] = 'a';
- if (cfg->random_value)
+ if (opts->random_value)
randomize_value(thread, value_buf);
cursor->set_value(cursor, value_buf);
if ((ret = cursor->update(cursor)) == 0)
@@ -756,62 +723,59 @@ op_err: if (ret == WT_ROLLBACK && ops_per_txn != 0) {
* order in cases of ordered inserts, as we
* aren't retrying here.
*/
- lprintf(cfg, ret, 1,
+ lprintf(wtperf, ret, 1,
"%s for: %s, range: %"PRIu64, op_name(op),
- key_buf, wtperf_value_range(cfg));
+ key_buf, wtperf_value_range(wtperf));
if ((ret = session->rollback_transaction(
session, NULL)) != 0) {
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"Failed rollback_transaction");
goto err;
}
if ((ret = session->begin_transaction(
session, NULL)) != 0) {
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"Worker begin transaction failed");
goto err;
}
break;
}
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"%s failed for: %s, range: %"PRIu64,
- op_name(op), key_buf, wtperf_value_range(cfg));
+ op_name(op), key_buf, wtperf_value_range(wtperf));
goto err;
default:
goto err; /* can't happen */
}
/* Update the log-like table. */
- if (cfg->log_like_table &&
+ if (opts->log_like_table &&
(*op != WORKER_READ && *op != WORKER_TRUNCATE)) {
- log_id = __wt_atomic_add64(&cfg->log_like_table_key, 1);
+ log_id =
+ __wt_atomic_add64(&wtperf->log_like_table_key, 1);
log_table_cursor->set_key(log_table_cursor, log_id);
log_table_cursor->set_value(
log_table_cursor, value_buf);
if ((ret =
log_table_cursor->insert(log_table_cursor)) != 0) {
- lprintf(cfg, ret, 0, "Cursor insert failed");
+ lprintf(wtperf, ret, 0, "Cursor insert failed");
goto err;
}
}
/* Release the cursor, if we have multiple tables. */
- if (cfg->table_count > 1 && ret == 0 &&
+ if (opts->table_count > 1 && ret == 0 &&
*op != WORKER_INSERT && *op != WORKER_INSERT_RMW) {
if ((ret = cursor->reset(cursor)) != 0) {
- lprintf(cfg, ret, 0, "Cursor reset failed");
+ lprintf(wtperf, ret, 0, "Cursor reset failed");
goto err;
}
}
/* Gather statistics */
- if (!cfg->in_warmup) {
+ if (!wtperf->in_warmup) {
if (measure_latency) {
- if ((ret = __wt_epoch(NULL, &stop)) != 0) {
- lprintf(cfg, ret, 0,
- "Get time call failed");
- goto err;
- }
+ __wt_epoch(NULL, &stop);
++trk->latency_ops;
usecs = WT_TIMEDIFF_US(stop, start);
track_operation(trk, usecs);
@@ -824,17 +788,17 @@ op_err: if (ret == WT_ROLLBACK && ops_per_txn != 0) {
* Commit the transaction if grouping operations together
* or tracking changes in our log table.
*/
- if ((cfg->log_like_table && ops_per_txn == 0) ||
+ if ((opts->log_like_table && ops_per_txn == 0) ||
(ops_per_txn != 0 && ops++ % ops_per_txn == 0)) {
if ((ret = session->commit_transaction(
session, NULL)) != 0) {
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"Worker transaction commit failed");
goto err;
}
if ((ret = session->begin_transaction(
session, NULL)) != 0) {
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"Worker begin transaction failed");
goto err;
}
@@ -854,13 +818,13 @@ op_err: if (ret == WT_ROLLBACK && ops_per_txn != 0) {
}
if ((ret = session->close(session, NULL)) != 0) {
- lprintf(cfg, ret, 0, "Session close in worker failed");
+ lprintf(wtperf, ret, 0, "Session close in worker failed");
goto err;
}
/* Notify our caller we failed and shut the system down. */
if (0) {
-err: cfg->error = cfg->stop = 1;
+err: wtperf->error = wtperf->stop = true;
}
free(cursors);
@@ -913,14 +877,17 @@ run_mix_schedule_op(WORKLOAD *workp, int op, int64_t op_cnt)
* Schedule the mixed-run operations.
*/
static int
-run_mix_schedule(CONFIG *cfg, WORKLOAD *workp)
+run_mix_schedule(WTPERF *wtperf, WORKLOAD *workp)
{
+ CONFIG_OPTS *opts;
int64_t pct;
+ opts = wtperf->opts;
+
/* Confirm reads, inserts, truncates and updates cannot all be zero. */
if (workp->insert == 0 && workp->read == 0 &&
workp->truncate == 0 && workp->update == 0) {
- lprintf(cfg, EINVAL, 0, "no operations scheduled");
+ lprintf(wtperf, EINVAL, 0, "no operations scheduled");
return (EINVAL);
}
@@ -931,7 +898,7 @@ run_mix_schedule(CONFIG *cfg, WORKLOAD *workp)
if (workp->truncate != 0) {
if (workp->insert != 0 ||
workp->read != 0 || workp->update != 0) {
- lprintf(cfg, EINVAL, 0,
+ lprintf(wtperf, EINVAL, 0,
"Can't configure truncate in a mixed workload");
return (EINVAL);
}
@@ -947,7 +914,7 @@ run_mix_schedule(CONFIG *cfg, WORKLOAD *workp)
*/
if (workp->insert != 0 && workp->read == 0 && workp->update == 0) {
memset(workp->ops,
- cfg->insert_rmw ? WORKER_INSERT_RMW : WORKER_INSERT,
+ opts->insert_rmw ? WORKER_INSERT_RMW : WORKER_INSERT,
sizeof(workp->ops));
return (0);
}
@@ -979,7 +946,7 @@ run_mix_schedule(CONFIG *cfg, WORKLOAD *workp)
(workp->insert + workp->read + workp->update);
if (pct != 0)
run_mix_schedule_op(workp,
- cfg->insert_rmw ? WORKER_INSERT_RMW : WORKER_INSERT, pct);
+ opts->insert_rmw ? WORKER_INSERT_RMW : WORKER_INSERT, pct);
pct = (workp->update * 100) /
(workp->insert + workp->read + workp->update);
if (pct != 0)
@@ -991,9 +958,10 @@ static void *
populate_thread(void *arg)
{
struct timespec start, stop;
- CONFIG *cfg;
- CONFIG_THREAD *thread;
+ CONFIG_OPTS *opts;
TRACK *trk;
+ WTPERF *wtperf;
+ WTPERF_THREAD *thread;
WT_CONNECTION *conn;
WT_CURSOR **cursors, *cursor;
WT_SESSION *session;
@@ -1004,9 +972,10 @@ populate_thread(void *arg)
char *value_buf, *key_buf;
const char *cursor_config;
- thread = (CONFIG_THREAD *)arg;
- cfg = thread->cfg;
- conn = cfg->conn;
+ thread = (WTPERF_THREAD *)arg;
+ wtperf = thread->wtperf;
+ opts = wtperf->opts;
+ conn = wtperf->conn;
session = NULL;
cursors = NULL;
ret = stress_checkpoint_due = 0;
@@ -1016,37 +985,37 @@ populate_thread(void *arg)
value_buf = thread->value_buf;
if ((ret = conn->open_session(
- conn, NULL, cfg->sess_config, &session)) != 0) {
- lprintf(cfg, ret, 0, "populate: WT_CONNECTION.open_session");
+ conn, NULL, opts->sess_config, &session)) != 0) {
+ lprintf(wtperf, ret, 0, "populate: WT_CONNECTION.open_session");
goto err;
}
/* Do bulk loads if populate is single-threaded. */
cursor_config =
- (cfg->populate_threads == 1 && !cfg->index) ? "bulk" : NULL;
+ (opts->populate_threads == 1 && !opts->index) ? "bulk" : NULL;
/* Create the cursors. */
- cursors = dcalloc(cfg->table_count, sizeof(WT_CURSOR *));
- for (i = 0; i < cfg->table_count; i++) {
+ cursors = dcalloc(opts->table_count, sizeof(WT_CURSOR *));
+ for (i = 0; i < opts->table_count; i++) {
if ((ret = session->open_cursor(
- session, cfg->uris[i], NULL,
+ session, wtperf->uris[i], NULL,
cursor_config, &cursors[i])) != 0) {
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"populate: WT_SESSION.open_cursor: %s",
- cfg->uris[i]);
+ wtperf->uris[i]);
goto err;
}
}
/* Populate the databases. */
for (intxn = 0, opcount = 0;;) {
- op = get_next_incr(cfg);
- if (op > cfg->icount)
+ op = get_next_incr(wtperf);
+ if (op > opts->icount)
break;
- if (cfg->populate_ops_per_txn != 0 && !intxn) {
+ if (opts->populate_ops_per_txn != 0 && !intxn) {
if ((ret = session->begin_transaction(
- session, cfg->transaction_config)) != 0) {
- lprintf(cfg, ret, 0,
+ session, opts->transaction_config)) != 0) {
+ lprintf(wtperf, ret, 0,
"Failed starting transaction.");
goto err;
}
@@ -1055,31 +1024,29 @@ populate_thread(void *arg)
/*
* Figure out which table this op belongs to.
*/
- cursor = cursors[map_key_to_table(cfg, op)];
- generate_key(cfg, key_buf, op);
+ cursor = cursors[map_key_to_table(wtperf->opts, op)];
+ generate_key(opts, key_buf, op);
measure_latency =
- cfg->sample_interval != 0 &&
- trk->ops != 0 && (trk->ops % cfg->sample_rate == 0);
- if (measure_latency && (ret = __wt_epoch(NULL, &start)) != 0) {
- lprintf(cfg, ret, 0, "Get time call failed");
- goto err;
- }
+ opts->sample_interval != 0 &&
+ trk->ops != 0 && (trk->ops % opts->sample_rate == 0);
+ if (measure_latency)
+ __wt_epoch(NULL, &start);
cursor->set_key(cursor, key_buf);
- if (cfg->random_value)
+ if (opts->random_value)
randomize_value(thread, value_buf);
cursor->set_value(cursor, value_buf);
if ((ret = cursor->insert(cursor)) == WT_ROLLBACK) {
- lprintf(cfg, ret, 0, "insert retrying");
+ lprintf(wtperf, ret, 0, "insert retrying");
if ((ret = session->rollback_transaction(
session, NULL)) != 0) {
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"Failed rollback_transaction");
goto err;
}
intxn = 0;
continue;
} else if (ret != 0) {
- lprintf(cfg, ret, 0, "Failed inserting");
+ lprintf(wtperf, ret, 0, "Failed inserting");
goto err;
}
/*
@@ -1089,28 +1056,25 @@ populate_thread(void *arg)
* of them.
*/
if (measure_latency) {
- if ((ret = __wt_epoch(NULL, &stop)) != 0) {
- lprintf(cfg, ret, 0, "Get time call failed");
- goto err;
- }
+ __wt_epoch(NULL, &stop);
++trk->latency_ops;
usecs = WT_TIMEDIFF_US(stop, start);
track_operation(trk, usecs);
}
++thread->insert.ops; /* Same as trk->ops */
- if (cfg->checkpoint_stress_rate != 0 &&
- (op % cfg->checkpoint_stress_rate) == 0)
+ if (opts->checkpoint_stress_rate != 0 &&
+ (op % opts->checkpoint_stress_rate) == 0)
stress_checkpoint_due = 1;
- if (cfg->populate_ops_per_txn != 0) {
- if (++opcount < cfg->populate_ops_per_txn)
+ if (opts->populate_ops_per_txn != 0) {
+ if (++opcount < opts->populate_ops_per_txn)
continue;
opcount = 0;
if ((ret = session->commit_transaction(
session, NULL)) != 0)
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"Fail committing, transaction was aborted");
intxn = 0;
}
@@ -1118,24 +1082,24 @@ populate_thread(void *arg)
if (stress_checkpoint_due && intxn == 0) {
stress_checkpoint_due = 0;
if ((ret = session->checkpoint(session, NULL)) != 0) {
- lprintf(cfg, ret, 0, "Checkpoint failed");
+ lprintf(wtperf, ret, 0, "Checkpoint failed");
goto err;
}
}
}
if (intxn &&
(ret = session->commit_transaction(session, NULL)) != 0)
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"Fail committing, transaction was aborted");
if ((ret = session->close(session, NULL)) != 0) {
- lprintf(cfg, ret, 0, "Error closing session in populate");
+ lprintf(wtperf, ret, 0, "Error closing session in populate");
goto err;
}
/* Notify our caller we failed and shut the system down. */
if (0) {
-err: cfg->error = cfg->stop = 1;
+err: wtperf->error = wtperf->stop = true;
}
free(cursors);
@@ -1146,9 +1110,10 @@ static void *
populate_async(void *arg)
{
struct timespec start, stop;
- CONFIG *cfg;
- CONFIG_THREAD *thread;
+ CONFIG_OPTS *opts;
TRACK *trk;
+ WTPERF *wtperf;
+ WTPERF_THREAD *thread;
WT_ASYNC_OP *asyncop;
WT_CONNECTION *conn;
WT_SESSION *session;
@@ -1156,9 +1121,10 @@ populate_async(void *arg)
int measure_latency, ret;
char *value_buf, *key_buf;
- thread = (CONFIG_THREAD *)arg;
- cfg = thread->cfg;
- conn = cfg->conn;
+ thread = (WTPERF_THREAD *)arg;
+ wtperf = thread->wtperf;
+ opts = wtperf->opts;
+ conn = wtperf->conn;
session = NULL;
ret = 0;
trk = &thread->insert;
@@ -1167,8 +1133,8 @@ populate_async(void *arg)
value_buf = thread->value_buf;
if ((ret = conn->open_session(
- conn, NULL, cfg->sess_config, &session)) != 0) {
- lprintf(cfg, ret, 0, "populate: WT_CONNECTION.open_session");
+ conn, NULL, opts->sess_config, &session)) != 0) {
+ lprintf(wtperf, ret, 0, "populate: WT_CONNECTION.open_session");
goto err;
}
@@ -1178,38 +1144,38 @@ populate_async(void *arg)
* the time to process by workers.
*/
measure_latency =
- cfg->sample_interval != 0 &&
- trk->ops != 0 && (trk->ops % cfg->sample_rate == 0);
- if (measure_latency && (ret = __wt_epoch(NULL, &start)) != 0) {
- lprintf(cfg, ret, 0, "Get time call failed");
- goto err;
- }
+ opts->sample_interval != 0 &&
+ trk->ops != 0 && (trk->ops % opts->sample_rate == 0);
+ if (measure_latency)
+ __wt_epoch(NULL, &start);
+
/* Populate the databases. */
for (;;) {
- op = get_next_incr(cfg);
- if (op > cfg->icount)
+ op = get_next_incr(wtperf);
+ if (op > opts->icount)
break;
/*
* Allocate an async op for whichever table.
*/
while ((ret = conn->async_new_op(
- conn, cfg->uris[map_key_to_table(cfg, op)],
+ conn, wtperf->uris[map_key_to_table(wtperf->opts, op)],
NULL, &cb, &asyncop)) == EBUSY)
(void)usleep(10000);
if (ret != 0)
goto err;
asyncop->app_private = thread;
- generate_key(cfg, key_buf, op);
+ generate_key(opts, key_buf, op);
asyncop->set_key(asyncop, key_buf);
- if (cfg->random_value)
+ if (opts->random_value)
randomize_value(thread, value_buf);
asyncop->set_value(asyncop, value_buf);
if ((ret = asyncop->insert(asyncop)) != 0) {
- lprintf(cfg, ret, 0, "Failed inserting");
+ lprintf(wtperf, ret, 0, "Failed inserting");
goto err;
}
}
+
/*
* Gather statistics.
* We measure the latency of inserting a single key. If there
@@ -1221,22 +1187,19 @@ populate_async(void *arg)
if (conn->async_flush(conn) != 0)
goto err;
if (measure_latency) {
- if ((ret = __wt_epoch(NULL, &stop)) != 0) {
- lprintf(cfg, ret, 0, "Get time call failed");
- goto err;
- }
+ __wt_epoch(NULL, &stop);
++trk->latency_ops;
usecs = WT_TIMEDIFF_US(stop, start);
track_operation(trk, usecs);
}
if ((ret = session->close(session, NULL)) != 0) {
- lprintf(cfg, ret, 0, "Error closing session in populate");
+ lprintf(wtperf, ret, 0, "Error closing session in populate");
goto err;
}
/* Notify our caller we failed and shut the system down. */
if (0) {
-err: cfg->error = cfg->stop = 1;
+err: wtperf->error = wtperf->stop = true;
}
return (NULL);
}
@@ -1246,8 +1209,9 @@ monitor(void *arg)
{
struct timespec t;
struct tm *tm, _tm;
- CONFIG *cfg;
+ CONFIG_OPTS *opts;
FILE *fp;
+ WTPERF *wtperf;
size_t len;
uint64_t min_thr, reads, inserts, updates;
uint64_t cur_reads, cur_inserts, cur_updates;
@@ -1257,24 +1221,26 @@ monitor(void *arg)
uint32_t update_avg, update_min, update_max;
uint32_t latency_max, level;
u_int i;
- int msg_err, ret;
+ int msg_err;
const char *str;
char buf[64], *path;
- cfg = (CONFIG *)arg;
- assert(cfg->sample_interval != 0);
+ wtperf = (WTPERF *)arg;
+ opts = wtperf->opts;
+ assert(opts->sample_interval != 0);
+
fp = NULL;
path = NULL;
- min_thr = (uint64_t)cfg->min_throughput;
- latency_max = (uint32_t)ms_to_us(cfg->max_latency);
+ min_thr = (uint64_t)opts->min_throughput;
+ latency_max = (uint32_t)ms_to_us(opts->max_latency);
/* Open the logging file. */
- len = strlen(cfg->monitor_dir) + 100;
+ len = strlen(wtperf->monitor_dir) + 100;
path = dmalloc(len);
- snprintf(path, len, "%s/monitor", cfg->monitor_dir);
+ snprintf(path, len, "%s/monitor", wtperf->monitor_dir);
if ((fp = fopen(path, "w")) == NULL) {
- lprintf(cfg, errno, 0, "%s", path);
+ lprintf(wtperf, errno, 0, "%s", path);
goto err;
}
/* Set line buffering for monitor file. */
@@ -1297,34 +1263,31 @@ monitor(void *arg)
"update maximum latency(uS)"
"\n");
last_reads = last_inserts = last_updates = 0;
- while (!cfg->stop) {
- for (i = 0; i < cfg->sample_interval; i++) {
+ while (!wtperf->stop) {
+ for (i = 0; i < opts->sample_interval; i++) {
sleep(1);
- if (cfg->stop)
+ if (wtperf->stop)
break;
}
/* If the workers are done, don't bother with a final call. */
- if (cfg->stop)
+ if (wtperf->stop)
break;
- if (cfg->in_warmup)
+ if (wtperf->in_warmup)
continue;
- if ((ret = __wt_epoch(NULL, &t)) != 0) {
- lprintf(cfg, ret, 0, "Get time call failed");
- goto err;
- }
+ __wt_epoch(NULL, &t);
tm = localtime_r(&t.tv_sec, &_tm);
(void)strftime(buf, sizeof(buf), "%b %d %H:%M:%S", tm);
- reads = sum_read_ops(cfg);
- inserts = sum_insert_ops(cfg);
- updates = sum_update_ops(cfg);
- latency_read(cfg, &read_avg, &read_min, &read_max);
- latency_insert(cfg, &insert_avg, &insert_min, &insert_max);
- latency_update(cfg, &update_avg, &update_min, &update_max);
+ reads = sum_read_ops(wtperf);
+ inserts = sum_insert_ops(wtperf);
+ updates = sum_update_ops(wtperf);
+ latency_read(wtperf, &read_avg, &read_min, &read_max);
+ latency_insert(wtperf, &insert_avg, &insert_min, &insert_max);
+ latency_update(wtperf, &update_avg, &update_min, &update_max);
- cur_reads = (reads - last_reads) / cfg->sample_interval;
- cur_updates = (updates - last_updates) / cfg->sample_interval;
+ cur_reads = (reads - last_reads) / opts->sample_interval;
+ cur_updates = (updates - last_updates) / opts->sample_interval;
/*
* For now the only item we need to worry about changing is
* inserts when we transition from the populate phase to
@@ -1334,7 +1297,7 @@ monitor(void *arg)
cur_inserts = 0;
else
cur_inserts =
- (inserts - last_inserts) / cfg->sample_interval;
+ (inserts - last_inserts) / opts->sample_interval;
(void)fprintf(fp,
"%s,%" PRIu32
@@ -1344,9 +1307,9 @@ monitor(void *arg)
",%" PRIu32 ",%" PRIu32 ",%" PRIu32
",%" PRIu32 ",%" PRIu32 ",%" PRIu32
"\n",
- buf, cfg->totalsec,
+ buf, wtperf->totalsec,
cur_reads, cur_inserts, cur_updates,
- cfg->ckpt ? 'Y' : 'N',
+ wtperf->ckpt ? 'Y' : 'N',
read_avg, read_min, read_max,
insert_avg, insert_min, insert_max,
update_avg, update_min, update_max);
@@ -1354,7 +1317,7 @@ monitor(void *arg)
if (latency_max != 0 &&
(read_max > latency_max || insert_max > latency_max ||
update_max > latency_max)) {
- if (cfg->max_latency_fatal) {
+ if (opts->max_latency_fatal) {
level = 1;
msg_err = WT_PANIC;
str = "ERROR";
@@ -1363,7 +1326,7 @@ monitor(void *arg)
msg_err = 0;
str = "WARNING";
}
- lprintf(cfg, msg_err, level,
+ lprintf(wtperf, msg_err, level,
"%s: max latency exceeded: threshold %" PRIu32
" read max %" PRIu32 " insert max %" PRIu32
" update max %" PRIu32, str, latency_max,
@@ -1373,7 +1336,7 @@ monitor(void *arg)
((cur_reads != 0 && cur_reads < min_thr) ||
(cur_inserts != 0 && cur_inserts < min_thr) ||
(cur_updates != 0 && cur_updates < min_thr))) {
- if (cfg->min_throughput_fatal) {
+ if (opts->min_throughput_fatal) {
level = 1;
msg_err = WT_PANIC;
str = "ERROR";
@@ -1382,7 +1345,7 @@ monitor(void *arg)
msg_err = 0;
str = "WARNING";
}
- lprintf(cfg, msg_err, level,
+ lprintf(wtperf, msg_err, level,
"%s: minimum throughput not met: threshold %" PRIu64
" reads %" PRIu64 " inserts %" PRIu64
" updates %" PRIu64, str, min_thr, cur_reads,
@@ -1395,7 +1358,7 @@ monitor(void *arg)
/* Notify our caller we failed and shut the system down. */
if (0) {
-err: cfg->error = cfg->stop = 1;
+err: wtperf->error = wtperf->stop = true;
}
if (fp != NULL)
@@ -1408,75 +1371,73 @@ err: cfg->error = cfg->stop = 1;
static void *
checkpoint_worker(void *arg)
{
- CONFIG *cfg;
- CONFIG_THREAD *thread;
+ CONFIG_OPTS *opts;
+ WTPERF *wtperf;
+ WTPERF_THREAD *thread;
WT_CONNECTION *conn;
WT_SESSION *session;
struct timespec e, s;
uint32_t i;
int ret;
- thread = (CONFIG_THREAD *)arg;
- cfg = thread->cfg;
- conn = cfg->conn;
+ thread = (WTPERF_THREAD *)arg;
+ wtperf = thread->wtperf;
+ opts = wtperf->opts;
+ conn = wtperf->conn;
session = NULL;
if ((ret = conn->open_session(
- conn, NULL, cfg->sess_config, &session)) != 0) {
- lprintf(cfg, ret, 0,
+ conn, NULL, opts->sess_config, &session)) != 0) {
+ lprintf(wtperf, ret, 0,
"open_session failed in checkpoint thread.");
goto err;
}
- while (!cfg->stop) {
+ while (!wtperf->stop) {
/* Break the sleep up, so we notice interrupts faster. */
- for (i = 0; i < cfg->checkpoint_interval; i++) {
+ for (i = 0; i < opts->checkpoint_interval; i++) {
sleep(1);
- if (cfg->stop)
+ if (wtperf->stop)
break;
}
/* If the workers are done, don't bother with a final call. */
- if (cfg->stop)
+ if (wtperf->stop)
break;
- if ((ret = __wt_epoch(NULL, &s)) != 0) {
- lprintf(cfg, ret, 0, "Get time failed in checkpoint.");
- goto err;
- }
- cfg->ckpt = 1;
+ __wt_epoch(NULL, &s);
+
+ wtperf->ckpt = true;
if ((ret = session->checkpoint(session, NULL)) != 0) {
- lprintf(cfg, ret, 0, "Checkpoint failed.");
+ lprintf(wtperf, ret, 0, "Checkpoint failed.");
goto err;
}
- cfg->ckpt = 0;
+ wtperf->ckpt = false;
++thread->ckpt.ops;
- if ((ret = __wt_epoch(NULL, &e)) != 0) {
- lprintf(cfg, ret, 0, "Get time failed in checkpoint.");
- goto err;
- }
+ __wt_epoch(NULL, &e);
}
if (session != NULL &&
((ret = session->close(session, NULL)) != 0)) {
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"Error closing session in checkpoint worker.");
goto err;
}
/* Notify our caller we failed and shut the system down. */
if (0) {
-err: cfg->error = cfg->stop = 1;
+err: wtperf->error = wtperf->stop = true;
}
return (NULL);
}
static int
-execute_populate(CONFIG *cfg)
+execute_populate(WTPERF *wtperf)
{
struct timespec start, stop;
- CONFIG_THREAD *popth;
+ CONFIG_OPTS *opts;
+ WTPERF_THREAD *popth;
WT_ASYNC_OP *asyncop;
pthread_t idle_table_cycle_thread;
size_t i;
@@ -1486,59 +1447,57 @@ execute_populate(CONFIG *cfg)
int elapsed, ret;
void *(*pfunc)(void *);
- lprintf(cfg, 0, 1,
+ opts = wtperf->opts;
+
+ lprintf(wtperf, 0, 1,
"Starting %" PRIu32
" populate thread(s) for %" PRIu32 " items",
- cfg->populate_threads, cfg->icount);
+ opts->populate_threads, opts->icount);
/* Start cycling idle tables if configured. */
- if ((ret = start_idle_table_cycle(cfg, &idle_table_cycle_thread)) != 0)
+ if ((ret =
+ start_idle_table_cycle(wtperf, &idle_table_cycle_thread)) != 0)
return (ret);
- cfg->insert_key = 0;
+ wtperf->insert_key = 0;
- cfg->popthreads = dcalloc(cfg->populate_threads, sizeof(CONFIG_THREAD));
- if (cfg->use_asyncops > 0) {
- lprintf(cfg, 0, 1, "Starting %" PRIu32 " async thread(s)",
- cfg->async_threads);
+ wtperf->popthreads =
+ dcalloc(opts->populate_threads, sizeof(WTPERF_THREAD));
+ if (wtperf->use_asyncops) {
+ lprintf(wtperf, 0, 1, "Starting %" PRIu32 " async thread(s)",
+ opts->async_threads);
pfunc = populate_async;
} else
pfunc = populate_thread;
- if ((ret = start_threads(cfg, NULL,
- cfg->popthreads, cfg->populate_threads, pfunc)) != 0)
+ if ((ret = start_threads(wtperf, NULL,
+ wtperf->popthreads, opts->populate_threads, pfunc)) != 0)
return (ret);
- if ((ret = __wt_epoch(NULL, &start)) != 0) {
- lprintf(cfg, ret, 0, "Get time failed in populate.");
- return (ret);
- }
+ __wt_epoch(NULL, &start);
for (elapsed = 0, interval = 0, last_ops = 0;
- cfg->insert_key < cfg->icount && cfg->error == 0;) {
+ wtperf->insert_key < opts->icount && !wtperf->error;) {
/*
* Sleep for 100th of a second, report_interval is in second
* granularity, each 100th increment of elapsed is a single
* increment of interval.
*/
(void)usleep(10000);
- if (cfg->report_interval == 0 || ++elapsed < 100)
+ if (opts->report_interval == 0 || ++elapsed < 100)
continue;
elapsed = 0;
- if (++interval < cfg->report_interval)
+ if (++interval < opts->report_interval)
continue;
interval = 0;
- cfg->totalsec += cfg->report_interval;
- cfg->insert_ops = sum_pop_ops(cfg);
- lprintf(cfg, 0, 1,
+ wtperf->totalsec += opts->report_interval;
+ wtperf->insert_ops = sum_pop_ops(wtperf);
+ lprintf(wtperf, 0, 1,
"%" PRIu64 " populate inserts (%" PRIu64 " of %"
PRIu32 ") in %" PRIu32 " secs (%" PRIu32 " total secs)",
- cfg->insert_ops - last_ops, cfg->insert_ops,
- cfg->icount, cfg->report_interval, cfg->totalsec);
- last_ops = cfg->insert_ops;
- }
- if ((ret = __wt_epoch(NULL, &stop)) != 0) {
- lprintf(cfg, ret, 0, "Get time failed in populate.");
- return (ret);
+ wtperf->insert_ops - last_ops, wtperf->insert_ops,
+ opts->icount, opts->report_interval, wtperf->totalsec);
+ last_ops = wtperf->insert_ops;
}
+ __wt_epoch(NULL, &stop);
/*
* Move popthreads aside to narrow possible race with the monitor
@@ -1546,21 +1505,22 @@ execute_populate(CONFIG *cfg)
* NULL when the populate phase is finished, to know that the workload
* phase has started.
*/
- popth = cfg->popthreads;
- cfg->popthreads = NULL;
- ret = stop_threads(cfg, cfg->populate_threads, popth);
+ popth = wtperf->popthreads;
+ wtperf->popthreads = NULL;
+ ret = stop_threads(wtperf, opts->populate_threads, popth);
free(popth);
if (ret != 0)
return (ret);
/* Report if any worker threads didn't finish. */
- if (cfg->error != 0) {
- lprintf(cfg, WT_ERROR, 0,
+ if (wtperf->error) {
+ lprintf(wtperf, WT_ERROR, 0,
"Populate thread(s) exited without finishing.");
return (WT_ERROR);
}
- lprintf(cfg, 0, 1, "Finished load of %" PRIu32 " items", cfg->icount);
+ lprintf(wtperf,
+ 0, 1, "Finished load of %" PRIu32 " items", opts->icount);
msecs = WT_TIMEDIFF_MS(stop, start);
/*
@@ -1572,9 +1532,9 @@ execute_populate(CONFIG *cfg)
print_ops_sec = 0;
} else {
print_secs = (double)msecs / (double)MSEC_PER_SEC;
- print_ops_sec = (uint64_t)(cfg->icount / print_secs);
+ print_ops_sec = (uint64_t)(opts->icount / print_secs);
}
- lprintf(cfg, 0, 1,
+ lprintf(wtperf, 0, 1,
"Load time: %.2f\n" "load ops/sec: %" PRIu64,
print_secs, print_ops_sec);
@@ -1583,58 +1543,57 @@ execute_populate(CONFIG *cfg)
* set an unlimited timeout because if we close the connection
* then any in-progress compact/merge is aborted.
*/
- if (cfg->compact) {
- assert(cfg->async_threads > 0);
- lprintf(cfg, 0, 1, "Compact after populate");
- if ((ret = __wt_epoch(NULL, &start)) != 0) {
- lprintf(cfg, ret, 0, "Get time failed in populate.");
- return (ret);
- }
- tables = cfg->table_count;
- for (i = 0; i < cfg->table_count; i++) {
+ if (opts->compact) {
+ assert(opts->async_threads > 0);
+ lprintf(wtperf, 0, 1, "Compact after populate");
+ __wt_epoch(NULL, &start);
+ tables = opts->table_count;
+ for (i = 0; i < opts->table_count; i++) {
/*
* If no ops are available, retry. Any other error,
* return.
*/
- while ((ret = cfg->conn->async_new_op(cfg->conn,
- cfg->uris[i], "timeout=0", &cb, &asyncop)) == EBUSY)
+ while ((ret = wtperf->conn->async_new_op(
+ wtperf->conn, wtperf->uris[i],
+ "timeout=0", &cb, &asyncop)) == EBUSY)
(void)usleep(10000);
if (ret != 0)
return (ret);
asyncop->app_private = &tables;
if ((ret = asyncop->compact(asyncop)) != 0) {
- lprintf(cfg, ret, 0, "Async compact failed.");
+ lprintf(wtperf,
+ ret, 0, "Async compact failed.");
return (ret);
}
}
- if ((ret = cfg->conn->async_flush(cfg->conn)) != 0) {
- lprintf(cfg, ret, 0, "Populate async flush failed.");
- return (ret);
- }
- if ((ret = __wt_epoch(NULL, &stop)) != 0) {
- lprintf(cfg, ret, 0, "Get time failed in populate.");
+ if ((ret = wtperf->conn->async_flush(wtperf->conn)) != 0) {
+ lprintf(wtperf, ret, 0, "Populate async flush failed.");
return (ret);
}
- lprintf(cfg, 0, 1,
+ __wt_epoch(NULL, &stop);
+ lprintf(wtperf, 0, 1,
"Compact completed in %" PRIu64 " seconds",
(uint64_t)(WT_TIMEDIFF_SEC(stop, start)));
assert(tables == 0);
}
/* Stop cycling idle tables. */
- if ((ret = stop_idle_table_cycle(cfg, idle_table_cycle_thread)) != 0)
+ if ((ret = stop_idle_table_cycle(wtperf, idle_table_cycle_thread)) != 0)
return (ret);
return (0);
}
static int
-close_reopen(CONFIG *cfg)
+close_reopen(WTPERF *wtperf)
{
+ CONFIG_OPTS *opts;
int ret;
- if (!cfg->readonly && !cfg->reopen_connection)
+ opts = wtperf->opts;
+
+ if (!opts->readonly && !opts->reopen_connection)
return (0);
/*
* Reopen the connection. We do this so that the workload phase always
@@ -1642,16 +1601,16 @@ close_reopen(CONFIG *cfg)
* be identified. This is particularly important for LSM, where the
* merge algorithm is more aggressive for read-only trees.
*/
- /* cfg->conn is released no matter the return value from close(). */
- ret = cfg->conn->close(cfg->conn, NULL);
- cfg->conn = NULL;
+ /* wtperf->conn is released no matter the return value from close(). */
+ ret = wtperf->conn->close(wtperf->conn, NULL);
+ wtperf->conn = NULL;
if (ret != 0) {
- lprintf(cfg, ret, 0, "Closing the connection failed");
+ lprintf(wtperf, ret, 0, "Closing the connection failed");
return (ret);
}
if ((ret = wiredtiger_open(
- cfg->home, NULL, cfg->reopen_config, &cfg->conn)) != 0) {
- lprintf(cfg, ret, 0, "Re-opening the connection failed");
+ wtperf->home, NULL, wtperf->reopen_config, &wtperf->conn)) != 0) {
+ lprintf(wtperf, ret, 0, "Re-opening the connection failed");
return (ret);
}
/*
@@ -1660,10 +1619,10 @@ close_reopen(CONFIG *cfg)
* threads looking for work that will never arrive don't affect
* performance.
*/
- if (cfg->compact && cfg->use_asyncops == 0) {
- if ((ret = cfg->conn->reconfigure(
- cfg->conn, "async=(enabled=false)")) != 0) {
- lprintf(cfg, ret, 0, "Reconfigure async off failed");
+ if (opts->compact && !wtperf->use_asyncops) {
+ if ((ret = wtperf->conn->reconfigure(
+ wtperf->conn, "async=(enabled=false)")) != 0) {
+ lprintf(wtperf, ret, 0, "Reconfigure async off failed");
return (ret);
}
}
@@ -1671,10 +1630,11 @@ close_reopen(CONFIG *cfg)
}
static int
-execute_workload(CONFIG *cfg)
+execute_workload(WTPERF *wtperf)
{
- CONFIG_THREAD *threads;
+ CONFIG_OPTS *opts;
WORKLOAD *workp;
+ WTPERF_THREAD *threads;
WT_CONNECTION *conn;
WT_SESSION **sessions;
pthread_t idle_table_cycle_thread;
@@ -1685,9 +1645,11 @@ execute_workload(CONFIG *cfg)
int ret, t_ret;
void *(*pfunc)(void *);
- cfg->insert_key = 0;
- cfg->insert_ops = cfg->read_ops = cfg->truncate_ops = 0;
- cfg->update_ops = 0;
+ opts = wtperf->opts;
+
+ wtperf->insert_key = 0;
+ wtperf->insert_ops = wtperf->read_ops = wtperf->truncate_ops = 0;
+ wtperf->update_ops = 0;
last_ckpts = last_inserts = last_reads = last_truncates = 0;
last_updates = 0;
@@ -1696,38 +1658,40 @@ execute_workload(CONFIG *cfg)
sessions = NULL;
/* Start cycling idle tables. */
- if ((ret = start_idle_table_cycle(cfg, &idle_table_cycle_thread)) != 0)
+ if ((ret =
+ start_idle_table_cycle(wtperf, &idle_table_cycle_thread)) != 0)
return (ret);
- if (cfg->warmup != 0)
- cfg->in_warmup = 1;
+ if (opts->warmup != 0)
+ wtperf->in_warmup = true;
/* Allocate memory for the worker threads. */
- cfg->workers = dcalloc((size_t)cfg->workers_cnt, sizeof(CONFIG_THREAD));
+ wtperf->workers =
+ dcalloc((size_t)wtperf->workers_cnt, sizeof(WTPERF_THREAD));
- if (cfg->use_asyncops > 0) {
- lprintf(cfg, 0, 1, "Starting %" PRIu32 " async thread(s)",
- cfg->async_threads);
+ if (wtperf->use_asyncops) {
+ lprintf(wtperf, 0, 1, "Starting %" PRIu32 " async thread(s)",
+ opts->async_threads);
pfunc = worker_async;
} else
pfunc = worker;
- if (cfg->session_count_idle != 0) {
- sessions = dcalloc((size_t)cfg->session_count_idle,
+ if (opts->session_count_idle != 0) {
+ sessions = dcalloc((size_t)opts->session_count_idle,
sizeof(WT_SESSION *));
- conn = cfg->conn;
- for (i = 0; i < cfg->session_count_idle; ++i)
- if ((ret = conn->open_session(
- conn, NULL, cfg->sess_config, &sessions[i])) != 0) {
- lprintf(cfg, ret, 0,
+ conn = wtperf->conn;
+ for (i = 0; i < opts->session_count_idle; ++i)
+ if ((ret = conn->open_session(conn,
+ NULL, opts->sess_config, &sessions[i])) != 0) {
+ lprintf(wtperf, ret, 0,
"execute_workload: idle open_session");
goto err;
}
}
/* Start each workload. */
- for (threads = cfg->workers, i = 0,
- workp = cfg->workload; i < cfg->workload_cnt; ++i, ++workp) {
- lprintf(cfg, 0, 1,
+ for (threads = wtperf->workers, i = 0,
+ workp = wtperf->workload; i < wtperf->workload_cnt; ++i, ++workp) {
+ lprintf(wtperf, 0, 1,
"Starting workload #%u: %" PRId64 " threads, inserts=%"
PRId64 ", reads=%" PRId64 ", updates=%" PRId64
", truncate=%" PRId64 ", throttle=%" PRId64,
@@ -1736,25 +1700,26 @@ execute_workload(CONFIG *cfg)
workp->throttle);
/* Figure out the workload's schedule. */
- if ((ret = run_mix_schedule(cfg, workp)) != 0)
+ if ((ret = run_mix_schedule(wtperf, workp)) != 0)
goto err;
/* Start the workload's threads. */
if ((ret = start_threads(
- cfg, workp, threads, (u_int)workp->threads, pfunc)) != 0)
+ wtperf, workp, threads, (u_int)workp->threads, pfunc)) != 0)
goto err;
threads += workp->threads;
}
- if (cfg->warmup != 0) {
- lprintf(cfg, 0, 1,
- "Waiting for warmup duration of %" PRIu32, cfg->warmup);
- sleep(cfg->warmup);
- cfg->in_warmup = 0;
+ if (opts->warmup != 0) {
+ lprintf(wtperf, 0, 1,
+ "Waiting for warmup duration of %" PRIu32, opts->warmup);
+ sleep(opts->warmup);
+ wtperf->in_warmup = false;
}
- for (interval = cfg->report_interval, run_time = cfg->run_time,
- run_ops = cfg->run_ops; cfg->error == 0;) {
+ for (interval = opts->report_interval,
+ run_time = opts->run_time, run_ops = opts->run_ops;
+ !wtperf->error;) {
/*
* Sleep for one second at a time.
* If we are tracking run time, check to see if we're done, and
@@ -1769,59 +1734,60 @@ execute_workload(CONFIG *cfg)
}
/* Sum the operations we've done. */
- cfg->ckpt_ops = sum_ckpt_ops(cfg);
- cfg->insert_ops = sum_insert_ops(cfg);
- cfg->read_ops = sum_read_ops(cfg);
- cfg->update_ops = sum_update_ops(cfg);
- cfg->truncate_ops = sum_truncate_ops(cfg);
+ wtperf->ckpt_ops = sum_ckpt_ops(wtperf);
+ wtperf->insert_ops = sum_insert_ops(wtperf);
+ wtperf->read_ops = sum_read_ops(wtperf);
+ wtperf->update_ops = sum_update_ops(wtperf);
+ wtperf->truncate_ops = sum_truncate_ops(wtperf);
/* If we're checking total operations, see if we're done. */
if (run_ops != 0 && run_ops <=
- cfg->insert_ops + cfg->read_ops + cfg->update_ops)
+ wtperf->insert_ops + wtperf->read_ops + wtperf->update_ops)
break;
/* If writing out throughput information, see if it's time. */
if (interval == 0 || --interval > 0)
continue;
- interval = cfg->report_interval;
- cfg->totalsec += cfg->report_interval;
+ interval = opts->report_interval;
+ wtperf->totalsec += opts->report_interval;
- lprintf(cfg, 0, 1,
+ lprintf(wtperf, 0, 1,
"%" PRIu64 " reads, %" PRIu64 " inserts, %" PRIu64
" updates, %" PRIu64 " truncates, %" PRIu64
" checkpoints in %" PRIu32 " secs (%" PRIu32 " total secs)",
- cfg->read_ops - last_reads,
- cfg->insert_ops - last_inserts,
- cfg->update_ops - last_updates,
- cfg->truncate_ops - last_truncates,
- cfg->ckpt_ops - last_ckpts,
- cfg->report_interval, cfg->totalsec);
- last_reads = cfg->read_ops;
- last_inserts = cfg->insert_ops;
- last_updates = cfg->update_ops;
- last_truncates = cfg->truncate_ops;
- last_ckpts = cfg->ckpt_ops;
+ wtperf->read_ops - last_reads,
+ wtperf->insert_ops - last_inserts,
+ wtperf->update_ops - last_updates,
+ wtperf->truncate_ops - last_truncates,
+ wtperf->ckpt_ops - last_ckpts,
+ opts->report_interval, wtperf->totalsec);
+ last_reads = wtperf->read_ops;
+ last_inserts = wtperf->insert_ops;
+ last_updates = wtperf->update_ops;
+ last_truncates = wtperf->truncate_ops;
+ last_ckpts = wtperf->ckpt_ops;
}
/* Notify the worker threads they are done. */
-err: cfg->stop = 1;
+err: wtperf->stop = true;
/* Stop cycling idle tables. */
- if ((ret = stop_idle_table_cycle(cfg, idle_table_cycle_thread)) != 0)
+ if ((ret = stop_idle_table_cycle(wtperf, idle_table_cycle_thread)) != 0)
return (ret);
- if ((t_ret = stop_threads(
- cfg, (u_int)cfg->workers_cnt, cfg->workers)) != 0 && ret == 0)
+ if ((t_ret = stop_threads(wtperf,
+ (u_int)wtperf->workers_cnt, wtperf->workers)) != 0 && ret == 0)
ret = t_ret;
/* Drop tables if configured to and this isn't an error path */
- if (ret == 0 && cfg->drop_tables && (ret = drop_all_tables(cfg)) != 0)
- lprintf(cfg, ret, 0, "Drop tables failed.");
+ if (ret == 0 &&
+ opts->drop_tables && (ret = drop_all_tables(wtperf)) != 0)
+ lprintf(wtperf, ret, 0, "Drop tables failed.");
free(sessions);
/* Report if any worker threads didn't finish. */
- if (cfg->error != 0) {
- lprintf(cfg, WT_ERROR, 0,
+ if (wtperf->error) {
+ lprintf(wtperf, WT_ERROR, 0,
"Worker thread(s) exited without finishing.");
if (ret == 0)
ret = WT_ERROR;
@@ -1834,8 +1800,9 @@ err: cfg->stop = 1;
* existing table.
*/
static int
-find_table_count(CONFIG *cfg)
+find_table_count(WTPERF *wtperf)
{
+ CONFIG_OPTS *opts;
WT_CONNECTION *conn;
WT_CURSOR *cursor;
WT_SESSION *session;
@@ -1843,29 +1810,30 @@ find_table_count(CONFIG *cfg)
int ret, t_ret;
char *key;
- conn = cfg->conn;
+ opts = wtperf->opts;
+ conn = wtperf->conn;
max_icount = 0;
if ((ret = conn->open_session(
- conn, NULL, cfg->sess_config, &session)) != 0) {
- lprintf(cfg, ret, 0,
+ conn, NULL, opts->sess_config, &session)) != 0) {
+ lprintf(wtperf, ret, 0,
"find_table_count: open_session failed");
goto out;
}
- for (i = 0; i < cfg->table_count; i++) {
- if ((ret = session->open_cursor(session, cfg->uris[i],
+ for (i = 0; i < opts->table_count; i++) {
+ if ((ret = session->open_cursor(session, wtperf->uris[i],
NULL, NULL, &cursor)) != 0) {
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"find_table_count: open_cursor failed");
goto err;
}
if ((ret = cursor->prev(cursor)) != 0) {
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"find_table_count: cursor prev failed");
goto err;
}
if ((ret = cursor->get_key(cursor, &key)) != 0) {
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"find_table_count: cursor get_key failed");
goto err;
}
@@ -1874,7 +1842,7 @@ find_table_count(CONFIG *cfg)
max_icount = table_icount;
if ((ret = cursor->close(cursor)) != 0) {
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"find_table_count: cursor close failed");
goto err;
}
@@ -1882,91 +1850,99 @@ find_table_count(CONFIG *cfg)
err: if ((t_ret = session->close(session, NULL)) != 0) {
if (ret == 0)
ret = t_ret;
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"find_table_count: session close failed");
}
- cfg->icount = max_icount;
+ opts->icount = max_icount;
out: return (ret);
}
/*
- * Populate the uri array if more than one table is being used.
+ * Populate the uri array.
*/
static void
-create_uris(CONFIG *cfg)
+create_uris(WTPERF *wtperf)
{
- size_t base_uri_len;
+ CONFIG_OPTS *opts;
+ size_t len;
uint32_t i;
- char *uri;
- base_uri_len = strlen(cfg->base_uri);
- cfg->uris = dcalloc(cfg->table_count, sizeof(char *));
- for (i = 0; i < cfg->table_count; i++) {
- uri = cfg->uris[i] = dcalloc(base_uri_len + 6, 1);
- /*
- * If there is only one table, just use base name.
- */
- if (cfg->table_count == 1)
- memcpy(uri, cfg->base_uri, base_uri_len);
+ opts = wtperf->opts;
+
+ wtperf->uris = dcalloc(opts->table_count, sizeof(char *));
+ len = strlen("table:") + strlen(opts->table_name) + 20;
+ for (i = 0; i < opts->table_count; i++) {
+ /* If there is only one table, just use the base name. */
+ wtperf->uris[i] = dmalloc(len);
+ if (opts->table_count == 1)
+ snprintf(wtperf->uris[i],
+ len, "table:%s", opts->table_name);
else
- sprintf(uri, "%s%05d", cfg->base_uri, i);
+ snprintf(wtperf->uris[i],
+ len, "table:%s%05d", opts->table_name, i);
}
/* Create the log-like-table URI. */
- cfg->log_table_uri = dcalloc(base_uri_len + 11, 1);
- sprintf(cfg->log_table_uri, "%s_log_table", cfg->base_uri);
+ len = strlen("table:") +
+ strlen(opts->table_name) + strlen("_log_table") + 1;
+ wtperf->log_table_uri = dmalloc(len);
+ snprintf(
+ wtperf->log_table_uri, len, "table:%s_log_table", opts->table_name);
}
static int
-create_tables(CONFIG *cfg)
+create_tables(WTPERF *wtperf)
{
+ CONFIG_OPTS *opts;
WT_SESSION *session;
size_t i;
int ret;
char buf[512];
- if ((ret = cfg->conn->open_session(
- cfg->conn, NULL, cfg->sess_config, &session)) != 0) {
- lprintf(cfg, ret, 0,
- "Error opening a session on %s", cfg->home);
+ opts = wtperf->opts;
+
+ if ((ret = wtperf->conn->open_session(
+ wtperf->conn, NULL, opts->sess_config, &session)) != 0) {
+ lprintf(wtperf, ret, 0,
+ "Error opening a session on %s", wtperf->home);
return (ret);
}
- for (i = 0; i < cfg->table_count_idle; i++) {
- snprintf(buf, 512, "%s_idle%05d", cfg->uris[0], (int)i);
+ for (i = 0; i < opts->table_count_idle; i++) {
+ snprintf(buf, 512, "%s_idle%05d", wtperf->uris[0], (int)i);
if ((ret = session->create(
- session, buf, cfg->table_config)) != 0) {
- lprintf(cfg, ret, 0,
+ session, buf, opts->table_config)) != 0) {
+ lprintf(wtperf, ret, 0,
"Error creating idle table %s", buf);
return (ret);
}
}
- if (cfg->log_like_table && (ret = session->create(session,
- cfg->log_table_uri, "key_format=Q,value_format=S")) != 0) {
- lprintf(cfg, ret, 0, "Error creating log table %s", buf);
+ if (opts->log_like_table && (ret = session->create(session,
+ wtperf->log_table_uri, "key_format=Q,value_format=S")) != 0) {
+ lprintf(wtperf, ret, 0, "Error creating log table %s", buf);
return (ret);
}
- for (i = 0; i < cfg->table_count; i++) {
- if (cfg->log_partial && i > 0) {
+ for (i = 0; i < opts->table_count; i++) {
+ if (opts->log_partial && i > 0) {
if (((ret = session->create(session,
- cfg->uris[i], cfg->partial_config)) != 0)) {
- lprintf(cfg, ret, 0,
- "Error creating table %s", cfg->uris[i]);
+ wtperf->uris[i], wtperf->partial_config)) != 0)) {
+ lprintf(wtperf, ret, 0,
+ "Error creating table %s", wtperf->uris[i]);
return (ret);
}
} else if ((ret = session->create(
- session, cfg->uris[i], cfg->table_config)) != 0) {
- lprintf(cfg, ret, 0,
- "Error creating table %s", cfg->uris[i]);
+ session, wtperf->uris[i], opts->table_config)) != 0) {
+ lprintf(wtperf, ret, 0,
+ "Error creating table %s", wtperf->uris[i]);
return (ret);
}
- if (cfg->index) {
+ if (opts->index) {
snprintf(buf, 512, "index:%s:val_idx",
- cfg->uris[i] + strlen("table:"));
+ wtperf->uris[i] + strlen("table:"));
if ((ret = session->create(
session, buf, "columns=(val)")) != 0) {
- lprintf(cfg, ret, 0,
+ lprintf(wtperf, ret, 0,
"Error creating index %s", buf);
return (ret);
}
@@ -1974,76 +1950,213 @@ create_tables(CONFIG *cfg)
}
if ((ret = session->close(session, NULL)) != 0) {
- lprintf(cfg, ret, 0, "Error closing session");
+ lprintf(wtperf, ret, 0, "Error closing session");
return (ret);
}
return (0);
}
+/*
+ * wtperf_copy --
+ * Create a new WTPERF structure as a duplicate of a previous one.
+ */
+static void
+wtperf_copy(const WTPERF *src, WTPERF **retp)
+{
+ CONFIG_OPTS *opts;
+ WTPERF *dest;
+ size_t i;
+
+ opts = src->opts;
+
+ dest = dcalloc(1, sizeof(WTPERF));
+
+ /*
+ * Don't copy the home and monitor directories, they are filled in by
+ * our caller, explicitly.
+ */
+
+ if (src->partial_config != NULL)
+ dest->partial_config = dstrdup(src->partial_config);
+ if (src->reopen_config != NULL)
+ dest->reopen_config = dstrdup(src->reopen_config);
+
+ if (src->uris != NULL) {
+ dest->uris = dcalloc(opts->table_count, sizeof(char *));
+ for (i = 0; i < opts->table_count; i++)
+ dest->uris[i] = dstrdup(src->uris[i]);
+ }
+
+ if (src->async_config != NULL)
+ dest->async_config = dstrdup(src->async_config);
+
+ dest->ckptthreads = NULL;
+ dest->popthreads = NULL;
+
+ dest->workers = NULL;
+ dest->workers_cnt = src->workers_cnt;
+ if (src->workload_cnt != 0) {
+ dest->workload_cnt = src->workload_cnt;
+ dest->workload = dcalloc(src->workload_cnt, sizeof(WORKLOAD));
+ memcpy(dest->workload,
+ src->workload, src->workload_cnt * sizeof(WORKLOAD));
+ }
+
+ TAILQ_INIT(&dest->stone_head);
+
+ dest->opts = src->opts;
+
+ *retp = dest;
+}
+
+/*
+ * wtperf_free --
+ * Free any storage allocated in the WTPERF structure.
+ */
+static void
+wtperf_free(WTPERF *wtperf)
+{
+ CONFIG_OPTS *opts;
+ size_t i;
+
+ opts = wtperf->opts;
+
+ free(wtperf->home);
+ free(wtperf->monitor_dir);
+ free(wtperf->partial_config);
+ free(wtperf->reopen_config);
+ free(wtperf->log_table_uri);
+
+ if (wtperf->uris != NULL) {
+ for (i = 0; i < opts->table_count; i++)
+ free(wtperf->uris[i]);
+ free(wtperf->uris);
+ }
+
+ free(wtperf->async_config);
+
+ free(wtperf->ckptthreads);
+ free(wtperf->popthreads);
+
+ free(wtperf->workers);
+ free(wtperf->workload);
+
+ cleanup_truncate_config(wtperf);
+}
+
+/*
+ * config_compress --
+ * Parse the compression configuration.
+ */
+static int
+config_compress(WTPERF *wtperf)
+{
+ CONFIG_OPTS *opts;
+ int ret;
+ const char *s;
+
+ opts = wtperf->opts;
+ ret = 0;
+
+ s = opts->compression;
+ if (strcmp(s, "none") == 0) {
+ wtperf->compress_ext = NULL;
+ wtperf->compress_table = NULL;
+ } else if (strcmp(s, "lz4") == 0) {
+#ifndef HAVE_BUILTIN_EXTENSION_LZ4
+ wtperf->compress_ext = LZ4_EXT;
+#endif
+ wtperf->compress_table = LZ4_BLK;
+ } else if (strcmp(s, "snappy") == 0) {
+#ifndef HAVE_BUILTIN_EXTENSION_SNAPPY
+ wtperf->compress_ext = SNAPPY_EXT;
+#endif
+ wtperf->compress_table = SNAPPY_BLK;
+ } else if (strcmp(s, "zlib") == 0) {
+#ifndef HAVE_BUILTIN_EXTENSION_ZLIB
+ wtperf->compress_ext = ZLIB_EXT;
+#endif
+ wtperf->compress_table = ZLIB_BLK;
+ } else if (strcmp(s, "zstd") == 0) {
+#ifndef HAVE_BUILTIN_EXTENSION_ZSTD
+ wtperf->compress_ext = ZSTD_EXT;
+#endif
+ wtperf->compress_table = ZSTD_BLK;
+ } else {
+ fprintf(stderr,
+ "invalid compression configuration: %s\n", s);
+ ret = EINVAL;
+ }
+ return (ret);
+
+}
+
static int
-start_all_runs(CONFIG *cfg)
+start_all_runs(WTPERF *wtperf)
{
- CONFIG *next_cfg, **configs;
+ CONFIG_OPTS *opts;
+ WTPERF *next_wtperf, **wtperfs;
pthread_t *threads;
- size_t home_len, i;
+ size_t i, len;
int ret, t_ret;
- char *new_home;
+ opts = wtperf->opts;
+ wtperfs = NULL;
ret = 0;
- configs = NULL;
- if (cfg->database_count == 1)
- return (start_run(cfg));
+ if (opts->database_count == 1)
+ return (start_run(wtperf));
- /* Allocate an array to hold our config struct copies. */
- configs = dcalloc(cfg->database_count, sizeof(CONFIG *));
+ /* Allocate an array to hold our WTPERF copies. */
+ wtperfs = dcalloc(opts->database_count, sizeof(WTPERF *));
/* Allocate an array to hold our thread IDs. */
- threads = dcalloc(cfg->database_count, sizeof(pthread_t));
-
- home_len = strlen(cfg->home);
- for (i = 0; i < cfg->database_count; i++) {
- next_cfg = dcalloc(1, sizeof(CONFIG));
- configs[i] = next_cfg;
- config_copy(next_cfg, cfg);
-
- /* Setup a unique home directory for each database. */
- new_home = dmalloc(home_len + 5);
- snprintf(new_home, home_len + 5, "%s/D%02d", cfg->home, (int)i);
- free(next_cfg->home);
- next_cfg->home = new_home;
-
- /* If the monitor dir is default, update it too. */
- if (strcmp(cfg->monitor_dir, cfg->home) == 0) {
- free(next_cfg->monitor_dir);
- next_cfg->monitor_dir = dstrdup(new_home);
- }
+ threads = dcalloc(opts->database_count, sizeof(pthread_t));
- /* If creating the sub-database, recreate its home */
- if (cfg->create != 0)
- recreate_dir(next_cfg->home);
+ for (i = 0; i < opts->database_count; i++) {
+ wtperf_copy(wtperf, &next_wtperf);
+ wtperfs[i] = next_wtperf;
+
+ /*
+ * Set up unique home/monitor directories for each database.
+ * Re-create the directories if creating the databases.
+ */
+ len = strlen(wtperf->home) + 5;
+ next_wtperf->home = dmalloc(len);
+ snprintf(
+ next_wtperf->home, len, "%s/D%02d", wtperf->home, (int)i);
+ if (opts->create != 0)
+ recreate_dir(next_wtperf->home);
+
+ len = strlen(wtperf->monitor_dir) + 5;
+ next_wtperf->monitor_dir = dmalloc(len);
+ snprintf(next_wtperf->monitor_dir,
+ len, "%s/D%02d", wtperf->monitor_dir, (int)i);
+ if (opts->create != 0 &&
+ strcmp(next_wtperf->home, next_wtperf->monitor_dir) != 0)
+ recreate_dir(next_wtperf->monitor_dir);
if ((ret = pthread_create(
- &threads[i], NULL, thread_run_wtperf, next_cfg)) != 0) {
- lprintf(cfg, ret, 0, "Error creating thread");
+ &threads[i], NULL, thread_run_wtperf, next_wtperf)) != 0) {
+ lprintf(wtperf, ret, 0, "Error creating thread");
goto err;
}
}
/* Wait for threads to finish. */
- for (i = 0; i < cfg->database_count; i++)
+ for (i = 0; i < opts->database_count; i++)
if ((t_ret = pthread_join(threads[i], NULL)) != 0) {
- lprintf(cfg, ret, 0, "Error joining thread");
+ lprintf(wtperf, ret, 0, "Error joining thread");
if (ret == 0)
ret = t_ret;
}
-err: for (i = 0; i < cfg->database_count && configs[i] != NULL; i++) {
- config_free(configs[i]);
- free(configs[i]);
+err: for (i = 0; i < opts->database_count && wtperfs[i] != NULL; i++) {
+ wtperf_free(wtperfs[i]);
+ free(wtperfs[i]);
}
- free(configs);
+ free(wtperfs);
free(threads);
return (ret);
@@ -2053,120 +2166,124 @@ err: for (i = 0; i < cfg->database_count && configs[i] != NULL; i++) {
static void *
thread_run_wtperf(void *arg)
{
- CONFIG *cfg;
+ WTPERF *wtperf;
int ret;
- cfg = (CONFIG *)arg;
- if ((ret = start_run(cfg)) != 0)
- lprintf(cfg, ret, 0, "Run failed for: %s.", cfg->home);
+ wtperf = (WTPERF *)arg;
+ if ((ret = start_run(wtperf)) != 0)
+ lprintf(wtperf, ret, 0, "Run failed for: %s.", wtperf->home);
return (NULL);
}
static int
-start_run(CONFIG *cfg)
+start_run(WTPERF *wtperf)
{
+ CONFIG_OPTS *opts;
pthread_t monitor_thread;
uint64_t total_ops;
uint32_t run_time;
int monitor_created, ret, t_ret;
+ opts = wtperf->opts;
monitor_created = ret = 0;
/* [-Wconditional-uninitialized] */
memset(&monitor_thread, 0, sizeof(monitor_thread));
- if ((ret = setup_log_file(cfg)) != 0)
+ if ((ret = setup_log_file(wtperf)) != 0)
goto err;
if ((ret = wiredtiger_open( /* Open the real connection. */
- cfg->home, NULL, cfg->conn_config, &cfg->conn)) != 0) {
- lprintf(cfg, ret, 0, "Error connecting to %s", cfg->home);
+ wtperf->home, NULL, opts->conn_config, &wtperf->conn)) != 0) {
+ lprintf(wtperf, ret, 0, "Error connecting to %s", wtperf->home);
goto err;
}
- create_uris(cfg);
+ create_uris(wtperf);
/* If creating, create the tables. */
- if (cfg->create != 0 && (ret = create_tables(cfg)) != 0)
+ if (opts->create != 0 && (ret = create_tables(wtperf)) != 0)
goto err;
/* Start the monitor thread. */
- if (cfg->sample_interval != 0) {
+ if (opts->sample_interval != 0) {
if ((ret = pthread_create(
- &monitor_thread, NULL, monitor, cfg)) != 0) {
- lprintf(
- cfg, ret, 0, "Error creating monitor thread.");
+ &monitor_thread, NULL, monitor, wtperf)) != 0) {
+ lprintf(wtperf,
+ ret, 0, "Error creating monitor thread.");
goto err;
}
monitor_created = 1;
}
/* If creating, populate the table. */
- if (cfg->create != 0 && execute_populate(cfg) != 0)
+ if (opts->create != 0 && execute_populate(wtperf) != 0)
goto err;
/* Optional workload. */
- if (cfg->workers_cnt != 0 &&
- (cfg->run_time != 0 || cfg->run_ops != 0)) {
+ if (wtperf->workers_cnt != 0 &&
+ (opts->run_time != 0 || opts->run_ops != 0)) {
/*
* If we have a workload, close and reopen the connection so
* that LSM can detect read-only workloads.
*/
- if (close_reopen(cfg) != 0)
+ if (close_reopen(wtperf) != 0)
goto err;
/* Didn't create, set insert count. */
- if (cfg->create == 0 && cfg->random_range == 0 &&
- find_table_count(cfg) != 0)
+ if (opts->create == 0 &&
+ opts->random_range == 0 && find_table_count(wtperf) != 0)
goto err;
/* Start the checkpoint thread. */
- if (cfg->checkpoint_threads != 0) {
- lprintf(cfg, 0, 1,
+ if (opts->checkpoint_threads != 0) {
+ lprintf(wtperf, 0, 1,
"Starting %" PRIu32 " checkpoint thread(s)",
- cfg->checkpoint_threads);
- cfg->ckptthreads = dcalloc(
- cfg->checkpoint_threads, sizeof(CONFIG_THREAD));
- if (start_threads(cfg, NULL, cfg->ckptthreads,
- cfg->checkpoint_threads, checkpoint_worker) != 0)
+ opts->checkpoint_threads);
+ wtperf->ckptthreads = dcalloc(
+ opts->checkpoint_threads, sizeof(WTPERF_THREAD));
+ if (start_threads(wtperf, NULL, wtperf->ckptthreads,
+ opts->checkpoint_threads, checkpoint_worker) != 0)
goto err;
}
/* Execute the workload. */
- if ((ret = execute_workload(cfg)) != 0)
+ if ((ret = execute_workload(wtperf)) != 0)
goto err;
/* One final summation of the operations we've completed. */
- cfg->read_ops = sum_read_ops(cfg);
- cfg->insert_ops = sum_insert_ops(cfg);
- cfg->truncate_ops = sum_truncate_ops(cfg);
- cfg->update_ops = sum_update_ops(cfg);
- cfg->ckpt_ops = sum_ckpt_ops(cfg);
- total_ops = cfg->read_ops + cfg->insert_ops + cfg->update_ops;
-
- run_time = cfg->run_time == 0 ? 1 : cfg->run_time;
- lprintf(cfg, 0, 1,
+ wtperf->read_ops = sum_read_ops(wtperf);
+ wtperf->insert_ops = sum_insert_ops(wtperf);
+ wtperf->truncate_ops = sum_truncate_ops(wtperf);
+ wtperf->update_ops = sum_update_ops(wtperf);
+ wtperf->ckpt_ops = sum_ckpt_ops(wtperf);
+ total_ops =
+ wtperf->read_ops + wtperf->insert_ops + wtperf->update_ops;
+
+ run_time = opts->run_time == 0 ? 1 : opts->run_time;
+ lprintf(wtperf, 0, 1,
"Executed %" PRIu64 " read operations (%" PRIu64
"%%) %" PRIu64 " ops/sec",
- cfg->read_ops, (cfg->read_ops * 100) / total_ops,
- cfg->read_ops / run_time);
- lprintf(cfg, 0, 1,
+ wtperf->read_ops, (wtperf->read_ops * 100) / total_ops,
+ wtperf->read_ops / run_time);
+ lprintf(wtperf, 0, 1,
"Executed %" PRIu64 " insert operations (%" PRIu64
"%%) %" PRIu64 " ops/sec",
- cfg->insert_ops, (cfg->insert_ops * 100) / total_ops,
- cfg->insert_ops / run_time);
- lprintf(cfg, 0, 1,
+ wtperf->insert_ops, (wtperf->insert_ops * 100) / total_ops,
+ wtperf->insert_ops / run_time);
+ lprintf(wtperf, 0, 1,
"Executed %" PRIu64 " truncate operations (%" PRIu64
"%%) %" PRIu64 " ops/sec",
- cfg->truncate_ops, (cfg->truncate_ops * 100) / total_ops,
- cfg->truncate_ops / run_time);
- lprintf(cfg, 0, 1,
+ wtperf->truncate_ops,
+ (wtperf->truncate_ops * 100) / total_ops,
+ wtperf->truncate_ops / run_time);
+ lprintf(wtperf, 0, 1,
"Executed %" PRIu64 " update operations (%" PRIu64
"%%) %" PRIu64 " ops/sec",
- cfg->update_ops, (cfg->update_ops * 100) / total_ops,
- cfg->update_ops / run_time);
- lprintf(cfg, 0, 1,
+ wtperf->update_ops, (wtperf->update_ops * 100) / total_ops,
+ wtperf->update_ops / run_time);
+ lprintf(wtperf, 0, 1,
"Executed %" PRIu64 " checkpoint operations",
- cfg->ckpt_ops);
+ wtperf->ckpt_ops);
- latency_print(cfg);
+ latency_print(wtperf);
}
if (0) {
@@ -2175,40 +2292,41 @@ err: if (ret == 0)
}
/* Notify the worker threads they are done. */
- cfg->stop = 1;
+ wtperf->stop = true;
- if ((t_ret = stop_threads(cfg, 1, cfg->ckptthreads)) != 0)
+ if ((t_ret = stop_threads(wtperf, 1, wtperf->ckptthreads)) != 0)
if (ret == 0)
ret = t_ret;
if (monitor_created != 0 &&
(t_ret = pthread_join(monitor_thread, NULL)) != 0) {
- lprintf(cfg, ret, 0, "Error joining monitor thread.");
+ lprintf(wtperf, ret, 0, "Error joining monitor thread.");
if (ret == 0)
ret = t_ret;
}
- if (cfg->conn != NULL &&
- (t_ret = cfg->conn->close(cfg->conn, NULL)) != 0) {
- lprintf(cfg, t_ret, 0,
- "Error closing connection to %s", cfg->home);
+ if (wtperf->conn != NULL && opts->close_conn &&
+ (t_ret = wtperf->conn->close(wtperf->conn, NULL)) != 0) {
+ lprintf(wtperf, t_ret, 0,
+ "Error closing connection to %s", wtperf->home);
if (ret == 0)
ret = t_ret;
}
if (ret == 0) {
- if (cfg->run_time == 0 && cfg->run_ops == 0)
- lprintf(cfg, 0, 1, "Run completed");
+ if (opts->run_time == 0 && opts->run_ops == 0)
+ lprintf(wtperf, 0, 1, "Run completed");
else
- lprintf(cfg, 0, 1, "Run completed: %" PRIu32 " %s",
- cfg->run_time == 0 ? cfg->run_ops : cfg->run_time,
- cfg->run_time == 0 ? "operations" : "seconds");
+ lprintf(wtperf, 0, 1, "Run completed: %" PRIu32 " %s",
+ opts->run_time == 0 ?
+ opts->run_ops : opts->run_time,
+ opts->run_time == 0 ? "operations" : "seconds");
}
- if (cfg->logf != NULL) {
- if ((t_ret = fflush(cfg->logf)) != 0 && ret == 0)
+ if (wtperf->logf != NULL) {
+ if ((t_ret = fflush(wtperf->logf)) != 0 && ret == 0)
ret = t_ret;
- if ((t_ret = fclose(cfg->logf)) != 0 && ret == 0)
+ if ((t_ret = fclose(wtperf->logf)) != 0 && ret == 0)
ret = t_ret;
}
return (ret);
@@ -2216,33 +2334,56 @@ err: if (ret == 0)
extern int __wt_optind, __wt_optreset;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
+
+/*
+ * usage --
+ * wtperf usage print, no error.
+ */
+static void
+usage(void)
+{
+ printf("wtperf [-C config] "
+ "[-H mount] [-h home] [-O file] [-o option] [-T config]\n");
+ printf("\t-C <string> additional connection configuration\n");
+ printf("\t (added to option conn_config)\n");
+ printf("\t-H <mount> configure Helium volume mount point\n");
+ printf("\t-h <string> Wired Tiger home must exist, default WT_TEST\n");
+ printf("\t-O <file> file contains options as listed below\n");
+ printf("\t-o option=val[,option=val,...] set options listed below\n");
+ printf("\t-T <string> additional table configuration\n");
+ printf("\t (added to option table_config)\n");
+ printf("\n");
+ config_opt_usage();
+}
int
main(int argc, char *argv[])
{
- CONFIG *cfg, _cfg;
+ CONFIG_OPTS *opts;
+ WTPERF *wtperf, _wtperf;
size_t req_len, sreq_len;
bool monitor_set;
int ch, ret;
- const char *opts = "C:h:m:O:o:T:";
+ const char *cmdflags = "C:h:m:O:o:T:";
const char *config_opts;
- char *cc_buf, *sess_cfg, *tc_buf, *user_cconfig, *user_tconfig;
+ char *cc_buf, *path, *sess_cfg, *tc_buf, *user_cconfig, *user_tconfig;
+
+ /* The first WTPERF structure (from which all others are derived). */
+ wtperf = &_wtperf;
+ memset(wtperf, 0, sizeof(*wtperf));
+ wtperf->home = dstrdup(DEFAULT_HOME);
+ wtperf->monitor_dir = dstrdup(DEFAULT_MONITOR_DIR);
+ TAILQ_INIT(&wtperf->stone_head);
+ config_opt_init(&wtperf->opts);
+ opts = wtperf->opts;
monitor_set = false;
ret = 0;
config_opts = NULL;
cc_buf = sess_cfg = tc_buf = user_cconfig = user_tconfig = NULL;
- /* Setup the default configuration values. */
- cfg = &_cfg;
- memset(cfg, 0, sizeof(*cfg));
- config_copy(cfg, &default_cfg);
- cfg->home = dstrdup(DEFAULT_HOME);
- cfg->monitor_dir = dstrdup(DEFAULT_MONITOR_DIR);
-
/* Do a basic validation of options, and home is needed before open. */
- while ((ch = __wt_getopt("wtperf", argc, argv, opts)) != EOF)
+ while ((ch = __wt_getopt("wtperf", argc, argv, cmdflags)) != EOF)
switch (ch) {
case 'C':
if (user_cconfig == NULL)
@@ -2256,12 +2397,12 @@ main(int argc, char *argv[])
}
break;
case 'h':
- free(cfg->home);
- cfg->home = dstrdup(__wt_optarg);
+ free(wtperf->home);
+ wtperf->home = dstrdup(__wt_optarg);
break;
case 'm':
- free(cfg->monitor_dir);
- cfg->monitor_dir = dstrdup(__wt_optarg);
+ free(wtperf->monitor_dir);
+ wtperf->monitor_dir = dstrdup(__wt_optarg);
monitor_set = true;
break;
case 'O':
@@ -2288,47 +2429,48 @@ main(int argc, char *argv[])
* monitor directory to the home dir.
*/
if (!monitor_set) {
- free(cfg->monitor_dir);
- cfg->monitor_dir = dstrdup(cfg->home);
+ free(wtperf->monitor_dir);
+ wtperf->monitor_dir = dstrdup(wtperf->home);
}
/* Parse configuration settings from configuration file. */
- if (config_opts != NULL && config_opt_file(cfg, config_opts) != 0)
+ if (config_opts != NULL && config_opt_file(wtperf, config_opts) != 0)
goto einval;
/* Parse options that override values set via a configuration file. */
__wt_optreset = __wt_optind = 1;
- while ((ch = __wt_getopt("wtperf", argc, argv, opts)) != EOF)
+ while ((ch = __wt_getopt("wtperf", argc, argv, cmdflags)) != EOF)
switch (ch) {
case 'o':
/* Allow -o key=value */
- if (config_opt_line(cfg, __wt_optarg) != 0)
+ if (config_opt_str(wtperf, __wt_optarg) != 0)
goto einval;
break;
}
- if (cfg->populate_threads == 0 && cfg->icount != 0) {
- lprintf(cfg, 1, 0,
+ if (opts->populate_threads == 0 && opts->icount != 0) {
+ lprintf(wtperf, 1, 0,
"Cannot have 0 populate threads when icount is set\n");
goto err;
}
- cfg->async_config = NULL;
+ wtperf->async_config = NULL;
/*
* If the user specified async_threads we use async for all ops.
* If the user wants compaction, then we also enable async for
* the compact operation, but not for the workloads.
*/
- if (cfg->async_threads > 0) {
- if (F_ISSET(cfg, CFG_TRUNCATE)) {
- lprintf(cfg, 1, 0, "Cannot run truncate and async\n");
+ if (opts->async_threads > 0) {
+ if (F_ISSET(wtperf, CFG_TRUNCATE)) {
+ lprintf(wtperf,
+ 1, 0, "Cannot run truncate and async\n");
goto err;
}
- cfg->use_asyncops = 1;
+ wtperf->use_asyncops = true;
}
- if (cfg->compact && cfg->async_threads == 0)
- cfg->async_threads = 2;
- if (cfg->async_threads > 0) {
+ if (opts->compact && opts->async_threads == 0)
+ opts->async_threads = 2;
+ if (opts->async_threads > 0) {
/*
* The maximum number of async threads is two digits, so just
* use that to compute the space we need. Assume the default
@@ -2336,145 +2478,133 @@ main(int argc, char *argv[])
* to 4096 if needed.
*/
req_len = strlen(",async=(enabled=true,threads=)") + 4;
- cfg->async_config = dmalloc(req_len);
- snprintf(cfg->async_config, req_len,
+ wtperf->async_config = dmalloc(req_len);
+ snprintf(wtperf->async_config, req_len,
",async=(enabled=true,threads=%" PRIu32 ")",
- cfg->async_threads);
+ opts->async_threads);
}
- if ((ret = config_compress(cfg)) != 0)
+ if ((ret = config_compress(wtperf)) != 0)
goto err;
/* You can't have truncate on a random collection. */
- if (F_ISSET(cfg, CFG_TRUNCATE) && cfg->random_range) {
- lprintf(cfg, 1, 0, "Cannot run truncate and random_range\n");
+ if (F_ISSET(wtperf, CFG_TRUNCATE) && opts->random_range) {
+ lprintf(wtperf, 1, 0, "Cannot run truncate and random_range\n");
goto err;
}
/* We can't run truncate with more than one table. */
- if (F_ISSET(cfg, CFG_TRUNCATE) && cfg->table_count > 1) {
- lprintf(cfg, 1, 0, "Cannot truncate more than 1 table\n");
+ if (F_ISSET(wtperf, CFG_TRUNCATE) && opts->table_count > 1) {
+ lprintf(wtperf, 1, 0, "Cannot truncate more than 1 table\n");
goto err;
}
- /* Build the URI from the table name. */
- req_len = strlen("table:") + strlen(cfg->table_name) + 2;
- cfg->base_uri = dmalloc(req_len);
- snprintf(cfg->base_uri, req_len, "table:%s", cfg->table_name);
-
/* Make stdout line buffered, so verbose output appears quickly. */
__wt_stream_set_line_buffer(stdout);
/* Concatenate non-default configuration strings. */
- if (cfg->verbose > 1 || user_cconfig != NULL ||
- cfg->session_count_idle > 0 || cfg->compress_ext != NULL ||
- cfg->async_config != NULL) {
- req_len = strlen(debug_cconfig) + 3;
+ if (opts->verbose > 1 || user_cconfig != NULL ||
+ opts->session_count_idle > 0 || wtperf->compress_ext != NULL ||
+ wtperf->async_config != NULL) {
+ req_len = strlen(debug_cconfig) + 20;
if (user_cconfig != NULL)
req_len += strlen(user_cconfig);
- if (cfg->async_config != NULL)
- req_len += strlen(cfg->async_config);
- if (cfg->compress_ext != NULL)
- req_len += strlen(cfg->compress_ext);
- if (cfg->session_count_idle > 0) {
+ if (wtperf->async_config != NULL)
+ req_len += strlen(wtperf->async_config);
+ if (wtperf->compress_ext != NULL)
+ req_len += strlen(wtperf->compress_ext);
+ if (opts->session_count_idle > 0) {
sreq_len = strlen(",session_max=") + 6;
req_len += sreq_len;
sess_cfg = dmalloc(sreq_len);
snprintf(sess_cfg, sreq_len,
",session_max=%" PRIu32,
- cfg->session_count_idle + cfg->workers_cnt +
- cfg->populate_threads + 10);
+ opts->session_count_idle +
+ wtperf->workers_cnt + opts->populate_threads + 10);
}
cc_buf = dmalloc(req_len);
- /*
- * This is getting hard to parse.
- */
- snprintf(cc_buf, req_len, "%s%s%s%s%s%s%s",
- cfg->async_config ? cfg->async_config : "",
- cfg->compress_ext ? cfg->compress_ext : "",
- cfg->verbose > 1 && strlen(debug_cconfig) ? ",": "",
- cfg->verbose > 1 &&
- strlen(debug_cconfig) ? debug_cconfig : "",
- sess_cfg ? sess_cfg : "",
- user_cconfig ? ",": "",
- user_cconfig ? user_cconfig : "");
- if (strlen(cc_buf))
- if ((ret = config_opt_str(
- cfg, "conn_config", cc_buf)) != 0)
- goto err;
+ snprintf(cc_buf, req_len, "%s,%s,%s,%s,%s",
+ wtperf->async_config ? wtperf->async_config : "",
+ wtperf->compress_ext ? wtperf->compress_ext : "",
+ opts->verbose > 1 ? debug_cconfig : "",
+ sess_cfg != NULL ? sess_cfg : "",
+ user_cconfig != NULL ? user_cconfig : "");
+ if (strlen(cc_buf) && (ret =
+ config_opt_name_value(wtperf, "conn_config", cc_buf)) != 0)
+ goto err;
}
- if (cfg->verbose > 1 || cfg->index ||
- user_tconfig != NULL || cfg->compress_table != NULL) {
- req_len = strlen(debug_tconfig) + 3;
+ if (opts->verbose > 1 || opts->index ||
+ user_tconfig != NULL || wtperf->compress_table != NULL) {
+ req_len = strlen(debug_tconfig) + 20;
if (user_tconfig != NULL)
req_len += strlen(user_tconfig);
- if (cfg->compress_table != NULL)
- req_len += strlen(cfg->compress_table);
- if (cfg->index)
+ if (wtperf->compress_table != NULL)
+ req_len += strlen(wtperf->compress_table);
+ if (opts->index)
req_len += strlen(INDEX_COL_NAMES);
tc_buf = dmalloc(req_len);
- /*
- * This is getting hard to parse.
- */
- snprintf(tc_buf, req_len, "%s%s%s%s%s%s",
- cfg->index ? INDEX_COL_NAMES : "",
- cfg->compress_table ? cfg->compress_table : "",
- cfg->verbose > 1 && strlen(debug_tconfig) ? ",": "",
- cfg->verbose > 1 &&
- strlen(debug_tconfig) ? debug_tconfig : "",
- user_tconfig ? ",": "",
+ snprintf(tc_buf, req_len, "%s,%s,%s,%s",
+ opts->index ? INDEX_COL_NAMES : "",
+ wtperf->compress_table != NULL ?
+ wtperf->compress_table : "",
+ opts->verbose > 1 ? debug_tconfig : "",
user_tconfig ? user_tconfig : "");
- if (strlen(tc_buf))
- if ((ret = config_opt_str(
- cfg, "table_config", tc_buf)) != 0)
- goto err;
+ if (strlen(tc_buf) && (ret =
+ config_opt_name_value(wtperf, "table_config", tc_buf)) != 0)
+ goto err;
}
- if (cfg->log_partial && cfg->table_count > 1) {
- req_len = strlen(cfg->table_config) +
+ if (opts->log_partial && opts->table_count > 1) {
+ req_len = strlen(opts->table_config) +
strlen(LOG_PARTIAL_CONFIG) + 1;
- cfg->partial_config = dmalloc(req_len);
- snprintf(cfg->partial_config, req_len, "%s%s",
- cfg->table_config, LOG_PARTIAL_CONFIG);
+ wtperf->partial_config = dmalloc(req_len);
+ snprintf(wtperf->partial_config, req_len, "%s%s",
+ opts->table_config, LOG_PARTIAL_CONFIG);
}
/*
* Set the config for reopen. If readonly add in that string.
* If not readonly then just copy the original conn_config.
*/
- if (cfg->readonly)
- req_len = strlen(cfg->conn_config) +
+ if (opts->readonly)
+ req_len = strlen(opts->conn_config) +
strlen(READONLY_CONFIG) + 1;
else
- req_len = strlen(cfg->conn_config) + 1;
- cfg->reopen_config = dmalloc(req_len);
- if (cfg->readonly)
- snprintf(cfg->reopen_config, req_len, "%s%s",
- cfg->conn_config, READONLY_CONFIG);
+ req_len = strlen(opts->conn_config) + 1;
+ wtperf->reopen_config = dmalloc(req_len);
+ if (opts->readonly)
+ snprintf(wtperf->reopen_config, req_len, "%s%s",
+ opts->conn_config, READONLY_CONFIG);
else
- snprintf(cfg->reopen_config, req_len, "%s",
- cfg->conn_config);
+ snprintf(wtperf->reopen_config,
+ req_len, "%s", opts->conn_config);
/* Sanity-check the configuration. */
- if ((ret = config_sanity(cfg)) != 0)
+ if ((ret = config_sanity(wtperf)) != 0)
goto err;
/* If creating, remove and re-create the home directory. */
- if (cfg->create != 0)
- recreate_dir(cfg->home);
+ if (opts->create != 0)
+ recreate_dir(wtperf->home);
/* Write a copy of the config. */
- config_to_file(cfg);
+ req_len = strlen(wtperf->home) + strlen("/CONFIG.wtperf") + 1;
+ path = dmalloc(req_len);
+ snprintf(path, req_len, "%s/CONFIG.wtperf", wtperf->home);
+ config_opt_log(opts, path);
+ free(path);
/* Display the configuration. */
- if (cfg->verbose > 1)
- config_print(cfg);
+ if (opts->verbose > 1)
+ config_opt_print(wtperf);
- if ((ret = start_all_runs(cfg)) != 0)
+ if ((ret = start_all_runs(wtperf)) != 0)
goto err;
if (0) {
einval: ret = EINVAL;
}
-err: config_free(cfg);
+err: wtperf_free(wtperf);
+ config_opt_cleanup(opts);
+
free(cc_buf);
free(sess_cfg);
free(tc_buf);
@@ -2485,26 +2615,26 @@ err: config_free(cfg);
}
static int
-start_threads(CONFIG *cfg,
- WORKLOAD *workp, CONFIG_THREAD *base, u_int num, void *(*func)(void *))
+start_threads(WTPERF *wtperf,
+ WORKLOAD *workp, WTPERF_THREAD *base, u_int num, void *(*func)(void *))
{
- CONFIG_THREAD *thread;
+ CONFIG_OPTS *opts;
+ WTPERF_THREAD *thread;
u_int i;
int ret;
+ opts = wtperf->opts;
+
/* Initialize the threads. */
for (i = 0, thread = base; i < num; ++i, ++thread) {
- thread->cfg = cfg;
+ thread->wtperf = wtperf;
thread->workload = workp;
/*
* We don't want the threads executing in lock-step, seed each
* one differently.
*/
- if ((ret = __wt_random_init_seed(NULL, &thread->rnd)) != 0) {
- lprintf(cfg, ret, 0, "Error initializing RNG");
- return (ret);
- }
+ __wt_random_init_seed(NULL, &thread->rnd);
/*
* Every thread gets a key/data buffer because we don't bother
@@ -2512,14 +2642,14 @@ start_threads(CONFIG *cfg,
* don't, it's not enough memory to bother. These buffers hold
* strings: trailing NUL is included in the size.
*/
- thread->key_buf = dcalloc(cfg->key_sz, 1);
- thread->value_buf = dcalloc(cfg->value_sz_max, 1);
+ thread->key_buf = dcalloc(opts->key_sz, 1);
+ thread->value_buf = dcalloc(opts->value_sz_max, 1);
/*
* Initialize and then toss in a bit of random values if needed.
*/
- memset(thread->value_buf, 'a', cfg->value_sz - 1);
- if (cfg->random_value)
+ memset(thread->value_buf, 'a', opts->value_sz - 1);
+ if (opts->random_value)
randomize_value(thread, thread->value_buf);
/*
@@ -2537,7 +2667,7 @@ start_threads(CONFIG *cfg,
for (i = 0, thread = base; i < num; ++i, ++thread)
if ((ret = pthread_create(
&thread->handle, NULL, func, thread)) != 0) {
- lprintf(cfg, ret, 0, "Error creating thread");
+ lprintf(wtperf, ret, 0, "Error creating thread");
return (ret);
}
@@ -2545,7 +2675,7 @@ start_threads(CONFIG *cfg,
}
static int
-stop_threads(CONFIG *cfg, u_int num, CONFIG_THREAD *threads)
+stop_threads(WTPERF *wtperf, u_int num, WTPERF_THREAD *threads)
{
u_int i;
int ret;
@@ -2555,7 +2685,7 @@ stop_threads(CONFIG *cfg, u_int num, CONFIG_THREAD *threads)
for (i = 0; i < num; ++i, ++threads) {
if ((ret = pthread_join(threads->handle, NULL)) != 0) {
- lprintf(cfg, ret, 0, "Error joining thread");
+ lprintf(wtperf, ret, 0, "Error joining thread");
return (ret);
}
@@ -2588,35 +2718,38 @@ recreate_dir(const char *name)
}
static int
-drop_all_tables(CONFIG *cfg)
+drop_all_tables(WTPERF *wtperf)
{
struct timespec start, stop;
+ CONFIG_OPTS *opts;
WT_SESSION *session;
size_t i;
uint64_t msecs;
int ret, t_ret;
+ opts = wtperf->opts;
+
/* Drop any tables. */
- if ((ret = cfg->conn->open_session(
- cfg->conn, NULL, cfg->sess_config, &session)) != 0) {
- lprintf(cfg, ret, 0,
- "Error opening a session on %s", cfg->home);
+ if ((ret = wtperf->conn->open_session(
+ wtperf->conn, NULL, opts->sess_config, &session)) != 0) {
+ lprintf(wtperf, ret, 0,
+ "Error opening a session on %s", wtperf->home);
return (ret);
}
- testutil_check(__wt_epoch(NULL, &start));
- for (i = 0; i < cfg->table_count; i++) {
- if ((ret = session->drop(
- session, cfg->uris[i], NULL)) != 0) {
- lprintf(cfg, ret, 0,
- "Error dropping table %s", cfg->uris[i]);
+ __wt_epoch(NULL, &start);
+ for (i = 0; i < opts->table_count; i++) {
+ if ((ret =
+ session->drop(session, wtperf->uris[i], NULL)) != 0) {
+ lprintf(wtperf, ret, 0,
+ "Error dropping table %s", wtperf->uris[i]);
goto err;
}
}
- testutil_check(__wt_epoch(NULL, &stop));
+ __wt_epoch(NULL, &stop);
msecs = WT_TIMEDIFF_MS(stop, start);
- lprintf(cfg, 0, 1,
+ lprintf(wtperf, 0, 1,
"Executed %" PRIu32 " drop operations average time %" PRIu64 "ms",
- cfg->table_count, msecs / cfg->table_count);
+ opts->table_count, msecs / opts->table_count);
err: if ((t_ret = session->close(session, NULL)) != 0 && ret == 0)
ret = t_ret;
@@ -2624,27 +2757,34 @@ err: if ((t_ret = session->close(session, NULL)) != 0 && ret == 0)
}
static uint64_t
-wtperf_value_range(CONFIG *cfg)
+wtperf_value_range(WTPERF *wtperf)
{
- if (cfg->random_range)
- return (cfg->icount + cfg->random_range);
+ CONFIG_OPTS *opts;
+
+ opts = wtperf->opts;
+
+ if (opts->random_range)
+ return (opts->icount + opts->random_range);
/*
* It is legal to configure a zero size populate phase, hide that
* from other code by pretending the range is 1 in that case.
*/
- if (cfg->icount + cfg->insert_key == 0)
+ if (opts->icount + wtperf->insert_key == 0)
return (1);
- return (cfg->icount + cfg->insert_key - (u_int)(cfg->workers_cnt + 1));
+ return (opts->icount +
+ wtperf->insert_key - (u_int)(wtperf->workers_cnt + 1));
}
static uint64_t
-wtperf_rand(CONFIG_THREAD *thread)
+wtperf_rand(WTPERF_THREAD *thread)
{
- CONFIG *cfg;
+ CONFIG_OPTS *opts;
+ WTPERF *wtperf;
double S1, S2, U;
uint64_t rval;
- cfg = thread->cfg;
+ wtperf = thread->wtperf;
+ opts = wtperf->opts;
/*
* Use WiredTiger's random number routine: it's lock-free and fairly
@@ -2653,11 +2793,11 @@ wtperf_rand(CONFIG_THREAD *thread)
rval = __wt_random(&thread->rnd);
/* Use Pareto distribution to give 80/20 hot/cold values. */
- if (cfg->pareto != 0) {
+ if (opts->pareto != 0) {
#define PARETO_SHAPE 1.5
S1 = (-1 / PARETO_SHAPE);
- S2 = wtperf_value_range(cfg) *
- (cfg->pareto / 100.0) * (PARETO_SHAPE - 1);
+ S2 = wtperf_value_range(wtperf) *
+ (opts->pareto / 100.0) * (PARETO_SHAPE - 1);
U = 1 - (double)rval / (double)UINT32_MAX;
rval = (uint64_t)((pow(U, S1) - 1) * S2);
/*
@@ -2665,13 +2805,13 @@ wtperf_rand(CONFIG_THREAD *thread)
* 2% of the time, from my testing. That will lead to the
* first item in the table being "hot".
*/
- if (rval > wtperf_value_range(cfg))
+ if (rval > wtperf_value_range(wtperf))
rval = 0;
}
/*
* Wrap the key to within the expected range and avoid zero: we never
* insert that key.
*/
- rval = (rval % wtperf_value_range(cfg)) + 1;
+ rval = (rval % wtperf_value_range(wtperf)) + 1;
return (rval);
}
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.h b/src/third_party/wiredtiger/bench/wtperf/wtperf.h
index 1bb94db2634..81d74e134f6 100644
--- a/src/third_party/wiredtiger/bench/wtperf/wtperf.h
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.h
@@ -36,8 +36,8 @@
#include "config_opt.h"
-typedef struct __config CONFIG;
-typedef struct __config_thread CONFIG_THREAD;
+typedef struct __wtperf WTPERF;
+typedef struct __wtperf_thread WTPERF_THREAD;
typedef struct __truncate_queue_entry TRUNCATE_QUEUE_ENTRY;
#define EXT_PFX ",extensions=("
@@ -54,6 +54,9 @@ typedef struct __truncate_queue_entry TRUNCATE_QUEUE_ENTRY;
#define ZLIB_BLK BLKCMP_PFX "zlib"
#define ZLIB_EXT \
EXT_PFX EXTPATH "zlib/.libs/libwiredtiger_zlib.so" EXT_SFX
+#define ZSTD_BLK BLKCMP_PFX "zstd"
+#define ZSTD_EXT \
+ EXT_PFX EXTPATH "zstd/.libs/libwiredtiger_zstd.so" EXT_SFX
typedef struct {
int64_t threads; /* Thread count */
@@ -95,12 +98,6 @@ struct __truncate_queue_entry {
TAILQ_ENTRY(__truncate_queue_entry) q;
};
-struct __config_queue_entry {
- char *string;
- TAILQ_ENTRY(__config_queue_entry) c;
-};
-typedef struct __config_queue_entry CONFIG_QUEUE_ENTRY;
-
/* Steering for the throttle configuration */
typedef struct {
struct timespec last_increment; /* Time that we last added more ops */
@@ -111,40 +108,35 @@ typedef struct {
#define LOG_PARTIAL_CONFIG ",log=(enabled=false)"
#define READONLY_CONFIG ",readonly=true"
-/*
- * NOTE: If you add any fields to this structure here, you must also add
- * an initialization in wtperf.c in the default_cfg.
- */
-struct __config { /* Configuration structure */
+struct __wtperf { /* Per-database structure */
char *home; /* WiredTiger home */
char *monitor_dir; /* Monitor output dir */
char *partial_config; /* Config string for partial logging */
char *reopen_config; /* Config string for conn reopen */
- char *base_uri; /* Object URI */
- char *log_table_uri; /* URI for log table */
- char **uris; /* URIs if multiple tables */
+ char *log_table_uri; /* URI for log table */
+ char **uris; /* URIs */
WT_CONNECTION *conn; /* Database connection */
FILE *logf; /* Logging handle */
- char *async_config; /* Config string for async */
+ char *async_config; /* Config string for async */
+ bool use_asyncops; /* Use async operations */
const char *compress_ext; /* Compression extension for conn */
const char *compress_table; /* Compression arg to table create */
- CONFIG_THREAD *ckptthreads, *popthreads;
+ WTPERF_THREAD *ckptthreads; /* Checkpoint threads */
+ WTPERF_THREAD *popthreads; /* Populate threads */
#define WORKLOAD_MAX 50
- CONFIG_THREAD *workers; /* Worker threads */
+ WTPERF_THREAD *workers; /* Worker threads */
u_int workers_cnt;
WORKLOAD *workload; /* Workloads */
u_int workload_cnt;
- uint32_t use_asyncops; /* Use async operations */
/* State tracking variables. */
-
uint64_t ckpt_ops; /* checkpoint operations */
uint64_t insert_ops; /* insert operations */
uint64_t read_ops; /* read operations */
@@ -154,10 +146,10 @@ struct __config { /* Configuration structure */
uint64_t insert_key; /* insert key */
uint64_t log_like_table_key; /* used to allocate IDs for log table */
- volatile int ckpt; /* checkpoint in progress */
- volatile int error; /* thread error */
- volatile int stop; /* notify threads to stop */
- volatile int in_warmup; /* Running warmup phase */
+ volatile bool ckpt; /* checkpoint in progress */
+ volatile bool error; /* thread error */
+ volatile bool stop; /* notify threads to stop */
+ volatile bool in_warmup; /* running warmup phase */
volatile bool idle_cycle_run; /* Signal for idle cycle thread */
@@ -171,13 +163,7 @@ struct __config { /* Configuration structure */
/* Queue head for use with the Truncate Logic */
TAILQ_HEAD(__truncate_qh, __truncate_queue_entry) stone_head;
- /* Queue head to save a copy of the config to be output */
- TAILQ_HEAD(__config_qh, __config_queue_entry) config_head;
-
- /* Fields changeable on command line are listed in wtperf_opt.i */
-#define OPT_DECLARE_STRUCT
-#include "wtperf_opt.i"
-#undef OPT_DECLARE_STRUCT
+ CONFIG_OPTS *opts; /* Global configuration */
};
#define ELEMENTS(a) (sizeof(a) / sizeof(a[0]))
@@ -237,8 +223,8 @@ typedef struct {
uint32_t sec[100]; /* < 1s 2s ... 100s */
} TRACK;
-struct __config_thread { /* Per-thread structure */
- CONFIG *cfg; /* Enclosing configuration */
+struct __wtperf_thread { /* Per-thread structure */
+ WTPERF *wtperf; /* Enclosing configuration */
WT_RAND_STATE rnd; /* Random number generation state */
@@ -260,50 +246,45 @@ struct __config_thread { /* Per-thread structure */
TRACK truncate_sleep; /* Truncate sleep operations */
};
-void cleanup_truncate_config(CONFIG *);
-int config_compress(CONFIG *);
-void config_free(CONFIG *);
-void config_copy(CONFIG *, const CONFIG *);
-int config_opt_file(CONFIG *, const char *);
-int config_opt_line(CONFIG *, const char *);
-int config_opt_str(CONFIG *, const char *, const char *);
-void config_to_file(CONFIG *);
-void config_consolidate(CONFIG *);
-void config_print(CONFIG *);
-int config_sanity(CONFIG *);
-void latency_insert(CONFIG *, uint32_t *, uint32_t *, uint32_t *);
-void latency_read(CONFIG *, uint32_t *, uint32_t *, uint32_t *);
-void latency_update(CONFIG *, uint32_t *, uint32_t *, uint32_t *);
-void latency_print(CONFIG *);
+void cleanup_truncate_config(WTPERF *);
+int config_opt_file(WTPERF *, const char *);
+void config_opt_cleanup(CONFIG_OPTS *);
+void config_opt_init(CONFIG_OPTS **);
+void config_opt_log(CONFIG_OPTS *, const char *);
+int config_opt_name_value(WTPERF *, const char *, const char *);
+void config_opt_print(WTPERF *);
+int config_opt_str(WTPERF *, const char *);
+void config_opt_usage(void);
+int config_sanity(WTPERF *);
+void latency_insert(WTPERF *, uint32_t *, uint32_t *, uint32_t *);
+void latency_print(WTPERF *);
+void latency_read(WTPERF *, uint32_t *, uint32_t *, uint32_t *);
+void latency_update(WTPERF *, uint32_t *, uint32_t *, uint32_t *);
int run_truncate(
- CONFIG *, CONFIG_THREAD *, WT_CURSOR *, WT_SESSION *, int *);
-int setup_log_file(CONFIG *);
-void setup_throttle(CONFIG_THREAD*);
-int setup_truncate(CONFIG *, CONFIG_THREAD *, WT_SESSION *);
-int start_idle_table_cycle(CONFIG *, pthread_t *);
-int stop_idle_table_cycle(CONFIG *, pthread_t);
-uint64_t sum_ckpt_ops(CONFIG *);
-uint64_t sum_insert_ops(CONFIG *);
-uint64_t sum_pop_ops(CONFIG *);
-uint64_t sum_read_ops(CONFIG *);
-uint64_t sum_truncate_ops(CONFIG *);
-uint64_t sum_update_ops(CONFIG *);
-void usage(void);
-void worker_throttle(CONFIG_THREAD*);
-
-void lprintf(const CONFIG *, int err, uint32_t, const char *, ...)
+ WTPERF *, WTPERF_THREAD *, WT_CURSOR *, WT_SESSION *, int *);
+int setup_log_file(WTPERF *);
+void setup_throttle(WTPERF_THREAD *);
+int setup_truncate(WTPERF *, WTPERF_THREAD *, WT_SESSION *);
+int start_idle_table_cycle(WTPERF *, pthread_t *);
+int stop_idle_table_cycle(WTPERF *, pthread_t);
+void worker_throttle(WTPERF_THREAD *);
+uint64_t sum_ckpt_ops(WTPERF *);
+uint64_t sum_insert_ops(WTPERF *);
+uint64_t sum_pop_ops(WTPERF *);
+uint64_t sum_read_ops(WTPERF *);
+uint64_t sum_truncate_ops(WTPERF *);
+uint64_t sum_update_ops(WTPERF *);
+
+void lprintf(const WTPERF *, int err, uint32_t, const char *, ...)
#if defined(__GNUC__)
__attribute__((format (printf, 4, 5)))
#endif
;
static inline void
-generate_key(CONFIG *cfg, char *key_buf, uint64_t keyno)
+generate_key(CONFIG_OPTS *opts, char *key_buf, uint64_t keyno)
{
- /*
- * Don't change to snprintf, sprintf is faster in some tests.
- */
- sprintf(key_buf, "%0*" PRIu64, cfg->key_sz - 1, keyno);
+ u64_to_string_zf(keyno, key_buf, opts->key_sz);
}
static inline void
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i
index 17517ffe477..680eb53a90e 100644
--- a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i
@@ -38,14 +38,14 @@
#ifdef OPT_DEFINE_DESC
#define DEF_OPT_AS_BOOL(name, initval, desc) \
- { #name, desc, #initval, BOOL_TYPE, offsetof(CONFIG, name) },
+ { #name, desc, #initval, BOOL_TYPE, offsetof(CONFIG_OPTS, name) },
#define DEF_OPT_AS_CONFIG_STRING(name, initval, desc) \
{ #name, desc, initval, CONFIG_STRING_TYPE, \
- offsetof(CONFIG, name) },
+ offsetof(CONFIG_OPTS, name) },
#define DEF_OPT_AS_STRING(name, initval, desc) \
- { #name, desc, initval, STRING_TYPE, offsetof(CONFIG, name) },
+ { #name, desc, initval, STRING_TYPE, offsetof(CONFIG_OPTS, name) },
#define DEF_OPT_AS_UINT32(name, initval, desc) \
- { #name, desc, #initval, UINT32_TYPE, offsetof(CONFIG, name) },
+ { #name, desc, #initval, UINT32_TYPE, offsetof(CONFIG_OPTS, name) },
#endif
#ifdef OPT_DEFINE_DEFAULT
@@ -57,13 +57,13 @@
#ifdef OPT_DEFINE_DOXYGEN
#define DEF_OPT_AS_BOOL(name, initval, desc) \
- { #name, desc, #initval, BOOL_TYPE, 0 },
+ OPTION #name, desc, #initval, boolean
#define DEF_OPT_AS_CONFIG_STRING(name, initval, desc) \
- { #name, desc, initval, CONFIG_STRING_TYPE, 0 },
+ OPTION #name, desc, initval, string
#define DEF_OPT_AS_STRING(name, initval, desc) \
- { #name, desc, initval, STRING_TYPE, 0 },
+ OPTION #name, desc, initval, string
#define DEF_OPT_AS_UINT32(name, initval, desc) \
- { #name, desc, #initval, UINT32_TYPE, 0 },
+ OPTION #name, desc, #initval, unsigned int
#endif
/*
@@ -94,17 +94,20 @@ DEF_OPT_AS_UINT32(checkpoint_stress_rate, 0,
DEF_OPT_AS_UINT32(checkpoint_threads, 0, "number of checkpoint threads")
DEF_OPT_AS_CONFIG_STRING(conn_config, "create",
"connection configuration string")
+DEF_OPT_AS_BOOL(close_conn, 1, "properly close connection at end of test. "
+ "Setting to false does not sync data to disk and can result in lost "
+ "data after test exits.")
DEF_OPT_AS_BOOL(compact, 0, "post-populate compact for LSM merging activity")
DEF_OPT_AS_STRING(compression, "none",
"compression extension. Allowed configuration values are: "
- "'none', 'lz4', 'snappy', 'zlib'")
+ "'none', 'lz4', 'snappy', 'zlib', 'zstd'")
DEF_OPT_AS_BOOL(create, 1,
"do population phase; false to use existing database")
DEF_OPT_AS_UINT32(database_count, 1,
"number of WiredTiger databases to use. Each database will execute the"
" workload using a separate home directory and complete set of worker"
" threads")
-DEF_OPT_AS_UINT32(drop_tables, 0,
+DEF_OPT_AS_BOOL(drop_tables, 0,
"Whether to drop all tables at the end of the run, and report time taken"
" to do the drop.")
DEF_OPT_AS_UINT32(icount, 5000,
@@ -193,8 +196,8 @@ DEF_OPT_AS_STRING(threads, "", "workload configuration: each 'count' "
"'updates', 'truncate', 'truncate_pct' and 'truncate_count'. There are "
"also behavior modifiers, supported modifiers are 'ops_per_txn'")
DEF_OPT_AS_CONFIG_STRING(transaction_config, "",
- "transaction configuration string, relevant when populate_opts_per_txn "
- "is nonzero")
+ "WT_SESSION.begin_transaction configuration string, applied during the "
+ "populate phase when populate_ops_per_txn is nonzero")
DEF_OPT_AS_STRING(table_name, "test", "table name")
DEF_OPT_AS_BOOL(truncate_single_ops, 0,
"Implement truncate via cursor remove instead of session API")
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c b/src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c
index e49bca00d07..d104a68175d 100644
--- a/src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c
@@ -32,7 +32,7 @@
* Put the initial config together for running a throttled workload.
*/
void
-setup_throttle(CONFIG_THREAD *thread)
+setup_throttle(WTPERF_THREAD *thread)
{
THROTTLE_CONFIG *throttle_cfg;
@@ -70,7 +70,7 @@ setup_throttle(CONFIG_THREAD *thread)
throttle_cfg->ops_count = throttle_cfg->ops_per_increment;
/* Set the first timestamp of when we incremented */
- testutil_check(__wt_epoch(NULL, &throttle_cfg->last_increment));
+ __wt_epoch(NULL, &throttle_cfg->last_increment);
}
/*
@@ -78,7 +78,7 @@ setup_throttle(CONFIG_THREAD *thread)
* counter to perform more operations.
*/
void
-worker_throttle(CONFIG_THREAD *thread)
+worker_throttle(WTPERF_THREAD *thread)
{
THROTTLE_CONFIG *throttle_cfg;
struct timespec now;
@@ -86,7 +86,7 @@ worker_throttle(CONFIG_THREAD *thread)
throttle_cfg = &thread->throttle_cfg;
- testutil_check(__wt_epoch(NULL, &now));
+ __wt_epoch(NULL, &now);
/*
* If we did enough operations in the current interval, sleep for
@@ -101,7 +101,7 @@ worker_throttle(CONFIG_THREAD *thread)
/*
* After sleeping, set the interval to the current time.
*/
- testutil_check(__wt_epoch(NULL, &throttle_cfg->last_increment));
+ __wt_epoch(NULL, &throttle_cfg->last_increment);
} else {
throttle_cfg->ops_count = (usecs_delta *
throttle_cfg->ops_per_increment) /
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_truncate.c b/src/third_party/wiredtiger/bench/wtperf/wtperf_truncate.c
index e6ebc83c681..3fbb740d2c8 100644
--- a/src/third_party/wiredtiger/bench/wtperf/wtperf_truncate.c
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_truncate.c
@@ -35,8 +35,9 @@ decode_key(char *key_buf)
}
int
-setup_truncate(CONFIG *cfg, CONFIG_THREAD *thread, WT_SESSION *session) {
-
+setup_truncate(WTPERF *wtperf, WTPERF_THREAD *thread, WT_SESSION *session)
+{
+ CONFIG_OPTS *opts;
TRUNCATE_CONFIG *trunc_cfg;
TRUNCATE_QUEUE_ENTRY *truncate_item;
WORKLOAD *workload;
@@ -45,13 +46,14 @@ setup_truncate(CONFIG *cfg, CONFIG_THREAD *thread, WT_SESSION *session) {
int ret;
uint64_t end_point, final_stone_gap, i, start_point;
+ opts = wtperf->opts;
end_point = final_stone_gap = start_point = 0;
trunc_cfg = &thread->trunc_cfg;
workload = thread->workload;
/* We are limited to only one table when running truncate. */
if ((ret = session->open_cursor(
- session, cfg->uris[0], NULL, NULL, &cursor)) != 0)
+ session, wtperf->uris[0], NULL, NULL, &cursor)) != 0)
goto err;
/*
@@ -79,14 +81,14 @@ setup_truncate(CONFIG *cfg, CONFIG_THREAD *thread, WT_SESSION *session) {
*/
if ((ret = cursor->next(cursor)) != 0 ||
(ret = cursor->get_key(cursor, &key)) != 0) {
- lprintf(cfg, ret, 0, "truncate setup start: failed");
+ lprintf(wtperf, ret, 0, "truncate setup start: failed");
goto err;
}
start_point = decode_key(key);
if ((cursor->reset(cursor)) != 0 || (ret = cursor->prev(cursor)) != 0 ||
(ret = cursor->get_key(cursor, &key)) != 0) {
- lprintf(cfg, ret, 0, "truncate setup end: failed");
+ lprintf(wtperf, ret, 0, "truncate setup end: failed");
goto err;
}
end_point = decode_key(key);
@@ -104,12 +106,13 @@ setup_truncate(CONFIG *cfg, CONFIG_THREAD *thread, WT_SESSION *session) {
for (i = 1; i <= trunc_cfg->needed_stones; i++) {
truncate_item =
dcalloc(sizeof(TRUNCATE_QUEUE_ENTRY), 1);
- truncate_item->key = dcalloc(cfg->key_sz, 1);
+ truncate_item->key = dcalloc(opts->key_sz, 1);
generate_key(
- cfg, truncate_item->key, trunc_cfg->stone_gap * i);
+ opts, truncate_item->key, trunc_cfg->stone_gap * i);
truncate_item->diff =
(trunc_cfg->stone_gap * i) - trunc_cfg->last_key;
- TAILQ_INSERT_TAIL(&cfg->stone_head, truncate_item, q);
+ TAILQ_INSERT_TAIL(
+ &wtperf->stone_head, truncate_item, q);
trunc_cfg->last_key = trunc_cfg->stone_gap * i;
trunc_cfg->num_stones++;
}
@@ -117,27 +120,29 @@ setup_truncate(CONFIG *cfg, CONFIG_THREAD *thread, WT_SESSION *session) {
trunc_cfg->stone_gap = final_stone_gap;
err: if ((ret = cursor->close(cursor)) != 0) {
- lprintf(cfg, ret, 0, "truncate setup: cursor close failed");
+ lprintf(wtperf, ret, 0, "truncate setup: cursor close failed");
}
return (ret);
}
int
-run_truncate(CONFIG *cfg, CONFIG_THREAD *thread,
- WT_CURSOR *cursor, WT_SESSION *session, int *truncatedp) {
-
+run_truncate(WTPERF *wtperf, WTPERF_THREAD *thread,
+ WT_CURSOR *cursor, WT_SESSION *session, int *truncatedp)
+{
+ CONFIG_OPTS *opts;
TRUNCATE_CONFIG *trunc_cfg;
TRUNCATE_QUEUE_ENTRY *truncate_item;
char *next_key;
int ret, t_ret;
uint64_t used_stone_gap;
- ret = 0;
+ opts = wtperf->opts;
trunc_cfg = &thread->trunc_cfg;
+ ret = 0;
*truncatedp = 0;
/* Update the total inserts */
- trunc_cfg->total_inserts = sum_insert_ops(cfg);
+ trunc_cfg->total_inserts = sum_insert_ops(wtperf);
trunc_cfg->expected_total +=
(trunc_cfg->total_inserts - trunc_cfg->last_total_inserts);
trunc_cfg->last_total_inserts = trunc_cfg->total_inserts;
@@ -170,10 +175,10 @@ run_truncate(CONFIG *cfg, CONFIG_THREAD *thread,
while (trunc_cfg->num_stones < trunc_cfg->needed_stones) {
trunc_cfg->last_key += used_stone_gap;
truncate_item = dcalloc(sizeof(TRUNCATE_QUEUE_ENTRY), 1);
- truncate_item->key = dcalloc(cfg->key_sz, 1);
- generate_key(cfg, truncate_item->key, trunc_cfg->last_key);
+ truncate_item->key = dcalloc(opts->key_sz, 1);
+ generate_key(opts, truncate_item->key, trunc_cfg->last_key);
truncate_item->diff = used_stone_gap;
- TAILQ_INSERT_TAIL(&cfg->stone_head, truncate_item, q);
+ TAILQ_INSERT_TAIL(&wtperf->stone_head, truncate_item, q);
trunc_cfg->num_stones++;
}
@@ -182,34 +187,35 @@ run_truncate(CONFIG *cfg, CONFIG_THREAD *thread,
trunc_cfg->expected_total <= thread->workload->truncate_count)
return (0);
- truncate_item = TAILQ_FIRST(&cfg->stone_head);
+ truncate_item = TAILQ_FIRST(&wtperf->stone_head);
trunc_cfg->num_stones--;
- TAILQ_REMOVE(&cfg->stone_head, truncate_item, q);
+ TAILQ_REMOVE(&wtperf->stone_head, truncate_item, q);
/*
* Truncate the content via a single truncate call or a cursor walk
* depending on the configuration.
*/
- if (cfg->truncate_single_ops) {
+ if (opts->truncate_single_ops) {
while ((ret = cursor->next(cursor)) == 0) {
testutil_check(cursor->get_key(cursor, &next_key));
if (strcmp(next_key, truncate_item->key) == 0)
break;
if ((ret = cursor->remove(cursor)) != 0) {
- lprintf(cfg, ret, 0, "Truncate remove: failed");
+ lprintf(wtperf,
+ ret, 0, "Truncate remove: failed");
goto err;
}
}
} else {
cursor->set_key(cursor,truncate_item->key);
if ((ret = cursor->search(cursor)) != 0) {
- lprintf(cfg, ret, 0, "Truncate search: failed");
+ lprintf(wtperf, ret, 0, "Truncate search: failed");
goto err;
}
if ((ret = session->truncate(
session, NULL, NULL, cursor, NULL)) != 0) {
- lprintf(cfg, ret, 0, "Truncate: failed");
+ lprintf(wtperf, ret, 0, "Truncate: failed");
goto err;
}
}
@@ -221,19 +227,20 @@ err: free(truncate_item->key);
free(truncate_item);
t_ret = cursor->reset(cursor);
if (t_ret != 0)
- lprintf(cfg, t_ret, 0, "Cursor reset failed");
+ lprintf(wtperf, t_ret, 0, "Cursor reset failed");
if (ret == 0 && t_ret != 0)
ret = t_ret;
return (ret);
}
void
-cleanup_truncate_config(CONFIG *cfg) {
+cleanup_truncate_config(WTPERF *wtperf)
+{
TRUNCATE_QUEUE_ENTRY *truncate_item;
- while (!TAILQ_EMPTY(&cfg->stone_head)) {
- truncate_item = TAILQ_FIRST(&cfg->stone_head);
- TAILQ_REMOVE(&cfg->stone_head, truncate_item, q);
+ while (!TAILQ_EMPTY(&wtperf->stone_head)) {
+ truncate_item = TAILQ_FIRST(&wtperf->stone_head);
+ TAILQ_REMOVE(&wtperf->stone_head, truncate_item, q);
free(truncate_item->key);
free(truncate_item);
}
diff --git a/src/third_party/wiredtiger/build_posix/Make.base b/src/third_party/wiredtiger/build_posix/Make.base
index 4efbe3f76c3..5b945aca5e0 100644
--- a/src/third_party/wiredtiger/build_posix/Make.base
+++ b/src/third_party/wiredtiger/build_posix/Make.base
@@ -77,6 +77,9 @@ endif
if HAVE_BUILTIN_EXTENSION_ZLIB
libwiredtiger_la_LIBADD += ext/compressors/zlib/libwiredtiger_zlib.la
endif
+if HAVE_BUILTIN_EXTENSION_ZSTD
+libwiredtiger_la_LIBADD += ext/compressors/zstd/libwiredtiger_zstd.la
+endif
libwiredtiger_static_la_LIBADD=$(libwiredtiger_la_LIBADD)
libwiredtiger_static_la_SOURCES=$(libwiredtiger_la_SOURCES)
diff --git a/src/third_party/wiredtiger/build_posix/Make.subdirs b/src/third_party/wiredtiger/build_posix/Make.subdirs
index 0b5175e4196..55941837249 100644
--- a/src/third_party/wiredtiger/build_posix/Make.subdirs
+++ b/src/third_party/wiredtiger/build_posix/Make.subdirs
@@ -11,6 +11,7 @@ ext/compressors/lz4 LZ4
ext/compressors/nop
ext/compressors/snappy SNAPPY
ext/compressors/zlib ZLIB
+ext/compressors/zstd ZSTD
ext/datasources/helium HAVE_HELIUM
ext/encryptors/nop
ext/encryptors/rotn
diff --git a/src/third_party/wiredtiger/build_posix/aclocal/options.m4 b/src/third_party/wiredtiger/build_posix/aclocal/options.m4
index 1f6a1690279..7043430a6d6 100644
--- a/src/third_party/wiredtiger/build_posix/aclocal/options.m4
+++ b/src/third_party/wiredtiger/build_posix/aclocal/options.m4
@@ -19,10 +19,12 @@ AH_TEMPLATE(HAVE_BUILTIN_EXTENSION_SNAPPY,
[Snappy support automatically loaded.])
AH_TEMPLATE(HAVE_BUILTIN_EXTENSION_ZLIB,
[Zlib support automatically loaded.])
+AH_TEMPLATE(HAVE_BUILTIN_EXTENSION_ZSTD,
+ [ZSTD support automatically loaded.])
AC_MSG_CHECKING(if --with-builtins option specified)
AC_ARG_WITH(builtins,
[AS_HELP_STRING([--with-builtins],
- [builtin extension names (lz4, snappy, zlib).])],
+ [builtin extension names (lz4, snappy, zlib, zstd).])],
[with_builtins=$withval],
[with_builtins=])
@@ -36,6 +38,8 @@ for builtin_i in $builtin_list; do
wt_cv_with_builtin_extension_snappy=yes;;
zlib) AC_DEFINE(HAVE_BUILTIN_EXTENSION_ZLIB)
wt_cv_with_builtin_extension_zlib=yes;;
+ zstd) AC_DEFINE(HAVE_BUILTIN_EXTENSION_ZSTD)
+ wt_cv_with_builtin_extension_zstd=yes;;
*) AC_MSG_ERROR([Unknown builtin extension "$builtin_i"]);;
esac
done
@@ -45,6 +49,8 @@ AM_CONDITIONAL([HAVE_BUILTIN_EXTENSION_SNAPPY],
[test "$wt_cv_with_builtin_extension_snappy" = "yes"])
AM_CONDITIONAL([HAVE_BUILTIN_EXTENSION_ZLIB],
[test "$wt_cv_with_builtin_extension_zlib" = "yes"])
+AM_CONDITIONAL([HAVE_BUILTIN_EXTENSION_ZSTD],
+ [test "$wt_cv_with_builtin_extension_zstd" = "yes"])
AC_MSG_RESULT($with_builtins)
AH_TEMPLATE(
@@ -276,4 +282,30 @@ if test "$wt_cv_enable_zlib" = "yes"; then
fi
AM_CONDITIONAL([ZLIB], [test "$wt_cv_enable_zlib" = "yes"])
+AC_MSG_CHECKING(if --enable-zstd option specified)
+AC_ARG_ENABLE(zstd,
+ [AS_HELP_STRING([--enable-zstd],
+ [Build the zstd compressor extension.])], r=$enableval, r=no)
+case "$r" in
+no) if test "$wt_cv_with_builtin_extension_zstd" = "yes"; then
+ wt_cv_enable_zstd=yes
+ else
+ wt_cv_enable_zstd=no
+ fi
+ ;;
+*) if test "$wt_cv_with_builtin_extension_zstd" = "yes"; then
+ AC_MSG_ERROR(
+ [Only one of --enable-zstd --with-builtins=zstd allowed])
+ fi
+ wt_cv_enable_zstd=yes;;
+esac
+AC_MSG_RESULT($wt_cv_enable_zstd)
+if test "$wt_cv_enable_zstd" = "yes"; then
+ AC_CHECK_HEADER(zstd.h,,
+ [AC_MSG_ERROR([--enable-zstd requires zstd.h])])
+ AC_CHECK_LIB(zstd, ZSTD_compress,,
+ [AC_MSG_ERROR([--enable-zstd requires Zstd library])])
+fi
+AM_CONDITIONAL([ZSTD], [test "$wt_cv_enable_zstd" = "yes"])
+
])
diff --git a/src/third_party/wiredtiger/build_posix/configure.ac.in b/src/third_party/wiredtiger/build_posix/configure.ac.in
index 608d602937d..ad00b19a3bb 100644
--- a/src/third_party/wiredtiger/build_posix/configure.ac.in
+++ b/src/third_party/wiredtiger/build_posix/configure.ac.in
@@ -74,7 +74,7 @@ AM_CONDITIONAL([ARM64_HOST], [test "$wt_cv_arm64" = "yes"])
# support compiling the ASM code we have to perform the CRC checks on PowerPC.
# To compile with clang we need to override the ASM compiler with CCAS to use
# gcc. Unfortunately, doing the compilation in this manner means libtool can't
-# determine what tag to use for that one .S file. If we catch that we are using
+# determine what tag to use for that one .sx file. If we catch that we are using
# two different compilers for CC and CCAS and we are on a PowerPC system we
# overload the libtool flags to provide CC by default.
if test "$wt_cv_powerpc" = "yes" -a "$CC" != "$CCAS"; then
diff --git a/src/third_party/wiredtiger/build_win/wiredtiger_config.h b/src/third_party/wiredtiger/build_win/wiredtiger_config.h
index 83ddc6eb194..78d2784cb70 100644
--- a/src/third_party/wiredtiger/build_win/wiredtiger_config.h
+++ b/src/third_party/wiredtiger/build_win/wiredtiger_config.h
@@ -19,6 +19,9 @@
/* Zlib support automatically loaded. */
/* #undef HAVE_BUILTIN_EXTENSION_ZLIB */
+/* ZSTD support automatically loaded. */
+/* #undef HAVE_BUILTIN_EXTENSION_ZSTD */
+
/* Define to 1 if you have the `clock_gettime' function. */
/* #undef HAVE_CLOCK_GETTIME */
@@ -70,6 +73,9 @@
/* Define to 1 if you have the `z' library (-lz). */
/* #undef HAVE_LIBZ */
+/* Define to 1 if you have the `zstd' library (-lzstd). */
+/* #undef HAVE_LIBZSTD */
+
/* Define to 1 if you have the <memory.h> header file. */
/* #undef HAVE_MEMORY_H */
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index 9781e58a807..7affc58a217 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -136,8 +136,8 @@ file_config = format_meta + [
configure a compressor for file blocks. Permitted values are \c "none"
or custom compression engine name created with
WT_CONNECTION::add_compressor. If WiredTiger has builtin support for
- \c "snappy", \c "lz4" or \c "zlib" compression, these names are also
- available. See @ref compression for more information'''),
+ \c "lz4", \c "snappy", \c "zlib" or \c "zstd" compression, these names
+ are also available. See @ref compression for more information'''),
Config('cache_resident', 'false', r'''
do not ever evict the object's pages from cache. Not compatible with
LSM tables; see @ref tuning_cache_resident for more information''',
@@ -183,6 +183,12 @@ file_config = format_meta + [
configure Huffman encoding for values. Permitted values are
\c "none", \c "english", \c "utf8<file>" or \c "utf16<file>".
See @ref huffman for more information'''),
+ Config('ignore_in_memory_cache_size', 'false', r'''
+ allow update and insert operations to proceed even if the cache is
+ already at capacity. Only valid in conjunction with in-memory
+ databases. Should be used with caution - this configuration allows
+ WiredTiger to consume memory over the configured cache limit''',
+ type='boolean'),
Config('internal_key_truncate', 'true', r'''
configure internal key truncation, discarding unnecessary
trailing bytes on internal keys (ignored for custom
@@ -410,13 +416,13 @@ connection_runtime_config = [
Config('eviction_dirty_target', '5', r'''
perform eviction in worker threads when the cache contains at least
this much dirty content, expressed as a percentage of the total cache
- size. Ignored if \c in_memory is \c true''',
+ size.''',
min=1, max=99),
Config('eviction_dirty_trigger', '20', r'''
trigger application threads to perform eviction when the cache contains
at least this much dirty content, expressed as a percentage of the
total cache size. This setting only alters behavior if it is lower than
- eviction_trigger. Ignored if \c in_memory is \c true''',
+ eviction_trigger''',
min=1, max=99),
Config('eviction_target', '80', r'''
perform eviction in worker threads when the cache contains at least
@@ -496,7 +502,8 @@ connection_runtime_config = [
is used to gather statistics, as well as each time statistics
are logged using the \c statistics_log configuration. See
@ref statistics for more information''',
- type='list', choices=['all', 'fast', 'none', 'clear']),
+ type='list',
+ choices=['all', 'cache_walk', 'fast', 'none', 'clear', 'tree_walk']),
Config('verbose', '', r'''
enable messages for various events. Only available if WiredTiger
is configured with --enable-verbose. Options are given as a
@@ -563,8 +570,9 @@ wiredtiger_open_log_configuration = [
configure a compressor for log records. Permitted values are
\c "none" or custom compression engine name created with
WT_CONNECTION::add_compressor. If WiredTiger has builtin support
- for \c "snappy", \c "lz4" or \c "zlib" compression, these names
- are also available. See @ref compression for more information'''),
+ for \c "lz4", \c "snappy", \c "zlib" or \c "zstd" compression,
+ these names are also available. See @ref compression for more
+ information'''),
Config('file_max', '100MB', r'''
the maximum size of log files''',
min='100KB', max='2GB'),
@@ -970,7 +978,8 @@ methods = {
gathering them, where appropriate (for example, a cache size statistic
is not cleared, while the count of cursor insert operations will be
cleared). See @ref statistics for more information''',
- type='list', choices=['all', 'fast', 'clear', 'size']),
+ type='list',
+ choices=['all', 'cache_walk', 'fast', 'clear', 'size', 'tree_walk']),
Config('target', '', r'''
if non-empty, backup the list of objects; valid only for a
backup data source''',
diff --git a/src/third_party/wiredtiger/dist/filelist b/src/third_party/wiredtiger/dist/filelist
index 19fa1122a27..fe9a17b7799 100644
--- a/src/third_party/wiredtiger/dist/filelist
+++ b/src/third_party/wiredtiger/dist/filelist
@@ -48,12 +48,12 @@ src/btree/row_modify.c
src/btree/row_srch.c
src/cache/cache_las.c
src/checksum/arm64/crc32-arm64.c ARM64_HOST
-src/checksum/power8/crc32.S POWERPC_HOST
+src/checksum/power8/crc32.sx POWERPC_HOST
src/checksum/power8/crc32_wrapper.c POWERPC_HOST
src/checksum/software/checksum.c
src/checksum/x86/crc32-x86.c X86_HOST
src/checksum/zseries/crc32-s390x.c ZSERIES_HOST
-src/checksum/zseries/crc32le-vx.S ZSERIES_HOST
+src/checksum/zseries/crc32le-vx.sx ZSERIES_HOST
src/config/config.c
src/config/config_api.c
src/config/config_check.c
@@ -90,6 +90,7 @@ src/cursor/cur_table.c
src/evict/evict_file.c
src/evict/evict_lru.c
src/evict/evict_page.c
+src/evict/evict_stat.c
src/log/log.c
src/log/log_auto.c
src/log/log_slot.c
diff --git a/src/third_party/wiredtiger/dist/flags.py b/src/third_party/wiredtiger/dist/flags.py
index 93b6e0cbbf4..e200f95fba6 100644
--- a/src/third_party/wiredtiger/dist/flags.py
+++ b/src/third_party/wiredtiger/dist/flags.py
@@ -133,6 +133,16 @@ flags = {
'SESSION_QUIET_CORRUPT_FILE',
'SESSION_SERVER_ASYNC',
],
+ 'stat' : [
+ 'STAT_CLEAR',
+ 'STAT_JSON',
+ 'STAT_ON_CLOSE',
+ 'STAT_TYPE_ALL',
+ 'STAT_TYPE_CACHE_WALK',
+ 'STAT_TYPE_FAST',
+ 'STAT_TYPE_SIZE',
+ 'STAT_TYPE_TREE_WALK',
+ ],
}
flag_cnt = {} # Dictionary [flag] : [reference count]
diff --git a/src/third_party/wiredtiger/dist/s_define.list b/src/third_party/wiredtiger/dist/s_define.list
index 6a1a32004ea..7b11d665de5 100644
--- a/src/third_party/wiredtiger/dist/s_define.list
+++ b/src/third_party/wiredtiger/dist/s_define.list
@@ -39,14 +39,18 @@ WT_READ_BARRIER
WT_REF_SIZE
WT_SESSION_LOCKED_CHECKPOINT
WT_SESSION_LOCKED_TURTLE
-WT_STATS_FIELD_TO_SLOT
+WT_STATS_FIELD_TO_OFFSET
WT_STATS_SLOT_ID
WT_STAT_CONN_DECRV
WT_STAT_DATA_DECRV
WT_STAT_DECR
WT_STAT_DECRV
WT_STAT_DECRV_ATOMIC
+WT_STAT_DECRV_ATOMIC_BASE
+WT_STAT_DECRV_BASE
WT_STAT_INCRV_ATOMIC
+WT_STAT_INCRV_ATOMIC_BASE
+WT_STAT_INCRV_BASE
WT_STAT_WRITE
WT_TIMEDIFF_US
WT_TRET_ERROR_OK
diff --git a/src/third_party/wiredtiger/dist/s_docs b/src/third_party/wiredtiger/dist/s_docs
index e2b1d2aed11..b4f449fa093 100755
--- a/src/third_party/wiredtiger/dist/s_docs
+++ b/src/third_party/wiredtiger/dist/s_docs
@@ -1,7 +1,7 @@
#! /bin/sh
t=__wt.$$
-trap 'rm -f $t /tmp/__doxy' 0 1 2 3 13 15
+trap 'rm -f $t' 0 1 2 3 13 15
# Skip this when building release packages: docs are built separately
test -n "$WT_RELEASE_BUILD" && exit 0
@@ -30,18 +30,22 @@ wtperf_config()
{
# The Linux ed command writes line numbers to stderr, redirect both
# stdout and stderr to keep things quiet.
- cc -o /tmp/__doxy ../bench/wtperf/doxy.c &&
+ #
+ # The OS X cpp program injects line number output in the middle of lines
+ # and doesn't stringify #XXX entries; use the -E option to the compiler
+ # instead.
+ cat ../bench/wtperf/wtperf_opt.i |
+ ${CC:-cc} -E -DOPT_DEFINE_DOXYGEN - | python wtperf_config.py > $t
(echo '/START_AUTO_GENERATED_WTPERF_CONFIGURATION/+3,/STOP_AUTO_GENERATED_WTPERF_CONFIGURATION/-1d'
echo 'i'
echo ''
echo '.'
- echo ".r !/tmp/__doxy"
+ echo ".r $t"
echo 'a'
echo ''
echo '.'
echo 'w'
- echo 'q') | ed ../src/docs/wtperf.dox 1>/dev/null 2>/dev/null &&
- rm -f /tmp/__doxy
+ echo 'q') | ed ../src/docs/wtperf.dox 1>/dev/null 2>/dev/null
}
structurechk()
diff --git a/src/third_party/wiredtiger/dist/s_export b/src/third_party/wiredtiger/dist/s_export
index dc69238b270..b8e42c970f9 100755
--- a/src/third_party/wiredtiger/dist/s_export
+++ b/src/third_party/wiredtiger/dist/s_export
@@ -26,7 +26,7 @@ check()
sort |
uniq -u |
egrep -v \
- 'zlib_extension_init|lz4_extension_init|snappy_extension_init' > $t
+ 'lz4_extension_init|snappy_extension_init|zlib_extension_init|zstd_extension_init' > $t
test -s $t && {
echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
diff --git a/src/third_party/wiredtiger/dist/s_stat b/src/third_party/wiredtiger/dist/s_stat
index 935c7e1fb43..5d5937e1833 100755
--- a/src/third_party/wiredtiger/dist/s_stat
+++ b/src/third_party/wiredtiger/dist/s_stat
@@ -20,6 +20,25 @@ search=`sed \
-e d ../src/include/stat.h |
sort`
+# Some fields are used, but this script can't detect their use.
+cat << UNUSED_STAT_FIELDS
+lock_checkpoint_count
+lock_checkpoint_wait_application
+lock_checkpoint_wait_internal
+lock_handle_list_count
+lock_handle_list_wait_application
+lock_handle_list_wait_internal
+lock_metadata_count
+lock_metadata_wait_application
+lock_metadata_wait_internal
+lock_schema_count
+lock_schema_wait_application
+lock_schema_wait_internal
+lock_table_count
+lock_table_wait_application
+lock_table_wait_internal
+UNUSED_STAT_FIELDS
+
echo "$search"
fgrep -who "$search" $l) | sort | uniq -u > $t
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index 1887cbd936f..7cf96aec399 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -18,6 +18,7 @@ ASYNC
Addr
Ailamaki
Alakuijala
+Alexandrescu's
Alloc
Async
Athanassoulis
@@ -59,6 +60,7 @@ COVERITY
CPUs
CRC
CSV
+CStream
CURSORs
CURSTD
CallsCustDate
@@ -68,6 +70,7 @@ Checksum
Checksums
CityHash
CloseHandle
+Collet
Comparator
Config
Coverity
@@ -124,6 +127,7 @@ FORALL
FOREACH
FS
FULLFSYNC
+Facebook
FindClose
FindFirstFile
Fixup
@@ -165,6 +169,7 @@ INSN
INTL
ISA
ITEMs
+ITER
InitializeCriticalSectionAndSpinCount
Inline
Intra
@@ -372,6 +377,7 @@ WILLNEED
WIREDTIGER
WRLSN
WRNOLOCK
+WTPERF
WaitForSingleObject
WakeAllConditionVariable
Wconditional
@@ -395,6 +401,12 @@ WriteFile
Wuninitialized
Wunused
XP
+Yann
+ZSTD
+Zlib
+Zlib's
+Zstd
+Zstd's
abcdef
abcdefghijklmnopqrstuvwxyz
addl
@@ -416,6 +428,7 @@ argc
args
argv
asm
+assertfmt
async
asyncopp
asyncops
@@ -511,6 +524,7 @@ collatorp
comparator
comparep
compat
+compressStream
concat
cond
conf
@@ -530,6 +544,7 @@ cp
cpuid
crc
create's
+createCStream
crypto
cryptobad
csv
@@ -622,6 +637,7 @@ emp
encodings
encryptor
encryptors
+endStream
endian
english
enqueue
@@ -749,6 +765,7 @@ infeasible
inflateInit
infmt
init
+initCStream
initializers
initn
initsize
@@ -784,6 +801,7 @@ isupper
isxdigit
iter
iteratively
+iters
jnr
jrx
json
@@ -849,6 +867,7 @@ majorp
malloc
marshall
marshalled
+maxCLevel
maxcpu
maxdbs
mbll
@@ -989,12 +1008,14 @@ qdown
qrrSS
qsort
quartile
+queueable
qup
rN
rS
rb
rbrace
rbracket
+rcursor
rdonly
rduppo
readlock
@@ -1201,6 +1222,7 @@ waitpid
walk's
warmup
wb
+wcursor
wiredTiger
wiredtiger
workFactor
@@ -1222,6 +1244,10 @@ xxxx
xxxxx
xxxxxx
zalloc
+zf
zfree
zlib
+zlib's
+zstd
+zstd's
zu
diff --git a/src/third_party/wiredtiger/dist/s_style b/src/third_party/wiredtiger/dist/s_style
index 3860a23b991..8e755224ee2 100755
--- a/src/third_party/wiredtiger/dist/s_style
+++ b/src/third_party/wiredtiger/dist/s_style
@@ -108,7 +108,7 @@ else
! expr "$f" : 'test/.*' > /dev/null &&
! expr "$f" : '.*/utilities/.*' > /dev/null; then
if ! expr "$f" : '.*/os_alloc.c' > /dev/null &&
- egrep '[[:space:]]free[(]|[[:space:]]strdup[(]|[[:space:]]strndup[(]|[[:space:]]malloc[(]|[[:space:]]calloc[(]|[[:space:]]realloc[(]' $f > $t; then
+ egrep '[[:space:]]free[(]|[[:space:]]strdup[(]|[[:space:]]strndup[(]|[[:space:]]malloc[(]|[[:space:]]calloc[(]|[[:space:]]realloc[(]|[[:space:]]sprintf[(]' $f > $t; then
test -s $t && {
echo "$f: call to illegal function"
cat $t
diff --git a/src/third_party/wiredtiger/dist/s_void b/src/third_party/wiredtiger/dist/s_void
index f7bfbcc7e8e..e5e9f97c0b7 100644
--- a/src/third_party/wiredtiger/dist/s_void
+++ b/src/third_party/wiredtiger/dist/s_void
@@ -96,10 +96,13 @@ func_ok()
-e '/int wiredtiger_extension_init$/d' \
-e '/int wiredtiger_extension_terminate$/d' \
-e '/int wiredtiger_pack_close$/d' \
- -e '/int wt_snappy_pre_size$/d' \
- -e '/int wt_snappy_terminate$/d' \
+ -e '/int snappy_pre_size$/d' \
+ -e '/int snappy_terminate$/d' \
-e '/int zlib_error$/d' \
- -e '/int zlib_terminate$/d'
+ -e '/int zlib_terminate$/d' \
+ -e '/int zstd_error$/d' \
+ -e '/int zstd_pre_size$/d' \
+ -e '/int zstd_terminate$/d'
}
# Complain about functions which return an "int" but which don't return except
diff --git a/src/third_party/wiredtiger/dist/stat.py b/src/third_party/wiredtiger/dist/stat.py
index c3c85bbe9b4..e42585c1b8c 100644
--- a/src/third_party/wiredtiger/dist/stat.py
+++ b/src/third_party/wiredtiger/dist/stat.py
@@ -42,8 +42,11 @@ compare_srcfile(tmp_file, '../src/include/stat.h')
def print_defines_one(capname, base, stats):
for v, l in enumerate(stats, base):
desc = l.desc
- if 'all_only' in l.flags:
- desc += ', only reported if statistics=all is set'
+ if 'cache_walk' in l.flags:
+ desc += \
+ ', only reported if cache_walk or all statistics are enabled'
+ if 'tree_walk' in l.flags:
+ desc += ', only reported if tree_walk or all statistics are enabled'
if len(textwrap.wrap(desc, 70)) > 1:
f.write('/*!\n')
f.write(' * %s\n' % '\n * '.join(textwrap.wrap(desc, 70)))
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index 5087afa44dc..bcf5201bd90 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -9,7 +9,8 @@
#
# Data-source statistics are normally aggregated across the set of underlying
# objects. Additional optional configuration flags are available:
-# all_only Only gets reported when statistics=all set
+# cache_walk Only reported when statistics=cache_walk or statistics=all is set
+# tree_walk Only reported when statistics=tree_walk or statistics=all is set
# max_aggregate Take the maximum value when aggregating statistics
# no_clear Value not cleared when statistics cleared
# no_scale Don't scale value per second in the logging tool script
@@ -46,6 +47,11 @@ class CacheStat(Stat):
prefix = 'cache'
def __init__(self, name, desc, flags=''):
Stat.__init__(self, name, CacheStat.prefix, desc, flags)
+class CacheWalkStat(Stat):
+ prefix = 'cache_walk'
+ def __init__(self, name, desc, flags=''):
+ flags += ',cache_walk'
+ Stat.__init__(self, name, CacheWalkStat.prefix, desc, flags)
class CompressStat(Stat):
prefix = 'compression'
def __init__(self, name, desc, flags=''):
@@ -66,6 +72,10 @@ class JoinStat(Stat):
prefix = '' # prefix is inserted dynamically
def __init__(self, name, desc, flags=''):
Stat.__init__(self, name, JoinStat.prefix, desc, flags)
+class LockStat(Stat):
+ prefix = 'lock'
+ def __init__(self, name, desc, flags=''):
+ Stat.__init__(self, name, LockStat.prefix, desc, flags)
class LogStat(Stat):
prefix = 'log'
def __init__(self, name, desc, flags=''):
@@ -105,11 +115,16 @@ groups['cursor'] = [CursorStat.prefix, SessionStat.prefix]
groups['evict'] = [
BlockStat.prefix,
CacheStat.prefix,
+ CacheWalkStat.prefix,
ConnStat.prefix,
ThreadStat.prefix
]
groups['lsm'] = [LSMStat.prefix, TxnStat.prefix]
-groups['memory'] = [CacheStat.prefix, ConnStat.prefix, RecStat.prefix]
+groups['memory'] = [
+ CacheStat.prefix,
+ CacheWalkStat.prefix,
+ ConnStat.prefix,
+ RecStat.prefix]
groups['system'] = [
ConnStat.prefix,
DhandleStat.prefix,
@@ -226,13 +241,32 @@ connection_stats = [
CacheStat('cache_pages_inuse', 'pages currently held in the cache', 'no_clear,no_scale'),
CacheStat('cache_pages_requested', 'pages requested from the cache'),
CacheStat('cache_read', 'pages read into cache'),
+ CacheStat('cache_read_app_count', 'application threads page read from disk to cache count'),
+ CacheStat('cache_read_app_time', 'application threads page read from disk to cache time (usecs)'),
CacheStat('cache_read_lookaside', 'pages read into cache requiring lookaside entries'),
CacheStat('cache_read_overflow', 'overflow pages read into cache'),
CacheStat('cache_write', 'pages written from cache'),
+ CacheStat('cache_write_app_count', 'application threads page write from cache to disk count'),
+ CacheStat('cache_write_app_time', 'application threads page write from cache to disk time (usecs)'),
CacheStat('cache_write_lookaside', 'page written requiring lookaside records'),
CacheStat('cache_write_restore', 'pages written requiring in-memory restoration'),
##########################################
+ # Cursor operations
+ ##########################################
+ CursorStat('cursor_create', 'cursor create calls'),
+ CursorStat('cursor_insert', 'cursor insert calls'),
+ CursorStat('cursor_next', 'cursor next calls'),
+ CursorStat('cursor_prev', 'cursor prev calls'),
+ CursorStat('cursor_remove', 'cursor remove calls'),
+ CursorStat('cursor_reset', 'cursor reset calls'),
+ CursorStat('cursor_restart', 'cursor restarted searches'),
+ CursorStat('cursor_search', 'cursor search calls'),
+ CursorStat('cursor_search_near', 'cursor search near calls'),
+ CursorStat('cursor_truncate', 'truncate calls'),
+ CursorStat('cursor_update', 'cursor update calls'),
+
+ ##########################################
# Dhandle statistics
##########################################
DhandleStat('dh_conn_handle_count', 'connection data handles currently active', 'no_clear,no_scale'),
@@ -245,6 +279,25 @@ connection_stats = [
DhandleStat('dh_sweeps', 'connection sweeps'),
##########################################
+ # Locking statistics
+ ##########################################
+ LockStat('lock_checkpoint_count', 'checkpoint lock acquisitions'),
+ LockStat('lock_checkpoint_wait_application', 'checkpoint lock application thread wait time (usecs)'),
+ LockStat('lock_checkpoint_wait_internal', 'checkpoint lock internal thread wait time (usecs)'),
+ LockStat('lock_handle_list_count', 'handle-list lock acquisitions'),
+ LockStat('lock_handle_list_wait_application', 'handle-list lock application thread wait time (usecs)'),
+ LockStat('lock_handle_list_wait_internal', 'handle-list lock internal thread wait time (usecs)'),
+ LockStat('lock_metadata_count', 'metadata lock acquisitions'),
+ LockStat('lock_metadata_wait_application', 'metadata lock application thread wait time (usecs)'),
+ LockStat('lock_metadata_wait_internal', 'metadata lock internal thread wait time (usecs)'),
+ LockStat('lock_schema_count', 'schema lock acquisitions'),
+ LockStat('lock_schema_wait_application', 'schema lock application thread wait time (usecs)'),
+ LockStat('lock_schema_wait_internal', 'schema lock internal thread wait time (usecs)'),
+ LockStat('lock_table_count', 'table lock acquisitions'),
+ LockStat('lock_table_wait_application', 'table lock application thread time waiting for the table lock (usecs)'),
+ LockStat('lock_table_wait_internal', 'table lock internal thread time waiting for the table lock (usecs)'),
+
+ ##########################################
# Logging statistics
##########################################
LogStat('log_buffer_size', 'total log buffer size', 'no_clear,no_scale,size'),
@@ -286,41 +339,6 @@ connection_stats = [
LogStat('log_zero_fills', 'log files manually zero-filled'),
##########################################
- # Reconciliation statistics
- ##########################################
- RecStat('rec_page_delete', 'pages deleted'),
- RecStat('rec_page_delete_fast', 'fast-path pages deleted'),
- RecStat('rec_pages', 'page reconciliation calls'),
- RecStat('rec_pages_eviction', 'page reconciliation calls for eviction'),
- RecStat('rec_split_stashed_bytes', 'split bytes currently awaiting free', 'no_clear,no_scale,size'),
- RecStat('rec_split_stashed_objects', 'split objects currently awaiting free', 'no_clear,no_scale'),
-
- ##########################################
- # Transaction statistics
- ##########################################
- TxnStat('txn_begin', 'transaction begins'),
- TxnStat('txn_checkpoint', 'transaction checkpoints'),
- TxnStat('txn_checkpoint_fsync_post', 'transaction fsync calls for checkpoint after allocating the transaction ID'),
- TxnStat('txn_checkpoint_fsync_post_duration', 'transaction fsync duration for checkpoint after allocating the transaction ID (usecs)', 'no_clear,no_scale'),
- TxnStat('txn_checkpoint_generation', 'transaction checkpoint generation', 'no_clear,no_scale'),
- TxnStat('txn_checkpoint_running', 'transaction checkpoint currently running', 'no_clear,no_scale'),
- TxnStat('txn_checkpoint_scrub_target', 'transaction checkpoint scrub dirty target', 'no_clear,no_scale'),
- TxnStat('txn_checkpoint_scrub_time', 'transaction checkpoint scrub time (msecs)', 'no_clear,no_scale'),
- TxnStat('txn_checkpoint_time_max', 'transaction checkpoint max time (msecs)', 'no_clear,no_scale'),
- TxnStat('txn_checkpoint_time_min', 'transaction checkpoint min time (msecs)', 'no_clear,no_scale'),
- TxnStat('txn_checkpoint_time_recent', 'transaction checkpoint most recent time (msecs)', 'no_clear,no_scale'),
- TxnStat('txn_checkpoint_time_total', 'transaction checkpoint total time (msecs)', 'no_clear,no_scale'),
- TxnStat('txn_commit', 'transactions committed'),
- TxnStat('txn_fail_cache', 'transaction failures due to cache overflow'),
- TxnStat('txn_pinned_checkpoint_range', 'transaction range of IDs currently pinned by a checkpoint', 'no_clear,no_scale'),
- TxnStat('txn_pinned_range', 'transaction range of IDs currently pinned', 'no_clear,no_scale'),
- TxnStat('txn_pinned_snapshot_range', 'transaction range of IDs currently pinned by named snapshots', 'no_clear,no_scale'),
- TxnStat('txn_rollback', 'transactions rolled back'),
- TxnStat('txn_snapshots_created', 'number of named snapshots created'),
- TxnStat('txn_snapshots_dropped', 'number of named snapshots dropped'),
- TxnStat('txn_sync', 'transaction sync calls'),
-
- ##########################################
# LSM statistics
##########################################
LSMStat('lsm_checkpoint_throttle', 'sleep for LSM checkpoint throttle'),
@@ -335,6 +353,16 @@ connection_stats = [
LSMStat('lsm_work_units_done', 'tree maintenance operations executed'),
##########################################
+ # Reconciliation statistics
+ ##########################################
+ RecStat('rec_page_delete', 'pages deleted'),
+ RecStat('rec_page_delete_fast', 'fast-path pages deleted'),
+ RecStat('rec_pages', 'page reconciliation calls'),
+ RecStat('rec_pages_eviction', 'page reconciliation calls for eviction'),
+ RecStat('rec_split_stashed_bytes', 'split bytes currently awaiting free', 'no_clear,no_scale,size'),
+ RecStat('rec_split_stashed_objects', 'split objects currently awaiting free', 'no_clear,no_scale'),
+
+ ##########################################
# Session operations
##########################################
SessionStat('session_cursor_open', 'open cursor count', 'no_clear,no_scale'),
@@ -357,21 +385,6 @@ connection_stats = [
SessionStat('session_table_verify_success', 'table verify successful calls', 'no_clear,no_scale'),
##########################################
- # Total cursor operations
- ##########################################
- CursorStat('cursor_create', 'cursor create calls'),
- CursorStat('cursor_insert', 'cursor insert calls'),
- CursorStat('cursor_next', 'cursor next calls'),
- CursorStat('cursor_prev', 'cursor prev calls'),
- CursorStat('cursor_remove', 'cursor remove calls'),
- CursorStat('cursor_reset', 'cursor reset calls'),
- CursorStat('cursor_restart', 'cursor restarted searches'),
- CursorStat('cursor_search', 'cursor search calls'),
- CursorStat('cursor_search_near', 'cursor search near calls'),
- CursorStat('cursor_truncate', 'truncate calls'),
- CursorStat('cursor_update', 'cursor update calls'),
-
- ##########################################
# Thread Count statistics
##########################################
ThreadStat('thread_fsync_active', 'active filesystem fsync calls','no_clear,no_scale'),
@@ -379,8 +392,36 @@ connection_stats = [
ThreadStat('thread_write_active', 'active filesystem write calls','no_clear,no_scale'),
##########################################
+ # Transaction statistics
+ ##########################################
+ TxnStat('txn_begin', 'transaction begins'),
+ TxnStat('txn_checkpoint', 'transaction checkpoints'),
+ TxnStat('txn_checkpoint_fsync_post', 'transaction fsync calls for checkpoint after allocating the transaction ID'),
+ TxnStat('txn_checkpoint_fsync_post_duration', 'transaction fsync duration for checkpoint after allocating the transaction ID (usecs)', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_generation', 'transaction checkpoint generation', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_running', 'transaction checkpoint currently running', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_scrub_target', 'transaction checkpoint scrub dirty target', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_scrub_time', 'transaction checkpoint scrub time (msecs)', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_skipped', 'transaction checkpoints skipped because database was clean'),
+ TxnStat('txn_checkpoint_time_max', 'transaction checkpoint max time (msecs)', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_time_min', 'transaction checkpoint min time (msecs)', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_time_recent', 'transaction checkpoint most recent time (msecs)', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_time_total', 'transaction checkpoint total time (msecs)', 'no_clear,no_scale'),
+ TxnStat('txn_commit', 'transactions committed'),
+ TxnStat('txn_fail_cache', 'transaction failures due to cache overflow'),
+ TxnStat('txn_pinned_checkpoint_range', 'transaction range of IDs currently pinned by a checkpoint', 'no_clear,no_scale'),
+ TxnStat('txn_pinned_range', 'transaction range of IDs currently pinned', 'no_clear,no_scale'),
+ TxnStat('txn_pinned_snapshot_range', 'transaction range of IDs currently pinned by named snapshots', 'no_clear,no_scale'),
+ TxnStat('txn_rollback', 'transactions rolled back'),
+ TxnStat('txn_snapshots_created', 'number of named snapshots created'),
+ TxnStat('txn_snapshots_dropped', 'number of named snapshots dropped'),
+ TxnStat('txn_sync', 'transaction sync calls'),
+
+ ##########################################
# Yield statistics
##########################################
+ YieldStat('application_cache_time', 'application thread time waiting for cache (usecs)'),
+ YieldStat('application_evict_time', 'application thread time evicting (usecs)'),
YieldStat('page_busy_blocked', 'page acquire busy blocked'),
YieldStat('page_forcible_evict_blocked', 'page acquire eviction blocked'),
YieldStat('page_locked_blocked', 'page acquire locked blocked'),
@@ -395,41 +436,30 @@ connection_stats = sorted(connection_stats, key=attrgetter('desc'))
##########################################
dsrc_stats = [
##########################################
- # Session operations
- ##########################################
- SessionStat('session_compact', 'object compaction'),
- SessionStat('session_cursor_open', 'open cursor count', 'no_clear,no_scale'),
-
- ##########################################
- # Cursor operations
+ # Block manager statistics
##########################################
- CursorStat('cursor_create', 'create calls'),
- CursorStat('cursor_insert', 'insert calls'),
- CursorStat('cursor_insert_bulk', 'bulk-loaded cursor-insert calls'),
- CursorStat('cursor_insert_bytes', 'cursor-insert key and value bytes inserted', 'size'),
- CursorStat('cursor_next', 'next calls'),
- CursorStat('cursor_prev', 'prev calls'),
- CursorStat('cursor_remove', 'remove calls'),
- CursorStat('cursor_remove_bytes', 'cursor-remove key bytes removed', 'size'),
- CursorStat('cursor_reset', 'reset calls'),
- CursorStat('cursor_restart', 'restarted searches'),
- CursorStat('cursor_search', 'search calls'),
- CursorStat('cursor_search_near', 'search near calls'),
- CursorStat('cursor_truncate', 'truncate calls'),
- CursorStat('cursor_update', 'update calls'),
- CursorStat('cursor_update_bytes', 'cursor-update value bytes updated', 'size'),
+ BlockStat('allocation_size', 'file allocation unit size', 'max_aggregate,no_scale,size'),
+ BlockStat('block_alloc', 'blocks allocated'),
+ BlockStat('block_checkpoint_size', 'checkpoint size', 'no_scale,size'),
+ BlockStat('block_extension', 'allocations requiring file extension'),
+ BlockStat('block_free', 'blocks freed'),
+ BlockStat('block_magic', 'file magic number', 'max_aggregate,no_scale'),
+ BlockStat('block_major', 'file major version number', 'max_aggregate,no_scale'),
+ BlockStat('block_minor', 'minor version number', 'max_aggregate,no_scale'),
+ BlockStat('block_reuse_bytes', 'file bytes available for reuse', 'no_scale,size'),
+ BlockStat('block_size', 'file size in bytes', 'no_scale,size'),
##########################################
# Btree statistics
##########################################
BtreeStat('btree_checkpoint_generation', 'btree checkpoint generation', 'no_clear,no_scale'),
- BtreeStat('btree_column_deleted', 'column-store variable-size deleted values', 'no_scale,all_only'),
- BtreeStat('btree_column_fix', 'column-store fixed-size leaf pages', 'no_scale,all_only'),
- BtreeStat('btree_column_internal', 'column-store internal pages', 'no_scale,all_only'),
- BtreeStat('btree_column_rle', 'column-store variable-size RLE encoded values', 'no_scale,all_only'),
- BtreeStat('btree_column_variable', 'column-store variable-size leaf pages', 'no_scale,all_only'),
+ BtreeStat('btree_column_deleted', 'column-store variable-size deleted values', 'no_scale,tree_walk'),
+ BtreeStat('btree_column_fix', 'column-store fixed-size leaf pages', 'no_scale,tree_walk'),
+ BtreeStat('btree_column_internal', 'column-store internal pages', 'no_scale,tree_walk'),
+ BtreeStat('btree_column_rle', 'column-store variable-size RLE encoded values', 'no_scale,tree_walk'),
+ BtreeStat('btree_column_variable', 'column-store variable-size leaf pages', 'no_scale,tree_walk'),
BtreeStat('btree_compact_rewrite', 'pages rewritten by compaction'),
- BtreeStat('btree_entries', 'number of key/value pairs', 'no_scale,all_only'),
+ BtreeStat('btree_entries', 'number of key/value pairs', 'no_scale,tree_walk'),
BtreeStat('btree_fixed_len', 'fixed-record size', 'max_aggregate,no_scale,size'),
BtreeStat('btree_maximum_depth', 'maximum tree depth', 'max_aggregate,no_scale'),
BtreeStat('btree_maxintlkey', 'maximum internal page key size', 'max_aggregate,no_scale,size'),
@@ -437,39 +467,9 @@ dsrc_stats = [
BtreeStat('btree_maxleafkey', 'maximum leaf page key size', 'max_aggregate,no_scale,size'),
BtreeStat('btree_maxleafpage', 'maximum leaf page size', 'max_aggregate,no_scale,size'),
BtreeStat('btree_maxleafvalue', 'maximum leaf page value size', 'max_aggregate,no_scale,size'),
- BtreeStat('btree_overflow', 'overflow pages', 'no_scale,all_only'),
- BtreeStat('btree_row_internal', 'row-store internal pages', 'no_scale,all_only'),
- BtreeStat('btree_row_leaf', 'row-store leaf pages', 'no_scale,all_only'),
-
- ##########################################
- # LSM statistics
- ##########################################
- LSMStat('bloom_count', 'bloom filters in the LSM tree', 'no_scale'),
- LSMStat('bloom_false_positive', 'bloom filter false positives'),
- LSMStat('bloom_hit', 'bloom filter hits'),
- LSMStat('bloom_miss', 'bloom filter misses'),
- LSMStat('bloom_page_evict', 'bloom filter pages evicted from cache'),
- LSMStat('bloom_page_read', 'bloom filter pages read into cache'),
- LSMStat('bloom_size', 'total size of bloom filters', 'no_scale,size'),
- LSMStat('lsm_checkpoint_throttle', 'sleep for LSM checkpoint throttle'),
- LSMStat('lsm_chunk_count', 'chunks in the LSM tree', 'no_scale'),
- LSMStat('lsm_generation_max', 'highest merge generation in the LSM tree', 'max_aggregate,no_scale'),
- LSMStat('lsm_lookup_no_bloom', 'queries that could have benefited from a Bloom filter that did not exist'),
- LSMStat('lsm_merge_throttle', 'sleep for LSM merge throttle'),
-
- ##########################################
- # Block manager statistics
- ##########################################
- BlockStat('allocation_size', 'file allocation unit size', 'max_aggregate,no_scale,size'),
- BlockStat('block_alloc', 'blocks allocated'),
- BlockStat('block_checkpoint_size', 'checkpoint size', 'no_scale,size'),
- BlockStat('block_extension', 'allocations requiring file extension'),
- BlockStat('block_free', 'blocks freed'),
- BlockStat('block_magic', 'file magic number', 'max_aggregate,no_scale'),
- BlockStat('block_major', 'file major version number', 'max_aggregate,no_scale'),
- BlockStat('block_minor', 'minor version number', 'max_aggregate,no_scale'),
- BlockStat('block_reuse_bytes', 'file bytes available for reuse', 'no_scale,size'),
- BlockStat('block_size', 'file size in bytes', 'no_scale,size'),
+ BtreeStat('btree_overflow', 'overflow pages', 'no_scale,tree_walk'),
+ BtreeStat('btree_row_internal', 'row-store internal pages', 'no_scale,tree_walk'),
+ BtreeStat('btree_row_leaf', 'row-store leaf pages', 'no_scale,tree_walk'),
##########################################
# Cache and eviction statistics
@@ -498,6 +498,28 @@ dsrc_stats = [
CacheStat('cache_write_restore', 'pages written requiring in-memory restoration'),
##########################################
+ # Cache content statistics
+ ##########################################
+ CacheWalkStat('cache_state_avg_written_size', 'Average on-disk page image size seen', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_gen_avg_gap', 'Average difference between current eviction generation when the page was last considered', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_gen_current', 'Current eviction generation', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_gen_max_gap', 'Maximum difference between current eviction generation when the page was last considered', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_max_pagesize', 'Maximum page size seen', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_memory', 'Pages created in memory and never written', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_min_written_size', 'Minimum on-disk page image size seen', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_not_queueable', 'Pages that could not be queued for eviction', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_pages', 'Total number of pages currently in cache', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_pages_clean', 'Clean pages currently in cache', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_pages_dirty', 'Dirty pages currently in cache', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_pages_internal', 'Internal pages currently in cache', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_pages_leaf', 'Leaf pages currently in cache', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_queued', 'Pages currently queued for eviction', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_refs_skipped', 'Refs skipped during cache traversal', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_root_entries', 'Entries in the root page', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_root_size', 'Size of the root page', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_smaller_alloc_size', 'On-disk page image sizes smaller than a single allocation unit', 'no_clear,no_scale'),
+
+ ##########################################
# Compression statistics
##########################################
CompressStat('compress_raw_fail', 'raw compression call failed, no additional data available'),
@@ -509,6 +531,41 @@ dsrc_stats = [
CompressStat('compress_write_too_small', 'page written was too small to compress'),
##########################################
+ # Cursor operations
+ ##########################################
+ CursorStat('cursor_create', 'create calls'),
+ CursorStat('cursor_insert', 'insert calls'),
+ CursorStat('cursor_insert_bulk', 'bulk-loaded cursor-insert calls'),
+ CursorStat('cursor_insert_bytes', 'cursor-insert key and value bytes inserted', 'size'),
+ CursorStat('cursor_next', 'next calls'),
+ CursorStat('cursor_prev', 'prev calls'),
+ CursorStat('cursor_remove', 'remove calls'),
+ CursorStat('cursor_remove_bytes', 'cursor-remove key bytes removed', 'size'),
+ CursorStat('cursor_reset', 'reset calls'),
+ CursorStat('cursor_restart', 'restarted searches'),
+ CursorStat('cursor_search', 'search calls'),
+ CursorStat('cursor_search_near', 'search near calls'),
+ CursorStat('cursor_truncate', 'truncate calls'),
+ CursorStat('cursor_update', 'update calls'),
+ CursorStat('cursor_update_bytes', 'cursor-update value bytes updated', 'size'),
+
+ ##########################################
+ # LSM statistics
+ ##########################################
+ LSMStat('bloom_count', 'bloom filters in the LSM tree', 'no_scale'),
+ LSMStat('bloom_false_positive', 'bloom filter false positives'),
+ LSMStat('bloom_hit', 'bloom filter hits'),
+ LSMStat('bloom_miss', 'bloom filter misses'),
+ LSMStat('bloom_page_evict', 'bloom filter pages evicted from cache'),
+ LSMStat('bloom_page_read', 'bloom filter pages read into cache'),
+ LSMStat('bloom_size', 'total size of bloom filters', 'no_scale,size'),
+ LSMStat('lsm_checkpoint_throttle', 'sleep for LSM checkpoint throttle'),
+ LSMStat('lsm_chunk_count', 'chunks in the LSM tree', 'no_scale'),
+ LSMStat('lsm_generation_max', 'highest merge generation in the LSM tree', 'max_aggregate,no_scale'),
+ LSMStat('lsm_lookup_no_bloom', 'queries that could have benefited from a Bloom filter that did not exist'),
+ LSMStat('lsm_merge_throttle', 'sleep for LSM merge throttle'),
+
+ ##########################################
# Reconciliation statistics
##########################################
RecStat('rec_dictionary', 'dictionary matches'),
@@ -527,6 +584,12 @@ dsrc_stats = [
RecStat('rec_suffix_compression', 'internal page key bytes discarded using suffix compression', 'size'),
##########################################
+ # Session operations
+ ##########################################
+ SessionStat('session_compact', 'object compaction'),
+ SessionStat('session_cursor_open', 'open cursor count', 'no_clear,no_scale'),
+
+ ##########################################
# Transaction statistics
##########################################
TxnStat('txn_update_conflict', 'update conflicts'),
diff --git a/src/third_party/wiredtiger/dist/wtperf_config.py b/src/third_party/wiredtiger/dist/wtperf_config.py
new file mode 100644
index 00000000000..72256ed5527
--- /dev/null
+++ b/src/third_party/wiredtiger/dist/wtperf_config.py
@@ -0,0 +1,25 @@
+# Output a doxygen version of the wtperf configuration options.
+import string, sys
+
+for line in sys.stdin:
+ if not line.startswith('OPTION '):
+ continue
+
+ line = line.replace('OPTION ', '')
+ v = line.split('",')
+ v[0] = v[0].replace('"', '').strip()
+ v[1] = v[1].replace('"', '').strip()
+ v[2] = v[2].replace('"', '').strip()
+ v[3] = v[3].replace('"', '').strip()
+
+ if v[3] == 'boolean':
+ if v[2] == '0':
+ d = 'false'
+ else:
+ d = 'true'
+ elif v[3] == 'string':
+ d = '"' + v[2] + '"'
+ else:
+ d = v[2]
+ print '@par ' + v[0] + ' (' + v[3] + ', default=' + d + ')'
+ print v[1]
diff --git a/src/third_party/wiredtiger/examples/c/ex_all.c b/src/third_party/wiredtiger/examples/c/ex_all.c
index a2042c22bbb..ea646604a76 100644
--- a/src/third_party/wiredtiger/examples/c/ex_all.c
+++ b/src/third_party/wiredtiger/examples/c/ex_all.c
@@ -611,6 +611,13 @@ session_ops(WT_SESSION *session)
"block_compressor=zlib,key_format=S,value_format=S");
/*! [Create a zlib compressed table] */
ret = session->drop(session, "table:mytable", NULL);
+
+ /*! [Create a zstd compressed table] */
+ ret = session->create(session,
+ "table:mytable",
+ "block_compressor=zstd,key_format=S,value_format=S");
+ /*! [Create a zstd compressed table] */
+ ret = session->drop(session, "table:mytable", NULL);
#endif
/*! [Configure checksums to uncompressed] */
@@ -1108,6 +1115,32 @@ main(void)
if (ret == 0)
(void)conn->close(conn, NULL);
+ /*! [Configure zlib extension with compression level] */
+ ret = wiredtiger_open(home, NULL,
+ "create,"
+ "extensions=[/usr/local/lib/"
+ "libwiredtiger_zlib.so=[config=[compression_level=3]]]", &conn);
+ /*! [Configure zlib extension with compression level] */
+ if (ret == 0)
+ (void)conn->close(conn, NULL);
+
+ /*! [Configure zstd extension] */
+ ret = wiredtiger_open(home, NULL,
+ "create,"
+ "extensions=[/usr/local/lib/libwiredtiger_zstd.so]", &conn);
+ /*! [Configure zstd extension] */
+ if (ret == 0)
+ (void)conn->close(conn, NULL);
+
+ /*! [Configure zstd extension with compression level] */
+ ret = wiredtiger_open(home, NULL,
+ "create,"
+ "extensions=[/usr/local/lib/"
+ "libwiredtiger_zstd.so=[config=[compression_level=9]]]", &conn);
+ /*! [Configure zstd extension with compression level] */
+ if (ret == 0)
+ (void)conn->close(conn, NULL);
+
/*
* This example code gets run, and direct I/O might not be available,
* causing the open to fail. The documentation requires code snippets,
diff --git a/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_all.java b/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_all.java
index 83a37e9a6a5..cf8491aa4f8 100644
--- a/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_all.java
+++ b/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_all.java
@@ -549,6 +549,12 @@ session_ops(Session session)
"block_compressor=zlib,key_format=S,value_format=S");
/*! [Create a zlib compressed table] */
ret = session.drop("table:mytable", null);
+
+ /*! [Create a zstd compressed table] */
+ ret = session.create("table:mytable",
+ "block_compressor=zstd,key_format=S,value_format=S");
+ /*! [Create a zstd compressed table] */
+ ret = session.drop("table:mytable", null);
} // if (false)
/*! [Configure checksums to uncompressed] */
@@ -942,6 +948,29 @@ allExample()
/*! [Configure zlib extension] */
conn.close(null);
+ /*! [Configure zlib extension with compression level] */
+ conn = wiredtiger.open(home,
+ "create," +
+ "extensions=[/usr/local/lib/" +
+ "libwiredtiger_zlib.so=[config=[compression_level=3]]]");
+ /*! [Configure zlib extension with compression level] */
+ conn.close(null);
+
+ /*! [Configure zstd extension] */
+ conn = wiredtiger.open(home,
+ "create," +
+ "extensions=[/usr/local/lib/libwiredtiger_zstd.so]");
+ /*! [Configure zstd extension] */
+ conn.close(null);
+
+ /*! [Configure zstd extension with compression level] */
+ conn = wiredtiger.open(home,
+ "create," +
+ "extensions=[/usr/local/lib/" +
+ "libwiredtiger_zstd.so=[config=[compression_level=9]]]");
+ /*! [Configure zstd extension with compression level] */
+ conn.close(null);
+
/*
* This example code gets run, and direct I/O might not be available,
* causing the open to fail. The documentation requires code snippets,
diff --git a/src/third_party/wiredtiger/ext/compressors/lz4/lz4_compress.c b/src/third_party/wiredtiger/ext/compressors/lz4/lz4_compress.c
index 35159d0fa76..885701e564b 100644
--- a/src/third_party/wiredtiger/ext/compressors/lz4/lz4_compress.c
+++ b/src/third_party/wiredtiger/ext/compressors/lz4/lz4_compress.c
@@ -31,10 +31,20 @@
#include <stdlib.h>
#include <string.h>
+/*
+ * We need to include the configuration file to detect whether this extension
+ * is being built into the WiredTiger library; application-loaded compression
+ * functions won't need it.
+ */
#include <wiredtiger_config.h>
+
#include <wiredtiger.h>
#include <wiredtiger_ext.h>
+#ifdef _MSC_VER
+#define inline __inline
+#endif
+
/* Local compressor structure. */
typedef struct {
WT_COMPRESSOR compressor; /* Must come first */
@@ -171,8 +181,6 @@ lz4_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session,
int decoded;
uint8_t *dst_tmp;
- (void)src_len; /* Unused parameters */
-
wt_api = ((LZ4_COMPRESSOR *)compressor)->wt_api;
/*
@@ -183,6 +191,13 @@ lz4_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session,
#ifdef WORDS_BIGENDIAN
lz4_prefix_swap(&prefix);
#endif
+ if (prefix.compressed_len + sizeof(LZ4_PREFIX) > src_len) {
+ (void)wt_api->err_printf(wt_api,
+ session,
+ "WT_COMPRESSOR.decompress: stored size exceeds source "
+ "size");
+ return (WT_ERROR);
+ }
/*
* Decompress, starting after the prefix bytes. Use safe decompression:
@@ -267,18 +282,24 @@ lz4_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session,
size_t *result_lenp, uint32_t *result_slotsp)
{
LZ4_PREFIX prefix;
- int lz4_len;
uint32_t slot;
- int sourceSize, targetDestSize;
+ int lz4_len, sourceSize, targetDestSize;
(void)compressor; /* Unused parameters */
(void)session;
(void)split_pct;
(void)final;
- sourceSize = (int)offsets[slots]; /* Type conversion */
- targetDestSize =
- (int)((dst_len < page_max ? dst_len : page_max) - extra);
+ /*
+ * Set the source and target sizes. The target size is complicated: we
+ * don't want to exceed the smaller of the maximum page size or the
+ * destination buffer length, and in both cases we have to take into
+ * account the space for our overhead and the extra bytes required by
+ * our caller.
+ */
+ sourceSize = (int)offsets[slots];
+ targetDestSize = (int)(page_max < dst_len ? page_max : dst_len);
+ targetDestSize -= (int)(sizeof(LZ4_PREFIX) + extra);
/* Compress, starting after the prefix bytes. */
lz4_len = LZ4_compress_destSize((const char *)src,
@@ -352,7 +373,7 @@ lz4_terminate(WT_COMPRESSOR *compressor, WT_SESSION *session)
* Add a LZ4 compressor.
*/
static int
-lz_add_compressor(WT_CONNECTION *connection, int raw, const char *name)
+lz_add_compressor(WT_CONNECTION *connection, bool raw, const char *name)
{
LZ4_COMPRESSOR *lz4_compressor;
@@ -391,9 +412,9 @@ lz4_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
(void)config; /* Unused parameters */
- if ((ret = lz_add_compressor(connection, 1, "lz4")) != 0)
+ if ((ret = lz_add_compressor(connection, true, "lz4")) != 0)
return (ret);
- if ((ret = lz_add_compressor(connection, 0, "lz4-noraw")) != 0)
+ if ((ret = lz_add_compressor(connection, false, "lz4-noraw")) != 0)
return (ret);
return (0);
}
diff --git a/src/third_party/wiredtiger/ext/compressors/snappy/snappy_compress.c b/src/third_party/wiredtiger/ext/compressors/snappy/snappy_compress.c
index 981e334a2de..32f1ddcb9a0 100644
--- a/src/third_party/wiredtiger/ext/compressors/snappy/snappy_compress.c
+++ b/src/third_party/wiredtiger/ext/compressors/snappy/snappy_compress.c
@@ -31,10 +31,20 @@
#include <stdlib.h>
#include <string.h>
+/*
+ * We need to include the configuration file to detect whether this extension
+ * is being built into the WiredTiger library; application-loaded compression
+ * functions won't need it.
+ */
#include <wiredtiger_config.h>
+
#include <wiredtiger.h>
#include <wiredtiger_ext.h>
+#ifdef _MSC_VER
+#define inline __inline
+#endif
+
/* Local compressor structure. */
typedef struct {
WT_COMPRESSOR compressor; /* Must come first */
@@ -42,6 +52,12 @@ typedef struct {
WT_EXTENSION_API *wt_api; /* Extension API */
} SNAPPY_COMPRESSOR;
+/*
+ * Snappy decompression requires an exact compressed byte count. WiredTiger
+ * doesn't track that value, so store it in the destination buffer.
+ */
+#define SNAPPY_PREFIX sizeof(uint64_t)
+
#ifdef WORDS_BIGENDIAN
/*
* snappy_bswap64 --
@@ -64,11 +80,11 @@ snappy_bswap64(uint64_t v)
#endif
/*
- * wt_snappy_error --
+ * snappy_error --
* Output an error message, and return a standard error code.
*/
static int
-wt_snappy_error(WT_COMPRESSOR *compressor,
+snappy_error(WT_COMPRESSOR *compressor,
WT_SESSION *session, const char *call, snappy_status snret)
{
WT_EXTENSION_API *wt_api;
@@ -94,68 +110,69 @@ wt_snappy_error(WT_COMPRESSOR *compressor,
}
/*
- * wt_snappy_compress --
+ * snappy_compression --
* WiredTiger snappy compression.
*/
static int
-wt_snappy_compress(WT_COMPRESSOR *compressor, WT_SESSION *session,
+snappy_compression(WT_COMPRESSOR *compressor, WT_SESSION *session,
uint8_t *src, size_t src_len,
uint8_t *dst, size_t dst_len,
size_t *result_lenp, int *compression_failed)
{
snappy_status snret;
size_t snaplen;
+ uint64_t snaplen_u64;
char *snapbuf;
/*
- * dst_len was computed in wt_snappy_pre_size, so we know it's big
- * enough. Skip past the space we'll use to store the final count
- * of compressed bytes.
+ * dst_len was computed in snappy_pre_size, so we know it's big enough.
+ * Skip past the space we'll use to store the final count of compressed
+ * bytes.
*/
- snaplen = dst_len - sizeof(size_t);
- snapbuf = (char *)dst + sizeof(size_t);
+ snaplen = dst_len - SNAPPY_PREFIX;
+ snapbuf = (char *)dst + SNAPPY_PREFIX;
/* snaplen is an input and an output arg. */
snret = snappy_compress((char *)src, src_len, snapbuf, &snaplen);
- if (snret == SNAPPY_OK) {
- if (snaplen + sizeof(size_t) < src_len) {
- *result_lenp = snaplen + sizeof(size_t);
- *compression_failed = 0;
-
- /*
- * On decompression, snappy requires an exact compressed
- * byte count (the current value of snaplen). WiredTiger
- * does not preserve that value, so save snaplen at the
- * beginning of the destination buffer.
- *
- * Store the value in little-endian format.
- */
+ if (snret == SNAPPY_OK && snaplen + SNAPPY_PREFIX < src_len) {
+ *result_lenp = snaplen + SNAPPY_PREFIX;
+ *compression_failed = 0;
+
+ /*
+ * On decompression, snappy requires an exact compressed byte
+ * count (the current value of snaplen). WiredTiger does not
+ * preserve that value, so save snaplen at the beginning of
+ * the destination buffer.
+ *
+ * Store the value in little-endian format.
+ */
+ snaplen_u64 = snaplen;
#ifdef WORDS_BIGENDIAN
- snaplen = snappy_bswap64(snaplen);
+ snaplen_u64 = snappy_bswap64(snaplen_u64);
#endif
- *(size_t *)dst = snaplen;
- } else
- /* The compressor failed to produce a smaller result. */
- *compression_failed = 1;
+ *(uint64_t *)dst = snaplen_u64;
return (0);
}
- return (wt_snappy_error(compressor, session, "snappy_compress", snret));
+
+ *compression_failed = 1;
+ return (snret == SNAPPY_OK ?
+ 0 : snappy_error(compressor, session, "snappy_compress", snret));
}
/*
- * wt_snappy_decompress --
+ * snappy_decompression --
* WiredTiger snappy decompression.
*/
static int
-wt_snappy_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session,
+snappy_decompression(WT_COMPRESSOR *compressor, WT_SESSION *session,
uint8_t *src, size_t src_len,
uint8_t *dst, size_t dst_len,
size_t *result_lenp)
{
WT_EXTENSION_API *wt_api;
snappy_status snret;
- size_t snaplen;
+ uint64_t snaplen;
wt_api = ((SNAPPY_COMPRESSOR *)compressor)->wt_api;
@@ -163,36 +180,36 @@ wt_snappy_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session,
* Retrieve the saved length, handling little- to big-endian conversion
* as necessary.
*/
- snaplen = *(size_t *)src;
+ snaplen = *(uint64_t *)src;
#ifdef WORDS_BIGENDIAN
snaplen = snappy_bswap64(snaplen);
#endif
- if (snaplen + sizeof(size_t) > src_len) {
+ if (snaplen + SNAPPY_PREFIX > src_len) {
(void)wt_api->err_printf(wt_api,
session,
- "wt_snappy_decompress: stored size exceeds buffer size");
+ "WT_COMPRESSOR.decompress: stored size exceeds source "
+ "size");
return (WT_ERROR);
}
/* dst_len is an input and an output arg. */
snret = snappy_uncompress(
- (char *)src + sizeof(size_t), snaplen, (char *)dst, &dst_len);
+ (char *)src + SNAPPY_PREFIX,
+ (size_t)snaplen, (char *)dst, &dst_len);
if (snret == SNAPPY_OK) {
*result_lenp = dst_len;
return (0);
}
-
- return (
- wt_snappy_error(compressor, session, "snappy_decompress", snret));
+ return (snappy_error(compressor, session, "snappy_decompress", snret));
}
/*
- * wt_snappy_pre_size --
+ * snappy_pre_size --
* WiredTiger snappy destination buffer sizing.
*/
static int
-wt_snappy_pre_size(WT_COMPRESSOR *compressor, WT_SESSION *session,
+snappy_pre_size(WT_COMPRESSOR *compressor, WT_SESSION *session,
uint8_t *src, size_t src_len,
size_t *result_lenp)
{
@@ -203,19 +220,19 @@ wt_snappy_pre_size(WT_COMPRESSOR *compressor, WT_SESSION *session,
/*
* Snappy requires the dest buffer be somewhat larger than the source.
* Fortunately, this is fast to compute, and will give us a dest buffer
- * in wt_snappy_compress that we can compress to directly. We add space
+ * in snappy_compression that we can compress to directly. We add space
* in the dest buffer to store the accurate compressed size.
*/
- *result_lenp = snappy_max_compressed_length(src_len) + sizeof(size_t);
+ *result_lenp = snappy_max_compressed_length(src_len) + SNAPPY_PREFIX;
return (0);
}
/*
- * wt_snappy_terminate --
+ * snappy_terminate --
* WiredTiger snappy compression termination.
*/
static int
-wt_snappy_terminate(WT_COMPRESSOR *compressor, WT_SESSION *session)
+snappy_terminate(WT_COMPRESSOR *compressor, WT_SESSION *session)
{
(void)session; /* Unused parameters */
@@ -227,9 +244,9 @@ int snappy_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
/*
* snappy_extension_init --
- * WiredTiger snappy compression extension - called directly when
- * Snappy support is built in, or via wiredtiger_extension_init when
- * snappy support is included via extension loading.
+ * WiredTiger snappy compression extension - called directly when snappy
+ * support is built in, or via wiredtiger_extension_init when snappy support
+ * is included via extension loading.
*/
int
snappy_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
@@ -241,11 +258,11 @@ snappy_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
if ((snappy_compressor = calloc(1, sizeof(SNAPPY_COMPRESSOR))) == NULL)
return (errno);
- snappy_compressor->compressor.compress = wt_snappy_compress;
+ snappy_compressor->compressor.compress = snappy_compression;
snappy_compressor->compressor.compress_raw = NULL;
- snappy_compressor->compressor.decompress = wt_snappy_decompress;
- snappy_compressor->compressor.pre_size = wt_snappy_pre_size;
- snappy_compressor->compressor.terminate = wt_snappy_terminate;
+ snappy_compressor->compressor.decompress = snappy_decompression;
+ snappy_compressor->compressor.pre_size = snappy_pre_size;
+ snappy_compressor->compressor.terminate = snappy_terminate;
snappy_compressor->wt_api = connection->get_extension_api(connection);
diff --git a/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c b/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c
index 484df0a6785..ef20503df0a 100644
--- a/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c
+++ b/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c
@@ -32,16 +32,18 @@
#include <stdlib.h>
#include <string.h>
-#include <wiredtiger.h>
-#include <wiredtiger_ext.h>
-
/*
* We need to include the configuration file to detect whether this extension
- * is being built into the WiredTiger library.
+ * is being built into the WiredTiger library; application-loaded compression
+ * functions won't need it.
*/
-#include "wiredtiger_config.h"
+#include <wiredtiger_config.h>
+
+#include <wiredtiger.h>
+#include <wiredtiger_ext.h>
+
#ifdef _MSC_VER
-#define inline __inline
+#define inline __inline
#endif
/* Local compressor structure. */
@@ -234,121 +236,163 @@ zlib_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session,
{
ZLIB_COMPRESSOR *zlib_compressor;
ZLIB_OPAQUE opaque;
- z_stream *best_zs, last_zs, zs;
- uint32_t curr_slot, last_slot;
- int ret;
+ z_stream *best_zs, *last_zs, _last_zs, *zs, _zs;
+ uint32_t curr_slot, last_slot, zlib_reserved;
+ bool increase_reserve;
+ int ret, tret;
- curr_slot = last_slot = 0;
- (void)split_pct;
- (void)dst_len;
+ (void)split_pct; /* Unused parameters */
(void)final;
zlib_compressor = (ZLIB_COMPRESSOR *)compressor;
- memset(&zs, 0, sizeof(zs));
- zs.zalloc = zalloc;
- zs.zfree = zfree;
- opaque.compressor = compressor;
- opaque.session = session;
- zs.opaque = &opaque;
-
- if ((ret = deflateInit(&zs, zlib_compressor->zlib_level)) != Z_OK)
- return (zlib_error(compressor, session, "deflateInit", ret));
-
- zs.next_in = src;
- zs.next_out = dst;
/*
* Experimentally derived, reserve this many bytes for zlib to finish
* up a buffer. If this isn't sufficient, we don't fail but we will be
* inefficient.
*/
#define WT_ZLIB_RESERVED 24
- zs.avail_out = (uint32_t)(page_max - (extra + WT_ZLIB_RESERVED));
+#define WT_ZLIB_RESERVED_MAX 48
+ zlib_reserved = WT_ZLIB_RESERVED;
+
+ if (0) {
+retry: /* If we reached our maximum reserve, quit. */
+ if (zlib_reserved == WT_ZLIB_RESERVED_MAX)
+ return (0);
+ zlib_reserved = WT_ZLIB_RESERVED_MAX;
+ }
+
+ best_zs = last_zs = NULL;
+ last_slot = 0;
+ increase_reserve = false;
+ ret = 0;
- /* Save the stream state in case the chosen data doesn't fit. */
- if ((ret = deflateCopy(&last_zs, &zs)) != Z_OK)
- return (zlib_error(compressor, session, "deflateCopy", ret));
+ zs = &_zs;
+ memset(zs, 0, sizeof(*zs));
+ zs->zalloc = zalloc;
+ zs->zfree = zfree;
+ opaque.compressor = compressor;
+ opaque.session = session;
+ zs->opaque = &opaque;
+
+ if ((ret = deflateInit(zs, zlib_compressor->zlib_level)) != Z_OK)
+ return (zlib_error(compressor, session, "deflateInit", ret));
+
+ zs->next_in = src;
+ zs->next_out = dst;
+
+ /*
+ * Set the target size. The target size is complicated: we don't want
+ * to exceed the smaller of the maximum page size or the destination
+ * buffer length, and in both cases we have to take into account the
+ * space required by zlib to finish up the buffer and the extra bytes
+ * required by our caller.
+ */
+ zs->avail_out = (uint32_t)(page_max < dst_len ? page_max : dst_len);
+ zs->avail_out -= (uint32_t)(zlib_reserved + extra);
/*
* Strategy: take the available output size and compress that much
* input. Continue until there is no input small enough or the
* compression fails to fit.
*/
- for (best_zs = NULL;;) {
+ for (;;) {
/* Find the next slot we will try to compress up to. */
- if ((curr_slot = zlib_find_slot(
- zs.total_in + zs.avail_out, offsets, slots)) > last_slot) {
- zs.avail_in = offsets[curr_slot] - offsets[last_slot];
- while (zs.avail_in > 0 && zs.avail_out > 0)
- if ((ret = deflate(&zs, Z_SYNC_FLUSH)) != Z_OK)
- return (zlib_error(compressor,
- session, "deflate", ret));
+ curr_slot = zlib_find_slot(
+ zs->total_in + zs->avail_out, offsets, slots);
+ if (curr_slot > last_slot) {
+ zs->avail_in = offsets[curr_slot] - offsets[last_slot];
+ while (zs->avail_in > 0 && zs->avail_out > 0)
+ if ((ret = deflate(zs, Z_SYNC_FLUSH)) != Z_OK) {
+ ret = zlib_error(compressor,
+ session, "deflate", ret);
+ goto err;
+ }
}
/*
* We didn't do a deflate, or it didn't work: use the last saved
- * position.
+ * position (if any).
*/
- if (curr_slot <= last_slot || zs.avail_in > 0) {
- if ((ret = deflateEnd(&zs)) != Z_OK &&
- ret != Z_DATA_ERROR)
- return (zlib_error(
- compressor, session, "deflateEnd", ret));
-
- best_zs = &last_zs;
+ if (curr_slot <= last_slot || zs->avail_in > 0) {
+ best_zs = last_zs;
break;
}
- /* The last deflation succeeded, discard the saved one. */
- if ((ret = deflateEnd(&last_zs)) != Z_OK && ret != Z_DATA_ERROR)
- return (zlib_error(
- compressor, session, "deflateEnd", ret));
-
/*
* If there's more compression to do, save a snapshot and keep
* going, otherwise, use the current compression.
*/
last_slot = curr_slot;
- if (zs.avail_out > 0) {
- if ((ret = deflateCopy(&last_zs, &zs)) != Z_OK)
- return (zlib_error(
- compressor, session, "deflateCopy", ret));
+ if (zs->avail_out > 0) {
+ /* Discard any previously saved snapshot. */
+ if (last_zs != NULL) {
+ ret = deflateEnd(last_zs);
+ last_zs = NULL;
+ if (ret != Z_OK && ret != Z_DATA_ERROR) {
+ ret = zlib_error(compressor,
+ session, "deflateEnd", ret);
+ goto err;
+ }
+ }
+ last_zs = &_last_zs;
+ if ((ret = deflateCopy(last_zs, zs)) != Z_OK) {
+ last_zs = NULL;
+ ret = zlib_error(
+ compressor, session, "deflateCopy", ret);
+ goto err;
+ }
continue;
}
- best_zs = &zs;
+ best_zs = zs;
break;
}
- best_zs->avail_out += WT_ZLIB_RESERVED;
- ret = deflate(best_zs, Z_FINISH);
+ if (last_slot > 0 && best_zs != NULL) {
+ /* Add the reserved bytes and try to finish the compression. */
+ best_zs->avail_out += zlib_reserved;
+ ret = deflate(best_zs, Z_FINISH);
- /*
- * If the end marker didn't fit, report that we got no work done,
- * WiredTiger will compress the (possibly large) page image using
- * ordinary compression instead.
- */
- if (ret == Z_OK || ret == Z_BUF_ERROR)
- last_slot = 0;
- else if (ret != Z_STREAM_END)
- return (
- zlib_error(compressor, session, "deflate end block", ret));
+ /*
+ * If the end marker didn't fit with the default value, try
+ * again with a maximum value; if that doesn't work, report we
+ * got no work done, WiredTiger will compress the (possibly
+ * large) page image using ordinary compression instead.
+ */
+ if (ret == Z_OK || ret == Z_BUF_ERROR) {
+ last_slot = 0;
+ increase_reserve = true;
+ } else if (ret != Z_STREAM_END) {
+ ret = zlib_error(
+ compressor, session, "deflate end block", ret);
+ goto err;
+ }
+ ret = 0;
+ }
- if ((ret = deflateEnd(best_zs)) != Z_OK && ret != Z_DATA_ERROR)
- return (zlib_error(compressor, session, "deflateEnd", ret));
+err: if (zs != NULL &&
+ (tret = deflateEnd(zs)) != Z_OK && tret != Z_DATA_ERROR)
+ ret = zlib_error(compressor, session, "deflateEnd", tret);
+ if (last_zs != NULL &&
+ (tret = deflateEnd(last_zs)) != Z_OK && tret != Z_DATA_ERROR)
+ ret = zlib_error(compressor, session, "deflateEnd", tret);
- if (last_slot > 0) {
+ if (ret == 0 && last_slot > 0) {
*result_slotsp = last_slot;
*result_lenp = (size_t)best_zs->total_out;
} else {
- /* We didn't manage to compress anything: don't retry. */
+ /* We didn't manage to compress anything. */
*result_slotsp = 0;
*result_lenp = 1;
+
+ if (increase_reserve)
+ goto retry;
}
#if 0
/* Decompress the result and confirm it matches the original source. */
- if (last_slot > 0) {
+ if (ret == 0 && last_slot > 0) {
void *decomp;
size_t result_len;
@@ -363,19 +407,20 @@ zlib_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session,
"deflate compare with original source",
Z_DATA_ERROR);
zfree(&opaque, decomp);
- if (ret != 0)
- return (ret);
}
#endif
#if 0
- fprintf(stderr,
- "zlib_compress_raw (%s): page_max %" PRIuMAX ", slots %" PRIu32
- ", take %" PRIu32 ": %" PRIu32 " -> %" PRIuMAX "\n",
- final ? "final" : "not final", (uintmax_t)page_max,
- slots, last_slot, offsets[last_slot], (uintmax_t)*result_lenp);
+ if (ret == 0 && last_slot > 0)
+ fprintf(stderr,
+ "zlib_compress_raw (%s): page_max %" PRIuMAX ", slots %"
+ PRIu32 ", take %" PRIu32 ": %" PRIu32 " -> %" PRIuMAX "\n",
+ final ? "final" : "not final", (uintmax_t)page_max,
+ slots, last_slot, offsets[last_slot],
+ (uintmax_t)*result_lenp);
#endif
- return (0);
+
+ return (ret);
}
/*
@@ -396,7 +441,8 @@ zlib_terminate(WT_COMPRESSOR *compressor, WT_SESSION *session)
* Add a zlib compressor.
*/
static int
-zlib_add_compressor(WT_CONNECTION *connection, int raw, const char *name)
+zlib_add_compressor(
+ WT_CONNECTION *connection, bool raw, const char *name, int zlib_level)
{
ZLIB_COMPRESSOR *zlib_compressor;
@@ -415,17 +461,80 @@ zlib_add_compressor(WT_CONNECTION *connection, int raw, const char *name)
zlib_compressor->compressor.terminate = zlib_terminate;
zlib_compressor->wt_api = connection->get_extension_api(connection);
-
- /*
- * Between 0-10: level: see zlib manual.
- */
- zlib_compressor->zlib_level = Z_DEFAULT_COMPRESSION;
+ zlib_compressor->zlib_level = zlib_level;
/* Load the compressor. */
return (connection->add_compressor(
connection, name, (WT_COMPRESSOR *)zlib_compressor, NULL));
}
+/*
+ * zlib_init_config --
+ *	Handle zlib configuration.
+ *
+ * Reads the "config" string of the extension's configuration argument and,
+ * if it contains a "compression_level" key, stores the value in *zlib_levelp.
+ * The caller's value is left untouched when there's no configuration argument
+ * or the key isn't present. Returns 0 on success, EINVAL for a level outside
+ * the range 0-9, or the error returned by the extension API.
+ */
+static int
+zlib_init_config(
+    WT_CONNECTION *connection, WT_CONFIG_ARG *config, int *zlib_levelp)
+{
+	WT_CONFIG_ITEM k, v;
+	WT_CONFIG_PARSER *config_parser;
+	WT_EXTENSION_API *wtext;
+	int ret, zlib_level;
+
+	/* If configured as a built-in, there's no configuration argument. */
+	if (config == NULL)
+		return (0);
+
+	/*
+	 * Zlib compression engine allows applications to specify a compression
+	 * level; review the configuration.
+	 */
+	wtext = connection->get_extension_api(connection);
+	if ((ret = wtext->config_get(wtext, NULL, config, "config", &v)) != 0) {
+		(void)wtext->err_printf(wtext, NULL,
+		    "WT_EXTENSION_API.config_get: zlib configure: %s",
+		    wtext->strerror(wtext, NULL, ret));
+		return (ret);
+	}
+	if ((ret = wtext->config_parser_open(
+	    wtext, NULL, v.str, v.len, &config_parser)) != 0) {
+		(void)wtext->err_printf(wtext, NULL,
+		    "WT_EXTENSION_API.config_parser_open: zlib configure: %s",
+		    wtext->strerror(wtext, NULL, ret));
+		return (ret);
+	}
+
+	/*
+	 * From here on the parser is open: every error path must go through
+	 * the err label so the parser is closed before returning.
+	 */
+	while ((ret = config_parser->next(config_parser, &k, &v)) == 0) {
+		/* Ignore keys other than the compression level. */
+		if (strlen("compression_level") != k.len ||
+		    strncmp("compression_level", k.str, k.len) != 0)
+			continue;
+
+		/*
+		 * Between 0-9: level: see zlib manual.
+		 */
+		zlib_level = (int)v.val;
+		if (zlib_level < 0 || zlib_level > 9) {
+			(void)wtext->err_printf(wtext, NULL,
+			    "WT_CONFIG_PARSER.next: zlib configure: "
+			    "unsupported compression level %d",
+			    zlib_level);
+			ret = EINVAL;
+			goto err;
+		}
+		*zlib_levelp = zlib_level;
+	}
+
+	/* The loop ends with WT_NOTFOUND once every key has been seen. */
+	if (ret != WT_NOTFOUND) {
+		(void)wtext->err_printf(wtext, NULL,
+		    "WT_CONFIG_PARSER.next: zlib configure: %s",
+		    wtext->strerror(wtext, NULL, ret));
+		goto err;
+	}
+	if ((ret = config_parser->close(config_parser)) != 0) {
+		(void)wtext->err_printf(wtext, NULL,
+		    "WT_CONFIG_PARSER.close: zlib configure: %s",
+		    wtext->strerror(wtext, NULL, ret));
+		return (ret);
+	}
+	return (0);
+
+err:	/* Discard the parser, preserving the original error. */
+	(void)config_parser->close(config_parser);
+	return (ret);
+}
+
int zlib_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
/*
@@ -437,13 +546,17 @@ int zlib_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
int
zlib_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
{
- int ret;
+ int ret, zlib_level;
- (void)config; /* Unused parameters */
+ zlib_level = Z_DEFAULT_COMPRESSION; /* Default */
+ if ((ret = zlib_init_config(connection, config, &zlib_level)) != 0)
+ return (ret);
- if ((ret = zlib_add_compressor(connection, 1, "zlib")) != 0)
+ if ((ret = zlib_add_compressor(
+ connection, true, "zlib", zlib_level)) != 0)
return (ret);
- if ((ret = zlib_add_compressor(connection, 0, "zlib-noraw")) != 0)
+ if ((ret = zlib_add_compressor(
+ connection, false, "zlib-noraw", zlib_level)) != 0)
return (ret);
return (0);
}
diff --git a/src/third_party/wiredtiger/ext/compressors/zstd/Makefile.am b/src/third_party/wiredtiger/ext/compressors/zstd/Makefile.am
new file mode 100644
index 00000000000..9f0997011e9
--- /dev/null
+++ b/src/third_party/wiredtiger/ext/compressors/zstd/Makefile.am
@@ -0,0 +1,11 @@
+AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include
+
+if HAVE_BUILTIN_EXTENSION_ZSTD
+noinst_LTLIBRARIES = libwiredtiger_zstd.la
+else
+lib_LTLIBRARIES = libwiredtiger_zstd.la
+libwiredtiger_zstd_la_LDFLAGS = -avoid-version -module
+endif
+
+libwiredtiger_zstd_la_SOURCES = zstd_compress.c
+libwiredtiger_zstd_la_LIBADD = -lzstd
diff --git a/src/third_party/wiredtiger/ext/compressors/zstd/zstd_compress.c b/src/third_party/wiredtiger/ext/compressors/zstd/zstd_compress.c
new file mode 100644
index 00000000000..3d0447248b6
--- /dev/null
+++ b/src/third_party/wiredtiger/ext/compressors/zstd/zstd_compress.c
@@ -0,0 +1,358 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <zstd.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * We need to include the configuration file to detect whether this extension
+ * is being built into the WiredTiger library; application-loaded compression
+ * functions won't need it.
+ */
+#include <wiredtiger_config.h>
+
+#include <wiredtiger.h>
+#include <wiredtiger_ext.h>
+
+#ifdef _MSC_VER
+#define inline __inline
+#endif
+
+/*
+ * Local compressor structure: the per-compressor state allocated by
+ * zstd_extension_init and released by zstd_terminate.
+ */
+typedef struct {
+ WT_COMPRESSOR compressor; /* Must come first: this structure is cast to and from WT_COMPRESSOR */
+
+ WT_EXTENSION_API *wt_api; /* Extension API, used for error reporting */
+
+ int compression_level; /* compression level passed to ZSTD_compress */
+} ZSTD_COMPRESSOR;
+
+/*
+ * Zstd decompression requires an exact compressed byte count. WiredTiger
+ * doesn't track that value, so we store it as a fixed-size prefix at the
+ * start of the compressed buffer.
+ */
+#define ZSTD_PREFIX sizeof(uint64_t)
+
+#ifdef WORDS_BIGENDIAN
+/*
+ * zstd_bswap64 --
+ *	Reverse the byte order of a 64-bit unsigned value, converting between
+ * its little-endian and big-endian representations.
+ */
+static inline uint64_t
+zstd_bswap64(uint64_t v)
+{
+	uint64_t swapped;
+	int i;
+
+	/* Peel bytes off the low end of v and push them onto swapped. */
+	for (swapped = 0, i = 0; i < (int)sizeof(uint64_t); ++i) {
+		swapped = (swapped << 8) | (v & 0xff);
+		v >>= 8;
+	}
+	return (swapped);
+}
+#endif
+
+/*
+ * zstd_error --
+ *	Report a failed Zstd call through the extension API's error stream
+ * and return WiredTiger's generic error code.
+ */
+static int
+zstd_error(WT_COMPRESSOR *compressor,
+    WT_SESSION *session, const char *call, size_t error)
+{
+	ZSTD_COMPRESSOR *zcompressor;
+	const char *reason;
+
+	zcompressor = (ZSTD_COMPRESSOR *)compressor;
+
+	/* Translate the Zstd return value into its descriptive name. */
+	reason = ZSTD_getErrorName(error);
+
+	(void)zcompressor->wt_api->err_printf(zcompressor->wt_api,
+	    session, "zstd error: %s: %s", call, reason);
+	return (WT_ERROR);
+}
+
+/*
+ * zstd_compress --
+ *	WiredTiger Zstd compression.
+ *
+ * Compresses src into dst after a ZSTD_PREFIX-byte header holding the
+ * compressed length. Sets *compression_failed to 1 (leaving *result_lenp
+ * untouched) if Zstd fails or the output, header included, isn't smaller
+ * than the source.
+ */
+static int
+zstd_compress(WT_COMPRESSOR *compressor, WT_SESSION *session,
+    uint8_t *src, size_t src_len,
+    uint8_t *dst, size_t dst_len,
+    size_t *result_lenp, int *compression_failed)
+{
+	size_t zstd_ret;
+	uint64_t zstd_len;
+	int level;
+
+	level = ((ZSTD_COMPRESSOR *)compressor)->compression_level;
+
+	/* Compress, starting past the prefix bytes. */
+	zstd_ret = ZSTD_compress(
+	    dst + ZSTD_PREFIX, dst_len - ZSTD_PREFIX, src, src_len, level);
+
+	/* A library failure is reported as an error. */
+	if (ZSTD_isError(zstd_ret)) {
+		*compression_failed = 1;
+		return (zstd_error(
+		    compressor, session, "ZSTD_compress", zstd_ret));
+	}
+
+	/* Output that isn't smaller than the input isn't worth keeping. */
+	if (zstd_ret + ZSTD_PREFIX >= src_len) {
+		*compression_failed = 1;
+		return (0);
+	}
+
+	*result_lenp = zstd_ret + ZSTD_PREFIX;
+	*compression_failed = 0;
+
+	/*
+	 * Zstd needs the exact compressed byte count to decompress, and
+	 * WiredTiger doesn't preserve it: record it in the prefix bytes at
+	 * the beginning of the destination buffer, in little-endian format.
+	 */
+	zstd_len = zstd_ret;
+#ifdef WORDS_BIGENDIAN
+	zstd_len = zstd_bswap64(zstd_len);
+#endif
+	*(uint64_t *)dst = zstd_len;
+	return (0);
+}
+
+/*
+ * zstd_decompress --
+ *	WiredTiger Zstd decompression.
+ *
+ * The source buffer starts with a ZSTD_PREFIX-byte little-endian count of the
+ * compressed bytes that follow it. On success, *result_lenp is set to the
+ * number of bytes written to dst and 0 is returned; WT_ERROR is returned if
+ * the stored count is inconsistent with src_len or if Zstd reports an error.
+ */
+static int
+zstd_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session,
+    uint8_t *src, size_t src_len,
+    uint8_t *dst, size_t dst_len,
+    size_t *result_lenp)
+{
+	WT_EXTENSION_API *wt_api;
+	size_t zstd_ret;
+	uint64_t zstd_len;
+
+	wt_api = ((ZSTD_COMPRESSOR *)compressor)->wt_api;
+
+	/*
+	 * Retrieve the saved length, handling little- to big-endian conversion
+	 * as necessary. Only read the prefix if the source is large enough to
+	 * hold one, and copy it out rather than dereferencing a cast pointer
+	 * so the read doesn't depend on the source buffer's alignment.
+	 */
+	zstd_len = 0;
+	if (src_len >= ZSTD_PREFIX) {
+		memcpy(&zstd_len, src, ZSTD_PREFIX);
+#ifdef WORDS_BIGENDIAN
+		zstd_len = zstd_bswap64(zstd_len);
+#endif
+	}
+
+	/*
+	 * Compare against the bytes remaining after the prefix rather than
+	 * adding the prefix to the stored length: the stored length comes
+	 * from the buffer and a corrupted value near the 64-bit maximum would
+	 * wrap the addition and slip past the check.
+	 */
+	if (src_len < ZSTD_PREFIX || zstd_len > src_len - ZSTD_PREFIX) {
+		(void)wt_api->err_printf(wt_api,
+		    session,
+		    "WT_COMPRESSOR.decompress: stored size exceeds source "
+		    "size");
+		return (WT_ERROR);
+	}
+
+	zstd_ret =
+	    ZSTD_decompress(dst, dst_len, src + ZSTD_PREFIX, (size_t)zstd_len);
+
+	if (!ZSTD_isError(zstd_ret)) {
+		*result_lenp = zstd_ret;
+		return (0);
+	}
+	return (zstd_error(compressor, session, "ZSTD_decompress", zstd_ret));
+}
+
+/*
+ * zstd_pre_size --
+ *	Report how large a destination buffer zstd_compress should be given
+ * for a source of src_len bytes.
+ */
+static int
+zstd_pre_size(WT_COMPRESSOR *compressor, WT_SESSION *session,
+    uint8_t *src, size_t src_len, size_t *result_lenp)
+{
+	size_t bound;
+
+	(void)compressor;			/* Unused parameters */
+	(void)session;
+	(void)src;
+
+	/*
+	 * Zstd compresses faster into a destination sized at the library's
+	 * upper bound for the compressed output; our length prefix is
+	 * stored in the same buffer, so add room for it.
+	 */
+	bound = ZSTD_compressBound(src_len);
+	*result_lenp = bound + ZSTD_PREFIX;
+	return (0);
+}
+
+/*
+ * zstd_terminate --
+ *	Release the compressor structure when WiredTiger discards the Zstd
+ * compressor.
+ */
+static int
+zstd_terminate(WT_COMPRESSOR *compressor, WT_SESSION *session)
+{
+	/* The compressor was allocated with calloc in zstd_extension_init. */
+	free(compressor);
+
+	(void)session;				/* Unused parameters */
+	return (0);
+}
+
+/*
+ * zstd_init_config --
+ *	Handle zstd configuration.
+ *
+ * Reads the "config" string of the extension's configuration argument and,
+ * if it contains a "compression_level" key, stores the value in
+ * *compression_levelp. The caller's value is left untouched when there's no
+ * configuration argument or the key isn't present. Returns 0 on success or
+ * the error returned by the extension API.
+ */
+static int
+zstd_init_config(
+    WT_CONNECTION *connection, WT_CONFIG_ARG *config, int *compression_levelp)
+{
+	WT_CONFIG_ITEM k, v;
+	WT_CONFIG_PARSER *config_parser;
+	WT_EXTENSION_API *wtext;
+	int ret;
+
+	/* If configured as a built-in, there's no configuration argument. */
+	if (config == NULL)
+		return (0);
+
+	/*
+	 * Zstd compression engine allows applications to specify a compression
+	 * level; review the configuration.
+	 */
+	wtext = connection->get_extension_api(connection);
+	if ((ret = wtext->config_get(wtext, NULL, config, "config", &v)) != 0) {
+		(void)wtext->err_printf(wtext, NULL,
+		    "WT_EXTENSION_API.config_get: zstd configure: %s",
+		    wtext->strerror(wtext, NULL, ret));
+		return (ret);
+	}
+	if ((ret = wtext->config_parser_open(
+	    wtext, NULL, v.str, v.len, &config_parser)) != 0) {
+		(void)wtext->err_printf(wtext, NULL,
+		    "WT_EXTENSION_API.config_parser_open: zstd configure: %s",
+		    wtext->strerror(wtext, NULL, ret));
+		return (ret);
+	}
+	while ((ret = config_parser->next(config_parser, &k, &v)) == 0)
+		if (strlen("compression_level") == k.len &&
+		    strncmp("compression_level", k.str, k.len) == 0) {
+			*compression_levelp = (int)v.val;
+			continue;
+		}
+
+	/* The loop ends with WT_NOTFOUND once every key has been seen. */
+	if (ret != WT_NOTFOUND) {
+		(void)wtext->err_printf(wtext, NULL,
+		    "WT_CONFIG_PARSER.next: zstd configure: %s",
+		    wtext->strerror(wtext, NULL, ret));
+
+		/* Don't leak the open parser; keep the original error. */
+		(void)config_parser->close(config_parser);
+		return (ret);
+	}
+	if ((ret = config_parser->close(config_parser)) != 0) {
+		(void)wtext->err_printf(wtext, NULL,
+		    "WT_CONFIG_PARSER.close: zstd configure: %s",
+		    wtext->strerror(wtext, NULL, ret));
+		return (ret);
+	}
+	return (0);
+}
+
+int zstd_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
+
+/*
+ * zstd_extension_init --
+ *	WiredTiger Zstd compression extension - called directly when Zstd
+ * support is built in, or via wiredtiger_extension_init when Zstd support
+ * is included via extension loading.
+ *
+ * Allocates the compressor structure, fills in its callbacks and compression
+ * level, and registers it with the connection under the name "zstd". Returns
+ * 0 on success or an error code; on failure nothing remains allocated.
+ */
+int
+zstd_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
+{
+	ZSTD_COMPRESSOR *zstd_compressor;
+	int compression_level, ret;
+
+	/*
+	 * Zstd's sweet-spot is better compression than zlib at significantly
+	 * faster compression/decompression speeds. LZ4 and snappy are faster
+	 * than zstd, but have worse compression ratios. Applications wanting
+	 * faster compression/decompression with worse compression will select
+	 * LZ4 or snappy, so we configure zstd for better compression.
+	 *
+	 * From the zstd github site, default measurements of the compression
+	 * engines we support, listing compression ratios with compression and
+	 * decompression speeds:
+	 *
+	 *	Name	Ratio	C.speed	D.speed
+	 *			MB/s	MB/s
+	 *	zstd	2.877	330	940
+	 *	zlib	2.730	95	360
+	 *	LZ4	2.101	620	3100
+	 *	snappy	2.091	480	1600
+	 *
+	 * Set the zstd compression level to 3: according to the zstd web site,
+	 * that reduces zstd's compression speed to around 200 MB/s, increasing
+	 * the compression ratio to 3.100 (close to zlib's best compression
+	 * ratio). In other words, position zstd as a zlib replacement, having
+	 * similar compression at much higher compression/decompression speeds.
+	 */
+	compression_level = 3;
+	if ((ret =
+	    zstd_init_config(connection, config, &compression_level)) != 0)
+		return (ret);
+
+	if ((zstd_compressor = calloc(1, sizeof(ZSTD_COMPRESSOR))) == NULL)
+		return (errno);
+
+	zstd_compressor->compressor.compress = zstd_compress;
+	zstd_compressor->compressor.compress_raw = NULL;
+	zstd_compressor->compressor.decompress = zstd_decompress;
+	zstd_compressor->compressor.pre_size = zstd_pre_size;
+	zstd_compressor->compressor.terminate = zstd_terminate;
+
+	zstd_compressor->wt_api = connection->get_extension_api(connection);
+
+	zstd_compressor->compression_level = compression_level;
+
+	/* Load the compressor */
+	if ((ret = connection->add_compressor(
+	    connection, "zstd", (WT_COMPRESSOR *)zstd_compressor, NULL)) == 0)
+		return (0);
+
+	/*
+	 * Registration failed: release the structure here rather than
+	 * counting on the terminate callback to do it.
+	 */
+	free(zstd_compressor);
+	return (ret);
+}
+
+/*
+ * We have to remove this symbol when building as a builtin extension otherwise
+ * it will conflict with other builtin libraries.
+ */
+#ifndef HAVE_BUILTIN_EXTENSION_ZSTD
+/*
+ * wiredtiger_extension_init --
+ *	Entry point used when the Zstd compressor is loaded as an extension;
+ * hands off to zstd_extension_init.
+ */
+int
+wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
+{
+	int ret;
+
+	ret = zstd_extension_init(connection, config);
+	return (ret);
+}
+#endif
diff --git a/src/third_party/wiredtiger/src/async/async_worker.c b/src/third_party/wiredtiger/src/async/async_worker.c
index 401d0616eab..b1bc3902f7c 100644
--- a/src/third_party/wiredtiger/src/async/async_worker.c
+++ b/src/third_party/wiredtiger/src/async/async_worker.c
@@ -216,7 +216,7 @@ __async_worker_execop(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op,
break;
case WT_AOP_NONE:
WT_RET_MSG(session, EINVAL,
- "Unknown async optype %d\n", op->optype);
+ "Unknown async optype %d", op->optype);
}
return (0);
}
diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c
index b7ac953cdb1..48522768dc9 100644
--- a/src/third_party/wiredtiger/src/block/block_ckpt.c
+++ b/src/third_party/wiredtiger/src/block/block_ckpt.c
@@ -615,8 +615,6 @@ live_update:
WT_CKPT_FOREACH(ckptbase, ckpt)
if (F_ISSET(ckpt, WT_CKPT_ADD)) {
/*
- * Set the checkpoint size for the live system.
- *
* !!!
* Our caller wants the final checkpoint size. Setting
* the size here violates layering, but the alternative
@@ -624,7 +622,31 @@ live_update:
* cookie into its components, and that's a fair amount
* of work.
*/
- ckpt->ckpt_size = ci->ckpt_size = ckpt_size;
+ ckpt->ckpt_size = ckpt_size;
+
+ /*
+ * Set the rolling checkpoint size for the live system.
+ * The current size includes the current checkpoint's
+ * root page size (root pages are on the checkpoint's
+ * block allocation list as root pages are allocated
+ * with the usual block allocation functions). That's
+ * correct, but we don't want to include it in the size
+ * for the next checkpoint.
+ */
+ ckpt_size -= ci->root_size;
+
+ /*
+ * Additionally, we had a bug for a while where the live
+ * checkpoint size grew without bound. We can't sanity
+ * check the value, that would require walking the tree
+ * as part of the checkpoint. Bound any bug at the size
+ * of the file.
+ * It isn't practical to assert that the value is within
+ * bounds since databases created with older versions
+ * of WiredTiger (2.8.0) would likely see an error.
+ */
+ ci->ckpt_size =
+ WT_MIN(ckpt_size, (uint64_t)block->size);
WT_ERR(__ckpt_update(session, block, ckpt, ci, true));
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index 3690b41ead4..41ae457b0fe 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -1217,7 +1217,7 @@ err: if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED))
/*
* __wt_btcur_init --
- * Initialize an cursor used for internal purposes.
+ * Initialize a cursor used for internal purposes.
*/
void
__wt_btcur_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c
index 9591023e163..337a3ea036f 100644
--- a/src/third_party/wiredtiger/src/btree/bt_handle.c
+++ b/src/third_party/wiredtiger/src/btree/bt_handle.c
@@ -271,6 +271,17 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
else
F_CLR(btree, WT_BTREE_IN_MEMORY | WT_BTREE_NO_EVICTION);
+ WT_RET(__wt_config_gets(session,
+ cfg, "ignore_in_memory_cache_size", &cval));
+ if (cval.val) {
+ if (!F_ISSET(conn, WT_CONN_IN_MEMORY))
+ WT_RET_MSG(session, EINVAL,
+ "ignore_in_memory_cache_size setting is only valid "
+ "with databases configured to run in-memory");
+ F_SET(btree, WT_BTREE_IGNORE_CACHE);
+ } else
+ F_CLR(btree, WT_BTREE_IGNORE_CACHE);
+
WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval));
if (cval.val)
F_CLR(btree, WT_BTREE_NO_LOGGING);
@@ -353,7 +364,7 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
WT_RET(__wt_spin_init(session, &btree->flush_lock, "btree flush"));
btree->checkpointing = WT_CKPT_OFF; /* Not checkpointing */
- btree->modified = 0; /* Clean */
+ btree->modified = false; /* Clean */
btree->write_gen = ckpt->write_gen; /* Write generation */
return (0);
diff --git a/src/third_party/wiredtiger/src/btree/bt_io.c b/src/third_party/wiredtiger/src/btree/bt_io.c
index 42c3a849a88..a8645f79dbe 100644
--- a/src/third_party/wiredtiger/src/btree/bt_io.c
+++ b/src/third_party/wiredtiger/src/btree/bt_io.c
@@ -171,6 +171,7 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf,
uint8_t *addr, size_t *addr_sizep,
bool checkpoint, bool checkpoint_io, bool compressed)
{
+ struct timespec start, stop;
WT_BM *bm;
WT_BTREE *btree;
WT_DECL_ITEM(ctmp);
@@ -356,6 +357,8 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf,
data_checksum = !compressed;
break;
}
+ if (!F_ISSET(session, WT_SESSION_INTERNAL))
+ __wt_epoch(session, &start);
/* Call the block manager to write the block. */
WT_ERR(checkpoint ?
@@ -363,6 +366,14 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf,
bm->write(
bm, session, ip, addr, addr_sizep, data_checksum, checkpoint_io));
+ /* Update some statistics now that the write is done */
+ if (!F_ISSET(session, WT_SESSION_INTERNAL)) {
+ __wt_epoch(session, &stop);
+ WT_STAT_CONN_INCR(session, cache_write_app_count);
+ WT_STAT_CONN_INCRV(session, cache_write_app_time,
+ WT_TIMEDIFF_US(stop, start));
+ }
+
WT_STAT_CONN_INCR(session, cache_write);
WT_STAT_DATA_INCR(session, cache_write);
S2C(session)->cache->bytes_written += dsk->mem_size;
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index c54eaa69c43..90188498535 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -327,22 +327,28 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
if (__wt_hazard_count(session, page) > 1)
return (false);
+ /* If we can do an in-memory split, do it. */
+ if (__wt_leaf_page_can_split(session, page))
+ return (true);
+ if (page->memory_footprint < btree->maxmempage)
+ return (false);
+
+ /* Bump the oldest ID, we're about to do some visibility checks. */
+ WT_IGNORE_RET(__wt_txn_update_oldest(session, 0));
+
/*
- * If we have already tried and the transaction state has not moved on,
- * eviction is highly likely to fail.
+ * Allow some leeway if the transaction ID isn't moving forward since
+ * it is unlikely eviction will be able to evict the page. Don't keep
+ * skipping the page indefinitely or large records can lead to
+ * extremely large memory footprints.
*/
- if (page->modify->last_eviction_id == __wt_txn_oldest_id(session))
+ if (page->modify->update_restored &&
+ page->modify->last_eviction_id == __wt_txn_oldest_id(session))
return (false);
- if (page->memory_footprint < btree->maxmempage)
- return (__wt_leaf_page_can_split(session, page));
-
/* Trigger eviction on the next page release. */
__wt_page_evict_soon(session, ref);
- /* Bump the oldest ID, we're about to do some visibility checks. */
- WT_IGNORE_RET(__wt_txn_update_oldest(session, 0));
-
/* If eviction cannot succeed, don't try. */
return (__wt_page_can_evict(session, ref, NULL));
}
@@ -354,6 +360,7 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
static int
__page_read(WT_SESSION_IMPL *session, WT_REF *ref)
{
+ struct timespec start, stop;
const WT_PAGE_HEADER *dsk;
WT_BTREE *btree;
WT_DECL_RET;
@@ -401,7 +408,15 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref)
* There's an address, read or map the backing disk page and build an
* in-memory version of the page.
*/
+ if (!F_ISSET(session, WT_SESSION_INTERNAL))
+ __wt_epoch(session, &start);
WT_ERR(__wt_bt_read(session, &tmp, addr, addr_size));
+ if (!F_ISSET(session, WT_SESSION_INTERNAL)) {
+ __wt_epoch(session, &stop);
+ WT_STAT_CONN_INCR(session, cache_read_app_count);
+ WT_STAT_CONN_INCRV(session, cache_read_app_time,
+ WT_TIMEDIFF_US(stop, start));
+ }
WT_ERR(__wt_page_inmem(session, ref, tmp.data, tmp.memsize,
WT_DATA_IN_ITEM(&tmp) ?
WT_PAGE_DISK_ALLOC : WT_PAGE_DISK_MAPPED, &page));
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index ea667460966..017c820ea29 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -1582,6 +1582,13 @@ __split_multi_inmem(
*/
page->modify->first_dirty_txn = WT_TXN_FIRST;
+ /*
+ * If the new page is modified, save the oldest ID from reconciliation
+ * to avoid repeatedly attempting eviction on the same page.
+ */
+ page->modify->last_eviction_id = orig->modify->last_eviction_id;
+ page->modify->update_restored = 1;
+
err: /* Free any resources that may have been cached in the cursor. */
WT_TRET(__wt_btcur_close(&cbt, true));
@@ -2245,14 +2252,6 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi)
WT_ERR(__split_multi_inmem(session, page, multi, new));
/*
- * If the new page is modified, save the oldest ID from reconciliation
- * to avoid repeatedly attempting eviction on the same page.
- */
- if (new->page->modify != NULL)
- new->page->modify->last_eviction_id =
- page->modify->last_eviction_id;
-
- /*
* The rewrite succeeded, we can no longer fail.
*
* Finalize the move, discarding moved update lists from the original
diff --git a/src/third_party/wiredtiger/src/btree/bt_stat.c b/src/third_party/wiredtiger/src/btree/bt_stat.c
index d3ddf33446e..06428b87f6e 100644
--- a/src/third_party/wiredtiger/src/btree/bt_stat.c
+++ b/src/third_party/wiredtiger/src/btree/bt_stat.c
@@ -8,6 +8,7 @@
#include "wt_internal.h"
+static int __stat_tree_walk(WT_SESSION_IMPL *);
static int __stat_page(WT_SESSION_IMPL *, WT_PAGE *, WT_DSRC_STATS **);
static void __stat_page_col_var(WT_SESSION_IMPL *, WT_PAGE *, WT_DSRC_STATS **);
static void __stat_page_row_int(WT_SESSION_IMPL *, WT_PAGE *, WT_DSRC_STATS **);
@@ -23,9 +24,7 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
{
WT_BM *bm;
WT_BTREE *btree;
- WT_DECL_RET;
WT_DSRC_STATS **stats;
- WT_REF *next_walk;
btree = S2BT(session);
bm = btree->bm;
@@ -44,9 +43,29 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
WT_STAT_SET(session, stats, cache_bytes_inuse,
__wt_btree_bytes_inuse(session));
- /* Everything else is really, really expensive. */
- if (!F_ISSET(cst, WT_CONN_STAT_ALL))
- return (0);
+ if (F_ISSET(cst, WT_STAT_TYPE_CACHE_WALK))
+ __wt_curstat_cache_walk(session);
+
+ if (F_ISSET(cst, WT_STAT_TYPE_TREE_WALK))
+ WT_RET(__stat_tree_walk(session));
+
+ return (0);
+}
+
+/*
+ * __stat_tree_walk --
+ * Gather btree statistics that require traversing the tree.
+ */
+static int
+__stat_tree_walk(WT_SESSION_IMPL *session)
+{
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_DSRC_STATS **stats;
+ WT_REF *next_walk;
+
+ btree = S2BT(session);
+ stats = btree->dhandle->stats;
/*
* Clear the statistics we're about to count.
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index b41179a565d..6d4ad9d0d0f 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -9,6 +9,59 @@
#include "wt_internal.h"
/*
+ * __sync_checkpoint_can_skip --
+ * There are limited conditions under which we can skip writing a dirty
+ * page during checkpoint.
+ */
+static inline bool
+__sync_checkpoint_can_skip(WT_SESSION_IMPL *session, WT_PAGE *page)
+{
+ WT_PAGE_MODIFY *mod;
+ WT_MULTI *multi;
+ WT_TXN *txn;
+ u_int i;
+
+ mod = page->modify;
+ txn = &session->txn;
+
+ /*
+ * We can skip some dirty pages during a checkpoint. The requirements:
+ *
+ * 1. they must be leaf pages,
+ * 2. there is a snapshot transaction active (which is the case in
+ * ordinary application checkpoints but not all internal cases),
+ * 3. the first dirty update on the page is sufficiently recent that
+ * the checkpoint transaction would skip them,
+ * 4. there's already an address for every disk block involved.
+ */
+ if (WT_PAGE_IS_INTERNAL(page))
+ return (false);
+ if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT))
+ return (false);
+ if (!WT_TXNID_LT(txn->snap_max, mod->first_dirty_txn))
+ return (false);
+
+ /*
+ * The problematic case is when a page was evicted while there were
+ * unresolved updates and not every block associated with the page has
+ * a disk address. We can't skip such pages because we need a checkpoint
+ * write with valid addresses.
+ *
+ * The page's modification information can change underfoot if the page
+ * is being reconciled, so we'd normally serialize with reconciliation
+ * before reviewing page-modification information. However, checkpoint
+ * is the only valid writer of dirty leaf pages at this point, so we
+ * skip the lock.
+ */
+ if (mod->rec_result == WT_PM_REC_MULTIBLOCK)
+ for (multi = mod->mod_multi,
+ i = 0; i < mod->mod_multi_entries; ++multi, ++i)
+ if (multi->addr.addr == NULL)
+ return (false);
+ return (true);
+}
+
+/*
* __sync_file --
* Flush pages for a specific file.
*/
@@ -20,24 +73,23 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_PAGE *page;
- WT_PAGE_MODIFY *mod;
WT_REF *walk;
WT_TXN *txn;
uint64_t internal_bytes, internal_pages, leaf_bytes, leaf_pages;
- uint64_t oldest_id, saved_snap_min;
+ uint64_t oldest_id, saved_pinned_id;
uint32_t flags;
conn = S2C(session);
btree = S2BT(session);
walk = NULL;
txn = &session->txn;
- saved_snap_min = WT_SESSION_TXN_STATE(session)->snap_min;
+ saved_pinned_id = WT_SESSION_TXN_STATE(session)->pinned_id;
flags = WT_READ_CACHE | WT_READ_NO_GEN;
internal_bytes = leaf_bytes = 0;
internal_pages = leaf_pages = 0;
if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT))
- WT_RET(__wt_epoch(session, &start));
+ __wt_epoch(session, &start);
switch (syncop) {
case WT_SYNC_WRITE_LEAVES:
@@ -161,29 +213,15 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
* reference and checking modified.
*/
page = walk->page;
- mod = page->modify;
/*
- * Write dirty pages, unless we can be sure they only
- * became dirty after the checkpoint started.
- *
- * We can skip dirty pages if:
- * (1) they are leaf pages;
- * (2) there is a snapshot transaction active (which
- * is the case in ordinary application checkpoints
- * but not all internal cases); and
- * (3) the first dirty update on the page is
- * sufficiently recent that the checkpoint
- * transaction would skip them.
- *
- * Mark the tree dirty: the checkpoint marked it clean
- * and we can't skip future checkpoints until this page
- * is written.
+ * Write dirty pages, if we can't skip them. If we skip
+ * a page, mark the tree dirty. The checkpoint marked it
+ * clean and we can't skip future checkpoints until this
+ * page is written.
*/
- if (!WT_PAGE_IS_INTERNAL(page) &&
- F_ISSET(txn, WT_TXN_HAS_SNAPSHOT) &&
- WT_TXNID_LT(txn->snap_max, mod->first_dirty_txn)) {
- __wt_page_modify_set(session, page);
+ if (__sync_checkpoint_can_skip(session, page)) {
+ __wt_tree_modify_set(session);
continue;
}
@@ -205,15 +243,14 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
}
if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) {
- WT_ERR(__wt_epoch(session, &end));
+ __wt_epoch(session, &end);
__wt_verbose(session, WT_VERB_CHECKPOINT,
- "__sync_file WT_SYNC_%s wrote:\n\t %" PRIu64
- " bytes, %" PRIu64 " pages of leaves\n\t %" PRIu64
- " bytes, %" PRIu64 " pages of internal\n\t"
- "Took: %" PRIu64 "ms",
+ "__sync_file WT_SYNC_%s wrote: %" PRIu64
+ " leaf pages (%" PRIu64 "B), %" PRIu64
+ " internal pages (%" PRIu64 "B), and took %" PRIu64 "ms",
syncop == WT_SYNC_WRITE_LEAVES ?
"WRITE_LEAVES" : "CHECKPOINT",
- leaf_bytes, leaf_pages, internal_bytes, internal_pages,
+ leaf_pages, leaf_bytes, internal_pages, internal_bytes,
WT_TIMEDIFF_MS(end, start));
}
@@ -226,7 +263,7 @@ err: /* On error, clear any left-over tree walk. */
* snapshot active when we started, release it.
*/
if (txn->isolation == WT_ISO_READ_COMMITTED &&
- saved_snap_min == WT_TXN_NONE)
+ saved_pinned_id == WT_TXN_NONE)
__wt_txn_release_snapshot(session);
/* Clear the checkpoint flag and push the change. */
diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c
index 4c338bc6ad9..41f50957809 100644
--- a/src/third_party/wiredtiger/src/cache/cache_las.c
+++ b/src/third_party/wiredtiger/src/cache/cache_las.c
@@ -49,7 +49,7 @@ __wt_las_stats_update(WT_SESSION_IMPL *session)
* don't have to worry about users seeing inconsistent data source
* information.
*/
- if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_CLEAR)) {
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_CLEAR)) {
WT_STAT_SET(session, dstats, cursor_insert, 0);
WT_STAT_SET(session, dstats, cursor_remove, 0);
}
diff --git a/src/third_party/wiredtiger/src/checksum/power8/README.md b/src/third_party/wiredtiger/src/checksum/power8/README.md
index 3e2976650cd..579d841a02c 100644
--- a/src/third_party/wiredtiger/src/checksum/power8/README.md
+++ b/src/third_party/wiredtiger/src/checksum/power8/README.md
@@ -39,7 +39,7 @@ Quick start
- Type make to create the constants (crc32_constants.h)
-- Import the code into your application (crc32.S crc32_wrapper.c
+- Import the code into your application (crc32.sx crc32_wrapper.c
crc32_constants.h ppc-opcode.h) and call the CRC:
```
diff --git a/src/third_party/wiredtiger/src/checksum/power8/crc32.S b/src/third_party/wiredtiger/src/checksum/power8/crc32.sx
index 0b7870668b5..0b7870668b5 100644
--- a/src/third_party/wiredtiger/src/checksum/power8/crc32.S
+++ b/src/third_party/wiredtiger/src/checksum/power8/crc32.sx
diff --git a/src/third_party/wiredtiger/src/checksum/zseries/crc32le-vx.S b/src/third_party/wiredtiger/src/checksum/zseries/crc32le-vx.sx
index 0f1392b0952..0f1392b0952 100644
--- a/src/third_party/wiredtiger/src/checksum/zseries/crc32le-vx.S
+++ b/src/third_party/wiredtiger/src/checksum/zseries/crc32le-vx.sx
diff --git a/src/third_party/wiredtiger/src/config/config_collapse.c b/src/third_party/wiredtiger/src/config/config_collapse.c
index ea956ebfff9..7fe78d06ba7 100644
--- a/src/third_party/wiredtiger/src/config/config_collapse.c
+++ b/src/third_party/wiredtiger/src/config/config_collapse.c
@@ -47,7 +47,7 @@ __wt_config_collapse(
if (k.type != WT_CONFIG_ITEM_STRING &&
k.type != WT_CONFIG_ITEM_ID)
WT_ERR_MSG(session, EINVAL,
- "Invalid configuration key found: '%s'\n", k.str);
+ "Invalid configuration key found: '%s'", k.str);
WT_ERR(__wt_config_get(session, cfg, &k, &v));
/* Include the quotes around string keys/values. */
if (k.type == WT_CONFIG_ITEM_STRING) {
@@ -132,7 +132,7 @@ __config_merge_scan(WT_SESSION_IMPL *session,
if (k.type != WT_CONFIG_ITEM_STRING &&
k.type != WT_CONFIG_ITEM_ID)
WT_ERR_MSG(session, EINVAL,
- "Invalid configuration key found: '%s'\n", k.str);
+ "Invalid configuration key found: '%s'", k.str);
/* Include the quotes around string keys/values. */
if (k.type == WT_CONFIG_ITEM_STRING) {
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index 7bad5f12a9f..018cc7a8ac4 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -138,7 +138,8 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
NULL, NULL,
confchk_wiredtiger_open_shared_cache_subconfigs, 5 },
{ "statistics", "list",
- NULL, "choices=[\"all\",\"fast\",\"none\",\"clear\"]",
+ NULL, "choices=[\"all\",\"cache_walk\",\"fast\",\"none\","
+ "\"clear\",\"tree_walk\"]",
NULL, 0 },
{ "statistics_log", "category",
NULL, NULL,
@@ -246,6 +247,9 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_create[] = {
{ "format", "string", NULL, "choices=[\"btree\"]", NULL, 0 },
{ "huffman_key", "string", NULL, NULL, NULL, 0 },
{ "huffman_value", "string", NULL, NULL, NULL, 0 },
+ { "ignore_in_memory_cache_size", "boolean",
+ NULL, NULL,
+ NULL, 0 },
{ "immutable", "boolean", NULL, NULL, NULL, 0 },
{ "internal_item_max", "int", NULL, "min=0", NULL, 0 },
{ "internal_key_max", "int", NULL, "min=0", NULL, 0 },
@@ -331,7 +335,8 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor[] = {
{ "readonly", "boolean", NULL, NULL, NULL, 0 },
{ "skip_sort_check", "boolean", NULL, NULL, NULL, 0 },
{ "statistics", "list",
- NULL, "choices=[\"all\",\"fast\",\"clear\",\"size\"]",
+ NULL, "choices=[\"all\",\"cache_walk\",\"fast\",\"clear\","
+ "\"size\",\"tree_walk\"]",
NULL, 0 },
{ "target", "list", NULL, NULL, NULL, 0 },
{ NULL, NULL, NULL, NULL, NULL, 0 }
@@ -413,6 +418,9 @@ static const WT_CONFIG_CHECK confchk_file_config[] = {
{ "format", "string", NULL, "choices=[\"btree\"]", NULL, 0 },
{ "huffman_key", "string", NULL, NULL, NULL, 0 },
{ "huffman_value", "string", NULL, NULL, NULL, 0 },
+ { "ignore_in_memory_cache_size", "boolean",
+ NULL, NULL,
+ NULL, 0 },
{ "internal_item_max", "int", NULL, "min=0", NULL, 0 },
{ "internal_key_max", "int", NULL, "min=0", NULL, 0 },
{ "internal_key_truncate", "boolean", NULL, NULL, NULL, 0 },
@@ -471,6 +479,9 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = {
{ "huffman_key", "string", NULL, NULL, NULL, 0 },
{ "huffman_value", "string", NULL, NULL, NULL, 0 },
{ "id", "string", NULL, NULL, NULL, 0 },
+ { "ignore_in_memory_cache_size", "boolean",
+ NULL, NULL,
+ NULL, 0 },
{ "internal_item_max", "int", NULL, "min=0", NULL, 0 },
{ "internal_key_max", "int", NULL, "min=0", NULL, 0 },
{ "internal_key_truncate", "boolean", NULL, NULL, NULL, 0 },
@@ -544,6 +555,9 @@ static const WT_CONFIG_CHECK confchk_lsm_meta[] = {
{ "format", "string", NULL, "choices=[\"btree\"]", NULL, 0 },
{ "huffman_key", "string", NULL, NULL, NULL, 0 },
{ "huffman_value", "string", NULL, NULL, NULL, 0 },
+ { "ignore_in_memory_cache_size", "boolean",
+ NULL, NULL,
+ NULL, 0 },
{ "internal_item_max", "int", NULL, "min=0", NULL, 0 },
{ "internal_key_max", "int", NULL, "min=0", NULL, 0 },
{ "internal_key_truncate", "boolean", NULL, NULL, NULL, 0 },
@@ -697,7 +711,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
NULL, NULL,
confchk_wiredtiger_open_shared_cache_subconfigs, 5 },
{ "statistics", "list",
- NULL, "choices=[\"all\",\"fast\",\"none\",\"clear\"]",
+ NULL, "choices=[\"all\",\"cache_walk\",\"fast\",\"none\","
+ "\"clear\",\"tree_walk\"]",
NULL, 0 },
{ "statistics_log", "category",
NULL, NULL,
@@ -781,7 +796,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
NULL, NULL,
confchk_wiredtiger_open_shared_cache_subconfigs, 5 },
{ "statistics", "list",
- NULL, "choices=[\"all\",\"fast\",\"none\",\"clear\"]",
+ NULL, "choices=[\"all\",\"cache_walk\",\"fast\",\"none\","
+ "\"clear\",\"tree_walk\"]",
NULL, 0 },
{ "statistics_log", "category",
NULL, NULL,
@@ -862,7 +878,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
NULL, NULL,
confchk_wiredtiger_open_shared_cache_subconfigs, 5 },
{ "statistics", "list",
- NULL, "choices=[\"all\",\"fast\",\"none\",\"clear\"]",
+ NULL, "choices=[\"all\",\"cache_walk\",\"fast\",\"none\","
+ "\"clear\",\"tree_walk\"]",
NULL, 0 },
{ "statistics_log", "category",
NULL, NULL,
@@ -941,7 +958,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
NULL, NULL,
confchk_wiredtiger_open_shared_cache_subconfigs, 5 },
{ "statistics", "list",
- NULL, "choices=[\"all\",\"fast\",\"none\",\"clear\"]",
+ NULL, "choices=[\"all\",\"cache_walk\",\"fast\",\"none\","
+ "\"clear\",\"tree_walk\"]",
NULL, 0 },
{ "statistics_log", "category",
NULL, NULL,
@@ -1053,18 +1071,18 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"block_compressor=,cache_resident=false,checksum=uncompressed,"
"colgroups=,collator=,columns=,dictionary=0,encryption=(keyid=,"
"name=),exclusive=false,extractor=,format=btree,huffman_key=,"
- "huffman_value=,immutable=false,internal_item_max=0,"
- "internal_key_max=0,internal_key_truncate=true,"
- "internal_page_max=4KB,key_format=u,key_gap=10,leaf_item_max=0,"
- "leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=0,"
- "log=(enabled=true),lsm=(auto_throttle=true,bloom=true,"
- "bloom_bit_count=16,bloom_config=,bloom_hash_count=8,"
+ "huffman_value=,ignore_in_memory_cache_size=false,immutable=false"
+ ",internal_item_max=0,internal_key_max=0,"
+ "internal_key_truncate=true,internal_page_max=4KB,key_format=u,"
+ "key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,"
+ "leaf_value_max=0,log=(enabled=true),lsm=(auto_throttle=true,"
+ "bloom=true,bloom_bit_count=16,bloom_config=,bloom_hash_count=8,"
"bloom_oldest=false,chunk_count_limit=0,chunk_max=5GB,"
"chunk_size=10MB,merge_max=15,merge_min=0),memory_page_max=5MB,"
"os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false,"
"prefix_compression_min=4,source=,split_deepen_min_child=0,"
"split_deepen_per_child=0,split_pct=75,type=file,value_format=u",
- confchk_WT_SESSION_create, 40
+ confchk_WT_SESSION_create, 41
},
{ "WT_SESSION.drop",
"checkpoint_wait=true,force=false,lock_wait=true,"
@@ -1148,7 +1166,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"allocation_size=4KB,app_metadata=,block_allocation=best,"
"block_compressor=,cache_resident=false,checksum=uncompressed,"
"collator=,columns=,dictionary=0,encryption=(keyid=,name=),"
- "format=btree,huffman_key=,huffman_value=,internal_item_max=0,"
+ "format=btree,huffman_key=,huffman_value=,"
+ "ignore_in_memory_cache_size=false,internal_item_max=0,"
"internal_key_max=0,internal_key_truncate=true,"
"internal_page_max=4KB,key_format=u,key_gap=10,leaf_item_max=0,"
"leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=0,"
@@ -1156,14 +1175,15 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"os_cache_max=0,prefix_compression=false,prefix_compression_min=4"
",split_deepen_min_child=0,split_deepen_per_child=0,split_pct=75,"
"value_format=u",
- confchk_file_config, 33
+ confchk_file_config, 34
},
{ "file.meta",
"allocation_size=4KB,app_metadata=,block_allocation=best,"
"block_compressor=,cache_resident=false,checkpoint=,"
"checkpoint_lsn=,checksum=uncompressed,collator=,columns=,"
"dictionary=0,encryption=(keyid=,name=),format=btree,huffman_key="
- ",huffman_value=,id=,internal_item_max=0,internal_key_max=0,"
+ ",huffman_value=,id=,ignore_in_memory_cache_size=false,"
+ "internal_item_max=0,internal_key_max=0,"
"internal_key_truncate=true,internal_page_max=4KB,key_format=u,"
"key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,"
"leaf_value_max=0,log=(enabled=true),memory_page_max=5MB,"
@@ -1171,7 +1191,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"prefix_compression_min=4,split_deepen_min_child=0,"
"split_deepen_per_child=0,split_pct=75,value_format=u,"
"version=(major=0,minor=0)",
- confchk_file_meta, 37
+ confchk_file_meta, 38
},
{ "index.meta",
"app_metadata=,collator=,columns=,extractor=,immutable=false,"
@@ -1183,18 +1203,19 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"block_compressor=,cache_resident=false,checksum=uncompressed,"
"chunks=,collator=,columns=,dictionary=0,encryption=(keyid=,"
"name=),format=btree,huffman_key=,huffman_value=,"
- "internal_item_max=0,internal_key_max=0,"
- "internal_key_truncate=true,internal_page_max=4KB,key_format=u,"
- "key_gap=10,last=,leaf_item_max=0,leaf_key_max=0,"
- "leaf_page_max=32KB,leaf_value_max=0,log=(enabled=true),"
- "lsm=(auto_throttle=true,bloom=true,bloom_bit_count=16,"
- "bloom_config=,bloom_hash_count=8,bloom_oldest=false,"
- "chunk_count_limit=0,chunk_max=5GB,chunk_size=10MB,merge_max=15,"
- "merge_min=0),memory_page_max=5MB,old_chunks=,"
- "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false,"
- "prefix_compression_min=4,split_deepen_min_child=0,"
- "split_deepen_per_child=0,split_pct=75,value_format=u",
- confchk_lsm_meta, 37
+ "ignore_in_memory_cache_size=false,internal_item_max=0,"
+ "internal_key_max=0,internal_key_truncate=true,"
+ "internal_page_max=4KB,key_format=u,key_gap=10,last=,"
+ "leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,"
+ "leaf_value_max=0,log=(enabled=true),lsm=(auto_throttle=true,"
+ "bloom=true,bloom_bit_count=16,bloom_config=,bloom_hash_count=8,"
+ "bloom_oldest=false,chunk_count_limit=0,chunk_max=5GB,"
+ "chunk_size=10MB,merge_max=15,merge_min=0),memory_page_max=5MB,"
+ "old_chunks=,os_cache_dirty_max=0,os_cache_max=0,"
+ "prefix_compression=false,prefix_compression_min=4,"
+ "split_deepen_min_child=0,split_deepen_per_child=0,split_pct=75,"
+ "value_format=u",
+ confchk_lsm_meta, 38
},
{ "table.meta",
"app_metadata=,colgroups=,collator=,columns=,key_format=u,"
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index 0951fd4e58c..04c29e957a3 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -789,14 +789,17 @@ __conn_get_extension_api(WT_CONNECTION *wt_conn)
return (&conn->extension_api);
}
+#ifdef HAVE_BUILTIN_EXTENSION_LZ4
+ extern int lz4_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
+#endif
#ifdef HAVE_BUILTIN_EXTENSION_SNAPPY
extern int snappy_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
#endif
#ifdef HAVE_BUILTIN_EXTENSION_ZLIB
extern int zlib_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
#endif
-#ifdef HAVE_BUILTIN_EXTENSION_LZ4
- extern int lz4_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
+#ifdef HAVE_BUILTIN_EXTENSION_ZSTD
+ extern int zstd_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
#endif
/*
@@ -808,14 +811,17 @@ __conn_load_default_extensions(WT_CONNECTION_IMPL *conn)
{
WT_UNUSED(conn);
+#ifdef HAVE_BUILTIN_EXTENSION_LZ4
+ WT_RET(lz4_extension_init(&conn->iface, NULL));
+#endif
#ifdef HAVE_BUILTIN_EXTENSION_SNAPPY
WT_RET(snappy_extension_init(&conn->iface, NULL));
#endif
#ifdef HAVE_BUILTIN_EXTENSION_ZLIB
WT_RET(zlib_extension_init(&conn->iface, NULL));
#endif
-#ifdef HAVE_BUILTIN_EXTENSION_LZ4
- WT_RET(lz4_extension_init(&conn->iface, NULL));
+#ifdef HAVE_BUILTIN_EXTENSION_ZSTD
+ WT_RET(zstd_extension_init(&conn->iface, NULL));
#endif
return (0);
}
@@ -1668,32 +1674,60 @@ __conn_statistics_config(WT_SESSION_IMPL *session, const char *cfg[])
if ((ret = __wt_config_subgets(
session, &cval, "fast", &sval)) == 0 && sval.val != 0) {
- LF_SET(WT_CONN_STAT_FAST);
+ LF_SET(WT_STAT_TYPE_FAST);
++set;
}
WT_RET_NOTFOUND_OK(ret);
if ((ret = __wt_config_subgets(
session, &cval, "all", &sval)) == 0 && sval.val != 0) {
- LF_SET(WT_CONN_STAT_ALL | WT_CONN_STAT_FAST);
+ LF_SET(
+ WT_STAT_TYPE_ALL | WT_STAT_TYPE_CACHE_WALK |
+ WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK);
++set;
}
WT_RET_NOTFOUND_OK(ret);
+ if (set > 1)
+ WT_RET_MSG(session, EINVAL,
+ "Only one of all, fast, none configuration values should "
+ "be specified");
+
+ /*
+ * Now that we've parsed general statistics categories, process
+ * sub-categories.
+ */
+ if ((ret = __wt_config_subgets(
+ session, &cval, "cache_walk", &sval)) == 0 && sval.val != 0)
+ /*
+ * Configuring cache walk statistics implies fast statistics.
+ * Keep that knowledge internal for now - it may change in the
+ * future.
+ */
+ LF_SET(WT_STAT_TYPE_FAST | WT_STAT_TYPE_CACHE_WALK);
+ WT_RET_NOTFOUND_OK(ret);
+
+ if ((ret = __wt_config_subgets(
+ session, &cval, "tree_walk", &sval)) == 0 && sval.val != 0)
+ /*
+ * Configuring tree walk statistics implies fast statistics.
+ * Keep that knowledge internal for now - it may change in the
+ * future.
+ */
+ LF_SET(WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK);
+ WT_RET_NOTFOUND_OK(ret);
+
if ((ret = __wt_config_subgets(
session, &cval, "clear", &sval)) == 0 && sval.val != 0) {
- if (!LF_ISSET(WT_CONN_STAT_FAST | WT_CONN_STAT_ALL))
+ if (!LF_ISSET(WT_STAT_TYPE_ALL | WT_STAT_TYPE_CACHE_WALK |
+ WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK))
WT_RET_MSG(session, EINVAL,
- "the value \"clear\" can be specified only if "
- "either \"all\" or \"fast\" is specified");
- LF_SET(WT_CONN_STAT_CLEAR);
+ "the value \"clear\" can only be specified if "
+ "statistics are enabled");
+ LF_SET(WT_STAT_CLEAR);
}
WT_RET_NOTFOUND_OK(ret);
- if (set > 1)
- WT_RET_MSG(session, EINVAL,
- "only one statistics configuration value may be specified");
-
/* Configuring statistics clears any existing values. */
conn->stat_flags = flags;
@@ -1943,6 +1977,42 @@ __conn_chk_file_system(WT_SESSION_IMPL *session, bool readonly)
}
/*
+ * wiredtiger_dummy_session_init --
+ * Initialize the connection's dummy session.
+ */
+static void
+wiredtiger_dummy_session_init(
+ WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler)
+{
+ WT_SESSION_IMPL *session;
+
+ session = &conn->dummy_session;
+
+ /*
+ * We use a fake session until we can allocate and initialize the real
+ * ones. Initialize the necessary fields (unfortunately, the fields we
+ * initialize have been selected by core dumps; we need to do better).
+ */
+ session->iface.connection = &conn->iface;
+ session->name = "wiredtiger_open";
+
+ /* Standard I/O and error handling first. */
+ __wt_os_stdio(session);
+ __wt_event_handler_set(session, event_handler);
+
+ /* Statistics */
+ session->stat_bucket = 0;
+
+ /*
+ * Set the default session's strerror method. If one of the extensions
+ * being loaded reports an error via the WT_EXTENSION_API strerror
+ * method, but doesn't supply that method a WT_SESSION handle, we'll
+ * use the WT_CONNECTION_IMPL's default session and its strerror method.
+ */
+ session->iface.strerror = __wt_session_strerror;
+}
+
+/*
* wiredtiger_open --
* Main library entry point: open a new connection to a WiredTiger
* database.
@@ -2013,21 +2083,11 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
TAILQ_INSERT_TAIL(&__wt_process.connqh, conn, q);
__wt_spin_unlock(NULL, &__wt_process.spinlock);
- session = conn->default_session = &conn->dummy_session;
- session->iface.connection = &conn->iface;
- session->name = "wiredtiger_open";
-
- /* Do standard I/O and error handling first. */
- __wt_os_stdio(session);
- __wt_event_handler_set(session, event_handler);
-
/*
- * Set the default session's strerror method. If one of the extensions
- * being loaded reports an error via the WT_EXTENSION_API strerror
- * method, but doesn't supply that method a WT_SESSION handle, we'll
- * use the WT_CONNECTION_IMPL's default session and its strerror method.
+ * Initialize the fake session used until we can create real sessions.
*/
- conn->default_session->iface.strerror = __wt_session_strerror;
+ wiredtiger_dummy_session_init(conn, event_handler);
+ session = conn->default_session = &conn->dummy_session;
/* Basic initialization of the connection structure. */
WT_ERR(__wt_connection_init(conn));
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c
index 6788b1f7f47..fe5f94ea03d 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache.c
@@ -42,47 +42,38 @@ __cache_config_local(WT_SESSION_IMPL *session, bool shared, const char *cfg[])
WT_RET(__wt_config_gets(session, cfg, "eviction_trigger", &cval));
cache->eviction_trigger = (u_int)cval.val;
- if (F_ISSET(conn, WT_CONN_IN_MEMORY))
- cache->eviction_checkpoint_target =
- cache->eviction_dirty_target =
- cache->eviction_dirty_trigger = 100U;
- else {
- WT_RET(__wt_config_gets(
- session, cfg, "eviction_checkpoint_target", &cval));
- cache->eviction_checkpoint_target = (u_int)cval.val;
+ WT_RET(__wt_config_gets(
+ session, cfg, "eviction_checkpoint_target", &cval));
+ cache->eviction_checkpoint_target = (u_int)cval.val;
- WT_RET(__wt_config_gets(
- session, cfg, "eviction_dirty_target", &cval));
- cache->eviction_dirty_target = (u_int)cval.val;
+ WT_RET(__wt_config_gets(session, cfg, "eviction_dirty_target", &cval));
+ cache->eviction_dirty_target = (u_int)cval.val;
- /*
- * Don't allow the dirty target to be larger than the overall
- * target.
- */
- if (cache->eviction_dirty_target > cache->eviction_target)
- cache->eviction_dirty_target = cache->eviction_target;
+ /*
+ * Don't allow the dirty target to be larger than the overall
+ * target.
+ */
+ if (cache->eviction_dirty_target > cache->eviction_target)
+ cache->eviction_dirty_target = cache->eviction_target;
- /*
- * Sanity check the checkpoint target: don't allow a value
- * lower than the dirty target.
- */
- if (cache->eviction_checkpoint_target > 0 &&
- cache->eviction_checkpoint_target <
- cache->eviction_dirty_target)
- cache->eviction_checkpoint_target =
- cache->eviction_dirty_target;
+ /*
+ * Sanity check the checkpoint target: don't allow a value
+ * lower than the dirty target.
+ */
+ if (cache->eviction_checkpoint_target > 0 &&
+ cache->eviction_checkpoint_target < cache->eviction_dirty_target)
+ cache->eviction_checkpoint_target =
+ cache->eviction_dirty_target;
- WT_RET(__wt_config_gets(
- session, cfg, "eviction_dirty_trigger", &cval));
- cache->eviction_dirty_trigger = (u_int)cval.val;
+ WT_RET(__wt_config_gets(session, cfg, "eviction_dirty_trigger", &cval));
+ cache->eviction_dirty_trigger = (u_int)cval.val;
- /*
- * Don't allow the dirty trigger to be larger than the overall
- * trigger or we can get stuck with a cache full of dirty data.
- */
- if (cache->eviction_dirty_trigger > cache->eviction_trigger)
- cache->eviction_dirty_trigger = cache->eviction_trigger;
- }
+ /*
+ * Don't allow the dirty trigger to be larger than the overall
+ * trigger or we can get stuck with a cache full of dirty data.
+ */
+ if (cache->eviction_dirty_trigger > cache->eviction_trigger)
+ cache->eviction_dirty_trigger = cache->eviction_trigger;
WT_RET(__wt_config_gets(session, cfg, "eviction.threads_max", &cval));
WT_ASSERT(session, cval.val > 0);
@@ -192,26 +183,26 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
* get any work done.
*/
if (cache->eviction_target >= cache->eviction_trigger)
- WT_ERR_MSG(session, EINVAL,
+ WT_RET_MSG(session, EINVAL,
"eviction target must be lower than the eviction trigger");
- WT_ERR(__wt_cond_auto_alloc(session, "cache eviction server",
+ WT_RET(__wt_cond_auto_alloc(session, "cache eviction server",
false, 10000, WT_MILLION, &cache->evict_cond));
- WT_ERR(__wt_spin_init(session, &cache->evict_pass_lock, "evict pass"));
- WT_ERR(__wt_spin_init(session,
+ WT_RET(__wt_spin_init(session, &cache->evict_pass_lock, "evict pass"));
+ WT_RET(__wt_spin_init(session,
&cache->evict_queue_lock, "cache eviction queue"));
- WT_ERR(__wt_spin_init(session, &cache->evict_walk_lock, "cache walk"));
+ WT_RET(__wt_spin_init(session, &cache->evict_walk_lock, "cache walk"));
if ((ret = __wt_open_internal_session(conn, "evict pass",
false, WT_SESSION_NO_DATA_HANDLES, &cache->walk_session)) != 0)
- WT_ERR_MSG(NULL, ret,
+ WT_RET_MSG(NULL, ret,
"Failed to create session for eviction walks");
/* Allocate the LRU eviction queue. */
cache->evict_slots = WT_EVICT_WALK_BASE + WT_EVICT_WALK_INCR;
for (i = 0; i < WT_EVICT_QUEUE_MAX; ++i) {
- WT_ERR(__wt_calloc_def(session,
+ WT_RET(__wt_calloc_def(session,
cache->evict_slots, &cache->evict_queues[i].evict_queue));
- WT_ERR(__wt_spin_init(session,
+ WT_RET(__wt_spin_init(session,
&cache->evict_queues[i].evict_lock, "cache eviction"));
}
@@ -227,9 +218,6 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
*/
__wt_cache_stats_update(session);
return (0);
-
-err: WT_RET(__wt_cache_destroy(session));
- return (ret);
}
/*
diff --git a/src/third_party/wiredtiger/src/conn/conn_ckpt.c b/src/third_party/wiredtiger/src/conn/conn_ckpt.c
index 451b0cd86f6..1d18c128c5b 100644
--- a/src/third_party/wiredtiger/src/conn/conn_ckpt.c
+++ b/src/third_party/wiredtiger/src/conn/conn_ckpt.c
@@ -89,22 +89,36 @@ __ckpt_server(void *arg)
*/
__wt_cond_wait(session, conn->ckpt_cond, conn->ckpt_usecs);
- /* Checkpoint the database. */
- WT_ERR(wt_session->checkpoint(wt_session, NULL));
-
- /* Reset. */
- if (conn->ckpt_logsize) {
- __wt_log_written_reset(session);
- conn->ckpt_signalled = false;
-
- /*
- * In case we crossed the log limit during the
- * checkpoint and the condition variable was already
- * signalled, do a tiny wait to clear it so we don't do
- * another checkpoint immediately.
- */
- __wt_cond_wait(session, conn->ckpt_cond, 1);
- }
+ /*
+ * Checkpoint the database if the connection is marked dirty.
+ * A connection is marked dirty whenever a btree gets marked
+ * dirty, which reflects a change in the database that
+ * needs to be checkpointed. That said, there can be short
+ * instances when a btree gets marked dirty and the connection
+ * is yet to be. We might skip a checkpoint in that short
+ * instance, which is okay because by the next time we get to
+ * checkpoint, the connection would have been marked dirty and
+ * hence the checkpoint will not be skipped this time.
+ */
+ if (conn->modified) {
+ WT_ERR(wt_session->checkpoint(wt_session, NULL));
+
+ /* Reset. */
+ if (conn->ckpt_logsize) {
+ __wt_log_written_reset(session);
+ conn->ckpt_signalled = false;
+
+ /*
+ * In case we crossed the log limit during the
+ * checkpoint and the condition variable was
+ * already signalled, do a tiny wait to clear
+ * it so we don't do another checkpoint
+ * immediately.
+ */
+ __wt_cond_wait(session, conn->ckpt_cond, 1);
+ }
+ } else
+ WT_STAT_CONN_INCR(session, txn_checkpoint_skipped);
}
if (0) {
diff --git a/src/third_party/wiredtiger/src/conn/conn_handle.c b/src/third_party/wiredtiger/src/conn/conn_handle.c
index 5ff8b7f798b..5104624523b 100644
--- a/src/third_party/wiredtiger/src/conn/conn_handle.c
+++ b/src/third_party/wiredtiger/src/conn/conn_handle.c
@@ -50,21 +50,23 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn)
/* Statistics. */
__wt_stat_connection_init(conn);
- /* Locks. */
+ /* Spinlocks. */
WT_RET(__wt_spin_init(session, &conn->api_lock, "api"));
- WT_RET(__wt_spin_init(session, &conn->checkpoint_lock, "checkpoint"));
- WT_RET(__wt_spin_init(session, &conn->dhandle_lock, "data handle"));
+ WT_SPIN_INIT_TRACKED(session, &conn->checkpoint_lock, checkpoint);
+ WT_SPIN_INIT_TRACKED(session, &conn->dhandle_lock, handle_list);
WT_RET(__wt_spin_init(session, &conn->encryptor_lock, "encryptor"));
WT_RET(__wt_spin_init(session, &conn->fh_lock, "file list"));
- WT_RET(__wt_rwlock_alloc(session,
- &conn->hot_backup_lock, "hot backup"));
WT_RET(__wt_spin_init(session, &conn->las_lock, "lookaside table"));
- WT_RET(__wt_spin_init(session, &conn->metadata_lock, "metadata"));
+ WT_SPIN_INIT_TRACKED(session, &conn->metadata_lock, metadata);
WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure"));
- WT_RET(__wt_spin_init(session, &conn->schema_lock, "schema"));
- WT_RET(__wt_spin_init(session, &conn->table_lock, "table creation"));
+ WT_SPIN_INIT_TRACKED(session, &conn->schema_lock, schema);
+ WT_SPIN_INIT_TRACKED(session, &conn->table_lock, table);
WT_RET(__wt_spin_init(session, &conn->turtle_lock, "turtle file"));
+ /* Read-write locks */
+ WT_RET(__wt_rwlock_alloc(
+ session, &conn->hot_backup_lock, "hot backup"));
+
WT_RET(__wt_calloc_def(session, WT_PAGE_LOCKS, &conn->page_lock));
WT_CACHE_LINE_ALIGNMENT_VERIFY(session, conn->page_lock);
for (i = 0; i < WT_PAGE_LOCKS; ++i)
diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c
index 2786526c2fa..34743034877 100644
--- a/src/third_party/wiredtiger/src/conn/conn_log.c
+++ b/src/third_party/wiredtiger/src/conn/conn_log.c
@@ -839,10 +839,10 @@ __log_server(void *arg)
/* Wait until the next event. */
- WT_ERR(__wt_epoch(session, &start));
+ __wt_epoch(session, &start);
__wt_cond_auto_wait_signal(session,
conn->log_cond, did_work, &signalled);
- WT_ERR(__wt_epoch(session, &now));
+ __wt_epoch(session, &now);
timediff = WT_TIMEDIFF_MS(now, start);
}
diff --git a/src/third_party/wiredtiger/src/conn/conn_stat.c b/src/third_party/wiredtiger/src/conn/conn_stat.c
index 66979dfd023..0715a035807 100644
--- a/src/third_party/wiredtiger/src/conn/conn_stat.c
+++ b/src/third_party/wiredtiger/src/conn/conn_stat.c
@@ -130,12 +130,12 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp)
WT_RET(__wt_config_gets(session, cfg, "statistics_log.json", &cval));
if (cval.val != 0)
- FLD_SET(conn->stat_flags, WT_CONN_STAT_JSON);
+ FLD_SET(conn->stat_flags, WT_STAT_JSON);
WT_RET(__wt_config_gets(
session, cfg, "statistics_log.on_close", &cval));
if (cval.val != 0)
- FLD_SET(conn->stat_flags, WT_CONN_STAT_ON_CLOSE);
+ FLD_SET(conn->stat_flags, WT_STAT_ON_CLOSE);
/*
* We don't allow the log path to be reconfigured for security reasons.
@@ -206,7 +206,7 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp)
#define WT_TIMESTAMP_JSON_DEFAULT "%Y-%m-%dT%H:%M:%S.000Z"
WT_ERR(__wt_config_gets(
session, cfg, "statistics_log.timestamp", &cval));
- if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_JSON) &&
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_JSON) &&
WT_STRING_MATCH(WT_TIMESTAMP_DEFAULT, cval.str, cval.len))
WT_ERR(__wt_strdup(
session, WT_TIMESTAMP_JSON_DEFAULT, &conn->stat_format));
@@ -264,7 +264,7 @@ __statlog_dump(WT_SESSION_IMPL *session, const char *name, bool conn_stats)
goto err;
}
- if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_JSON)) {
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_JSON)) {
WT_ERR(__wt_fprintf(session, conn->stat_fs,
"{\"version\":\"%s\",\"localTime\":\"%s\"",
WIREDTIGER_VERSION_STRING, conn->stat_stamp));
@@ -415,7 +415,7 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp)
conn = S2C(session);
/* Get the current local time of day. */
- WT_RET(__wt_epoch(session, &ts));
+ __wt_epoch(session, &ts);
tm = localtime_r(&ts.tv_sec, &_tm);
/* Create the logging path name for this time of day. */
@@ -482,7 +482,7 @@ __wt_statlog_log_one(WT_SESSION_IMPL *session)
conn = S2C(session);
- if (!FLD_ISSET(conn->stat_flags, WT_CONN_STAT_ON_CLOSE))
+ if (!FLD_ISSET(conn->stat_flags, WT_STAT_ON_CLOSE))
return (0);
if (F_ISSET(conn, WT_CONN_SERVER_RUN) &&
diff --git a/src/third_party/wiredtiger/src/conn/conn_sweep.c b/src/third_party/wiredtiger/src/conn/conn_sweep.c
index 03593f8951a..dba37fa2eb0 100644
--- a/src/third_party/wiredtiger/src/conn/conn_sweep.c
+++ b/src/third_party/wiredtiger/src/conn/conn_sweep.c
@@ -271,7 +271,7 @@ __sweep_server(void *arg)
/* Wait until the next event. */
__wt_cond_wait(session,
conn->sweep_cond, conn->sweep_interval * WT_MILLION);
- WT_ERR(__wt_seconds(session, &now));
+ __wt_seconds(session, &now);
WT_STAT_CONN_INCR(session, dh_sweeps);
diff --git a/src/third_party/wiredtiger/src/cursor/cur_file.c b/src/third_party/wiredtiger/src/cursor/cur_file.c
index e304cf7b775..9fc466f4c76 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_file.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_file.c
@@ -117,12 +117,12 @@ err: API_END_RET(session, ret);
}
/*
- * __curfile_next_random --
+ * __wt_curfile_next_random --
* WT_CURSOR->next method for the btree cursor type when configured with
- * next_random.
+ * next_random. This is exported because it is called directly within LSM.
*/
-static int
-__curfile_next_random(WT_CURSOR *cursor)
+int
+__wt_curfile_next_random(WT_CURSOR *cursor)
{
WT_CURSOR_BTREE *cbt;
WT_DECL_RET;
@@ -473,7 +473,7 @@ __curfile_create(WT_SESSION_IMPL *session,
"column-store objects");
__wt_cursor_set_notsup(cursor);
- cursor->next = __curfile_next_random;
+ cursor->next = __wt_curfile_next_random;
cursor->reset = __curfile_reset;
WT_ERR(__wt_config_gets_def(
diff --git a/src/third_party/wiredtiger/src/cursor/cur_stat.c b/src/third_party/wiredtiger/src/cursor/cur_stat.c
index 700cc366ff0..b36416debe1 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_stat.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_stat.c
@@ -354,7 +354,7 @@ __curstat_conn_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
*/
__wt_conn_stat_init(session);
__wt_stat_connection_aggregate(conn->stats, &cst->u.conn_stats);
- if (F_ISSET(cst, WT_CONN_STAT_CLEAR))
+ if (F_ISSET(cst, WT_STAT_CLEAR))
__wt_stat_connection_clear_all(conn->stats);
cst->stats = (int64_t *)&cst->u.conn_stats;
@@ -380,7 +380,7 @@ __curstat_file_init(WT_SESSION_IMPL *session,
* If we are only getting the size of the file, we don't need to open
* the tree.
*/
- if (F_ISSET(cst, WT_CONN_STAT_SIZE)) {
+ if (F_ISSET(cst, WT_STAT_TYPE_SIZE)) {
filename = uri;
if (!WT_PREFIX_SKIP(filename, "file:"))
return (EINVAL);
@@ -401,7 +401,7 @@ __curstat_file_init(WT_SESSION_IMPL *session,
if ((ret = __wt_btree_stat_init(session, cst)) == 0) {
__wt_stat_dsrc_init_single(&cst->u.dsrc_stats);
__wt_stat_dsrc_aggregate(dhandle->stats, &cst->u.dsrc_stats);
- if (F_ISSET(cst, WT_CONN_STAT_CLEAR))
+ if (F_ISSET(cst, WT_STAT_CLEAR))
__wt_stat_dsrc_clear_all(dhandle->stats);
__wt_curstat_dsrc_final(cst);
}
@@ -604,50 +604,79 @@ __wt_curstat_open(WT_SESSION_IMPL *session,
if ((ret = __wt_config_gets(session, cfg, "statistics", &cval)) == 0) {
if ((ret = __wt_config_subgets(
session, &cval, "all", &sval)) == 0 && sval.val != 0) {
- if (!FLD_ISSET(conn->stat_flags, WT_CONN_STAT_ALL))
+ if (!FLD_ISSET(conn->stat_flags, WT_STAT_TYPE_ALL))
goto config_err;
- F_SET(cst, WT_CONN_STAT_ALL | WT_CONN_STAT_FAST);
+ F_SET(cst, WT_STAT_TYPE_ALL | WT_STAT_TYPE_CACHE_WALK |
+ WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK);
}
WT_ERR_NOTFOUND_OK(ret);
if ((ret = __wt_config_subgets(
session, &cval, "fast", &sval)) == 0 && sval.val != 0) {
- if (F_ISSET(cst, WT_CONN_STAT_ALL))
+ if (F_ISSET(cst, WT_STAT_TYPE_ALL))
WT_ERR_MSG(session, EINVAL,
- "only one statistics configuration value "
- "may be specified");
- F_SET(cst, WT_CONN_STAT_FAST);
+ "Only one of all, fast, none "
+ "configuration values should be specified");
+ F_SET(cst, WT_STAT_TYPE_FAST);
}
WT_ERR_NOTFOUND_OK(ret);
+
+ if ((ret = __wt_config_subgets(session,
+ &cval, "cache_walk", &sval)) == 0 && sval.val != 0) {
+ /*
+ * Configuring cache walk statistics implies fast
+ * statistics. Keep that knowledge internal for now -
+ * it may change in the future.
+ */
+ F_SET(cst, WT_STAT_TYPE_CACHE_WALK | WT_STAT_TYPE_FAST);
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+ if ((ret = __wt_config_subgets(session,
+ &cval, "tree_walk", &sval)) == 0 && sval.val != 0) {
+ /*
+ * Configuring tree walk statistics implies fast
+ * statistics. Keep that knowledge internal for now -
+ * it may change in the future.
+ */
+ F_SET(cst, WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK);
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
if ((ret = __wt_config_subgets(
session, &cval, "size", &sval)) == 0 && sval.val != 0) {
- if (F_ISSET(cst, WT_CONN_STAT_FAST | WT_CONN_STAT_ALL))
+ if (F_ISSET(cst, WT_STAT_TYPE_FAST | WT_STAT_TYPE_ALL))
WT_ERR_MSG(session, EINVAL,
- "only one statistics configuration value "
- "may be specified");
- F_SET(cst, WT_CONN_STAT_SIZE);
+ "Only one of all, fast, none "
+ "configuration values should be specified");
+ F_SET(cst, WT_STAT_TYPE_SIZE);
}
WT_ERR_NOTFOUND_OK(ret);
if ((ret = __wt_config_subgets(
session, &cval, "clear", &sval)) == 0 && sval.val != 0) {
- if (F_ISSET(cst, WT_CONN_STAT_SIZE))
+ if (F_ISSET(cst, WT_STAT_TYPE_SIZE))
WT_ERR_MSG(session, EINVAL,
"clear is incompatible with size "
"statistics");
- F_SET(cst, WT_CONN_STAT_CLEAR);
+ F_SET(cst, WT_STAT_CLEAR);
}
WT_ERR_NOTFOUND_OK(ret);
/* If no configuration, use the connection's configuration. */
if (cst->flags == 0) {
- if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_ALL))
- F_SET(cst, WT_CONN_STAT_ALL);
- if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_FAST))
- F_SET(cst, WT_CONN_STAT_FAST);
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_TYPE_ALL))
+ F_SET(cst, WT_STAT_TYPE_ALL);
+ if (FLD_ISSET(
+ conn->stat_flags, WT_STAT_TYPE_CACHE_WALK))
+ F_SET(cst, WT_STAT_TYPE_CACHE_WALK);
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_TYPE_FAST))
+ F_SET(cst, WT_STAT_TYPE_FAST);
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_TYPE_TREE_WALK))
+ F_SET(cst, WT_STAT_TYPE_TREE_WALK);
}
/* If the connection configures clear, so do we. */
- if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_CLEAR))
- F_SET(cst, WT_CONN_STAT_CLEAR);
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_CLEAR))
+ F_SET(cst, WT_STAT_CLEAR);
}
/*
@@ -670,9 +699,9 @@ __wt_curstat_open(WT_SESSION_IMPL *session,
/*
* Do the initial statistics snapshot: there won't be cursor operations
- * to trigger initialization when aggregating statistics for upper-level
- * objects like tables, we need to a valid set of statistics when before
- * the open returns.
+ * to trigger initialization when aggregating statistics for upper-level
+ * objects like tables so we need a valid set of statistics before the
+ * open returns.
*/
WT_ERR(__wt_curstat_init(session, uri, other, cst->cfg, cst));
cst->notinitialized = false;
diff --git a/src/third_party/wiredtiger/src/cursor/cur_table.c b/src/third_party/wiredtiger/src/cursor/cur_table.c
index 1b93b27f564..6543d54e90f 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_table.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_table.c
@@ -757,13 +757,36 @@ err: API_END_RET(session, ret);
}
/*
+ * __curtable_complete --
+ * Return an error (EINVAL) unless all of the table's column groups exist.
+ */
+static int
+__curtable_complete(WT_SESSION_IMPL *session, WT_TABLE *table)
+{
+ WT_DECL_RET;
+ bool complete;
+
+ if (table->cg_complete)
+ return (0);
+
+ /* The unlocked check failed: take the table lock and check again. */
+ complete = false;
+ WT_WITH_TABLE_LOCK(session, ret, complete = table->cg_complete);
+ WT_RET(ret);
+ if (!complete)
+ WT_RET_MSG(session, EINVAL,
+ "'%s' not available until all column groups are created",
+ table->name);
+ return (0);
+}
+
+/*
* __curtable_open_colgroups --
* Open cursors on column groups for a table cursor.
*/
static int
__curtable_open_colgroups(WT_CURSOR_TABLE *ctable, const char *cfg_arg[])
{
- WT_DECL_RET;
WT_SESSION_IMPL *session;
WT_TABLE *table;
WT_CURSOR **cp;
@@ -775,21 +798,11 @@ __curtable_open_colgroups(WT_CURSOR_TABLE *ctable, const char *cfg_arg[])
cfg_arg[0], cfg_arg[1], "dump=\"\",readonly=0", NULL, NULL
};
u_int i;
- bool complete;
session = (WT_SESSION_IMPL *)ctable->iface.session;
table = ctable->table;
- /* If the table is incomplete, wait on the table lock and recheck. */
- complete = table->cg_complete;
- if (!complete) {
- WT_WITH_TABLE_LOCK(session, ret, complete = table->cg_complete);
- WT_RET(ret);
- }
- if (!complete)
- WT_RET_MSG(session, EINVAL,
- "Can't use '%s' until all column groups are created",
- table->name);
+ WT_RET(__curtable_complete(session, table)); /* completeness check */
WT_RET(__wt_calloc_def(session,
WT_COLGROUPS(table), &ctable->cg_cursors));
@@ -887,6 +900,8 @@ __wt_curtable_open(WT_SESSION_IMPL *session,
size = WT_PTRDIFF(columns, tablename);
WT_RET(__wt_schema_get_table(session, tablename, size, false, &table));
+ WT_RET(__curtable_complete(session, table)); /* completeness check */
+
if (table->is_simple) {
/* Just return a cursor on the underlying data source. */
ret = __wt_open_cursor(session,
diff --git a/src/third_party/wiredtiger/src/docs/build-posix.dox b/src/third_party/wiredtiger/src/docs/build-posix.dox
index 4889bf931c9..3e7f8f37acd 100644
--- a/src/third_party/wiredtiger/src/docs/build-posix.dox
+++ b/src/third_party/wiredtiger/src/docs/build-posix.dox
@@ -150,10 +150,14 @@ Configure WiredTiger to support the \c verbose configuration string to
Configure WiredTiger for <a href="http://www.zlib.net/">zlib</a>
compression; see @ref compression for more information.
+@par \c --enable-zstd
+Configure WiredTiger for <a href="https://github.com/facebook/zstd">Zstd</a>
+compression; see @ref compression for more information.
+
@par <code>--with-builtins</code>
Configure WiredTiger to include support for extensions in the main library.
This avoids requiring additional libraries for supported extensions. Currently
-supported options are \c lz4, \c snappy and \c zlib.
+supported options are \c lz4, \c snappy, \c zlib and \c zstd.
@par <code>--with-python-prefix</code>
Configure WiredTiger to install Python libraries to a non-standard Python
diff --git a/src/third_party/wiredtiger/src/docs/compression.dox b/src/third_party/wiredtiger/src/docs/compression.dox
index 0be96835760..74bed5c6f68 100644
--- a/src/third_party/wiredtiger/src/docs/compression.dox
+++ b/src/third_party/wiredtiger/src/docs/compression.dox
@@ -1,7 +1,7 @@
/*! @m_page{{c,java},compression,Compressors}
This section explains how to configure WiredTiger's builtin support for
-the lz4, snappy and zlib compression engines.
+the lz4, snappy, zlib and zstd compression engines.
@section compression_lz4 Using LZ4 compression
@@ -85,11 +85,53 @@ an extension. For example, with the WiredTiger library installed in
@snippet ex_all.c Configure zlib extension
+The default compression level for the zlib compression is
+\c Z_DEFAULT_COMPRESSION (see the zlib documentation for further
+information); compression can be configured to other levels using the
+additional configuration argument \c compression_level.
+
+@snippet ex_all.c Configure zlib extension with compression level
+
Finally, when creating the WiredTiger object, set \c block_compressor
to \c zlib:
@snippet ex_all.c Create a zlib compressed table
+@section compression_zstd Using Zstd compression
+
+To use the builtin support for Facebook's
+<a href="https://github.com/facebook/zstd">Zstd</a>
+compression, first check that Zstd is installed in include and library
+directories searched by the compiler. Once Zstd is installed, you can
+enable Zstd using the \c --enable-zstd option to configure.
+
+If Zstd is installed in a location not normally searched by the
+compiler toolchain, you'll need to modify the \c CPPFLAGS and \c LDFLAGS
+to indicate these locations. For example, with the Zstd includes and
+libraries installed in \c /usr/local/include and \c /usr/local/lib, you
+would run configure with the following additional arguments:
+
+@code
+--enable-zstd CPPFLAGS="-I/usr/local/include" LDFLAGS="-L/usr/local/lib"
+@endcode
+
+When opening the WiredTiger database, load the Zstd shared library as
+an extension. For example, with the WiredTiger library installed in
+\c /usr/local/lib, you would use the following extension:
+
+@snippet ex_all.c Configure zstd extension
+
+The default compression level for the zstd compression is 3; compression
+can be configured to other levels using the additional configuration
+argument \c compression_level.
+
+@snippet ex_all.c Configure zstd extension with compression level
+
+Finally, when creating the WiredTiger object, set \c block_compressor
+to \c zstd:
+
+@snippet ex_all.c Create a zstd compressed table
+
@section compression_upgrading Upgrading compression engines
WiredTiger does not store information with file blocks to identify the
diff --git a/src/third_party/wiredtiger/src/docs/spell.ok b/src/third_party/wiredtiger/src/docs/spell.ok
index a2ef7658ec6..4b1337f84b8 100644
--- a/src/third_party/wiredtiger/src/docs/spell.ok
+++ b/src/third_party/wiredtiger/src/docs/spell.ok
@@ -95,6 +95,7 @@ WiredTigerStat
WiredTigerTestCase
Yann
Za
+Zstd
aR
abstime
ack'ed
@@ -507,3 +508,4 @@ xa
yieldcpu
zlib
zseries
+zstd
diff --git a/src/third_party/wiredtiger/src/docs/wtperf.dox b/src/third_party/wiredtiger/src/docs/wtperf.dox
index a49d0d9f871..83aadf8a776 100644
--- a/src/third_party/wiredtiger/src/docs/wtperf.dox
+++ b/src/third_party/wiredtiger/src/docs/wtperf.dox
@@ -150,33 +150,27 @@ number of async worker threads
@par checkpoint_interval (unsigned int, default=120)
checkpoint every interval seconds during the workload phase.
@par checkpoint_stress_rate (unsigned int, default=0)
-checkpoint every rate operations during the populate phase in the
-populate thread(s), 0 to disable
+checkpoint every rate operations during the populate phase in the populate thread(s), 0 to disable
@par checkpoint_threads (unsigned int, default=0)
number of checkpoint threads
-@par conn_config (string, default=create)
+@par conn_config (string, default="create")
connection configuration string
+@par close_conn (boolean, default=true)
+properly close connection at end of test. Setting to false does not sync data to disk and can result in lost data after test exits.
@par compact (boolean, default=false)
post-populate compact for LSM merging activity
-@par compression (string, default=none)
-compression extension. Allowed configuration values are: 'none',
-'lz4', 'snappy', 'zlib'
+@par compression (string, default="none")
+compression extension. Allowed configuration values are: 'none', 'lz4', 'snappy', 'zlib', 'zstd'
@par create (boolean, default=true)
do population phase; false to use existing database
@par database_count (unsigned int, default=1)
-number of WiredTiger databases to use. Each database will execute the
-workload using a separate home directory and complete set of worker
-threads
-@par drop_tables (unsigned int, default=0)
-Whether to drop all tables at the end of the run, and report time
-taken to do the drop.
+number of WiredTiger databases to use. Each database will execute the workload using a separate home directory and complete set of worker threads
+@par drop_tables (boolean, default=false)
+Whether to drop all tables at the end of the run, and report time taken to do the drop.
@par icount (unsigned int, default=5000)
-number of records to initially populate. If multiple tables are
-configured the count is spread evenly across all tables.
+number of records to initially populate. If multiple tables are configured the count is spread evenly across all tables.
@par idle_table_cycle (unsigned int, default=0)
-Enable regular create and drop of idle tables, value is the maximum
-number of seconds a create or drop is allowed before flagging an
-error. Default 0 which means disabled.
+Enable regular create and drop of idle tables, value is the maximum number of seconds a create or drop is allowed before flagging an error. Default 0 which means disabled.
@par index (boolean, default=false)
Whether to create an index on the value field.
@par insert_rmw (boolean, default=false)
@@ -188,28 +182,21 @@ perform partial logging on first table only.
@par log_like_table (boolean, default=false)
Append all modification operations to another shared table.
@par min_throughput (unsigned int, default=0)
-notify if any throughput measured is less than this amount. Aborts or
-prints warning based on min_throughput_fatal setting. Requires
-sample_interval to be configured
+notify if any throughput measured is less than this amount. Aborts or prints warning based on min_throughput_fatal setting. Requires sample_interval to be configured
@par min_throughput_fatal (boolean, default=false)
print warning (false) or abort (true) of min_throughput failure.
@par max_latency (unsigned int, default=0)
-notify if any latency measured exceeds this number of
-milliseconds.Aborts or prints warning based on min_throughput_fatal
-setting. Requires sample_interval to be configured
+notify if any latency measured exceeds this number of milliseconds. Aborts or prints warning based on max_latency_fatal setting. Requires sample_interval to be configured
@par max_latency_fatal (boolean, default=false)
print warning (false) or abort (true) of max_latency failure.
@par pareto (unsigned int, default=0)
-use pareto distribution for random numbers. Zero to disable, otherwise
-a percentage indicating how aggressive the distribution should be.
+use pareto distribution for random numbers. Zero to disable, otherwise a percentage indicating how aggressive the distribution should be.
@par populate_ops_per_txn (unsigned int, default=0)
-number of operations to group into each transaction in the populate
-phase, zero for auto-commit
+number of operations to group into each transaction in the populate phase, zero for auto-commit
@par populate_threads (unsigned int, default=1)
number of populate threads, 1 for bulk load
@par random_range (unsigned int, default=0)
-if non zero choose a value from within this range as the key for
-insert operations
+if non zero choose a value from within this range as the key for insert operations
@par random_value (boolean, default=false)
generate random content for the value
@par range_partition (boolean, default=false)
@@ -217,9 +204,7 @@ partition data by range (vs hash)
@par read_range (unsigned int, default=0)
scan a range of keys after each search
@par readonly (boolean, default=false)
-reopen the connection between populate and workload phases in readonly
-mode. Requires reopen_connection turned on (default). Requires that
-read be the only workload specified
+reopen the connection between populate and workload phases in readonly mode. Requires reopen_connection turned on (default). Requires that read be the only workload specified
@par reopen_connection (boolean, default=true)
close and reopen the connection between populate and workload phases
@par report_interval (unsigned int, default=2)
@@ -231,40 +216,22 @@ total workload seconds
@par sample_interval (unsigned int, default=0)
performance logging every interval seconds, 0 to disable
@par sample_rate (unsigned int, default=50)
-how often the latency of operations is measured. One for every
-operation,two for every second operation, three for every third
-operation etc.
-@par sess_config (string, default=)
+how often the latency of operations is measured. One for every operation, two for every second operation, three for every third operation etc.
+@par sess_config (string, default="")
session configuration string
@par session_count_idle (unsigned int, default=0)
number of idle sessions to create. Default 0.
-@par table_config (string, default=key_format=S,value_format=S,type=lsm,exclusive=true,allocation_size=4kb,internal_page_max=64kb,leaf_page_max=4kb,split_pct=100)
+@par table_config (string, default="key_format=S,value_format=S,type=lsm,exclusive=true, allocation_size=4kb,internal_page_max=64kb,leaf_page_max=4kb, split_pct=100")
table configuration string
@par table_count (unsigned int, default=1)
-number of tables to run operations over. Keys are divided evenly over
-the tables. Cursors are held open on all tables. Default 1, maximum
-99999.
+number of tables to run operations over. Keys are divided evenly over the tables. Cursors are held open on all tables. Default 1, maximum 99999.
@par table_count_idle (unsigned int, default=0)
number of tables to create, that won't be populated. Default 0.
-@par threads (string, default=)
-workload configuration: each 'count' entry is the total number of
-threads, and the 'insert', 'read' and 'update' entries are the ratios
-of insert, read and update operations done by each worker thread; If a
-throttle value is provided each thread will do a maximum of that
-number of operations per second; multiple workload configurations may
-be specified per threads configuration; for example, a more complex
-threads configuration might be
-'threads=((count=2,reads=1)(count=8,reads=1,inserts=2,updates=1))'
-which would create 2 threads doing nothing but reads and 8 threads
-each doing 50% inserts and 25% reads and updates. Allowed
-configuration values are 'count', 'throttle', 'update_delta', 'reads',
-'inserts', 'updates', 'truncate', 'truncate_pct' and 'truncate_count'.
-There are also behavior modifiers, supported modifiers are
-'ops_per_txn'
-@par transaction_config (string, default=)
-transaction configuration string, relevant when populate_opts_per_txn
-is nonzero
-@par table_name (string, default=test)
+@par threads (string, default="")
+workload configuration: each 'count' entry is the total number of threads, and the 'inserts', 'reads' and 'updates' entries are the ratios of insert, read and update operations done by each worker thread; if a throttle value is provided each thread will do a maximum of that number of operations per second; multiple workload configurations may be specified per threads configuration; for example, a more complex threads configuration might be 'threads=((count=2,reads=1)(count=8,reads=1,inserts=2,updates=1))' which would create 2 threads doing nothing but reads and 8 threads each doing 50% inserts and 25% reads and updates. Allowed configuration values are 'count', 'throttle', 'update_delta', 'reads', 'inserts', 'updates', 'truncate', 'truncate_pct' and 'truncate_count'. There are also behavior modifiers, supported modifiers are 'ops_per_txn'
+@par transaction_config (string, default="")
+WT_SESSION.begin_transaction configuration string, applied during the populate phase when populate_ops_per_txn is nonzero
+@par table_name (string, default="test")
table name
@par truncate_single_ops (boolean, default=false)
Implement truncate via cursor remove instead of session API
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index acc81f566a5..6c99f3a13dc 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -233,10 +233,10 @@ __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread)
#ifdef HAVE_DIAGNOSTIC
/*
- * Ensure the cache stuck timer is initialized when starting eviction
+ * Ensure the cache stuck timer is initialized when starting eviction.
*/
if (thread->id == 0)
- WT_ERR(__wt_epoch(session, &cache->stuck_ts));
+ __wt_epoch(session, &cache->stuck_ts);
#endif
while (F_ISSET(conn, WT_CONN_EVICTION_RUN) &&
@@ -350,10 +350,10 @@ __evict_server(WT_SESSION_IMPL *session, bool *did_work)
} else if (cache->pages_evicted != cache->pages_evict) {
cache->pages_evicted = cache->pages_evict;
#ifdef HAVE_DIAGNOSTIC
- WT_RET(__wt_epoch(session, &cache->stuck_ts));
+ __wt_epoch(session, &cache->stuck_ts);
} else {
/* After being stuck for 5 minutes, give up. */
- WT_RET(__wt_epoch(session, &now));
+ __wt_epoch(session, &now);
if (WT_TIMEDIFF_SEC(now, cache->stuck_ts) > 300) {
ret = ETIMEDOUT;
__wt_err(session, ret,
@@ -465,16 +465,16 @@ __evict_update_work(WT_SESSION_IMPL *session)
*/
bytes_max = conn->cache_size + 1;
bytes_inuse = __wt_cache_bytes_inuse(cache);
- if (bytes_inuse > (cache->eviction_target * bytes_max) / 100)
- F_SET(cache, WT_CACHE_EVICT_CLEAN);
if (__wt_eviction_clean_needed(session, NULL))
F_SET(cache, WT_CACHE_EVICT_CLEAN | WT_CACHE_EVICT_CLEAN_HARD);
+ else if (bytes_inuse > (cache->eviction_target * bytes_max) / 100)
+ F_SET(cache, WT_CACHE_EVICT_CLEAN);
dirty_inuse = __wt_cache_dirty_leaf_inuse(cache);
- if (dirty_inuse > (cache->eviction_dirty_target * bytes_max) / 100)
- F_SET(cache, WT_CACHE_EVICT_DIRTY);
if (__wt_eviction_dirty_needed(session, NULL))
F_SET(cache, WT_CACHE_EVICT_DIRTY | WT_CACHE_EVICT_DIRTY_HARD);
+ else if (dirty_inuse > (cache->eviction_dirty_target * bytes_max) / 100)
+ F_SET(cache, WT_CACHE_EVICT_DIRTY);
/*
* If application threads are blocked by the total volume of data in
@@ -506,12 +506,6 @@ __evict_update_work(WT_SESSION_IMPL *session)
F_CLR(cache, WT_CACHE_EVICT_CLEAN | WT_CACHE_EVICT_CLEAN_HARD);
}
- /* If threads are blocked by eviction we should be looking for pages. */
- WT_ASSERT(session, !F_ISSET(cache, WT_CACHE_EVICT_CLEAN_HARD) ||
- F_ISSET(cache, WT_CACHE_EVICT_CLEAN));
- WT_ASSERT(session, !F_ISSET(cache, WT_CACHE_EVICT_DIRTY_HARD) ||
- F_ISSET(cache, WT_CACHE_EVICT_DIRTY));
-
WT_STAT_CONN_SET(session, cache_eviction_state,
F_MASK(cache, WT_CACHE_EVICT_MASK));
@@ -543,7 +537,7 @@ __evict_pass(WT_SESSION_IMPL *session)
/* Evict pages from the cache. */
for (loop = 0; cache->pass_intr == 0; loop++) {
- WT_RET(__wt_epoch(session, &now));
+ __wt_epoch(session, &now);
if (loop == 0)
prev = now;
@@ -554,6 +548,7 @@ __evict_pass(WT_SESSION_IMPL *session)
* does need to do some work.
*/
__wt_cache_read_gen_incr(session);
+ ++cache->evict_pass_gen;
/*
* Update the oldest ID: we use it to decide whether pages are
@@ -895,12 +890,11 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
/* Fill the next queue (that isn't the urgent queue). */
queue = cache->evict_fill_queue;
other_queue = cache->evict_queues + (1 - (queue - cache->evict_queues));
+ cache->evict_fill_queue = other_queue;
/* If this queue is full, try the other one. */
if (__evict_queue_full(queue) && !__evict_queue_full(other_queue))
queue = other_queue;
- cache->evict_fill_queue =
- &cache->evict_queues[1 - (queue - cache->evict_queues)];
/*
* If both queues are full and haven't been empty on recent refills,
@@ -1062,7 +1056,7 @@ __evict_walk(WT_SESSION_IMPL *session, WT_EVICT_QUEUE *queue)
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
- u_int max_entries, retries, slot, start_slot, spins;
+ u_int max_entries, retries, slot, spins, start_slot, total_candidates;
bool dhandle_locked, incr;
conn = S2C(session);
@@ -1079,6 +1073,14 @@ __evict_walk(WT_SESSION_IMPL *session, WT_EVICT_QUEUE *queue)
start_slot = slot = queue->evict_entries;
max_entries = WT_MIN(slot + WT_EVICT_WALK_INCR, cache->evict_slots);
+ /*
+ * Another pathological case: if there are only a tiny number of
+ * candidate pages in cache, don't put all of them on one queue.
+ */
+ total_candidates = (u_int)(F_ISSET(cache, WT_CACHE_EVICT_CLEAN) ?
+ __wt_cache_pages_inuse(cache) : cache->pages_dirty_leaf);
+ max_entries = WT_MIN(max_entries, 1 + total_candidates / 2);
+
retry: while (slot < max_entries) {
/*
* If another thread is waiting on the eviction server to clear
@@ -1282,8 +1284,8 @@ __evict_push_candidate(WT_SESSION_IMPL *session,
* Get a few page eviction candidates from a single underlying file.
*/
static int
-__evict_walk_file(WT_SESSION_IMPL *session,
- WT_EVICT_QUEUE *queue, u_int max_entries, u_int *slotp)
+__evict_walk_file(WT_SESSION_IMPL *session, WT_EVICT_QUEUE *queue,
+ u_int max_entries, u_int *slotp)
{
WT_BTREE *btree;
WT_CACHE *cache;
@@ -1410,6 +1412,7 @@ __evict_walk_file(WT_SESSION_IMPL *session,
page = ref->page;
modified = __wt_page_is_modified(page);
+ page->evict_pass_gen = cache->evict_pass_gen;
/*
* Use the EVICT_LRU flag to avoid putting pages onto the list
@@ -1508,19 +1511,22 @@ fast: /* If the page can't be evicted, give up. */
btree->evict_walk_period = 0;
/*
- * If we happen to end up on the root page, clear it. We have to track
- * hazard pointers, and the root page complicates that calculation.
+ * If we happen to end up on the root page or a page requiring urgent
+ * eviction, clear it. We have to track hazard pointers, and the root
+ * page complicates that calculation.
*
* Likewise if we found no new candidates during the walk: there is no
- * point keeping a page pinned, since it may be the only candidate in an
- * idle tree.
+ * point keeping a page pinned, since it may be the only candidate in
+ * an idle tree.
*
* If we land on a page requiring forced eviction, move on to the next
* page: we want this page evicted as quickly as possible.
*/
if ((ref = btree->evict_ref) != NULL) {
/* Give up the walk occasionally. */
- if (__wt_ref_is_root(ref) || evict == start || give_up)
+ if (__wt_ref_is_root(ref) || evict == start || give_up ||
+ ref->page->read_gen == WT_READGEN_OLDEST ||
+ ref->page->memory_footprint >= btree->splitmempage)
WT_RET(__evict_clear_walk(session, restarts == 0));
else if (ref->page->read_gen == WT_READGEN_OLDEST)
WT_RET_NOTFOUND_OK(__wt_tree_walk_count(
@@ -1543,17 +1549,17 @@ __evict_get_ref(
WT_SESSION_IMPL *session, bool is_server, WT_BTREE **btreep, WT_REF **refp)
{
WT_CACHE *cache;
- WT_DECL_RET;
WT_EVICT_ENTRY *evict;
WT_EVICT_QUEUE *queue, *other_queue, *urgent_queue;
uint32_t candidates;
- bool is_app, urgent_ok;
+ bool is_app, server_only, urgent_ok;
cache = S2C(session)->cache;
is_app = !F_ISSET(session, WT_SESSION_INTERNAL);
+ server_only = is_server && !WT_EVICT_HAS_WORKERS(session);
urgent_ok = (!is_app && !is_server) ||
!WT_EVICT_HAS_WORKERS(session) ||
- __wt_cache_aggressive(session);
+ (is_app && __wt_cache_aggressive(session));
urgent_queue = cache->evict_urgent_queue;
*btreep = NULL;
*refp = NULL;
@@ -1569,7 +1575,8 @@ __evict_get_ref(
}
/*
- * The server repopulates whenever the other queue is not full.
+ * The server repopulates whenever the other queue is not full, as long
+ * as at least one page has been evicted out of the current queue.
*
* Note that there are pathological cases where there are only enough
* eviction candidates in the cache to fill one queue. In that case,
@@ -1577,18 +1584,14 @@ __evict_get_ref(
* Such cases are extremely rare in real applications.
*/
if (is_server &&
+ (!urgent_ok || __evict_queue_empty(urgent_queue, false)) &&
+ !__evict_queue_full(cache->evict_current_queue) &&
+ !__evict_queue_full(cache->evict_fill_queue) &&
(cache->evict_empty_score > WT_EVICT_SCORE_CUTOFF ||
- __evict_queue_empty(cache->evict_fill_queue, false))) {
- while ((ret = __wt_spin_trylock(
- session, &cache->evict_queue_lock)) == EBUSY)
- if ((!urgent_ok ||
- __evict_queue_empty(urgent_queue, false)) &&
- !__evict_queue_full(cache->evict_fill_queue))
- return (WT_NOTFOUND);
+ __evict_queue_empty(cache->evict_fill_queue, false)))
+ return (WT_NOTFOUND);
- WT_RET(ret);
- } else
- __wt_spin_lock(session, &cache->evict_queue_lock);
+ __wt_spin_lock(session, &cache->evict_queue_lock);
/* Check the urgent queue first. */
if (urgent_ok && !__evict_queue_empty(urgent_queue, false))
@@ -1596,17 +1599,15 @@ __evict_get_ref(
else {
/*
* Check if the current queue needs to change.
- * The current queue could have changed while we waited for
- * the lock.
*
* The server will only evict half of the pages before looking
- * for more. The remainder are left to eviction workers (if any
- * configured), or application threads if necessary.
+ * for more, but should only switch queues if there are no
+ * other eviction workers.
*/
queue = cache->evict_current_queue;
other_queue = cache->evict_other_queue;
- if (__evict_queue_empty(queue, is_server) &&
- !__evict_queue_empty(other_queue, is_server)) {
+ if (__evict_queue_empty(queue, server_only) &&
+ !__evict_queue_empty(other_queue, server_only)) {
cache->evict_current_queue = other_queue;
cache->evict_other_queue = queue;
}
@@ -1715,15 +1716,19 @@ __evict_get_ref(
static int
__evict_page(WT_SESSION_IMPL *session, bool is_server)
{
+ struct timespec enter, leave;
WT_BTREE *btree;
WT_CACHE *cache;
WT_DECL_RET;
WT_REF *ref;
+ bool app_timer;
WT_RET(__evict_get_ref(session, is_server, &btree, &ref));
WT_ASSERT(session, ref->state == WT_REF_LOCKED);
+ app_timer = false;
cache = S2C(session)->cache;
+
/*
* An internal session flags either the server itself or an eviction
* worker thread.
@@ -1739,6 +1744,10 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
WT_STAT_CONN_INCR(session, cache_eviction_app_dirty);
WT_STAT_CONN_INCR(session, cache_eviction_app);
cache->app_evicts++;
+ if (WT_STAT_ENABLED(session)) {
+ app_timer = true;
+ __wt_epoch(session, &enter);
+ }
}
/*
@@ -1756,6 +1765,11 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
(void)__wt_atomic_subv32(&btree->evict_busy, 1);
+ if (app_timer) {
+ __wt_epoch(session, &leave);
+ WT_STAT_CONN_INCRV(session,
+ application_evict_time, WT_TIMEDIFF_US(leave, enter));
+ }
return (ret);
}
@@ -1767,6 +1781,7 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
int
__wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
{
+ struct timespec enter, leave;
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
@@ -1792,9 +1807,11 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
/* Wake the eviction server if we need to do work. */
__wt_evict_server_wake(session);
- init_evict_count = cache->pages_evict;
+ /* Track how long application threads spend doing eviction. */
+ if (WT_STAT_ENABLED(session) && !F_ISSET(session, WT_SESSION_INTERNAL))
+ __wt_epoch(session, &enter);
- for (;;) {
+ for (init_evict_count = cache->pages_evict;; ret = 0) {
/*
* A pathological case: if we're the oldest transaction in the
* system and the eviction server is stuck trying to find space,
@@ -1804,7 +1821,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
if (__wt_cache_stuck(session) && __wt_txn_am_oldest(session)) {
--cache->evict_aggressive_score;
WT_STAT_CONN_INCR(session, txn_fail_cache);
- return (WT_ROLLBACK);
+ WT_ERR(WT_ROLLBACK);
}
/*
@@ -1816,7 +1833,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
* limit the work to 5 evictions and return. If that's not the
* case, we can do more.
*/
- if (!busy && txn_state->snap_min != WT_TXN_NONE &&
+ if (!busy && txn_state->pinned_id != WT_TXN_NONE &&
txn_global->current != txn_global->oldest_id)
busy = true;
max_pages_evicted = busy ? 5 : 20;
@@ -1825,7 +1842,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
if (!__wt_eviction_needed(session, busy, &pct_full) ||
(pct_full < 100 &&
cache->pages_evict > init_evict_count + max_pages_evicted))
- return (0);
+ break;
/*
* Don't make application threads participate in scrubbing for
@@ -1842,7 +1859,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
switch (ret = __evict_page(session, false)) {
case 0:
if (busy)
- return (0);
+ goto err;
/* FALLTHROUGH */
case EBUSY:
break;
@@ -1853,9 +1870,18 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
cache->app_waits++;
break;
default:
- return (ret);
+ goto err;
}
}
+
+err: if (WT_STAT_ENABLED(session) &&
+ !F_ISSET(session, WT_SESSION_INTERNAL)) {
+ __wt_epoch(session, &leave);
+ WT_STAT_CONN_INCRV(session,
+ application_cache_time, WT_TIMEDIFF_US(leave, enter));
+ }
+
+ return (ret);
/* NOTREACHED */
}
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index 092f80cc000..3d1557e027e 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -31,23 +31,14 @@ __evict_exclusive_clear(WT_SESSION_IMPL *session, WT_REF *ref)
static inline int
__evict_exclusive(WT_SESSION_IMPL *session, WT_REF *ref)
{
- int loops;
-
WT_ASSERT(session, ref->state == WT_REF_LOCKED);
/*
* Check for a hazard pointer indicating another thread is using the
* page, meaning the page cannot be evicted.
*/
- for (loops = 0; loops < 10; loops++) {
- if (__wt_page_hazard_check(session, ref->page) == NULL)
- return (0);
- if (ref->page->read_gen != WT_READGEN_OLDEST &&
- ref->page->memory_footprint <
- S2BT(session)->split_deepen_min_child)
- break;
- __wt_sleep(0, WT_THOUSAND);
- }
+ if (__wt_page_hazard_check(session, ref->page) == NULL)
+ return (0);
WT_STAT_DATA_INCR(session, cache_eviction_hazard);
WT_STAT_CONN_INCR(session, cache_eviction_hazard);
diff --git a/src/third_party/wiredtiger/src/evict/evict_stat.c b/src/third_party/wiredtiger/src/evict/evict_stat.c
new file mode 100644
index 00000000000..2dd3b1e83a0
--- /dev/null
+++ b/src/third_party/wiredtiger/src/evict/evict_stat.c
@@ -0,0 +1,138 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __evict_stat_walk --
+ * Walk all the pages in cache for a dhandle gathering stats information.
+ *
+ * The tree is walked with the WT_READ_CACHE, WT_READ_NO_EVICT and
+ * WT_READ_NO_WAIT flags, and the totals gathered are published in the
+ * session's data-source cache_state_* statistics. Averages and the
+ * minimum written size are reported as zero when no page contributed
+ * to them.
+ */
+static void
+__evict_stat_walk(WT_SESSION_IMPL *session)
+{
+ WT_BTREE *btree;
+ WT_PAGE *page;
+ WT_REF *next_walk;
+ uint64_t dsk_size, gen_gap, size;
+ uint64_t written_size_cnt, written_size_sum;
+ uint64_t gen_gap_cnt, gen_gap_max, gen_gap_sum;
+ uint64_t max_pagesize, min_written_size;
+ uint64_t num_memory, num_queued, num_not_queueable, num_smaller_allocsz;
+ uint64_t pages_clean, pages_dirty, pages_internal, pages_leaf;
+ uint64_t seen_count, walk_count;
+
+ btree = S2BT(session);
+ next_walk = NULL;
+ written_size_cnt = written_size_sum = 0;
+ gen_gap_cnt = gen_gap_max = gen_gap_sum = 0;
+ max_pagesize = 0;
+ num_memory = num_queued = num_not_queueable = num_smaller_allocsz = 0;
+ pages_clean = pages_dirty = pages_internal = pages_leaf = 0;
+ seen_count = walk_count = 0;
+ /* Start at the maximum so the first real size replaces it. */
+ min_written_size = UINT64_MAX;
+
+ /* Stop when the tree walk fails or returns no further page. */
+ while (__wt_tree_walk_count(session, &next_walk, &walk_count,
+ WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_WAIT) == 0 &&
+ next_walk != NULL) {
+ ++seen_count;
+ page = next_walk->page;
+ size = page->memory_footprint;
+
+ if (__wt_page_is_modified(page))
+ ++pages_dirty;
+ else
+ ++pages_clean;
+
+ /* Count non-root pages that __wt_page_can_evict rejects. */
+ if (!__wt_ref_is_root(next_walk) &&
+ !__wt_page_can_evict(session, next_walk, NULL))
+ ++num_not_queueable;
+
+ if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU))
+ ++num_queued;
+
+ if (size > max_pagesize)
+ max_pagesize = size;
+
+ /*
+ * Pages without a disk image (or with a zero-sized one) are
+ * counted as memory-only pages, the rest feed the written
+ * size statistics.
+ */
+ dsk_size = page->dsk != NULL ? page->dsk->mem_size : 0;
+ if (dsk_size != 0) {
+ if (dsk_size < btree->allocsize)
+ ++num_smaller_allocsz;
+ if (dsk_size < min_written_size)
+ min_written_size = dsk_size;
+ ++written_size_cnt;
+ written_size_sum += dsk_size;
+ } else
+ ++num_memory;
+
+ if (WT_PAGE_IS_INTERNAL(page))
+ ++pages_internal;
+ else
+ ++pages_leaf;
+
+ /* Skip root pages since they are never considered */
+ if (__wt_ref_is_root(next_walk))
+ continue;
+
+ /*
+ * Gap between the cache's current eviction pass generation and
+ * the generation recorded on the page.
+ */
+ gen_gap =
+ S2C(session)->cache->evict_pass_gen - page->evict_pass_gen;
+ if (gen_gap > gen_gap_max)
+ gen_gap_max = gen_gap;
+ gen_gap_sum += gen_gap;
+ ++gen_gap_cnt;
+ }
+
+ /* Guard the averages against a zero count. */
+ WT_STAT_DATA_SET(session, cache_state_avg_written_size,
+ written_size_cnt == 0 ? 0 : written_size_sum / written_size_cnt);
+ WT_STAT_DATA_SET(session, cache_state_gen_avg_gap,
+ gen_gap_cnt == 0 ? 0 : gen_gap_sum / gen_gap_cnt);
+
+ WT_STAT_DATA_SET(session, cache_state_gen_max_gap, gen_gap_max);
+ WT_STAT_DATA_SET(session, cache_state_max_pagesize, max_pagesize);
+ /* Don't report the UINT64_MAX sentinel if no written page was seen. */
+ WT_STAT_DATA_SET(session, cache_state_min_written_size,
+ written_size_cnt == 0 ? 0 : min_written_size);
+ WT_STAT_DATA_SET(session, cache_state_memory, num_memory);
+ WT_STAT_DATA_SET(session, cache_state_queued, num_queued);
+ WT_STAT_DATA_SET(session, cache_state_not_queueable, num_not_queueable);
+ WT_STAT_DATA_SET(session,
+ cache_state_smaller_alloc_size, num_smaller_allocsz);
+ WT_STAT_DATA_SET(session, cache_state_pages, walk_count);
+ WT_STAT_DATA_SET(session, cache_state_pages_clean, pages_clean);
+ WT_STAT_DATA_SET(session, cache_state_pages_dirty, pages_dirty);
+ WT_STAT_DATA_SET(session, cache_state_pages_internal, pages_internal);
+ WT_STAT_DATA_SET(session, cache_state_pages_leaf, pages_leaf);
+ /*
+ * walk_count is maintained by __wt_tree_walk_count, seen_count is the
+ * number of pages this loop visited; report the difference.
+ */
+ WT_STAT_DATA_SET(session,
+ cache_state_refs_skipped, walk_count - seen_count);
+}
+
+/*
+ * __wt_curstat_cache_walk --
+ * Fill in the cache-walk statistics for the session's current btree:
+ * first the values that need no walk (the current eviction pass
+ * generation and the root page's entries and size), then the values
+ * gathered by __evict_stat_walk.
+ */
+void
+__wt_curstat_cache_walk(WT_SESSION_IMPL *session)
+{
+ WT_BTREE *btree;
+ WT_CONNECTION_IMPL *conn;
+ WT_PAGE_INDEX *root_idx;
+
+ btree = S2BT(session);
+ conn = S2C(session);
+
+ /* Set statistics that don't require walking the cache. */
+ WT_STAT_DATA_SET(session,
+ cache_state_gen_current, conn->cache->evict_pass_gen);
+
+ /* Root page statistics */
+ root_idx = WT_INTL_INDEX_GET_SAFE(btree->root.page);
+ WT_STAT_DATA_SET(session,
+ cache_state_root_entries, root_idx->entries);
+ WT_STAT_DATA_SET(session,
+ cache_state_root_size, btree->root.page->memory_footprint);
+
+ /* Walk the tree's pages under WT_WITH_HANDLE_LIST_LOCK. */
+ WT_WITH_HANDLE_LIST_LOCK(session, __evict_stat_walk(session));
+}
diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h
index e1b2f8edaf3..2783d17f825 100644
--- a/src/third_party/wiredtiger/src/include/api.h
+++ b/src/third_party/wiredtiger/src/include/api.h
@@ -139,7 +139,9 @@
(s) = (WT_SESSION_IMPL *)(cur)->session; \
TXN_API_CALL_NOCONF(s, WT_CURSOR, n, cur, \
((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle); \
- if (F_ISSET(S2C(s), WT_CONN_IN_MEMORY) && __wt_cache_full(s)) \
+ if (F_ISSET(S2C(s), WT_CONN_IN_MEMORY) && \
+ !F_ISSET((WT_BTREE *)(bt), WT_BTREE_IGNORE_CACHE) && \
+ __wt_cache_full(s)) \
WT_ERR(WT_CACHE_FULL);
#define JOINABLE_CURSOR_UPDATE_API_CALL(cur, s, n, bt) \
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index b4ca937e7ed..84c91097a99 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -430,6 +430,8 @@ struct __wt_page_modify {
#define WT_PM_REC_MULTIBLOCK 2 /* Reconciliation: multiple blocks */
#define WT_PM_REC_REPLACE 3 /* Reconciliation: single block */
uint8_t rec_result; /* Reconciliation state */
+
+ uint8_t update_restored; /* Page created by restoring updates */
};
/*
@@ -619,6 +621,8 @@ struct __wt_page {
#define WT_READGEN_START_VALUE 100
#define WT_READGEN_STEP 100
uint64_t read_gen;
+ /* The evict pass generation for the page */
+ uint64_t evict_pass_gen;
size_t memory_footprint; /* Memory attached to the page */
diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h
index cfaf59e70e1..713d46ae85f 100644
--- a/src/third_party/wiredtiger/src/include/btree.h
+++ b/src/third_party/wiredtiger/src/include/btree.h
@@ -119,7 +119,7 @@ struct __wt_btree {
uint64_t last_recno; /* Column-store last record number */
WT_REF root; /* Root page reference */
- int modified; /* If the tree ever modified */
+ bool modified; /* If the tree ever modified */
bool bulk_load_ok; /* Bulk-load is a possibility */
WT_BM *bm; /* Block manager reference */
@@ -154,18 +154,19 @@ struct __wt_btree {
WT_SPINLOCK flush_lock; /* Lock to flush the tree's pages */
/* Flags values up to 0xff are reserved for WT_DHANDLE_* */
-#define WT_BTREE_BULK 0x00100 /* Bulk-load handle */
-#define WT_BTREE_IN_MEMORY 0x00200 /* Cache-resident object */
-#define WT_BTREE_LOOKASIDE 0x00400 /* Look-aside table */
-#define WT_BTREE_NO_CHECKPOINT 0x00800 /* Disable checkpoints */
-#define WT_BTREE_NO_EVICTION 0x01000 /* Disable eviction */
-#define WT_BTREE_NO_LOGGING 0x02000 /* Disable logging */
-#define WT_BTREE_NO_RECONCILE 0x04000 /* Allow splits, even with no evict */
-#define WT_BTREE_REBALANCE 0x08000 /* Handle is for rebalance */
-#define WT_BTREE_SALVAGE 0x10000 /* Handle is for salvage */
-#define WT_BTREE_SKIP_CKPT 0x20000 /* Handle skipped checkpoint */
-#define WT_BTREE_UPGRADE 0x40000 /* Handle is for upgrade */
-#define WT_BTREE_VERIFY 0x80000 /* Handle is for verify */
+#define WT_BTREE_BULK 0x000100 /* Bulk-load handle */
+#define WT_BTREE_IGNORE_CACHE 0x000200 /* Ignore in-memory cache-full check */
+#define WT_BTREE_IN_MEMORY 0x000400 /* Cache-resident object */
+#define WT_BTREE_LOOKASIDE 0x000800 /* Look-aside table */
+#define WT_BTREE_NO_CHECKPOINT 0x001000 /* Disable checkpoints */
+#define WT_BTREE_NO_EVICTION 0x002000 /* Disable eviction */
+#define WT_BTREE_NO_LOGGING 0x004000 /* Disable logging */
+#define WT_BTREE_NO_RECONCILE 0x008000 /* Allow splits, even with no evict */
+#define WT_BTREE_REBALANCE 0x010000 /* Handle is for rebalance */
+#define WT_BTREE_SALVAGE 0x020000 /* Handle is for salvage */
+#define WT_BTREE_SKIP_CKPT 0x040000 /* Handle skipped checkpoint */
+#define WT_BTREE_UPGRADE 0x080000 /* Handle is for upgrade */
+#define WT_BTREE_VERIFY 0x100000 /* Handle is for verify */
uint32_t flags;
};
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index a9ce4f754a9..daf2eb158c1 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -485,6 +485,38 @@ __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page)
}
/*
+ * __wt_tree_modify_set --
+ * Mark the tree dirty, and mark the connection dirty as well.
+ */
+static inline void
+__wt_tree_modify_set(WT_SESSION_IMPL *session)
+{
+ /*
+ * Test before setting the dirty flag, it's a hot cache line.
+ *
+ * The tree's modified flag is cleared by the checkpoint thread: set it
+ * and insert a barrier before dirtying the page. (I don't think it's
+ * a problem if the tree is marked dirty with all the pages clean, it
+ * might result in an extra checkpoint that doesn't do any work but it
+ * shouldn't cause problems; regardless, let's play it safe.)
+ */
+ if (!S2BT(session)->modified) {
+ /* Assert we never dirty a checkpoint handle. */
+ WT_ASSERT(session, session->dhandle->checkpoint == NULL);
+
+ S2BT(session)->modified = true;
+ WT_FULL_BARRIER();
+ }
+
+ /*
+ * The btree may already be marked dirty while the connection is still
+ * clean; mark the connection dirty outside the test of the btree state.
+ * As with the tree, test before setting to avoid a needless write.
+ */
+ if (!S2C(session)->modified)
+ S2C(session)->modified = true;
+}
+
+/*
* __wt_page_modify_clear --
* Clean a modified page.
*/
@@ -513,22 +545,9 @@ __wt_page_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page)
/*
* Mark the tree dirty (even if the page is already marked dirty), newly
* created pages to support "empty" files are dirty, but the file isn't
- * marked dirty until there's a real change needing to be written. Test
- * before setting the dirty flag, it's a hot cache line.
- *
- * The tree's modified flag is cleared by the checkpoint thread: set it
- * and insert a barrier before dirtying the page. (I don't think it's
- * a problem if the tree is marked dirty with all the pages clean, it
- * might result in an extra checkpoint that doesn't do any work but it
- * shouldn't cause problems; regardless, let's play it safe.)
+ * marked dirty until there's a real change needing to be written.
*/
- if (S2BT(session)->modified == 0) {
- /* Assert we never dirty a checkpoint handle. */
- WT_ASSERT(session, session->dhandle->checkpoint == NULL);
-
- S2BT(session)->modified = 1;
- WT_FULL_BARRIER();
- }
+ __wt_tree_modify_set(session);
__wt_page_only_modify_set(session, page);
}
@@ -1159,15 +1178,7 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
* There is no point doing an in-memory split unless there is a lot of
* data in the last skiplist on the page. Split if there are enough
* items and the skiplist does not fit within a single disk page.
- *
- * Rather than scanning the whole list, walk a higher level, which
- * gives a sample of the items -- at level 0 we have all the items, at
- * level 1 we have 1/4 and at level 2 we have 1/16th. If we see more
- * than 30 items and more data than would fit in a disk page, split.
*/
-#define WT_MIN_SPLIT_DEPTH 2
-#define WT_MIN_SPLIT_COUNT 30
-#define WT_MIN_SPLIT_MULTIPLIER 16 /* At level 2, we see 1/16th entries */
ins_head = page->type == WT_PAGE_ROW_LEAF ?
(page->pg_row_entries == 0 ?
@@ -1176,8 +1187,40 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
WT_COL_APPEND(page);
if (ins_head == NULL)
return (false);
+
+ /*
+ * In the extreme case, where the page is much larger than the maximum
+ * size, split as soon as there are 5 items on the page.
+ */
+#define WT_MAX_SPLIT_COUNT 5
+ if (page->memory_footprint > btree->maxleafpage * 2) {
+ for (count = 0, ins = ins_head->head[0];
+ ins != NULL;
+ ins = ins->next[0]) {
+ if (++count < WT_MAX_SPLIT_COUNT)
+ continue;
+
+ WT_STAT_CONN_INCR(session, cache_inmem_splittable);
+ WT_STAT_DATA_INCR(session, cache_inmem_splittable);
+ return (true);
+ }
+
+ return (false);
+ }
+
+ /*
+ * Rather than scanning the whole list, walk a higher level, which
+ * gives a sample of the items -- at level 0 we have all the items, at
+ * level 1 we have 1/4 and at level 2 we have 1/16th. If we see more
+ * than 30 items and more data than would fit in a disk page, split.
+ */
+#define WT_MIN_SPLIT_DEPTH 2
+#define WT_MIN_SPLIT_COUNT 30
+#define WT_MIN_SPLIT_MULTIPLIER 16 /* At level 2, we see 1/16th entries */
+
for (count = 0, size = 0, ins = ins_head->head[WT_MIN_SPLIT_DEPTH];
- ins != NULL; ins = ins->next[WT_MIN_SPLIT_DEPTH]) {
+ ins != NULL;
+ ins = ins->next[WT_MIN_SPLIT_DEPTH]) {
count += WT_MIN_SPLIT_MULTIPLIER;
size += WT_MIN_SPLIT_MULTIPLIER *
(WT_INSERT_KEY_SIZE(ins) + WT_UPDATE_MEMSIZE(ins->upd));
diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h
index b24b625aec4..9a2b83b5b57 100644
--- a/src/third_party/wiredtiger/src/include/cache.h
+++ b/src/third_party/wiredtiger/src/include/cache.h
@@ -91,6 +91,7 @@ struct __wt_cache {
uint64_t read_gen; /* Current page read generation */
uint64_t read_gen_oldest; /* Oldest read generation the eviction
* server saw in its last queue load */
+ uint64_t evict_pass_gen; /* Number of eviction passes */
/*
* Eviction thread information.
diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i
index 4255d04ec37..17ab39e97d2 100644
--- a/src/third_party/wiredtiger/src/include/cache.i
+++ b/src/third_party/wiredtiger/src/include/cache.i
@@ -355,7 +355,7 @@ __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool *didworkp)
txn_state = WT_SESSION_TXN_STATE(session);
busy = busy || txn_state->id != WT_TXN_NONE ||
session->nhazard > 0 ||
- (txn_state->snap_min != WT_TXN_NONE &&
+ (txn_state->pinned_id != WT_TXN_NONE &&
txn_global->current != txn_global->oldest_id);
/*
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index e19ad684b24..d7c3bf69686 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -285,13 +285,7 @@ struct __wt_connection_impl {
uint64_t ckpt_time_recent; /* Checkpoint time recent/total */
uint64_t ckpt_time_total;
-#define WT_CONN_STAT_ALL 0x01 /* "all" statistics configured */
-#define WT_CONN_STAT_CLEAR 0x02 /* clear after gathering */
-#define WT_CONN_STAT_FAST 0x04 /* "fast" statistics configured */
-#define WT_CONN_STAT_JSON 0x08 /* output JSON format */
-#define WT_CONN_STAT_ON_CLOSE 0x10 /* output statistics on close */
-#define WT_CONN_STAT_SIZE 0x20 /* "size" statistics configured */
- uint32_t stat_flags;
+ uint32_t stat_flags; /* Options declared in flags.py */
/* Connection statistics */
WT_CONNECTION_STATS *stats[WT_COUNTER_SLOTS];
@@ -352,6 +346,12 @@ struct __wt_connection_impl {
WT_SESSION_IMPL *meta_ckpt_session;/* Metadata checkpoint session */
+ /*
+ * Set when there is a data/schema change that needs to be part of a
+ * checkpoint.
+ */
+ bool modified;
+
WT_SESSION_IMPL *sweep_session; /* Handle sweep session */
wt_thread_t sweep_tid; /* Handle sweep thread */
int sweep_tid_set; /* Handle sweep thread set */
diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h
index f1fa4d193ac..e322a53a65d 100644
--- a/src/third_party/wiredtiger/src/include/cursor.h
+++ b/src/third_party/wiredtiger/src/include/cursor.h
@@ -467,7 +467,7 @@ struct __wt_cursor_stat {
uint64_t v; /* Current stats value */
WT_ITEM pv; /* Current stats value (string) */
- /* Uses the same values as WT_CONNECTION::stat_flags field */
+ /* Options declared in flags.py, shared by WT_CONNECTION::stat_flags */
uint32_t flags;
};
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 5444b2e9f14..79e6405e148 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -282,6 +282,7 @@ extern int __wt_curbulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bo
extern int __wt_curconfig_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_curds_open( WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_DATA_SOURCE *dsrc, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curfile_next_random(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_curfile_update_check(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -352,6 +353,7 @@ extern void __wt_evict_priority_clear(WT_SESSION_IMPL *session);
extern int __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void __wt_curstat_cache_walk(WT_SESSION_IMPL *session);
extern void __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn);
extern int __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn);
@@ -677,7 +679,7 @@ extern uint32_t __wt_log2_int(uint32_t n);
extern bool __wt_ispo2(uint32_t v);
extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2);
extern void __wt_random_init(WT_RAND_STATE volatile *rnd_state);
-extern int __wt_random_init_seed( WT_SESSION_IMPL *session, WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void __wt_random_init_seed( WT_SESSION_IMPL *session, WT_RAND_STATE volatile *rnd_state);
extern uint32_t __wt_random(WT_RAND_STATE volatile *rnd_state);
extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -723,7 +725,7 @@ extern int __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_F
extern void __wt_txn_release(WT_SESSION_IMPL *session);
extern int __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_init(WT_SESSION_IMPL *session, WT_SESSION_IMPL *session_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_txn_stats_update(WT_SESSION_IMPL *session);
extern void __wt_txn_destroy(WT_SESSION_IMPL *session);
extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/third_party/wiredtiger/src/include/extern_posix.h b/src/third_party/wiredtiger/src/include/extern_posix.h
index d2f74d2ffe4..fd94ef0ddf2 100644
--- a/src/third_party/wiredtiger/src/include/extern_posix.h
+++ b/src/third_party/wiredtiger/src/include/extern_posix.h
@@ -27,5 +27,5 @@ extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds);
extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_thread_id(char *buf, size_t buflen);
-extern int __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp);
extern void __wt_yield(void);
diff --git a/src/third_party/wiredtiger/src/include/extern_win.h b/src/third_party/wiredtiger/src/include/extern_win.h
index 8c2b19056e0..f06ee881ece 100644
--- a/src/third_party/wiredtiger/src/include/extern_win.h
+++ b/src/third_party/wiredtiger/src/include/extern_win.h
@@ -25,7 +25,7 @@ extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds);
extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_thread_id(char *buf, size_t buflen);
-extern int __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp);
extern int __wt_to_utf16_string( WT_SESSION_IMPL *session, const char*utf8, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_to_utf8_string( WT_SESSION_IMPL *session, const wchar_t*wide, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern DWORD __wt_getlasterror(void);
diff --git a/src/third_party/wiredtiger/src/include/flags.h b/src/third_party/wiredtiger/src/include/flags.h
index 5d718da473d..b0d167525b2 100644
--- a/src/third_party/wiredtiger/src/include/flags.h
+++ b/src/third_party/wiredtiger/src/include/flags.h
@@ -70,6 +70,14 @@
#define WT_SESSION_NO_SCHEMA_LOCK 0x00020000
#define WT_SESSION_QUIET_CORRUPT_FILE 0x00040000
#define WT_SESSION_SERVER_ASYNC 0x00080000
+#define WT_STAT_CLEAR 0x00000001
+#define WT_STAT_JSON 0x00000002
+#define WT_STAT_ON_CLOSE 0x00000004
+#define WT_STAT_TYPE_ALL 0x00000008
+#define WT_STAT_TYPE_CACHE_WALK 0x00000010
+#define WT_STAT_TYPE_FAST 0x00000020
+#define WT_STAT_TYPE_SIZE 0x00000040
+#define WT_STAT_TYPE_TREE_WALK 0x00000080
#define WT_TXN_LOG_CKPT_CLEANUP 0x00000001
#define WT_TXN_LOG_CKPT_PREPARE 0x00000002
#define WT_TXN_LOG_CKPT_START 0x00000004
diff --git a/src/third_party/wiredtiger/src/include/lsm.h b/src/third_party/wiredtiger/src/include/lsm.h
index 2550ca444c1..b433e4c3c44 100644
--- a/src/third_party/wiredtiger/src/include/lsm.h
+++ b/src/third_party/wiredtiger/src/include/lsm.h
@@ -31,6 +31,17 @@ struct __wt_lsm_worker_args {
};
/*
+ * WT_LSM_CURSOR_CHUNK --
+ * Per-chunk LSM cursor state: the access points and bookkeeping for one chunk.
+ */
+struct __wt_lsm_cursor_chunk {
+ WT_BLOOM *bloom; /* Bloom filter handle for this chunk */
+ WT_CURSOR *cursor; /* Cursor handle for this chunk */
+ uint64_t count; /* Number of items in this chunk */
+ uint64_t switch_txn; /* Switch txn for this chunk */
+};
+
+/*
* WT_CURSOR_LSM --
* An LSM cursor.
*/
@@ -43,17 +54,12 @@ struct __wt_cursor_lsm {
u_int nchunks; /* Number of chunks in the cursor */
u_int nupdates; /* Updates needed (including
snapshot isolation checks). */
- WT_BLOOM **blooms; /* Bloom filter handles. */
- size_t bloom_alloc;
-
- WT_CURSOR **cursors; /* Cursor handles. */
- size_t cursor_alloc;
-
- WT_CURSOR *current; /* The current cursor for iteration */
+ WT_CURSOR *current; /* The current cursor for iteration */
WT_LSM_CHUNK *primary_chunk; /* The current primary chunk */
- uint64_t *switch_txn; /* Switch txn for each chunk */
- size_t txnid_alloc;
+ WT_LSM_CURSOR_CHUNK **chunks; /* Array of LSM cursor units */
+ size_t chunks_alloc; /* Current size iterators array */
+ size_t chunks_count; /* Current number of iterators */
u_int update_count; /* Updates performed. */
diff --git a/src/third_party/wiredtiger/src/include/misc.i b/src/third_party/wiredtiger/src/include/misc.i
index f267c7afc91..befd480e085 100644
--- a/src/third_party/wiredtiger/src/include/misc.i
+++ b/src/third_party/wiredtiger/src/include/misc.i
@@ -33,16 +33,14 @@ __wt_strdup(WT_SESSION_IMPL *session, const char *str, void *retp)
* __wt_seconds --
* Return the seconds since the Epoch.
*/
-static inline int
+static inline void
__wt_seconds(WT_SESSION_IMPL *session, time_t *timep)
{
struct timespec t;
- WT_RET(__wt_epoch(session, &t));
+ __wt_epoch(session, &t); /* __wt_epoch is now void: no error to propagate */
*timep = t.tv_sec;
-
- return (0);
}
/*
diff --git a/src/third_party/wiredtiger/src/include/mutex.h b/src/third_party/wiredtiger/src/include/mutex.h
index f0f8173bad4..b736d6ee9fb 100644
--- a/src/third_party/wiredtiger/src/include/mutex.h
+++ b/src/third_party/wiredtiger/src/include/mutex.h
@@ -74,6 +74,16 @@ struct __wt_rwlock {
struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_spinlock {
volatile int lock;
+
+ /*
+ * We track acquisitions and time spent waiting for some locks. For
+ * performance and so generic code can track statistics for different
+ * locks, we store the offsets of the statistics fields to update during
+ * lock acquisition; __wt_spin_init sets them to -1, meaning untracked.
+ */
+ int16_t stat_count_off; /* acquisition count offset */
+ int16_t stat_app_usecs_off; /* application thread wait usecs offset */
+ int16_t stat_int_usecs_off; /* internal (server) thread wait usecs offset */
};
#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\
@@ -83,7 +93,17 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_spinlock {
struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_spinlock {
wt_mutex_t lock;
- const char *name; /* Statistics: mutex name */
+ const char *name; /* Mutex name */
+
+ /*
+ * We track acquisitions and time spent waiting for some locks. For
+ * performance and so generic code can track statistics for different
+ * locks, we store the offsets of the statistics fields to update during
+ * lock acquisition; __wt_spin_init sets them to -1, meaning untracked.
+ */
+ int16_t stat_count_off; /* acquisition count offset */
+ int16_t stat_app_usecs_off; /* application thread wait usecs offset */
+ int16_t stat_int_usecs_off; /* internal (server) thread wait usecs offset */
int8_t initialized; /* Lock initialized, for cleanup */
};
diff --git a/src/third_party/wiredtiger/src/include/mutex.i b/src/third_party/wiredtiger/src/include/mutex.i
index cb1847d9991..a6309e0976b 100644
--- a/src/third_party/wiredtiger/src/include/mutex.i
+++ b/src/third_party/wiredtiger/src/include/mutex.i
@@ -32,6 +32,7 @@ __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name)
WT_UNUSED(name);
t->lock = 0;
+ t->stat_count_off = t->stat_app_usecs_off = t->stat_int_usecs_off = -1;
return (0);
}
@@ -111,6 +112,7 @@ __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name)
#endif
t->name = name;
+ t->stat_count_off = t->stat_app_usecs_off = t->stat_int_usecs_off = -1;
t->initialized = 1;
WT_UNUSED(session);
@@ -255,3 +257,46 @@ __wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
#error Unknown spinlock type
#endif
+
+/*
+ * WT_SPIN_INIT_TRACKED --
+ * Initialize spinlock t, named #name, and store the statistics offsets of
+ * lock_<name>_count, lock_<name>_wait_application, lock_<name>_wait_internal.
+ * Implemented as a macro so "name" can be pasted into those field names;
+ * expands WT_RET (NOTE(review): presumably returns from the caller on error).
+ */
+#define WT_SPIN_INIT_TRACKED(session, t, name) do { \
+ WT_RET(__wt_spin_init(session, t, #name)); \
+ (t)->stat_count_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \
+ S2C(session)->stats, lock_##name##_count); \
+ (t)->stat_app_usecs_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \
+ S2C(session)->stats, lock_##name##_wait_application); \
+ (t)->stat_int_usecs_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \
+ S2C(session)->stats, lock_##name##_wait_internal); \
+} while (0)
+
+/*
+ * __wt_spin_lock_track --
+ * Spinlock acquisition; counts and times the wait when the lock is tracked.
+ */
+static inline void
+__wt_spin_lock_track(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
+{
+ struct timespec enter, leave;
+ int64_t **stats;
+
+ if (t->stat_count_off != -1 && WT_STAT_ENABLED(session)) { /* -1: untracked */
+ __wt_epoch(session, &enter);
+ __wt_spin_lock(session, t);
+ __wt_epoch(session, &leave);
+ stats = (int64_t **)S2C(session)->stats; /* indexed [bucket][offset] */
+ stats[session->stat_bucket][t->stat_count_off]++;
+ if (F_ISSET(session, WT_SESSION_INTERNAL)) /* internal session's wait */
+ stats[session->stat_bucket][t->stat_int_usecs_off] +=
+ (int64_t)WT_TIMEDIFF_US(leave, enter);
+ else /* application session's wait */
+ stats[session->stat_bucket][t->stat_app_usecs_off] +=
+ (int64_t)WT_TIMEDIFF_US(leave, enter);
+ } else
+ __wt_spin_lock(session, t);
+}
diff --git a/src/third_party/wiredtiger/src/include/schema.h b/src/third_party/wiredtiger/src/include/schema.h
index f93c596e2ca..6a5ce67a867 100644
--- a/src/third_party/wiredtiger/src/include/schema.h
+++ b/src/third_party/wiredtiger/src/include/schema.h
@@ -86,11 +86,11 @@ struct __wt_table {
if (F_ISSET(session, (flag))) { \
op; \
} else { \
- __wt_spin_lock(session, (lock)); \
+ __wt_spin_lock_track(session, lock); \
F_SET(session, (flag)); \
op; \
F_CLR(session, (flag)); \
- __wt_spin_unlock(session, (lock)); \
+ __wt_spin_unlock(session, lock); \
} \
} while (0)
@@ -102,11 +102,11 @@ struct __wt_table {
ret = 0; \
if (!F_ISSET(session, (flag)) && \
F_ISSET(session, WT_SESSION_LOCK_NO_WAIT)) { \
- if ((ret = __wt_spin_trylock(session, (lock))) == 0) { \
+ if ((ret = __wt_spin_trylock(session, lock)) == 0) { \
F_SET(session, (flag)); \
op; \
F_CLR(session, (flag)); \
- __wt_spin_unlock(session, (lock)); \
+ __wt_spin_unlock(session, lock); \
} \
} else \
WT_WITH_LOCK_WAIT(session, lock, flag, op); \
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index aa51dae58c4..3f9f495c134 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -147,6 +147,9 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl {
void *reconcile; /* Reconciliation support */
int (*reconcile_cleanup)(WT_SESSION_IMPL *);
+ /* Each session has an associated statistics bucket based on its ID. */
+ u_int stat_bucket; /* Statistics bucket offset */
+
uint32_t flags;
/*
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index cd0cae16826..d0b0b60585a 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -79,9 +79,9 @@
* those structures regardless of the specific statistic structure we're working
* with, by translating statistics structure field names to structure offsets.
*
- * Translate a statistic's value name to an offset.
+ * Translate a statistic's value name to an offset in the array.
*/
-#define WT_STATS_FIELD_TO_SLOT(stats, fld) \
+#define WT_STATS_FIELD_TO_OFFSET(stats, fld) \
(int)(&(stats)[0]->fld - (int64_t *)(stats)[0])
/*
@@ -140,38 +140,54 @@ __wt_stats_clear(void *stats_arg, int slot)
#define WT_STAT_ENABLED(session) (S2C(session)->stat_flags != 0)
#define WT_STAT_READ(stats, fld) \
- __wt_stats_aggregate(stats, WT_STATS_FIELD_TO_SLOT(stats, fld))
+ __wt_stats_aggregate(stats, WT_STATS_FIELD_TO_OFFSET(stats, fld))
#define WT_STAT_WRITE(session, stats, fld, v) do { \
if (WT_STAT_ENABLED(session)) \
(stats)->fld = (int64_t)(v); \
} while (0)
-#define WT_STAT_DECRV(session, stats, fld, value) do { \
+#define WT_STAT_DECRV_BASE(session, stat, fld, value) do { \
if (WT_STAT_ENABLED(session)) \
- (stats)[WT_STATS_SLOT_ID(session)]->fld -= (int64_t)(value); \
+ (stat)->fld -= (int64_t)(value); \
} while (0)
-#define WT_STAT_DECRV_ATOMIC(session, stats, fld, value) do { \
+#define WT_STAT_DECRV_ATOMIC_BASE(session, stat, fld, value) do { \
+ if (WT_STAT_ENABLED(session)) \
+ __wt_atomic_subi64(&(stat)->fld, (int64_t)(value)); \
+} while (0)
+#define WT_STAT_INCRV_BASE(session, stat, fld, value) do { \
+ if (WT_STAT_ENABLED(session)) \
+ (stat)->fld += (int64_t)(value); \
+} while (0)
+#define WT_STAT_INCRV_ATOMIC_BASE(session, stat, fld, value) do { \
if (WT_STAT_ENABLED(session)) \
- __wt_atomic_subi64(&(stats)[WT_STATS_SLOT_ID(session)]->fld, \
- (int64_t)(value)); \
+ __wt_atomic_addi64(&(stat)->fld, (int64_t)(value)); \
+} while (0)
+
+#define WT_STAT_DECRV(session, stats, fld, value) do { \
+ WT_STAT_DECRV_BASE( \
+ session, (stats)[(session)->stat_bucket], fld, value); \
+} while (0)
+#define WT_STAT_DECRV_ATOMIC(session, stats, fld, value) do { \
+ WT_STAT_DECRV_ATOMIC_BASE( \
+ session, (stats)[(session)->stat_bucket], fld, value); \
} while (0)
#define WT_STAT_DECR(session, stats, fld) \
WT_STAT_DECRV(session, stats, fld, 1)
+
#define WT_STAT_INCRV(session, stats, fld, value) do { \
- if (WT_STAT_ENABLED(session)) \
- (stats)[WT_STATS_SLOT_ID(session)]->fld += (int64_t)(value); \
+ WT_STAT_INCRV_BASE( \
+ session, (stats)[(session)->stat_bucket], fld, value); \
} while (0)
#define WT_STAT_INCRV_ATOMIC(session, stats, fld, value) do { \
- if (WT_STAT_ENABLED(session)) \
- __wt_atomic_addi64(&(stats)[WT_STATS_SLOT_ID(session)]->fld, \
- (int64_t)(value)); \
+ WT_STAT_INCRV_ATOMIC_BASE( \
+ session, (stats)[(session)->stat_bucket], fld, value); \
} while (0)
#define WT_STAT_INCR(session, stats, fld) \
WT_STAT_INCRV(session, stats, fld, 1)
#define WT_STAT_SET(session, stats, fld, value) do { \
if (WT_STAT_ENABLED(session)) { \
__wt_stats_clear(stats, \
- WT_STATS_FIELD_TO_SLOT(stats, fld)); \
+ WT_STATS_FIELD_TO_OFFSET(stats, fld)); \
(stats)[0]->fld = (int64_t)(value); \
} \
} while (0)
@@ -179,18 +195,24 @@ __wt_stats_clear(void *stats_arg, int slot)
/*
* Update connection handle statistics if statistics gathering is enabled.
*/
-#define WT_STAT_CONN_DECR(session, fld) \
- WT_STAT_DECR(session, S2C(session)->stats, fld)
-#define WT_STAT_CONN_DECR_ATOMIC(session, fld) \
- WT_STAT_DECRV_ATOMIC(session, S2C(session)->stats, fld, 1)
#define WT_STAT_CONN_DECRV(session, fld, value) \
- WT_STAT_DECRV(session, S2C(session)->stats, fld, value)
-#define WT_STAT_CONN_INCR(session, fld) \
- WT_STAT_INCR(session, S2C(session)->stats, fld)
-#define WT_STAT_CONN_INCR_ATOMIC(session, fld) \
- WT_STAT_INCRV_ATOMIC(session, S2C(session)->stats, fld, 1)
+ WT_STAT_DECRV_BASE(session, \
+ S2C(session)->stats[(session)->stat_bucket], fld, value)
+#define WT_STAT_CONN_DECR_ATOMIC(session, fld) \
+ WT_STAT_DECRV_ATOMIC_BASE(session, \
+ S2C(session)->stats[(session)->stat_bucket], fld, 1)
+#define WT_STAT_CONN_DECR(session, fld) \
+ WT_STAT_CONN_DECRV(session, fld, 1)
+
#define WT_STAT_CONN_INCRV(session, fld, value) \
- WT_STAT_INCRV(session, S2C(session)->stats, fld, value)
+ WT_STAT_INCRV_BASE(session, \
+ S2C(session)->stats[(session)->stat_bucket], fld, value)
+#define WT_STAT_CONN_INCR_ATOMIC(session, fld) \
+ WT_STAT_INCRV_ATOMIC_BASE(session, \
+ S2C(session)->stats[(session)->stat_bucket], fld, 1)
+#define WT_STAT_CONN_INCR(session, fld) \
+ WT_STAT_CONN_INCRV(session, fld, 1)
+
#define WT_STAT_CONN_SET(session, fld, value) \
WT_STAT_SET(session, S2C(session)->stats, fld, value)
@@ -263,6 +285,10 @@ struct __wt_connection_stats {
int64_t block_byte_write_checkpoint;
int64_t block_map_read;
int64_t block_byte_map_read;
+ int64_t cache_read_app_count;
+ int64_t cache_read_app_time;
+ int64_t cache_write_app_count;
+ int64_t cache_write_app_time;
int64_t cache_bytes_image;
int64_t cache_bytes_inuse;
int64_t cache_bytes_other;
@@ -356,6 +382,21 @@ struct __wt_connection_stats {
int64_t dh_sweeps;
int64_t dh_session_handles;
int64_t dh_session_sweeps;
+ int64_t lock_checkpoint_count;
+ int64_t lock_checkpoint_wait_application;
+ int64_t lock_checkpoint_wait_internal;
+ int64_t lock_handle_list_count;
+ int64_t lock_handle_list_wait_application;
+ int64_t lock_handle_list_wait_internal;
+ int64_t lock_metadata_count;
+ int64_t lock_metadata_wait_application;
+ int64_t lock_metadata_wait_internal;
+ int64_t lock_schema_count;
+ int64_t lock_schema_wait_application;
+ int64_t lock_schema_wait_internal;
+ int64_t lock_table_count;
+ int64_t lock_table_wait_application;
+ int64_t lock_table_wait_internal;
int64_t log_slot_switch_busy;
int64_t log_slot_closes;
int64_t log_slot_races;
@@ -420,6 +461,8 @@ struct __wt_connection_stats {
int64_t thread_fsync_active;
int64_t thread_read_active;
int64_t thread_write_active;
+ int64_t application_evict_time;
+ int64_t application_cache_time;
int64_t page_busy_blocked;
int64_t page_forcible_evict_blocked;
int64_t page_locked_blocked;
@@ -437,6 +480,7 @@ struct __wt_connection_stats {
int64_t txn_checkpoint_scrub_time;
int64_t txn_checkpoint_time_total;
int64_t txn_checkpoint;
+ int64_t txn_checkpoint_skipped;
int64_t txn_fail_cache;
int64_t txn_checkpoint_fsync_post;
int64_t txn_checkpoint_fsync_post_duration;
@@ -515,6 +559,24 @@ struct __wt_dsrc_stats {
int64_t cache_write;
int64_t cache_write_restore;
int64_t cache_eviction_clean;
+ int64_t cache_state_gen_avg_gap;
+ int64_t cache_state_avg_written_size;
+ int64_t cache_state_pages_clean;
+ int64_t cache_state_gen_current;
+ int64_t cache_state_pages_dirty;
+ int64_t cache_state_root_entries;
+ int64_t cache_state_pages_internal;
+ int64_t cache_state_pages_leaf;
+ int64_t cache_state_gen_max_gap;
+ int64_t cache_state_max_pagesize;
+ int64_t cache_state_min_written_size;
+ int64_t cache_state_smaller_alloc_size;
+ int64_t cache_state_memory;
+ int64_t cache_state_queued;
+ int64_t cache_state_not_queueable;
+ int64_t cache_state_refs_skipped;
+ int64_t cache_state_root_size;
+ int64_t cache_state_pages;
int64_t compress_read;
int64_t compress_write;
int64_t compress_write_fail;
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index 2e41ae8620d..8128e8e4cc2 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -49,9 +49,9 @@
WT_ASSERT((s), (s)->txn.forced_iso > 0); \
(s)->txn.forced_iso--; \
WT_ASSERT((s), txn_state->id == saved_state.id && \
- (txn_state->snap_min == saved_state.snap_min || \
- saved_state.snap_min == WT_TXN_NONE)); \
- txn_state->snap_min = saved_state.snap_min; \
+ (txn_state->pinned_id == saved_state.pinned_id || \
+ saved_state.pinned_id == WT_TXN_NONE)); \
+ txn_state->pinned_id = saved_state.pinned_id; \
} while (0)
struct __wt_named_snapshot {
@@ -59,14 +59,14 @@ struct __wt_named_snapshot {
TAILQ_ENTRY(__wt_named_snapshot) q;
- uint64_t snap_min, snap_max;
+ uint64_t pinned_id, snap_min, snap_max;
uint64_t *snapshot;
uint32_t snapshot_count;
};
struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_txn_state {
volatile uint64_t id;
- volatile uint64_t snap_min;
+ volatile uint64_t pinned_id;
};
struct __wt_txn_global {
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 1a8851a9a2a..cf7e2eafc65 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -309,7 +309,7 @@ __wt_txn_idle_cache_check(WT_SESSION_IMPL *session)
* WT_TXN_HAS_SNAPSHOT.
*/
if (F_ISSET(txn, WT_TXN_RUNNING) &&
- !F_ISSET(txn, WT_TXN_HAS_ID) && txn_state->snap_min == WT_TXN_NONE)
+ !F_ISSET(txn, WT_TXN_HAS_ID) && txn_state->pinned_id == WT_TXN_NONE)
WT_RET(__wt_cache_eviction_check(session, false, NULL));
return (0);
@@ -480,8 +480,8 @@ __wt_txn_cursor_op(WT_SESSION_IMPL *session)
* positioned on a value, it can't be freed.
*/
if (txn->isolation == WT_ISO_READ_UNCOMMITTED) {
- if (txn_state->snap_min == WT_TXN_NONE)
- txn_state->snap_min = txn_global->last_running;
+ if (txn_state->pinned_id == WT_TXN_NONE)
+ txn_state->pinned_id = txn_global->last_running;
} else if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT))
WT_RET(__wt_txn_get_snapshot(session));
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index f4763a113f1..b6185b4ead6 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -962,8 +962,9 @@ struct __wt_session {
* where appropriate (for example\, a cache size statistic is not
* cleared\, while the count of cursor insert operations will be
* cleared). See @ref statistics for more information., a list\, with
- * values chosen from the following options: \c "all"\, \c "fast"\, \c
- * "clear"\, \c "size"; default empty.}
+ * values chosen from the following options: \c "all"\, \c
+ * "cache_walk"\, \c "fast"\, \c "clear"\, \c "size"\, \c "tree_walk";
+ * default empty.}
* @config{target, if non-empty\, backup the list of objects; valid only
* for a backup data source., a list of strings; default empty.}
* @configend
@@ -1004,9 +1005,9 @@ struct __wt_session {
* @config{block_compressor, configure a compressor for file blocks.
* Permitted values are \c "none" or custom compression engine name
* created with WT_CONNECTION::add_compressor. If WiredTiger has
- * builtin support for \c "snappy"\, \c "lz4" or \c "zlib" compression\,
- * these names are also available. See @ref compression for more
- * information., a string; default \c none.}
+ * builtin support for \c "lz4"\, \c "snappy"\, \c "zlib" or \c "zstd"
+ * compression\, these names are also available. See @ref compression
+ * for more information., a string; default \c none.}
* @config{cache_resident, do not ever evict the object's pages from
* cache. Not compatible with LSM tables; see @ref
* tuning_cache_resident for more information., a boolean flag; default
@@ -1069,6 +1070,11 @@ struct __wt_session {
* Permitted values are \c "none"\, \c "english"\, \c "utf8<file>" or \c
* "utf16<file>". See @ref huffman for more information., a string;
* default \c none.}
+ * @config{ignore_in_memory_cache_size, allow update and insert
+ * operations to proceed even if the cache is already at capacity. Only
+ * valid in conjunction with in-memory databases. Should be used with
+ * caution - this configuration allows WiredTiger to consume memory over
+ * the configured cache limit., a boolean flag; default \c false.}
* @config{immutable, configure the index to be immutable - that is an
* index is not changed by any update to a record in the table., a
* boolean flag; default \c false.}
@@ -1815,14 +1821,13 @@ struct __wt_connection {
* default \c 5.}
* @config{eviction_dirty_target, perform eviction in worker threads
* when the cache contains at least this much dirty content\, expressed
- * as a percentage of the total cache size. Ignored if \c in_memory is
- * \c true., an integer between 1 and 99; default \c 5.}
+ * as a percentage of the total cache size., an integer between 1 and
+ * 99; default \c 5.}
* @config{eviction_dirty_trigger, trigger application threads to
* perform eviction when the cache contains at least this much dirty
* content\, expressed as a percentage of the total cache size. This
- * setting only alters behavior if it is lower than eviction_trigger.
- * Ignored if \c in_memory is \c true., an integer between 1 and 99;
- * default \c 20.}
+ * setting only alters behavior if it is lower than eviction_trigger.,
+ * an integer between 1 and 99; default \c 20.}
* @config{eviction_target, perform eviction in worker threads when the
* cache contains at least this much content\, expressed as a percentage
* of the total cache size. Must be less than \c eviction_trigger., an
@@ -1899,8 +1904,9 @@ struct __wt_connection {
* reset each time a statistics cursor is used to gather statistics\, as
* well as each time statistics are logged using the \c statistics_log
* configuration. See @ref statistics for more information., a list\,
- * with values chosen from the following options: \c "all"\, \c "fast"\,
- * \c "none"\, \c "clear"; default \c none.}
+ * with values chosen from the following options: \c "all"\, \c
+ * "cache_walk"\, \c "fast"\, \c "none"\, \c "clear"\, \c "tree_walk";
+ * default \c none.}
* @config{statistics_log = (, log any statistics the database is
* configured to maintain\, to a file. See @ref statistics for more
* information. Enabling the statistics log server uses a session from
@@ -2281,13 +2287,12 @@ struct __wt_connection {
* is \c true., an integer between 0 and 99; default \c 5.}
* @config{eviction_dirty_target, perform eviction in worker threads when the
* cache contains at least this much dirty content\, expressed as a percentage
- * of the total cache size. Ignored if \c in_memory is \c true., an integer
- * between 1 and 99; default \c 5.}
+ * of the total cache size., an integer between 1 and 99; default \c 5.}
* @config{eviction_dirty_trigger, trigger application threads to perform
* eviction when the cache contains at least this much dirty content\, expressed
* as a percentage of the total cache size. This setting only alters behavior
- * if it is lower than eviction_trigger. Ignored if \c in_memory is \c true.,
- * an integer between 1 and 99; default \c 20.}
+ * if it is lower than eviction_trigger., an integer between 1 and 99; default
+ * \c 20.}
* @config{eviction_target, perform eviction in worker threads when the cache
* contains at least this much content\, expressed as a percentage of the total
* cache size. Must be less than \c eviction_trigger., an integer between 10
@@ -2333,11 +2338,11 @@ struct __wt_connection {
* @config{&nbsp;&nbsp;&nbsp;&nbsp;compressor, configure a compressor for log
* records. Permitted values are \c "none" or custom compression engine name
* created with WT_CONNECTION::add_compressor. If WiredTiger has builtin
- * support for \c "snappy"\, \c "lz4" or \c "zlib" compression\, these names are
- * also available. See @ref compression for more information., a string;
- * default \c none.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, enable logging
- * subsystem., a boolean flag; default \c false.}
+ * support for \c "lz4"\, \c "snappy"\, \c "zlib" or \c "zstd" compression\,
+ * these names are also available. See @ref compression for more information.,
+ * a string; default \c none.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, enable
+ * logging subsystem., a boolean flag; default \c false.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;file_max, the maximum size of log files., an
* integer between 100KB and 2GB; default \c 100MB.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the name of a directory into which log
@@ -2403,8 +2408,9 @@ struct __wt_connection {
* statistics are reset each time a statistics cursor is used to gather
* statistics\, as well as each time statistics are logged using the \c
* statistics_log configuration. See @ref statistics for more information., a
- * list\, with values chosen from the following options: \c "all"\, \c "fast"\,
- * \c "none"\, \c "clear"; default \c none.}
+ * list\, with values chosen from the following options: \c "all"\, \c
+ * "cache_walk"\, \c "fast"\, \c "none"\, \c "clear"\, \c "tree_walk"; default
+ * \c none.}
* @config{statistics_log = (, log any statistics the database is configured to
* maintain\, to a file. See @ref statistics for more information. Enabling
* the statistics log server uses a session from the configured session_max., a
@@ -4274,384 +4280,437 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_BLOCK_MAP_READ 1029
/*! block-manager: mapped bytes read */
#define WT_STAT_CONN_BLOCK_BYTE_MAP_READ 1030
+/*! cache: application threads page read from disk to cache count */
+#define WT_STAT_CONN_CACHE_READ_APP_COUNT 1031
+/*! cache: application threads page read from disk to cache time (usecs) */
+#define WT_STAT_CONN_CACHE_READ_APP_TIME 1032
+/*! cache: application threads page write from cache to disk count */
+#define WT_STAT_CONN_CACHE_WRITE_APP_COUNT 1033
+/*! cache: application threads page write from cache to disk time (usecs) */
+#define WT_STAT_CONN_CACHE_WRITE_APP_TIME 1034
/*! cache: bytes belonging to page images in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_IMAGE 1031
+#define WT_STAT_CONN_CACHE_BYTES_IMAGE 1035
/*! cache: bytes currently in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INUSE 1032
+#define WT_STAT_CONN_CACHE_BYTES_INUSE 1036
/*! cache: bytes not belonging to page images in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_OTHER 1033
+#define WT_STAT_CONN_CACHE_BYTES_OTHER 1037
/*! cache: bytes read into cache */
-#define WT_STAT_CONN_CACHE_BYTES_READ 1034
+#define WT_STAT_CONN_CACHE_BYTES_READ 1038
/*! cache: bytes written from cache */
-#define WT_STAT_CONN_CACHE_BYTES_WRITE 1035
+#define WT_STAT_CONN_CACHE_BYTES_WRITE 1039
/*! cache: checkpoint blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1036
+#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1040
/*! cache: eviction calls to get a page */
-#define WT_STAT_CONN_CACHE_EVICTION_GET_REF 1037
+#define WT_STAT_CONN_CACHE_EVICTION_GET_REF 1041
/*! cache: eviction calls to get a page found queue empty */
-#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY 1038
+#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY 1042
/*! cache: eviction calls to get a page found queue empty after locking */
-#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY2 1039
+#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY2 1043
/*! cache: eviction currently operating in aggressive mode */
-#define WT_STAT_CONN_CACHE_EVICTION_AGGRESSIVE_SET 1040
+#define WT_STAT_CONN_CACHE_EVICTION_AGGRESSIVE_SET 1044
/*! cache: eviction empty score */
-#define WT_STAT_CONN_CACHE_EVICTION_EMPTY_SCORE 1041
+#define WT_STAT_CONN_CACHE_EVICTION_EMPTY_SCORE 1045
/*! cache: eviction server candidate queue empty when topping up */
-#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1042
+#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1046
/*! cache: eviction server candidate queue not empty when topping up */
-#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1043
+#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1047
/*! cache: eviction server evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1044
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1048
/*!
* cache: eviction server slept, because we did not make progress with
* eviction
*/
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_SLEPT 1045
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_SLEPT 1049
/*! cache: eviction server unable to reach eviction goal */
-#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1046
+#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1050
/*! cache: eviction state */
-#define WT_STAT_CONN_CACHE_EVICTION_STATE 1047
+#define WT_STAT_CONN_CACHE_EVICTION_STATE 1051
/*! cache: eviction walks abandoned */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ABANDONED 1048
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ABANDONED 1052
/*! cache: eviction worker thread evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1049
+#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1053
/*! cache: failed eviction of pages that exceeded the in-memory maximum */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1050
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1054
/*! cache: files with active eviction walks */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1051
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1055
/*! cache: files with new eviction walks started */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1052
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1056
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1053
+#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1057
/*! cache: hazard pointer check calls */
-#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1054
+#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1058
/*! cache: hazard pointer check entries walked */
-#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1055
+#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1059
/*! cache: hazard pointer maximum array length */
-#define WT_STAT_CONN_CACHE_HAZARD_MAX 1056
+#define WT_STAT_CONN_CACHE_HAZARD_MAX 1060
/*! cache: in-memory page passed criteria to be split */
-#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1057
+#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1061
/*! cache: in-memory page splits */
-#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1058
+#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1062
/*! cache: internal pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1059
+#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1063
/*! cache: internal pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1060
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1064
/*! cache: leaf pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1061
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1065
/*! cache: lookaside table insert calls */
-#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1062
+#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1066
/*! cache: lookaside table remove calls */
-#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1063
+#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1067
/*! cache: maximum bytes configured */
-#define WT_STAT_CONN_CACHE_BYTES_MAX 1064
+#define WT_STAT_CONN_CACHE_BYTES_MAX 1068
/*! cache: maximum page size at eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1065
+#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1069
/*! cache: modified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1066
+#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1070
/*! cache: modified pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1067
+#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1071
/*! cache: overflow pages read into cache */
-#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1068
+#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1072
/*! cache: overflow values cached in memory */
-#define WT_STAT_CONN_CACHE_OVERFLOW_VALUE 1069
+#define WT_STAT_CONN_CACHE_OVERFLOW_VALUE 1073
/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1070
+#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1074
/*! cache: page written requiring lookaside records */
-#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1071
+#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1075
/*! cache: pages currently held in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_INUSE 1072
+#define WT_STAT_CONN_CACHE_PAGES_INUSE 1076
/*! cache: pages evicted because they exceeded the in-memory maximum */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1073
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1077
/*! cache: pages evicted because they had chains of deleted items */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1074
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1078
/*! cache: pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP 1075
+#define WT_STAT_CONN_CACHE_EVICTION_APP 1079
/*! cache: pages queued for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1076
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1080
/*! cache: pages queued for urgent eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1077
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1081
/*! cache: pages queued for urgent eviction during walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1078
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1082
/*! cache: pages read into cache */
-#define WT_STAT_CONN_CACHE_READ 1079
+#define WT_STAT_CONN_CACHE_READ 1083
/*! cache: pages read into cache requiring lookaside entries */
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1080
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1084
/*! cache: pages requested from the cache */
-#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1081
+#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1085
/*! cache: pages seen by eviction walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1082
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1086
/*! cache: pages selected for eviction unable to be evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1083
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1087
/*! cache: pages walked for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK 1084
+#define WT_STAT_CONN_CACHE_EVICTION_WALK 1088
/*! cache: pages written from cache */
-#define WT_STAT_CONN_CACHE_WRITE 1085
+#define WT_STAT_CONN_CACHE_WRITE 1089
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1086
+#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1090
/*! cache: percentage overhead */
-#define WT_STAT_CONN_CACHE_OVERHEAD 1087
+#define WT_STAT_CONN_CACHE_OVERHEAD 1091
/*! cache: tracked bytes belonging to internal pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1088
+#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1092
/*! cache: tracked bytes belonging to leaf pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_LEAF 1089
+#define WT_STAT_CONN_CACHE_BYTES_LEAF 1093
/*! cache: tracked dirty bytes in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1090
+#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1094
/*! cache: tracked dirty pages in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1091
+#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1095
/*! cache: unmodified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1092
+#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1096
/*! connection: auto adjusting condition resets */
-#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1093
+#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1097
/*! connection: auto adjusting condition wait calls */
-#define WT_STAT_CONN_COND_AUTO_WAIT 1094
+#define WT_STAT_CONN_COND_AUTO_WAIT 1098
/*! connection: files currently open */
-#define WT_STAT_CONN_FILE_OPEN 1095
+#define WT_STAT_CONN_FILE_OPEN 1099
/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1096
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1100
/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1097
+#define WT_STAT_CONN_MEMORY_FREE 1101
/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1098
+#define WT_STAT_CONN_MEMORY_GROW 1102
/*! connection: pthread mutex condition wait calls */
-#define WT_STAT_CONN_COND_WAIT 1099
+#define WT_STAT_CONN_COND_WAIT 1103
/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1100
+#define WT_STAT_CONN_RWLOCK_READ 1104
/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1101
+#define WT_STAT_CONN_RWLOCK_WRITE 1105
/*! connection: total fsync I/Os */
-#define WT_STAT_CONN_FSYNC_IO 1102
+#define WT_STAT_CONN_FSYNC_IO 1106
/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1103
+#define WT_STAT_CONN_READ_IO 1107
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1104
+#define WT_STAT_CONN_WRITE_IO 1108
/*! cursor: cursor create calls */
-#define WT_STAT_CONN_CURSOR_CREATE 1105
+#define WT_STAT_CONN_CURSOR_CREATE 1109
/*! cursor: cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT 1106
+#define WT_STAT_CONN_CURSOR_INSERT 1110
/*! cursor: cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT 1107
+#define WT_STAT_CONN_CURSOR_NEXT 1111
/*! cursor: cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 1108
+#define WT_STAT_CONN_CURSOR_PREV 1112
/*! cursor: cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 1109
+#define WT_STAT_CONN_CURSOR_REMOVE 1113
/*! cursor: cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 1110
+#define WT_STAT_CONN_CURSOR_RESET 1114
/*! cursor: cursor restarted searches */
-#define WT_STAT_CONN_CURSOR_RESTART 1111
+#define WT_STAT_CONN_CURSOR_RESTART 1115
/*! cursor: cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 1112
+#define WT_STAT_CONN_CURSOR_SEARCH 1116
/*! cursor: cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1113
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1117
/*! cursor: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1114
+#define WT_STAT_CONN_CURSOR_UPDATE 1118
/*! cursor: truncate calls */
-#define WT_STAT_CONN_CURSOR_TRUNCATE 1115
+#define WT_STAT_CONN_CURSOR_TRUNCATE 1119
/*! data-handle: connection data handles currently active */
-#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1116
+#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1120
/*! data-handle: connection sweep candidate became referenced */
-#define WT_STAT_CONN_DH_SWEEP_REF 1117
+#define WT_STAT_CONN_DH_SWEEP_REF 1121
/*! data-handle: connection sweep dhandles closed */
-#define WT_STAT_CONN_DH_SWEEP_CLOSE 1118
+#define WT_STAT_CONN_DH_SWEEP_CLOSE 1122
/*! data-handle: connection sweep dhandles removed from hash list */
-#define WT_STAT_CONN_DH_SWEEP_REMOVE 1119
+#define WT_STAT_CONN_DH_SWEEP_REMOVE 1123
/*! data-handle: connection sweep time-of-death sets */
-#define WT_STAT_CONN_DH_SWEEP_TOD 1120
+#define WT_STAT_CONN_DH_SWEEP_TOD 1124
/*! data-handle: connection sweeps */
-#define WT_STAT_CONN_DH_SWEEPS 1121
+#define WT_STAT_CONN_DH_SWEEPS 1125
/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1122
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1126
/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1123
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1127
+/*! lock: checkpoint lock acquisitions */
+#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1128
+/*! lock: checkpoint lock application thread wait time (usecs) */
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1129
+/*! lock: checkpoint lock internal thread wait time (usecs) */
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1130
+/*! lock: handle-list lock acquisitions */
+#define WT_STAT_CONN_LOCK_HANDLE_LIST_COUNT 1131
+/*! lock: handle-list lock application thread wait time (usecs) */
+#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_APPLICATION 1132
+/*! lock: handle-list lock internal thread wait time (usecs) */
+#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_INTERNAL 1133
+/*! lock: metadata lock acquisitions */
+#define WT_STAT_CONN_LOCK_METADATA_COUNT 1134
+/*! lock: metadata lock application thread wait time (usecs) */
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1135
+/*! lock: metadata lock internal thread wait time (usecs) */
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1136
+/*! lock: schema lock acquisitions */
+#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1137
+/*! lock: schema lock application thread wait time (usecs) */
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1138
+/*! lock: schema lock internal thread wait time (usecs) */
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1139
+/*! lock: table lock acquisitions */
+#define WT_STAT_CONN_LOCK_TABLE_COUNT 1140
+/*!
+ * lock: table lock application thread time waiting for the table lock
+ * (usecs)
+ */
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1141
+/*!
+ * lock: table lock internal thread time waiting for the table lock
+ * (usecs)
+ */
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1142
/*! log: busy returns attempting to switch slots */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1124
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1143
/*! log: consolidated slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1125
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1144
/*! log: consolidated slot join races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1126
+#define WT_STAT_CONN_LOG_SLOT_RACES 1145
/*! log: consolidated slot join transitions */
-#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1127
+#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1146
/*! log: consolidated slot joins */
-#define WT_STAT_CONN_LOG_SLOT_JOINS 1128
+#define WT_STAT_CONN_LOG_SLOT_JOINS 1147
/*! log: consolidated slot unbuffered writes */
-#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1129
+#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1148
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1130
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1149
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1131
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1150
/*! log: log files manually zero-filled */
-#define WT_STAT_CONN_LOG_ZERO_FILLS 1132
+#define WT_STAT_CONN_LOG_ZERO_FILLS 1151
/*! log: log flush operations */
-#define WT_STAT_CONN_LOG_FLUSH 1133
+#define WT_STAT_CONN_LOG_FLUSH 1152
/*! log: log force write operations */
-#define WT_STAT_CONN_LOG_FORCE_WRITE 1134
+#define WT_STAT_CONN_LOG_FORCE_WRITE 1153
/*! log: log force write operations skipped */
-#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1135
+#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1154
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1136
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1155
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1137
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1156
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1138
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1157
/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1139
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1158
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1140
+#define WT_STAT_CONN_LOG_SCANS 1159
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1141
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1160
/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1142
+#define WT_STAT_CONN_LOG_WRITE_LSN 1161
/*! log: log server thread write LSN walk skipped */
-#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1143
+#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1162
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1144
+#define WT_STAT_CONN_LOG_SYNC 1163
/*! log: log sync time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DURATION 1145
+#define WT_STAT_CONN_LOG_SYNC_DURATION 1164
/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1146
+#define WT_STAT_CONN_LOG_SYNC_DIR 1165
/*! log: log sync_dir time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1147
+#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1166
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1148
+#define WT_STAT_CONN_LOG_WRITES 1167
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1149
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1168
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1150
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1169
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1151
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1170
/*! log: pre-allocated log files not ready and missed */
-#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1152
+#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1171
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1153
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1172
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1154
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1173
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1155
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1174
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1156
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1175
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1157
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1176
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1158
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1177
/*! log: written slots coalesced */
-#define WT_STAT_CONN_LOG_SLOT_COALESCED 1159
+#define WT_STAT_CONN_LOG_SLOT_COALESCED 1178
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1160
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1179
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1161
+#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1180
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1162
+#define WT_STAT_CONN_REC_PAGES 1181
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1163
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1182
/*! reconciliation: pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE 1164
+#define WT_STAT_CONN_REC_PAGE_DELETE 1183
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1165
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1184
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1166
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1185
/*! session: open cursor count */
-#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1167
+#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1186
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1168
+#define WT_STAT_CONN_SESSION_OPEN 1187
/*! session: table compact failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1169
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1188
/*! session: table compact successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1170
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1189
/*! session: table create failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1171
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1190
/*! session: table create successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1172
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1191
/*! session: table drop failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1173
+#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1192
/*! session: table drop successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1174
+#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1193
/*! session: table rebalance failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1175
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1194
/*! session: table rebalance successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1176
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1195
/*! session: table rename failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1177
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1196
/*! session: table rename successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1178
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1197
/*! session: table salvage failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1179
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1198
/*! session: table salvage successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1180
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1199
/*! session: table truncate failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1181
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1200
/*! session: table truncate successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1182
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1201
/*! session: table verify failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1183
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1202
/*! session: table verify successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1184
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1203
/*! thread-state: active filesystem fsync calls */
-#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1185
+#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1204
/*! thread-state: active filesystem read calls */
-#define WT_STAT_CONN_THREAD_READ_ACTIVE 1186
+#define WT_STAT_CONN_THREAD_READ_ACTIVE 1205
/*! thread-state: active filesystem write calls */
-#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1187
+#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1206
+/*! thread-yield: application thread time evicting (usecs) */
+#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1207
+/*! thread-yield: application thread time waiting for cache (usecs) */
+#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1208
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1188
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1209
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1189
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1210
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1190
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1211
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1191
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1212
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1192
+#define WT_STAT_CONN_PAGE_SLEEP 1213
/*! transaction: number of named snapshots created */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1193
+#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1214
/*! transaction: number of named snapshots dropped */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1194
+#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1215
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1195
+#define WT_STAT_CONN_TXN_BEGIN 1216
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1196
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1217
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1197
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1218
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1198
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1219
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1199
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1220
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1200
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1221
/*! transaction: transaction checkpoint scrub dirty target */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1201
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1222
/*! transaction: transaction checkpoint scrub time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1202
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1223
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1203
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1224
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1204
+#define WT_STAT_CONN_TXN_CHECKPOINT 1225
+/*!
+ * transaction: transaction checkpoints skipped because database was
+ * clean
+ */
+#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1226
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1205
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1227
/*!
* transaction: transaction fsync calls for checkpoint after allocating
* the transaction ID
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1206
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1228
/*!
* transaction: transaction fsync duration for checkpoint after
* allocating the transaction ID (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1207
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1229
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1208
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1230
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1209
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1231
/*!
* transaction: transaction range of IDs currently pinned by named
* snapshots
*/
-#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1210
+#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1232
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1211
+#define WT_STAT_CONN_TXN_SYNC 1233
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1212
+#define WT_STAT_CONN_TXN_COMMIT 1234
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1213
+#define WT_STAT_CONN_TXN_ROLLBACK 1235
/*!
* @}
@@ -4709,28 +4768,28 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
/*! btree: btree checkpoint generation */
#define WT_STAT_DSRC_BTREE_CHECKPOINT_GENERATION 2022
/*!
- * btree: column-store fixed-size leaf pages, only reported if
- * statistics=all is set
+ * btree: column-store fixed-size leaf pages, only reported if tree_walk
+ * or all statistics are enabled
*/
#define WT_STAT_DSRC_BTREE_COLUMN_FIX 2023
/*!
- * btree: column-store internal pages, only reported if statistics=all is
- * set
+ * btree: column-store internal pages, only reported if tree_walk or all
+ * statistics are enabled
*/
#define WT_STAT_DSRC_BTREE_COLUMN_INTERNAL 2024
/*!
* btree: column-store variable-size RLE encoded values, only reported if
- * statistics=all is set
+ * tree_walk or all statistics are enabled
*/
#define WT_STAT_DSRC_BTREE_COLUMN_RLE 2025
/*!
* btree: column-store variable-size deleted values, only reported if
- * statistics=all is set
+ * tree_walk or all statistics are enabled
*/
#define WT_STAT_DSRC_BTREE_COLUMN_DELETED 2026
/*!
* btree: column-store variable-size leaf pages, only reported if
- * statistics=all is set
+ * tree_walk or all statistics are enabled
*/
#define WT_STAT_DSRC_BTREE_COLUMN_VARIABLE 2027
/*! btree: fixed-record size */
@@ -4748,20 +4807,26 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
/*! btree: maximum tree depth */
#define WT_STAT_DSRC_BTREE_MAXIMUM_DEPTH 2034
/*!
- * btree: number of key/value pairs, only reported if statistics=all is
- * set
+ * btree: number of key/value pairs, only reported if tree_walk or all
+ * statistics are enabled
*/
#define WT_STAT_DSRC_BTREE_ENTRIES 2035
-/*! btree: overflow pages, only reported if statistics=all is set */
+/*!
+ * btree: overflow pages, only reported if tree_walk or all statistics
+ * are enabled
+ */
#define WT_STAT_DSRC_BTREE_OVERFLOW 2036
/*! btree: pages rewritten by compaction */
#define WT_STAT_DSRC_BTREE_COMPACT_REWRITE 2037
/*!
- * btree: row-store internal pages, only reported if statistics=all is
- * set
+ * btree: row-store internal pages, only reported if tree_walk or all
+ * statistics are enabled
*/
#define WT_STAT_DSRC_BTREE_ROW_INTERNAL 2038
-/*! btree: row-store leaf pages, only reported if statistics=all is set */
+/*!
+ * btree: row-store leaf pages, only reported if tree_walk or all
+ * statistics are enabled
+ */
#define WT_STAT_DSRC_BTREE_ROW_LEAF 2039
/*! cache: bytes currently in the cache */
#define WT_STAT_DSRC_CACHE_BYTES_INUSE 2040
@@ -4807,87 +4872,179 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2060
/*! cache: unmodified pages evicted */
#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2061
+/*!
+ * cache_walk: Average difference between current eviction generation
+ * when the page was last considered, only reported if cache_walk or all
+ * statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_GEN_AVG_GAP 2062
+/*!
+ * cache_walk: Average on-disk page image size seen, only reported if
+ * cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_AVG_WRITTEN_SIZE 2063
+/*!
+ * cache_walk: Clean pages currently in cache, only reported if
+ * cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_PAGES_CLEAN 2064
+/*!
+ * cache_walk: Current eviction generation, only reported if cache_walk
+ * or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_GEN_CURRENT 2065
+/*!
+ * cache_walk: Dirty pages currently in cache, only reported if
+ * cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_PAGES_DIRTY 2066
+/*!
+ * cache_walk: Entries in the root page, only reported if cache_walk or
+ * all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_ROOT_ENTRIES 2067
+/*!
+ * cache_walk: Internal pages currently in cache, only reported if
+ * cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_PAGES_INTERNAL 2068
+/*!
+ * cache_walk: Leaf pages currently in cache, only reported if cache_walk
+ * or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_PAGES_LEAF 2069
+/*!
+ * cache_walk: Maximum difference between current eviction generation
+ * when the page was last considered, only reported if cache_walk or all
+ * statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_GEN_MAX_GAP 2070
+/*!
+ * cache_walk: Maximum page size seen, only reported if cache_walk or all
+ * statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_MAX_PAGESIZE 2071
+/*!
+ * cache_walk: Minimum on-disk page image size seen, only reported if
+ * cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_MIN_WRITTEN_SIZE 2072
+/*!
+ * cache_walk: On-disk page image sizes smaller than a single allocation
+ * unit, only reported if cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_SMALLER_ALLOC_SIZE 2073
+/*!
+ * cache_walk: Pages created in memory and never written, only reported
+ * if cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_MEMORY 2074
+/*!
+ * cache_walk: Pages currently queued for eviction, only reported if
+ * cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_QUEUED 2075
+/*!
+ * cache_walk: Pages that could not be queued for eviction, only reported
+ * if cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_NOT_QUEUEABLE 2076
+/*!
+ * cache_walk: Refs skipped during cache traversal, only reported if
+ * cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_REFS_SKIPPED 2077
+/*!
+ * cache_walk: Size of the root page, only reported if cache_walk or all
+ * statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_ROOT_SIZE 2078
+/*!
+ * cache_walk: Total number of pages currently in cache, only reported if
+ * cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_PAGES 2079
/*! compression: compressed pages read */
-#define WT_STAT_DSRC_COMPRESS_READ 2062
+#define WT_STAT_DSRC_COMPRESS_READ 2080
/*! compression: compressed pages written */
-#define WT_STAT_DSRC_COMPRESS_WRITE 2063
+#define WT_STAT_DSRC_COMPRESS_WRITE 2081
/*! compression: page written failed to compress */
-#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2064
+#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2082
/*! compression: page written was too small to compress */
-#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2065
+#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2083
/*! compression: raw compression call failed, additional data available */
-#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2066
+#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2084
/*! compression: raw compression call failed, no additional data available */
-#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2067
+#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2085
/*! compression: raw compression call succeeded */
-#define WT_STAT_DSRC_COMPRESS_RAW_OK 2068
+#define WT_STAT_DSRC_COMPRESS_RAW_OK 2086
/*! cursor: bulk-loaded cursor-insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2069
+#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2087
/*! cursor: create calls */
-#define WT_STAT_DSRC_CURSOR_CREATE 2070
+#define WT_STAT_DSRC_CURSOR_CREATE 2088
/*! cursor: cursor-insert key and value bytes inserted */
-#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2071
+#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2089
/*! cursor: cursor-remove key bytes removed */
-#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2072
+#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2090
/*! cursor: cursor-update value bytes updated */
-#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2073
+#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2091
/*! cursor: insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT 2074
+#define WT_STAT_DSRC_CURSOR_INSERT 2092
/*! cursor: next calls */
-#define WT_STAT_DSRC_CURSOR_NEXT 2075
+#define WT_STAT_DSRC_CURSOR_NEXT 2093
/*! cursor: prev calls */
-#define WT_STAT_DSRC_CURSOR_PREV 2076
+#define WT_STAT_DSRC_CURSOR_PREV 2094
/*! cursor: remove calls */
-#define WT_STAT_DSRC_CURSOR_REMOVE 2077
+#define WT_STAT_DSRC_CURSOR_REMOVE 2095
/*! cursor: reset calls */
-#define WT_STAT_DSRC_CURSOR_RESET 2078
+#define WT_STAT_DSRC_CURSOR_RESET 2096
/*! cursor: restarted searches */
-#define WT_STAT_DSRC_CURSOR_RESTART 2079
+#define WT_STAT_DSRC_CURSOR_RESTART 2097
/*! cursor: search calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH 2080
+#define WT_STAT_DSRC_CURSOR_SEARCH 2098
/*! cursor: search near calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2081
+#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2099
/*! cursor: truncate calls */
-#define WT_STAT_DSRC_CURSOR_TRUNCATE 2082
+#define WT_STAT_DSRC_CURSOR_TRUNCATE 2100
/*! cursor: update calls */
-#define WT_STAT_DSRC_CURSOR_UPDATE 2083
+#define WT_STAT_DSRC_CURSOR_UPDATE 2101
/*! reconciliation: dictionary matches */
-#define WT_STAT_DSRC_REC_DICTIONARY 2084
+#define WT_STAT_DSRC_REC_DICTIONARY 2102
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2085
+#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2103
/*!
* reconciliation: internal page key bytes discarded using suffix
* compression
*/
-#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2086
+#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2104
/*! reconciliation: internal page multi-block writes */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2087
+#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2105
/*! reconciliation: internal-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2088
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2106
/*! reconciliation: leaf page key bytes discarded using prefix compression */
-#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2089
+#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2107
/*! reconciliation: leaf page multi-block writes */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2090
+#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2108
/*! reconciliation: leaf-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2091
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2109
/*! reconciliation: maximum blocks required for a page */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2092
+#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2110
/*! reconciliation: overflow values written */
-#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2093
+#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2111
/*! reconciliation: page checksum matches */
-#define WT_STAT_DSRC_REC_PAGE_MATCH 2094
+#define WT_STAT_DSRC_REC_PAGE_MATCH 2112
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_DSRC_REC_PAGES 2095
+#define WT_STAT_DSRC_REC_PAGES 2113
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_DSRC_REC_PAGES_EVICTION 2096
+#define WT_STAT_DSRC_REC_PAGES_EVICTION 2114
/*! reconciliation: pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE 2097
+#define WT_STAT_DSRC_REC_PAGE_DELETE 2115
/*! session: object compaction */
-#define WT_STAT_DSRC_SESSION_COMPACT 2098
+#define WT_STAT_DSRC_SESSION_COMPACT 2116
/*! session: open cursor count */
-#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2099
+#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2117
/*! transaction: update conflicts */
-#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2100
+#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2118
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h
index 4e6699ab9d1..d354757c592 100644
--- a/src/third_party/wiredtiger/src/include/wt_internal.h
+++ b/src/third_party/wiredtiger/src/include/wt_internal.h
@@ -216,6 +216,8 @@ struct __wt_logslot;
typedef struct __wt_logslot WT_LOGSLOT;
struct __wt_lsm_chunk;
typedef struct __wt_lsm_chunk WT_LSM_CHUNK;
+struct __wt_lsm_cursor_chunk;
+ typedef struct __wt_lsm_cursor_chunk WT_LSM_CURSOR_CHUNK;
struct __wt_lsm_data_source;
typedef struct __wt_lsm_data_source WT_LSM_DATA_SOURCE;
struct __wt_lsm_manager;
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c
index b0c789f0f9e..00e4ea5f441 100644
--- a/src/third_party/wiredtiger/src/log/log.c
+++ b/src/third_party/wiredtiger/src/log/log.c
@@ -128,9 +128,9 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
"log_force_sync: sync directory %s to LSN %" PRIu32
"/%" PRIu32,
log->log_dir_fh->name, min_lsn->l.file, min_lsn->l.offset);
- WT_ERR(__wt_epoch(session, &fsync_start));
+ __wt_epoch(session, &fsync_start);
WT_ERR(__wt_fsync(session, log->log_dir_fh, true));
- WT_ERR(__wt_epoch(session, &fsync_stop));
+ __wt_epoch(session, &fsync_stop);
fsync_duration_usecs = WT_TIMEDIFF_US(fsync_stop, fsync_start);
log->sync_dir_lsn = *min_lsn;
WT_STAT_CONN_INCR(session, log_sync_dir);
@@ -152,9 +152,9 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
__wt_verbose(session, WT_VERB_LOG,
"log_force_sync: sync %s to LSN %" PRIu32 "/%" PRIu32,
log_fh->name, min_lsn->l.file, min_lsn->l.offset);
- WT_ERR(__wt_epoch(session, &fsync_start));
+ __wt_epoch(session, &fsync_start);
WT_ERR(__wt_fsync(session, log_fh, true));
- WT_ERR(__wt_epoch(session, &fsync_stop));
+ __wt_epoch(session, &fsync_stop);
fsync_duration_usecs = WT_TIMEDIFF_US(fsync_stop, fsync_start);
log->sync_lsn = *min_lsn;
WT_STAT_CONN_INCR(session, log_sync);
@@ -1478,9 +1478,9 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
"/%" PRIu32,
log->log_dir_fh->name,
sync_lsn.l.file, sync_lsn.l.offset);
- WT_ERR(__wt_epoch(session, &fsync_start));
+ __wt_epoch(session, &fsync_start);
WT_ERR(__wt_fsync(session, log->log_dir_fh, true));
- WT_ERR(__wt_epoch(session, &fsync_stop));
+ __wt_epoch(session, &fsync_stop);
fsync_duration_usecs =
WT_TIMEDIFF_US(fsync_stop, fsync_start);
log->sync_dir_lsn = sync_lsn;
@@ -1500,9 +1500,9 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
log->log_fh->name,
sync_lsn.l.file, sync_lsn.l.offset);
WT_STAT_CONN_INCR(session, log_sync);
- WT_ERR(__wt_epoch(session, &fsync_start));
+ __wt_epoch(session, &fsync_start);
WT_ERR(__wt_fsync(session, log->log_fh, true));
- WT_ERR(__wt_epoch(session, &fsync_stop));
+ __wt_epoch(session, &fsync_stop);
fsync_duration_usecs =
WT_TIMEDIFF_US(fsync_stop, fsync_start);
WT_STAT_CONN_INCRV(session,
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
index e98f59e7b05..067c527a21a 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
@@ -10,7 +10,7 @@
#define WT_FORALL_CURSORS(clsm, c, i) \
for ((i) = (clsm)->nchunks; (i) > 0;) \
- if (((c) = (clsm)->cursors[--i]) != NULL)
+ if (((c) = (clsm)->chunks[--i]->cursor) != NULL)
#define WT_LSM_CURCMP(s, lsm_tree, c1, c2, cmp) \
__wt_compare(s, (lsm_tree)->collator, &(c1)->key, &(c2)->key, &cmp)
@@ -18,6 +18,7 @@
static int __clsm_lookup(WT_CURSOR_LSM *, WT_ITEM *);
static int __clsm_open_cursors(WT_CURSOR_LSM *, bool, u_int, uint32_t);
static int __clsm_reset_cursors(WT_CURSOR_LSM *, WT_CURSOR *);
+static int __clsm_search_near(WT_CURSOR *cursor, int *exactp);
/*
* __wt_clsm_request_switch --
@@ -109,7 +110,7 @@ __clsm_enter_update(WT_CURSOR_LSM *clsm)
primary = NULL;
have_primary = false;
} else {
- primary = clsm->cursors[clsm->nchunks - 1];
+ primary = clsm->chunks[clsm->nchunks - 1]->cursor;
primary_chunk = clsm->primary_chunk;
WT_ASSERT(session, F_ISSET(&session->txn, WT_TXN_HAS_ID));
have_primary = (primary != NULL && primary_chunk != NULL &&
@@ -165,8 +166,7 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update)
WT_LSM_TREE *lsm_tree;
WT_SESSION_IMPL *session;
WT_TXN *txn;
- uint64_t *switch_txnp;
- uint64_t snap_min;
+ uint64_t i, pinned_id , switch_txn;
lsm_tree = clsm->lsm_tree;
session = (WT_SESSION_IMPL *)clsm->iface.session;
@@ -226,8 +226,8 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update)
* that overlaps with our snapshot is a potential
* conflict.
*
- * Note that the global snap_min is correct here: it
- * tracks concurrent transactions excluding special
+ * Note that the pinned ID is correct here: it tracks
+ * concurrent transactions excluding special
* transactions such as checkpoint (which we can't
* conflict with because checkpoint only writes the
* metadata, which is not an LSM tree).
@@ -237,17 +237,18 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update)
F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) {
WT_ASSERT(session,
F_ISSET(txn, WT_TXN_HAS_SNAPSHOT));
- snap_min =
- WT_SESSION_TXN_STATE(session)->snap_min;
- for (switch_txnp =
- &clsm->switch_txn[clsm->nchunks - 2];
+ pinned_id =
+ WT_SESSION_TXN_STATE(session)->pinned_id;
+ for (i = clsm->nchunks - 2;
clsm->nupdates < clsm->nchunks;
- clsm->nupdates++, switch_txnp--) {
- if (WT_TXNID_LT(*switch_txnp, snap_min))
+ clsm->nupdates++, i--) {
+ switch_txn =
+ clsm->chunks[i]->switch_txn;
+ if (WT_TXNID_LT(switch_txn, pinned_id))
break;
WT_ASSERT(session,
!__wt_txn_visible_all(
- session, *switch_txnp));
+ session, switch_txn));
}
}
}
@@ -378,7 +379,7 @@ __clsm_close_cursors(WT_CURSOR_LSM *clsm, u_int start, u_int end)
WT_CURSOR *c;
u_int i;
- if (clsm->cursors == NULL || clsm->nchunks == 0)
+ if (clsm->chunks == NULL || clsm->nchunks == 0)
return (0);
/*
@@ -387,12 +388,12 @@ __clsm_close_cursors(WT_CURSOR_LSM *clsm, u_int start, u_int end)
* careful with unsigned integer wrapping.
*/
for (i = start; i < end; i++) {
- if ((c = (clsm)->cursors[i]) != NULL) {
- clsm->cursors[i] = NULL;
+ if ((c = (clsm)->chunks[i]->cursor) != NULL) {
+ clsm->chunks[i]->cursor = NULL;
WT_RET(c->close(c));
}
- if ((bloom = clsm->blooms[i]) != NULL) {
- clsm->blooms[i] = NULL;
+ if ((bloom = clsm->chunks[i]->bloom) != NULL) {
+ clsm->chunks[i]->bloom = NULL;
WT_RET(__wt_bloom_close(bloom));
}
}
@@ -401,6 +402,45 @@ __clsm_close_cursors(WT_CURSOR_LSM *clsm, u_int start, u_int end)
}
 /*
+ * __clsm_resize_chunks --
+ * Grow the cursor's chunks array to hold at least nchunks unit objects.
+ */
+static int
+__clsm_resize_chunks(
+ WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm, u_int nchunks)
+{
+ WT_DECL_RET;
+ WT_LSM_CURSOR_CHUNK *chunk;
+
+ /* Nothing to do if enough unit objects are already allocated. */
+ if (clsm->chunks_count >= nchunks) {
+ return (ret);
+ }
+
+ WT_RET(__wt_realloc_def(session, &clsm->chunks_alloc, nchunks,
+ &clsm->chunks));
+ for (; clsm->chunks_count < nchunks; clsm->chunks_count++) {
+ WT_RET(__wt_calloc_one(session, &chunk));
+ clsm->chunks[clsm->chunks_count] = chunk;
+ }
+ return (ret);
+}
+
+/*
+ * __clsm_free_chunks --
+ * Free every per-chunk unit object, then the chunks array itself.
+ */
+static void
+__clsm_free_chunks(WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm)
+{
+ size_t i;
+ for (i = 0; i < clsm->chunks_count; i++) {
+ __wt_free(session, clsm->chunks[i]);
+ }
+ __wt_free(session, clsm->chunks);
+}
+
+/*
* __clsm_open_cursors --
* Open cursors for the current set of files.
*/
@@ -409,7 +449,7 @@ __clsm_open_cursors(
WT_CURSOR_LSM *clsm, bool update, u_int start_chunk, uint32_t start_id)
{
WT_BTREE *btree;
- WT_CURSOR *c, **cp, *primary;
+ WT_CURSOR *c, *cursor, *primary;
WT_DECL_RET;
WT_LSM_CHUNK *chunk;
WT_LSM_TREE *lsm_tree;
@@ -422,6 +462,7 @@ __clsm_open_cursors(
bool locked;
c = &clsm->iface;
+ cursor = NULL;
session = (WT_SESSION_IMPL *)c->session;
txn = &session->txn;
chunk = NULL;
@@ -465,7 +506,7 @@ __clsm_open_cursors(
retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
nchunks = clsm->nchunks;
ngood = 0;
-
+ WT_ERR(__clsm_resize_chunks(session, clsm, nchunks));
/*
* We may have raced with another merge completing. Check that
* we're starting at the right offset in the chunk array.
@@ -486,16 +527,13 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
WT_ASSERT(session, start_chunk + nchunks <= lsm_tree->nchunks);
} else {
nchunks = lsm_tree->nchunks;
+ WT_ERR(__clsm_resize_chunks(session, clsm, nchunks));
/*
* If we are only opening the cursor for updates, only open the
* primary chunk, plus any other chunks that might be required
* to detect snapshot isolation conflicts.
*/
- if (F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT))
- WT_ERR(__wt_realloc_def(session,
- &clsm->txnid_alloc, nchunks,
- &clsm->switch_txn));
if (F_ISSET(clsm, WT_CLSM_OPEN_READ))
ngood = nupdates = 0;
else if (F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) {
@@ -504,11 +542,11 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
* chunk are globally visible. Copy the maximum
* transaction IDs into the cursor as we go.
*/
- for (ngood = nchunks - 1, nupdates = 1;
- ngood > 0;
+ for (ngood = nchunks - 1, nupdates = 1; ngood > 0;
ngood--, nupdates++) {
chunk = lsm_tree->chunk[ngood - 1];
- clsm->switch_txn[ngood - 1] = chunk->switch_txn;
+ clsm->chunks[ngood - 1]->switch_txn =
+ chunk->switch_txn;
if (__wt_txn_visible_all(
session, chunk->switch_txn))
break;
@@ -519,21 +557,20 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
}
/* Check how many cursors are already open. */
- for (cp = clsm->cursors + ngood;
- ngood < clsm->nchunks && ngood < nchunks;
- cp++, ngood++) {
+ for (; ngood < clsm->nchunks && ngood < nchunks; ngood++) {
chunk = lsm_tree->chunk[ngood];
+ cursor = clsm->chunks[ngood]->cursor;
/* If the cursor isn't open yet, we're done. */
- if (*cp == NULL)
+ if (cursor == NULL)
break;
/* Easy case: the URIs don't match. */
- if (strcmp((*cp)->uri, chunk->uri) != 0)
+ if (strcmp(cursor->uri, chunk->uri) != 0)
break;
/* Make sure the checkpoint config matches. */
- checkpoint = ((WT_CURSOR_BTREE *)*cp)->
+ checkpoint = ((WT_CURSOR_BTREE *)cursor)->
btree->dhandle->checkpoint;
if (checkpoint == NULL &&
F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) &&
@@ -541,7 +578,7 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
break;
/* Make sure the Bloom config matches. */
- if (clsm->blooms[ngood] == NULL &&
+ if (clsm->chunks[ngood]->bloom == NULL &&
F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
break;
}
@@ -559,7 +596,7 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
* full, we may block while closing a cursor. Save the
* generation number and retry if it has changed under us.
*/
- if (clsm->cursors != NULL && ngood < clsm->nchunks) {
+ if (clsm->chunks != NULL && ngood < clsm->nchunks) {
close_range_start = ngood;
close_range_end = clsm->nchunks;
} else if (!F_ISSET(clsm, WT_CLSM_OPEN_READ) && nupdates > 0 ) {
@@ -591,28 +628,23 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
clsm->current = NULL;
}
- WT_ERR(__wt_realloc_def(session,
- &clsm->bloom_alloc, nchunks, &clsm->blooms));
- WT_ERR(__wt_realloc_def(session,
- &clsm->cursor_alloc, nchunks, &clsm->cursors));
-
clsm->nchunks = nchunks;
/* Open the cursors for chunks that have changed. */
- for (i = ngood, cp = clsm->cursors + i; i != nchunks; i++, cp++) {
+ for (i = ngood; i != nchunks; i++) {
chunk = lsm_tree->chunk[i + start_chunk];
/* Copy the maximum transaction ID. */
if (F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT))
- clsm->switch_txn[i] = chunk->switch_txn;
+ clsm->chunks[i]->switch_txn = chunk->switch_txn;
/*
* Read from the checkpoint if the file has been written.
* Once all cursors switch, the in-memory tree can be evicted.
*/
- WT_ASSERT(session, *cp == NULL);
+ WT_ASSERT(session, clsm->chunks[i]->cursor == NULL);
ret = __wt_open_cursor(session, chunk->uri, c,
(F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && !chunk->empty) ?
- ckpt_cfg : NULL, cp);
+ ckpt_cfg : NULL, &clsm->chunks[i]->cursor);
/*
* XXX kludge: we may have an empty chunk where no checkpoint
@@ -620,8 +652,8 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
* chunk instead.
*/
if (ret == WT_NOTFOUND && F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) {
- ret = __wt_open_cursor(
- session, chunk->uri, c, NULL, cp);
+ ret = __wt_open_cursor(session,
+ chunk->uri, c, NULL, &clsm->chunks[i]->cursor);
if (ret == 0)
chunk->empty = 1;
}
@@ -634,25 +666,31 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
* write conflicts with concurrent updates.
*/
if (i != nchunks - 1)
- (*cp)->insert = __wt_curfile_update_check;
+ clsm->chunks[i]->cursor->insert =
+ __wt_curfile_update_check;
if (!F_ISSET(clsm, WT_CLSM_MERGE) &&
F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
WT_ERR(__wt_bloom_open(session, chunk->bloom_uri,
lsm_tree->bloom_bit_count,
lsm_tree->bloom_hash_count,
- c, &clsm->blooms[i]));
+ c, &clsm->chunks[i]->bloom));
/* Child cursors always use overwrite and raw mode. */
- F_SET(*cp, WT_CURSTD_OVERWRITE | WT_CURSTD_RAW);
+ F_SET(clsm->chunks[i]->cursor,
+ WT_CURSTD_OVERWRITE | WT_CURSTD_RAW);
}
+ /* Setup the count values for each chunk in the chunks*/
+ for (i = 0; i != clsm->nchunks; i++)
+ clsm->chunks[i]->count = lsm_tree->chunk[i]->count;
+
/* The last chunk is our new primary. */
if (chunk != NULL &&
!F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) &&
chunk->switch_txn == WT_TXN_NONE) {
clsm->primary_chunk = chunk;
- primary = clsm->cursors[clsm->nchunks - 1];
+ primary = clsm->chunks[clsm->nchunks - 1]->cursor;
/*
* Disable eviction for the in-memory chunk. Also clear the
* bulk load flag here, otherwise eviction will be enabled by
@@ -672,17 +710,19 @@ err:
#ifdef HAVE_DIAGNOSTIC
/* Check that all cursors are open as expected. */
if (ret == 0 && F_ISSET(clsm, WT_CLSM_OPEN_READ)) {
- for (i = 0, cp = clsm->cursors; i != clsm->nchunks; cp++, i++) {
+ for (i = 0; i != clsm->nchunks; i++) {
+ cursor = clsm->chunks[i]->cursor;
chunk = lsm_tree->chunk[i + start_chunk];
- /* Make sure the cursor is open. */
- WT_ASSERT(session, *cp != NULL);
+ /* Make sure the first cursor is open. */
+ WT_ASSERT(session, cursor != NULL);
/* Easy case: the URIs should match. */
- WT_ASSERT(session, strcmp((*cp)->uri, chunk->uri) == 0);
+ WT_ASSERT(
+ session, strcmp(cursor->uri, chunk->uri) == 0);
/* Make sure the checkpoint config matches. */
- checkpoint = ((WT_CURSOR_BTREE *)*cp)->
+ checkpoint = ((WT_CURSOR_BTREE *)cursor)->
btree->dhandle->checkpoint;
WT_ASSERT(session,
(F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) &&
@@ -693,7 +733,8 @@ err:
WT_ASSERT(session,
(F_ISSET(chunk, WT_LSM_CHUNK_BLOOM) &&
!F_ISSET(clsm, WT_CLSM_MERGE)) ?
- clsm->blooms[i] != NULL : clsm->blooms[i] == NULL);
+ clsm->chunks[i]->bloom != NULL :
+ clsm->chunks[i]->bloom == NULL);
}
}
#endif
@@ -902,6 +943,96 @@ err: __clsm_leave(clsm);
}
/*
+ * __clsm_random_chunk --
+ * Pick a chunk at random, weighted by the size of all chunks. Weighting
+ * proportional to documents avoids biasing towards small chunks. Then return
+ * the cursor on the chunk we have picked.
+ */
+static int
+__clsm_random_chunk(WT_SESSION_IMPL *session,
+ WT_CURSOR_LSM *clsm, WT_CURSOR **cursor)
+{
+ uint64_t checked_docs, i, rand_doc, total_docs;
+
+ /*
+ * If the tree is empty we cannot do a random lookup, so return a
+ * WT_NOTFOUND.
+ */
+ if (clsm->nchunks == 0)
+ return (WT_NOTFOUND);
+ for (total_docs = i = 0; i < clsm->nchunks; i++) {
+ total_docs += clsm->chunks[i]->count;
+ }
+ if (total_docs == 0)
+ return (WT_NOTFOUND);
+
+ rand_doc = __wt_random(&session->rnd) % total_docs;
+
+ for (checked_docs = i = 0; i < clsm->nchunks; i++) {
+ checked_docs += clsm->chunks[i]->count;
+ if (rand_doc <= checked_docs) {
+ *cursor = clsm->chunks[i]->cursor;
+ break;
+ }
+ }
+ return (0);
+}
+
+/*
+ * __clsm_next_random --
+ * WT_CURSOR->next method for the LSM cursor type when configured with
+ * next_random.
+ */
+static int
+__clsm_next_random(WT_CURSOR *cursor)
+{
+ WT_CURSOR_LSM *clsm;
+ WT_CURSOR *c;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ int exact;
+
+ c = NULL;
+ clsm = (WT_CURSOR_LSM *)cursor;
+
+ CURSOR_API_CALL(cursor, session, next, NULL);
+ WT_CURSOR_NOVALUE(cursor);
+ WT_ERR(__clsm_enter(clsm, false, false));
+
+ for (;;) {
+ WT_ERR(__clsm_random_chunk(session, clsm, &c));
+ /*
+ * This call to next_random on the chunk can potentially end in
+ * WT_NOTFOUND if the chunk we picked is empty: pick again.
+ * NOTE(review): retries are unbounded -- confirm no spin.
+ */
+ ret = __wt_curfile_next_random(c);
+ if (ret == WT_NOTFOUND)
+ continue;
+
+ WT_ERR(ret);
+ F_SET(cursor, WT_CURSTD_KEY_INT);
+ WT_ERR(c->get_key(c, &cursor->key));
+ /*
+ * Search near the current key to resolve any tombstones
+ * and position to a valid document. If we see a
+ * WT_NOTFOUND here that is valid, as the tree has no
+ * documents visible to us.
+ */
+ WT_ERR(__clsm_search_near(cursor, &exact));
+ break;
+ }
+
+ /* Success skips the error path, which clears the key/value flags. */
+ if (0) {
+err: F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+ }
+ __clsm_leave(clsm);
+ API_END(session, ret);
+ return (ret);
+}
+
+/*
* __clsm_prev --
* WT_CURSOR->prev method for the LSM cursor type.
*/
@@ -1072,7 +1203,7 @@ __clsm_lookup(WT_CURSOR_LSM *clsm, WT_ITEM *value)
WT_FORALL_CURSORS(clsm, c, i) {
/* If there is a Bloom filter, see if we can skip the read. */
bloom = NULL;
- if ((bloom = clsm->blooms[i]) != NULL) {
+ if ((bloom = clsm->chunks[i]->bloom) != NULL) {
if (!have_hash) {
__wt_bloom_hash(bloom, &cursor->key, &bhash);
have_hash = true;
@@ -1259,7 +1390,12 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp)
*/
F_CLR(cursor, WT_CURSTD_KEY_SET);
F_SET(cursor, WT_CURSTD_KEY_INT);
- if ((ret = cursor->next(cursor)) == 0) {
+ /*
+ * We call __clsm_next here as we want to advance
+ * forward. If we are a random LSM cursor calling next
+ * on the cursor will not advance as we intend.
+ */
+ if ((ret = __clsm_next(cursor)) == 0) {
cmp = 1;
deleted = false;
}
@@ -1268,7 +1404,11 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp)
}
if (deleted) {
clsm->current = NULL;
- WT_ERR(cursor->prev(cursor));
+ /*
+ * We call prev directly here as cursor->prev may be "invalid"
+ * if this is a random cursor.
+ */
+ WT_ERR(__clsm_prev(cursor));
cmp = -1;
}
*exactp = cmp;
@@ -1312,7 +1452,7 @@ __clsm_put(WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm,
* Clear the existing cursor position. Don't clear the primary cursor:
* we're about to use it anyway.
*/
- primary = clsm->cursors[clsm->nchunks - 1];
+ primary = clsm->chunks[clsm->nchunks - 1]->cursor;
WT_RET(__clsm_reset_cursors(clsm, primary));
/* If necessary, set the position for future scans. */
@@ -1322,12 +1462,12 @@ __clsm_put(WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm,
for (i = 0, slot = clsm->nchunks - 1; i < clsm->nupdates; i++, slot--) {
/* Check if we need to keep updating old chunks. */
if (i > 0 &&
- __wt_txn_visible(session, clsm->switch_txn[slot])) {
+ __wt_txn_visible(session, clsm->chunks[slot]->switch_txn)) {
clsm->nupdates = i;
break;
}
- c = clsm->cursors[slot];
+ c = clsm->chunks[slot]->cursor;
c->set_key(c, key);
c->set_value(c, value);
WT_RET((position && i == 0) ? c->update(c) : c->insert(c));
@@ -1485,9 +1625,7 @@ __wt_clsm_close(WT_CURSOR *cursor)
clsm = (WT_CURSOR_LSM *)cursor;
CURSOR_API_CALL(cursor, session, close, NULL);
WT_TRET(__clsm_close_cursors(clsm, 0, clsm->nchunks));
- __wt_free(session, clsm->blooms);
- __wt_free(session, clsm->cursors);
- __wt_free(session, clsm->switch_txn);
+ __clsm_free_chunks(session, clsm);
/* In case we were somehow left positioned, clear that. */
__clsm_leave(clsm);
@@ -1588,6 +1726,13 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
*/
clsm->dsk_gen = 0;
+ /* If the next_random option is set, configure a random cursor */
+ WT_ERR(__wt_config_gets_def(session, cfg, "next_random", 0, &cval));
+ if (cval.val != 0) {
+ __wt_cursor_set_notsup(cursor);
+ cursor->next = __clsm_next_random;
+ }
+
WT_ERR(__wt_cursor_init(cursor, cursor->uri, owner, cfg, cursorp));
if (bulk)
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor_bulk.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor_bulk.c
index 607ca0c9705..319426de3f0 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_cursor_bulk.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor_bulk.c
@@ -28,9 +28,8 @@ __clsm_close_bulk(WT_CURSOR *cursor)
session = (WT_SESSION_IMPL *)clsm->iface.session;
/* Close the bulk cursor to ensure the chunk is written to disk. */
- bulk_cursor = clsm->cursors[0];
+ bulk_cursor = clsm->chunks[0]->cursor;
WT_RET(bulk_cursor->close(bulk_cursor));
- clsm->cursors[0] = NULL;
clsm->nchunks = 0;
/* Set ondisk, and flush the metadata */
@@ -75,7 +74,7 @@ __clsm_insert_bulk(WT_CURSOR *cursor)
WT_ASSERT(session, lsm_tree->nchunks == 1 && clsm->nchunks == 1);
++chunk->count;
chunk->size += cursor->key.size + cursor->value.size;
- bulk_cursor = *clsm->cursors;
+ bulk_cursor = clsm->chunks[0]->cursor;
bulk_cursor->set_key(bulk_cursor, &cursor->key);
bulk_cursor->set_value(bulk_cursor, &cursor->value);
WT_RET(bulk_cursor->insert(bulk_cursor));
@@ -124,11 +123,10 @@ __wt_clsm_open_bulk(WT_CURSOR_LSM *clsm, const char *cfg[])
* for a bloom filter - it makes cleanup simpler. Cleaned up by
* cursor close on error.
*/
- WT_RET(__wt_calloc_one(session, &clsm->blooms));
- clsm->bloom_alloc = 1;
- WT_RET(__wt_calloc_one(session, &clsm->cursors));
- clsm->cursor_alloc = 1;
- clsm->nchunks = 1;
+ WT_RET(
+ __wt_realloc_def(session, &clsm->chunks_alloc, 1, &clsm->chunks));
+ WT_RET(__wt_calloc_one(session, &clsm->chunks[0]));
+ clsm->chunks_count = clsm->nchunks = 1;
/*
* Open a bulk cursor on the first chunk in the tree - take a read
@@ -139,7 +137,7 @@ __wt_clsm_open_bulk(WT_CURSOR_LSM *clsm, const char *cfg[])
*/
WT_RET(__wt_open_cursor(session,
lsm_tree->chunk[0]->uri, &clsm->iface, cfg, &bulk_cursor));
- clsm->cursors[0] = bulk_cursor;
+ clsm->chunks[0]->cursor = bulk_cursor;
/* LSM cursors are always raw */
F_SET(bulk_cursor, WT_CURSTD_RAW);
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_manager.c b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
index 5a5140b9c3a..0a5f4fdd8b5 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_manager.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
@@ -392,7 +392,7 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session)
TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q) {
if (!lsm_tree->active)
continue;
- WT_ERR(__wt_epoch(session, &now));
+ __wt_epoch(session, &now);
pushms = lsm_tree->work_push_ts.tv_sec == 0 ? 0 :
WT_TIMEDIFF_MS(now, lsm_tree->work_push_ts);
fillms = 3 * lsm_tree->chunk_fill_ms;
@@ -651,7 +651,7 @@ __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session,
return (0);
}
- WT_RET(__wt_epoch(session, &lsm_tree->work_push_ts));
+ __wt_epoch(session, &lsm_tree->work_push_ts);
WT_RET(__wt_calloc_one(session, &entry));
entry->type = type;
entry->flags = flags;
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_merge.c b/src/third_party/wiredtiger/src/lsm/lsm_merge.c
index 4bbfcfd4411..493855d489a 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_merge.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_merge.c
@@ -54,7 +54,7 @@ __lsm_merge_aggressive_clear(WT_LSM_TREE *lsm_tree)
* __lsm_merge_aggressive_update --
* Update the merge aggressiveness for an LSM tree.
*/
-static int
+static void
__lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
struct timespec now;
@@ -72,7 +72,7 @@ __lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
if (!lsm_tree->modified ||
F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) {
lsm_tree->merge_aggressiveness = 10;
- return (0);
+ return;
}
/*
@@ -81,7 +81,7 @@ __lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
*/
if (lsm_tree->chunks_flushed <= lsm_tree->merge_min) {
__lsm_merge_aggressive_clear(lsm_tree);
- return (0);
+ return;
}
/*
@@ -91,10 +91,10 @@ __lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
*/
if (!F_ISSET(lsm_tree, WT_LSM_TREE_AGGRESSIVE_TIMER)) {
F_SET(lsm_tree, WT_LSM_TREE_AGGRESSIVE_TIMER);
- return (__wt_epoch(session, &lsm_tree->merge_aggressive_ts));
+ __wt_epoch(session, &lsm_tree->merge_aggressive_ts);
}
- WT_RET(__wt_epoch(session, &now));
+ __wt_epoch(session, &now);
msec_since_last_merge =
WT_TIMEDIFF_MS(now, lsm_tree->merge_aggressive_ts);
@@ -113,7 +113,7 @@ __lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
* generates a variable load.
*/
if (msec_since_last_merge < msec_to_create_merge)
- return (0);
+ return;
/*
* Bump how aggressively we look for merges based on how long since
@@ -134,7 +134,6 @@ __lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
msec_since_last_merge, lsm_tree->chunk_fill_ms);
lsm_tree->merge_aggressiveness = new_aggressive;
}
- return (0);
}
/*
@@ -326,7 +325,7 @@ retry_find:
goto retry_find;
}
/* Consider getting aggressive if no merge was found */
- WT_RET(__lsm_merge_aggressive_update(session, lsm_tree));
+ __lsm_merge_aggressive_update(session, lsm_tree);
return (WT_NOTFOUND);
}
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_stat.c b/src/third_party/wiredtiger/src/lsm/lsm_stat.c
index f4f5a0acce8..3fe3ca1ba81 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_stat.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_stat.c
@@ -42,11 +42,11 @@ __curstat_lsm_init(
if (cst->flags != 0) {
(void)snprintf(config, sizeof(config),
"statistics=(%s%s%s%s)",
- F_ISSET(cst, WT_CONN_STAT_ALL) ? "all," : "",
- F_ISSET(cst, WT_CONN_STAT_CLEAR) ? "clear," : "",
- !F_ISSET(cst, WT_CONN_STAT_ALL) &&
- F_ISSET(cst, WT_CONN_STAT_FAST) ? "fast," : "",
- F_ISSET(cst, WT_CONN_STAT_SIZE) ? "size," : "");
+ F_ISSET(cst, WT_STAT_TYPE_ALL) ? "all," : "",
+ F_ISSET(cst, WT_STAT_CLEAR) ? "clear," : "",
+ !F_ISSET(cst, WT_STAT_TYPE_ALL) &&
+ F_ISSET(cst, WT_STAT_TYPE_FAST) ? "fast," : "",
+ F_ISSET(cst, WT_STAT_TYPE_SIZE) ? "size," : "");
cfg[1] = disk_cfg[1] = config;
}
@@ -132,26 +132,26 @@ __curstat_lsm_init(
/* Include, and optionally clear, LSM-level specific information. */
WT_STAT_WRITE(session, stats, bloom_miss, lsm_tree->bloom_miss);
- if (F_ISSET(cst, WT_CONN_STAT_CLEAR))
+ if (F_ISSET(cst, WT_STAT_CLEAR))
lsm_tree->bloom_miss = 0;
WT_STAT_WRITE(session, stats, bloom_hit, lsm_tree->bloom_hit);
- if (F_ISSET(cst, WT_CONN_STAT_CLEAR))
+ if (F_ISSET(cst, WT_STAT_CLEAR))
lsm_tree->bloom_hit = 0;
WT_STAT_WRITE(session,
stats, bloom_false_positive, lsm_tree->bloom_false_positive);
- if (F_ISSET(cst, WT_CONN_STAT_CLEAR))
+ if (F_ISSET(cst, WT_STAT_CLEAR))
lsm_tree->bloom_false_positive = 0;
WT_STAT_WRITE(session,
stats, lsm_lookup_no_bloom, lsm_tree->lsm_lookup_no_bloom);
- if (F_ISSET(cst, WT_CONN_STAT_CLEAR))
+ if (F_ISSET(cst, WT_STAT_CLEAR))
lsm_tree->lsm_lookup_no_bloom = 0;
WT_STAT_WRITE(session,
stats, lsm_checkpoint_throttle, lsm_tree->lsm_checkpoint_throttle);
- if (F_ISSET(cst, WT_CONN_STAT_CLEAR))
+ if (F_ISSET(cst, WT_STAT_CLEAR))
lsm_tree->lsm_checkpoint_throttle = 0;
WT_STAT_WRITE(session,
stats, lsm_merge_throttle, lsm_tree->lsm_merge_throttle);
- if (F_ISSET(cst, WT_CONN_STAT_CLEAR))
+ if (F_ISSET(cst, WT_STAT_CLEAR))
lsm_tree->lsm_merge_throttle = 0;
__wt_curstat_dsrc_final(cst);
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
index db9fd581110..0054dcd1583 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
@@ -265,7 +265,7 @@ __wt_lsm_tree_setup_chunk(
WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk)
{
WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
- WT_RET(__wt_epoch(session, &chunk->create_ts));
+ __wt_epoch(session, &chunk->create_ts);
WT_RET(__wt_lsm_tree_chunk_name(
session, lsm_tree, chunk->id, &chunk->uri));
@@ -496,7 +496,7 @@ __lsm_tree_open(WT_SESSION_IMPL *session,
lsm_tree->queue_ref = 0;
/* Set a flush timestamp as a baseline. */
- WT_ERR(__wt_epoch(session, &lsm_tree->last_flush_ts));
+ __wt_epoch(session, &lsm_tree->last_flush_ts);
/* Now the tree is setup, make it visible to others. */
TAILQ_INSERT_HEAD(&S2C(session)->lsmqh, lsm_tree, q);
@@ -1139,7 +1139,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
return (0);
}
- WT_ERR(__wt_seconds(session, &begin));
+ __wt_seconds(session, &begin);
/*
* Compacting has two distinct phases.
@@ -1267,7 +1267,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
break;
}
__wt_sleep(1, 0);
- WT_ERR(__wt_seconds(session, &end));
+ __wt_seconds(session, &end);
if (session->compact->max_time > 0 &&
session->compact->max_time < (uint64_t)(end - begin)) {
WT_ERR(ETIMEDOUT);
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
index 72bcf56b3c4..917104031fc 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
@@ -358,7 +358,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
WT_ERR(__wt_lsm_tree_set_chunk_size(session, chunk));
/* Update the flush timestamp to help track ongoing progress. */
- WT_ERR(__wt_epoch(session, &lsm_tree->last_flush_ts));
+ __wt_epoch(session, &lsm_tree->last_flush_ts);
++lsm_tree->chunks_flushed;
/* Lock the tree, mark the chunk as on disk and update the metadata. */
diff --git a/src/third_party/wiredtiger/src/meta/meta_ckpt.c b/src/third_party/wiredtiger/src/meta/meta_ckpt.c
index 2b7719c3241..b985104c2eb 100644
--- a/src/third_party/wiredtiger/src/meta/meta_ckpt.c
+++ b/src/third_party/wiredtiger/src/meta/meta_ckpt.c
@@ -424,7 +424,7 @@ __wt_meta_ckptlist_set(WT_SESSION_IMPL *session,
* guaranteed, a time_t has to be an arithmetic type,
* but not an integral type.
*/
- WT_ERR(__wt_seconds(session, &secs));
+ __wt_seconds(session, &secs);
ckpt->sec = (uintmax_t)secs;
}
if (strcmp(ckpt->name, WT_CHECKPOINT) == 0)
diff --git a/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c b/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c
index b25bb8c25d1..842bb6eeec9 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c
@@ -63,7 +63,7 @@ __wt_cond_wait_signal(
locked = true;
if (usecs > 0) {
- WT_ERR(__wt_epoch(session, &ts));
+ __wt_epoch(session, &ts);
ts.tv_sec += (time_t)
(((uint64_t)ts.tv_nsec + WT_THOUSAND * usecs) / WT_BILLION);
ts.tv_nsec = (long)
diff --git a/src/third_party/wiredtiger/src/os_posix/os_time.c b/src/third_party/wiredtiger/src/os_posix/os_time.c
index b1b22a8e684..719e214696b 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_time.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_time.c
@@ -12,26 +12,35 @@
* __wt_epoch --
* Return the time since the Epoch.
*/
-int
+void
__wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
{
WT_DECL_RET;
+ /*
+ * This function doesn't return an error, but panics on failure (which
+ * should never happen, it's done this way to simplify error handling
+ * in the caller). However, some compilers complain about using garbage
+ * values. Initializing the values avoids the complaint.
+ */
+ tsp->tv_sec = 0;
+ tsp->tv_nsec = 0;
+
#if defined(HAVE_CLOCK_GETTIME)
- WT_SYSCALL(clock_gettime(CLOCK_REALTIME, tsp), ret);
+ WT_SYSCALL_RETRY(clock_gettime(CLOCK_REALTIME, tsp), ret);
if (ret == 0)
- return (0);
- WT_RET_MSG(session, ret, "clock_gettime");
+ return;
+ WT_PANIC_MSG(session, ret, "clock_gettime");
#elif defined(HAVE_GETTIMEOFDAY)
struct timeval v;
- WT_SYSCALL(gettimeofday(&v, NULL), ret);
+ WT_SYSCALL_RETRY(gettimeofday(&v, NULL), ret);
if (ret == 0) {
tsp->tv_sec = v.tv_sec;
tsp->tv_nsec = v.tv_usec * WT_THOUSAND;
- return (0);
+ return;
}
- WT_RET_MSG(session, ret, "gettimeofday");
+ WT_PANIC_MSG(session, ret, "gettimeofday");
#else
NO TIME-OF-DAY IMPLEMENTATION: see src/os_posix/os_time.c
#endif
diff --git a/src/third_party/wiredtiger/src/os_win/os_time.c b/src/third_party/wiredtiger/src/os_win/os_time.c
index e784b5d8a36..6aa5b3719f6 100644
--- a/src/third_party/wiredtiger/src/os_win/os_time.c
+++ b/src/third_party/wiredtiger/src/os_win/os_time.c
@@ -12,11 +12,11 @@
* __wt_epoch --
* Return the time since the Epoch.
*/
-int
+void
__wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
{
- uint64_t ns100;
FILETIME time;
+ uint64_t ns100;
WT_UNUSED(session);
@@ -26,8 +26,6 @@ __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
- 116444736000000000LL;
tsp->tv_sec = ns100 / 10000000;
tsp->tv_nsec = (long)((ns100 % 10000000) * 100);
-
- return (0);
}
/*
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 9c38c535301..810f3fd976b 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -451,19 +451,18 @@ __wt_reconcile(WT_SESSION_IMPL *session,
}
/*
- * When application threads perform eviction, don't cache block manager
- * or reconciliation structures (even across calls), we can have a
- * significant number of application threads doing eviction at the same
- * time with large items. We ignore checkpoints, once the checkpoint
- * completes, all unnecessary session resources will be discarded.
+ * When threads perform eviction, don't cache block manager or
+ * reconciliation structures (even across calls), we can have a
+ * significant number of threads doing eviction at the same time with
+ * large items. We ignore checkpoints, once the checkpoint completes,
+ * all unnecessary session resources will be discarded.
*
- * Even in application threads doing checkpoints or in internal threads
- * doing any reconciliation, clean up reconciliation resources. Some
- * workloads have millions of boundary structures in a reconciliation
- * and we don't want to tie that memory down, even across calls.
+ * Even in application threads doing checkpoints, clean up
+ * reconciliation resources. Some workloads have millions of boundary
+ * structures in a reconciliation and we don't want to tie that memory
+ * down, even across calls.
*/
- if (WT_SESSION_IS_CHECKPOINT(session) ||
- F_ISSET(session, WT_SESSION_INTERNAL))
+ if (WT_SESSION_IS_CHECKPOINT(session))
__rec_bnd_cleanup(session, r, false);
else {
/*
@@ -564,10 +563,12 @@ __rec_write_status(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
* barrier after the change for clarity (the requirement is the
* flag be set before a subsequent checkpoint reads it, and
* as the current checkpoint is waiting on this reconciliation
- * to complete, there's no risk of that happening)
+ * to complete, there's no risk of that happening).
*/
- btree->modified = 1;
+ btree->modified = true;
WT_FULL_BARRIER();
+ if (!S2C(session)->modified)
+ S2C(session)->modified = true;
/*
* Eviction should only be here if following the save/restore
@@ -3335,7 +3336,7 @@ supd_check_complete:
__wt_verbose(session, WT_VERB_SPLIT,
"Reconciliation creating a page with %" PRIu32
" entries, memory footprint %" WT_SIZET_FMT
- ", page count %" PRIu32 ", %s, split state: %d\n",
+ ", page count %" PRIu32 ", %s, split state: %d",
r->entries, r->page->memory_footprint, r->bnd_next,
F_ISSET(r, WT_EVICTING) ? "evict" : "checkpoint",
r->bnd_state);
diff --git a/src/third_party/wiredtiger/src/schema/schema_stat.c b/src/third_party/wiredtiger/src/schema/schema_stat.c
index 1cd39d97364..345f9164e9b 100644
--- a/src/third_party/wiredtiger/src/schema/schema_stat.c
+++ b/src/third_party/wiredtiger/src/schema/schema_stat.c
@@ -137,7 +137,7 @@ __wt_curstat_table_init(WT_SESSION_IMPL *session,
* If only gathering table size statistics, try a fast path that
* avoids the schema and table list locks.
*/
- if (F_ISSET(cst, WT_CONN_STAT_SIZE)) {
+ if (F_ISSET(cst, WT_STAT_TYPE_SIZE)) {
WT_RET(__curstat_size_only(session, uri, &was_fast, cst));
if (was_fast)
return (0);
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index 0d3fcad3184..f594450db74 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -68,9 +68,10 @@ __wt_session_copy_values(WT_SESSION_IMPL *session)
* unless the cursor is reading from a checkpoint.
*/
WT_TXN_STATE *txn_state = WT_SESSION_TXN_STATE(session);
- WT_ASSERT(session, txn_state->snap_min != WT_TXN_NONE ||
- (WT_PREFIX_MATCH(cursor->uri, "file:") &&
- F_ISSET((WT_CURSOR_BTREE *)cursor, WT_CBT_NO_TXN)));
+ WT_ASSERT(session,
+ txn_state->pinned_id != WT_TXN_NONE ||
+ (WT_PREFIX_MATCH(cursor->uri, "file:") &&
+ F_ISSET((WT_CURSOR_BTREE *)cursor, WT_CBT_NO_TXN)));
#endif
F_CLR(cursor, WT_CURSTD_VALUE_INT);
@@ -1417,10 +1418,10 @@ __session_transaction_pinned_range(WT_SESSION *wt_session, uint64_t *prange)
/* Assign pinned to the lesser of id or snap_min */
if (txn_state->id != WT_TXN_NONE &&
- WT_TXNID_LT(txn_state->id, txn_state->snap_min))
+ WT_TXNID_LT(txn_state->id, txn_state->pinned_id))
pinned = txn_state->id;
else
- pinned = txn_state->snap_min;
+ pinned = txn_state->pinned_id;
if (pinned == WT_TXN_NONE)
*prange = 0;
@@ -1494,14 +1495,14 @@ __session_transaction_sync(WT_SESSION *wt_session, const char *config)
if (timeout_ms == 0)
WT_ERR(ETIMEDOUT);
- WT_ERR(__wt_epoch(session, &start));
+ __wt_epoch(session, &start);
/*
* Keep checking the LSNs until we find it is stable or we reach
* our timeout.
*/
while (__wt_log_cmp(&session->bg_sync_lsn, &log->sync_lsn) > 0) {
__wt_cond_signal(session, conn->log_file_cond);
- WT_ERR(__wt_epoch(session, &now));
+ __wt_epoch(session, &now);
waited_ms = WT_TIMEDIFF_MS(now, start);
if (forever || waited_ms < timeout_ms)
/*
@@ -1756,11 +1757,13 @@ __open_session(WT_CONNECTION_IMPL *conn,
if (i >= conn->session_cnt) /* Defend against off-by-one errors. */
conn->session_cnt = i + 1;
- session_ret->id = i;
session_ret->iface =
F_ISSET(conn, WT_CONN_READONLY) ? stds_readonly : stds;
session_ret->iface.connection = &conn->iface;
+ session_ret->name = NULL;
+ session_ret->id = i;
+
WT_ERR(__wt_cond_alloc(session, "session", false, &session_ret->cond));
if (WT_SESSION_FIRST_USE(session_ret))
@@ -1776,10 +1779,10 @@ __open_session(WT_CONNECTION_IMPL *conn,
* Allocate the table hash array as well.
*/
if (session_ret->dhhash == NULL)
- WT_ERR(__wt_calloc(session_ret, WT_HASH_ARRAY_SIZE,
+ WT_ERR(__wt_calloc(session, WT_HASH_ARRAY_SIZE,
sizeof(struct __dhandles_hash), &session_ret->dhhash));
if (session_ret->tablehash == NULL)
- WT_ERR(__wt_calloc(session_ret, WT_HASH_ARRAY_SIZE,
+ WT_ERR(__wt_calloc(session, WT_HASH_ARRAY_SIZE,
sizeof(struct __tables_hash), &session_ret->tablehash));
for (i = 0; i < WT_HASH_ARRAY_SIZE; i++) {
TAILQ_INIT(&session_ret->dhhash[i]);
@@ -1788,7 +1791,7 @@ __open_session(WT_CONNECTION_IMPL *conn,
/* Initialize transaction support: default to read-committed. */
session_ret->isolation = WT_ISO_READ_COMMITTED;
- WT_ERR(__wt_txn_init(session_ret));
+ WT_ERR(__wt_txn_init(session, session_ret));
/*
* The session's hazard pointer memory isn't discarded during normal
@@ -1807,6 +1810,9 @@ __open_session(WT_CONNECTION_IMPL *conn,
*/
session_ret->hazard_size = 0;
+ /* Cache the offset of this session's statistics bucket. */
+ session_ret->stat_bucket = WT_STATS_SLOT_ID(session);
+
/*
* Configuration: currently, the configuration for open_session is the
* same as session.reconfigure, so use that function.
@@ -1815,8 +1821,6 @@ __open_session(WT_CONNECTION_IMPL *conn,
WT_ERR(
__session_reconfigure((WT_SESSION *)session_ret, config));
- session_ret->name = NULL;
-
/*
* Publish: make the entry visible to server threads. There must be a
* barrier for two reasons, to ensure structure fields are set before
diff --git a/src/third_party/wiredtiger/src/session/session_compact.c b/src/third_party/wiredtiger/src/session/session_compact.c
index f03d5d34bac..66635007723 100644
--- a/src/third_party/wiredtiger/src/session/session_compact.c
+++ b/src/third_party/wiredtiger/src/session/session_compact.c
@@ -179,17 +179,16 @@ __compact_handle_append(WT_SESSION_IMPL *session, const char *cfg[])
* Check if the timeout has been exceeded.
*/
static int
-__session_compact_check_timeout(
- WT_SESSION_IMPL *session, struct timespec begin)
+__session_compact_check_timeout(WT_SESSION_IMPL *session, struct timespec begin)
{
struct timespec end;
if (session->compact->max_time == 0)
return (0);
- WT_RET(__wt_epoch(session, &end));
+ __wt_epoch(session, &end);
if (session->compact->max_time < WT_TIMEDIFF_SEC(end, begin))
- WT_RET(ETIMEDOUT);
+ return (ETIMEDOUT);
return (0);
}
@@ -219,7 +218,7 @@ __compact_file(WT_SESSION_IMPL *session, const char *cfg[])
session, t, "target=(\"%s\"),force=1", dhandle->name));
checkpoint_cfg[1] = t->data;
- WT_ERR(__wt_epoch(session, &start_time));
+ __wt_epoch(session, &start_time);
/*
* We compact 10% of the file on each pass (but the overall size of the
diff --git a/src/third_party/wiredtiger/src/session/session_dhandle.c b/src/third_party/wiredtiger/src/session/session_dhandle.c
index e76407567bc..725854c6001 100644
--- a/src/third_party/wiredtiger/src/session/session_dhandle.c
+++ b/src/third_party/wiredtiger/src/session/session_dhandle.c
@@ -8,8 +8,6 @@
#include "wt_internal.h"
-static int __session_dhandle_sweep(WT_SESSION_IMPL *);
-
/*
* __session_add_dhandle --
* Add a handle to the session's cache.
@@ -371,7 +369,7 @@ __wt_session_close_cache(WT_SESSION_IMPL *session)
* __session_dhandle_sweep --
* Discard any session dhandles that are not open.
*/
-static int
+static void
__session_dhandle_sweep(WT_SESSION_IMPL *session)
{
WT_CONNECTION_IMPL *conn;
@@ -385,9 +383,9 @@ __session_dhandle_sweep(WT_SESSION_IMPL *session)
* Periodically sweep for dead handles; if we've swept recently, don't
* do it again.
*/
- WT_RET(__wt_seconds(session, &now));
+ __wt_seconds(session, &now);
if (difftime(now, session->last_sweep) < conn->sweep_interval)
- return (0);
+ return;
session->last_sweep = now;
WT_STAT_CONN_INCR(session, dh_session_sweeps);
@@ -408,7 +406,6 @@ __session_dhandle_sweep(WT_SESSION_IMPL *session)
}
dhandle_cache = dhandle_cache_next;
}
- return (0);
}
/*
@@ -446,7 +443,7 @@ __session_get_dhandle(
}
/* Sweep the handle list to remove any dead handles. */
- WT_RET(__session_dhandle_sweep(session));
+ __session_dhandle_sweep(session);
/*
* We didn't find a match in the session cache, search the shared
diff --git a/src/third_party/wiredtiger/src/support/err.c b/src/third_party/wiredtiger/src/support/err.c
index 8bfac250b3a..3ecbab1cbe9 100644
--- a/src/third_party/wiredtiger/src/support/err.c
+++ b/src/third_party/wiredtiger/src/support/err.c
@@ -162,7 +162,6 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error,
WT_SESSION *wt_session;
struct timespec ts;
size_t len, remain, wlen;
- int prefix_cnt;
const char *err, *prefix;
char *end, *p, tid[128];
@@ -211,44 +210,32 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error,
* name, and the session's name. Write them as a comma-separate list,
* followed by a colon.
*/
- prefix_cnt = 0;
- if (__wt_epoch(session, &ts) == 0) {
- __wt_thread_id(tid, sizeof(tid));
- remain = WT_PTRDIFF(end, p);
- wlen = (size_t)snprintf(p, remain,
- "[%" PRIuMAX ":%" PRIuMAX "][%s]",
- (uintmax_t)ts.tv_sec,
- (uintmax_t)ts.tv_nsec / WT_THOUSAND, tid);
- p = wlen >= remain ? end : p + wlen;
- prefix_cnt = 1;
- }
+ __wt_epoch(session, &ts);
+ __wt_thread_id(tid, sizeof(tid));
+ remain = WT_PTRDIFF(end, p);
+ wlen = (size_t)snprintf(p, remain, "[%" PRIuMAX ":%" PRIuMAX "][%s]",
+ (uintmax_t)ts.tv_sec, (uintmax_t)ts.tv_nsec / WT_THOUSAND, tid);
+ p = wlen >= remain ? end : p + wlen;
+
if ((prefix = S2C(session)->error_prefix) != NULL) {
remain = WT_PTRDIFF(end, p);
- wlen = (size_t)snprintf(p, remain,
- "%s%s", prefix_cnt == 0 ? "" : ", ", prefix);
+ wlen = (size_t)snprintf(p, remain, ", %s", prefix);
p = wlen >= remain ? end : p + wlen;
- prefix_cnt = 1;
}
prefix = session->dhandle == NULL ? NULL : session->dhandle->name;
if (prefix != NULL) {
remain = WT_PTRDIFF(end, p);
- wlen = (size_t)snprintf(p, remain,
- "%s%s", prefix_cnt == 0 ? "" : ", ", prefix);
+ wlen = (size_t)snprintf(p, remain, ", %s", prefix);
p = wlen >= remain ? end : p + wlen;
- prefix_cnt = 1;
}
if ((prefix = session->name) != NULL) {
remain = WT_PTRDIFF(end, p);
- wlen = (size_t)snprintf(p, remain,
- "%s%s", prefix_cnt == 0 ? "" : ", ", prefix);
- p = wlen >= remain ? end : p + wlen;
- prefix_cnt = 1;
- }
- if (prefix_cnt != 0) {
- remain = WT_PTRDIFF(end, p);
- wlen = (size_t)snprintf(p, remain, ": ");
+ wlen = (size_t)snprintf(p, remain, ", %s", prefix);
p = wlen >= remain ? end : p + wlen;
}
+ remain = WT_PTRDIFF(end, p);
+ wlen = (size_t)snprintf(p, remain, ": ");
+ p = wlen >= remain ? end : p + wlen;
if (file_name != NULL) {
remain = WT_PTRDIFF(end, p);
diff --git a/src/third_party/wiredtiger/src/support/rand.c b/src/third_party/wiredtiger/src/support/rand.c
index d2e4cd27aab..025b18e4ed3 100644
--- a/src/third_party/wiredtiger/src/support/rand.c
+++ b/src/third_party/wiredtiger/src/support/rand.c
@@ -66,20 +66,18 @@ __wt_random_init(WT_RAND_STATE volatile * rnd_state)
* threads and we want each thread to initialize its own random state based
* on a different random seed.
*/
-int
+void
__wt_random_init_seed(
WT_SESSION_IMPL *session, WT_RAND_STATE volatile * rnd_state)
{
struct timespec ts;
WT_RAND_STATE rnd;
- WT_RET(__wt_epoch(session, &ts));
+ __wt_epoch(session, &ts);
M_W(rnd) = (uint32_t)(ts.tv_nsec + 521288629);
M_Z(rnd) = (uint32_t)(ts.tv_nsec + 362436069);
*rnd_state = rnd;
-
- return (0);
}
/*
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 7150223e6cb..6e8e218a0db 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -65,6 +65,24 @@ static const char * const __stats_dsrc_desc[] = {
"cache: pages written from cache",
"cache: pages written requiring in-memory restoration",
"cache: unmodified pages evicted",
+ "cache_walk: Average difference between current eviction generation when the page was last considered",
+ "cache_walk: Average on-disk page image size seen",
+ "cache_walk: Clean pages currently in cache",
+ "cache_walk: Current eviction generation",
+ "cache_walk: Dirty pages currently in cache",
+ "cache_walk: Entries in the root page",
+ "cache_walk: Internal pages currently in cache",
+ "cache_walk: Leaf pages currently in cache",
+ "cache_walk: Maximum difference between current eviction generation when the page was last considered",
+ "cache_walk: Maximum page size seen",
+ "cache_walk: Minimum on-disk page image size seen",
+ "cache_walk: On-disk page image sizes smaller than a single allocation unit",
+ "cache_walk: Pages created in memory and never written",
+ "cache_walk: Pages currently queued for eviction",
+ "cache_walk: Pages that could not be queued for eviction",
+ "cache_walk: Refs skipped during cache traversal",
+ "cache_walk: Size of the root page",
+ "cache_walk: Total number of pages currently in cache",
"compression: compressed pages read",
"compression: compressed pages written",
"compression: page written failed to compress",
@@ -196,6 +214,24 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->cache_write = 0;
stats->cache_write_restore = 0;
stats->cache_eviction_clean = 0;
+ /* not clearing cache_state_gen_avg_gap */
+ /* not clearing cache_state_avg_written_size */
+ /* not clearing cache_state_pages_clean */
+ /* not clearing cache_state_gen_current */
+ /* not clearing cache_state_pages_dirty */
+ /* not clearing cache_state_root_entries */
+ /* not clearing cache_state_pages_internal */
+ /* not clearing cache_state_pages_leaf */
+ /* not clearing cache_state_gen_max_gap */
+ /* not clearing cache_state_max_pagesize */
+ /* not clearing cache_state_min_written_size */
+ /* not clearing cache_state_smaller_alloc_size */
+ /* not clearing cache_state_memory */
+ /* not clearing cache_state_queued */
+ /* not clearing cache_state_not_queueable */
+ /* not clearing cache_state_refs_skipped */
+ /* not clearing cache_state_root_size */
+ /* not clearing cache_state_pages */
stats->compress_read = 0;
stats->compress_write = 0;
stats->compress_write_fail = 0;
@@ -325,6 +361,27 @@ __wt_stat_dsrc_aggregate_single(
to->cache_write += from->cache_write;
to->cache_write_restore += from->cache_write_restore;
to->cache_eviction_clean += from->cache_eviction_clean;
+ to->cache_state_gen_avg_gap += from->cache_state_gen_avg_gap;
+ to->cache_state_avg_written_size +=
+ from->cache_state_avg_written_size;
+ to->cache_state_pages_clean += from->cache_state_pages_clean;
+ to->cache_state_gen_current += from->cache_state_gen_current;
+ to->cache_state_pages_dirty += from->cache_state_pages_dirty;
+ to->cache_state_root_entries += from->cache_state_root_entries;
+ to->cache_state_pages_internal += from->cache_state_pages_internal;
+ to->cache_state_pages_leaf += from->cache_state_pages_leaf;
+ to->cache_state_gen_max_gap += from->cache_state_gen_max_gap;
+ to->cache_state_max_pagesize += from->cache_state_max_pagesize;
+ to->cache_state_min_written_size +=
+ from->cache_state_min_written_size;
+ to->cache_state_smaller_alloc_size +=
+ from->cache_state_smaller_alloc_size;
+ to->cache_state_memory += from->cache_state_memory;
+ to->cache_state_queued += from->cache_state_queued;
+ to->cache_state_not_queueable += from->cache_state_not_queueable;
+ to->cache_state_refs_skipped += from->cache_state_refs_skipped;
+ to->cache_state_root_size += from->cache_state_root_size;
+ to->cache_state_pages += from->cache_state_pages;
to->compress_read += from->compress_read;
to->compress_write += from->compress_write;
to->compress_write_fail += from->compress_write_fail;
@@ -467,6 +524,39 @@ __wt_stat_dsrc_aggregate(
to->cache_write += WT_STAT_READ(from, cache_write);
to->cache_write_restore += WT_STAT_READ(from, cache_write_restore);
to->cache_eviction_clean += WT_STAT_READ(from, cache_eviction_clean);
+ to->cache_state_gen_avg_gap +=
+ WT_STAT_READ(from, cache_state_gen_avg_gap);
+ to->cache_state_avg_written_size +=
+ WT_STAT_READ(from, cache_state_avg_written_size);
+ to->cache_state_pages_clean +=
+ WT_STAT_READ(from, cache_state_pages_clean);
+ to->cache_state_gen_current +=
+ WT_STAT_READ(from, cache_state_gen_current);
+ to->cache_state_pages_dirty +=
+ WT_STAT_READ(from, cache_state_pages_dirty);
+ to->cache_state_root_entries +=
+ WT_STAT_READ(from, cache_state_root_entries);
+ to->cache_state_pages_internal +=
+ WT_STAT_READ(from, cache_state_pages_internal);
+ to->cache_state_pages_leaf +=
+ WT_STAT_READ(from, cache_state_pages_leaf);
+ to->cache_state_gen_max_gap +=
+ WT_STAT_READ(from, cache_state_gen_max_gap);
+ to->cache_state_max_pagesize +=
+ WT_STAT_READ(from, cache_state_max_pagesize);
+ to->cache_state_min_written_size +=
+ WT_STAT_READ(from, cache_state_min_written_size);
+ to->cache_state_smaller_alloc_size +=
+ WT_STAT_READ(from, cache_state_smaller_alloc_size);
+ to->cache_state_memory += WT_STAT_READ(from, cache_state_memory);
+ to->cache_state_queued += WT_STAT_READ(from, cache_state_queued);
+ to->cache_state_not_queueable +=
+ WT_STAT_READ(from, cache_state_not_queueable);
+ to->cache_state_refs_skipped +=
+ WT_STAT_READ(from, cache_state_refs_skipped);
+ to->cache_state_root_size +=
+ WT_STAT_READ(from, cache_state_root_size);
+ to->cache_state_pages += WT_STAT_READ(from, cache_state_pages);
to->compress_read += WT_STAT_READ(from, compress_read);
to->compress_write += WT_STAT_READ(from, compress_write);
to->compress_write_fail += WT_STAT_READ(from, compress_write_fail);
@@ -549,6 +639,10 @@ static const char * const __stats_connection_desc[] = {
"block-manager: bytes written for checkpoint",
"block-manager: mapped blocks read",
"block-manager: mapped bytes read",
+ "cache: application threads page read from disk to cache count",
+ "cache: application threads page read from disk to cache time (usecs)",
+ "cache: application threads page write from cache to disk count",
+ "cache: application threads page write from cache to disk time (usecs)",
"cache: bytes belonging to page images in the cache",
"cache: bytes currently in the cache",
"cache: bytes not belonging to page images in the cache",
@@ -642,6 +736,21 @@ static const char * const __stats_connection_desc[] = {
"data-handle: connection sweeps",
"data-handle: session dhandles swept",
"data-handle: session sweep attempts",
+ "lock: checkpoint lock acquisitions",
+ "lock: checkpoint lock application thread wait time (usecs)",
+ "lock: checkpoint lock internal thread wait time (usecs)",
+ "lock: handle-list lock acquisitions",
+ "lock: handle-list lock application thread wait time (usecs)",
+ "lock: handle-list lock internal thread wait time (usecs)",
+ "lock: metadata lock acquisitions",
+ "lock: metadata lock application thread wait time (usecs)",
+ "lock: metadata lock internal thread wait time (usecs)",
+ "lock: schema lock acquisitions",
+ "lock: schema lock application thread wait time (usecs)",
+ "lock: schema lock internal thread wait time (usecs)",
+ "lock: table lock acquisitions",
+ "lock: table lock application thread time waiting for the table lock (usecs)",
+ "lock: table lock internal thread time waiting for the table lock (usecs)",
"log: busy returns attempting to switch slots",
"log: consolidated slot closures",
"log: consolidated slot join races",
@@ -706,6 +815,8 @@ static const char * const __stats_connection_desc[] = {
"thread-state: active filesystem fsync calls",
"thread-state: active filesystem read calls",
"thread-state: active filesystem write calls",
+ "thread-yield: application thread time evicting (usecs)",
+ "thread-yield: application thread time waiting for cache (usecs)",
"thread-yield: page acquire busy blocked",
"thread-yield: page acquire eviction blocked",
"thread-yield: page acquire locked blocked",
@@ -723,6 +834,7 @@ static const char * const __stats_connection_desc[] = {
"transaction: transaction checkpoint scrub time (msecs)",
"transaction: transaction checkpoint total time (msecs)",
"transaction: transaction checkpoints",
+ "transaction: transaction checkpoints skipped because database was clean",
"transaction: transaction failures due to cache overflow",
"transaction: transaction fsync calls for checkpoint after allocating the transaction ID",
"transaction: transaction fsync duration for checkpoint after allocating the transaction ID (usecs)",
@@ -793,6 +905,10 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->block_byte_write_checkpoint = 0;
stats->block_map_read = 0;
stats->block_byte_map_read = 0;
+ stats->cache_read_app_count = 0;
+ stats->cache_read_app_time = 0;
+ stats->cache_write_app_count = 0;
+ stats->cache_write_app_time = 0;
/* not clearing cache_bytes_image */
/* not clearing cache_bytes_inuse */
/* not clearing cache_bytes_other */
@@ -886,6 +1002,21 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->dh_sweeps = 0;
stats->dh_session_handles = 0;
stats->dh_session_sweeps = 0;
+ stats->lock_checkpoint_count = 0;
+ stats->lock_checkpoint_wait_application = 0;
+ stats->lock_checkpoint_wait_internal = 0;
+ stats->lock_handle_list_count = 0;
+ stats->lock_handle_list_wait_application = 0;
+ stats->lock_handle_list_wait_internal = 0;
+ stats->lock_metadata_count = 0;
+ stats->lock_metadata_wait_application = 0;
+ stats->lock_metadata_wait_internal = 0;
+ stats->lock_schema_count = 0;
+ stats->lock_schema_wait_application = 0;
+ stats->lock_schema_wait_internal = 0;
+ stats->lock_table_count = 0;
+ stats->lock_table_wait_application = 0;
+ stats->lock_table_wait_internal = 0;
stats->log_slot_switch_busy = 0;
stats->log_slot_closes = 0;
stats->log_slot_races = 0;
@@ -950,6 +1081,8 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing thread_fsync_active */
/* not clearing thread_read_active */
/* not clearing thread_write_active */
+ stats->application_evict_time = 0;
+ stats->application_cache_time = 0;
stats->page_busy_blocked = 0;
stats->page_forcible_evict_blocked = 0;
stats->page_locked_blocked = 0;
@@ -967,6 +1100,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing txn_checkpoint_scrub_time */
/* not clearing txn_checkpoint_time_total */
stats->txn_checkpoint = 0;
+ stats->txn_checkpoint_skipped = 0;
stats->txn_fail_cache = 0;
stats->txn_checkpoint_fsync_post = 0;
/* not clearing txn_checkpoint_fsync_post_duration */
@@ -1030,6 +1164,11 @@ __wt_stat_connection_aggregate(
WT_STAT_READ(from, block_byte_write_checkpoint);
to->block_map_read += WT_STAT_READ(from, block_map_read);
to->block_byte_map_read += WT_STAT_READ(from, block_byte_map_read);
+ to->cache_read_app_count += WT_STAT_READ(from, cache_read_app_count);
+ to->cache_read_app_time += WT_STAT_READ(from, cache_read_app_time);
+ to->cache_write_app_count +=
+ WT_STAT_READ(from, cache_write_app_count);
+ to->cache_write_app_time += WT_STAT_READ(from, cache_write_app_time);
to->cache_bytes_image += WT_STAT_READ(from, cache_bytes_image);
to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse);
to->cache_bytes_other += WT_STAT_READ(from, cache_bytes_other);
@@ -1156,6 +1295,33 @@ __wt_stat_connection_aggregate(
to->dh_sweeps += WT_STAT_READ(from, dh_sweeps);
to->dh_session_handles += WT_STAT_READ(from, dh_session_handles);
to->dh_session_sweeps += WT_STAT_READ(from, dh_session_sweeps);
+ to->lock_checkpoint_count +=
+ WT_STAT_READ(from, lock_checkpoint_count);
+ to->lock_checkpoint_wait_application +=
+ WT_STAT_READ(from, lock_checkpoint_wait_application);
+ to->lock_checkpoint_wait_internal +=
+ WT_STAT_READ(from, lock_checkpoint_wait_internal);
+ to->lock_handle_list_count +=
+ WT_STAT_READ(from, lock_handle_list_count);
+ to->lock_handle_list_wait_application +=
+ WT_STAT_READ(from, lock_handle_list_wait_application);
+ to->lock_handle_list_wait_internal +=
+ WT_STAT_READ(from, lock_handle_list_wait_internal);
+ to->lock_metadata_count += WT_STAT_READ(from, lock_metadata_count);
+ to->lock_metadata_wait_application +=
+ WT_STAT_READ(from, lock_metadata_wait_application);
+ to->lock_metadata_wait_internal +=
+ WT_STAT_READ(from, lock_metadata_wait_internal);
+ to->lock_schema_count += WT_STAT_READ(from, lock_schema_count);
+ to->lock_schema_wait_application +=
+ WT_STAT_READ(from, lock_schema_wait_application);
+ to->lock_schema_wait_internal +=
+ WT_STAT_READ(from, lock_schema_wait_internal);
+ to->lock_table_count += WT_STAT_READ(from, lock_table_count);
+ to->lock_table_wait_application +=
+ WT_STAT_READ(from, lock_table_wait_application);
+ to->lock_table_wait_internal +=
+ WT_STAT_READ(from, lock_table_wait_internal);
to->log_slot_switch_busy += WT_STAT_READ(from, log_slot_switch_busy);
to->log_slot_closes += WT_STAT_READ(from, log_slot_closes);
to->log_slot_races += WT_STAT_READ(from, log_slot_races);
@@ -1242,6 +1408,10 @@ __wt_stat_connection_aggregate(
to->thread_fsync_active += WT_STAT_READ(from, thread_fsync_active);
to->thread_read_active += WT_STAT_READ(from, thread_read_active);
to->thread_write_active += WT_STAT_READ(from, thread_write_active);
+ to->application_evict_time +=
+ WT_STAT_READ(from, application_evict_time);
+ to->application_cache_time +=
+ WT_STAT_READ(from, application_cache_time);
to->page_busy_blocked += WT_STAT_READ(from, page_busy_blocked);
to->page_forcible_evict_blocked +=
WT_STAT_READ(from, page_forcible_evict_blocked);
@@ -1270,6 +1440,8 @@ __wt_stat_connection_aggregate(
to->txn_checkpoint_time_total +=
WT_STAT_READ(from, txn_checkpoint_time_total);
to->txn_checkpoint += WT_STAT_READ(from, txn_checkpoint);
+ to->txn_checkpoint_skipped +=
+ WT_STAT_READ(from, txn_checkpoint_skipped);
to->txn_fail_cache += WT_STAT_READ(from, txn_fail_cache);
to->txn_checkpoint_fsync_post +=
WT_STAT_READ(from, txn_checkpoint_fsync_post);
diff --git a/src/third_party/wiredtiger/src/support/thread_group.c b/src/third_party/wiredtiger/src/support/thread_group.c
index f5ddabad7d4..a866d2d01c5 100644
--- a/src/third_party/wiredtiger/src/support/thread_group.c
+++ b/src/third_party/wiredtiger/src/support/thread_group.c
@@ -60,7 +60,7 @@ __thread_group_grow(
while (group->current_threads < new_count) {
thread = group->threads[group->current_threads++];
__wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Starting utility thread: %p:%"PRIu32"\n",
+ "Starting utility thread: %p:%" PRIu32,
(void *)group, thread->id);
F_SET(thread, WT_THREAD_RUN);
WT_ASSERT(session, thread->session != NULL);
@@ -100,7 +100,7 @@ __thread_group_shrink(WT_SESSION_IMPL *session,
/* Wake threads to ensure they notice the state change */
if (thread->tid != 0) {
__wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Stopping utility thread: %p:%"PRIu32"\n",
+ "Stopping utility thread: %p:%" PRIu32,
(void *)group, thread->id);
F_CLR(thread, WT_THREAD_RUN);
__wt_cond_signal(session, group->wait_cond);
@@ -224,7 +224,7 @@ __wt_thread_group_resize(
__wt_verbose(session, WT_VERB_THREAD_GROUP,
"Resize thread group: %p, from min: %" PRIu32 " -> %" PRIu32
- " from max: %" PRIu32 " -> %" PRIu32 "\n",
+ " from max: %" PRIu32 " -> %" PRIu32,
(void *)group, group->min, new_min, group->max, new_max);
__wt_writelock(session, group->lock);
@@ -253,7 +253,7 @@ __wt_thread_group_create(
cond_alloced = false;
__wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Creating thread group: %p\n", (void *)group);
+ "Creating thread group: %p", (void *)group);
WT_RET(__wt_rwlock_alloc(session, &group->lock, "Thread group"));
WT_ERR(__wt_cond_alloc(
@@ -286,7 +286,7 @@ __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group)
WT_DECL_RET;
__wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Destroying thread group: %p\n", (void *)group);
+ "Destroying thread group: %p", (void *)group);
WT_ASSERT(session, __wt_rwlock_islocked(session, group->lock));
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 01e0fbbb634..d60ea73c660 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -96,11 +96,11 @@ __wt_txn_release_snapshot(WT_SESSION_IMPL *session)
txn_state = WT_SESSION_TXN_STATE(session);
WT_ASSERT(session,
- txn_state->snap_min == WT_TXN_NONE ||
+ txn_state->pinned_id == WT_TXN_NONE ||
session->txn.isolation == WT_ISO_READ_UNCOMMITTED ||
- !__wt_txn_visible_all(session, txn_state->snap_min));
+ !__wt_txn_visible_all(session, txn_state->pinned_id));
- txn_state->snap_min = WT_TXN_NONE;
+ txn_state->pinned_id = WT_TXN_NONE;
F_CLR(txn, WT_TXN_HAS_SNAPSHOT);
}
@@ -117,7 +117,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
WT_TXN_GLOBAL *txn_global;
WT_TXN_STATE *s, *txn_state;
uint64_t current_id, id;
- uint64_t prev_oldest_id, snap_min;
+ uint64_t prev_oldest_id, pinned_id;
uint32_t i, n, session_cnt;
conn = S2C(session);
@@ -135,21 +135,21 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
WT_PAUSE();
WT_RET(ret);
- current_id = snap_min = txn_global->current;
+ current_id = pinned_id = txn_global->current;
prev_oldest_id = txn_global->oldest_id;
/*
* Include the checkpoint transaction, if one is running: we should
* ignore any uncommitted changes the checkpoint has written to the
* metadata. We don't have to keep the checkpoint's changes pinned so
- * don't including it in the published snap_min.
+ * don't include it in the published pinned ID.
*/
if ((id = txn_global->checkpoint_txnid) != WT_TXN_NONE)
txn->snapshot[n++] = id;
/* For pure read-only workloads, avoid scanning. */
if (prev_oldest_id == current_id) {
- txn_state->snap_min = current_id;
+ txn_state->pinned_id = current_id;
/* Check that the oldest ID has not moved in the meantime. */
WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
goto done;
@@ -172,18 +172,18 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
(id = s->id) != WT_TXN_NONE &&
WT_TXNID_LE(prev_oldest_id, id)) {
txn->snapshot[n++] = id;
- if (WT_TXNID_LT(id, snap_min))
- snap_min = id;
+ if (WT_TXNID_LT(id, pinned_id))
+ pinned_id = id;
}
}
/*
- * If we got a new snapshot, update the published snap_min for this
+ * If we got a new snapshot, update the published pinned ID for this
* session.
*/
- WT_ASSERT(session, WT_TXNID_LE(prev_oldest_id, snap_min));
+ WT_ASSERT(session, WT_TXNID_LE(prev_oldest_id, pinned_id));
WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
- txn_state->snap_min = snap_min;
+ txn_state->pinned_id = pinned_id;
done: __wt_readunlock(session, txn_global->scan_rwlock);
__txn_sort_snapshot(session, n, current_id);
@@ -232,13 +232,13 @@ __txn_oldest_scan(WT_SESSION_IMPL *session,
/*
* !!!
- * Note: Don't ignore snap_min values older than the previous
- * oldest ID. Read-uncommitted operations publish snap_min
+ * Note: Don't ignore pinned ID values older than the previous
+ * oldest ID. Read-uncommitted operations publish pinned ID
* values without acquiring the scan lock to protect the global
- * table. See the comment in __wt_txn_cursor_op for
- * more details.
+ * table. See the comment in __wt_txn_cursor_op for more
+ * details.
*/
- if ((id = s->snap_min) != WT_TXN_NONE &&
+ if ((id = s->pinned_id) != WT_TXN_NONE &&
WT_TXNID_LT(id, oldest_id)) {
oldest_id = id;
oldest_session = &conn->sessions[i];
@@ -360,7 +360,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags)
__wt_verbose(session, WT_VERB_TRANSACTION,
"old snapshot %" PRIu64
" pinned in session %" PRIu32 " [%s]"
- " with snap_min %" PRIu64 "\n",
+ " with snap_min %" PRIu64,
oldest_id, oldest_session->id,
oldest_session->lastop,
oldest_session->txn.snap_min);
@@ -659,21 +659,21 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[])
* Initialize a session's transaction data.
*/
int
-__wt_txn_init(WT_SESSION_IMPL *session)
+__wt_txn_init(WT_SESSION_IMPL *session, WT_SESSION_IMPL *session_ret)
{
WT_TXN *txn;
- txn = &session->txn;
+ txn = &session_ret->txn;
txn->id = WT_TXN_NONE;
WT_RET(__wt_calloc_def(session,
- S2C(session)->session_size, &txn->snapshot));
+ S2C(session_ret)->session_size, &txn->snapshot));
#ifdef HAVE_DIAGNOSTIC
- if (S2C(session)->txn_global.states != NULL) {
+ if (S2C(session_ret)->txn_global.states != NULL) {
WT_TXN_STATE *txn_state;
- txn_state = WT_SESSION_TXN_STATE(session);
- WT_ASSERT(session, txn_state->snap_min == WT_TXN_NONE);
+ txn_state = WT_SESSION_TXN_STATE(session_ret);
+ WT_ASSERT(session, txn_state->pinned_id == WT_TXN_NONE);
}
#endif
@@ -683,7 +683,7 @@ __wt_txn_init(WT_SESSION_IMPL *session)
*/
txn->mod = NULL;
- txn->isolation = session->isolation;
+ txn->isolation = session_ret->isolation;
return (0);
}
@@ -773,7 +773,7 @@ __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[])
WT_CACHE_LINE_ALIGNMENT_VERIFY(session, txn_global->states);
for (i = 0, s = txn_global->states; i < conn->session_size; i++, s++)
- s->id = s->snap_min = WT_TXN_NONE;
+ s->id = s->pinned_id = WT_TXN_NONE;
return (0);
}
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index 3aad95f5a9f..0557e6ce60c 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -314,7 +314,7 @@ __checkpoint_update_generation(WT_SESSION_IMPL *session)
* __checkpoint_reduce_dirty_cache --
* Release clean trees from the list cached for checkpoints.
*/
-static int
+static void
__checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
{
WT_CACHE *cache;
@@ -332,9 +332,9 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
/* Give up if scrubbing is disabled. */
if (cache->eviction_checkpoint_target == 0 ||
cache->eviction_checkpoint_target >= cache->eviction_dirty_trigger)
- return (0);
+ return;
- WT_RET(__wt_epoch(session, &start));
+ __wt_epoch(session, &start);
last = start;
bytes_written_last = 0;
bytes_written_start = cache->bytes_written;
@@ -345,7 +345,7 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
* cache via reconfigure. This avoids potential divide by zero.
*/
if (cache_size < 10 * WT_MEGABYTE)
- return (0);
+ return;
stepdown_us = 10000;
work_us = 0;
progress = false;
@@ -371,7 +371,7 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
break;
__wt_sleep(0, stepdown_us / 10);
- WT_RET(__wt_epoch(session, &stop));
+ __wt_epoch(session, &stop);
current_us = WT_TIMEDIFF_US(stop, last);
total_ms = WT_TIMEDIFF_MS(stop, start);
bytes_written_total =
@@ -427,14 +427,12 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
WT_MAX(cache->eviction_dirty_target, current_dirty - delta);
WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target,
cache->eviction_scrub_limit);
- WT_RET(__wt_epoch(session, &last));
+ __wt_epoch(session, &last);
}
- WT_RET(__wt_epoch(session, &stop));
+ __wt_epoch(session, &stop);
total_ms = WT_TIMEDIFF_MS(stop, start);
WT_STAT_CONN_SET(session, txn_checkpoint_scrub_time, total_ms);
-
- return (0);
}
/*
@@ -497,7 +495,7 @@ __checkpoint_stats(
* __checkpoint_verbose_track --
* Output a verbose message with timing information
*/
-static int
+static void
__checkpoint_verbose_track(WT_SESSION_IMPL *session,
const char *msg, struct timespec *start)
{
@@ -506,9 +504,9 @@ __checkpoint_verbose_track(WT_SESSION_IMPL *session,
uint64_t msec;
if (!WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT))
- return (0);
+ return;
- WT_RET(__wt_epoch(session, &stop));
+ __wt_epoch(session, &stop);
/*
* Get time diff in microseconds.
@@ -526,7 +524,6 @@ __checkpoint_verbose_track(WT_SESSION_IMPL *session,
WT_UNUSED(msg);
WT_UNUSED(start);
#endif
- return (0);
}
/*
@@ -576,7 +573,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
conn->cache->evict_max_page_size = 0;
/* Initialize the verbose tracking timer */
- WT_ERR(__wt_epoch(session, &verb_timer));
+ __wt_epoch(session, &verb_timer);
/*
* Update the global oldest ID so we do all possible cleanup.
@@ -594,18 +591,18 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* Try to reduce the amount of dirty data in cache so there is less
* work to do during the critical section of the checkpoint.
*/
- WT_ERR(__checkpoint_reduce_dirty_cache(session));
+ __checkpoint_reduce_dirty_cache(session);
/* Tell logging that we are about to start a database checkpoint. */
if (full && logging)
WT_ERR(__wt_txn_checkpoint_log(
session, full, WT_TXN_LOG_CKPT_PREPARE, NULL));
- WT_ERR(__checkpoint_verbose_track(session,
- "starting transaction", &verb_timer));
+ __checkpoint_verbose_track(session,
+ "starting transaction", &verb_timer);
if (full)
- WT_ERR(__wt_epoch(session, &start));
+ __wt_epoch(session, &start);
/*
* Start the checkpoint for real.
@@ -666,6 +663,14 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__wt_txn_id_check(session));
/*
+ * Mark the connection as clean. If some data gets modified after
+ * generating checkpoint transaction id, connection will be reset to
+ * dirty when reconciliation marks the btree dirty on encountering the
+ * dirty page.
+ */
+ conn->modified = false;
+
+ /*
* Save the checkpoint session ID.
*
* We never do checkpoints in the default session (with id zero).
@@ -689,7 +694,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
*/
WT_ASSERT(session,
WT_TXNID_LE(txn_global->oldest_id, txn_state->id) &&
- WT_TXNID_LE(txn_global->oldest_id, txn_state->snap_min));
+ WT_TXNID_LE(txn_global->oldest_id, txn_state->pinned_id));
/*
* Clear our entry from the global transaction session table. Any
@@ -698,7 +703,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* can safely ignore the checkpoint ID (see the visible all check for
* details).
*/
- txn_state->id = txn_state->snap_min = WT_TXN_NONE;
+ txn_state->id = txn_state->pinned_id = WT_TXN_NONE;
__wt_writeunlock(session, txn_global->scan_rwlock);
/*
@@ -739,23 +744,22 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_presync));
__wt_evict_server_wake(session);
- WT_ERR(__checkpoint_verbose_track(session,
- "committing transaction", &verb_timer));
+ __checkpoint_verbose_track(session,
+ "committing transaction", &verb_timer);
/*
* Checkpoints have to hit disk (it would be reasonable to configure for
* lazy checkpoints, but we don't support them yet).
*/
- WT_ERR(__wt_epoch(session, &fsync_start));
+ __wt_epoch(session, &fsync_start);
WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync));
- WT_ERR(__wt_epoch(session, &fsync_stop));
+ __wt_epoch(session, &fsync_stop);
fsync_duration_usecs = WT_TIMEDIFF_US(fsync_stop, fsync_start);
WT_STAT_CONN_INCR(session, txn_checkpoint_fsync_post);
WT_STAT_CONN_SET(session,
txn_checkpoint_fsync_post_duration, fsync_duration_usecs);
- WT_ERR(__checkpoint_verbose_track(session,
- "sync completed", &verb_timer));
+ __checkpoint_verbose_track(session, "sync completed", &verb_timer);
/*
* Commit the transaction now that we are sure that all files in the
@@ -793,8 +797,8 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
ret = __wt_checkpoint_sync(session, NULL));
WT_ERR(ret);
- WT_ERR(__checkpoint_verbose_track(session,
- "metadata sync completed", &verb_timer));
+ __checkpoint_verbose_track(session,
+ "metadata sync completed", &verb_timer);
} else
WT_WITH_DHANDLE(session,
WT_SESSION_META_DHANDLE(session),
@@ -808,7 +812,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
txn_global->checkpoint_pinned = WT_TXN_NONE;
if (full) {
- WT_ERR(__wt_epoch(session, &stop));
+ __wt_epoch(session, &stop);
__checkpoint_stats(session, &start, &stop);
}
@@ -825,6 +829,9 @@ err: /*
* overwritten the checkpoint, so what ends up on disk is not
* consistent.
*/
+ if (ret != 0 && !conn->modified)
+ conn->modified = true;
+
session->isolation = txn->isolation = WT_ISO_READ_UNCOMMITTED;
if (tracking)
WT_TRET(__wt_meta_track_off(session, false, ret != 0));
@@ -1352,9 +1359,13 @@ __checkpoint_tree(
* out of sync with the set of dirty pages (modify is set, but there
* are no dirty pages), we perform a checkpoint without any writes, no
* checkpoint is created, and then things get bad.
+ * While marking the root page as dirty, we do not want to dirty the
+ * btree because we are marking the btree as clean just after this call.
+ * Also, marking the btree dirty at this stage will unnecessarily mark
+ * the connection as dirty causing checkpoint-skip code to fail.
*/
WT_ERR(__wt_page_modify_init(session, btree->root.page));
- __wt_page_modify_set(session, btree->root.page);
+ __wt_page_only_modify_set(session, btree->root.page);
/*
* Clear the tree's modified flag; any changes before we clear the flag
@@ -1366,7 +1377,7 @@ __checkpoint_tree(
* it sets the modified flag itself. Use a full barrier so we get the
* store done quickly, this isn't a performance path.
*/
- btree->modified = 0;
+ btree->modified = false;
WT_FULL_BARRIER();
/* Tell logging that a file checkpoint is starting. */
@@ -1440,8 +1451,11 @@ err: /*
* If the checkpoint didn't complete successfully, make sure the
* tree is marked dirty.
*/
- if (ret != 0 && !btree->modified && was_modified)
- btree->modified = 1;
+ if (ret != 0 && !btree->modified && was_modified) {
+ btree->modified = true;
+ if (!S2C(session)->modified)
+ S2C(session)->modified = true;
+ }
__wt_meta_ckptlist_free(session, ckptbase);
btree->ckpt = NULL;
diff --git a/src/third_party/wiredtiger/src/txn/txn_nsnap.c b/src/third_party/wiredtiger/src/txn/txn_nsnap.c
index 8f7e93238de..7ba0cc8700e 100644
--- a/src/third_party/wiredtiger/src/txn/txn_nsnap.c
+++ b/src/third_party/wiredtiger/src/txn/txn_nsnap.c
@@ -42,9 +42,16 @@ __nsnap_drop_one(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *name)
return (WT_NOTFOUND);
/* Bump the global ID if we are removing the first entry */
- if (found == TAILQ_FIRST(&txn_global->nsnaph))
+ if (found == TAILQ_FIRST(&txn_global->nsnaph)) {
+ WT_ASSERT(session, !__wt_txn_visible_all(
+ session, txn_global->nsnap_oldest_id));
txn_global->nsnap_oldest_id = (TAILQ_NEXT(found, q) != NULL) ?
- TAILQ_NEXT(found, q)->snap_min : WT_TXN_NONE;
+ TAILQ_NEXT(found, q)->pinned_id : WT_TXN_NONE;
+ WT_DIAGNOSTIC_YIELD;
+ WT_ASSERT(session, txn_global->nsnap_oldest_id == WT_TXN_NONE ||
+ !__wt_txn_visible_all(
+ session, txn_global->nsnap_oldest_id));
+ }
TAILQ_REMOVE(&txn_global->nsnaph, found, q);
__nsnap_destroy(session, found);
WT_STAT_CONN_INCR(session, txn_snapshots_dropped);
@@ -104,7 +111,7 @@ __nsnap_drop_to(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *name, bool inclusive)
}
if (TAILQ_NEXT(last, q) != NULL)
- new_nsnap_oldest = TAILQ_NEXT(last, q)->snap_min;
+ new_nsnap_oldest = TAILQ_NEXT(last, q)->pinned_id;
}
do {
@@ -117,7 +124,15 @@ __nsnap_drop_to(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *name, bool inclusive)
} while (nsnap != last && !TAILQ_EMPTY(&txn_global->nsnaph));
/* Now that the queue of named snapshots is updated, update the ID */
+ WT_ASSERT(session, !__wt_txn_visible_all(
+ session, txn_global->nsnap_oldest_id) &&
+ (new_nsnap_oldest == WT_TXN_NONE ||
+ WT_TXNID_LE(txn_global->nsnap_oldest_id, new_nsnap_oldest)));
txn_global->nsnap_oldest_id = new_nsnap_oldest;
+ WT_DIAGNOSTIC_YIELD;
+ WT_ASSERT(session,
+ new_nsnap_oldest == WT_TXN_NONE ||
+ !__wt_txn_visible_all(session, new_nsnap_oldest));
return (ret);
}
@@ -157,6 +172,7 @@ __wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__wt_calloc_one(session, &nsnap_new));
nsnap = nsnap_new;
WT_ERR(__wt_strndup(session, cval.str, cval.len, &nsnap->name));
+ nsnap->pinned_id = WT_SESSION_TXN_STATE(session)->pinned_id;
nsnap->snap_min = txn->snap_min;
nsnap->snap_max = txn->snap_max;
if (txn->snapshot_count > 0) {
@@ -175,15 +191,25 @@ __wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[])
*/
WT_ERR_NOTFOUND_OK(__nsnap_drop_one(session, &cval));
- if (TAILQ_EMPTY(&txn_global->nsnaph))
- txn_global->nsnap_oldest_id = nsnap_new->snap_min;
+ if (TAILQ_EMPTY(&txn_global->nsnaph)) {
+ WT_ASSERT(session, txn_global->nsnap_oldest_id == WT_TXN_NONE &&
+ !__wt_txn_visible_all(session, nsnap_new->pinned_id));
+ __wt_readlock(session, txn_global->scan_rwlock);
+ txn_global->nsnap_oldest_id = nsnap_new->pinned_id;
+ __wt_readunlock(session, txn_global->scan_rwlock);
+ }
TAILQ_INSERT_TAIL(&txn_global->nsnaph, nsnap_new, q);
WT_STAT_CONN_INCR(session, txn_snapshots_created);
nsnap_new = NULL;
-err: if (started_txn)
+err: if (started_txn) {
+#ifdef HAVE_DIAGNOSTIC
+ uint64_t pinned_id = WT_SESSION_TXN_STATE(session)->pinned_id;
+#endif
WT_TRET(__wt_txn_rollback(session, NULL));
- else if (ret == 0)
+ WT_DIAGNOSTIC_YIELD;
+ WT_ASSERT(session, !__wt_txn_visible_all(session, pinned_id));
+ } else if (ret == 0)
F_SET(txn, WT_TXN_NAMED_SNAPSHOT);
if (nsnap_new != NULL)
@@ -258,7 +284,20 @@ __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *nameval)
__wt_readlock(session, txn_global->nsnap_rwlock);
TAILQ_FOREACH(nsnap, &txn_global->nsnaph, q)
if (WT_STRING_MATCH(nsnap->name, nameval->str, nameval->len)) {
- txn->snap_min = txn_state->snap_min = nsnap->snap_min;
+ /*
+ * Acquire the scan lock so the oldest ID can't move
+ * forward without seeing our pinned ID.
+ */
+ __wt_readlock(session, txn_global->scan_rwlock);
+ txn_state->pinned_id = nsnap->pinned_id;
+ __wt_readunlock(session, txn_global->scan_rwlock);
+
+ WT_ASSERT(session, !__wt_txn_visible_all(
+ session, txn_state->pinned_id) &&
+ txn_global->nsnap_oldest_id != WT_TXN_NONE &&
+ WT_TXNID_LE(txn_global->nsnap_oldest_id,
+ txn_state->pinned_id));
+ txn->snap_min = nsnap->snap_min;
txn->snap_max = nsnap->snap_max;
if ((txn->snapshot_count = nsnap->snapshot_count) != 0)
memcpy(txn->snapshot, nsnap->snapshot,
diff --git a/src/third_party/wiredtiger/test/bloom/test_bloom.c b/src/third_party/wiredtiger/test/bloom/test_bloom.c
index 7a298f000aa..67249ff887e 100644
--- a/src/third_party/wiredtiger/test/bloom/test_bloom.c
+++ b/src/third_party/wiredtiger/test/bloom/test_bloom.c
@@ -56,8 +56,6 @@ void usage(void)
extern char *__wt_optarg;
extern int __wt_optind;
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/src/third_party/wiredtiger/test/checkpoint/smoke.sh b/src/third_party/wiredtiger/test/checkpoint/smoke.sh
index 2f1d4345ad7..123d4e00df5 100755
--- a/src/third_party/wiredtiger/test/checkpoint/smoke.sh
+++ b/src/third_party/wiredtiger/test/checkpoint/smoke.sh
@@ -6,6 +6,9 @@ set -e
echo "checkpoint: 3 mixed tables"
$TEST_WRAPPER ./t -T 3 -t m
+# We are done if short tests are requested
+test -z "$TESTUTIL_DISABLE_LONG_TESTS" || exit 0
+
echo "checkpoint: 6 column-store tables"
$TEST_WRAPPER ./t -T 6 -t c
diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c
index 6b2f0d4466c..4998019ad8e 100644
--- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c
+++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.c
@@ -42,8 +42,6 @@ static int wt_shutdown(void);
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/src/third_party/wiredtiger/test/csuite/Makefile.am b/src/third_party/wiredtiger/test/csuite/Makefile.am
index 15db2fbcf46..8f1714237b9 100644
--- a/src/third_party/wiredtiger/test/csuite/Makefile.am
+++ b/src/third_party/wiredtiger/test/csuite/Makefile.am
@@ -7,6 +7,9 @@ AM_LDFLAGS = -static
test_wt1965_col_efficiency_SOURCES = wt1965_col_efficiency/main.c
noinst_PROGRAMS = test_wt1965_col_efficiency
+test_wt2403_lsm_workload_SOURCES = wt2403_lsm_workload/main.c
+noinst_PROGRAMS += test_wt2403_lsm_workload
+
test_wt2246_col_append_SOURCES = wt2246_col_append/main.c
noinst_PROGRAMS += test_wt2246_col_append
diff --git a/src/third_party/wiredtiger/test/csuite/wt1965_col_efficiency/main.c b/src/third_party/wiredtiger/test/csuite/wt1965_col_efficiency/main.c
index 0dc367c0611..a7235d81b31 100644
--- a/src/third_party/wiredtiger/test/csuite/wt1965_col_efficiency/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt1965_col_efficiency/main.c
@@ -35,8 +35,6 @@
* it is demonstrating an inefficiency rather than a correctness bug.
*/
-void (*custom_die)(void) = NULL;
-
/* If changing field count also need to change set_value and get_value calls */
#define NR_FIELDS 8
#define NR_OBJECTS 100
diff --git a/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c b/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c
index b795816c76f..4b352b26051 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c
@@ -42,8 +42,6 @@
#define MILLION 1000000
-void (*custom_die)(void) = NULL;
-
/* Needs to be global for signal handling. */
static TEST_OPTS *opts, _opts;
@@ -104,6 +102,8 @@ main(int argc, char *argv[])
char buf[100];
opts = &_opts;
+ if (testutil_disable_long_tests())
+ return (0);
memset(opts, 0, sizeof(*opts));
opts->table_type = TABLE_ROW;
opts->n_append_threads = N_APPEND_THREADS;
diff --git a/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c b/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c
index bbf1626fe82..239a3f300d0 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c
@@ -52,8 +52,6 @@
* of inserts set low as a default.
*/
-void (*custom_die)(void) = NULL;
-
#define N_RECORDS 10000
#define N_INSERT 500000
#define N_INSERT_THREAD 2
@@ -96,6 +94,8 @@ main(int argc, char *argv[])
opts = &_opts;
sharedopts = &_sharedopts;
+ if (testutil_disable_long_tests())
+ return (0);
memset(opts, 0, sizeof(*opts));
memset(sharedopts, 0, sizeof(*sharedopts));
@@ -225,7 +225,8 @@ test_join(TEST_OPTS *opts, SHARED_OPTS *sharedopts, bool bloom,
testutil_check(session->close(session, NULL));
}
-static void *thread_insert(void *arg)
+static void *
+thread_insert(void *arg)
{
SHARED_OPTS *sharedopts;
TEST_OPTS *opts;
@@ -239,7 +240,7 @@ static void *thread_insert(void *arg)
threadargs = (THREAD_ARGS *)arg;
opts = threadargs->testopts;
sharedopts = threadargs->sharedopts;
- testutil_check(__wt_random_init_seed(NULL, &rnd));
+ __wt_random_init_seed(NULL, &rnd);
testutil_check(opts->conn->open_session(
opts->conn, NULL, NULL, &session));
diff --git a/src/third_party/wiredtiger/test/csuite/wt2403_lsm_workload/main.c b/src/third_party/wiredtiger/test/csuite/wt2403_lsm_workload/main.c
new file mode 100644
index 00000000000..0c287484b9e
--- /dev/null
+++ b/src/third_party/wiredtiger/test/csuite/wt2403_lsm_workload/main.c
@@ -0,0 +1,241 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "test_util.h"
+
+static const char name[] = "lsm:test";
+#define NUM_DOCS 100000
+#define NUM_QUERIES (NUM_DOCS/100)
+
+/*
+ * rand_str --
+ *	Overwrite the last eight characters of a NUL-terminated string with
+ *	the eight low-order decimal digits of i, zero padded. The callers in
+ *	this file always pass a 16-character buffer.
+ */
+static void
+rand_str(uint64_t i, char *str)
+{
+	uint64_t end, pos;
+
+	end = strlen(str);
+
+	/* Walk backwards from the final character, one digit per step. */
+	for (pos = end; pos > end - 8; --pos, i /= 10)
+		str[pos - 1] = (char)(i % 10) + 48;	/* 48 is ASCII '0' */
+}
+
+/*
+ * check_str --
+ *	Rebuild the string expected for document i (with a leading 'A' when
+ *	mod is set) and fail the test if str does not match it.
+ */
+static void
+check_str(uint64_t i, char *str, bool mod)
+{
+	char expected[] = "0000000000000000";
+
+	/* Regenerate the value the same way the writer produced it. */
+	rand_str(i, expected);
+
+	/* Modified documents are marked by an 'A' in the first position. */
+	if (mod)
+		expected[0] = 'A';
+
+	/* strcmp returns zero on a match, which testutil_checkfmt accepts. */
+	testutil_checkfmt(strcmp(str, expected),
+	    "strcmp failed, got %s, expected %s", str, expected);
+}
+
+/*
+ * query_docs --
+ *	Read NUM_QUERIES records through the supplied cursor and verify that
+ *	each value is the string derived from its key; mod selects whether the
+ *	modified (leading 'A') form of the value is expected.
+ */
+static void
+query_docs(WT_CURSOR *cursor, bool mod)
+{
+	uint64_t key;
+	const char *value;
+	int i;
+
+	for (i = 0; i < NUM_QUERIES; i++) {
+		testutil_check(cursor->next(cursor));
+
+		/*
+		 * The table is created with key_format=Q,value_format=S, so
+		 * fetch the key into a uint64_t and the value into a string
+		 * pointer. Fetching into WT_ITEM structures and casting the
+		 * data pointer to an integer only works where pointers are
+		 * 64 bits wide.
+		 */
+		testutil_check(cursor->get_key(cursor, &key));
+		testutil_check(cursor->get_value(cursor, &value));
+
+		/* check_str takes a non-const pointer but only reads it. */
+		check_str(key, (char *)value, mod);
+	}
+	printf("%d documents read\n", NUM_QUERIES);
+}
+
+/*
+ * compact_thread --
+ *	Thread start routine: compact the test table using the session passed
+ *	as the thread argument.
+ */
+static void *
+compact_thread(void *args)
+{
+	WT_SESSION *compact_session;
+
+	/* The creator hands us its WT_SESSION handle as the opaque argument. */
+	compact_session = (WT_SESSION *)args;
+
+	testutil_check(compact_session->compact(compact_session, name, NULL));
+
+	return (NULL);
+}
+
+/*
+ * main --
+ *	Exercise next_random cursors on an LSM tree while the tree is
+ *	populated, updated, mostly emptied, repopulated, compacted and finally
+ *	emptied completely.
+ */
+int
+main(int argc, char *argv[])
+{
+	TEST_OPTS *opts, _opts;
+	WT_CURSOR *rcursor, *wcursor;
+	WT_SESSION *session, *session2;
+	pthread_t thread;
+	uint64_t i, key;
+	const char *value;
+
+	char str[] = "0000000000000000";
+
+	/*
+	 * Create a clean test directory for this run of the test program if the
+	 * environment variable isn't already set (as is done by make check).
+	 */
+	opts = &_opts;
+	memset(opts, 0, sizeof(*opts));
+	testutil_check(testutil_parse_opts(argc, argv, opts));
+	testutil_make_work_dir(opts->home);
+	testutil_check(wiredtiger_open(opts->home,
+	    NULL, "create,cache_size=200M", &opts->conn));
+
+	/* One session writes; the second reads, sometimes inside a snapshot. */
+	testutil_check(
+	    opts->conn->open_session(opts->conn, NULL, NULL, &session));
+	testutil_check(
+	    opts->conn->open_session(opts->conn, NULL, NULL, &session2));
+
+	testutil_check(session->create(session, name,
+	    "key_format=Q,value_format=S"));
+
+	/* Populate the table with some data. */
+	testutil_check(session->open_cursor(
+	    session, name, NULL, "overwrite", &wcursor));
+	for (i = 0; i < NUM_DOCS; i++) {
+		wcursor->set_key(wcursor, i);
+		rand_str(i, str);
+		wcursor->set_value(wcursor, str);
+		testutil_check(wcursor->insert(wcursor));
+	}
+	testutil_check(wcursor->close(wcursor));
+	printf("%d documents inserted\n", NUM_DOCS);
+
+	/* Perform some random reads */
+	testutil_check(session->open_cursor(
+	    session, name, NULL, "next_random=true", &rcursor));
+	query_docs(rcursor, false);
+	testutil_check(rcursor->close(rcursor));
+
+	/* Setup Transaction to pin the current values */
+	testutil_check(
+	    session2->begin_transaction(session2, "isolation=snapshot"));
+	testutil_check(session2->open_cursor(
+	    session2, name, NULL, "next_random=true", &rcursor));
+
+	/*
+	 * Update every document from the first session; the snapshot
+	 * transaction open on the second session should still see only the
+	 * original values.
+	 */
+	testutil_check(session->open_cursor(
+	    session, name, NULL, "overwrite", &wcursor));
+	for (i = 0; i < NUM_DOCS; i++) {
+		rand_str(i, str);
+		str[0] = 'A';
+		wcursor->set_key(wcursor, i);
+		wcursor->set_value(wcursor, str);
+		testutil_check(wcursor->update(wcursor));
+	}
+	testutil_check(wcursor->close(wcursor));
+	printf("%d documents set to update\n", NUM_DOCS);
+
+	/* Random reads, which should see the original values */
+	query_docs(rcursor, false);
+	testutil_check(rcursor->close(rcursor));
+
+	/* Finish the txn */
+	testutil_check(session2->rollback_transaction(session2, NULL));
+
+	/* Random reads, which should see the updated values */
+	testutil_check(session2->open_cursor(
+	    session2, name, NULL, "next_random=true", &rcursor));
+	query_docs(rcursor, true);
+	testutil_check(rcursor->close(rcursor));
+
+	/* Setup a pre-delete txn */
+	testutil_check(
+	    session2->begin_transaction(session2, "isolation=snapshot"));
+	testutil_check(session2->open_cursor(
+	    session2, name, NULL, "next_random=true", &rcursor));
+
+	/* Delete all but one document */
+	testutil_check(session->open_cursor(
+	    session, name, NULL, "overwrite", &wcursor));
+	for (i = 0; i < NUM_DOCS - 1; i++) {
+		wcursor->set_key(wcursor, i);
+		testutil_check(wcursor->remove(wcursor));
+	}
+	testutil_check(wcursor->close(wcursor));
+	printf("%d documents deleted\n", NUM_DOCS - 1);
+
+	/* Random reads, which should not see the deletes */
+	query_docs(rcursor, true);
+	testutil_check(rcursor->close(rcursor));
+
+	/* Rollback the txn so we can see the deletes */
+	testutil_check(session2->rollback_transaction(session2, NULL));
+
+	/* Find the one remaining document 3 times */
+	testutil_check(session2->open_cursor(
+	    session2, name, NULL, "next_random=true", &rcursor));
+	for (i = 0; i < 3; i++) {
+		testutil_check(rcursor->next(rcursor));
+
+		/*
+		 * The table uses key_format=Q,value_format=S: fetch the key
+		 * into a uint64_t and the value into a string pointer rather
+		 * than reinterpreting a WT_ITEM data pointer as an integer,
+		 * which only works where pointers are 64 bits wide.
+		 */
+		testutil_check(rcursor->get_key(rcursor, &key));
+		testutil_check(rcursor->get_value(rcursor, &value));
+
+		/* There should only be one value available to us */
+		testutil_assertfmt(key == (uint64_t)(NUM_DOCS - 1),
+		    "expected %d and got %" PRIu64,
+		    NUM_DOCS - 1, key);
+
+		/* check_str takes a non-const pointer but only reads it. */
+		check_str(key, (char *)value, true);
+	}
+	printf("Found the deleted doc 3 times\n");
+	testutil_check(rcursor->close(rcursor));
+
+	/* Repopulate the table for compact. */
+	testutil_check(session->open_cursor(
+	    session, name, NULL, "overwrite", &wcursor));
+	for (i = 0; i < NUM_DOCS - 1; i++) {
+		wcursor->set_key(wcursor, i);
+		rand_str(i, str);
+		str[0] = 'A';
+		wcursor->set_value(wcursor, str);
+		testutil_check(wcursor->insert(wcursor));
+	}
+	testutil_check(wcursor->close(wcursor));
+
+	/*
+	 * Run random cursor queries while compact is running. The compact
+	 * thread uses the first session, the queries use the second, and the
+	 * thread is joined before the first session is used again.
+	 */
+	testutil_check(session2->open_cursor(
+	    session2, name, NULL, "next_random=true", &rcursor));
+	testutil_check(pthread_create(&thread, NULL, compact_thread, session));
+	query_docs(rcursor, true);
+	testutil_check(rcursor->close(rcursor));
+	testutil_check(pthread_join(thread, NULL));
+
+	/* Delete everything. Check for infinite loops */
+	testutil_check(session->open_cursor(
+	    session, name, NULL, "overwrite", &wcursor));
+	for (i = 0; i < NUM_DOCS; i++) {
+		wcursor->set_key(wcursor, i);
+		testutil_check(wcursor->remove(wcursor));
+	}
+	testutil_check(wcursor->close(wcursor));
+
+	/* With the table empty, every random read must report not-found. */
+	testutil_check(session2->open_cursor(
+	    session2, name, NULL, "next_random=true", &rcursor));
+	for (i = 0; i < 3; i++)
+		testutil_assert(rcursor->next(rcursor) == WT_NOTFOUND);
+	printf("Successfully got WT_NOTFOUND\n");
+
+	testutil_cleanup(opts);
+	return (EXIT_SUCCESS);
+}
diff --git a/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c b/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c
index bbae61e7ed5..1368e7c8c09 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c
@@ -49,8 +49,6 @@
* table.
*/
-void (*custom_die)(void) = NULL;
-
#define N_RECORDS 10000
static void
diff --git a/src/third_party/wiredtiger/test/csuite/wt2535_insert_race/main.c b/src/third_party/wiredtiger/test/csuite/wt2535_insert_race/main.c
index 5eaca3279b6..ae18760a829 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2535_insert_race/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2535_insert_race/main.c
@@ -36,8 +36,6 @@
* Failure mode: Check that the data is correct at the end of the run.
*/
-void (*custom_die)(void) = NULL;
-
void *thread_insert_race(void *);
int
@@ -52,6 +50,8 @@ main(int argc, char *argv[])
int i;
opts = &_opts;
+ if (testutil_disable_long_tests())
+ return (0);
memset(opts, 0, sizeof(*opts));
opts->nthreads = 10;
opts->nrecords = 1000;
diff --git a/src/third_party/wiredtiger/test/csuite/wt2592_join_schema/main.c b/src/third_party/wiredtiger/test/csuite/wt2592_join_schema/main.c
index 4ffc9194646..0ec1c765d99 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2592_join_schema/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2592_join_schema/main.c
@@ -36,12 +36,6 @@
* Failure mode: The failure seen in WT-2592 was that no items were returned
* by a join.
*/
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <wiredtiger.h>
/* The C struct for the data we are storing in a WiredTiger table. */
typedef struct {
@@ -66,8 +60,6 @@ static POP_RECORD pop_data[] = {
{ "", 0, 0 }
};
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/src/third_party/wiredtiger/test/csuite/wt2695_checksum/main.c b/src/third_party/wiredtiger/test/csuite/wt2695_checksum/main.c
index afb9d0788bd..db4fed5dc53 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2695_checksum/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2695_checksum/main.c
@@ -32,8 +32,6 @@
* Test case description: Smoke-test the CRC.
*/
-void (*custom_die)(void) = NULL;
-
static inline void
check(uint32_t hw, uint32_t sw, size_t len, const char *msg)
{
@@ -61,7 +59,7 @@ main(int argc, char *argv[])
wiredtiger_open(opts->home, NULL, "create", &opts->conn));
/* Initialize the RNG. */
- testutil_check(__wt_random_init_seed(NULL, &rnd));
+ __wt_random_init_seed(NULL, &rnd);
/* Allocate aligned memory for the data. */
data = dcalloc(DATASIZE, sizeof(uint8_t));
diff --git a/src/third_party/wiredtiger/test/csuite/wt2719_reconfig/main.c b/src/third_party/wiredtiger/test/csuite/wt2719_reconfig/main.c
index b67dae6d647..0942cfc73b2 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2719_reconfig/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2719_reconfig/main.c
@@ -34,9 +34,7 @@
* Test case description: Fuzz testing for WiredTiger reconfiguration.
*/
-void (*custom_die)(void) = NULL;
-
-static const char *list[] = {
+static const char * const list[] = {
",async=(enabled=0)",
",async=(enabled=1)",
",async=(ops_max=2048)",
@@ -256,7 +254,7 @@ main(int argc, char *argv[])
session, opts->uri, "type=lsm,key_format=S,value_format=S"));
/* Initialize the RNG. */
- testutil_check(__wt_random_init_seed(NULL, &rnd));
+ __wt_random_init_seed(NULL, &rnd);
/* Allocate memory for the config. */
len = WT_ELEMENTS(list) * 64;
diff --git a/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c b/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c
index 1e2d919d3c7..7c80496f1b6 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c
@@ -39,8 +39,6 @@
*
* Failure mode: We get results back from our join.
*/
-void (*custom_die)(void) = NULL;
-
#define N_RECORDS 100000
#define N_INSERT 1000000
@@ -62,6 +60,8 @@ main(int argc, char *argv[])
char joinuri[256];
opts = &_opts;
+ if (testutil_disable_long_tests())
+ return (0);
memset(opts, 0, sizeof(*opts));
testutil_check(testutil_parse_opts(argc, argv, opts));
@@ -101,8 +101,8 @@ main(int argc, char *argv[])
&maincur));
maincur->set_key(maincur, N_RECORDS);
maincur->set_value(maincur, 54321, 0, "", 0, N_RECORDS);
- maincur->insert(maincur);
- maincur->close(maincur);
+ testutil_check(maincur->insert(maincur));
+ testutil_check(maincur->close(maincur));
testutil_check(session->close(session, NULL));
populate(opts);
@@ -151,6 +151,7 @@ main(int argc, char *argv[])
key, key2, post, balance, flag);
count++;
}
+ testutil_assert(ret == WT_NOTFOUND);
testutil_assert(count == 0);
testutil_cleanup(opts);
@@ -159,7 +160,8 @@ main(int argc, char *argv[])
return (0);
}
-void populate(TEST_OPTS *opts)
+void
+populate(TEST_OPTS *opts)
{
WT_CURSOR *maincur;
WT_SESSION *session;
@@ -167,7 +169,7 @@ void populate(TEST_OPTS *opts)
int balance, i, flag, post;
WT_RAND_STATE rnd;
- testutil_check(__wt_random_init_seed(NULL, &rnd));
+ __wt_random_init_seed(NULL, &rnd);
testutil_check(opts->conn->open_session(
opts->conn, NULL, NULL, &session));
@@ -194,6 +196,6 @@ void populate(TEST_OPTS *opts)
testutil_check(maincur->insert(maincur));
testutil_check(session->commit_transaction(session, NULL));
}
- maincur->close(maincur);
- session->close(session, NULL);
+ testutil_check(maincur->close(maincur));
+ testutil_check(session->close(session, NULL));
}
diff --git a/src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c b/src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c
index 67ba4a20ada..6cec9634cd1 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c
@@ -42,8 +42,6 @@
* continues until the test ends (~30 seconds).
*/
-void (*custom_die)(void) = NULL;
-
static void *thread_insert(void *);
static void *thread_get(void *);
@@ -201,7 +199,7 @@ thread_insert(void *arg)
threadargs = (THREAD_ARGS *)arg;
opts = threadargs->testopts;
- testutil_check(__wt_random_init_seed(NULL, &rnd));
+ __wt_random_init_seed(NULL, &rnd);
(void)time(&prevtime);
testutil_check(opts->conn->open_session(
diff --git a/src/third_party/wiredtiger/test/cursor_order/cursor_order.c b/src/third_party/wiredtiger/test/cursor_order/cursor_order.c
index aa351e6fea8..85b8c68e545 100644
--- a/src/third_party/wiredtiger/test/cursor_order/cursor_order.c
+++ b/src/third_party/wiredtiger/test/cursor_order/cursor_order.c
@@ -44,8 +44,6 @@ static void wt_shutdown(SHARED_CONFIG *);
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/src/third_party/wiredtiger/test/fops/t.c b/src/third_party/wiredtiger/test/fops/t.c
index bf0588d5a53..7b4a7cf8fca 100644
--- a/src/third_party/wiredtiger/test/fops/t.c
+++ b/src/third_party/wiredtiger/test/fops/t.c
@@ -51,8 +51,6 @@ static void wt_shutdown(void);
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c
index 542adf33da2..839ff5058de 100644
--- a/src/third_party/wiredtiger/test/format/config.c
+++ b/src/third_party/wiredtiger/test/format/config.c
@@ -187,8 +187,17 @@ config_setup(void)
/* Give in-memory configuration a final review. */
config_in_memory_check();
- /* Make the default maximum-run length 20 minutes. */
- if (!config_is_perm("timer"))
+ /*
+ * Run-length configured by a number of operations and a timer. If the
+ * operation count and the timer are both set by a configuration, there
+ * isn't anything to do. If only the operation count was configured,
+ * set a default maximum-run of 20 minutes. If only the timer is set,
+ * clear the operations count (which was set randomly).
+ */
+ if (config_is_perm("timer")) {
+ if (!config_is_perm("ops"))
+ config_single("ops=0", 0);
+ } else
config_single("timer=20", 0);
/*
@@ -270,28 +279,33 @@ config_compression(const char *conf_name)
*/
switch (mmrand(NULL, 1, 20)) {
#ifdef HAVE_BUILTIN_EXTENSION_LZ4
- case 1: case 2: case 3: case 4: /* 20% lz4 */
+ case 1: case 2: /* 10% lz4 */
cstr = "lz4";
break;
- case 5: /* 5% lz4-no-raw */
+ case 3: /* 5% lz4-no-raw */
cstr = "lz4-noraw";
break;
#endif
#ifdef HAVE_BUILTIN_EXTENSION_SNAPPY
- case 6: case 7: case 8: case 9: /* 30% snappy */
- case 10: case 11:
+ case 4: case 5: case 6: case 7: /* 30% snappy */
+ case 8: case 9:
cstr = "snappy";
break;
#endif
#ifdef HAVE_BUILTIN_EXTENSION_ZLIB
- case 12: case 13: case 14: case 15: /* 20% zlib */
+ case 10: case 11: case 12: case 13: /* 20% zlib */
cstr = "zlib";
break;
- case 16: /* 5% zlib-no-raw */
+ case 14: /* 5% zlib-no-raw */
cstr = "zlib-noraw";
break;
#endif
- case 17: case 18: case 19: case 20: /* 20% no compression */
+#ifdef HAVE_BUILTIN_EXTENSION_ZSTD
+ case 15: case 16: case 17: /* 15% zstd */
+ cstr = "zstd";
+ break;
+#endif
+ case 18: case 19: case 20: /* 15% no compression */
default:
break;
}
@@ -748,6 +762,8 @@ config_map_compression(const char *s, u_int *vp)
*vp = COMPRESS_ZLIB;
else if (strcmp(s, "zlib-noraw") == 0)
*vp = COMPRESS_ZLIB_NO_RAW;
+ else if (strcmp(s, "zstd") == 0)
+ *vp = COMPRESS_ZSTD;
else
testutil_die(EINVAL,
"illegal compression configuration: %s", s);
diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h
index 725bc7c5d97..9bfba3cd0df 100644
--- a/src/third_party/wiredtiger/test/format/config.h
+++ b/src/third_party/wiredtiger/test/format/config.h
@@ -58,7 +58,7 @@ typedef struct {
} CONFIG;
#define COMPRESSION_LIST \
- "(none | lz4 | lz4-noraw | snappy | zlib | zlib-noraw)"
+ "(none | lz4 | lz4-noraw | snappy | zlib | zlib-noraw | zstd)"
static CONFIG c[] = {
{ "abort",
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index 363dcf9eea8..820bc020c9b 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -48,6 +48,8 @@
EXTPATH "compressors/snappy/.libs/libwiredtiger_snappy.so"
#define ZLIB_PATH \
EXTPATH "compressors/zlib/.libs/libwiredtiger_zlib.so"
+#define ZSTD_PATH \
+ EXTPATH "compressors/zstd/.libs/libwiredtiger_zstd.so"
#define REVERSE_PATH \
EXTPATH "collators/reverse/.libs/libwiredtiger_reverse_collator.so"
@@ -219,6 +221,7 @@ typedef struct {
#define COMPRESS_SNAPPY 5
#define COMPRESS_ZLIB 6
#define COMPRESS_ZLIB_NO_RAW 7
+#define COMPRESS_ZSTD 8
u_int c_compression_flag; /* Compression flag value */
u_int c_logging_compression_flag; /* Log compression flag value */
diff --git a/src/third_party/wiredtiger/test/format/lrt.c b/src/third_party/wiredtiger/test/format/lrt.c
index 937525522fa..69d6b22d71f 100644
--- a/src/third_party/wiredtiger/test/format/lrt.c
+++ b/src/third_party/wiredtiger/test/format/lrt.c
@@ -96,6 +96,22 @@ lrt(void *arg)
pinned = 0;
} else {
/*
+ * Test named snapshots: create a snapshot, wait to
+ * give the transaction state time to move forward,
+ * then start a transaction with the named snapshot,
+ * drop it, then commit the transaction. This exercises
+ * most of the named snapshot logic under load.
+ */
+ testutil_check(session->snapshot(session, "name=test"));
+ sleep(1);
+ testutil_check(session->begin_transaction(
+ session, "snapshot=test"));
+ testutil_check(session->snapshot(
+ session, "drop=(all)"));
+ testutil_check(session->commit_transaction(
+ session, NULL));
+
+ /*
* Begin transaction: without an explicit transaction,
* the snapshot is only kept around while a cursor is
* positioned. As soon as the cursor loses its position
diff --git a/src/third_party/wiredtiger/test/format/t.c b/src/third_party/wiredtiger/test/format/t.c
index 12258af8e51..7701595776c 100644
--- a/src/third_party/wiredtiger/test/format/t.c
+++ b/src/third_party/wiredtiger/test/format/t.c
@@ -38,8 +38,6 @@ static void usage(void)
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = format_die; /* Local death handler. */
-
int
main(int argc, char *argv[])
{
@@ -47,6 +45,8 @@ main(int argc, char *argv[])
int ch, onerun, reps;
const char *config, *home;
+ custom_die = format_die; /* Local death handler. */
+
config = NULL;
#ifdef _WIN32
@@ -115,7 +115,7 @@ main(int argc, char *argv[])
argv += __wt_optind;
/* Initialize the global RNG. */
- testutil_check(__wt_random_init_seed(NULL, &g.rnd));
+ __wt_random_init_seed(NULL, &g.rnd);
/* Set up paths. */
path_setup(home);
diff --git a/src/third_party/wiredtiger/test/format/util.c b/src/third_party/wiredtiger/test/format/util.c
index 667f6d6bcb1..a709aa93a2e 100644
--- a/src/third_party/wiredtiger/test/format/util.c
+++ b/src/third_party/wiredtiger/test/format/util.c
@@ -78,7 +78,7 @@ key_gen_setup(WT_ITEM *key)
}
static void
-key_gen_common(WT_ITEM *key, uint64_t keyno, int suffix)
+key_gen_common(WT_ITEM *key, uint64_t keyno, const char * const suffix)
{
int len;
char *p;
@@ -86,11 +86,15 @@ key_gen_common(WT_ITEM *key, uint64_t keyno, int suffix)
p = key->mem;
/*
- * The key always starts with a 10-digit string (the specified cnt)
+ * The key always starts with a 10-digit string (the specified row)
* followed by two digits, a random number between 1 and 15 if it's
* an insert, otherwise 00.
*/
- len = sprintf(p, "%010" PRIu64 ".%02d", keyno, suffix);
+ u64_to_string_zf(keyno, key->mem, 11);
+ p[10] = '.';
+ p[11] = suffix[0];
+ p[12] = suffix[1];
+ len = 13;
/*
* In a column-store, the key is only used for Berkeley DB inserts,
@@ -118,13 +122,19 @@ key_gen_common(WT_ITEM *key, uint64_t keyno, int suffix)
void
key_gen(WT_ITEM *key, uint64_t keyno)
{
- key_gen_common(key, keyno, 0);
+ key_gen_common(key, keyno, "00");
}
void
key_gen_insert(WT_RAND_STATE *rnd, WT_ITEM *key, uint64_t keyno)
{
- key_gen_common(key, keyno, (int)mmrand(rnd, 1, 15));
+ static const char * const suffix[15] = {
+ "01", "02", "03", "04", "05",
+ "06", "07", "08", "09", "10",
+ "11", "12", "13", "14", "15"
+ };
+
+ key_gen_common(key, keyno, suffix[mmrand(rnd, 1, 15) - 1]);
}
static uint32_t val_dup_data_len; /* Length of duplicate data items */
@@ -221,7 +231,7 @@ val_gen(WT_RAND_STATE *rnd, WT_ITEM *value, uint64_t keyno)
p[10] = '/';
value->size = val_dup_data_len;
} else {
- (void)sprintf(p, "%010" PRIu64, keyno);
+ u64_to_string_zf(keyno, p, 11);
p[10] = '/';
value->size =
value_len(rnd, keyno, g.c_value_min, g.c_value_max);
diff --git a/src/third_party/wiredtiger/test/format/wts.c b/src/third_party/wiredtiger/test/format/wts.c
index 74c4bb902b3..23fdbce156c 100644
--- a/src/third_party/wiredtiger/test/format/wts.c
+++ b/src/third_party/wiredtiger/test/format/wts.c
@@ -50,6 +50,8 @@ compressor(uint32_t compress_flag)
return ("zlib");
case COMPRESS_ZLIB_NO_RAW:
return ("zlib-noraw");
+ case COMPRESS_ZSTD:
+ return ("zstd");
default:
break;
}
@@ -210,13 +212,14 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp)
/* Extensions. */
p += snprintf(p, REMAIN(p, end),
",extensions=["
- "\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"],",
+ "\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"],",
g.c_reverse ? REVERSE_PATH : "",
access(LZ4_PATH, R_OK) == 0 ? LZ4_PATH : "",
access(LZO_PATH, R_OK) == 0 ? LZO_PATH : "",
access(ROTN_PATH, R_OK) == 0 ? ROTN_PATH : "",
access(SNAPPY_PATH, R_OK) == 0 ? SNAPPY_PATH : "",
access(ZLIB_PATH, R_OK) == 0 ? ZLIB_PATH : "",
+ access(ZSTD_PATH, R_OK) == 0 ? ZSTD_PATH : "",
DATASOURCE("kvsbdb") ? KVS_BDB_PATH : "");
/*
@@ -546,6 +549,7 @@ wts_stats(void)
WT_DECL_RET;
WT_SESSION *session;
FILE *fp;
+ size_t len;
char *stat_name;
const char *pval, *desc;
uint64_t v;
@@ -582,8 +586,9 @@ wts_stats(void)
/* Data source statistics. */
fprintf(fp, "\n\n====== Data source statistics:\n");
- stat_name = dmalloc(strlen("statistics:") + strlen(g.uri) + 1);
- sprintf(stat_name, "statistics:%s", g.uri);
+ len = strlen("statistics:") + strlen(g.uri) + 1;
+ stat_name = dmalloc(len);
+ snprintf(stat_name, len, "statistics:%s", g.uri);
testutil_check(session->open_cursor(
session, stat_name, NULL, NULL, &cursor));
free(stat_name);
diff --git a/src/third_party/wiredtiger/test/huge/huge.c b/src/third_party/wiredtiger/test/huge/huge.c
index 3aa61a9048e..17e2db353d5 100644
--- a/src/third_party/wiredtiger/test/huge/huge.c
+++ b/src/third_party/wiredtiger/test/huge/huge.c
@@ -159,8 +159,6 @@ run(CONFIG *cp, int bigkey, size_t bytes)
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/src/third_party/wiredtiger/test/java/com/wiredtiger/test/ConcurrentCloseTest.java b/src/third_party/wiredtiger/test/java/com/wiredtiger/test/ConcurrentCloseTest.java
index fece0353bf0..fead0b0bf38 100644
--- a/src/third_party/wiredtiger/test/java/com/wiredtiger/test/ConcurrentCloseTest.java
+++ b/src/third_party/wiredtiger/test/java/com/wiredtiger/test/ConcurrentCloseTest.java
@@ -34,6 +34,7 @@ import com.wiredtiger.db.WiredTigerException;
import com.wiredtiger.db.wiredtiger;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
import java.io.BufferedReader;
import java.io.File;
@@ -69,7 +70,7 @@ class InsertThread extends Thread {
Session session = conn.open_session(null);
Cursor cursor = session.open_cursor("table:cclose", null,
"overwrite");
- cursor.putKeyString("key"+threadId + "-" + i);
+ cursor.putKeyString("key" + threadId + "-" + i);
cursor.putValueString("value1");
ret = cursor.insert();
cursor.close();
@@ -127,36 +128,36 @@ public class ConcurrentCloseTest {
setup();
try {
List<Thread> threads = new ArrayList<Thread>();
- int i, ret;
+ int i;
- ret = session.create("table:cclose", "key_format=S,value_format=S");
+ assertEquals(0, session.create("table:cclose",
+ "key_format=S,value_format=S"));
Cursor cursor = session.open_cursor("table:cclose", null,
"overwrite");
cursor.putKeyString("key1");
cursor.putValueString("value1");
- ret = cursor.insert();
+ assertEquals(0, cursor.insert());
cursor.close();
- ret = session.close(null);
+ assertEquals(0, session.close(null));
for (i = 0; i < NUM_THREADS; i++) {
Thread insertThread = new InsertThread(conn, i);
- Thread scanThread = new InsertThread(conn, i);
+ Thread scanThread = new ScanThread(conn);
insertThread.start();
scanThread.start();
threads.add(insertThread);
threads.add(scanThread);
}
-
for (Thread thread : threads)
try {
thread.join();
- ret = -1;
}
catch (InterruptedException ie) {
+ fail();
}
- ret = conn.close(null);
- System.exit(ret);
+ assertEquals(0, conn.close(null));
+ System.exit(0);
}
catch (WiredTigerException wte) {
System.err.println("Exception: " + wte);
diff --git a/src/third_party/wiredtiger/test/manydbs/manydbs.c b/src/third_party/wiredtiger/test/manydbs/manydbs.c
index c5c9a9a7ccd..7e986d47af3 100644
--- a/src/third_party/wiredtiger/test/manydbs/manydbs.c
+++ b/src/third_party/wiredtiger/test/manydbs/manydbs.c
@@ -68,8 +68,6 @@ usage(void)
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
-
static WT_CONNECTION **connections = NULL;
static WT_CURSOR **cursors = NULL;
static WT_RAND_STATE rnd;
diff --git a/src/third_party/wiredtiger/test/packing/intpack-test.c b/src/third_party/wiredtiger/test/packing/intpack-test.c
index 76851b38e35..c84823b741b 100644
--- a/src/third_party/wiredtiger/test/packing/intpack-test.c
+++ b/src/third_party/wiredtiger/test/packing/intpack-test.c
@@ -28,8 +28,6 @@
#include "test_util.h"
-void (*custom_die)(void) = NULL;
-
int
main(void)
{
diff --git a/src/third_party/wiredtiger/test/packing/intpack-test2.c b/src/third_party/wiredtiger/test/packing/intpack-test2.c
index a7d31329069..4e612808a35 100644
--- a/src/third_party/wiredtiger/test/packing/intpack-test2.c
+++ b/src/third_party/wiredtiger/test/packing/intpack-test2.c
@@ -28,8 +28,6 @@
#include "test_util.h"
-void (*custom_die)(void) = NULL;
-
int
main(void)
{
diff --git a/src/third_party/wiredtiger/test/packing/intpack-test3.c b/src/third_party/wiredtiger/test/packing/intpack-test3.c
index aac0178578f..763b0255ecf 100644
--- a/src/third_party/wiredtiger/test/packing/intpack-test3.c
+++ b/src/third_party/wiredtiger/test/packing/intpack-test3.c
@@ -28,8 +28,6 @@
#include "test_util.h"
-void (*custom_die)(void) = NULL;
-
void test_value(int64_t);
void test_spread(int64_t, int64_t, int64_t);
diff --git a/src/third_party/wiredtiger/test/packing/packing-test.c b/src/third_party/wiredtiger/test/packing/packing-test.c
index f251c17eb67..919b0622806 100644
--- a/src/third_party/wiredtiger/test/packing/packing-test.c
+++ b/src/third_party/wiredtiger/test/packing/packing-test.c
@@ -28,8 +28,6 @@
#include "test_util.h"
-void (*custom_die)(void) = NULL;
-
static void
check(const char *fmt, ...)
{
diff --git a/src/third_party/wiredtiger/test/readonly/readonly.c b/src/third_party/wiredtiger/test/readonly/readonly.c
index 7a131912c31..a4b79f5859f 100644
--- a/src/third_party/wiredtiger/test/readonly/readonly.c
+++ b/src/third_party/wiredtiger/test/readonly/readonly.c
@@ -158,8 +158,6 @@ open_dbs(int op, const char *dir,
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/src/third_party/wiredtiger/test/recovery/random-abort.c b/src/third_party/wiredtiger/test/recovery/random-abort.c
index 16065cec29e..03e67e2f723 100644
--- a/src/third_party/wiredtiger/test/recovery/random-abort.c
+++ b/src/third_party/wiredtiger/test/recovery/random-abort.c
@@ -179,8 +179,6 @@ fill_db(uint32_t nth)
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
@@ -245,7 +243,7 @@ main(int argc, char *argv[])
if (!verify_only) {
testutil_make_work_dir(home);
- testutil_assert(__wt_random_init_seed(NULL, &rnd) == 0);
+ __wt_random_init_seed(NULL, &rnd);
if (rand_time) {
timeout = __wt_random(&rnd) % MAX_TIME;
if (timeout < MIN_TIME)
diff --git a/src/third_party/wiredtiger/test/recovery/truncated-log.c b/src/third_party/wiredtiger/test/recovery/truncated-log.c
index c0effa85e95..c265263d44c 100644
--- a/src/third_party/wiredtiger/test/recovery/truncated-log.c
+++ b/src/third_party/wiredtiger/test/recovery/truncated-log.c
@@ -258,8 +258,6 @@ fill_db(void)
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/src/third_party/wiredtiger/test/salvage/salvage.c b/src/third_party/wiredtiger/test/salvage/salvage.c
index bad0167ca8e..b8553bbd72d 100644
--- a/src/third_party/wiredtiger/test/salvage/salvage.c
+++ b/src/third_party/wiredtiger/test/salvage/salvage.c
@@ -64,8 +64,6 @@ static int verbose; /* -v flag */
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/src/third_party/wiredtiger/test/suite/test_bug017.py b/src/third_party/wiredtiger/test/suite/test_bug017.py
new file mode 100644
index 00000000000..03e7b2ba714
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_bug017.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2016 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+
+# test_bug017.py
+# WT-2987: opening a cursor on an incomplete table drops core
+class test_bug017(wttest.WiredTigerTestCase):
+
+ def test_bug017_run(self):
+ self.session.create("table:bug17",
+ 'key_format=r,value_format=5sHQ,' +
+ 'columns=(id,country,year,population),colgroups=(main,population)')
+
+ msg = '/column groups/'
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.open_cursor("table:bug17(country)", None),
+ msg)
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_cursor_random02.py b/src/third_party/wiredtiger/test/suite/test_cursor_random02.py
index 93aa97f2282..195480d703b 100644
--- a/src/third_party/wiredtiger/test/suite/test_cursor_random02.py
+++ b/src/third_party/wiredtiger/test/suite/test_cursor_random02.py
@@ -34,7 +34,10 @@ from wtscenario import make_scenarios
# test_cursor_random02.py
# Cursor next_random operations
class test_cursor_random02(wttest.WiredTigerTestCase):
- type = 'table:random'
+ types = [
+ ('lsm', dict(type='lsm:random')),
+ ('table', dict(type='table:random'))
+ ]
config = [
('not-sample', dict(config='next_random=true'))
]
@@ -46,26 +49,35 @@ class test_cursor_random02(wttest.WiredTigerTestCase):
('10000', dict(records=10000)),
('50000', dict(records=50000)),
]
- scenarios = make_scenarios(config, records)
+ scenarios = make_scenarios(config, records, types)
# Check that next_random works in the presence of a larger set of values,
# where the values are in an insert list.
def test_cursor_random_reasonable_distribution(self):
uri = self.type
num_entries = self.records
+ config = 'key_format=S'
+ if uri == 'table:random':
+ config = 'leaf_page_max=100MB,' + config
# Set the leaf-page-max value, otherwise the page might split.
- simple_populate(self, uri,
- 'leaf_page_max=100MB,key_format=S', num_entries)
+ simple_populate(self, uri, config, num_entries)
# Setup an array to track which keys are seen
visitedKeys = [0] * (num_entries + 1)
+ # Setup a counter to see when we find a sequential key
+ sequentialKeys = 0
cursor = self.session.open_cursor(uri, None, 'next_random=true')
+ lastKey = None
for i in range(0, num_entries):
self.assertEqual(cursor.next(), 0)
current = cursor.get_key()
current = int(current)
visitedKeys[current] = visitedKeys[current] + 1
+ if lastKey != None:
+ if current == (lastKey + 1):
+ sequentialKeys += 1
+ lastKey = current
differentKeys = sum(x > 0 for x in visitedKeys)
@@ -76,7 +88,10 @@ class test_cursor_random02(wttest.WiredTigerTestCase):
str(num_entries) + ', ' + \
str((int)((differentKeys * 100) / num_entries)) + '%')
'''
-
+ # Can't test for non-sequential data when there is 1 item in the table
+ if num_entries > 1:
+ self.assertGreater(num_entries - 1, sequentialKeys,
+ 'cursor is returning sequential data')
self.assertGreater(differentKeys, num_entries / 4,
'next_random random distribution not adequate')
diff --git a/src/third_party/wiredtiger/test/suite/test_encrypt01.py b/src/third_party/wiredtiger/test/suite/test_encrypt01.py
index d314cbeadfd..746c9d13e96 100644
--- a/src/third_party/wiredtiger/test/suite/test_encrypt01.py
+++ b/src/third_party/wiredtiger/test/suite/test_encrypt01.py
@@ -57,6 +57,7 @@ class test_encrypt01(wttest.WiredTigerTestCase):
('lz4', dict(log_compress='lz4', block_compress='lz4')),
('snappy', dict(log_compress='snappy', block_compress='snappy')),
('zlib', dict(log_compress='zlib', block_compress='zlib')),
+ ('zstd', dict(log_compress='zstd', block_compress='zstd')),
('none-snappy', dict(log_compress=None, block_compress='snappy')),
('snappy-lz4', dict(log_compress='snappy', block_compress='lz4')),
]
diff --git a/src/third_party/wiredtiger/test/suite/test_inmem02.py b/src/third_party/wiredtiger/test/suite/test_inmem02.py
new file mode 100644
index 00000000000..9eb8330b2a3
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_inmem02.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2016 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+from time import sleep
+from helper import simple_populate, simple_populate_check
+from helper import key_populate, value_populate
+from wtscenario import make_scenarios
+
+# test_inmem02.py
+# Test in-memory with ignore-cache-size setting.
+class test_inmem02(wttest.WiredTigerTestCase):
+ uri = 'table:inmem02'
+ conn_config = \
+ 'cache_size=3MB,file_manager=(close_idle_time=0),in_memory=true'
+ table_config = 'key_format=S,value_format=S,memory_page_max=32k,leaf_page_max=4k'
+
+ # Add more data than fits into the configured cache and verify it fails.
+ def test_insert_over_allowed(self):
+
+ # Create a new table that is allowed to exceed the cache size, do this
+ # before filling the cache so that the create succeeds
+ self.session.create(
+ self.uri + '_over', 'ignore_in_memory_cache_size=true')
+
+ # Populate a table with enough data to fill the cache.
+ msg = '/WT_CACHE_FULL.*/'
+ self.assertRaisesHavingMessage(wiredtiger.WiredTigerError,
+ lambda:simple_populate(
+ self, self.uri, self.table_config, 10000000), msg)
+
+ # Add some content to the new table
+ cursor = self.session.open_cursor(self.uri + '_over', None)
+ for i in range(1, 1000):
+ cursor[str('%015d' % i)] = str(i) + ': abcdefghijklmnopqrstuvwxyz'
+ cursor.close()
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_stat02.py b/src/third_party/wiredtiger/test/suite/test_stat02.py
index 3d2a83d1c3c..047d2c74499 100644
--- a/src/third_party/wiredtiger/test/suite/test_stat02.py
+++ b/src/third_party/wiredtiger/test/suite/test_stat02.py
@@ -165,7 +165,7 @@ class test_stat_cursor_conn_error(wttest.WiredTigerTestCase):
args = ['none', 'all', 'fast']
for i in list(itertools.permutations(args, 2)):
config = 'create,statistics=(' + i[0] + ',' + i[1] + ')'
- msg = '/only one statistics configuration value/'
+ msg = '/Only one of/'
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.wiredtiger_open('.', config), msg)
@@ -188,10 +188,76 @@ class test_stat_cursor_dsrc_error(wttest.WiredTigerTestCase):
args = ['all', 'fast']
for i in list(itertools.permutations(args, 2)):
config = 'statistics=(' + i[0] + ',' + i[1] + ')'
- msg = '/only one statistics configuration value/'
+ msg = '/Only one of/'
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.open_cursor(
'statistics:' + self.uri, None, config), msg)
+# Test data-source cache walk statistics
+class test_stat_cursor_dsrc_cache_walk(wttest.WiredTigerTestCase):
+ uri = 'file:test_stat_cursor_dsrc_cache_walk'
+
+ conn_config = 'statistics=(none)'
+
+ def test_stat_cursor_dsrc_cache_walk(self):
+ simple_populate(self, self.uri, 'key_format=S', 100)
+ # Ensure that it's an error to get cache_walk stats if none is set
+ msg = '/doesn\'t match the database statistics/'
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.open_cursor(
+ 'statistics:' + self.uri, None, None), msg)
+
+ # Test configurations that are valid but should not collect
+ # cache walk information. Do these first since the cache walk
+ # statistics are mostly marked as not cleared - so once they are
+ # populated the values will always be returned
+ self.conn.reconfigure('statistics=(cache_walk,fast,clear)')
+ c = self.session.open_cursor(
+ 'statistics:' + self.uri, None, 'statistics=(fast)')
+ self.assertEqual(c[stat.dsrc.cache_state_root_size][2], 0)
+ c.close()
+
+ self.conn.reconfigure('statistics=(all,clear)')
+ c = self.session.open_cursor(
+ 'statistics:' + self.uri, None, 'statistics=(fast)')
+ self.assertEqual(c[stat.dsrc.cache_state_root_size][2], 0)
+ c.close()
+
+ self.conn.reconfigure('statistics=(cache_walk,fast,clear)')
+ c = self.session.open_cursor('statistics:' + self.uri, None, None)
+ self.assertGreater(c[stat.dsrc.cache_state_root_size][2], 0)
+ # Verify that cache_walk didn't imply tree_walk
+ self.assertEqual(c[stat.dsrc.btree_entries][2], 0)
+ c.close()
+
+ self.conn.reconfigure('statistics=(cache_walk,tree_walk,fast,clear)')
+ c = self.session.open_cursor('statistics:' + self.uri, None, None)
+ self.assertGreater(c[stat.dsrc.cache_state_root_size][2], 0)
+ # Verify that cache_walk didn't exclude tree_walk
+ self.assertGreater(c[stat.dsrc.btree_entries][2], 0)
+ c.close()
+
+ self.conn.reconfigure('statistics=(all,clear)')
+ c = self.session.open_cursor(
+ 'statistics:' + self.uri, None, 'statistics=(all)')
+ self.assertGreater(c[stat.dsrc.cache_state_root_size][2], 0)
+ self.assertGreater(c[stat.dsrc.btree_entries][2], 0)
+ c.close()
+
+ # Verify that cache and tree walk can operate independently
+ self.conn.reconfigure('statistics=(all,clear)')
+ c = self.session.open_cursor(
+ 'statistics:' + self.uri, None, 'statistics=(cache_walk,fast)')
+ self.assertGreater(c[stat.dsrc.cache_state_root_size][2], 0)
+ self.assertEqual(c[stat.dsrc.btree_entries][2], 0)
+ c.close()
+
+ self.conn.reconfigure('statistics=(all,clear)')
+ c = self.session.open_cursor(
+ 'statistics:' + self.uri, None, 'statistics=(tree_walk,fast)')
+ # Don't check the cache walk stats for empty - they won't be cleared
+ self.assertGreater(c[stat.dsrc.btree_entries][2], 0)
+ c.close()
+
if __name__ == '__main__':
wttest.run()
diff --git a/src/third_party/wiredtiger/test/thread/t.c b/src/third_party/wiredtiger/test/thread/t.c
index 5b53532e8a6..baadbf2adb9 100644
--- a/src/third_party/wiredtiger/test/thread/t.c
+++ b/src/third_party/wiredtiger/test/thread/t.c
@@ -52,8 +52,6 @@ static void wt_shutdown(void);
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/src/third_party/wiredtiger/test/utility/misc.c b/src/third_party/wiredtiger/test/utility/misc.c
index 096bc752726..1491c9a6938 100644
--- a/src/third_party/wiredtiger/test/utility/misc.c
+++ b/src/third_party/wiredtiger/test/utility/misc.c
@@ -27,6 +27,8 @@
*/
#include "test_util.h"
+void (*custom_die)(void) = NULL;
+
/*
* die --
* Report an error and quit.
@@ -142,8 +144,6 @@ testutil_cleanup(TEST_OPTS *opts)
if (!opts->preserve)
testutil_clean_work_dir(opts->home);
- free(opts->conn_config);
- free(opts->table_config);
free(opts->uri);
free(opts->home);
}
diff --git a/src/third_party/wiredtiger/test/utility/parse_opts.c b/src/third_party/wiredtiger/test/utility/parse_opts.c
index 08aeafa9617..74a1c021d5d 100644
--- a/src/third_party/wiredtiger/test/utility/parse_opts.c
+++ b/src/third_party/wiredtiger/test/utility/parse_opts.c
@@ -27,10 +27,6 @@
*/
#include "test_util.h"
-extern int __wt_opterr; /* if error message should be printed */
-extern int __wt_optind; /* index into parent argv vector */
-extern int __wt_optopt; /* character checked for validity */
-extern int __wt_optreset; /* reset getopt */
extern char *__wt_optarg; /* argument associated with option */
/*
@@ -59,7 +55,7 @@ testutil_parse_opts(int argc, char * const *argv, TEST_OPTS *opts)
opts->n_append_threads = (uint64_t)atoll(__wt_optarg);
break;
case 'h': /* Home directory */
- opts->home = __wt_optarg;
+ opts->home = dstrdup(__wt_optarg);
break;
case 'n': /* Number of records */
opts->nrecords = (uint64_t)atoll(__wt_optarg);
@@ -116,12 +112,14 @@ testutil_parse_opts(int argc, char * const *argv, TEST_OPTS *opts)
}
/*
- * Setup the home directory. It needs to be unique for every test
- * or the auto make parallel tester gets upset.
+ * Setup the home directory if not explicitly specified. It needs to be
+ * unique for every test or the auto make parallel tester gets upset.
*/
- len = strlen("WT_TEST.") + strlen(opts->progname) + 10;
- opts->home = dmalloc(len);
- snprintf(opts->home, len, "WT_TEST.%s", opts->progname);
+ if (opts->home == NULL) {
+ len = strlen("WT_TEST.") + strlen(opts->progname) + 10;
+ opts->home = dmalloc(len);
+ snprintf(opts->home, len, "WT_TEST.%s", opts->progname);
+ }
/* Setup the default URI string */
len = strlen("table:") + strlen(opts->progname) + 10;
diff --git a/src/third_party/wiredtiger/test/utility/test_util.h b/src/third_party/wiredtiger/test/utility/test_util.h
index 3c1d0e2630a..f6a9cd68e02 100644
--- a/src/third_party/wiredtiger/test/utility/test_util.h
+++ b/src/third_party/wiredtiger/test/utility/test_util.h
@@ -68,10 +68,8 @@ typedef struct {
* resources.
*/
WT_CONNECTION *conn;
- char *conn_config;
WT_SESSION *session;
bool running;
- char *table_config;
char *uri;
volatile uint64_t next_threadid;
uint64_t max_inserted_id;
@@ -87,6 +85,16 @@ typedef struct {
} while (0)
/*
+ * testutil_assertfmt --
+ * Complain and quit if something isn't true.
+ */
+#define testutil_assertfmt(a, fmt, ...) do { \
+ if (!(a)) \
+ testutil_die(0, "%s/%d: %s: " fmt, \
+ __func__, __LINE__, #a, __VA_ARGS__); \
+} while (0)
+
+/*
* testutil_check --
* Complain and quit if a function call fails.
*/
@@ -108,6 +116,62 @@ typedef struct {
__func__, __LINE__, #call, __VA_ARGS__); \
} while (0)
+/*
+ * u64_to_string --
+ * Convert a uint64_t to a text string.
+ *
+ * Algorithm from Andrei Alexandrescu's talk: "Three Optimization Tips for C++"
+ */
+static inline void
+u64_to_string(uint64_t n, char **pp)
+{
+ static const char hundred_lookup[201] =
+ "0001020304050607080910111213141516171819"
+ "2021222324252627282930313233343536373839"
+ "4041424344454647484950515253545556575859"
+ "6061626364656667686970717273747576777879"
+ "8081828384858687888990919293949596979899";
+ u_int i;
+ char *p;
+
+ /*
+ * The argument pointer references the last element of a buffer (which
+ * must be large enough to hold any possible value).
+ *
+ * Nul-terminate the buffer.
+ */
+ for (p = *pp, *p-- = '\0'; n >= 100; n /= 100) {
+ i = (n % 100) * 2;
+ *p-- = hundred_lookup[i + 1];
+ *p-- = hundred_lookup[i];
+ }
+
+ /* Handle the last two digits. */
+ i = (u_int)n * 2;
+ *p = hundred_lookup[i + 1];
+ if (n >= 10)
+ *--p = hundred_lookup[i];
+
+ /* Return a pointer to the first byte of the text string. */
+ *pp = p;
+}
+
+/*
+ * u64_to_string_zf --
+ * Convert a uint64_t to a text string, zero-filling the buffer.
+ */
+static inline void
+u64_to_string_zf(uint64_t n, char *buf, size_t len)
+{
+ char *p;
+
+ p = buf + (len - 1);
+ u64_to_string(n, &p);
+
+ while (p > buf)
+ *--p = '0';
+}
+
/* Allow tests to add their own death handling. */
extern void (*custom_die)(void);
diff --git a/src/third_party/wiredtiger/tools/wtstats/stat_data.py b/src/third_party/wiredtiger/tools/wtstats/stat_data.py
index 8f47b86a23e..635e710c469 100644
--- a/src/third_party/wiredtiger/tools/wtstats/stat_data.py
+++ b/src/third_party/wiredtiger/tools/wtstats/stat_data.py
@@ -91,6 +91,24 @@ no_scale_per_second_list = [
'btree: row-store leaf pages',
'cache: bytes currently in the cache',
'cache: overflow values cached in memory',
+ 'cache_walk: Average difference between current eviction generation when the page was last considered',
+ 'cache_walk: Average on-disk page image size seen',
+ 'cache_walk: Clean pages currently in cache',
+ 'cache_walk: Current eviction generation',
+ 'cache_walk: Dirty pages currently in cache',
+ 'cache_walk: Entries in the root page',
+ 'cache_walk: Internal pages currently in cache',
+ 'cache_walk: Leaf pages currently in cache',
+ 'cache_walk: Maximum difference between current eviction generation when the page was last considered',
+ 'cache_walk: Maximum page size seen',
+ 'cache_walk: Minimum on-disk page image size seen',
+ 'cache_walk: On-disk page image sizes smaller than a single allocation unit',
+ 'cache_walk: Pages created in memory and never written',
+ 'cache_walk: Pages currently queued for eviction',
+ 'cache_walk: Pages that could not be queued for eviction',
+ 'cache_walk: Refs skipped during cache traversal',
+ 'cache_walk: Size of the root page',
+ 'cache_walk: Total number of pages currently in cache',
'LSM: bloom filters in the LSM tree',
'LSM: chunks in the LSM tree',
'LSM: highest merge generation in the LSM tree',
@@ -162,6 +180,24 @@ no_clear_list = [
'transaction: transaction range of IDs currently pinned by named snapshots',
'btree: btree checkpoint generation',
'cache: bytes currently in the cache',
+ 'cache_walk: Average difference between current eviction generation when the page was last considered',
+ 'cache_walk: Average on-disk page image size seen',
+ 'cache_walk: Clean pages currently in cache',
+ 'cache_walk: Current eviction generation',
+ 'cache_walk: Dirty pages currently in cache',
+ 'cache_walk: Entries in the root page',
+ 'cache_walk: Internal pages currently in cache',
+ 'cache_walk: Leaf pages currently in cache',
+ 'cache_walk: Maximum difference between current eviction generation when the page was last considered',
+ 'cache_walk: Maximum page size seen',
+ 'cache_walk: Minimum on-disk page image size seen',
+ 'cache_walk: On-disk page image sizes smaller than a single allocation unit',
+ 'cache_walk: Pages created in memory and never written',
+ 'cache_walk: Pages currently queued for eviction',
+ 'cache_walk: Pages that could not be queued for eviction',
+ 'cache_walk: Refs skipped during cache traversal',
+ 'cache_walk: Size of the root page',
+ 'cache_walk: Total number of pages currently in cache',
'session: open cursor count',
]
prefix_list = [
@@ -169,6 +205,7 @@ prefix_list = [
'reconciliation',
'LSM',
'log',
+ 'lock',
'cache',
'transaction',
'cursor',
@@ -176,9 +213,10 @@ prefix_list = [
'session',
'block-manager',
'thread-yield',
+ 'cache_walk',
'async',
'btree',
'thread-state',
'compression',
]
-groups = {'cursor': ['cursor', 'session'], 'lsm': ['LSM', 'transaction'], 'system': ['connection', 'data-handle', 'session', 'thread-state'], 'evict': ['block-manager', 'cache', 'connection', 'thread-state'], 'memory': ['cache', 'connection', 'reconciliation']} \ No newline at end of file
+groups = {'cursor': ['cursor', 'session'], 'lsm': ['LSM', 'transaction'], 'system': ['connection', 'data-handle', 'session', 'thread-state'], 'evict': ['block-manager', 'cache', 'cache_walk', 'connection', 'thread-state'], 'memory': ['cache', 'cache_walk', 'connection', 'reconciliation']} \ No newline at end of file
diff --git a/src/third_party/wiredtiger/tools/wtstats/wtstats.py b/src/third_party/wiredtiger/tools/wtstats/wtstats.py
index 3549031c30f..bf5557d12f4 100755
--- a/src/third_party/wiredtiger/tools/wtstats/wtstats.py
+++ b/src/third_party/wiredtiger/tools/wtstats/wtstats.py
@@ -115,6 +115,9 @@ def parse_wtstats_file(file, result):
# Parse file
for line in open(file, 'rU'):
month, day, time, v, title = line.strip('\n').split(" ", 4)
+ # The colon in the URI confuses parsing, strip it out.
+ if "cache_walk" in title:
+ title = title.replace("file:", "", 1)
result[title].append((month + " " + day + " " + time, v))