summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bench/wtperf/runners/500m-btree-50r50u.wtperf3
-rw-r--r--bench/wtperf/runners/500m-btree-80r20u.wtperf3
-rw-r--r--bench/wtperf/runners/checkpoint-stress.wtperf1
-rw-r--r--bench/wtperf/runners/evict-btree-stress-multi.wtperf1
-rw-r--r--bench/wtperf/wtperf.c8
-rw-r--r--bench/wtperf/wtperf.h3
-rw-r--r--bench/wtperf/wtperf_opt.i5
-rw-r--r--build_posix/Make.base3
-rw-r--r--build_posix/Make.subdirs1
-rw-r--r--build_posix/aclocal/options.m434
-rw-r--r--build_win/wiredtiger_config.h6
-rw-r--r--dist/api_data.py15
-rw-r--r--dist/filelist1
-rw-r--r--dist/flags.py10
-rw-r--r--dist/s_define.list6
-rwxr-xr-xdist/s_export2
-rwxr-xr-xdist/s_stat19
-rw-r--r--dist/s_string.ok23
-rw-r--r--dist/s_void9
-rw-r--r--dist/stat.py7
-rw-r--r--dist/stat_data.py288
-rw-r--r--examples/c/ex_all.c33
-rw-r--r--examples/java/com/wiredtiger/examples/ex_all.java29
-rw-r--r--ext/compressors/lz4/lz4_compress.c41
-rw-r--r--ext/compressors/snappy/snappy_compress.c119
-rw-r--r--ext/compressors/zlib/zlib_compress.c291
-rw-r--r--ext/compressors/zstd/Makefile.am11
-rw-r--r--ext/compressors/zstd/zstd_compress.c358
-rw-r--r--src/block/block_ckpt.c28
-rw-r--r--src/btree/bt_io.c11
-rw-r--r--src/btree/bt_read.c33
-rw-r--r--src/btree/bt_split.c15
-rw-r--r--src/btree/bt_stat.c29
-rw-r--r--src/btree/bt_sync.c80
-rw-r--r--src/cache/cache_las.c2
-rw-r--r--src/config/config_def.c18
-rw-r--r--src/conn/conn_api.c114
-rw-r--r--src/conn/conn_cache.c19
-rw-r--r--src/conn/conn_handle.c18
-rw-r--r--src/conn/conn_stat.c10
-rw-r--r--src/cursor/cur_file.c10
-rw-r--r--src/cursor/cur_stat.c77
-rw-r--r--src/cursor/cur_table.c39
-rw-r--r--src/docs/build-posix.dox6
-rw-r--r--src/docs/compression.dox44
-rw-r--r--src/docs/spell.ok2
-rw-r--r--src/docs/wtperf.dox4
-rw-r--r--src/evict/evict_lru.c19
-rw-r--r--src/evict/evict_page.c13
-rw-r--r--src/evict/evict_stat.c138
-rw-r--r--src/include/btmem.h4
-rw-r--r--src/include/btree.i99
-rw-r--r--src/include/cache.h1
-rw-r--r--src/include/connection.h8
-rw-r--r--src/include/cursor.h2
-rw-r--r--src/include/extern.h4
-rw-r--r--src/include/flags.h8
-rw-r--r--src/include/lsm.h24
-rw-r--r--src/include/mutex.h22
-rw-r--r--src/include/mutex.i45
-rw-r--r--src/include/schema.h8
-rw-r--r--src/include/session.h3
-rw-r--r--src/include/stat.h107
-rw-r--r--src/include/wiredtiger.in649
-rw-r--r--src/include/wt_internal.h2
-rw-r--r--src/lsm/lsm_cursor.c266
-rw-r--r--src/lsm/lsm_cursor_bulk.c16
-rw-r--r--src/lsm/lsm_stat.c22
-rw-r--r--src/os_posix/os_time.c9
-rw-r--r--src/schema/schema_stat.c2
-rw-r--r--src/session/session_api.c15
-rw-r--r--src/support/stat.c160
-rw-r--r--src/txn/txn.c12
-rw-r--r--test/bloom/test_bloom.c2
-rw-r--r--test/checkpoint/test_checkpoint.c2
-rw-r--r--test/csuite/Makefile.am3
-rw-r--r--test/csuite/wt1965_col_efficiency/main.c2
-rw-r--r--test/csuite/wt2246_col_append/main.c2
-rw-r--r--test/csuite/wt2323_join_visibility/main.c2
-rw-r--r--test/csuite/wt2403_lsm_workload/main.c241
-rw-r--r--test/csuite/wt2447_join_main_table/main.c2
-rw-r--r--test/csuite/wt2535_insert_race/main.c2
-rw-r--r--test/csuite/wt2592_join_schema/main.c8
-rw-r--r--test/csuite/wt2695_checksum/main.c2
-rw-r--r--test/csuite/wt2719_reconfig/main.c4
-rw-r--r--test/csuite/wt2834_join_bloom_fix/main.c11
-rw-r--r--test/csuite/wt2853_perf/main.c2
-rw-r--r--test/cursor_order/cursor_order.c2
-rw-r--r--test/fops/t.c2
-rw-r--r--test/format/config.c34
-rw-r--r--test/format/config.h2
-rw-r--r--test/format/format.h3
-rw-r--r--test/format/t.c4
-rw-r--r--test/format/wts.c5
-rw-r--r--test/huge/huge.c2
-rw-r--r--test/manydbs/manydbs.c2
-rw-r--r--test/packing/intpack-test.c2
-rw-r--r--test/packing/intpack-test2.c2
-rw-r--r--test/packing/intpack-test3.c2
-rw-r--r--test/packing/packing-test.c2
-rw-r--r--test/readonly/readonly.c2
-rw-r--r--test/recovery/random-abort.c2
-rw-r--r--test/recovery/truncated-log.c2
-rw-r--r--test/salvage/salvage.c2
-rw-r--r--test/suite/test_bug017.py46
-rw-r--r--test/suite/test_cursor_random02.py25
-rw-r--r--test/suite/test_encrypt01.py1
-rw-r--r--test/suite/test_stat02.py70
-rw-r--r--test/thread/t.c2
-rw-r--r--test/utility/misc.c4
-rw-r--r--test/utility/parse_opts.c18
-rw-r--r--test/utility/test_util.h12
-rw-r--r--tools/wtstats/stat_data.py40
-rwxr-xr-xtools/wtstats/wtstats.py3
114 files changed, 3155 insertions, 970 deletions
diff --git a/bench/wtperf/runners/500m-btree-50r50u.wtperf b/bench/wtperf/runners/500m-btree-50r50u.wtperf
index 06745bf7cca..536127f0dd8 100644
--- a/bench/wtperf/runners/500m-btree-50r50u.wtperf
+++ b/bench/wtperf/runners/500m-btree-50r50u.wtperf
@@ -10,6 +10,9 @@ create=false
compression="snappy"
sess_config="isolation=snapshot"
table_count=2
+# close_conn as false allows this test to close/finish faster, but if running
+# as part of a set, the next test will need to run recovery.
+close_conn=false
key_sz=40
value_sz=120
max_latency=2000
diff --git a/bench/wtperf/runners/500m-btree-80r20u.wtperf b/bench/wtperf/runners/500m-btree-80r20u.wtperf
index 77edbfb4941..d6218c44af0 100644
--- a/bench/wtperf/runners/500m-btree-80r20u.wtperf
+++ b/bench/wtperf/runners/500m-btree-80r20u.wtperf
@@ -8,6 +8,9 @@
conn_config="cache_size=16G,checkpoint=(wait=60,log_size=2GB),session_max=20000,log=(enabled),statistics=(fast),statistics_log=(wait=30,json),eviction=(threads_max=4)"
create=false
compression="snappy"
+# close_conn as false allows this test to close/finish faster, but if running
+# as part of a set, the next test will need to run recovery.
+close_conn=false
sess_config="isolation=snapshot
table_count=2
key_sz=40
diff --git a/bench/wtperf/runners/checkpoint-stress.wtperf b/bench/wtperf/runners/checkpoint-stress.wtperf
index 0c98a0c2db0..bbd3a3ba5ed 100644
--- a/bench/wtperf/runners/checkpoint-stress.wtperf
+++ b/bench/wtperf/runners/checkpoint-stress.wtperf
@@ -4,6 +4,7 @@ conn_config="cache_size=16GB,eviction=(threads_max=4),log=(enabled=false)"
table_config="leaf_page_max=32k,internal_page_max=16k,allocation_size=4k,split_pct=90,type=file"
# Enough data to fill the cache. 150 million 1k records results in two ~11GB
# tables
+close_conn=false
icount=150000000
create=true
compression="snappy"
diff --git a/bench/wtperf/runners/evict-btree-stress-multi.wtperf b/bench/wtperf/runners/evict-btree-stress-multi.wtperf
index 9699b9ae3bb..a5a29f66fa0 100644
--- a/bench/wtperf/runners/evict-btree-stress-multi.wtperf
+++ b/bench/wtperf/runners/evict-btree-stress-multi.wtperf
@@ -1,6 +1,7 @@
conn_config="cache_size=1G,eviction=(threads_max=4),session_max=2000"
table_config="type=file"
table_count=100
+close_conn=false
icount=100000000
report_interval=5
run_time=600
diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c
index a7618b19707..8c7f0053388 100644
--- a/bench/wtperf/wtperf.c
+++ b/bench/wtperf/wtperf.c
@@ -2078,6 +2078,11 @@ config_compress(WTPERF *wtperf)
wtperf->compress_ext = ZLIB_EXT;
#endif
wtperf->compress_table = ZLIB_BLK;
+ } else if (strcmp(s, "zstd") == 0) {
+#ifndef HAVE_BUILTIN_EXTENSION_ZSTD
+ wtperf->compress_ext = ZSTD_EXT;
+#endif
+ wtperf->compress_table = ZSTD_BLK;
} else {
fprintf(stderr,
"invalid compression configuration: %s\n", s);
@@ -2300,7 +2305,7 @@ err: if (ret == 0)
ret = t_ret;
}
- if (wtperf->conn != NULL &&
+ if (wtperf->conn != NULL && opts->close_conn &&
(t_ret = wtperf->conn->close(wtperf->conn, NULL)) != 0) {
lprintf(wtperf, t_ret, 0,
"Error closing connection to %s", wtperf->home);
@@ -2329,7 +2334,6 @@ err: if (ret == 0)
extern int __wt_optind, __wt_optreset;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
/*
* usage --
diff --git a/bench/wtperf/wtperf.h b/bench/wtperf/wtperf.h
index afce017d919..81d74e134f6 100644
--- a/bench/wtperf/wtperf.h
+++ b/bench/wtperf/wtperf.h
@@ -54,6 +54,9 @@ typedef struct __truncate_queue_entry TRUNCATE_QUEUE_ENTRY;
#define ZLIB_BLK BLKCMP_PFX "zlib"
#define ZLIB_EXT \
EXT_PFX EXTPATH "zlib/.libs/libwiredtiger_zlib.so" EXT_SFX
+#define ZSTD_BLK BLKCMP_PFX "zstd"
+#define ZSTD_EXT \
+ EXT_PFX EXTPATH "zstd/.libs/libwiredtiger_zstd.so" EXT_SFX
typedef struct {
int64_t threads; /* Thread count */
diff --git a/bench/wtperf/wtperf_opt.i b/bench/wtperf/wtperf_opt.i
index 5a632f26faa..680eb53a90e 100644
--- a/bench/wtperf/wtperf_opt.i
+++ b/bench/wtperf/wtperf_opt.i
@@ -94,10 +94,13 @@ DEF_OPT_AS_UINT32(checkpoint_stress_rate, 0,
DEF_OPT_AS_UINT32(checkpoint_threads, 0, "number of checkpoint threads")
DEF_OPT_AS_CONFIG_STRING(conn_config, "create",
"connection configuration string")
+DEF_OPT_AS_BOOL(close_conn, 1, "properly close connection at end of test. "
+ "Setting to false does not sync data to disk and can result in lost "
+ "data after test exits.")
DEF_OPT_AS_BOOL(compact, 0, "post-populate compact for LSM merging activity")
DEF_OPT_AS_STRING(compression, "none",
"compression extension. Allowed configuration values are: "
- "'none', 'lz4', 'snappy', 'zlib'")
+ "'none', 'lz4', 'snappy', 'zlib', 'zstd'")
DEF_OPT_AS_BOOL(create, 1,
"do population phase; false to use existing database")
DEF_OPT_AS_UINT32(database_count, 1,
diff --git a/build_posix/Make.base b/build_posix/Make.base
index 4efbe3f76c3..5b945aca5e0 100644
--- a/build_posix/Make.base
+++ b/build_posix/Make.base
@@ -77,6 +77,9 @@ endif
if HAVE_BUILTIN_EXTENSION_ZLIB
libwiredtiger_la_LIBADD += ext/compressors/zlib/libwiredtiger_zlib.la
endif
+if HAVE_BUILTIN_EXTENSION_ZSTD
+libwiredtiger_la_LIBADD += ext/compressors/zstd/libwiredtiger_zstd.la
+endif
libwiredtiger_static_la_LIBADD=$(libwiredtiger_la_LIBADD)
libwiredtiger_static_la_SOURCES=$(libwiredtiger_la_SOURCES)
diff --git a/build_posix/Make.subdirs b/build_posix/Make.subdirs
index 0b5175e4196..55941837249 100644
--- a/build_posix/Make.subdirs
+++ b/build_posix/Make.subdirs
@@ -11,6 +11,7 @@ ext/compressors/lz4 LZ4
ext/compressors/nop
ext/compressors/snappy SNAPPY
ext/compressors/zlib ZLIB
+ext/compressors/zstd ZSTD
ext/datasources/helium HAVE_HELIUM
ext/encryptors/nop
ext/encryptors/rotn
diff --git a/build_posix/aclocal/options.m4 b/build_posix/aclocal/options.m4
index 1f6a1690279..7043430a6d6 100644
--- a/build_posix/aclocal/options.m4
+++ b/build_posix/aclocal/options.m4
@@ -19,10 +19,12 @@ AH_TEMPLATE(HAVE_BUILTIN_EXTENSION_SNAPPY,
[Snappy support automatically loaded.])
AH_TEMPLATE(HAVE_BUILTIN_EXTENSION_ZLIB,
[Zlib support automatically loaded.])
+AH_TEMPLATE(HAVE_BUILTIN_EXTENSION_ZSTD,
+ [ZSTD support automatically loaded.])
AC_MSG_CHECKING(if --with-builtins option specified)
AC_ARG_WITH(builtins,
[AS_HELP_STRING([--with-builtins],
- [builtin extension names (lz4, snappy, zlib).])],
+ [builtin extension names (lz4, snappy, zlib, zstd).])],
[with_builtins=$withval],
[with_builtins=])
@@ -36,6 +38,8 @@ for builtin_i in $builtin_list; do
wt_cv_with_builtin_extension_snappy=yes;;
zlib) AC_DEFINE(HAVE_BUILTIN_EXTENSION_ZLIB)
wt_cv_with_builtin_extension_zlib=yes;;
+ zstd) AC_DEFINE(HAVE_BUILTIN_EXTENSION_ZSTD)
+ wt_cv_with_builtin_extension_zstd=yes;;
*) AC_MSG_ERROR([Unknown builtin extension "$builtin_i"]);;
esac
done
@@ -45,6 +49,8 @@ AM_CONDITIONAL([HAVE_BUILTIN_EXTENSION_SNAPPY],
[test "$wt_cv_with_builtin_extension_snappy" = "yes"])
AM_CONDITIONAL([HAVE_BUILTIN_EXTENSION_ZLIB],
[test "$wt_cv_with_builtin_extension_zlib" = "yes"])
+AM_CONDITIONAL([HAVE_BUILTIN_EXTENSION_ZSTD],
+ [test "$wt_cv_with_builtin_extension_zstd" = "yes"])
AC_MSG_RESULT($with_builtins)
AH_TEMPLATE(
@@ -276,4 +282,30 @@ if test "$wt_cv_enable_zlib" = "yes"; then
fi
AM_CONDITIONAL([ZLIB], [test "$wt_cv_enable_zlib" = "yes"])
+AC_MSG_CHECKING(if --enable-zstd option specified)
+AC_ARG_ENABLE(zstd,
+ [AS_HELP_STRING([--enable-zstd],
+ [Build the zstd compressor extension.])], r=$enableval, r=no)
+case "$r" in
+no) if test "$wt_cv_with_builtin_extension_zstd" = "yes"; then
+ wt_cv_enable_zstd=yes
+ else
+ wt_cv_enable_zstd=no
+ fi
+ ;;
+*) if test "$wt_cv_with_builtin_extension_zstd" = "yes"; then
+ AC_MSG_ERROR(
+ [Only one of --enable-zstd --with-builtins=zstd allowed])
+ fi
+ wt_cv_enable_zstd=yes;;
+esac
+AC_MSG_RESULT($wt_cv_enable_zstd)
+if test "$wt_cv_enable_zstd" = "yes"; then
+ AC_CHECK_HEADER(zstd.h,,
+ [AC_MSG_ERROR([--enable-zstd requires zstd.h])])
+ AC_CHECK_LIB(zstd, ZSTD_compress,,
+ [AC_MSG_ERROR([--enable-zstd requires Zstd library])])
+fi
+AM_CONDITIONAL([ZSTD], [test "$wt_cv_enable_zstd" = "yes"])
+
])
diff --git a/build_win/wiredtiger_config.h b/build_win/wiredtiger_config.h
index 83ddc6eb194..78d2784cb70 100644
--- a/build_win/wiredtiger_config.h
+++ b/build_win/wiredtiger_config.h
@@ -19,6 +19,9 @@
/* Zlib support automatically loaded. */
/* #undef HAVE_BUILTIN_EXTENSION_ZLIB */
+/* ZSTD support automatically loaded. */
+/* #undef HAVE_BUILTIN_EXTENSION_ZSTD */
+
/* Define to 1 if you have the `clock_gettime' function. */
/* #undef HAVE_CLOCK_GETTIME */
@@ -70,6 +73,9 @@
/* Define to 1 if you have the `z' library (-lz). */
/* #undef HAVE_LIBZ */
+/* Define to 1 if you have the `zstd' library (-lzstd). */
+/* #undef HAVE_LIBZSTD */
+
/* Define to 1 if you have the <memory.h> header file. */
/* #undef HAVE_MEMORY_H */
diff --git a/dist/api_data.py b/dist/api_data.py
index d02d7e4b985..7affc58a217 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -136,8 +136,8 @@ file_config = format_meta + [
configure a compressor for file blocks. Permitted values are \c "none"
or custom compression engine name created with
WT_CONNECTION::add_compressor. If WiredTiger has builtin support for
- \c "snappy", \c "lz4" or \c "zlib" compression, these names are also
- available. See @ref compression for more information'''),
+ \c "lz4", \c "snappy", \c "zlib" or \c "zstd" compression, these names
+ are also available. See @ref compression for more information'''),
Config('cache_resident', 'false', r'''
do not ever evict the object's pages from cache. Not compatible with
LSM tables; see @ref tuning_cache_resident for more information''',
@@ -502,7 +502,8 @@ connection_runtime_config = [
is used to gather statistics, as well as each time statistics
are logged using the \c statistics_log configuration. See
@ref statistics for more information''',
- type='list', choices=['all', 'fast', 'none', 'clear']),
+ type='list',
+ choices=['all', 'cache_walk', 'fast', 'none', 'clear', 'tree_walk']),
Config('verbose', '', r'''
enable messages for various events. Only available if WiredTiger
is configured with --enable-verbose. Options are given as a
@@ -569,8 +570,9 @@ wiredtiger_open_log_configuration = [
configure a compressor for log records. Permitted values are
\c "none" or custom compression engine name created with
WT_CONNECTION::add_compressor. If WiredTiger has builtin support
- for \c "snappy", \c "lz4" or \c "zlib" compression, these names
- are also available. See @ref compression for more information'''),
+ for \c "lz4", \c "snappy", \c "zlib" or \c "zstd" compression,
+ these names are also available. See @ref compression for more
+ information'''),
Config('file_max', '100MB', r'''
the maximum size of log files''',
min='100KB', max='2GB'),
@@ -976,7 +978,8 @@ methods = {
gathering them, where appropriate (for example, a cache size statistic
is not cleared, while the count of cursor insert operations will be
cleared). See @ref statistics for more information''',
- type='list', choices=['all', 'fast', 'clear', 'size']),
+ type='list',
+ choices=['all', 'cache_walk', 'fast', 'clear', 'size', 'tree_walk']),
Config('target', '', r'''
if non-empty, backup the list of objects; valid only for a
backup data source''',
diff --git a/dist/filelist b/dist/filelist
index 32e4231c5f2..fe9a17b7799 100644
--- a/dist/filelist
+++ b/dist/filelist
@@ -90,6 +90,7 @@ src/cursor/cur_table.c
src/evict/evict_file.c
src/evict/evict_lru.c
src/evict/evict_page.c
+src/evict/evict_stat.c
src/log/log.c
src/log/log_auto.c
src/log/log_slot.c
diff --git a/dist/flags.py b/dist/flags.py
index 93b6e0cbbf4..e200f95fba6 100644
--- a/dist/flags.py
+++ b/dist/flags.py
@@ -133,6 +133,16 @@ flags = {
'SESSION_QUIET_CORRUPT_FILE',
'SESSION_SERVER_ASYNC',
],
+ 'stat' : [
+ 'STAT_CLEAR',
+ 'STAT_JSON',
+ 'STAT_ON_CLOSE',
+ 'STAT_TYPE_ALL',
+ 'STAT_TYPE_CACHE_WALK',
+ 'STAT_TYPE_FAST',
+ 'STAT_TYPE_SIZE',
+ 'STAT_TYPE_TREE_WALK',
+ ],
}
flag_cnt = {} # Dictionary [flag] : [reference count]
diff --git a/dist/s_define.list b/dist/s_define.list
index 6a1a32004ea..7b11d665de5 100644
--- a/dist/s_define.list
+++ b/dist/s_define.list
@@ -39,14 +39,18 @@ WT_READ_BARRIER
WT_REF_SIZE
WT_SESSION_LOCKED_CHECKPOINT
WT_SESSION_LOCKED_TURTLE
-WT_STATS_FIELD_TO_SLOT
+WT_STATS_FIELD_TO_OFFSET
WT_STATS_SLOT_ID
WT_STAT_CONN_DECRV
WT_STAT_DATA_DECRV
WT_STAT_DECR
WT_STAT_DECRV
WT_STAT_DECRV_ATOMIC
+WT_STAT_DECRV_ATOMIC_BASE
+WT_STAT_DECRV_BASE
WT_STAT_INCRV_ATOMIC
+WT_STAT_INCRV_ATOMIC_BASE
+WT_STAT_INCRV_BASE
WT_STAT_WRITE
WT_TIMEDIFF_US
WT_TRET_ERROR_OK
diff --git a/dist/s_export b/dist/s_export
index dc69238b270..b8e42c970f9 100755
--- a/dist/s_export
+++ b/dist/s_export
@@ -26,7 +26,7 @@ check()
sort |
uniq -u |
egrep -v \
- 'zlib_extension_init|lz4_extension_init|snappy_extension_init' > $t
+ 'lz4_extension_init|snappy_extension_init|zlib_extension_init|zstd_extension_init' > $t
test -s $t && {
echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
diff --git a/dist/s_stat b/dist/s_stat
index 935c7e1fb43..5d5937e1833 100755
--- a/dist/s_stat
+++ b/dist/s_stat
@@ -20,6 +20,25 @@ search=`sed \
-e d ../src/include/stat.h |
sort`
+# There are some fields that are used, but we can't detect them.
+cat << UNUSED_STAT_FIELDS
+lock_checkpoint_count
+lock_checkpoint_wait_application
+lock_checkpoint_wait_internal
+lock_handle_list_count
+lock_handle_list_wait_application
+lock_handle_list_wait_internal
+lock_metadata_count
+lock_metadata_wait_application
+lock_metadata_wait_internal
+lock_schema_count
+lock_schema_wait_application
+lock_schema_wait_internal
+lock_table_count
+lock_table_wait_application
+lock_table_wait_internal
+UNUSED_STAT_FIELDS
+
echo "$search"
fgrep -who "$search" $l) | sort | uniq -u > $t
diff --git a/dist/s_string.ok b/dist/s_string.ok
index 0e99c6b9cec..7cf96aec399 100644
--- a/dist/s_string.ok
+++ b/dist/s_string.ok
@@ -60,6 +60,7 @@ COVERITY
CPUs
CRC
CSV
+CStream
CURSORs
CURSTD
CallsCustDate
@@ -69,6 +70,7 @@ Checksum
Checksums
CityHash
CloseHandle
+Collet
Comparator
Config
Coverity
@@ -125,6 +127,7 @@ FORALL
FOREACH
FS
FULLFSYNC
+Facebook
FindClose
FindFirstFile
Fixup
@@ -166,6 +169,7 @@ INSN
INTL
ISA
ITEMs
+ITER
InitializeCriticalSectionAndSpinCount
Inline
Intra
@@ -397,6 +401,12 @@ WriteFile
Wuninitialized
Wunused
XP
+Yann
+ZSTD
+Zlib
+Zlib's
+Zstd
+Zstd's
abcdef
abcdefghijklmnopqrstuvwxyz
addl
@@ -418,6 +428,7 @@ argc
args
argv
asm
+assertfmt
async
asyncopp
asyncops
@@ -513,6 +524,7 @@ collatorp
comparator
comparep
compat
+compressStream
concat
cond
conf
@@ -532,6 +544,7 @@ cp
cpuid
crc
create's
+createCStream
crypto
cryptobad
csv
@@ -624,6 +637,7 @@ emp
encodings
encryptor
encryptors
+endStream
endian
english
enqueue
@@ -751,6 +765,7 @@ infeasible
inflateInit
infmt
init
+initCStream
initializers
initn
initsize
@@ -786,6 +801,7 @@ isupper
isxdigit
iter
iteratively
+iters
jnr
jrx
json
@@ -851,6 +867,7 @@ majorp
malloc
marshall
marshalled
+maxCLevel
maxcpu
maxdbs
mbll
@@ -991,12 +1008,14 @@ qdown
qrrSS
qsort
quartile
+queueable
qup
rN
rS
rb
rbrace
rbracket
+rcursor
rdonly
rduppo
readlock
@@ -1203,6 +1222,7 @@ waitpid
walk's
warmup
wb
+wcursor
wiredTiger
wiredtiger
workFactor
@@ -1227,4 +1247,7 @@ zalloc
zf
zfree
zlib
+zlib's
+zstd
+zstd's
zu
diff --git a/dist/s_void b/dist/s_void
index f7bfbcc7e8e..e5e9f97c0b7 100644
--- a/dist/s_void
+++ b/dist/s_void
@@ -96,10 +96,13 @@ func_ok()
-e '/int wiredtiger_extension_init$/d' \
-e '/int wiredtiger_extension_terminate$/d' \
-e '/int wiredtiger_pack_close$/d' \
- -e '/int wt_snappy_pre_size$/d' \
- -e '/int wt_snappy_terminate$/d' \
+ -e '/int snappy_pre_size$/d' \
+ -e '/int snappy_terminate$/d' \
-e '/int zlib_error$/d' \
- -e '/int zlib_terminate$/d'
+ -e '/int zlib_terminate$/d' \
+ -e '/int zstd_error$/d' \
+ -e '/int zstd_pre_size$/d' \
+ -e '/int zstd_terminate$/d'
}
# Complain about functions which return an "int" but which don't return except
diff --git a/dist/stat.py b/dist/stat.py
index c3c85bbe9b4..e42585c1b8c 100644
--- a/dist/stat.py
+++ b/dist/stat.py
@@ -42,8 +42,11 @@ compare_srcfile(tmp_file, '../src/include/stat.h')
def print_defines_one(capname, base, stats):
for v, l in enumerate(stats, base):
desc = l.desc
- if 'all_only' in l.flags:
- desc += ', only reported if statistics=all is set'
+ if 'cache_walk' in l.flags:
+ desc += \
+ ', only reported if cache_walk or all statistics are enabled'
+ if 'tree_walk' in l.flags:
+ desc += ', only reported if tree_walk or all statistics are enabled'
if len(textwrap.wrap(desc, 70)) > 1:
f.write('/*!\n')
f.write(' * %s\n' % '\n * '.join(textwrap.wrap(desc, 70)))
diff --git a/dist/stat_data.py b/dist/stat_data.py
index 8d1011b1bb3..bcf5201bd90 100644
--- a/dist/stat_data.py
+++ b/dist/stat_data.py
@@ -9,7 +9,8 @@
#
# Data-source statistics are normally aggregated across the set of underlying
# objects. Additional optional configuration flags are available:
-# all_only Only gets reported when statistics=all set
+# cache_walk Only reported when statistics=cache_walk or all is set
+# tree_walk Only reported when statistics=tree_walk or all is set
# max_aggregate Take the maximum value when aggregating statistics
# no_clear Value not cleared when statistics cleared
# no_scale Don't scale value per second in the logging tool script
@@ -46,6 +47,11 @@ class CacheStat(Stat):
prefix = 'cache'
def __init__(self, name, desc, flags=''):
Stat.__init__(self, name, CacheStat.prefix, desc, flags)
+class CacheWalkStat(Stat):
+ prefix = 'cache_walk'
+ def __init__(self, name, desc, flags=''):
+ flags += ',cache_walk'
+ Stat.__init__(self, name, CacheWalkStat.prefix, desc, flags)
class CompressStat(Stat):
prefix = 'compression'
def __init__(self, name, desc, flags=''):
@@ -66,6 +72,10 @@ class JoinStat(Stat):
prefix = '' # prefix is inserted dynamically
def __init__(self, name, desc, flags=''):
Stat.__init__(self, name, JoinStat.prefix, desc, flags)
+class LockStat(Stat):
+ prefix = 'lock'
+ def __init__(self, name, desc, flags=''):
+ Stat.__init__(self, name, LockStat.prefix, desc, flags)
class LogStat(Stat):
prefix = 'log'
def __init__(self, name, desc, flags=''):
@@ -105,11 +115,16 @@ groups['cursor'] = [CursorStat.prefix, SessionStat.prefix]
groups['evict'] = [
BlockStat.prefix,
CacheStat.prefix,
+ CacheWalkStat.prefix,
ConnStat.prefix,
ThreadStat.prefix
]
groups['lsm'] = [LSMStat.prefix, TxnStat.prefix]
-groups['memory'] = [CacheStat.prefix, ConnStat.prefix, RecStat.prefix]
+groups['memory'] = [
+ CacheStat.prefix,
+ CacheWalkStat.prefix,
+ ConnStat.prefix,
+ RecStat.prefix]
groups['system'] = [
ConnStat.prefix,
DhandleStat.prefix,
@@ -226,13 +241,32 @@ connection_stats = [
CacheStat('cache_pages_inuse', 'pages currently held in the cache', 'no_clear,no_scale'),
CacheStat('cache_pages_requested', 'pages requested from the cache'),
CacheStat('cache_read', 'pages read into cache'),
+ CacheStat('cache_read_app_count', 'application threads page read from disk to cache count'),
+ CacheStat('cache_read_app_time', 'application threads page read from disk to cache time (usecs)'),
CacheStat('cache_read_lookaside', 'pages read into cache requiring lookaside entries'),
CacheStat('cache_read_overflow', 'overflow pages read into cache'),
CacheStat('cache_write', 'pages written from cache'),
+ CacheStat('cache_write_app_count', 'application threads page write from cache to disk count'),
+ CacheStat('cache_write_app_time', 'application threads page write from cache to disk time (usecs)'),
CacheStat('cache_write_lookaside', 'page written requiring lookaside records'),
CacheStat('cache_write_restore', 'pages written requiring in-memory restoration'),
##########################################
+ # Cursor operations
+ ##########################################
+ CursorStat('cursor_create', 'cursor create calls'),
+ CursorStat('cursor_insert', 'cursor insert calls'),
+ CursorStat('cursor_next', 'cursor next calls'),
+ CursorStat('cursor_prev', 'cursor prev calls'),
+ CursorStat('cursor_remove', 'cursor remove calls'),
+ CursorStat('cursor_reset', 'cursor reset calls'),
+ CursorStat('cursor_restart', 'cursor restarted searches'),
+ CursorStat('cursor_search', 'cursor search calls'),
+ CursorStat('cursor_search_near', 'cursor search near calls'),
+ CursorStat('cursor_truncate', 'truncate calls'),
+ CursorStat('cursor_update', 'cursor update calls'),
+
+ ##########################################
# Dhandle statistics
##########################################
DhandleStat('dh_conn_handle_count', 'connection data handles currently active', 'no_clear,no_scale'),
@@ -245,6 +279,25 @@ connection_stats = [
DhandleStat('dh_sweeps', 'connection sweeps'),
##########################################
+ # Locking statistics
+ ##########################################
+ LockStat('lock_checkpoint_count', 'checkpoint lock acquisitions'),
+ LockStat('lock_checkpoint_wait_application', 'checkpoint lock application thread wait time (usecs)'),
+ LockStat('lock_checkpoint_wait_internal', 'checkpoint lock internal thread wait time (usecs)'),
+ LockStat('lock_handle_list_count', 'handle-list lock acquisitions'),
+ LockStat('lock_handle_list_wait_application', 'handle-list lock application thread wait time (usecs)'),
+ LockStat('lock_handle_list_wait_internal', 'handle-list lock internal thread wait time (usecs)'),
+ LockStat('lock_metadata_count', 'metadata lock acquisitions'),
+ LockStat('lock_metadata_wait_application', 'metadata lock application thread wait time (usecs)'),
+ LockStat('lock_metadata_wait_internal', 'metadata lock internal thread wait time (usecs)'),
+ LockStat('lock_schema_count', 'schema lock acquisitions'),
+ LockStat('lock_schema_wait_application', 'schema lock application thread wait time (usecs)'),
+ LockStat('lock_schema_wait_internal', 'schema lock internal thread wait time (usecs)'),
+ LockStat('lock_table_count', 'table lock acquisitions'),
+ LockStat('lock_table_wait_application', 'table lock application thread time waiting for the table lock (usecs)'),
+ LockStat('lock_table_wait_internal', 'table lock internal thread time waiting for the table lock (usecs)'),
+
+ ##########################################
# Logging statistics
##########################################
LogStat('log_buffer_size', 'total log buffer size', 'no_clear,no_scale,size'),
@@ -286,42 +339,6 @@ connection_stats = [
LogStat('log_zero_fills', 'log files manually zero-filled'),
##########################################
- # Reconciliation statistics
- ##########################################
- RecStat('rec_page_delete', 'pages deleted'),
- RecStat('rec_page_delete_fast', 'fast-path pages deleted'),
- RecStat('rec_pages', 'page reconciliation calls'),
- RecStat('rec_pages_eviction', 'page reconciliation calls for eviction'),
- RecStat('rec_split_stashed_bytes', 'split bytes currently awaiting free', 'no_clear,no_scale,size'),
- RecStat('rec_split_stashed_objects', 'split objects currently awaiting free', 'no_clear,no_scale'),
-
- ##########################################
- # Transaction statistics
- ##########################################
- TxnStat('txn_begin', 'transaction begins'),
- TxnStat('txn_checkpoint', 'transaction checkpoints'),
- TxnStat('txn_checkpoint_fsync_post', 'transaction fsync calls for checkpoint after allocating the transaction ID'),
- TxnStat('txn_checkpoint_fsync_post_duration', 'transaction fsync duration for checkpoint after allocating the transaction ID (usecs)', 'no_clear,no_scale'),
- TxnStat('txn_checkpoint_generation', 'transaction checkpoint generation', 'no_clear,no_scale'),
- TxnStat('txn_checkpoint_running', 'transaction checkpoint currently running', 'no_clear,no_scale'),
- TxnStat('txn_checkpoint_scrub_target', 'transaction checkpoint scrub dirty target', 'no_clear,no_scale'),
- TxnStat('txn_checkpoint_scrub_time', 'transaction checkpoint scrub time (msecs)', 'no_clear,no_scale'),
- TxnStat('txn_checkpoint_skipped', 'transaction checkpoints skipped because database was clean'),
- TxnStat('txn_checkpoint_time_max', 'transaction checkpoint max time (msecs)', 'no_clear,no_scale'),
- TxnStat('txn_checkpoint_time_min', 'transaction checkpoint min time (msecs)', 'no_clear,no_scale'),
- TxnStat('txn_checkpoint_time_recent', 'transaction checkpoint most recent time (msecs)', 'no_clear,no_scale'),
- TxnStat('txn_checkpoint_time_total', 'transaction checkpoint total time (msecs)', 'no_clear,no_scale'),
- TxnStat('txn_commit', 'transactions committed'),
- TxnStat('txn_fail_cache', 'transaction failures due to cache overflow'),
- TxnStat('txn_pinned_checkpoint_range', 'transaction range of IDs currently pinned by a checkpoint', 'no_clear,no_scale'),
- TxnStat('txn_pinned_range', 'transaction range of IDs currently pinned', 'no_clear,no_scale'),
- TxnStat('txn_pinned_snapshot_range', 'transaction range of IDs currently pinned by named snapshots', 'no_clear,no_scale'),
- TxnStat('txn_rollback', 'transactions rolled back'),
- TxnStat('txn_snapshots_created', 'number of named snapshots created'),
- TxnStat('txn_snapshots_dropped', 'number of named snapshots dropped'),
- TxnStat('txn_sync', 'transaction sync calls'),
-
- ##########################################
# LSM statistics
##########################################
LSMStat('lsm_checkpoint_throttle', 'sleep for LSM checkpoint throttle'),
@@ -336,6 +353,16 @@ connection_stats = [
LSMStat('lsm_work_units_done', 'tree maintenance operations executed'),
##########################################
+ # Reconciliation statistics
+ ##########################################
+ RecStat('rec_page_delete', 'pages deleted'),
+ RecStat('rec_page_delete_fast', 'fast-path pages deleted'),
+ RecStat('rec_pages', 'page reconciliation calls'),
+ RecStat('rec_pages_eviction', 'page reconciliation calls for eviction'),
+ RecStat('rec_split_stashed_bytes', 'split bytes currently awaiting free', 'no_clear,no_scale,size'),
+ RecStat('rec_split_stashed_objects', 'split objects currently awaiting free', 'no_clear,no_scale'),
+
+ ##########################################
# Session operations
##########################################
SessionStat('session_cursor_open', 'open cursor count', 'no_clear,no_scale'),
@@ -358,21 +385,6 @@ connection_stats = [
SessionStat('session_table_verify_success', 'table verify successful calls', 'no_clear,no_scale'),
##########################################
- # Total cursor operations
- ##########################################
- CursorStat('cursor_create', 'cursor create calls'),
- CursorStat('cursor_insert', 'cursor insert calls'),
- CursorStat('cursor_next', 'cursor next calls'),
- CursorStat('cursor_prev', 'cursor prev calls'),
- CursorStat('cursor_remove', 'cursor remove calls'),
- CursorStat('cursor_reset', 'cursor reset calls'),
- CursorStat('cursor_restart', 'cursor restarted searches'),
- CursorStat('cursor_search', 'cursor search calls'),
- CursorStat('cursor_search_near', 'cursor search near calls'),
- CursorStat('cursor_truncate', 'truncate calls'),
- CursorStat('cursor_update', 'cursor update calls'),
-
- ##########################################
# Thread Count statistics
##########################################
ThreadStat('thread_fsync_active', 'active filesystem fsync calls','no_clear,no_scale'),
@@ -380,6 +392,32 @@ connection_stats = [
ThreadStat('thread_write_active', 'active filesystem write calls','no_clear,no_scale'),
##########################################
+ # Transaction statistics
+ ##########################################
+ TxnStat('txn_begin', 'transaction begins'),
+ TxnStat('txn_checkpoint', 'transaction checkpoints'),
+ TxnStat('txn_checkpoint_fsync_post', 'transaction fsync calls for checkpoint after allocating the transaction ID'),
+ TxnStat('txn_checkpoint_fsync_post_duration', 'transaction fsync duration for checkpoint after allocating the transaction ID (usecs)', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_generation', 'transaction checkpoint generation', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_running', 'transaction checkpoint currently running', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_scrub_target', 'transaction checkpoint scrub dirty target', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_scrub_time', 'transaction checkpoint scrub time (msecs)', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_skipped', 'transaction checkpoints skipped because database was clean'),
+ TxnStat('txn_checkpoint_time_max', 'transaction checkpoint max time (msecs)', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_time_min', 'transaction checkpoint min time (msecs)', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_time_recent', 'transaction checkpoint most recent time (msecs)', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_time_total', 'transaction checkpoint total time (msecs)', 'no_clear,no_scale'),
+ TxnStat('txn_commit', 'transactions committed'),
+ TxnStat('txn_fail_cache', 'transaction failures due to cache overflow'),
+ TxnStat('txn_pinned_checkpoint_range', 'transaction range of IDs currently pinned by a checkpoint', 'no_clear,no_scale'),
+ TxnStat('txn_pinned_range', 'transaction range of IDs currently pinned', 'no_clear,no_scale'),
+ TxnStat('txn_pinned_snapshot_range', 'transaction range of IDs currently pinned by named snapshots', 'no_clear,no_scale'),
+ TxnStat('txn_rollback', 'transactions rolled back'),
+ TxnStat('txn_snapshots_created', 'number of named snapshots created'),
+ TxnStat('txn_snapshots_dropped', 'number of named snapshots dropped'),
+ TxnStat('txn_sync', 'transaction sync calls'),
+
+ ##########################################
# Yield statistics
##########################################
YieldStat('application_cache_time', 'application thread time waiting for cache (usecs)'),
@@ -398,41 +436,30 @@ connection_stats = sorted(connection_stats, key=attrgetter('desc'))
##########################################
dsrc_stats = [
##########################################
- # Session operations
- ##########################################
- SessionStat('session_compact', 'object compaction'),
- SessionStat('session_cursor_open', 'open cursor count', 'no_clear,no_scale'),
-
- ##########################################
- # Cursor operations
+ # Block manager statistics
##########################################
- CursorStat('cursor_create', 'create calls'),
- CursorStat('cursor_insert', 'insert calls'),
- CursorStat('cursor_insert_bulk', 'bulk-loaded cursor-insert calls'),
- CursorStat('cursor_insert_bytes', 'cursor-insert key and value bytes inserted', 'size'),
- CursorStat('cursor_next', 'next calls'),
- CursorStat('cursor_prev', 'prev calls'),
- CursorStat('cursor_remove', 'remove calls'),
- CursorStat('cursor_remove_bytes', 'cursor-remove key bytes removed', 'size'),
- CursorStat('cursor_reset', 'reset calls'),
- CursorStat('cursor_restart', 'restarted searches'),
- CursorStat('cursor_search', 'search calls'),
- CursorStat('cursor_search_near', 'search near calls'),
- CursorStat('cursor_truncate', 'truncate calls'),
- CursorStat('cursor_update', 'update calls'),
- CursorStat('cursor_update_bytes', 'cursor-update value bytes updated', 'size'),
+ BlockStat('allocation_size', 'file allocation unit size', 'max_aggregate,no_scale,size'),
+ BlockStat('block_alloc', 'blocks allocated'),
+ BlockStat('block_checkpoint_size', 'checkpoint size', 'no_scale,size'),
+ BlockStat('block_extension', 'allocations requiring file extension'),
+ BlockStat('block_free', 'blocks freed'),
+ BlockStat('block_magic', 'file magic number', 'max_aggregate,no_scale'),
+ BlockStat('block_major', 'file major version number', 'max_aggregate,no_scale'),
+ BlockStat('block_minor', 'minor version number', 'max_aggregate,no_scale'),
+ BlockStat('block_reuse_bytes', 'file bytes available for reuse', 'no_scale,size'),
+ BlockStat('block_size', 'file size in bytes', 'no_scale,size'),
##########################################
# Btree statistics
##########################################
BtreeStat('btree_checkpoint_generation', 'btree checkpoint generation', 'no_clear,no_scale'),
- BtreeStat('btree_column_deleted', 'column-store variable-size deleted values', 'no_scale,all_only'),
- BtreeStat('btree_column_fix', 'column-store fixed-size leaf pages', 'no_scale,all_only'),
- BtreeStat('btree_column_internal', 'column-store internal pages', 'no_scale,all_only'),
- BtreeStat('btree_column_rle', 'column-store variable-size RLE encoded values', 'no_scale,all_only'),
- BtreeStat('btree_column_variable', 'column-store variable-size leaf pages', 'no_scale,all_only'),
+ BtreeStat('btree_column_deleted', 'column-store variable-size deleted values', 'no_scale,tree_walk'),
+ BtreeStat('btree_column_fix', 'column-store fixed-size leaf pages', 'no_scale,tree_walk'),
+ BtreeStat('btree_column_internal', 'column-store internal pages', 'no_scale,tree_walk'),
+ BtreeStat('btree_column_rle', 'column-store variable-size RLE encoded values', 'no_scale,tree_walk'),
+ BtreeStat('btree_column_variable', 'column-store variable-size leaf pages', 'no_scale,tree_walk'),
BtreeStat('btree_compact_rewrite', 'pages rewritten by compaction'),
- BtreeStat('btree_entries', 'number of key/value pairs', 'no_scale,all_only'),
+ BtreeStat('btree_entries', 'number of key/value pairs', 'no_scale,tree_walk'),
BtreeStat('btree_fixed_len', 'fixed-record size', 'max_aggregate,no_scale,size'),
BtreeStat('btree_maximum_depth', 'maximum tree depth', 'max_aggregate,no_scale'),
BtreeStat('btree_maxintlkey', 'maximum internal page key size', 'max_aggregate,no_scale,size'),
@@ -440,39 +467,9 @@ dsrc_stats = [
BtreeStat('btree_maxleafkey', 'maximum leaf page key size', 'max_aggregate,no_scale,size'),
BtreeStat('btree_maxleafpage', 'maximum leaf page size', 'max_aggregate,no_scale,size'),
BtreeStat('btree_maxleafvalue', 'maximum leaf page value size', 'max_aggregate,no_scale,size'),
- BtreeStat('btree_overflow', 'overflow pages', 'no_scale,all_only'),
- BtreeStat('btree_row_internal', 'row-store internal pages', 'no_scale,all_only'),
- BtreeStat('btree_row_leaf', 'row-store leaf pages', 'no_scale,all_only'),
-
- ##########################################
- # LSM statistics
- ##########################################
- LSMStat('bloom_count', 'bloom filters in the LSM tree', 'no_scale'),
- LSMStat('bloom_false_positive', 'bloom filter false positives'),
- LSMStat('bloom_hit', 'bloom filter hits'),
- LSMStat('bloom_miss', 'bloom filter misses'),
- LSMStat('bloom_page_evict', 'bloom filter pages evicted from cache'),
- LSMStat('bloom_page_read', 'bloom filter pages read into cache'),
- LSMStat('bloom_size', 'total size of bloom filters', 'no_scale,size'),
- LSMStat('lsm_checkpoint_throttle', 'sleep for LSM checkpoint throttle'),
- LSMStat('lsm_chunk_count', 'chunks in the LSM tree', 'no_scale'),
- LSMStat('lsm_generation_max', 'highest merge generation in the LSM tree', 'max_aggregate,no_scale'),
- LSMStat('lsm_lookup_no_bloom', 'queries that could have benefited from a Bloom filter that did not exist'),
- LSMStat('lsm_merge_throttle', 'sleep for LSM merge throttle'),
-
- ##########################################
- # Block manager statistics
- ##########################################
- BlockStat('allocation_size', 'file allocation unit size', 'max_aggregate,no_scale,size'),
- BlockStat('block_alloc', 'blocks allocated'),
- BlockStat('block_checkpoint_size', 'checkpoint size', 'no_scale,size'),
- BlockStat('block_extension', 'allocations requiring file extension'),
- BlockStat('block_free', 'blocks freed'),
- BlockStat('block_magic', 'file magic number', 'max_aggregate,no_scale'),
- BlockStat('block_major', 'file major version number', 'max_aggregate,no_scale'),
- BlockStat('block_minor', 'minor version number', 'max_aggregate,no_scale'),
- BlockStat('block_reuse_bytes', 'file bytes available for reuse', 'no_scale,size'),
- BlockStat('block_size', 'file size in bytes', 'no_scale,size'),
+ BtreeStat('btree_overflow', 'overflow pages', 'no_scale,tree_walk'),
+ BtreeStat('btree_row_internal', 'row-store internal pages', 'no_scale,tree_walk'),
+ BtreeStat('btree_row_leaf', 'row-store leaf pages', 'no_scale,tree_walk'),
##########################################
# Cache and eviction statistics
@@ -501,6 +498,28 @@ dsrc_stats = [
CacheStat('cache_write_restore', 'pages written requiring in-memory restoration'),
##########################################
+ # Cache content statistics
+ ##########################################
+ CacheWalkStat('cache_state_avg_written_size', 'Average on-disk page image size seen', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_gen_avg_gap', 'Average difference between current eviction generation when the page was last considered', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_gen_current', 'Current eviction generation', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_gen_max_gap', 'Maximum difference between current eviction generation when the page was last considered', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_max_pagesize', 'Maximum page size seen', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_memory', 'Pages created in memory and never written', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_min_written_size', 'Minimum on-disk page image size seen', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_not_queueable', 'Pages that could not be queued for eviction', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_pages', 'Total number of pages currently in cache', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_pages_clean', 'Clean pages currently in cache', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_pages_dirty', 'Dirty pages currently in cache', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_pages_internal', 'Internal pages currently in cache', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_pages_leaf', 'Leaf pages currently in cache', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_queued', 'Pages currently queued for eviction', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_refs_skipped', 'Refs skipped during cache traversal', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_root_entries', 'Entries in the root page', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_root_size', 'Size of the root page', 'no_clear,no_scale'),
+ CacheWalkStat('cache_state_smaller_alloc_size', 'On-disk page image sizes smaller than a single allocation unit', 'no_clear,no_scale'),
+
+ ##########################################
# Compression statistics
##########################################
CompressStat('compress_raw_fail', 'raw compression call failed, no additional data available'),
@@ -512,6 +531,41 @@ dsrc_stats = [
CompressStat('compress_write_too_small', 'page written was too small to compress'),
##########################################
+ # Cursor operations
+ ##########################################
+ CursorStat('cursor_create', 'create calls'),
+ CursorStat('cursor_insert', 'insert calls'),
+ CursorStat('cursor_insert_bulk', 'bulk-loaded cursor-insert calls'),
+ CursorStat('cursor_insert_bytes', 'cursor-insert key and value bytes inserted', 'size'),
+ CursorStat('cursor_next', 'next calls'),
+ CursorStat('cursor_prev', 'prev calls'),
+ CursorStat('cursor_remove', 'remove calls'),
+ CursorStat('cursor_remove_bytes', 'cursor-remove key bytes removed', 'size'),
+ CursorStat('cursor_reset', 'reset calls'),
+ CursorStat('cursor_restart', 'restarted searches'),
+ CursorStat('cursor_search', 'search calls'),
+ CursorStat('cursor_search_near', 'search near calls'),
+ CursorStat('cursor_truncate', 'truncate calls'),
+ CursorStat('cursor_update', 'update calls'),
+ CursorStat('cursor_update_bytes', 'cursor-update value bytes updated', 'size'),
+
+ ##########################################
+ # LSM statistics
+ ##########################################
+ LSMStat('bloom_count', 'bloom filters in the LSM tree', 'no_scale'),
+ LSMStat('bloom_false_positive', 'bloom filter false positives'),
+ LSMStat('bloom_hit', 'bloom filter hits'),
+ LSMStat('bloom_miss', 'bloom filter misses'),
+ LSMStat('bloom_page_evict', 'bloom filter pages evicted from cache'),
+ LSMStat('bloom_page_read', 'bloom filter pages read into cache'),
+ LSMStat('bloom_size', 'total size of bloom filters', 'no_scale,size'),
+ LSMStat('lsm_checkpoint_throttle', 'sleep for LSM checkpoint throttle'),
+ LSMStat('lsm_chunk_count', 'chunks in the LSM tree', 'no_scale'),
+ LSMStat('lsm_generation_max', 'highest merge generation in the LSM tree', 'max_aggregate,no_scale'),
+ LSMStat('lsm_lookup_no_bloom', 'queries that could have benefited from a Bloom filter that did not exist'),
+ LSMStat('lsm_merge_throttle', 'sleep for LSM merge throttle'),
+
+ ##########################################
# Reconciliation statistics
##########################################
RecStat('rec_dictionary', 'dictionary matches'),
@@ -530,6 +584,12 @@ dsrc_stats = [
RecStat('rec_suffix_compression', 'internal page key bytes discarded using suffix compression', 'size'),
##########################################
+ # Session operations
+ ##########################################
+ SessionStat('session_compact', 'object compaction'),
+ SessionStat('session_cursor_open', 'open cursor count', 'no_clear,no_scale'),
+
+ ##########################################
# Transaction statistics
##########################################
TxnStat('txn_update_conflict', 'update conflicts'),
diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c
index a2042c22bbb..ea646604a76 100644
--- a/examples/c/ex_all.c
+++ b/examples/c/ex_all.c
@@ -611,6 +611,13 @@ session_ops(WT_SESSION *session)
"block_compressor=zlib,key_format=S,value_format=S");
/*! [Create a zlib compressed table] */
ret = session->drop(session, "table:mytable", NULL);
+
+ /*! [Create a zstd compressed table] */
+ ret = session->create(session,
+ "table:mytable",
+ "block_compressor=zstd,key_format=S,value_format=S");
+ /*! [Create a zstd compressed table] */
+ ret = session->drop(session, "table:mytable", NULL);
#endif
/*! [Configure checksums to uncompressed] */
@@ -1108,6 +1115,32 @@ main(void)
if (ret == 0)
(void)conn->close(conn, NULL);
+ /*! [Configure zlib extension with compression level] */
+ ret = wiredtiger_open(home, NULL,
+ "create,"
+ "extensions=[/usr/local/lib/"
+ "libwiredtiger_zlib.so=[config=[compression_level=3]]]", &conn);
+ /*! [Configure zlib extension with compression level] */
+ if (ret == 0)
+ (void)conn->close(conn, NULL);
+
+ /*! [Configure zstd extension] */
+ ret = wiredtiger_open(home, NULL,
+ "create,"
+ "extensions=[/usr/local/lib/libwiredtiger_zstd.so]", &conn);
+ /*! [Configure zstd extension] */
+ if (ret == 0)
+ (void)conn->close(conn, NULL);
+
+ /*! [Configure zstd extension with compression level] */
+ ret = wiredtiger_open(home, NULL,
+ "create,"
+ "extensions=[/usr/local/lib/"
+ "libwiredtiger_zstd.so=[config=[compression_level=9]]]", &conn);
+ /*! [Configure zstd extension with compression level] */
+ if (ret == 0)
+ (void)conn->close(conn, NULL);
+
/*
* This example code gets run, and direct I/O might not be available,
* causing the open to fail. The documentation requires code snippets,
diff --git a/examples/java/com/wiredtiger/examples/ex_all.java b/examples/java/com/wiredtiger/examples/ex_all.java
index 83a37e9a6a5..cf8491aa4f8 100644
--- a/examples/java/com/wiredtiger/examples/ex_all.java
+++ b/examples/java/com/wiredtiger/examples/ex_all.java
@@ -549,6 +549,12 @@ session_ops(Session session)
"block_compressor=zlib,key_format=S,value_format=S");
/*! [Create a zlib compressed table] */
ret = session.drop("table:mytable", null);
+
+ /*! [Create a zstd compressed table] */
+ ret = session.create("table:mytable",
+ "block_compressor=zstd,key_format=S,value_format=S");
+ /*! [Create a zstd compressed table] */
+ ret = session.drop("table:mytable", null);
} // if (false)
/*! [Configure checksums to uncompressed] */
@@ -942,6 +948,29 @@ allExample()
/*! [Configure zlib extension] */
conn.close(null);
+ /*! [Configure zlib extension with compression level] */
+ conn = wiredtiger.open(home,
+ "create," +
+ "extensions=[/usr/local/lib/" +
+ "libwiredtiger_zlib.so=[config=[compression_level=3]]]");
+ /*! [Configure zlib extension with compression level] */
+ conn.close(null);
+
+ /*! [Configure zstd extension] */
+ conn = wiredtiger.open(home,
+ "create," +
+ "extensions=[/usr/local/lib/libwiredtiger_zstd.so]");
+ /*! [Configure zstd extension] */
+ conn.close(null);
+
+ /*! [Configure zstd extension with compression level] */
+ conn = wiredtiger.open(home,
+ "create," +
+ "extensions=[/usr/local/lib/" +
+ "libwiredtiger_zstd.so=[config=[compression_level=9]]]");
+ /*! [Configure zstd extension with compression level] */
+ conn.close(null);
+
/*
* This example code gets run, and direct I/O might not be available,
* causing the open to fail. The documentation requires code snippets,
diff --git a/ext/compressors/lz4/lz4_compress.c b/ext/compressors/lz4/lz4_compress.c
index 35159d0fa76..885701e564b 100644
--- a/ext/compressors/lz4/lz4_compress.c
+++ b/ext/compressors/lz4/lz4_compress.c
@@ -31,10 +31,20 @@
#include <stdlib.h>
#include <string.h>
+/*
+ * We need to include the configuration file to detect whether this extension
+ * is being built into the WiredTiger library; application-loaded compression
+ * functions won't need it.
+ */
#include <wiredtiger_config.h>
+
#include <wiredtiger.h>
#include <wiredtiger_ext.h>
+#ifdef _MSC_VER
+#define inline __inline
+#endif
+
/* Local compressor structure. */
typedef struct {
WT_COMPRESSOR compressor; /* Must come first */
@@ -171,8 +181,6 @@ lz4_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session,
int decoded;
uint8_t *dst_tmp;
- (void)src_len; /* Unused parameters */
-
wt_api = ((LZ4_COMPRESSOR *)compressor)->wt_api;
/*
@@ -183,6 +191,13 @@ lz4_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session,
#ifdef WORDS_BIGENDIAN
lz4_prefix_swap(&prefix);
#endif
+ if (prefix.compressed_len + sizeof(LZ4_PREFIX) > src_len) {
+ (void)wt_api->err_printf(wt_api,
+ session,
+ "WT_COMPRESSOR.decompress: stored size exceeds source "
+ "size");
+ return (WT_ERROR);
+ }
/*
* Decompress, starting after the prefix bytes. Use safe decompression:
@@ -267,18 +282,24 @@ lz4_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session,
size_t *result_lenp, uint32_t *result_slotsp)
{
LZ4_PREFIX prefix;
- int lz4_len;
uint32_t slot;
- int sourceSize, targetDestSize;
+ int lz4_len, sourceSize, targetDestSize;
(void)compressor; /* Unused parameters */
(void)session;
(void)split_pct;
(void)final;
- sourceSize = (int)offsets[slots]; /* Type conversion */
- targetDestSize =
- (int)((dst_len < page_max ? dst_len : page_max) - extra);
+ /*
+ * Set the source and target sizes. The target size is complicated: we
+ * don't want to exceed the smaller of the maximum page size or the
+ * destination buffer length, and in both cases we have to take into
+ * account the space for our overhead and the extra bytes required by
+ * our caller.
+ */
+ sourceSize = (int)offsets[slots];
+ targetDestSize = (int)(page_max < dst_len ? page_max : dst_len);
+ targetDestSize -= (int)(sizeof(LZ4_PREFIX) + extra);
/* Compress, starting after the prefix bytes. */
lz4_len = LZ4_compress_destSize((const char *)src,
@@ -352,7 +373,7 @@ lz4_terminate(WT_COMPRESSOR *compressor, WT_SESSION *session)
* Add a LZ4 compressor.
*/
static int
-lz_add_compressor(WT_CONNECTION *connection, int raw, const char *name)
+lz_add_compressor(WT_CONNECTION *connection, bool raw, const char *name)
{
LZ4_COMPRESSOR *lz4_compressor;
@@ -391,9 +412,9 @@ lz4_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
(void)config; /* Unused parameters */
- if ((ret = lz_add_compressor(connection, 1, "lz4")) != 0)
+ if ((ret = lz_add_compressor(connection, true, "lz4")) != 0)
return (ret);
- if ((ret = lz_add_compressor(connection, 0, "lz4-noraw")) != 0)
+ if ((ret = lz_add_compressor(connection, false, "lz4-noraw")) != 0)
return (ret);
return (0);
}
diff --git a/ext/compressors/snappy/snappy_compress.c b/ext/compressors/snappy/snappy_compress.c
index 981e334a2de..32f1ddcb9a0 100644
--- a/ext/compressors/snappy/snappy_compress.c
+++ b/ext/compressors/snappy/snappy_compress.c
@@ -31,10 +31,20 @@
#include <stdlib.h>
#include <string.h>
+/*
+ * We need to include the configuration file to detect whether this extension
+ * is being built into the WiredTiger library; application-loaded compression
+ * functions won't need it.
+ */
#include <wiredtiger_config.h>
+
#include <wiredtiger.h>
#include <wiredtiger_ext.h>
+#ifdef _MSC_VER
+#define inline __inline
+#endif
+
/* Local compressor structure. */
typedef struct {
WT_COMPRESSOR compressor; /* Must come first */
@@ -42,6 +52,12 @@ typedef struct {
WT_EXTENSION_API *wt_api; /* Extension API */
} SNAPPY_COMPRESSOR;
+/*
+ * Snappy decompression requires an exact compressed byte count. WiredTiger
+ * doesn't track that value, so store it in the destination buffer.
+ */
+#define SNAPPY_PREFIX sizeof(uint64_t)
+
#ifdef WORDS_BIGENDIAN
/*
* snappy_bswap64 --
@@ -64,11 +80,11 @@ snappy_bswap64(uint64_t v)
#endif
/*
- * wt_snappy_error --
+ * snappy_error --
* Output an error message, and return a standard error code.
*/
static int
-wt_snappy_error(WT_COMPRESSOR *compressor,
+snappy_error(WT_COMPRESSOR *compressor,
WT_SESSION *session, const char *call, snappy_status snret)
{
WT_EXTENSION_API *wt_api;
@@ -94,68 +110,69 @@ wt_snappy_error(WT_COMPRESSOR *compressor,
}
/*
- * wt_snappy_compress --
+ * snappy_compression --
* WiredTiger snappy compression.
*/
static int
-wt_snappy_compress(WT_COMPRESSOR *compressor, WT_SESSION *session,
+snappy_compression(WT_COMPRESSOR *compressor, WT_SESSION *session,
uint8_t *src, size_t src_len,
uint8_t *dst, size_t dst_len,
size_t *result_lenp, int *compression_failed)
{
snappy_status snret;
size_t snaplen;
+ uint64_t snaplen_u64;
char *snapbuf;
/*
- * dst_len was computed in wt_snappy_pre_size, so we know it's big
- * enough. Skip past the space we'll use to store the final count
- * of compressed bytes.
+ * dst_len was computed in snappy_pre_size, so we know it's big enough.
+ * Skip past the space we'll use to store the final count of compressed
+ * bytes.
*/
- snaplen = dst_len - sizeof(size_t);
- snapbuf = (char *)dst + sizeof(size_t);
+ snaplen = dst_len - SNAPPY_PREFIX;
+ snapbuf = (char *)dst + SNAPPY_PREFIX;
/* snaplen is an input and an output arg. */
snret = snappy_compress((char *)src, src_len, snapbuf, &snaplen);
- if (snret == SNAPPY_OK) {
- if (snaplen + sizeof(size_t) < src_len) {
- *result_lenp = snaplen + sizeof(size_t);
- *compression_failed = 0;
-
- /*
- * On decompression, snappy requires an exact compressed
- * byte count (the current value of snaplen). WiredTiger
- * does not preserve that value, so save snaplen at the
- * beginning of the destination buffer.
- *
- * Store the value in little-endian format.
- */
+ if (snret == SNAPPY_OK && snaplen + SNAPPY_PREFIX < src_len) {
+ *result_lenp = snaplen + SNAPPY_PREFIX;
+ *compression_failed = 0;
+
+ /*
+ * On decompression, snappy requires an exact compressed byte
+ * count (the current value of snaplen). WiredTiger does not
+ * preserve that value, so save snaplen at the beginning of
+ * the destination buffer.
+ *
+ * Store the value in little-endian format.
+ */
+ snaplen_u64 = snaplen;
#ifdef WORDS_BIGENDIAN
- snaplen = snappy_bswap64(snaplen);
+ snaplen_u64 = snappy_bswap64(snaplen_u64);
#endif
- *(size_t *)dst = snaplen;
- } else
- /* The compressor failed to produce a smaller result. */
- *compression_failed = 1;
+ *(uint64_t *)dst = snaplen_u64;
return (0);
}
- return (wt_snappy_error(compressor, session, "snappy_compress", snret));
+
+ *compression_failed = 1;
+ return (snret == SNAPPY_OK ?
+ 0 : snappy_error(compressor, session, "snappy_compress", snret));
}
/*
- * wt_snappy_decompress --
+ * snappy_decompression --
* WiredTiger snappy decompression.
*/
static int
-wt_snappy_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session,
+snappy_decompression(WT_COMPRESSOR *compressor, WT_SESSION *session,
uint8_t *src, size_t src_len,
uint8_t *dst, size_t dst_len,
size_t *result_lenp)
{
WT_EXTENSION_API *wt_api;
snappy_status snret;
- size_t snaplen;
+ uint64_t snaplen;
wt_api = ((SNAPPY_COMPRESSOR *)compressor)->wt_api;
@@ -163,36 +180,36 @@ wt_snappy_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session,
* Retrieve the saved length, handling little- to big-endian conversion
* as necessary.
*/
- snaplen = *(size_t *)src;
+ snaplen = *(uint64_t *)src;
#ifdef WORDS_BIGENDIAN
snaplen = snappy_bswap64(snaplen);
#endif
- if (snaplen + sizeof(size_t) > src_len) {
+ if (snaplen + SNAPPY_PREFIX > src_len) {
(void)wt_api->err_printf(wt_api,
session,
- "wt_snappy_decompress: stored size exceeds buffer size");
+ "WT_COMPRESSOR.decompress: stored size exceeds source "
+ "size");
return (WT_ERROR);
}
/* dst_len is an input and an output arg. */
snret = snappy_uncompress(
- (char *)src + sizeof(size_t), snaplen, (char *)dst, &dst_len);
+ (char *)src + SNAPPY_PREFIX,
+ (size_t)snaplen, (char *)dst, &dst_len);
if (snret == SNAPPY_OK) {
*result_lenp = dst_len;
return (0);
}
-
- return (
- wt_snappy_error(compressor, session, "snappy_decompress", snret));
+ return (snappy_error(compressor, session, "snappy_decompress", snret));
}
/*
- * wt_snappy_pre_size --
+ * snappy_pre_size --
* WiredTiger snappy destination buffer sizing.
*/
static int
-wt_snappy_pre_size(WT_COMPRESSOR *compressor, WT_SESSION *session,
+snappy_pre_size(WT_COMPRESSOR *compressor, WT_SESSION *session,
uint8_t *src, size_t src_len,
size_t *result_lenp)
{
@@ -203,19 +220,19 @@ wt_snappy_pre_size(WT_COMPRESSOR *compressor, WT_SESSION *session,
/*
* Snappy requires the dest buffer be somewhat larger than the source.
* Fortunately, this is fast to compute, and will give us a dest buffer
- * in wt_snappy_compress that we can compress to directly. We add space
+ * in snappy_compression that we can compress to directly. We add space
* in the dest buffer to store the accurate compressed size.
*/
- *result_lenp = snappy_max_compressed_length(src_len) + sizeof(size_t);
+ *result_lenp = snappy_max_compressed_length(src_len) + SNAPPY_PREFIX;
return (0);
}
/*
- * wt_snappy_terminate --
+ * snappy_terminate --
* WiredTiger snappy compression termination.
*/
static int
-wt_snappy_terminate(WT_COMPRESSOR *compressor, WT_SESSION *session)
+snappy_terminate(WT_COMPRESSOR *compressor, WT_SESSION *session)
{
(void)session; /* Unused parameters */
@@ -227,9 +244,9 @@ int snappy_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
/*
* snappy_extension_init --
- * WiredTiger snappy compression extension - called directly when
- * Snappy support is built in, or via wiredtiger_extension_init when
- * snappy support is included via extension loading.
+ * WiredTiger snappy compression extension - called directly when snappy
+ * support is built in, or via wiredtiger_extension_init when snappy support
+ * is included via extension loading.
*/
int
snappy_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
@@ -241,11 +258,11 @@ snappy_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
if ((snappy_compressor = calloc(1, sizeof(SNAPPY_COMPRESSOR))) == NULL)
return (errno);
- snappy_compressor->compressor.compress = wt_snappy_compress;
+ snappy_compressor->compressor.compress = snappy_compression;
snappy_compressor->compressor.compress_raw = NULL;
- snappy_compressor->compressor.decompress = wt_snappy_decompress;
- snappy_compressor->compressor.pre_size = wt_snappy_pre_size;
- snappy_compressor->compressor.terminate = wt_snappy_terminate;
+ snappy_compressor->compressor.decompress = snappy_decompression;
+ snappy_compressor->compressor.pre_size = snappy_pre_size;
+ snappy_compressor->compressor.terminate = snappy_terminate;
snappy_compressor->wt_api = connection->get_extension_api(connection);
diff --git a/ext/compressors/zlib/zlib_compress.c b/ext/compressors/zlib/zlib_compress.c
index 484df0a6785..ef20503df0a 100644
--- a/ext/compressors/zlib/zlib_compress.c
+++ b/ext/compressors/zlib/zlib_compress.c
@@ -32,16 +32,18 @@
#include <stdlib.h>
#include <string.h>
-#include <wiredtiger.h>
-#include <wiredtiger_ext.h>
-
/*
* We need to include the configuration file to detect whether this extension
- * is being built into the WiredTiger library.
+ * is being built into the WiredTiger library; application-loaded compression
+ * functions won't need it.
*/
-#include "wiredtiger_config.h"
+#include <wiredtiger_config.h>
+
+#include <wiredtiger.h>
+#include <wiredtiger_ext.h>
+
#ifdef _MSC_VER
-#define inline __inline
+#define inline __inline
#endif
/* Local compressor structure. */
@@ -234,121 +236,163 @@ zlib_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session,
{
ZLIB_COMPRESSOR *zlib_compressor;
ZLIB_OPAQUE opaque;
- z_stream *best_zs, last_zs, zs;
- uint32_t curr_slot, last_slot;
- int ret;
+ z_stream *best_zs, *last_zs, _last_zs, *zs, _zs;
+ uint32_t curr_slot, last_slot, zlib_reserved;
+ bool increase_reserve;
+ int ret, tret;
- curr_slot = last_slot = 0;
- (void)split_pct;
- (void)dst_len;
+ (void)split_pct; /* Unused parameters */
(void)final;
zlib_compressor = (ZLIB_COMPRESSOR *)compressor;
- memset(&zs, 0, sizeof(zs));
- zs.zalloc = zalloc;
- zs.zfree = zfree;
- opaque.compressor = compressor;
- opaque.session = session;
- zs.opaque = &opaque;
-
- if ((ret = deflateInit(&zs, zlib_compressor->zlib_level)) != Z_OK)
- return (zlib_error(compressor, session, "deflateInit", ret));
-
- zs.next_in = src;
- zs.next_out = dst;
/*
* Experimentally derived, reserve this many bytes for zlib to finish
* up a buffer. If this isn't sufficient, we don't fail but we will be
* inefficient.
*/
#define WT_ZLIB_RESERVED 24
- zs.avail_out = (uint32_t)(page_max - (extra + WT_ZLIB_RESERVED));
+#define WT_ZLIB_RESERVED_MAX 48
+ zlib_reserved = WT_ZLIB_RESERVED;
+
+ if (0) {
+retry: /* If we reached our maximum reserve, quit. */
+ if (zlib_reserved == WT_ZLIB_RESERVED_MAX)
+ return (0);
+ zlib_reserved = WT_ZLIB_RESERVED_MAX;
+ }
+
+ best_zs = last_zs = NULL;
+ last_slot = 0;
+ increase_reserve = false;
+ ret = 0;
- /* Save the stream state in case the chosen data doesn't fit. */
- if ((ret = deflateCopy(&last_zs, &zs)) != Z_OK)
- return (zlib_error(compressor, session, "deflateCopy", ret));
+ zs = &_zs;
+ memset(zs, 0, sizeof(*zs));
+ zs->zalloc = zalloc;
+ zs->zfree = zfree;
+ opaque.compressor = compressor;
+ opaque.session = session;
+ zs->opaque = &opaque;
+
+ if ((ret = deflateInit(zs, zlib_compressor->zlib_level)) != Z_OK)
+ return (zlib_error(compressor, session, "deflateInit", ret));
+
+ zs->next_in = src;
+ zs->next_out = dst;
+
+ /*
+ * Set the target size. The target size is complicated: we don't want
+ * to exceed the smaller of the maximum page size or the destination
+ * buffer length, and in both cases we have to take into account the
+ * space required by zlib to finish up the buffer and the extra bytes
+ * required by our caller.
+ */
+ zs->avail_out = (uint32_t)(page_max < dst_len ? page_max : dst_len);
+ zs->avail_out -= (uint32_t)(zlib_reserved + extra);
/*
* Strategy: take the available output size and compress that much
* input. Continue until there is no input small enough or the
* compression fails to fit.
*/
- for (best_zs = NULL;;) {
+ for (;;) {
/* Find the next slot we will try to compress up to. */
- if ((curr_slot = zlib_find_slot(
- zs.total_in + zs.avail_out, offsets, slots)) > last_slot) {
- zs.avail_in = offsets[curr_slot] - offsets[last_slot];
- while (zs.avail_in > 0 && zs.avail_out > 0)
- if ((ret = deflate(&zs, Z_SYNC_FLUSH)) != Z_OK)
- return (zlib_error(compressor,
- session, "deflate", ret));
+ curr_slot = zlib_find_slot(
+ zs->total_in + zs->avail_out, offsets, slots);
+ if (curr_slot > last_slot) {
+ zs->avail_in = offsets[curr_slot] - offsets[last_slot];
+ while (zs->avail_in > 0 && zs->avail_out > 0)
+ if ((ret = deflate(zs, Z_SYNC_FLUSH)) != Z_OK) {
+ ret = zlib_error(compressor,
+ session, "deflate", ret);
+ goto err;
+ }
}
/*
* We didn't do a deflate, or it didn't work: use the last saved
- * position.
+ * position (if any).
*/
- if (curr_slot <= last_slot || zs.avail_in > 0) {
- if ((ret = deflateEnd(&zs)) != Z_OK &&
- ret != Z_DATA_ERROR)
- return (zlib_error(
- compressor, session, "deflateEnd", ret));
-
- best_zs = &last_zs;
+ if (curr_slot <= last_slot || zs->avail_in > 0) {
+ best_zs = last_zs;
break;
}
- /* The last deflation succeeded, discard the saved one. */
- if ((ret = deflateEnd(&last_zs)) != Z_OK && ret != Z_DATA_ERROR)
- return (zlib_error(
- compressor, session, "deflateEnd", ret));
-
/*
* If there's more compression to do, save a snapshot and keep
* going, otherwise, use the current compression.
*/
last_slot = curr_slot;
- if (zs.avail_out > 0) {
- if ((ret = deflateCopy(&last_zs, &zs)) != Z_OK)
- return (zlib_error(
- compressor, session, "deflateCopy", ret));
+ if (zs->avail_out > 0) {
+ /* Discard any previously saved snapshot. */
+ if (last_zs != NULL) {
+ ret = deflateEnd(last_zs);
+ last_zs = NULL;
+ if (ret != Z_OK && ret != Z_DATA_ERROR) {
+ ret = zlib_error(compressor,
+ session, "deflateEnd", ret);
+ goto err;
+ }
+ }
+ last_zs = &_last_zs;
+ if ((ret = deflateCopy(last_zs, zs)) != Z_OK) {
+ last_zs = NULL;
+ ret = zlib_error(
+ compressor, session, "deflateCopy", ret);
+ goto err;
+ }
continue;
}
- best_zs = &zs;
+ best_zs = zs;
break;
}
- best_zs->avail_out += WT_ZLIB_RESERVED;
- ret = deflate(best_zs, Z_FINISH);
+ if (last_slot > 0 && best_zs != NULL) {
+ /* Add the reserved bytes and try to finish the compression. */
+ best_zs->avail_out += zlib_reserved;
+ ret = deflate(best_zs, Z_FINISH);
- /*
- * If the end marker didn't fit, report that we got no work done,
- * WiredTiger will compress the (possibly large) page image using
- * ordinary compression instead.
- */
- if (ret == Z_OK || ret == Z_BUF_ERROR)
- last_slot = 0;
- else if (ret != Z_STREAM_END)
- return (
- zlib_error(compressor, session, "deflate end block", ret));
+ /*
+ * If the end marker didn't fit with the default value, try
+ * again with a maximum value; if that doesn't work, report we
+ * got no work done, WiredTiger will compress the (possibly
+ * large) page image using ordinary compression instead.
+ */
+ if (ret == Z_OK || ret == Z_BUF_ERROR) {
+ last_slot = 0;
+ increase_reserve = true;
+ } else if (ret != Z_STREAM_END) {
+ ret = zlib_error(
+ compressor, session, "deflate end block", ret);
+ goto err;
+ }
+ ret = 0;
+ }
- if ((ret = deflateEnd(best_zs)) != Z_OK && ret != Z_DATA_ERROR)
- return (zlib_error(compressor, session, "deflateEnd", ret));
+err: if (zs != NULL &&
+ (tret = deflateEnd(zs)) != Z_OK && tret != Z_DATA_ERROR)
+ ret = zlib_error(compressor, session, "deflateEnd", tret);
+ if (last_zs != NULL &&
+ (tret = deflateEnd(last_zs)) != Z_OK && tret != Z_DATA_ERROR)
+ ret = zlib_error(compressor, session, "deflateEnd", tret);
- if (last_slot > 0) {
+ if (ret == 0 && last_slot > 0) {
*result_slotsp = last_slot;
*result_lenp = (size_t)best_zs->total_out;
} else {
- /* We didn't manage to compress anything: don't retry. */
+ /* We didn't manage to compress anything. */
*result_slotsp = 0;
*result_lenp = 1;
+
+ if (increase_reserve)
+ goto retry;
}
#if 0
/* Decompress the result and confirm it matches the original source. */
- if (last_slot > 0) {
+ if (ret == 0 && last_slot > 0) {
void *decomp;
size_t result_len;
@@ -363,19 +407,20 @@ zlib_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session,
"deflate compare with original source",
Z_DATA_ERROR);
zfree(&opaque, decomp);
- if (ret != 0)
- return (ret);
}
#endif
#if 0
- fprintf(stderr,
- "zlib_compress_raw (%s): page_max %" PRIuMAX ", slots %" PRIu32
- ", take %" PRIu32 ": %" PRIu32 " -> %" PRIuMAX "\n",
- final ? "final" : "not final", (uintmax_t)page_max,
- slots, last_slot, offsets[last_slot], (uintmax_t)*result_lenp);
+ if (ret == 0 && last_slot > 0)
+ fprintf(stderr,
+ "zlib_compress_raw (%s): page_max %" PRIuMAX ", slots %"
+ PRIu32 ", take %" PRIu32 ": %" PRIu32 " -> %" PRIuMAX "\n",
+ final ? "final" : "not final", (uintmax_t)page_max,
+ slots, last_slot, offsets[last_slot],
+ (uintmax_t)*result_lenp);
#endif
- return (0);
+
+ return (ret);
}
/*
@@ -396,7 +441,8 @@ zlib_terminate(WT_COMPRESSOR *compressor, WT_SESSION *session)
* Add a zlib compressor.
*/
static int
-zlib_add_compressor(WT_CONNECTION *connection, int raw, const char *name)
+zlib_add_compressor(
+ WT_CONNECTION *connection, bool raw, const char *name, int zlib_level)
{
ZLIB_COMPRESSOR *zlib_compressor;
@@ -415,17 +461,80 @@ zlib_add_compressor(WT_CONNECTION *connection, int raw, const char *name)
zlib_compressor->compressor.terminate = zlib_terminate;
zlib_compressor->wt_api = connection->get_extension_api(connection);
-
- /*
- * Between 0-10: level: see zlib manual.
- */
- zlib_compressor->zlib_level = Z_DEFAULT_COMPRESSION;
+ zlib_compressor->zlib_level = zlib_level;
/* Load the compressor. */
return (connection->add_compressor(
connection, name, (WT_COMPRESSOR *)zlib_compressor, NULL));
}
+/*
+ * zlib_init_config --
+ * Handle zlib configuration.
+ */
+static int
+zlib_init_config(
+ WT_CONNECTION *connection, WT_CONFIG_ARG *config, int *zlib_levelp)
+{
+ WT_CONFIG_ITEM k, v;
+ WT_CONFIG_PARSER *config_parser;
+ WT_EXTENSION_API *wtext;
+ int ret, zlib_level;
+
+ /* If configured as a built-in, there's no configuration argument. */
+ if (config == NULL)
+ return (0);
+
+ /*
+ * Zlib compression engine allows applications to specify a compression
+ * level; review the configuration.
+ */
+ wtext = connection->get_extension_api(connection);
+ if ((ret = wtext->config_get(wtext, NULL, config, "config", &v)) != 0) {
+ (void)wtext->err_printf(wtext, NULL,
+ "WT_EXTENSION_API.config_get: zlib configure: %s",
+ wtext->strerror(wtext, NULL, ret));
+ return (ret);
+ }
+ if ((ret = wtext->config_parser_open(
+ wtext, NULL, v.str, v.len, &config_parser)) != 0) {
+ (void)wtext->err_printf(wtext, NULL,
+ "WT_EXTENSION_API.config_parser_open: zlib configure: %s",
+ wtext->strerror(wtext, NULL, ret));
+ return (ret);
+ }
+ while ((ret = config_parser->next(config_parser, &k, &v)) == 0)
+ if (strlen("compression_level") == k.len &&
+ strncmp("compression_level", k.str, k.len) == 0) {
+ /*
+ * Valid compression levels are 0-9; see the zlib manual.
+ */
+ zlib_level = (int)v.val;
+ if (zlib_level < 0 || zlib_level > 9) {
+ (void)wtext->err_printf(wtext, NULL,
+ "WT_CONFIG_PARSER.next: zlib configure: "
+ "unsupported compression level %d",
+ zlib_level);
+ return (EINVAL);
+ }
+ *zlib_levelp = zlib_level;
+ continue;
+ }
+ if (ret != WT_NOTFOUND) {
+ (void)wtext->err_printf(wtext, NULL,
+ "WT_CONFIG_PARSER.next: zlib configure: %s",
+ wtext->strerror(wtext, NULL, ret));
+ return (ret);
+ }
+ if ((ret = config_parser->close(config_parser)) != 0) {
+ (void)wtext->err_printf(wtext, NULL,
+ "WT_CONFIG_PARSER.close: zlib configure: %s",
+ wtext->strerror(wtext, NULL, ret));
+ return (ret);
+ }
+ return (0);
+}
+
int zlib_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
/*
@@ -437,13 +546,17 @@ int zlib_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
int
zlib_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
{
- int ret;
+ int ret, zlib_level;
- (void)config; /* Unused parameters */
+ zlib_level = Z_DEFAULT_COMPRESSION; /* Default */
+ if ((ret = zlib_init_config(connection, config, &zlib_level)) != 0)
+ return (ret);
- if ((ret = zlib_add_compressor(connection, 1, "zlib")) != 0)
+ if ((ret = zlib_add_compressor(
+ connection, true, "zlib", zlib_level)) != 0)
return (ret);
- if ((ret = zlib_add_compressor(connection, 0, "zlib-noraw")) != 0)
+ if ((ret = zlib_add_compressor(
+ connection, false, "zlib-noraw", zlib_level)) != 0)
return (ret);
return (0);
}
diff --git a/ext/compressors/zstd/Makefile.am b/ext/compressors/zstd/Makefile.am
new file mode 100644
index 00000000000..9f0997011e9
--- /dev/null
+++ b/ext/compressors/zstd/Makefile.am
@@ -0,0 +1,11 @@
+AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include
+
+if HAVE_BUILTIN_EXTENSION_ZSTD
+noinst_LTLIBRARIES = libwiredtiger_zstd.la
+else
+lib_LTLIBRARIES = libwiredtiger_zstd.la
+libwiredtiger_zstd_la_LDFLAGS = -avoid-version -module
+endif
+
+libwiredtiger_zstd_la_SOURCES = zstd_compress.c
+libwiredtiger_zstd_la_LIBADD = -lzstd
diff --git a/ext/compressors/zstd/zstd_compress.c b/ext/compressors/zstd/zstd_compress.c
new file mode 100644
index 00000000000..3d0447248b6
--- /dev/null
+++ b/ext/compressors/zstd/zstd_compress.c
@@ -0,0 +1,358 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <zstd.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * We need to include the configuration file to detect whether this extension
+ * is being built into the WiredTiger library; application-loaded compression
+ * functions won't need it.
+ */
+#include <wiredtiger_config.h>
+
+#include <wiredtiger.h>
+#include <wiredtiger_ext.h>
+
+#ifdef _MSC_VER
+#define inline __inline
+#endif
+
+/* Local compressor structure. */
+typedef struct {
+ WT_COMPRESSOR compressor; /* Must come first */
+
+ WT_EXTENSION_API *wt_api; /* Extension API */
+
+ int compression_level; /* compression level */
+} ZSTD_COMPRESSOR;
+
+/*
+ * Zstd decompression requires an exact compressed byte count. WiredTiger
+ * doesn't track that value, so store it in the destination buffer.
+ */
+#define ZSTD_PREFIX sizeof(uint64_t)
+
+#ifdef WORDS_BIGENDIAN
+/*
+ * zstd_bswap64 --
+ * 64-bit unsigned little-endian to/from big-endian value.
+ */
+static inline uint64_t
+zstd_bswap64(uint64_t v)
+{
+ return (
+ ((v << 56) & 0xff00000000000000UL) |
+ ((v << 40) & 0x00ff000000000000UL) |
+ ((v << 24) & 0x0000ff0000000000UL) |
+ ((v << 8) & 0x000000ff00000000UL) |
+ ((v >> 8) & 0x00000000ff000000UL) |
+ ((v >> 24) & 0x0000000000ff0000UL) |
+ ((v >> 40) & 0x000000000000ff00UL) |
+ ((v >> 56) & 0x00000000000000ffUL)
+ );
+}
+#endif
+
+/*
+ * zstd_error --
+ * Output an error message, and return a standard error code.
+ */
+static int
+zstd_error(WT_COMPRESSOR *compressor,
+ WT_SESSION *session, const char *call, size_t error)
+{
+ WT_EXTENSION_API *wt_api;
+
+ wt_api = ((ZSTD_COMPRESSOR *)compressor)->wt_api;
+
+ (void)wt_api->err_printf(wt_api, session,
+ "zstd error: %s: %s", call, ZSTD_getErrorName(error));
+ return (WT_ERROR);
+}
+
+/*
+ * zstd_compress --
+ * WiredTiger Zstd compression.
+ */
+static int
+zstd_compress(WT_COMPRESSOR *compressor, WT_SESSION *session,
+ uint8_t *src, size_t src_len,
+ uint8_t *dst, size_t dst_len,
+ size_t *result_lenp, int *compression_failed)
+{
+ ZSTD_COMPRESSOR *zcompressor;
+ size_t zstd_ret;
+ uint64_t zstd_len;
+
+ zcompressor = (ZSTD_COMPRESSOR *)compressor;
+
+ /* Compress, starting past the prefix bytes. */
+ zstd_ret = ZSTD_compress(
+ dst + ZSTD_PREFIX, dst_len - ZSTD_PREFIX,
+ src, src_len, zcompressor->compression_level);
+
+ /*
+ * If compression succeeded and the compressed length is smaller than
+ * the original size, return success.
+ */
+ if (!ZSTD_isError(zstd_ret) && zstd_ret + ZSTD_PREFIX < src_len) {
+ *result_lenp = zstd_ret + ZSTD_PREFIX;
+ *compression_failed = 0;
+
+ /*
+ * On decompression, Zstd requires an exact compressed byte
+ * count (the current value of zstd_ret). WiredTiger does not
+ * preserve that value, so save zstd_ret at the beginning of
+ * the destination buffer.
+ *
+ * Store the value in little-endian format.
+ */
+ zstd_len = zstd_ret;
+#ifdef WORDS_BIGENDIAN
+ zstd_len = zstd_bswap64(zstd_len);
+#endif
+ *(uint64_t *)dst = zstd_len;
+ return (0);
+ }
+
+ *compression_failed = 1;
+ return (ZSTD_isError(zstd_ret) ?
+ zstd_error(compressor, session, "ZSTD_compress", zstd_ret) : 0);
+}
+
+/*
+ * zstd_decompress --
+ * WiredTiger Zstd decompression.
+ */
+static int
+zstd_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session,
+ uint8_t *src, size_t src_len,
+ uint8_t *dst, size_t dst_len,
+ size_t *result_lenp)
+{
+ WT_EXTENSION_API *wt_api;
+ size_t zstd_ret;
+ uint64_t zstd_len;
+
+ wt_api = ((ZSTD_COMPRESSOR *)compressor)->wt_api;
+
+ /*
+ * Retrieve the saved length, handling little- to big-endian conversion
+ * as necessary.
+ */
+ zstd_len = *(uint64_t *)src;
+#ifdef WORDS_BIGENDIAN
+ zstd_len = zstd_bswap64(zstd_len);
+#endif
+ if (zstd_len + ZSTD_PREFIX > src_len) {
+ (void)wt_api->err_printf(wt_api,
+ session,
+ "WT_COMPRESSOR.decompress: stored size exceeds source "
+ "size");
+ return (WT_ERROR);
+ }
+
+ zstd_ret =
+ ZSTD_decompress(dst, dst_len, src + ZSTD_PREFIX, (size_t)zstd_len);
+
+ if (!ZSTD_isError(zstd_ret)) {
+ *result_lenp = zstd_ret;
+ return (0);
+ }
+ return (zstd_error(compressor, session, "ZSTD_decompress", zstd_ret));
+}
+
+/*
+ * zstd_pre_size --
+ * WiredTiger Zstd destination buffer sizing for compression.
+ */
+static int
+zstd_pre_size(WT_COMPRESSOR *compressor, WT_SESSION *session,
+ uint8_t *src, size_t src_len, size_t *result_lenp)
+{
+ (void)compressor; /* Unused parameters */
+ (void)session;
+ (void)src;
+
+ /*
+ * Zstd compression runs faster if the destination buffer is sized at
+ * the upper-bound of the buffer size needed by the compression. Use
+ * the library calculation of that overhead (plus our overhead).
+ */
+ *result_lenp = ZSTD_compressBound(src_len) + ZSTD_PREFIX;
+ return (0);
+}
+
+/*
+ * zstd_terminate --
+ * WiredTiger Zstd compression termination.
+ */
+static int
+zstd_terminate(WT_COMPRESSOR *compressor, WT_SESSION *session)
+{
+ (void)session; /* Unused parameters */
+
+ free(compressor);
+ return (0);
+}
+
+/*
+ * zstd_init_config --
+ * Handle zstd configuration.
+ */
+static int
+zstd_init_config(
+ WT_CONNECTION *connection, WT_CONFIG_ARG *config, int *compression_levelp)
+{
+ WT_CONFIG_ITEM k, v;
+ WT_CONFIG_PARSER *config_parser;
+ WT_EXTENSION_API *wtext;
+ int ret;
+
+ /* If configured as a built-in, there's no configuration argument. */
+ if (config == NULL)
+ return (0);
+
+ /*
+ * Zstd compression engine allows applications to specify a compression
+ * level; review the configuration.
+ */
+ wtext = connection->get_extension_api(connection);
+ if ((ret = wtext->config_get(wtext, NULL, config, "config", &v)) != 0) {
+ (void)wtext->err_printf(wtext, NULL,
+ "WT_EXTENSION_API.config_get: zstd configure: %s",
+ wtext->strerror(wtext, NULL, ret));
+ return (ret);
+ }
+ if ((ret = wtext->config_parser_open(
+ wtext, NULL, v.str, v.len, &config_parser)) != 0) {
+ (void)wtext->err_printf(wtext, NULL,
+ "WT_EXTENSION_API.config_parser_open: zstd configure: %s",
+ wtext->strerror(wtext, NULL, ret));
+ return (ret);
+ }
+ while ((ret = config_parser->next(config_parser, &k, &v)) == 0)
+ if (strlen("compression_level") == k.len &&
+ strncmp("compression_level", k.str, k.len) == 0) {
+ *compression_levelp = (int)v.val;
+ continue;
+ }
+ if (ret != WT_NOTFOUND) {
+ (void)wtext->err_printf(wtext, NULL,
+ "WT_CONFIG_PARSER.next: zstd configure: %s",
+ wtext->strerror(wtext, NULL, ret));
+ return (ret);
+ }
+ if ((ret = config_parser->close(config_parser)) != 0) {
+ (void)wtext->err_printf(wtext, NULL,
+ "WT_CONFIG_PARSER.close: zstd configure: %s",
+ wtext->strerror(wtext, NULL, ret));
+ return (ret);
+ }
+ return (0);
+}
+
+int zstd_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
+
+/*
+ * zstd_extension_init --
+ * WiredTiger Zstd compression extension - called directly when Zstd
+ * support is built in, or via wiredtiger_extension_init when Zstd support
+ * is included via extension loading.
+ */
+int
+zstd_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
+{
+ ZSTD_COMPRESSOR *zstd_compressor;
+ int compression_level, ret;
+
+ /*
+ * Zstd's sweet-spot is better compression than zlib at significantly
+ * faster compression/decompression speeds. LZ4 and snappy are faster
+ * than zstd, but have worse compression ratios. Applications wanting
+ * faster compression/decompression with worse compression will select
+ * LZ4 or snappy, so we configure zstd for better compression.
+ *
+ * From the zstd github site, default measurements of the compression
+ * engines we support, listing compression ratios with compression and
+ * decompression speeds:
+ *
+ * Name Ratio C.speed D.speed
+ * MB/s MB/s
+ * zstd 2.877 330 940
+ * zlib 2.730 95 360
+ * LZ4 2.101 620 3100
+ * snappy 2.091 480 1600
+ *
+ * Set the zstd compression level to 3: according to the zstd web site,
+ * that reduces zstd's compression speed to around 200 MB/s, increasing
+ * the compression ratio to 3.100 (close to zlib's best compression
+ * ratio). In other words, position zstd as a zlib replacement, having
+ * similar compression at much higher compression/decompression speeds.
+ */
+ compression_level = 3;
+ if ((ret =
+ zstd_init_config(connection, config, &compression_level)) != 0)
+ return (ret);
+
+ if ((zstd_compressor = calloc(1, sizeof(ZSTD_COMPRESSOR))) == NULL)
+ return (errno);
+
+ zstd_compressor->compressor.compress = zstd_compress;
+ zstd_compressor->compressor.compress_raw = NULL;
+ zstd_compressor->compressor.decompress = zstd_decompress;
+ zstd_compressor->compressor.pre_size = zstd_pre_size;
+ zstd_compressor->compressor.terminate = zstd_terminate;
+
+ zstd_compressor->wt_api = connection->get_extension_api(connection);
+
+ zstd_compressor->compression_level = compression_level;
+
+ /* Load the compressor */
+ return (connection->add_compressor(
+ connection, "zstd", (WT_COMPRESSOR *)zstd_compressor, NULL));
+}
+
+/*
+ * We have to remove this symbol when building as a builtin extension otherwise
+ * it will conflict with other builtin libraries.
+ */
+#ifndef HAVE_BUILTIN_EXTENSION_ZSTD
+/*
+ * wiredtiger_extension_init --
+ * WiredTiger Zstd compression extension.
+ */
+int
+wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
+{
+ return (zstd_extension_init(connection, config));
+}
+#endif
diff --git a/src/block/block_ckpt.c b/src/block/block_ckpt.c
index b7ac953cdb1..48522768dc9 100644
--- a/src/block/block_ckpt.c
+++ b/src/block/block_ckpt.c
@@ -615,8 +615,6 @@ live_update:
WT_CKPT_FOREACH(ckptbase, ckpt)
if (F_ISSET(ckpt, WT_CKPT_ADD)) {
/*
- * Set the checkpoint size for the live system.
- *
* !!!
* Our caller wants the final checkpoint size. Setting
* the size here violates layering, but the alternative
@@ -624,7 +622,31 @@ live_update:
* cookie into its components, and that's a fair amount
* of work.
*/
- ckpt->ckpt_size = ci->ckpt_size = ckpt_size;
+ ckpt->ckpt_size = ckpt_size;
+
+ /*
+ * Set the rolling checkpoint size for the live system.
+ * The current size includes the current checkpoint's
+ * root page size (root pages are on the checkpoint's
+ * block allocation list as root pages are allocated
+ * with the usual block allocation functions). That's
+ * correct, but we don't want to include it in the size
+ * for the next checkpoint.
+ */
+ ckpt_size -= ci->root_size;
+
+ /*
+ * Additionally, we had a bug for a while where the live
+ * checkpoint size grew without bound. We can't sanity
+ * check the value, that would require walking the tree
+ * as part of the checkpoint. Bound any bug at the size
+ * of the file.
+ * It isn't practical to assert that the value is within
+ * bounds since databases created with older versions
+ * of WiredTiger (2.8.0) would likely see an error.
+ */
+ ci->ckpt_size =
+ WT_MIN(ckpt_size, (uint64_t)block->size);
WT_ERR(__ckpt_update(session, block, ckpt, ci, true));
}
diff --git a/src/btree/bt_io.c b/src/btree/bt_io.c
index 42c3a849a88..a8645f79dbe 100644
--- a/src/btree/bt_io.c
+++ b/src/btree/bt_io.c
@@ -171,6 +171,7 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf,
uint8_t *addr, size_t *addr_sizep,
bool checkpoint, bool checkpoint_io, bool compressed)
{
+ struct timespec start, stop;
WT_BM *bm;
WT_BTREE *btree;
WT_DECL_ITEM(ctmp);
@@ -356,6 +357,8 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf,
data_checksum = !compressed;
break;
}
+ if (!F_ISSET(session, WT_SESSION_INTERNAL))
+ __wt_epoch(session, &start);
/* Call the block manager to write the block. */
WT_ERR(checkpoint ?
@@ -363,6 +366,14 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf,
bm->write(
bm, session, ip, addr, addr_sizep, data_checksum, checkpoint_io));
+ /* Update some statistics now that the write is done */
+ if (!F_ISSET(session, WT_SESSION_INTERNAL)) {
+ __wt_epoch(session, &stop);
+ WT_STAT_CONN_INCR(session, cache_write_app_count);
+ WT_STAT_CONN_INCRV(session, cache_write_app_time,
+ WT_TIMEDIFF_US(stop, start));
+ }
+
WT_STAT_CONN_INCR(session, cache_write);
WT_STAT_DATA_INCR(session, cache_write);
S2C(session)->cache->bytes_written += dsk->mem_size;
diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c
index c54eaa69c43..90188498535 100644
--- a/src/btree/bt_read.c
+++ b/src/btree/bt_read.c
@@ -327,22 +327,28 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
if (__wt_hazard_count(session, page) > 1)
return (false);
+ /* If we can do an in-memory split, do it. */
+ if (__wt_leaf_page_can_split(session, page))
+ return (true);
+ if (page->memory_footprint < btree->maxmempage)
+ return (false);
+
+ /* Bump the oldest ID, we're about to do some visibility checks. */
+ WT_IGNORE_RET(__wt_txn_update_oldest(session, 0));
+
/*
- * If we have already tried and the transaction state has not moved on,
- * eviction is highly likely to fail.
+ * Allow some leeway if the transaction ID isn't moving forward since
+ * it is unlikely eviction will be able to evict the page. Don't keep
+ * skipping the page indefinitely or large records can lead to
+ * extremely large memory footprints.
*/
- if (page->modify->last_eviction_id == __wt_txn_oldest_id(session))
+ if (page->modify->update_restored &&
+ page->modify->last_eviction_id == __wt_txn_oldest_id(session))
return (false);
- if (page->memory_footprint < btree->maxmempage)
- return (__wt_leaf_page_can_split(session, page));
-
/* Trigger eviction on the next page release. */
__wt_page_evict_soon(session, ref);
- /* Bump the oldest ID, we're about to do some visibility checks. */
- WT_IGNORE_RET(__wt_txn_update_oldest(session, 0));
-
/* If eviction cannot succeed, don't try. */
return (__wt_page_can_evict(session, ref, NULL));
}
@@ -354,6 +360,7 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
static int
__page_read(WT_SESSION_IMPL *session, WT_REF *ref)
{
+ struct timespec start, stop;
const WT_PAGE_HEADER *dsk;
WT_BTREE *btree;
WT_DECL_RET;
@@ -401,7 +408,15 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref)
* There's an address, read or map the backing disk page and build an
* in-memory version of the page.
*/
+ if (!F_ISSET(session, WT_SESSION_INTERNAL))
+ __wt_epoch(session, &start);
WT_ERR(__wt_bt_read(session, &tmp, addr, addr_size));
+ if (!F_ISSET(session, WT_SESSION_INTERNAL)) {
+ __wt_epoch(session, &stop);
+ WT_STAT_CONN_INCR(session, cache_read_app_count);
+ WT_STAT_CONN_INCRV(session, cache_read_app_time,
+ WT_TIMEDIFF_US(stop, start));
+ }
WT_ERR(__wt_page_inmem(session, ref, tmp.data, tmp.memsize,
WT_DATA_IN_ITEM(&tmp) ?
WT_PAGE_DISK_ALLOC : WT_PAGE_DISK_MAPPED, &page));
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index ea667460966..017c820ea29 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -1582,6 +1582,13 @@ __split_multi_inmem(
*/
page->modify->first_dirty_txn = WT_TXN_FIRST;
+ /*
+ * If the new page is modified, save the oldest ID from reconciliation
+ * to avoid repeatedly attempting eviction on the same page.
+ */
+ page->modify->last_eviction_id = orig->modify->last_eviction_id;
+ page->modify->update_restored = 1;
+
err: /* Free any resources that may have been cached in the cursor. */
WT_TRET(__wt_btcur_close(&cbt, true));
@@ -2245,14 +2252,6 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi)
WT_ERR(__split_multi_inmem(session, page, multi, new));
/*
- * If the new page is modified, save the oldest ID from reconciliation
- * to avoid repeatedly attempting eviction on the same page.
- */
- if (new->page->modify != NULL)
- new->page->modify->last_eviction_id =
- page->modify->last_eviction_id;
-
- /*
* The rewrite succeeded, we can no longer fail.
*
* Finalize the move, discarding moved update lists from the original
diff --git a/src/btree/bt_stat.c b/src/btree/bt_stat.c
index d3ddf33446e..06428b87f6e 100644
--- a/src/btree/bt_stat.c
+++ b/src/btree/bt_stat.c
@@ -8,6 +8,7 @@
#include "wt_internal.h"
+static int __stat_tree_walk(WT_SESSION_IMPL *);
static int __stat_page(WT_SESSION_IMPL *, WT_PAGE *, WT_DSRC_STATS **);
static void __stat_page_col_var(WT_SESSION_IMPL *, WT_PAGE *, WT_DSRC_STATS **);
static void __stat_page_row_int(WT_SESSION_IMPL *, WT_PAGE *, WT_DSRC_STATS **);
@@ -23,9 +24,7 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
{
WT_BM *bm;
WT_BTREE *btree;
- WT_DECL_RET;
WT_DSRC_STATS **stats;
- WT_REF *next_walk;
btree = S2BT(session);
bm = btree->bm;
@@ -44,9 +43,29 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
WT_STAT_SET(session, stats, cache_bytes_inuse,
__wt_btree_bytes_inuse(session));
- /* Everything else is really, really expensive. */
- if (!F_ISSET(cst, WT_CONN_STAT_ALL))
- return (0);
+ if (F_ISSET(cst, WT_STAT_TYPE_CACHE_WALK))
+ __wt_curstat_cache_walk(session);
+
+ if (F_ISSET(cst, WT_STAT_TYPE_TREE_WALK))
+ WT_RET(__stat_tree_walk(session));
+
+ return (0);
+}
+
+/*
+ * __stat_tree_walk --
+ * Gather btree statistics that require traversing the tree.
+ */
+static int
+__stat_tree_walk(WT_SESSION_IMPL *session)
+{
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_DSRC_STATS **stats;
+ WT_REF *next_walk;
+
+ btree = S2BT(session);
+ stats = btree->dhandle->stats;
/*
* Clear the statistics we're about to count.
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c
index 7b583bd9c1e..6d4ad9d0d0f 100644
--- a/src/btree/bt_sync.c
+++ b/src/btree/bt_sync.c
@@ -9,6 +9,59 @@
#include "wt_internal.h"
/*
+ * __sync_checkpoint_can_skip --
+ * There are limited conditions under which we can skip writing a dirty
+ * page during checkpoint.
+ */
+static inline bool
+__sync_checkpoint_can_skip(WT_SESSION_IMPL *session, WT_PAGE *page)
+{
+ WT_PAGE_MODIFY *mod;
+ WT_MULTI *multi;
+ WT_TXN *txn;
+ u_int i;
+
+ mod = page->modify;
+ txn = &session->txn;
+
+ /*
+ * We can skip some dirty pages during a checkpoint. The requirements:
+ *
+ * 1. they must be leaf pages,
+ * 2. there is a snapshot transaction active (which is the case in
+ * ordinary application checkpoints but not all internal cases),
+ * 3. the first dirty update on the page is sufficiently recent that the
+ * checkpoint transaction would skip them,
+ * 4. there's already an address for every disk block involved.
+ */
+ if (WT_PAGE_IS_INTERNAL(page))
+ return (false);
+ if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT))
+ return (false);
+ if (!WT_TXNID_LT(txn->snap_max, mod->first_dirty_txn))
+ return (false);
+
+ /*
+ * The problematic case is when a page was evicted but there were
+ * unresolved updates and not every block associated with the page has
+ * a disk address. We can't skip such pages because we need a checkpoint
+ * write with valid addresses.
+ *
+ * The page's modification information can change underfoot if the page
+ * is being reconciled, so we'd normally serialize with reconciliation
+ * before reviewing page-modification information. However, checkpoint
+ * is the only valid writer of dirty leaf pages at this point, we skip
+ * the lock.
+ */
+ if (mod->rec_result == WT_PM_REC_MULTIBLOCK)
+ for (multi = mod->mod_multi,
+ i = 0; i < mod->mod_multi_entries; ++multi, ++i)
+ if (multi->addr.addr == NULL)
+ return (false);
+ return (true);
+}
+
+/*
* __sync_file --
* Flush pages for a specific file.
*/
@@ -20,7 +73,6 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_PAGE *page;
- WT_PAGE_MODIFY *mod;
WT_REF *walk;
WT_TXN *txn;
uint64_t internal_bytes, internal_pages, leaf_bytes, leaf_pages;
@@ -161,29 +213,15 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
* reference and checking modified.
*/
page = walk->page;
- mod = page->modify;
/*
- * Write dirty pages, unless we can be sure they only
- * became dirty after the checkpoint started.
- *
- * We can skip dirty pages if:
- * (1) they are leaf pages;
- * (2) there is a snapshot transaction active (which
- * is the case in ordinary application checkpoints
- * but not all internal cases); and
- * (3) the first dirty update on the page is
- * sufficiently recent that the checkpoint
- * transaction would skip them.
- *
- * Mark the tree dirty: the checkpoint marked it clean
- * and we can't skip future checkpoints until this page
- * is written.
+ * Write dirty pages, if we can't skip them. If we skip
+ * a page, mark the tree dirty. The checkpoint marked it
+ * clean and we can't skip future checkpoints until this
+ * page is written.
*/
- if (!WT_PAGE_IS_INTERNAL(page) &&
- F_ISSET(txn, WT_TXN_HAS_SNAPSHOT) &&
- WT_TXNID_LT(txn->snap_max, mod->first_dirty_txn)) {
- __wt_page_modify_set(session, page);
+ if (__sync_checkpoint_can_skip(session, page)) {
+ __wt_tree_modify_set(session);
continue;
}
diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c
index 4c338bc6ad9..41f50957809 100644
--- a/src/cache/cache_las.c
+++ b/src/cache/cache_las.c
@@ -49,7 +49,7 @@ __wt_las_stats_update(WT_SESSION_IMPL *session)
* don't have to worry about users seeing inconsistent data source
* information.
*/
- if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_CLEAR)) {
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_CLEAR)) {
WT_STAT_SET(session, dstats, cursor_insert, 0);
WT_STAT_SET(session, dstats, cursor_remove, 0);
}
diff --git a/src/config/config_def.c b/src/config/config_def.c
index 7bce4bc9cef..018cc7a8ac4 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -138,7 +138,8 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
NULL, NULL,
confchk_wiredtiger_open_shared_cache_subconfigs, 5 },
{ "statistics", "list",
- NULL, "choices=[\"all\",\"fast\",\"none\",\"clear\"]",
+ NULL, "choices=[\"all\",\"cache_walk\",\"fast\",\"none\","
+ "\"clear\",\"tree_walk\"]",
NULL, 0 },
{ "statistics_log", "category",
NULL, NULL,
@@ -334,7 +335,8 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor[] = {
{ "readonly", "boolean", NULL, NULL, NULL, 0 },
{ "skip_sort_check", "boolean", NULL, NULL, NULL, 0 },
{ "statistics", "list",
- NULL, "choices=[\"all\",\"fast\",\"clear\",\"size\"]",
+ NULL, "choices=[\"all\",\"cache_walk\",\"fast\",\"clear\","
+ "\"size\",\"tree_walk\"]",
NULL, 0 },
{ "target", "list", NULL, NULL, NULL, 0 },
{ NULL, NULL, NULL, NULL, NULL, 0 }
@@ -709,7 +711,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
NULL, NULL,
confchk_wiredtiger_open_shared_cache_subconfigs, 5 },
{ "statistics", "list",
- NULL, "choices=[\"all\",\"fast\",\"none\",\"clear\"]",
+ NULL, "choices=[\"all\",\"cache_walk\",\"fast\",\"none\","
+ "\"clear\",\"tree_walk\"]",
NULL, 0 },
{ "statistics_log", "category",
NULL, NULL,
@@ -793,7 +796,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
NULL, NULL,
confchk_wiredtiger_open_shared_cache_subconfigs, 5 },
{ "statistics", "list",
- NULL, "choices=[\"all\",\"fast\",\"none\",\"clear\"]",
+ NULL, "choices=[\"all\",\"cache_walk\",\"fast\",\"none\","
+ "\"clear\",\"tree_walk\"]",
NULL, 0 },
{ "statistics_log", "category",
NULL, NULL,
@@ -874,7 +878,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
NULL, NULL,
confchk_wiredtiger_open_shared_cache_subconfigs, 5 },
{ "statistics", "list",
- NULL, "choices=[\"all\",\"fast\",\"none\",\"clear\"]",
+ NULL, "choices=[\"all\",\"cache_walk\",\"fast\",\"none\","
+ "\"clear\",\"tree_walk\"]",
NULL, 0 },
{ "statistics_log", "category",
NULL, NULL,
@@ -953,7 +958,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
NULL, NULL,
confchk_wiredtiger_open_shared_cache_subconfigs, 5 },
{ "statistics", "list",
- NULL, "choices=[\"all\",\"fast\",\"none\",\"clear\"]",
+ NULL, "choices=[\"all\",\"cache_walk\",\"fast\",\"none\","
+ "\"clear\",\"tree_walk\"]",
NULL, 0 },
{ "statistics_log", "category",
NULL, NULL,
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index 0951fd4e58c..04c29e957a3 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -789,14 +789,17 @@ __conn_get_extension_api(WT_CONNECTION *wt_conn)
return (&conn->extension_api);
}
+#ifdef HAVE_BUILTIN_EXTENSION_LZ4
+ extern int lz4_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
+#endif
#ifdef HAVE_BUILTIN_EXTENSION_SNAPPY
extern int snappy_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
#endif
#ifdef HAVE_BUILTIN_EXTENSION_ZLIB
extern int zlib_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
#endif
-#ifdef HAVE_BUILTIN_EXTENSION_LZ4
- extern int lz4_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
+#ifdef HAVE_BUILTIN_EXTENSION_ZSTD
+ extern int zstd_extension_init(WT_CONNECTION *, WT_CONFIG_ARG *);
#endif
/*
@@ -808,14 +811,17 @@ __conn_load_default_extensions(WT_CONNECTION_IMPL *conn)
{
WT_UNUSED(conn);
+#ifdef HAVE_BUILTIN_EXTENSION_LZ4
+ WT_RET(lz4_extension_init(&conn->iface, NULL));
+#endif
#ifdef HAVE_BUILTIN_EXTENSION_SNAPPY
WT_RET(snappy_extension_init(&conn->iface, NULL));
#endif
#ifdef HAVE_BUILTIN_EXTENSION_ZLIB
WT_RET(zlib_extension_init(&conn->iface, NULL));
#endif
-#ifdef HAVE_BUILTIN_EXTENSION_LZ4
- WT_RET(lz4_extension_init(&conn->iface, NULL));
+#ifdef HAVE_BUILTIN_EXTENSION_ZSTD
+ WT_RET(zstd_extension_init(&conn->iface, NULL));
#endif
return (0);
}
@@ -1668,32 +1674,60 @@ __conn_statistics_config(WT_SESSION_IMPL *session, const char *cfg[])
if ((ret = __wt_config_subgets(
session, &cval, "fast", &sval)) == 0 && sval.val != 0) {
- LF_SET(WT_CONN_STAT_FAST);
+ LF_SET(WT_STAT_TYPE_FAST);
++set;
}
WT_RET_NOTFOUND_OK(ret);
if ((ret = __wt_config_subgets(
session, &cval, "all", &sval)) == 0 && sval.val != 0) {
- LF_SET(WT_CONN_STAT_ALL | WT_CONN_STAT_FAST);
+ LF_SET(
+ WT_STAT_TYPE_ALL | WT_STAT_TYPE_CACHE_WALK |
+ WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK);
++set;
}
WT_RET_NOTFOUND_OK(ret);
+ if (set > 1)
+ WT_RET_MSG(session, EINVAL,
+ "Only one of all, fast, none configuration values should "
+ "be specified");
+
+ /*
+ * Now that we've parsed general statistics categories, process
+ * sub-categories.
+ */
+ if ((ret = __wt_config_subgets(
+ session, &cval, "cache_walk", &sval)) == 0 && sval.val != 0)
+ /*
+ * Configuring cache walk statistics implies fast statistics.
+ * Keep that knowledge internal for now - it may change in the
+ * future.
+ */
+ LF_SET(WT_STAT_TYPE_FAST | WT_STAT_TYPE_CACHE_WALK);
+ WT_RET_NOTFOUND_OK(ret);
+
+ if ((ret = __wt_config_subgets(
+ session, &cval, "tree_walk", &sval)) == 0 && sval.val != 0)
+ /*
+ * Configuring tree walk statistics implies fast statistics.
+ * Keep that knowledge internal for now - it may change in the
+ * future.
+ */
+ LF_SET(WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK);
+ WT_RET_NOTFOUND_OK(ret);
+
if ((ret = __wt_config_subgets(
session, &cval, "clear", &sval)) == 0 && sval.val != 0) {
- if (!LF_ISSET(WT_CONN_STAT_FAST | WT_CONN_STAT_ALL))
+ if (!LF_ISSET(WT_STAT_TYPE_ALL | WT_STAT_TYPE_CACHE_WALK |
+ WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK))
WT_RET_MSG(session, EINVAL,
- "the value \"clear\" can be specified only if "
- "either \"all\" or \"fast\" is specified");
- LF_SET(WT_CONN_STAT_CLEAR);
+ "the value \"clear\" can only be specified if "
+ "statistics are enabled");
+ LF_SET(WT_STAT_CLEAR);
}
WT_RET_NOTFOUND_OK(ret);
- if (set > 1)
- WT_RET_MSG(session, EINVAL,
- "only one statistics configuration value may be specified");
-
/* Configuring statistics clears any existing values. */
conn->stat_flags = flags;
@@ -1943,6 +1977,42 @@ __conn_chk_file_system(WT_SESSION_IMPL *session, bool readonly)
}
/*
+ * wiredtiger_dummy_session_init --
+ * Initialize the connection's dummy session.
+ *
+ * Fills in conn->dummy_session in place: its owning connection, its
+ * name, standard I/O and the supplied event handler, its statistics
+ * bucket and its strerror method. This function does not assign
+ * conn->default_session.
+ */
+static void
+wiredtiger_dummy_session_init(
+ WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler)
+{
+ WT_SESSION_IMPL *session;
+
+ session = &conn->dummy_session;
+
+ /*
+ * We use a fake session until we can allocate and initialize the real
+ * ones. Initialize the necessary fields (unfortunately, the fields we
+ * initialize have been selected by core dumps, we need to do better).
+ */
+ session->iface.connection = &conn->iface;
+ session->name = "wiredtiger_open";
+
+ /* Standard I/O and error handling first. */
+ __wt_os_stdio(session);
+ __wt_event_handler_set(session, event_handler);
+
+ /* Statistics */
+ session->stat_bucket = 0;
+
+ /*
+ * Set the default session's strerror method. If one of the extensions
+ * being loaded reports an error via the WT_EXTENSION_API strerror
+ * method, but doesn't supply that method a WT_SESSION handle, we'll
+ * use the WT_CONNECTION_IMPL's default session and its strerror method.
+ */
+ session->iface.strerror = __wt_session_strerror;
+}
+
+/*
* wiredtiger_open --
* Main library entry point: open a new connection to a WiredTiger
* database.
@@ -2013,21 +2083,11 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
TAILQ_INSERT_TAIL(&__wt_process.connqh, conn, q);
__wt_spin_unlock(NULL, &__wt_process.spinlock);
- session = conn->default_session = &conn->dummy_session;
- session->iface.connection = &conn->iface;
- session->name = "wiredtiger_open";
-
- /* Do standard I/O and error handling first. */
- __wt_os_stdio(session);
- __wt_event_handler_set(session, event_handler);
-
/*
- * Set the default session's strerror method. If one of the extensions
- * being loaded reports an error via the WT_EXTENSION_API strerror
- * method, but doesn't supply that method a WT_SESSION handle, we'll
- * use the WT_CONNECTION_IMPL's default session and its strerror method.
+ * Initialize the fake session used until we can create real sessions.
*/
- conn->default_session->iface.strerror = __wt_session_strerror;
+ wiredtiger_dummy_session_init(conn, event_handler);
+ session = conn->default_session = &conn->dummy_session;
/* Basic initialization of the connection structure. */
WT_ERR(__wt_connection_init(conn));
diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c
index 1b8b3183d3c..fe5f94ea03d 100644
--- a/src/conn/conn_cache.c
+++ b/src/conn/conn_cache.c
@@ -183,26 +183,26 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
* get any work done.
*/
if (cache->eviction_target >= cache->eviction_trigger)
- WT_ERR_MSG(session, EINVAL,
+ WT_RET_MSG(session, EINVAL,
"eviction target must be lower than the eviction trigger");
- WT_ERR(__wt_cond_auto_alloc(session, "cache eviction server",
+ WT_RET(__wt_cond_auto_alloc(session, "cache eviction server",
false, 10000, WT_MILLION, &cache->evict_cond));
- WT_ERR(__wt_spin_init(session, &cache->evict_pass_lock, "evict pass"));
- WT_ERR(__wt_spin_init(session,
+ WT_RET(__wt_spin_init(session, &cache->evict_pass_lock, "evict pass"));
+ WT_RET(__wt_spin_init(session,
&cache->evict_queue_lock, "cache eviction queue"));
- WT_ERR(__wt_spin_init(session, &cache->evict_walk_lock, "cache walk"));
+ WT_RET(__wt_spin_init(session, &cache->evict_walk_lock, "cache walk"));
if ((ret = __wt_open_internal_session(conn, "evict pass",
false, WT_SESSION_NO_DATA_HANDLES, &cache->walk_session)) != 0)
- WT_ERR_MSG(NULL, ret,
+ WT_RET_MSG(NULL, ret,
"Failed to create session for eviction walks");
/* Allocate the LRU eviction queue. */
cache->evict_slots = WT_EVICT_WALK_BASE + WT_EVICT_WALK_INCR;
for (i = 0; i < WT_EVICT_QUEUE_MAX; ++i) {
- WT_ERR(__wt_calloc_def(session,
+ WT_RET(__wt_calloc_def(session,
cache->evict_slots, &cache->evict_queues[i].evict_queue));
- WT_ERR(__wt_spin_init(session,
+ WT_RET(__wt_spin_init(session,
&cache->evict_queues[i].evict_lock, "cache eviction"));
}
@@ -218,9 +218,6 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
*/
__wt_cache_stats_update(session);
return (0);
-
-err: WT_RET(__wt_cache_destroy(session));
- return (ret);
}
/*
diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c
index 5ff8b7f798b..5104624523b 100644
--- a/src/conn/conn_handle.c
+++ b/src/conn/conn_handle.c
@@ -50,21 +50,23 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn)
/* Statistics. */
__wt_stat_connection_init(conn);
- /* Locks. */
+ /* Spinlocks. */
WT_RET(__wt_spin_init(session, &conn->api_lock, "api"));
- WT_RET(__wt_spin_init(session, &conn->checkpoint_lock, "checkpoint"));
- WT_RET(__wt_spin_init(session, &conn->dhandle_lock, "data handle"));
+ WT_SPIN_INIT_TRACKED(session, &conn->checkpoint_lock, checkpoint);
+ WT_SPIN_INIT_TRACKED(session, &conn->dhandle_lock, handle_list);
WT_RET(__wt_spin_init(session, &conn->encryptor_lock, "encryptor"));
WT_RET(__wt_spin_init(session, &conn->fh_lock, "file list"));
- WT_RET(__wt_rwlock_alloc(session,
- &conn->hot_backup_lock, "hot backup"));
WT_RET(__wt_spin_init(session, &conn->las_lock, "lookaside table"));
- WT_RET(__wt_spin_init(session, &conn->metadata_lock, "metadata"));
+ WT_SPIN_INIT_TRACKED(session, &conn->metadata_lock, metadata);
WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure"));
- WT_RET(__wt_spin_init(session, &conn->schema_lock, "schema"));
- WT_RET(__wt_spin_init(session, &conn->table_lock, "table creation"));
+ WT_SPIN_INIT_TRACKED(session, &conn->schema_lock, schema);
+ WT_SPIN_INIT_TRACKED(session, &conn->table_lock, table);
WT_RET(__wt_spin_init(session, &conn->turtle_lock, "turtle file"));
+ /* Read-write locks */
+ WT_RET(__wt_rwlock_alloc(
+ session, &conn->hot_backup_lock, "hot backup"));
+
WT_RET(__wt_calloc_def(session, WT_PAGE_LOCKS, &conn->page_lock));
WT_CACHE_LINE_ALIGNMENT_VERIFY(session, conn->page_lock);
for (i = 0; i < WT_PAGE_LOCKS; ++i)
diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c
index d5a31c671c0..0715a035807 100644
--- a/src/conn/conn_stat.c
+++ b/src/conn/conn_stat.c
@@ -130,12 +130,12 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp)
WT_RET(__wt_config_gets(session, cfg, "statistics_log.json", &cval));
if (cval.val != 0)
- FLD_SET(conn->stat_flags, WT_CONN_STAT_JSON);
+ FLD_SET(conn->stat_flags, WT_STAT_JSON);
WT_RET(__wt_config_gets(
session, cfg, "statistics_log.on_close", &cval));
if (cval.val != 0)
- FLD_SET(conn->stat_flags, WT_CONN_STAT_ON_CLOSE);
+ FLD_SET(conn->stat_flags, WT_STAT_ON_CLOSE);
/*
* We don't allow the log path to be reconfigured for security reasons.
@@ -206,7 +206,7 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp)
#define WT_TIMESTAMP_JSON_DEFAULT "%Y-%m-%dT%H:%M:%S.000Z"
WT_ERR(__wt_config_gets(
session, cfg, "statistics_log.timestamp", &cval));
- if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_JSON) &&
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_JSON) &&
WT_STRING_MATCH(WT_TIMESTAMP_DEFAULT, cval.str, cval.len))
WT_ERR(__wt_strdup(
session, WT_TIMESTAMP_JSON_DEFAULT, &conn->stat_format));
@@ -264,7 +264,7 @@ __statlog_dump(WT_SESSION_IMPL *session, const char *name, bool conn_stats)
goto err;
}
- if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_JSON)) {
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_JSON)) {
WT_ERR(__wt_fprintf(session, conn->stat_fs,
"{\"version\":\"%s\",\"localTime\":\"%s\"",
WIREDTIGER_VERSION_STRING, conn->stat_stamp));
@@ -482,7 +482,7 @@ __wt_statlog_log_one(WT_SESSION_IMPL *session)
conn = S2C(session);
- if (!FLD_ISSET(conn->stat_flags, WT_CONN_STAT_ON_CLOSE))
+ if (!FLD_ISSET(conn->stat_flags, WT_STAT_ON_CLOSE))
return (0);
if (F_ISSET(conn, WT_CONN_SERVER_RUN) &&
diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c
index e304cf7b775..9fc466f4c76 100644
--- a/src/cursor/cur_file.c
+++ b/src/cursor/cur_file.c
@@ -117,12 +117,12 @@ err: API_END_RET(session, ret);
}
/*
- * __curfile_next_random --
+ * __wt_curfile_next_random --
* WT_CURSOR->next method for the btree cursor type when configured with
- * next_random.
+ * next_random. This is exported because it is called directly within LSM.
*/
-static int
-__curfile_next_random(WT_CURSOR *cursor)
+int
+__wt_curfile_next_random(WT_CURSOR *cursor)
{
WT_CURSOR_BTREE *cbt;
WT_DECL_RET;
@@ -473,7 +473,7 @@ __curfile_create(WT_SESSION_IMPL *session,
"column-store objects");
__wt_cursor_set_notsup(cursor);
- cursor->next = __curfile_next_random;
+ cursor->next = __wt_curfile_next_random;
cursor->reset = __curfile_reset;
WT_ERR(__wt_config_gets_def(
diff --git a/src/cursor/cur_stat.c b/src/cursor/cur_stat.c
index 700cc366ff0..b36416debe1 100644
--- a/src/cursor/cur_stat.c
+++ b/src/cursor/cur_stat.c
@@ -354,7 +354,7 @@ __curstat_conn_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
*/
__wt_conn_stat_init(session);
__wt_stat_connection_aggregate(conn->stats, &cst->u.conn_stats);
- if (F_ISSET(cst, WT_CONN_STAT_CLEAR))
+ if (F_ISSET(cst, WT_STAT_CLEAR))
__wt_stat_connection_clear_all(conn->stats);
cst->stats = (int64_t *)&cst->u.conn_stats;
@@ -380,7 +380,7 @@ __curstat_file_init(WT_SESSION_IMPL *session,
* If we are only getting the size of the file, we don't need to open
* the tree.
*/
- if (F_ISSET(cst, WT_CONN_STAT_SIZE)) {
+ if (F_ISSET(cst, WT_STAT_TYPE_SIZE)) {
filename = uri;
if (!WT_PREFIX_SKIP(filename, "file:"))
return (EINVAL);
@@ -401,7 +401,7 @@ __curstat_file_init(WT_SESSION_IMPL *session,
if ((ret = __wt_btree_stat_init(session, cst)) == 0) {
__wt_stat_dsrc_init_single(&cst->u.dsrc_stats);
__wt_stat_dsrc_aggregate(dhandle->stats, &cst->u.dsrc_stats);
- if (F_ISSET(cst, WT_CONN_STAT_CLEAR))
+ if (F_ISSET(cst, WT_STAT_CLEAR))
__wt_stat_dsrc_clear_all(dhandle->stats);
__wt_curstat_dsrc_final(cst);
}
@@ -604,50 +604,79 @@ __wt_curstat_open(WT_SESSION_IMPL *session,
if ((ret = __wt_config_gets(session, cfg, "statistics", &cval)) == 0) {
if ((ret = __wt_config_subgets(
session, &cval, "all", &sval)) == 0 && sval.val != 0) {
- if (!FLD_ISSET(conn->stat_flags, WT_CONN_STAT_ALL))
+ if (!FLD_ISSET(conn->stat_flags, WT_STAT_TYPE_ALL))
goto config_err;
- F_SET(cst, WT_CONN_STAT_ALL | WT_CONN_STAT_FAST);
+ F_SET(cst, WT_STAT_TYPE_ALL | WT_STAT_TYPE_CACHE_WALK |
+ WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK);
}
WT_ERR_NOTFOUND_OK(ret);
if ((ret = __wt_config_subgets(
session, &cval, "fast", &sval)) == 0 && sval.val != 0) {
- if (F_ISSET(cst, WT_CONN_STAT_ALL))
+ if (F_ISSET(cst, WT_STAT_TYPE_ALL))
WT_ERR_MSG(session, EINVAL,
- "only one statistics configuration value "
- "may be specified");
- F_SET(cst, WT_CONN_STAT_FAST);
+ "Only one of all, fast, none "
+ "configuration values should be specified");
+ F_SET(cst, WT_STAT_TYPE_FAST);
}
WT_ERR_NOTFOUND_OK(ret);
+
+ if ((ret = __wt_config_subgets(session,
+ &cval, "cache_walk", &sval)) == 0 && sval.val != 0) {
+ /*
+ * Configuring cache walk statistics implies fast
+ * statistics. Keep that knowledge internal for now -
+ * it may change in the future.
+ */
+ F_SET(cst, WT_STAT_TYPE_CACHE_WALK | WT_STAT_TYPE_FAST);
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+ if ((ret = __wt_config_subgets(session,
+ &cval, "tree_walk", &sval)) == 0 && sval.val != 0) {
+ /*
+ * Configuring tree walk statistics implies fast
+ * statistics. Keep that knowledge internal for now -
+ * it may change in the future.
+ */
+ F_SET(cst, WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK);
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
if ((ret = __wt_config_subgets(
session, &cval, "size", &sval)) == 0 && sval.val != 0) {
- if (F_ISSET(cst, WT_CONN_STAT_FAST | WT_CONN_STAT_ALL))
+ if (F_ISSET(cst, WT_STAT_TYPE_FAST | WT_STAT_TYPE_ALL))
WT_ERR_MSG(session, EINVAL,
- "only one statistics configuration value "
- "may be specified");
- F_SET(cst, WT_CONN_STAT_SIZE);
+ "Only one of all, fast, none "
+ "configuration values should be specified");
+ F_SET(cst, WT_STAT_TYPE_SIZE);
}
WT_ERR_NOTFOUND_OK(ret);
if ((ret = __wt_config_subgets(
session, &cval, "clear", &sval)) == 0 && sval.val != 0) {
- if (F_ISSET(cst, WT_CONN_STAT_SIZE))
+ if (F_ISSET(cst, WT_STAT_TYPE_SIZE))
WT_ERR_MSG(session, EINVAL,
"clear is incompatible with size "
"statistics");
- F_SET(cst, WT_CONN_STAT_CLEAR);
+ F_SET(cst, WT_STAT_CLEAR);
}
WT_ERR_NOTFOUND_OK(ret);
/* If no configuration, use the connection's configuration. */
if (cst->flags == 0) {
- if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_ALL))
- F_SET(cst, WT_CONN_STAT_ALL);
- if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_FAST))
- F_SET(cst, WT_CONN_STAT_FAST);
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_TYPE_ALL))
+ F_SET(cst, WT_STAT_TYPE_ALL);
+ if (FLD_ISSET(
+ conn->stat_flags, WT_STAT_TYPE_CACHE_WALK))
+ F_SET(cst, WT_STAT_TYPE_CACHE_WALK);
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_TYPE_FAST))
+ F_SET(cst, WT_STAT_TYPE_FAST);
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_TYPE_TREE_WALK))
+ F_SET(cst, WT_STAT_TYPE_TREE_WALK);
}
/* If the connection configures clear, so do we. */
- if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_CLEAR))
- F_SET(cst, WT_CONN_STAT_CLEAR);
+ if (FLD_ISSET(conn->stat_flags, WT_STAT_CLEAR))
+ F_SET(cst, WT_STAT_CLEAR);
}
/*
@@ -670,9 +699,9 @@ __wt_curstat_open(WT_SESSION_IMPL *session,
/*
* Do the initial statistics snapshot: there won't be cursor operations
- * to trigger initialization when aggregating statistics for upper-level
- * objects like tables, we need to a valid set of statistics when before
- * the open returns.
+ * to trigger initialization when aggregating statistics for upper-level
+ * objects like tables, so we need a valid set of statistics before the
+ * open returns.
*/
WT_ERR(__wt_curstat_init(session, uri, other, cst->cfg, cst));
cst->notinitialized = false;
diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c
index 1b93b27f564..6543d54e90f 100644
--- a/src/cursor/cur_table.c
+++ b/src/cursor/cur_table.c
@@ -757,13 +757,36 @@ err: API_END_RET(session, ret);
}
/*
+ * __curtable_complete --
+ * Return failure if the table is not yet fully created.
+ *
+ * Returns 0 if table->cg_complete is set, checking first without the
+ * table lock and then again while holding it. If the flag is still
+ * unset after the locked recheck, returns EINVAL with a message naming
+ * the table. A non-zero value left in ret by WT_WITH_TABLE_LOCK is
+ * returned as-is.
+ */
+static int
+__curtable_complete(WT_SESSION_IMPL *session, WT_TABLE *table)
+{
+ WT_DECL_RET;
+ bool complete;
+
+ if (table->cg_complete)
+ return (0);
+
+ /* If the table is incomplete, wait on the table lock and recheck. */
+ complete = false;
+ WT_WITH_TABLE_LOCK(session, ret, complete = table->cg_complete);
+ WT_RET(ret);
+ if (!complete)
+ WT_RET_MSG(session, EINVAL,
+ "'%s' not available until all column groups are created",
+ table->name);
+ return (0);
+}
+
+/*
* __curtable_open_colgroups --
* Open cursors on column groups for a table cursor.
*/
static int
__curtable_open_colgroups(WT_CURSOR_TABLE *ctable, const char *cfg_arg[])
{
- WT_DECL_RET;
WT_SESSION_IMPL *session;
WT_TABLE *table;
WT_CURSOR **cp;
@@ -775,21 +798,11 @@ __curtable_open_colgroups(WT_CURSOR_TABLE *ctable, const char *cfg_arg[])
cfg_arg[0], cfg_arg[1], "dump=\"\",readonly=0", NULL, NULL
};
u_int i;
- bool complete;
session = (WT_SESSION_IMPL *)ctable->iface.session;
table = ctable->table;
- /* If the table is incomplete, wait on the table lock and recheck. */
- complete = table->cg_complete;
- if (!complete) {
- WT_WITH_TABLE_LOCK(session, ret, complete = table->cg_complete);
- WT_RET(ret);
- }
- if (!complete)
- WT_RET_MSG(session, EINVAL,
- "Can't use '%s' until all column groups are created",
- table->name);
+ WT_RET(__curtable_complete(session, table)); /* completeness check */
WT_RET(__wt_calloc_def(session,
WT_COLGROUPS(table), &ctable->cg_cursors));
@@ -887,6 +900,8 @@ __wt_curtable_open(WT_SESSION_IMPL *session,
size = WT_PTRDIFF(columns, tablename);
WT_RET(__wt_schema_get_table(session, tablename, size, false, &table));
+ WT_RET(__curtable_complete(session, table)); /* completeness check */
+
if (table->is_simple) {
/* Just return a cursor on the underlying data source. */
ret = __wt_open_cursor(session,
diff --git a/src/docs/build-posix.dox b/src/docs/build-posix.dox
index 4889bf931c9..3e7f8f37acd 100644
--- a/src/docs/build-posix.dox
+++ b/src/docs/build-posix.dox
@@ -150,10 +150,14 @@ Configure WiredTiger to support the \c verbose configuration string to
Configure WiredTiger for <a href="http://www.zlib.net/">zlib</a>
compression; see @ref compression for more information.
+@par \c --enable-zstd
+Configure WiredTiger for <a href="https://github.com/facebook/zstd">Zstd</a>
+compression; see @ref compression for more information.
+
@par <code>--with-builtins</code>
Configure WiredTiger to include support for extensions in the main library.
This avoids requiring additional libraries for supported extensions. Currently
-supported options are \c lz4, \c snappy and \c zlib.
+supported options are \c lz4, \c snappy, \c zlib and \c zstd.
@par <code>--with-python-prefix</code>
Configure WiredTiger to install Python libraries to a non-standard Python
diff --git a/src/docs/compression.dox b/src/docs/compression.dox
index 0be96835760..74bed5c6f68 100644
--- a/src/docs/compression.dox
+++ b/src/docs/compression.dox
@@ -1,7 +1,7 @@
/*! @m_page{{c,java},compression,Compressors}
This section explains how to configure WiredTiger's builtin support for
-the lz4, snappy and zlib compression engines.
+the lz4, snappy, zlib and zstd compression engines.
@section compression_lz4 Using LZ4 compression
@@ -85,11 +85,53 @@ an extension. For example, with the WiredTiger library installed in
@snippet ex_all.c Configure zlib extension
+The default compression level for the zlib compression is
+\c Z_DEFAULT_COMPRESSION (see the zlib documentation for further
+information); compression can be configured to other levels using the
+additional configuration argument \c compression_level.
+
+@snippet ex_all.c Configure zlib extension with compression level
+
Finally, when creating the WiredTiger object, set \c block_compressor
to \c zlib:
@snippet ex_all.c Create a zlib compressed table
+@section compression_zstd Using Zstd compression
+
+To use the builtin support for Facebook's
+<a href="https://github.com/facebook/zstd">Zstd</a>
+compression, first check that Zstd is installed in include and library
+directories searched by the compiler. Once Zstd is installed, you can
+enable Zstd using the \c --enable-zstd option to configure.
+
+If Zstd is installed in a location not normally searched by the
+compiler toolchain, you'll need to modify the \c CPPFLAGS and \c LDFLAGS
+to indicate these locations. For example, with the Zstd includes and
+libraries installed in \c /usr/local/include and \c /usr/local/lib, you
+would run configure with the following additional arguments:
+
+@code
+--enable-zstd CPPFLAGS="-I/usr/local/include" LDFLAGS="-L/usr/local/lib"
+@endcode
+
+When opening the WiredTiger database, load the Zstd shared library as
+an extension. For example, with the WiredTiger library installed in
+\c /usr/local/lib, you would use the following extension:
+
+@snippet ex_all.c Configure zstd extension
+
+The default compression level for the zstd compression is 3; compression
+can be configured to other levels using the additional configuration
+argument \c compression_level.
+
+@snippet ex_all.c Configure zstd extension with compression level
+
+Finally, when creating the WiredTiger object, set \c block_compressor
+to \c zstd:
+
+@snippet ex_all.c Create a zstd compressed table
+
@section compression_upgrading Upgrading compression engines
WiredTiger does not store information with file blocks to identify the
diff --git a/src/docs/spell.ok b/src/docs/spell.ok
index a2ef7658ec6..4b1337f84b8 100644
--- a/src/docs/spell.ok
+++ b/src/docs/spell.ok
@@ -95,6 +95,7 @@ WiredTigerStat
WiredTigerTestCase
Yann
Za
+Zstd
aR
abstime
ack'ed
@@ -507,3 +508,4 @@ xa
yieldcpu
zlib
zseries
+zstd
diff --git a/src/docs/wtperf.dox b/src/docs/wtperf.dox
index df66ad43355..83aadf8a776 100644
--- a/src/docs/wtperf.dox
+++ b/src/docs/wtperf.dox
@@ -155,10 +155,12 @@ checkpoint every rate operations during the populate phase in the populate thre
number of checkpoint threads
@par conn_config (string, default="create")
connection configuration string
+@par close_conn (boolean, default=true)
+properly close connection at end of test. Setting to false does not sync data to disk and can result in lost data after test exits.
@par compact (boolean, default=false)
post-populate compact for LSM merging activity
@par compression (string, default="none")
-compression extension. Allowed configuration values are: 'none', 'lz4', 'snappy', 'zlib'
+compression extension. Allowed configuration values are: 'none', 'lz4', 'snappy', 'zlib', 'zstd'
@par create (boolean, default=true)
do population phase; false to use existing database
@par database_count (unsigned int, default=1)
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index 45ec9bce3b5..6c99f3a13dc 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -548,6 +548,7 @@ __evict_pass(WT_SESSION_IMPL *session)
* does need to do some work.
*/
__wt_cache_read_gen_incr(session);
+ ++cache->evict_pass_gen;
/*
* Update the oldest ID: we use it to decide whether pages are
@@ -1055,7 +1056,7 @@ __evict_walk(WT_SESSION_IMPL *session, WT_EVICT_QUEUE *queue)
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
- u_int max_entries, retries, slot, start_slot, spins;
+ u_int max_entries, retries, slot, spins, start_slot, total_candidates;
bool dhandle_locked, incr;
conn = S2C(session);
@@ -1076,12 +1077,9 @@ __evict_walk(WT_SESSION_IMPL *session, WT_EVICT_QUEUE *queue)
* Another pathological case: if there are only a tiny number of
* candidate pages in cache, don't put all of them on one queue.
*/
- if (F_ISSET(cache, WT_CACHE_EVICT_CLEAN))
- max_entries = WT_MIN(max_entries,
- 1 + (uint32_t)(__wt_cache_pages_inuse(cache) / 2));
- else
- max_entries = WT_MIN(max_entries,
- 1 + (uint32_t)(cache->pages_dirty_leaf / 2));
+ total_candidates = (u_int)(F_ISSET(cache, WT_CACHE_EVICT_CLEAN) ?
+ __wt_cache_pages_inuse(cache) : cache->pages_dirty_leaf);
+ max_entries = WT_MIN(max_entries, 1 + total_candidates / 2);
retry: while (slot < max_entries) {
/*
@@ -1286,8 +1284,8 @@ __evict_push_candidate(WT_SESSION_IMPL *session,
* Get a few page eviction candidates from a single underlying file.
*/
static int
-__evict_walk_file(WT_SESSION_IMPL *session,
- WT_EVICT_QUEUE *queue, u_int max_entries, u_int *slotp)
+__evict_walk_file(WT_SESSION_IMPL *session, WT_EVICT_QUEUE *queue,
+ u_int max_entries, u_int *slotp)
{
WT_BTREE *btree;
WT_CACHE *cache;
@@ -1414,6 +1412,7 @@ __evict_walk_file(WT_SESSION_IMPL *session,
page = ref->page;
modified = __wt_page_is_modified(page);
+ page->evict_pass_gen = cache->evict_pass_gen;
/*
* Use the EVICT_LRU flag to avoid putting pages onto the list
@@ -1560,7 +1559,7 @@ __evict_get_ref(
server_only = is_server && !WT_EVICT_HAS_WORKERS(session);
urgent_ok = (!is_app && !is_server) ||
!WT_EVICT_HAS_WORKERS(session) ||
- __wt_cache_aggressive(session);
+ (is_app && __wt_cache_aggressive(session));
urgent_queue = cache->evict_urgent_queue;
*btreep = NULL;
*refp = NULL;
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c
index 092f80cc000..3d1557e027e 100644
--- a/src/evict/evict_page.c
+++ b/src/evict/evict_page.c
@@ -31,23 +31,14 @@ __evict_exclusive_clear(WT_SESSION_IMPL *session, WT_REF *ref)
static inline int
__evict_exclusive(WT_SESSION_IMPL *session, WT_REF *ref)
{
- int loops;
-
WT_ASSERT(session, ref->state == WT_REF_LOCKED);
/*
* Check for a hazard pointer indicating another thread is using the
* page, meaning the page cannot be evicted.
*/
- for (loops = 0; loops < 10; loops++) {
- if (__wt_page_hazard_check(session, ref->page) == NULL)
- return (0);
- if (ref->page->read_gen != WT_READGEN_OLDEST &&
- ref->page->memory_footprint <
- S2BT(session)->split_deepen_min_child)
- break;
- __wt_sleep(0, WT_THOUSAND);
- }
+ if (__wt_page_hazard_check(session, ref->page) == NULL)
+ return (0);
WT_STAT_DATA_INCR(session, cache_eviction_hazard);
WT_STAT_CONN_INCR(session, cache_eviction_hazard);
diff --git a/src/evict/evict_stat.c b/src/evict/evict_stat.c
new file mode 100644
index 00000000000..2dd3b1e83a0
--- /dev/null
+++ b/src/evict/evict_stat.c
@@ -0,0 +1,138 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __evict_stat_walk --
+ * Walk the pages in cache for the session's dhandle and set its cache_state statistics.
+ */
+static void
+__evict_stat_walk(WT_SESSION_IMPL *session)
+{
+ WT_BTREE *btree;
+ WT_PAGE *page;
+ WT_REF *next_walk;
+ uint64_t dsk_size, gen_gap, size;
+ uint64_t written_size_cnt, written_size_sum;
+ uint64_t gen_gap_cnt, gen_gap_max, gen_gap_sum;
+ uint64_t max_pagesize, min_written_size;
+ uint64_t num_memory, num_queued, num_not_queueable, num_smaller_allocsz;
+ uint64_t pages_clean, pages_dirty, pages_internal, pages_leaf;
+ uint64_t seen_count, walk_count;
+
+ btree = S2BT(session);
+ next_walk = NULL;
+ written_size_cnt = written_size_sum = 0;
+ gen_gap_cnt = gen_gap_max = gen_gap_sum = 0;
+ max_pagesize = 0;
+ num_memory = num_queued = num_not_queueable = num_smaller_allocsz = 0;
+ pages_clean = pages_dirty = pages_internal = pages_leaf = 0;
+ seen_count = walk_count = 0;
+ min_written_size = UINT64_MAX;
+
+ while (__wt_tree_walk_count(session, &next_walk, &walk_count,
+ WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_WAIT) == 0 &&
+ next_walk != NULL) {
+ ++seen_count;
+ page = next_walk->page;
+ size = page->memory_footprint;
+
+ if (__wt_page_is_modified(page))
+ ++pages_dirty;
+ else
+ ++pages_clean;
+
+ if (!__wt_ref_is_root(next_walk) &&
+ !__wt_page_can_evict(session, next_walk, NULL))
+ ++num_not_queueable;
+
+ if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU))
+ ++num_queued;
+
+ if (size > max_pagesize)
+ max_pagesize = size;
+
+ dsk_size = page->dsk != NULL ? page->dsk->mem_size : 0;
+ if (dsk_size != 0) {
+ if (dsk_size < btree->allocsize)
+ ++num_smaller_allocsz;
+ if (dsk_size < min_written_size)
+ min_written_size = dsk_size;
+ ++written_size_cnt;
+ written_size_sum += dsk_size;
+ } else
+ ++num_memory;
+
+ if (WT_PAGE_IS_INTERNAL(page))
+ ++pages_internal;
+ else
+ ++pages_leaf;
+
+ /* Skip root pages for the generation gap: they are never considered */
+ if (__wt_ref_is_root(next_walk))
+ continue;
+
+ gen_gap =
+ S2C(session)->cache->evict_pass_gen - page->evict_pass_gen;
+ if (gen_gap > gen_gap_max)
+ gen_gap_max = gen_gap;
+ gen_gap_sum += gen_gap;
+ ++gen_gap_cnt;
+ }
+
+ WT_STAT_DATA_SET(session, cache_state_avg_written_size,
+ written_size_cnt == 0 ? 0 : written_size_sum / written_size_cnt);
+ WT_STAT_DATA_SET(session, cache_state_gen_avg_gap,
+ gen_gap_cnt == 0 ? 0 : gen_gap_sum / gen_gap_cnt);
+ /* No written pages: report 0, not the UINT64_MAX starting sentinel. */
+ WT_STAT_DATA_SET(session, cache_state_min_written_size,
+ written_size_cnt == 0 ? 0 : min_written_size);
+ WT_STAT_DATA_SET(session, cache_state_gen_max_gap, gen_gap_max);
+ WT_STAT_DATA_SET(session, cache_state_max_pagesize, max_pagesize);
+ WT_STAT_DATA_SET(session, cache_state_memory, num_memory);
+ WT_STAT_DATA_SET(session, cache_state_queued, num_queued);
+ WT_STAT_DATA_SET(session, cache_state_not_queueable, num_not_queueable);
+ WT_STAT_DATA_SET(session,
+ cache_state_smaller_alloc_size, num_smaller_allocsz);
+ WT_STAT_DATA_SET(session, cache_state_pages, walk_count);
+ WT_STAT_DATA_SET(session, cache_state_pages_clean, pages_clean);
+ WT_STAT_DATA_SET(session, cache_state_pages_dirty, pages_dirty);
+ WT_STAT_DATA_SET(session, cache_state_pages_internal, pages_internal);
+ WT_STAT_DATA_SET(session, cache_state_pages_leaf, pages_leaf);
+ WT_STAT_DATA_SET(session,
+ cache_state_refs_skipped, walk_count - seen_count);
+}
+
+/*
+ * __wt_curstat_cache_walk --
+ * Set the cache_walk statistics: eviction generation, root page, then walk the cache.
+ */
+void
+__wt_curstat_cache_walk(WT_SESSION_IMPL *session)
+{
+ WT_BTREE *btree;
+ WT_CONNECTION_IMPL *conn;
+ WT_PAGE_INDEX *root_idx;
+
+ btree = S2BT(session);
+ conn = S2C(session);
+
+ /* Set statistics that don't require walking the cache. */
+ WT_STAT_DATA_SET(session,
+ cache_state_gen_current, conn->cache->evict_pass_gen);
+
+ /* Root page statistics: number of index entries and memory footprint. */
+ root_idx = WT_INTL_INDEX_GET_SAFE(btree->root.page);
+ WT_STAT_DATA_SET(session,
+ cache_state_root_entries, root_idx->entries);
+ WT_STAT_DATA_SET(session,
+ cache_state_root_size, btree->root.page->memory_footprint);
+
+ WT_WITH_HANDLE_LIST_LOCK(session, __evict_stat_walk(session));
+}
diff --git a/src/include/btmem.h b/src/include/btmem.h
index b4ca937e7ed..84c91097a99 100644
--- a/src/include/btmem.h
+++ b/src/include/btmem.h
@@ -430,6 +430,8 @@ struct __wt_page_modify {
#define WT_PM_REC_MULTIBLOCK 2 /* Reconciliation: multiple blocks */
#define WT_PM_REC_REPLACE 3 /* Reconciliation: single block */
uint8_t rec_result; /* Reconciliation state */
+
+ uint8_t update_restored; /* Page created by restoring updates */
};
/*
@@ -619,6 +621,8 @@ struct __wt_page {
#define WT_READGEN_START_VALUE 100
#define WT_READGEN_STEP 100
uint64_t read_gen;
+ /* The evict pass generation for the page */
+ uint64_t evict_pass_gen;
size_t memory_footprint; /* Memory attached to the page */
diff --git a/src/include/btree.i b/src/include/btree.i
index 74ebf74f1e9..daf2eb158c1 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -485,6 +485,38 @@ __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page)
}
/*
+ * __wt_tree_modify_set --
+ * Mark the session's btree and its connection dirty.
+ */
+static inline void
+__wt_tree_modify_set(WT_SESSION_IMPL *session)
+{
+ /*
+ * Test before setting the dirty flag, it's a hot cache line.
+ *
+ * The tree's modified flag is cleared by the checkpoint thread: set it
+ * and insert a barrier before any page is dirtied. (I don't think it's
+ * a problem if the tree is marked dirty with all the pages clean, it
+ * might result in an extra checkpoint that doesn't do any work but it
+ * shouldn't cause problems; regardless, let's play it safe.)
+ */
+ if (!S2BT(session)->modified) {
+ /* Assert we never dirty a checkpoint handle. */
+ WT_ASSERT(session, session->dhandle->checkpoint == NULL);
+
+ S2BT(session)->modified = true;
+ WT_FULL_BARRIER();
+ }
+
+ /*
+ * The btree may already be marked dirty while the connection is still
+ * clean; mark the connection dirty outside the test of the btree state.
+ */
+ if (!S2C(session)->modified)
+ S2C(session)->modified = true;
+}
+
+/*
* __wt_page_modify_clear --
* Clean a modified page.
*/
@@ -513,30 +545,9 @@ __wt_page_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page)
/*
* Mark the tree dirty (even if the page is already marked dirty), newly
* created pages to support "empty" files are dirty, but the file isn't
- * marked dirty until there's a real change needing to be written. Test
- * before setting the dirty flag, it's a hot cache line.
- *
- * The tree's modified flag is cleared by the checkpoint thread: set it
- * and insert a barrier before dirtying the page. (I don't think it's
- * a problem if the tree is marked dirty with all the pages clean, it
- * might result in an extra checkpoint that doesn't do any work but it
- * shouldn't cause problems; regardless, let's play it safe.)
- */
- if (!S2BT(session)->modified) {
- /* Assert we never dirty a checkpoint handle. */
- WT_ASSERT(session, session->dhandle->checkpoint == NULL);
-
- S2BT(session)->modified = true;
- WT_FULL_BARRIER();
- }
-
- /*
- * There is a possibility of btree being dirty whereas connection being
- * clean when entering this function. So make sure to update connection
- * to dirty outside a condition on btree modified flag.
+ * marked dirty until there's a real change needing to be written.
*/
- if (!S2C(session)->modified)
- S2C(session)->modified = true;
+ __wt_tree_modify_set(session);
__wt_page_only_modify_set(session, page);
}
@@ -1167,15 +1178,7 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
* There is no point doing an in-memory split unless there is a lot of
* data in the last skiplist on the page. Split if there are enough
* items and the skiplist does not fit within a single disk page.
- *
- * Rather than scanning the whole list, walk a higher level, which
- * gives a sample of the items -- at level 0 we have all the items, at
- * level 1 we have 1/4 and at level 2 we have 1/16th. If we see more
- * than 30 items and more data than would fit in a disk page, split.
*/
-#define WT_MIN_SPLIT_DEPTH 2
-#define WT_MIN_SPLIT_COUNT 30
-#define WT_MIN_SPLIT_MULTIPLIER 16 /* At level 2, we see 1/16th entries */
ins_head = page->type == WT_PAGE_ROW_LEAF ?
(page->pg_row_entries == 0 ?
@@ -1184,8 +1187,40 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
WT_COL_APPEND(page);
if (ins_head == NULL)
return (false);
+
+ /*
+ * In the extreme case, where the page is much larger than the maximum
+ * size, split as soon as there are 5 items on the page.
+ */
+#define WT_MAX_SPLIT_COUNT 5
+ if (page->memory_footprint > btree->maxleafpage * 2) {
+ for (count = 0, ins = ins_head->head[0];
+ ins != NULL;
+ ins = ins->next[0]) {
+ if (++count < WT_MAX_SPLIT_COUNT)
+ continue;
+
+ WT_STAT_CONN_INCR(session, cache_inmem_splittable);
+ WT_STAT_DATA_INCR(session, cache_inmem_splittable);
+ return (true);
+ }
+
+ return (false);
+ }
+
+ /*
+ * Rather than scanning the whole list, walk a higher level, which
+ * gives a sample of the items -- at level 0 we have all the items, at
+ * level 1 we have 1/4 and at level 2 we have 1/16th. If we see more
+ * than 30 items and more data than would fit in a disk page, split.
+ */
+#define WT_MIN_SPLIT_DEPTH 2
+#define WT_MIN_SPLIT_COUNT 30
+#define WT_MIN_SPLIT_MULTIPLIER 16 /* At level 2, we see 1/16th entries */
+
for (count = 0, size = 0, ins = ins_head->head[WT_MIN_SPLIT_DEPTH];
- ins != NULL; ins = ins->next[WT_MIN_SPLIT_DEPTH]) {
+ ins != NULL;
+ ins = ins->next[WT_MIN_SPLIT_DEPTH]) {
count += WT_MIN_SPLIT_MULTIPLIER;
size += WT_MIN_SPLIT_MULTIPLIER *
(WT_INSERT_KEY_SIZE(ins) + WT_UPDATE_MEMSIZE(ins->upd));
diff --git a/src/include/cache.h b/src/include/cache.h
index b24b625aec4..9a2b83b5b57 100644
--- a/src/include/cache.h
+++ b/src/include/cache.h
@@ -91,6 +91,7 @@ struct __wt_cache {
uint64_t read_gen; /* Current page read generation */
uint64_t read_gen_oldest; /* Oldest read generation the eviction
* server saw in its last queue load */
+ uint64_t evict_pass_gen; /* Number of eviction passes */
/*
* Eviction thread information.
diff --git a/src/include/connection.h b/src/include/connection.h
index ce81dcf5976..d7c3bf69686 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -285,13 +285,7 @@ struct __wt_connection_impl {
uint64_t ckpt_time_recent; /* Checkpoint time recent/total */
uint64_t ckpt_time_total;
-#define WT_CONN_STAT_ALL 0x01 /* "all" statistics configured */
-#define WT_CONN_STAT_CLEAR 0x02 /* clear after gathering */
-#define WT_CONN_STAT_FAST 0x04 /* "fast" statistics configured */
-#define WT_CONN_STAT_JSON 0x08 /* output JSON format */
-#define WT_CONN_STAT_ON_CLOSE 0x10 /* output statistics on close */
-#define WT_CONN_STAT_SIZE 0x20 /* "size" statistics configured */
- uint32_t stat_flags;
+ uint32_t stat_flags; /* Options declared in flags.py */
/* Connection statistics */
WT_CONNECTION_STATS *stats[WT_COUNTER_SLOTS];
diff --git a/src/include/cursor.h b/src/include/cursor.h
index f1fa4d193ac..e322a53a65d 100644
--- a/src/include/cursor.h
+++ b/src/include/cursor.h
@@ -467,7 +467,7 @@ struct __wt_cursor_stat {
uint64_t v; /* Current stats value */
WT_ITEM pv; /* Current stats value (string) */
- /* Uses the same values as WT_CONNECTION::stat_flags field */
+ /* Options declared in flags.py, shared by WT_CONNECTION::stat_flags */
uint32_t flags;
};
diff --git a/src/include/extern.h b/src/include/extern.h
index e3cffa4ca3c..79e6405e148 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -282,6 +282,7 @@ extern int __wt_curbulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bo
extern int __wt_curconfig_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_curds_open( WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_DATA_SOURCE *dsrc, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curfile_next_random(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_curfile_update_check(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -352,6 +353,7 @@ extern void __wt_evict_priority_clear(WT_SESSION_IMPL *session);
extern int __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void __wt_curstat_cache_walk(WT_SESSION_IMPL *session);
extern void __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn);
extern int __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn);
@@ -723,7 +725,7 @@ extern int __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_F
extern void __wt_txn_release(WT_SESSION_IMPL *session);
extern int __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_init(WT_SESSION_IMPL *session, WT_SESSION_IMPL *session_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_txn_stats_update(WT_SESSION_IMPL *session);
extern void __wt_txn_destroy(WT_SESSION_IMPL *session);
extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/include/flags.h b/src/include/flags.h
index 5d718da473d..b0d167525b2 100644
--- a/src/include/flags.h
+++ b/src/include/flags.h
@@ -70,6 +70,14 @@
#define WT_SESSION_NO_SCHEMA_LOCK 0x00020000
#define WT_SESSION_QUIET_CORRUPT_FILE 0x00040000
#define WT_SESSION_SERVER_ASYNC 0x00080000
+#define WT_STAT_CLEAR 0x00000001
+#define WT_STAT_JSON 0x00000002
+#define WT_STAT_ON_CLOSE 0x00000004
+#define WT_STAT_TYPE_ALL 0x00000008
+#define WT_STAT_TYPE_CACHE_WALK 0x00000010
+#define WT_STAT_TYPE_FAST 0x00000020
+#define WT_STAT_TYPE_SIZE 0x00000040
+#define WT_STAT_TYPE_TREE_WALK 0x00000080
#define WT_TXN_LOG_CKPT_CLEANUP 0x00000001
#define WT_TXN_LOG_CKPT_PREPARE 0x00000002
#define WT_TXN_LOG_CKPT_START 0x00000004
diff --git a/src/include/lsm.h b/src/include/lsm.h
index 2550ca444c1..b433e4c3c44 100644
--- a/src/include/lsm.h
+++ b/src/include/lsm.h
@@ -31,6 +31,17 @@ struct __wt_lsm_worker_args {
};
/*
+ * WT_LSM_CURSOR_CHUNK --
+ * Per-chunk state for an LSM cursor: the access points for a single chunk.
+ */
+struct __wt_lsm_cursor_chunk {
+ WT_BLOOM *bloom; /* Bloom filter handle for the chunk */
+ WT_CURSOR *cursor; /* Cursor handle for the chunk */
+ uint64_t count; /* Number of items in the chunk */
+ uint64_t switch_txn; /* Switch txn for the chunk */
+};
+
+/*
* WT_CURSOR_LSM --
* An LSM cursor.
*/
@@ -43,17 +54,12 @@ struct __wt_cursor_lsm {
u_int nchunks; /* Number of chunks in the cursor */
u_int nupdates; /* Updates needed (including
snapshot isolation checks). */
- WT_BLOOM **blooms; /* Bloom filter handles. */
- size_t bloom_alloc;
-
- WT_CURSOR **cursors; /* Cursor handles. */
- size_t cursor_alloc;
-
- WT_CURSOR *current; /* The current cursor for iteration */
+ WT_CURSOR *current; /* The current cursor for iteration */
WT_LSM_CHUNK *primary_chunk; /* The current primary chunk */
- uint64_t *switch_txn; /* Switch txn for each chunk */
- size_t txnid_alloc;
+ WT_LSM_CURSOR_CHUNK **chunks; /* Array of LSM cursor units */
+ size_t chunks_alloc; /* Current size iterators array */
+ size_t chunks_count; /* Current number of iterators */
u_int update_count; /* Updates performed. */
diff --git a/src/include/mutex.h b/src/include/mutex.h
index f0f8173bad4..b736d6ee9fb 100644
--- a/src/include/mutex.h
+++ b/src/include/mutex.h
@@ -74,6 +74,16 @@ struct __wt_rwlock {
struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_spinlock {
volatile int lock;
+
+ /*
+ * We track acquisitions and time spent waiting for some locks. For
+ * performance reasons and to make it possible to write generic code
+ * that tracks statistics for different locks, we store the offset
+ * of the statistics fields to be updated during lock acquisition.
+ */
+ int16_t stat_count_off; /* acquisitions offset */
+ int16_t stat_app_usecs_off; /* waiting application threads offset */
+ int16_t stat_int_usecs_off; /* waiting server threads offset */
};
#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\
@@ -83,7 +93,17 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_spinlock {
struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_spinlock {
wt_mutex_t lock;
- const char *name; /* Statistics: mutex name */
+ const char *name; /* Mutex name */
+
+ /*
+ * We track acquisitions and time spent waiting for some locks. For
+ * performance reasons and to make it possible to write generic code
+ * that tracks statistics for different locks, we store the offset
+ * of the statistics fields to be updated during lock acquisition.
+ */
+ int16_t stat_count_off; /* acquisitions offset */
+ int16_t stat_app_usecs_off; /* waiting application threads offset */
+ int16_t stat_int_usecs_off; /* waiting server threads offset */
int8_t initialized; /* Lock initialized, for cleanup */
};
diff --git a/src/include/mutex.i b/src/include/mutex.i
index cb1847d9991..a6309e0976b 100644
--- a/src/include/mutex.i
+++ b/src/include/mutex.i
@@ -32,6 +32,7 @@ __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name)
WT_UNUSED(name);
t->lock = 0;
+ t->stat_count_off = t->stat_app_usecs_off = t->stat_int_usecs_off = -1;
return (0);
}
@@ -111,6 +112,7 @@ __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name)
#endif
t->name = name;
+ t->stat_count_off = t->stat_app_usecs_off = t->stat_int_usecs_off = -1;
t->initialized = 1;
WT_UNUSED(session);
@@ -255,3 +257,46 @@ __wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
#error Unknown spinlock type
#endif
+
+/*
+ * WT_SPIN_INIT_TRACKED --
+ * Spinlock initialization, with tracking: save the offsets of the
+ * connection's lock_<name>_{count,wait_application,wait_internal} statistics.
+ * Implemented as a macro so we can pass in a statistics field and convert
+ * it into a statistics structure array offset.
+ */
+#define WT_SPIN_INIT_TRACKED(session, t, name) do { \
+ WT_RET(__wt_spin_init(session, t, #name)); \
+ (t)->stat_count_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \
+ S2C(session)->stats, lock_##name##_count); \
+ (t)->stat_app_usecs_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \
+ S2C(session)->stats, lock_##name##_wait_application); \
+ (t)->stat_int_usecs_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \
+ S2C(session)->stats, lock_##name##_wait_internal); \
+} while (0)
+
+/*
+ * __wt_spin_lock_track --
+ * Acquire a spinlock; if tracked, count it and record the microseconds waited.
+ */
+static inline void
+__wt_spin_lock_track(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
+{
+ struct timespec enter, leave;
+ int64_t **stats;
+
+ if (t->stat_count_off != -1 && WT_STAT_ENABLED(session)) {
+ __wt_epoch(session, &enter);
+ __wt_spin_lock(session, t);
+ __wt_epoch(session, &leave);
+ stats = (int64_t **)S2C(session)->stats;
+ stats[session->stat_bucket][t->stat_count_off]++;
+ if (F_ISSET(session, WT_SESSION_INTERNAL))
+ stats[session->stat_bucket][t->stat_int_usecs_off] +=
+ (int64_t)WT_TIMEDIFF_US(leave, enter);
+ else
+ stats[session->stat_bucket][t->stat_app_usecs_off] +=
+ (int64_t)WT_TIMEDIFF_US(leave, enter);
+ } else
+ __wt_spin_lock(session, t);
+}
diff --git a/src/include/schema.h b/src/include/schema.h
index f93c596e2ca..6a5ce67a867 100644
--- a/src/include/schema.h
+++ b/src/include/schema.h
@@ -86,11 +86,11 @@ struct __wt_table {
if (F_ISSET(session, (flag))) { \
op; \
} else { \
- __wt_spin_lock(session, (lock)); \
+ __wt_spin_lock_track(session, lock); \
F_SET(session, (flag)); \
op; \
F_CLR(session, (flag)); \
- __wt_spin_unlock(session, (lock)); \
+ __wt_spin_unlock(session, lock); \
} \
} while (0)
@@ -102,11 +102,11 @@ struct __wt_table {
ret = 0; \
if (!F_ISSET(session, (flag)) && \
F_ISSET(session, WT_SESSION_LOCK_NO_WAIT)) { \
- if ((ret = __wt_spin_trylock(session, (lock))) == 0) { \
+ if ((ret = __wt_spin_trylock(session, lock)) == 0) { \
F_SET(session, (flag)); \
op; \
F_CLR(session, (flag)); \
- __wt_spin_unlock(session, (lock)); \
+ __wt_spin_unlock(session, lock); \
} \
} else \
WT_WITH_LOCK_WAIT(session, lock, flag, op); \
diff --git a/src/include/session.h b/src/include/session.h
index aa51dae58c4..3f9f495c134 100644
--- a/src/include/session.h
+++ b/src/include/session.h
@@ -147,6 +147,9 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl {
void *reconcile; /* Reconciliation support */
int (*reconcile_cleanup)(WT_SESSION_IMPL *);
+ /* Each session has an associated statistics bucket based on its ID. */
+ u_int stat_bucket; /* Statistics bucket offset */
+
uint32_t flags;
/*
diff --git a/src/include/stat.h b/src/include/stat.h
index 68879206851..d0b0b60585a 100644
--- a/src/include/stat.h
+++ b/src/include/stat.h
@@ -79,9 +79,9 @@
* those structures regardless of the specific statistic structure we're working
* with, by translating statistics structure field names to structure offsets.
*
- * Translate a statistic's value name to an offset.
+ * Translate a statistic's value name to an offset in the array.
*/
-#define WT_STATS_FIELD_TO_SLOT(stats, fld) \
+#define WT_STATS_FIELD_TO_OFFSET(stats, fld) \
(int)(&(stats)[0]->fld - (int64_t *)(stats)[0])
/*
@@ -140,38 +140,54 @@ __wt_stats_clear(void *stats_arg, int slot)
#define WT_STAT_ENABLED(session) (S2C(session)->stat_flags != 0)
#define WT_STAT_READ(stats, fld) \
- __wt_stats_aggregate(stats, WT_STATS_FIELD_TO_SLOT(stats, fld))
+ __wt_stats_aggregate(stats, WT_STATS_FIELD_TO_OFFSET(stats, fld))
#define WT_STAT_WRITE(session, stats, fld, v) do { \
if (WT_STAT_ENABLED(session)) \
(stats)->fld = (int64_t)(v); \
} while (0)
-#define WT_STAT_DECRV(session, stats, fld, value) do { \
+#define WT_STAT_DECRV_BASE(session, stat, fld, value) do { \
if (WT_STAT_ENABLED(session)) \
- (stats)[WT_STATS_SLOT_ID(session)]->fld -= (int64_t)(value); \
+ (stat)->fld -= (int64_t)(value); \
} while (0)
-#define WT_STAT_DECRV_ATOMIC(session, stats, fld, value) do { \
+#define WT_STAT_DECRV_ATOMIC_BASE(session, stat, fld, value) do { \
+ if (WT_STAT_ENABLED(session)) \
+ __wt_atomic_subi64(&(stat)->fld, (int64_t)(value)); \
+} while (0)
+#define WT_STAT_INCRV_BASE(session, stat, fld, value) do { \
+ if (WT_STAT_ENABLED(session)) \
+ (stat)->fld += (int64_t)(value); \
+} while (0)
+#define WT_STAT_INCRV_ATOMIC_BASE(session, stat, fld, value) do { \
if (WT_STAT_ENABLED(session)) \
- __wt_atomic_subi64(&(stats)[WT_STATS_SLOT_ID(session)]->fld, \
- (int64_t)(value)); \
+ __wt_atomic_addi64(&(stat)->fld, (int64_t)(value)); \
+} while (0)
+
+#define WT_STAT_DECRV(session, stats, fld, value) do { \
+ WT_STAT_DECRV_BASE( \
+ session, (stats)[(session)->stat_bucket], fld, value); \
+} while (0)
+#define WT_STAT_DECRV_ATOMIC(session, stats, fld, value) do { \
+ WT_STAT_DECRV_ATOMIC_BASE( \
+ session, (stats)[(session)->stat_bucket], fld, value); \
} while (0)
#define WT_STAT_DECR(session, stats, fld) \
WT_STAT_DECRV(session, stats, fld, 1)
+
#define WT_STAT_INCRV(session, stats, fld, value) do { \
- if (WT_STAT_ENABLED(session)) \
- (stats)[WT_STATS_SLOT_ID(session)]->fld += (int64_t)(value); \
+ WT_STAT_INCRV_BASE( \
+ session, (stats)[(session)->stat_bucket], fld, value); \
} while (0)
#define WT_STAT_INCRV_ATOMIC(session, stats, fld, value) do { \
- if (WT_STAT_ENABLED(session)) \
- __wt_atomic_addi64(&(stats)[WT_STATS_SLOT_ID(session)]->fld, \
- (int64_t)(value)); \
+ WT_STAT_INCRV_ATOMIC_BASE( \
+ session, (stats)[(session)->stat_bucket], fld, value); \
} while (0)
#define WT_STAT_INCR(session, stats, fld) \
WT_STAT_INCRV(session, stats, fld, 1)
#define WT_STAT_SET(session, stats, fld, value) do { \
if (WT_STAT_ENABLED(session)) { \
__wt_stats_clear(stats, \
- WT_STATS_FIELD_TO_SLOT(stats, fld)); \
+ WT_STATS_FIELD_TO_OFFSET(stats, fld)); \
(stats)[0]->fld = (int64_t)(value); \
} \
} while (0)
@@ -179,18 +195,24 @@ __wt_stats_clear(void *stats_arg, int slot)
/*
* Update connection handle statistics if statistics gathering is enabled.
*/
-#define WT_STAT_CONN_DECR(session, fld) \
- WT_STAT_DECR(session, S2C(session)->stats, fld)
-#define WT_STAT_CONN_DECR_ATOMIC(session, fld) \
- WT_STAT_DECRV_ATOMIC(session, S2C(session)->stats, fld, 1)
#define WT_STAT_CONN_DECRV(session, fld, value) \
- WT_STAT_DECRV(session, S2C(session)->stats, fld, value)
-#define WT_STAT_CONN_INCR(session, fld) \
- WT_STAT_INCR(session, S2C(session)->stats, fld)
-#define WT_STAT_CONN_INCR_ATOMIC(session, fld) \
- WT_STAT_INCRV_ATOMIC(session, S2C(session)->stats, fld, 1)
+ WT_STAT_DECRV_BASE(session, \
+ S2C(session)->stats[(session)->stat_bucket], fld, value)
+#define WT_STAT_CONN_DECR_ATOMIC(session, fld) \
+ WT_STAT_DECRV_ATOMIC_BASE(session, \
+ S2C(session)->stats[(session)->stat_bucket], fld, 1)
+#define WT_STAT_CONN_DECR(session, fld) \
+ WT_STAT_CONN_DECRV(session, fld, 1)
+
#define WT_STAT_CONN_INCRV(session, fld, value) \
- WT_STAT_INCRV(session, S2C(session)->stats, fld, value)
+ WT_STAT_INCRV_BASE(session, \
+ S2C(session)->stats[(session)->stat_bucket], fld, value)
+#define WT_STAT_CONN_INCR_ATOMIC(session, fld) \
+ WT_STAT_INCRV_ATOMIC_BASE(session, \
+ S2C(session)->stats[(session)->stat_bucket], fld, 1)
+#define WT_STAT_CONN_INCR(session, fld) \
+ WT_STAT_CONN_INCRV(session, fld, 1)
+
#define WT_STAT_CONN_SET(session, fld, value) \
WT_STAT_SET(session, S2C(session)->stats, fld, value)
@@ -263,6 +285,10 @@ struct __wt_connection_stats {
int64_t block_byte_write_checkpoint;
int64_t block_map_read;
int64_t block_byte_map_read;
+ int64_t cache_read_app_count;
+ int64_t cache_read_app_time;
+ int64_t cache_write_app_count;
+ int64_t cache_write_app_time;
int64_t cache_bytes_image;
int64_t cache_bytes_inuse;
int64_t cache_bytes_other;
@@ -356,6 +382,21 @@ struct __wt_connection_stats {
int64_t dh_sweeps;
int64_t dh_session_handles;
int64_t dh_session_sweeps;
+ int64_t lock_checkpoint_count;
+ int64_t lock_checkpoint_wait_application;
+ int64_t lock_checkpoint_wait_internal;
+ int64_t lock_handle_list_count;
+ int64_t lock_handle_list_wait_application;
+ int64_t lock_handle_list_wait_internal;
+ int64_t lock_metadata_count;
+ int64_t lock_metadata_wait_application;
+ int64_t lock_metadata_wait_internal;
+ int64_t lock_schema_count;
+ int64_t lock_schema_wait_application;
+ int64_t lock_schema_wait_internal;
+ int64_t lock_table_count;
+ int64_t lock_table_wait_application;
+ int64_t lock_table_wait_internal;
int64_t log_slot_switch_busy;
int64_t log_slot_closes;
int64_t log_slot_races;
@@ -518,6 +559,24 @@ struct __wt_dsrc_stats {
int64_t cache_write;
int64_t cache_write_restore;
int64_t cache_eviction_clean;
+ int64_t cache_state_gen_avg_gap;
+ int64_t cache_state_avg_written_size;
+ int64_t cache_state_pages_clean;
+ int64_t cache_state_gen_current;
+ int64_t cache_state_pages_dirty;
+ int64_t cache_state_root_entries;
+ int64_t cache_state_pages_internal;
+ int64_t cache_state_pages_leaf;
+ int64_t cache_state_gen_max_gap;
+ int64_t cache_state_max_pagesize;
+ int64_t cache_state_min_written_size;
+ int64_t cache_state_smaller_alloc_size;
+ int64_t cache_state_memory;
+ int64_t cache_state_queued;
+ int64_t cache_state_not_queueable;
+ int64_t cache_state_refs_skipped;
+ int64_t cache_state_root_size;
+ int64_t cache_state_pages;
int64_t compress_read;
int64_t compress_write;
int64_t compress_write_fail;
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index 2b71a580532..b6185b4ead6 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -962,8 +962,9 @@ struct __wt_session {
* where appropriate (for example\, a cache size statistic is not
* cleared\, while the count of cursor insert operations will be
* cleared). See @ref statistics for more information., a list\, with
- * values chosen from the following options: \c "all"\, \c "fast"\, \c
- * "clear"\, \c "size"; default empty.}
+ * values chosen from the following options: \c "all"\, \c
+ * "cache_walk"\, \c "fast"\, \c "clear"\, \c "size"\, \c "tree_walk";
+ * default empty.}
* @config{target, if non-empty\, backup the list of objects; valid only
* for a backup data source., a list of strings; default empty.}
* @configend
@@ -1004,9 +1005,9 @@ struct __wt_session {
* @config{block_compressor, configure a compressor for file blocks.
* Permitted values are \c "none" or custom compression engine name
* created with WT_CONNECTION::add_compressor. If WiredTiger has
- * builtin support for \c "snappy"\, \c "lz4" or \c "zlib" compression\,
- * these names are also available. See @ref compression for more
- * information., a string; default \c none.}
+ * builtin support for \c "lz4"\, \c "snappy"\, \c "zlib" or \c "zstd"
+ * compression\, these names are also available. See @ref compression
+ * for more information., a string; default \c none.}
* @config{cache_resident, do not ever evict the object's pages from
* cache. Not compatible with LSM tables; see @ref
* tuning_cache_resident for more information., a boolean flag; default
@@ -1903,8 +1904,9 @@ struct __wt_connection {
* reset each time a statistics cursor is used to gather statistics\, as
* well as each time statistics are logged using the \c statistics_log
* configuration. See @ref statistics for more information., a list\,
- * with values chosen from the following options: \c "all"\, \c "fast"\,
- * \c "none"\, \c "clear"; default \c none.}
+ * with values chosen from the following options: \c "all"\, \c
+ * "cache_walk"\, \c "fast"\, \c "none"\, \c "clear"\, \c "tree_walk";
+ * default \c none.}
* @config{statistics_log = (, log any statistics the database is
* configured to maintain\, to a file. See @ref statistics for more
* information. Enabling the statistics log server uses a session from
@@ -2336,11 +2338,11 @@ struct __wt_connection {
* @config{&nbsp;&nbsp;&nbsp;&nbsp;compressor, configure a compressor for log
* records. Permitted values are \c "none" or custom compression engine name
* created with WT_CONNECTION::add_compressor. If WiredTiger has builtin
- * support for \c "snappy"\, \c "lz4" or \c "zlib" compression\, these names are
- * also available. See @ref compression for more information., a string;
- * default \c none.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, enable logging
- * subsystem., a boolean flag; default \c false.}
+ * support for \c "lz4"\, \c "snappy"\, \c "zlib" or \c "zstd" compression\,
+ * these names are also available. See @ref compression for more information.,
+ * a string; default \c none.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, enable
+ * logging subsystem., a boolean flag; default \c false.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;file_max, the maximum size of log files., an
* integer between 100KB and 2GB; default \c 100MB.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the name of a directory into which log
@@ -2406,8 +2408,9 @@ struct __wt_connection {
* statistics are reset each time a statistics cursor is used to gather
* statistics\, as well as each time statistics are logged using the \c
* statistics_log configuration. See @ref statistics for more information., a
- * list\, with values chosen from the following options: \c "all"\, \c "fast"\,
- * \c "none"\, \c "clear"; default \c none.}
+ * list\, with values chosen from the following options: \c "all"\, \c
+ * "cache_walk"\, \c "fast"\, \c "none"\, \c "clear"\, \c "tree_walk"; default
+ * \c none.}
* @config{statistics_log = (, log any statistics the database is configured to
* maintain\, to a file. See @ref statistics for more information. Enabling
* the statistics log server uses a session from the configured session_max., a
@@ -4277,393 +4280,437 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_BLOCK_MAP_READ 1029
/*! block-manager: mapped bytes read */
#define WT_STAT_CONN_BLOCK_BYTE_MAP_READ 1030
+/*! cache: application threads page read from disk to cache count */
+#define WT_STAT_CONN_CACHE_READ_APP_COUNT 1031
+/*! cache: application threads page read from disk to cache time (usecs) */
+#define WT_STAT_CONN_CACHE_READ_APP_TIME 1032
+/*! cache: application threads page write from cache to disk count */
+#define WT_STAT_CONN_CACHE_WRITE_APP_COUNT 1033
+/*! cache: application threads page write from cache to disk time (usecs) */
+#define WT_STAT_CONN_CACHE_WRITE_APP_TIME 1034
/*! cache: bytes belonging to page images in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_IMAGE 1031
+#define WT_STAT_CONN_CACHE_BYTES_IMAGE 1035
/*! cache: bytes currently in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INUSE 1032
+#define WT_STAT_CONN_CACHE_BYTES_INUSE 1036
/*! cache: bytes not belonging to page images in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_OTHER 1033
+#define WT_STAT_CONN_CACHE_BYTES_OTHER 1037
/*! cache: bytes read into cache */
-#define WT_STAT_CONN_CACHE_BYTES_READ 1034
+#define WT_STAT_CONN_CACHE_BYTES_READ 1038
/*! cache: bytes written from cache */
-#define WT_STAT_CONN_CACHE_BYTES_WRITE 1035
+#define WT_STAT_CONN_CACHE_BYTES_WRITE 1039
/*! cache: checkpoint blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1036
+#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1040
/*! cache: eviction calls to get a page */
-#define WT_STAT_CONN_CACHE_EVICTION_GET_REF 1037
+#define WT_STAT_CONN_CACHE_EVICTION_GET_REF 1041
/*! cache: eviction calls to get a page found queue empty */
-#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY 1038
+#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY 1042
/*! cache: eviction calls to get a page found queue empty after locking */
-#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY2 1039
+#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY2 1043
/*! cache: eviction currently operating in aggressive mode */
-#define WT_STAT_CONN_CACHE_EVICTION_AGGRESSIVE_SET 1040
+#define WT_STAT_CONN_CACHE_EVICTION_AGGRESSIVE_SET 1044
/*! cache: eviction empty score */
-#define WT_STAT_CONN_CACHE_EVICTION_EMPTY_SCORE 1041
+#define WT_STAT_CONN_CACHE_EVICTION_EMPTY_SCORE 1045
/*! cache: eviction server candidate queue empty when topping up */
-#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1042
+#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1046
/*! cache: eviction server candidate queue not empty when topping up */
-#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1043
+#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1047
/*! cache: eviction server evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1044
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1048
/*!
* cache: eviction server slept, because we did not make progress with
* eviction
*/
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_SLEPT 1045
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_SLEPT 1049
/*! cache: eviction server unable to reach eviction goal */
-#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1046
+#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1050
/*! cache: eviction state */
-#define WT_STAT_CONN_CACHE_EVICTION_STATE 1047
+#define WT_STAT_CONN_CACHE_EVICTION_STATE 1051
/*! cache: eviction walks abandoned */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ABANDONED 1048
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ABANDONED 1052
/*! cache: eviction worker thread evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1049
+#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1053
/*! cache: failed eviction of pages that exceeded the in-memory maximum */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1050
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1054
/*! cache: files with active eviction walks */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1051
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1055
/*! cache: files with new eviction walks started */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1052
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1056
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1053
+#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1057
/*! cache: hazard pointer check calls */
-#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1054
+#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1058
/*! cache: hazard pointer check entries walked */
-#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1055
+#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1059
/*! cache: hazard pointer maximum array length */
-#define WT_STAT_CONN_CACHE_HAZARD_MAX 1056
+#define WT_STAT_CONN_CACHE_HAZARD_MAX 1060
/*! cache: in-memory page passed criteria to be split */
-#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1057
+#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1061
/*! cache: in-memory page splits */
-#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1058
+#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1062
/*! cache: internal pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1059
+#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1063
/*! cache: internal pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1060
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1064
/*! cache: leaf pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1061
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1065
/*! cache: lookaside table insert calls */
-#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1062
+#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1066
/*! cache: lookaside table remove calls */
-#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1063
+#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1067
/*! cache: maximum bytes configured */
-#define WT_STAT_CONN_CACHE_BYTES_MAX 1064
+#define WT_STAT_CONN_CACHE_BYTES_MAX 1068
/*! cache: maximum page size at eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1065
+#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1069
/*! cache: modified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1066
+#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1070
/*! cache: modified pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1067
+#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1071
/*! cache: overflow pages read into cache */
-#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1068
+#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1072
/*! cache: overflow values cached in memory */
-#define WT_STAT_CONN_CACHE_OVERFLOW_VALUE 1069
+#define WT_STAT_CONN_CACHE_OVERFLOW_VALUE 1073
/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1070
+#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1074
/*! cache: page written requiring lookaside records */
-#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1071
+#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1075
/*! cache: pages currently held in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_INUSE 1072
+#define WT_STAT_CONN_CACHE_PAGES_INUSE 1076
/*! cache: pages evicted because they exceeded the in-memory maximum */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1073
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1077
/*! cache: pages evicted because they had chains of deleted items */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1074
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1078
/*! cache: pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP 1075
+#define WT_STAT_CONN_CACHE_EVICTION_APP 1079
/*! cache: pages queued for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1076
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1080
/*! cache: pages queued for urgent eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1077
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1081
/*! cache: pages queued for urgent eviction during walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1078
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1082
/*! cache: pages read into cache */
-#define WT_STAT_CONN_CACHE_READ 1079
+#define WT_STAT_CONN_CACHE_READ 1083
/*! cache: pages read into cache requiring lookaside entries */
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1080
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1084
/*! cache: pages requested from the cache */
-#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1081
+#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1085
/*! cache: pages seen by eviction walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1082
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1086
/*! cache: pages selected for eviction unable to be evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1083
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1087
/*! cache: pages walked for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK 1084
+#define WT_STAT_CONN_CACHE_EVICTION_WALK 1088
/*! cache: pages written from cache */
-#define WT_STAT_CONN_CACHE_WRITE 1085
+#define WT_STAT_CONN_CACHE_WRITE 1089
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1086
+#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1090
/*! cache: percentage overhead */
-#define WT_STAT_CONN_CACHE_OVERHEAD 1087
+#define WT_STAT_CONN_CACHE_OVERHEAD 1091
/*! cache: tracked bytes belonging to internal pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1088
+#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1092
/*! cache: tracked bytes belonging to leaf pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_LEAF 1089
+#define WT_STAT_CONN_CACHE_BYTES_LEAF 1093
/*! cache: tracked dirty bytes in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1090
+#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1094
/*! cache: tracked dirty pages in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1091
+#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1095
/*! cache: unmodified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1092
+#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1096
/*! connection: auto adjusting condition resets */
-#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1093
+#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1097
/*! connection: auto adjusting condition wait calls */
-#define WT_STAT_CONN_COND_AUTO_WAIT 1094
+#define WT_STAT_CONN_COND_AUTO_WAIT 1098
/*! connection: files currently open */
-#define WT_STAT_CONN_FILE_OPEN 1095
+#define WT_STAT_CONN_FILE_OPEN 1099
/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1096
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1100
/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1097
+#define WT_STAT_CONN_MEMORY_FREE 1101
/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1098
+#define WT_STAT_CONN_MEMORY_GROW 1102
/*! connection: pthread mutex condition wait calls */
-#define WT_STAT_CONN_COND_WAIT 1099
+#define WT_STAT_CONN_COND_WAIT 1103
/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1100
+#define WT_STAT_CONN_RWLOCK_READ 1104
/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1101
+#define WT_STAT_CONN_RWLOCK_WRITE 1105
/*! connection: total fsync I/Os */
-#define WT_STAT_CONN_FSYNC_IO 1102
+#define WT_STAT_CONN_FSYNC_IO 1106
/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1103
+#define WT_STAT_CONN_READ_IO 1107
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1104
+#define WT_STAT_CONN_WRITE_IO 1108
/*! cursor: cursor create calls */
-#define WT_STAT_CONN_CURSOR_CREATE 1105
+#define WT_STAT_CONN_CURSOR_CREATE 1109
/*! cursor: cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT 1106
+#define WT_STAT_CONN_CURSOR_INSERT 1110
/*! cursor: cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT 1107
+#define WT_STAT_CONN_CURSOR_NEXT 1111
/*! cursor: cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 1108
+#define WT_STAT_CONN_CURSOR_PREV 1112
/*! cursor: cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 1109
+#define WT_STAT_CONN_CURSOR_REMOVE 1113
/*! cursor: cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 1110
+#define WT_STAT_CONN_CURSOR_RESET 1114
/*! cursor: cursor restarted searches */
-#define WT_STAT_CONN_CURSOR_RESTART 1111
+#define WT_STAT_CONN_CURSOR_RESTART 1115
/*! cursor: cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 1112
+#define WT_STAT_CONN_CURSOR_SEARCH 1116
/*! cursor: cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1113
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1117
/*! cursor: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1114
+#define WT_STAT_CONN_CURSOR_UPDATE 1118
/*! cursor: truncate calls */
-#define WT_STAT_CONN_CURSOR_TRUNCATE 1115
+#define WT_STAT_CONN_CURSOR_TRUNCATE 1119
/*! data-handle: connection data handles currently active */
-#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1116
+#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1120
/*! data-handle: connection sweep candidate became referenced */
-#define WT_STAT_CONN_DH_SWEEP_REF 1117
+#define WT_STAT_CONN_DH_SWEEP_REF 1121
/*! data-handle: connection sweep dhandles closed */
-#define WT_STAT_CONN_DH_SWEEP_CLOSE 1118
+#define WT_STAT_CONN_DH_SWEEP_CLOSE 1122
/*! data-handle: connection sweep dhandles removed from hash list */
-#define WT_STAT_CONN_DH_SWEEP_REMOVE 1119
+#define WT_STAT_CONN_DH_SWEEP_REMOVE 1123
/*! data-handle: connection sweep time-of-death sets */
-#define WT_STAT_CONN_DH_SWEEP_TOD 1120
+#define WT_STAT_CONN_DH_SWEEP_TOD 1124
/*! data-handle: connection sweeps */
-#define WT_STAT_CONN_DH_SWEEPS 1121
+#define WT_STAT_CONN_DH_SWEEPS 1125
/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1122
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1126
/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1123
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1127
+/*! lock: checkpoint lock acquisitions */
+#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1128
+/*! lock: checkpoint lock application thread wait time (usecs) */
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1129
+/*! lock: checkpoint lock internal thread wait time (usecs) */
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1130
+/*! lock: handle-list lock acquisitions */
+#define WT_STAT_CONN_LOCK_HANDLE_LIST_COUNT 1131
+/*! lock: handle-list lock application thread wait time (usecs) */
+#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_APPLICATION 1132
+/*! lock: handle-list lock internal thread wait time (usecs) */
+#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_INTERNAL 1133
+/*! lock: metadata lock acquisitions */
+#define WT_STAT_CONN_LOCK_METADATA_COUNT 1134
+/*! lock: metadata lock application thread wait time (usecs) */
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1135
+/*! lock: metadata lock internal thread wait time (usecs) */
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1136
+/*! lock: schema lock acquisitions */
+#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1137
+/*! lock: schema lock application thread wait time (usecs) */
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1138
+/*! lock: schema lock internal thread wait time (usecs) */
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1139
+/*! lock: table lock acquisitions */
+#define WT_STAT_CONN_LOCK_TABLE_COUNT 1140
+/*!
+ * lock: table lock application thread time waiting for the table lock
+ * (usecs)
+ */
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1141
+/*!
+ * lock: table lock internal thread time waiting for the table lock
+ * (usecs)
+ */
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1142
/*! log: busy returns attempting to switch slots */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1124
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1143
/*! log: consolidated slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1125
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1144
/*! log: consolidated slot join races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1126
+#define WT_STAT_CONN_LOG_SLOT_RACES 1145
/*! log: consolidated slot join transitions */
-#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1127
+#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1146
/*! log: consolidated slot joins */
-#define WT_STAT_CONN_LOG_SLOT_JOINS 1128
+#define WT_STAT_CONN_LOG_SLOT_JOINS 1147
/*! log: consolidated slot unbuffered writes */
-#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1129
+#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1148
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1130
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1149
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1131
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1150
/*! log: log files manually zero-filled */
-#define WT_STAT_CONN_LOG_ZERO_FILLS 1132
+#define WT_STAT_CONN_LOG_ZERO_FILLS 1151
/*! log: log flush operations */
-#define WT_STAT_CONN_LOG_FLUSH 1133
+#define WT_STAT_CONN_LOG_FLUSH 1152
/*! log: log force write operations */
-#define WT_STAT_CONN_LOG_FORCE_WRITE 1134
+#define WT_STAT_CONN_LOG_FORCE_WRITE 1153
/*! log: log force write operations skipped */
-#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1135
+#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1154
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1136
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1155
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1137
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1156
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1138
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1157
/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1139
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1158
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1140
+#define WT_STAT_CONN_LOG_SCANS 1159
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1141
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1160
/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1142
+#define WT_STAT_CONN_LOG_WRITE_LSN 1161
/*! log: log server thread write LSN walk skipped */
-#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1143
+#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1162
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1144
+#define WT_STAT_CONN_LOG_SYNC 1163
/*! log: log sync time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DURATION 1145
+#define WT_STAT_CONN_LOG_SYNC_DURATION 1164
/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1146
+#define WT_STAT_CONN_LOG_SYNC_DIR 1165
/*! log: log sync_dir time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1147
+#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1166
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1148
+#define WT_STAT_CONN_LOG_WRITES 1167
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1149
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1168
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1150
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1169
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1151
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1170
/*! log: pre-allocated log files not ready and missed */
-#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1152
+#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1171
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1153
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1172
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1154
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1173
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1155
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1174
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1156
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1175
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1157
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1176
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1158
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1177
/*! log: written slots coalesced */
-#define WT_STAT_CONN_LOG_SLOT_COALESCED 1159
+#define WT_STAT_CONN_LOG_SLOT_COALESCED 1178
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1160
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1179
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1161
+#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1180
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1162
+#define WT_STAT_CONN_REC_PAGES 1181
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1163
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1182
/*! reconciliation: pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE 1164
+#define WT_STAT_CONN_REC_PAGE_DELETE 1183
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1165
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1184
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1166
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1185
/*! session: open cursor count */
-#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1167
+#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1186
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1168
+#define WT_STAT_CONN_SESSION_OPEN 1187
/*! session: table compact failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1169
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1188
/*! session: table compact successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1170
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1189
/*! session: table create failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1171
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1190
/*! session: table create successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1172
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1191
/*! session: table drop failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1173
+#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1192
/*! session: table drop successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1174
+#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1193
/*! session: table rebalance failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1175
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1194
/*! session: table rebalance successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1176
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1195
/*! session: table rename failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1177
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1196
/*! session: table rename successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1178
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1197
/*! session: table salvage failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1179
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1198
/*! session: table salvage successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1180
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1199
/*! session: table truncate failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1181
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1200
/*! session: table truncate successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1182
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1201
/*! session: table verify failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1183
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1202
/*! session: table verify successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1184
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1203
/*! thread-state: active filesystem fsync calls */
-#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1185
+#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1204
/*! thread-state: active filesystem read calls */
-#define WT_STAT_CONN_THREAD_READ_ACTIVE 1186
+#define WT_STAT_CONN_THREAD_READ_ACTIVE 1205
/*! thread-state: active filesystem write calls */
-#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1187
+#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1206
/*! thread-yield: application thread time evicting (usecs) */
-#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1188
+#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1207
/*! thread-yield: application thread time waiting for cache (usecs) */
-#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1189
+#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1208
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1190
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1209
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1191
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1210
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1192
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1211
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1193
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1212
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1194
+#define WT_STAT_CONN_PAGE_SLEEP 1213
/*! transaction: number of named snapshots created */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1195
+#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1214
/*! transaction: number of named snapshots dropped */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1196
+#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1215
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1197
+#define WT_STAT_CONN_TXN_BEGIN 1216
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1198
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1217
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1199
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1218
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1200
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1219
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1201
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1220
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1202
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1221
/*! transaction: transaction checkpoint scrub dirty target */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1203
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1222
/*! transaction: transaction checkpoint scrub time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1204
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1223
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1205
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1224
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1206
+#define WT_STAT_CONN_TXN_CHECKPOINT 1225
/*!
* transaction: transaction checkpoints skipped because database was
* clean
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1207
+#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1226
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1208
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1227
/*!
* transaction: transaction fsync calls for checkpoint after allocating
* the transaction ID
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1209
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1228
/*!
* transaction: transaction fsync duration for checkpoint after
* allocating the transaction ID (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1210
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1229
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1211
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1230
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1212
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1231
/*!
* transaction: transaction range of IDs currently pinned by named
* snapshots
*/
-#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1213
+#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1232
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1214
+#define WT_STAT_CONN_TXN_SYNC 1233
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1215
+#define WT_STAT_CONN_TXN_COMMIT 1234
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1216
+#define WT_STAT_CONN_TXN_ROLLBACK 1235
/*!
* @}
@@ -4721,28 +4768,28 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
/*! btree: btree checkpoint generation */
#define WT_STAT_DSRC_BTREE_CHECKPOINT_GENERATION 2022
/*!
- * btree: column-store fixed-size leaf pages, only reported if
- * statistics=all is set
+ * btree: column-store fixed-size leaf pages, only reported if tree_walk
+ * or all statistics are enabled
*/
#define WT_STAT_DSRC_BTREE_COLUMN_FIX 2023
/*!
- * btree: column-store internal pages, only reported if statistics=all is
- * set
+ * btree: column-store internal pages, only reported if tree_walk or all
+ * statistics are enabled
*/
#define WT_STAT_DSRC_BTREE_COLUMN_INTERNAL 2024
/*!
* btree: column-store variable-size RLE encoded values, only reported if
- * statistics=all is set
+ * tree_walk or all statistics are enabled
*/
#define WT_STAT_DSRC_BTREE_COLUMN_RLE 2025
/*!
* btree: column-store variable-size deleted values, only reported if
- * statistics=all is set
+ * tree_walk or all statistics are enabled
*/
#define WT_STAT_DSRC_BTREE_COLUMN_DELETED 2026
/*!
* btree: column-store variable-size leaf pages, only reported if
- * statistics=all is set
+ * tree_walk or all statistics are enabled
*/
#define WT_STAT_DSRC_BTREE_COLUMN_VARIABLE 2027
/*! btree: fixed-record size */
@@ -4760,20 +4807,26 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
/*! btree: maximum tree depth */
#define WT_STAT_DSRC_BTREE_MAXIMUM_DEPTH 2034
/*!
- * btree: number of key/value pairs, only reported if statistics=all is
- * set
+ * btree: number of key/value pairs, only reported if tree_walk or all
+ * statistics are enabled
*/
#define WT_STAT_DSRC_BTREE_ENTRIES 2035
-/*! btree: overflow pages, only reported if statistics=all is set */
+/*!
+ * btree: overflow pages, only reported if tree_walk or all statistics
+ * are enabled
+ */
#define WT_STAT_DSRC_BTREE_OVERFLOW 2036
/*! btree: pages rewritten by compaction */
#define WT_STAT_DSRC_BTREE_COMPACT_REWRITE 2037
/*!
- * btree: row-store internal pages, only reported if statistics=all is
- * set
+ * btree: row-store internal pages, only reported if tree_walk or all
+ * statistics are enabled
*/
#define WT_STAT_DSRC_BTREE_ROW_INTERNAL 2038
-/*! btree: row-store leaf pages, only reported if statistics=all is set */
+/*!
+ * btree: row-store leaf pages, only reported if tree_walk or all
+ * statistics are enabled
+ */
#define WT_STAT_DSRC_BTREE_ROW_LEAF 2039
/*! cache: bytes currently in the cache */
#define WT_STAT_DSRC_CACHE_BYTES_INUSE 2040
@@ -4819,87 +4872,179 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2060
/*! cache: unmodified pages evicted */
#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2061
+/*!
+ * cache_walk: Average difference between current eviction generation
+ * when the page was last considered, only reported if cache_walk or all
+ * statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_GEN_AVG_GAP 2062
+/*!
+ * cache_walk: Average on-disk page image size seen, only reported if
+ * cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_AVG_WRITTEN_SIZE 2063
+/*!
+ * cache_walk: Clean pages currently in cache, only reported if
+ * cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_PAGES_CLEAN 2064
+/*!
+ * cache_walk: Current eviction generation, only reported if cache_walk
+ * or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_GEN_CURRENT 2065
+/*!
+ * cache_walk: Dirty pages currently in cache, only reported if
+ * cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_PAGES_DIRTY 2066
+/*!
+ * cache_walk: Entries in the root page, only reported if cache_walk or
+ * all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_ROOT_ENTRIES 2067
+/*!
+ * cache_walk: Internal pages currently in cache, only reported if
+ * cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_PAGES_INTERNAL 2068
+/*!
+ * cache_walk: Leaf pages currently in cache, only reported if cache_walk
+ * or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_PAGES_LEAF 2069
+/*!
+ * cache_walk: Maximum difference between current eviction generation
+ * when the page was last considered, only reported if cache_walk or all
+ * statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_GEN_MAX_GAP 2070
+/*!
+ * cache_walk: Maximum page size seen, only reported if cache_walk or all
+ * statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_MAX_PAGESIZE 2071
+/*!
+ * cache_walk: Minimum on-disk page image size seen, only reported if
+ * cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_MIN_WRITTEN_SIZE 2072
+/*!
+ * cache_walk: On-disk page image sizes smaller than a single allocation
+ * unit, only reported if cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_SMALLER_ALLOC_SIZE 2073
+/*!
+ * cache_walk: Pages created in memory and never written, only reported
+ * if cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_MEMORY 2074
+/*!
+ * cache_walk: Pages currently queued for eviction, only reported if
+ * cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_QUEUED 2075
+/*!
+ * cache_walk: Pages that could not be queued for eviction, only reported
+ * if cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_NOT_QUEUEABLE 2076
+/*!
+ * cache_walk: Refs skipped during cache traversal, only reported if
+ * cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_REFS_SKIPPED 2077
+/*!
+ * cache_walk: Size of the root page, only reported if cache_walk or all
+ * statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_ROOT_SIZE 2078
+/*!
+ * cache_walk: Total number of pages currently in cache, only reported if
+ * cache_walk or all statistics are enabled
+ */
+#define WT_STAT_DSRC_CACHE_STATE_PAGES 2079
/*! compression: compressed pages read */
-#define WT_STAT_DSRC_COMPRESS_READ 2062
+#define WT_STAT_DSRC_COMPRESS_READ 2080
/*! compression: compressed pages written */
-#define WT_STAT_DSRC_COMPRESS_WRITE 2063
+#define WT_STAT_DSRC_COMPRESS_WRITE 2081
/*! compression: page written failed to compress */
-#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2064
+#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2082
/*! compression: page written was too small to compress */
-#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2065
+#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2083
/*! compression: raw compression call failed, additional data available */
-#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2066
+#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2084
/*! compression: raw compression call failed, no additional data available */
-#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2067
+#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2085
/*! compression: raw compression call succeeded */
-#define WT_STAT_DSRC_COMPRESS_RAW_OK 2068
+#define WT_STAT_DSRC_COMPRESS_RAW_OK 2086
/*! cursor: bulk-loaded cursor-insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2069
+#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2087
/*! cursor: create calls */
-#define WT_STAT_DSRC_CURSOR_CREATE 2070
+#define WT_STAT_DSRC_CURSOR_CREATE 2088
/*! cursor: cursor-insert key and value bytes inserted */
-#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2071
+#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2089
/*! cursor: cursor-remove key bytes removed */
-#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2072
+#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2090
/*! cursor: cursor-update value bytes updated */
-#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2073
+#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2091
/*! cursor: insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT 2074
+#define WT_STAT_DSRC_CURSOR_INSERT 2092
/*! cursor: next calls */
-#define WT_STAT_DSRC_CURSOR_NEXT 2075
+#define WT_STAT_DSRC_CURSOR_NEXT 2093
/*! cursor: prev calls */
-#define WT_STAT_DSRC_CURSOR_PREV 2076
+#define WT_STAT_DSRC_CURSOR_PREV 2094
/*! cursor: remove calls */
-#define WT_STAT_DSRC_CURSOR_REMOVE 2077
+#define WT_STAT_DSRC_CURSOR_REMOVE 2095
/*! cursor: reset calls */
-#define WT_STAT_DSRC_CURSOR_RESET 2078
+#define WT_STAT_DSRC_CURSOR_RESET 2096
/*! cursor: restarted searches */
-#define WT_STAT_DSRC_CURSOR_RESTART 2079
+#define WT_STAT_DSRC_CURSOR_RESTART 2097
/*! cursor: search calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH 2080
+#define WT_STAT_DSRC_CURSOR_SEARCH 2098
/*! cursor: search near calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2081
+#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2099
/*! cursor: truncate calls */
-#define WT_STAT_DSRC_CURSOR_TRUNCATE 2082
+#define WT_STAT_DSRC_CURSOR_TRUNCATE 2100
/*! cursor: update calls */
-#define WT_STAT_DSRC_CURSOR_UPDATE 2083
+#define WT_STAT_DSRC_CURSOR_UPDATE 2101
/*! reconciliation: dictionary matches */
-#define WT_STAT_DSRC_REC_DICTIONARY 2084
+#define WT_STAT_DSRC_REC_DICTIONARY 2102
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2085
+#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2103
/*!
* reconciliation: internal page key bytes discarded using suffix
* compression
*/
-#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2086
+#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2104
/*! reconciliation: internal page multi-block writes */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2087
+#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2105
/*! reconciliation: internal-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2088
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2106
/*! reconciliation: leaf page key bytes discarded using prefix compression */
-#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2089
+#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2107
/*! reconciliation: leaf page multi-block writes */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2090
+#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2108
/*! reconciliation: leaf-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2091
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2109
/*! reconciliation: maximum blocks required for a page */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2092
+#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2110
/*! reconciliation: overflow values written */
-#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2093
+#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2111
/*! reconciliation: page checksum matches */
-#define WT_STAT_DSRC_REC_PAGE_MATCH 2094
+#define WT_STAT_DSRC_REC_PAGE_MATCH 2112
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_DSRC_REC_PAGES 2095
+#define WT_STAT_DSRC_REC_PAGES 2113
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_DSRC_REC_PAGES_EVICTION 2096
+#define WT_STAT_DSRC_REC_PAGES_EVICTION 2114
/*! reconciliation: pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE 2097
+#define WT_STAT_DSRC_REC_PAGE_DELETE 2115
/*! session: object compaction */
-#define WT_STAT_DSRC_SESSION_COMPACT 2098
+#define WT_STAT_DSRC_SESSION_COMPACT 2116
/*! session: open cursor count */
-#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2099
+#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2117
/*! transaction: update conflicts */
-#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2100
+#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2118
/*!
* @}
diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h
index 4e6699ab9d1..d354757c592 100644
--- a/src/include/wt_internal.h
+++ b/src/include/wt_internal.h
@@ -216,6 +216,8 @@ struct __wt_logslot;
typedef struct __wt_logslot WT_LOGSLOT;
struct __wt_lsm_chunk;
typedef struct __wt_lsm_chunk WT_LSM_CHUNK;
+struct __wt_lsm_cursor_chunk;
+ typedef struct __wt_lsm_cursor_chunk WT_LSM_CURSOR_CHUNK;
struct __wt_lsm_data_source;
typedef struct __wt_lsm_data_source WT_LSM_DATA_SOURCE;
struct __wt_lsm_manager;
diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c
index b9a6dd18b7a..067c527a21a 100644
--- a/src/lsm/lsm_cursor.c
+++ b/src/lsm/lsm_cursor.c
@@ -10,7 +10,7 @@
#define WT_FORALL_CURSORS(clsm, c, i) \
for ((i) = (clsm)->nchunks; (i) > 0;) \
- if (((c) = (clsm)->cursors[--i]) != NULL)
+ if (((c) = (clsm)->chunks[--i]->cursor) != NULL)
#define WT_LSM_CURCMP(s, lsm_tree, c1, c2, cmp) \
__wt_compare(s, (lsm_tree)->collator, &(c1)->key, &(c2)->key, &cmp)
@@ -18,6 +18,7 @@
static int __clsm_lookup(WT_CURSOR_LSM *, WT_ITEM *);
static int __clsm_open_cursors(WT_CURSOR_LSM *, bool, u_int, uint32_t);
static int __clsm_reset_cursors(WT_CURSOR_LSM *, WT_CURSOR *);
+static int __clsm_search_near(WT_CURSOR *cursor, int *exactp);
/*
* __wt_clsm_request_switch --
@@ -109,7 +110,7 @@ __clsm_enter_update(WT_CURSOR_LSM *clsm)
primary = NULL;
have_primary = false;
} else {
- primary = clsm->cursors[clsm->nchunks - 1];
+ primary = clsm->chunks[clsm->nchunks - 1]->cursor;
primary_chunk = clsm->primary_chunk;
WT_ASSERT(session, F_ISSET(&session->txn, WT_TXN_HAS_ID));
have_primary = (primary != NULL && primary_chunk != NULL &&
@@ -165,7 +166,7 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update)
WT_LSM_TREE *lsm_tree;
WT_SESSION_IMPL *session;
WT_TXN *txn;
- uint64_t pinned_id, *switchp;
+ uint64_t i, pinned_id , switch_txn;
lsm_tree = clsm->lsm_tree;
session = (WT_SESSION_IMPL *)clsm->iface.session;
@@ -238,15 +239,16 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update)
F_ISSET(txn, WT_TXN_HAS_SNAPSHOT));
pinned_id =
WT_SESSION_TXN_STATE(session)->pinned_id;
- for (switchp =
- &clsm->switch_txn[clsm->nchunks - 2];
+ for (i = clsm->nchunks - 2;
clsm->nupdates < clsm->nchunks;
- clsm->nupdates++, switchp--) {
- if (WT_TXNID_LT(*switchp, pinned_id))
+ clsm->nupdates++, i--) {
+ switch_txn =
+ clsm->chunks[i]->switch_txn;
+ if (WT_TXNID_LT(switch_txn, pinned_id))
break;
WT_ASSERT(session,
!__wt_txn_visible_all(
- session, *switchp));
+ session, switch_txn));
}
}
}
@@ -377,7 +379,7 @@ __clsm_close_cursors(WT_CURSOR_LSM *clsm, u_int start, u_int end)
WT_CURSOR *c;
u_int i;
- if (clsm->cursors == NULL || clsm->nchunks == 0)
+ if (clsm->chunks == NULL || clsm->nchunks == 0)
return (0);
/*
@@ -386,12 +388,12 @@ __clsm_close_cursors(WT_CURSOR_LSM *clsm, u_int start, u_int end)
* careful with unsigned integer wrapping.
*/
for (i = start; i < end; i++) {
- if ((c = (clsm)->cursors[i]) != NULL) {
- clsm->cursors[i] = NULL;
+ if ((c = (clsm)->chunks[i]->cursor) != NULL) {
+ clsm->chunks[i]->cursor = NULL;
WT_RET(c->close(c));
}
- if ((bloom = clsm->blooms[i]) != NULL) {
- clsm->blooms[i] = NULL;
+ if ((bloom = clsm->chunks[i]->bloom) != NULL) {
+ clsm->chunks[i]->bloom = NULL;
WT_RET(__wt_bloom_close(bloom));
}
}
@@ -400,6 +402,45 @@ __clsm_close_cursors(WT_CURSOR_LSM *clsm, u_int start, u_int end)
}
/*
+ * __clsm_resize_chunks --
+ * Grow the cursor's chunk array so it holds at least nchunks entries.
+ */
+static int
+__clsm_resize_chunks(
+ WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm, u_int nchunks)
+{
+ WT_DECL_RET;
+ WT_LSM_CURSOR_CHUNK *chunk;
+
+ /* Nothing to do if enough chunk entries are already allocated. */
+ if (clsm->chunks_count >= nchunks) {
+ return (ret);
+ }
+
+ WT_RET(__wt_realloc_def(session, &clsm->chunks_alloc, nchunks,
+ &clsm->chunks));
+ for (; clsm->chunks_count < nchunks; clsm->chunks_count++) {
+ WT_RET(__wt_calloc_one(session, &chunk));
+ clsm->chunks[clsm->chunks_count] = chunk;
+ }
+ return (ret);
+}
+
+/*
+ * __clsm_free_chunks --
+ * Free every per-chunk object, then the array that holds them.
+ */
+static void
+__clsm_free_chunks(WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm)
+{
+ size_t i;
+ for (i = 0; i < clsm->chunks_count; i++) {
+ __wt_free(session, clsm->chunks[i]);
+ }
+ __wt_free(session, clsm->chunks);
+}
+
+/*
* __clsm_open_cursors --
* Open cursors for the current set of files.
*/
@@ -408,7 +449,7 @@ __clsm_open_cursors(
WT_CURSOR_LSM *clsm, bool update, u_int start_chunk, uint32_t start_id)
{
WT_BTREE *btree;
- WT_CURSOR *c, **cp, *primary;
+ WT_CURSOR *c, *cursor, *primary;
WT_DECL_RET;
WT_LSM_CHUNK *chunk;
WT_LSM_TREE *lsm_tree;
@@ -421,6 +462,7 @@ __clsm_open_cursors(
bool locked;
c = &clsm->iface;
+ cursor = NULL;
session = (WT_SESSION_IMPL *)c->session;
txn = &session->txn;
chunk = NULL;
@@ -464,7 +506,7 @@ __clsm_open_cursors(
retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
nchunks = clsm->nchunks;
ngood = 0;
-
+ WT_ERR(__clsm_resize_chunks(session, clsm, nchunks));
/*
* We may have raced with another merge completing. Check that
* we're starting at the right offset in the chunk array.
@@ -485,16 +527,13 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
WT_ASSERT(session, start_chunk + nchunks <= lsm_tree->nchunks);
} else {
nchunks = lsm_tree->nchunks;
+ WT_ERR(__clsm_resize_chunks(session, clsm, nchunks));
/*
* If we are only opening the cursor for updates, only open the
* primary chunk, plus any other chunks that might be required
* to detect snapshot isolation conflicts.
*/
- if (F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT))
- WT_ERR(__wt_realloc_def(session,
- &clsm->txnid_alloc, nchunks,
- &clsm->switch_txn));
if (F_ISSET(clsm, WT_CLSM_OPEN_READ))
ngood = nupdates = 0;
else if (F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) {
@@ -503,11 +542,11 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
* chunk are globally visible. Copy the maximum
* transaction IDs into the cursor as we go.
*/
- for (ngood = nchunks - 1, nupdates = 1;
- ngood > 0;
+ for (ngood = nchunks - 1, nupdates = 1; ngood > 0;
ngood--, nupdates++) {
chunk = lsm_tree->chunk[ngood - 1];
- clsm->switch_txn[ngood - 1] = chunk->switch_txn;
+ clsm->chunks[ngood - 1]->switch_txn =
+ chunk->switch_txn;
if (__wt_txn_visible_all(
session, chunk->switch_txn))
break;
@@ -518,21 +557,20 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
}
/* Check how many cursors are already open. */
- for (cp = clsm->cursors + ngood;
- ngood < clsm->nchunks && ngood < nchunks;
- cp++, ngood++) {
+ for (; ngood < clsm->nchunks && ngood < nchunks; ngood++) {
chunk = lsm_tree->chunk[ngood];
+ cursor = clsm->chunks[ngood]->cursor;
/* If the cursor isn't open yet, we're done. */
- if (*cp == NULL)
+ if (cursor == NULL)
break;
/* Easy case: the URIs don't match. */
- if (strcmp((*cp)->uri, chunk->uri) != 0)
+ if (strcmp(cursor->uri, chunk->uri) != 0)
break;
/* Make sure the checkpoint config matches. */
- checkpoint = ((WT_CURSOR_BTREE *)*cp)->
+ checkpoint = ((WT_CURSOR_BTREE *)cursor)->
btree->dhandle->checkpoint;
if (checkpoint == NULL &&
F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) &&
@@ -540,7 +578,7 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
break;
/* Make sure the Bloom config matches. */
- if (clsm->blooms[ngood] == NULL &&
+ if (clsm->chunks[ngood]->bloom == NULL &&
F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
break;
}
@@ -558,7 +596,7 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
* full, we may block while closing a cursor. Save the
* generation number and retry if it has changed under us.
*/
- if (clsm->cursors != NULL && ngood < clsm->nchunks) {
+ if (clsm->chunks != NULL && ngood < clsm->nchunks) {
close_range_start = ngood;
close_range_end = clsm->nchunks;
} else if (!F_ISSET(clsm, WT_CLSM_OPEN_READ) && nupdates > 0 ) {
@@ -590,28 +628,23 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
clsm->current = NULL;
}
- WT_ERR(__wt_realloc_def(session,
- &clsm->bloom_alloc, nchunks, &clsm->blooms));
- WT_ERR(__wt_realloc_def(session,
- &clsm->cursor_alloc, nchunks, &clsm->cursors));
-
clsm->nchunks = nchunks;
/* Open the cursors for chunks that have changed. */
- for (i = ngood, cp = clsm->cursors + i; i != nchunks; i++, cp++) {
+ for (i = ngood; i != nchunks; i++) {
chunk = lsm_tree->chunk[i + start_chunk];
/* Copy the maximum transaction ID. */
if (F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT))
- clsm->switch_txn[i] = chunk->switch_txn;
+ clsm->chunks[i]->switch_txn = chunk->switch_txn;
/*
* Read from the checkpoint if the file has been written.
* Once all cursors switch, the in-memory tree can be evicted.
*/
- WT_ASSERT(session, *cp == NULL);
+ WT_ASSERT(session, clsm->chunks[i]->cursor == NULL);
ret = __wt_open_cursor(session, chunk->uri, c,
(F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && !chunk->empty) ?
- ckpt_cfg : NULL, cp);
+ ckpt_cfg : NULL, &clsm->chunks[i]->cursor);
/*
* XXX kludge: we may have an empty chunk where no checkpoint
@@ -619,8 +652,8 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
* chunk instead.
*/
if (ret == WT_NOTFOUND && F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) {
- ret = __wt_open_cursor(
- session, chunk->uri, c, NULL, cp);
+ ret = __wt_open_cursor(session,
+ chunk->uri, c, NULL, &clsm->chunks[i]->cursor);
if (ret == 0)
chunk->empty = 1;
}
@@ -633,25 +666,31 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
* write conflicts with concurrent updates.
*/
if (i != nchunks - 1)
- (*cp)->insert = __wt_curfile_update_check;
+ clsm->chunks[i]->cursor->insert =
+ __wt_curfile_update_check;
if (!F_ISSET(clsm, WT_CLSM_MERGE) &&
F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
WT_ERR(__wt_bloom_open(session, chunk->bloom_uri,
lsm_tree->bloom_bit_count,
lsm_tree->bloom_hash_count,
- c, &clsm->blooms[i]));
+ c, &clsm->chunks[i]->bloom));
/* Child cursors always use overwrite and raw mode. */
- F_SET(*cp, WT_CURSTD_OVERWRITE | WT_CURSTD_RAW);
+ F_SET(clsm->chunks[i]->cursor,
+ WT_CURSTD_OVERWRITE | WT_CURSTD_RAW);
}
+ /* Setup the count values for each chunk in the chunks*/
+ for (i = 0; i != clsm->nchunks; i++)
+ clsm->chunks[i]->count = lsm_tree->chunk[i]->count;
+
/* The last chunk is our new primary. */
if (chunk != NULL &&
!F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) &&
chunk->switch_txn == WT_TXN_NONE) {
clsm->primary_chunk = chunk;
- primary = clsm->cursors[clsm->nchunks - 1];
+ primary = clsm->chunks[clsm->nchunks - 1]->cursor;
/*
* Disable eviction for the in-memory chunk. Also clear the
* bulk load flag here, otherwise eviction will be enabled by
@@ -671,17 +710,19 @@ err:
#ifdef HAVE_DIAGNOSTIC
/* Check that all cursors are open as expected. */
if (ret == 0 && F_ISSET(clsm, WT_CLSM_OPEN_READ)) {
- for (i = 0, cp = clsm->cursors; i != clsm->nchunks; cp++, i++) {
+ for (i = 0; i != clsm->nchunks; i++) {
+ cursor = clsm->chunks[i]->cursor;
chunk = lsm_tree->chunk[i + start_chunk];
- /* Make sure the cursor is open. */
- WT_ASSERT(session, *cp != NULL);
+ /* Make sure the first cursor is open. */
+ WT_ASSERT(session, cursor != NULL);
/* Easy case: the URIs should match. */
- WT_ASSERT(session, strcmp((*cp)->uri, chunk->uri) == 0);
+ WT_ASSERT(
+ session, strcmp(cursor->uri, chunk->uri) == 0);
/* Make sure the checkpoint config matches. */
- checkpoint = ((WT_CURSOR_BTREE *)*cp)->
+ checkpoint = ((WT_CURSOR_BTREE *)cursor)->
btree->dhandle->checkpoint;
WT_ASSERT(session,
(F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) &&
@@ -692,7 +733,8 @@ err:
WT_ASSERT(session,
(F_ISSET(chunk, WT_LSM_CHUNK_BLOOM) &&
!F_ISSET(clsm, WT_CLSM_MERGE)) ?
- clsm->blooms[i] != NULL : clsm->blooms[i] == NULL);
+ clsm->chunks[i]->bloom != NULL :
+ clsm->chunks[i]->bloom == NULL);
}
}
#endif
@@ -901,6 +943,96 @@ err: __clsm_leave(clsm);
}
/*
+ * __clsm_random_chunk --
+ * Pick a chunk at random, weighted by each chunk's document count so
+ * small chunks are not over-represented, and pass back that chunk's
+ * cursor. Returns WT_NOTFOUND if there are no chunks or no documents.
+ */
+static int
+__clsm_random_chunk(WT_SESSION_IMPL *session,
+ WT_CURSOR_LSM *clsm, WT_CURSOR **cursor)
+{
+ uint64_t checked_docs, i, rand_doc, total_docs;
+
+ /*
+ * If the tree is empty we cannot do a random lookup, so return a
+ * WT_NOTFOUND.
+ */
+ if (clsm->nchunks == 0)
+ return (WT_NOTFOUND);
+ for (total_docs = i = 0; i < clsm->nchunks; i++) {
+ total_docs += clsm->chunks[i]->count;
+ }
+ if (total_docs == 0)
+ return (WT_NOTFOUND);
+
+ rand_doc = __wt_random(&session->rnd) % total_docs;
+ /* rand_doc < total_docs: compare strictly to keep weights exact. */
+ for (checked_docs = i = 0; i < clsm->nchunks; i++) {
+ checked_docs += clsm->chunks[i]->count;
+ if (rand_doc < checked_docs) {
+ *cursor = clsm->chunks[i]->cursor;
+ break;
+ }
+ }
+ return (0);
+}
+
+/*
+ * __clsm_next_random --
+ * WT_CURSOR->next method for the LSM cursor type when configured with
+ * next_random.
+ */
+static int
+__clsm_next_random(WT_CURSOR *cursor)
+{
+ WT_CURSOR_LSM *clsm;
+ WT_CURSOR *c;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ int exact;
+
+ c = NULL;
+ clsm = (WT_CURSOR_LSM *)cursor;
+
+ CURSOR_API_CALL(cursor, session, next, NULL);
+ WT_CURSOR_NOVALUE(cursor);
+ WT_ERR(__clsm_enter(clsm, false, false));
+
+ for (;;) {
+ WT_ERR(__clsm_random_chunk(session, clsm, &c));
+ /*
+ * This call to next_random on the chunk can potentially end in
+ * WT_NOTFOUND if the chunk we picked is empty. We want to retry
+ * in that case.
+ */
+ ret = __wt_curfile_next_random(c);
+ if (ret == WT_NOTFOUND)
+ continue;
+
+ WT_ERR(ret);
+ F_SET(cursor, WT_CURSTD_KEY_INT);
+ WT_ERR(c->get_key(c, &cursor->key));
+ /*
+ * Search near the current key to resolve any tombstones
+ * and position to a valid document. If we see a
+ * WT_NOTFOUND here that is valid, as the tree has no
+ * documents visible to us.
+ */
+ WT_ERR(__clsm_search_near(cursor, &exact));
+ break;
+ }
+
+ /* We have found a valid doc. Set that we are now positioned */
+ if (0) {
+err: F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+ }
+ __clsm_leave(clsm);
+ API_END(session, ret);
+ return (ret);
+}
+
+/*
* __clsm_prev --
* WT_CURSOR->prev method for the LSM cursor type.
*/
@@ -1071,7 +1203,7 @@ __clsm_lookup(WT_CURSOR_LSM *clsm, WT_ITEM *value)
WT_FORALL_CURSORS(clsm, c, i) {
/* If there is a Bloom filter, see if we can skip the read. */
bloom = NULL;
- if ((bloom = clsm->blooms[i]) != NULL) {
+ if ((bloom = clsm->chunks[i]->bloom) != NULL) {
if (!have_hash) {
__wt_bloom_hash(bloom, &cursor->key, &bhash);
have_hash = true;
@@ -1258,7 +1390,12 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp)
*/
F_CLR(cursor, WT_CURSTD_KEY_SET);
F_SET(cursor, WT_CURSTD_KEY_INT);
- if ((ret = cursor->next(cursor)) == 0) {
+ /*
+ * We call __clsm_next here as we want to advance
+ * forward. If we are a random LSM cursor calling next
+ * on the cursor will not advance as we intend.
+ */
+ if ((ret = __clsm_next(cursor)) == 0) {
cmp = 1;
deleted = false;
}
@@ -1267,7 +1404,11 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp)
}
if (deleted) {
clsm->current = NULL;
- WT_ERR(cursor->prev(cursor));
+ /*
+ * We call prev directly here as cursor->prev may be "invalid"
+ * if this is a random cursor.
+ */
+ WT_ERR(__clsm_prev(cursor));
cmp = -1;
}
*exactp = cmp;
@@ -1311,7 +1452,7 @@ __clsm_put(WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm,
* Clear the existing cursor position. Don't clear the primary cursor:
* we're about to use it anyway.
*/
- primary = clsm->cursors[clsm->nchunks - 1];
+ primary = clsm->chunks[clsm->nchunks - 1]->cursor;
WT_RET(__clsm_reset_cursors(clsm, primary));
/* If necessary, set the position for future scans. */
@@ -1321,12 +1462,12 @@ __clsm_put(WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm,
for (i = 0, slot = clsm->nchunks - 1; i < clsm->nupdates; i++, slot--) {
/* Check if we need to keep updating old chunks. */
if (i > 0 &&
- __wt_txn_visible(session, clsm->switch_txn[slot])) {
+ __wt_txn_visible(session, clsm->chunks[slot]->switch_txn)) {
clsm->nupdates = i;
break;
}
- c = clsm->cursors[slot];
+ c = clsm->chunks[slot]->cursor;
c->set_key(c, key);
c->set_value(c, value);
WT_RET((position && i == 0) ? c->update(c) : c->insert(c));
@@ -1484,9 +1625,7 @@ __wt_clsm_close(WT_CURSOR *cursor)
clsm = (WT_CURSOR_LSM *)cursor;
CURSOR_API_CALL(cursor, session, close, NULL);
WT_TRET(__clsm_close_cursors(clsm, 0, clsm->nchunks));
- __wt_free(session, clsm->blooms);
- __wt_free(session, clsm->cursors);
- __wt_free(session, clsm->switch_txn);
+ __clsm_free_chunks(session, clsm);
/* In case we were somehow left positioned, clear that. */
__clsm_leave(clsm);
@@ -1587,6 +1726,13 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
*/
clsm->dsk_gen = 0;
+ /* If the next_random option is set, configure a random cursor */
+ WT_ERR(__wt_config_gets_def(session, cfg, "next_random", 0, &cval));
+ if (cval.val != 0) {
+ __wt_cursor_set_notsup(cursor);
+ cursor->next = __clsm_next_random;
+ }
+
WT_ERR(__wt_cursor_init(cursor, cursor->uri, owner, cfg, cursorp));
if (bulk)
diff --git a/src/lsm/lsm_cursor_bulk.c b/src/lsm/lsm_cursor_bulk.c
index 607ca0c9705..319426de3f0 100644
--- a/src/lsm/lsm_cursor_bulk.c
+++ b/src/lsm/lsm_cursor_bulk.c
@@ -28,9 +28,8 @@ __clsm_close_bulk(WT_CURSOR *cursor)
session = (WT_SESSION_IMPL *)clsm->iface.session;
/* Close the bulk cursor to ensure the chunk is written to disk. */
- bulk_cursor = clsm->cursors[0];
+ bulk_cursor = clsm->chunks[0]->cursor;
WT_RET(bulk_cursor->close(bulk_cursor));
- clsm->cursors[0] = NULL;
clsm->nchunks = 0;
/* Set ondisk, and flush the metadata */
@@ -75,7 +74,7 @@ __clsm_insert_bulk(WT_CURSOR *cursor)
WT_ASSERT(session, lsm_tree->nchunks == 1 && clsm->nchunks == 1);
++chunk->count;
chunk->size += cursor->key.size + cursor->value.size;
- bulk_cursor = *clsm->cursors;
+ bulk_cursor = clsm->chunks[0]->cursor;
bulk_cursor->set_key(bulk_cursor, &cursor->key);
bulk_cursor->set_value(bulk_cursor, &cursor->value);
WT_RET(bulk_cursor->insert(bulk_cursor));
@@ -124,11 +123,10 @@ __wt_clsm_open_bulk(WT_CURSOR_LSM *clsm, const char *cfg[])
* for a bloom filter - it makes cleanup simpler. Cleaned up by
* cursor close on error.
*/
- WT_RET(__wt_calloc_one(session, &clsm->blooms));
- clsm->bloom_alloc = 1;
- WT_RET(__wt_calloc_one(session, &clsm->cursors));
- clsm->cursor_alloc = 1;
- clsm->nchunks = 1;
+ WT_RET(
+ __wt_realloc_def(session, &clsm->chunks_alloc, 1, &clsm->chunks));
+ WT_RET(__wt_calloc_one(session, &clsm->chunks[0]));
+ clsm->chunks_count = clsm->nchunks = 1;
/*
* Open a bulk cursor on the first chunk in the tree - take a read
@@ -139,7 +137,7 @@ __wt_clsm_open_bulk(WT_CURSOR_LSM *clsm, const char *cfg[])
*/
WT_RET(__wt_open_cursor(session,
lsm_tree->chunk[0]->uri, &clsm->iface, cfg, &bulk_cursor));
- clsm->cursors[0] = bulk_cursor;
+ clsm->chunks[0]->cursor = bulk_cursor;
/* LSM cursors are always raw */
F_SET(bulk_cursor, WT_CURSTD_RAW);
diff --git a/src/lsm/lsm_stat.c b/src/lsm/lsm_stat.c
index f4f5a0acce8..3fe3ca1ba81 100644
--- a/src/lsm/lsm_stat.c
+++ b/src/lsm/lsm_stat.c
@@ -42,11 +42,11 @@ __curstat_lsm_init(
if (cst->flags != 0) {
(void)snprintf(config, sizeof(config),
"statistics=(%s%s%s%s)",
- F_ISSET(cst, WT_CONN_STAT_ALL) ? "all," : "",
- F_ISSET(cst, WT_CONN_STAT_CLEAR) ? "clear," : "",
- !F_ISSET(cst, WT_CONN_STAT_ALL) &&
- F_ISSET(cst, WT_CONN_STAT_FAST) ? "fast," : "",
- F_ISSET(cst, WT_CONN_STAT_SIZE) ? "size," : "");
+ F_ISSET(cst, WT_STAT_TYPE_ALL) ? "all," : "",
+ F_ISSET(cst, WT_STAT_CLEAR) ? "clear," : "",
+ !F_ISSET(cst, WT_STAT_TYPE_ALL) &&
+ F_ISSET(cst, WT_STAT_TYPE_FAST) ? "fast," : "",
+ F_ISSET(cst, WT_STAT_TYPE_SIZE) ? "size," : "");
cfg[1] = disk_cfg[1] = config;
}
@@ -132,26 +132,26 @@ __curstat_lsm_init(
/* Include, and optionally clear, LSM-level specific information. */
WT_STAT_WRITE(session, stats, bloom_miss, lsm_tree->bloom_miss);
- if (F_ISSET(cst, WT_CONN_STAT_CLEAR))
+ if (F_ISSET(cst, WT_STAT_CLEAR))
lsm_tree->bloom_miss = 0;
WT_STAT_WRITE(session, stats, bloom_hit, lsm_tree->bloom_hit);
- if (F_ISSET(cst, WT_CONN_STAT_CLEAR))
+ if (F_ISSET(cst, WT_STAT_CLEAR))
lsm_tree->bloom_hit = 0;
WT_STAT_WRITE(session,
stats, bloom_false_positive, lsm_tree->bloom_false_positive);
- if (F_ISSET(cst, WT_CONN_STAT_CLEAR))
+ if (F_ISSET(cst, WT_STAT_CLEAR))
lsm_tree->bloom_false_positive = 0;
WT_STAT_WRITE(session,
stats, lsm_lookup_no_bloom, lsm_tree->lsm_lookup_no_bloom);
- if (F_ISSET(cst, WT_CONN_STAT_CLEAR))
+ if (F_ISSET(cst, WT_STAT_CLEAR))
lsm_tree->lsm_lookup_no_bloom = 0;
WT_STAT_WRITE(session,
stats, lsm_checkpoint_throttle, lsm_tree->lsm_checkpoint_throttle);
- if (F_ISSET(cst, WT_CONN_STAT_CLEAR))
+ if (F_ISSET(cst, WT_STAT_CLEAR))
lsm_tree->lsm_checkpoint_throttle = 0;
WT_STAT_WRITE(session,
stats, lsm_merge_throttle, lsm_tree->lsm_merge_throttle);
- if (F_ISSET(cst, WT_CONN_STAT_CLEAR))
+ if (F_ISSET(cst, WT_STAT_CLEAR))
lsm_tree->lsm_merge_throttle = 0;
__wt_curstat_dsrc_final(cst);
diff --git a/src/os_posix/os_time.c b/src/os_posix/os_time.c
index c7ae881af97..719e214696b 100644
--- a/src/os_posix/os_time.c
+++ b/src/os_posix/os_time.c
@@ -17,6 +17,15 @@ __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
{
WT_DECL_RET;
+ /*
+ * This function doesn't return an error, but panics on failure (which
+ * should never happen, it's done this way to simplify error handling
+ * in the caller). However, some compilers complain about using garbage
+ * values. Initializing the values avoids the complaint.
+ */
+ tsp->tv_sec = 0;
+ tsp->tv_nsec = 0;
+
#if defined(HAVE_CLOCK_GETTIME)
WT_SYSCALL_RETRY(clock_gettime(CLOCK_REALTIME, tsp), ret);
if (ret == 0)
diff --git a/src/schema/schema_stat.c b/src/schema/schema_stat.c
index 1cd39d97364..345f9164e9b 100644
--- a/src/schema/schema_stat.c
+++ b/src/schema/schema_stat.c
@@ -137,7 +137,7 @@ __wt_curstat_table_init(WT_SESSION_IMPL *session,
* If only gathering table size statistics, try a fast path that
* avoids the schema and table list locks.
*/
- if (F_ISSET(cst, WT_CONN_STAT_SIZE)) {
+ if (F_ISSET(cst, WT_STAT_TYPE_SIZE)) {
WT_RET(__curstat_size_only(session, uri, &was_fast, cst));
if (was_fast)
return (0);
diff --git a/src/session/session_api.c b/src/session/session_api.c
index d3432c19ef3..f594450db74 100644
--- a/src/session/session_api.c
+++ b/src/session/session_api.c
@@ -1757,11 +1757,13 @@ __open_session(WT_CONNECTION_IMPL *conn,
if (i >= conn->session_cnt) /* Defend against off-by-one errors. */
conn->session_cnt = i + 1;
- session_ret->id = i;
session_ret->iface =
F_ISSET(conn, WT_CONN_READONLY) ? stds_readonly : stds;
session_ret->iface.connection = &conn->iface;
+ session_ret->name = NULL;
+ session_ret->id = i;
+
WT_ERR(__wt_cond_alloc(session, "session", false, &session_ret->cond));
if (WT_SESSION_FIRST_USE(session_ret))
@@ -1777,10 +1779,10 @@ __open_session(WT_CONNECTION_IMPL *conn,
* Allocate the table hash array as well.
*/
if (session_ret->dhhash == NULL)
- WT_ERR(__wt_calloc(session_ret, WT_HASH_ARRAY_SIZE,
+ WT_ERR(__wt_calloc(session, WT_HASH_ARRAY_SIZE,
sizeof(struct __dhandles_hash), &session_ret->dhhash));
if (session_ret->tablehash == NULL)
- WT_ERR(__wt_calloc(session_ret, WT_HASH_ARRAY_SIZE,
+ WT_ERR(__wt_calloc(session, WT_HASH_ARRAY_SIZE,
sizeof(struct __tables_hash), &session_ret->tablehash));
for (i = 0; i < WT_HASH_ARRAY_SIZE; i++) {
TAILQ_INIT(&session_ret->dhhash[i]);
@@ -1789,7 +1791,7 @@ __open_session(WT_CONNECTION_IMPL *conn,
/* Initialize transaction support: default to read-committed. */
session_ret->isolation = WT_ISO_READ_COMMITTED;
- WT_ERR(__wt_txn_init(session_ret));
+ WT_ERR(__wt_txn_init(session, session_ret));
/*
* The session's hazard pointer memory isn't discarded during normal
@@ -1808,6 +1810,9 @@ __open_session(WT_CONNECTION_IMPL *conn,
*/
session_ret->hazard_size = 0;
+ /* Cache the offset of this session's statistics bucket. */
+ session_ret->stat_bucket = WT_STATS_SLOT_ID(session);
+
/*
* Configuration: currently, the configuration for open_session is the
* same as session.reconfigure, so use that function.
@@ -1816,8 +1821,6 @@ __open_session(WT_CONNECTION_IMPL *conn,
WT_ERR(
__session_reconfigure((WT_SESSION *)session_ret, config));
- session_ret->name = NULL;
-
/*
* Publish: make the entry visible to server threads. There must be a
* barrier for two reasons, to ensure structure fields are set before
diff --git a/src/support/stat.c b/src/support/stat.c
index 9d440f9ebf3..6e8e218a0db 100644
--- a/src/support/stat.c
+++ b/src/support/stat.c
@@ -65,6 +65,24 @@ static const char * const __stats_dsrc_desc[] = {
"cache: pages written from cache",
"cache: pages written requiring in-memory restoration",
"cache: unmodified pages evicted",
+ "cache_walk: Average difference between current eviction generation when the page was last considered",
+ "cache_walk: Average on-disk page image size seen",
+ "cache_walk: Clean pages currently in cache",
+ "cache_walk: Current eviction generation",
+ "cache_walk: Dirty pages currently in cache",
+ "cache_walk: Entries in the root page",
+ "cache_walk: Internal pages currently in cache",
+ "cache_walk: Leaf pages currently in cache",
+ "cache_walk: Maximum difference between current eviction generation when the page was last considered",
+ "cache_walk: Maximum page size seen",
+ "cache_walk: Minimum on-disk page image size seen",
+ "cache_walk: On-disk page image sizes smaller than a single allocation unit",
+ "cache_walk: Pages created in memory and never written",
+ "cache_walk: Pages currently queued for eviction",
+ "cache_walk: Pages that could not be queued for eviction",
+ "cache_walk: Refs skipped during cache traversal",
+ "cache_walk: Size of the root page",
+ "cache_walk: Total number of pages currently in cache",
"compression: compressed pages read",
"compression: compressed pages written",
"compression: page written failed to compress",
@@ -196,6 +214,24 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->cache_write = 0;
stats->cache_write_restore = 0;
stats->cache_eviction_clean = 0;
+ /* not clearing cache_state_gen_avg_gap */
+ /* not clearing cache_state_avg_written_size */
+ /* not clearing cache_state_pages_clean */
+ /* not clearing cache_state_gen_current */
+ /* not clearing cache_state_pages_dirty */
+ /* not clearing cache_state_root_entries */
+ /* not clearing cache_state_pages_internal */
+ /* not clearing cache_state_pages_leaf */
+ /* not clearing cache_state_gen_max_gap */
+ /* not clearing cache_state_max_pagesize */
+ /* not clearing cache_state_min_written_size */
+ /* not clearing cache_state_smaller_alloc_size */
+ /* not clearing cache_state_memory */
+ /* not clearing cache_state_queued */
+ /* not clearing cache_state_not_queueable */
+ /* not clearing cache_state_refs_skipped */
+ /* not clearing cache_state_root_size */
+ /* not clearing cache_state_pages */
stats->compress_read = 0;
stats->compress_write = 0;
stats->compress_write_fail = 0;
@@ -325,6 +361,27 @@ __wt_stat_dsrc_aggregate_single(
to->cache_write += from->cache_write;
to->cache_write_restore += from->cache_write_restore;
to->cache_eviction_clean += from->cache_eviction_clean;
+ to->cache_state_gen_avg_gap += from->cache_state_gen_avg_gap;
+ to->cache_state_avg_written_size +=
+ from->cache_state_avg_written_size;
+ to->cache_state_pages_clean += from->cache_state_pages_clean;
+ to->cache_state_gen_current += from->cache_state_gen_current;
+ to->cache_state_pages_dirty += from->cache_state_pages_dirty;
+ to->cache_state_root_entries += from->cache_state_root_entries;
+ to->cache_state_pages_internal += from->cache_state_pages_internal;
+ to->cache_state_pages_leaf += from->cache_state_pages_leaf;
+ to->cache_state_gen_max_gap += from->cache_state_gen_max_gap;
+ to->cache_state_max_pagesize += from->cache_state_max_pagesize;
+ to->cache_state_min_written_size +=
+ from->cache_state_min_written_size;
+ to->cache_state_smaller_alloc_size +=
+ from->cache_state_smaller_alloc_size;
+ to->cache_state_memory += from->cache_state_memory;
+ to->cache_state_queued += from->cache_state_queued;
+ to->cache_state_not_queueable += from->cache_state_not_queueable;
+ to->cache_state_refs_skipped += from->cache_state_refs_skipped;
+ to->cache_state_root_size += from->cache_state_root_size;
+ to->cache_state_pages += from->cache_state_pages;
to->compress_read += from->compress_read;
to->compress_write += from->compress_write;
to->compress_write_fail += from->compress_write_fail;
@@ -467,6 +524,39 @@ __wt_stat_dsrc_aggregate(
to->cache_write += WT_STAT_READ(from, cache_write);
to->cache_write_restore += WT_STAT_READ(from, cache_write_restore);
to->cache_eviction_clean += WT_STAT_READ(from, cache_eviction_clean);
+ to->cache_state_gen_avg_gap +=
+ WT_STAT_READ(from, cache_state_gen_avg_gap);
+ to->cache_state_avg_written_size +=
+ WT_STAT_READ(from, cache_state_avg_written_size);
+ to->cache_state_pages_clean +=
+ WT_STAT_READ(from, cache_state_pages_clean);
+ to->cache_state_gen_current +=
+ WT_STAT_READ(from, cache_state_gen_current);
+ to->cache_state_pages_dirty +=
+ WT_STAT_READ(from, cache_state_pages_dirty);
+ to->cache_state_root_entries +=
+ WT_STAT_READ(from, cache_state_root_entries);
+ to->cache_state_pages_internal +=
+ WT_STAT_READ(from, cache_state_pages_internal);
+ to->cache_state_pages_leaf +=
+ WT_STAT_READ(from, cache_state_pages_leaf);
+ to->cache_state_gen_max_gap +=
+ WT_STAT_READ(from, cache_state_gen_max_gap);
+ to->cache_state_max_pagesize +=
+ WT_STAT_READ(from, cache_state_max_pagesize);
+ to->cache_state_min_written_size +=
+ WT_STAT_READ(from, cache_state_min_written_size);
+ to->cache_state_smaller_alloc_size +=
+ WT_STAT_READ(from, cache_state_smaller_alloc_size);
+ to->cache_state_memory += WT_STAT_READ(from, cache_state_memory);
+ to->cache_state_queued += WT_STAT_READ(from, cache_state_queued);
+ to->cache_state_not_queueable +=
+ WT_STAT_READ(from, cache_state_not_queueable);
+ to->cache_state_refs_skipped +=
+ WT_STAT_READ(from, cache_state_refs_skipped);
+ to->cache_state_root_size +=
+ WT_STAT_READ(from, cache_state_root_size);
+ to->cache_state_pages += WT_STAT_READ(from, cache_state_pages);
to->compress_read += WT_STAT_READ(from, compress_read);
to->compress_write += WT_STAT_READ(from, compress_write);
to->compress_write_fail += WT_STAT_READ(from, compress_write_fail);
@@ -549,6 +639,10 @@ static const char * const __stats_connection_desc[] = {
"block-manager: bytes written for checkpoint",
"block-manager: mapped blocks read",
"block-manager: mapped bytes read",
+ "cache: application threads page read from disk to cache count",
+ "cache: application threads page read from disk to cache time (usecs)",
+ "cache: application threads page write from cache to disk count",
+ "cache: application threads page write from cache to disk time (usecs)",
"cache: bytes belonging to page images in the cache",
"cache: bytes currently in the cache",
"cache: bytes not belonging to page images in the cache",
@@ -642,6 +736,21 @@ static const char * const __stats_connection_desc[] = {
"data-handle: connection sweeps",
"data-handle: session dhandles swept",
"data-handle: session sweep attempts",
+ "lock: checkpoint lock acquisitions",
+ "lock: checkpoint lock application thread wait time (usecs)",
+ "lock: checkpoint lock internal thread wait time (usecs)",
+ "lock: handle-list lock acquisitions",
+ "lock: handle-list lock application thread wait time (usecs)",
+ "lock: handle-list lock internal thread wait time (usecs)",
+ "lock: metadata lock acquisitions",
+ "lock: metadata lock application thread wait time (usecs)",
+ "lock: metadata lock internal thread wait time (usecs)",
+ "lock: schema lock acquisitions",
+ "lock: schema lock application thread wait time (usecs)",
+ "lock: schema lock internal thread wait time (usecs)",
+ "lock: table lock acquisitions",
+ "lock: table lock application thread time waiting for the table lock (usecs)",
+ "lock: table lock internal thread time waiting for the table lock (usecs)",
"log: busy returns attempting to switch slots",
"log: consolidated slot closures",
"log: consolidated slot join races",
@@ -796,6 +905,10 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->block_byte_write_checkpoint = 0;
stats->block_map_read = 0;
stats->block_byte_map_read = 0;
+ stats->cache_read_app_count = 0;
+ stats->cache_read_app_time = 0;
+ stats->cache_write_app_count = 0;
+ stats->cache_write_app_time = 0;
/* not clearing cache_bytes_image */
/* not clearing cache_bytes_inuse */
/* not clearing cache_bytes_other */
@@ -889,6 +1002,21 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->dh_sweeps = 0;
stats->dh_session_handles = 0;
stats->dh_session_sweeps = 0;
+ stats->lock_checkpoint_count = 0;
+ stats->lock_checkpoint_wait_application = 0;
+ stats->lock_checkpoint_wait_internal = 0;
+ stats->lock_handle_list_count = 0;
+ stats->lock_handle_list_wait_application = 0;
+ stats->lock_handle_list_wait_internal = 0;
+ stats->lock_metadata_count = 0;
+ stats->lock_metadata_wait_application = 0;
+ stats->lock_metadata_wait_internal = 0;
+ stats->lock_schema_count = 0;
+ stats->lock_schema_wait_application = 0;
+ stats->lock_schema_wait_internal = 0;
+ stats->lock_table_count = 0;
+ stats->lock_table_wait_application = 0;
+ stats->lock_table_wait_internal = 0;
stats->log_slot_switch_busy = 0;
stats->log_slot_closes = 0;
stats->log_slot_races = 0;
@@ -1036,6 +1164,11 @@ __wt_stat_connection_aggregate(
WT_STAT_READ(from, block_byte_write_checkpoint);
to->block_map_read += WT_STAT_READ(from, block_map_read);
to->block_byte_map_read += WT_STAT_READ(from, block_byte_map_read);
+ to->cache_read_app_count += WT_STAT_READ(from, cache_read_app_count);
+ to->cache_read_app_time += WT_STAT_READ(from, cache_read_app_time);
+ to->cache_write_app_count +=
+ WT_STAT_READ(from, cache_write_app_count);
+ to->cache_write_app_time += WT_STAT_READ(from, cache_write_app_time);
to->cache_bytes_image += WT_STAT_READ(from, cache_bytes_image);
to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse);
to->cache_bytes_other += WT_STAT_READ(from, cache_bytes_other);
@@ -1162,6 +1295,33 @@ __wt_stat_connection_aggregate(
to->dh_sweeps += WT_STAT_READ(from, dh_sweeps);
to->dh_session_handles += WT_STAT_READ(from, dh_session_handles);
to->dh_session_sweeps += WT_STAT_READ(from, dh_session_sweeps);
+ to->lock_checkpoint_count +=
+ WT_STAT_READ(from, lock_checkpoint_count);
+ to->lock_checkpoint_wait_application +=
+ WT_STAT_READ(from, lock_checkpoint_wait_application);
+ to->lock_checkpoint_wait_internal +=
+ WT_STAT_READ(from, lock_checkpoint_wait_internal);
+ to->lock_handle_list_count +=
+ WT_STAT_READ(from, lock_handle_list_count);
+ to->lock_handle_list_wait_application +=
+ WT_STAT_READ(from, lock_handle_list_wait_application);
+ to->lock_handle_list_wait_internal +=
+ WT_STAT_READ(from, lock_handle_list_wait_internal);
+ to->lock_metadata_count += WT_STAT_READ(from, lock_metadata_count);
+ to->lock_metadata_wait_application +=
+ WT_STAT_READ(from, lock_metadata_wait_application);
+ to->lock_metadata_wait_internal +=
+ WT_STAT_READ(from, lock_metadata_wait_internal);
+ to->lock_schema_count += WT_STAT_READ(from, lock_schema_count);
+ to->lock_schema_wait_application +=
+ WT_STAT_READ(from, lock_schema_wait_application);
+ to->lock_schema_wait_internal +=
+ WT_STAT_READ(from, lock_schema_wait_internal);
+ to->lock_table_count += WT_STAT_READ(from, lock_table_count);
+ to->lock_table_wait_application +=
+ WT_STAT_READ(from, lock_table_wait_application);
+ to->lock_table_wait_internal +=
+ WT_STAT_READ(from, lock_table_wait_internal);
to->log_slot_switch_busy += WT_STAT_READ(from, log_slot_switch_busy);
to->log_slot_closes += WT_STAT_READ(from, log_slot_closes);
to->log_slot_races += WT_STAT_READ(from, log_slot_races);
diff --git a/src/txn/txn.c b/src/txn/txn.c
index 3b24bcd505d..d60ea73c660 100644
--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -659,20 +659,20 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[])
* Initialize a session's transaction data.
*/
int
-__wt_txn_init(WT_SESSION_IMPL *session)
+__wt_txn_init(WT_SESSION_IMPL *session, WT_SESSION_IMPL *session_ret)
{
WT_TXN *txn;
- txn = &session->txn;
+ txn = &session_ret->txn;
txn->id = WT_TXN_NONE;
WT_RET(__wt_calloc_def(session,
- S2C(session)->session_size, &txn->snapshot));
+ S2C(session_ret)->session_size, &txn->snapshot));
#ifdef HAVE_DIAGNOSTIC
- if (S2C(session)->txn_global.states != NULL) {
+ if (S2C(session_ret)->txn_global.states != NULL) {
WT_TXN_STATE *txn_state;
- txn_state = WT_SESSION_TXN_STATE(session);
+ txn_state = WT_SESSION_TXN_STATE(session_ret);
WT_ASSERT(session, txn_state->pinned_id == WT_TXN_NONE);
}
#endif
@@ -683,7 +683,7 @@ __wt_txn_init(WT_SESSION_IMPL *session)
*/
txn->mod = NULL;
- txn->isolation = session->isolation;
+ txn->isolation = session_ret->isolation;
return (0);
}
diff --git a/test/bloom/test_bloom.c b/test/bloom/test_bloom.c
index 7a298f000aa..67249ff887e 100644
--- a/test/bloom/test_bloom.c
+++ b/test/bloom/test_bloom.c
@@ -56,8 +56,6 @@ void usage(void)
extern char *__wt_optarg;
extern int __wt_optind;
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/test/checkpoint/test_checkpoint.c b/test/checkpoint/test_checkpoint.c
index 6b2f0d4466c..4998019ad8e 100644
--- a/test/checkpoint/test_checkpoint.c
+++ b/test/checkpoint/test_checkpoint.c
@@ -42,8 +42,6 @@ static int wt_shutdown(void);
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/test/csuite/Makefile.am b/test/csuite/Makefile.am
index 15db2fbcf46..8f1714237b9 100644
--- a/test/csuite/Makefile.am
+++ b/test/csuite/Makefile.am
@@ -7,6 +7,9 @@ AM_LDFLAGS = -static
test_wt1965_col_efficiency_SOURCES = wt1965_col_efficiency/main.c
noinst_PROGRAMS = test_wt1965_col_efficiency
+test_wt2403_lsm_workload_SOURCES = wt2403_lsm_workload/main.c
+noinst_PROGRAMS += test_wt2403_lsm_workload
+
test_wt2246_col_append_SOURCES = wt2246_col_append/main.c
noinst_PROGRAMS += test_wt2246_col_append
diff --git a/test/csuite/wt1965_col_efficiency/main.c b/test/csuite/wt1965_col_efficiency/main.c
index 0dc367c0611..a7235d81b31 100644
--- a/test/csuite/wt1965_col_efficiency/main.c
+++ b/test/csuite/wt1965_col_efficiency/main.c
@@ -35,8 +35,6 @@
* it is demonstrating an inefficiency rather than a correctness bug.
*/
-void (*custom_die)(void) = NULL;
-
/* If changing field count also need to change set_value and get_value calls */
#define NR_FIELDS 8
#define NR_OBJECTS 100
diff --git a/test/csuite/wt2246_col_append/main.c b/test/csuite/wt2246_col_append/main.c
index 1da8732abb4..4b352b26051 100644
--- a/test/csuite/wt2246_col_append/main.c
+++ b/test/csuite/wt2246_col_append/main.c
@@ -42,8 +42,6 @@
#define MILLION 1000000
-void (*custom_die)(void) = NULL;
-
/* Needs to be global for signal handling. */
static TEST_OPTS *opts, _opts;
diff --git a/test/csuite/wt2323_join_visibility/main.c b/test/csuite/wt2323_join_visibility/main.c
index 5987b77fd7d..239a3f300d0 100644
--- a/test/csuite/wt2323_join_visibility/main.c
+++ b/test/csuite/wt2323_join_visibility/main.c
@@ -52,8 +52,6 @@
* of inserts set low as a default.
*/
-void (*custom_die)(void) = NULL;
-
#define N_RECORDS 10000
#define N_INSERT 500000
#define N_INSERT_THREAD 2
diff --git a/test/csuite/wt2403_lsm_workload/main.c b/test/csuite/wt2403_lsm_workload/main.c
new file mode 100644
index 00000000000..0c287484b9e
--- /dev/null
+++ b/test/csuite/wt2403_lsm_workload/main.c
@@ -0,0 +1,241 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "test_util.h"
+
+static const char name[] = "lsm:test";
+#define NUM_DOCS 100000
+#define NUM_QUERIES (NUM_DOCS/100)
+
+/*
+ * rand_str --
+ * Overwrite the last eight characters of str with the eight low-order
+ * decimal digits of i (zero-padded, least-significant digit last). The
+ * rest of str is left untouched. Despite the name, the result is a
+ * deterministic function of i.
+ *
+ * NOTE(review): if str is shorter than eight characters, the unsigned
+ * expression y - 8 wraps and the loop body never runs.
+ */
+static void
+rand_str(uint64_t i, char *str)
+{
+ uint64_t x, y;
+
+ y = strlen(str);
+ /* Walk backward from the end of the string; 48 is ASCII '0'. */
+ for (x = y; x > y - 8; x--) {
+ str[x - 1] = (char)(i % 10) + 48;
+ i = i / 10;
+ }
+}
+
+/*
+ * check_str --
+ * Rebuild the string expected for document i (rand_str applied to a
+ * sixteen-'0' template, with the first character replaced by 'A' when
+ * mod is true) and pass the strcmp result against str to
+ * testutil_checkfmt, along with a message showing both strings.
+ *
+ * NOTE(review): presumably testutil_checkfmt fails the test on a
+ * non-zero value; confirm in test_util.h.
+ */
+static void
+check_str(uint64_t i, char *str, bool mod)
+{
+ char str2[] = "0000000000000000";
+
+ rand_str(i, str2);
+ if (mod)
+ str2[0] = 'A';
+ testutil_checkfmt(strcmp(str, str2),
+ "strcmp failed, got %s, expected %s", str, str2);
+}
+
+/*
+ * query_docs --
+ * Advance the cursor NUM_QUERIES times and, for each record returned,
+ * verify the value against the string check_str derives from the key.
+ * The mod flag is passed through to check_str to select the modified
+ * ('A'-prefixed) form of the expected value.
+ *
+ * NOTE(review): key is declared as a WT_ITEM, but key.data is cast to
+ * uint64_t and used as the document number. This looks like it relies
+ * on get_key storing the numeric key at the start of the structure for
+ * the table's key format; confirm against the WT_CURSOR::get_key
+ * contract.
+ */
+static void
+query_docs(WT_CURSOR *cursor, bool mod)
+{
+ WT_ITEM key, value;
+ int i;
+
+ for (i = 0; i < NUM_QUERIES; i++) {
+ testutil_check(cursor->next(cursor));
+ testutil_check(cursor->get_key(cursor, &key));
+ testutil_check(cursor->get_value(cursor, &value));
+ check_str((uint64_t)key.data, (char *)value.data, mod);
+ }
+ printf("%d documents read\n", NUM_QUERIES);
+}
+
+/*
+ * compact_thread --
+ * Thread start routine: treat args as a WT_SESSION handle, call compact
+ * on the test table through it, and return NULL.
+ */
+static void *
+compact_thread(void *args)
+{
+ WT_SESSION *session;
+
+ session = (WT_SESSION *)args;
+ testutil_check(session->compact(session, name, NULL));
+ return (NULL);
+}
+
+/*
+ * main --
+ * Drive the LSM workload: populate the "lsm:test" table, then check
+ * next_random cursor reads against updates and removes made while a
+ * snapshot-isolation transaction is open in a second session, while
+ * compact runs in another thread, and finally once every record has
+ * been removed (where each read is expected to return WT_NOTFOUND).
+ */
+int
+main(int argc, char *argv[])
+{
+ TEST_OPTS *opts, _opts;
+ WT_CURSOR *rcursor, *wcursor;
+ WT_ITEM key, value;
+ WT_SESSION *session, *session2;
+ pthread_t thread;
+ uint64_t i;
+
+ char str[] = "0000000000000000";
+
+ /*
+ * Create a clean test directory for this run of the test program if the
+ * environment variable isn't already set (as is done by make check).
+ */
+ opts = &_opts;
+ memset(opts, 0, sizeof(*opts));
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+ testutil_check(wiredtiger_open(opts->home,
+ NULL, "create,cache_size=200M", &opts->conn));
+
+ testutil_check(
+ opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(
+ opts->conn->open_session(opts->conn, NULL, NULL, &session2));
+
+ testutil_check(session->create(session, name,
+ "key_format=Q,value_format=S"));
+
+ /* Populate the table with some data. */
+ testutil_check(session->open_cursor(
+ session, name, NULL, "overwrite", &wcursor));
+ for (i = 0; i < NUM_DOCS; i++) {
+ wcursor->set_key(wcursor, i);
+ rand_str(i, str);
+ wcursor->set_value(wcursor, str);
+ testutil_check(wcursor->insert(wcursor));
+ }
+ testutil_check(wcursor->close(wcursor));
+ printf("%d documents inserted\n", NUM_DOCS);
+
+ /* Perform some random reads */
+ testutil_check(session->open_cursor(
+ session, name, NULL, "next_random=true", &rcursor));
+ query_docs(rcursor, false);
+ testutil_check(rcursor->close(rcursor));
+
+ /* Setup Transaction to pin the current values */
+ testutil_check(
+ session2->begin_transaction(session2, "isolation=snapshot"));
+ testutil_check(session2->open_cursor(
+ session2, name, NULL, "next_random=true", &rcursor));
+
+ /* Perform updates in a txn to confirm that we see only the original. */
+ testutil_check(session->open_cursor(
+ session, name, NULL, "overwrite", &wcursor));
+ for (i = 0; i < NUM_DOCS; i++) {
+ rand_str(i, str);
+ str[0] = 'A';
+ wcursor->set_key(wcursor, i);
+ wcursor->set_value(wcursor, str);
+ testutil_check(wcursor->update(wcursor));
+ }
+ testutil_check(wcursor->close(wcursor));
+ printf("%d documents set to update\n", NUM_DOCS);
+
+ /* Random reads, which should see the original values */
+ query_docs(rcursor, false);
+ testutil_check(rcursor->close(rcursor));
+
+ /* Finish the txn */
+ testutil_check(session2->rollback_transaction(session2, NULL));
+
+ /* Random reads, which should see the updated values */
+ testutil_check(session2->open_cursor(
+ session2, name, NULL, "next_random=true", &rcursor));
+ query_docs(rcursor, true);
+ testutil_check(rcursor->close(rcursor));
+
+ /* Setup a pre-delete txn */
+ testutil_check(
+ session2->begin_transaction(session2, "isolation=snapshot"));
+ testutil_check(session2->open_cursor(
+ session2, name, NULL, "next_random=true", &rcursor));
+
+ /* Delete all but one document */
+ testutil_check(session->open_cursor(
+ session, name, NULL, "overwrite", &wcursor));
+ for (i = 0; i < NUM_DOCS - 1; i++) {
+ wcursor->set_key(wcursor, i);
+ testutil_check(wcursor->remove(wcursor));
+ }
+ testutil_check(wcursor->close(wcursor));
+ printf("%d documents deleted\n", NUM_DOCS - 1);
+
+ /* Random reads, which should not see the deletes */
+ query_docs(rcursor, true);
+ testutil_check(rcursor->close(rcursor));
+
+ /* Rollback the txn so we can see the deletes */
+ testutil_check(session2->rollback_transaction(session2, NULL));
+
+ /* Find the one remaining document 3 times */
+ testutil_check(session2->open_cursor(
+ session2, name, NULL, "next_random=true", &rcursor));
+ for (i = 0; i < 3; i++) {
+ testutil_check(rcursor->next(rcursor));
+ testutil_check(rcursor->get_key(rcursor, &key));
+ testutil_check(rcursor->get_value(rcursor, &value));
+ /* There should only be one value available to us */
+ testutil_assertfmt((uint64_t)key.data == NUM_DOCS - 1,
+ "expected %d and got %" PRIu64,
+ NUM_DOCS - 1, (uint64_t)key.data);
+ check_str((uint64_t)key.data, (char *)value.data, true);
+ }
+ /*
+ * NOTE(review): the message below says "deleted", but the loop above
+ * checks the single document that was not removed (key NUM_DOCS - 1).
+ */
+ printf("Found the deleted doc 3 times\n");
+ testutil_check(rcursor->close(rcursor));
+
+ /* Repopulate the table for compact. */
+ testutil_check(session->open_cursor(
+ session, name, NULL, "overwrite", &wcursor));
+ for (i = 0; i < NUM_DOCS - 1; i++) {
+ wcursor->set_key(wcursor, i);
+ rand_str(i, str);
+ str[0] = 'A';
+ wcursor->set_value(wcursor, str);
+ testutil_check(wcursor->insert(wcursor));
+ }
+ testutil_check(wcursor->close(wcursor));
+
+ /* Run random cursor queries while compact is running */
+ testutil_check(session2->open_cursor(
+ session2, name, NULL, "next_random=true", &rcursor));
+ testutil_check(pthread_create(&thread, NULL, compact_thread, session));
+ query_docs(rcursor, true);
+ testutil_check(rcursor->close(rcursor));
+ testutil_check(pthread_join(thread, NULL));
+
+ /* Delete everything. Check for infinite loops */
+ testutil_check(session->open_cursor(
+ session, name, NULL, "overwrite", &wcursor));
+ for (i = 0; i < NUM_DOCS; i++) {
+ wcursor->set_key(wcursor, i);
+ testutil_check(wcursor->remove(wcursor));
+ }
+ testutil_check(wcursor->close(wcursor));
+
+ testutil_check(session2->open_cursor(
+ session2, name, NULL, "next_random=true", &rcursor));
+ for (i = 0; i < 3; i++)
+ testutil_assert(rcursor->next(rcursor) == WT_NOTFOUND);
+ printf("Successfully got WT_NOTFOUND\n");
+
+ testutil_cleanup(opts);
+ return (EXIT_SUCCESS);
+}
diff --git a/test/csuite/wt2447_join_main_table/main.c b/test/csuite/wt2447_join_main_table/main.c
index bbae61e7ed5..1368e7c8c09 100644
--- a/test/csuite/wt2447_join_main_table/main.c
+++ b/test/csuite/wt2447_join_main_table/main.c
@@ -49,8 +49,6 @@
* table.
*/
-void (*custom_die)(void) = NULL;
-
#define N_RECORDS 10000
static void
diff --git a/test/csuite/wt2535_insert_race/main.c b/test/csuite/wt2535_insert_race/main.c
index ee567df8749..ae18760a829 100644
--- a/test/csuite/wt2535_insert_race/main.c
+++ b/test/csuite/wt2535_insert_race/main.c
@@ -36,8 +36,6 @@
* Failure mode: Check that the data is correct at the end of the run.
*/
-void (*custom_die)(void) = NULL;
-
void *thread_insert_race(void *);
int
diff --git a/test/csuite/wt2592_join_schema/main.c b/test/csuite/wt2592_join_schema/main.c
index 4ffc9194646..0ec1c765d99 100644
--- a/test/csuite/wt2592_join_schema/main.c
+++ b/test/csuite/wt2592_join_schema/main.c
@@ -36,12 +36,6 @@
* Failure mode: The failure seen in WT-2592 was that no items were returned
* by a join.
*/
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <wiredtiger.h>
/* The C struct for the data we are storing in a WiredTiger table. */
typedef struct {
@@ -66,8 +60,6 @@ static POP_RECORD pop_data[] = {
{ "", 0, 0 }
};
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/test/csuite/wt2695_checksum/main.c b/test/csuite/wt2695_checksum/main.c
index 50f118bf828..db4fed5dc53 100644
--- a/test/csuite/wt2695_checksum/main.c
+++ b/test/csuite/wt2695_checksum/main.c
@@ -32,8 +32,6 @@
* Test case description: Smoke-test the CRC.
*/
-void (*custom_die)(void) = NULL;
-
static inline void
check(uint32_t hw, uint32_t sw, size_t len, const char *msg)
{
diff --git a/test/csuite/wt2719_reconfig/main.c b/test/csuite/wt2719_reconfig/main.c
index 64a47ea49a6..0942cfc73b2 100644
--- a/test/csuite/wt2719_reconfig/main.c
+++ b/test/csuite/wt2719_reconfig/main.c
@@ -34,9 +34,7 @@
* Test case description: Fuzz testing for WiredTiger reconfiguration.
*/
-void (*custom_die)(void) = NULL;
-
-static const char *list[] = {
+static const char * const list[] = {
",async=(enabled=0)",
",async=(enabled=1)",
",async=(ops_max=2048)",
diff --git a/test/csuite/wt2834_join_bloom_fix/main.c b/test/csuite/wt2834_join_bloom_fix/main.c
index 1d6abcfb179..7c80496f1b6 100644
--- a/test/csuite/wt2834_join_bloom_fix/main.c
+++ b/test/csuite/wt2834_join_bloom_fix/main.c
@@ -39,8 +39,6 @@
*
* Failure mode: We get results back from our join.
*/
-void (*custom_die)(void) = NULL;
-
#define N_RECORDS 100000
#define N_INSERT 1000000
@@ -103,8 +101,8 @@ main(int argc, char *argv[])
&maincur));
maincur->set_key(maincur, N_RECORDS);
maincur->set_value(maincur, 54321, 0, "", 0, N_RECORDS);
- maincur->insert(maincur);
- maincur->close(maincur);
+ testutil_check(maincur->insert(maincur));
+ testutil_check(maincur->close(maincur));
testutil_check(session->close(session, NULL));
populate(opts);
@@ -153,6 +151,7 @@ main(int argc, char *argv[])
key, key2, post, balance, flag);
count++;
}
+ testutil_assert(ret == WT_NOTFOUND);
testutil_assert(count == 0);
testutil_cleanup(opts);
@@ -197,6 +196,6 @@ populate(TEST_OPTS *opts)
testutil_check(maincur->insert(maincur));
testutil_check(session->commit_transaction(session, NULL));
}
- maincur->close(maincur);
- session->close(session, NULL);
+ testutil_check(maincur->close(maincur));
+ testutil_check(session->close(session, NULL));
}
diff --git a/test/csuite/wt2853_perf/main.c b/test/csuite/wt2853_perf/main.c
index 27591fdf214..6cec9634cd1 100644
--- a/test/csuite/wt2853_perf/main.c
+++ b/test/csuite/wt2853_perf/main.c
@@ -42,8 +42,6 @@
* continues until the test ends (~30 seconds).
*/
-void (*custom_die)(void) = NULL;
-
static void *thread_insert(void *);
static void *thread_get(void *);
diff --git a/test/cursor_order/cursor_order.c b/test/cursor_order/cursor_order.c
index aa351e6fea8..85b8c68e545 100644
--- a/test/cursor_order/cursor_order.c
+++ b/test/cursor_order/cursor_order.c
@@ -44,8 +44,6 @@ static void wt_shutdown(SHARED_CONFIG *);
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/test/fops/t.c b/test/fops/t.c
index bf0588d5a53..7b4a7cf8fca 100644
--- a/test/fops/t.c
+++ b/test/fops/t.c
@@ -51,8 +51,6 @@ static void wt_shutdown(void);
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/test/format/config.c b/test/format/config.c
index 542adf33da2..839ff5058de 100644
--- a/test/format/config.c
+++ b/test/format/config.c
@@ -187,8 +187,17 @@ config_setup(void)
/* Give in-memory configuration a final review. */
config_in_memory_check();
- /* Make the default maximum-run length 20 minutes. */
- if (!config_is_perm("timer"))
+ /*
+ * Run-length configured by a number of operations and a timer. If the
+ * operation count and the timer are both set by a configuration, there
+ * isn't anything to do. If only the operation count was configured,
+ * set a default maximum-run of 20 minutes. If only the timer is set,
+ * clear the operations count (which was set randomly).
+ */
+ if (config_is_perm("timer")) {
+ if (!config_is_perm("ops"))
+ config_single("ops=0", 0);
+ } else
config_single("timer=20", 0);
/*
@@ -270,28 +279,33 @@ config_compression(const char *conf_name)
*/
switch (mmrand(NULL, 1, 20)) {
#ifdef HAVE_BUILTIN_EXTENSION_LZ4
- case 1: case 2: case 3: case 4: /* 20% lz4 */
+ case 1: case 2: /* 10% lz4 */
cstr = "lz4";
break;
- case 5: /* 5% lz4-no-raw */
+ case 3: /* 5% lz4-no-raw */
cstr = "lz4-noraw";
break;
#endif
#ifdef HAVE_BUILTIN_EXTENSION_SNAPPY
- case 6: case 7: case 8: case 9: /* 30% snappy */
- case 10: case 11:
+ case 4: case 5: case 6: case 7: /* 30% snappy */
+ case 8: case 9:
cstr = "snappy";
break;
#endif
#ifdef HAVE_BUILTIN_EXTENSION_ZLIB
- case 12: case 13: case 14: case 15: /* 20% zlib */
+ case 10: case 11: case 12: case 13: /* 20% zlib */
cstr = "zlib";
break;
- case 16: /* 5% zlib-no-raw */
+ case 14: /* 5% zlib-no-raw */
cstr = "zlib-noraw";
break;
#endif
- case 17: case 18: case 19: case 20: /* 20% no compression */
+#ifdef HAVE_BUILTIN_EXTENSION_ZSTD
+ case 15: case 16: case 17: /* 15% zstd */
+ cstr = "zstd";
+ break;
+#endif
+ case 18: case 19: case 20: /* 15% no compression */
default:
break;
}
@@ -748,6 +762,8 @@ config_map_compression(const char *s, u_int *vp)
*vp = COMPRESS_ZLIB;
else if (strcmp(s, "zlib-noraw") == 0)
*vp = COMPRESS_ZLIB_NO_RAW;
+ else if (strcmp(s, "zstd") == 0)
+ *vp = COMPRESS_ZSTD;
else
testutil_die(EINVAL,
"illegal compression configuration: %s", s);
diff --git a/test/format/config.h b/test/format/config.h
index 725bc7c5d97..9bfba3cd0df 100644
--- a/test/format/config.h
+++ b/test/format/config.h
@@ -58,7 +58,7 @@ typedef struct {
} CONFIG;
#define COMPRESSION_LIST \
- "(none | lz4 | lz4-noraw | snappy | zlib | zlib-noraw)"
+ "(none | lz4 | lz4-noraw | snappy | zlib | zlib-noraw | zstd)"
static CONFIG c[] = {
{ "abort",
diff --git a/test/format/format.h b/test/format/format.h
index 363dcf9eea8..820bc020c9b 100644
--- a/test/format/format.h
+++ b/test/format/format.h
@@ -48,6 +48,8 @@
EXTPATH "compressors/snappy/.libs/libwiredtiger_snappy.so"
#define ZLIB_PATH \
EXTPATH "compressors/zlib/.libs/libwiredtiger_zlib.so"
+#define ZSTD_PATH \
+ EXTPATH "compressors/zstd/.libs/libwiredtiger_zstd.so"
#define REVERSE_PATH \
EXTPATH "collators/reverse/.libs/libwiredtiger_reverse_collator.so"
@@ -219,6 +221,7 @@ typedef struct {
#define COMPRESS_SNAPPY 5
#define COMPRESS_ZLIB 6
#define COMPRESS_ZLIB_NO_RAW 7
+#define COMPRESS_ZSTD 8
u_int c_compression_flag; /* Compression flag value */
u_int c_logging_compression_flag; /* Log compression flag value */
diff --git a/test/format/t.c b/test/format/t.c
index 41bdea11e73..7701595776c 100644
--- a/test/format/t.c
+++ b/test/format/t.c
@@ -38,8 +38,6 @@ static void usage(void)
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = format_die; /* Local death handler. */
-
int
main(int argc, char *argv[])
{
@@ -47,6 +45,8 @@ main(int argc, char *argv[])
int ch, onerun, reps;
const char *config, *home;
+ custom_die = format_die; /* Local death handler. */
+
config = NULL;
#ifdef _WIN32
diff --git a/test/format/wts.c b/test/format/wts.c
index 1600786855a..23fdbce156c 100644
--- a/test/format/wts.c
+++ b/test/format/wts.c
@@ -50,6 +50,8 @@ compressor(uint32_t compress_flag)
return ("zlib");
case COMPRESS_ZLIB_NO_RAW:
return ("zlib-noraw");
+ case COMPRESS_ZSTD:
+ return ("zstd");
default:
break;
}
@@ -210,13 +212,14 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp)
/* Extensions. */
p += snprintf(p, REMAIN(p, end),
",extensions=["
- "\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"],",
+ "\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"],",
g.c_reverse ? REVERSE_PATH : "",
access(LZ4_PATH, R_OK) == 0 ? LZ4_PATH : "",
access(LZO_PATH, R_OK) == 0 ? LZO_PATH : "",
access(ROTN_PATH, R_OK) == 0 ? ROTN_PATH : "",
access(SNAPPY_PATH, R_OK) == 0 ? SNAPPY_PATH : "",
access(ZLIB_PATH, R_OK) == 0 ? ZLIB_PATH : "",
+ access(ZSTD_PATH, R_OK) == 0 ? ZSTD_PATH : "",
DATASOURCE("kvsbdb") ? KVS_BDB_PATH : "");
/*
diff --git a/test/huge/huge.c b/test/huge/huge.c
index 3aa61a9048e..17e2db353d5 100644
--- a/test/huge/huge.c
+++ b/test/huge/huge.c
@@ -159,8 +159,6 @@ run(CONFIG *cp, int bigkey, size_t bytes)
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/test/manydbs/manydbs.c b/test/manydbs/manydbs.c
index c5c9a9a7ccd..7e986d47af3 100644
--- a/test/manydbs/manydbs.c
+++ b/test/manydbs/manydbs.c
@@ -68,8 +68,6 @@ usage(void)
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
-
static WT_CONNECTION **connections = NULL;
static WT_CURSOR **cursors = NULL;
static WT_RAND_STATE rnd;
diff --git a/test/packing/intpack-test.c b/test/packing/intpack-test.c
index 76851b38e35..c84823b741b 100644
--- a/test/packing/intpack-test.c
+++ b/test/packing/intpack-test.c
@@ -28,8 +28,6 @@
#include "test_util.h"
-void (*custom_die)(void) = NULL;
-
int
main(void)
{
diff --git a/test/packing/intpack-test2.c b/test/packing/intpack-test2.c
index a7d31329069..4e612808a35 100644
--- a/test/packing/intpack-test2.c
+++ b/test/packing/intpack-test2.c
@@ -28,8 +28,6 @@
#include "test_util.h"
-void (*custom_die)(void) = NULL;
-
int
main(void)
{
diff --git a/test/packing/intpack-test3.c b/test/packing/intpack-test3.c
index aac0178578f..763b0255ecf 100644
--- a/test/packing/intpack-test3.c
+++ b/test/packing/intpack-test3.c
@@ -28,8 +28,6 @@
#include "test_util.h"
-void (*custom_die)(void) = NULL;
-
void test_value(int64_t);
void test_spread(int64_t, int64_t, int64_t);
diff --git a/test/packing/packing-test.c b/test/packing/packing-test.c
index f251c17eb67..919b0622806 100644
--- a/test/packing/packing-test.c
+++ b/test/packing/packing-test.c
@@ -28,8 +28,6 @@
#include "test_util.h"
-void (*custom_die)(void) = NULL;
-
static void
check(const char *fmt, ...)
{
diff --git a/test/readonly/readonly.c b/test/readonly/readonly.c
index 7a131912c31..a4b79f5859f 100644
--- a/test/readonly/readonly.c
+++ b/test/readonly/readonly.c
@@ -158,8 +158,6 @@ open_dbs(int op, const char *dir,
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/test/recovery/random-abort.c b/test/recovery/random-abort.c
index 22a163bedff..03e67e2f723 100644
--- a/test/recovery/random-abort.c
+++ b/test/recovery/random-abort.c
@@ -179,8 +179,6 @@ fill_db(uint32_t nth)
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/test/recovery/truncated-log.c b/test/recovery/truncated-log.c
index c0effa85e95..c265263d44c 100644
--- a/test/recovery/truncated-log.c
+++ b/test/recovery/truncated-log.c
@@ -258,8 +258,6 @@ fill_db(void)
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/test/salvage/salvage.c b/test/salvage/salvage.c
index bad0167ca8e..b8553bbd72d 100644
--- a/test/salvage/salvage.c
+++ b/test/salvage/salvage.c
@@ -64,8 +64,6 @@ static int verbose; /* -v flag */
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/test/suite/test_bug017.py b/test/suite/test_bug017.py
new file mode 100644
index 00000000000..03e7b2ba714
--- /dev/null
+++ b/test/suite/test_bug017.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2016 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+
+# test_bug017.py
+# WT-2987: opening a cursor on an incomplete table drops core
+class test_bug017(wttest.WiredTigerTestCase):
+
+ def test_bug017_run(self):
+ self.session.create("table:bug17",
+ 'key_format=r,value_format=5sHQ,' +
+ 'columns=(id,country,year,population),colgroups=(main,population)')
+
+ msg = '/column groups/'
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.open_cursor("table:bug17(country)", None),
+ msg)
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/test/suite/test_cursor_random02.py b/test/suite/test_cursor_random02.py
index 93aa97f2282..195480d703b 100644
--- a/test/suite/test_cursor_random02.py
+++ b/test/suite/test_cursor_random02.py
@@ -34,7 +34,10 @@ from wtscenario import make_scenarios
# test_cursor_random02.py
# Cursor next_random operations
class test_cursor_random02(wttest.WiredTigerTestCase):
- type = 'table:random'
+ types = [
+ ('lsm', dict(type='lsm:random')),
+ ('table', dict(type='table:random'))
+ ]
config = [
('not-sample', dict(config='next_random=true'))
]
@@ -46,26 +49,35 @@ class test_cursor_random02(wttest.WiredTigerTestCase):
('10000', dict(records=10000)),
('50000', dict(records=50000)),
]
- scenarios = make_scenarios(config, records)
+ scenarios = make_scenarios(config, records, types)
# Check that next_random works in the presence of a larger set of values,
# where the values are in an insert list.
def test_cursor_random_reasonable_distribution(self):
uri = self.type
num_entries = self.records
+ config = 'key_format=S'
+ if uri == 'table:random':
+ config = 'leaf_page_max=100MB,' + config
# Set the leaf-page-max value, otherwise the page might split.
- simple_populate(self, uri,
- 'leaf_page_max=100MB,key_format=S', num_entries)
+ simple_populate(self, uri, config, num_entries)
# Setup an array to track which keys are seen
visitedKeys = [0] * (num_entries + 1)
+ # Setup a counter to see when we find a sequential key
+ sequentialKeys = 0
cursor = self.session.open_cursor(uri, None, 'next_random=true')
+ lastKey = None
for i in range(0, num_entries):
self.assertEqual(cursor.next(), 0)
current = cursor.get_key()
current = int(current)
visitedKeys[current] = visitedKeys[current] + 1
+ if lastKey != None:
+ if current == (lastKey + 1):
+ sequentialKeys += 1
+ lastKey = current
differentKeys = sum(x > 0 for x in visitedKeys)
@@ -76,7 +88,10 @@ class test_cursor_random02(wttest.WiredTigerTestCase):
str(num_entries) + ', ' + \
str((int)((differentKeys * 100) / num_entries)) + '%')
'''
-
+ # Can't test for non-sequential data when there is 1 item in the table
+ if num_entries > 1:
+ self.assertGreater(num_entries - 1, sequentialKeys,
+ 'cursor is returning sequential data')
self.assertGreater(differentKeys, num_entries / 4,
'next_random random distribution not adequate')
diff --git a/test/suite/test_encrypt01.py b/test/suite/test_encrypt01.py
index d314cbeadfd..746c9d13e96 100644
--- a/test/suite/test_encrypt01.py
+++ b/test/suite/test_encrypt01.py
@@ -57,6 +57,7 @@ class test_encrypt01(wttest.WiredTigerTestCase):
('lz4', dict(log_compress='lz4', block_compress='lz4')),
('snappy', dict(log_compress='snappy', block_compress='snappy')),
('zlib', dict(log_compress='zlib', block_compress='zlib')),
+ ('zstd', dict(log_compress='zstd', block_compress='zstd')),
('none-snappy', dict(log_compress=None, block_compress='snappy')),
('snappy-lz4', dict(log_compress='snappy', block_compress='lz4')),
]
diff --git a/test/suite/test_stat02.py b/test/suite/test_stat02.py
index 3d2a83d1c3c..047d2c74499 100644
--- a/test/suite/test_stat02.py
+++ b/test/suite/test_stat02.py
@@ -165,7 +165,7 @@ class test_stat_cursor_conn_error(wttest.WiredTigerTestCase):
args = ['none', 'all', 'fast']
for i in list(itertools.permutations(args, 2)):
config = 'create,statistics=(' + i[0] + ',' + i[1] + ')'
- msg = '/only one statistics configuration value/'
+ msg = '/Only one of/'
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.wiredtiger_open('.', config), msg)
@@ -188,10 +188,76 @@ class test_stat_cursor_dsrc_error(wttest.WiredTigerTestCase):
args = ['all', 'fast']
for i in list(itertools.permutations(args, 2)):
config = 'statistics=(' + i[0] + ',' + i[1] + ')'
- msg = '/only one statistics configuration value/'
+ msg = '/Only one of/'
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.open_cursor(
'statistics:' + self.uri, None, config), msg)
+# Test data-source cache walk statistics
+class test_stat_cursor_dsrc_cache_walk(wttest.WiredTigerTestCase):
+ uri = 'file:test_stat_cursor_dsrc_cache_walk'
+
+ conn_config = 'statistics=(none)'
+
+ def test_stat_cursor_dsrc_cache_walk(self):
+ simple_populate(self, self.uri, 'key_format=S', 100)
+ # Ensure that it's an error to get cache_walk stats if none is set
+ msg = '/doesn\'t match the database statistics/'
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.open_cursor(
+ 'statistics:' + self.uri, None, None), msg)
+
+ # Test configurations that are valid but should not collect
+ # cache walk information. Do these first since the cache walk
+ # statistics are mostly marked as not cleared - so once they are
+ # populated the values will always be returned
+ self.conn.reconfigure('statistics=(cache_walk,fast,clear)')
+ c = self.session.open_cursor(
+ 'statistics:' + self.uri, None, 'statistics=(fast)')
+ self.assertEqual(c[stat.dsrc.cache_state_root_size][2], 0)
+ c.close()
+
+ self.conn.reconfigure('statistics=(all,clear)')
+ c = self.session.open_cursor(
+ 'statistics:' + self.uri, None, 'statistics=(fast)')
+ self.assertEqual(c[stat.dsrc.cache_state_root_size][2], 0)
+ c.close()
+
+ self.conn.reconfigure('statistics=(cache_walk,fast,clear)')
+ c = self.session.open_cursor('statistics:' + self.uri, None, None)
+ self.assertGreater(c[stat.dsrc.cache_state_root_size][2], 0)
+ # Verify that cache_walk didn't imply tree_walk
+ self.assertEqual(c[stat.dsrc.btree_entries][2], 0)
+ c.close()
+
+ self.conn.reconfigure('statistics=(cache_walk,tree_walk,fast,clear)')
+ c = self.session.open_cursor('statistics:' + self.uri, None, None)
+ self.assertGreater(c[stat.dsrc.cache_state_root_size][2], 0)
+ # Verify that cache_walk didn't exclude tree_walk
+ self.assertGreater(c[stat.dsrc.btree_entries][2], 0)
+ c.close()
+
+ self.conn.reconfigure('statistics=(all,clear)')
+ c = self.session.open_cursor(
+ 'statistics:' + self.uri, None, 'statistics=(all)')
+ self.assertGreater(c[stat.dsrc.cache_state_root_size][2], 0)
+ self.assertGreater(c[stat.dsrc.btree_entries][2], 0)
+ c.close()
+
+ # Verify that cache and tree walk can operate independently
+ self.conn.reconfigure('statistics=(all,clear)')
+ c = self.session.open_cursor(
+ 'statistics:' + self.uri, None, 'statistics=(cache_walk,fast)')
+ self.assertGreater(c[stat.dsrc.cache_state_root_size][2], 0)
+ self.assertEqual(c[stat.dsrc.btree_entries][2], 0)
+ c.close()
+
+ self.conn.reconfigure('statistics=(all,clear)')
+ c = self.session.open_cursor(
+ 'statistics:' + self.uri, None, 'statistics=(tree_walk,fast)')
+ # Don't check the cache walk stats for empty - they won't be cleared
+ self.assertGreater(c[stat.dsrc.btree_entries][2], 0)
+ c.close()
+
if __name__ == '__main__':
wttest.run()
diff --git a/test/thread/t.c b/test/thread/t.c
index 5b53532e8a6..baadbf2adb9 100644
--- a/test/thread/t.c
+++ b/test/thread/t.c
@@ -52,8 +52,6 @@ static void wt_shutdown(void);
extern int __wt_optind;
extern char *__wt_optarg;
-void (*custom_die)(void) = NULL;
-
int
main(int argc, char *argv[])
{
diff --git a/test/utility/misc.c b/test/utility/misc.c
index 096bc752726..1491c9a6938 100644
--- a/test/utility/misc.c
+++ b/test/utility/misc.c
@@ -27,6 +27,8 @@
*/
#include "test_util.h"
+void (*custom_die)(void) = NULL;
+
/*
* die --
* Report an error and quit.
@@ -142,8 +144,6 @@ testutil_cleanup(TEST_OPTS *opts)
if (!opts->preserve)
testutil_clean_work_dir(opts->home);
- free(opts->conn_config);
- free(opts->table_config);
free(opts->uri);
free(opts->home);
}
diff --git a/test/utility/parse_opts.c b/test/utility/parse_opts.c
index 08aeafa9617..74a1c021d5d 100644
--- a/test/utility/parse_opts.c
+++ b/test/utility/parse_opts.c
@@ -27,10 +27,6 @@
*/
#include "test_util.h"
-extern int __wt_opterr; /* if error message should be printed */
-extern int __wt_optind; /* index into parent argv vector */
-extern int __wt_optopt; /* character checked for validity */
-extern int __wt_optreset; /* reset getopt */
extern char *__wt_optarg; /* argument associated with option */
/*
@@ -59,7 +55,7 @@ testutil_parse_opts(int argc, char * const *argv, TEST_OPTS *opts)
opts->n_append_threads = (uint64_t)atoll(__wt_optarg);
break;
case 'h': /* Home directory */
- opts->home = __wt_optarg;
+ opts->home = dstrdup(__wt_optarg);
break;
case 'n': /* Number of records */
opts->nrecords = (uint64_t)atoll(__wt_optarg);
@@ -116,12 +112,14 @@ testutil_parse_opts(int argc, char * const *argv, TEST_OPTS *opts)
}
/*
- * Setup the home directory. It needs to be unique for every test
- * or the auto make parallel tester gets upset.
+ * Setup the home directory if not explicitly specified. It needs to be
+ * unique for every test or the auto make parallel tester gets upset.
*/
- len = strlen("WT_TEST.") + strlen(opts->progname) + 10;
- opts->home = dmalloc(len);
- snprintf(opts->home, len, "WT_TEST.%s", opts->progname);
+ if (opts->home == NULL) {
+ len = strlen("WT_TEST.") + strlen(opts->progname) + 10;
+ opts->home = dmalloc(len);
+ snprintf(opts->home, len, "WT_TEST.%s", opts->progname);
+ }
/* Setup the default URI string */
len = strlen("table:") + strlen(opts->progname) + 10;
diff --git a/test/utility/test_util.h b/test/utility/test_util.h
index 1047d1ca8a0..f6a9cd68e02 100644
--- a/test/utility/test_util.h
+++ b/test/utility/test_util.h
@@ -68,10 +68,8 @@ typedef struct {
* resources.
*/
WT_CONNECTION *conn;
- char *conn_config;
WT_SESSION *session;
bool running;
- char *table_config;
char *uri;
volatile uint64_t next_threadid;
uint64_t max_inserted_id;
@@ -87,6 +85,16 @@ typedef struct {
} while (0)
/*
+ * testutil_assertfmt --
+ * Complain and quit if something isn't true.
+ */
+#define testutil_assertfmt(a, fmt, ...) do { \
+ if (!(a)) \
+ testutil_die(0, "%s/%d: %s: " fmt, \
+ __func__, __LINE__, #a, __VA_ARGS__); \
+} while (0)
+
+/*
* testutil_check --
* Complain and quit if a function call fails.
*/
diff --git a/tools/wtstats/stat_data.py b/tools/wtstats/stat_data.py
index 8f47b86a23e..635e710c469 100644
--- a/tools/wtstats/stat_data.py
+++ b/tools/wtstats/stat_data.py
@@ -91,6 +91,24 @@ no_scale_per_second_list = [
'btree: row-store leaf pages',
'cache: bytes currently in the cache',
'cache: overflow values cached in memory',
+ 'cache_walk: Average difference between current eviction generation when the page was last considered',
+ 'cache_walk: Average on-disk page image size seen',
+ 'cache_walk: Clean pages currently in cache',
+ 'cache_walk: Current eviction generation',
+ 'cache_walk: Dirty pages currently in cache',
+ 'cache_walk: Entries in the root page',
+ 'cache_walk: Internal pages currently in cache',
+ 'cache_walk: Leaf pages currently in cache',
+ 'cache_walk: Maximum difference between current eviction generation when the page was last considered',
+ 'cache_walk: Maximum page size seen',
+ 'cache_walk: Minimum on-disk page image size seen',
+ 'cache_walk: On-disk page image sizes smaller than a single allocation unit',
+ 'cache_walk: Pages created in memory and never written',
+ 'cache_walk: Pages currently queued for eviction',
+ 'cache_walk: Pages that could not be queued for eviction',
+ 'cache_walk: Refs skipped during cache traversal',
+ 'cache_walk: Size of the root page',
+ 'cache_walk: Total number of pages currently in cache',
'LSM: bloom filters in the LSM tree',
'LSM: chunks in the LSM tree',
'LSM: highest merge generation in the LSM tree',
@@ -162,6 +180,24 @@ no_clear_list = [
'transaction: transaction range of IDs currently pinned by named snapshots',
'btree: btree checkpoint generation',
'cache: bytes currently in the cache',
+ 'cache_walk: Average difference between current eviction generation when the page was last considered',
+ 'cache_walk: Average on-disk page image size seen',
+ 'cache_walk: Clean pages currently in cache',
+ 'cache_walk: Current eviction generation',
+ 'cache_walk: Dirty pages currently in cache',
+ 'cache_walk: Entries in the root page',
+ 'cache_walk: Internal pages currently in cache',
+ 'cache_walk: Leaf pages currently in cache',
+ 'cache_walk: Maximum difference between current eviction generation when the page was last considered',
+ 'cache_walk: Maximum page size seen',
+ 'cache_walk: Minimum on-disk page image size seen',
+ 'cache_walk: On-disk page image sizes smaller than a single allocation unit',
+ 'cache_walk: Pages created in memory and never written',
+ 'cache_walk: Pages currently queued for eviction',
+ 'cache_walk: Pages that could not be queued for eviction',
+ 'cache_walk: Refs skipped during cache traversal',
+ 'cache_walk: Size of the root page',
+ 'cache_walk: Total number of pages currently in cache',
'session: open cursor count',
]
prefix_list = [
@@ -169,6 +205,7 @@ prefix_list = [
'reconciliation',
'LSM',
'log',
+ 'lock',
'cache',
'transaction',
'cursor',
@@ -176,9 +213,10 @@ prefix_list = [
'session',
'block-manager',
'thread-yield',
+ 'cache_walk',
'async',
'btree',
'thread-state',
'compression',
]
-groups = {'cursor': ['cursor', 'session'], 'lsm': ['LSM', 'transaction'], 'system': ['connection', 'data-handle', 'session', 'thread-state'], 'evict': ['block-manager', 'cache', 'connection', 'thread-state'], 'memory': ['cache', 'connection', 'reconciliation']} \ No newline at end of file
+groups = {'cursor': ['cursor', 'session'], 'lsm': ['LSM', 'transaction'], 'system': ['connection', 'data-handle', 'session', 'thread-state'], 'evict': ['block-manager', 'cache', 'cache_walk', 'connection', 'thread-state'], 'memory': ['cache', 'cache_walk', 'connection', 'reconciliation']} \ No newline at end of file
diff --git a/tools/wtstats/wtstats.py b/tools/wtstats/wtstats.py
index 3549031c30f..bf5557d12f4 100755
--- a/tools/wtstats/wtstats.py
+++ b/tools/wtstats/wtstats.py
@@ -115,6 +115,9 @@ def parse_wtstats_file(file, result):
# Parse file
for line in open(file, 'rU'):
month, day, time, v, title = line.strip('\n').split(" ", 4)
+ # The colon in the URI confuses parsing, strip it out.
+ if "cache_walk" in title:
+ title = title.replace("file:", "", 1)
result[title].append((month + " " + day + " " + time, v))