summary | refs | log | tree | commit | diff
path: root/src/third_party
diff options
context:
space:
mode:
author: Luke Chen <luke.chen@mongodb.com> 2018-01-17 14:04:41 +1100
committer: Luke Chen <luke.chen@mongodb.com> 2018-01-17 14:15:15 +1100
commit: 23914068c331a42d1f98de0d58caecf0e391549a (patch)
tree: 4d03808a83bbf06d5f51c359711e5158957a578e /src/third_party
parent: 23870b6aecac924a15af49bb7abe2f8e1cda2aa8 (diff)
download: mongo-23914068c331a42d1f98de0d58caecf0e391549a.tar.gz
Import wiredtiger: 357efdd4ce279efc71ff618c59fe1b903ef80bb2 from branch mongodb-3.8
ref: 9e50448231..357efdd4ce
for: 3.7.2

WT-3565 Test and understand mixed timestamp/no-timestamp usage to same data
WT-3597 Add a diagnostic check for updates to the same key out of timestamp order
WT-3632 Increase how granularly cache usage settings can be configured
WT-3695 format failed to report a stuck cache
WT-3740 race in page dirty-byte decrement.
WT-3767 Avoid lookaside instantiation for faster reads
WT-3775 Improve commit timestamp is older than oldest timestamp error message
WT-3792 LSM version 1 metadata incompatibility
WT-3796 Report a better error message if transaction commit fails
WT-3799 Test/format with timestamps enabled pin cache full
WT-3809 Fix a bug in lookaside related to birthmarks
WT-3811 Python scripts for visualizing operation tracking files
WT-3818 __rec_txn_read() code order cleanup
WT-3825 Fix calculation of CPU ticks per unit time
WT-3826 random-abort test failure
WT-3827 test_compact02 failure
WT-3828 Link error on OS/X for __wt_process data reference
WT-3831 uninitialized buffer value in statlog server path comparison
WT-3832 Fixup shell script warning messages
WT-3833 test/format cache_minimum value error
WT-3841 Fix error message pattern in timestamp09
WT-3842 full-build Friday & lint
WT-3844 Checkpoints can hang on limbo pages
WT-3845 Compiler warning in examples using GCC 5.4.0
Diffstat (limited to 'src/third_party')
-rw-r--r-- src/third_party/wiredtiger/NEWS | 29
-rw-r--r-- src/third_party/wiredtiger/README | 6
-rw-r--r-- src/third_party/wiredtiger/RELEASE_INFO | 2
-rw-r--r-- src/third_party/wiredtiger/build_posix/aclocal/version-set.m4 | 4
-rw-r--r-- src/third_party/wiredtiger/build_posix/aclocal/version.m4 | 2
-rw-r--r-- src/third_party/wiredtiger/dist/api_data.py | 41
-rw-r--r-- src/third_party/wiredtiger/dist/s_define.list | 2
-rw-r--r-- src/third_party/wiredtiger/dist/s_string.ok | 1
-rwxr-xr-x src/third_party/wiredtiger/dist/s_void | 1
-rw-r--r-- src/third_party/wiredtiger/dist/stat_data.py | 2
-rw-r--r-- src/third_party/wiredtiger/examples/c/ex_smoke.c | 6
-rw-r--r-- src/third_party/wiredtiger/import.data | 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_cursor.c | 10
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_delete.c | 1
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_discard.c | 49
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_handle.c | 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_io.c | 6
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_random.c | 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_read.c | 374
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_ret.c | 44
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_split.c | 30
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_sync.c | 6
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_walk.c | 7
-rw-r--r-- src/third_party/wiredtiger/src/btree/col_modify.c | 3
-rw-r--r-- src/third_party/wiredtiger/src/btree/row_modify.c | 24
-rw-r--r-- src/third_party/wiredtiger/src/cache/cache_las.c | 7
-rw-r--r-- src/third_party/wiredtiger/src/config/config_def.c | 60
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_cache.c | 63
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_cache_pool.c | 11
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_log.c | 6
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_stat.c | 20
-rw-r--r-- src/third_party/wiredtiger/src/cursor/cur_file.c | 30
-rw-r--r-- src/third_party/wiredtiger/src/docs/top/main.dox | 8
-rw-r--r-- src/third_party/wiredtiger/src/docs/upgrading.dox | 10
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_lru.c | 86
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_page.c | 20
-rw-r--r-- src/third_party/wiredtiger/src/include/api.h | 16
-rw-r--r-- src/third_party/wiredtiger/src/include/btmem.h | 41
-rw-r--r-- src/third_party/wiredtiger/src/include/btree.i | 15
-rw-r--r-- src/third_party/wiredtiger/src/include/cache.h | 14
-rw-r--r-- src/third_party/wiredtiger/src/include/cache.i | 24
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h | 8
-rw-r--r-- src/third_party/wiredtiger/src/include/misc.i | 24
-rw-r--r-- src/third_party/wiredtiger/src/include/mutex.i | 8
-rw-r--r-- src/third_party/wiredtiger/src/include/optrack.h | 2
-rw-r--r-- src/third_party/wiredtiger/src/include/os.h | 16
-rw-r--r-- src/third_party/wiredtiger/src/include/os_fhandle.i | 12
-rw-r--r-- src/third_party/wiredtiger/src/include/serial.i | 23
-rw-r--r-- src/third_party/wiredtiger/src/include/stat.h | 2
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.h | 3
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.i | 3
-rw-r--r-- src/third_party/wiredtiger/src/include/wiredtiger.in | 564
-rw-r--r-- src/third_party/wiredtiger/src/log/log.c | 24
-rw-r--r-- src/third_party/wiredtiger/src/log/log_slot.c | 18
-rw-r--r-- src/third_party/wiredtiger/src/lsm/lsm_meta.c | 22
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_dir.c | 14
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c | 97
-rw-r--r-- src/third_party/wiredtiger/src/session/session_api.c | 10
-rw-r--r-- src/third_party/wiredtiger/src/support/global.c | 4
-rw-r--r-- src/third_party/wiredtiger/src/support/hazard.c | 10
-rw-r--r-- src/third_party/wiredtiger/src/support/mtx_rw.c | 16
-rw-r--r-- src/third_party/wiredtiger/src/support/stat.c | 8
-rw-r--r-- src/third_party/wiredtiger/src/support/time.c | 26
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn.c | 110
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_ckpt.c | 22
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c | 30
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_timestamp.c | 65
-rw-r--r-- src/third_party/wiredtiger/test/csuite/random_abort/main.c | 10
-rw-r--r-- src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c | 3
-rw-r--r-- src/third_party/wiredtiger/test/format/config.h | 2
-rw-r--r-- src/third_party/wiredtiger/test/format/ops.c | 16
-rw-r--r-- src/third_party/wiredtiger/test/format/util.c | 11
-rw-r--r-- src/third_party/wiredtiger/test/packing/intpack-test.c | 6
-rw-r--r-- src/third_party/wiredtiger/test/packing/intpack-test2.c | 6
-rw-r--r-- src/third_party/wiredtiger/test/packing/intpack-test3.c | 6
-rw-r--r-- src/third_party/wiredtiger/test/packing/packing-test.c | 6
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_compact02.py | 8
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_config04.py | 63
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_reconfig01.py | 7
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_shared_cache01.py | 33
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_shared_cache02.py | 30
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_timestamp04.py | 56
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_timestamp09.py | 4
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_timestamp10.py | 162
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_timestamp11.py | 150
-rw-r--r-- src/third_party/wiredtiger/tools/optrack/arrow-left.png | bin 0 -> 103602 bytes
-rw-r--r-- src/third_party/wiredtiger/tools/optrack/arrow-right.png | bin 0 -> 108216 bytes
-rwxr-xr-x src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py | 1063
-rwxr-xr-x src/third_party/wiredtiger/tools/optrack/wt_optrack_decode.py (renamed from src/third_party/wiredtiger/tools/wt_optrack_decode.py) | 0
89 files changed, 2887 insertions, 983 deletions
diff --git a/src/third_party/wiredtiger/NEWS b/src/third_party/wiredtiger/NEWS
index ffcefd5f8c1..7bf3b0e7edb 100644
--- a/src/third_party/wiredtiger/NEWS
+++ b/src/third_party/wiredtiger/NEWS
@@ -1,6 +1,35 @@
Ticket reference tags refer to tickets in the MongoDB JIRA tracking system:
https://jira.mongodb.org
+WiredTiger release 3.0.0, 2018-01-08
+------------------------------------
+
+See the upgrading documentation for details of API and behavior changes.
+
+Significant changes:
+* WT-3039 Change the log file format to record a previous LSN record
+* WT-3181 Add support for application defined transaction IDs via a mechanism called timestamps.
+* WT-3310 Add support to WT_SESSION::alter to change table log setting
+* WT-3389 Restructure page split code to hold a split generation for the entire operation.
+* WT-3406 Fix a bug in reconciliation so that it ignores concurrent updates.
+* WT-3418 Fix a block manager race in tree close/open
+* WT-3435 Improvements to the cache overflow mechanism aka lookaside
+* WT-3437 Improvements to auto tuning of number of eviction workers
+* WT-3440 Add a log record when starting a checkpoint.
+* WT-3461 Avoid hangs when system clocks move backwards by using CLOCK_MONOTONIC for pthread_cond_timedwait if possible.
+* WT-3490 Fix a bug in WT_CURSOR.modify unaligned size_t access.
+* WT-3495 Fix a bug so we don't ftruncate if log cursors are open
+* WT-3497 Improve logging message when hitting the configured session limits
+* WT-3537 Split pages in memory when nothing can be written
+* WT-3556 Remove wtstats support
+* WT-3681 Change recovery so that it doesn't truncate the last log file
+* WT-3683 Allow eviction of clean pages with history when cache is stuck
+* WT-3710 Get a page-level lock to ensure page splits are single threaded
+* WT-3752 Allow trimming of obsolete modify updates.
+
+See JIRA changelog for a full listing:
+https://jira.mongodb.org/projects/WT/versions/18401
+
WiredTiger release 2.9.3, 2017-06-27
------------------------------------
diff --git a/src/third_party/wiredtiger/README b/src/third_party/wiredtiger/README
index 4def09abba6..234038d21d0 100644
--- a/src/third_party/wiredtiger/README
+++ b/src/third_party/wiredtiger/README
@@ -1,6 +1,6 @@
-WiredTiger 3.0.0: (June 27, 2017)
+WiredTiger 3.0.1: (January 8, 2018)
-This is version 3.0.0 of WiredTiger.
+This is version 3.0.1 of WiredTiger.
WiredTiger release packages and documentation can be found at:
@@ -8,7 +8,7 @@ WiredTiger release packages and documentation can be found at:
The documentation for this specific release can be found at:
- http://source.wiredtiger.com/3.0.0/index.html
+ http://source.wiredtiger.com/3.0.1/index.html
The WiredTiger source code can be found at:
diff --git a/src/third_party/wiredtiger/RELEASE_INFO b/src/third_party/wiredtiger/RELEASE_INFO
index ccdff34f2d5..38189c7be01 100644
--- a/src/third_party/wiredtiger/RELEASE_INFO
+++ b/src/third_party/wiredtiger/RELEASE_INFO
@@ -1,6 +1,6 @@
WIREDTIGER_VERSION_MAJOR=3
WIREDTIGER_VERSION_MINOR=0
-WIREDTIGER_VERSION_PATCH=0
+WIREDTIGER_VERSION_PATCH=1
WIREDTIGER_VERSION="$WIREDTIGER_VERSION_MAJOR.$WIREDTIGER_VERSION_MINOR.$WIREDTIGER_VERSION_PATCH"
WIREDTIGER_RELEASE_DATE=`date "+%B %e, %Y"`
diff --git a/src/third_party/wiredtiger/build_posix/aclocal/version-set.m4 b/src/third_party/wiredtiger/build_posix/aclocal/version-set.m4
index 5e54ad1cb69..fa3fed8638b 100644
--- a/src/third_party/wiredtiger/build_posix/aclocal/version-set.m4
+++ b/src/third_party/wiredtiger/build_posix/aclocal/version-set.m4
@@ -2,8 +2,8 @@ dnl build by dist/s_version
VERSION_MAJOR=3
VERSION_MINOR=0
-VERSION_PATCH=0
-VERSION_STRING='"WiredTiger 3.0.0: (June 27, 2017)"'
+VERSION_PATCH=1
+VERSION_STRING='"WiredTiger 3.0.1: (January 8, 2018)"'
AC_SUBST(VERSION_MAJOR)
AC_SUBST(VERSION_MINOR)
diff --git a/src/third_party/wiredtiger/build_posix/aclocal/version.m4 b/src/third_party/wiredtiger/build_posix/aclocal/version.m4
index 3c5980dbaad..9a6918366e9 100644
--- a/src/third_party/wiredtiger/build_posix/aclocal/version.m4
+++ b/src/third_party/wiredtiger/build_posix/aclocal/version.m4
@@ -1,2 +1,2 @@
dnl WiredTiger product version for AC_INIT. Maintained by dist/s_version
-3.0.0
+3.0.1
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index c0afe3ae041..2a369bdafbc 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -454,29 +454,36 @@ connection_runtime_config = [
]),
Config('eviction_checkpoint_target', '5', r'''
perform eviction at the beginning of checkpoints to bring the dirty
- content in cache to this level, expressed as a percentage of the total
- cache size. Ignored if set to zero or \c in_memory is \c true''',
- min=0, max=99),
+ content in cache to this level. It is a percentage of the cache size if
+ the value is within the range of 0 to 100 or an absolute size when
+ greater than 100. The value is not allowed to exceed the \c cache_size.
+ Ignored if set to zero or \c in_memory is \c true''',
+ min=0, max='10TB'),
Config('eviction_dirty_target', '5', r'''
perform eviction in worker threads when the cache contains at least
- this much dirty content, expressed as a percentage of the total cache
- size.''',
- min=1, max=99),
+ this much dirty content. It is a percentage of the cache size if the
+ value is within the range of 1 to 100 or an absolute size when greater
+ than 100. The value is not allowed to exceed the \c cache_size.''',
+ min=1, max='10TB'),
Config('eviction_dirty_trigger', '20', r'''
trigger application threads to perform eviction when the cache contains
- at least this much dirty content, expressed as a percentage of the
- total cache size. This setting only alters behavior if it is lower than
- eviction_trigger''',
- min=1, max=99),
+ at least this much dirty content. It is a percentage of the cache size
+ if the value is within the range of 1 to 100 or an absolute size when
+ greater than 100. The value is not allowed to exceed the \c cache_size.
+ This setting only alters behavior if it is lower than eviction_trigger
+ ''', min=1, max='10TB'),
Config('eviction_target', '80', r'''
perform eviction in worker threads when the cache contains at least
- this much content, expressed as a percentage of the total cache size.
- Must be less than \c eviction_trigger''',
- min=10, max=99),
+ this much content. It is a percentage of the cache size if the value is
+ within the range of 10 to 100 or an absolute size when greater than 100.
+ The value is not allowed to exceed the \c cache_size.''',
+ min=10, max='10TB'),
Config('eviction_trigger', '95', r'''
trigger application threads to perform eviction when the cache contains
- at least this much content, expressed as a percentage of the
- total cache size''', min=10, max=99),
+ at least this much content. It is a percentage of the cache size if the
+ value is within the range of 10 to 100 or an absolute size when greater
+ than 100. The value is not allowed to exceed the \c cache_size.''',
+ min=10, max='10TB'),
Config('file_manager', '', r'''
control how file handles are managed''',
type='category', subconfig=[
@@ -525,7 +532,9 @@ connection_runtime_config = [
Config('shared_cache', '', r'''
shared cache configuration options. A database should configure
either a cache_size or a shared_cache not both. Enabling a
- shared cache uses a session from the configured session_max''',
+ shared cache uses a session from the configured session_max. A
+ shared cache can not have absolute values configured for cache
+ eviction settings''',
type='category', subconfig=[
Config('chunk', '10MB', r'''
the granularity that a shared cache is redistributed''',
diff --git a/src/third_party/wiredtiger/dist/s_define.list b/src/third_party/wiredtiger/dist/s_define.list
index 16f06da383c..cfae3106fcf 100644
--- a/src/third_party/wiredtiger/dist/s_define.list
+++ b/src/third_party/wiredtiger/dist/s_define.list
@@ -20,6 +20,7 @@ WT_BLOCK_HEADER_SIZE
WT_CACHE_LINE_ALIGNMENT
WT_CACHE_LINE_PAD_BEGIN
WT_CACHE_LINE_PAD_END
+WT_CLOCKDIFF_NS
WT_CONN_CHECK_PANIC
WT_DEADLOCK
WT_DEBUG_BYTE
@@ -67,7 +68,6 @@ WT_TRACK_OP
WT_TRACK_OP_END
WT_TRACK_OP_INIT
WT_TRET_ERROR_OK
-WT_TSCDIFF_NS
WT_UPDATE_SIZE
WT_WITH_LOCK_NOWAIT
WT_WITH_LOCK_WAIT
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index 1f025013fe7..5d2eb7427b6 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -1206,6 +1206,7 @@ tempdir
testutil
th
tid
+timedwait
timestamp
timestamps
tmp
diff --git a/src/third_party/wiredtiger/dist/s_void b/src/third_party/wiredtiger/dist/s_void
index 4a0e73e1c0d..9c5f6711da0 100755
--- a/src/third_party/wiredtiger/dist/s_void
+++ b/src/third_party/wiredtiger/dist/s_void
@@ -70,6 +70,7 @@ func_ok()
-e '/int __wt_stat_connection_desc$/d' \
-e '/int __wt_stat_dsrc_desc$/d' \
-e '/int __wt_stat_join_desc$/d' \
+ -e '/int __wt_txn_rollback_required$/d' \
-e '/int __wt_win_directory_list_free$/d' \
-e '/int bdb_compare_reverse$/d' \
-e '/int copyout_val$/d' \
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index 9c828f66cef..6cd3f219b4a 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -272,6 +272,8 @@ connection_stats = [
CacheStat('cache_read_app_count', 'application threads page read from disk to cache count'),
CacheStat('cache_read_app_time', 'application threads page read from disk to cache time (usecs)'),
CacheStat('cache_read_lookaside', 'pages read into cache requiring lookaside entries'),
+ CacheStat('cache_read_lookaside_delay', 'pages read into cache with skipped lookaside entries needed later'),
+ CacheStat('cache_read_lookaside_skipped', 'pages read into cache skipping older lookaside entries'),
CacheStat('cache_read_overflow', 'overflow pages read into cache'),
CacheStat('cache_write', 'pages written from cache'),
CacheStat('cache_write_app_count', 'application threads page write from cache to disk count'),
diff --git a/src/third_party/wiredtiger/examples/c/ex_smoke.c b/src/third_party/wiredtiger/examples/c/ex_smoke.c
index 2091073c2f4..2647a706a54 100644
--- a/src/third_party/wiredtiger/examples/c/ex_smoke.c
+++ b/src/third_party/wiredtiger/examples/c/ex_smoke.c
@@ -45,7 +45,11 @@ main(int argc, char *argv[])
* This code deliberately doesn't use the standard test_util macros,
* we don't want to link against that code to smoke-test a build.
*/
- (void)system("rm -rf WT_HOME && mkdir WT_HOME");
+ if ((ret = system("rm -rf WT_HOME && mkdir WT_HOME")) != 0) {
+ fprintf(stderr,
+ "Failed to clean up prior to running example.\n");
+ return (EXIT_FAILURE);
+ }
/* Open a connection to the database, creating it if necessary. */
if ((ret = wiredtiger_open("WT_HOME", NULL, "create", &conn)) != 0) {
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 1800088ef2a..dc3e684a19f 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "9e5044823185feffa71e56a6593cfb92e0741a41",
+ "commit": "357efdd4ce279efc71ff618c59fe1b903ef80bb2",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-3.8"
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index a329e09a0c2..6e90447f18d 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -58,8 +58,10 @@ static inline bool
__cursor_page_pinned(WT_CURSOR_BTREE *cbt)
{
WT_CURSOR *cursor;
+ WT_SESSION_IMPL *session;
cursor = &cbt->iface;
+ session = (WT_SESSION_IMPL *)cursor->session;
/*
* Check the page active flag, asserting the page reference with any
@@ -86,6 +88,14 @@ __cursor_page_pinned(WT_CURSOR_BTREE *cbt)
if (cbt->ref->page->read_gen == WT_READGEN_OLDEST)
return (false);
+ /*
+ * If we are doing an update, we need a page with history. Release the
+ * page so we get it again with history if required.
+ */
+ if (F_ISSET(&session->txn, WT_TXN_UPDATE) &&
+ cbt->ref->state != WT_REF_MEM)
+ return (false);
+
return (true);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_delete.c b/src/third_party/wiredtiger/src/btree/bt_delete.c
index ed55491ab38..a728341e033 100644
--- a/src/third_party/wiredtiger/src/btree/bt_delete.c
+++ b/src/third_party/wiredtiger/src/btree/bt_delete.c
@@ -158,6 +158,7 @@ __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref)
for (sleep_count = yield_count = 0;;) {
switch (ref->state) {
case WT_REF_DISK:
+ case WT_REF_LIMBO:
case WT_REF_LOOKASIDE:
case WT_REF_READING:
WT_ASSERT(session, 0); /* Impossible, assert */
diff --git a/src/third_party/wiredtiger/src/btree/bt_discard.c b/src/third_party/wiredtiger/src/btree/bt_discard.c
index 2ba1c9734b9..66974c70e04 100644
--- a/src/third_party/wiredtiger/src/btree/bt_discard.c
+++ b/src/third_party/wiredtiger/src/btree/bt_discard.c
@@ -16,14 +16,13 @@ static void __free_skip_array(
WT_SESSION_IMPL *, WT_INSERT_HEAD **, uint32_t, bool);
static void __free_skip_list(WT_SESSION_IMPL *, WT_INSERT *, bool);
static void __free_update(WT_SESSION_IMPL *, WT_UPDATE **, uint32_t, bool);
-static void __page_out_int(WT_SESSION_IMPL *, WT_PAGE **, bool);
/*
- * __wt_ref_out_int --
+ * __wt_ref_out --
* Discard an in-memory page, freeing all memory associated with it.
*/
void
-__wt_ref_out_int(WT_SESSION_IMPL *session, WT_REF *ref, bool rewrite)
+__wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref)
{
/*
* A version of the page-out function that allows us to make additional
@@ -57,25 +56,15 @@ __wt_ref_out_int(WT_SESSION_IMPL *session, WT_REF *ref, bool rewrite)
}
#endif
- __page_out_int(session, &ref->page, rewrite);
+ __wt_page_out(session, &ref->page);
}
/*
- * __wt_ref_out --
+ * __wt_page_out --
* Discard an in-memory page, freeing all memory associated with it.
*/
void
-__wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref)
-{
- __wt_ref_out_int(session, ref, false);
-}
-
-/*
- * __page_out_int --
- * Discard an in-memory page, freeing all memory associated with it.
- */
-static void
-__page_out_int(WT_SESSION_IMPL *session, WT_PAGE **pagep, bool rewrite)
+__wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep)
{
WT_PAGE *page;
WT_PAGE_HEADER *dsk;
@@ -113,7 +102,7 @@ __page_out_int(WT_SESSION_IMPL *session, WT_PAGE **pagep, bool rewrite)
}
/* Update the cache's information. */
- __wt_cache_page_evict(session, page, rewrite);
+ __wt_cache_page_evict(session, page);
dsk = (WT_PAGE_HEADER *)page->dsk;
if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC))
@@ -158,16 +147,6 @@ __page_out_int(WT_SESSION_IMPL *session, WT_PAGE **pagep, bool rewrite)
}
/*
- * __wt_page_out --
- * Discard an in-memory page, freeing all memory associated with it.
- */
-void
-__wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep)
-{
- __page_out_int(session, pagep, false);
-}
-
-/*
* __free_page_modify --
* Discard the page's associated modification structures.
*/
@@ -310,20 +289,12 @@ __wt_free_ref(
break;
}
- /*
- * Free any address allocation; if there's no linked WT_REF page, it
- * must be allocated.
- */
+ /* Free any address allocation. */
__wt_ref_addr_free(session, ref);
- /*
- * Free any lookaside or page-deleted information. We only expect a
- * lookaside structure for lookaside references, but can see
- * page-deleted information in other cases (such as WT_REF_MEM).
- */
- if (ref->state == WT_REF_LOOKASIDE)
- __wt_free(session, ref->page_las);
- else if (ref->page_del != NULL) {
+ /* Free any lookaside or page-deleted information. */
+ __wt_free(session, ref->page_las);
+ if (ref->page_del != NULL) {
__wt_free(session, ref->page_del->update_list);
__wt_free(session, ref->page_del);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c
index 396c5f66539..840e4fa5d2e 100644
--- a/src/third_party/wiredtiger/src/btree/bt_handle.c
+++ b/src/third_party/wiredtiger/src/btree/bt_handle.c
@@ -807,7 +807,7 @@ __btree_page_sizes(WT_SESSION_IMPL *session)
btree->maxmempage = (uint64_t)cval.val;
if (!F_ISSET(conn, WT_CONN_CACHE_POOL) &&
(cache_size = conn->cache_size) > 0)
- btree->maxmempage = WT_MIN(btree->maxmempage,
+ btree->maxmempage = (uint64_t)WT_MIN(btree->maxmempage,
(conn->cache->eviction_dirty_trigger * cache_size) / 1000);
/* Enforce a lower bound of a single disk leaf page */
diff --git a/src/third_party/wiredtiger/src/btree/bt_io.c b/src/third_party/wiredtiger/src/btree/bt_io.c
index 3e636ad922d..007513fd581 100644
--- a/src/third_party/wiredtiger/src/btree/bt_io.c
+++ b/src/third_party/wiredtiger/src/btree/bt_io.c
@@ -368,7 +368,7 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf,
}
timer = !F_ISSET(session, WT_SESSION_INTERNAL);
if (timer)
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
/* Call the block manager to write the block. */
WT_ERR(checkpoint ?
@@ -378,10 +378,10 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf,
/* Update some statistics now that the write is done */
if (timer) {
- time_stop = __wt_rdtsc(session);
+ time_stop = __wt_clock(session);
WT_STAT_CONN_INCR(session, cache_write_app_count);
WT_STAT_CONN_INCRV(session, cache_write_app_time,
- WT_TSCDIFF_US(time_stop, time_start));
+ WT_CLOCKDIFF_US(time_stop, time_start));
}
WT_STAT_CONN_INCR(session, cache_write);
diff --git a/src/third_party/wiredtiger/src/btree/bt_random.c b/src/third_party/wiredtiger/src/btree/bt_random.c
index fd2a2ac7190..03b5039b00b 100644
--- a/src/third_party/wiredtiger/src/btree/bt_random.c
+++ b/src/third_party/wiredtiger/src/btree/bt_random.c
@@ -232,6 +232,7 @@ restart: /*
descent =
pindex->index[__wt_random(&session->rnd) % entries];
if (descent->state == WT_REF_DISK ||
+ descent->state == WT_REF_LIMBO ||
descent->state == WT_REF_LOOKASIDE ||
descent->state == WT_REF_MEM)
break;
@@ -240,6 +241,7 @@ restart: /*
for (i = 0; i < entries; ++i) {
descent = pindex->index[i];
if (descent->state == WT_REF_DISK ||
+ descent->state == WT_REF_LIMBO ||
descent->state == WT_REF_LOOKASIDE ||
descent->state == WT_REF_MEM)
break;
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index a98983746e4..afaf6c82aa5 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -8,9 +8,6 @@
#include "wt_internal.h"
-static void __btree_verbose_lookaside_read(
- WT_SESSION_IMPL *, uint32_t, uint64_t);
-
/*
* __col_instantiate --
* Update a column-store page entry based on a lookaside table update list.
@@ -70,6 +67,142 @@ __row_instantiate(WT_SESSION_IMPL *session,
}
/*
+ * __las_page_skip_locked --
+ * Check if we can skip reading a locked page with lookaside entries.
+ */
+static inline bool
+__las_page_skip_locked(WT_SESSION_IMPL *session, WT_REF *ref)
+{
+ WT_TXN *txn;
+
+ txn = &session->txn;
+
+ /*
+ * Skip lookaside pages if reading without a timestamp and all the
+ * updates in lookaside are in the past.
+ *
+ * Lookaside eviction preferentially chooses the newest updates when
+ * creating page images with no stable timestamp. If a stable timestamp
+ * has been set, we have to visit the page because eviction chooses old
+ * version of records in that case.
+ *
+ * One case where we may need to visit the page is if lookaside eviction
+ * is active in tree 2 when a checkpoint has started and is working its
+ * way through tree 1. In that case, lookaside may have created a page
+ * image with updates in the future of the checkpoint.
+ *
+ * We also need to instantiate a lookaside page if this is an update
+ * operation in progress.
+ */
+ if (ref->page_las->invalid)
+ return (false);
+
+ if (F_ISSET(txn, WT_TXN_UPDATE))
+ return (false);
+
+ if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT))
+ return (false);
+
+ if (WT_TXNID_LE(txn->snap_min, ref->page_las->las_max_txn))
+ return (false);
+
+ if (!F_ISSET(txn, WT_TXN_HAS_TS_READ) && ref->page_las->las_skew_newest)
+ return (true);
+
+#ifdef HAVE_TIMESTAMPS
+ /*
+ * Skip lookaside pages if reading as of a timestamp, we evicted new
+ * versions of data and all the updates are in the past.
+ */
+ if (F_ISSET(&session->txn, WT_TXN_HAS_TS_READ) &&
+ ref->page_las->las_skew_newest &&
+ __wt_timestamp_cmp(
+ &ref->page_las->onpage_timestamp, &session->txn.read_timestamp) < 0)
+ return (true);
+
+ /*
+ * Skip lookaside pages if reading as of a timestamp, we evicted old
+ * versions of data and all the updates are in the future.
+ */
+ if (F_ISSET(&session->txn, WT_TXN_HAS_TS_READ) &&
+ !ref->page_las->las_skew_newest &&
+ __wt_timestamp_cmp(
+ &ref->page_las->min_timestamp, &session->txn.read_timestamp) > 0)
+ return (true);
+#endif
+
+ return (false);
+}
+
+/*
+ * __las_page_skip --
+ * Check if we can skip reading a page with lookaside entries.
+ */
+static inline bool
+__las_page_skip(WT_SESSION_IMPL *session, WT_REF *ref)
+{
+ uint32_t previous_state;
+ bool skip;
+
+ if ((previous_state = ref->state) != WT_REF_LIMBO &&
+ previous_state != WT_REF_LOOKASIDE)
+ return (false);
+
+ if (!__wt_atomic_casv32(&ref->state, previous_state, WT_REF_LOCKED))
+ return (false);
+
+ skip = __las_page_skip_locked(session, ref);
+
+ /* Restore the state and push the change. */
+ ref->state = previous_state;
+ WT_FULL_BARRIER();
+
+ return (skip);
+}
+
+/*
+ * __las_page_instantiate_verbose --
+ * Create a verbose message to display at most once per checkpoint when
+ * performing a lookaside table read.
+ */
+static void
+__las_page_instantiate_verbose(WT_SESSION_IMPL *session, uint64_t las_pageid)
+{
+ WT_CACHE *cache;
+ uint64_t ckpt_gen_current, ckpt_gen_last;
+
+ if (!WT_VERBOSE_ISSET(session,
+ WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY))
+ return;
+
+ cache = S2C(session)->cache;
+ ckpt_gen_current = __wt_gen(session, WT_GEN_CHECKPOINT);
+ ckpt_gen_last = cache->las_verb_gen_read;
+
+ /*
+ * This message is throttled to one per checkpoint. To do this we
+ * track the generation of the last checkpoint for which the message
+ * was printed and check against the current checkpoint generation.
+ */
+ if (WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE) ||
+ ckpt_gen_current > ckpt_gen_last) {
+ /*
+ * Attempt to atomically replace the last checkpoint generation
+ * for which this message was printed. If the atomic swap fails
+ * we have raced and the winning thread will print the message.
+ */
+ if (__wt_atomic_casv64(&cache->las_verb_gen_read,
+ ckpt_gen_last, ckpt_gen_current)) {
+ __wt_verbose(session,
+ WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY,
+ "Read from lookaside file triggered for "
+ "file ID %" PRIu32 ", page ID %" PRIu64,
+ S2BT(session)->id, las_pageid);
+ }
+ }
+}
+
+/*
* __las_page_instantiate --
* Instantiate lookaside update records in a recently read page.
*/
@@ -97,6 +230,10 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id)
session_flags = 0; /* [-Werror=maybe-uninitialized] */
WT_CLEAR(las_key);
+ __las_page_instantiate_verbose(session, ref->page_las->las_pageid);
+ WT_STAT_CONN_INCR(session, cache_read_lookaside);
+ WT_STAT_DATA_INCR(session, cache_read_lookaside);
+
__wt_btcur_init(session, &cbt);
__wt_btcur_open(&cbt);
@@ -320,7 +457,7 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
WT_PAGE *page;
size_t addr_size;
uint64_t time_start, time_stop;
- uint32_t page_flags, new_state, previous_state;
+ uint32_t page_flags, final_state, new_state, previous_state;
const uint8_t *addr;
bool timer;
@@ -349,6 +486,7 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
new_state = WT_REF_READING;
break;
case WT_REF_DELETED:
+ case WT_REF_LIMBO:
case WT_REF_LOOKASIDE:
new_state = WT_REF_LOCKED;
break;
@@ -358,6 +496,20 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
if (!__wt_atomic_casv32(&ref->state, previous_state, new_state))
return (0);
+ final_state = WT_REF_MEM;
+
+ /*
+ * If we already have the page image, just instantiate the history.
+ *
+ * We need exclusive access because other threads could be reading the
+ * page without history and we can't change the state underneath them.
+ */
+ if (previous_state == WT_REF_LIMBO) {
+ if (__wt_hazard_check(session, ref) != NULL)
+ goto err;
+ goto skip_read;
+ }
+
/*
* Get the address: if there is no address, the page was deleted or had
* only lookaside entries, and a subsequent search or insert is forcing
@@ -380,66 +532,89 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
*/
timer = !F_ISSET(session, WT_SESSION_INTERNAL);
if (timer)
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
WT_ERR(__wt_bt_read(session, &tmp, addr, addr_size));
if (timer) {
- time_stop = __wt_rdtsc(session);
+ time_stop = __wt_clock(session);
WT_STAT_CONN_INCR(session, cache_read_app_count);
WT_STAT_CONN_INCRV(session, cache_read_app_time,
- WT_TSCDIFF_US(time_stop, time_start));
+ WT_CLOCKDIFF_US(time_stop, time_start));
}
/*
* Build the in-memory version of the page. Clear our local reference to
* the allocated copy of the disk image on return, the in-memory object
* steals it.
+ *
+ * If a page is read with eviction disabled, we don't count evicting it
+ * as progress. Since disabling eviction allows pages to be read even
+ * when the cache is full, we want to avoid workloads repeatedly reading
+ * a page with eviction disabled (e.g., a metadata page), then evicting
+ * that page and deciding that is a sign that eviction is unstuck.
*/
page_flags =
WT_DATA_IN_ITEM(&tmp) ? WT_PAGE_DISK_ALLOC : WT_PAGE_DISK_MAPPED;
if (LF_ISSET(WT_READ_IGNORE_CACHE_SIZE))
- FLD_SET(page_flags, WT_PAGE_READ_NO_EVICT);
+ FLD_SET(page_flags, WT_PAGE_EVICT_NO_PROGRESS);
WT_ERR(__wt_page_inmem(session, ref, tmp.data, page_flags, &page));
tmp.mem = NULL;
-skip_read:
+ /*
+ * The WT_REF lookaside state should match the page-header state of
+ * any page we read.
+ */
+ WT_ASSERT(session,
+ (previous_state != WT_REF_LIMBO &&
+ previous_state != WT_REF_LOOKASIDE) ||
+ ref->page->dsk == NULL ||
+ F_ISSET(ref->page->dsk, WT_PAGE_LAS_UPDATE));
+
/*
* If reading for a checkpoint, there's no additional work to do, the
* page on disk is correct as written.
*/
- if (session->dhandle->checkpoint != NULL)
+ if (session->dhandle->checkpoint != NULL) {
+ WT_ASSERT(session, previous_state == WT_REF_DISK);
goto done;
+ }
- /* If the page was deleted, instantiate that information. */
- if (previous_state == WT_REF_DELETED)
+skip_read:
+ switch (previous_state) {
+ case WT_REF_DELETED:
+ /* If the page was deleted, instantiate that information. */
WT_ERR(__wt_delete_page_instantiate(session, ref));
+ break;
+ case WT_REF_LOOKASIDE:
+ if (__las_page_skip_locked(session, ref)) {
+ WT_STAT_CONN_INCR(
+ session, cache_read_lookaside_skipped);
+ ref->page_las->eviction_to_lookaside = true;
+ final_state = WT_REF_LIMBO;
+ break;
+ }
+ /* FALLTHROUGH */
+ case WT_REF_LIMBO:
+ /* Instantiate updates from the database's lookaside table. */
+ if (previous_state == WT_REF_LIMBO)
+ WT_STAT_CONN_INCR(session, cache_read_lookaside_delay);
- /*
- * Instantiate updates from the database's lookaside table. The page
- * flag was set when the page was written, potentially a long time ago.
- * We only care if the lookaside table is currently active, check that
- * before doing any work.
- */
- if (previous_state == WT_REF_LOOKASIDE) {
- WT_ASSERT(session, (ref->page->dsk == NULL ||
- F_ISSET(ref->page->dsk, WT_PAGE_LAS_UPDATE)));
-
- __btree_verbose_lookaside_read(
- session, btree->id, ref->page_las->las_pageid);
- WT_STAT_CONN_INCR(session, cache_read_lookaside);
- WT_STAT_DATA_INCR(session, cache_read_lookaside);
WT_ERR(__las_page_instantiate(session, ref, btree->id));
/*
* The page is instantiated so we no longer need the lookaside
- * entries. Note that we are discarding updates so the page
- * must be marked available even if these operations fail.
+ * entries. Note we are discarding updates so the page must be
+ * marked available even if these operations fail.
+ *
+ * Don't free WT_REF.page_las, there may be concurrent readers.
*/
WT_TRET(__wt_las_remove_block(
session, NULL, btree->id, ref->page_las->las_pageid));
- __wt_free(session, ref->page_las);
+
+ ref->page_las->eviction_to_lookaside = false;
+ break;
}
-done: WT_PUBLISH(ref->state, WT_REF_MEM);
+done: WT_PUBLISH(ref->state, final_state);
return (ret);
err: /*
@@ -447,7 +622,7 @@ err: /*
* it discarded the page, but not the disk image. Discard the page
* and separately discard the disk image in all cases.
*/
- if (ref->page != NULL)
+ if (ref->page != NULL && previous_state != WT_REF_LIMBO)
__wt_ref_out(session, ref);
WT_PUBLISH(ref->state, previous_state);
@@ -457,74 +632,6 @@ err: /*
}
/*
- * __las_page_skip --
- * Check if we can skip reading a page with lookaside entries.
- */
-static inline bool
-__las_page_skip(WT_SESSION_IMPL *session, WT_REF *ref)
-{
- WT_TXN *txn;
- bool skip;
-
- txn = &session->txn;
- skip = false;
-
- if (!__wt_atomic_casv32(&ref->state, WT_REF_LOOKASIDE, WT_REF_LOCKED))
- return (false);
-
- /*
- * Skip lookaside pages if reading without a timestamp and all the
- * updates in lookaside are in the past.
- *
- * If we skip a lookaside page, the tree cannot be left clean:
- * lookaside entries must be resolved before the tree can be discarded.
- *
- * Lookaside eviction preferentially chooses the newest updates when
- * creating page image with no stable timestamp. If a stable timestamp
- * has been set, we have to visit the page because eviction chooses old
- * version of records in that case.
- *
- * One case where we may need to visit the page is if lookaside
- * eviction is active in tree 2 when a checkpoint has started and is
- * working its way through tree 1. In that case, lookaside may have
- * created a page image with updates in the future of the checkpoint.
- */
- if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT))
- goto done;
-
- if (WT_TXNID_LE(txn->snap_min, ref->page_las->las_max_txn))
- goto done;
-
- if (!F_ISSET(txn, WT_TXN_HAS_TS_READ) &&
- ref->page_las->las_skew_newest) {
- skip = true;
- goto done;
- }
-
-#ifdef HAVE_TIMESTAMPS
- /*
- * Skip lookaside pages if reading as of a timestamp and all the
- * updates are in the future.
- */
- WT_ASSERT(session,
- !F_ISSET(&session->txn, WT_TXN_HAS_TS_READ) ||
- __wt_timestamp_cmp(&ref->page_las->onpage_timestamp,
- &session->txn.read_timestamp) <= 0);
-
- if (F_ISSET(&session->txn, WT_TXN_HAS_TS_READ) &&
- !ref->page_las->las_skew_newest &&
- __wt_timestamp_cmp(
- &ref->page_las->min_timestamp, &session->txn.read_timestamp) > 0) {
- skip = true;
- goto done;
- }
-#endif
-
-done: WT_PUBLISH(ref->state, WT_REF_LOOKASIDE);
- return (skip);
-}
-
-/*
* __wt_page_in_func --
* Acquire a hazard pointer to a page; if the page is not in-memory,
* read it from the disk and build an in-memory version.
@@ -540,6 +647,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
WT_DECL_RET;
WT_PAGE *page;
uint64_t sleep_cnt, wait_cnt;
+ uint32_t current_state;
int force_attempts;
bool busy, cache_work, did_read, stalled, wont_need;
@@ -559,7 +667,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
for (did_read = wont_need = stalled = false,
force_attempts = 0, sleep_cnt = wait_cnt = 0;;) {
- switch (ref->state) {
+ switch (current_state = ref->state) {
case WT_REF_DELETED:
if (LF_ISSET(WT_READ_NO_EMPTY) &&
__wt_delete_page_skip(session, ref, false))
@@ -569,6 +677,12 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
if (LF_ISSET(WT_READ_CACHE)) {
if (!LF_ISSET(WT_READ_LOOKASIDE))
return (WT_NOTFOUND);
+ /*
+ * If we skip a lookaside page, the tree
+ * cannot be left clean: lookaside entries
+ * must be resolved before the tree can be
+ * discarded.
+ */
if (__las_page_skip(session, ref)) {
__wt_tree_modify_set(session);
return (WT_NOTFOUND);
@@ -628,6 +742,7 @@ read: /*
break;
case WT_REF_SPLIT:
return (WT_RESTART);
+ case WT_REF_LIMBO:
case WT_REF_MEM:
/*
* The page is in memory.
@@ -653,6 +768,22 @@ read: /*
WT_STAT_CONN_INCR(session, page_busy_blocked);
break;
}
+ /*
+ * If this is a limbo page, check whether we need to
+ * instantiate the history. Because we hold a hazard
+ * pointer, we can use the locked version of the check.
+ */
+ if (current_state == WT_REF_LIMBO &&
+ ((!LF_ISSET(WT_READ_CACHE) ||
+ LF_ISSET(WT_READ_LOOKASIDE)) &&
+ !__las_page_skip_locked(session, ref))) {
+ WT_RET(__wt_hazard_clear(session, ref));
+ goto read;
+ }
+ if (current_state == WT_REF_LIMBO &&
+ LF_ISSET(WT_READ_CACHE) &&
+ LF_ISSET(WT_READ_LOOKASIDE))
+ __wt_tree_modify_set(session);
/*
* Check if the page requires forced eviction.
@@ -767,46 +898,3 @@ skip_evict: /*
WT_STAT_CONN_INCRV(session, page_sleep, sleep_cnt);
}
}
-
-/*
- * __btree_verbose_lookaside_read --
- * Create a verbose message to display at most once per checkpoint when
- * performing a lookaside table read.
- */
-static void
-__btree_verbose_lookaside_read(
- WT_SESSION_IMPL *session, uint32_t las_id, uint64_t las_pageid)
-{
- WT_CACHE *cache;
- uint64_t ckpt_gen_current, ckpt_gen_last;
-
- if (!WT_VERBOSE_ISSET(session,
- WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY))
- return;
-
- cache = S2C(session)->cache;
- ckpt_gen_current = __wt_gen(session, WT_GEN_CHECKPOINT);
- ckpt_gen_last = cache->las_verb_gen_read;
-
- /*
- * This message is throttled to one per checkpoint. To do this we
- * track the generation of the last checkpoint for which the message
- * was printed and check against the current checkpoint generation.
- */
- if (WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE) ||
- ckpt_gen_current > ckpt_gen_last) {
- /*
- * Attempt to atomically replace the last checkpoint generation
- * for which this message was printed. If the atomic swap fails
- * we have raced and the winning thread will print the message.
- */
- if (__wt_atomic_casv64(&cache->las_verb_gen_read,
- ckpt_gen_last, ckpt_gen_current)) {
- __wt_verbose(session,
- WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY,
- "Read from lookaside file triggered for "
- "file ID %" PRIu32 ", page ID %" PRIu64,
- las_id, las_pageid);
- }
- }
-}
diff --git a/src/third_party/wiredtiger/src/btree/bt_ret.c b/src/third_party/wiredtiger/src/btree/bt_ret.c
index d1fc684e208..7d0da631e2b 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ret.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ret.c
@@ -213,27 +213,43 @@ __wt_value_return_upd(WT_SESSION_IMPL *session,
}
/*
- * If we hit the end of the chain, roll forward from the update item we
- * found, otherwise, from the original page's value.
+ * If there's no visible update and we skipped a birthmark, the base
+ * item is an empty item (in other words, birthmarks we can't read act
+ * as tombstones).
+ * If there's no visible update and we didn't skip a birthmark, the base
+ * item is the on-page item, which must be globally visible.
+ * If there's a visible update and it's a tombstone, the base item is an
+ * empty item.
+ * If there's a visible update and it's not a tombstone, the base item
+ * is the on-page item.
*/
- if (upd == NULL && !skipped_birthmark) {
- /*
- * Callers of this function set the cursor slot to an impossible
- * value to check we're not trying to return on-page values when
- * the update list should have been sufficient (which happens,
- * for example, if an update list was truncated, deleting some
- * standard update required by a previous modify update). Assert
- * the case.
- */
- WT_ASSERT(session, cbt->slot != UINT32_MAX);
+ if (upd == NULL) {
+ if (skipped_birthmark)
+ WT_ERR(__wt_buf_set(session, &cursor->value, "", 0));
+ else {
+ /*
+ * Callers of this function set the cursor slot to an
+ * impossible value to check we don't try to return
+ * on-page values when the update list should have been
+ * sufficient (which happens, for example, if an update
+ * list was truncated, deleting some standard update
+ * required by a previous modify update). Assert the
+ * case.
+ */
+ WT_ASSERT(session, cbt->slot != UINT32_MAX);
- WT_ERR(__value_return(session, cbt));
- } else if (upd->type == WT_UPDATE_TOMBSTONE || skipped_birthmark)
+ WT_ERR(__value_return(session, cbt));
+ }
+ } else if (upd->type == WT_UPDATE_TOMBSTONE)
WT_ERR(__wt_buf_set(session, &cursor->value, "", 0));
else
WT_ERR(__wt_buf_set(session,
&cursor->value, upd->data, upd->size));
+ /*
+ * Once we have a base item, roll forward through any visible modify
+ * updates.
+ */
while (i > 0)
WT_ERR(__wt_modify_apply(session, cursor, listp[--i]->data));
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index 2719481aa86..36bbe48b407 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -758,16 +758,6 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
*/
if (discard) {
/*
- * Page-delete information is only read when the WT_REF state is
- * WT_REF_DELETED. The page-delete memory wasn't added to the
- * parent's footprint, ignore it here.
- */
- if (ref->page_del != NULL) {
- __wt_free(session, ref->page_del->update_list);
- __wt_free(session, ref->page_del);
- }
-
- /*
* Set the discarded WT_REF state to split, ensuring we don't
* race with any discard of the WT_REF deleted fields.
*/
@@ -843,12 +833,18 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
}
/*
- * If this page was fast-truncated, any attached structure
- * should have been freed before now.
+ * The page-delete and lookaside memory weren't added to the
+ * parent's footprint, ignore it here.
*/
- WT_ASSERT(session, next_ref->page_del == NULL);
+ if (next_ref->page_del != NULL) {
+ __wt_free(session, next_ref->page_del->update_list);
+ __wt_free(session, next_ref->page_del);
+ }
+ __wt_free(session, next_ref->page_las);
+ /* Free the backing block and address. */
WT_TRET(__wt_ref_block_free(session, next_ref));
+
WT_TRET(__split_safe_free(
session, split_gen, exclusive, next_ref, sizeof(WT_REF)));
parent_decr += sizeof(WT_REF);
@@ -1574,7 +1570,7 @@ __split_multi_inmem_fail(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_REF *ref)
/*
* __wt_multi_to_ref --
- * Move a multi-block list into an array of WT_REF structures.
+ * Move a multi-block entry into a WT_REF structure.
*/
int
__wt_multi_to_ref(WT_SESSION_IMPL *session,
@@ -2261,9 +2257,13 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi)
*
* Pages with unresolved changes are not marked clean during
* reconciliation, do it now.
+ *
+ * Don't count this as eviction making progress, we did a one-for-one
+ * rewrite of a page in memory, typical in the case of cache pressure.
*/
__wt_page_modify_clear(session, page);
- __wt_ref_out_int(session, ref, true);
+ F_SET_ATOMIC(page, WT_PAGE_EVICT_NO_PROGRESS);
+ __wt_ref_out(session, ref);
/* Swap the new page into place. */
ref->page = new->page;
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index 423b569f0b7..8600c7d6555 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -138,7 +138,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
saved_pinned_id = WT_SESSION_TXN_STATE(session)->pinned_id;
timer = WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT);
if (timer)
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
switch (syncop) {
case WT_SYNC_WRITE_LEAVES:
@@ -330,7 +330,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
}
if (timer) {
- time_stop = __wt_rdtsc(session);
+ time_stop = __wt_clock(session);
__wt_verbose(session, WT_VERB_CHECKPOINT,
"__sync_file WT_SYNC_%s wrote: %" PRIu64
" leaf pages (%" PRIu64 "B), %" PRIu64
@@ -338,7 +338,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
syncop == WT_SYNC_WRITE_LEAVES ?
"WRITE_LEAVES" : "CHECKPOINT",
leaf_pages, leaf_bytes, internal_pages, internal_bytes,
- WT_TSCDIFF_MS(time_stop, time_start));
+ WT_CLOCKDIFF_MS(time_stop, time_start));
}
err: /* On error, clear any left-over tree walk. */
diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c
index 3d5e9a3540f..eef790d7459 100644
--- a/src/third_party/wiredtiger/src/btree/bt_walk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_walk.c
@@ -470,7 +470,8 @@ restart: /*
* fast-path some common cases.
*/
if (LF_ISSET(WT_READ_NO_WAIT) &&
- ref->state != WT_REF_MEM)
+ ref->state != WT_REF_MEM &&
+ ref->state != WT_REF_LIMBO)
break;
/* Skip lookaside pages if not requested. */
@@ -663,8 +664,8 @@ __wt_tree_walk_count(WT_SESSION_IMPL *session,
int
__wt_tree_walk_custom_skip(
WT_SESSION_IMPL *session, WT_REF **refp,
- int (*skip_func)(WT_SESSION_IMPL *, WT_REF *, void *, bool *),
- void *func_cookie, uint32_t flags)
+ int (*skip_func)(WT_SESSION_IMPL *, WT_REF *, void *, bool *),
+ void *func_cookie, uint32_t flags)
{
return (__tree_walk_internal(
session, refp, NULL, skip_func, func_cookie, flags));
diff --git a/src/third_party/wiredtiger/src/btree/col_modify.c b/src/third_party/wiredtiger/src/btree/col_modify.c
index 79fc06b7312..7270c49a9f5 100644
--- a/src/third_party/wiredtiger/src/btree/col_modify.c
+++ b/src/third_party/wiredtiger/src/btree/col_modify.c
@@ -67,6 +67,9 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
}
}
+ /* We're going to modify the page, we should have loaded history. */
+ WT_ASSERT(session, cbt->ref->state != WT_REF_LIMBO);
+
/* If we don't yet have a modify structure, we'll need one. */
WT_RET(__wt_page_modify_init(session, page));
mod = page->modify;
diff --git a/src/third_party/wiredtiger/src/btree/row_modify.c b/src/third_party/wiredtiger/src/btree/row_modify.c
index e3b9e492d78..8b1e4d78f54 100644
--- a/src/third_party/wiredtiger/src/btree/row_modify.c
+++ b/src/third_party/wiredtiger/src/btree/row_modify.c
@@ -61,6 +61,9 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
upd = upd_arg;
logged = false;
+ /* We're going to modify the page, we should have loaded history. */
+ WT_ASSERT(session, cbt->ref->state != WT_REF_LIMBO);
+
/* If we don't yet have a modify structure, we'll need one. */
WT_RET(__wt_page_modify_init(session, page));
mod = page->modify;
@@ -357,24 +360,3 @@ __wt_update_obsolete_check(
return (NULL);
}
-
-/*
- * __wt_update_obsolete_free --
- * Free an obsolete update list.
- */
-void
-__wt_update_obsolete_free(
- WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd)
-{
- WT_UPDATE *next;
- size_t size;
-
- /* Free a WT_UPDATE list. */
- for (size = 0; upd != NULL; upd = next) {
- next = upd->next;
- size += WT_UPDATE_MEMSIZE(upd);
- __wt_free(session, upd);
- }
- if (size != 0)
- __wt_cache_page_inmem_decr(session, page, size);
-}
diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c
index dc7ea1b7438..5fa46cb7fb2 100644
--- a/src/third_party/wiredtiger/src/cache/cache_las.c
+++ b/src/third_party/wiredtiger/src/cache/cache_las.c
@@ -341,7 +341,8 @@ __las_insert_block_verbose(WT_SESSION_IMPL *session, WT_MULTI *multi)
char hex_timestamp[9]; /* Enough for disabled string */
#endif
uint64_t ckpt_gen_current, ckpt_gen_last;
- uint32_t btree_id, pct_dirty, pct_full;
+ uint32_t btree_id;
+ double pct_dirty, pct_full;
btree_id = S2BT(session)->id;
@@ -379,8 +380,8 @@ __las_insert_block_verbose(WT_SESSION_IMPL *session, WT_MULTI *multi)
"file ID %" PRIu32 ", page ID %" PRIu64 ". "
"Max txn ID %" PRIu64 ", min timestamp %s, skewed %s. "
"Entries now in lookaside file: %" PRId64 ", "
- "cache dirty: %" PRIu32 "%% , "
- "cache use: %" PRIu32 "%%",
+ "cache dirty: %2.3f%% , "
+ "cache use: %2.3f%%",
btree_id, multi->page_las.las_pageid,
multi->page_las.las_max_txn,
hex_timestamp,
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index da11fa8c98a..a70b8f86648 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -149,16 +149,18 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
NULL, NULL,
confchk_wiredtiger_open_eviction_subconfigs, 2 },
{ "eviction_checkpoint_target", "int",
- NULL, "min=0,max=99",
+ NULL, "min=0,max=10TB",
NULL, 0 },
{ "eviction_dirty_target", "int",
- NULL, "min=1,max=99",
+ NULL, "min=1,max=10TB",
NULL, 0 },
{ "eviction_dirty_trigger", "int",
- NULL, "min=1,max=99",
+ NULL, "min=1,max=10TB",
+ NULL, 0 },
+ { "eviction_target", "int", NULL, "min=10,max=10TB", NULL, 0 },
+ { "eviction_trigger", "int",
+ NULL, "min=10,max=10TB",
NULL, 0 },
- { "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 },
- { "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 },
{ "file_manager", "category",
NULL, NULL,
confchk_wiredtiger_open_file_manager_subconfigs, 3 },
@@ -802,16 +804,18 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
NULL, NULL,
confchk_wiredtiger_open_eviction_subconfigs, 2 },
{ "eviction_checkpoint_target", "int",
- NULL, "min=0,max=99",
+ NULL, "min=0,max=10TB",
NULL, 0 },
{ "eviction_dirty_target", "int",
- NULL, "min=1,max=99",
+ NULL, "min=1,max=10TB",
NULL, 0 },
{ "eviction_dirty_trigger", "int",
- NULL, "min=1,max=99",
+ NULL, "min=1,max=10TB",
+ NULL, 0 },
+ { "eviction_target", "int", NULL, "min=10,max=10TB", NULL, 0 },
+ { "eviction_trigger", "int",
+ NULL, "min=10,max=10TB",
NULL, 0 },
- { "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 },
- { "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 },
{ "exclusive", "boolean", NULL, NULL, NULL, 0 },
{ "extensions", "list", NULL, NULL, NULL, 0 },
{ "file_extend", "list",
@@ -900,16 +904,18 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
NULL, NULL,
confchk_wiredtiger_open_eviction_subconfigs, 2 },
{ "eviction_checkpoint_target", "int",
- NULL, "min=0,max=99",
+ NULL, "min=0,max=10TB",
NULL, 0 },
{ "eviction_dirty_target", "int",
- NULL, "min=1,max=99",
+ NULL, "min=1,max=10TB",
NULL, 0 },
{ "eviction_dirty_trigger", "int",
- NULL, "min=1,max=99",
+ NULL, "min=1,max=10TB",
+ NULL, 0 },
+ { "eviction_target", "int", NULL, "min=10,max=10TB", NULL, 0 },
+ { "eviction_trigger", "int",
+ NULL, "min=10,max=10TB",
NULL, 0 },
- { "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 },
- { "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 },
{ "exclusive", "boolean", NULL, NULL, NULL, 0 },
{ "extensions", "list", NULL, NULL, NULL, 0 },
{ "file_extend", "list",
@@ -997,16 +1003,18 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
NULL, NULL,
confchk_wiredtiger_open_eviction_subconfigs, 2 },
{ "eviction_checkpoint_target", "int",
- NULL, "min=0,max=99",
+ NULL, "min=0,max=10TB",
NULL, 0 },
{ "eviction_dirty_target", "int",
- NULL, "min=1,max=99",
+ NULL, "min=1,max=10TB",
NULL, 0 },
{ "eviction_dirty_trigger", "int",
- NULL, "min=1,max=99",
+ NULL, "min=1,max=10TB",
+ NULL, 0 },
+ { "eviction_target", "int", NULL, "min=10,max=10TB", NULL, 0 },
+ { "eviction_trigger", "int",
+ NULL, "min=10,max=10TB",
NULL, 0 },
- { "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 },
- { "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 },
{ "extensions", "list", NULL, NULL, NULL, 0 },
{ "file_extend", "list",
NULL, "choices=[\"data\",\"log\"]",
@@ -1090,16 +1098,18 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
NULL, NULL,
confchk_wiredtiger_open_eviction_subconfigs, 2 },
{ "eviction_checkpoint_target", "int",
- NULL, "min=0,max=99",
+ NULL, "min=0,max=10TB",
NULL, 0 },
{ "eviction_dirty_target", "int",
- NULL, "min=1,max=99",
+ NULL, "min=1,max=10TB",
NULL, 0 },
{ "eviction_dirty_trigger", "int",
- NULL, "min=1,max=99",
+ NULL, "min=1,max=10TB",
+ NULL, 0 },
+ { "eviction_target", "int", NULL, "min=10,max=10TB", NULL, 0 },
+ { "eviction_trigger", "int",
+ NULL, "min=10,max=10TB",
NULL, 0 },
- { "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 },
- { "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 },
{ "extensions", "list", NULL, NULL, NULL, 0 },
{ "file_extend", "list",
NULL, "choices=[\"data\",\"log\"]",
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c
index 4f9f160ae3f..871190380f7 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache.c
@@ -9,6 +9,47 @@
#include "wt_internal.h"
/*
+ * __cache_config_abs_to_pct --
+ * Cache configuration values can be either a percentage or an absolute
+ * size, this function converts an absolute size to a percentage.
+ *
+ * On entry *param holds the configured value. Anything up to and
+ * including 100 is treated as a percentage and left unchanged; anything
+ * larger is treated as an absolute size and is replaced, in place, with
+ * its percentage of the connection's cache size. The param_name string
+ * is only used when building error messages. An absolute size is
+ * reported as EINVAL (through WT_RET_MSG) when the cache is shared or
+ * when the size is larger than the cache size; otherwise 0 is returned.
+ */
+static inline int
+__cache_config_abs_to_pct(WT_SESSION_IMPL *session,
+ double *param, const char *param_name, bool shared)
+{
+ WT_CONNECTION_IMPL *conn;
+ double input;
+
+ conn = S2C(session);
+
+ WT_ASSERT(session, param != NULL);
+ input = *param;
+
+ /*
+ * Anything above 100 is an absolute value; convert it to percentage.
+ * Exactly 100 is still treated as a percentage.
+ */
+ if (input > 100.0) {
+ /*
+ * In a shared cache configuration the cache size changes
+ * regularly. Therefore, we require a percentage setting and do
+ * not allow an absolute size setting.
+ */
+ if (shared)
+ WT_RET_MSG(session, EINVAL,
+ "Shared cache configuration requires a percentage "
+ "value for %s", param_name);
+ /*
+ * An absolute value can't exceed the cache size. Assuming
+ * WT_RET_MSG returns from the function, getting past this
+ * check also means the cache size is larger than 100, so the
+ * division below cannot be by zero.
+ */
+ if (input > conn->cache_size)
+ WT_RET_MSG(session, EINVAL,
+ "%s should not exceed cache size", param_name);
+
+ *param = (input * 100.0) / (conn->cache_size);
+ }
+
+ return (0);
+}
+
+/*
* __cache_config_local --
* Configure the underlying cache.
*/
@@ -37,17 +78,26 @@ __cache_config_local(WT_SESSION_IMPL *session, bool shared, const char *cfg[])
cache->overhead_pct = (u_int)cval.val;
WT_RET(__wt_config_gets(session, cfg, "eviction_target", &cval));
- cache->eviction_target = (u_int)cval.val;
+ cache->eviction_target = (double)cval.val;
+ WT_RET(__cache_config_abs_to_pct(
+ session, &(cache->eviction_target), "eviction target", shared));
WT_RET(__wt_config_gets(session, cfg, "eviction_trigger", &cval));
- cache->eviction_trigger = (u_int)cval.val;
+ cache->eviction_trigger = (double)cval.val;
+ WT_RET(__cache_config_abs_to_pct(
+ session, &(cache->eviction_trigger), "eviction trigger", shared));
WT_RET(__wt_config_gets(
session, cfg, "eviction_checkpoint_target", &cval));
- cache->eviction_checkpoint_target = (u_int)cval.val;
+ cache->eviction_checkpoint_target = (double)cval.val;
+ WT_RET(__cache_config_abs_to_pct(session,
+ &(cache->eviction_checkpoint_target),
+ "eviction checkpoint target", shared));
WT_RET(__wt_config_gets(session, cfg, "eviction_dirty_target", &cval));
- cache->eviction_dirty_target = (u_int)cval.val;
+ cache->eviction_dirty_target = (double)cval.val;
+ WT_RET(__cache_config_abs_to_pct(session,
+ &(cache->eviction_dirty_target), "eviction dirty target", shared));
/*
* Don't allow the dirty target to be larger than the overall
@@ -66,7 +116,10 @@ __cache_config_local(WT_SESSION_IMPL *session, bool shared, const char *cfg[])
cache->eviction_dirty_target;
WT_RET(__wt_config_gets(session, cfg, "eviction_dirty_trigger", &cval));
- cache->eviction_dirty_trigger = (u_int)cval.val;
+ cache->eviction_dirty_trigger = (double)cval.val;
+ WT_RET(__cache_config_abs_to_pct(session,
+ &(cache->eviction_dirty_trigger), "eviction dirty trigger",
+ shared));
/*
* Don't allow the dirty trigger to be larger than the overall
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
index eefb50902f4..720df3c465d 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
@@ -572,8 +572,8 @@ __cache_pool_adjust(WT_SESSION_IMPL *session,
WT_CACHE *cache;
WT_CACHE_POOL *cp;
WT_CONNECTION_IMPL *entry;
+ double pct_full;
uint64_t adjustment, highest_percentile, pressure, reserved, smallest;
- u_int pct_full;
bool busy, decrease_ok, grow, pool_full;
*adjustedp = false;
@@ -581,7 +581,7 @@ __cache_pool_adjust(WT_SESSION_IMPL *session,
cp = __wt_process.cache_pool;
grow = false;
pool_full = cp->currently_used >= cp->size;
- pct_full = 0;
+ pct_full = 0.0;
/* Highest as a percentage, avoid 0 */
highest_percentile = (highest / 100) + 1;
@@ -613,7 +613,7 @@ __cache_pool_adjust(WT_SESSION_IMPL *session,
entry->default_session, false, true, &pct_full);
__wt_verbose(session, WT_VERB_SHARED_CACHE,
- "\t%5" PRIu64 ", %3" PRIu64 ", %2" PRIu32 ", %d, %2u",
+ "\t%5" PRIu64 ", %3" PRIu64 ", %2" PRIu32 ", %d, %2.3f",
entry->cache_size >> 20, pressure, cache->cp_skip_count,
busy, pct_full);
@@ -676,8 +676,9 @@ __cache_pool_adjust(WT_SESSION_IMPL *session,
* potentially a negative feedback loop in the
* balance algorithm.
*/
- smallest = (100 * __wt_cache_bytes_inuse(cache)) /
- cache->eviction_trigger;
+ smallest =
+ (uint64_t)((100 * __wt_cache_bytes_inuse(cache)) /
+ cache->eviction_trigger);
if (entry->cache_size > smallest)
adjustment = WT_MIN(cp->chunk,
(entry->cache_size - smallest) / 2);
diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c
index 7dea0a3fe4b..9097e10ef5a 100644
--- a/src/third_party/wiredtiger/src/conn/conn_log.c
+++ b/src/third_party/wiredtiger/src/conn/conn_log.c
@@ -949,11 +949,11 @@ __log_server(void *arg)
}
/* Wait until the next event. */
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
__wt_cond_auto_wait_signal(
session, conn->log_cond, did_work, NULL, &signalled);
- time_stop = __wt_rdtsc(session);
- timediff = WT_TSCDIFF_MS(time_stop, time_start);
+ time_stop = __wt_clock(session);
+ timediff = WT_CLOCKDIFF_MS(time_stop, time_start);
}
if (0) {
diff --git a/src/third_party/wiredtiger/src/conn/conn_stat.c b/src/third_party/wiredtiger/src/conn/conn_stat.c
index e27de84254e..14a1570c138 100644
--- a/src/third_party/wiredtiger/src/conn/conn_stat.c
+++ b/src/third_party/wiredtiger/src/conn/conn_stat.c
@@ -503,7 +503,6 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp)
struct timespec ts;
struct tm *tm, _tm;
WT_CONNECTION_IMPL *conn;
- WT_FSTREAM *log_stream;
conn = S2C(session);
@@ -516,17 +515,16 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp)
WT_RET_MSG(session, ENOMEM, "strftime path conversion");
/* If the path has changed, cycle the log file. */
- if ((log_stream = conn->stat_fs) == NULL ||
+ if (conn->stat_fs == NULL ||
path == NULL || strcmp(tmp->mem, path->mem) != 0) {
WT_RET(__wt_fclose(session, &conn->stat_fs));
- if (path != NULL)
- WT_RET(
- __wt_buf_set(session, path, tmp->data, tmp->size));
WT_RET(__wt_fopen(session, tmp->mem,
WT_FS_OPEN_CREATE | WT_FS_OPEN_FIXED, WT_STREAM_APPEND,
- &log_stream));
+ &conn->stat_fs));
+
+ if (path != NULL)
+ WT_RET(__wt_buf_setstr(session, path, tmp->mem));
}
- conn->stat_fs = log_stream;
/* Create the entry prefix for this time of day. */
if (strftime(tmp->mem, tmp->memsize, conn->stat_format, tm) == 0)
@@ -583,6 +581,7 @@ __statlog_on_close(WT_SESSION_IMPL *session)
"Attempt to log statistics while a server is running");
WT_RET(__wt_scr_alloc(session, strlen(conn->stat_path) + 128, &tmp));
+ WT_ERR(__wt_buf_setstr(session, tmp, ""));
WT_ERR(__statlog_log_one(session, NULL, tmp));
err: __wt_scr_free(session, &tmp);
@@ -614,9 +613,6 @@ __statlog_server(void *arg)
session = arg;
conn = S2C(session);
- WT_CLEAR(path);
- WT_CLEAR(tmp);
-
/*
* We need a temporary place to build a path and an entry prefix.
* The length of the path plus 128 should be more than enough.
@@ -624,8 +620,12 @@ __statlog_server(void *arg)
* We also need a place to store the current path, because that's
* how we know when to close/re-open the file.
*/
+ WT_CLEAR(path);
WT_ERR(__wt_buf_init(session, &path, strlen(conn->stat_path) + 128));
+ WT_ERR(__wt_buf_setstr(session, &path, ""));
+ WT_CLEAR(tmp);
WT_ERR(__wt_buf_init(session, &tmp, strlen(conn->stat_path) + 128));
+ WT_ERR(__wt_buf_setstr(session, &tmp, ""));
for (;;) {
/* Wait until the next event. */
diff --git a/src/third_party/wiredtiger/src/cursor/cur_file.c b/src/third_party/wiredtiger/src/cursor/cur_file.c
index 5f68ad3883e..9d6f031807f 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_file.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_file.c
@@ -196,11 +196,11 @@ __curfile_search(WT_CURSOR *cursor)
CURSOR_API_CALL(cursor, session, search, cbt->btree);
WT_ERR(__cursor_checkkey(cursor));
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
WT_ERR(__wt_btcur_search(cbt));
- time_stop = __wt_rdtsc(session);
+ time_stop = __wt_clock(session);
__wt_stat_usecs_hist_incr_opread(session,
- WT_TSCDIFF_US(time_stop, time_start));
+ WT_CLOCKDIFF_US(time_stop, time_start));
/* Search maintains a position, key and value. */
WT_ASSERT(session,
@@ -227,11 +227,11 @@ __curfile_search_near(WT_CURSOR *cursor, int *exact)
CURSOR_API_CALL(cursor, session, search_near, cbt->btree);
WT_ERR(__cursor_checkkey(cursor));
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
WT_ERR(__wt_btcur_search_near(cbt, exact));
- time_stop = __wt_rdtsc(session);
+ time_stop = __wt_clock(session);
__wt_stat_usecs_hist_incr_opread(session,
- WT_TSCDIFF_US(time_stop, time_start));
+ WT_CLOCKDIFF_US(time_stop, time_start));
/* Search-near maintains a position, key and value. */
WT_ASSERT(session,
@@ -261,11 +261,11 @@ __curfile_insert(WT_CURSOR *cursor)
WT_ERR(__cursor_checkkey(cursor));
WT_ERR(__cursor_checkvalue(cursor));
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
WT_ERR(__wt_btcur_insert(cbt));
- time_stop = __wt_rdtsc(session);
+ time_stop = __wt_clock(session);
__wt_stat_usecs_hist_incr_opwrite(session,
- WT_TSCDIFF_US(time_stop, time_start));
+ WT_CLOCKDIFF_US(time_stop, time_start));
/*
* Insert maintains no position, key or value (except for column-store
@@ -362,11 +362,11 @@ __curfile_update(WT_CURSOR *cursor)
WT_ERR(__cursor_checkkey(cursor));
WT_ERR(__cursor_checkvalue(cursor));
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
WT_ERR(__wt_btcur_update(cbt));
- time_stop = __wt_rdtsc(session);
+ time_stop = __wt_clock(session);
__wt_stat_usecs_hist_incr_opwrite(session,
- WT_TSCDIFF_US(time_stop, time_start));
+ WT_CLOCKDIFF_US(time_stop, time_start));
/* Update maintains a position, key and value. */
WT_ASSERT(session,
@@ -394,11 +394,11 @@ __curfile_remove(WT_CURSOR *cursor)
CURSOR_REMOVE_API_CALL(cursor, session, cbt->btree);
WT_ERR(__cursor_checkkey(cursor));
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
WT_ERR(__wt_btcur_remove(cbt));
- time_stop = __wt_rdtsc(session);
+ time_stop = __wt_clock(session);
__wt_stat_usecs_hist_incr_opwrite(session,
- WT_TSCDIFF_US(time_stop, time_start));
+ WT_CLOCKDIFF_US(time_stop, time_start));
/*
* Remove with a search-key is fire-and-forget, no position and no key.
diff --git a/src/third_party/wiredtiger/src/docs/top/main.dox b/src/third_party/wiredtiger/src/docs/top/main.dox
index 1bfb623c0a0..e4de22ff042 100644
--- a/src/third_party/wiredtiger/src/docs/top/main.dox
+++ b/src/third_party/wiredtiger/src/docs/top/main.dox
@@ -6,12 +6,12 @@ WiredTiger is an high performance, scalable, production quality, NoSQL,
@section releases Releases
<table>
-@row{<b>WiredTiger 2.9.3</b> (current),
+@row{<b>WiredTiger 3.0.0</b> (current),
+ <a href="releases/wiredtiger-3.0.0.tar.bz2"><b>[Release package]</b></a>,
+ <a href="3.0.0/index.html"><b>[Documentation]</b></a>}
+@row{<b>WiredTiger 2.9.3</b> (previous),
<a href="releases/wiredtiger-2.9.3.tar.bz2"><b>[Release package]</b></a>,
<a href="2.9.3/index.html"><b>[Documentation]</b></a>}
-@row{<b>WiredTiger 2.9.2</b> (previous),
- <a href="releases/wiredtiger-2.9.2.tar.bz2"><b>[Release package]</b></a>,
- <a href="2.9.2/index.html"><b>[Documentation]</b></a>}
@row{<b>Development branch</b>,
<a href="https://github.com/wiredtiger/wiredtiger"><b>[Source code]</b></a>,
<a href="develop/index.html"><b>[Documentation]</b></a>}
diff --git a/src/third_party/wiredtiger/src/docs/upgrading.dox b/src/third_party/wiredtiger/src/docs/upgrading.dox
index 09cafbf480d..2e4990e8a33 100644
--- a/src/third_party/wiredtiger/src/docs/upgrading.dox
+++ b/src/third_party/wiredtiger/src/docs/upgrading.dox
@@ -28,6 +28,16 @@ The performance visualization tool \c wtstats has been removed and is
no longer supported.
</dd>
+<dt>::wiredtiger_open cache configuration changes</dt>
+<dd>
+The cache configuration options \c eviction_checkpoint_target, \c
+eviction_dirty_target, \c eviction_dirty_trigger, \c eviction_target and \c
+eviction_trigger have changed. The options can now take an absolute size: a
+value in the range of 0 to 100 is interpreted as a percentage of the cache size,
+and a value greater than 100 as an absolute size. This API change is compatible
+with existing usage.
+</dd>
+
</dl><hr>
@section version_292 Upgrading to Version 2.9.2
<dl>
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 9e46f24ca7f..39c84764070 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -557,11 +557,17 @@ __evict_update_work(WT_SESSION_IMPL *session)
WT_BTREE *las_tree;
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
+ double dirty_target, dirty_trigger, target, trigger;
uint64_t bytes_inuse, bytes_max, dirty_inuse;
conn = S2C(session);
cache = conn->cache;
+ dirty_target = cache->eviction_dirty_target;
+ dirty_trigger = cache->eviction_dirty_trigger;
+ target = cache->eviction_target;
+ trigger = cache->eviction_trigger;
+
/* Clear previous state. */
cache->flags = 0;
@@ -589,13 +595,13 @@ __evict_update_work(WT_SESSION_IMPL *session)
bytes_inuse = __wt_cache_bytes_inuse(cache);
if (__wt_eviction_clean_needed(session, NULL))
F_SET(cache, WT_CACHE_EVICT_CLEAN | WT_CACHE_EVICT_CLEAN_HARD);
- else if (bytes_inuse > (cache->eviction_target * bytes_max) / 100)
+ else if (bytes_inuse > (target * bytes_max) / 100)
F_SET(cache, WT_CACHE_EVICT_CLEAN);
dirty_inuse = __wt_cache_dirty_leaf_inuse(cache);
if (__wt_eviction_dirty_needed(session, NULL))
F_SET(cache, WT_CACHE_EVICT_DIRTY | WT_CACHE_EVICT_DIRTY_HARD);
- else if (dirty_inuse > (cache->eviction_dirty_target * bytes_max) / 100)
+ else if (dirty_inuse > (uint64_t)(dirty_target * bytes_max) / 100)
F_SET(cache, WT_CACHE_EVICT_DIRTY);
/*
@@ -610,10 +616,9 @@ __evict_update_work(WT_SESSION_IMPL *session)
* Scrub dirty pages and keep them in cache if we are less than half
* way to the clean or dirty trigger.
*/
- if (bytes_inuse < ((cache->eviction_target + cache->eviction_trigger) *
- bytes_max) / 200 && dirty_inuse < (uint64_t)
- ((cache->eviction_dirty_target + cache->eviction_dirty_trigger) *
- bytes_max) / 200)
+ if (bytes_inuse < (uint64_t)((target + trigger) * bytes_max) / 200 &&
+ dirty_inuse <
+ (uint64_t)((dirty_target + dirty_trigger) * bytes_max) / 200)
F_SET(cache, WT_CACHE_EVICT_SCRUB);
/*
@@ -626,9 +631,8 @@ __evict_update_work(WT_SESSION_IMPL *session)
if (!F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE) &&
(__wt_cache_stuck(session) ||
(__wt_cache_lookaside_score(cache) > 80 &&
- dirty_inuse > (uint64_t)
- ((cache->eviction_dirty_target + cache->eviction_dirty_trigger) *
- bytes_max) / 200)))
+ dirty_inuse >
+ (uint64_t)((dirty_target + dirty_trigger) * bytes_max) / 200)))
F_SET(cache, WT_CACHE_EVICT_LOOKASIDE);
/*
@@ -671,7 +675,7 @@ __evict_pass(WT_SESSION_IMPL *session)
/* Evict pages from the cache. */
for (loop = 0; cache->pass_intr == 0; loop++) {
- time_now = __wt_rdtsc(session);
+ time_now = __wt_clock(session);
if (loop == 0)
time_prev = time_now;
@@ -741,7 +745,7 @@ __evict_pass(WT_SESSION_IMPL *session)
* transactions and writing updates to the lookaside table.
*/
if (eviction_progress == cache->eviction_progress) {
- if (WT_TSCDIFF_MS(time_now, time_prev) >= 20 &&
+ if (WT_CLOCKDIFF_MS(time_now, time_prev) >= 20 &&
F_ISSET(cache, WT_CACHE_EVICT_CLEAN_HARD |
WT_CACHE_EVICT_DIRTY_HARD)) {
if (cache->evict_aggressive_score < 100)
@@ -2049,12 +2053,13 @@ fast: /* If the page can't be evicted, give up. */
* point keeping a page pinned, since it may be the only candidate in
* an idle tree.
*
- * If we land on a page requiring forced eviction, move on to the next
- * page: we want this page evicted as quickly as possible.
+ * If we land on a page requiring forced eviction, or that isn't an
+ * ordinary in-memory page (e.g., WT_REF_LIMBO), move until we find an
+ * ordinary page: we should not prevent exclusive access to the page
+ * until the next walk.
*/
if (ref != NULL) {
if (__wt_ref_is_root(ref) || evict == start || give_up ||
- WT_READGEN_EVICT_SOON(ref->page->read_gen) ||
ref->page->memory_footprint >= btree->splitmempage) {
if (restarts == 0)
WT_STAT_CONN_INCR(
@@ -2062,9 +2067,11 @@ fast: /* If the page can't be evicted, give up. */
WT_RET(__wt_page_release(
cache->walk_session, ref, walk_flags));
ref = NULL;
- } else if (WT_READGEN_EVICT_SOON(ref->page->read_gen))
- WT_RET_NOTFOUND_OK(__wt_tree_walk_count(
- session, &ref, &refs_walked, walk_flags));
+ } else
+ while (ref != NULL && (ref->state != WT_REF_MEM ||
+ WT_READGEN_EVICT_SOON(ref->page->read_gen)))
+ WT_RET_NOTFOUND_OK(__wt_tree_walk_count(
+ session, &ref, &refs_walked, walk_flags));
btree->evict_ref = ref;
}
@@ -2088,7 +2095,7 @@ __evict_get_ref(
WT_CACHE *cache;
WT_EVICT_ENTRY *evict;
WT_EVICT_QUEUE *queue, *other_queue, *urgent_queue;
- uint32_t candidates;
+ uint32_t candidates, previous_state;
bool is_app, server_only, urgent_ok;
*btreep = NULL;
@@ -2213,8 +2220,10 @@ __evict_get_ref(
* multiple attempts to evict it. For pages that are already
* being evicted, this operation will fail and we will move on.
*/
- if (!__wt_atomic_casv32(
- &evict->ref->state, WT_REF_MEM, WT_REF_LOCKED)) {
+ if (((previous_state = evict->ref->state) != WT_REF_MEM &&
+ previous_state != WT_REF_LIMBO) ||
+ !__wt_atomic_casv32(
+ &evict->ref->state, previous_state, WT_REF_LOCKED)) {
__evict_list_clear(session, evict);
continue;
}
@@ -2289,7 +2298,7 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
cache->app_evicts++;
if (WT_STAT_ENABLED(session)) {
app_timer = true;
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
}
}
@@ -2309,10 +2318,10 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
(void)__wt_atomic_subv32(&btree->evict_busy, 1);
if (app_timer) {
- time_stop = __wt_rdtsc(session);
+ time_stop = __wt_clock(session);
WT_STAT_CONN_INCRV(session,
application_evict_time,
- WT_TSCDIFF_US(time_stop, time_start));
+ WT_CLOCKDIFF_US(time_stop, time_start));
}
WT_TRACK_OP_END(session);
return (ret);
@@ -2325,7 +2334,7 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
*/
int
__wt_cache_eviction_worker(
- WT_SESSION_IMPL *session, bool busy, bool readonly, u_int pct_full)
+ WT_SESSION_IMPL *session, bool busy, bool readonly, double pct_full)
{
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
@@ -2348,7 +2357,7 @@ __wt_cache_eviction_worker(
* It is not safe to proceed if the eviction server threads aren't
* setup yet.
*/
- if (!conn->evict_server_running || (busy && pct_full < 100))
+ if (!conn->evict_server_running || (busy && pct_full < 100.0))
goto done;
/* Wake the eviction server if we need to do work. */
@@ -2358,7 +2367,7 @@ __wt_cache_eviction_worker(
timer =
WT_STAT_ENABLED(session) && !F_ISSET(session, WT_SESSION_INTERNAL);
if (timer)
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
for (initial_progress = cache->eviction_progress;; ret = 0) {
/*
@@ -2370,7 +2379,8 @@ __wt_cache_eviction_worker(
if (__wt_cache_stuck(session) && __wt_txn_am_oldest(session)) {
--cache->evict_aggressive_score;
WT_STAT_CONN_INCR(session, txn_fail_cache);
- WT_ERR(WT_ROLLBACK);
+ WT_ERR(__wt_txn_rollback_required(session,
+ "oldest transaction rolled back for eviction"));
}
/*
@@ -2389,7 +2399,7 @@ __wt_cache_eviction_worker(
/* See if eviction is still needed. */
if (!__wt_eviction_needed(session, busy, readonly, &pct_full) ||
- ((pct_full < 100 || cache->eviction_scrub_limit > 0.0) &&
+ ((pct_full < 100.0 || cache->eviction_scrub_limit > 0.0) &&
(cache->eviction_progress >
initial_progress + max_progress)))
break;
@@ -2425,10 +2435,10 @@ __wt_cache_eviction_worker(
}
err: if (timer) {
- time_stop = __wt_rdtsc(session);
+ time_stop = __wt_clock(session);
WT_STAT_CONN_INCRV(session,
application_cache_time,
- WT_TSCDIFF_US(time_stop, time_start));
+ WT_CLOCKDIFF_US(time_stop, time_start));
}
done: WT_TRACK_OP_END(session);
@@ -2573,7 +2583,9 @@ __verbose_dump_cache_single(WT_SESSION_IMPL *session,
dhandle->checkpoint != NULL ? dhandle->checkpoint : "<live>",
btree->evict_disabled != 0 ? "eviction disabled" : "",
btree->evict_disabled_open ? " at open" : ""));
- if (intl_pages != 0)
+ if (intl_pages == 0)
+ WT_RET(__wt_msg(session, "internal: 0 pages"));
+ else
WT_RET(__wt_msg(session,
"internal: "
"%" PRIu64 " pages, "
@@ -2590,7 +2602,9 @@ __verbose_dump_cache_single(WT_SESSION_IMPL *session,
intl_dirty_bytes / WT_MEGABYTE,
intl_bytes_max / WT_MEGABYTE,
intl_dirty_bytes_max / WT_MEGABYTE));
- if (leaf_pages != 0)
+ if (leaf_pages == 0)
+ WT_RET(__wt_msg(session, "leaf: 0 pages"));
+ else
WT_RET(__wt_msg(session,
"leaf: "
"%" PRIu64 " pages, "
@@ -2624,13 +2638,13 @@ __wt_verbose_dump_cache(WT_SESSION_IMPL *session)
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
+ double pct;
uint64_t total_bytes, total_dirty_bytes;
- u_int pct;
bool needed;
conn = S2C(session);
total_bytes = total_dirty_bytes = 0;
- pct = 0; /* [-Werror=uninitialized] */
+ pct = 0.0; /* [-Werror=uninitialized] */
WT_RET(__wt_msg(session, "%s", WT_DIVIDER));
WT_RET(__wt_msg(session, "cache dump"));
@@ -2639,10 +2653,10 @@ __wt_verbose_dump_cache(WT_SESSION_IMPL *session)
"cache full: %s", __wt_cache_full(session) ? "yes" : "no"));
needed = __wt_eviction_clean_needed(session, &pct);
WT_RET(__wt_msg(session,
- "cache clean check: %s (%u%%)", needed ? "yes" : "no", pct));
+ "cache clean check: %s (%2.3f%%)", needed ? "yes" : "no", pct));
needed = __wt_eviction_dirty_needed(session, &pct);
WT_RET(__wt_msg(session,
- "cache dirty check: %s (%u%%)", needed ? "yes" : "no", pct));
+ "cache dirty check: %s (%2.3f%%)", needed ? "yes" : "no", pct));
for (dhandle = NULL;;) {
WT_WITH_HANDLE_LIST_READ_LOCK(session,
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index 70c5d6d02da..0ff314f3484 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -60,7 +60,7 @@ __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref)
btree = S2BT(session);
page = ref->page;
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
/*
* Take some care with order of operations: if we release the hazard
@@ -83,12 +83,12 @@ __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref)
* we have one of two pairs of stats to increment.
*/
ret = __wt_evict(session, ref, false);
- time_stop = __wt_rdtsc(session);
+ time_stop = __wt_clock(session);
if (ret == 0) {
if (too_big) {
WT_STAT_CONN_INCR(session, cache_eviction_force);
WT_STAT_CONN_INCRV(session, cache_eviction_force_time,
- WT_TSCDIFF_US(time_stop, time_start));
+ WT_CLOCKDIFF_US(time_stop, time_start));
} else {
/*
* If the page isn't too big, we are evicting it because
@@ -98,12 +98,12 @@ __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref)
WT_STAT_CONN_INCR(session, cache_eviction_force_delete);
WT_STAT_CONN_INCRV(session,
cache_eviction_force_delete_time,
- WT_TSCDIFF_US(time_stop, time_start));
+ WT_CLOCKDIFF_US(time_stop, time_start));
}
} else {
WT_STAT_CONN_INCR(session, cache_eviction_force_fail);
WT_STAT_CONN_INCRV(session, cache_eviction_force_fail_time,
- WT_TSCDIFF_US(time_stop, time_start));
+ WT_CLOCKDIFF_US(time_stop, time_start));
}
(void)__wt_atomic_subv32(&btree->evict_busy, 1);
@@ -268,9 +268,16 @@ __evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
* Discard the page and update the reference structure; if the page has
* an address, it's a disk page; if it has no address, it's a deleted
* page re-instantiated (for example, by searching) and never written.
+ *
+ * If evicting a WT_REF_LIMBO reference, we get to here and transition
+ * back to WT_REF_LOOKASIDE.
*/
__wt_ref_out(session, ref);
- if (ref->addr == NULL) {
+ if (!closing && ref->page_las != NULL &&
+ ref->page_las->eviction_to_lookaside) {
+ ref->page_las->eviction_to_lookaside = false;
+ WT_PUBLISH(ref->state, WT_REF_LOOKASIDE);
+ } else if (ref->addr == NULL) {
WT_WITH_PAGE_INDEX(session,
ret = __evict_delete_ref(session, ref, closing));
WT_RET_BUSY_OK(ret);
@@ -361,6 +368,7 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
* Eviction wants to keep this page if we have a disk image,
* re-instantiate the page in memory, else discard the page.
*/
+ __wt_free(session, ref->page_las);
if (mod->mod_disk_image == NULL) {
if (mod->mod_page_las.las_pageid != 0) {
WT_RET(
diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h
index a4416f139a0..847d6c5ee01 100644
--- a/src/third_party/wiredtiger/src/include/api.h
+++ b/src/third_party/wiredtiger/src/include/api.h
@@ -81,25 +81,33 @@
/* An API call wrapped in a transaction if necessary. */
#define TXN_API_CALL(s, h, n, bt, config, cfg) do { \
- bool __autotxn = false; \
+ bool __autotxn = false, __update = false; \
API_CALL(s, h, n, bt, config, cfg); \
__wt_txn_timestamp_flags(s); \
__autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\
if (__autotxn) \
- F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT)
+ F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT); \
+ __update = !F_ISSET(&(s)->txn, WT_TXN_UPDATE); \
+ if (__update) \
+ F_SET(&(s)->txn, WT_TXN_UPDATE); \
/* An API call wrapped in a transaction if necessary. */
#define TXN_API_CALL_NOCONF(s, h, n, dh) do { \
- bool __autotxn = false; \
+ bool __autotxn = false, __update = false; \
API_CALL_NOCONF(s, h, n, dh); \
__wt_txn_timestamp_flags(s); \
__autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\
if (__autotxn) \
- F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT)
+ F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT); \
+ __update = !F_ISSET(&(s)->txn, WT_TXN_UPDATE); \
+ if (__update) \
+ F_SET(&(s)->txn, WT_TXN_UPDATE); \
/* End a transactional API call, optional retry on deadlock. */
#define TXN_API_END_RETRY(s, ret, retry) \
API_END(s, ret); \
+ if (__update) \
+ F_CLR(&(s)->txn, WT_TXN_UPDATE); \
if (__autotxn) { \
if (F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT)) \
F_CLR(&(s)->txn, WT_TXN_AUTOCOMMIT); \
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 39aac8730c4..7fbf27a1fff 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -232,12 +232,14 @@ struct __wt_ovfl_reuse {
* Related information for on-disk pages with lookaside entries.
*/
struct __wt_page_lookaside {
- uint64_t las_pageid; /* Page ID in lookaside */
- uint64_t las_max_txn; /* Maximum transaction ID in
- lookaside */
- WT_DECL_TIMESTAMP(min_timestamp) /* Min timestamp in lookaside */
- WT_DECL_TIMESTAMP(onpage_timestamp) /* Max timestamp on page */
- bool las_skew_newest; /* On-page skewed to newest */
+ uint64_t las_pageid; /* Page ID in lookaside */
+ uint64_t las_max_txn; /* Max transaction ID in lookaside */
+ WT_DECL_TIMESTAMP(min_timestamp)/* Min timestamp in lookaside */
+ /* Max timestamp on page */
+ WT_DECL_TIMESTAMP(onpage_timestamp)
+ bool eviction_to_lookaside; /* Revert to lookaside on eviction */
+ bool las_skew_newest; /* On-page skewed to newest */
+ bool invalid; /* History is required for correct reads */
};
/*
@@ -643,8 +645,8 @@ struct __wt_page {
#define WT_PAGE_DISK_ALLOC 0x02u /* Disk image in allocated memory */
#define WT_PAGE_DISK_MAPPED 0x04u /* Disk image in mapped memory */
#define WT_PAGE_EVICT_LRU 0x08u /* Page is on the LRU queue */
-#define WT_PAGE_OVERFLOW_KEYS 0x10u /* Page has overflow keys */
-#define WT_PAGE_READ_NO_EVICT 0x20u /* Page read with eviction disabled */
+#define WT_PAGE_EVICT_NO_PROGRESS 0x10u /* Eviction doesn't count as progress */
+#define WT_PAGE_OVERFLOW_KEYS 0x20u /* Page has overflow keys */
#define WT_PAGE_SPLIT_INSERT 0x40u /* A leaf page was split for append */
#define WT_PAGE_UPDATE_IGNORE 0x80u /* Ignore updates on page discard */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
@@ -721,6 +723,10 @@ struct __wt_page {
* row-store leaf pages without reading them if they don't reference
* overflow items.
*
+ * WT_REF_LIMBO:
+ * The page image has been loaded into memory but there is additional
+ * history in the lookaside table that has not been applied.
+ *
* WT_REF_LOCKED:
* Locked for exclusive access. In eviction, this page or a parent has
* been selected for eviction; once hazard pointers are checked, the page
@@ -794,11 +800,12 @@ struct __wt_ref {
#define WT_REF_DISK 0 /* Page is on disk */
#define WT_REF_DELETED 1 /* Page is on disk, but deleted */
-#define WT_REF_LOCKED 2 /* Page locked for exclusive access */
-#define WT_REF_LOOKASIDE 3 /* Page is on disk with lookaside */
-#define WT_REF_MEM 4 /* Page is in cache and valid */
-#define WT_REF_READING 5 /* Page being read */
-#define WT_REF_SPLIT 6 /* Parent page split (WT_REF dead) */
+#define WT_REF_LIMBO 2 /* Page is in cache without history */
+#define WT_REF_LOCKED 3 /* Page locked for exclusive access */
+#define WT_REF_LOOKASIDE 4 /* Page is on disk with lookaside */
+#define WT_REF_MEM 5 /* Page is in cache and valid */
+#define WT_REF_READING 6 /* Page being read */
+#define WT_REF_SPLIT 7 /* Parent page split (WT_REF dead) */
volatile uint32_t state; /* Page state */
/*
@@ -820,16 +827,14 @@ struct __wt_ref {
#undef ref_ikey
#define ref_ikey key.ikey
- union {
- WT_PAGE_DELETED *page_del; /* Deleted page information */
- WT_PAGE_LOOKASIDE *page_las; /* Lookaside information */
- };
+ WT_PAGE_DELETED *page_del; /* Deleted page information */
+ WT_PAGE_LOOKASIDE *page_las; /* Lookaside information */
};
/*
* WT_REF_SIZE is the expected structure size -- we verify the build to ensure
* the compiler hasn't inserted padding which would break the world.
*/
-#define WT_REF_SIZE 48
+#define WT_REF_SIZE 56
/*
* WT_ROW --
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index fe9c81f42b2..3a6413162f3 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -257,7 +257,7 @@ __wt_cache_page_byte_dirty_decr(
* Take care to read the dirty-byte count only once in case
* we're racing with updates.
*/
- orig = page->modify->bytes_dirty;
+ WT_ORDERED_READ(orig, page->modify->bytes_dirty);
decr = WT_MIN(size, orig);
if (__wt_atomic_cassize(
&page->modify->bytes_dirty, orig, orig - decr))
@@ -400,7 +400,7 @@ __wt_cache_page_image_incr(WT_SESSION_IMPL *session, uint32_t size)
* Evict pages from the cache.
*/
static inline void
-__wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page, bool rewrite)
+__wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_BTREE *btree;
WT_CACHE *cache;
@@ -448,17 +448,8 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page, bool rewrite)
/*
* Track if eviction makes progress. This is used in various places to
* determine whether eviction is stuck.
- *
- * We don't count rewrites as progress.
- *
- * Further, if a page was read with eviction disabled, we don't count
- * evicting a it as progress. Since disabling eviction allows pages to
- * be read even when the cache is full, we want to avoid workloads
- * repeatedly reading a page with eviction disabled (e.g., from the
- * metadata), then evicting that page and deciding that is a sign that
- * eviction is unstuck.
*/
- if (!rewrite && !F_ISSET_ATOMIC(page, WT_PAGE_READ_NO_EVICT))
+ if (!F_ISSET_ATOMIC(page, WT_PAGE_EVICT_NO_PROGRESS))
(void)__wt_atomic_addv64(&cache->eviction_progress, 1);
}
diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h
index eea0b977515..7a49f388826 100644
--- a/src/third_party/wiredtiger/src/include/cache.h
+++ b/src/third_party/wiredtiger/src/include/cache.h
@@ -105,12 +105,16 @@ struct __wt_cache {
WT_CONDVAR *evict_cond; /* Eviction server condition */
WT_SPINLOCK evict_walk_lock; /* Eviction walk location */
- u_int eviction_dirty_target; /* Percent to allow dirty */
- u_int eviction_dirty_trigger; /* Percent to trigger dirty eviction */
- u_int eviction_trigger; /* Percent to trigger eviction */
- u_int eviction_target; /* Percent to end eviction */
+ /*
+ * Eviction threshold percentages use double type to allow for
+ * specifying percentages less than one.
+ */
+ double eviction_dirty_target; /* Percent to allow dirty */
+ double eviction_dirty_trigger; /* Percent to trigger dirty eviction */
+ double eviction_trigger; /* Percent to trigger eviction */
+ double eviction_target; /* Percent to end eviction */
- u_int eviction_checkpoint_target;/* Percent to reduce dirty
+ double eviction_checkpoint_target;/* Percent to reduce dirty
to during checkpoint scrubs */
double eviction_scrub_limit; /* Percent of cache to trigger
dirty eviction during checkpoint
diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i
index 57376bb2fde..fc127942d02 100644
--- a/src/third_party/wiredtiger/src/include/cache.i
+++ b/src/third_party/wiredtiger/src/include/cache.i
@@ -255,7 +255,7 @@ __wt_session_can_wait(WT_SESSION_IMPL *session)
* volume of data in cache.
*/
static inline bool
-__wt_eviction_clean_needed(WT_SESSION_IMPL *session, u_int *pct_fullp)
+__wt_eviction_clean_needed(WT_SESSION_IMPL *session, double *pct_fullp)
{
WT_CACHE *cache;
uint64_t bytes_inuse, bytes_max;
@@ -270,7 +270,7 @@ __wt_eviction_clean_needed(WT_SESSION_IMPL *session, u_int *pct_fullp)
bytes_inuse = __wt_cache_bytes_inuse(cache);
if (pct_fullp != NULL)
- *pct_fullp = (u_int)((100 * bytes_inuse) / bytes_max);
+ *pct_fullp = ((100.0 * bytes_inuse) / bytes_max);
return (bytes_inuse > (cache->eviction_trigger * bytes_max) / 100);
}
@@ -281,7 +281,7 @@ __wt_eviction_clean_needed(WT_SESSION_IMPL *session, u_int *pct_fullp)
* volume of dirty data in cache.
*/
static inline bool
-__wt_eviction_dirty_needed(WT_SESSION_IMPL *session, u_int *pct_fullp)
+__wt_eviction_dirty_needed(WT_SESSION_IMPL *session, double *pct_fullp)
{
WT_CACHE *cache;
double dirty_trigger;
@@ -297,10 +297,10 @@ __wt_eviction_dirty_needed(WT_SESSION_IMPL *session, u_int *pct_fullp)
dirty_inuse = __wt_cache_dirty_leaf_inuse(cache);
if (pct_fullp != NULL)
- *pct_fullp = (u_int)((100 * dirty_inuse) / bytes_max);
+ *pct_fullp = ((100.0 * dirty_inuse) / bytes_max);
if ((dirty_trigger = cache->eviction_scrub_limit) < 1.0)
- dirty_trigger = (double)cache->eviction_dirty_trigger;
+ dirty_trigger = cache->eviction_dirty_trigger;
return (dirty_inuse > (uint64_t)(dirty_trigger * bytes_max) / 100);
}
@@ -312,10 +312,10 @@ __wt_eviction_dirty_needed(WT_SESSION_IMPL *session, u_int *pct_fullp)
*/
static inline bool
__wt_eviction_needed(
- WT_SESSION_IMPL *session, bool busy, bool readonly, u_int *pct_fullp)
+ WT_SESSION_IMPL *session, bool busy, bool readonly, double *pct_fullp)
{
WT_CACHE *cache;
- u_int pct_dirty, pct_full;
+ double pct_dirty, pct_full;
bool clean_needed, dirty_needed;
cache = S2C(session)->cache;
@@ -330,7 +330,7 @@ __wt_eviction_needed(
clean_needed = __wt_eviction_clean_needed(session, &pct_full);
if (readonly) {
dirty_needed = false;
- pct_dirty = 0;
+ pct_dirty = 0.0;
} else
dirty_needed = __wt_eviction_dirty_needed(session, &pct_dirty);
@@ -339,9 +339,9 @@ __wt_eviction_needed(
* we involve the application thread.
*/
if (pct_fullp != NULL)
- *pct_fullp = (u_int)WT_MAX(0, 100 - WT_MIN(
- (int)cache->eviction_trigger - (int)pct_full,
- (int)cache->eviction_dirty_trigger - (int)pct_dirty));
+ *pct_fullp = WT_MAX(0.0, 100.0 - WT_MIN(
+ cache->eviction_trigger - pct_full,
+ cache->eviction_dirty_trigger - pct_dirty));
/*
* Only check the dirty trigger when the session is not busy.
@@ -381,7 +381,7 @@ __wt_cache_eviction_check(
WT_BTREE *btree;
WT_TXN_GLOBAL *txn_global;
WT_TXN_STATE *txn_state;
- u_int pct_full;
+ double pct_full;
if (didworkp != NULL)
*didworkp = false;
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index c80e686ead5..3674d9218da 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -129,7 +129,6 @@ extern int __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
extern void __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref);
extern bool __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all);
extern int __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern void __wt_ref_out_int(WT_SESSION_IMPL *session, WT_REF *ref, bool rewrite);
extern void __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref);
extern void __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep);
extern void __wt_free_ref( WT_SESSION_IMPL *session, WT_REF *ref, int page_type, bool free_pages);
@@ -198,7 +197,6 @@ extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const
extern int __wt_row_insert_alloc(WT_SESSION_IMPL *session, const WT_ITEM *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_UPDATE **updp, size_t *sizep, u_int modify_type) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern WT_UPDATE *__wt_update_obsolete_check( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd);
-extern void __wt_update_obsolete_free( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd);
extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool insert, bool restore) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_las_nonempty(WT_SESSION_IMPL *session);
@@ -369,7 +367,7 @@ extern int __wt_evict_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUT
extern int __wt_evict_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session);
-extern int __wt_cache_eviction_worker( WT_SESSION_IMPL *session, bool busy, bool readonly, u_int pct_full) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cache_eviction_worker( WT_SESSION_IMPL *session, bool busy, bool readonly, double pct_full) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref);
extern void __wt_evict_priority_set(WT_SESSION_IMPL *session, uint64_t v);
extern void __wt_evict_priority_clear(WT_SESSION_IMPL *session);
@@ -787,8 +785,7 @@ extern void __wt_thread_group_start_one( WT_SESSION_IMPL *session, WT_THREAD_GRO
extern void __wt_thread_group_stop_one(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group);
extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_seconds(WT_SESSION_IMPL *session, time_t *timep);
-extern uint64_t __wt_tsc_to_nsec(uint64_t end, uint64_t begin);
-extern uint64_t __wt_tsc_get_expensive_timestamp(WT_SESSION_IMPL *session);
+extern uint64_t __wt_clock_to_nsec(uint64_t end, uint64_t begin);
extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session);
extern void __wt_txn_get_snapshot(WT_SESSION_IMPL *session);
extern int __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -797,6 +794,7 @@ extern int __wt_txn_reconfigure(WT_SESSION_IMPL *session, const char *config) WT
extern void __wt_txn_release(WT_SESSION_IMPL *session);
extern int __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_rollback_required(WT_SESSION_IMPL *session, const char *reason) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_init(WT_SESSION_IMPL *session, WT_SESSION_IMPL *session_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_txn_stats_update(WT_SESSION_IMPL *session);
extern void __wt_txn_destroy(WT_SESSION_IMPL *session);
diff --git a/src/third_party/wiredtiger/src/include/misc.i b/src/third_party/wiredtiger/src/include/misc.i
index 2cca416d20f..05c0733d4ce 100644
--- a/src/third_party/wiredtiger/src/include/misc.i
+++ b/src/third_party/wiredtiger/src/include/misc.i
@@ -34,9 +34,7 @@ __wt_hex(int c)
* Get a timestamp from CPU registers.
*/
static inline uint64_t
-__wt_rdtsc(WT_SESSION_IMPL *session) {
- if (__wt_process.use_epochtime)
- return (__wt_tsc_get_expensive_timestamp(session));
+__wt_rdtsc(void) {
#if defined (__i386)
{
uint64_t x;
@@ -52,11 +50,29 @@ __wt_rdtsc(WT_SESSION_IMPL *session) {
return ((d << 32) | a);
}
#else
- return (__wt_tsc_get_expensive_timestamp(session));
+ return (0);
#endif
}
/*
+ * __wt_clock --
+ * Obtain a timestamp via either a CPU register or via a system call on
+ * platforms where obtaining it directly from the hardware register is
+ * not supported (seconds are widened before scaling to avoid overflow).
+ */
+static inline uint64_t
+__wt_clock(WT_SESSION_IMPL *session)
+{
+ struct timespec tsp;
+
+ if (__wt_process.use_epochtime) {
+ __wt_epoch(session, &tsp);
+ return ((uint64_t)tsp.tv_sec * WT_BILLION + (uint64_t)tsp.tv_nsec);
+ }
+ return (__wt_rdtsc());
+}
+
+/*
* __wt_strdup --
* ANSI strdup function.
*/
diff --git a/src/third_party/wiredtiger/src/include/mutex.i b/src/third_party/wiredtiger/src/include/mutex.i
index fa07e6b7d4f..8a2699f031d 100644
--- a/src/third_party/wiredtiger/src/include/mutex.i
+++ b/src/third_party/wiredtiger/src/include/mutex.i
@@ -297,17 +297,17 @@ __wt_spin_lock_track(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
int64_t **stats;
if (t->stat_count_off != -1 && WT_STAT_ENABLED(session)) {
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
__wt_spin_lock(session, t);
- time_stop = __wt_rdtsc(session);
+ time_stop = __wt_clock(session);
stats = (int64_t **)S2C(session)->stats;
stats[session->stat_bucket][t->stat_count_off]++;
if (F_ISSET(session, WT_SESSION_INTERNAL))
stats[session->stat_bucket][t->stat_int_usecs_off] +=
- (int64_t)WT_TSCDIFF_US(time_stop, time_start);
+ (int64_t)WT_CLOCKDIFF_US(time_stop, time_start);
else
stats[session->stat_bucket][t->stat_app_usecs_off] +=
- (int64_t)WT_TSCDIFF_US(time_stop, time_start);
+ (int64_t)WT_CLOCKDIFF_US(time_stop, time_start);
} else
__wt_spin_lock(session, t);
}
diff --git a/src/third_party/wiredtiger/src/include/optrack.h b/src/third_party/wiredtiger/src/include/optrack.h
index 9c9720bb3cc..bec724042cf 100644
--- a/src/third_party/wiredtiger/src/include/optrack.h
+++ b/src/third_party/wiredtiger/src/include/optrack.h
@@ -51,7 +51,7 @@ struct __wt_optrack_record {
WT_OPTRACK_RECORD *__tr; \
__tr = &((s)->optrack_buf[ \
(s)->optrackbuf_ptr % WT_OPTRACK_MAXRECS]); \
- __tr->op_timestamp = __wt_rdtsc(s); \
+ __tr->op_timestamp = __wt_clock(s); \
__tr->op_id = __func_id; \
__tr->op_type = optype; \
\
diff --git a/src/third_party/wiredtiger/src/include/os.h b/src/third_party/wiredtiger/src/include/os.h
index a3f1420d72b..c31619f2f96 100644
--- a/src/third_party/wiredtiger/src/include/os.h
+++ b/src/third_party/wiredtiger/src/include/os.h
@@ -65,14 +65,14 @@
#define WT_TIMEDIFF_SEC(end, begin) \
(WT_TIMEDIFF_NS((end), (begin)) / WT_BILLION)
-#define WT_TSCDIFF_NS(end, begin) \
- (__wt_tsc_to_nsec(end, begin))
-#define WT_TSCDIFF_US(end, begin) \
- (WT_TSCDIFF_NS(end, begin) / WT_THOUSAND)
-#define WT_TSCDIFF_MS(end, begin) \
- (WT_TSCDIFF_NS(end, begin) / WT_MILLION)
-#define WT_TSCDIFF_SEC(end, begin) \
- (WT_TSCDIFF_NS(end, begin) / WT_BILLION)
+#define WT_CLOCKDIFF_NS(end, begin) \
+ (__wt_clock_to_nsec(end, begin))
+#define WT_CLOCKDIFF_US(end, begin) \
+ (WT_CLOCKDIFF_NS(end, begin) / WT_THOUSAND)
+#define WT_CLOCKDIFF_MS(end, begin) \
+ (WT_CLOCKDIFF_NS(end, begin) / WT_MILLION)
+#define WT_CLOCKDIFF_SEC(end, begin) \
+ (WT_CLOCKDIFF_NS(end, begin) / WT_BILLION)
#define WT_TIMECMP(t1, t2) \
((t1).tv_sec < (t2).tv_sec ? -1 : \
diff --git a/src/third_party/wiredtiger/src/include/os_fhandle.i b/src/third_party/wiredtiger/src/include/os_fhandle.i
index 5685b0f46dc..7c09a83132c 100644
--- a/src/third_party/wiredtiger/src/include/os_fhandle.i
+++ b/src/third_party/wiredtiger/src/include/os_fhandle.i
@@ -109,14 +109,14 @@ __wt_read(
WT_STAT_CONN_INCR_ATOMIC(session, thread_read_active);
WT_STAT_CONN_INCR(session, read_io);
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
ret = fh->handle->fh_read(
fh->handle, (WT_SESSION *)session, offset, len, buf);
- time_stop = __wt_rdtsc(session);
+ time_stop = __wt_clock(session);
__wt_stat_msecs_hist_incr_fsread(session,
- WT_TSCDIFF_MS(time_stop, time_start));
+ WT_CLOCKDIFF_MS(time_stop, time_start));
WT_STAT_CONN_DECR_ATOMIC(session, thread_read_active);
return (ret);
}
@@ -188,14 +188,14 @@ __wt_write(WT_SESSION_IMPL *session,
WT_STAT_CONN_INCR(session, write_io);
WT_STAT_CONN_INCR_ATOMIC(session, thread_write_active);
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
ret = fh->handle->fh_write(
fh->handle, (WT_SESSION *)session, offset, len, buf);
- time_stop = __wt_rdtsc(session);
+ time_stop = __wt_clock(session);
__wt_stat_msecs_hist_incr_fswrite(session,
- WT_TSCDIFF_MS(time_stop, time_start));
+ WT_CLOCKDIFF_MS(time_stop, time_start));
WT_STAT_CONN_DECR_ATOMIC(session, thread_write_active);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/include/serial.i b/src/third_party/wiredtiger/src/include/serial.i
index 5590c68618b..02f15cdb8af 100644
--- a/src/third_party/wiredtiger/src/include/serial.i
+++ b/src/third_party/wiredtiger/src/include/serial.i
@@ -157,9 +157,10 @@ __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
size_t new_ins_size, uint64_t *recnop, u_int skipdepth, bool exclusive)
{
WT_DECL_RET;
- WT_INSERT *new_ins = *new_insp;
+ WT_INSERT *new_ins;
/* Clear references to memory we now own and must free on error. */
+ new_ins = *new_insp;
*new_insp = NULL;
/* Check for page write generation wrap. */
@@ -206,11 +207,12 @@ __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
size_t new_ins_size, u_int skipdepth, bool exclusive)
{
WT_DECL_RET;
- WT_INSERT *new_ins = *new_insp;
+ WT_INSERT *new_ins;
u_int i;
bool simple;
/* Clear references to memory we now own and must free on error. */
+ new_ins = *new_insp;
*new_insp = NULL;
/* Check for page write generation wrap. */
@@ -262,11 +264,13 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
WT_UPDATE **srch_upd, WT_UPDATE **updp, size_t upd_size, bool exclusive)
{
WT_DECL_RET;
- WT_UPDATE *obsolete, *upd = *updp;
+ WT_UPDATE *obsolete, *upd;
wt_timestamp_t *obsolete_timestamp;
+ size_t size;
uint64_t txn;
/* Clear references to memory we now own and must free on error. */
+ upd = *updp;
*updp = NULL;
/* Check for page write generation wrap. */
@@ -329,9 +333,20 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
return (0);
obsolete = __wt_update_obsolete_check(session, page, upd->next);
+
+ /*
+ * Decrement the dirty byte count while holding the page lock, else we
+ * can race with checkpoints cleaning a page.
+ */
+ for (size = 0, upd = obsolete; upd != NULL; upd = upd->next)
+ size += WT_UPDATE_MEMSIZE(upd);
+ if (size != 0)
+ __wt_cache_page_inmem_decr(session, page, size);
+
WT_PAGE_UNLOCK(session, page);
+
if (obsolete != NULL)
- __wt_update_obsolete_free(session, page, obsolete);
+ __wt_free_update_list(session, obsolete);
return (0);
}
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index 0a902f36b39..8b8c3a55a6c 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -420,6 +420,8 @@ struct __wt_connection_stats {
int64_t cache_eviction_pages_queued_oldest;
int64_t cache_read;
int64_t cache_read_lookaside;
+ int64_t cache_read_lookaside_skipped;
+ int64_t cache_read_lookaside_delay;
int64_t cache_pages_requested;
int64_t cache_eviction_pages_seen;
int64_t cache_eviction_fail;
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index 327c2cd8caa..a0f51be8a28 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -259,6 +259,8 @@ struct __wt_txn {
WT_ITEM *ckpt_snapshot;
bool full_ckpt;
+ const char *rollback_reason; /* If rollback, the reason */
+
/* AUTOMATIC FLAG VALUE GENERATION START */
#define WT_TXN_AUTOCOMMIT 0x0001u
#define WT_TXN_ERROR 0x0002u
@@ -274,6 +276,7 @@ struct __wt_txn {
#define WT_TXN_SYNC_SET 0x0800u
#define WT_TXN_TS_COMMIT_ALWAYS 0x1000u
#define WT_TXN_TS_COMMIT_NEVER 0x2000u
+#define WT_TXN_UPDATE 0x4000u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint32_t flags;
};
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 360a6cf1edb..121a18c9c3c 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -733,7 +733,8 @@ __wt_txn_update_check(WT_SESSION_IMPL *session, WT_UPDATE *upd)
session, txn_update_conflict);
WT_STAT_DATA_INCR(
session, txn_update_conflict);
- return (WT_ROLLBACK);
+ return (__wt_txn_rollback_required(session,
+ "conflict between concurrent operations"));
}
upd = upd->next;
}
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index edee21b5ca7..53067bf44ab 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -2116,27 +2116,36 @@ struct __wt_connection {
* current eviction load., an integer between 1 and 20; default \c 1.}
* @config{ ),,}
* @config{eviction_checkpoint_target, perform eviction at the beginning
- * of checkpoints to bring the dirty content in cache to this level\,
- * expressed as a percentage of the total cache size. Ignored if set to
- * zero or \c in_memory is \c true., an integer between 0 and 99;
- * default \c 5.}
+ * of checkpoints to bring the dirty content in cache to this level. It
+ * is a percentage of the cache size if the value is within the range of
+ * 0 to 100 or an absolute size when greater than 100. The value is not
+ * allowed to exceed the \c cache_size. Ignored if set to zero or \c
+ * in_memory is \c true., an integer between 0 and 10TB; default \c 5.}
* @config{eviction_dirty_target, perform eviction in worker threads
- * when the cache contains at least this much dirty content\, expressed
- * as a percentage of the total cache size., an integer between 1 and
- * 99; default \c 5.}
+ * when the cache contains at least this much dirty content. It is a
+ * percentage of the cache size if the value is within the range of 1 to
+ * 100 or an absolute size when greater than 100. The value is not
+ * allowed to exceed the \c cache_size., an integer between 1 and 10TB;
+ * default \c 5.}
* @config{eviction_dirty_trigger, trigger application threads to
* perform eviction when the cache contains at least this much dirty
- * content\, expressed as a percentage of the total cache size. This
- * setting only alters behavior if it is lower than eviction_trigger.,
- * an integer between 1 and 99; default \c 20.}
+ * content. It is a percentage of the cache size if the value is within
+ * the range of 1 to 100 or an absolute size when greater than 100. The
+ * value is not allowed to exceed the \c cache_size. This setting only
+ * alters behavior if it is lower than eviction_trigger., an integer
+ * between 1 and 10TB; default \c 20.}
* @config{eviction_target, perform eviction in worker threads when the
- * cache contains at least this much content\, expressed as a percentage
- * of the total cache size. Must be less than \c eviction_trigger., an
- * integer between 10 and 99; default \c 80.}
+ * cache contains at least this much content. It is a percentage of the
+ * cache size if the value is within the range of 10 to 100 or an
+ * absolute size when greater than 100. The value is not allowed to
+ * exceed the \c cache_size., an integer between 10 and 10TB; default \c
+ * 80.}
* @config{eviction_trigger, trigger application threads to perform
- * eviction when the cache contains at least this much content\,
- * expressed as a percentage of the total cache size., an integer
- * between 10 and 99; default \c 95.}
+ * eviction when the cache contains at least this much content. It is a
+ * percentage of the cache size if the value is within the range of 10
+ * to 100 or an absolute size when greater than 100. The value is not
+ * allowed to exceed the \c cache_size., an integer between 10 and 10TB;
+ * default \c 95.}
* @config{file_manager = (, control how file handles are managed., a
* set of related configuration options defined below.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;close_handle_minimum, number of
@@ -2186,13 +2195,15 @@ struct __wt_connection {
* @config{shared_cache = (, shared cache configuration options. A
* database should configure either a cache_size or a shared_cache not
* both. Enabling a shared cache uses a session from the configured
- * session_max., a set of related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;chunk, the granularity that a shared
- * cache is redistributed., an integer between 1MB and 10TB; default \c
- * 10MB.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;name, the name of a cache that
- * is shared between databases or \c "none" when no shared cache is
- * configured., a string; default \c none.}
+ * session_max. A shared cache cannot have absolute values configured
+ * for cache eviction settings., a set of related configuration options
+ * defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;chunk, the
+ * granularity that a shared cache is redistributed., an integer between
+ * 1MB and 10TB; default \c 10MB.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;name,
+ * the name of a cache that is shared between databases or \c "none"
+ * when no shared cache is configured., a string; default \c none.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;quota, maximum size of cache this
* database can be allocated from the shared cache. Defaults to the
* entire shared cache size., an integer; default \c 0.}
@@ -2715,25 +2726,32 @@ struct __wt_connection {
* @config{
* ),,}
* @config{eviction_checkpoint_target, perform eviction at the beginning of
- * checkpoints to bring the dirty content in cache to this level\, expressed as
- * a percentage of the total cache size. Ignored if set to zero or \c in_memory
- * is \c true., an integer between 0 and 99; default \c 5.}
+ * checkpoints to bring the dirty content in cache to this level. It is a
+ * percentage of the cache size if the value is within the range of 0 to 100 or
+ * an absolute size when greater than 100. The value is not allowed to exceed
+ * the \c cache_size. Ignored if set to zero or \c in_memory is \c true., an
+ * integer between 0 and 10TB; default \c 5.}
* @config{eviction_dirty_target, perform eviction in worker threads when the
- * cache contains at least this much dirty content\, expressed as a percentage
- * of the total cache size., an integer between 1 and 99; default \c 5.}
+ * cache contains at least this much dirty content. It is a percentage of the
+ * cache size if the value is within the range of 1 to 100 or an absolute size
+ * when greater than 100. The value is not allowed to exceed the \c cache_size.,
+ * an integer between 1 and 10TB; default \c 5.}
* @config{eviction_dirty_trigger, trigger application threads to perform
- * eviction when the cache contains at least this much dirty content\, expressed
- * as a percentage of the total cache size. This setting only alters behavior
- * if it is lower than eviction_trigger., an integer between 1 and 99; default
- * \c 20.}
+ * eviction when the cache contains at least this much dirty content. It is a
+ * percentage of the cache size if the value is within the range of 1 to 100 or
+ * an absolute size when greater than 100. The value is not allowed to exceed
+ * the \c cache_size. This setting only alters behavior if it is lower than
+ * eviction_trigger., an integer between 1 and 10TB; default \c 20.}
* @config{eviction_target, perform eviction in worker threads when the cache
- * contains at least this much content\, expressed as a percentage of the total
- * cache size. Must be less than \c eviction_trigger., an integer between 10
- * and 99; default \c 80.}
+ * contains at least this much content. It is a percentage of the cache size if
+ * the value is within the range of 10 to 100 or an absolute size when greater
+ * than 100. The value is not allowed to exceed the \c cache_size., an integer
+ * between 10 and 10TB; default \c 80.}
* @config{eviction_trigger, trigger application threads to perform eviction
- * when the cache contains at least this much content\, expressed as a
- * percentage of the total cache size., an integer between 10 and 99; default \c
- * 95.}
+ * when the cache contains at least this much content. It is a percentage of
+ * the cache size if the value is within the range of 10 to 100 or an absolute
+ * size when greater than 100. The value is not allowed to exceed the \c
+ * cache_size., an integer between 10 and 10TB; default \c 95.}
* @config{exclusive, fail if the database already exists\, generally used with
* the \c create option., a boolean flag; default \c false.}
* @config{extensions, list of shared library extensions to load (using dlopen).
@@ -2822,8 +2840,9 @@ struct __wt_connection {
* threads)., an integer greater than or equal to 1; default \c 100.}
* @config{shared_cache = (, shared cache configuration options. A database
* should configure either a cache_size or a shared_cache not both. Enabling a
- * shared cache uses a session from the configured session_max., a set of
- * related configuration options defined below.}
+ * shared cache uses a session from the configured session_max. A shared cache
+ * cannot have absolute values configured for cache eviction settings., a set
+ * of related configuration options defined below.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;chunk, the granularity that a shared cache is
* redistributed., an integer between 1MB and 10TB; default \c 10MB.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;name, the name of a cache that is shared
@@ -5009,521 +5028,528 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_CACHE_READ 1105
/*! cache: pages read into cache requiring lookaside entries */
#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1106
+/*! cache: pages read into cache skipping older lookaside entries */
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_SKIPPED 1107
+/*!
+ * cache: pages read into cache with skipped lookaside entries needed
+ * later
+ */
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_DELAY 1108
/*! cache: pages requested from the cache */
-#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1107
+#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1109
/*! cache: pages seen by eviction walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1108
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1110
/*! cache: pages selected for eviction unable to be evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1109
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1111
/*! cache: pages walked for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK 1110
+#define WT_STAT_CONN_CACHE_EVICTION_WALK 1112
/*! cache: pages written from cache */
-#define WT_STAT_CONN_CACHE_WRITE 1111
+#define WT_STAT_CONN_CACHE_WRITE 1113
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1112
+#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1114
/*! cache: percentage overhead */
-#define WT_STAT_CONN_CACHE_OVERHEAD 1113
+#define WT_STAT_CONN_CACHE_OVERHEAD 1115
/*! cache: tracked bytes belonging to internal pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1114
+#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1116
/*! cache: tracked bytes belonging to leaf pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_LEAF 1115
+#define WT_STAT_CONN_CACHE_BYTES_LEAF 1117
/*! cache: tracked dirty bytes in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1116
+#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1118
/*! cache: tracked dirty pages in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1117
+#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1119
/*! cache: unmodified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1118
+#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1120
/*! connection: auto adjusting condition resets */
-#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1119
+#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1121
/*! connection: auto adjusting condition wait calls */
-#define WT_STAT_CONN_COND_AUTO_WAIT 1120
+#define WT_STAT_CONN_COND_AUTO_WAIT 1122
/*! connection: detected system time went backwards */
-#define WT_STAT_CONN_TIME_TRAVEL 1121
+#define WT_STAT_CONN_TIME_TRAVEL 1123
/*! connection: files currently open */
-#define WT_STAT_CONN_FILE_OPEN 1122
+#define WT_STAT_CONN_FILE_OPEN 1124
/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1123
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1125
/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1124
+#define WT_STAT_CONN_MEMORY_FREE 1126
/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1125
+#define WT_STAT_CONN_MEMORY_GROW 1127
/*! connection: pthread mutex condition wait calls */
-#define WT_STAT_CONN_COND_WAIT 1126
+#define WT_STAT_CONN_COND_WAIT 1128
/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1127
+#define WT_STAT_CONN_RWLOCK_READ 1129
/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1128
+#define WT_STAT_CONN_RWLOCK_WRITE 1130
/*! connection: total fsync I/Os */
-#define WT_STAT_CONN_FSYNC_IO 1129
+#define WT_STAT_CONN_FSYNC_IO 1131
/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1130
+#define WT_STAT_CONN_READ_IO 1132
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1131
+#define WT_STAT_CONN_WRITE_IO 1133
/*! cursor: cursor create calls */
-#define WT_STAT_CONN_CURSOR_CREATE 1132
+#define WT_STAT_CONN_CURSOR_CREATE 1134
/*! cursor: cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT 1133
+#define WT_STAT_CONN_CURSOR_INSERT 1135
/*! cursor: cursor modify calls */
-#define WT_STAT_CONN_CURSOR_MODIFY 1134
+#define WT_STAT_CONN_CURSOR_MODIFY 1136
/*! cursor: cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT 1135
+#define WT_STAT_CONN_CURSOR_NEXT 1137
/*! cursor: cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 1136
+#define WT_STAT_CONN_CURSOR_PREV 1138
/*! cursor: cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 1137
+#define WT_STAT_CONN_CURSOR_REMOVE 1139
/*! cursor: cursor reserve calls */
-#define WT_STAT_CONN_CURSOR_RESERVE 1138
+#define WT_STAT_CONN_CURSOR_RESERVE 1140
/*! cursor: cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 1139
+#define WT_STAT_CONN_CURSOR_RESET 1141
/*! cursor: cursor restarted searches */
-#define WT_STAT_CONN_CURSOR_RESTART 1140
+#define WT_STAT_CONN_CURSOR_RESTART 1142
/*! cursor: cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 1141
+#define WT_STAT_CONN_CURSOR_SEARCH 1143
/*! cursor: cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1142
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1144
/*! cursor: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1143
+#define WT_STAT_CONN_CURSOR_UPDATE 1145
/*! cursor: truncate calls */
-#define WT_STAT_CONN_CURSOR_TRUNCATE 1144
+#define WT_STAT_CONN_CURSOR_TRUNCATE 1146
/*! data-handle: connection data handles currently active */
-#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1145
+#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1147
/*! data-handle: connection sweep candidate became referenced */
-#define WT_STAT_CONN_DH_SWEEP_REF 1146
+#define WT_STAT_CONN_DH_SWEEP_REF 1148
/*! data-handle: connection sweep dhandles closed */
-#define WT_STAT_CONN_DH_SWEEP_CLOSE 1147
+#define WT_STAT_CONN_DH_SWEEP_CLOSE 1149
/*! data-handle: connection sweep dhandles removed from hash list */
-#define WT_STAT_CONN_DH_SWEEP_REMOVE 1148
+#define WT_STAT_CONN_DH_SWEEP_REMOVE 1150
/*! data-handle: connection sweep time-of-death sets */
-#define WT_STAT_CONN_DH_SWEEP_TOD 1149
+#define WT_STAT_CONN_DH_SWEEP_TOD 1151
/*! data-handle: connection sweeps */
-#define WT_STAT_CONN_DH_SWEEPS 1150
+#define WT_STAT_CONN_DH_SWEEPS 1152
/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1151
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1153
/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1152
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1154
/*! lock: checkpoint lock acquisitions */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1153
+#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1155
/*! lock: checkpoint lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1154
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1156
/*! lock: checkpoint lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1155
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1157
/*!
* lock: commit timestamp queue lock application thread time waiting for
* the dhandle lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_APPLICATION 1156
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_APPLICATION 1158
/*!
* lock: commit timestamp queue lock internal thread time waiting for the
* dhandle lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_INTERNAL 1157
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_INTERNAL 1159
/*! lock: commit timestamp queue read lock acquisitions */
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_READ_COUNT 1158
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_READ_COUNT 1160
/*! lock: commit timestamp queue write lock acquisitions */
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WRITE_COUNT 1159
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WRITE_COUNT 1161
/*!
* lock: dhandle lock application thread time waiting for the dhandle
* lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1160
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1162
/*!
* lock: dhandle lock internal thread time waiting for the dhandle lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1161
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1163
/*! lock: dhandle read lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1162
+#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1164
/*! lock: dhandle write lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1163
+#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1165
/*! lock: metadata lock acquisitions */
-#define WT_STAT_CONN_LOCK_METADATA_COUNT 1164
+#define WT_STAT_CONN_LOCK_METADATA_COUNT 1166
/*! lock: metadata lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1165
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1167
/*! lock: metadata lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1166
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1168
/*!
* lock: read timestamp queue lock application thread time waiting for
* the dhandle lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1167
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1169
/*!
* lock: read timestamp queue lock internal thread time waiting for the
* dhandle lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1168
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1170
/*! lock: read timestamp queue read lock acquisitions */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1169
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1171
/*! lock: read timestamp queue write lock acquisitions */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1170
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1172
/*! lock: schema lock acquisitions */
-#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1171
+#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1173
/*! lock: schema lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1172
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1174
/*! lock: schema lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1173
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1175
/*!
* lock: table lock application thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1174
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1176
/*!
* lock: table lock internal thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1175
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1177
/*! lock: table read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1176
+#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1178
/*! lock: table write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1177
+#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1179
/*!
* lock: txn global lock application thread time waiting for the dhandle
* lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1178
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1180
/*!
* lock: txn global lock internal thread time waiting for the dhandle
* lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1179
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1181
/*! lock: txn global read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1180
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1182
/*! lock: txn global write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1181
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1183
/*! log: busy returns attempting to switch slots */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1182
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1184
/*! log: force checkpoint calls slept */
-#define WT_STAT_CONN_LOG_FORCE_CKPT_SLEEP 1183
+#define WT_STAT_CONN_LOG_FORCE_CKPT_SLEEP 1185
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1184
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1186
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1185
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1187
/*! log: log files manually zero-filled */
-#define WT_STAT_CONN_LOG_ZERO_FILLS 1186
+#define WT_STAT_CONN_LOG_ZERO_FILLS 1188
/*! log: log flush operations */
-#define WT_STAT_CONN_LOG_FLUSH 1187
+#define WT_STAT_CONN_LOG_FLUSH 1189
/*! log: log force write operations */
-#define WT_STAT_CONN_LOG_FORCE_WRITE 1188
+#define WT_STAT_CONN_LOG_FORCE_WRITE 1190
/*! log: log force write operations skipped */
-#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1189
+#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1191
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1190
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1192
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1191
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1193
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1192
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1194
/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1193
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1195
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1194
+#define WT_STAT_CONN_LOG_SCANS 1196
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1195
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1197
/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1196
+#define WT_STAT_CONN_LOG_WRITE_LSN 1198
/*! log: log server thread write LSN walk skipped */
-#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1197
+#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1199
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1198
+#define WT_STAT_CONN_LOG_SYNC 1200
/*! log: log sync time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DURATION 1199
+#define WT_STAT_CONN_LOG_SYNC_DURATION 1201
/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1200
+#define WT_STAT_CONN_LOG_SYNC_DIR 1202
/*! log: log sync_dir time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1201
+#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1203
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1202
+#define WT_STAT_CONN_LOG_WRITES 1204
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1203
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1205
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1204
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1206
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1205
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1207
/*! log: pre-allocated log files not ready and missed */
-#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1206
+#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1208
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1207
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1209
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1208
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1210
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1209
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1211
/*! log: slot close lost race */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1210
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1212
/*! log: slot close unbuffered waits */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1211
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1213
/*! log: slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1212
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1214
/*! log: slot join atomic update races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1213
+#define WT_STAT_CONN_LOG_SLOT_RACES 1215
/*! log: slot join calls atomic updates raced */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1214
+#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1216
/*! log: slot join calls did not yield */
-#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1215
+#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1217
/*! log: slot join calls found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1216
+#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1218
/*! log: slot join calls slept */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1217
+#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1219
/*! log: slot join calls yielded */
-#define WT_STAT_CONN_LOG_SLOT_YIELD 1218
+#define WT_STAT_CONN_LOG_SLOT_YIELD 1220
/*! log: slot join found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1219
+#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1221
/*! log: slot joins yield time (usecs) */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1220
+#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1222
/*! log: slot transitions unable to find free slot */
-#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1221
+#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1223
/*! log: slot unbuffered writes */
-#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1222
+#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1224
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1223
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1225
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1224
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1226
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1225
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1227
/*! log: written slots coalesced */
-#define WT_STAT_CONN_LOG_SLOT_COALESCED 1226
+#define WT_STAT_CONN_LOG_SLOT_COALESCED 1228
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1227
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1229
/*! perf: file system read latency histogram (bucket 1) - 10-49ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1228
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1230
/*! perf: file system read latency histogram (bucket 2) - 50-99ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1229
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1231
/*! perf: file system read latency histogram (bucket 3) - 100-249ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1230
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1232
/*! perf: file system read latency histogram (bucket 4) - 250-499ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1231
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1233
/*! perf: file system read latency histogram (bucket 5) - 500-999ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1232
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1234
/*! perf: file system read latency histogram (bucket 6) - 1000ms+ */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1233
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1235
/*! perf: file system write latency histogram (bucket 1) - 10-49ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1234
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1236
/*! perf: file system write latency histogram (bucket 2) - 50-99ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1235
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1237
/*! perf: file system write latency histogram (bucket 3) - 100-249ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1236
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1238
/*! perf: file system write latency histogram (bucket 4) - 250-499ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1237
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1239
/*! perf: file system write latency histogram (bucket 5) - 500-999ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1238
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1240
/*! perf: file system write latency histogram (bucket 6) - 1000ms+ */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1239
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1241
/*! perf: operation read latency histogram (bucket 1) - 100-249us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1240
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1242
/*! perf: operation read latency histogram (bucket 2) - 250-499us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1241
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1243
/*! perf: operation read latency histogram (bucket 3) - 500-999us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1242
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1244
/*! perf: operation read latency histogram (bucket 4) - 1000-9999us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1243
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1245
/*! perf: operation read latency histogram (bucket 5) - 10000us+ */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1244
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1246
/*! perf: operation write latency histogram (bucket 1) - 100-249us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1245
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1247
/*! perf: operation write latency histogram (bucket 2) - 250-499us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1246
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1248
/*! perf: operation write latency histogram (bucket 3) - 500-999us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1247
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1249
/*! perf: operation write latency histogram (bucket 4) - 1000-9999us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1248
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1250
/*! perf: operation write latency histogram (bucket 5) - 10000us+ */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1249
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1251
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1250
+#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1252
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1251
+#define WT_STAT_CONN_REC_PAGES 1253
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1252
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1254
/*! reconciliation: pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE 1253
+#define WT_STAT_CONN_REC_PAGE_DELETE 1255
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1254
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1256
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1255
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1257
/*! session: open cursor count */
-#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1256
+#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1258
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1257
+#define WT_STAT_CONN_SESSION_OPEN 1259
/*! session: table alter failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1258
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1260
/*! session: table alter successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1259
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1261
/*! session: table alter unchanged and skipped */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1260
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1262
/*! session: table compact failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1261
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1263
/*! session: table compact successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1262
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1264
/*! session: table create failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1263
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1265
/*! session: table create successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1264
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1266
/*! session: table drop failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1265
+#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1267
/*! session: table drop successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1266
+#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1268
/*! session: table rebalance failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1267
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1269
/*! session: table rebalance successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1268
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1270
/*! session: table rename failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1269
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1271
/*! session: table rename successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1270
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1272
/*! session: table salvage failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1271
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1273
/*! session: table salvage successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1272
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1274
/*! session: table truncate failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1273
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1275
/*! session: table truncate successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1274
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1276
/*! session: table verify failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1275
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1277
/*! session: table verify successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1276
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1278
/*! thread-state: active filesystem fsync calls */
-#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1277
+#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1279
/*! thread-state: active filesystem read calls */
-#define WT_STAT_CONN_THREAD_READ_ACTIVE 1278
+#define WT_STAT_CONN_THREAD_READ_ACTIVE 1280
/*! thread-state: active filesystem write calls */
-#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1279
+#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1281
/*! thread-yield: application thread time evicting (usecs) */
-#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1280
+#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1282
/*! thread-yield: application thread time waiting for cache (usecs) */
-#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1281
+#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1283
/*!
* thread-yield: connection close blocked waiting for transaction state
* stabilization
*/
-#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1282
+#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1284
/*! thread-yield: connection close yielded for lsm manager shutdown */
-#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1283
+#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1285
/*! thread-yield: data handle lock yielded */
-#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1284
+#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1286
/*!
* thread-yield: get reference for page index and slot time sleeping
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1285
+#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1287
/*! thread-yield: log server sync yielded for log write */
-#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1286
+#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1288
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1287
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1289
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1288
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1290
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1289
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1291
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1290
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1292
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1291
+#define WT_STAT_CONN_PAGE_SLEEP 1293
/*!
* thread-yield: page delete rollback time sleeping for state change
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1292
+#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1294
/*! thread-yield: page reconciliation yielded due to child modification */
-#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1293
+#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1295
/*!
* thread-yield: tree descend one level yielded for split page index
* update
*/
-#define WT_STAT_CONN_TREE_DESCEND_BLOCKED 1294
+#define WT_STAT_CONN_TREE_DESCEND_BLOCKED 1296
/*! transaction: commit timestamp queue insert to empty */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_EMPTY 1295
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_EMPTY 1297
/*! transaction: commit timestamp queue inserts to head */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_HEAD 1296
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_HEAD 1298
/*! transaction: commit timestamp queue inserts total */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1297
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1299
/*! transaction: commit timestamp queue length */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN 1298
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN 1300
/*! transaction: number of named snapshots created */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1299
+#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1301
/*! transaction: number of named snapshots dropped */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1300
+#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1302
/*! transaction: query timestamp calls */
-#define WT_STAT_CONN_TXN_QUERY_TS 1301
+#define WT_STAT_CONN_TXN_QUERY_TS 1303
/*! transaction: read timestamp queue insert to empty */
-#define WT_STAT_CONN_TXN_READ_QUEUE_EMPTY 1302
+#define WT_STAT_CONN_TXN_READ_QUEUE_EMPTY 1304
/*! transaction: read timestamp queue inserts to head */
-#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1303
+#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1305
/*! transaction: read timestamp queue inserts total */
-#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1304
+#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1306
/*! transaction: read timestamp queue length */
-#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1305
+#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1307
/*! transaction: set timestamp calls */
-#define WT_STAT_CONN_TXN_SET_TS 1306
+#define WT_STAT_CONN_TXN_SET_TS 1308
/*! transaction: set timestamp commit calls */
-#define WT_STAT_CONN_TXN_SET_TS_COMMIT 1307
+#define WT_STAT_CONN_TXN_SET_TS_COMMIT 1309
/*! transaction: set timestamp commit updates */
-#define WT_STAT_CONN_TXN_SET_TS_COMMIT_UPD 1308
+#define WT_STAT_CONN_TXN_SET_TS_COMMIT_UPD 1310
/*! transaction: set timestamp oldest calls */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1309
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1311
/*! transaction: set timestamp oldest updates */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1310
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1312
/*! transaction: set timestamp stable calls */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE 1311
+#define WT_STAT_CONN_TXN_SET_TS_STABLE 1313
/*! transaction: set timestamp stable updates */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1312
+#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1314
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1313
+#define WT_STAT_CONN_TXN_BEGIN 1315
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1314
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1316
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1315
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1317
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1316
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1318
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1317
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1319
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1318
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1320
/*! transaction: transaction checkpoint scrub dirty target */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1319
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1321
/*! transaction: transaction checkpoint scrub time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1320
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1322
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1321
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1323
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1322
+#define WT_STAT_CONN_TXN_CHECKPOINT 1324
/*!
* transaction: transaction checkpoints skipped because database was
* clean
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1323
+#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1325
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1324
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1326
/*!
* transaction: transaction fsync calls for checkpoint after allocating
* the transaction ID
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1325
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1327
/*!
* transaction: transaction fsync duration for checkpoint after
* allocating the transaction ID (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1326
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1328
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1327
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1329
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1328
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1330
/*!
* transaction: transaction range of IDs currently pinned by named
* snapshots
*/
-#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1329
+#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1331
/*! transaction: transaction range of timestamps currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1330
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1332
/*!
* transaction: transaction range of timestamps pinned by the oldest
* timestamp
*/
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1331
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1333
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1332
+#define WT_STAT_CONN_TXN_SYNC 1334
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1333
+#define WT_STAT_CONN_TXN_COMMIT 1335
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1334
+#define WT_STAT_CONN_TXN_ROLLBACK 1336
/*! transaction: update conflicts */
-#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1335
+#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1337
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c
index 217a3deab60..167297c5c80 100644
--- a/src/third_party/wiredtiger/src/log/log.c
+++ b/src/third_party/wiredtiger/src/log/log.c
@@ -311,10 +311,10 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
"log_force_sync: sync directory %s to LSN %" PRIu32
"/%" PRIu32,
log->log_dir_fh->name, min_lsn->l.file, min_lsn->l.offset);
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
WT_ERR(__wt_fsync(session, log->log_dir_fh, true));
- time_stop = __wt_rdtsc(session);
- fsync_duration_usecs = WT_TSCDIFF_US(time_stop, time_start);
+ time_stop = __wt_clock(session);
+ fsync_duration_usecs = WT_CLOCKDIFF_US(time_stop, time_start);
log->sync_dir_lsn = *min_lsn;
WT_STAT_CONN_INCR(session, log_sync_dir);
WT_STAT_CONN_INCRV(session,
@@ -334,10 +334,10 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
__wt_verbose(session, WT_VERB_LOG,
"log_force_sync: sync %s to LSN %" PRIu32 "/%" PRIu32,
log_fh->name, min_lsn->l.file, min_lsn->l.offset);
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
WT_ERR(__wt_fsync(session, log_fh, true));
- time_stop = __wt_rdtsc(session);
- fsync_duration_usecs = WT_TSCDIFF_US(time_stop, time_start);
+ time_stop = __wt_clock(session);
+ fsync_duration_usecs = WT_CLOCKDIFF_US(time_stop, time_start);
log->sync_lsn = *min_lsn;
WT_STAT_CONN_INCR(session, log_sync);
WT_STAT_CONN_INCRV(session,
@@ -1844,11 +1844,11 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
"/%" PRIu32,
log->log_dir_fh->name,
sync_lsn.l.file, sync_lsn.l.offset);
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
WT_ERR(__wt_fsync(session, log->log_dir_fh, true));
- time_stop = __wt_rdtsc(session);
+ time_stop = __wt_clock(session);
fsync_duration_usecs =
- WT_TSCDIFF_US(time_stop, time_start);
+ WT_CLOCKDIFF_US(time_stop, time_start);
log->sync_dir_lsn = sync_lsn;
WT_STAT_CONN_INCR(session, log_sync_dir);
WT_STAT_CONN_INCRV(session,
@@ -1866,11 +1866,11 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
log->log_fh->name,
sync_lsn.l.file, sync_lsn.l.offset);
WT_STAT_CONN_INCR(session, log_sync);
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
WT_ERR(__wt_fsync(session, log->log_fh, true));
- time_stop = __wt_rdtsc(session);
+ time_stop = __wt_clock(session);
fsync_duration_usecs =
- WT_TSCDIFF_US(time_stop, time_start);
+ WT_CLOCKDIFF_US(time_stop, time_start);
WT_STAT_CONN_INCRV(session,
log_sync_duration, fsync_duration_usecs);
log->sync_lsn = sync_lsn;
diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c
index 4a556913cdc..fc8181e2460 100644
--- a/src/third_party/wiredtiger/src/log/log_slot.c
+++ b/src/third_party/wiredtiger/src/log/log_slot.c
@@ -161,7 +161,7 @@ retry:
*/
#ifdef HAVE_DIAGNOSTIC
count = 0;
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
#endif
if (WT_LOG_SLOT_UNBUFFERED_ISSET(old_state)) {
while (slot->slot_unbuffered == 0) {
@@ -170,8 +170,8 @@ retry:
#ifdef HAVE_DIAGNOSTIC
++count;
if (count > WT_MILLION) {
- time_stop = __wt_rdtsc(session);
- if (WT_TSCDIFF_SEC(
+ time_stop = __wt_clock(session);
+ if (WT_CLOCKDIFF_SEC(
time_stop, time_start) > 10) {
__wt_errx(session, "SLOT_CLOSE: Slot %"
PRIu32 " Timeout unbuffered, state 0x%"
@@ -231,7 +231,7 @@ __log_slot_new(WT_SESSION_IMPL *session)
#ifdef HAVE_DIAGNOSTIC
count = 0;
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
#endif
/*
* Keep trying until we can find a free slot.
@@ -271,8 +271,8 @@ __log_slot_new(WT_SESSION_IMPL *session)
#ifdef HAVE_DIAGNOSTIC
++count;
if (count > WT_MILLION) {
- time_stop = __wt_rdtsc(session);
- if (WT_TSCDIFF_SEC(time_stop, time_start) > 10) {
+ time_stop = __wt_clock(session);
+ if (WT_CLOCKDIFF_SEC(time_stop, time_start) > 10) {
__wt_errx(session,
"SLOT_NEW: Timeout free slot");
__log_slot_dump(session);
@@ -577,7 +577,7 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize,
++wait_cnt;
}
if (!yielded)
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
yielded = true;
/*
* The slot is no longer open or we lost the race to
@@ -598,8 +598,8 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize,
WT_STAT_CONN_INCR(session, log_slot_immediate);
else {
WT_STAT_CONN_INCR(session, log_slot_yield);
- time_stop = __wt_rdtsc(session);
- usecs = WT_TSCDIFF_US(time_stop, time_start);
+ time_stop = __wt_clock(session);
+ usecs = WT_CLOCKDIFF_US(time_stop, time_start);
WT_STAT_CONN_INCRV(session, log_slot_yield_duration, usecs);
if (closed)
WT_STAT_CONN_INCR(session, log_slot_yield_close);
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_meta.c b/src/third_party/wiredtiger/src/lsm/lsm_meta.c
index 1337335ff5b..88daca989a6 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_meta.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_meta.c
@@ -282,17 +282,13 @@ __lsm_meta_read_v1(
WT_ERR(__wt_config_getones(session, lsmconf, "chunks", &cv));
__wt_config_subinit(session, &lparser, &cv);
for (nchunks = 0; (ret = __wt_config_next(&lparser, &lk, &lv)) == 0;) {
- if (WT_STRING_MATCH("generation", lk.str, lk.len)) {
+ if (WT_STRING_MATCH("id", lk.str, lk.len)) {
WT_ERR(__wt_realloc_def(session,
&lsm_tree->chunk_alloc,
nchunks + 1, &lsm_tree->chunk));
WT_ERR(__wt_calloc_one(session, &chunk));
lsm_tree->chunk[nchunks++] = chunk;
- chunk->generation = (uint32_t)lv.val;
- } else if (WT_STRING_MATCH("id", lk.str, lk.len)) {
chunk->id = (uint32_t)lv.val;
- WT_ERR(__wt_lsm_tree_chunk_name(session, lsm_tree,
- chunk->id, chunk->generation, &chunk->uri));
F_SET(chunk, WT_LSM_CHUNK_ONDISK | WT_LSM_CHUNK_STABLE);
} else if (WT_STRING_MATCH("bloom", lk.str, lk.len)) {
WT_ERR(__wt_lsm_tree_bloom_name(
@@ -302,6 +298,14 @@ __lsm_meta_read_v1(
chunk->size = (uint64_t)lv.val;
} else if (WT_STRING_MATCH("count", lk.str, lk.len)) {
chunk->count = (uint64_t)lv.val;
+ } else if (WT_STRING_MATCH("generation", lk.str, lk.len)) {
+ chunk->generation = (uint32_t)lv.val;
+ /*
+ * The id appears first in the metadata, but both the id and the
+ * generation are needed to create the chunk name.
+ */
+ WT_ERR(__wt_lsm_tree_chunk_name(session, lsm_tree,
+ chunk->id, chunk->generation, &chunk->uri));
}
}
WT_ERR_NOTFOUND_OK(ret);
@@ -481,14 +485,10 @@ __wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree,
chunk = lsm_tree->chunk[i];
if (i > 0)
WT_ERR(__wt_buf_catfmt(session, buf, ","));
- /*
- * Note that we need the generation before the ID for custom
- * data sources, or the wrong URI will be generated.
- */
WT_ERR(__wt_buf_catfmt(
- session, buf, "generation=%" PRIu32, chunk->generation));
+ session, buf, "id=%" PRIu32, chunk->id));
WT_ERR(__wt_buf_catfmt(
- session, buf, ",id=%" PRIu32, chunk->id));
+ session, buf, ",generation=%" PRIu32, chunk->generation));
if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM))
WT_ERR(__wt_buf_catfmt(session, buf, ",bloom"));
if (chunk->size != 0)
diff --git a/src/third_party/wiredtiger/src/os_posix/os_dir.c b/src/third_party/wiredtiger/src/os_posix/os_dir.c
index a07577e6d38..2c2cb084a91 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_dir.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_dir.c
@@ -71,14 +71,12 @@ __directory_list_worker(WT_FILE_SYSTEM *file_system,
*dirlistp = entries;
*countp = count;
-err: if (dirp != NULL) {
- WT_SYSCALL(closedir(dirp), tret);
- if (tret != 0) {
- __wt_err(session, tret,
- "%s: directory-list: closedir", directory);
- if (ret == 0)
- ret = tret;
- }
+err: WT_SYSCALL(closedir(dirp), tret);
+ if (tret != 0) {
+ __wt_err(session, tret,
+ "%s: directory-list: closedir", directory);
+ if (ret == 0)
+ ret = tret;
}
if (ret == 0)
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index ae8f640f733..3ad6bdf41ea 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -799,9 +799,13 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
/*
* Create a new root page, initialize the array of child references,
* mark it dirty, then write it.
+ *
+ * Don't count the eviction of this page as progress: checkpoint can
+ * repeatedly create and discard these pages.
*/
WT_RET(__wt_page_alloc(session,
page->type, mod->mod_multi_entries, false, &next));
+ F_SET_ATOMIC(next, WT_PAGE_EVICT_NO_PROGRESS);
WT_INTL_INDEX_GET(session, next, pindex);
for (i = 0; i < mod->mod_multi_entries; ++i) {
@@ -1411,17 +1415,32 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
*updp = upd;
}
+ /* Keep track of the selected update. */
+ upd = *updp;
+
/* Reconciliation should never see an aborted or reserved update. */
- WT_ASSERT(session, *updp == NULL ||
- ((*updp)->txnid != WT_TXN_ABORTED &&
- (*updp)->type != WT_UPDATE_RESERVE));
+ WT_ASSERT(session, upd == NULL ||
+ (upd->txnid != WT_TXN_ABORTED && upd->type != WT_UPDATE_RESERVE));
/* If all of the updates were aborted, quit. */
if (first_txn_upd == NULL) {
- WT_ASSERT(session, *updp == NULL);
+ WT_ASSERT(session, upd == NULL);
return (0);
}
+ /* If no updates were skipped, record that we're making progress. */
+ if (upd == first_txn_upd)
+ r->update_used = true;
+
+ /*
+ * The checkpoint transaction is special. Make sure we never write
+ * metadata updates from a checkpoint in a concurrent session.
+ */
+ WT_ASSERT(session, !WT_IS_METADATA(session->dhandle) ||
+ upd == NULL || upd->txnid == WT_TXN_NONE ||
+ upd->txnid != S2C(session)->txn_global.checkpoint_state.id ||
+ WT_SESSION_IS_CHECKPOINT(session));
+
/*
* Track the most recent transaction in the page. We store this in the
* tree at the end of reconciliation in the service of checkpoints, it
@@ -1432,25 +1451,26 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
r->max_txn = max_txn;
#ifdef HAVE_TIMESTAMPS
+ /* Update the maximum timestamp. */
if (first_ts_upd != NULL &&
__wt_timestamp_cmp(&r->max_timestamp, &first_ts_upd->timestamp) < 0)
__wt_timestamp_set(&r->max_timestamp, &first_ts_upd->timestamp);
-#endif
- /*
- * The checkpoint transaction is special. Make sure we never write
- * metadata updates from a checkpoint in a concurrent session.
- */
- WT_ASSERT(session, !WT_IS_METADATA(session->dhandle) ||
- *updp == NULL || (*updp)->txnid == WT_TXN_NONE ||
- (*updp)->txnid != S2C(session)->txn_global.checkpoint_state.id ||
- WT_SESSION_IS_CHECKPOINT(session));
+ /* Update the maximum on-page timestamp. */
+ if (upd != NULL &&
+ __wt_timestamp_cmp(&upd->timestamp, &r->max_onpage_timestamp) > 0)
+ __wt_timestamp_set(&r->max_onpage_timestamp, &upd->timestamp);
+#endif
/*
- * If there are no skipped updates, record that we're making progress.
+ * If the update we chose was a birthmark, or we are doing
+ * update-restore and we skipped a birthmark, the original on-page
+ * value must be retained.
*/
- if (*updp == first_txn_upd)
- r->update_used = true;
+ if (upd != NULL &&
+ (upd->type == WT_UPDATE_BIRTHMARK ||
+ (F_ISSET(r, WT_REC_UPDATE_RESTORE) && skipped_birthmark)))
+ *updp = NULL;
/*
* Check if all updates on the page are visible. If not, it must stay
@@ -1465,40 +1485,20 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
#else
timestampp = NULL;
#endif
- all_visible = *updp == first_txn_upd && !uncommitted &&
+ all_visible = upd == first_txn_upd && !uncommitted &&
(F_ISSET(r, WT_REC_VISIBLE_ALL) ?
__wt_txn_visible_all(session, max_txn, timestampp) :
__wt_txn_visible(session, max_txn, timestampp));
- /*
- * If the update we chose was a birthmark, or doing update-restore and
- * we skipped a birthmark, the original on-page value must be retained.
- *
- * Update the maximum on-page timestamp before discarding the chosen
- * update.
- */
- if ((upd = *updp) != NULL) {
-#ifdef HAVE_TIMESTAMPS
- if (__wt_timestamp_cmp(
- &upd->timestamp, &r->max_onpage_timestamp) > 0)
- __wt_timestamp_set(
- &r->max_onpage_timestamp, &upd->timestamp);
-#endif
- if ((*updp)->type == WT_UPDATE_BIRTHMARK)
- *updp = NULL;
- if (F_ISSET(r, WT_REC_UPDATE_RESTORE) && skipped_birthmark)
- *updp = NULL;
- }
-
if (all_visible)
goto check_original_value;
+ r->leave_dirty = true;
+
if (F_ISSET(r, WT_REC_VISIBILITY_ERR))
WT_PANIC_RET(session, EINVAL,
"reconciliation error, update not visible");
- r->leave_dirty = true;
-
/*
* If not trying to evict the page, we know what we'll write and we're
* done.
@@ -1796,6 +1796,7 @@ __rec_child_modify(WT_SESSION_IMPL *session,
*/
break;
+ case WT_REF_LIMBO:
case WT_REF_LOOKASIDE:
/*
* On disk, with lookaside updates.
@@ -3429,16 +3430,18 @@ __rec_split_write_supd(WT_SESSION_IMPL *session,
r->supd_next = j;
}
-done: /* Track the oldest timestamp seen so far. */
- multi->page_las.las_skew_newest = r->las_skew_newest;
- multi->page_las.las_max_txn = r->max_txn;
- WT_ASSERT(session, r->max_txn != WT_TXN_NONE);
+done: if (F_ISSET(r, WT_REC_LOOKASIDE)) {
+ /* Track the oldest lookaside timestamp seen so far. */
+ multi->page_las.las_skew_newest = r->las_skew_newest;
+ multi->page_las.las_max_txn = r->max_txn;
+ WT_ASSERT(session, r->max_txn != WT_TXN_NONE);
#ifdef HAVE_TIMESTAMPS
- __wt_timestamp_set(
- &multi->page_las.min_timestamp, &r->min_saved_timestamp);
- __wt_timestamp_set(
- &multi->page_las.onpage_timestamp, &r->max_onpage_timestamp);
+ __wt_timestamp_set(&multi->page_las.min_timestamp,
+ &r->min_saved_timestamp);
+ __wt_timestamp_set(&multi->page_las.onpage_timestamp,
+ &r->max_onpage_timestamp);
#endif
+ }
err: __wt_scr_free(session, &key);
return (ret);
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index bbe5d2a0218..fd091cb5b13 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -1467,7 +1467,9 @@ __session_commit_transaction(WT_SESSION *wt_session, const char *config)
txn = &session->txn;
if (F_ISSET(txn, WT_TXN_ERROR) && txn->mod_count != 0)
WT_ERR_MSG(session, EINVAL,
- "failed transaction requires rollback");
+ "failed transaction requires rollback%s%s",
+ txn->rollback_reason == NULL ? "" : ": ",
+ txn->rollback_reason == NULL ? "" : txn->rollback_reason);
if (ret == 0)
ret = __wt_txn_commit(session, cfg);
@@ -1628,14 +1630,14 @@ __session_transaction_sync(WT_SESSION *wt_session, const char *config)
* Keep checking the LSNs until we find it is stable or we reach
* our timeout, or there's some other reason to quit.
*/
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
while (__wt_log_cmp(&session->bg_sync_lsn, &log->sync_lsn) > 0) {
if (!__transaction_sync_run_chk(session))
WT_ERR(ETIMEDOUT);
__wt_cond_signal(session, conn->log_file_cond);
- time_stop = __wt_rdtsc(session);
- waited_ms = WT_TSCDIFF_MS(time_stop, time_start);
+ time_stop = __wt_clock(session);
+ waited_ms = WT_CLOCKDIFF_MS(time_stop, time_start);
if (waited_ms < timeout_ms) {
remaining_usec = (timeout_ms - waited_ms) * WT_THOUSAND;
__wt_cond_wait(session, log->log_sync_cond,
diff --git a/src/third_party/wiredtiger/src/support/global.c b/src/third_party/wiredtiger/src/support/global.c
index 19786bb0974..d1271e0d427 100644
--- a/src/third_party/wiredtiger/src/support/global.c
+++ b/src/third_party/wiredtiger/src/support/global.c
@@ -72,10 +72,10 @@ __global_calibrate_ticks(void)
for (tries = 0; tries < 3; ++tries) {
/* This needs to be CPU intensive and large enough. */
__wt_epoch(NULL, &start);
- tsc_start = __wt_rdtsc(NULL);
+ tsc_start = __wt_rdtsc();
for (i = 0; i < 100 * WT_MILLION; i++)
;
- tsc_stop = __wt_rdtsc(NULL);
+ tsc_stop = __wt_rdtsc();
__wt_epoch(NULL, &stop);
diff_nsec = WT_TIMEDIFF_NS(stop, start);
diff_tsc = tsc_stop - tsc_start;
diff --git a/src/third_party/wiredtiger/src/support/hazard.c b/src/third_party/wiredtiger/src/support/hazard.c
index ac0b7f7de96..42148b068fb 100644
--- a/src/third_party/wiredtiger/src/support/hazard.c
+++ b/src/third_party/wiredtiger/src/support/hazard.c
@@ -84,7 +84,7 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp
* eviction and splits, we re-check it after a barrier to make sure
* we have a valid reference.
*/
- if (ref->state != WT_REF_MEM) {
+ if (ref->state != WT_REF_LIMBO && ref->state != WT_REF_MEM) {
*busyp = true;
return (0);
}
@@ -132,8 +132,8 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp
* Do the dance:
*
* The memory location which makes a page "real" is the WT_REF's state
- * of WT_REF_MEM, which can be set to WT_REF_LOCKED at any time by the
- * page eviction server.
+ * of WT_REF_LIMBO or WT_REF_MEM, which can be set to WT_REF_LOCKED
+ * at any time by the page eviction server.
*
* Add the WT_REF reference to the session's hazard list and flush the
* write, then see if the page's state is still valid. If so, we can
@@ -152,9 +152,9 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp
/*
* Check if the page state is still valid, where valid means a
- * state of WT_REF_MEM.
+ * state of WT_REF_LIMBO or WT_REF_MEM.
*/
- if (ref->state == WT_REF_MEM) {
+ if (ref->state == WT_REF_LIMBO || ref->state == WT_REF_MEM) {
++session->nhazard;
/*
diff --git a/src/third_party/wiredtiger/src/support/mtx_rw.c b/src/third_party/wiredtiger/src/support/mtx_rw.c
index 52c8004ecda..572592b9fbc 100644
--- a/src/third_party/wiredtiger/src/support/mtx_rw.c
+++ b/src/third_party/wiredtiger/src/support/mtx_rw.c
@@ -237,7 +237,7 @@ stall: __wt_cond_wait(session,
}
if (set_stats)
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
/* Wait for our group to start. */
for (pause_cnt = 0; ticket != l->u.s.current; pause_cnt++) {
if (pause_cnt < 1000)
@@ -252,13 +252,13 @@ stall: __wt_cond_wait(session,
}
}
if (set_stats) {
- time_stop = __wt_rdtsc(session);
+ time_stop = __wt_clock(session);
if (F_ISSET(session, WT_SESSION_INTERNAL))
stats[session->stat_bucket][l->stat_int_usecs_off] +=
- (int64_t)WT_TSCDIFF_US(time_stop, time_start);
+ (int64_t)WT_CLOCKDIFF_US(time_stop, time_start);
else
stats[session->stat_bucket][l->stat_app_usecs_off] +=
- (int64_t)WT_TSCDIFF_US(time_stop, time_start);
+ (int64_t)WT_CLOCKDIFF_US(time_stop, time_start);
}
/*
@@ -407,7 +407,7 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
* we have the lock.
*/
if (set_stats)
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
for (pause_cnt = 0, old.u.v = l->u.v;
ticket != old.u.s.current || old.u.s.readers_active != 0;
pause_cnt++, old.u.v = l->u.v) {
@@ -423,13 +423,13 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
}
}
if (set_stats) {
- time_stop = __wt_rdtsc(session);
+ time_stop = __wt_clock(session);
if (F_ISSET(session, WT_SESSION_INTERNAL))
stats[session->stat_bucket][l->stat_int_usecs_off] +=
- (int64_t)WT_TSCDIFF_US(time_stop, time_start);
+ (int64_t)WT_CLOCKDIFF_US(time_stop, time_start);
else
stats[session->stat_bucket][l->stat_app_usecs_off] +=
- (int64_t)WT_TSCDIFF_US(time_stop, time_start);
+ (int64_t)WT_CLOCKDIFF_US(time_stop, time_start);
}
/*
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 148e6bfd4d7..926176d6024 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -832,6 +832,8 @@ static const char * const __stats_connection_desc[] = {
"cache: pages queued for urgent eviction during walk",
"cache: pages read into cache",
"cache: pages read into cache requiring lookaside entries",
+ "cache: pages read into cache skipping older lookaside entries",
+ "cache: pages read into cache with skipped lookaside entries needed later",
"cache: pages requested from the cache",
"cache: pages seen by eviction walk",
"cache: pages selected for eviction unable to be evicted",
@@ -1210,6 +1212,8 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cache_eviction_pages_queued_oldest = 0;
stats->cache_read = 0;
stats->cache_read_lookaside = 0;
+ stats->cache_read_lookaside_skipped = 0;
+ stats->cache_read_lookaside_delay = 0;
stats->cache_pages_requested = 0;
stats->cache_eviction_pages_seen = 0;
stats->cache_eviction_fail = 0;
@@ -1624,6 +1628,10 @@ __wt_stat_connection_aggregate(
WT_STAT_READ(from, cache_eviction_pages_queued_oldest);
to->cache_read += WT_STAT_READ(from, cache_read);
to->cache_read_lookaside += WT_STAT_READ(from, cache_read_lookaside);
+ to->cache_read_lookaside_skipped +=
+ WT_STAT_READ(from, cache_read_lookaside_skipped);
+ to->cache_read_lookaside_delay +=
+ WT_STAT_READ(from, cache_read_lookaside_delay);
to->cache_pages_requested +=
WT_STAT_READ(from, cache_pages_requested);
to->cache_eviction_pages_seen +=
diff --git a/src/third_party/wiredtiger/src/support/time.c b/src/third_party/wiredtiger/src/support/time.c
index 34198508988..842b50fad09 100644
--- a/src/third_party/wiredtiger/src/support/time.c
+++ b/src/third_party/wiredtiger/src/support/time.c
@@ -73,13 +73,13 @@ __wt_seconds(WT_SESSION_IMPL *session, time_t *timep)
}
/*
- * __wt_tsc_to_nsec --
- * Convert from rdtsc ticks to nanoseconds.
+ * __wt_clock_to_nsec --
+ * Convert from clock ticks to nanoseconds.
*/
uint64_t
-__wt_tsc_to_nsec(uint64_t end, uint64_t begin)
+__wt_clock_to_nsec(uint64_t end, uint64_t begin)
{
- double tsc_diff;
+ double clock_diff;
/*
* If the ticks were reset, consider it an invalid check and just
@@ -88,20 +88,6 @@ __wt_tsc_to_nsec(uint64_t end, uint64_t begin)
*/
if (end < begin)
return (0);
- tsc_diff = (double)(end - begin);
- return ((uint64_t)(tsc_diff / __wt_process.tsc_nsec_ratio));
-}
-
-/*
- * __wt_tsc_get_expensive_timestamp --
- * Obtain a timestamp via a system call on platforms where obtaining it
- * directly from the hardware register is not supported.
- */
-uint64_t
-__wt_tsc_get_expensive_timestamp(WT_SESSION_IMPL *session)
-{
- struct timespec tsp;
-
- __wt_epoch(session, &tsp);
- return ((uint64_t)(tsp.tv_sec * WT_BILLION + tsp.tv_nsec));
+ clock_diff = (double)(end - begin);
+ return ((uint64_t)(clock_diff / __wt_process.tsc_nsec_ratio));
}
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 140731bcb54..6d1321b1a13 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -442,7 +442,7 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
#ifdef HAVE_TIMESTAMPS
wt_timestamp_t ts;
WT_TXN_GLOBAL *txn_global;
- char timestamp_buf[2 * WT_TIMESTAMP_SIZE + 1];
+ char hex_timestamp[2][2 * WT_TIMESTAMP_SIZE + 1];
bool round_to_oldest;
txn_global = &S2C(session)->txn_global;
@@ -460,11 +460,13 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
* avoid a race between checking and setting transaction
* timestamp.
*/
+ WT_RET(__wt_timestamp_to_hex_string(session,
+ hex_timestamp[0], &ts));
__wt_readlock(session, &txn_global->rwlock);
if (__wt_timestamp_cmp(&ts, &txn_global->oldest_timestamp) < 0)
{
WT_RET(__wt_timestamp_to_hex_string(session,
- timestamp_buf, &ts));
+ hex_timestamp[1], &txn_global->oldest_timestamp));
/*
* If given read timestamp is earlier than oldest
* timestamp then round the read timestamp to
@@ -476,8 +478,8 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
else {
__wt_readunlock(session, &txn_global->rwlock);
WT_RET_MSG(session, EINVAL, "read timestamp "
- "%s older than oldest timestamp",
- timestamp_buf);
+ "%s older than oldest timestamp %s",
+ hex_timestamp[0], hex_timestamp[1]);
}
} else {
__wt_timestamp_set(&txn->read_timestamp, &ts);
@@ -497,8 +499,8 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
* critical section.
*/
__wt_verbose(session, WT_VERB_TIMESTAMP, "Read "
- "timestamp %s : Rounded to oldest timestamp",
- timestamp_buf);
+ "timestamp %s : Rounded to oldest timestamp %s",
+ hex_timestamp[0], hex_timestamp[1]);
}
#else
WT_RET_MSG(session, EINVAL, "read_timestamp requires a "
@@ -592,10 +594,79 @@ __wt_txn_release(WT_SESSION_IMPL *session)
__wt_txn_release_snapshot(session);
txn->isolation = session->isolation;
+ txn->rollback_reason = NULL;
+
/* Ensure the transaction flags are cleared on exit */
txn->flags = 0;
}
+#ifdef HAVE_TIMESTAMPS
+/*
+ * __txn_commit_timestamp_validate --
+ * Validate that timestamp provided to commit is legal.
+ */
+static inline int
+__txn_commit_timestamp_validate(WT_SESSION_IMPL *session)
+{
+ WT_TXN *txn;
+ WT_TXN_OP *op;
+ WT_UPDATE *upd;
+ u_int i;
+ char timestamp_buf[2][2 * WT_TIMESTAMP_SIZE + 1];
+
+ txn = &session->txn;
+
+ /*
+ * Debugging checks on timestamps, if user requested them.
+ */
+ if (F_ISSET(txn, WT_TXN_TS_COMMIT_ALWAYS) &&
+ !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
+ txn->mod_count != 0)
+ WT_RET_MSG(session, EINVAL, "commit_timestamp required and "
+ "none set on this transaction");
+ if (F_ISSET(txn, WT_TXN_TS_COMMIT_NEVER) &&
+ F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
+ txn->mod_count != 0)
+ WT_RET_MSG(session, EINVAL, "no commit_timestamp required and "
+ "timestamp set on this transaction");
+
+ if (WT_VERBOSE_ISSET(session, WT_VERB_TIMESTAMP)) {
+ /*
+ * Error on any valid update structures for the same key that
+ * are at a later timestamp.
+ */
+ for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) {
+ if (op->type != WT_TXN_OP_BASIC_TS)
+ continue;
+ /*
+ * Skip over any aborted update structures.
+ */
+ upd = op->u.upd->next;
+ while (upd != NULL && upd->txnid == WT_TXN_ABORTED)
+ upd = upd->next;
+ /*
+ * Check the timestamp on this update with the
+ * first valid update in the chain. They're in
+ * most recent order.
+ */
+ if (upd != NULL &&
+ __wt_timestamp_cmp(&op->u.upd->timestamp,
+ &upd->timestamp) < 0) {
+ WT_RET(__wt_timestamp_to_hex_string(session,
+ timestamp_buf[0], &op->u.upd->timestamp));
+ WT_RET(__wt_timestamp_to_hex_string(session,
+ timestamp_buf[1], &upd->timestamp));
+ __wt_verbose(session, WT_VERB_TIMESTAMP,
+ "Timestamp %s on new update is older than "
+ "timestamp %s on existing update.",
+ timestamp_buf[0], timestamp_buf[1]);
+ }
+ }
+ }
+ return (0);
+}
+#endif
+
/*
* __wt_txn_commit --
* Commit the current transaction.
@@ -645,20 +716,9 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
}
#ifdef HAVE_TIMESTAMPS
- /*
- * Debugging checks on timestamps, if user requested them.
- */
- if (F_ISSET(txn, WT_TXN_TS_COMMIT_ALWAYS) &&
- !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
- txn->mod_count != 0)
- WT_ERR_MSG(session, EINVAL, "commit_timestamp required and "
- "none set on this transaction");
- if (F_ISSET(txn, WT_TXN_TS_COMMIT_NEVER) &&
- F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
- txn->mod_count != 0)
- WT_ERR_MSG(session, EINVAL, "no commit_timestamp required and "
- "timestamp set on this transaction");
+ WT_ERR(__txn_commit_timestamp_validate(session));
#endif
+
/*
* The default sync setting is inherited from the connection, but can
* be overridden by an explicit "sync" setting for this transaction.
@@ -940,6 +1000,18 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[])
}
/*
+ * __wt_txn_rollback_required --
+ * Prepare to log a reason if the user attempts to use the transaction to
+ * do anything other than rollback.
+ */
+int
+__wt_txn_rollback_required(WT_SESSION_IMPL *session, const char *reason)
+{
+ session->txn.rollback_reason = reason;
+ return (WT_ROLLBACK);
+}
+
+/*
* __wt_txn_init --
* Initialize a session's transaction data.
*/
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index 2e902a8db94..616816f0e8d 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -382,11 +382,11 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
cache = conn->cache;
/* Give up if scrubbing is disabled. */
- if (cache->eviction_checkpoint_target == 0 ||
+ if (cache->eviction_checkpoint_target < DBL_EPSILON ||
cache->eviction_checkpoint_target >= cache->eviction_dirty_trigger)
return;
- time_last = time_start = __wt_rdtsc(session);
+ time_last = time_start = __wt_clock(session);
bytes_written_last = 0;
bytes_written_start = cache->bytes_written;
cache_size = conn->cache_size;
@@ -436,7 +436,7 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
for (;;) {
current_dirty =
(100.0 * __wt_cache_dirty_leaf_inuse(cache)) / cache_size;
- if (current_dirty <= (double)cache->eviction_checkpoint_target)
+ if (current_dirty <= cache->eviction_checkpoint_target)
break;
/*
@@ -447,8 +447,8 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
break;
__wt_sleep(0, stepdown_us / 10);
- time_stop = __wt_rdtsc(session);
- current_us = WT_TSCDIFF_US(time_stop, time_last);
+ time_stop = __wt_clock(session);
+ current_us = WT_CLOCKDIFF_US(time_stop, time_last);
bytes_written_total =
cache->bytes_written - bytes_written_start;
@@ -502,11 +502,11 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
WT_MAX(cache->eviction_dirty_target, current_dirty - delta);
WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target,
cache->eviction_scrub_limit);
- time_last = __wt_rdtsc(session);
+ time_last = __wt_clock(session);
}
- time_stop = __wt_rdtsc(session);
- total_ms = WT_TSCDIFF_MS(time_stop, time_start);
+ time_stop = __wt_clock(session);
+ total_ms = WT_CLOCKDIFF_MS(time_stop, time_start);
WT_STAT_CONN_SET(session, txn_checkpoint_scrub_time, total_ms);
}
@@ -880,10 +880,10 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* Checkpoints have to hit disk (it would be reasonable to configure for
* lazy checkpoints, but we don't support them yet).
*/
- time_start = __wt_rdtsc(session);
+ time_start = __wt_clock(session);
WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync));
- time_stop = __wt_rdtsc(session);
- fsync_duration_usecs = WT_TSCDIFF_US(time_stop, time_start);
+ time_stop = __wt_clock(session);
+ fsync_duration_usecs = WT_CLOCKDIFF_US(time_stop, time_start);
WT_STAT_CONN_INCR(session, txn_checkpoint_fsync_post);
WT_STAT_CONN_SET(session,
txn_checkpoint_fsync_post_duration, fsync_duration_usecs);
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index 88b3bdb6693..0af70c4090d 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -251,22 +251,6 @@ __txn_abort_newer_updates(
}
/*
- * __txn_rollback_to_stable_custom_skip --
- * Return if custom rollback requires we read this page.
- */
-static int
-__txn_rollback_to_stable_custom_skip(
- WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool *skipp)
-{
- WT_UNUSED(context);
- WT_UNUSED(session);
-
- /* Review all pages that are in memory. */
- *skipp = !(ref->state == WT_REF_MEM || ref->state == WT_REF_DELETED);
- return (0);
-}
-
-/*
* __txn_rollback_to_stable_btree_walk --
* Called for each open handle - choose to either skip or wipe the commits
*/
@@ -275,22 +259,24 @@ __txn_rollback_to_stable_btree_walk(
WT_SESSION_IMPL *session, wt_timestamp_t *rollback_timestamp)
{
WT_DECL_RET;
- WT_PAGE *page;
WT_REF *ref;
/* Walk the tree, marking commits aborted where appropriate. */
ref = NULL;
- while ((ret = __wt_tree_walk_custom_skip(session, &ref,
- __txn_rollback_to_stable_custom_skip,
- NULL, WT_READ_NO_EVICT)) == 0 && ref != NULL) {
- page = ref->page;
+ while ((ret = __wt_tree_walk(session, &ref,
+ WT_READ_CACHE | WT_READ_LOOKASIDE | WT_READ_NO_EVICT)) == 0 &&
+ ref != NULL) {
+ if (ref->page_las != NULL &&
+ __wt_timestamp_cmp(rollback_timestamp,
+ &ref->page_las->onpage_timestamp) < 0)
+ ref->page_las->invalid = true;
/* Review deleted page saved to the ref */
if (ref->page_del != NULL && __wt_timestamp_cmp(
rollback_timestamp, &ref->page_del->timestamp) < 0)
__wt_delete_page_rollback(session, ref);
- if (!__wt_page_is_modified(page))
+ if (!__wt_page_is_modified(ref->page))
continue;
WT_RET(__txn_abort_newer_updates(
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index be771677a95..41ac970f14e 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -384,6 +384,7 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
WT_TXN_GLOBAL *txn_global;
wt_timestamp_t commit_ts, oldest_ts, stable_ts;
wt_timestamp_t last_oldest_ts, last_stable_ts;
+ char hex_timestamp[2][2 * WT_TIMESTAMP_SIZE + 1];
bool force;
txn_global = &S2C(session)->txn_global;
@@ -432,17 +433,25 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
if (has_commit && (has_oldest || txn_global->has_oldest_timestamp) &&
__wt_timestamp_cmp(&oldest_ts, &commit_ts) > 0) {
__wt_readunlock(session, &txn_global->rwlock);
+ WT_RET(__wt_timestamp_to_hex_string(
+ session, hex_timestamp[0], &oldest_ts));
+ WT_RET(__wt_timestamp_to_hex_string(
+ session, hex_timestamp[1], &commit_ts));
WT_RET_MSG(session, EINVAL,
- "set_timestamp: oldest timestamp must not be later than "
- "commit timestamp");
+ "set_timestamp: oldest timestamp %s must not be later than "
+ "commit timestamp %s", hex_timestamp[0], hex_timestamp[1]);
}
if (has_commit && (has_stable || txn_global->has_stable_timestamp) &&
__wt_timestamp_cmp(&stable_ts, &commit_ts) > 0) {
__wt_readunlock(session, &txn_global->rwlock);
+ WT_RET(__wt_timestamp_to_hex_string(
+ session, hex_timestamp[0], &stable_ts));
+ WT_RET(__wt_timestamp_to_hex_string(
+ session, hex_timestamp[1], &commit_ts));
WT_RET_MSG(session, EINVAL,
- "set_timestamp: stable timestamp must not be later than "
- "commit timestamp");
+ "set_timestamp: stable timestamp %s must not be later than "
+ "commit timestamp %s", hex_timestamp[0], hex_timestamp[1]);
}
/*
@@ -454,9 +463,13 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
(has_stable || txn_global->has_stable_timestamp) &&
__wt_timestamp_cmp(&oldest_ts, &stable_ts) > 0) {
__wt_readunlock(session, &txn_global->rwlock);
+ WT_RET(__wt_timestamp_to_hex_string(
+ session, hex_timestamp[0], &oldest_ts));
+ WT_RET(__wt_timestamp_to_hex_string(
+ session, hex_timestamp[1], &stable_ts));
WT_RET_MSG(session, EINVAL,
- "set_timestamp: oldest timestamp must not be later than "
- "stable timestamp");
+ "set_timestamp: oldest timestamp %s must not be later than "
+ "stable timestamp %s", hex_timestamp[0], hex_timestamp[1]);
}
__wt_readunlock(session, &txn_global->rwlock);
@@ -538,29 +551,41 @@ __wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name,
{
WT_TXN *txn = &session->txn;
WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global;
+ wt_timestamp_t oldest_ts, stable_ts;
char hex_timestamp[2 * WT_TIMESTAMP_SIZE + 1];
- bool older_than_oldest_ts, older_than_stable_ts;
+ bool has_oldest_ts, has_stable_ts;
/*
+ * Added this redundant initialization to circumvent build failure.
+ */
+ __wt_timestamp_set_zero(&oldest_ts);
+ __wt_timestamp_set_zero(&stable_ts);
+ /*
* Compare against the oldest and the stable timestamp. Return an error
* if the given timestamp is older than oldest and/or stable timestamp.
*/
WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
- older_than_oldest_ts =
- (cmp_oldest && txn_global->has_oldest_timestamp &&
- __wt_timestamp_cmp(ts, &txn_global->oldest_timestamp) < 0);
- older_than_stable_ts = (cmp_stable &&
- txn_global->has_stable_timestamp &&
- __wt_timestamp_cmp(ts, &txn_global->stable_timestamp) < 0));
-
- if (older_than_oldest_ts)
+ if ((has_oldest_ts = txn_global->has_oldest_timestamp))
+ __wt_timestamp_set(&oldest_ts, &txn_global->oldest_timestamp);
+ if ((has_stable_ts = txn_global->has_stable_timestamp))
+ __wt_timestamp_set(&stable_ts, &txn_global->stable_timestamp));
+
+ if (cmp_oldest && has_oldest_ts &&
+ __wt_timestamp_cmp(ts, &oldest_ts) < 0) {
+ WT_RET(__wt_timestamp_to_hex_string(session, hex_timestamp,
+ &oldest_ts));
WT_RET_MSG(session, EINVAL,
- "%s timestamp %.*s older than oldest timestamp",
- name, (int)cval->len, cval->str);
- if (older_than_stable_ts)
+ "%s timestamp %.*s older than oldest timestamp %s",
+ name, (int)cval->len, cval->str, hex_timestamp);
+ }
+ if (cmp_stable && has_stable_ts &&
+ __wt_timestamp_cmp(ts, &stable_ts) < 0) {
+ WT_RET(__wt_timestamp_to_hex_string(session, hex_timestamp,
+ &stable_ts));
WT_RET_MSG(session, EINVAL,
- "%s timestamp %.*s older than stable timestamp",
- name, (int)cval->len, cval->str);
+ "%s timestamp %.*s older than stable timestamp %s",
+ name, (int)cval->len, cval->str, hex_timestamp);
+ }
/*
* Compare against the commit timestamp of the current transaction.
diff --git a/src/third_party/wiredtiger/test/csuite/random_abort/main.c b/src/third_party/wiredtiger/test/csuite/random_abort/main.c
index e98c0474582..e99ed5ecd4d 100644
--- a/src/third_party/wiredtiger/test/csuite/random_abort/main.c
+++ b/src/third_party/wiredtiger/test/csuite/random_abort/main.c
@@ -391,9 +391,13 @@ main(int argc, char *argv[])
*/
for (last_key = UINT64_MAX;; ++count, last_key = key) {
ret = fscanf(fp, "%" SCNu64 "\n", &key);
- if (ret != EOF && ret != 1)
- testutil_die(errno, "fscanf");
- if (ret == EOF)
+ /*
+ * Consider anything other than clear success in
+ * getting the key to be EOF. We've seen file system
+ * issues where the file ends with zeroes on a 4K
+ * boundary and does not return EOF but a ret of zero.
+ */
+ if (ret != 1)
break;
/*
* If we're unlucky, the last line may be a partially
diff --git a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
index acd5743ebe4..2cf9a69110c 100644
--- a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
+++ b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
@@ -655,7 +655,8 @@ main(int argc, char *argv[])
"rm -rf ../%s.SAVE && mkdir ../%s.SAVE && "
"cp -p WiredTigerLog.* ../%s.SAVE",
home, home, home));
- (void)system(buf);
+ if ((status = system(buf)) < 0)
+ testutil_die(status, "system: %s", buf);
printf("Open database, run recovery and verify content\n");
/*
diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h
index ba5774e8a6a..565df91d46b 100644
--- a/src/third_party/wiredtiger/test/format/config.h
+++ b/src/third_party/wiredtiger/test/format/config.h
@@ -103,7 +103,7 @@ static CONFIG c[] = {
{ "cache_minimum",
"minimum size of the cache in MB",
- C_IGNORE, 1, 0, 100 * 1024, &g.c_cache_minimum, NULL },
+ C_IGNORE, 0, 0, 100 * 1024, &g.c_cache_minimum, NULL },
{ "checkpoints",
"type of checkpoints (on | off | wiredtiger)",
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index a5493321d3c..671582dcb16 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -492,6 +492,12 @@ commit_transaction(TINFO *tinfo, WT_SESSION *session)
char config_buf[64];
if (g.c_txn_timestamps) {
+ /*
+ * Update the thread's active timestamp with the current value
+ * to prevent the oldest timestamp moving past our allocated
+ * timestamp before the commit completes.
+ */
+ tinfo->timestamp = g.timestamp;
ts = __wt_atomic_addv64(&g.timestamp, 1);
testutil_check(__wt_snprintf(
config_buf, sizeof(config_buf),
@@ -500,12 +506,12 @@ commit_transaction(TINFO *tinfo, WT_SESSION *session)
session->commit_transaction(session, config_buf));
/*
- * Update the thread's last-committed timestamp. Don't let the
- * compiler re-order this statement, if we were to race with
- * the timestamp thread, it might see our thread update before
- * the transaction commit.
+ * Clear the thread's active timestamp: it no longer needs to
+ * be pinned. Don't let the compiler re-order this statement,
+ * if we were to race with the timestamp thread, it might see
+ * our thread update before the transaction commit.
*/
- WT_PUBLISH(tinfo->timestamp, ts);
+ WT_PUBLISH(tinfo->timestamp, 0);
} else
testutil_check(session->commit_transaction(session, NULL));
++tinfo->commit;
diff --git a/src/third_party/wiredtiger/test/format/util.c b/src/third_party/wiredtiger/test/format/util.c
index c21e58f84e4..b8343fee1d6 100644
--- a/src/third_party/wiredtiger/test/format/util.c
+++ b/src/third_party/wiredtiger/test/format/util.c
@@ -611,7 +611,7 @@ timestamp(void *arg)
* Find the lowest committed timestamp. The timestamp thread
* starts before the operational threads, wait for them.
*/
- oldest_timestamp = UINT64_MAX;
+ oldest_timestamp = g.timestamp;
for (i = 0; i < g.c_threads; ++i) {
tinfo = tinfo_list[i];
this_ts = tinfo->timestamp;
@@ -619,14 +619,10 @@ timestamp(void *arg)
this_ts < oldest_timestamp)
oldest_timestamp = this_ts;
}
- if (oldest_timestamp == UINT64_MAX) {
- __wt_sleep(1, 0);
- continue;
- }
/*
- * Don't get more than 100 transactions or more than 15 seconds
- * out of date.
+ * If less than 100 transactions out of date, wait up to 15
+ * seconds before updating.
*/
WT_READ_BARRIER();
testutil_assert(oldest_timestamp <= g.timestamp);
@@ -642,6 +638,7 @@ timestamp(void *arg)
config_buf, sizeof(config_buf),
"oldest_timestamp=%" PRIx64, oldest_timestamp));
testutil_check(conn->set_timestamp(conn, config_buf));
+ __wt_seconds((WT_SESSION_IMPL *)session, &last);
usecs = mmrand(NULL, 5, 40);
__wt_sleep(0, usecs);
diff --git a/src/third_party/wiredtiger/test/packing/intpack-test.c b/src/third_party/wiredtiger/test/packing/intpack-test.c
index 4f6b7143108..7bc3f1f519b 100644
--- a/src/third_party/wiredtiger/test/packing/intpack-test.c
+++ b/src/third_party/wiredtiger/test/packing/intpack-test.c
@@ -40,6 +40,12 @@ main(void)
memset(buf, 0xff, sizeof(buf)); /* -Werror=maybe-uninitialized */
+ /*
+ * Required on some systems to pull in parts of the library
+ * for which we have data references.
+ */
+ testutil_check(__wt_library_init());
+
for (ncalls = 0, i = 0; i < 10000000; i++) {
for (s = 0; s < 50; s += 5) {
++ncalls;
diff --git a/src/third_party/wiredtiger/test/packing/intpack-test2.c b/src/third_party/wiredtiger/test/packing/intpack-test2.c
index 1be6e78751c..b1f4b8756e7 100644
--- a/src/third_party/wiredtiger/test/packing/intpack-test2.c
+++ b/src/third_party/wiredtiger/test/packing/intpack-test2.c
@@ -38,6 +38,12 @@ main(void)
memset(buf, 0xff, sizeof(buf)); /* -Werror=maybe-uninitialized */
+ /*
+ * Required on some systems to pull in parts of the library
+ * for which we have data references.
+ */
+ testutil_check(__wt_library_init());
+
for (i = 1; i < 1LL << 60; i <<= 1) {
end = buf;
testutil_check(
diff --git a/src/third_party/wiredtiger/test/packing/intpack-test3.c b/src/third_party/wiredtiger/test/packing/intpack-test3.c
index d327c21a738..8076ca5cd52 100644
--- a/src/third_party/wiredtiger/test/packing/intpack-test3.c
+++ b/src/third_party/wiredtiger/test/packing/intpack-test3.c
@@ -45,6 +45,12 @@ test_value(int64_t val)
sinput = val;
soutput = 0; /* -Werror=maybe-uninitialized */
+ /*
+ * Required on some systems to pull in parts of the library
+ * for which we have data references.
+ */
+ testutil_check(__wt_library_init());
+
p = buf;
testutil_check(__wt_vpack_int(&p, sizeof(buf), sinput));
used_len = (size_t)(p - buf);
diff --git a/src/third_party/wiredtiger/test/packing/packing-test.c b/src/third_party/wiredtiger/test/packing/packing-test.c
index 7451aefc494..89946c4a64d 100644
--- a/src/third_party/wiredtiger/test/packing/packing-test.c
+++ b/src/third_party/wiredtiger/test/packing/packing-test.c
@@ -58,6 +58,12 @@ check(const char *fmt, ...)
int
main(void)
{
+ /*
+ * Required on some systems to pull in parts of the library
+ * for which we have data references.
+ */
+ testutil_check(__wt_library_init());
+
check("iii", 0, 101, -99);
check("3i", 0, 101, -99);
check("iS", 42, "forty two");
diff --git a/src/third_party/wiredtiger/test/suite/test_compact02.py b/src/third_party/wiredtiger/test/suite/test_compact02.py
index bb53ea06288..ffa05fb92db 100644
--- a/src/third_party/wiredtiger/test/suite/test_compact02.py
+++ b/src/third_party/wiredtiger/test/suite/test_compact02.py
@@ -146,12 +146,14 @@ class test_compact02(wttest.WiredTigerTestCase):
self.session.checkpoint()
# 5. Call compact.
- # Compact can collide with eviction, if that happens we retry.
- for i in range(1, 5):
+ # Compact can collide with eviction; if that happens we retry. Wait for
+ # up to a minute; the check for EBUSY should mean we're not retrying on
+ # real errors.
+ for i in range(1, 15):
if not self.raisesBusy(
lambda: self.session.compact(self.uri, None)):
break
- time.sleep(2)
+ time.sleep(4)
# 6. Get stats on compacted table.
sz = self.getSize()
diff --git a/src/third_party/wiredtiger/test/suite/test_config04.py b/src/third_party/wiredtiger/test/suite/test_config04.py
index 11a36c2a5d2..c3d7e3b8f49 100644
--- a/src/third_party/wiredtiger/test/suite/test_config04.py
+++ b/src/third_party/wiredtiger/test/suite/test_config04.py
@@ -140,7 +140,6 @@ class test_config04(wttest.WiredTigerTestCase):
def test_eviction(self):
self.common_test('eviction_target=84,eviction_trigger=94')
- # Note
def test_eviction_bad(self):
self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:
@@ -154,6 +153,68 @@ class test_config04(wttest.WiredTigerTestCase):
'eviction_trigger=86'),
"/eviction target must be lower than the eviction trigger/")
+ def test_eviction_absolute(self):
+ self.common_test('eviction_target=50MB,eviction_trigger=60MB,'
+ 'eviction_dirty_target=20MB,eviction_dirty_trigger=15MB,'
+ 'eviction_checkpoint_target=13MB')
+
+ def test_eviction_abs_and_pct(self):
+ self.common_test('eviction_target=50,eviction_trigger=60MB,'
+ 'eviction_dirty_target=20,eviction_dirty_trigger=15MB')
+
+ def test_eviction_abs_less_than_one_pct(self):
+ self.wiredtiger_open('.','create,cache_size=8GB,eviction_target=70MB,'
+ 'eviction_trigger=75MB')
+
+ # Test that eviction_target must be lower than eviction_trigger
+ def test_eviction_absolute_bad(self):
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:
+ self.wiredtiger_open('.','create,eviction_target=70MB,'
+ 'eviction_trigger=60MB'),
+ '/eviction target must be lower than the eviction trigger/')
+
+ def test_eviction_abs_and_pct_bad(self):
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:
+ self.wiredtiger_open('.','create,eviction_target=50,'
+ 'eviction_trigger=40MB'),
+ '/eviction target must be lower than the eviction trigger/')
+
+ def test_eviction_abs_and_pct_bad2(self):
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:
+ self.wiredtiger_open('.','create,eviction_target=50MB,'
+ 'eviction_trigger=40'),
+ '/eviction target must be lower than the eviction trigger/')
+
+ def test_eviction_tgt_abs_too_large(self):
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:
+ self.wiredtiger_open('.','create,cache_size=500MB,'
+ 'eviction_target=1G'),
+ '/eviction target should not exceed cache size/')
+
+ def test_eviction_trigger_abs_too_large(self):
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:
+ self.wiredtiger_open('.','create,cache_size=500MB,'
+ 'eviction_trigger=1G'),
+ '/eviction trigger should not exceed cache size/')
+
+ def test_eviction_dirty_tgt_abs_too_large(self):
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:
+ self.wiredtiger_open('.','create,cache_size=500MB,'
+ 'eviction_dirty_target=1G'),
+ '/eviction dirty target should not exceed cache size/')
+
+ def test_eviction_dirty_trigggr_abs_too_large(self):
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:
+ self.wiredtiger_open('.','create,cache_size=500MB,'
+ 'eviction_dirty_trigger=1G'),
+ '/eviction dirty trigger should not exceed cache size/')
+
+ def test_eviction_checkpoint_tgt_abs_too_large(self):
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda:
+ self.wiredtiger_open('.','create,cache_size=500MB,'
+ 'eviction_checkpoint_target=1G'),
+ '/eviction checkpoint target should not exceed cache size/')
+
def test_invalid_config(self):
msg = '/Unbalanced brackets/'
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
diff --git a/src/third_party/wiredtiger/test/suite/test_reconfig01.py b/src/third_party/wiredtiger/test/suite/test_reconfig01.py
index 9c34f96c13e..fc78ea709a4 100644
--- a/src/third_party/wiredtiger/test/suite/test_reconfig01.py
+++ b/src/third_party/wiredtiger/test/suite/test_reconfig01.py
@@ -75,6 +75,13 @@ class test_reconfig01(wttest.WiredTigerTestCase):
self.conn.reconfigure("eviction=(threads_min=2)")
# Set min and max the same.
self.conn.reconfigure("eviction=(threads_min=6,threads_max=6)")
+ # Set target and trigger with an absolute value.
+ self.conn.reconfigure("eviction_target=50M,eviction_trigger=100M")
+ # Set dirty target and trigger with an absolute value
+ self.conn.reconfigure("eviction_dirty_target=20M,"
+ "eviction_dirty_trigger=40M")
+ # Set eviction checkpoint target with an absolute value
+ self.conn.reconfigure("eviction_checkpoint_target=50M")
def test_reconfig_lsm_manager(self):
# We create and populate a tiny LSM so that we can start off with
diff --git a/src/third_party/wiredtiger/test/suite/test_shared_cache01.py b/src/third_party/wiredtiger/test/suite/test_shared_cache01.py
index b6ed2289639..9ebdd5093fc 100644
--- a/src/third_party/wiredtiger/test/suite/test_shared_cache01.py
+++ b/src/third_party/wiredtiger/test/suite/test_shared_cache01.py
@@ -158,6 +158,39 @@ class test_shared_cache01(wttest.WiredTigerTestCase):
self.add_records(sess, 0, nops)
self.closeConnections()
+ # Opening a connection with absolute values for eviction config should fail
+ def test_shared_cache_absolute_evict_config(self):
+ nops = 1000
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.openConnections(['WT_TEST1', 'WT_TEST2'],
+ pool_opts = ',shared_cache=(name=pool,size=50M,reserve=20M),'
+ 'eviction_target=10M,'), '/Shared cache configuration requires a '
+ 'percentage value for eviction target/')
+
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.openConnections(['WT_TEST1', 'WT_TEST2'],
+ pool_opts = ',shared_cache=(name=pool,size=50M,reserve=20M),'
+ 'eviction_trigger=10M,'), '/Shared cache configuration requires a '
+ 'percentage value for eviction trigger/')
+
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.openConnections(['WT_TEST1', 'WT_TEST2'],
+ pool_opts = ',shared_cache=(name=pool,size=50M,reserve=20M),'
+ 'eviction_dirty_target=10M,'), '/Shared cache configuration '
+ 'requires a percentage value for eviction dirty target/')
+
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.openConnections(['WT_TEST1', 'WT_TEST2'],
+ pool_opts = ',shared_cache=(name=pool,size=50M,reserve=20M),'
+ 'eviction_dirty_trigger=10M,'), '/Shared cache configuration '
+ 'requires a percentage value for eviction dirty trigger/')
+
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.openConnections(['WT_TEST1', 'WT_TEST2'],
+ pool_opts = ',shared_cache=(name=pool,size=50M,reserve=20M),'
+ 'eviction_checkpoint_target=10M,'), '/Shared cache configuration '
+ 'requires a percentage value for eviction checkpoint target/')
+
# Test verbose output
@unittest.skip("Verbose output handling")
def test_shared_cache_verbose(self):
diff --git a/src/third_party/wiredtiger/test/suite/test_shared_cache02.py b/src/third_party/wiredtiger/test/suite/test_shared_cache02.py
index 05f080b3323..3d5b29f1969 100644
--- a/src/third_party/wiredtiger/test/suite/test_shared_cache02.py
+++ b/src/third_party/wiredtiger/test/suite/test_shared_cache02.py
@@ -162,5 +162,35 @@ class test_shared_cache02(wttest.WiredTigerTestCase):
self.closeConnections()
+ # Test reconfigure with absolute value for eviction config fails
+ def test_shared_cache_reconfig04(self):
+ nops = 1000
+ self.openConnections(['WT_TEST1', 'WT_TEST2'],
+ pool_opts = ',shared_cache=(name=pool,size=50M,reserve=20M),')
+
+ for sess in self.sessions:
+ sess.create(self.uri, "key_format=S,value_format=S")
+ self.add_records(sess, 0, nops)
+
+ connection = self.conns[0]
+ # Reconfiguring with absolute value of eviction trigger should fail.
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: connection.reconfigure("shared_cache=(name=pool,"
+ "size=20M,reserve=10M),eviction_trigger=10M"),'/Shared cache '
+ 'configuration requires a percentage value for eviction trigger/')
+
+ connection = self.conns[1]
+ # Reconfiguring with absolute value for eviction target should fail.
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: connection.reconfigure("shared_cache=(name=pool,"
+ "size=20M,reserve=10M),eviction_target=10M"),'/Shared cache '
+ 'configuration requires a percentage value for eviction target/')
+
+ # Reconfigure with percentage value for eviction target passes
+ self.conns[0].reconfigure("shared_cache=(name=pool,reserve=20M),"
+ "eviction_target=50")
+
+ self.closeConnections()
+
if __name__ == '__main__':
wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp04.py b/src/third_party/wiredtiger/test/suite/test_timestamp04.py
index 17aea80a1ee..48ec7fac9a6 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp04.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp04.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
#
# test_timestamp04.py
-# Timestamps: Test that rollback_to_stable obeys expected visibility rules
+# Timestamps: Test that rollback_to_stable obeys expected visibility rules.
#
from suite_subprocess import suite_subprocess
@@ -49,7 +49,7 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
('V2', dict(conn_config=',log=(enabled)', using_log=True)),
]
- # Minimum cache_size requirement of lsm is 31MB
+ # Minimum cache_size requirement of lsm is 31MB.
types = [
('col_fix', dict(empty=1, cacheSize='cache_size=20MB', extra_config=',key_format=r,value_format=8t')),
('col_var', dict(empty=0, cacheSize='cache_size=20MB', extra_config=',key_format=r')),
@@ -68,14 +68,12 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
cur = session.open_cursor(tablename, None)
if missing == False:
actual = dict((k, v) for k, v in cur if v != 0)
- if prn == True:
- print "CHECK : Expected"
- print expected
- print "CHECK : Actual"
- print actual
+ if actual != expected:
+ print "missing: ", sorted(set(expected) - set(actual))
+ print "extras: ", sorted(set(actual) - set(expected))
self.assertTrue(actual == expected)
- # Search for the expected items as well as iterating
+ # Search for the expected items as well as iterating.
for k, v in expected.iteritems():
if missing == False:
self.assertEqual(cur[k], v, "for key " + str(k))
@@ -114,7 +112,7 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
self.ConnectionOpen(self.cacheSize)
# Configure small page sizes to ensure eviction comes through and we
- # have a somewhat complex tree
+ # have a somewhat complex tree.
config_default = 'key_format=i,value_format=i,memory_page_max=32k,leaf_page_max=8k,internal_page_max=8k'
config_nolog = ',log=(enabled=false)'
#
@@ -133,7 +131,7 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
self.session.create(self.table_nots_nolog, config_default + config_nolog + self.extra_config)
cur_nots_nolog = self.session.open_cursor(self.table_nots_nolog)
- # Insert keys each with timestamp=key, in some order
+ # Insert keys each with timestamp=key, in some order.
key_range = 10000
keys = range(1, key_range + 1)
@@ -168,41 +166,41 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
self.conn.rollback_to_stable()
# Check that we see the inserted value (i.e. 1) for all the keys in
- # non-timestamp tables
+ # non-timestamp tables.
self.check(self.session, 'read_timestamp=' + latest_ts,
self.table_nots_log, dict((k, 1) for k in keys[:]))
self.check(self.session, 'read_timestamp=' + latest_ts,
self.table_nots_nolog, dict((k, 1) for k in keys[:]))
# For non-logged tables the behavior is consistent across connections
- # with or without log enabled
+ # with or without log enabled.
# Check that we see the inserted value (i.e. 1) for the keys in a
- # timestamp table till the stable_timestamp only.
+ # timestamped table until the stable_timestamp only.
self.check(self.session, 'read_timestamp=' + latest_ts,
self.table_ts_nolog, dict((k, 1) for k in keys[:(key_range / 2)]))
self.check(self.session, 'read_timestamp=' + latest_ts,
self.table_ts_nolog, dict((k, 1) for k in keys[(key_range / 2 + 1):]), missing=True)
- # For logged tables behavior changes for rollback_to_stable based on
+ # For logged tables, the behavior of rollback_to_stable changes based on
# whether connection level logging is enabled or not.
if self.using_log == True:
- # When log is enabled, none of the keys will be rolled back.
- # Check that we see all the keys
+ # When the log is enabled, none of the keys will be rolled back.
+ # Check that we see all the keys.
self.check(self.session, 'read_timestamp=' + latest_ts,
self.table_ts_log, dict((k, 1) for k in keys[:]))
else:
- # When log is disabled, keys will be rolled back till stable_timestamp
- # Check that we see the insertions are rolled back in timestamp tables
- # till the stable_timestamp
+ # When the log is disabled, the keys will be rolled back until stable_timestamp.
+ # Check that we see the insertions are rolled back in timestamped tables
+ # until the stable_timestamp.
self.check(self.session, 'read_timestamp=' + latest_ts,
self.table_ts_log, dict((k, 1) for k in keys[:(key_range / 2)]))
self.check(self.session, 'read_timestamp=' + latest_ts,
self.table_ts_log, dict((k, 1) for k in keys[(key_range / 2 + 1):]), missing=True)
- # Bump the oldest timestamp, we're not going back...
+ # Bump the oldest timestamp, we're not going back.
self.conn.set_timestamp('oldest_timestamp=' + stable_ts)
- # Update the values again in preparation for rolling back more
+ # Update the values again in preparation for rolling back more.
for k in keys:
cur_nots_log[k] = 2
cur_nots_nolog[k] = 2
@@ -212,7 +210,7 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
self.session.commit_transaction('commit_timestamp=' + timestamp_str(k + key_range))
# Scenario: 3
- # Check that we see all values updated (i.e 2) in all tables
+ # Check that we see all values updated (i.e. 2) in all tables.
latest_ts = timestamp_str(2 * key_range)
self.check(self.session, 'read_timestamp=' + latest_ts,
self.table_nots_log, dict((k, 2) for k in keys[:]))
@@ -225,20 +223,20 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
# Scenario: 4
# Advance the stable_timestamp by a quarter range and rollback.
- # three-quarter timestamps will be rolled back.
+ # Three-fourths of the later timestamps will be rolled back.
stable_ts = timestamp_str(key_range + key_range / 4)
self.conn.set_timestamp('stable_timestamp=' + stable_ts)
self.conn.rollback_to_stable()
# Check that we see the updated value (i.e. 2) for all the keys in
- # non-timestamp tables
+ # non-timestamped tables.
self.check(self.session, 'read_timestamp=' + latest_ts,
self.table_nots_log, dict((k, 2) for k in keys[:]))
self.check(self.session, 'read_timestamp=' + latest_ts,
self.table_nots_nolog, dict((k, 2) for k in keys[:]))
# For non-logged tables the behavior is consistent across connections
- # with or without log enabled
- # Check that we see only half key ranges in timestamp tables. we see
+ # with or without log enabled.
+ # Check that we see only half key ranges in timestamp tables. We see
# the updated value (i.e. 2) for the first quarter keys and old values
# (i.e. 1) for the second quarter keys.
self.check(self.session, 'read_timestamp=' + latest_ts,
@@ -251,12 +249,12 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
# whether connection level logging is enabled or not.
if self.using_log == True:
# When log is enabled, none of the keys will be rolled back.
- # Check that we see all the keys
+ # Check that we see all the keys.
self.check(self.session, 'read_timestamp=' + latest_ts,
self.table_ts_log, dict((k, 2) for k in keys[:]))
else:
- # When log is disabled, keys will be rolled back till stable_timestamp
- # Check that we see only half key ranges in timestamp tables. we see
+ # When log is disabled, keys will be rolled back until the stable_timestamp.
+ # Check that we see only half the key ranges in timestamped tables. We see
# the updated value (i.e. 2) for the first quarter keys and old values
# (i.e. 1) for the second quarter keys.
self.check(self.session, 'read_timestamp=' + latest_ts,
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp09.py b/src/third_party/wiredtiger/test/suite/test_timestamp09.py
index 5000eb4e854..9b7d88bf64e 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp09.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp09.py
@@ -109,7 +109,7 @@ class test_timestamp09(wttest.WiredTigerTestCase, suite_subprocess):
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.conn.set_timestamp('oldest_timestamp=' +
timestamp_str(3) + ',stable_timestamp=' + timestamp_str(1)),
- '/oldest timestamp must not be later than stable timestamp/')
+ '/oldest timestamp 0*3 must not be later than stable timestamp 0*1/')
# Oldest timestamp is 3 at the moment, trying to set it to an earlier
# timestamp is a no-op.
@@ -128,7 +128,7 @@ class test_timestamp09(wttest.WiredTigerTestCase, suite_subprocess):
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.conn.set_timestamp('oldest_timestamp=' +
timestamp_str(6)),
- '/oldest timestamp must not be later than stable timestamp/')
+ '/oldest timestamp 0*6 must not be later than stable timestamp 0*5/')
# Commit timestamp >= Stable timestamp.
# Check both timestamp_transaction and commit_transaction API.
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp10.py b/src/third_party/wiredtiger/test/suite/test_timestamp10.py
new file mode 100644
index 00000000000..de928b34220
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp10.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2018 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_timestamp10.py
+# Timestamps: timestamp ordering
+#
+
+from suite_subprocess import suite_subprocess
+import wiredtiger, wttest
+
+def timestamp_str(t):
+ return '%x' % t
+
+class test_timestamp10(wttest.WiredTigerTestCase, suite_subprocess):
+ conn_config = 'verbose=[timestamp]'
+ def test_timestamp_range(self):
+ if not wiredtiger.timestamp_build() or not wiredtiger.diagnostic_build():
+ self.skipTest('requires a timestamp and diagnostic build')
+
+ base = 'timestamp10'
+ uri = 'file:' + base
+ # Create the file object that will hold the test's data items
+ self.session.create(uri, 'key_format=S,value_format=S')
+
+ # Insert a data item at timestamp 2
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ self.session.timestamp_transaction(
+ 'commit_timestamp=' + timestamp_str(2))
+ c['key'] = 'value2'
+ self.session.commit_transaction()
+ c.close()
+
+ # Modify the data item at timestamp 1
+ #
+ # The docs say:
+ # The commits to a particular data item must be performed in timestamp
+ # order. Again, this is only checked in diagnostic builds and if
+ # applications violate this rule, data consistency can be violated.
+ #
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ self.session.timestamp_transaction(
+ 'commit_timestamp=' + timestamp_str(1))
+ c['key'] = 'value1'
+ msg='on new update is older than'
+ with self.expectedStdoutPattern(msg):
+ self.session.commit_transaction()
+ c.close()
+
+ # Make sure we can successfully add a different key at timestamp 1.
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ self.session.timestamp_transaction(
+ 'commit_timestamp=' + timestamp_str(1))
+ c['key1'] = 'value1'
+ self.session.commit_transaction()
+ c.close()
+
+ #
+ # Insert key2 at timestamp 10 and key3 at 15.
+ # Then modify both keys in one transaction at timestamp 14.
+ # Modifying the one from 15 should report a warning message, but
+ # the update will be applied.
+ #
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ self.session.timestamp_transaction(
+ 'commit_timestamp=' + timestamp_str(10))
+ c['key2'] = 'value10'
+ self.session.commit_transaction()
+ self.session.begin_transaction()
+ self.session.timestamp_transaction(
+ 'commit_timestamp=' + timestamp_str(15))
+ c['key3'] = 'value15'
+ self.session.commit_transaction()
+
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ self.session.timestamp_transaction(
+ 'commit_timestamp=' + timestamp_str(14))
+ c['key2'] = 'value14'
+ c['key3'] = 'value14'
+ with self.expectedStdoutPattern(msg):
+ self.session.commit_transaction()
+ c.close()
+
+ c = self.session.open_cursor(uri)
+ self.assertEquals(c['key2'], 'value14')
+ self.assertEquals(c['key3'], 'value14')
+ c.close()
+
+ #
+ # Separately, we should be able to update key2 at timestamp 16.
+ #
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ self.session.timestamp_transaction(
+ 'commit_timestamp=' + timestamp_str(16))
+ c['key2'] = 'value16'
+ self.session.commit_transaction()
+
+ # Updating key3 at timestamp 13 (older than its update at 14) will warn.
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ self.session.timestamp_transaction(
+ 'commit_timestamp=' + timestamp_str(13))
+ c['key3'] = 'value13'
+ with self.expectedStdoutPattern(msg):
+ self.session.commit_transaction()
+ c.close()
+
+ # Test that updating again with an invalid timestamp reports a warning.
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ self.session.timestamp_transaction(
+ 'commit_timestamp=' + timestamp_str(12))
+ c['key3'] = 'value12'
+ with self.expectedStdoutPattern(msg):
+ self.session.commit_transaction()
+ c.close()
+
+ c = self.session.open_cursor(uri)
+ self.assertEquals(c['key3'], 'value12')
+ c.close()
+
+ # Now try a later timestamp.
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ self.session.timestamp_transaction(
+ 'commit_timestamp=' + timestamp_str(17))
+ c['key3'] = 'value17'
+ self.session.commit_transaction()
+ c.close()
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp11.py b/src/third_party/wiredtiger/test/suite/test_timestamp11.py
new file mode 100644
index 00000000000..f98b7c47b2b
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp11.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2018 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_timestamp11.py
+# Timestamps: mixed timestamp usage
+#
+
+from suite_subprocess import suite_subprocess
+import wiredtiger, wttest
+
+def timestamp_str(t):
+ return '%x' % t
+
+class test_timestamp11(wttest.WiredTigerTestCase, suite_subprocess):
+ def test_timestamp_range(self):
+ if not wiredtiger.timestamp_build():
+ self.skipTest('requires a timestamp build')
+
+ base = 'timestamp11'
+ uri = 'file:' + base
+ self.session.create(uri, 'key_format=S,value_format=S')
+
+ # Test that mixed timestamp usage, where some transactions use
+ # timestamps and others don't, behaves in the expected way.
+
+ # Insert two data items at timestamp 2
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ self.session.timestamp_transaction(
+ 'commit_timestamp=' + timestamp_str(2))
+ c['key'] = 'value2'
+ c['key2'] = 'value2'
+ self.session.commit_transaction()
+ c.close()
+
+ #
+ # Modify one key without a timestamp and modify the other with a
+ # later timestamp.
+ #
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ self.session.timestamp_transaction(
+ 'commit_timestamp=' + timestamp_str(5))
+ c['key'] = 'value5'
+ self.session.commit_transaction()
+ c.close()
+
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ c['key2'] = 'valueNOTS'
+ self.session.commit_transaction()
+ c.close()
+
+ #
+ # Set the stable timestamp and then roll back to it. The first key
+ # should roll back to the original value and the second key should
+ # remain at the non-timestamped value. Also the non-timestamped value
+ # stays regardless of rollbacks or reading at a timestamp.
+ #
+ stable_ts = timestamp_str(2)
+ self.conn.set_timestamp('stable_timestamp=' + stable_ts)
+ self.conn.rollback_to_stable()
+
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ self.assertEquals(c['key'], 'value2')
+ self.assertEquals(c['key2'], 'valueNOTS')
+ self.session.commit_transaction()
+ c.close()
+
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction('read_timestamp=' + stable_ts)
+ self.assertEquals(c['key'], 'value2')
+ self.assertEquals(c['key2'], 'valueNOTS')
+ self.session.commit_transaction()
+ c.close()
+
+ #
+ # Repeat but swapping the keys using or not using timestamps.
+ #
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ self.session.timestamp_transaction(
+ 'commit_timestamp=' + timestamp_str(5))
+ c['key2'] = 'value5'
+ self.session.commit_transaction()
+ c.close()
+
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ c['key'] = 'valueNOTS'
+ self.session.commit_transaction()
+ c.close()
+
+ # Read with each timestamp and without any timestamp.
+ #
+ # Without a timestamp. We should see the latest value for each.
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ self.assertEquals(c['key'], 'valueNOTS')
+ self.assertEquals(c['key2'], 'value5')
+ self.session.commit_transaction()
+ c.close()
+
+ # With timestamp 2. Both non-timestamped values override the original
+ # value at timestamp 2.
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction('read_timestamp=' + stable_ts)
+ self.assertEquals(c['key'], 'valueNOTS')
+ self.assertEquals(c['key2'], 'valueNOTS')
+ self.session.commit_transaction()
+ c.close()
+
+ # With timestamp 5. The first key's update at 5 was rolled back and it
+ # was later rewritten without a timestamp. The second key was written
+ # at timestamp 5 after its non-timestamped update.
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction('read_timestamp=' + timestamp_str(5))
+ self.assertEquals(c['key'], 'valueNOTS')
+ self.assertEquals(c['key2'], 'value5')
+ self.session.commit_transaction()
+ c.close()
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/tools/optrack/arrow-left.png b/src/third_party/wiredtiger/tools/optrack/arrow-left.png
new file mode 100644
index 00000000000..315983e3118
--- /dev/null
+++ b/src/third_party/wiredtiger/tools/optrack/arrow-left.png
Binary files differ
diff --git a/src/third_party/wiredtiger/tools/optrack/arrow-right.png b/src/third_party/wiredtiger/tools/optrack/arrow-right.png
new file mode 100644
index 00000000000..e874d0f55fc
--- /dev/null
+++ b/src/third_party/wiredtiger/tools/optrack/arrow-right.png
Binary files differ
diff --git a/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py b/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py
new file mode 100755
index 00000000000..5bb557ce21b
--- /dev/null
+++ b/src/third_party/wiredtiger/tools/optrack/find-latency-spikes.py
@@ -0,0 +1,1063 @@
+#!/usr/bin/env python
+
+import argparse
+from bokeh.layouts import column
+from bokeh.models import ColumnDataSource, CustomJS, HoverTool, FixedTicker
+from bokeh.models import Legend, LegendItem
+from bokeh.models import NumeralTickFormatter, OpenURL, Range1d, TapTool
+from bokeh.models.annotations import Label
+from bokeh.plotting import figure, output_file, reset_output, save, show
+from bokeh.resources import CDN
+import matplotlib
+import numpy as np
+import os
+import pandas as pd
+import sys
+import traceback
+
+# Navigation arrow images that the bucket pages link to.
+arrowLeftImg, arrowRightImg = "arrow-left.png", "arrow-right.png"
+
+# Directory holding one cross-file plot page per bucket of the outlier
+# histogram.
+#
+bucketDir = "BUCKET-FILES"
+
+# CSS color names available for plotting; filled in by initColorList().
+colorList = list()
+
+# Codes for various colors for printing of informational and error messages.
+#
+class color:
+    """ANSI escape sequences used to style console messages."""
+
+    # Bright foreground colors.
+    PURPLE, CYAN, BLUE = '\033[95m', '\033[96m', '\033[94m'
+    GREEN, YELLOW, RED = '\033[92m', '\033[93m', '\033[91m'
+
+    # Normal-intensity cyan.
+    DARKCYAN = '\033[36m'
+
+    # Text attributes; END resets all of them.
+    BOLD, UNDERLINE, END = '\033[1m', '\033[4m', '\033[0m'
+
+# A function name mapped to its corresponding color, and the number of
+# colors handed out so far.
+#
+funcToColor = {}
+lastColorUsed = 0
+
+# The smallest and the largest timestamps seen across all files.
+#
+firstTimeStamp = sys.maxsize
+lastTimeStamp = 0
+
+# A dictionary that holds function-specific threshold values telling
+# us when the function is to be considered an outlier, and a matching
+# dictionary of human-readable descriptions of those thresholds. These
+# values are read from a config file, if supplied by the user.
+#
+outlierThresholdDict = {}
+outlierPrettyNames = {}
+
+# A dictionary that holds the interval dataframe built for each file.
+# (It used to be defined twice; one definition is enough.)
+#
+perFileDataFrame = {}
+
+# A dictionary that holds the intervals data per function.
+#
+perFuncDF = {}
+
+# Largest stack depth for each file.
+perFileLargestStackDepth = {}
+
+# Plot geometry, in pixels.
+plotWidth = 1200
+pixelsForTitle = 30
+pixelsPerHeightUnit = 30
+pixelsPerWidthUnit = 5
+
+# The coefficient by which we multiply the standard deviation when
+# setting the outlier threshold, in case it is not specified by the user.
+#
+STDEV_MULT = 2
+
+
+def initColorList():
+    """Fill the global colorList with matplotlib's named CSS colors.
+
+    The name "sage" is left out because some browsers break when given it.
+    """
+    global colorList
+
+    # Build a real list in one pass. getColorForFunction() indexes into
+    # colorList, which a Python 3 dict view does not support, and filtering
+    # while building avoids removing items from a sequence that is being
+    # iterated over.
+    colorList = [name for name in matplotlib.colors.cnames
+                 if name != "sage"]
+
+#
+# Each unique function name gets a unique color.
+# If we run out of colors, we repeat them from the
+# beginning of the list.
+#
+def getColorForFunction(function):
+    """Return the color assigned to function, assigning one on first use.
+
+    Colors are taken from colorList in order; once every color has been
+    handed out, assignment wraps around to the start of the list.
+    """
+    global lastColorUsed
+
+    # "in" works on both Python 2 and 3; dict.has_key() is Python 2 only.
+    if function not in funcToColor:
+        funcToColor[function] = colorList[lastColorUsed % len(colorList)]
+        lastColorUsed += 1
+
+    return funcToColor[function]
+
+
+#
+# An intervalEnd is a tuple of three items.
+# item #0 is the timestamp,
+# item #1 is the event type,
+# item #2 is the function name.
+#
+def getIntervalData(intervalBeginningsStack, intervalEnd, logfile):
+    """Match an operation-end record with its operation-begin record.
+
+    Begin records are popped off intervalBeginningsStack until one with the
+    same function name (item #2) as intervalEnd is found. Begin records
+    skipped on the way are discarded and reported in logfile.
+
+    Returns the tuple (begin timestamp, end timestamp, function name,
+    errorOccurred), where errorOccurred is True if any begin record had to
+    be skipped. Returns None if intervalEnd is not an end record (event
+    type 1) or if no matching begin record is on the stack.
+    """
+    errorOccurred = False
+
+    if intervalEnd[1] != 1:
+        logfile.write(
+            "getIntervaldata: only rows with event type 1 can be used.\n")
+        logfile.write(str(intervalEnd) + "\n")
+        return None
+
+    if not intervalBeginningsStack:
+        logfile.write("Nothing on the intervalBeginningsStack. " +
+                      "I cannot find the beginning for this interval.\n")
+        logfile.write(str(intervalEnd) + "\n")
+        return None
+
+    # Check for emptiness before every pop: list.pop() raises IndexError on
+    # an empty list, it never returns None.
+    while intervalBeginningsStack:
+        intervalBegin = intervalBeginningsStack.pop()
+
+        if intervalBegin[2] == intervalEnd[2]:
+            return (intervalBegin[0], intervalEnd[0], intervalEnd[2],
+                    errorOccurred)
+
+        logfile.write("Operation end record does not match the available " +
+                      "operation begin record. " +
+                      "Your log file may be incomplete.\n" +
+                      "Skipping the begin record.\n")
+        logfile.write("Begin: " + str(intervalBegin) + "\n")
+        logfile.write("End: " + str(intervalEnd) + "\n")
+        errorOccurred = True
+
+    logfile.write("Could not find the matching operation begin record" +
+                  " for the following operation end record: \n")
+    logfile.write(str(intervalEnd) + "\n")
+    return None
+
+def plotOutlierHistogram(dataframe, maxOutliers, func, durationThreshold,
+                         averageDuration, maxDuration):
+    """Build the outlier histogram figure for one function.
+
+    dataframe has one row per bucket and supplies the columns 'lowerbound',
+    'upperbound', 'bottom', 'height' and 'bucketfiles'. maxOutliers is the
+    height of the tallest bucket and sizes the y axis. durationThreshold
+    is the threshold description (a string) shown in the title.
+    averageDuration and maxDuration are shown in an annotation. Tapping a
+    bar opens the URL in that row's 'bucketfiles' column.
+
+    Returns the bokeh figure.
+    """
+    cds = ColumnDataSource(dataframe)
+
+    figureTitle = "Occurrences of " + func + " that took longer than " \
+        + durationThreshold + "."
+
+    hover = HoverTool(tooltips=[
+        ("interval start", "@lowerbound{0,0}"),
+        ("interval end", "@upperbound{0,0}")])
+
+    TOOLS = [hover, "tap, reset"]
+
+    plotHeight = min(500, max(5, maxOutliers + 1) * pixelsPerHeightUnit
+                     + pixelsForTitle)
+
+    p = figure(title=figureTitle, plot_width=plotWidth,
+               plot_height=plotHeight,
+               x_axis_label="Execution timeline (CPU cycles)",
+               y_axis_label="Number of outliers", tools=TOOLS)
+
+    # Floor division keeps the tick arithmetic integral on Python 3 as
+    # well; range() below does not accept floats.
+    y_ticker_max = p.plot_height // pixelsPerHeightUnit
+    y_ticker_step = max(1, (maxOutliers + 1) // y_ticker_max)
+    y_upper_bound = (maxOutliers // y_ticker_step + 1) * y_ticker_step
+    ticks = list(range(0, y_upper_bound, y_ticker_step))
+
+    p.yaxis.ticker = FixedTicker(ticks=ticks)
+    p.ygrid.ticker = FixedTicker(ticks=ticks)
+    p.xaxis.formatter = NumeralTickFormatter(format="0,")
+
+    p.y_range = Range1d(0, y_upper_bound)
+
+    p.quad(left='lowerbound', right='upperbound', bottom='bottom',
+           top='height', color=funcToColor[func], source=cds,
+           nonselection_fill_color=funcToColor[func],
+           nonselection_fill_alpha=1.0,
+           line_color="lightgrey",
+           selection_fill_color=funcToColor[func],
+           selection_line_color="grey")
+
+    # Add an annotation to the chart
+    #
+    text = "Average duration: " + '{0:,.0f}'.format(averageDuration) + \
+        ". Maximum duration: " + '{0:,.0f}'.format(maxDuration) + "."
+    mytext = Label(x=0, y=y_upper_bound - y_ticker_step, text=text,
+                   text_color="grey", text_font="helvetica",
+                   text_font_size="10pt",
+                   text_font_style="italic")
+    p.add_layout(mytext)
+
+    # Clicking a bar opens the bucket file recorded for that bar.
+    url = "@bucketfiles"
+    taptool = p.select(type=TapTool)
+    taptool.callback = OpenURL(url=url)
+
+    return p
+
+# From all timestamps subtract the smallest observed timestamp, so that
+# our execution timeline begins at zero.
+# Cleanup the data to remove incomplete records and fix their effects.
+#
+def normalizeIntervalData():
+    """Shift every file's intervals so that the timeline starts at zero.
+
+    firstTimeStamp is subtracted from the 'start' and 'end' columns of each
+    dataframe in perFileDataFrame. The unshifted start is kept in a new
+    'origstart' column.
+    """
+    print(color.BLUE + color.BOLD + "Normalizing data..." + color.END)
+
+    # values() exists on both Python 2 and 3, unlike iteritems().
+    for df in perFileDataFrame.values():
+        df['origstart'] = df['start']
+        df['start'] = df['start'] - firstTimeStamp
+        df['end'] = df['end'] - firstTimeStamp
+
+def reportDataError(logfile, logfilename):
+    """Point the user at logfilename for details about data errors.
+
+    The hint is printed only when logfile is a real file; when the log is
+    sys.stdout nothing is printed. Always returns True, so callers can
+    store the result as their "error already reported" flag.
+    """
+    loggingToFile = logfile is not sys.stdout
+
+    if loggingToFile:
+        print(color.BOLD + color.RED + "Your data may have errors. " +
+              "Check the file " + logfilename + " for details." + color.END)
+
+    return True
+
+#
+# Go over all operation records in the dataframe and assign stack depths.
+#
+def assignStackDepths(dataframe):
+    """Return the records sorted by 'start' with 'stackdepth' filled in.
+
+    The depth of a record is the number of earlier-starting records that
+    are still on the stack of open intervals when it begins.
+    """
+    ordered = dataframe.sort_values(by=['start']).reset_index(drop=True)
+    openEnds = []
+
+    for row in ordered.index:
+        begin = ordered.at[row, 'start']
+
+        # Intervals that ended before this one began are not its callers,
+        # so they must not be counted.
+        while openEnds and openEnds[-1] < begin:
+            openEnds.pop()
+
+        ordered.at[row, 'stackdepth'] = len(openEnds)
+        openEnds.append(ordered.at[row, 'end'])
+
+    return ordered
+
+def createCallstackSeries(data, logfilename):
+    """Convert raw begin/end event rows into a dataframe of intervals.
+
+    data is walked with itertuples(); in every row item 0 is the
+    timestamp, item 1 the event type (0 for begin, 1 for end) and item 2
+    the function name. Problems found in the data are written to
+    logfilename, or to stdout if that file cannot be opened.
+
+    The globals firstTimeStamp and lastTimeStamp are widened to cover the
+    intervals seen here.
+
+    Returns a dataframe with the columns 'color', 'start', 'end',
+    'function', 'stackdepth', 'durations' and 'stackdepthNext'.
+    """
+    global firstTimeStamp
+    global lastTimeStamp
+
+    colors = []
+    beginIntervals = []
+    endIntervals = []
+    errorReported = False
+    functionNames = []
+    intervalBeginningsStack = []
+
+    # Let's open the log file.
+    try:
+        logfile = open(logfilename, "w")
+    except (IOError, OSError):
+        logfile = sys.stdout
+
+    try:
+        for row in data.itertuples():
+            if row[1] == 0:
+                intervalBeginningsStack.append(row)
+            elif row[1] == 1:
+                try:
+                    (intervalBegin, intervalEnd, function,
+                     error) = getIntervalData(intervalBeginningsStack,
+                                              row, logfile)
+                except (TypeError, IndexError):
+                    # No usable begin record: getIntervalData() returned
+                    # None (which cannot be unpacked) or ran off the end
+                    # of the stack.
+                    if not errorReported:
+                        errorReported = reportDataError(logfile,
+                                                        logfilename)
+                    continue
+
+                if error and not errorReported:
+                    errorReported = reportDataError(logfile, logfilename)
+
+                if intervalBegin < firstTimeStamp:
+                    firstTimeStamp = intervalBegin
+                if intervalEnd > lastTimeStamp:
+                    lastTimeStamp = intervalEnd
+
+                colors.append(getColorForFunction(function))
+                beginIntervals.append(intervalBegin)
+                endIntervals.append(intervalEnd)
+                functionNames.append(function)
+            else:
+                print("Invalid event in this line:")
+                print(str(row[0]) + " " + str(row[1]) + " " + str(row[2]))
+
+        if intervalBeginningsStack:
+            logfile.write(
+                str(len(intervalBeginningsStack)) + " operations had a " +
+                "begin record, but no matching end records. " +
+                "Please check that your operation tracking macros " +
+                "are properly inserted.\n")
+            if not errorReported:
+                errorReported = reportDataError(logfile, logfilename)
+    finally:
+        # Do not leak the log file handle; never close stdout.
+        if logfile is not sys.stdout:
+            logfile.close()
+
+    dataframe = pd.DataFrame(data={
+        'color': colors,
+        'start': beginIntervals,
+        'end': endIntervals,
+        'function': functionNames,
+        'stackdepth': [0] * len(beginIntervals)})
+    dataframe = assignStackDepths(dataframe)
+
+    dataframe['durations'] = dataframe['end'] - dataframe['start']
+    dataframe['stackdepthNext'] = dataframe['stackdepth'] + 1
+
+    return dataframe
+
+def addLegend(p, legendItems, numLegends):
+    """Place a horizontal legend made of legendItems above the plot p.
+
+    legendItems is emptied in place afterwards so the caller can start
+    collecting items for the next legend. Returns numLegends + 1.
+    """
+    p.add_layout(Legend(items=legendItems, orientation="horizontal"),
+                 place='above')
+
+    # Clear the caller's list itself rather than rebinding a local name.
+    del legendItems[:]
+
+    return numLegends + 1
+
+# For each function we only show the legend once. In this dictionary we
+# keep track of colors already used.
+#
+colorAlreadyUsedInLegend = {}
+
+def generateBucketChartForFile(figureName, dataframe, y_max, x_min, x_max):
+    """Build the callstack timeline chart of one file for one bucket.
+
+    dataframe holds the operations to draw, with the columns 'start',
+    'end', 'stackdepth', 'stackdepthNext', 'color', 'function',
+    'durations' and 'origstart'. y_max is the largest stack depth to show;
+    x_min and x_max bound the x axis. A legend entry is added for every
+    function present in dataframe whose color is not yet recorded in
+    colorAlreadyUsedInLegend.
+
+    Returns the bokeh figure.
+    """
+    MAX_ITEMS_PER_LEGEND = 5
+    numLegends = 0
+    legendItems = []
+    pixelsPerStackLevel = 30
+    pixelsPerLegend = 60
+    pixelsForTitle = 30
+
+    cds = ColumnDataSource(dataframe)
+
+    hover = HoverTool(tooltips=[
+        ("function", "@function"),
+        ("duration", "@durations{0,0}"),
+        ("log file begin timestamp", "@origstart{0,0}")
+    ])
+
+    TOOLS = [hover]
+
+    # Use the shared plotWidth so these charts line up with the histogram.
+    p = figure(title=figureName, plot_width=plotWidth,
+               x_range=(x_min, x_max),
+               y_range=(0, y_max + 1),
+               x_axis_label="Time (CPU cycles)",
+               y_axis_label="Stack depth",
+               tools=TOOLS)
+
+    # No minor ticks or labels on the y-axis
+    depthTicks = list(range(0, y_max + 1))
+    p.yaxis.major_tick_line_color = None
+    p.yaxis.minor_tick_line_color = None
+    p.yaxis.major_label_text_font_size = '0pt'
+    p.yaxis.ticker = FixedTicker(ticks=depthTicks)
+    p.ygrid.ticker = FixedTicker(ticks=depthTicks)
+
+    p.xaxis.formatter = NumeralTickFormatter(format="0,")
+
+    p.quad(left='start', right='end', bottom='stackdepth',
+           top='stackdepthNext', color='color', line_color="lightgrey",
+           line_width=0.5, source=cds)
+
+    # items() and "in" work on both Python 2 and 3, unlike iteritems() and
+    # has_key().
+    for func, fColor in funcToColor.items():
+
+        # If this function is not present in this dataframe,
+        # we don't care about it.
+        #
+        if not (dataframe['function'] == func).any():
+            continue
+
+        # If we already added a color to any legend, we don't
+        # add it again to avoid redundancy in the charts and
+        # in order not to waste space.
+        #
+        if fColor in colorAlreadyUsedInLegend:
+            continue
+        colorAlreadyUsedInLegend[fColor] = True
+
+        # A small quad in this color serves as the legend item's renderer.
+        r = p.quad(left=0, right=1, bottom=0, top=1, color=fColor)
+
+        legendItems.append(LegendItem(label=func, renderers=[r]))
+
+        # Cap the number of items in a legend, so it can
+        # fit horizontally.
+        if len(legendItems) == MAX_ITEMS_PER_LEGEND:
+            numLegends = addLegend(p, legendItems, numLegends)
+
+    # Add whatever legend items did not get added
+    if legendItems:
+        numLegends = addLegend(p, legendItems, numLegends)
+
+    # Plot height is the function of the maximum call stack and the number
+    # of legends
+    p.plot_height = (numLegends * pixelsPerLegend) \
+        + max((y_max + 1) * pixelsPerStackLevel, 100) \
+        + pixelsForTitle
+
+    return p
+
+def generateEmptyDataset():
+    """Return a one-row placeholder dataframe with the interval columns.
+
+    Every numeric column holds 0 and 'function' holds an empty string.
+    """
+    placeholder = {
+        'color': [0],
+        'durations': [0],
+        'start': [0],
+        'end': [0],
+        'function': [""],
+        'stackdepth': [0],
+        'stackdepthNext': [0],
+    }
+
+    return pd.DataFrame(data=placeholder)
+
+# When we have no data for a trace interval we generate an empty file
+# for that interval.
+#
+def createNoDataFile(filename):
+    """Write a placeholder HTML page saying the interval has no data.
+
+    If filename cannot be opened for writing, the error and its traceback
+    are printed and the function returns without raising.
+    """
+    try:
+        f = open(filename, "w")
+    except (IOError, OSError):
+        # Switch the terminal color without a newline, as the Python 2
+        # trailing-comma print did.
+        sys.stdout.write(color.RED + color.BOLD)
+        traceback.print_exc()
+        print("Could not open file " + filename + " for writing.")
+        print(color.END)
+        return
+
+    # The with-block closes the file even if one of the writes fails.
+    with f:
+        f.write("<body>\n")
+        f.write("<p style=\"text-align:center;\">")
+        f.write("No data was generated for this trace interval.</p>\n")
+        f.write("</body>\n")
+#
+# Here we generate plots that span all the input files. Each plot shows
+# the timelines for all files, stacked vertically. The timeline shows
+# the function callstacks over time from this file.
+#
+# Since a single timeline is too large to fit on a single screen, we generate
+# a separate HTML file with plots for bucket "i". A bucket is a vertical slice
+# across the timelines for all files. We call it a bucket, because it
+# corresponds to a bucket in the outlier histogram.
+#
+def generateCrossFilePlotsForBucket(i, lowerBound, upperBound):
+    """Write the HTML page for bucket i and return its file name.
+
+    For every file in perFileDataFrame, the operations that start in, end
+    in, or span the interval from lowerBound to upperBound are charted;
+    the charts are stacked vertically in one page. If no file has such
+    operations, a "no data" page is written instead.
+    """
+    global colorAlreadyUsedInLegend
+
+    fileName = bucketDir + "/bucket-" + str(i) + ".html"
+    intervalTitle = "Interval " + "{:,}".format(lowerBound) + \
+        " to " + "{:,}".format(upperBound) + \
+        " CPU cycles"
+
+    reset_output()
+
+    # Legends are tracked per HTML file, so start each new file with an
+    # empty record of the colors already shown.
+    colorAlreadyUsedInLegend = {}
+
+    charts = []
+
+    for fname in sorted(perFileDataFrame.keys()):
+
+        ops = perFileDataFrame[fname]
+
+        # Operations that start inside the interval ...
+        startsInside = (ops['start'] >= lowerBound) \
+            & (ops['start'] < upperBound)
+        # ... that end inside the interval ...
+        endsInside = (ops['end'] > lowerBound) \
+            & (ops['end'] <= upperBound)
+        # ... or that begin before it and end after it.
+        spansInterval = (ops['start'] < lowerBound) \
+            & (ops['end'] > upperBound)
+
+        selected = pd.concat([ops.loc[startsInside],
+                              ops.loc[endsInside],
+                              ops.loc[spansInterval]])
+        bucketDF = selected.drop_duplicates().reset_index(drop=True)
+
+        if bucketDF.size == 0:
+            continue
+
+        # Clip operations that stick out of the interval so that they are
+        # drawn inside it; otherwise hovering would show nothing for them.
+        runsPastEnd = bucketDF['end'] >= upperBound
+        bucketDF.loc[runsPastEnd, 'end'] = upperBound-1
+
+        beginsEarly = bucketDF['start'] < lowerBound
+        bucketDF.loc[beginsEarly, 'start'] = lowerBound
+
+        largestStackDepth = bucketDF['stackdepthNext'].max()
+        figureTitle = fname + ": " + intervalTitle
+
+        charts.append(generateBucketChartForFile(figureTitle, bucketDF,
+                                                 largestStackDepth,
+                                                 lowerBound, upperBound))
+
+    if len(charts) > 0:
+        save(column(charts), filename=fileName, title=intervalTitle,
+             resources=CDN)
+    else:
+        createNoDataFile(fileName)
+
+    return fileName
+
+# Generate plots of time series slices across all files for each bucket
+# in the outlier histogram. Save each cross-file slice to an HTML file.
+#
+def generateTSSlicesForBuckets():
+    """Generate one cross-file HTML page per histogram bucket.
+
+    The normalized timeline is cut into plotWidth // pixelsPerWidthUnit
+    buckets of equal length. Progress is reported on stdout.
+
+    Returns the list of generated file names, in bucket order.
+    """
+    bucketFilenames = []
+
+    # Floor division keeps both values integral on Python 3 as well;
+    # range() below does not accept a float.
+    numBuckets = plotWidth // pixelsPerWidthUnit
+    timeUnitsPerBucket = (lastTimeStamp - firstTimeStamp) // numBuckets
+
+    for i in range(numBuckets):
+        lowerBound = i * timeUnitsPerBucket
+        upperBound = (i + 1) * timeUnitsPerBucket
+
+        fileName = generateCrossFilePlotsForBucket(i, lowerBound,
+                                                   upperBound)
+
+        # One write with a trailing carriage return, so the progress line
+        # is overwritten in place on every iteration.
+        percentComplete = float(i) / float(numBuckets) * 100
+        sys.stdout.write(color.BLUE + color.BOLD +
+                         " Generating timeline charts... " +
+                         "%d%% complete \r" % (percentComplete))
+        sys.stdout.flush()
+        bucketFilenames.append(fileName)
+
+    print(color.END)
+
+    return bucketFilenames
+
+# Here we are making a line that will be inserted into an HTML file for
+# a given bucket (execution slice). This line will have links to the
+# previous slice and to the next slice, so we can navigate between slices
+# by clicking those links.
+#
+def makeLineWithLinks(previous, next):
+    """Return the HTML line linking to the previous and next slices.
+
+    previous and next are file names, or None when there is no such
+    neighbor; a missing neighbor contributes an empty string.
+    """
+    def arrowLink(target, side):
+        # No neighbor on this side: emit nothing.
+        if target is None:
+            return ""
+
+        if side == "left":
+            image = arrowLeftImg
+        else:
+            image = arrowRightImg
+
+        # Strip the directory component out of the file name.
+        page = target.split("/")[-1]
+
+        return "<a href=\"" + page + "\">" + \
+            "<img src=\"" + image + \
+            "\" height=\"30\" style=\"float:" + side + "\"></a><p>&nbsp;"
+
+    return arrowLink(previous, "left") + " " + \
+        arrowLink(next, "right") + "\n"
+
+
+# Into the current file insert links to the previous one and to the next one.
+# The rewritten file is saved under a new file name.
+#
+def linkFiles(current, previous, next):
+    """Insert previous/next navigation links into the HTML file current.
+
+    A line of links is added right after every line containing "<body>"
+    and right before every line containing "</body>". The result is
+    written to current + ".new", which is then renamed over current.
+
+    If either file cannot be opened, the error is printed and current is
+    left untouched. Always returns None.
+    """
+    newFileName = current + ".new"
+
+    try:
+        with open(current, "r") as curFile:
+            curFileLines = curFile.readlines()
+    except (IOError, OSError):
+        sys.stdout.write(color.RED + color.BOLD)
+        traceback.print_exc()
+        print("Could not open file " + current + " for reading.")
+        print(color.END)
+        return None
+
+    insertedLine = makeLineWithLinks(previous, next)
+
+    # Build a new list instead of inserting into the one being scanned:
+    # inserting shifts the remaining lines, so a scan over the original
+    # indices can miss "</body>" or see it more than once.
+    newLines = []
+    for line in curFileLines:
+        if "<body>" in line:
+            newLines.append(line)
+            newLines.append(insertedLine)
+        elif "</body>" in line:
+            newLines.append(insertedLine)
+            newLines.append(line)
+        else:
+            newLines.append(line)
+
+    try:
+        with open(newFileName, "w") as newFile:
+            newFile.writelines(newLines)
+    except (IOError, OSError):
+        sys.stdout.write(color.RED + color.BOLD)
+        traceback.print_exc()
+        print("Could not open file " + newFileName + " for writing.")
+        print(color.END)
+        return None
+
+    os.rename(newFileName, current)
+
+# We have a list of bucket files. Each one is an HTML file showing a slice of
+# the execution. To be able to easily navigate between consecutive execution
+# slices we insert links into each slice-file that take us to the previous
+# slice and to the next slice.
+#
+def interlinkFiles(fnameList):
+    """Link every file in fnameList to its neighbors in the list.
+
+    The first file gets no "previous" link and the last one no "next"
+    link.
+    """
+    lastIndex = len(fnameList) - 1
+
+    for position, current in enumerate(fnameList):
+        before = fnameList[position - 1] if position > 0 else None
+        after = fnameList[position + 1] if position < lastIndex else None
+
+        linkFiles(current, before, after)
+
+def processFile(fname):
+    """Parse one operation-tracking log and record its intervals.
+
+    fname is read as a space-delimited file with the columns Event,
+    Function and Timestamp. The resulting interval dataframe is stored in
+    perFileDataFrame[fname], and the intervals of every known function are
+    appended to that function's entry in perFuncDF.
+    """
+    rawData = pd.read_csv(fname,
+                          header=None, delimiter=" ",
+                          index_col=2,
+                          names=["Event", "Function", "Timestamp"],
+                          dtype={"Event": np.int32, "Timestamp": np.int64},
+                          thousands=",")
+
+    print(color.BOLD + color.BLUE +
+          "Processing file " + str(fname) + color.END)
+
+    # Put the hidden error log next to the input file. Prefixing the whole
+    # path with "." would name a nonexistent directory whenever fname has
+    # a directory component.
+    logDir, logBase = os.path.split(fname)
+    logName = os.path.join(logDir, "." + logBase + ".log")
+
+    iDF = createCallstackSeries(rawData, logName)
+
+    perFileDataFrame[fname] = iDF
+
+    for func in funcToColor:
+
+        funcDF = iDF.loc[iDF['function'] == func]
+        funcDF = funcDF.drop(columns=['function'])
+
+        # "in" works on both Python 2 and 3; has_key() is Python 2 only.
+        if func in perFuncDF:
+            perFuncDF[func] = pd.concat([perFuncDF[func], funcDF])
+        else:
+            perFuncDF[func] = funcDF
+
+
+#
+# For each function, split the timeline into buckets. In each bucket
+# show how many times this function took an unusually long time to
+# execute.
+#
+# The threshold that makes a call "unusually long" comes from the
+# configuration file (outlierThresholdDict). If none is configured, or it
+# is given in standard deviations, we count all calls whose duration
+# exceeded the average by that many standard deviations (STDEV_MULT by
+# default).
+#
+def createOutlierHistogramForFunction(func, funcDF, bucketFilenames):
+    """Build the outlier histogram for func, or None if it has no outliers.
+
+    funcDF holds one row per call of func with the columns 'start', 'end'
+    and 'durations'; the timestamps are raw, and firstTimeStamp is
+    subtracted here. The timeline is split into
+    plotWidth // pixelsPerWidthUnit buckets, and in each bucket we count
+    the calls that started there and lasted at least as long as the
+    threshold. bucketFilenames gives, per bucket, the page opened when its
+    bar is tapped.
+
+    The threshold is looked up in outlierThresholdDict under func, then
+    under "*". A negative value, or no entry at all, means "this many
+    standard deviations above the average duration".
+    """
+    # Work on a new dataframe so that the caller's data is not modified.
+    funcDF = funcDF.assign(start=funcDF['start'] - firstTimeStamp,
+                           end=funcDF['end'] - firstTimeStamp)
+    funcDF = funcDF.sort_values(by=['start'])
+
+    averageDuration = funcDF['durations'].mean()
+    maxDuration = funcDF['durations'].max()
+
+    durationThresholdDescr = ""
+    if func in outlierThresholdDict:
+        durationThreshold = outlierThresholdDict[func]
+        durationThresholdDescr = outlierPrettyNames[func]
+    elif "*" in outlierThresholdDict:
+        durationThreshold = outlierThresholdDict["*"]
+        durationThresholdDescr = outlierPrettyNames["*"]
+    else:
+        # Signal that we will use standard deviation
+        durationThreshold = -STDEV_MULT
+
+    if durationThreshold < 0:  # this is a stdev multiplier
+        mult = -durationThreshold
+        stdDev = funcDF['durations'].std()
+        durationThreshold = averageDuration + mult * stdDev
+        durationThresholdDescr = '{0:,.0f}'.format(durationThreshold) \
+            + " measurement units (" + str(mult) + \
+            " standard deviations)"
+
+    # Floor division keeps both values integral on Python 3 as well;
+    # range() below does not accept a float.
+    numBuckets = plotWidth // pixelsPerWidthUnit
+    timeUnitsPerBucket = (lastTimeStamp - firstTimeStamp) // numBuckets
+    lowerBounds = []
+    upperBounds = []
+    bucketHeights = []
+    maxOutliers = 0
+
+    for i in range(numBuckets):
+        lowerBound = i * timeUnitsPerBucket
+        upperBound = (i + 1) * timeUnitsPerBucket
+
+        bucketDF = funcDF.loc[(funcDF['start'] >= lowerBound)
+                              & (funcDF['start'] < upperBound)
+                              & (funcDF['durations'] >= durationThreshold)]
+
+        # Count rows. DataFrame.size is rows times columns and would
+        # inflate every bar by the number of columns.
+        numOutliers = len(bucketDF.index)
+        if numOutliers > maxOutliers:
+            maxOutliers = numOutliers
+
+        lowerBounds.append(lowerBound)
+        upperBounds.append(upperBound)
+        bucketHeights.append(numOutliers)
+
+    if maxOutliers == 0:
+        return None
+
+    dataframe = pd.DataFrame(data={
+        'lowerbound': lowerBounds,
+        'upperbound': upperBounds,
+        'height': bucketHeights,
+        'bottom': [0] * len(lowerBounds),
+        'bucketfiles': bucketFilenames})
+
+    return plotOutlierHistogram(dataframe, maxOutliers, func,
+                                durationThresholdDescr, averageDuration,
+                                maxDuration)
+#
+# The configuration file tells us which functions should be considered
+# outliers. All comment lines must begin with '#'.
+#
+# The first non-comment line of the file must tell us how to interpret
+# the measurement units in the trace file. It must have a single number
+# telling us how many time units are contained in a second. This should
+# be the same time units used in the trace file. For example, if the trace
+# file contains timestamps measured in milliseconds, the number would be 1000.
+# If timestamps were measured in clock cycles, as is typically done, the number
+# must tell us how many times the CPU clock ticks per second on the processor
+# where the trace was gathered.
+#
+# The remaining lines must have the format:
+# <func_name> <outlier_threshold> [units]
+#
+# For example, if you would like to flag as outliers all instances of
+# __cursor_row_search that took longer than 200ms, you would specify this as:
+#
+# __cursor_row_search 200 ms
+#
+# You can use * as the wildcard for all function. No other wildcard options are
+# supported at the moment.
+#
+# Acceptable units are:
+#
+# s -- for seconds
+# ms -- for milliseconds
+# us -- for microseconds
+# ns -- for nanoseconds
+# stdev -- for standard deviations.
+#
+# If no units are supplied, the same unit as the one used for the timestamp
+# in the trace files is assumed.
+#
+# If there is a valid configuration file, but the function does not appear in
+# it (and there is no wildcard entry), the default threshold of STDEV_MULT
+# standard deviations is used for this function. Use the wildcard symbol to
+# set one threshold for all functions.
+#
+def parseConfigFile(fname):
+    """Read outlier thresholds from the configuration file fname.
+
+    Lines starting with "#" and blank lines are ignored. The first
+    remaining line must hold the number of measurement units per second;
+    every later line has the form "<func_name> <threshold> [units]", where
+    units is one of s, ms, us, ns or stdev. Without units the threshold is
+    taken to be in measurement units already. Lines that cannot be
+    understood are reported and skipped.
+
+    Thresholds are stored in outlierThresholdDict (standard-deviation
+    thresholds as negative numbers) and their descriptions in
+    outlierPrettyNames.
+
+    Returns True if the units-per-second line was read, False if the file
+    could not be opened, that line could not be parsed, or the file had no
+    content.
+    """
+    firstNonCommentLine = True
+    unitsPerSecond = -1
+    unitsPerMillisecond = 0.0
+    unitsPerMicrosecond = 0.0
+    unitsPerNanosecond = 0.0
+
+    try:
+        configFile = open(fname, "r")
+    except (IOError, OSError):
+        print(color.BOLD + color.RED +
+              "Could not open " + fname + " for reading." + color.END)
+        return False
+
+    # The with-block closes the file on every return path.
+    with configFile:
+        for line in configFile:
+
+            if line.startswith("#") or not line.strip():
+                continue
+
+            if firstNonCommentLine:
+                try:
+                    unitsPerSecond = int(line)
+                except ValueError:
+                    print(color.BOLD + color.RED +
+                          "Could not parse the number of measurement units " +
+                          "per second. This must be the first value in the " +
+                          "config file." + color.END)
+                    return False
+
+                # Float division: with integer division a 2.4GHz clock
+                # would yield 2 units per nanosecond instead of 2.4.
+                unitsPerMillisecond = unitsPerSecond / 1000.0
+                unitsPerMicrosecond = unitsPerSecond / 1000000.0
+                unitsPerNanosecond = unitsPerSecond / 1000000000.0
+
+                firstNonCommentLine = False
+                continue
+
+            words = line.split()
+            try:
+                func = words[0]
+                number = int(words[1])
+            except (IndexError, ValueError):
+                print(color.BOLD + color.RED +
+                      "While parsing the config file, could not understand " +
+                      "the following line: " + color.END)
+                print(line)
+                continue
+
+            # The units are optional.
+            units = words[2] if len(words) > 2 else ""
+
+            # Now convert the number to the baseline units and record in
+            # the dictionary.
+            #
+            if units == "":
+                threshold = number
+            elif units == "s":
+                threshold = unitsPerSecond * number
+            elif units == "ms":
+                threshold = unitsPerMillisecond * number
+            elif units == "us":
+                threshold = unitsPerMicrosecond * number
+            elif units == "ns":
+                threshold = unitsPerNanosecond * number
+            elif units == "stdev":
+                # We record it as negative, so that we know
+                # this is a standard deviation. We will compute
+                # the actual value once we know the average.
+                threshold = -number
+            else:
+                print(color.BOLD + color.RED +
+                      "While parsing the config file, could not understand " +
+                      "the following line: " + color.END)
+                print(line)
+                continue
+
+            outlierThresholdDict[func] = threshold
+            outlierPrettyNames[func] = str(number) + " " + \
+                (units or "measurement units")
+
+    # We were given an empty config file
+    if firstNonCommentLine:
+        return False
+
+    print(outlierThresholdDict)
+    return True
+
+
+def main():
+    """Process the log files named on the command line and emit the charts.
+
+    One HTML page per timeline bucket is written into bucketDir, the pages
+    are linked to one another, and the outlier histograms for all
+    functions are written to WT-outliers.html and shown.
+    """
+    # Local import: only needed here, for copying the arrow images.
+    import shutil
+
+    configSupplied = False
+    figuresForAllFunctions = []
+
+    # Set up the argument parser
+    #
+    parser = argparse.ArgumentParser(description='Visualize operation log')
+    parser.add_argument('files', type=str, nargs='*',
+                        help='log files to process')
+    parser.add_argument('-c', '--config', dest='configFile', default='')
+    args = parser.parse_args()
+
+    if not args.files:
+        parser.print_help()
+        sys.exit(1)
+
+    # Get names of standard CSS colors that we will use for the legend
+    initColorList()
+
+    # Read the configuration file, if supplied.
+    if args.configFile != '':
+        configSupplied = parseConfigFile(args.configFile)
+
+    if not configSupplied:
+        pluralSuffix = "s" if STDEV_MULT > 1 else ""
+        print(color.BLUE + color.BOLD +
+              "Will deem as outliers all function instances whose runtime " +
+              "was " + str(STDEV_MULT) + " standard deviation" +
+              pluralSuffix +
+              " greater than the average runtime for that function."
+              + color.END)
+
+    # Create a directory for the files that display the data summarized
+    # in each bucket of the outlier histogram. We call these "bucket files".
+    #
+    if not os.path.exists(bucketDir):
+        os.makedirs(bucketDir)
+
+    # Copy the image files that we will need later into bucketDir. This
+    # is done without a shell, so paths containing spaces or shell
+    # metacharacters are safe. A missing image is reported, not fatal.
+    scriptLocation = os.path.dirname(os.path.realpath(__file__))
+    for image in (arrowLeftImg, arrowRightImg):
+        try:
+            shutil.copy(os.path.join(scriptLocation, image),
+                        os.path.join(bucketDir, image))
+        except (IOError, OSError) as err:
+            print(color.BOLD + color.RED + "Could not copy " + image +
+                  " into " + bucketDir + ": " + str(err) + color.END)
+
+    # Parallelize this later, so we are working on files in parallel.
+    for fname in args.files:
+        processFile(fname)
+
+    # Normalize all intervals by subtracting the first timestamp.
+    normalizeIntervalData()
+
+    # Generate plots of time series slices across all files for each bucket
+    # in the outlier histogram. Save each cross-file slice to an HTML file.
+    #
+    fileNameList = generateTSSlicesForBuckets()
+
+    # Rewrite the files, so that they have links to one another. This way
+    # you can navigate from one slice to the next by clicking the link
+    # inside the file.
+    #
+    interlinkFiles(fileNameList)
+
+    totalFuncs = len(perFuncDF)
+
+    # Generate a histogram of outlier durations
+    for i, func in enumerate(sorted(perFuncDF.keys()), 1):
+        histogram = createOutlierHistogramForFunction(func, perFuncDF[func],
+                                                      fileNameList)
+        if histogram is not None:
+            figuresForAllFunctions.append(histogram)
+
+        percentComplete = float(i) / float(totalFuncs) * 100
+        sys.stdout.write(color.BLUE + color.BOLD +
+                         " Generating outlier histograms... " +
+                         "%d%% complete \r" % (percentComplete))
+        sys.stdout.flush()
+
+    print(color.END)
+    reset_output()
+    output_file(filename="WT-outliers.html", title="Outlier histograms")
+    show(column(figuresForAllFunctions))
+
+if __name__ == '__main__':
+    main()
+
+
+
diff --git a/src/third_party/wiredtiger/tools/wt_optrack_decode.py b/src/third_party/wiredtiger/tools/optrack/wt_optrack_decode.py
index ebfd4af05b7..ebfd4af05b7 100755
--- a/src/third_party/wiredtiger/tools/wt_optrack_decode.py
+++ b/src/third_party/wiredtiger/tools/optrack/wt_optrack_decode.py