summary refs log tree commit diff
diff options
context:
space:
mode:
author Luke Chen <luke.chen@mongodb.com> 2017-11-03 14:16:15 +1100
committer Luke Chen <luke.chen@mongodb.com> 2017-11-03 14:16:15 +1100
commit 0a2f8f6ad756189263d050b29f69bc57b45b9816 (patch)
tree 98250d6b6447c379ad6b2f2847b38da9df698c33
parent a31e9d415a25561bd36b001f3a6e9b0a6a115236 (diff)
parent d4e28e6ca1430a7655621c47cc9d77092a813425 (diff)
download mongo-0a2f8f6ad756189263d050b29f69bc57b45b9816.tar.gz
Merge branch 'develop' into mongodb-3.6
-rw-r--r-- dist/api_data.py 1
-rw-r--r-- dist/flags.py 1
-rw-r--r-- dist/package/debian/README.Debian 8
-rw-r--r-- dist/package/debian/README.source 9
-rw-r--r-- dist/package/debian/changelog 5
-rw-r--r-- dist/package/debian/compat 1
-rw-r--r-- dist/package/debian/control 36
-rw-r--r-- dist/package/debian/copyright 26
-rw-r--r-- dist/package/debian/docs 2
-rw-r--r-- dist/package/debian/files 3
-rw-r--r-- dist/package/debian/libwiredtiger-dev.dirs 2
-rw-r--r-- dist/package/debian/libwiredtiger-dev.install 2
-rw-r--r-- dist/package/debian/libwiredtiger-dev.substvars 1
-rw-r--r-- dist/package/debian/libwiredtiger.dirs 1
-rw-r--r-- dist/package/debian/libwiredtiger.install 2
-rw-r--r-- dist/package/debian/libwiredtiger.postinst.debhelper 5
-rw-r--r-- dist/package/debian/libwiredtiger.postrm.debhelper 5
-rw-r--r-- dist/package/debian/libwiredtiger.substvars 2
-rwxr-xr-x dist/package/debian/rules 13
-rw-r--r-- dist/package/debian/shlibs.local 1
-rw-r--r-- dist/package/debian/source/format 1
-rw-r--r-- dist/package/debian/watch 8
-rw-r--r-- dist/package/debian/wiredtiger-util.dirs 1
-rw-r--r-- dist/package/debian/wiredtiger-util.install 1
-rw-r--r-- dist/package/debian/wiredtiger-util.substvars 2
-rw-r--r-- dist/package/debian/wiredtiger.doc-base 12
-rw-r--r-- dist/package/wiredtiger.spec 58
-rw-r--r-- dist/s_define.list 1
-rw-r--r-- dist/stat_data.py 1
-rw-r--r-- src/btree/bt_debug.c 5
-rw-r--r-- src/btree/bt_read.c 109
-rw-r--r-- src/btree/bt_split.c 44
-rw-r--r-- src/btree/bt_sync.c 86
-rw-r--r-- src/cache/cache_las.c 59
-rw-r--r-- src/config/config_def.c 80
-rw-r--r-- src/conn/conn_api.c 1
-rw-r--r-- src/conn/conn_cache.c 13
-rw-r--r-- src/conn/conn_cache_pool.c 2
-rw-r--r-- src/conn/conn_dhandle.c 152
-rw-r--r-- src/evict/evict_lru.c 50
-rw-r--r-- src/evict/evict_page.c 67
-rw-r--r-- src/include/api.h 9
-rw-r--r-- src/include/btmem.h 35
-rw-r--r-- src/include/cache.h 36
-rw-r--r-- src/include/cache.i 53
-rw-r--r-- src/include/connection.h 9
-rw-r--r-- src/include/extern.h 2
-rw-r--r-- src/include/flags.h 57
-rw-r--r-- src/include/lint.h 10
-rw-r--r-- src/include/misc.h 2
-rw-r--r-- src/include/stat.h 1
-rw-r--r-- src/include/txn.i 14
-rw-r--r-- src/include/wiredtiger.in 426
-rw-r--r-- src/log/log.c 14
-rw-r--r-- src/lsm/lsm_tree.c 7
-rw-r--r-- src/meta/meta_track.c 58
-rw-r--r-- src/reconcile/rec_write.c 170
-rw-r--r-- src/support/stat.c 4
-rw-r--r-- src/txn/txn.c 2
-rw-r--r-- src/txn/txn_ckpt.c 93
-rw-r--r-- src/txn/txn_timestamp.c 17
-rw-r--r-- src/utilities/util_dump.c 2
-rw-r--r-- test/csuite/random_abort/main.c 32
-rw-r--r-- test/csuite/timestamp_abort/main.c 178
-rw-r--r-- test/format/compact.c 7
-rw-r--r-- test/format/config.c 42
-rw-r--r-- test/format/config.h 18
-rw-r--r-- test/format/format.h 1
-rw-r--r-- test/mciproject.yml 16
-rw-r--r-- test/suite/suite_subprocess.py 44
70 files changed, 1206 insertions, 1032 deletions
diff --git a/dist/api_data.py b/dist/api_data.py
index 8fcf99ad3c0..3d6d4712413 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -553,6 +553,7 @@ connection_runtime_config = [
'api',
'block',
'checkpoint',
+ 'checkpoint_progress',
'compact',
'evict',
'evict_stuck',
diff --git a/dist/flags.py b/dist/flags.py
index 70602333ad5..21fd0756435 100644
--- a/dist/flags.py
+++ b/dist/flags.py
@@ -65,6 +65,7 @@ flags = {
'VERB_API',
'VERB_BLOCK',
'VERB_CHECKPOINT',
+ 'VERB_CHECKPOINT_PROGRESS',
'VERB_COMPACT',
'VERB_EVICT',
'VERB_EVICTSERVER',
diff --git a/dist/package/debian/README.Debian b/dist/package/debian/README.Debian
deleted file mode 100644
index 2028c0740dc..00000000000
--- a/dist/package/debian/README.Debian
+++ /dev/null
@@ -1,8 +0,0 @@
-wiredtiger for Debian
----------------------
-
-This is a package of the WiredTiger database library for Debian based
-systems. For more information on WiredTiger please visit:
-http://www.wiredtiger.com or contact us at info@wiredtiger.com
-
- -- Alex <alexg@wiredtiger.com> Tue, 01 Apr 2014 15:50:02 +1100
diff --git a/dist/package/debian/README.source b/dist/package/debian/README.source
deleted file mode 100644
index ddd6dc94c3d..00000000000
--- a/dist/package/debian/README.source
+++ /dev/null
@@ -1,9 +0,0 @@
-wiredtiger for Debian
----------------------
-
-<this file describes information about the source package, see Debian policy
-manual section 4.14. You WILL either need to modify or delete this file>
-
-
-
-
diff --git a/dist/package/debian/changelog b/dist/package/debian/changelog
deleted file mode 100644
index 1481a506d6d..00000000000
--- a/dist/package/debian/changelog
+++ /dev/null
@@ -1,5 +0,0 @@
-wiredtiger (2.1.2-1) UNRELEASED; urgency=low
-
- * Initial release of WiredTiger
-
- -- Alex <alexg@wiredtiger.com> Tue, 01 Apr 2014 15:50:02 +1100
diff --git a/dist/package/debian/compat b/dist/package/debian/compat
deleted file mode 100644
index 45a4fb75db8..00000000000
--- a/dist/package/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-8
diff --git a/dist/package/debian/control b/dist/package/debian/control
deleted file mode 100644
index 5ad2b71b4b9..00000000000
--- a/dist/package/debian/control
+++ /dev/null
@@ -1,36 +0,0 @@
-Source: wiredtiger
-Priority: extra
-Maintainer: Alex Gorrod <alexg@wiredtiger.com>
-Build-Depends: debhelper (>= 8.0.0), autotools-dev
-Standards-Version: 3.9.4
-Section: libs
-Homepage: http://www.wiredtiger.com
-#Vcs-Git: git://git.debian.org/collab-maint/wiredtiger.git
-#Vcs-Browser: http://git.debian.org/?p=collab-maint/wiredtiger.git;a=summary
-
-Package: libwiredtiger-dev
-Architecture: any
-Section: libdevel
-Priority: extra
-Depends: ${misc:Depends}
-Description: WiredTiger Database Libraries [development]
- This is the development package which contains headers and static
- libraries for the WiredTiger database library.
-
-Package: libwiredtiger
-Architecture: any
-Depends: ${shlibs:Depends},
- ${misc:Depends}
-Description: WiredTiger Database Libraries [runtime]
- This is the runtime package for programs that use the WiredTiger
- database library.
-
-Package: wiredtiger-util
-Architecture: any
-Section: database
-Priority: extra
-Depends: ${shlibs:Depends},
- ${misc:Depends}
-Description: WiredTiger Database Utilities
- This package provides tools for manipulating WiredTiger databases
-
diff --git a/dist/package/debian/copyright b/dist/package/debian/copyright
deleted file mode 100644
index 1394ad8b4bd..00000000000
--- a/dist/package/debian/copyright
+++ /dev/null
@@ -1,26 +0,0 @@
-Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Name: wiredtiger
-Source: <http://www.wiredtiger.com>
-
-Files: *
-Copyright: (c) 2008-2014 WiredTiger, Inc.
-License:
- This program is free software: you can redistribute it and/or modify it under
- the terms of either version 2 or version 3 of the GNU General Public License
- as published by the Free Software Foundation.
- .
- On Debian GNU/Linux systems, the complete text of the GNU General
- Public License can be found in `/usr/share/common-licenses/GPL-2' and
- `/usr/share/common-licenses/GPL-3'.
- .
- This program is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
- details.
- .
- For a license to use the WiredTiger software under conditions other than those
- described by the GNU General Public License, or for technical support for this
- software, contact WiredTiger, Inc. at info@wiredtiger.com.
- .
- For further information, see the licensing section in the documentation.
-
diff --git a/dist/package/debian/docs b/dist/package/debian/docs
deleted file mode 100644
index 50bd824bb7b..00000000000
--- a/dist/package/debian/docs
+++ /dev/null
@@ -1,2 +0,0 @@
-NEWS
-README
diff --git a/dist/package/debian/files b/dist/package/debian/files
deleted file mode 100644
index 53662d0c48e..00000000000
--- a/dist/package/debian/files
+++ /dev/null
@@ -1,3 +0,0 @@
-libwiredtiger-dev_2.1.2-1_amd64.deb libdevel extra
-libwiredtiger_2.1.2-1_amd64.deb libs extra
-wiredtiger-util_2.1.2-1_amd64.deb database extra
diff --git a/dist/package/debian/libwiredtiger-dev.dirs b/dist/package/debian/libwiredtiger-dev.dirs
deleted file mode 100644
index da07fddd09b..00000000000
--- a/dist/package/debian/libwiredtiger-dev.dirs
+++ /dev/null
@@ -1,2 +0,0 @@
-usr/include
-usr/lib
diff --git a/dist/package/debian/libwiredtiger-dev.install b/dist/package/debian/libwiredtiger-dev.install
deleted file mode 100644
index deb99408b27..00000000000
--- a/dist/package/debian/libwiredtiger-dev.install
+++ /dev/null
@@ -1,2 +0,0 @@
-usr/include/*
-usr/lib/pkgconfig/*
diff --git a/dist/package/debian/libwiredtiger-dev.substvars b/dist/package/debian/libwiredtiger-dev.substvars
deleted file mode 100644
index abd3ebebc30..00000000000
--- a/dist/package/debian/libwiredtiger-dev.substvars
+++ /dev/null
@@ -1 +0,0 @@
-misc:Depends=
diff --git a/dist/package/debian/libwiredtiger.dirs b/dist/package/debian/libwiredtiger.dirs
deleted file mode 100644
index 68457717bd8..00000000000
--- a/dist/package/debian/libwiredtiger.dirs
+++ /dev/null
@@ -1 +0,0 @@
-usr/lib
diff --git a/dist/package/debian/libwiredtiger.install b/dist/package/debian/libwiredtiger.install
deleted file mode 100644
index 27fae7a0850..00000000000
--- a/dist/package/debian/libwiredtiger.install
+++ /dev/null
@@ -1,2 +0,0 @@
-usr/lib/libwiredtiger*.a
-usr/lib/libwiredtiger*.so
diff --git a/dist/package/debian/libwiredtiger.postinst.debhelper b/dist/package/debian/libwiredtiger.postinst.debhelper
deleted file mode 100644
index 3d89d3ef629..00000000000
--- a/dist/package/debian/libwiredtiger.postinst.debhelper
+++ /dev/null
@@ -1,5 +0,0 @@
-# Automatically added by dh_makeshlibs
-if [ "$1" = "configure" ]; then
- ldconfig
-fi
-# End automatically added section
diff --git a/dist/package/debian/libwiredtiger.postrm.debhelper b/dist/package/debian/libwiredtiger.postrm.debhelper
deleted file mode 100644
index 7f44047270f..00000000000
--- a/dist/package/debian/libwiredtiger.postrm.debhelper
+++ /dev/null
@@ -1,5 +0,0 @@
-# Automatically added by dh_makeshlibs
-if [ "$1" = "remove" ]; then
- ldconfig
-fi
-# End automatically added section
diff --git a/dist/package/debian/libwiredtiger.substvars b/dist/package/debian/libwiredtiger.substvars
deleted file mode 100644
index 1e00e6fd7a6..00000000000
--- a/dist/package/debian/libwiredtiger.substvars
+++ /dev/null
@@ -1,2 +0,0 @@
-shlibs:Depends=libc6 (>= 2.14)
-misc:Depends=
diff --git a/dist/package/debian/rules b/dist/package/debian/rules
deleted file mode 100755
index 312e24d2e6f..00000000000
--- a/dist/package/debian/rules
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/make -f
-# -*- makefile -*-
-# Sample debian/rules that uses debhelper.
-# This file was originally written by Joey Hess and Craig Small.
-# As a special exception, when this file is copied by dh-make into a
-# dh-make output file, you may use that output file without restriction.
-# This special exception was added by Craig Small in version 0.37 of dh-make.
-
-# Uncomment this to turn on verbose mode.
-#export DH_VERBOSE=1
-
-%:
- dh $@ --with autotools-dev
diff --git a/dist/package/debian/shlibs.local b/dist/package/debian/shlibs.local
deleted file mode 100644
index a3b3face389..00000000000
--- a/dist/package/debian/shlibs.local
+++ /dev/null
@@ -1 +0,0 @@
-libwiredtiger 2.1.2 wiredtiger (>> 2.1.2-0), wiredtiger (<< 2.1.2-99)
diff --git a/dist/package/debian/source/format b/dist/package/debian/source/format
deleted file mode 100644
index 163aaf8d82b..00000000000
--- a/dist/package/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/dist/package/debian/watch b/dist/package/debian/watch
deleted file mode 100644
index 9287dca3d91..00000000000
--- a/dist/package/debian/watch
+++ /dev/null
@@ -1,8 +0,0 @@
-# Watch control file for uscan
-# to check for upstream updates and more.
-# See uscan(1) for format
-
-# Compulsory line, this is a version 3 file
-version=3
-
-http://source.wiredtiger.com/releases/wiredtiger-(.*)\.tar\.bz2
diff --git a/dist/package/debian/wiredtiger-util.dirs b/dist/package/debian/wiredtiger-util.dirs
deleted file mode 100644
index e7724817552..00000000000
--- a/dist/package/debian/wiredtiger-util.dirs
+++ /dev/null
@@ -1 +0,0 @@
-usr/bin
diff --git a/dist/package/debian/wiredtiger-util.install b/dist/package/debian/wiredtiger-util.install
deleted file mode 100644
index 1df36c612fb..00000000000
--- a/dist/package/debian/wiredtiger-util.install
+++ /dev/null
@@ -1 +0,0 @@
-usr/bin/*
diff --git a/dist/package/debian/wiredtiger-util.substvars b/dist/package/debian/wiredtiger-util.substvars
deleted file mode 100644
index 4dd9c7cf955..00000000000
--- a/dist/package/debian/wiredtiger-util.substvars
+++ /dev/null
@@ -1,2 +0,0 @@
-shlibs:Depends=libc6 (>= 2.14), wiredtiger (>> 2.1.2-0), wiredtiger (<< 2.1.2-99)
-misc:Depends=
diff --git a/dist/package/debian/wiredtiger.doc-base b/dist/package/debian/wiredtiger.doc-base
deleted file mode 100644
index faa994f156f..00000000000
--- a/dist/package/debian/wiredtiger.doc-base
+++ /dev/null
@@ -1,12 +0,0 @@
-Document: wiredtiger
-Title: Debian wiredtiger Manual
-Author: WiredTiger, Inc.
-Abstract: WiredTiger is a database storage engine library.
-Section: library
-
-Format: postscript
-Files: /usr/share/doc/wiredtiger/wiredtiger.ps.gz
-
-Format: HTML
-Index: /usr/share/doc/wiredtiger/html/index.html
-Files: /usr/share/doc/wiredtiger/html/*.html
diff --git a/dist/package/wiredtiger.spec b/dist/package/wiredtiger.spec
deleted file mode 100644
index f4cb78183d0..00000000000
--- a/dist/package/wiredtiger.spec
+++ /dev/null
@@ -1,58 +0,0 @@
-Name: wiredtiger
-Version: 3.0.0
-Release: 1%{?dist}
-Summary: WiredTiger data storage engine
-
-Group: Development/Libraries
-License: GPLV2 or GPLV3
-URL: www.wiredtiger.com
-Source0: http://source.wiredtiger.com/releases/%{name}-%{version}.tar.bz2
-BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
-
-BuildRequires: python-devel java-devel
-Requires: jemalloc
-
-%description
-
-WiredTiger is a data storage engine that provides APIs for efficiently
-storing data in highly concurrent applications. It includes functionality
-for automatically maintaining indexes. It implements both row and column
-store formats - so that all types of data can be stored space efficiently.
-
-WiredTiger is a library that can be accessed via C, Python and Java APIs.
-
-
-%prep
-%autosetup
-
-
-%build
-%configure --enable-java --enable-bzip2 --enable-snappy --enable-zlib
-# Stop the build setting up an rpath
-sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' libtool
-sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool
-make %{?_smp_mflags}
-
-
-%install
-rm -rf %{buildroot}
-make install DESTDIR=%{buildroot}
-# Need to resolve make install with --enable-python before we can
-# install the python API.
-# python setup.py install -O1 --skip-build --root $RPM_BUILD_ROOT
-
-%clean
-rm -rf %{buildroot}
-
-
-%files
-%defattr(-,root,root,-)
-%doc README LICENSE NEWS
-%{_bindir}/*
-%{_datadir}/*
-%{_includedir}/*
-%{_libdir}/*
-
-
-%changelog
-
diff --git a/dist/s_define.list b/dist/s_define.list
index b2f6cbec43e..dcaf975434f 100644
--- a/dist/s_define.list
+++ b/dist/s_define.list
@@ -58,7 +58,6 @@ WT_STAT_INCRV_BASE
WT_STAT_WRITE
WT_TIMEDIFF_US
WT_TRET_ERROR_OK
-WT_TXN_TIMESTAMP_FLAG_CHECK
WT_UPDATE_SIZE
WT_WITH_LOCK_NOWAIT
WT_WITH_LOCK_WAIT
diff --git a/dist/stat_data.py b/dist/stat_data.py
index 24610b9ab14..64d3d46818b 100644
--- a/dist/stat_data.py
+++ b/dist/stat_data.py
@@ -257,6 +257,7 @@ connection_stats = [
CacheStat('cache_lookaside_entries', 'lookaside table entries', 'no_clear,no_scale'),
CacheStat('cache_lookaside_insert', 'lookaside table insert calls'),
CacheStat('cache_lookaside_remove', 'lookaside table remove calls'),
+ CacheStat('cache_lookaside_score', 'lookaside score', 'no_clear,no_scale'),
CacheStat('cache_overhead', 'percentage overhead', 'no_clear,no_scale'),
CacheStat('cache_pages_dirty', 'tracked dirty pages in the cache', 'no_clear,no_scale'),
CacheStat('cache_pages_inuse', 'pages currently held in the cache', 'no_clear,no_scale'),
diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c
index caa960d78ae..3df5920830c 100644
--- a/src/btree/bt_debug.c
+++ b/src/btree/bt_debug.c
@@ -733,6 +733,8 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref)
WT_RET(ds->f(ds, ", entries %" PRIu32, entries));
WT_RET(ds->f(ds,
", %s", __wt_page_is_modified(page) ? "dirty" : "clean"));
+ WT_RET(ds->f(ds,
+ ", memory_size %" WT_SIZET_FMT, page->memory_footprint));
if (F_ISSET_ATOMIC(page, WT_PAGE_BUILD_KEYS))
WT_RET(ds->f(ds, ", keys-built"));
@@ -1032,8 +1034,7 @@ __debug_modified(WT_DBG *ds, WT_UPDATE *upd)
p = (size_t *)upd->data;
memcpy(&nentries, p++, sizeof(size_t));
- data = upd->data +
- sizeof(size_t) + ((size_t)nentries * 3 * sizeof(size_t));
+ data = upd->data + sizeof(size_t) + (nentries * 3 * sizeof(size_t));
WT_RET(ds->f(ds, "%" WT_SIZET_FMT ": ", nentries));
for (; nentries-- > 0; data += data_size) {
diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c
index 838c6845b08..fe6be6517a2 100644
--- a/src/btree/bt_read.c
+++ b/src/btree/bt_read.c
@@ -88,7 +88,6 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id)
uint32_t las_id, session_flags;
const uint8_t *p;
uint8_t upd_type;
- int exact;
cursor = NULL;
page = ref->page;
@@ -112,14 +111,9 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id)
* in-order updates for a subsequent key. We process all of the updates
* for a key and then insert those updates into the page, then all the
* updates for the next key, and so on.
- *
- * Search for the block's unique prefix, stepping through any matching
- * records.
*/
- cursor->set_key(cursor,
- btree_id, ref->page_las->las_pageid, (uint64_t)0, &las_key);
- if ((ret = cursor->search_near(cursor, &exact)) == 0 && exact < 0)
- ret = cursor->next(cursor);
+ ret = __wt_las_cursor_position(
+ cursor, btree_id, ref->page_las->las_pageid);
for (; ret == 0; ret = cursor->next(cursor)) {
WT_ERR(cursor->get_key(cursor,
&las_id, &las_pageid, &las_counter, &las_key));
@@ -436,6 +430,74 @@ err: /*
}
/*
+ * __las_page_skip --
+ * Check if we can skip reading a page with lookaside entries.
+ */
+static inline bool
+__las_page_skip(WT_SESSION_IMPL *session, WT_REF *ref)
+{
+ WT_TXN *txn;
+ bool skip;
+
+ txn = &session->txn;
+ skip = false;
+
+ if (!__wt_atomic_casv32(&ref->state, WT_REF_LOOKASIDE, WT_REF_LOCKED))
+ return (false);
+
+ /*
+ * Skip lookaside pages if reading without a timestamp and all the
+ * updates in lookaside are in the past.
+ *
+ * If we skip a lookaside page, the tree cannot be left clean:
+ * lookaside entries must be resolved before the tree can be discarded.
+ *
+ * Lookaside eviction preferentially chooses the newest updates when
+ * creating page image with no stable timestamp. If a stable timestamp
+ * has been set, we have to visit the page because eviction chooses old
+ * version of records in that case.
+ *
+ * One case where we may need to visit the page is if lookaside
+ * eviction is active in tree 2 when a checkpoint has started and is
+ * working its way through tree 1. In that case, lookaside may have
+ * created a page image with updates in the future of the checkpoint.
+ */
+ if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT))
+ goto done;
+
+ if (WT_TXNID_LE(txn->snap_min, ref->page_las->las_max_txn))
+ goto done;
+
+ if (!F_ISSET(txn, WT_TXN_HAS_TS_READ) &&
+ !ref->page_las->las_skew_oldest) {
+ skip = true;
+ goto done;
+ }
+
+#ifdef HAVE_TIMESTAMPS
+ /*
+ * Skip lookaside pages if reading as of a timestamp and all the
+ * updates are in the future.
+ */
+ WT_ASSERT(session,
+ !F_ISSET(&session->txn, WT_TXN_HAS_TS_READ) ||
+ __wt_timestamp_cmp(&ref->page_las->onpage_timestamp,
+ &session->txn.read_timestamp) <= 0);
+
+ if (F_ISSET(&session->txn, WT_TXN_HAS_TS_READ) &&
+ ref->page_las->las_skew_oldest &&
+ __wt_timestamp_cmp(
+ &ref->page_las->min_timestamp, &session->txn.read_timestamp) > 0) {
+ skip = true;
+ goto done;
+ }
+#endif
+
+done: WT_PUBLISH(ref->state, WT_REF_LOOKASIDE);
+ return (skip);
+}
+
+/*
* __wt_page_in_func --
* Acquire a hazard pointer to a page; if the page is not in-memory,
* read it from the disk and build an in-memory version.
@@ -472,29 +534,22 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
if (LF_ISSET(WT_READ_NO_EMPTY) &&
__wt_delete_page_skip(session, ref, false))
return (WT_NOTFOUND);
- /* FALLTHROUGH */
- case WT_REF_DISK:
+ goto read;
case WT_REF_LOOKASIDE:
if (LF_ISSET(WT_READ_CACHE)) {
- if (ref->state != WT_REF_LOOKASIDE ||
- !LF_ISSET(WT_READ_LOOKASIDE))
+ if (!LF_ISSET(WT_READ_LOOKASIDE))
return (WT_NOTFOUND);
-#ifdef HAVE_TIMESTAMPS
- /*
- * Skip lookaside pages if reading as of a
- * timestamp and all the updates are in the
- * future.
- */
- if (F_ISSET(
- &session->txn, WT_TXN_HAS_TS_READ) &&
- __wt_timestamp_cmp(
- &ref->page_las->min_timestamp,
- &session->txn.read_timestamp) > 0)
+ if (__las_page_skip(session, ref)) {
+ __wt_tree_modify_set(session);
return (WT_NOTFOUND);
-#endif
+ }
}
+ goto read;
+ case WT_REF_DISK:
+ if (LF_ISSET(WT_READ_CACHE))
+ return (WT_NOTFOUND);
- /*
+read: /*
* The page isn't in memory, read it. If this thread is
* allowed to do eviction work, check for space in the
* cache.
@@ -605,7 +660,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
continue;
}
- /*
+skip_evict: /*
* If we read the page and are configured to not trash
* the cache, and no other thread has already used the
* page, set the read generation so the page is evicted
@@ -624,7 +679,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
__wt_cache_read_gen_new(session, page);
} else if (!LF_ISSET(WT_READ_NO_GEN))
__wt_cache_read_gen_bump(session, page);
-skip_evict:
+
/*
* Check if we need an autocommit transaction.
* Starting a transaction can trigger eviction, so skip
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index 66f0478c542..dc699a6b23b 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -1383,11 +1383,11 @@ __split_multi_inmem(
WT_DECL_RET;
WT_PAGE *page;
WT_SAVE_UPD *supd;
- WT_UPDATE *prev_upd, *upd;
+ WT_UPDATE *upd;
uint64_t recno;
uint32_t i, slot;
- WT_ASSERT(session, multi->las_pageid == 0);
+ WT_ASSERT(session, multi->page_las.las_pageid == 0);
/*
* In 04/2016, we removed column-store record numbers from the WT_PAGE
@@ -1474,36 +1474,6 @@ __split_multi_inmem(
break;
WT_ILLEGAL_VALUE_ERR(session);
}
-
- /*
- * Discard the update used to create the on-page disk image.
- * This is not just a performance issue: if the update used to
- * create the value for this on-page disk image was a modify,
- * and it was applied to the previous on-page value to
- * determine a value to write to this disk image, that update
- * cannot be applied to the new on-page value without risking
- * corruption.
- */
- if (supd->onpage_upd != NULL) {
- for (prev_upd = upd; prev_upd != NULL &&
- prev_upd->next != supd->onpage_upd;
- prev_upd = prev_upd->next)
- ;
- /*
- * If the on-page update was in fact a tombstone, there
- * will be no value on the page. Don't throw the
- * tombstone away: we may need it to correctly resolve
- * modifications.
- */
- if (supd->onpage_upd->type == WT_UPDATE_DELETED &&
- prev_upd != NULL)
- prev_upd = prev_upd->next;
- if (prev_upd != NULL) {
- __wt_update_obsolete_free(
- session, page, prev_upd->next);
- prev_upd->next = NULL;
- }
- }
}
/*
@@ -1624,7 +1594,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
* There can be an address or a disk image or both, but if there is
* neither, there must be a backing lookaside page.
*/
- WT_ASSERT(session, multi->las_pageid != 0 ||
+ WT_ASSERT(session, multi->page_las.las_pageid != 0 ||
multi->addr.addr != NULL || multi->disk_image != NULL);
/* If closing the file, there better be an address. */
@@ -1664,7 +1634,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
* WT_REF.state. Regardless of a backing address, WT_REF_LOOKASIDE
* overrides WT_REF_DISK.
*/
- if (multi->las_pageid != 0) {
+ if (multi->page_las.las_pageid != 0) {
/*
* We should not have a disk image if we did lookaside
* eviction.
@@ -1672,11 +1642,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
WT_ASSERT(session, multi->disk_image == NULL);
WT_RET(__wt_calloc_one(session, &ref->page_las));
- ref->page_las->las_pageid = multi->las_pageid;
-#ifdef HAVE_TIMESTAMPS
- __wt_timestamp_set(
- &ref->page_las->min_timestamp, &multi->las_min_timestamp);
-#endif
+ *ref->page_las = multi->page_las;
ref->state = WT_REF_LOOKASIDE;
}
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c
index 15d83169ea2..d15852af935 100644
--- a/src/btree/bt_sync.c
+++ b/src/btree/bt_sync.c
@@ -107,36 +107,6 @@ __sync_dup_walk(
}
/*
- * __sync_evict_page --
- * Attempt to evict a page during a checkpoint walk.
- */
-static int
-__sync_evict_page(WT_SESSION_IMPL *session, WT_REF **walkp, uint32_t flags)
-{
- WT_DECL_RET;
- WT_REF *next, *to_evict;
-
- to_evict = *walkp;
- next = NULL;
-
- /*
- * Get the ref after the page we're trying to evicting. If the
- * eviction is successful, the walk will continue from here.
- */
- WT_RET(__sync_dup_walk(session, to_evict, flags, &next));
- WT_ERR(__wt_tree_walk(session, &next, flags));
-
- WT_ERR(__wt_page_release_evict(session, to_evict));
-
- /* Success: continue the walk at the next page. */
- *walkp = next;
- return (0);
-
-err: WT_TRET(__wt_page_release(session, next, flags));
- return (ret);
-}
-
-/*
* __sync_file --
* Flush pages for a specific file.
*/
@@ -153,13 +123,13 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
uint64_t internal_bytes, internal_pages, leaf_bytes, leaf_pages;
uint64_t oldest_id, saved_pinned_id;
uint32_t flags;
- bool evict_failed, skip_walk, timer;
+ bool timer, tried_eviction;
conn = S2C(session);
btree = S2BT(session);
prev = walk = NULL;
txn = &session->txn;
- evict_failed = skip_walk = false;
+ tried_eviction = false;
flags = WT_READ_CACHE | WT_READ_NO_GEN;
internal_bytes = leaf_bytes = 0;
@@ -266,12 +236,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
LF_SET(WT_READ_LOOKASIDE | WT_READ_WONT_NEED);
for (;;) {
- if (!skip_walk) {
- WT_ERR(__sync_dup_walk(
- session, walk, flags, &prev));
- WT_ERR(__wt_tree_walk(session, &walk, flags));
- }
- skip_walk = false;
+ WT_ERR(__sync_dup_walk(session, walk, flags, &prev));
+ WT_ERR(__wt_tree_walk(session, &walk, flags));
if (walk == NULL)
break;
@@ -317,29 +283,43 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
* visit. We want to avoid this code being too special
* purpose, so try to reuse the ordinary eviction path.
*
- * If eviction succeeded, it steps to the next ref, so
- * we have to skip the next walk. If eviction fails,
- * remember so we don't retry it.
+ * Regardless of whether eviction succeeds or fails,
+ * the walk continues from the previous location. We
+ * remember whether we tried eviction, and don't try
+ * again. Even if eviction fails (the page may stay in
+ * cache clean but with history that cannot be
+ * discarded), that is not wasted effort because
+ * checkpoint doesn't need to write the page again.
*/
if (!WT_PAGE_IS_INTERNAL(page) &&
page->read_gen == WT_READGEN_WONT_NEED &&
- !evict_failed) {
- if ((ret = __sync_evict_page(
- session, &walk, flags)) == 0) {
- evict_failed = false;
- skip_walk = true;
- } else {
- walk = prev;
- prev = NULL;
- evict_failed = true;
- }
- WT_ERR_BUSY_OK(ret);
+ !tried_eviction) {
+ WT_ERR_BUSY_OK(
+ __wt_page_release_evict(session, walk));
+ walk = prev;
+ prev = NULL;
+ tried_eviction = true;
continue;
}
+ tried_eviction = false;
- evict_failed = false;
WT_ERR(__wt_reconcile(
session, walk, NULL, WT_REC_CHECKPOINT, NULL));
+
+ /*
+ * Update checkpoint IO tracking data if configured
+ * to log verbose progress messages.
+ */
+ if (conn->ckpt_timer_start.tv_sec > 0) {
+ conn->ckpt_write_bytes +=
+ page->memory_footprint;
+ ++conn->ckpt_write_pages;
+
+ /* Periodically log checkpoint progress. */
+ if (conn->ckpt_write_pages % 5000 == 0)
+ __wt_checkpoint_progress(
+ session, false);
+ }
}
break;
case WT_SYNC_CLOSE:
diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c
index 13516d80c58..e2ebd38e82f 100644
--- a/src/cache/cache_las.c
+++ b/src/cache/cache_las.c
@@ -333,7 +333,7 @@ __wt_las_insert_block(WT_SESSION_IMPL *session,
insert_cnt = 0;
btree_id = S2BT(session)->id;
- las_pageid = multi->las_pageid =
+ las_pageid = multi->page_las.las_pageid =
__wt_atomic_add64(&S2BT(session)->las_pageid, 1);
/*
@@ -437,6 +437,57 @@ __wt_las_insert_block(WT_SESSION_IMPL *session,
}
/*
+ * __wt_las_cursor_position --
+ * Position a lookaside cursor at the beginning of a block.
+ *
+ * There may be no block of lookaside entries if they have been removed by
+ * WT_CONNECTION::rollback_to_stable.
+ */
+int
+__wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid)
+{
+ WT_ITEM las_key;
+ uint64_t las_counter, las_pageid;
+ uint32_t las_id;
+ int exact;
+
+ /*
+ * Because of the special visibility rules for lookaside, a new block
+ * can appear in between our search and the block of interest. Keep
+ * trying until we find it.
+ */
+ for (;;) {
+ WT_CLEAR(las_key);
+ cursor->set_key(cursor,
+ btree_id, pageid, (uint64_t)0, &las_key);
+ WT_RET(cursor->search_near(cursor, &exact));
+ if (exact < 0) {
+ WT_RET(cursor->next(cursor));
+
+ /*
+ * Because of the special visibility rules for
+ * lookaside, a new block can appear in between our
+ * search and the block of interest. Keep trying while
+ * we have a key lower that we expect.
+ *
+ * There may be no block of lookaside entries if they
+ * have been removed by
+ * WT_CONNECTION::rollback_to_stable.
+ */
+ WT_RET(cursor->get_key(cursor,
+ &las_id, &las_pageid, &las_counter, &las_key));
+ if (las_id < btree_id || (las_id == btree_id &&
+ pageid != 0 && las_pageid < pageid))
+ continue;
+ }
+
+ return (0);
+ }
+
+ /* NOTREACHED */
+}
+
+/*
* __wt_las_remove_block --
* Remove all records matching a key prefix from the lookaside store.
*/
@@ -448,7 +499,6 @@ __wt_las_remove_block(WT_SESSION_IMPL *session,
WT_ITEM las_key;
uint64_t las_counter, las_pageid, remove_cnt;
uint32_t las_id, session_flags;
- int exact;
bool local_cursor;
remove_cnt = 0;
@@ -464,10 +514,7 @@ __wt_las_remove_block(WT_SESSION_IMPL *session,
* Search for the block's unique prefix and step through all matching
* records, removing them.
*/
- las_key.size = 0;
- cursor->set_key(cursor, btree_id, pageid, (uint64_t)0, &las_key);
- if ((ret = cursor->search_near(cursor, &exact)) == 0 && exact < 0)
- ret = cursor->next(cursor);
+ ret = __wt_las_cursor_position(cursor, btree_id, pageid);
for (; ret == 0; ret = cursor->next(cursor)) {
WT_ERR(cursor->get_key(cursor,
&las_id, &las_pageid, &las_counter, &las_key));
diff --git a/src/config/config_def.c b/src/config/config_def.c
index 4edd436712b..e7ead608672 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -177,14 +177,14 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
",\"page_split_race\"]",
NULL, 0 },
{ "verbose", "list",
- NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\","
- "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\","
- "\"handleops\",\"log\",\"lookaside_activity\",\"lsm\","
- "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
- "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\","
- "\"salvage\",\"shared_cache\",\"split\",\"temporary\","
- "\"thread_group\",\"timestamp\",\"transaction\",\"verify\","
- "\"version\",\"write\"]",
+ NULL, "choices=[\"api\",\"block\",\"checkpoint\","
+ "\"checkpoint_progress\",\"compact\",\"evict\",\"evict_stuck\","
+ "\"evictserver\",\"fileops\",\"handleops\",\"log\","
+ "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\","
+ "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\","
+ "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\","
+ "\"split\",\"temporary\",\"thread_group\",\"timestamp\","
+ "\"transaction\",\"verify\",\"version\",\"write\"]",
NULL, 0 },
{ NULL, NULL, NULL, NULL, NULL, 0 }
};
@@ -832,14 +832,14 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
{ "use_environment", "boolean", NULL, NULL, NULL, 0 },
{ "use_environment_priv", "boolean", NULL, NULL, NULL, 0 },
{ "verbose", "list",
- NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\","
- "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\","
- "\"handleops\",\"log\",\"lookaside_activity\",\"lsm\","
- "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
- "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\","
- "\"salvage\",\"shared_cache\",\"split\",\"temporary\","
- "\"thread_group\",\"timestamp\",\"transaction\",\"verify\","
- "\"version\",\"write\"]",
+ NULL, "choices=[\"api\",\"block\",\"checkpoint\","
+ "\"checkpoint_progress\",\"compact\",\"evict\",\"evict_stuck\","
+ "\"evictserver\",\"fileops\",\"handleops\",\"log\","
+ "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\","
+ "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\","
+ "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\","
+ "\"split\",\"temporary\",\"thread_group\",\"timestamp\","
+ "\"transaction\",\"verify\",\"version\",\"write\"]",
NULL, 0 },
{ "write_through", "list",
NULL, "choices=[\"data\",\"log\"]",
@@ -927,14 +927,14 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
{ "use_environment", "boolean", NULL, NULL, NULL, 0 },
{ "use_environment_priv", "boolean", NULL, NULL, NULL, 0 },
{ "verbose", "list",
- NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\","
- "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\","
- "\"handleops\",\"log\",\"lookaside_activity\",\"lsm\","
- "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
- "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\","
- "\"salvage\",\"shared_cache\",\"split\",\"temporary\","
- "\"thread_group\",\"timestamp\",\"transaction\",\"verify\","
- "\"version\",\"write\"]",
+ NULL, "choices=[\"api\",\"block\",\"checkpoint\","
+ "\"checkpoint_progress\",\"compact\",\"evict\",\"evict_stuck\","
+ "\"evictserver\",\"fileops\",\"handleops\",\"log\","
+ "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\","
+ "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\","
+ "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\","
+ "\"split\",\"temporary\",\"thread_group\",\"timestamp\","
+ "\"transaction\",\"verify\",\"version\",\"write\"]",
NULL, 0 },
{ "version", "string", NULL, NULL, NULL, 0 },
{ "write_through", "list",
@@ -1017,14 +1017,14 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
NULL, NULL,
confchk_wiredtiger_open_transaction_sync_subconfigs, 2 },
{ "verbose", "list",
- NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\","
- "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\","
- "\"handleops\",\"log\",\"lookaside_activity\",\"lsm\","
- "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
- "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\","
- "\"salvage\",\"shared_cache\",\"split\",\"temporary\","
- "\"thread_group\",\"timestamp\",\"transaction\",\"verify\","
- "\"version\",\"write\"]",
+ NULL, "choices=[\"api\",\"block\",\"checkpoint\","
+ "\"checkpoint_progress\",\"compact\",\"evict\",\"evict_stuck\","
+ "\"evictserver\",\"fileops\",\"handleops\",\"log\","
+ "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\","
+ "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\","
+ "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\","
+ "\"split\",\"temporary\",\"thread_group\",\"timestamp\","
+ "\"transaction\",\"verify\",\"version\",\"write\"]",
NULL, 0 },
{ "version", "string", NULL, NULL, NULL, 0 },
{ "write_through", "list",
@@ -1107,14 +1107,14 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
NULL, NULL,
confchk_wiredtiger_open_transaction_sync_subconfigs, 2 },
{ "verbose", "list",
- NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\","
- "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\","
- "\"handleops\",\"log\",\"lookaside_activity\",\"lsm\","
- "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
- "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\","
- "\"salvage\",\"shared_cache\",\"split\",\"temporary\","
- "\"thread_group\",\"timestamp\",\"transaction\",\"verify\","
- "\"version\",\"write\"]",
+ NULL, "choices=[\"api\",\"block\",\"checkpoint\","
+ "\"checkpoint_progress\",\"compact\",\"evict\",\"evict_stuck\","
+ "\"evictserver\",\"fileops\",\"handleops\",\"log\","
+ "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\","
+ "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\","
+ "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\","
+ "\"split\",\"temporary\",\"thread_group\",\"timestamp\","
+ "\"transaction\",\"verify\",\"version\",\"write\"]",
NULL, 0 },
{ "write_through", "list",
NULL, "choices=[\"data\",\"log\"]",
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index f7525f22787..5f77f27ee3f 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -1808,6 +1808,7 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[])
{ "api", WT_VERB_API },
{ "block", WT_VERB_BLOCK },
{ "checkpoint", WT_VERB_CHECKPOINT },
+ { "checkpoint_progress",WT_VERB_CHECKPOINT_PROGRESS },
{ "compact", WT_VERB_COMPACT },
{ "evict", WT_VERB_EVICT },
{ "evict_stuck", WT_VERB_EVICT_STUCK },
diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c
index c83fb544982..007aa8757da 100644
--- a/src/conn/conn_cache.c
+++ b/src/conn/conn_cache.c
@@ -266,6 +266,19 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session)
WT_STAT_SET(session, stats, cache_pages_dirty,
cache->pages_dirty_intl + cache->pages_dirty_leaf);
+ WT_STAT_CONN_SET(session, cache_eviction_state, cache->flags);
+ WT_STAT_CONN_SET(session,
+ cache_eviction_aggressive_set, cache->evict_aggressive_score);
+ WT_STAT_CONN_SET(session,
+ cache_eviction_empty_score, cache->evict_empty_score);
+ WT_STAT_CONN_SET(session,
+ cache_lookaside_score, __wt_cache_lookaside_score(cache));
+
+ WT_STAT_CONN_SET(session,
+ cache_eviction_active_workers, conn->evict_threads.current_threads);
+ WT_STAT_CONN_SET(session, cache_eviction_stable_state_workers,
+ cache->evict_tune_workers_best);
+
/*
* The number of files with active walks ~= number of hazard pointers
* in the walk session. Note: reading without locking.
diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c
index 4475b27a7b8..afe29284d06 100644
--- a/src/conn/conn_cache_pool.c
+++ b/src/conn/conn_cache_pool.c
@@ -135,7 +135,7 @@ __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg)
if (__wt_config_gets(session, &cfg[1],
"shared_cache.size", &cval) == 0 && cval.val != 0)
size = (uint64_t)cval.val;
- else
+ else
size = cp->size;
if (__wt_config_gets(session, &cfg[1],
"shared_cache.chunk", &cval) == 0 && cval.val != 0)
diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c
index 2606c9d083b..7f55b1cc4fd 100644
--- a/src/conn/conn_dhandle.c
+++ b/src/conn/conn_dhandle.c
@@ -9,6 +9,81 @@
#include "wt_internal.h"
/*
+ * __conn_dhandle_config_clear --
+ * Clear the underlying object's configuration information.
+ *
+ * Operates on the session's current data handle (session->dhandle). Does
+ * nothing if the handle has no configuration array; otherwise frees every
+ * string up to the first NULL entry, then the array itself.
+ */
+static void
+__conn_dhandle_config_clear(WT_SESSION_IMPL *session)
+{
+ WT_DATA_HANDLE *dhandle;
+ const char **a;
+
+ dhandle = session->dhandle;
+
+ if (dhandle->cfg == NULL)
+ return;
+ for (a = dhandle->cfg; *a != NULL; ++a)
+ __wt_free(session, *a);
+ __wt_free(session, dhandle->cfg);
+}
+
+/*
+ * __conn_dhandle_config_set --
+ * Set up a data handle's configuration information.
+ *
+ * Works on the session's current data handle (session->dhandle): entry 0
+ * of dhandle->cfg gets a copy of the base metadata configuration for the
+ * handle's type (file or table) and entry 1 takes over the string returned
+ * by the metadata search. Returns 0 on success, ENOENT if the metadata
+ * search reports WT_NOTFOUND, otherwise the error from the failing call.
+ *
+ * NOTE(review): for a handle type other than btree or table, entry 0 is
+ * not assigned here; presumably callers never pass such a handle -- confirm.
+ */
+static int
+__conn_dhandle_config_set(WT_SESSION_IMPL *session)
+{
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ char *metaconf;
+
+ dhandle = session->dhandle;
+
+ /*
+ * Read the object's entry from the metadata file, we're done if we
+ * don't find one.
+ */
+ if ((ret =
+ __wt_metadata_search(session, dhandle->name, &metaconf)) != 0) {
+ if (ret == WT_NOTFOUND)
+ ret = ENOENT;
+ WT_RET(ret);
+ }
+
+ /*
+ * The defaults are included because persistent configuration
+ * information is stored in the metadata file and it may be from an
+ * earlier version of WiredTiger. If defaults are included in the
+ * configuration, we can add new configuration strings without
+ * upgrading the metadata file or writing special code in case a
+ * configuration string isn't initialized, as long as the new
+ * configuration string has an appropriate default value.
+ *
+ * The error handling is a little odd, but be careful: we're holding a
+ * chunk of allocated memory in metaconf. If we fail before we copy a
+ * reference to it into the object's configuration array, we must free
+ * it, after the copy, we don't want to free it.
+ */
+ WT_ERR(__wt_calloc_def(session, 3, &dhandle->cfg));
+ switch (dhandle->type) {
+ case WT_DHANDLE_TYPE_BTREE:
+ WT_ERR(__wt_strdup(session,
+ WT_CONFIG_BASE(session, file_meta), &dhandle->cfg[0]));
+ break;
+ case WT_DHANDLE_TYPE_TABLE:
+ WT_ERR(__wt_strdup(session,
+ WT_CONFIG_BASE(session, table_meta), &dhandle->cfg[0]));
+ break;
+ }
+ dhandle->cfg[1] = metaconf;
+ return (0);
+
+err: __wt_free(session, metaconf);
+ return (ret);
+}
+
+/*
* __conn_dhandle_destroy --
* Destroy a data handle.
*/
@@ -30,6 +105,7 @@ __conn_dhandle_destroy(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle)
__wt_rwlock_destroy(session, &dhandle->rwlock);
__wt_free(session, dhandle->name);
__wt_free(session, dhandle->checkpoint);
+ __conn_dhandle_config_clear(session);
__wt_spin_destroy(session, &dhandle->close_lock);
__wt_stat_dsrc_discard(session, dhandle);
__wt_overwrite_and_free(session, dhandle);
@@ -316,81 +392,6 @@ err: __wt_spin_unlock(session, &dhandle->close_lock);
}
/*
- * __conn_dhandle_config_clear --
- * Clear the underlying object's configuration information.
- */
-static void
-__conn_dhandle_config_clear(WT_SESSION_IMPL *session)
-{
- WT_DATA_HANDLE *dhandle;
- const char **a;
-
- dhandle = session->dhandle;
-
- if (dhandle->cfg == NULL)
- return;
- for (a = dhandle->cfg; *a != NULL; ++a)
- __wt_free(session, *a);
- __wt_free(session, dhandle->cfg);
-}
-
-/*
- * __conn_dhandle_config_set --
- * Set up a btree handle's configuration information.
- */
-static int
-__conn_dhandle_config_set(WT_SESSION_IMPL *session)
-{
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- char *metaconf;
-
- dhandle = session->dhandle;
-
- /*
- * Read the object's entry from the metadata file, we're done if we
- * don't find one.
- */
- if ((ret =
- __wt_metadata_search(session, dhandle->name, &metaconf)) != 0) {
- if (ret == WT_NOTFOUND)
- ret = ENOENT;
- WT_RET(ret);
- }
-
- /*
- * The defaults are included because persistent configuration
- * information is stored in the metadata file and it may be from an
- * earlier version of WiredTiger. If defaults are included in the
- * configuration, we can add new configuration strings without
- * upgrading the metadata file or writing special code in case a
- * configuration string isn't initialized, as long as the new
- * configuration string has an appropriate default value.
- *
- * The error handling is a little odd, but be careful: we're holding a
- * chunk of allocated memory in metaconf. If we fail before we copy a
- * reference to it into the object's configuration array, we must free
- * it, after the copy, we don't want to free it.
- */
- WT_ERR(__wt_calloc_def(session, 3, &dhandle->cfg));
- switch (dhandle->type) {
- case WT_DHANDLE_TYPE_BTREE:
- WT_ERR(__wt_strdup(session,
- WT_CONFIG_BASE(session, file_meta), &dhandle->cfg[0]));
- break;
- case WT_DHANDLE_TYPE_TABLE:
- WT_ERR(__wt_strdup(session,
- WT_CONFIG_BASE(session, table_meta), &dhandle->cfg[0]));
- break;
- }
- dhandle->cfg[1] = metaconf;
- return (0);
-
-err: __wt_free(session, metaconf);
- return (ret);
-}
-
-/*
* __wt_conn_dhandle_open --
* Open the current data handle.
*/
@@ -746,7 +747,6 @@ __wt_conn_dhandle_discard_single(
* After successfully removing the handle, clean it up.
*/
if (ret == 0 || final) {
- __conn_dhandle_config_clear(session);
WT_TRET(__conn_dhandle_destroy(session, dhandle));
session->dhandle = NULL;
}
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index 0205dbb08e3..02851492039 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -617,8 +617,6 @@ __evict_update_work(WT_SESSION_IMPL *session)
F_CLR(cache, WT_CACHE_EVICT_CLEAN | WT_CACHE_EVICT_CLEAN_HARD);
}
- WT_STAT_CONN_SET(session, cache_eviction_state, cache->flags);
-
return (F_ISSET(cache, WT_CACHE_EVICT_ALL | WT_CACHE_EVICT_URGENT));
}
@@ -727,9 +725,6 @@ __evict_pass(WT_SESSION_IMPL *session)
txn_global->current != oldest_id &&
cache->evict_aggressive_score < 100)
++cache->evict_aggressive_score;
- WT_STAT_CONN_SET(session,
- cache_eviction_aggressive_set,
- cache->evict_aggressive_score);
prev = now;
prev_oldest_id = oldest_id;
}
@@ -761,12 +756,8 @@ __evict_pass(WT_SESSION_IMPL *session)
"%s", "unable to reach eviction goal");
break;
} else {
- if (cache->evict_aggressive_score > 0) {
+ if (cache->evict_aggressive_score > 0)
--cache->evict_aggressive_score;
- WT_STAT_CONN_SET(session,
- cache_eviction_aggressive_set,
- cache->evict_aggressive_score);
- }
loop = 0;
eviction_progress = cache->eviction_progress;
}
@@ -983,8 +974,6 @@ __evict_tune_workers(WT_SESSION_IMPL *session)
if (conn->evict_threads_max == conn->evict_threads_min)
return;
- eviction_progress_rate = 0;
-
__wt_epoch(session, &current_time);
time_diff = WT_TIMEDIFF_MS(current_time, cache->evict_tune_last_time);
@@ -1098,12 +1087,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session)
WT_STAT_CONN_INCR(session,
cache_eviction_worker_removed);
}
- WT_STAT_CONN_SET(session,
- cache_eviction_stable_state_workers,
- cache->evict_tune_workers_best);
cache->evict_tune_stable = true;
- WT_STAT_CONN_SET(session, cache_eviction_active_workers,
- conn->evict_threads.current_threads);
goto done;
}
}
@@ -1135,9 +1119,6 @@ __evict_tune_workers(WT_SESSION_IMPL *session)
cache->evict_tune_last_action_time = current_time;
}
- WT_STAT_CONN_SET(session, cache_eviction_active_workers,
- conn->evict_threads.current_threads);
-
done: cache->evict_tune_last_time = current_time;
cache->evict_tune_progress_last = eviction_progress;
}
@@ -1187,11 +1168,8 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
cache = S2C(session)->cache;
/* Age out the score of how much the queue has been empty recently. */
- if (cache->evict_empty_score > 0) {
+ if (cache->evict_empty_score > 0)
--cache->evict_empty_score;
- WT_STAT_CONN_SET(session, cache_eviction_empty_score,
- cache->evict_empty_score);
- }
/* Fill the next queue (that isn't the urgent queue). */
queue = cache->evict_fill_queue;
@@ -1221,14 +1199,10 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
*/
if (__evict_queue_empty(queue, false)) {
if (F_ISSET(cache,
- WT_CACHE_EVICT_CLEAN_HARD | WT_CACHE_EVICT_DIRTY_HARD)) {
+ WT_CACHE_EVICT_CLEAN_HARD | WT_CACHE_EVICT_DIRTY_HARD))
cache->evict_empty_score = WT_MIN(
cache->evict_empty_score + WT_EVICT_SCORE_BUMP,
WT_EVICT_SCORE_MAX);
- WT_STAT_CONN_SET(session,
- cache_eviction_empty_score,
- cache->evict_empty_score);
- }
WT_STAT_CONN_INCR(session, cache_eviction_queue_empty);
} else
WT_STAT_CONN_INCR(session, cache_eviction_queue_not_empty);
@@ -1897,6 +1871,24 @@ __evict_walk_file(WT_SESSION_IMPL *session,
F_ISSET(btree, WT_BTREE_LOOKASIDE))
goto fast;
+ /*
+ * If application threads are blocked on eviction of clean
+ * pages, and the only thing preventing a clean leaf page from
+ * being evicted is it contains historical data, mark it dirty
+ * so we can do lookaside eviction. We also mark the tree
+ * dirty to avoid an assertion that we don't discard dirty
+ * pages from a clean tree.
+ */
+ if (F_ISSET(cache, WT_CACHE_EVICT_CLEAN_HARD) &&
+ !F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE) &&
+ !WT_PAGE_IS_INTERNAL(page) &&
+ !modified && page->modify != NULL &&
+ !__wt_txn_visible_all(session, page->modify->rec_max_txn,
+ WT_TIMESTAMP_NULL(&page->modify->rec_max_timestamp))) {
+ __wt_page_modify_set(session, page);
+ goto fast;
+ }
+
/* Skip clean pages if appropriate. */
if (!modified && !F_ISSET(cache, WT_CACHE_EVICT_CLEAN))
continue;
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c
index edf80ec4460..103c93a075b 100644
--- a/src/evict/evict_page.c
+++ b/src/evict/evict_page.c
@@ -364,16 +364,11 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
* re-instantiate the page in memory, else discard the page.
*/
if (mod->mod_disk_image == NULL) {
- if (mod->mod_replace_las_pageid != 0) {
+ if (mod->mod_page_las.las_pageid != 0) {
WT_RET(
__wt_calloc_one(session, &ref->page_las));
- ref->page_las->las_pageid =
- mod->mod_replace_las_pageid;
-#ifdef HAVE_TIMESTAMPS
- __wt_timestamp_set(
- &ref->page_las->min_timestamp,
- &mod->mod_replace_las_min_timestamp);
-#endif
+ *ref->page_las = mod->mod_page_las;
+ __wt_page_modify_clear(session, ref->page);
__wt_ref_out(session, ref);
WT_PUBLISH(ref->state, WT_REF_LOOKASIDE);
} else {
@@ -567,7 +562,7 @@ __evict_review(
if (F_ISSET(conn, WT_CONN_IN_MEMORY))
LF_SET(WT_REC_IN_MEMORY |
WT_REC_SCRUB | WT_REC_UPDATE_RESTORE);
- else {
+ else if (!WT_IS_METADATA(session->dhandle)) {
if (!WT_SESSION_IS_CHECKPOINT(session)) {
LF_SET(WT_REC_UPDATE_RESTORE);
@@ -576,18 +571,13 @@ __evict_review(
}
/*
- * If the cache is nearly stuck, check if
- * reconciliation suggests trying the lookaside table
- * unless lookaside eviction is disabled globally.
- *
- * We don't wait until the cache is completely stuck:
- * for workloads where lookaside eviction is necessary
- * to make progress, we don't want a single successful
- * page eviction to make the cache "unstuck" so we have
- * to wait again before evicting the next page.
+ * If the cache is under pressure with many updates
+ * that can't be evicted, check if reconciliation
+ * suggests trying the lookaside table.
*/
- if (__wt_cache_nearly_stuck(session) &&
- !F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE))
+ if (!F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE) &&
+ (__wt_cache_lookaside_score(cache) > 50 ||
+ __wt_cache_stuck(session)))
lookaside_retryp = &lookaside_retry;
}
}
@@ -596,11 +586,21 @@ __evict_review(
ret = __wt_reconcile(session, ref, NULL, flags, lookaside_retryp);
/*
- * If reconciliation fails, eviction is stuck and reconciliation
- * reports it might succeed if we use the lookaside table, then
- * configure reconciliation to write those updates to the lookaside
- * table, allowing the eviction of pages we'd otherwise have to retain
- * in cache to support older readers.
+ * If attempting eviction in service of a checkpoint, we may
+ * successfully reconcile but then find that there are updates on the
+ * page too new to evict. Give up evicting in that case: checkpoint
+ * will include the reconciled page when it visits the parent.
+ */
+ if (WT_SESSION_IS_CHECKPOINT(session) && !__wt_page_is_modified(page) &&
+ !__wt_txn_visible_all(session, page->modify->rec_max_txn,
+ WT_TIMESTAMP_NULL(&page->modify->rec_max_timestamp)))
+ return (EBUSY);
+
+ /*
+ * If reconciliation fails but reports it might succeed if we use the
+ * lookaside table, try again with the lookaside table, allowing the
+ * eviction of pages we'd otherwise have to retain in cache to support
+ * older readers.
*/
if (ret == EBUSY && lookaside_retry) {
LF_CLR(WT_REC_SCRUB | WT_REC_UPDATE_RESTORE);
@@ -611,29 +611,16 @@ __evict_review(
WT_RET(ret);
/*
- * If attempting eviction in service of a checkpoint, we may
- * successfully reconcile but then find that there are updates on the
- * page too new to evict. Give up in that case: checkpoint will
- * reconcile the page normally.
- */
- if (WT_SESSION_IS_CHECKPOINT(session) && !__wt_page_is_modified(page) &&
- !LF_ISSET(WT_REC_LOOKASIDE) &&
- !__wt_txn_visible_all(session, page->modify->rec_max_txn,
- WT_TIMESTAMP_NULL(&page->modify->rec_max_timestamp)))
- return (EBUSY);
-
- /*
* Success: assert the page is clean or reconciliation was configured
* for update/restore. If the page is clean, assert that reconciliation
* was configured for a lookaside table, or it's not a durable object
* (currently the lookaside table), or all page updates were globally
* visible.
*/
- WT_ASSERT(session,
- !__wt_page_is_modified(page) || LF_ISSET(WT_REC_UPDATE_RESTORE));
+ WT_ASSERT(session, !__wt_page_is_modified(page) ||
+ LF_ISSET(WT_REC_LOOKASIDE | WT_REC_UPDATE_RESTORE));
WT_ASSERT(session,
__wt_page_is_modified(page) ||
- LF_ISSET(WT_REC_LOOKASIDE) ||
__wt_txn_visible_all(session, page->modify->rec_max_txn,
WT_TIMESTAMP_NULL(&page->modify->rec_max_timestamp)));
diff --git a/src/include/api.h b/src/include/api.h
index e1c3ba9ff3c..aa080d2bcca 100644
--- a/src/include/api.h
+++ b/src/include/api.h
@@ -39,15 +39,10 @@
} while (0)
/* An API call wrapped in a transaction if necessary. */
-#ifdef HAVE_TIMESTAMPS
-#define WT_TXN_TIMESTAMP_FLAG_CHECK(s) __wt_txn_timestamp_flags((s))
-#else
-#define WT_TXN_TIMESTAMP_FLAG_CHECK(s)
-#endif
#define TXN_API_CALL(s, h, n, bt, config, cfg) do { \
bool __autotxn = false; \
API_CALL(s, h, n, bt, config, cfg); \
- WT_TXN_TIMESTAMP_FLAG_CHECK(s); \
+ __wt_txn_timestamp_flags(s); \
__autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\
if (__autotxn) \
F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT)
@@ -56,7 +51,7 @@
#define TXN_API_CALL_NOCONF(s, h, n, dh) do { \
bool __autotxn = false; \
API_CALL_NOCONF(s, h, n, dh); \
- WT_TXN_TIMESTAMP_FLAG_CHECK(s); \
+ __wt_txn_timestamp_flags(s); \
__autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\
if (__autotxn) \
F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT)
diff --git a/src/include/btmem.h b/src/include/btmem.h
index 158fcf87d29..c3646a2ae59 100644
--- a/src/include/btmem.h
+++ b/src/include/btmem.h
@@ -186,6 +186,19 @@ struct __wt_ovfl_reuse {
",value_format=" WT_UNCHECKED_STRING(QuBu)
/*
+ * WT_PAGE_LOOKASIDE --
+ * Related information for on-disk pages with lookaside entries.
+ *
+ * The two timestamp members are declared through WT_DECL_TIMESTAMP, so
+ * they are absent in builds where that macro expands to nothing.
+ */
+struct __wt_page_lookaside {
+ uint64_t las_pageid; /* Page ID in lookaside */
+ uint64_t las_max_txn; /* Maximum transaction ID in
+ lookaside */
+ WT_DECL_TIMESTAMP(min_timestamp) /* Min timestamp in lookaside */
+ WT_DECL_TIMESTAMP(onpage_timestamp) /* Max timestamp on page */
+ bool las_skew_oldest; /* On-page skewed to oldest */
+};
+
+/*
* WT_PAGE_MODIFY --
* When a page is modified, there's additional information to maintain.
*/
@@ -241,17 +254,14 @@ struct __wt_page_modify {
void *disk_image;
/* The page has lookaside entries. */
- uint64_t las_pageid;
- WT_DECL_TIMESTAMP(las_min_timestamp)
+ WT_PAGE_LOOKASIDE page_las;
} r;
#undef mod_replace
#define mod_replace u1.r.replace
#undef mod_disk_image
#define mod_disk_image u1.r.disk_image
-#undef mod_replace_las_pageid
-#define mod_replace_las_pageid u1.r.las_pageid
-#undef mod_replace_las_min_timestamp
-#define mod_replace_las_min_timestamp u1.r.las_min_timestamp
+#undef mod_page_las
+#define mod_page_las u1.r.page_las
struct { /* Multiple replacement blocks */
struct __wt_multi {
@@ -297,8 +307,7 @@ struct __wt_page_modify {
uint32_t size;
uint32_t checksum;
- uint64_t las_pageid;
- WT_DECL_TIMESTAMP(las_min_timestamp)
+ WT_PAGE_LOOKASIDE page_las;
} *multi;
uint32_t multi_entries; /* Multiple blocks element count */
} m;
@@ -721,16 +730,6 @@ struct __wt_page_deleted {
};
/*
- * WT_PAGE_LOOKASIDE --
- * Related information for on-disk pages with lookaside entries.
- */
-struct __wt_page_lookaside {
- uint64_t las_pageid; /* Page ID in lookaside */
- WT_DECL_TIMESTAMP(min_timestamp) /* Oldest timestamp in
- lookaside for the page */
-};
-
-/*
* WT_REF --
* A single in-memory page and the state information used to determine if
* it's OK to dereference the pointer to the page.
diff --git a/src/include/cache.h b/src/include/cache.h
index 456cb0382e4..0a42853b95b 100644
--- a/src/include/cache.h
+++ b/src/include/cache.h
@@ -152,20 +152,28 @@ struct __wt_cache {
#define WT_EVICT_SCORE_BUMP 10
#define WT_EVICT_SCORE_CUTOFF 10
#define WT_EVICT_SCORE_MAX 100
- uint32_t evict_aggressive_score;/* Score of how aggressive eviction
- should be about selecting eviction
- candidates. If eviction is
- struggling to make progress, this
- score rises (up to a maximum of
- 100), at which point the cache is
- "stuck" and transaction will be
- rolled back. */
- uint32_t evict_empty_score; /* Score of how often LRU queues are
- empty on refill. This score varies
- between 0 (if the queue hasn't been
- empty for a long time) and 100 (if
- the queue has been empty the last 10
- times we filled up. */
+ /*
+ * Score of how aggressive eviction should be about selecting eviction
+ * candidates. If eviction is struggling to make progress, this score
+ * rises (up to a maximum of 100), at which point the cache is "stuck"
+ * and transactions will be rolled back.
+ */
+ uint32_t evict_aggressive_score;
+
+ /*
+ * Score of how often LRU queues are empty on refill. This score varies
+ * between 0 (if the queue hasn't been empty for a long time) and 100
+ * (if the queue has been empty the last 10 times we filled it up).
+ */
+ uint32_t evict_empty_score;
+
+ /*
+ * Score of how much pressure storing historical versions is having on
+ * eviction. This score varies between 0, if reconciliation always
+ * sees updates that are globally visible and hence can be discarded,
+ * to 100 if no updates are globally visible.
+ */
+ int32_t evict_lookaside_score;
/*
* Cache pool information.
diff --git a/src/include/cache.i b/src/include/cache.i
index 33b1bf2a7af..e160dbf4d64 100644
--- a/src/include/cache.i
+++ b/src/include/cache.i
@@ -79,22 +79,6 @@ __wt_cache_read_gen_new(WT_SESSION_IMPL *session, WT_PAGE *page)
}
/*
- * __wt_cache_nearly_stuck --
- * Indicate if the cache is nearly stuck.
- */
-static inline bool
-__wt_cache_nearly_stuck(WT_SESSION_IMPL *session)
-{
- WT_CACHE *cache;
-
- cache = S2C(session)->cache;
- return (cache->evict_aggressive_score >=
- (WT_EVICT_SCORE_MAX - WT_EVICT_SCORE_BUMP) &&
- F_ISSET(cache,
- WT_CACHE_EVICT_CLEAN_HARD | WT_CACHE_EVICT_DIRTY_HARD));
-}
-
-/*
* __wt_cache_stuck --
* Indicate if the cache is stuck (i.e., not making progress).
*/
@@ -205,6 +189,43 @@ __wt_cache_bytes_other(WT_CACHE *cache)
}
/*
+ * __wt_cache_lookaside_score --
+ * Get the current lookaside score (between 0 and 100).
+ *
+ * The stored score is a signed value; it is read once and clamped to the
+ * range 0 to 100 before being returned as an unsigned value.
+ */
+static inline uint32_t
+__wt_cache_lookaside_score(WT_CACHE *cache)
+{
+ int32_t global_score;
+
+ global_score = cache->evict_lookaside_score;
+ return ((uint32_t)WT_MIN(WT_MAX(global_score, 0), 100));
+}
+
+/*
+ * __wt_cache_update_lookaside_score --
+ * Update the lookaside score based on how many unstable updates are seen.
+ *
+ * Computes the percentage (integer division) of unstable updates among
+ * those seen and moves the cache-wide score one step toward it: up by one
+ * if the percentage is higher and the score is below 100, down by one if
+ * it is lower and the score is above 0. Does nothing when no updates were
+ * seen.
+ */
+static inline void
+__wt_cache_update_lookaside_score(
+ WT_SESSION_IMPL *session, u_int updates_seen, u_int updates_unstable)
+{
+ WT_CACHE *cache;
+ int32_t global_score, score;
+
+ if (updates_seen == 0)
+ return;
+
+ cache = S2C(session)->cache;
+ score = (int32_t)((100 * updates_unstable) / updates_seen);
+ global_score = cache->evict_lookaside_score;
+
+ if (score > global_score && global_score < 100)
+ __wt_atomic_addi32(&cache->evict_lookaside_score, 1);
+ else if (score < global_score && global_score > 0)
+ __wt_atomic_subi32(&cache->evict_lookaside_score, 1);
+}
+
+/*
* __wt_session_can_wait --
* Return if a session available for a potentially slow operation.
*/
diff --git a/src/include/connection.h b/src/include/connection.h
index 2fa440e4e08..c1d1921bdcc 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -268,6 +268,15 @@ struct __wt_connection_impl {
uint64_t ckpt_time_recent; /* Checkpoint time recent/total */
uint64_t ckpt_time_total;
+ /* Checkpoint stats and verbosity timers */
+ struct timespec ckpt_timer_start;
+ struct timespec ckpt_timer_scrub_end;
+
+ /* Checkpoint progress message data */
+ uint64_t ckpt_progress_msg_count;
+ uint64_t ckpt_write_bytes;
+ uint64_t ckpt_write_pages;
+
uint32_t stat_flags; /* Options declared in flags.py */
/* Connection statistics */
diff --git a/src/include/extern.h b/src/include/extern.h
index fc0b5135882..bbe66abf753 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -207,6 +207,7 @@ extern int __wt_las_cursor_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRI
extern void __wt_las_cursor( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags);
extern int __wt_las_cursor_close( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_insert_block(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CURSOR *cursor, WT_MULTI *multi, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_remove_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern uint32_t __wt_checksum_sw(const void *chunk, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_checksum_init(void);
@@ -791,6 +792,7 @@ extern int __wt_txn_global_shutdown(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_A
extern int __wt_verbose_dump_txn_one(WT_SESSION_IMPL *session, WT_TXN *txn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_verbose_dump_txn(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void __wt_checkpoint_progress(WT_SESSION_IMPL *session, bool closing);
extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[], bool waiting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/include/flags.h b/src/include/flags.h
index dd98234f9e2..23be5fd2e14 100644
--- a/src/include/flags.h
+++ b/src/include/flags.h
@@ -93,34 +93,35 @@
#define WT_VERB_API 0x00000001
#define WT_VERB_BLOCK 0x00000002
#define WT_VERB_CHECKPOINT 0x00000004
-#define WT_VERB_COMPACT 0x00000008
-#define WT_VERB_EVICT 0x00000010
-#define WT_VERB_EVICTSERVER 0x00000020
-#define WT_VERB_EVICT_STUCK 0x00000040
-#define WT_VERB_FILEOPS 0x00000080
-#define WT_VERB_HANDLEOPS 0x00000100
-#define WT_VERB_LOG 0x00000200
-#define WT_VERB_LOOKASIDE 0x00000400
-#define WT_VERB_LSM 0x00000800
-#define WT_VERB_LSM_MANAGER 0x00001000
-#define WT_VERB_METADATA 0x00002000
-#define WT_VERB_MUTEX 0x00004000
-#define WT_VERB_OVERFLOW 0x00008000
-#define WT_VERB_READ 0x00010000
-#define WT_VERB_REBALANCE 0x00020000
-#define WT_VERB_RECONCILE 0x00040000
-#define WT_VERB_RECOVERY 0x00080000
-#define WT_VERB_RECOVERY_PROGRESS 0x00100000
-#define WT_VERB_SALVAGE 0x00200000
-#define WT_VERB_SHARED_CACHE 0x00400000
-#define WT_VERB_SPLIT 0x00800000
-#define WT_VERB_TEMPORARY 0x01000000
-#define WT_VERB_THREAD_GROUP 0x02000000
-#define WT_VERB_TIMESTAMP 0x04000000
-#define WT_VERB_TRANSACTION 0x08000000
-#define WT_VERB_VERIFY 0x10000000
-#define WT_VERB_VERSION 0x20000000
-#define WT_VERB_WRITE 0x40000000
+#define WT_VERB_CHECKPOINT_PROGRESS 0x00000008
+#define WT_VERB_COMPACT 0x00000010
+#define WT_VERB_EVICT 0x00000020
+#define WT_VERB_EVICTSERVER 0x00000040
+#define WT_VERB_EVICT_STUCK 0x00000080
+#define WT_VERB_FILEOPS 0x00000100
+#define WT_VERB_HANDLEOPS 0x00000200
+#define WT_VERB_LOG 0x00000400
+#define WT_VERB_LOOKASIDE 0x00000800
+#define WT_VERB_LSM 0x00001000
+#define WT_VERB_LSM_MANAGER 0x00002000
+#define WT_VERB_METADATA 0x00004000
+#define WT_VERB_MUTEX 0x00008000
+#define WT_VERB_OVERFLOW 0x00010000
+#define WT_VERB_READ 0x00020000
+#define WT_VERB_REBALANCE 0x00040000
+#define WT_VERB_RECONCILE 0x00080000
+#define WT_VERB_RECOVERY 0x00100000
+#define WT_VERB_RECOVERY_PROGRESS 0x00200000
+#define WT_VERB_SALVAGE 0x00400000
+#define WT_VERB_SHARED_CACHE 0x00800000
+#define WT_VERB_SPLIT 0x01000000
+#define WT_VERB_TEMPORARY 0x02000000
+#define WT_VERB_THREAD_GROUP 0x04000000
+#define WT_VERB_TIMESTAMP 0x08000000
+#define WT_VERB_TRANSACTION 0x10000000
+#define WT_VERB_VERIFY 0x20000000
+#define WT_VERB_VERSION 0x40000000
+#define WT_VERB_WRITE 0x80000000
/*
* flags section: END
* DO NOT EDIT: automatically built by dist/flags.py.
diff --git a/src/include/lint.h b/src/include/lint.h
index 97b91c4c061..c02e25592c0 100644
--- a/src/include/lint.h
+++ b/src/include/lint.h
@@ -84,8 +84,8 @@ __wt_atomic_cas_ptr(void *vp, void *orig, void *new) {
return (false);
}
-static inline void WT_BARRIER(void) { return; }
-static inline void WT_FULL_BARRIER(void) { return; }
-static inline void WT_PAUSE(void) { return; }
-static inline void WT_READ_BARRIER(void) { return; }
-static inline void WT_WRITE_BARRIER(void) { return; }
+static inline void WT_BARRIER(void) { }
+static inline void WT_FULL_BARRIER(void) { }
+static inline void WT_PAUSE(void) { }
+static inline void WT_READ_BARRIER(void) { }
+static inline void WT_WRITE_BARRIER(void) { }
diff --git a/src/include/misc.h b/src/include/misc.h
index e7b3e1931ac..2435d37ee20 100644
--- a/src/include/misc.h
+++ b/src/include/misc.h
@@ -275,8 +275,8 @@ typedef struct __wt_timestamp_t wt_timestamp_t;
#define WT_TIMESTAMP_NULL(x) (x)
#else
typedef void wt_timestamp_t;
-#define WT_TIMESTAMP_NULL(x) (NULL)
#define WT_DECL_TIMESTAMP(x)
+#define WT_TIMESTAMP_NULL(x) (NULL)
#endif
/*
diff --git a/src/include/stat.h b/src/include/stat.h
index 922b211bec4..12a7d532496 100644
--- a/src/include/stat.h
+++ b/src/include/stat.h
@@ -341,6 +341,7 @@ struct __wt_connection_stats {
int64_t cache_eviction_internal;
int64_t cache_eviction_split_internal;
int64_t cache_eviction_split_leaf;
+ int64_t cache_lookaside_score;
int64_t cache_lookaside_entries;
int64_t cache_lookaside_insert;
int64_t cache_lookaside_remove;
diff --git a/src/include/txn.i b/src/include/txn.i
index b0b71dbb3d0..9e70632d890 100644
--- a/src/include/txn.i
+++ b/src/include/txn.i
@@ -82,7 +82,9 @@ __wt_timestamp_set_zero(wt_timestamp_t *ts)
{
ts->val = 0;
}
-#else
+
+#else /* WT_TIMESTAMP_SIZE != 8 */
+
#define WT_WITH_TIMESTAMP_READLOCK(s, l, e) do { \
__wt_readlock((s), (l)); \
e; \
@@ -141,6 +143,16 @@ __wt_timestamp_set_zero(wt_timestamp_t *ts)
memset(ts->ts, 0x00, WT_TIMESTAMP_SIZE);
}
#endif /* WT_TIMESTAMP_SIZE == 8 */
+
+#else /* !HAVE_TIMESTAMPS */
+
+#define __wt_timestamp_set(dest, src)
+#define __wt_timestamp_set_inf(ts)
+#define __wt_timestamp_set_zero(ts)
+#define __wt_txn_clear_commit_timestamp(session)
+#define __wt_txn_clear_read_timestamp(session)
+#define __wt_txn_timestamp_flags(session)
+
#endif /* HAVE_TIMESTAMPS */
/*
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index b9801aedfbb..41dd970d3ba 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -2201,11 +2201,11 @@ struct __wt_connection {
* if WiredTiger is configured with --enable-verbose. Options are given
* as a list\, such as <code>"verbose=[evictserver\,read]"</code>., a
* list\, with values chosen from the following options: \c "api"\, \c
- * "block"\, \c "checkpoint"\, \c "compact"\, \c "evict"\, \c
- * "evict_stuck"\, \c "evictserver"\, \c "fileops"\, \c "handleops"\, \c
- * "log"\, \c "lookaside_activity"\, \c "lsm"\, \c "lsm_manager"\, \c
- * "metadata"\, \c "mutex"\, \c "overflow"\, \c "read"\, \c
- * "rebalance"\, \c "reconcile"\, \c "recovery"\, \c
+ * "block"\, \c "checkpoint"\, \c "checkpoint_progress"\, \c "compact"\,
+ * \c "evict"\, \c "evict_stuck"\, \c "evictserver"\, \c "fileops"\, \c
+ * "handleops"\, \c "log"\, \c "lookaside_activity"\, \c "lsm"\, \c
+ * "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c
+ * "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c
* "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c "split"\,
* \c "temporary"\, \c "thread_group"\, \c "timestamp"\, \c
* "transaction"\, \c "verify"\, \c "version"\, \c "write"; default
@@ -2841,9 +2841,9 @@ struct __wt_connection {
* WiredTiger is configured with --enable-verbose. Options are given as a
* list\, such as <code>"verbose=[evictserver\,read]"</code>., a list\, with
* values chosen from the following options: \c "api"\, \c "block"\, \c
- * "checkpoint"\, \c "compact"\, \c "evict"\, \c "evict_stuck"\, \c
- * "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\, \c
- * "lookaside_activity"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c
+ * "checkpoint"\, \c "checkpoint_progress"\, \c "compact"\, \c "evict"\, \c
+ * "evict_stuck"\, \c "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\,
+ * \c "lookaside_activity"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c
* "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c
* "recovery"\, \c "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c
* "split"\, \c "temporary"\, \c "thread_group"\, \c "timestamp"\, \c
@@ -4854,456 +4854,458 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1082
/*! cache: leaf pages split during eviction */
#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1083
+/*! cache: lookaside score */
+#define WT_STAT_CONN_CACHE_LOOKASIDE_SCORE 1084
/*! cache: lookaside table entries */
-#define WT_STAT_CONN_CACHE_LOOKASIDE_ENTRIES 1084
+#define WT_STAT_CONN_CACHE_LOOKASIDE_ENTRIES 1085
/*! cache: lookaside table insert calls */
-#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1085
+#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1086
/*! cache: lookaside table remove calls */
-#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1086
+#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1087
/*! cache: maximum bytes configured */
-#define WT_STAT_CONN_CACHE_BYTES_MAX 1087
+#define WT_STAT_CONN_CACHE_BYTES_MAX 1088
/*! cache: maximum page size at eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1088
+#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1089
/*! cache: modified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1089
+#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1090
/*! cache: modified pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1090
+#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1091
/*! cache: overflow pages read into cache */
-#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1091
+#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1092
/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1092
+#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1093
/*! cache: page written requiring lookaside records */
-#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1093
+#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1094
/*! cache: pages currently held in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_INUSE 1094
+#define WT_STAT_CONN_CACHE_PAGES_INUSE 1095
/*! cache: pages evicted because they exceeded the in-memory maximum count */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1095
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1096
/*!
* cache: pages evicted because they exceeded the in-memory maximum time
* (usecs)
*/
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_TIME 1096
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_TIME 1097
/*! cache: pages evicted because they had chains of deleted items count */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1097
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1098
/*!
* cache: pages evicted because they had chains of deleted items time
* (usecs)
*/
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE_TIME 1098
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE_TIME 1099
/*! cache: pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP 1099
+#define WT_STAT_CONN_CACHE_EVICTION_APP 1100
/*! cache: pages queued for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1100
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1101
/*! cache: pages queued for urgent eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1101
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1102
/*! cache: pages queued for urgent eviction during walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1102
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1103
/*! cache: pages read into cache */
-#define WT_STAT_CONN_CACHE_READ 1103
+#define WT_STAT_CONN_CACHE_READ 1104
/*! cache: pages read into cache requiring lookaside entries */
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1104
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1105
/*! cache: pages requested from the cache */
-#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1105
+#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1106
/*! cache: pages seen by eviction walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1106
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1107
/*! cache: pages selected for eviction unable to be evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1107
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1108
/*! cache: pages walked for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK 1108
+#define WT_STAT_CONN_CACHE_EVICTION_WALK 1109
/*! cache: pages written from cache */
-#define WT_STAT_CONN_CACHE_WRITE 1109
+#define WT_STAT_CONN_CACHE_WRITE 1110
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1110
+#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1111
/*! cache: percentage overhead */
-#define WT_STAT_CONN_CACHE_OVERHEAD 1111
+#define WT_STAT_CONN_CACHE_OVERHEAD 1112
/*! cache: tracked bytes belonging to internal pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1112
+#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1113
/*! cache: tracked bytes belonging to leaf pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_LEAF 1113
+#define WT_STAT_CONN_CACHE_BYTES_LEAF 1114
/*! cache: tracked dirty bytes in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1114
+#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1115
/*! cache: tracked dirty pages in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1115
+#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1116
/*! cache: unmodified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1116
+#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1117
/*! connection: auto adjusting condition resets */
-#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1117
+#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1118
/*! connection: auto adjusting condition wait calls */
-#define WT_STAT_CONN_COND_AUTO_WAIT 1118
+#define WT_STAT_CONN_COND_AUTO_WAIT 1119
/*! connection: detected system time went backwards */
-#define WT_STAT_CONN_TIME_TRAVEL 1119
+#define WT_STAT_CONN_TIME_TRAVEL 1120
/*! connection: files currently open */
-#define WT_STAT_CONN_FILE_OPEN 1120
+#define WT_STAT_CONN_FILE_OPEN 1121
/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1121
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1122
/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1122
+#define WT_STAT_CONN_MEMORY_FREE 1123
/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1123
+#define WT_STAT_CONN_MEMORY_GROW 1124
/*! connection: pthread mutex condition wait calls */
-#define WT_STAT_CONN_COND_WAIT 1124
+#define WT_STAT_CONN_COND_WAIT 1125
/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1125
+#define WT_STAT_CONN_RWLOCK_READ 1126
/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1126
+#define WT_STAT_CONN_RWLOCK_WRITE 1127
/*! connection: total fsync I/Os */
-#define WT_STAT_CONN_FSYNC_IO 1127
+#define WT_STAT_CONN_FSYNC_IO 1128
/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1128
+#define WT_STAT_CONN_READ_IO 1129
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1129
+#define WT_STAT_CONN_WRITE_IO 1130
/*! cursor: cursor create calls */
-#define WT_STAT_CONN_CURSOR_CREATE 1130
+#define WT_STAT_CONN_CURSOR_CREATE 1131
/*! cursor: cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT 1131
+#define WT_STAT_CONN_CURSOR_INSERT 1132
/*! cursor: cursor modify calls */
-#define WT_STAT_CONN_CURSOR_MODIFY 1132
+#define WT_STAT_CONN_CURSOR_MODIFY 1133
/*! cursor: cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT 1133
+#define WT_STAT_CONN_CURSOR_NEXT 1134
/*! cursor: cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 1134
+#define WT_STAT_CONN_CURSOR_PREV 1135
/*! cursor: cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 1135
+#define WT_STAT_CONN_CURSOR_REMOVE 1136
/*! cursor: cursor reserve calls */
-#define WT_STAT_CONN_CURSOR_RESERVE 1136
+#define WT_STAT_CONN_CURSOR_RESERVE 1137
/*! cursor: cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 1137
+#define WT_STAT_CONN_CURSOR_RESET 1138
/*! cursor: cursor restarted searches */
-#define WT_STAT_CONN_CURSOR_RESTART 1138
+#define WT_STAT_CONN_CURSOR_RESTART 1139
/*! cursor: cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 1139
+#define WT_STAT_CONN_CURSOR_SEARCH 1140
/*! cursor: cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1140
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1141
/*! cursor: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1141
+#define WT_STAT_CONN_CURSOR_UPDATE 1142
/*! cursor: truncate calls */
-#define WT_STAT_CONN_CURSOR_TRUNCATE 1142
+#define WT_STAT_CONN_CURSOR_TRUNCATE 1143
/*! data-handle: connection data handles currently active */
-#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1143
+#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1144
/*! data-handle: connection sweep candidate became referenced */
-#define WT_STAT_CONN_DH_SWEEP_REF 1144
+#define WT_STAT_CONN_DH_SWEEP_REF 1145
/*! data-handle: connection sweep dhandles closed */
-#define WT_STAT_CONN_DH_SWEEP_CLOSE 1145
+#define WT_STAT_CONN_DH_SWEEP_CLOSE 1146
/*! data-handle: connection sweep dhandles removed from hash list */
-#define WT_STAT_CONN_DH_SWEEP_REMOVE 1146
+#define WT_STAT_CONN_DH_SWEEP_REMOVE 1147
/*! data-handle: connection sweep time-of-death sets */
-#define WT_STAT_CONN_DH_SWEEP_TOD 1147
+#define WT_STAT_CONN_DH_SWEEP_TOD 1148
/*! data-handle: connection sweeps */
-#define WT_STAT_CONN_DH_SWEEPS 1148
+#define WT_STAT_CONN_DH_SWEEPS 1149
/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1149
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1150
/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1150
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1151
/*! lock: checkpoint lock acquisitions */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1151
+#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1152
/*! lock: checkpoint lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1152
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1153
/*! lock: checkpoint lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1153
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1154
/*!
* lock: dhandle lock application thread time waiting for the dhandle
* lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1154
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1155
/*!
* lock: dhandle lock internal thread time waiting for the dhandle lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1155
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1156
/*! lock: dhandle read lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1156
+#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1157
/*! lock: dhandle write lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1157
+#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1158
/*! lock: metadata lock acquisitions */
-#define WT_STAT_CONN_LOCK_METADATA_COUNT 1158
+#define WT_STAT_CONN_LOCK_METADATA_COUNT 1159
/*! lock: metadata lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1159
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1160
/*! lock: metadata lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1160
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1161
/*! lock: schema lock acquisitions */
-#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1161
+#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1162
/*! lock: schema lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1162
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1163
/*! lock: schema lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1163
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1164
/*!
* lock: table lock application thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1164
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1165
/*!
* lock: table lock internal thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1165
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1166
/*! lock: table read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1166
+#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1167
/*! lock: table write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1167
+#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1168
/*! log: busy returns attempting to switch slots */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1168
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1169
/*! log: force checkpoint calls slept */
-#define WT_STAT_CONN_LOG_FORCE_CKPT_SLEEP 1169
+#define WT_STAT_CONN_LOG_FORCE_CKPT_SLEEP 1170
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1170
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1171
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1171
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1172
/*! log: log files manually zero-filled */
-#define WT_STAT_CONN_LOG_ZERO_FILLS 1172
+#define WT_STAT_CONN_LOG_ZERO_FILLS 1173
/*! log: log flush operations */
-#define WT_STAT_CONN_LOG_FLUSH 1173
+#define WT_STAT_CONN_LOG_FLUSH 1174
/*! log: log force write operations */
-#define WT_STAT_CONN_LOG_FORCE_WRITE 1174
+#define WT_STAT_CONN_LOG_FORCE_WRITE 1175
/*! log: log force write operations skipped */
-#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1175
+#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1176
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1176
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1177
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1177
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1178
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1178
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1179
/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1179
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1180
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1180
+#define WT_STAT_CONN_LOG_SCANS 1181
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1181
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1182
/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1182
+#define WT_STAT_CONN_LOG_WRITE_LSN 1183
/*! log: log server thread write LSN walk skipped */
-#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1183
+#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1184
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1184
+#define WT_STAT_CONN_LOG_SYNC 1185
/*! log: log sync time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DURATION 1185
+#define WT_STAT_CONN_LOG_SYNC_DURATION 1186
/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1186
+#define WT_STAT_CONN_LOG_SYNC_DIR 1187
/*! log: log sync_dir time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1187
+#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1188
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1188
+#define WT_STAT_CONN_LOG_WRITES 1189
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1189
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1190
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1190
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1191
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1191
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1192
/*! log: pre-allocated log files not ready and missed */
-#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1192
+#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1193
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1193
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1194
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1194
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1195
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1195
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1196
/*! log: slot close lost race */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1196
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1197
/*! log: slot close unbuffered waits */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1197
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1198
/*! log: slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1198
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1199
/*! log: slot join atomic update races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1199
+#define WT_STAT_CONN_LOG_SLOT_RACES 1200
/*! log: slot join calls atomic updates raced */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1200
+#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1201
/*! log: slot join calls did not yield */
-#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1201
+#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1202
/*! log: slot join calls found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1202
+#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1203
/*! log: slot join calls slept */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1203
+#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1204
/*! log: slot join calls yielded */
-#define WT_STAT_CONN_LOG_SLOT_YIELD 1204
+#define WT_STAT_CONN_LOG_SLOT_YIELD 1205
/*! log: slot join found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1205
+#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1206
/*! log: slot joins yield time (usecs) */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1206
+#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1207
/*! log: slot transitions unable to find free slot */
-#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1207
+#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1208
/*! log: slot unbuffered writes */
-#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1208
+#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1209
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1209
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1210
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1210
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1211
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1211
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1212
/*! log: written slots coalesced */
-#define WT_STAT_CONN_LOG_SLOT_COALESCED 1212
+#define WT_STAT_CONN_LOG_SLOT_COALESCED 1213
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1213
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1214
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1214
+#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1215
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1215
+#define WT_STAT_CONN_REC_PAGES 1216
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1216
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1217
/*! reconciliation: pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE 1217
+#define WT_STAT_CONN_REC_PAGE_DELETE 1218
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1218
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1219
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1219
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1220
/*! session: open cursor count */
-#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1220
+#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1221
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1221
+#define WT_STAT_CONN_SESSION_OPEN 1222
/*! session: table alter failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1222
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1223
/*! session: table alter successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1223
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1224
/*! session: table alter unchanged and skipped */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1224
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1225
/*! session: table compact failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1225
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1226
/*! session: table compact successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1226
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1227
/*! session: table create failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1227
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1228
/*! session: table create successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1228
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1229
/*! session: table drop failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1229
+#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1230
/*! session: table drop successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1230
+#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1231
/*! session: table rebalance failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1231
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1232
/*! session: table rebalance successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1232
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1233
/*! session: table rename failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1233
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1234
/*! session: table rename successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1234
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1235
/*! session: table salvage failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1235
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1236
/*! session: table salvage successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1236
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1237
/*! session: table truncate failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1237
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1238
/*! session: table truncate successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1238
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1239
/*! session: table verify failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1239
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1240
/*! session: table verify successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1240
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1241
/*! thread-state: active filesystem fsync calls */
-#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1241
+#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1242
/*! thread-state: active filesystem read calls */
-#define WT_STAT_CONN_THREAD_READ_ACTIVE 1242
+#define WT_STAT_CONN_THREAD_READ_ACTIVE 1243
/*! thread-state: active filesystem write calls */
-#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1243
+#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1244
/*! thread-yield: application thread time evicting (usecs) */
-#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1244
+#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1245
/*! thread-yield: application thread time waiting for cache (usecs) */
-#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1245
+#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1246
/*!
* thread-yield: connection close blocked waiting for transaction state
* stabilization
*/
-#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1246
+#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1247
/*! thread-yield: connection close yielded for lsm manager shutdown */
-#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1247
+#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1248
/*! thread-yield: data handle lock yielded */
-#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1248
+#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1249
/*!
* thread-yield: get reference for page index and slot time sleeping
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1249
+#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1250
/*! thread-yield: log server sync yielded for log write */
-#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1250
+#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1251
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1251
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1252
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1252
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1253
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1253
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1254
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1254
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1255
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1255
+#define WT_STAT_CONN_PAGE_SLEEP 1256
/*!
* thread-yield: page delete rollback time sleeping for state change
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1256
+#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1257
/*! thread-yield: page reconciliation yielded due to child modification */
-#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1257
+#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1258
/*!
* thread-yield: tree descend one level yielded for split page index
* update
*/
-#define WT_STAT_CONN_TREE_DESCEND_BLOCKED 1258
+#define WT_STAT_CONN_TREE_DESCEND_BLOCKED 1259
/*! transaction: number of named snapshots created */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1259
+#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1260
/*! transaction: number of named snapshots dropped */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1260
+#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1261
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1261
+#define WT_STAT_CONN_TXN_BEGIN 1262
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1262
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1263
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1263
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1264
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1264
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1265
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1265
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1266
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1266
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1267
/*! transaction: transaction checkpoint scrub dirty target */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1267
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1268
/*! transaction: transaction checkpoint scrub time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1268
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1269
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1269
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1270
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1270
+#define WT_STAT_CONN_TXN_CHECKPOINT 1271
/*!
* transaction: transaction checkpoints skipped because database was
* clean
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1271
+#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1272
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1272
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1273
/*!
* transaction: transaction fsync calls for checkpoint after allocating
* the transaction ID
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1273
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1274
/*!
* transaction: transaction fsync duration for checkpoint after
* allocating the transaction ID (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1274
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1275
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1275
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1276
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1276
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1277
/*!
* transaction: transaction range of IDs currently pinned by named
* snapshots
*/
-#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1277
+#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1278
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1278
+#define WT_STAT_CONN_TXN_SYNC 1279
/*! transaction: transactions commit timestamp queue inserts to head */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_HEAD 1279
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_HEAD 1280
/*! transaction: transactions commit timestamp queue inserts total */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1280
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1281
/*! transaction: transactions commit timestamp queue length */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN 1281
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN 1282
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1282
+#define WT_STAT_CONN_TXN_COMMIT 1283
/*! transaction: transactions read timestamp queue inserts to head */
-#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1283
+#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1284
/*! transaction: transactions read timestamp queue inserts total */
-#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1284
+#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1285
/*! transaction: transactions read timestamp queue length */
-#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1285
+#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1286
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1286
+#define WT_STAT_CONN_TXN_ROLLBACK 1287
/*! transaction: update conflicts */
-#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1287
+#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1288
/*!
* @}
diff --git a/src/log/log.c b/src/log/log.c
index 0b01b61ced3..4c32b9b02ed 100644
--- a/src/log/log.c
+++ b/src/log/log.c
@@ -1914,11 +1914,6 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags,
if (func == NULL)
return (0);
- if (LF_ISSET(WT_LOGSCAN_RECOVER))
- __wt_verbose(session, WT_VERB_LOG,
- "__wt_log_scan truncating to %" PRIu32 "/%" PRIu32,
- log->trunc_lsn.l.file, log->trunc_lsn.l.offset);
-
if (lsnp != NULL &&
LF_ISSET(WT_LOGSCAN_FIRST|WT_LOGSCAN_FROM_CKP))
WT_RET_MSG(session, WT_ERROR,
@@ -2042,8 +2037,13 @@ advance:
/*
* Truncate this log file before we move to the next.
*/
- if (LF_ISSET(WT_LOGSCAN_RECOVER))
+ if (LF_ISSET(WT_LOGSCAN_RECOVER) &&
+ __wt_log_cmp(&rd_lsn, &log->trunc_lsn) < 0) {
+ __wt_verbose(session, WT_VERB_LOG,
+ "Truncate end of log %" PRIu32 "/%" PRIu32,
+ rd_lsn.l.file, rd_lsn.l.offset);
WT_ERR(__log_truncate(session, &rd_lsn, true));
+ }
/*
* If we had a partial record, we'll want to break
* now after closing and truncating. Although for now
@@ -2228,7 +2228,7 @@ advance:
if (LF_ISSET(WT_LOGSCAN_RECOVER) &&
__wt_log_cmp(&rd_lsn, &log->trunc_lsn) < 0) {
__wt_verbose(session, WT_VERB_LOG,
- "__wt_log_scan truncating to %" PRIu32 "/%" PRIu32,
+ "End of recovery truncate end of log %" PRIu32 "/%" PRIu32,
rd_lsn.l.file, rd_lsn.l.offset);
WT_ERR(__log_truncate(session, &rd_lsn, false));
}
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
index 95d025247a6..6195726ec67 100644
--- a/src/lsm/lsm_tree.c
+++ b/src/lsm/lsm_tree.c
@@ -1280,7 +1280,14 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
} else
break;
}
+
+ /*
+ * Periodically check if we've timed out or eviction is stuck.
+ * Quit if eviction is stuck: we're making the problem worse.
+ */
WT_ERR(__wt_session_compact_check_timeout(session));
+ if (__wt_cache_stuck(session))
+ WT_ERR(EBUSY);
__wt_sleep(1, 0);
/*
diff --git a/src/meta/meta_track.c b/src/meta/meta_track.c
index 0757b96f587..0ad9a2aa429 100644
--- a/src/meta/meta_track.c
+++ b/src/meta/meta_track.c
@@ -167,12 +167,18 @@ __meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
static int
__meta_track_unroll(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
{
+ WT_BM *bm;
+ WT_BTREE *btree;
WT_DECL_RET;
switch (trk->op) {
case WT_ST_EMPTY: /* Unused slot */
break;
case WT_ST_CHECKPOINT: /* Checkpoint, see above */
+ btree = trk->dhandle->handle;
+ bm = btree->bm;
+ WT_WITH_DHANDLE(session, trk->dhandle,
+ ret = bm->checkpoint_resolve(bm, session, true));
break;
case WT_ST_DROP_COMMIT:
break;
@@ -233,6 +239,9 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
WT_DECL_RET;
WT_META_TRACK *trk, *trk_orig;
WT_SESSION_IMPL *ckpt_session;
+ int saved_ret;
+
+ saved_ret = 0;
WT_ASSERT(session,
WT_META_TRACKING(session) && session->meta_track_nest > 0);
@@ -255,12 +264,9 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
if (trk == trk_orig)
return (0);
- if (unroll) {
- while (--trk >= trk_orig)
- WT_TRET(__meta_track_unroll(session, trk));
- /* Unroll operations don't need to flush the metadata. */
- return (ret);
- }
+ /* Unrolling doesn't require syncing the metadata. */
+ if (unroll)
+ goto done;
/*
* If we don't have the metadata cursor (e.g, we're in the process of
@@ -271,13 +277,12 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
goto done;
/* If we're logging, make sure the metadata update was flushed. */
- if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED)) {
+ if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED))
WT_WITH_DHANDLE(session,
WT_SESSION_META_DHANDLE(session),
ret = __wt_txn_checkpoint_log(
- session, false, WT_TXN_LOG_CKPT_SYNC, NULL));
- WT_RET(ret);
- } else {
+ session, false, WT_TXN_LOG_CKPT_SYNC, NULL));
+ else {
WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
ckpt_session = S2C(session)->meta_ckpt_session;
/*
@@ -288,21 +293,32 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
F_SET(ckpt_session, WT_SESSION_LOCKED_METADATA);
WT_WITH_METADATA_LOCK(session,
WT_WITH_DHANDLE(ckpt_session,
- WT_SESSION_META_DHANDLE(session),
- ret = __wt_checkpoint(ckpt_session, NULL)));
+ WT_SESSION_META_DHANDLE(session),
+ ret = __wt_checkpoint(ckpt_session, NULL)));
F_CLR(ckpt_session, WT_SESSION_LOCKED_METADATA);
ckpt_session->txn.id = WT_TXN_NONE;
- WT_RET(ret);
- WT_WITH_DHANDLE(session,
- WT_SESSION_META_DHANDLE(session),
- ret = __wt_checkpoint_sync(session, NULL));
- WT_RET(ret);
+ if (ret == 0)
+ WT_WITH_DHANDLE(session,
+ WT_SESSION_META_DHANDLE(session),
+ ret = __wt_checkpoint_sync(session, NULL));
}
-done: /* Apply any tracked operations post-commit. */
- for (; trk_orig < trk; trk_orig++)
- WT_TRET(__meta_track_apply(session, trk_orig));
- return (ret);
+done: /*
+ * Undo any tracked operations on failure.
+ * Apply any tracked operations post-commit.
+ */
+ if (unroll || ret != 0) {
+ saved_ret = ret;
+ ret = 0;
+ while (--trk >= trk_orig)
+ WT_TRET(__meta_track_unroll(session, trk));
+ } else
+ for (; trk_orig < trk; trk_orig++)
+ WT_TRET(__meta_track_apply(session, trk_orig));
+ if (ret != 0)
+ WT_PANIC_RET(session, ret,
+ "failed to apply or unroll all tracked operations");
+ return (saved_ret == 0 ? 0 : saved_ret);
}
/*
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index 108d9cf15f9..3e857fef324 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -39,17 +39,21 @@ typedef struct {
uint64_t orig_txn_checkpoint_gen;
/*
- * Track the oldest running transaction and the stable timestamp when
- * reconciliation starts.
+ * Track the oldest running transaction and whether to skew lookaside
+ * to the newest or oldest update.
*/
+ bool las_skew_oldest;
uint64_t last_running;
- WT_DECL_TIMESTAMP(stable_timestamp)
/* Track the page's min/maximum transactions. */
uint64_t max_txn;
WT_DECL_TIMESTAMP(max_timestamp)
+ WT_DECL_TIMESTAMP(max_onpage_timestamp)
WT_DECL_TIMESTAMP(min_saved_timestamp)
+ u_int updates_seen; /* Count of updates seen. */
+ u_int updates_unstable; /* Count of updates not visible_all. */
+
bool update_uncommitted; /* An update was uncommitted */
bool update_used; /* An update could be used */
@@ -378,9 +382,16 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
* Otherwise we would need to keep updates in memory that go back older
* than the version in the disk image, and since modify operations
* aren't idempotent, that is problematic.
+ *
+ * If we try to do eviction using transaction visibility, we had better
+ * have a snapshot. This doesn't apply to checkpoints: there are
+ * (rare) cases where we write data at read-uncommitted isolation.
*/
WT_ASSERT(session, !LF_ISSET(WT_REC_UPDATE_RESTORE) ||
LF_ISSET(WT_REC_VISIBLE_ALL));
+ WT_ASSERT(session, !LF_ISSET(WT_REC_EVICT) ||
+ LF_ISSET(WT_REC_VISIBLE_ALL) ||
+ F_ISSET(&session->txn, WT_TXN_HAS_SNAPSHOT));
/* We shouldn't get called with a clean page, that's an error. */
WT_ASSERT(session, __wt_page_is_modified(page));
@@ -443,6 +454,15 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
WT_ILLEGAL_VALUE_SET(session);
}
+ /*
+ * Update the global lookaside score. Only use observations during
+ * eviction, not checkpoints, and don't count eviction of the lookaside
+ * table itself.
+ */
+ if (F_ISSET(r, WT_REC_EVICT) && !F_ISSET(btree, WT_BTREE_LOOKASIDE))
+ __wt_cache_update_lookaside_score(
+ session, r->updates_seen, r->updates_unstable);
+
/* Check for a successful reconciliation. */
WT_TRET(__rec_write_check_complete(session, r, ret, lookaside_retryp));
@@ -675,16 +695,14 @@ __rec_write_page_status(WT_SESSION_IMPL *session, WT_RECONCILE *r)
*/
WT_ASSERT(session,
!F_ISSET(r, WT_REC_EVICT) ||
- F_ISSET(r, WT_REC_UPDATE_RESTORE));
+ F_ISSET(r, WT_REC_LOOKASIDE | WT_REC_UPDATE_RESTORE));
} else {
/*
* Track the page's maximum transaction ID (used to decide if
* we're likely to be able to evict this page in the future).
*/
mod->rec_max_txn = r->max_txn;
-#ifdef HAVE_TIMESTAMPS
__wt_timestamp_set(&mod->rec_max_timestamp, &r->max_timestamp);
-#endif
/*
* Track the tree's maximum transaction ID (used to decide if
@@ -912,12 +930,12 @@ __rec_init(WT_SESSION_IMPL *session,
* uncommitted.
*/
txn_global = &S2C(session)->txn_global;
+ if (__wt_btree_immediately_durable(session))
+ r->las_skew_oldest = false;
+ else
+ WT_ORDERED_READ(r->las_skew_oldest,
+ txn_global->has_stable_timestamp);
WT_ORDERED_READ(r->last_running, txn_global->last_running);
-#ifdef HAVE_TIMESTAMPS
- WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
- __wt_timestamp_set(
- &r->stable_timestamp, &txn_global->stable_timestamp));
-#endif
/*
* When operating on the lookaside table, we should never try
@@ -957,12 +975,12 @@ __rec_init(WT_SESSION_IMPL *session,
/* Track the page's min/maximum transaction */
r->max_txn = WT_TXN_NONE;
-#ifdef HAVE_TIMESTAMPS
__wt_timestamp_set_zero(&r->max_timestamp);
+ __wt_timestamp_set_zero(&r->max_onpage_timestamp);
__wt_timestamp_set_inf(&r->min_saved_timestamp);
-#endif
/* Track if updates were used and/or uncommitted. */
+ r->updates_seen = r->updates_unstable = 0;
r->update_uncommitted = r->update_used = false;
/* Track if the page can be marked clean. */
@@ -1248,6 +1266,9 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
if ((txnid = upd->txnid) == WT_TXN_ABORTED)
continue;
+ ++r->updates_seen;
+ upd_memsize += WT_UPDATE_MEMSIZE(upd);
+
/*
* Track the first update in the chain that is not aborted and
* the maximum transaction ID.
@@ -1266,10 +1287,20 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
* concurrent transaction commits or rolls back while we are
* examining its updates.
*/
- if (WT_TXNID_LE(r->last_running, txnid))
+ if (F_ISSET(r, WT_REC_EVICT) &&
+ (F_ISSET(r, WT_REC_VISIBLE_ALL) ?
+ WT_TXNID_LE(r->last_running, txnid) :
+ !__txn_visible_id(session, txnid))) {
uncommitted = r->update_uncommitted = true;
+ continue;
+ }
- upd_memsize += WT_UPDATE_MEMSIZE(upd);
+#ifdef HAVE_TIMESTAMPS
+ /* Track the first update with non-zero timestamp. */
+ if (first_ts_upd == NULL &&
+ !__wt_timestamp_iszero(&upd->timestamp))
+ first_ts_upd = upd;
+#endif
/*
* Find the first update we can use.
@@ -1278,10 +1309,27 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
* uncommitted updates). Lookaside eviction can save any
* committed update. Regular eviction checks that the maximum
* transaction ID and timestamp seen are stable.
+ *
+ * Lookaside eviction tries to choose the same version as a
+ * subsequent checkpoint, so that checkpoint can skip over
+ * pages with lookaside entries. If the application has
+ * supplied a stable timestamp, we assume (a) that it is old,
+ * and (b) that the next checkpoint will use it, so we wait to
+ * see a stable update. If there is no stable timestamp, we
+ * assume the next checkpoint will write the most recent
+ * version (but we save enough information that checkpoint can
+ * fix things up if we choose an update that is too new).
*/
+ if (*updp == NULL && F_ISSET(r, WT_REC_LOOKASIDE) &&
+ F_ISSET(r, WT_REC_VISIBLE_ALL) && !r->las_skew_oldest)
+ *updp = upd;
+
if (F_ISSET(r, WT_REC_VISIBLE_ALL) ?
!__wt_txn_upd_visible_all(session, upd) :
!__wt_txn_upd_visible(session, upd)) {
+ if (F_ISSET(r, WT_REC_EVICT))
+ ++r->updates_unstable;
+
/*
* Rare case: when applications run at low isolation
* levels, update/restore eviction may see a stable
@@ -1291,21 +1339,21 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
* discard an uncommitted update.
*/
if (F_ISSET(r, WT_REC_UPDATE_RESTORE) &&
- *updp != NULL && uncommitted)
+ *updp != NULL && uncommitted) {
+ r->leave_dirty = true;
return (EBUSY);
+ }
continue;
}
+ /*
+ * Lookaside without stable timestamp was taken care of above
+ * (set to the first uncommitted transaction). Lookaside with
+ * stable timestamp always takes the first stable update.
+ */
if (*updp == NULL)
*updp = upd;
-
-#ifdef HAVE_TIMESTAMPS
- /* Track the first update with non-zero timestamp. */
- if (first_ts_upd == NULL &&
- !__wt_timestamp_iszero(&upd->timestamp))
- first_ts_upd = upd;
-#endif
}
/* Reconciliation should never see an aborted or reserved update. */
@@ -1360,9 +1408,9 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
#else
timestampp = NULL;
#endif
- all_visible = *updp == first_txn_upd &&
+ all_visible = *updp == first_txn_upd && !uncommitted &&
(F_ISSET(r, WT_REC_VISIBLE_ALL) ?
- !uncommitted && __wt_txn_visible_all(session, max_txn, timestampp) :
+ __wt_txn_visible_all(session, max_txn, timestampp) :
__wt_txn_visible(session, max_txn, timestampp));
if (all_visible)
@@ -1371,8 +1419,8 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
if (F_ISSET(r, WT_REC_VISIBILITY_ERR))
WT_PANIC_RET(session, EINVAL,
"reconciliation error, update not visible");
- if (!F_ISSET(r, WT_REC_LOOKASIDE))
- r->leave_dirty = true;
+
+ r->leave_dirty = true;
/*
* If not trying to evict the page, we know what we'll write and we're
@@ -1409,16 +1457,21 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
#ifdef HAVE_TIMESTAMPS
/* Track the oldest saved timestamp for lookaside. */
- if (first_ts_upd == NULL)
- __wt_timestamp_set_zero(&r->min_saved_timestamp);
- else if (F_ISSET(r, WT_REC_LOOKASIDE))
- for (upd = first_upd; upd != NULL; upd = upd->next)
+ if (F_ISSET(r, WT_REC_LOOKASIDE)) {
+ /* If no updates had timestamps, we're done. */
+ if (first_ts_upd == NULL)
+ __wt_timestamp_set_zero(&r->min_saved_timestamp);
+ for (upd = first_upd; upd != *updp; upd = upd->next) {
if (upd->txnid != WT_TXN_ABORTED &&
- upd->txnid != WT_TXN_NONE &&
- __wt_timestamp_cmp(
- &upd->timestamp, &r->min_saved_timestamp) < 0)
- __wt_timestamp_set(
- &r->min_saved_timestamp, &upd->timestamp);
+ __wt_timestamp_cmp(&upd->timestamp,
+ &r->min_saved_timestamp) < 0)
+ __wt_timestamp_set(&r->min_saved_timestamp,
+ &upd->timestamp);
+
+ WT_ASSERT(session, upd->txnid == WT_TXN_ABORTED ||
+ WT_TXNID_LE(upd->txnid, r->max_txn));
+ }
+ }
#endif
check_original_value:
@@ -1431,16 +1484,24 @@ check_original_value:
/*
* Returning an update means the original on-page value might be lost,
* and that's a problem if there's a reader that needs it. There are
- * two cases: any lookaside table eviction (because the backing disk
- * image is rewritten), or any reconciliation of a backing overflow
- * record that will be physically removed once it's no longer needed.
- */
- if (*updp != NULL && (F_ISSET(r, WT_REC_LOOKASIDE) ||
- (vpack != NULL &&
+ * three cases: any update from a modify operation (because the modify
+ * has to be applied to a stable update, not the new on-page update),
+ * any lookaside table eviction (because the backing disk image is
+ * rewritten), or any reconciliation of a backing overflow record that
+ * will be physically removed once it's no longer needed.
+ */
+ if (*updp != NULL && ((*updp)->type == WT_UPDATE_MODIFIED ||
+ F_ISSET(r, WT_REC_LOOKASIDE) || (vpack != NULL &&
vpack->ovfl && vpack->raw != WT_CELL_VALUE_OVFL_RM)))
WT_RET(
__rec_append_orig_value(session, page, first_upd, vpack));
+#ifdef HAVE_TIMESTAMPS
+ if ((upd = *updp) != NULL &&
+ __wt_timestamp_cmp(&upd->timestamp, &r->max_onpage_timestamp) > 0)
+ __wt_timestamp_set(&r->max_onpage_timestamp, &upd->timestamp);
+#endif
+
return (0);
}
@@ -3231,7 +3292,7 @@ __rec_split_write_supd(WT_SESSION_IMPL *session,
WT_RET(__rec_supd_move(session, multi, r->supd, r->supd_next));
r->supd_next = 0;
r->supd_memsize = 0;
- return (0);
+ goto done;
}
/*
@@ -3291,6 +3352,17 @@ __rec_split_write_supd(WT_SESSION_IMPL *session,
r->supd_next = j;
}
+done: /* Track the oldest timestamp seen so far. */
+ multi->page_las.las_skew_oldest = r->las_skew_oldest;
+ multi->page_las.las_max_txn = r->max_txn;
+ WT_ASSERT(session, r->max_txn != WT_TXN_NONE);
+#ifdef HAVE_TIMESTAMPS
+ __wt_timestamp_set(
+ &multi->page_las.min_timestamp, &r->min_saved_timestamp);
+ __wt_timestamp_set(
+ &multi->page_las.onpage_timestamp, &r->max_onpage_timestamp);
+#endif
+
err: __wt_scr_free(session, &key);
return (ret);
}
@@ -5859,11 +5931,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
r->multi->addr.addr = NULL;
mod->mod_disk_image = r->multi->disk_image;
r->multi->disk_image = NULL;
- mod->mod_replace_las_pageid = r->multi->las_pageid;
-#ifdef HAVE_TIMESTAMPS
- __wt_timestamp_set(&mod->mod_replace_las_min_timestamp,
- &r->min_saved_timestamp);
-#endif
+ mod->mod_page_las = r->multi->page_las;
} else
WT_RET(__wt_bt_write(session, r->wrapup_checkpoint,
NULL, NULL, true, F_ISSET(r, WT_REC_CHECKPOINT),
@@ -6008,9 +6076,9 @@ __rec_las_wrapup_err(WT_SESSION_IMPL *session, WT_RECONCILE *r)
* flags if lookaside table entries for this page have been written.
*/
for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
- if (multi->supd != NULL && multi->las_pageid != 0)
- WT_TRET(__wt_las_remove_block(
- session, NULL, btree_id, multi->las_pageid));
+ if (multi->supd != NULL && multi->page_las.las_pageid != 0)
+ WT_TRET(__wt_las_remove_block(session, NULL,
+ btree_id, multi->page_las.las_pageid));
return (ret);
}
diff --git a/src/support/stat.c b/src/support/stat.c
index 57dcd33c7f1..924afaa21d6 100644
--- a/src/support/stat.c
+++ b/src/support/stat.c
@@ -809,6 +809,7 @@ static const char * const __stats_connection_desc[] = {
"cache: internal pages evicted",
"cache: internal pages split during eviction",
"cache: leaf pages split during eviction",
+ "cache: lookaside score",
"cache: lookaside table entries",
"cache: lookaside table insert calls",
"cache: lookaside table remove calls",
@@ -1139,6 +1140,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cache_eviction_internal = 0;
stats->cache_eviction_split_internal = 0;
stats->cache_eviction_split_leaf = 0;
+ /* not clearing cache_lookaside_score */
/* not clearing cache_lookaside_entries */
stats->cache_lookaside_insert = 0;
stats->cache_lookaside_remove = 0;
@@ -1490,6 +1492,8 @@ __wt_stat_connection_aggregate(
WT_STAT_READ(from, cache_eviction_split_internal);
to->cache_eviction_split_leaf +=
WT_STAT_READ(from, cache_eviction_split_leaf);
+ to->cache_lookaside_score +=
+ WT_STAT_READ(from, cache_lookaside_score);
to->cache_lookaside_entries +=
WT_STAT_READ(from, cache_lookaside_entries);
to->cache_lookaside_insert +=
diff --git a/src/txn/txn.c b/src/txn/txn.c
index 91771403e13..3d45ff8a88c 100644
--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -578,10 +578,8 @@ __wt_txn_release(WT_SESSION_IMPL *session)
txn->id = WT_TXN_NONE;
}
-#ifdef HAVE_TIMESTAMPS
__wt_txn_clear_commit_timestamp(session);
__wt_txn_clear_read_timestamp(session);
-#endif
/* Free the scratch buffer allocated for logging. */
__wt_logrec_free(session, &txn->logrec);
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index afb3cba1db6..eb32ef2d06a 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -437,8 +437,7 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
for (;;) {
current_dirty =
(100.0 * __wt_cache_dirty_leaf_inuse(cache)) / cache_size;
- if (current_dirty <=
- (double)cache->eviction_checkpoint_target)
+ if (current_dirty <= (double)cache->eviction_checkpoint_target)
break;
__wt_sleep(0, stepdown_us / 10);
@@ -506,22 +505,53 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
}
/*
+ * __wt_checkpoint_progress --
+ * Output a checkpoint progress message.
+ */
+void
+__wt_checkpoint_progress(WT_SESSION_IMPL *session, bool closing)
+{
+ struct timespec cur_time;
+ WT_CONNECTION_IMPL *conn;
+ uint64_t time_diff;
+
+ conn = S2C(session);
+ __wt_epoch(session, &cur_time);
+
+ /* Time since the full database checkpoint started */
+ time_diff = WT_TIMEDIFF_SEC(cur_time,
+ conn->ckpt_timer_start);
+
+ if (closing || (time_diff / 20) > conn->ckpt_progress_msg_count) {
+ __wt_verbose(session, WT_VERB_CHECKPOINT_PROGRESS,
+ "Checkpoint %s for %" PRIu64
+ " seconds and wrote: %" PRIu64 " pages (%" PRIu64 " MB)",
+ closing ? "ran" : "has been running",
+ time_diff, conn->ckpt_write_pages,
+ conn->ckpt_write_bytes / WT_MEGABYTE);
+ conn->ckpt_progress_msg_count++;
+ }
+}
+
+/*
* __checkpoint_stats --
* Update checkpoint timer stats.
*/
static void
-__checkpoint_stats(
- WT_SESSION_IMPL *session, struct timespec *start, struct timespec *stop)
+__checkpoint_stats(WT_SESSION_IMPL *session)
{
+ struct timespec stop;
WT_CONNECTION_IMPL *conn;
uint64_t msec;
conn = S2C(session);
- /*
- * Get time diff in milliseconds.
- */
- msec = WT_TIMEDIFF_MS(*stop, *start);
+ /* Output a verbose progress message for long running checkpoints */
+ if (conn->ckpt_progress_msg_count > 0)
+ __wt_checkpoint_progress(session, true);
+
+ __wt_epoch(session, &stop);
+ msec = WT_TIMEDIFF_MS(stop, conn->ckpt_timer_scrub_end);
if (msec > conn->ckpt_time_max)
conn->ckpt_time_max = msec;
@@ -536,33 +566,29 @@ __checkpoint_stats(
* Output a verbose message with timing information
*/
static void
-__checkpoint_verbose_track(WT_SESSION_IMPL *session,
- const char *msg, struct timespec *start)
+__checkpoint_verbose_track(WT_SESSION_IMPL *session, const char *msg)
{
#ifdef HAVE_VERBOSE
struct timespec stop;
+ WT_CONNECTION_IMPL *conn;
uint64_t msec;
if (!WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT))
return;
+ conn = S2C(session);
__wt_epoch(session, &stop);
- /*
- * Get time diff in milliseconds.
- */
- msec = WT_TIMEDIFF_MS(stop, *start);
+ /* Get time diff in milliseconds. */
+ msec = WT_TIMEDIFF_MS(stop, conn->ckpt_timer_start);
__wt_verbose(session,
WT_VERB_CHECKPOINT, "time: %" PRIu64 " ms, gen: %" PRIu64
": Full database checkpoint %s",
msec, __wt_gen(session, WT_GEN_CHECKPOINT), msg);
- /* Update the timestamp so we are reporting intervals. */
- memcpy(start, &stop, sizeof(*start));
#else
WT_UNUSED(session);
WT_UNUSED(msg);
- WT_UNUSED(start);
#endif
}
@@ -713,7 +739,6 @@ static int
__txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
{
struct timespec fsync_start, fsync_stop;
- struct timespec start, stop, verb_timer;
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
@@ -745,7 +770,12 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
conn->cache->evict_max_page_size = 0;
/* Initialize the verbose tracking timer */
- __wt_epoch(session, &verb_timer);
+ __wt_epoch(session, &conn->ckpt_timer_start);
+
+ /* Initialize the checkpoint progress tracking data */
+ conn->ckpt_progress_msg_count = 0;
+ conn->ckpt_write_bytes = 0;
+ conn->ckpt_write_pages = 0;
/*
* Update the global oldest ID so we do all possible cleanup.
@@ -770,11 +800,10 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__wt_txn_checkpoint_log(
session, full, WT_TXN_LOG_CKPT_PREPARE, NULL));
- __checkpoint_verbose_track(session,
- "starting transaction", &verb_timer);
+ __checkpoint_verbose_track(session, "starting transaction");
if (full)
- __wt_epoch(session, &start);
+ __wt_epoch(session, &conn->ckpt_timer_scrub_end);
/*
* Start the checkpoint for real.
@@ -845,8 +874,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_presync));
__wt_evict_server_wake(session);
- __checkpoint_verbose_track(session,
- "committing transaction", &verb_timer);
+ __checkpoint_verbose_track(session, "committing transaction");
/*
* Checkpoints have to hit disk (it would be reasonable to configure for
@@ -860,7 +888,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_STAT_CONN_SET(session,
txn_checkpoint_fsync_post_duration, fsync_duration_usecs);
- __checkpoint_verbose_track(session, "sync completed", &verb_timer);
+ __checkpoint_verbose_track(session, "sync completed");
/*
* Commit the transaction now that we are sure that all files in the
@@ -898,8 +926,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
ret = __wt_checkpoint_sync(session, NULL));
WT_ERR(ret);
- __checkpoint_verbose_track(session,
- "metadata sync completed", &verb_timer);
+ __checkpoint_verbose_track(session, "metadata sync completed");
} else
WT_WITH_DHANDLE(session,
WT_SESSION_META_DHANDLE(session),
@@ -912,12 +939,16 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
*/
txn_global->checkpoint_state.pinned_id = WT_TXN_NONE;
- if (full) {
- __wt_epoch(session, &stop);
- __checkpoint_stats(session, &start, &stop);
- }
+ if (full)
+ __checkpoint_stats(session);
err: /*
+ * Reset the timer so that the next checkpoint tracks the progress only if
+ * configured.
+ */
+ conn->ckpt_timer_start.tv_sec = 0;
+
+ /*
* XXX
* Rolling back the changes here is problematic.
*
diff --git a/src/txn/txn_timestamp.c b/src/txn/txn_timestamp.c
index 0201036684d..98887627bfc 100644
--- a/src/txn/txn_timestamp.c
+++ b/src/txn/txn_timestamp.c
@@ -467,11 +467,11 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
if (has_oldest || has_stable)
WT_RET(__wt_txn_update_pinned_timestamp(session));
}
+ return (0);
#else
- WT_RET_MSG(session, ENOTSUP, "set_timestamp requires a "
- "version of WiredTiger built with timestamp support");
+ WT_RET_MSG(session, ENOTSUP, "set_timestamp requires a "
+ "version of WiredTiger built with timestamp support");
#endif
- return (0);
}
#ifdef HAVE_TIMESTAMPS
@@ -687,6 +687,17 @@ __wt_txn_clear_read_timestamp(WT_SESSION_IMPL *session)
if (!F_ISSET(txn, WT_TXN_PUBLIC_TS_READ))
return;
+#ifdef HAVE_DIAGNOSTIC
+ {
+ wt_timestamp_t pinned_ts;
+
+ WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
+ __wt_timestamp_set(&pinned_ts, &txn_global->pinned_timestamp));
+ WT_ASSERT(session,
+ __wt_timestamp_cmp(&txn->read_timestamp, &pinned_ts) >= 0);
+ }
+#endif
+
__wt_writelock(session, &txn_global->read_timestamp_rwlock);
TAILQ_REMOVE(&txn_global->read_timestamph, txn, read_timestampq);
--txn_global->read_timestampq_len;
diff --git a/src/utilities/util_dump.c b/src/utilities/util_dump.c
index 2f572f3f370..f768d323afb 100644
--- a/src/utilities/util_dump.c
+++ b/src/utilities/util_dump.c
@@ -269,7 +269,7 @@ dump_add_config(WT_SESSION *session, char **bufp, size_t *leftp,
if (ret != 0)
return (util_err(session, ret, NULL));
*bufp += n;
- *leftp -= (size_t)n;
+ *leftp -= n;
return (0);
}
diff --git a/test/csuite/random_abort/main.c b/test/csuite/random_abort/main.c
index a171cfef13a..ad49f01dde5 100644
--- a/test/csuite/random_abort/main.c
+++ b/test/csuite/random_abort/main.c
@@ -56,6 +56,8 @@ static bool inmem;
#define ENV_CONFIG_REC "log=(recover=on)"
#define MAX_VAL 4096
+static void handler(int)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void usage(void)
WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
@@ -123,6 +125,8 @@ thread_run(void *arg)
/*
* Write our portion of the key space until we're killed.
*/
+ printf("Thread %" PRIu32 " starts at %" PRIu64 "\n",
+ td->id, td->start);
for (i = td->start; ; ++i) {
testutil_check(__wt_snprintf(
kname, sizeof(kname), "%" PRIu64, i));
@@ -185,7 +189,7 @@ fill_db(uint32_t nth)
printf("Create %" PRIu32 " writer threads\n", nth);
for (i = 0; i < nth; ++i) {
td[i].conn = conn;
- td[i].start = (UINT64_MAX / nth) * i;
+ td[i].start = WT_BILLION * (uint64_t)i;
td[i].id = i;
testutil_check(__wt_thread_create(
NULL, &thr[i], thread_run, &td[i]));
@@ -209,9 +213,24 @@ fill_db(uint32_t nth)
extern int __wt_optind;
extern char *__wt_optarg;
+static void
+handler(int sig)
+{
+ pid_t pid;
+
+ WT_UNUSED(sig);
+ pid = wait(NULL);
+ /*
+ * The core file will indicate why the child exited. Choose EINVAL here.
+ */
+ testutil_die(EINVAL,
+ "Child process %" PRIu64 " abnormally exited", (uint64_t)pid);
+}
+
int
main(int argc, char *argv[])
{
+ struct sigaction sa;
struct stat sb;
FILE *fp;
WT_CONNECTION *conn;
@@ -298,6 +317,9 @@ main(int argc, char *argv[])
* kill the child, run recovery and make sure all items we wrote
* exist after recovery runs.
*/
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = handler;
+ testutil_checksys(sigaction(SIGCHLD, &sa, NULL));
if ((pid = fork()) < 0)
testutil_die(errno, "fork");
@@ -311,15 +333,15 @@ main(int argc, char *argv[])
* Sleep for the configured amount of time before killing
* the child. Start the timeout from the time we notice that
* the table has been created. That allows the test to run
- * correctly on really slow machines. Verify the process ID
- * still exists in case the child aborts for some reason we
- * don't stay in this loop forever.
+ * correctly on really slow machines.
*/
testutil_check(__wt_snprintf(
buf, sizeof(buf), "%s/%s", home, fs_main));
- while (stat(buf, &sb) != 0 && kill(pid, 0) == 0)
+ while (stat(buf, &sb) != 0)
sleep(1);
sleep(timeout);
+ sa.sa_handler = SIG_DFL;
+ testutil_checksys(sigaction(SIGCHLD, &sa, NULL));
/*
* !!! It should be plenty long enough to make sure more than
diff --git a/test/csuite/timestamp_abort/main.c b/test/csuite/timestamp_abort/main.c
index f6dadd95495..ca5fa10c2db 100644
--- a/test/csuite/timestamp_abort/main.c
+++ b/test/csuite/timestamp_abort/main.c
@@ -56,6 +56,7 @@ static char home[1024]; /* Program working dir */
* Each worker thread creates its own records file that records the data it
* inserted and it records the timestamp that was used for that insertion.
*/
+#define INVALID_KEY UINT64_MAX
#define MAX_CKPT_INVL 5 /* Maximum interval between checkpoints */
#define MAX_TH 12
#define MAX_TIME 40
@@ -84,6 +85,22 @@ static uint64_t th_ts[MAX_TH];
"transaction_sync=(enabled,method=none)"
#define ENV_CONFIG_REC "log=(archive=false,recover=on)"
+typedef struct {
+ uint64_t absent_key; /* Last absent key */
+ uint64_t exist_key; /* First existing key after miss */
+ uint64_t first_key; /* First key in range */
+ uint64_t first_miss; /* First missing key */
+ uint64_t last_key; /* Last key in range */
+} REPORT;
+
+typedef struct {
+ WT_CONNECTION *conn;
+ uint64_t start;
+ uint32_t info;
+} THREAD_DATA;
+
+static void handler(int)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void usage(void)
WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
@@ -94,12 +111,6 @@ usage(void)
exit(EXIT_FAILURE);
}
-typedef struct {
- WT_CONNECTION *conn;
- uint64_t start;
- uint32_t info;
-} WT_THREAD_DATA;
-
/*
* thread_ts_run --
* Runner function for a timestamp thread.
@@ -109,11 +120,11 @@ thread_ts_run(void *arg)
{
WT_CURSOR *cur_stable;
WT_SESSION *session;
- WT_THREAD_DATA *td;
+ THREAD_DATA *td;
uint64_t i, last_ts, oldest_ts;
char tscfg[64];
- td = (WT_THREAD_DATA *)arg;
+ td = (THREAD_DATA *)arg;
last_ts = 0;
testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
@@ -177,7 +188,7 @@ thread_ckpt_run(void *arg)
FILE *fp;
WT_RAND_STATE rnd;
WT_SESSION *session;
- WT_THREAD_DATA *td;
+ THREAD_DATA *td;
uint64_t ts;
uint32_t sleep_time;
int i;
@@ -185,7 +196,7 @@ thread_ckpt_run(void *arg)
__wt_random_init(&rnd);
- td = (WT_THREAD_DATA *)arg;
+ td = (THREAD_DATA *)arg;
/*
* Keep a separate file with the records we wrote for checking.
*/
@@ -233,7 +244,7 @@ thread_run(void *arg)
WT_ITEM data;
WT_RAND_STATE rnd;
WT_SESSION *session;
- WT_THREAD_DATA *td;
+ THREAD_DATA *td;
uint64_t i, stable_ts;
char cbuf[MAX_VAL], lbuf[MAX_VAL], obuf[MAX_VAL];
char kname[64], tscfg[64];
@@ -244,7 +255,7 @@ thread_run(void *arg)
memset(obuf, 0, sizeof(obuf));
memset(kname, 0, sizeof(kname));
- td = (WT_THREAD_DATA *)arg;
+ td = (THREAD_DATA *)arg;
/*
* Set up the separate file for checking.
*/
@@ -310,7 +321,13 @@ thread_run(void *arg)
"commit_timestamp=%" PRIx64, stable_ts));
testutil_check(
session->commit_transaction(session, tscfg));
- th_ts[td->info] = stable_ts;
+ /*
+ * Update the thread's last-committed timestamp.
+ * Don't let the compiler re-order this statement:
+ * if we were to race with the timestamp thread, it
+ * might see our thread update before the commit.
+ */
+ WT_PUBLISH(th_ts[td->info], stable_ts);
} else
testutil_check(
session->commit_transaction(session, NULL));
@@ -343,13 +360,13 @@ run_workload(uint32_t nth)
{
WT_CONNECTION *conn;
WT_SESSION *session;
- WT_THREAD_DATA *td;
+ THREAD_DATA *td;
wt_thread_t *thr;
uint32_t ckpt_id, i, ts_id;
char envconf[512];
thr = dcalloc(nth+2, sizeof(*thr));
- td = dcalloc(nth+2, sizeof(WT_THREAD_DATA));
+ td = dcalloc(nth+2, sizeof(THREAD_DATA));
if (chdir(home) != 0)
testutil_die(errno, "Child chdir: %s", home);
if (inmem)
@@ -398,7 +415,7 @@ run_workload(uint32_t nth)
printf("Create %" PRIu32 " writer threads\n", nth);
for (i = 0; i < nth; ++i) {
td[i].conn = conn;
- td[i].start = (UINT64_MAX / nth) * i;
+ td[i].start = WT_BILLION * (uint64_t)i;
td[i].info = i;
testutil_check(__wt_thread_create(
NULL, &thr[i], thread_run, &td[i]));
@@ -434,18 +451,66 @@ timestamp_build(void)
extern int __wt_optind;
extern char *__wt_optarg;
+/*
+ * Initialize a report structure. Since zero is a valid key we
+ * cannot just clear it.
+ */
+static void
+initialize_rep(REPORT *r)
+{
+ r->first_key = r->first_miss = INVALID_KEY;
+ r->absent_key = r->exist_key = r->last_key = INVALID_KEY;
+}
+
+/*
+ * Print out information if we detect missing records in the
+ * middle of the data of a report structure.
+ */
+static void
+print_missing(REPORT *r, const char *fname, const char *msg)
+{
+ if (r->exist_key != INVALID_KEY)
+ printf("%s: %s error %" PRIu64
+ " absent records %" PRIu64 "-%" PRIu64
+ ". Then keys %" PRIu64 "-%" PRIu64 " exist."
+ " Key range %" PRIu64 "-%" PRIu64 "\n",
+ fname, msg,
+ r->exist_key - r->first_miss - 1,
+ r->first_miss, r->exist_key - 1,
+ r->exist_key, r->last_key,
+ r->first_key, r->last_key);
+}
+
+/*
+ * Signal handler to catch if the child died unexpectedly.
+ */
+static void
+handler(int sig)
+{
+ pid_t pid;
+
+ WT_UNUSED(sig);
+ pid = wait(NULL);
+ /*
+ * The core file will indicate why the child exited. Choose EINVAL here.
+ */
+ testutil_die(EINVAL,
+ "Child process %" PRIu64 " abnormally exited", (uint64_t)pid);
+}
+
int
main(int argc, char *argv[])
{
+ struct sigaction sa;
struct stat sb;
FILE *fp;
+ REPORT c_rep[MAX_TH], l_rep[MAX_TH], o_rep[MAX_TH];
WT_CONNECTION *conn;
WT_CURSOR *cur_coll, *cur_local, *cur_oplog, *cur_stable;
WT_RAND_STATE rnd;
WT_SESSION *session;
pid_t pid;
uint64_t absent_coll, absent_local, absent_oplog, count, key, last_key;
- uint64_t first_miss, middle_coll, middle_local, middle_oplog;
uint64_t stable_fp, stable_val, val[MAX_TH+1];
uint32_t i, nth, timeout;
int ch, status, ret;
@@ -524,6 +589,7 @@ main(int argc, char *argv[])
if (nth < MIN_TH)
nth = MIN_TH;
}
+
printf("Parent: compatibility: %s, "
"in-mem log sync: %s, timestamp in use: %s\n",
compat ? "true" : "false",
@@ -536,6 +602,9 @@ main(int argc, char *argv[])
* kill the child, run recovery and make sure all items we wrote
* exist after recovery runs.
*/
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = handler;
+ testutil_checksys(sigaction(SIGCHLD, &sa, NULL));
testutil_checksys((pid = fork()) < 0);
if (pid == 0) { /* child */
@@ -548,15 +617,15 @@ main(int argc, char *argv[])
* Sleep for the configured amount of time before killing
* the child. Start the timeout from the time we notice that
* the file has been created. That allows the test to run
- * correctly on really slow machines. Verify the process ID
- * still exists in case the child aborts for some reason we
- * don't stay in this loop forever.
+ * correctly on really slow machines.
*/
testutil_check(__wt_snprintf(
statname, sizeof(statname), "%s/%s", home, ckpt_file));
- while (stat(statname, &sb) != 0 && kill(pid, 0) == 0)
+ while (stat(statname, &sb) != 0)
sleep(1);
sleep(timeout);
+ sa.sa_handler = SIG_DFL;
+ testutil_checksys(sigaction(SIGCHLD, &sa, NULL));
/*
* !!! It should be plenty long enough to make sure more than
@@ -573,6 +642,12 @@ main(int argc, char *argv[])
*/
if (chdir(home) != 0)
testutil_die(errno, "parent chdir: %s", home);
+ /*
+ * The tables can get very large, so while we'd ideally like to
+ * copy the entire database, we only copy the log files for now.
+ * Otherwise it can take far too long to run the test, particularly
+ * in automated testing.
+ */
testutil_check(__wt_snprintf(buf, sizeof(buf),
"rm -rf ../%s.SAVE && mkdir ../%s.SAVE && "
"cp -p WiredTigerLog.* ../%s.SAVE",
@@ -619,7 +694,9 @@ main(int argc, char *argv[])
absent_coll = absent_local = absent_oplog = 0;
fatal = false;
for (i = 0; i < nth; ++i) {
- first_miss = middle_coll = middle_local = middle_oplog = 0;
+ initialize_rep(&c_rep[i]);
+ initialize_rep(&l_rep[i]);
+ initialize_rep(&o_rep[i]);
testutil_check(__wt_snprintf(
fname, sizeof(fname), RECORDS_FILE, i));
if ((fp = fopen(fname, "r")) == NULL)
@@ -632,9 +709,14 @@ main(int argc, char *argv[])
* but records may be missing at the end. If we did
* write-no-sync, we expect every key to have been recovered.
*/
- for (last_key = UINT64_MAX;; ++count, last_key = key) {
+ for (last_key = INVALID_KEY;; ++count, last_key = key) {
ret = fscanf(fp, "%" SCNu64 "%" SCNu64 "\n",
&stable_fp, &key);
+ if (last_key == INVALID_KEY) {
+ c_rep[i].first_key = key;
+ l_rep[i].first_key = key;
+ o_rep[i].first_key = key;
+ }
if (ret != EOF && ret != 2) {
/*
* If we find a partial line, consider it
@@ -651,7 +733,7 @@ main(int argc, char *argv[])
* written key at the end that can result in a false
* negative error for a missing record. Detect it.
*/
- if (last_key != UINT64_MAX && key != last_key + 1) {
+ if (last_key != INVALID_KEY && key != last_key + 1) {
printf("%s: Ignore partial record %" PRIu64
" last valid key %" PRIu64 "\n",
fname, key, last_key);
@@ -682,18 +764,16 @@ main(int argc, char *argv[])
fname, key, stable_fp, val[i]);
absent_coll++;
}
- if (middle_coll == 0)
- first_miss = key;
- middle_coll = key;
- } else if (middle_coll != 0) {
+ if (c_rep[i].first_miss == INVALID_KEY)
+ c_rep[i].first_miss = key;
+ c_rep[i].absent_key = key;
+ } else if (c_rep[i].absent_key != INVALID_KEY &&
+ c_rep[i].exist_key == INVALID_KEY) {
/*
- * We should never find an existing key after
- * we have detected one missing.
+ * If we get here we found a record that exists
+ * after absent records, a hole in our data.
*/
- printf("%s: COLLECTION after absent records %"
- PRIu64 "-%" PRIu64 " key %" PRIu64
- " exists\n",
- fname, first_miss, middle_coll, key);
+ c_rep[i].exist_key = key;
fatal = true;
}
/*
@@ -706,15 +786,16 @@ main(int argc, char *argv[])
printf("%s: LOCAL no record with key %"
PRIu64 "\n", fname, key);
absent_local++;
- middle_local = key;
- } else if (middle_local != 0) {
+ if (l_rep[i].first_miss == INVALID_KEY)
+ l_rep[i].first_miss = key;
+ l_rep[i].absent_key = key;
+ } else if (l_rep[i].absent_key != INVALID_KEY &&
+ l_rep[i].exist_key == INVALID_KEY) {
/*
* We should never find an existing key after
* we have detected one missing.
*/
- printf("%s: LOCAL after absent record at %"
- PRIu64 " key %" PRIu64 " exists\n",
- fname, middle_local, key);
+ l_rep[i].exist_key = key;
fatal = true;
}
/*
@@ -727,23 +808,28 @@ main(int argc, char *argv[])
printf("%s: OPLOG no record with key %"
PRIu64 "\n", fname, key);
absent_oplog++;
- middle_oplog = key;
- } else if (middle_oplog != 0) {
+ if (o_rep[i].first_miss == INVALID_KEY)
+ o_rep[i].first_miss = key;
+ o_rep[i].absent_key = key;
+ } else if (o_rep[i].absent_key != INVALID_KEY &&
+ o_rep[i].exist_key == INVALID_KEY) {
/*
* We should never find an existing key after
* we have detected one missing.
*/
- printf("%s: OPLOG after absent record at %"
- PRIu64 " key %" PRIu64 " exists\n",
- fname, middle_oplog, key);
+ o_rep[i].exist_key = key;
fatal = true;
}
}
+ c_rep[i].last_key = last_key;
+ l_rep[i].last_key = last_key;
+ o_rep[i].last_key = last_key;
testutil_checksys(fclose(fp) != 0);
+ print_missing(&c_rep[i], fname, "COLLECTION");
+ print_missing(&l_rep[i], fname, "LOCAL");
+ print_missing(&o_rep[i], fname, "OPLOG");
}
testutil_check(conn->close(conn, NULL));
- if (fatal)
- return (EXIT_FAILURE);
if (!inmem && absent_coll) {
printf("COLLECTION: %" PRIu64
" record(s) absent from %" PRIu64 "\n",
diff --git a/test/format/compact.c b/test/format/compact.c
index c1a73bea64b..2df3839f67b 100644
--- a/test/format/compact.c
+++ b/test/format/compact.c
@@ -66,9 +66,14 @@ compact(void *arg)
/*
* Compact can return EBUSY if concurrent with alter or if there
* is eviction pressure, or we collide with checkpoints.
+ *
+ * Compact returns ETIMEDOUT if the compaction doesn't finish in
+ * some number of seconds. We don't configure a timeout and
+ * occasionally exceed the default of 1200 seconds.
*/
ret = session->compact(session, g.uri, NULL);
- if (ret != 0 && ret != EBUSY && ret != WT_ROLLBACK)
+ if (ret != 0 &&
+ ret != EBUSY && ret != ETIMEDOUT && ret != WT_ROLLBACK)
testutil_die(ret, "session.compact");
}
diff --git a/test/format/config.c b/test/format/config.c
index 049a655cb79..769ed608e64 100644
--- a/test/format/config.c
+++ b/test/format/config.c
@@ -181,6 +181,10 @@ config_setup(void)
g.c_cache = g.c_threads;
}
+ /* Check if a minimum cache size has been specified. */
+ if (g.c_cache_minimum != 0 && g.c_cache < g.c_cache_minimum)
+ g.c_cache = g.c_cache_minimum;
+
/* Give Helium configuration a final review. */
if (DATASOURCE("helium"))
config_helium_reset();
@@ -190,6 +194,25 @@ config_setup(void)
config_in_memory_reset();
/*
+ * Key/value minimum/maximum are related, correct unless specified by
+ * the configuration.
+ */
+ if (!config_is_perm("key_min") && g.c_key_min > g.c_key_max)
+ g.c_key_min = g.c_key_max;
+ if (!config_is_perm("key_max") && g.c_key_max < g.c_key_min)
+ g.c_key_max = g.c_key_min;
+ if (g.c_key_min > g.c_key_max)
+ testutil_die(EINVAL, "key_min may not be larger than key_max");
+
+ if (!config_is_perm("value_min") && g.c_value_min > g.c_value_max)
+ g.c_value_min = g.c_value_max;
+ if (!config_is_perm("value_max") && g.c_value_max < g.c_value_min)
+ g.c_value_max = g.c_value_min;
+ if (g.c_value_min > g.c_value_max)
+ testutil_die(EINVAL,
+ "value_min may not be larger than value_max");
+
+ /*
* Run-length is configured by a number of operations and a timer.
*
* If the operation count and the timer are both configured, do nothing.
@@ -213,25 +236,6 @@ config_setup(void)
config_single("timer=360", 0);
}
- /*
- * Key/value minimum/maximum are related, correct unless specified by
- * the configuration.
- */
- if (!config_is_perm("key_min") && g.c_key_min > g.c_key_max)
- g.c_key_min = g.c_key_max;
- if (!config_is_perm("key_max") && g.c_key_max < g.c_key_min)
- g.c_key_max = g.c_key_min;
- if (g.c_key_min > g.c_key_max)
- testutil_die(EINVAL, "key_min may not be larger than key_max");
-
- if (!config_is_perm("value_min") && g.c_value_min > g.c_value_max)
- g.c_value_min = g.c_value_max;
- if (!config_is_perm("value_max") && g.c_value_max < g.c_value_min)
- g.c_value_max = g.c_value_min;
- if (g.c_value_min > g.c_value_max)
- testutil_die(EINVAL,
- "value_min may not be larger than value_max");
-
/* Reset the key count. */
g.key_cnt = 0;
}
diff --git a/test/format/config.h b/test/format/config.h
index 6fb4071074d..7ac65147462 100644
--- a/test/format/config.h
+++ b/test/format/config.h
@@ -101,6 +101,10 @@ static CONFIG c[] = {
"size of the cache in MB",
0x0, 1, 100, 100 * 1024, &g.c_cache, NULL },
+ { "cache_minimum",
+ "minimum size of the cache in MB",
+ C_IGNORE, 1, 0, 100 * 1024, &g.c_cache_minimum, NULL },
+
{ "checkpoints",
"type of checkpoints (on | off | wiredtiger)",
C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_checkpoint},
@@ -115,7 +119,7 @@ static CONFIG c[] = {
{ "checksum",
"type of checksums (on | off | uncompressed)",
- C_IGNORE|C_STRING, 1, 3, 3, NULL, &g.c_checksum },
+ C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_checksum },
{ "chunk_size",
"LSM chunk size in MB",
@@ -159,7 +163,7 @@ static CONFIG c[] = {
{ "file_type",
"type of store to create (fix | var | row)",
- C_IGNORE|C_STRING, 1, 3, 3, NULL, &g.c_file_type },
+ C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_file_type },
{ "firstfit",
"if allocation is firstfit", /* 10% */
@@ -196,7 +200,7 @@ static CONFIG c[] = {
{ "isolation",
"isolation level "
"(random | read-uncommitted | read-committed | snapshot)",
- C_IGNORE|C_STRING, 1, 4, 4, NULL, &g.c_isolation },
+ C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_isolation },
{ "key_gap",
"gap between instantiated keys on a Btree page",
@@ -272,7 +276,7 @@ static CONFIG c[] = {
{ "quiet",
"quiet run (same as -q)",
- C_IGNORE|C_BOOL, 0, 0, 0, &g.c_quiet, NULL },
+ C_IGNORE|C_BOOL, 0, 0, 1, &g.c_quiet, NULL },
{ "read_pct",
"percent operations that are reads",
@@ -296,7 +300,7 @@ static CONFIG c[] = {
{ "runs",
"the number of runs",
- C_IGNORE, 0, UINT_MAX, UINT_MAX, &g.c_runs, NULL },
+ C_IGNORE, 0, 0, UINT_MAX, &g.c_runs, NULL },
{ "salvage",
"salvage testing", /* 100% */
@@ -319,8 +323,8 @@ static CONFIG c[] = {
0x0, 1, 32, 128, &g.c_threads, NULL },
{ "timer",
- "maximum time to run in minutes (default 20 minutes)",
- C_IGNORE, 0, UINT_MAX, UINT_MAX, &g.c_timer, NULL },
+ "maximum time to run in minutes",
+ C_IGNORE, 0, 0, UINT_MAX, &g.c_timer, NULL },
{ "transaction_timestamps", /* 10% */
"enable transaction timestamp support",
diff --git a/test/format/format.h b/test/format/format.h
index 96e1a0fe335..af66e166f47 100644
--- a/test/format/format.h
+++ b/test/format/format.h
@@ -150,6 +150,7 @@ typedef struct {
uint32_t c_bloom_hash_count;
uint32_t c_bloom_oldest;
uint32_t c_cache;
+ uint32_t c_cache_minimum;
char *c_checkpoint;
uint32_t c_checkpoint_log_size;
uint32_t c_checkpoint_wait;
diff --git a/test/mciproject.yml b/test/mciproject.yml
index 4b67299d14c..16e103e5366 100644
--- a/test/mciproject.yml
+++ b/test/mciproject.yml
@@ -65,7 +65,7 @@ tasks:
./build_posix/reconf
${configure_env_vars|} ./configure --enable-diagnostic --enable-python --enable-zlib --enable-strict --enable-verbose
${make_command|make} ${smp_command|} 2>&1
- TESTUTIL_ENABLE_LONG_TESTS=1 ${make_command|make} VERBOSE=1 check 2>&1
+ ${test_env_vars|} TESTUTIL_ENABLE_LONG_TESTS=1 ${make_command|make} VERBOSE=1 check 2>&1
fi
- command: archive.targz_pack
params:
@@ -96,15 +96,9 @@ tasks:
set -o errexit
set -o verbose
- # On 10.12, change the binary location with install_name_tool since DYLD_LIBRARY_PATH
- # appears not to work for dynamic modules loaded by python. For wt, the libtool generated
- # script has the wrong path for running on test machines.
- if [ "$(uname -s)" == "Darwin" ]; then
- WT_VERSION=$(m4 build_posix/aclocal/version.m4)
- install_name_tool -change /usr/local/lib/libwiredtiger-$WT_VERSION.dylib $(pwd)/.libs/libwiredtiger-$WT_VERSION.dylib lang/python/_wiredtiger.so
- install_name_tool -change /usr/local/lib/libwiredtiger-$WT_VERSION.dylib $(pwd)/.libs/libwiredtiger-$WT_VERSION.dylib .libs/wt
- fi
-
+ # Avoid /usr/bin/python, at least on macOS: with System Integrity
+ # Protection enabled, it ignores DYLD_LIBRARY_PATH and hence
+ # doesn't find the WiredTiger library in the local tree.
${test_env_vars|} python ./test/suite/run.py -v 2 ${smp_command|} 2>&1
- name: compile-windows-alt
@@ -186,7 +180,7 @@ buildvariants:
smp_command: -j $(sysctl -n hw.logicalcpu)
configure_env_vars: PATH=/opt/local/bin:$PATH
make_command: PATH=/opt/local/bin:$PATH ARCHFLAGS=-Wno-error=unused-command-line-argument-hard-error-in-future make
- test_env_vars: DYLD_LIBRARY_PATH=`pwd`/.libs
+ test_env_vars: PATH=/opt/local/bin:$PATH DYLD_LIBRARY_PATH=`pwd`/.libs
tasks:
- name: compile
- name: unit-test
diff --git a/test/suite/suite_subprocess.py b/test/suite/suite_subprocess.py
index 626a6b5efd3..71aab9c5422 100644
--- a/test/suite/suite_subprocess.py
+++ b/test/suite/suite_subprocess.py
@@ -26,8 +26,9 @@
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
-import os, subprocess
+import os, subprocess, sys
from run import wt_builddir
+from wttest import WiredTigerTestCase
# suite_subprocess.py
# Run a subprocess within the test suite
@@ -117,6 +118,28 @@ class suite_subprocess:
print 'ERROR: ' + filename + ' should not be empty (this command expected error output)'
self.assertNotEqual(filesize, 0, filename + ': expected to not be empty')
+ def verbose_env(self, envvar):
+ return envvar + '=' + str(os.environ.get(envvar)) + '\n'
+
+ def show_outputs(self, procargs, message, filenames):
+ out = 'ERROR: wt command ' + message + ': ' + str(procargs) + '\n' + \
+ self.verbose_env('PATH') + \
+ self.verbose_env('LD_LIBRARY_PATH') + \
+ self.verbose_env('DYLD_LIBRARY_PATH') + \
+ self.verbose_env('PYTHONPATH') + \
+ 'output files follow:'
+ WiredTigerTestCase.prout(out)
+ for filename in filenames:
+ maxbytes = 1024*100
+ with open(filename, 'r') as f:
+ contents = f.read(maxbytes)
+ if len(contents) > 0:
+ if len(contents) >= maxbytes:
+ contents += '...\n'
+ sepline = '*' * 50 + '\n'
+ out = sepline + filename + '\n' + sepline + contents
+ WiredTigerTestCase.prout(out)
+
# Run the wt utility.
def runWt(self, args, infilename=None,
outfilename=None, errfilename=None, closeconn=True,
@@ -131,10 +154,17 @@ class suite_subprocess:
wterrname = errfilename or "wt.err"
with open(wterrname, "w") as wterr:
with open(wtoutname, "w") as wtout:
- procargs = [os.path.join(wt_builddir, "wt")]
+ # Prefer running the actual 'wt' executable rather than the
+ # 'wt' script created by libtool. On macOS with System Integrity
+ # Protection enabled, running a shell script strips
+ # environment variables needed to run 'wt'.
+ if sys.platform == "darwin":
+ wtexe = os.path.join(wt_builddir, ".libs", "wt")
+ else:
+ wtexe = os.path.join(wt_builddir, "wt")
+ procargs = [ wtexe ]
if self._gdbSubprocess:
- procargs = [os.path.join(wt_builddir, "libtool"),
- "--mode=execute", "gdb", "--args"] + procargs
+ procargs = [ "gdb", "--args" ] + procargs
procargs.extend(args)
if self._gdbSubprocess:
infilepart = ""
@@ -155,10 +185,16 @@ class suite_subprocess:
returncode = subprocess.call(
procargs, stdout=wtout, stderr=wterr)
if failure:
+ if returncode == 0:
+ self.show_outputs(procargs, "expected failure, got success",
+ [wtoutname, wterrname])
self.assertNotEqual(returncode, 0,
'expected failure: "' + \
str(procargs) + '": exited ' + str(returncode))
else:
+ if returncode != 0:
+ self.show_outputs(procargs, "expected success, got failure",
+ [wtoutname, wterrname])
self.assertEqual(returncode, 0,
'expected success: "' + \
str(procargs) + '": exited ' + str(returncode))