diff options
author | Luke Chen <luke.chen@mongodb.com> | 2017-11-03 14:16:15 +1100 |
---|---|---|
committer | Luke Chen <luke.chen@mongodb.com> | 2017-11-03 14:16:15 +1100 |
commit | 0a2f8f6ad756189263d050b29f69bc57b45b9816 (patch) | |
tree | 98250d6b6447c379ad6b2f2847b38da9df698c33 | |
parent | a31e9d415a25561bd36b001f3a6e9b0a6a115236 (diff) | |
parent | d4e28e6ca1430a7655621c47cc9d77092a813425 (diff) | |
download | mongo-0a2f8f6ad756189263d050b29f69bc57b45b9816.tar.gz |
Merge branch 'develop' into mongodb-3.6
70 files changed, 1206 insertions, 1032 deletions
diff --git a/dist/api_data.py b/dist/api_data.py index 8fcf99ad3c0..3d6d4712413 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -553,6 +553,7 @@ connection_runtime_config = [ 'api', 'block', 'checkpoint', + 'checkpoint_progress', 'compact', 'evict', 'evict_stuck', diff --git a/dist/flags.py b/dist/flags.py index 70602333ad5..21fd0756435 100644 --- a/dist/flags.py +++ b/dist/flags.py @@ -65,6 +65,7 @@ flags = { 'VERB_API', 'VERB_BLOCK', 'VERB_CHECKPOINT', + 'VERB_CHECKPOINT_PROGRESS', 'VERB_COMPACT', 'VERB_EVICT', 'VERB_EVICTSERVER', diff --git a/dist/package/debian/README.Debian b/dist/package/debian/README.Debian deleted file mode 100644 index 2028c0740dc..00000000000 --- a/dist/package/debian/README.Debian +++ /dev/null @@ -1,8 +0,0 @@ -wiredtiger for Debian ---------------------- - -This is a package of the WiredTiger database library for Debian based -systems. For more information on WiredTiger please visit: -http://www.wiredtiger.com or contact us at info@wiredtiger.com - - -- Alex <alexg@wiredtiger.com> Tue, 01 Apr 2014 15:50:02 +1100 diff --git a/dist/package/debian/README.source b/dist/package/debian/README.source deleted file mode 100644 index ddd6dc94c3d..00000000000 --- a/dist/package/debian/README.source +++ /dev/null @@ -1,9 +0,0 @@ -wiredtiger for Debian ---------------------- - -<this file describes information about the source package, see Debian policy -manual section 4.14. 
You WILL either need to modify or delete this file> - - - - diff --git a/dist/package/debian/changelog b/dist/package/debian/changelog deleted file mode 100644 index 1481a506d6d..00000000000 --- a/dist/package/debian/changelog +++ /dev/null @@ -1,5 +0,0 @@ -wiredtiger (2.1.2-1) UNRELEASED; urgency=low - - * Initial release of WiredTiger - - -- Alex <alexg@wiredtiger.com> Tue, 01 Apr 2014 15:50:02 +1100 diff --git a/dist/package/debian/compat b/dist/package/debian/compat deleted file mode 100644 index 45a4fb75db8..00000000000 --- a/dist/package/debian/compat +++ /dev/null @@ -1 +0,0 @@ -8 diff --git a/dist/package/debian/control b/dist/package/debian/control deleted file mode 100644 index 5ad2b71b4b9..00000000000 --- a/dist/package/debian/control +++ /dev/null @@ -1,36 +0,0 @@ -Source: wiredtiger -Priority: extra -Maintainer: Alex Gorrod <alexg@wiredtiger.com> -Build-Depends: debhelper (>= 8.0.0), autotools-dev -Standards-Version: 3.9.4 -Section: libs -Homepage: http://www.wiredtiger.com -#Vcs-Git: git://git.debian.org/collab-maint/wiredtiger.git -#Vcs-Browser: http://git.debian.org/?p=collab-maint/wiredtiger.git;a=summary - -Package: libwiredtiger-dev -Architecture: any -Section: libdevel -Priority: extra -Depends: ${misc:Depends} -Description: WiredTiger Database Libraries [development] - This is the development package which contains headers and static - libraries for the WiredTiger database library. - -Package: libwiredtiger -Architecture: any -Depends: ${shlibs:Depends}, - ${misc:Depends} -Description: WiredTiger Database Libraries [runtime] - This is the runtime package for programs that use the WiredTiger - database library. 
- -Package: wiredtiger-util -Architecture: any -Section: database -Priority: extra -Depends: ${shlibs:Depends}, - ${misc:Depends} -Description: WiredTiger Database Utilities - This package provides tools for manipulating WiredTiger databases - diff --git a/dist/package/debian/copyright b/dist/package/debian/copyright deleted file mode 100644 index 1394ad8b4bd..00000000000 --- a/dist/package/debian/copyright +++ /dev/null @@ -1,26 +0,0 @@ -Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ -Upstream-Name: wiredtiger -Source: <http://www.wiredtiger.com> - -Files: * -Copyright: (c) 2008-2014 WiredTiger, Inc. -License: - This program is free software: you can redistribute it and/or modify it under - the terms of either version 2 or version 3 of the GNU General Public License - as published by the Free Software Foundation. - . - On Debian GNU/Linux systems, the complete text of the GNU General - Public License can be found in `/usr/share/common-licenses/GPL-2' and - `/usr/share/common-licenses/GPL-3'. - . - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - details. - . - For a license to use the WiredTiger software under conditions other than those - described by the GNU General Public License, or for technical support for this - software, contact WiredTiger, Inc. at info@wiredtiger.com. - . - For further information, see the licensing section in the documentation. 
- diff --git a/dist/package/debian/docs b/dist/package/debian/docs deleted file mode 100644 index 50bd824bb7b..00000000000 --- a/dist/package/debian/docs +++ /dev/null @@ -1,2 +0,0 @@ -NEWS -README diff --git a/dist/package/debian/files b/dist/package/debian/files deleted file mode 100644 index 53662d0c48e..00000000000 --- a/dist/package/debian/files +++ /dev/null @@ -1,3 +0,0 @@ -libwiredtiger-dev_2.1.2-1_amd64.deb libdevel extra -libwiredtiger_2.1.2-1_amd64.deb libs extra -wiredtiger-util_2.1.2-1_amd64.deb database extra diff --git a/dist/package/debian/libwiredtiger-dev.dirs b/dist/package/debian/libwiredtiger-dev.dirs deleted file mode 100644 index da07fddd09b..00000000000 --- a/dist/package/debian/libwiredtiger-dev.dirs +++ /dev/null @@ -1,2 +0,0 @@ -usr/include -usr/lib diff --git a/dist/package/debian/libwiredtiger-dev.install b/dist/package/debian/libwiredtiger-dev.install deleted file mode 100644 index deb99408b27..00000000000 --- a/dist/package/debian/libwiredtiger-dev.install +++ /dev/null @@ -1,2 +0,0 @@ -usr/include/* -usr/lib/pkgconfig/* diff --git a/dist/package/debian/libwiredtiger-dev.substvars b/dist/package/debian/libwiredtiger-dev.substvars deleted file mode 100644 index abd3ebebc30..00000000000 --- a/dist/package/debian/libwiredtiger-dev.substvars +++ /dev/null @@ -1 +0,0 @@ -misc:Depends= diff --git a/dist/package/debian/libwiredtiger.dirs b/dist/package/debian/libwiredtiger.dirs deleted file mode 100644 index 68457717bd8..00000000000 --- a/dist/package/debian/libwiredtiger.dirs +++ /dev/null @@ -1 +0,0 @@ -usr/lib diff --git a/dist/package/debian/libwiredtiger.install b/dist/package/debian/libwiredtiger.install deleted file mode 100644 index 27fae7a0850..00000000000 --- a/dist/package/debian/libwiredtiger.install +++ /dev/null @@ -1,2 +0,0 @@ -usr/lib/libwiredtiger*.a -usr/lib/libwiredtiger*.so diff --git a/dist/package/debian/libwiredtiger.postinst.debhelper b/dist/package/debian/libwiredtiger.postinst.debhelper deleted file mode 100644 
index 3d89d3ef629..00000000000 --- a/dist/package/debian/libwiredtiger.postinst.debhelper +++ /dev/null @@ -1,5 +0,0 @@ -# Automatically added by dh_makeshlibs -if [ "$1" = "configure" ]; then - ldconfig -fi -# End automatically added section diff --git a/dist/package/debian/libwiredtiger.postrm.debhelper b/dist/package/debian/libwiredtiger.postrm.debhelper deleted file mode 100644 index 7f44047270f..00000000000 --- a/dist/package/debian/libwiredtiger.postrm.debhelper +++ /dev/null @@ -1,5 +0,0 @@ -# Automatically added by dh_makeshlibs -if [ "$1" = "remove" ]; then - ldconfig -fi -# End automatically added section diff --git a/dist/package/debian/libwiredtiger.substvars b/dist/package/debian/libwiredtiger.substvars deleted file mode 100644 index 1e00e6fd7a6..00000000000 --- a/dist/package/debian/libwiredtiger.substvars +++ /dev/null @@ -1,2 +0,0 @@ -shlibs:Depends=libc6 (>= 2.14) -misc:Depends= diff --git a/dist/package/debian/rules b/dist/package/debian/rules deleted file mode 100755 index 312e24d2e6f..00000000000 --- a/dist/package/debian/rules +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/make -f -# -*- makefile -*- -# Sample debian/rules that uses debhelper. -# This file was originally written by Joey Hess and Craig Small. -# As a special exception, when this file is copied by dh-make into a -# dh-make output file, you may use that output file without restriction. -# This special exception was added by Craig Small in version 0.37 of dh-make. - -# Uncomment this to turn on verbose mode. 
-#export DH_VERBOSE=1 - -%: - dh $@ --with autotools-dev diff --git a/dist/package/debian/shlibs.local b/dist/package/debian/shlibs.local deleted file mode 100644 index a3b3face389..00000000000 --- a/dist/package/debian/shlibs.local +++ /dev/null @@ -1 +0,0 @@ -libwiredtiger 2.1.2 wiredtiger (>> 2.1.2-0), wiredtiger (<< 2.1.2-99) diff --git a/dist/package/debian/source/format b/dist/package/debian/source/format deleted file mode 100644 index 163aaf8d82b..00000000000 --- a/dist/package/debian/source/format +++ /dev/null @@ -1 +0,0 @@ -3.0 (quilt) diff --git a/dist/package/debian/watch b/dist/package/debian/watch deleted file mode 100644 index 9287dca3d91..00000000000 --- a/dist/package/debian/watch +++ /dev/null @@ -1,8 +0,0 @@ -# Watch control file for uscan -# to check for upstream updates and more. -# See uscan(1) for format - -# Compulsory line, this is a version 3 file -version=3 - -http://source.wiredtiger.com/releases/wiredtiger-(.*)\.tar\.bz2 diff --git a/dist/package/debian/wiredtiger-util.dirs b/dist/package/debian/wiredtiger-util.dirs deleted file mode 100644 index e7724817552..00000000000 --- a/dist/package/debian/wiredtiger-util.dirs +++ /dev/null @@ -1 +0,0 @@ -usr/bin diff --git a/dist/package/debian/wiredtiger-util.install b/dist/package/debian/wiredtiger-util.install deleted file mode 100644 index 1df36c612fb..00000000000 --- a/dist/package/debian/wiredtiger-util.install +++ /dev/null @@ -1 +0,0 @@ -usr/bin/* diff --git a/dist/package/debian/wiredtiger-util.substvars b/dist/package/debian/wiredtiger-util.substvars deleted file mode 100644 index 4dd9c7cf955..00000000000 --- a/dist/package/debian/wiredtiger-util.substvars +++ /dev/null @@ -1,2 +0,0 @@ -shlibs:Depends=libc6 (>= 2.14), wiredtiger (>> 2.1.2-0), wiredtiger (<< 2.1.2-99) -misc:Depends= diff --git a/dist/package/debian/wiredtiger.doc-base b/dist/package/debian/wiredtiger.doc-base deleted file mode 100644 index faa994f156f..00000000000 --- a/dist/package/debian/wiredtiger.doc-base +++ 
/dev/null @@ -1,12 +0,0 @@ -Document: wiredtiger -Title: Debian wiredtiger Manual -Author: WiredTiger, Inc. -Abstract: WiredTiger is a database storage engine library. -Section: library - -Format: postscript -Files: /usr/share/doc/wiredtiger/wiredtiger.ps.gz - -Format: HTML -Index: /usr/share/doc/wiredtiger/html/index.html -Files: /usr/share/doc/wiredtiger/html/*.html diff --git a/dist/package/wiredtiger.spec b/dist/package/wiredtiger.spec deleted file mode 100644 index f4cb78183d0..00000000000 --- a/dist/package/wiredtiger.spec +++ /dev/null @@ -1,58 +0,0 @@ -Name: wiredtiger -Version: 3.0.0 -Release: 1%{?dist} -Summary: WiredTiger data storage engine - -Group: Development/Libraries -License: GPLV2 or GPLV3 -URL: www.wiredtiger.com -Source0: http://source.wiredtiger.com/releases/%{name}-%{version}.tar.bz2 -BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX) - -BuildRequires: python-devel java-devel -Requires: jemalloc - -%description - -WiredTiger is a data storage engine that provides APIs for efficiently -storing data in highly concurrent applications. It includes functionality -for automatically maintaining indexes. It implements both row and column -store formats - so that all types of data can be stored space efficiently. - -WiredTiger is a library that can be accessed via C, Python and Java APIs. - - -%prep -%autosetup - - -%build -%configure --enable-java --enable-bzip2 --enable-snappy --enable-zlib -# Stop the build setting up an rpath -sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' libtool -sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool -make %{?_smp_mflags} - - -%install -rm -rf %{buildroot} -make install DESTDIR=%{buildroot} -# Need to resolve make install with --enable-python before we can -# install the python API. 
-# python setup.py install -O1 --skip-build --root $RPM_BUILD_ROOT - -%clean -rm -rf %{buildroot} - - -%files -%defattr(-,root,root,-) -%doc README LICENSE NEWS -%{_bindir}/* -%{_datadir}/* -%{_includedir}/* -%{_libdir}/* - - -%changelog - diff --git a/dist/s_define.list b/dist/s_define.list index b2f6cbec43e..dcaf975434f 100644 --- a/dist/s_define.list +++ b/dist/s_define.list @@ -58,7 +58,6 @@ WT_STAT_INCRV_BASE WT_STAT_WRITE WT_TIMEDIFF_US WT_TRET_ERROR_OK -WT_TXN_TIMESTAMP_FLAG_CHECK WT_UPDATE_SIZE WT_WITH_LOCK_NOWAIT WT_WITH_LOCK_WAIT diff --git a/dist/stat_data.py b/dist/stat_data.py index 24610b9ab14..64d3d46818b 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -257,6 +257,7 @@ connection_stats = [ CacheStat('cache_lookaside_entries', 'lookaside table entries', 'no_clear,no_scale'), CacheStat('cache_lookaside_insert', 'lookaside table insert calls'), CacheStat('cache_lookaside_remove', 'lookaside table remove calls'), + CacheStat('cache_lookaside_score', 'lookaside score', 'no_clear,no_scale'), CacheStat('cache_overhead', 'percentage overhead', 'no_clear,no_scale'), CacheStat('cache_pages_dirty', 'tracked dirty pages in the cache', 'no_clear,no_scale'), CacheStat('cache_pages_inuse', 'pages currently held in the cache', 'no_clear,no_scale'), diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index caa960d78ae..3df5920830c 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -733,6 +733,8 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref) WT_RET(ds->f(ds, ", entries %" PRIu32, entries)); WT_RET(ds->f(ds, ", %s", __wt_page_is_modified(page) ? 
"dirty" : "clean")); + WT_RET(ds->f(ds, + ", memory_size %" WT_SIZET_FMT, page->memory_footprint)); if (F_ISSET_ATOMIC(page, WT_PAGE_BUILD_KEYS)) WT_RET(ds->f(ds, ", keys-built")); @@ -1032,8 +1034,7 @@ __debug_modified(WT_DBG *ds, WT_UPDATE *upd) p = (size_t *)upd->data; memcpy(&nentries, p++, sizeof(size_t)); - data = upd->data + - sizeof(size_t) + ((size_t)nentries * 3 * sizeof(size_t)); + data = upd->data + sizeof(size_t) + (nentries * 3 * sizeof(size_t)); WT_RET(ds->f(ds, "%" WT_SIZET_FMT ": ", nentries)); for (; nentries-- > 0; data += data_size) { diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c index 838c6845b08..fe6be6517a2 100644 --- a/src/btree/bt_read.c +++ b/src/btree/bt_read.c @@ -88,7 +88,6 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id) uint32_t las_id, session_flags; const uint8_t *p; uint8_t upd_type; - int exact; cursor = NULL; page = ref->page; @@ -112,14 +111,9 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id) * in-order updates for a subsequent key. We process all of the updates * for a key and then insert those updates into the page, then all the * updates for the next key, and so on. - * - * Search for the block's unique prefix, stepping through any matching - * records. */ - cursor->set_key(cursor, - btree_id, ref->page_las->las_pageid, (uint64_t)0, &las_key); - if ((ret = cursor->search_near(cursor, &exact)) == 0 && exact < 0) - ret = cursor->next(cursor); + ret = __wt_las_cursor_position( + cursor, btree_id, ref->page_las->las_pageid); for (; ret == 0; ret = cursor->next(cursor)) { WT_ERR(cursor->get_key(cursor, &las_id, &las_pageid, &las_counter, &las_key)); @@ -436,6 +430,74 @@ err: /* } /* + * __las_page_skip -- + * Check if we can skip reading a page with lookaside entries. 
+ */ +static inline bool +__las_page_skip(WT_SESSION_IMPL *session, WT_REF *ref) +{ + WT_TXN *txn; + bool skip; + + txn = &session->txn; + skip = false; + + if (!__wt_atomic_casv32(&ref->state, WT_REF_LOOKASIDE, WT_REF_LOCKED)) + return (false); + + /* + * Skip lookaside pages if reading without a timestamp and all the + * updates in lookaside are in the past. + * + * If we skip a lookaside page, the tree cannot be left clean: + * lookaside entries must be resolved before the tree can be discarded. + * + * Lookaside eviction preferentially chooses the newest updates when + * creating page image with no stable timestamp. If a stable timestamp + * has been set, we have to visit the page because eviction chooses old + * version of records in that case. + * + * One case where we may need to visit the page is if lookaside + * eviction is active in tree 2 when a checkpoint has started and is + * working its way through tree 1. In that case, lookaside may have + * created a page image with updates in the future of the checkpoint. + */ + if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT)) + goto done; + + if (WT_TXNID_LE(txn->snap_min, ref->page_las->las_max_txn)) + goto done; + + if (!F_ISSET(txn, WT_TXN_HAS_TS_READ) && + !ref->page_las->las_skew_oldest) { + skip = true; + goto done; + } + +#ifdef HAVE_TIMESTAMPS + /* + * Skip lookaside pages if reading as of a timestamp and all the + * updates are in the future. 
+ */ + WT_ASSERT(session, + !F_ISSET(&session->txn, WT_TXN_HAS_TS_READ) || + __wt_timestamp_cmp(&ref->page_las->onpage_timestamp, + &session->txn.read_timestamp) <= 0); + + if (F_ISSET(&session->txn, WT_TXN_HAS_TS_READ) && + ref->page_las->las_skew_oldest && + __wt_timestamp_cmp( + &ref->page_las->min_timestamp, &session->txn.read_timestamp) > 0) { + skip = true; + goto done; + } +#endif + +done: WT_PUBLISH(ref->state, WT_REF_LOOKASIDE); + return (skip); +} + +/* * __wt_page_in_func -- * Acquire a hazard pointer to a page; if the page is not in-memory, * read it from the disk and build an in-memory version. @@ -472,29 +534,22 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags if (LF_ISSET(WT_READ_NO_EMPTY) && __wt_delete_page_skip(session, ref, false)) return (WT_NOTFOUND); - /* FALLTHROUGH */ - case WT_REF_DISK: + goto read; case WT_REF_LOOKASIDE: if (LF_ISSET(WT_READ_CACHE)) { - if (ref->state != WT_REF_LOOKASIDE || - !LF_ISSET(WT_READ_LOOKASIDE)) + if (!LF_ISSET(WT_READ_LOOKASIDE)) return (WT_NOTFOUND); -#ifdef HAVE_TIMESTAMPS - /* - * Skip lookaside pages if reading as of a - * timestamp and all the updates are in the - * future. - */ - if (F_ISSET( - &session->txn, WT_TXN_HAS_TS_READ) && - __wt_timestamp_cmp( - &ref->page_las->min_timestamp, - &session->txn.read_timestamp) > 0) + if (__las_page_skip(session, ref)) { + __wt_tree_modify_set(session); return (WT_NOTFOUND); -#endif + } } + goto read; + case WT_REF_DISK: + if (LF_ISSET(WT_READ_CACHE)) + return (WT_NOTFOUND); - /* +read: /* * The page isn't in memory, read it. If this thread is * allowed to do eviction work, check for space in the * cache. 
@@ -605,7 +660,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags continue; } - /* +skip_evict: /* * If we read the page and are configured to not trash * the cache, and no other thread has already used the * page, set the read generation so the page is evicted @@ -624,7 +679,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags __wt_cache_read_gen_new(session, page); } else if (!LF_ISSET(WT_READ_NO_GEN)) __wt_cache_read_gen_bump(session, page); -skip_evict: + /* * Check if we need an autocommit transaction. * Starting a transaction can trigger eviction, so skip diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 66f0478c542..dc699a6b23b 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -1383,11 +1383,11 @@ __split_multi_inmem( WT_DECL_RET; WT_PAGE *page; WT_SAVE_UPD *supd; - WT_UPDATE *prev_upd, *upd; + WT_UPDATE *upd; uint64_t recno; uint32_t i, slot; - WT_ASSERT(session, multi->las_pageid == 0); + WT_ASSERT(session, multi->page_las.las_pageid == 0); /* * In 04/2016, we removed column-store record numbers from the WT_PAGE @@ -1474,36 +1474,6 @@ __split_multi_inmem( break; WT_ILLEGAL_VALUE_ERR(session); } - - /* - * Discard the update used to create the on-page disk image. - * This is not just a performance issue: if the update used to - * create the value for this on-page disk image was a modify, - * and it was applied to the previous on-page value to - * determine a value to write to this disk image, that update - * cannot be applied to the new on-page value without risking - * corruption. - */ - if (supd->onpage_upd != NULL) { - for (prev_upd = upd; prev_upd != NULL && - prev_upd->next != supd->onpage_upd; - prev_upd = prev_upd->next) - ; - /* - * If the on-page update was in fact a tombstone, there - * will be no value on the page. Don't throw the - * tombstone away: we may need it to correctly resolve - * modifications. 
- */ - if (supd->onpage_upd->type == WT_UPDATE_DELETED && - prev_upd != NULL) - prev_upd = prev_upd->next; - if (prev_upd != NULL) { - __wt_update_obsolete_free( - session, page, prev_upd->next); - prev_upd->next = NULL; - } - } } /* @@ -1624,7 +1594,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session, * There can be an address or a disk image or both, but if there is * neither, there must be a backing lookaside page. */ - WT_ASSERT(session, multi->las_pageid != 0 || + WT_ASSERT(session, multi->page_las.las_pageid != 0 || multi->addr.addr != NULL || multi->disk_image != NULL); /* If closing the file, there better be an address. */ @@ -1664,7 +1634,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session, * WT_REF.state. Regardless of a backing address, WT_REF_LOOKASIDE * overrides WT_REF_DISK. */ - if (multi->las_pageid != 0) { + if (multi->page_las.las_pageid != 0) { /* * We should not have a disk image if we did lookaside * eviction. @@ -1672,11 +1642,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_ASSERT(session, multi->disk_image == NULL); WT_RET(__wt_calloc_one(session, &ref->page_las)); - ref->page_las->las_pageid = multi->las_pageid; -#ifdef HAVE_TIMESTAMPS - __wt_timestamp_set( - &ref->page_las->min_timestamp, &multi->las_min_timestamp); -#endif + *ref->page_las = multi->page_las; ref->state = WT_REF_LOOKASIDE; } diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c index 15d83169ea2..d15852af935 100644 --- a/src/btree/bt_sync.c +++ b/src/btree/bt_sync.c @@ -107,36 +107,6 @@ __sync_dup_walk( } /* - * __sync_evict_page -- - * Attempt to evict a page during a checkpoint walk. - */ -static int -__sync_evict_page(WT_SESSION_IMPL *session, WT_REF **walkp, uint32_t flags) -{ - WT_DECL_RET; - WT_REF *next, *to_evict; - - to_evict = *walkp; - next = NULL; - - /* - * Get the ref after the page we're trying to evicting. If the - * eviction is successful, the walk will continue from here. 
- */ - WT_RET(__sync_dup_walk(session, to_evict, flags, &next)); - WT_ERR(__wt_tree_walk(session, &next, flags)); - - WT_ERR(__wt_page_release_evict(session, to_evict)); - - /* Success: continue the walk at the next page. */ - *walkp = next; - return (0); - -err: WT_TRET(__wt_page_release(session, next, flags)); - return (ret); -} - -/* * __sync_file -- * Flush pages for a specific file. */ @@ -153,13 +123,13 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) uint64_t internal_bytes, internal_pages, leaf_bytes, leaf_pages; uint64_t oldest_id, saved_pinned_id; uint32_t flags; - bool evict_failed, skip_walk, timer; + bool timer, tried_eviction; conn = S2C(session); btree = S2BT(session); prev = walk = NULL; txn = &session->txn; - evict_failed = skip_walk = false; + tried_eviction = false; flags = WT_READ_CACHE | WT_READ_NO_GEN; internal_bytes = leaf_bytes = 0; @@ -266,12 +236,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) LF_SET(WT_READ_LOOKASIDE | WT_READ_WONT_NEED); for (;;) { - if (!skip_walk) { - WT_ERR(__sync_dup_walk( - session, walk, flags, &prev)); - WT_ERR(__wt_tree_walk(session, &walk, flags)); - } - skip_walk = false; + WT_ERR(__sync_dup_walk(session, walk, flags, &prev)); + WT_ERR(__wt_tree_walk(session, &walk, flags)); if (walk == NULL) break; @@ -317,29 +283,43 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) * visit. We want to avoid this code being too special * purpose, so try to reuse the ordinary eviction path. * - * If eviction succeeded, it steps to the next ref, so - * we have to skip the next walk. If eviction fails, - * remember so we don't retry it. + * Regardless of whether eviction succeeds or fails, + * the walk continues from the previous location. We + * remember whether we tried eviction, and don't try + * again. 
Even if eviction fails (the page may stay in + * cache clean but with history that cannot be + * discarded), that is not wasted effort because + * checkpoint doesn't need to write the page again. */ if (!WT_PAGE_IS_INTERNAL(page) && page->read_gen == WT_READGEN_WONT_NEED && - !evict_failed) { - if ((ret = __sync_evict_page( - session, &walk, flags)) == 0) { - evict_failed = false; - skip_walk = true; - } else { - walk = prev; - prev = NULL; - evict_failed = true; - } - WT_ERR_BUSY_OK(ret); + !tried_eviction) { + WT_ERR_BUSY_OK( + __wt_page_release_evict(session, walk)); + walk = prev; + prev = NULL; + tried_eviction = true; continue; } + tried_eviction = false; - evict_failed = false; WT_ERR(__wt_reconcile( session, walk, NULL, WT_REC_CHECKPOINT, NULL)); + + /* + * Update checkpoint IO tracking data if configured + * to log verbose progress messages. + */ + if (conn->ckpt_timer_start.tv_sec > 0) { + conn->ckpt_write_bytes += + page->memory_footprint; + ++conn->ckpt_write_pages; + + /* Periodically log checkpoint progress. */ + if (conn->ckpt_write_pages % 5000 == 0) + __wt_checkpoint_progress( + session, false); + } } break; case WT_SYNC_CLOSE: diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c index 13516d80c58..e2ebd38e82f 100644 --- a/src/cache/cache_las.c +++ b/src/cache/cache_las.c @@ -333,7 +333,7 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, insert_cnt = 0; btree_id = S2BT(session)->id; - las_pageid = multi->las_pageid = + las_pageid = multi->page_las.las_pageid = __wt_atomic_add64(&S2BT(session)->las_pageid, 1); /* @@ -437,6 +437,57 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, } /* + * __wt_las_cursor_position -- + * Position a lookaside cursor at the beginning of a block. + * + * There may be no block of lookaside entries if they have been removed by + * WT_CONNECTION::rollback_to_stable. 
+ */ +int +__wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) +{ + WT_ITEM las_key; + uint64_t las_counter, las_pageid; + uint32_t las_id; + int exact; + + /* + * Because of the special visibility rules for lookaside, a new block + * can appear in between our search and the block of interest. Keep + * trying until we find it. + */ + for (;;) { + WT_CLEAR(las_key); + cursor->set_key(cursor, + btree_id, pageid, (uint64_t)0, &las_key); + WT_RET(cursor->search_near(cursor, &exact)); + if (exact < 0) { + WT_RET(cursor->next(cursor)); + + /* + * Because of the special visibility rules for + * lookaside, a new block can appear in between our + * search and the block of interest. Keep trying while + * we have a key lower that we expect. + * + * There may be no block of lookaside entries if they + * have been removed by + * WT_CONNECTION::rollback_to_stable. + */ + WT_RET(cursor->get_key(cursor, + &las_id, &las_pageid, &las_counter, &las_key)); + if (las_id < btree_id || (las_id == btree_id && + pageid != 0 && las_pageid < pageid)) + continue; + } + + return (0); + } + + /* NOTREACHED */ +} + +/* * __wt_las_remove_block -- * Remove all records matching a key prefix from the lookaside store. */ @@ -448,7 +499,6 @@ __wt_las_remove_block(WT_SESSION_IMPL *session, WT_ITEM las_key; uint64_t las_counter, las_pageid, remove_cnt; uint32_t las_id, session_flags; - int exact; bool local_cursor; remove_cnt = 0; @@ -464,10 +514,7 @@ __wt_las_remove_block(WT_SESSION_IMPL *session, * Search for the block's unique prefix and step through all matching * records, removing them. 
*/ - las_key.size = 0; - cursor->set_key(cursor, btree_id, pageid, (uint64_t)0, &las_key); - if ((ret = cursor->search_near(cursor, &exact)) == 0 && exact < 0) - ret = cursor->next(cursor); + ret = __wt_las_cursor_position(cursor, btree_id, pageid); for (; ret == 0; ret = cursor->next(cursor)) { WT_ERR(cursor->get_key(cursor, &las_id, &las_pageid, &las_counter, &las_key)); diff --git a/src/config/config_def.c b/src/config/config_def.c index 4edd436712b..e7ead608672 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -177,14 +177,14 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { ",\"page_split_race\"]", NULL, 0 }, { "verbose", "list", - NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\"," - "\"handleops\",\"log\",\"lookaside_activity\",\"lsm\"," - "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\"," - "\"salvage\",\"shared_cache\",\"split\",\"temporary\"," - "\"thread_group\",\"timestamp\",\"transaction\",\"verify\"," - "\"version\",\"write\"]", + NULL, "choices=[\"api\",\"block\",\"checkpoint\"," + "\"checkpoint_progress\",\"compact\",\"evict\",\"evict_stuck\"," + "\"evictserver\",\"fileops\",\"handleops\",\"log\"," + "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\"," + "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," + "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"thread_group\",\"timestamp\"," + "\"transaction\",\"verify\",\"version\",\"write\"]", NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } }; @@ -832,14 +832,14 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { { "use_environment", "boolean", NULL, NULL, NULL, 0 }, { "use_environment_priv", "boolean", NULL, NULL, NULL, 0 }, { "verbose", "list", - NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - 
"\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\"," - "\"handleops\",\"log\",\"lookaside_activity\",\"lsm\"," - "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\"," - "\"salvage\",\"shared_cache\",\"split\",\"temporary\"," - "\"thread_group\",\"timestamp\",\"transaction\",\"verify\"," - "\"version\",\"write\"]", + NULL, "choices=[\"api\",\"block\",\"checkpoint\"," + "\"checkpoint_progress\",\"compact\",\"evict\",\"evict_stuck\"," + "\"evictserver\",\"fileops\",\"handleops\",\"log\"," + "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\"," + "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," + "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"thread_group\",\"timestamp\"," + "\"transaction\",\"verify\",\"version\",\"write\"]", NULL, 0 }, { "write_through", "list", NULL, "choices=[\"data\",\"log\"]", @@ -927,14 +927,14 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { { "use_environment", "boolean", NULL, NULL, NULL, 0 }, { "use_environment_priv", "boolean", NULL, NULL, NULL, 0 }, { "verbose", "list", - NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\"," - "\"handleops\",\"log\",\"lookaside_activity\",\"lsm\"," - "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\"," - "\"salvage\",\"shared_cache\",\"split\",\"temporary\"," - "\"thread_group\",\"timestamp\",\"transaction\",\"verify\"," - "\"version\",\"write\"]", + NULL, "choices=[\"api\",\"block\",\"checkpoint\"," + "\"checkpoint_progress\",\"compact\",\"evict\",\"evict_stuck\"," + "\"evictserver\",\"fileops\",\"handleops\",\"log\"," + "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\"," + "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," + 
"\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"thread_group\",\"timestamp\"," + "\"transaction\",\"verify\",\"version\",\"write\"]", NULL, 0 }, { "version", "string", NULL, NULL, NULL, 0 }, { "write_through", "list", @@ -1017,14 +1017,14 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs, 2 }, { "verbose", "list", - NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\"," - "\"handleops\",\"log\",\"lookaside_activity\",\"lsm\"," - "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\"," - "\"salvage\",\"shared_cache\",\"split\",\"temporary\"," - "\"thread_group\",\"timestamp\",\"transaction\",\"verify\"," - "\"version\",\"write\"]", + NULL, "choices=[\"api\",\"block\",\"checkpoint\"," + "\"checkpoint_progress\",\"compact\",\"evict\",\"evict_stuck\"," + "\"evictserver\",\"fileops\",\"handleops\",\"log\"," + "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\"," + "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," + "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"thread_group\",\"timestamp\"," + "\"transaction\",\"verify\",\"version\",\"write\"]", NULL, 0 }, { "version", "string", NULL, NULL, NULL, 0 }, { "write_through", "list", @@ -1107,14 +1107,14 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { NULL, NULL, confchk_wiredtiger_open_transaction_sync_subconfigs, 2 }, { "verbose", "list", - NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\"," - "\"handleops\",\"log\",\"lookaside_activity\",\"lsm\"," - "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\"," - 
"\"salvage\",\"shared_cache\",\"split\",\"temporary\"," - "\"thread_group\",\"timestamp\",\"transaction\",\"verify\"," - "\"version\",\"write\"]", + NULL, "choices=[\"api\",\"block\",\"checkpoint\"," + "\"checkpoint_progress\",\"compact\",\"evict\",\"evict_stuck\"," + "\"evictserver\",\"fileops\",\"handleops\",\"log\"," + "\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\"," + "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," + "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"thread_group\",\"timestamp\"," + "\"transaction\",\"verify\",\"version\",\"write\"]", NULL, 0 }, { "write_through", "list", NULL, "choices=[\"data\",\"log\"]", diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index f7525f22787..5f77f27ee3f 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1808,6 +1808,7 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) { "api", WT_VERB_API }, { "block", WT_VERB_BLOCK }, { "checkpoint", WT_VERB_CHECKPOINT }, + { "checkpoint_progress",WT_VERB_CHECKPOINT_PROGRESS }, { "compact", WT_VERB_COMPACT }, { "evict", WT_VERB_EVICT }, { "evict_stuck", WT_VERB_EVICT_STUCK }, diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c index c83fb544982..007aa8757da 100644 --- a/src/conn/conn_cache.c +++ b/src/conn/conn_cache.c @@ -266,6 +266,19 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session) WT_STAT_SET(session, stats, cache_pages_dirty, cache->pages_dirty_intl + cache->pages_dirty_leaf); + WT_STAT_CONN_SET(session, cache_eviction_state, cache->flags); + WT_STAT_CONN_SET(session, + cache_eviction_aggressive_set, cache->evict_aggressive_score); + WT_STAT_CONN_SET(session, + cache_eviction_empty_score, cache->evict_empty_score); + WT_STAT_CONN_SET(session, + cache_lookaside_score, __wt_cache_lookaside_score(cache)); + + WT_STAT_CONN_SET(session, + cache_eviction_active_workers, conn->evict_threads.current_threads); + WT_STAT_CONN_SET(session, 
cache_eviction_stable_state_workers, + cache->evict_tune_workers_best); + /* * The number of files with active walks ~= number of hazard pointers * in the walk session. Note: reading without locking. diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c index 4475b27a7b8..afe29284d06 100644 --- a/src/conn/conn_cache_pool.c +++ b/src/conn/conn_cache_pool.c @@ -135,7 +135,7 @@ __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg) if (__wt_config_gets(session, &cfg[1], "shared_cache.size", &cval) == 0 && cval.val != 0) size = (uint64_t)cval.val; - else + else size = cp->size; if (__wt_config_gets(session, &cfg[1], "shared_cache.chunk", &cval) == 0 && cval.val != 0) diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index 2606c9d083b..7f55b1cc4fd 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -9,6 +9,81 @@ #include "wt_internal.h" /* + * __conn_dhandle_config_clear -- + * Clear the underlying object's configuration information. + */ +static void +__conn_dhandle_config_clear(WT_SESSION_IMPL *session) +{ + WT_DATA_HANDLE *dhandle; + const char **a; + + dhandle = session->dhandle; + + if (dhandle->cfg == NULL) + return; + for (a = dhandle->cfg; *a != NULL; ++a) + __wt_free(session, *a); + __wt_free(session, dhandle->cfg); +} + +/* + * __conn_dhandle_config_set -- + * Set up a btree handle's configuration information. + */ +static int +__conn_dhandle_config_set(WT_SESSION_IMPL *session) +{ + WT_DATA_HANDLE *dhandle; + WT_DECL_RET; + char *metaconf; + + dhandle = session->dhandle; + + /* + * Read the object's entry from the metadata file, we're done if we + * don't find one. + */ + if ((ret = + __wt_metadata_search(session, dhandle->name, &metaconf)) != 0) { + if (ret == WT_NOTFOUND) + ret = ENOENT; + WT_RET(ret); + } + + /* + * The defaults are included because persistent configuration + * information is stored in the metadata file and it may be from an + * earlier version of WiredTiger. 
If defaults are included in the + * configuration, we can add new configuration strings without + * upgrading the metadata file or writing special code in case a + * configuration string isn't initialized, as long as the new + * configuration string has an appropriate default value. + * + * The error handling is a little odd, but be careful: we're holding a + * chunk of allocated memory in metaconf. If we fail before we copy a + * reference to it into the object's configuration array, we must free + * it, after the copy, we don't want to free it. + */ + WT_ERR(__wt_calloc_def(session, 3, &dhandle->cfg)); + switch (dhandle->type) { + case WT_DHANDLE_TYPE_BTREE: + WT_ERR(__wt_strdup(session, + WT_CONFIG_BASE(session, file_meta), &dhandle->cfg[0])); + break; + case WT_DHANDLE_TYPE_TABLE: + WT_ERR(__wt_strdup(session, + WT_CONFIG_BASE(session, table_meta), &dhandle->cfg[0])); + break; + } + dhandle->cfg[1] = metaconf; + return (0); + +err: __wt_free(session, metaconf); + return (ret); +} + +/* * __conn_dhandle_destroy -- * Destroy a data handle. */ @@ -30,6 +105,7 @@ __conn_dhandle_destroy(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle) __wt_rwlock_destroy(session, &dhandle->rwlock); __wt_free(session, dhandle->name); __wt_free(session, dhandle->checkpoint); + __conn_dhandle_config_clear(session); __wt_spin_destroy(session, &dhandle->close_lock); __wt_stat_dsrc_discard(session, dhandle); __wt_overwrite_and_free(session, dhandle); @@ -316,81 +392,6 @@ err: __wt_spin_unlock(session, &dhandle->close_lock); } /* - * __conn_dhandle_config_clear -- - * Clear the underlying object's configuration information. 
- */ -static void -__conn_dhandle_config_clear(WT_SESSION_IMPL *session) -{ - WT_DATA_HANDLE *dhandle; - const char **a; - - dhandle = session->dhandle; - - if (dhandle->cfg == NULL) - return; - for (a = dhandle->cfg; *a != NULL; ++a) - __wt_free(session, *a); - __wt_free(session, dhandle->cfg); -} - -/* - * __conn_dhandle_config_set -- - * Set up a btree handle's configuration information. - */ -static int -__conn_dhandle_config_set(WT_SESSION_IMPL *session) -{ - WT_DATA_HANDLE *dhandle; - WT_DECL_RET; - char *metaconf; - - dhandle = session->dhandle; - - /* - * Read the object's entry from the metadata file, we're done if we - * don't find one. - */ - if ((ret = - __wt_metadata_search(session, dhandle->name, &metaconf)) != 0) { - if (ret == WT_NOTFOUND) - ret = ENOENT; - WT_RET(ret); - } - - /* - * The defaults are included because persistent configuration - * information is stored in the metadata file and it may be from an - * earlier version of WiredTiger. If defaults are included in the - * configuration, we can add new configuration strings without - * upgrading the metadata file or writing special code in case a - * configuration string isn't initialized, as long as the new - * configuration string has an appropriate default value. - * - * The error handling is a little odd, but be careful: we're holding a - * chunk of allocated memory in metaconf. If we fail before we copy a - * reference to it into the object's configuration array, we must free - * it, after the copy, we don't want to free it. 
- */ - WT_ERR(__wt_calloc_def(session, 3, &dhandle->cfg)); - switch (dhandle->type) { - case WT_DHANDLE_TYPE_BTREE: - WT_ERR(__wt_strdup(session, - WT_CONFIG_BASE(session, file_meta), &dhandle->cfg[0])); - break; - case WT_DHANDLE_TYPE_TABLE: - WT_ERR(__wt_strdup(session, - WT_CONFIG_BASE(session, table_meta), &dhandle->cfg[0])); - break; - } - dhandle->cfg[1] = metaconf; - return (0); - -err: __wt_free(session, metaconf); - return (ret); -} - -/* * __wt_conn_dhandle_open -- * Open the current data handle. */ @@ -746,7 +747,6 @@ __wt_conn_dhandle_discard_single( * After successfully removing the handle, clean it up. */ if (ret == 0 || final) { - __conn_dhandle_config_clear(session); WT_TRET(__conn_dhandle_destroy(session, dhandle)); session->dhandle = NULL; } diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 0205dbb08e3..02851492039 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -617,8 +617,6 @@ __evict_update_work(WT_SESSION_IMPL *session) F_CLR(cache, WT_CACHE_EVICT_CLEAN | WT_CACHE_EVICT_CLEAN_HARD); } - WT_STAT_CONN_SET(session, cache_eviction_state, cache->flags); - return (F_ISSET(cache, WT_CACHE_EVICT_ALL | WT_CACHE_EVICT_URGENT)); } @@ -727,9 +725,6 @@ __evict_pass(WT_SESSION_IMPL *session) txn_global->current != oldest_id && cache->evict_aggressive_score < 100) ++cache->evict_aggressive_score; - WT_STAT_CONN_SET(session, - cache_eviction_aggressive_set, - cache->evict_aggressive_score); prev = now; prev_oldest_id = oldest_id; } @@ -761,12 +756,8 @@ __evict_pass(WT_SESSION_IMPL *session) "%s", "unable to reach eviction goal"); break; } else { - if (cache->evict_aggressive_score > 0) { + if (cache->evict_aggressive_score > 0) --cache->evict_aggressive_score; - WT_STAT_CONN_SET(session, - cache_eviction_aggressive_set, - cache->evict_aggressive_score); - } loop = 0; eviction_progress = cache->eviction_progress; } @@ -983,8 +974,6 @@ __evict_tune_workers(WT_SESSION_IMPL *session) if (conn->evict_threads_max == 
conn->evict_threads_min) return; - eviction_progress_rate = 0; - __wt_epoch(session, &current_time); time_diff = WT_TIMEDIFF_MS(current_time, cache->evict_tune_last_time); @@ -1098,12 +1087,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) WT_STAT_CONN_INCR(session, cache_eviction_worker_removed); } - WT_STAT_CONN_SET(session, - cache_eviction_stable_state_workers, - cache->evict_tune_workers_best); cache->evict_tune_stable = true; - WT_STAT_CONN_SET(session, cache_eviction_active_workers, - conn->evict_threads.current_threads); goto done; } } @@ -1135,9 +1119,6 @@ __evict_tune_workers(WT_SESSION_IMPL *session) cache->evict_tune_last_action_time = current_time; } - WT_STAT_CONN_SET(session, cache_eviction_active_workers, - conn->evict_threads.current_threads); - done: cache->evict_tune_last_time = current_time; cache->evict_tune_progress_last = eviction_progress; } @@ -1187,11 +1168,8 @@ __evict_lru_walk(WT_SESSION_IMPL *session) cache = S2C(session)->cache; /* Age out the score of how much the queue has been empty recently. */ - if (cache->evict_empty_score > 0) { + if (cache->evict_empty_score > 0) --cache->evict_empty_score; - WT_STAT_CONN_SET(session, cache_eviction_empty_score, - cache->evict_empty_score); - } /* Fill the next queue (that isn't the urgent queue).
*/ queue = cache->evict_fill_queue; @@ -1221,14 +1199,10 @@ __evict_lru_walk(WT_SESSION_IMPL *session) */ if (__evict_queue_empty(queue, false)) { if (F_ISSET(cache, - WT_CACHE_EVICT_CLEAN_HARD | WT_CACHE_EVICT_DIRTY_HARD)) { + WT_CACHE_EVICT_CLEAN_HARD | WT_CACHE_EVICT_DIRTY_HARD)) cache->evict_empty_score = WT_MIN( cache->evict_empty_score + WT_EVICT_SCORE_BUMP, WT_EVICT_SCORE_MAX); - WT_STAT_CONN_SET(session, - cache_eviction_empty_score, - cache->evict_empty_score); - } WT_STAT_CONN_INCR(session, cache_eviction_queue_empty); } else WT_STAT_CONN_INCR(session, cache_eviction_queue_not_empty); @@ -1897,6 +1871,24 @@ __evict_walk_file(WT_SESSION_IMPL *session, F_ISSET(btree, WT_BTREE_LOOKASIDE)) goto fast; + /* + * If application threads are blocked on eviction of clean + * pages, and the only thing preventing a clean leaf page from + * being evicted is it contains historical data, mark it dirty + * so we can do lookaside eviction. We also mark the tree + * dirty to avoid an assertion that we don't discard dirty + * pages from a clean tree. + */ + if (F_ISSET(cache, WT_CACHE_EVICT_CLEAN_HARD) && + !F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE) && + !WT_PAGE_IS_INTERNAL(page) && + !modified && page->modify != NULL && + !__wt_txn_visible_all(session, page->modify->rec_max_txn, + WT_TIMESTAMP_NULL(&page->modify->rec_max_timestamp))) { + __wt_page_modify_set(session, page); + goto fast; + } + /* Skip clean pages if appropriate. */ if (!modified && !F_ISSET(cache, WT_CACHE_EVICT_CLEAN)) continue; diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c index edf80ec4460..103c93a075b 100644 --- a/src/evict/evict_page.c +++ b/src/evict/evict_page.c @@ -364,16 +364,11 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) * re-instantiate the page in memory, else discard the page. 
*/ if (mod->mod_disk_image == NULL) { - if (mod->mod_replace_las_pageid != 0) { + if (mod->mod_page_las.las_pageid != 0) { WT_RET( __wt_calloc_one(session, &ref->page_las)); - ref->page_las->las_pageid = - mod->mod_replace_las_pageid; -#ifdef HAVE_TIMESTAMPS - __wt_timestamp_set( - &ref->page_las->min_timestamp, - &mod->mod_replace_las_min_timestamp); -#endif + *ref->page_las = mod->mod_page_las; + __wt_page_modify_clear(session, ref->page); __wt_ref_out(session, ref); WT_PUBLISH(ref->state, WT_REF_LOOKASIDE); } else { @@ -567,7 +562,7 @@ __evict_review( if (F_ISSET(conn, WT_CONN_IN_MEMORY)) LF_SET(WT_REC_IN_MEMORY | WT_REC_SCRUB | WT_REC_UPDATE_RESTORE); - else { + else if (!WT_IS_METADATA(session->dhandle)) { if (!WT_SESSION_IS_CHECKPOINT(session)) { LF_SET(WT_REC_UPDATE_RESTORE); @@ -576,18 +571,13 @@ __evict_review( } /* - * If the cache is nearly stuck, check if - * reconciliation suggests trying the lookaside table - * unless lookaside eviction is disabled globally. - * - * We don't wait until the cache is completely stuck: - * for workloads where lookaside eviction is necessary - * to make progress, we don't want a single successful - * page eviction to make the cache "unstuck" so we have - * to wait again before evicting the next page. + * If the cache is under pressure with many updates + * that can't be evicted, check if reconciliation + * suggests trying the lookaside table. 
*/ - if (__wt_cache_nearly_stuck(session) && - !F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE)) + if (!F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE) && + (__wt_cache_lookaside_score(cache) > 50 || + __wt_cache_stuck(session))) lookaside_retryp = &lookaside_retry; } } @@ -596,11 +586,21 @@ __evict_review( ret = __wt_reconcile(session, ref, NULL, flags, lookaside_retryp); /* - * If reconciliation fails, eviction is stuck and reconciliation - * reports it might succeed if we use the lookaside table, then - * configure reconciliation to write those updates to the lookaside - * table, allowing the eviction of pages we'd otherwise have to retain - * in cache to support older readers. + * If attempting eviction in service of a checkpoint, we may + * successfully reconcile but then find that there are updates on the + * page too new to evict. Give up evicting in that case: checkpoint + * will include the reconciled page when it visits the parent. + */ + if (WT_SESSION_IS_CHECKPOINT(session) && !__wt_page_is_modified(page) && + !__wt_txn_visible_all(session, page->modify->rec_max_txn, + WT_TIMESTAMP_NULL(&page->modify->rec_max_timestamp))) + return (EBUSY); + + /* + * If reconciliation fails but reports it might succeed if we use the + * lookaside table, try again with the lookaside table, allowing the + * eviction of pages we'd otherwise have to retain in cache to support + * older readers. */ if (ret == EBUSY && lookaside_retry) { LF_CLR(WT_REC_SCRUB | WT_REC_UPDATE_RESTORE); @@ -611,29 +611,16 @@ __evict_review( WT_RET(ret); /* - * If attempting eviction in service of a checkpoint, we may - * successfully reconcile but then find that there are updates on the - * page too new to evict. Give up in that case: checkpoint will - * reconcile the page normally. 
- */ - if (WT_SESSION_IS_CHECKPOINT(session) && !__wt_page_is_modified(page) && - !LF_ISSET(WT_REC_LOOKASIDE) && - !__wt_txn_visible_all(session, page->modify->rec_max_txn, - WT_TIMESTAMP_NULL(&page->modify->rec_max_timestamp))) - return (EBUSY); - - /* * Success: assert the page is clean or reconciliation was configured * for update/restore. If the page is clean, assert that reconciliation * was configured for a lookaside table, or it's not a durable object * (currently the lookaside table), or all page updates were globally * visible. */ - WT_ASSERT(session, - !__wt_page_is_modified(page) || LF_ISSET(WT_REC_UPDATE_RESTORE)); + WT_ASSERT(session, !__wt_page_is_modified(page) || + LF_ISSET(WT_REC_LOOKASIDE | WT_REC_UPDATE_RESTORE)); WT_ASSERT(session, __wt_page_is_modified(page) || - LF_ISSET(WT_REC_LOOKASIDE) || __wt_txn_visible_all(session, page->modify->rec_max_txn, WT_TIMESTAMP_NULL(&page->modify->rec_max_timestamp))); diff --git a/src/include/api.h b/src/include/api.h index e1c3ba9ff3c..aa080d2bcca 100644 --- a/src/include/api.h +++ b/src/include/api.h @@ -39,15 +39,10 @@ } while (0) /* An API call wrapped in a transaction if necessary. 
*/ -#ifdef HAVE_TIMESTAMPS -#define WT_TXN_TIMESTAMP_FLAG_CHECK(s) __wt_txn_timestamp_flags((s)) -#else -#define WT_TXN_TIMESTAMP_FLAG_CHECK(s) -#endif #define TXN_API_CALL(s, h, n, bt, config, cfg) do { \ bool __autotxn = false; \ API_CALL(s, h, n, bt, config, cfg); \ - WT_TXN_TIMESTAMP_FLAG_CHECK(s); \ + __wt_txn_timestamp_flags(s); \ __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\ if (__autotxn) \ F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT) @@ -56,7 +51,7 @@ #define TXN_API_CALL_NOCONF(s, h, n, dh) do { \ bool __autotxn = false; \ API_CALL_NOCONF(s, h, n, dh); \ - WT_TXN_TIMESTAMP_FLAG_CHECK(s); \ + __wt_txn_timestamp_flags(s); \ __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\ if (__autotxn) \ F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT) diff --git a/src/include/btmem.h b/src/include/btmem.h index 158fcf87d29..c3646a2ae59 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -186,6 +186,19 @@ struct __wt_ovfl_reuse { ",value_format=" WT_UNCHECKED_STRING(QuBu) /* + * WT_PAGE_LOOKASIDE -- + * Related information for on-disk pages with lookaside entries. + */ +struct __wt_page_lookaside { + uint64_t las_pageid; /* Page ID in lookaside */ + uint64_t las_max_txn; /* Maximum transaction ID in + lookaside */ + WT_DECL_TIMESTAMP(min_timestamp) /* Min timestamp in lookaside */ + WT_DECL_TIMESTAMP(onpage_timestamp) /* Max timestamp on page */ + bool las_skew_oldest; /* On-page skewed to oldest */ +}; + +/* * WT_PAGE_MODIFY -- * When a page is modified, there's additional information to maintain. */ @@ -241,17 +254,14 @@ struct __wt_page_modify { void *disk_image; /* The page has lookaside entries. 
*/ - uint64_t las_pageid; - WT_DECL_TIMESTAMP(las_min_timestamp) + WT_PAGE_LOOKASIDE page_las; } r; #undef mod_replace #define mod_replace u1.r.replace #undef mod_disk_image #define mod_disk_image u1.r.disk_image -#undef mod_replace_las_pageid -#define mod_replace_las_pageid u1.r.las_pageid -#undef mod_replace_las_min_timestamp -#define mod_replace_las_min_timestamp u1.r.las_min_timestamp +#undef mod_page_las +#define mod_page_las u1.r.page_las struct { /* Multiple replacement blocks */ struct __wt_multi { @@ -297,8 +307,7 @@ struct __wt_page_modify { uint32_t size; uint32_t checksum; - uint64_t las_pageid; - WT_DECL_TIMESTAMP(las_min_timestamp) + WT_PAGE_LOOKASIDE page_las; } *multi; uint32_t multi_entries; /* Multiple blocks element count */ } m; @@ -721,16 +730,6 @@ struct __wt_page_deleted { }; /* - * WT_PAGE_LOOKASIDE -- - * Related information for on-disk pages with lookaside entries. - */ -struct __wt_page_lookaside { - uint64_t las_pageid; /* Page ID in lookaside */ - WT_DECL_TIMESTAMP(min_timestamp) /* Oldest timestamp in - lookaside for the page */ -}; - -/* * WT_REF -- * A single in-memory page and the state information used to determine if * it's OK to dereference the pointer to the page. diff --git a/src/include/cache.h b/src/include/cache.h index 456cb0382e4..0a42853b95b 100644 --- a/src/include/cache.h +++ b/src/include/cache.h @@ -152,20 +152,28 @@ struct __wt_cache { #define WT_EVICT_SCORE_BUMP 10 #define WT_EVICT_SCORE_CUTOFF 10 #define WT_EVICT_SCORE_MAX 100 - uint32_t evict_aggressive_score;/* Score of how aggressive eviction - should be about selecting eviction - candidates. If eviction is - struggling to make progress, this - score rises (up to a maximum of - 100), at which point the cache is - "stuck" and transaction will be - rolled back. */ - uint32_t evict_empty_score; /* Score of how often LRU queues are - empty on refill. 
This score varies - between 0 (if the queue hasn't been - empty for a long time) and 100 (if - the queue has been empty the last 10 - times we filled up. */ + /* + * Score of how aggressive eviction should be about selecting eviction + * candidates. If eviction is struggling to make progress, this score + * rises (up to a maximum of 100), at which point the cache is "stuck" + * and transaction will be rolled back. + */ + uint32_t evict_aggressive_score; + + /* + * Score of how often LRU queues are empty on refill. This score varies + * between 0 (if the queue hasn't been empty for a long time) and 100 + * (if the queue has been empty the last 10 times we filled up. + */ + uint32_t evict_empty_score; + + /* + * Score of how much pressure storing historical versions is having on + * eviction. This score varies between 0, if reconciliation always + * sees updates that are globally visible and hence can be discarded, + * to 100 if no updates are globally visible. + */ + int32_t evict_lookaside_score; /* * Cache pool information. diff --git a/src/include/cache.i b/src/include/cache.i index 33b1bf2a7af..e160dbf4d64 100644 --- a/src/include/cache.i +++ b/src/include/cache.i @@ -79,22 +79,6 @@ __wt_cache_read_gen_new(WT_SESSION_IMPL *session, WT_PAGE *page) } /* - * __wt_cache_nearly_stuck -- - * Indicate if the cache is nearly stuck. - */ -static inline bool -__wt_cache_nearly_stuck(WT_SESSION_IMPL *session) -{ - WT_CACHE *cache; - - cache = S2C(session)->cache; - return (cache->evict_aggressive_score >= - (WT_EVICT_SCORE_MAX - WT_EVICT_SCORE_BUMP) && - F_ISSET(cache, - WT_CACHE_EVICT_CLEAN_HARD | WT_CACHE_EVICT_DIRTY_HARD)); -} - -/* * __wt_cache_stuck -- * Indicate if the cache is stuck (i.e., not making progress). */ @@ -205,6 +189,43 @@ __wt_cache_bytes_other(WT_CACHE *cache) } /* + * __wt_cache_lookaside_score -- + * Get the current lookaside score (between 0 and 100). 
+ */ +static inline uint32_t +__wt_cache_lookaside_score(WT_CACHE *cache) +{ + int32_t global_score; + + global_score = cache->evict_lookaside_score; + return ((uint32_t)WT_MIN(WT_MAX(global_score, 0), 100)); +} + +/* + * __wt_cache_update_lookaside_score -- + * Update the lookaside score based how many unstable updates are seen. + */ +static inline void +__wt_cache_update_lookaside_score( + WT_SESSION_IMPL *session, u_int updates_seen, u_int updates_unstable) +{ + WT_CACHE *cache; + int32_t global_score, score; + + if (updates_seen == 0) + return; + + cache = S2C(session)->cache; + score = (int32_t)((100 * updates_unstable) / updates_seen); + global_score = cache->evict_lookaside_score; + + if (score > global_score && global_score < 100) + __wt_atomic_addi32(&cache->evict_lookaside_score, 1); + else if (score < global_score && global_score > 0) + __wt_atomic_subi32(&cache->evict_lookaside_score, 1); +} + +/* * __wt_session_can_wait -- * Return if a session available for a potentially slow operation. 
*/ diff --git a/src/include/connection.h b/src/include/connection.h index 2fa440e4e08..c1d1921bdcc 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -268,6 +268,15 @@ struct __wt_connection_impl { uint64_t ckpt_time_recent; /* Checkpoint time recent/total */ uint64_t ckpt_time_total; + /* Checkpoint stats and verbosity timers */ + struct timespec ckpt_timer_start; + struct timespec ckpt_timer_scrub_end; + + /* Checkpoint progress message data */ + uint64_t ckpt_progress_msg_count; + uint64_t ckpt_write_bytes; + uint64_t ckpt_write_pages; + uint32_t stat_flags; /* Options declared in flags.py */ /* Connection statistics */ diff --git a/src/include/extern.h b/src/include/extern.h index fc0b5135882..bbe66abf753 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -207,6 +207,7 @@ extern int __wt_las_cursor_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRI extern void __wt_las_cursor( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags); extern int __wt_las_cursor_close( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_las_insert_block(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CURSOR *cursor, WT_MULTI *multi, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_las_remove_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern uint32_t __wt_checksum_sw(const void *chunk, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_checksum_init(void); @@ -791,6 +792,7 @@ extern int __wt_txn_global_shutdown(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_A extern int __wt_verbose_dump_txn_one(WT_SESSION_IMPL *session, WT_TXN *txn) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_verbose_dump_txn(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_checkpoint_progress(WT_SESSION_IMPL *session, bool closing); extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[], bool waiting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/include/flags.h b/src/include/flags.h index dd98234f9e2..23be5fd2e14 100644 --- a/src/include/flags.h +++ b/src/include/flags.h @@ -93,34 +93,35 @@ #define WT_VERB_API 0x00000001 #define WT_VERB_BLOCK 0x00000002 #define WT_VERB_CHECKPOINT 0x00000004 -#define WT_VERB_COMPACT 0x00000008 -#define WT_VERB_EVICT 0x00000010 -#define WT_VERB_EVICTSERVER 0x00000020 -#define WT_VERB_EVICT_STUCK 0x00000040 -#define WT_VERB_FILEOPS 0x00000080 -#define WT_VERB_HANDLEOPS 0x00000100 -#define WT_VERB_LOG 0x00000200 -#define WT_VERB_LOOKASIDE 0x00000400 -#define WT_VERB_LSM 0x00000800 -#define WT_VERB_LSM_MANAGER 0x00001000 -#define WT_VERB_METADATA 0x00002000 -#define WT_VERB_MUTEX 0x00004000 -#define WT_VERB_OVERFLOW 0x00008000 -#define WT_VERB_READ 0x00010000 -#define WT_VERB_REBALANCE 0x00020000 -#define WT_VERB_RECONCILE 0x00040000 -#define WT_VERB_RECOVERY 0x00080000 -#define WT_VERB_RECOVERY_PROGRESS 0x00100000 -#define WT_VERB_SALVAGE 0x00200000 -#define WT_VERB_SHARED_CACHE 0x00400000 -#define WT_VERB_SPLIT 0x00800000 -#define WT_VERB_TEMPORARY 0x01000000 -#define WT_VERB_THREAD_GROUP 0x02000000 -#define WT_VERB_TIMESTAMP 0x04000000 -#define WT_VERB_TRANSACTION 0x08000000 -#define WT_VERB_VERIFY 0x10000000 
-#define WT_VERB_VERSION 0x20000000 -#define WT_VERB_WRITE 0x40000000 +#define WT_VERB_CHECKPOINT_PROGRESS 0x00000008 +#define WT_VERB_COMPACT 0x00000010 +#define WT_VERB_EVICT 0x00000020 +#define WT_VERB_EVICTSERVER 0x00000040 +#define WT_VERB_EVICT_STUCK 0x00000080 +#define WT_VERB_FILEOPS 0x00000100 +#define WT_VERB_HANDLEOPS 0x00000200 +#define WT_VERB_LOG 0x00000400 +#define WT_VERB_LOOKASIDE 0x00000800 +#define WT_VERB_LSM 0x00001000 +#define WT_VERB_LSM_MANAGER 0x00002000 +#define WT_VERB_METADATA 0x00004000 +#define WT_VERB_MUTEX 0x00008000 +#define WT_VERB_OVERFLOW 0x00010000 +#define WT_VERB_READ 0x00020000 +#define WT_VERB_REBALANCE 0x00040000 +#define WT_VERB_RECONCILE 0x00080000 +#define WT_VERB_RECOVERY 0x00100000 +#define WT_VERB_RECOVERY_PROGRESS 0x00200000 +#define WT_VERB_SALVAGE 0x00400000 +#define WT_VERB_SHARED_CACHE 0x00800000 +#define WT_VERB_SPLIT 0x01000000 +#define WT_VERB_TEMPORARY 0x02000000 +#define WT_VERB_THREAD_GROUP 0x04000000 +#define WT_VERB_TIMESTAMP 0x08000000 +#define WT_VERB_TRANSACTION 0x10000000 +#define WT_VERB_VERIFY 0x20000000 +#define WT_VERB_VERSION 0x40000000 +#define WT_VERB_WRITE 0x80000000 /* * flags section: END * DO NOT EDIT: automatically built by dist/flags.py. 
diff --git a/src/include/lint.h b/src/include/lint.h index 97b91c4c061..c02e25592c0 100644 --- a/src/include/lint.h +++ b/src/include/lint.h @@ -84,8 +84,8 @@ __wt_atomic_cas_ptr(void *vp, void *orig, void *new) { return (false); } -static inline void WT_BARRIER(void) { return; } -static inline void WT_FULL_BARRIER(void) { return; } -static inline void WT_PAUSE(void) { return; } -static inline void WT_READ_BARRIER(void) { return; } -static inline void WT_WRITE_BARRIER(void) { return; } +static inline void WT_BARRIER(void) { } +static inline void WT_FULL_BARRIER(void) { } +static inline void WT_PAUSE(void) { } +static inline void WT_READ_BARRIER(void) { } +static inline void WT_WRITE_BARRIER(void) { } diff --git a/src/include/misc.h b/src/include/misc.h index e7b3e1931ac..2435d37ee20 100644 --- a/src/include/misc.h +++ b/src/include/misc.h @@ -275,8 +275,8 @@ typedef struct __wt_timestamp_t wt_timestamp_t; #define WT_TIMESTAMP_NULL(x) (x) #else typedef void wt_timestamp_t; -#define WT_TIMESTAMP_NULL(x) (NULL) #define WT_DECL_TIMESTAMP(x) +#define WT_TIMESTAMP_NULL(x) (NULL) #endif /* diff --git a/src/include/stat.h b/src/include/stat.h index 922b211bec4..12a7d532496 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -341,6 +341,7 @@ struct __wt_connection_stats { int64_t cache_eviction_internal; int64_t cache_eviction_split_internal; int64_t cache_eviction_split_leaf; + int64_t cache_lookaside_score; int64_t cache_lookaside_entries; int64_t cache_lookaside_insert; int64_t cache_lookaside_remove; diff --git a/src/include/txn.i b/src/include/txn.i index b0b71dbb3d0..9e70632d890 100644 --- a/src/include/txn.i +++ b/src/include/txn.i @@ -82,7 +82,9 @@ __wt_timestamp_set_zero(wt_timestamp_t *ts) { ts->val = 0; } -#else + +#else /* WT_TIMESTAMP_SIZE != 8 */ + #define WT_WITH_TIMESTAMP_READLOCK(s, l, e) do { \ __wt_readlock((s), (l)); \ e; \ @@ -141,6 +143,16 @@ __wt_timestamp_set_zero(wt_timestamp_t *ts) memset(ts->ts, 0x00, WT_TIMESTAMP_SIZE); } #endif /* 
WT_TIMESTAMP_SIZE == 8 */ + +#else /* !HAVE_TIMESTAMPS */ + +#define __wt_timestamp_set(dest, src) +#define __wt_timestamp_set_inf(ts) +#define __wt_timestamp_set_zero(ts) +#define __wt_txn_clear_commit_timestamp(session) +#define __wt_txn_clear_read_timestamp(session) +#define __wt_txn_timestamp_flags(session) + #endif /* HAVE_TIMESTAMPS */ /* diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index b9801aedfbb..41dd970d3ba 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -2201,11 +2201,11 @@ struct __wt_connection { * if WiredTiger is configured with --enable-verbose. Options are given * as a list\, such as <code>"verbose=[evictserver\,read]"</code>., a * list\, with values chosen from the following options: \c "api"\, \c - * "block"\, \c "checkpoint"\, \c "compact"\, \c "evict"\, \c - * "evict_stuck"\, \c "evictserver"\, \c "fileops"\, \c "handleops"\, \c - * "log"\, \c "lookaside_activity"\, \c "lsm"\, \c "lsm_manager"\, \c - * "metadata"\, \c "mutex"\, \c "overflow"\, \c "read"\, \c - * "rebalance"\, \c "reconcile"\, \c "recovery"\, \c + * "block"\, \c "checkpoint"\, \c "checkpoint_progress"\, \c "compact"\, + * \c "evict"\, \c "evict_stuck"\, \c "evictserver"\, \c "fileops"\, \c + * "handleops"\, \c "log"\, \c "lookaside_activity"\, \c "lsm"\, \c + * "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c + * "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c * "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c "split"\, * \c "temporary"\, \c "thread_group"\, \c "timestamp"\, \c * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default @@ -2841,9 +2841,9 @@ struct __wt_connection { * WiredTiger is configured with --enable-verbose. 
Options are given as a * list\, such as <code>"verbose=[evictserver\,read]"</code>., a list\, with * values chosen from the following options: \c "api"\, \c "block"\, \c - * "checkpoint"\, \c "compact"\, \c "evict"\, \c "evict_stuck"\, \c - * "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\, \c - * "lookaside_activity"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c + * "checkpoint"\, \c "checkpoint_progress"\, \c "compact"\, \c "evict"\, \c + * "evict_stuck"\, \c "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\, + * \c "lookaside_activity"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c * "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c * "recovery"\, \c "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c * "split"\, \c "temporary"\, \c "thread_group"\, \c "timestamp"\, \c @@ -4854,456 +4854,458 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1082 /*! cache: leaf pages split during eviction */ #define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1083 +/*! cache: lookaside score */ +#define WT_STAT_CONN_CACHE_LOOKASIDE_SCORE 1084 /*! cache: lookaside table entries */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_ENTRIES 1084 +#define WT_STAT_CONN_CACHE_LOOKASIDE_ENTRIES 1085 /*! cache: lookaside table insert calls */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1085 +#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1086 /*! cache: lookaside table remove calls */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1086 +#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1087 /*! cache: maximum bytes configured */ -#define WT_STAT_CONN_CACHE_BYTES_MAX 1087 +#define WT_STAT_CONN_CACHE_BYTES_MAX 1088 /*! cache: maximum page size at eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1088 +#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1089 /*! 
cache: modified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1089 +#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1090 /*! cache: modified pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1090 +#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1091 /*! cache: overflow pages read into cache */ -#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1091 +#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1092 /*! cache: page split during eviction deepened the tree */ -#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1092 +#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1093 /*! cache: page written requiring lookaside records */ -#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1093 +#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1094 /*! cache: pages currently held in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_INUSE 1094 +#define WT_STAT_CONN_CACHE_PAGES_INUSE 1095 /*! cache: pages evicted because they exceeded the in-memory maximum count */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1095 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1096 /*! * cache: pages evicted because they exceeded the in-memory maximum time * (usecs) */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_TIME 1096 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_TIME 1097 /*! cache: pages evicted because they had chains of deleted items count */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1097 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1098 /*! * cache: pages evicted because they had chains of deleted items time * (usecs) */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE_TIME 1098 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE_TIME 1099 /*! cache: pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP 1099 +#define WT_STAT_CONN_CACHE_EVICTION_APP 1100 /*! cache: pages queued for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1100 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1101 /*! 
cache: pages queued for urgent eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1101 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1102 /*! cache: pages queued for urgent eviction during walk */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1102 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1103 /*! cache: pages read into cache */ -#define WT_STAT_CONN_CACHE_READ 1103 +#define WT_STAT_CONN_CACHE_READ 1104 /*! cache: pages read into cache requiring lookaside entries */ -#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1104 +#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1105 /*! cache: pages requested from the cache */ -#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1105 +#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1106 /*! cache: pages seen by eviction walk */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1106 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1107 /*! cache: pages selected for eviction unable to be evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1107 +#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1108 /*! cache: pages walked for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_WALK 1108 +#define WT_STAT_CONN_CACHE_EVICTION_WALK 1109 /*! cache: pages written from cache */ -#define WT_STAT_CONN_CACHE_WRITE 1109 +#define WT_STAT_CONN_CACHE_WRITE 1110 /*! cache: pages written requiring in-memory restoration */ -#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1110 +#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1111 /*! cache: percentage overhead */ -#define WT_STAT_CONN_CACHE_OVERHEAD 1111 +#define WT_STAT_CONN_CACHE_OVERHEAD 1112 /*! cache: tracked bytes belonging to internal pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1112 +#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1113 /*! cache: tracked bytes belonging to leaf pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_LEAF 1113 +#define WT_STAT_CONN_CACHE_BYTES_LEAF 1114 /*! 
cache: tracked dirty bytes in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1114 +#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1115 /*! cache: tracked dirty pages in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1115 +#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1116 /*! cache: unmodified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1116 +#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1117 /*! connection: auto adjusting condition resets */ -#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1117 +#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1118 /*! connection: auto adjusting condition wait calls */ -#define WT_STAT_CONN_COND_AUTO_WAIT 1118 +#define WT_STAT_CONN_COND_AUTO_WAIT 1119 /*! connection: detected system time went backwards */ -#define WT_STAT_CONN_TIME_TRAVEL 1119 +#define WT_STAT_CONN_TIME_TRAVEL 1120 /*! connection: files currently open */ -#define WT_STAT_CONN_FILE_OPEN 1120 +#define WT_STAT_CONN_FILE_OPEN 1121 /*! connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1121 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1122 /*! connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1122 +#define WT_STAT_CONN_MEMORY_FREE 1123 /*! connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1123 +#define WT_STAT_CONN_MEMORY_GROW 1124 /*! connection: pthread mutex condition wait calls */ -#define WT_STAT_CONN_COND_WAIT 1124 +#define WT_STAT_CONN_COND_WAIT 1125 /*! connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1125 +#define WT_STAT_CONN_RWLOCK_READ 1126 /*! connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1126 +#define WT_STAT_CONN_RWLOCK_WRITE 1127 /*! connection: total fsync I/Os */ -#define WT_STAT_CONN_FSYNC_IO 1127 +#define WT_STAT_CONN_FSYNC_IO 1128 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1128 +#define WT_STAT_CONN_READ_IO 1129 /*! 
connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1129 +#define WT_STAT_CONN_WRITE_IO 1130 /*! cursor: cursor create calls */ -#define WT_STAT_CONN_CURSOR_CREATE 1130 +#define WT_STAT_CONN_CURSOR_CREATE 1131 /*! cursor: cursor insert calls */ -#define WT_STAT_CONN_CURSOR_INSERT 1131 +#define WT_STAT_CONN_CURSOR_INSERT 1132 /*! cursor: cursor modify calls */ -#define WT_STAT_CONN_CURSOR_MODIFY 1132 +#define WT_STAT_CONN_CURSOR_MODIFY 1133 /*! cursor: cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT 1133 +#define WT_STAT_CONN_CURSOR_NEXT 1134 /*! cursor: cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV 1134 +#define WT_STAT_CONN_CURSOR_PREV 1135 /*! cursor: cursor remove calls */ -#define WT_STAT_CONN_CURSOR_REMOVE 1135 +#define WT_STAT_CONN_CURSOR_REMOVE 1136 /*! cursor: cursor reserve calls */ -#define WT_STAT_CONN_CURSOR_RESERVE 1136 +#define WT_STAT_CONN_CURSOR_RESERVE 1137 /*! cursor: cursor reset calls */ -#define WT_STAT_CONN_CURSOR_RESET 1137 +#define WT_STAT_CONN_CURSOR_RESET 1138 /*! cursor: cursor restarted searches */ -#define WT_STAT_CONN_CURSOR_RESTART 1138 +#define WT_STAT_CONN_CURSOR_RESTART 1139 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1139 +#define WT_STAT_CONN_CURSOR_SEARCH 1140 /*! cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1140 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1141 /*! cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1141 +#define WT_STAT_CONN_CURSOR_UPDATE 1142 /*! cursor: truncate calls */ -#define WT_STAT_CONN_CURSOR_TRUNCATE 1142 +#define WT_STAT_CONN_CURSOR_TRUNCATE 1143 /*! data-handle: connection data handles currently active */ -#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1143 +#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1144 /*! data-handle: connection sweep candidate became referenced */ -#define WT_STAT_CONN_DH_SWEEP_REF 1144 +#define WT_STAT_CONN_DH_SWEEP_REF 1145 /*! 
data-handle: connection sweep dhandles closed */ -#define WT_STAT_CONN_DH_SWEEP_CLOSE 1145 +#define WT_STAT_CONN_DH_SWEEP_CLOSE 1146 /*! data-handle: connection sweep dhandles removed from hash list */ -#define WT_STAT_CONN_DH_SWEEP_REMOVE 1146 +#define WT_STAT_CONN_DH_SWEEP_REMOVE 1147 /*! data-handle: connection sweep time-of-death sets */ -#define WT_STAT_CONN_DH_SWEEP_TOD 1147 +#define WT_STAT_CONN_DH_SWEEP_TOD 1148 /*! data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_SWEEPS 1148 +#define WT_STAT_CONN_DH_SWEEPS 1149 /*! data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1149 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1150 /*! data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1150 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1151 /*! lock: checkpoint lock acquisitions */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1151 +#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1152 /*! lock: checkpoint lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1152 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1153 /*! lock: checkpoint lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1153 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1154 /*! * lock: dhandle lock application thread time waiting for the dhandle * lock (usecs) */ -#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1154 +#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1155 /*! * lock: dhandle lock internal thread time waiting for the dhandle lock * (usecs) */ -#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1155 +#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1156 /*! lock: dhandle read lock acquisitions */ -#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1156 +#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1157 /*! 
lock: dhandle write lock acquisitions */ -#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1157 +#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1158 /*! lock: metadata lock acquisitions */ -#define WT_STAT_CONN_LOCK_METADATA_COUNT 1158 +#define WT_STAT_CONN_LOCK_METADATA_COUNT 1159 /*! lock: metadata lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1159 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1160 /*! lock: metadata lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1160 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1161 /*! lock: schema lock acquisitions */ -#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1161 +#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1162 /*! lock: schema lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1162 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1163 /*! lock: schema lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1163 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1164 /*! * lock: table lock application thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1164 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1165 /*! * lock: table lock internal thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1165 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1166 /*! lock: table read lock acquisitions */ -#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1166 +#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1167 /*! lock: table write lock acquisitions */ -#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1167 +#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1168 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1168 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1169 /*! 
log: force checkpoint calls slept */ -#define WT_STAT_CONN_LOG_FORCE_CKPT_SLEEP 1169 +#define WT_STAT_CONN_LOG_FORCE_CKPT_SLEEP 1170 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1170 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1171 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1171 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1172 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1172 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1173 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1173 +#define WT_STAT_CONN_LOG_FLUSH 1174 /*! log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1174 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1175 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1175 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1176 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1176 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1177 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1177 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1178 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1178 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1179 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1179 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1180 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1180 +#define WT_STAT_CONN_LOG_SCANS 1181 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1181 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1182 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1182 +#define WT_STAT_CONN_LOG_WRITE_LSN 1183 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1183 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1184 /*! 
log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1184 +#define WT_STAT_CONN_LOG_SYNC 1185 /*! log: log sync time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DURATION 1185 +#define WT_STAT_CONN_LOG_SYNC_DURATION 1186 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1186 +#define WT_STAT_CONN_LOG_SYNC_DIR 1187 /*! log: log sync_dir time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1187 +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1188 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1188 +#define WT_STAT_CONN_LOG_WRITES 1189 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1189 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1190 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1190 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1191 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1191 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1192 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1192 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1193 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1193 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1194 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1194 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1195 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1195 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1196 /*! log: slot close lost race */ -#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1196 +#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1197 /*! log: slot close unbuffered waits */ -#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1197 +#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1198 /*! log: slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1198 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1199 /*! 
log: slot join atomic update races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1199 +#define WT_STAT_CONN_LOG_SLOT_RACES 1200 /*! log: slot join calls atomic updates raced */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1200 +#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1201 /*! log: slot join calls did not yield */ -#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1201 +#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1202 /*! log: slot join calls found active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1202 +#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1203 /*! log: slot join calls slept */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1203 +#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1204 /*! log: slot join calls yielded */ -#define WT_STAT_CONN_LOG_SLOT_YIELD 1204 +#define WT_STAT_CONN_LOG_SLOT_YIELD 1205 /*! log: slot join found active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1205 +#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1206 /*! log: slot joins yield time (usecs) */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1206 +#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1207 /*! log: slot transitions unable to find free slot */ -#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1207 +#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1208 /*! log: slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1208 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1209 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1209 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1210 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1210 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1211 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1211 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1212 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1212 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1213 /*! 
log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1213 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1214 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1214 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1215 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1215 +#define WT_STAT_CONN_REC_PAGES 1216 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1216 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1217 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1217 +#define WT_STAT_CONN_REC_PAGE_DELETE 1218 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1218 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1219 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1219 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1220 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1220 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1221 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1221 +#define WT_STAT_CONN_SESSION_OPEN 1222 /*! session: table alter failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1222 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1223 /*! session: table alter successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1223 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1224 /*! session: table alter unchanged and skipped */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1224 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1225 /*! session: table compact failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1225 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1226 /*! 
session: table compact successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1226 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1227 /*! session: table create failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1227 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1228 /*! session: table create successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1228 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1229 /*! session: table drop failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1229 +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1230 /*! session: table drop successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1230 +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1231 /*! session: table rebalance failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1231 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1232 /*! session: table rebalance successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1232 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1233 /*! session: table rename failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1233 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1234 /*! session: table rename successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1234 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1235 /*! session: table salvage failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1235 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1236 /*! session: table salvage successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1236 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1237 /*! session: table truncate failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1237 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1238 /*! 
session: table truncate successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1238 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1239 /*! session: table verify failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1239 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1240 /*! session: table verify successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1240 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1241 /*! thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1241 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1242 /*! thread-state: active filesystem read calls */ -#define WT_STAT_CONN_THREAD_READ_ACTIVE 1242 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1243 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1243 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1244 /*! thread-yield: application thread time evicting (usecs) */ -#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1244 +#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1245 /*! thread-yield: application thread time waiting for cache (usecs) */ -#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1245 +#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1246 /*! * thread-yield: connection close blocked waiting for transaction state * stabilization */ -#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1246 +#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1247 /*! thread-yield: connection close yielded for lsm manager shutdown */ -#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1247 +#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1248 /*! thread-yield: data handle lock yielded */ -#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1248 +#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1249 /*! * thread-yield: get reference for page index and slot time sleeping * (usecs) */ -#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1249 +#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1250 /*! 
thread-yield: log server sync yielded for log write */ -#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1250 +#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1251 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1251 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1252 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1252 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1253 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1253 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1254 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1254 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1255 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1255 +#define WT_STAT_CONN_PAGE_SLEEP 1256 /*! * thread-yield: page delete rollback time sleeping for state change * (usecs) */ -#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1256 +#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1257 /*! thread-yield: page reconciliation yielded due to child modification */ -#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1257 +#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1258 /*! * thread-yield: tree descend one level yielded for split page index * update */ -#define WT_STAT_CONN_TREE_DESCEND_BLOCKED 1258 +#define WT_STAT_CONN_TREE_DESCEND_BLOCKED 1259 /*! transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1259 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1260 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1260 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1261 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1261 +#define WT_STAT_CONN_TXN_BEGIN 1262 /*! 
transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1262 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1263 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1263 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1264 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1264 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1265 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1265 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1266 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1266 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1267 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1267 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1268 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1268 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1269 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1269 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1270 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1270 +#define WT_STAT_CONN_TXN_CHECKPOINT 1271 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1271 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1272 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1272 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1273 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1273 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1274 /*! 
* transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1274 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1275 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1275 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1276 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1276 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1277 /*! * transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1277 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1278 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1278 +#define WT_STAT_CONN_TXN_SYNC 1279 /*! transaction: transactions commit timestamp queue inserts to head */ -#define WT_STAT_CONN_TXN_COMMIT_QUEUE_HEAD 1279 +#define WT_STAT_CONN_TXN_COMMIT_QUEUE_HEAD 1280 /*! transaction: transactions commit timestamp queue inserts total */ -#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1280 +#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1281 /*! transaction: transactions commit timestamp queue length */ -#define WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN 1281 +#define WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN 1282 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1282 +#define WT_STAT_CONN_TXN_COMMIT 1283 /*! transaction: transactions read timestamp queue inserts to head */ -#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1283 +#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1284 /*! transaction: transactions read timestamp queue inserts total */ -#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1284 +#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1285 /*! transaction: transactions read timestamp queue length */ -#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1285 +#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1286 /*! 
transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1286 +#define WT_STAT_CONN_TXN_ROLLBACK 1287 /*! transaction: update conflicts */ -#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1287 +#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1288 /*! * @} diff --git a/src/log/log.c b/src/log/log.c index 0b01b61ced3..4c32b9b02ed 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -1914,11 +1914,6 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, if (func == NULL) return (0); - if (LF_ISSET(WT_LOGSCAN_RECOVER)) - __wt_verbose(session, WT_VERB_LOG, - "__wt_log_scan truncating to %" PRIu32 "/%" PRIu32, - log->trunc_lsn.l.file, log->trunc_lsn.l.offset); - if (lsnp != NULL && LF_ISSET(WT_LOGSCAN_FIRST|WT_LOGSCAN_FROM_CKP)) WT_RET_MSG(session, WT_ERROR, @@ -2042,8 +2037,13 @@ advance: /* * Truncate this log file before we move to the next. */ - if (LF_ISSET(WT_LOGSCAN_RECOVER)) + if (LF_ISSET(WT_LOGSCAN_RECOVER) && + __wt_log_cmp(&rd_lsn, &log->trunc_lsn) < 0) { + __wt_verbose(session, WT_VERB_LOG, + "Truncate end of log %" PRIu32 "/%" PRIu32, + rd_lsn.l.file, rd_lsn.l.offset); WT_ERR(__log_truncate(session, &rd_lsn, true)); + } /* * If we had a partial record, we'll want to break * now after closing and truncating. Although for now @@ -2228,7 +2228,7 @@ advance: if (LF_ISSET(WT_LOGSCAN_RECOVER) && __wt_log_cmp(&rd_lsn, &log->trunc_lsn) < 0) { __wt_verbose(session, WT_VERB_LOG, - "__wt_log_scan truncating to %" PRIu32 "/%" PRIu32, + "End of recovery truncate end of log %" PRIu32 "/%" PRIu32, rd_lsn.l.file, rd_lsn.l.offset); WT_ERR(__log_truncate(session, &rd_lsn, false)); } diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index 95d025247a6..6195726ec67 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -1280,7 +1280,14 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) } else break; } + + /* + * Periodically check if we've timed out or eviction is stuck. 
+ * Quit if eviction is stuck, we're making the problem worse. + */ WT_ERR(__wt_session_compact_check_timeout(session)); + if (__wt_cache_stuck(session)) + WT_ERR(EBUSY); __wt_sleep(1, 0); /* diff --git a/src/meta/meta_track.c b/src/meta/meta_track.c index 0757b96f587..0ad9a2aa429 100644 --- a/src/meta/meta_track.c +++ b/src/meta/meta_track.c @@ -167,12 +167,18 @@ __meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk) static int __meta_track_unroll(WT_SESSION_IMPL *session, WT_META_TRACK *trk) { + WT_BM *bm; + WT_BTREE *btree; WT_DECL_RET; switch (trk->op) { case WT_ST_EMPTY: /* Unused slot */ break; case WT_ST_CHECKPOINT: /* Checkpoint, see above */ + btree = trk->dhandle->handle; + bm = btree->bm; + WT_WITH_DHANDLE(session, trk->dhandle, + ret = bm->checkpoint_resolve(bm, session, true)); break; case WT_ST_DROP_COMMIT: break; @@ -233,6 +239,9 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll) WT_DECL_RET; WT_META_TRACK *trk, *trk_orig; WT_SESSION_IMPL *ckpt_session; + int saved_ret; + + saved_ret = 0; WT_ASSERT(session, WT_META_TRACKING(session) && session->meta_track_nest > 0); @@ -255,12 +264,9 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll) if (trk == trk_orig) return (0); - if (unroll) { - while (--trk >= trk_orig) - WT_TRET(__meta_track_unroll(session, trk)); - /* Unroll operations don't need to flush the metadata. */ - return (ret); - } + /* Unrolling doesn't require syncing the metadata. */ + if (unroll) + goto done; /* * If we don't have the metadata cursor (e.g, we're in the process of @@ -271,13 +277,12 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll) goto done; /* If we're logging, make sure the metadata update was flushed. 
*/ - if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED)) { + if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED)) WT_WITH_DHANDLE(session, WT_SESSION_META_DHANDLE(session), ret = __wt_txn_checkpoint_log( - session, false, WT_TXN_LOG_CKPT_SYNC, NULL)); - WT_RET(ret); - } else { + session, false, WT_TXN_LOG_CKPT_SYNC, NULL)); + else { WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)); ckpt_session = S2C(session)->meta_ckpt_session; /* @@ -288,21 +293,32 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll) F_SET(ckpt_session, WT_SESSION_LOCKED_METADATA); WT_WITH_METADATA_LOCK(session, WT_WITH_DHANDLE(ckpt_session, - WT_SESSION_META_DHANDLE(session), - ret = __wt_checkpoint(ckpt_session, NULL))); + WT_SESSION_META_DHANDLE(session), + ret = __wt_checkpoint(ckpt_session, NULL))); F_CLR(ckpt_session, WT_SESSION_LOCKED_METADATA); ckpt_session->txn.id = WT_TXN_NONE; - WT_RET(ret); - WT_WITH_DHANDLE(session, - WT_SESSION_META_DHANDLE(session), - ret = __wt_checkpoint_sync(session, NULL)); - WT_RET(ret); + if (ret == 0) + WT_WITH_DHANDLE(session, + WT_SESSION_META_DHANDLE(session), + ret = __wt_checkpoint_sync(session, NULL)); } -done: /* Apply any tracked operations post-commit. */ - for (; trk_orig < trk; trk_orig++) - WT_TRET(__meta_track_apply(session, trk_orig)); - return (ret); +done: /* + * Undo any tracked operations on failure. + * Apply any tracked operations post-commit. + */ + if (unroll || ret != 0) { + saved_ret = ret; + ret = 0; + while (--trk >= trk_orig) + WT_TRET(__meta_track_unroll(session, trk)); + } else + for (; trk_orig < trk; trk_orig++) + WT_TRET(__meta_track_apply(session, trk_orig)); + if (ret != 0) + WT_PANIC_RET(session, ret, + "failed to apply or unroll all tracked operations"); + return (saved_ret == 0 ? 
0 : saved_ret); } /* diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 108d9cf15f9..3e857fef324 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -39,17 +39,21 @@ typedef struct { uint64_t orig_txn_checkpoint_gen; /* - * Track the oldest running transaction and the stable timestamp when - * reconciliation starts. + * Track the oldest running transaction and whether to skew lookaside + * to the newest or oldest update. */ + bool las_skew_oldest; uint64_t last_running; - WT_DECL_TIMESTAMP(stable_timestamp) /* Track the page's min/maximum transactions. */ uint64_t max_txn; WT_DECL_TIMESTAMP(max_timestamp) + WT_DECL_TIMESTAMP(max_onpage_timestamp) WT_DECL_TIMESTAMP(min_saved_timestamp) + u_int updates_seen; /* Count of updates seen. */ + u_int updates_unstable; /* Count of updates not visible_all. */ + bool update_uncommitted; /* An update was uncommitted */ bool update_used; /* An update could be used */ @@ -378,9 +382,16 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, * Otherwise we would need to keep updates in memory that go back older * than the version in the disk image, and since modify operations * aren't idempotent, that is problematic. + * + * If we try to do eviction using transaction visibility, we had better + * have a snapshot. This doesn't apply to checkpoints: there are + * (rare) cases where we write data at read-uncommitted isolation. */ WT_ASSERT(session, !LF_ISSET(WT_REC_UPDATE_RESTORE) || LF_ISSET(WT_REC_VISIBLE_ALL)); + WT_ASSERT(session, !LF_ISSET(WT_REC_EVICT) || + LF_ISSET(WT_REC_VISIBLE_ALL) || + F_ISSET(&session->txn, WT_TXN_HAS_SNAPSHOT)); /* We shouldn't get called with a clean page, that's an error. */ WT_ASSERT(session, __wt_page_is_modified(page)); @@ -443,6 +454,15 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_ILLEGAL_VALUE_SET(session); } + /* + * Update the global lookaside score. 
Only use observations during + * eviction, not checkpoints and don't count eviction of the lookaside + * table itself. + */ + if (F_ISSET(r, WT_REC_EVICT) && !F_ISSET(btree, WT_BTREE_LOOKASIDE)) + __wt_cache_update_lookaside_score( + session, r->updates_seen, r->updates_unstable); + /* Check for a successful reconciliation. */ WT_TRET(__rec_write_check_complete(session, r, ret, lookaside_retryp)); @@ -675,16 +695,14 @@ __rec_write_page_status(WT_SESSION_IMPL *session, WT_RECONCILE *r) */ WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT) || - F_ISSET(r, WT_REC_UPDATE_RESTORE)); + F_ISSET(r, WT_REC_LOOKASIDE | WT_REC_UPDATE_RESTORE)); } else { /* * Track the page's maximum transaction ID (used to decide if * we're likely to be able to evict this page in the future). */ mod->rec_max_txn = r->max_txn; -#ifdef HAVE_TIMESTAMPS __wt_timestamp_set(&mod->rec_max_timestamp, &r->max_timestamp); -#endif /* * Track the tree's maximum transaction ID (used to decide if @@ -912,12 +930,12 @@ __rec_init(WT_SESSION_IMPL *session, * uncommitted. */ txn_global = &S2C(session)->txn_global; + if (__wt_btree_immediately_durable(session)) + r->las_skew_oldest = false; + else + WT_ORDERED_READ(r->las_skew_oldest, + txn_global->has_stable_timestamp); WT_ORDERED_READ(r->last_running, txn_global->last_running); -#ifdef HAVE_TIMESTAMPS - WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock, - __wt_timestamp_set( - &r->stable_timestamp, &txn_global->stable_timestamp)); -#endif /* * When operating on the lookaside table, we should never try @@ -957,12 +975,12 @@ __rec_init(WT_SESSION_IMPL *session, /* Track the page's min/maximum transaction */ r->max_txn = WT_TXN_NONE; -#ifdef HAVE_TIMESTAMPS __wt_timestamp_set_zero(&r->max_timestamp); + __wt_timestamp_set_zero(&r->max_onpage_timestamp); __wt_timestamp_set_inf(&r->min_saved_timestamp); -#endif /* Track if updates were used and/or uncommitted. 
*/ + r->updates_seen = r->updates_unstable = 0; r->update_uncommitted = r->update_used = false; /* Track if the page can be marked clean. */ @@ -1248,6 +1266,9 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, if ((txnid = upd->txnid) == WT_TXN_ABORTED) continue; + ++r->updates_seen; + upd_memsize += WT_UPDATE_MEMSIZE(upd); + /* * Track the first update in the chain that is not aborted and * the maximum transaction ID. @@ -1266,10 +1287,20 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, * concurrent transaction commits or rolls back while we are * examining its updates. */ - if (WT_TXNID_LE(r->last_running, txnid)) + if (F_ISSET(r, WT_REC_EVICT) && + (F_ISSET(r, WT_REC_VISIBLE_ALL) ? + WT_TXNID_LE(r->last_running, txnid) : + !__txn_visible_id(session, txnid))) { uncommitted = r->update_uncommitted = true; + continue; + } - upd_memsize += WT_UPDATE_MEMSIZE(upd); +#ifdef HAVE_TIMESTAMPS + /* Track the first update with non-zero timestamp. */ + if (first_ts_upd == NULL && + !__wt_timestamp_iszero(&upd->timestamp)) + first_ts_upd = upd; +#endif /* * Find the first update we can use. @@ -1278,10 +1309,27 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, * uncommitted updates). Lookaside eviction can save any * committed update. Regular eviction checks that the maximum * transaction ID and timestamp seen are stable. + * + * Lookaside eviction tries to choose the same version as a + * subsequent checkpoint, so that checkpoint can skip over + * pages with lookaside entries. If the application has + * supplied a stable timestamp, we assume (a) that it is old, + * and (b) that the next checkpoint will use it, so we wait to + * see a stable update. If there is no stable timestamp, we + * assume the next checkpoint will write the most recent + * version (but we save enough information that checkpoint can + * fix things up if we choose an update that is too new). 
*/ + if (*updp == NULL && F_ISSET(r, WT_REC_LOOKASIDE) && + F_ISSET(r, WT_REC_VISIBLE_ALL) && !r->las_skew_oldest) + *updp = upd; + if (F_ISSET(r, WT_REC_VISIBLE_ALL) ? !__wt_txn_upd_visible_all(session, upd) : !__wt_txn_upd_visible(session, upd)) { + if (F_ISSET(r, WT_REC_EVICT)) + ++r->updates_unstable; + /* * Rare case: when applications run at low isolation * levels, update/restore eviction may see a stable @@ -1291,21 +1339,21 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, * discard an uncommitted update. */ if (F_ISSET(r, WT_REC_UPDATE_RESTORE) && - *updp != NULL && uncommitted) + *updp != NULL && uncommitted) { + r->leave_dirty = true; return (EBUSY); + } continue; } + /* + * Lookaside without stable timestamp was taken care of above + * (set to the first uncommitted transaction. Lookaside with + * stable timestamp always takes the first stable update. + */ if (*updp == NULL) *updp = upd; - -#ifdef HAVE_TIMESTAMPS - /* Track the first update with non-zero timestamp. */ - if (first_ts_upd == NULL && - !__wt_timestamp_iszero(&upd->timestamp)) - first_ts_upd = upd; -#endif } /* Reconciliation should never see an aborted or reserved update. */ @@ -1360,9 +1408,9 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, #else timestampp = NULL; #endif - all_visible = *updp == first_txn_upd && + all_visible = *updp == first_txn_upd && !uncommitted && (F_ISSET(r, WT_REC_VISIBLE_ALL) ? 
- !uncommitted && __wt_txn_visible_all(session, max_txn, timestampp) : + __wt_txn_visible_all(session, max_txn, timestampp) : __wt_txn_visible(session, max_txn, timestampp)); if (all_visible) @@ -1371,8 +1419,8 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, if (F_ISSET(r, WT_REC_VISIBILITY_ERR)) WT_PANIC_RET(session, EINVAL, "reconciliation error, update not visible"); - if (!F_ISSET(r, WT_REC_LOOKASIDE)) - r->leave_dirty = true; + + r->leave_dirty = true; /* * If not trying to evict the page, we know what we'll write and we're @@ -1409,16 +1457,21 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, #ifdef HAVE_TIMESTAMPS /* Track the oldest saved timestamp for lookaside. */ - if (first_ts_upd == NULL) - __wt_timestamp_set_zero(&r->min_saved_timestamp); - else if (F_ISSET(r, WT_REC_LOOKASIDE)) - for (upd = first_upd; upd != NULL; upd = upd->next) + if (F_ISSET(r, WT_REC_LOOKASIDE)) { + /* If no updates had timestamps, we're done. */ + if (first_ts_upd == NULL) + __wt_timestamp_set_zero(&r->min_saved_timestamp); + for (upd = first_upd; upd != *updp; upd = upd->next) { if (upd->txnid != WT_TXN_ABORTED && - upd->txnid != WT_TXN_NONE && - __wt_timestamp_cmp( - &upd->timestamp, &r->min_saved_timestamp) < 0) - __wt_timestamp_set( - &r->min_saved_timestamp, &upd->timestamp); + __wt_timestamp_cmp(&upd->timestamp, + &r->min_saved_timestamp) < 0) + __wt_timestamp_set(&r->min_saved_timestamp, + &upd->timestamp); + + WT_ASSERT(session, upd->txnid == WT_TXN_ABORTED || + WT_TXNID_LE(upd->txnid, r->max_txn)); + } + } #endif check_original_value: @@ -1431,16 +1484,24 @@ check_original_value: /* * Returning an update means the original on-page value might be lost, * and that's a problem if there's a reader that needs it. There are - * two cases: any lookaside table eviction (because the backing disk - * image is rewritten), or any reconciliation of a backing overflow - * record that will be physically removed once it's no longer needed. 
- */ - if (*updp != NULL && (F_ISSET(r, WT_REC_LOOKASIDE) || - (vpack != NULL && + * three cases: any update from a modify operation (because the modify + * has to be applied to a stable update, not the new on-page update), + * any lookaside table eviction (because the backing disk image is + * rewritten), or any reconciliation of a backing overflow record that + * will be physically removed once it's no longer needed. + */ + if (*updp != NULL && ((*updp)->type == WT_UPDATE_MODIFIED || + F_ISSET(r, WT_REC_LOOKASIDE) || (vpack != NULL && vpack->ovfl && vpack->raw != WT_CELL_VALUE_OVFL_RM))) WT_RET( __rec_append_orig_value(session, page, first_upd, vpack)); +#ifdef HAVE_TIMESTAMPS + if ((upd = *updp) != NULL && + __wt_timestamp_cmp(&upd->timestamp, &r->max_onpage_timestamp) > 0) + __wt_timestamp_set(&r->max_onpage_timestamp, &upd->timestamp); +#endif + return (0); } @@ -3231,7 +3292,7 @@ __rec_split_write_supd(WT_SESSION_IMPL *session, WT_RET(__rec_supd_move(session, multi, r->supd, r->supd_next)); r->supd_next = 0; r->supd_memsize = 0; - return (0); + goto done; } /* @@ -3291,6 +3352,17 @@ __rec_split_write_supd(WT_SESSION_IMPL *session, r->supd_next = j; } +done: /* Track the oldest timestamp seen so far. 
*/ + multi->page_las.las_skew_oldest = r->las_skew_oldest; + multi->page_las.las_max_txn = r->max_txn; + WT_ASSERT(session, r->max_txn != WT_TXN_NONE); +#ifdef HAVE_TIMESTAMPS + __wt_timestamp_set( + &multi->page_las.min_timestamp, &r->min_saved_timestamp); + __wt_timestamp_set( + &multi->page_las.onpage_timestamp, &r->max_onpage_timestamp); +#endif + err: __wt_scr_free(session, &key); return (ret); } @@ -5859,11 +5931,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) r->multi->addr.addr = NULL; mod->mod_disk_image = r->multi->disk_image; r->multi->disk_image = NULL; - mod->mod_replace_las_pageid = r->multi->las_pageid; -#ifdef HAVE_TIMESTAMPS - __wt_timestamp_set(&mod->mod_replace_las_min_timestamp, - &r->min_saved_timestamp); -#endif + mod->mod_page_las = r->multi->page_las; } else WT_RET(__wt_bt_write(session, r->wrapup_checkpoint, NULL, NULL, true, F_ISSET(r, WT_REC_CHECKPOINT), @@ -6008,9 +6076,9 @@ __rec_las_wrapup_err(WT_SESSION_IMPL *session, WT_RECONCILE *r) * flags if lookaside table entries for this page have been written. 
*/ for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i) - if (multi->supd != NULL && multi->las_pageid != 0) - WT_TRET(__wt_las_remove_block( - session, NULL, btree_id, multi->las_pageid)); + if (multi->supd != NULL && multi->page_las.las_pageid != 0) + WT_TRET(__wt_las_remove_block(session, NULL, + btree_id, multi->page_las.las_pageid)); return (ret); } diff --git a/src/support/stat.c b/src/support/stat.c index 57dcd33c7f1..924afaa21d6 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -809,6 +809,7 @@ static const char * const __stats_connection_desc[] = { "cache: internal pages evicted", "cache: internal pages split during eviction", "cache: leaf pages split during eviction", + "cache: lookaside score", "cache: lookaside table entries", "cache: lookaside table insert calls", "cache: lookaside table remove calls", @@ -1139,6 +1140,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cache_eviction_internal = 0; stats->cache_eviction_split_internal = 0; stats->cache_eviction_split_leaf = 0; + /* not clearing cache_lookaside_score */ /* not clearing cache_lookaside_entries */ stats->cache_lookaside_insert = 0; stats->cache_lookaside_remove = 0; @@ -1490,6 +1492,8 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, cache_eviction_split_internal); to->cache_eviction_split_leaf += WT_STAT_READ(from, cache_eviction_split_leaf); + to->cache_lookaside_score += + WT_STAT_READ(from, cache_lookaside_score); to->cache_lookaside_entries += WT_STAT_READ(from, cache_lookaside_entries); to->cache_lookaside_insert += diff --git a/src/txn/txn.c b/src/txn/txn.c index 91771403e13..3d45ff8a88c 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -578,10 +578,8 @@ __wt_txn_release(WT_SESSION_IMPL *session) txn->id = WT_TXN_NONE; } -#ifdef HAVE_TIMESTAMPS __wt_txn_clear_commit_timestamp(session); __wt_txn_clear_read_timestamp(session); -#endif /* Free the scratch buffer allocated for logging. 
*/ __wt_logrec_free(session, &txn->logrec); diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index afb3cba1db6..eb32ef2d06a 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -437,8 +437,7 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session) for (;;) { current_dirty = (100.0 * __wt_cache_dirty_leaf_inuse(cache)) / cache_size; - if (current_dirty <= - (double)cache->eviction_checkpoint_target) + if (current_dirty <= (double)cache->eviction_checkpoint_target) break; __wt_sleep(0, stepdown_us / 10); @@ -506,22 +505,53 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session) } /* + * __wt_checkpoint_progress -- + * Output a checkpoint progress message. + */ +void +__wt_checkpoint_progress(WT_SESSION_IMPL *session, bool closing) +{ + struct timespec cur_time; + WT_CONNECTION_IMPL *conn; + uint64_t time_diff; + + conn = S2C(session); + __wt_epoch(session, &cur_time); + + /* Time since the full database checkpoint started */ + time_diff = WT_TIMEDIFF_SEC(cur_time, + conn->ckpt_timer_start); + + if (closing || (time_diff / 20) > conn->ckpt_progress_msg_count) { + __wt_verbose(session, WT_VERB_CHECKPOINT_PROGRESS, + "Checkpoint %s for %" PRIu64 + " seconds and wrote: %" PRIu64 " pages (%" PRIu64 " MB)", + closing ? "ran" : "has been running", + time_diff, conn->ckpt_write_pages, + conn->ckpt_write_bytes / WT_MEGABYTE); + conn->ckpt_progress_msg_count++; + } +} + +/* * __checkpoint_stats -- * Update checkpoint timer stats. */ static void -__checkpoint_stats( - WT_SESSION_IMPL *session, struct timespec *start, struct timespec *stop) +__checkpoint_stats(WT_SESSION_IMPL *session) { + struct timespec stop; WT_CONNECTION_IMPL *conn; uint64_t msec; conn = S2C(session); - /* - * Get time diff in milliseconds. 
- */ - msec = WT_TIMEDIFF_MS(*stop, *start); + /* Output a verbose progress message for long running checkpoints */ + if (conn->ckpt_progress_msg_count > 0) + __wt_checkpoint_progress(session, true); + + __wt_epoch(session, &stop); + msec = WT_TIMEDIFF_MS(stop, conn->ckpt_timer_scrub_end); if (msec > conn->ckpt_time_max) conn->ckpt_time_max = msec; @@ -536,33 +566,29 @@ __checkpoint_stats( * Output a verbose message with timing information */ static void -__checkpoint_verbose_track(WT_SESSION_IMPL *session, - const char *msg, struct timespec *start) +__checkpoint_verbose_track(WT_SESSION_IMPL *session, const char *msg) { #ifdef HAVE_VERBOSE struct timespec stop; + WT_CONNECTION_IMPL *conn; uint64_t msec; if (!WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) return; + conn = S2C(session); __wt_epoch(session, &stop); - /* - * Get time diff in milliseconds. - */ - msec = WT_TIMEDIFF_MS(stop, *start); + /* Get time diff in milliseconds. */ + msec = WT_TIMEDIFF_MS(stop, conn->ckpt_timer_start); __wt_verbose(session, WT_VERB_CHECKPOINT, "time: %" PRIu64 " ms, gen: %" PRIu64 ": Full database checkpoint %s", msec, __wt_gen(session, WT_GEN_CHECKPOINT), msg); - /* Update the timestamp so we are reporting intervals. 
*/ - memcpy(start, &stop, sizeof(*start)); #else WT_UNUSED(session); WT_UNUSED(msg); - WT_UNUSED(start); #endif } @@ -713,7 +739,6 @@ static int __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) { struct timespec fsync_start, fsync_stop; - struct timespec start, stop, verb_timer; WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DECL_RET; @@ -745,7 +770,12 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) conn->cache->evict_max_page_size = 0; /* Initialize the verbose tracking timer */ - __wt_epoch(session, &verb_timer); + __wt_epoch(session, &conn->ckpt_timer_start); + + /* Initialize the checkpoint progress tracking data */ + conn->ckpt_progress_msg_count = 0; + conn->ckpt_write_bytes = 0; + conn->ckpt_write_pages = 0; /* * Update the global oldest ID so we do all possible cleanup. @@ -770,11 +800,10 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(__wt_txn_checkpoint_log( session, full, WT_TXN_LOG_CKPT_PREPARE, NULL)); - __checkpoint_verbose_track(session, - "starting transaction", &verb_timer); + __checkpoint_verbose_track(session, "starting transaction"); if (full) - __wt_epoch(session, &start); + __wt_epoch(session, &conn->ckpt_timer_scrub_end); /* * Start the checkpoint for real. 
@@ -845,8 +874,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_presync)); __wt_evict_server_wake(session); - __checkpoint_verbose_track(session, - "committing transaction", &verb_timer); + __checkpoint_verbose_track(session, "committing transaction"); /* * Checkpoints have to hit disk (it would be reasonable to configure for @@ -860,7 +888,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_STAT_CONN_SET(session, txn_checkpoint_fsync_post_duration, fsync_duration_usecs); - __checkpoint_verbose_track(session, "sync completed", &verb_timer); + __checkpoint_verbose_track(session, "sync completed"); /* * Commit the transaction now that we are sure that all files in the @@ -898,8 +926,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) ret = __wt_checkpoint_sync(session, NULL)); WT_ERR(ret); - __checkpoint_verbose_track(session, - "metadata sync completed", &verb_timer); + __checkpoint_verbose_track(session, "metadata sync completed"); } else WT_WITH_DHANDLE(session, WT_SESSION_META_DHANDLE(session), @@ -912,12 +939,16 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) */ txn_global->checkpoint_state.pinned_id = WT_TXN_NONE; - if (full) { - __wt_epoch(session, &stop); - __checkpoint_stats(session, &start, &stop); - } + if (full) + __checkpoint_stats(session); err: /* + * Reset the timer so that next checkpoint tracks the progress only if + * configured. + */ + conn->ckpt_timer_start.tv_sec = 0; + + /* * XXX * Rolling back the changes here is problematic. 
* diff --git a/src/txn/txn_timestamp.c b/src/txn/txn_timestamp.c index 0201036684d..98887627bfc 100644 --- a/src/txn/txn_timestamp.c +++ b/src/txn/txn_timestamp.c @@ -467,11 +467,11 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) if (has_oldest || has_stable) WT_RET(__wt_txn_update_pinned_timestamp(session)); } + return (0); #else - WT_RET_MSG(session, ENOTSUP, "set_timestamp requires a " - "version of WiredTiger built with timestamp support"); + WT_RET_MSG(session, ENOTSUP, "set_timestamp requires a " + "version of WiredTiger built with timestamp support"); #endif - return (0); } #ifdef HAVE_TIMESTAMPS @@ -687,6 +687,17 @@ __wt_txn_clear_read_timestamp(WT_SESSION_IMPL *session) if (!F_ISSET(txn, WT_TXN_PUBLIC_TS_READ)) return; +#ifdef HAVE_DIAGNOSTIC + { + wt_timestamp_t pinned_ts; + + WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock, + __wt_timestamp_set(&pinned_ts, &txn_global->pinned_timestamp)); + WT_ASSERT(session, + __wt_timestamp_cmp(&txn->read_timestamp, &pinned_ts) >= 0); + } +#endif + __wt_writelock(session, &txn_global->read_timestamp_rwlock); TAILQ_REMOVE(&txn_global->read_timestamph, txn, read_timestampq); --txn_global->read_timestampq_len; diff --git a/src/utilities/util_dump.c b/src/utilities/util_dump.c index 2f572f3f370..f768d323afb 100644 --- a/src/utilities/util_dump.c +++ b/src/utilities/util_dump.c @@ -269,7 +269,7 @@ dump_add_config(WT_SESSION *session, char **bufp, size_t *leftp, if (ret != 0) return (util_err(session, ret, NULL)); *bufp += n; - *leftp -= (size_t)n; + *leftp -= n; return (0); } diff --git a/test/csuite/random_abort/main.c b/test/csuite/random_abort/main.c index a171cfef13a..ad49f01dde5 100644 --- a/test/csuite/random_abort/main.c +++ b/test/csuite/random_abort/main.c @@ -56,6 +56,8 @@ static bool inmem; #define ENV_CONFIG_REC "log=(recover=on)" #define MAX_VAL 4096 +static void handler(int) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); static void usage(void) 
WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); static void @@ -123,6 +125,8 @@ thread_run(void *arg) /* * Write our portion of the key space until we're killed. */ + printf("Thread %" PRIu32 " starts at %" PRIu64 "\n", + td->id, td->start); for (i = td->start; ; ++i) { testutil_check(__wt_snprintf( kname, sizeof(kname), "%" PRIu64, i)); @@ -185,7 +189,7 @@ fill_db(uint32_t nth) printf("Create %" PRIu32 " writer threads\n", nth); for (i = 0; i < nth; ++i) { td[i].conn = conn; - td[i].start = (UINT64_MAX / nth) * i; + td[i].start = WT_BILLION * (uint64_t)i; td[i].id = i; testutil_check(__wt_thread_create( NULL, &thr[i], thread_run, &td[i])); @@ -209,9 +213,24 @@ fill_db(uint32_t nth) extern int __wt_optind; extern char *__wt_optarg; +static void +handler(int sig) +{ + pid_t pid; + + WT_UNUSED(sig); + pid = wait(NULL); + /* + * The core file will indicate why the child exited. Choose EINVAL here. + */ + testutil_die(EINVAL, + "Child process %" PRIu64 " abnormally exited", (uint64_t)pid); +} + int main(int argc, char *argv[]) { + struct sigaction sa; struct stat sb; FILE *fp; WT_CONNECTION *conn; @@ -298,6 +317,9 @@ main(int argc, char *argv[]) * kill the child, run recovery and make sure all items we wrote * exist after recovery runs. */ + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = handler; + testutil_checksys(sigaction(SIGCHLD, &sa, NULL)); if ((pid = fork()) < 0) testutil_die(errno, "fork"); @@ -311,15 +333,15 @@ main(int argc, char *argv[]) * Sleep for the configured amount of time before killing * the child. Start the timeout from the time we notice that * the table has been created. That allows the test to run - * correctly on really slow machines. Verify the process ID - * still exists in case the child aborts for some reason we - * don't stay in this loop forever. + * correctly on really slow machines. 
*/ testutil_check(__wt_snprintf( buf, sizeof(buf), "%s/%s", home, fs_main)); - while (stat(buf, &sb) != 0 && kill(pid, 0) == 0) + while (stat(buf, &sb) != 0) sleep(1); sleep(timeout); + sa.sa_handler = SIG_DFL; + testutil_checksys(sigaction(SIGCHLD, &sa, NULL)); /* * !!! It should be plenty long enough to make sure more than diff --git a/test/csuite/timestamp_abort/main.c b/test/csuite/timestamp_abort/main.c index f6dadd95495..ca5fa10c2db 100644 --- a/test/csuite/timestamp_abort/main.c +++ b/test/csuite/timestamp_abort/main.c @@ -56,6 +56,7 @@ static char home[1024]; /* Program working dir */ * Each worker thread creates its own records file that records the data it * inserted and it records the timestamp that was used for that insertion. */ +#define INVALID_KEY UINT64_MAX #define MAX_CKPT_INVL 5 /* Maximum interval between checkpoints */ #define MAX_TH 12 #define MAX_TIME 40 @@ -84,6 +85,22 @@ static uint64_t th_ts[MAX_TH]; "transaction_sync=(enabled,method=none)" #define ENV_CONFIG_REC "log=(archive=false,recover=on)" +typedef struct { + uint64_t absent_key; /* Last absent key */ + uint64_t exist_key; /* First existing key after miss */ + uint64_t first_key; /* First key in range */ + uint64_t first_miss; /* First missing key */ + uint64_t last_key; /* Last key in range */ +} REPORT; + +typedef struct { + WT_CONNECTION *conn; + uint64_t start; + uint32_t info; +} THREAD_DATA; + +static void handler(int) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); static void usage(void) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); static void @@ -94,12 +111,6 @@ usage(void) exit(EXIT_FAILURE); } -typedef struct { - WT_CONNECTION *conn; - uint64_t start; - uint32_t info; -} WT_THREAD_DATA; - /* * thread_ts_run -- * Runner function for a timestamp thread. 
@@ -109,11 +120,11 @@ thread_ts_run(void *arg) { WT_CURSOR *cur_stable; WT_SESSION *session; - WT_THREAD_DATA *td; + THREAD_DATA *td; uint64_t i, last_ts, oldest_ts; char tscfg[64]; - td = (WT_THREAD_DATA *)arg; + td = (THREAD_DATA *)arg; last_ts = 0; testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session)); @@ -177,7 +188,7 @@ thread_ckpt_run(void *arg) FILE *fp; WT_RAND_STATE rnd; WT_SESSION *session; - WT_THREAD_DATA *td; + THREAD_DATA *td; uint64_t ts; uint32_t sleep_time; int i; @@ -185,7 +196,7 @@ thread_ckpt_run(void *arg) __wt_random_init(&rnd); - td = (WT_THREAD_DATA *)arg; + td = (THREAD_DATA *)arg; /* * Keep a separate file with the records we wrote for checking. */ @@ -233,7 +244,7 @@ thread_run(void *arg) WT_ITEM data; WT_RAND_STATE rnd; WT_SESSION *session; - WT_THREAD_DATA *td; + THREAD_DATA *td; uint64_t i, stable_ts; char cbuf[MAX_VAL], lbuf[MAX_VAL], obuf[MAX_VAL]; char kname[64], tscfg[64]; @@ -244,7 +255,7 @@ thread_run(void *arg) memset(obuf, 0, sizeof(obuf)); memset(kname, 0, sizeof(kname)); - td = (WT_THREAD_DATA *)arg; + td = (THREAD_DATA *)arg; /* * Set up the separate file for checking. */ @@ -310,7 +321,13 @@ thread_run(void *arg) "commit_timestamp=%" PRIx64, stable_ts)); testutil_check( session->commit_transaction(session, tscfg)); - th_ts[td->info] = stable_ts; + /* + * Update the thread's last-committed timestamp. + * Don't let the compiler re-order this statement, + * if we were to race with the timestamp thread, it + * might see our thread update before the commit. 
+ */ + WT_PUBLISH(th_ts[td->info], stable_ts); } else testutil_check( session->commit_transaction(session, NULL)); @@ -343,13 +360,13 @@ run_workload(uint32_t nth) { WT_CONNECTION *conn; WT_SESSION *session; - WT_THREAD_DATA *td; + THREAD_DATA *td; wt_thread_t *thr; uint32_t ckpt_id, i, ts_id; char envconf[512]; thr = dcalloc(nth+2, sizeof(*thr)); - td = dcalloc(nth+2, sizeof(WT_THREAD_DATA)); + td = dcalloc(nth+2, sizeof(THREAD_DATA)); if (chdir(home) != 0) testutil_die(errno, "Child chdir: %s", home); if (inmem) @@ -398,7 +415,7 @@ run_workload(uint32_t nth) printf("Create %" PRIu32 " writer threads\n", nth); for (i = 0; i < nth; ++i) { td[i].conn = conn; - td[i].start = (UINT64_MAX / nth) * i; + td[i].start = WT_BILLION * (uint64_t)i; td[i].info = i; testutil_check(__wt_thread_create( NULL, &thr[i], thread_run, &td[i])); @@ -434,18 +451,66 @@ timestamp_build(void) extern int __wt_optind; extern char *__wt_optarg; +/* + * Initialize a report structure. Since zero is a valid key we + * cannot just clear it. + */ +static void +initialize_rep(REPORT *r) +{ + r->first_key = r->first_miss = INVALID_KEY; + r->absent_key = r->exist_key = r->last_key = INVALID_KEY; +} + +/* + * Print out information if we detect missing records in the + * middle of the data of a report structure. + */ +static void +print_missing(REPORT *r, const char *fname, const char *msg) +{ + if (r->exist_key != INVALID_KEY) + printf("%s: %s error %" PRIu64 + " absent records %" PRIu64 "-%" PRIu64 + ". Then keys %" PRIu64 "-%" PRIu64 " exist." + " Key range %" PRIu64 "-%" PRIu64 "\n", + fname, msg, + r->exist_key - r->first_miss - 1, + r->first_miss, r->exist_key - 1, + r->exist_key, r->last_key, + r->first_key, r->last_key); +} + +/* + * Signal handler to catch if the child died unexpectedly. + */ +static void +handler(int sig) +{ + pid_t pid; + + WT_UNUSED(sig); + pid = wait(NULL); + /* + * The core file will indicate why the child exited. Choose EINVAL here. 
+ */ + testutil_die(EINVAL, + "Child process %" PRIu64 " abnormally exited", (uint64_t)pid); +} + int main(int argc, char *argv[]) { + struct sigaction sa; struct stat sb; FILE *fp; + REPORT c_rep[MAX_TH], l_rep[MAX_TH], o_rep[MAX_TH]; WT_CONNECTION *conn; WT_CURSOR *cur_coll, *cur_local, *cur_oplog, *cur_stable; WT_RAND_STATE rnd; WT_SESSION *session; pid_t pid; uint64_t absent_coll, absent_local, absent_oplog, count, key, last_key; - uint64_t first_miss, middle_coll, middle_local, middle_oplog; uint64_t stable_fp, stable_val, val[MAX_TH+1]; uint32_t i, nth, timeout; int ch, status, ret; @@ -524,6 +589,7 @@ main(int argc, char *argv[]) if (nth < MIN_TH) nth = MIN_TH; } + printf("Parent: compatibility: %s, " "in-mem log sync: %s, timestamp in use: %s\n", compat ? "true" : "false", @@ -536,6 +602,9 @@ main(int argc, char *argv[]) * kill the child, run recovery and make sure all items we wrote * exist after recovery runs. */ + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = handler; + testutil_checksys(sigaction(SIGCHLD, &sa, NULL)); testutil_checksys((pid = fork()) < 0); if (pid == 0) { /* child */ @@ -548,15 +617,15 @@ main(int argc, char *argv[]) * Sleep for the configured amount of time before killing * the child. Start the timeout from the time we notice that * the file has been created. That allows the test to run - * correctly on really slow machines. Verify the process ID - * still exists in case the child aborts for some reason we - * don't stay in this loop forever. + * correctly on really slow machines. */ testutil_check(__wt_snprintf( statname, sizeof(statname), "%s/%s", home, ckpt_file)); - while (stat(statname, &sb) != 0 && kill(pid, 0) == 0) + while (stat(statname, &sb) != 0) sleep(1); sleep(timeout); + sa.sa_handler = SIG_DFL; + testutil_checksys(sigaction(SIGCHLD, &sa, NULL)); /* * !!! 
It should be plenty long enough to make sure more than @@ -573,6 +642,12 @@ main(int argc, char *argv[]) */ if (chdir(home) != 0) testutil_die(errno, "parent chdir: %s", home); + /* + * The tables can get very large, so while we'd ideally like to + * copy the entire database, we only copy the log files for now. + * Otherwise it can take far too long to run the test, particularly + * in automated testing. + */ testutil_check(__wt_snprintf(buf, sizeof(buf), "rm -rf ../%s.SAVE && mkdir ../%s.SAVE && " "cp -p WiredTigerLog.* ../%s.SAVE", @@ -619,7 +694,9 @@ main(int argc, char *argv[]) absent_coll = absent_local = absent_oplog = 0; fatal = false; for (i = 0; i < nth; ++i) { - first_miss = middle_coll = middle_local = middle_oplog = 0; + initialize_rep(&c_rep[i]); + initialize_rep(&l_rep[i]); + initialize_rep(&o_rep[i]); testutil_check(__wt_snprintf( fname, sizeof(fname), RECORDS_FILE, i)); if ((fp = fopen(fname, "r")) == NULL) @@ -632,9 +709,14 @@ main(int argc, char *argv[]) * but records may be missing at the end. If we did * write-no-sync, we expect every key to have been recovered. */ - for (last_key = UINT64_MAX;; ++count, last_key = key) { + for (last_key = INVALID_KEY;; ++count, last_key = key) { ret = fscanf(fp, "%" SCNu64 "%" SCNu64 "\n", &stable_fp, &key); + if (last_key == INVALID_KEY) { + c_rep[i].first_key = key; + l_rep[i].first_key = key; + o_rep[i].first_key = key; + } if (ret != EOF && ret != 2) { /* * If we find a partial line, consider it @@ -651,7 +733,7 @@ main(int argc, char *argv[]) * written key at the end that can result in a false * negative error for a missing record. Detect it. 
*/ - if (last_key != UINT64_MAX && key != last_key + 1) { + if (last_key != INVALID_KEY && key != last_key + 1) { printf("%s: Ignore partial record %" PRIu64 " last valid key %" PRIu64 "\n", fname, key, last_key); @@ -682,18 +764,16 @@ main(int argc, char *argv[]) fname, key, stable_fp, val[i]); absent_coll++; } - if (middle_coll == 0) - first_miss = key; - middle_coll = key; - } else if (middle_coll != 0) { + if (c_rep[i].first_miss == INVALID_KEY) + c_rep[i].first_miss = key; + c_rep[i].absent_key = key; + } else if (c_rep[i].absent_key != INVALID_KEY && + c_rep[i].exist_key == INVALID_KEY) { /* - * We should never find an existing key after - * we have detected one missing. + * If we get here we found a record that exists + * after absent records, a hole in our data. */ - printf("%s: COLLECTION after absent records %" - PRIu64 "-%" PRIu64 " key %" PRIu64 - " exists\n", - fname, first_miss, middle_coll, key); + c_rep[i].exist_key = key; fatal = true; } /* @@ -706,15 +786,16 @@ main(int argc, char *argv[]) printf("%s: LOCAL no record with key %" PRIu64 "\n", fname, key); absent_local++; - middle_local = key; - } else if (middle_local != 0) { + if (l_rep[i].first_miss == INVALID_KEY) + l_rep[i].first_miss = key; + l_rep[i].absent_key = key; + } else if (l_rep[i].absent_key != INVALID_KEY && + l_rep[i].exist_key == INVALID_KEY) { /* * We should never find an existing key after * we have detected one missing. 
*/ - printf("%s: LOCAL after absent record at %" - PRIu64 " key %" PRIu64 " exists\n", - fname, middle_local, key); + l_rep[i].exist_key = key; fatal = true; } /* @@ -727,23 +808,28 @@ main(int argc, char *argv[]) printf("%s: OPLOG no record with key %" PRIu64 "\n", fname, key); absent_oplog++; - middle_oplog = key; - } else if (middle_oplog != 0) { + if (o_rep[i].first_miss == INVALID_KEY) + o_rep[i].first_miss = key; + o_rep[i].absent_key = key; + } else if (o_rep[i].absent_key != INVALID_KEY && + o_rep[i].exist_key == INVALID_KEY) { /* * We should never find an existing key after * we have detected one missing. */ - printf("%s: OPLOG after absent record at %" - PRIu64 " key %" PRIu64 " exists\n", - fname, middle_oplog, key); + o_rep[i].exist_key = key; fatal = true; } } + c_rep[i].last_key = last_key; + l_rep[i].last_key = last_key; + o_rep[i].last_key = last_key; testutil_checksys(fclose(fp) != 0); + print_missing(&c_rep[i], fname, "COLLECTION"); + print_missing(&l_rep[i], fname, "LOCAL"); + print_missing(&o_rep[i], fname, "OPLOG"); } testutil_check(conn->close(conn, NULL)); - if (fatal) - return (EXIT_FAILURE); if (!inmem && absent_coll) { printf("COLLECTION: %" PRIu64 " record(s) absent from %" PRIu64 "\n", diff --git a/test/format/compact.c b/test/format/compact.c index c1a73bea64b..2df3839f67b 100644 --- a/test/format/compact.c +++ b/test/format/compact.c @@ -66,9 +66,14 @@ compact(void *arg) /* * Compact can return EBUSY if concurrent with alter or if there * is eviction pressure, or we collide with checkpoints. + * + * Compact returns ETIMEDOUT if the compaction doesn't finish in + * in some number of seconds. We don't configure a timeout and + * occasionally exceed the default of 1200 seconds. 
*/ ret = session->compact(session, g.uri, NULL); - if (ret != 0 && ret != EBUSY && ret != WT_ROLLBACK) + if (ret != 0 && + ret != EBUSY && ret != ETIMEDOUT && ret != WT_ROLLBACK) testutil_die(ret, "session.compact"); } diff --git a/test/format/config.c b/test/format/config.c index 049a655cb79..769ed608e64 100644 --- a/test/format/config.c +++ b/test/format/config.c @@ -181,6 +181,10 @@ config_setup(void) g.c_cache = g.c_threads; } + /* Check if a minimum cache size has been specified. */ + if (g.c_cache_minimum != 0 && g.c_cache < g.c_cache_minimum) + g.c_cache = g.c_cache_minimum; + /* Give Helium configuration a final review. */ if (DATASOURCE("helium")) config_helium_reset(); @@ -190,6 +194,25 @@ config_setup(void) config_in_memory_reset(); /* + * Key/value minimum/maximum are related, correct unless specified by + * the configuration. + */ + if (!config_is_perm("key_min") && g.c_key_min > g.c_key_max) + g.c_key_min = g.c_key_max; + if (!config_is_perm("key_max") && g.c_key_max < g.c_key_min) + g.c_key_max = g.c_key_min; + if (g.c_key_min > g.c_key_max) + testutil_die(EINVAL, "key_min may not be larger than key_max"); + + if (!config_is_perm("value_min") && g.c_value_min > g.c_value_max) + g.c_value_min = g.c_value_max; + if (!config_is_perm("value_max") && g.c_value_max < g.c_value_min) + g.c_value_max = g.c_value_min; + if (g.c_value_min > g.c_value_max) + testutil_die(EINVAL, + "value_min may not be larger than value_max"); + + /* * Run-length is configured by a number of operations and a timer. * * If the operation count and the timer are both configured, do nothing. @@ -213,25 +236,6 @@ config_setup(void) config_single("timer=360", 0); } - /* - * Key/value minimum/maximum are related, correct unless specified by - * the configuration. 
- */ - if (!config_is_perm("key_min") && g.c_key_min > g.c_key_max) - g.c_key_min = g.c_key_max; - if (!config_is_perm("key_max") && g.c_key_max < g.c_key_min) - g.c_key_max = g.c_key_min; - if (g.c_key_min > g.c_key_max) - testutil_die(EINVAL, "key_min may not be larger than key_max"); - - if (!config_is_perm("value_min") && g.c_value_min > g.c_value_max) - g.c_value_min = g.c_value_max; - if (!config_is_perm("value_max") && g.c_value_max < g.c_value_min) - g.c_value_max = g.c_value_min; - if (g.c_value_min > g.c_value_max) - testutil_die(EINVAL, - "value_min may not be larger than value_max"); - /* Reset the key count. */ g.key_cnt = 0; } diff --git a/test/format/config.h b/test/format/config.h index 6fb4071074d..7ac65147462 100644 --- a/test/format/config.h +++ b/test/format/config.h @@ -101,6 +101,10 @@ static CONFIG c[] = { "size of the cache in MB", 0x0, 1, 100, 100 * 1024, &g.c_cache, NULL }, + { "cache_minimum", + "minimum size of the cache in MB", + C_IGNORE, 1, 0, 100 * 1024, &g.c_cache_minimum, NULL }, + { "checkpoints", "type of checkpoints (on | off | wiredtiger)", C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_checkpoint}, @@ -115,7 +119,7 @@ static CONFIG c[] = { { "checksum", "type of checksums (on | off | uncompressed)", - C_IGNORE|C_STRING, 1, 3, 3, NULL, &g.c_checksum }, + C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_checksum }, { "chunk_size", "LSM chunk size in MB", @@ -159,7 +163,7 @@ static CONFIG c[] = { { "file_type", "type of store to create (fix | var | row)", - C_IGNORE|C_STRING, 1, 3, 3, NULL, &g.c_file_type }, + C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_file_type }, { "firstfit", "if allocation is firstfit", /* 10% */ @@ -196,7 +200,7 @@ static CONFIG c[] = { { "isolation", "isolation level " "(random | read-uncommitted | read-committed | snapshot)", - C_IGNORE|C_STRING, 1, 4, 4, NULL, &g.c_isolation }, + C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_isolation }, { "key_gap", "gap between instantiated keys on a Btree page", @@ -272,7 +276,7 @@ static CONFIG 
c[] = { { "quiet", "quiet run (same as -q)", - C_IGNORE|C_BOOL, 0, 0, 0, &g.c_quiet, NULL }, + C_IGNORE|C_BOOL, 0, 0, 1, &g.c_quiet, NULL }, { "read_pct", "percent operations that are reads", @@ -296,7 +300,7 @@ static CONFIG c[] = { { "runs", "the number of runs", - C_IGNORE, 0, UINT_MAX, UINT_MAX, &g.c_runs, NULL }, + C_IGNORE, 0, 0, UINT_MAX, &g.c_runs, NULL }, { "salvage", "salvage testing", /* 100% */ @@ -319,8 +323,8 @@ static CONFIG c[] = { 0x0, 1, 32, 128, &g.c_threads, NULL }, { "timer", - "maximum time to run in minutes (default 20 minutes)", - C_IGNORE, 0, UINT_MAX, UINT_MAX, &g.c_timer, NULL }, + "maximum time to run in minutes", + C_IGNORE, 0, 0, UINT_MAX, &g.c_timer, NULL }, { "transaction_timestamps", /* 10% */ "enable transaction timestamp support", diff --git a/test/format/format.h b/test/format/format.h index 96e1a0fe335..af66e166f47 100644 --- a/test/format/format.h +++ b/test/format/format.h @@ -150,6 +150,7 @@ typedef struct { uint32_t c_bloom_hash_count; uint32_t c_bloom_oldest; uint32_t c_cache; + uint32_t c_cache_minimum; char *c_checkpoint; uint32_t c_checkpoint_log_size; uint32_t c_checkpoint_wait; diff --git a/test/mciproject.yml b/test/mciproject.yml index 4b67299d14c..16e103e5366 100644 --- a/test/mciproject.yml +++ b/test/mciproject.yml @@ -65,7 +65,7 @@ tasks: ./build_posix/reconf ${configure_env_vars|} ./configure --enable-diagnostic --enable-python --enable-zlib --enable-strict --enable-verbose ${make_command|make} ${smp_command|} 2>&1 - TESTUTIL_ENABLE_LONG_TESTS=1 ${make_command|make} VERBOSE=1 check 2>&1 + ${test_env_vars|} TESTUTIL_ENABLE_LONG_TESTS=1 ${make_command|make} VERBOSE=1 check 2>&1 fi - command: archive.targz_pack params: @@ -96,15 +96,9 @@ tasks: set -o errexit set -o verbose - # On 10.12, change the binary location with install_name_tool since DYLD_LIBRARY_PATH - # appears not to work for dynamic modules loaded by python. For wt, the libtool generated - # script has the wrong path for running on test machines. 
- if [ "$(uname -s)" == "Darwin" ]; then - WT_VERSION=$(m4 build_posix/aclocal/version.m4) - install_name_tool -change /usr/local/lib/libwiredtiger-$WT_VERSION.dylib $(pwd)/.libs/libwiredtiger-$WT_VERSION.dylib lang/python/_wiredtiger.so - install_name_tool -change /usr/local/lib/libwiredtiger-$WT_VERSION.dylib $(pwd)/.libs/libwiredtiger-$WT_VERSION.dylib .libs/wt - fi - + # Avoid /usr/bin/python, at least on macOS: with System Integrity + # Protection enabled, it ignores DYLD_LIBRARY_PATH and hence + # doesn't find the WiredTiger library in the local tree. ${test_env_vars|} python ./test/suite/run.py -v 2 ${smp_command|} 2>&1 - name: compile-windows-alt @@ -186,7 +180,7 @@ buildvariants: smp_command: -j $(sysctl -n hw.logicalcpu) configure_env_vars: PATH=/opt/local/bin:$PATH make_command: PATH=/opt/local/bin:$PATH ARCHFLAGS=-Wno-error=unused-command-line-argument-hard-error-in-future make - test_env_vars: DYLD_LIBRARY_PATH=`pwd`/.libs + test_env_vars: PATH=/opt/local/bin:$PATH DYLD_LIBRARY_PATH=`pwd`/.libs tasks: - name: compile - name: unit-test diff --git a/test/suite/suite_subprocess.py b/test/suite/suite_subprocess.py index 626a6b5efd3..71aab9c5422 100644 --- a/test/suite/suite_subprocess.py +++ b/test/suite/suite_subprocess.py @@ -26,8 +26,9 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. 
-import os, subprocess +import os, subprocess, sys from run import wt_builddir +from wttest import WiredTigerTestCase # suite_subprocess.py # Run a subprocess within the test suite @@ -117,6 +118,28 @@ class suite_subprocess: print 'ERROR: ' + filename + ' should not be empty (this command expected error output)' self.assertNotEqual(filesize, 0, filename + ': expected to not be empty') + def verbose_env(self, envvar): + return envvar + '=' + str(os.environ.get(envvar)) + '\n' + + def show_outputs(self, procargs, message, filenames): + out = 'ERROR: wt command ' + message + ': ' + str(procargs) + '\n' + \ + self.verbose_env('PATH') + \ + self.verbose_env('LD_LIBRARY_PATH') + \ + self.verbose_env('DYLD_LIBRARY_PATH') + \ + self.verbose_env('PYTHONPATH') + \ + 'output files follow:' + WiredTigerTestCase.prout(out) + for filename in filenames: + maxbytes = 1024*100 + with open(filename, 'r') as f: + contents = f.read(maxbytes) + if len(contents) > 0: + if len(contents) >= maxbytes: + contents += '...\n' + sepline = '*' * 50 + '\n' + out = sepline + filename + '\n' + sepline + contents + WiredTigerTestCase.prout(out) + # Run the wt utility. def runWt(self, args, infilename=None, outfilename=None, errfilename=None, closeconn=True, @@ -131,10 +154,17 @@ class suite_subprocess: wterrname = errfilename or "wt.err" with open(wterrname, "w") as wterr: with open(wtoutname, "w") as wtout: - procargs = [os.path.join(wt_builddir, "wt")] + # Prefer running the actual 'wt' executable rather than the + # 'wt' script created by libtool. On OS/X with System Integrity + # Protection enabled, running a shell script strips + # environment variables needed to run 'wt'. 
+ if sys.platform == "darwin": + wtexe = os.path.join(wt_builddir, ".libs", "wt") + else: + wtexe = os.path.join(wt_builddir, "wt") + procargs = [ wtexe ] if self._gdbSubprocess: - procargs = [os.path.join(wt_builddir, "libtool"), - "--mode=execute", "gdb", "--args"] + procargs + procargs = [ "gdb", "--args" ] + procargs procargs.extend(args) if self._gdbSubprocess: infilepart = "" @@ -155,10 +185,16 @@ class suite_subprocess: returncode = subprocess.call( procargs, stdout=wtout, stderr=wterr) if failure: + if returncode == 0: + self.show_outputs(procargs, "expected failure, got success", + [wtoutname, wterrname]) self.assertNotEqual(returncode, 0, 'expected failure: "' + \ str(procargs) + '": exited ' + str(returncode)) else: + if returncode != 0: + self.show_outputs(procargs, "expected success, got failure", + [wtoutname, wterrname]) self.assertEqual(returncode, 0, 'expected success: "' + \ str(procargs) + '": exited ' + str(returncode)) |