diff options
author | Luke Chen <luke.chen@mongodb.com> | 2019-07-10 13:26:20 +1000 |
---|---|---|
committer | Luke Chen <luke.chen@mongodb.com> | 2019-07-10 13:26:20 +1000 |
commit | 1a50f90ec4418b0e2f9bdadb79ea49684911b0fb (patch) | |
tree | 37d119374d45e1f1a508cadf40b3a86ea7e31523 | |
parent | 40b6fd725ccb0464644b1b9f75b113ebf7ed6a74 (diff) | |
download | mongo-1a50f90ec4418b0e2f9bdadb79ea49684911b0fb.tar.gz |
Import wiredtiger: 99e0760cc5f11440b21184874d8dd2ae5bde23d6 from branch mongodb-4.2
ref: d86b3a8a33..99e0760cc5
for: 4.2.0-rc3
WT-4502 Assertion checking hazard pointers on page discard is too strong
WT-4733 Change test/format to do longer-term repeatable read testing
WT-4743 Fix memory leaks and core dumps in page-split error handling
WT-4786 Coverity: Invalid format specifiers in printf-like invocations
WT-4792 Add stat to track pages queued for eviction after LRU sorting
WT-4805 Modify error message verification for test_prepare04.py
WT-4812 Fix the Python distribution script for Python3 changes
WT-4818 Add debugging of bad read file descriptor when files are missing during recovery
WT-4832 Change ordering on shutdown to close LSM and async threads earlier
WT-4836 Lower scheduling frequency for endianness compatibility tests
WT-4840 WT_CURSOR.modify must require explicit, snapshot-isolation transaction
WT-4864 Coverity: Redundant NULL check
WT-4866 Coverity: WT_SESSION.import can leak memory
WT-4867 Skip assertion when reserved updates are seen
WT-4882 Improve checkpoint performance when there are large metadata pages
WT-4887 Skip read timestamp required assert during recovery
WT-4888 Remove support for Berkeley DB
WT-4891 WT_SESSION.verify memory allocation overrun
WT-4892 Improve statistics about forced eviction
WT-4893 Fix a race between internal page child-page eviction checks and cursors in the tree
WT-4895 Fix debug eviction mode so it chooses skew more randomly
WT-4898 Don't allow the eviction server to reconcile if it's busy
WT-4910 Port Windows SConstruct to Python 3
WT-4918 LSM allocated mutexes leaked in readonly tests
WT-4920 Add statistics tracking when eviction server is waiting for page transitions
80 files changed, 2575 insertions, 3286 deletions
diff --git a/src/third_party/wiredtiger/.hgignore b/src/third_party/wiredtiger/.hgignore deleted file mode 100644 index 724585de3ac..00000000000 --- a/src/third_party/wiredtiger/.hgignore +++ /dev/null @@ -1,24 +0,0 @@ -~$ -\.l?o$ -\.swp$ -\.pyc$ -\.class$ -\.dSYM$ -/Makefile.in$ -/tags$ -^configure.*$ -^build.*/(\.deps|COPYING|ChangeLog|INSTALL|Makefile.*|NEWS|README) -^build.*/(aclocal\.m4|config\..*|configure|stamp-h1|prototype.chk|w.*\.h) -^build.*/(ex.*|lib.*|test.*|wt_.*|_wiredtiger.so) -^build_posix/gnu-support/(compile|depcomp|install-sh|ltmain.sh|missing) -^build_posix/autom4te.cache -^docs/(doxygen.log|installdox|search) -^docs/[^/]*\.(css|html|js|png)$ -^docs/java -^docs/latex -^docs/python -^docs/swig -^lang/python/(wiredtiger/__init__.py|wiredtiger_wrap.c) -^releases -^src/server -^test/bt/(CONFIG|__rand|__wt.bdb|__wt.run|__wt.wt|db|t|vgout\..*) diff --git a/src/third_party/wiredtiger/SConstruct b/src/third_party/wiredtiger/SConstruct index 786dff93a6d..45f81ee125d 100644 --- a/src/third_party/wiredtiger/SConstruct +++ b/src/third_party/wiredtiger/SConstruct @@ -46,9 +46,6 @@ AddOption("--enable-zlib", dest="zlib", type="string", nargs=1, action="store", AddOption("--prefix", dest="prefix", type="string", nargs=1, action="store", default="package", help="Install directory") -AddOption("--with-berkeley-db", dest="bdb", type="string", nargs=1, action="store", - help="Berkeley DB install path, ie, /usr/local") - # Get the swig binary from the command line option since SCONS cannot find it automatically # swig_binary = GetOption("lang-python") @@ -112,13 +109,12 @@ env['STATIC_AND_SHARED_OBJECTS_ARE_THE_SAME'] = 1 useZlib = GetOption("zlib") useSnappy = GetOption("snappy") useLz4 = GetOption("lz4") -useBdb = GetOption("bdb") useTcmalloc = GetOption("tcmalloc") wtlibs = [] conf = Configure(env) if not conf.CheckCHeader('stdlib.h'): - print 'stdlib.h must be installed!' 
+ print('stdlib.h must be installed!') Exit(1) if useZlib: @@ -128,7 +124,7 @@ if useZlib: conf.env.Append(CPPDEFINES=["HAVE_BUILTIN_EXTENSION_ZLIB"]) wtlibs.append("zlib") else: - print 'zlib.h must be installed!' + print('zlib.h must be installed!') Exit(1) if useSnappy: @@ -138,7 +134,7 @@ if useSnappy: conf.env.Append(CPPDEFINES=['HAVE_BUILTIN_EXTENSION_SNAPPY']) wtlibs.append("snappy") else: - print 'snappy-c.h must be installed!' + print('snappy-c.h must be installed!') Exit(1) if useLz4: @@ -148,14 +144,7 @@ if useLz4: conf.env.Append(CPPDEFINES=['HAVE_BUILTIN_EXTENSION_LZ4']) wtlibs.append("lz4") else: - print 'lz4.h must be installed!' - Exit(1) - -if useBdb: - conf.env.Append(CPPPATH=[useBdb+ "/include"]) - conf.env.Append(LIBPATH=[useBdb+ "/lib"]) - if not conf.CheckCHeader('db.h'): - print 'db.h must be installed!' + print('lz4.h must be installed!') Exit(1) if useTcmalloc: @@ -166,7 +155,7 @@ if useTcmalloc: conf.env.Append(CPPDEFINES=['HAVE_LIBTCMALLOC']) conf.env.Append(CPPDEFINES=['HAVE_POSIX_MEMALIGN']) else: - print 'tcmalloc.h must be installed!' 
+ print('tcmalloc.h must be installed!') Exit(1) env = conf.Finish() @@ -202,7 +191,7 @@ if (VERSION_MAJOR == None or VERSION_MINOR == None or VERSION_PATCH == None or VERSION_STRING == None): - print "Failed to find version variables in " + version_file + print("Failed to find version variables in " + version_file) Exit(1) wiredtiger_includes = """ @@ -323,7 +312,7 @@ if GetOption("lang-python"): # Check that this version of python is 64-bit # if sys.maxsize < 2**32: - print "The Python Interpreter must be 64-bit in order to build the python bindings" + print("The Python Interpreter must be 64-bit in order to build the python bindings") Exit(1) pythonMajorVersion = sys.version_info.major @@ -408,7 +397,7 @@ if enableJava and enableJava.count(",") == 1: env.Depends(wtJar, wtClasses) Default(wtJar) else: - print "Error using --enable-java, this option may contain two paths separated by comma, the first is the swig.exe binary and the second is the Java JDK directory. e.g. C:\Python27\python.exe C:\Python27\Scripts\scons.py --enable-java=\"C:\Program Files\swigwin-3.0.12\swig.exe\",\"C:\Program Files\Java\jdk1.8.0_151\"" + print("Error using --enable-java, this option may contain two paths separated by comma, the first is the swig.exe binary and the second is the Java JDK directory. e.g. 
C:\Python27\python.exe C:\Python27\Scripts\scons.py --enable-java=\"C:\Program Files\swigwin-3.0.12\swig.exe\",\"C:\Program Files\Java\jdk1.8.0_151\"") # Shim library of functions to emulate POSIX on Windows shim = env.Library("window_shim", @@ -439,7 +428,7 @@ examples = [ # WiredTiger Smoke Test support # Runs each test in a custom temporary directory def run_smoke_test(x): - print "Running Smoke Test: " + x + print("Running Smoke Test: " + x) # Make temp dir temp_dir = tempfile.mkdtemp(prefix="wt_home") @@ -508,6 +497,7 @@ t = env.Program("t_format", "test/format/ops.c", "test/format/rebalance.c", "test/format/salvage.c", + "test/format/snap.c", "test/format/t.c", "test/format/util.c", "test/format/wts.c"], @@ -541,7 +531,7 @@ for ex in examples: exp = env.Program(ex, "examples/c/" + ex + ".c", LIBS=[wtlib, shim, testutil] + wtlibs) Default(exp) if not ex == 'ex_log': - env.Alias("check", env.SmokeTest(exp)) + env.Alias("check", env.SmokeTest(exp)) # Install Target # diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/metadata-split-test.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/metadata-split-test.wtperf new file mode 100644 index 00000000000..413c725412c --- /dev/null +++ b/src/third_party/wiredtiger/bench/wtperf/runners/metadata-split-test.wtperf @@ -0,0 +1,16 @@ +# Create a lot of tables with a big metadata footprint. +# There have been cases where WiredTiger has done a bad job of splitting +# metadata pages, which leads to poor performance - this configuration looks +# for those. See JIRA ticket WT-4882 for context. 
+conn_config="cache_size=1G,eviction=(threads_max=8),file_manager=(close_idle_time=100000),checkpoint=(wait=2000,log_size=2GB),statistics_log=(wait=1,json,on_close),session_max=1000" +table_config="type=file,app_metadata=\"this_is_a_fairly_long_string_to_cause_splits_in_metadata_more_often_abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyzzzzzzzz\"" +table_count=2000 +icount=0 +random_range=1000000000 +pareto=10 +range_partition=true +report_interval=5 + +run_ops=0 +populate_threads=0 + diff --git a/src/third_party/wiredtiger/build_posix/Make.subdirs b/src/third_party/wiredtiger/build_posix/Make.subdirs index a7fc291fc61..307084aa7e5 100644 --- a/src/third_party/wiredtiger/build_posix/Make.subdirs +++ b/src/third_party/wiredtiger/build_posix/Make.subdirs @@ -16,7 +16,6 @@ ext/compressors/zstd ZSTD ext/encryptors/nop ext/encryptors/rotn ext/extractors/csv -ext/test/kvs_bdb HAVE_BERKELEY_DB ext/test/fail_fs . lang/java JAVA diff --git a/src/third_party/wiredtiger/build_posix/configure.ac.in b/src/third_party/wiredtiger/build_posix/configure.ac.in index e99d4dcc0c7..7c829f86f80 100644 --- a/src/third_party/wiredtiger/build_posix/configure.ac.in +++ b/src/third_party/wiredtiger/build_posix/configure.ac.in @@ -239,16 +239,6 @@ AC_DEFINE_UNQUOTED(WT_BUFFER_ALIGNMENT_DEFAULT, $BUFFER_ALIGNMENT, AC_SUBST(AM_CFLAGS) -# test/format optionally uses a Oracle Berkeley DB release for verification. 
-AC_MSG_CHECKING([if --with-berkeleydb=DIR option specified]) -AC_ARG_WITH(berkeleydb, - [AS_HELP_STRING([--with-berkeleydb=DIR], - [Specify installed library directory of Berkeley DB])], - [with_berkeleydb="$withval"], [with_berkeleydb="NO_BERKELEY_DB_LIBRARY"]) -AC_MSG_RESULT($with_berkeleydb) -AM_CONDITIONAL([HAVE_BERKELEY_DB], [test -d $with_berkeleydb]) -AC_SUBST(BERKELEY_DB_PATH, [$with_berkeleydb]) - # Warn that diagnostic builds should not be used in production if test "$wt_cv_enable_diagnostic" = "yes"; then AC_MSG_WARN( diff --git a/src/third_party/wiredtiger/dist/s_define.list b/src/third_party/wiredtiger/dist/s_define.list index 1c9121da3f2..85a240550ea 100644 --- a/src/third_party/wiredtiger/dist/s_define.list +++ b/src/third_party/wiredtiger/dist/s_define.list @@ -46,6 +46,7 @@ WT_PACKED_STRUCT_END WT_PADDING_CHECK WT_PREPARE_INIT WT_READ_BARRIER +WT_REF_SAVE_STATE_MAX WT_REF_SIZE WT_SESSION_LOCKED_CHECKPOINT WT_SESSION_LOCKED_HOTBACKUP diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index d8f89a9e6b9..6ce1ad16a5f 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -229,8 +229,6 @@ LZO LeafGreen LevelDB Levyx -Llqr -Llqrt LmT LoadLoad LockFile @@ -499,7 +497,6 @@ basecfg basho bbb bcr -bdb beginthreadex bigram binutils @@ -907,7 +904,6 @@ keyv kv kvraw kvs -kvsbdb lang las latencies @@ -948,6 +944,8 @@ lookaside lookup lookups lossy +lqr +lqrt lr lrt lru diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index 92008c8f7e0..846525d69b7 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -220,13 +220,15 @@ connection_stats = [ CacheStat('cache_eviction_dirty', 'modified pages evicted'), CacheStat('cache_eviction_empty_score', 'eviction empty score', 'no_clear,no_scale'), CacheStat('cache_eviction_fail', 'pages selected for 
eviction unable to be evicted'), - CacheStat('cache_eviction_force', 'pages evicted because they exceeded the in-memory maximum count'), - CacheStat('cache_eviction_force_delete', 'pages evicted because they had chains of deleted items count'), - CacheStat('cache_eviction_force_delete_time', 'pages evicted because they had chains of deleted items time (usecs)'), - CacheStat('cache_eviction_force_fail', 'failed eviction of pages that exceeded the in-memory maximum count'), - CacheStat('cache_eviction_force_fail_time', 'failed eviction of pages that exceeded the in-memory maximum time (usecs)'), + CacheStat('cache_eviction_force', 'forced eviction - pages selected count'), + CacheStat('cache_eviction_force_clean', 'forced eviction - pages evicted that were clean count'), + CacheStat('cache_eviction_force_clean_time', 'forced eviction - pages evicted that were clean time (usecs)'), + CacheStat('cache_eviction_force_delete', 'forced eviction - pages selected because of too many deleted items count'), + CacheStat('cache_eviction_force_dirty', 'forced eviction - pages evicted that were dirty count'), + CacheStat('cache_eviction_force_dirty_time', 'forced eviction - pages evicted that were dirty time (usecs)'), + CacheStat('cache_eviction_force_fail', 'forced eviction - pages selected unable to be evicted count'), + CacheStat('cache_eviction_force_fail_time', 'forced eviction - pages selected unable to be evicted time'), CacheStat('cache_eviction_force_retune', 'force re-tuning of eviction workers once in a while'), - CacheStat('cache_eviction_force_time', 'pages evicted because they exceeded the in-memory maximum time (usecs)'), CacheStat('cache_eviction_get_ref', 'eviction calls to get a page'), CacheStat('cache_eviction_get_ref_empty', 'eviction calls to get a page found queue empty'), CacheStat('cache_eviction_get_ref_empty2', 'eviction calls to get a page found queue empty after locking'), @@ -235,6 +237,7 @@ connection_stats = [ 
CacheStat('cache_eviction_maximum_page_size', 'maximum page size at eviction', 'no_clear,no_scale,size'), CacheStat('cache_eviction_pages_queued', 'pages queued for eviction'), CacheStat('cache_eviction_pages_queued_oldest', 'pages queued for urgent eviction during walk'), + CacheStat('cache_eviction_pages_queued_post_lru', 'pages queued for eviction post lru sorting'), CacheStat('cache_eviction_pages_queued_urgent', 'pages queued for urgent eviction'), CacheStat('cache_eviction_pages_seen', 'pages seen by eviction walk'), CacheStat('cache_eviction_queue_empty', 'eviction server candidate queue empty when topping up'), @@ -253,6 +256,9 @@ connection_stats = [ CacheStat('cache_eviction_target_page_lt64', 'eviction walk target pages histogram - 32-63'), CacheStat('cache_eviction_walk', 'pages walked for eviction'), CacheStat('cache_eviction_walk_from_root', 'eviction walks started from root of tree'), + CacheStat('cache_eviction_walk_internal_yield', 'eviction server waiting for an internal page yields'), + CacheStat('cache_eviction_walk_internal_wait', 'eviction server waiting for an internal page sleep (usec)'), + CacheStat('cache_eviction_walk_leaf_notfound', 'eviction server waiting for a leaf page'), CacheStat('cache_eviction_walk_passes', 'eviction passes of a file'), CacheStat('cache_eviction_walk_saved_pos', 'eviction walks started from saved location in tree'), CacheStat('cache_eviction_walks_abandoned', 'eviction walks abandoned'), diff --git a/src/third_party/wiredtiger/ext/test/kvs_bdb/Makefile.am b/src/third_party/wiredtiger/ext/test/kvs_bdb/Makefile.am deleted file mode 100644 index c7d0f580085..00000000000 --- a/src/third_party/wiredtiger/ext/test/kvs_bdb/Makefile.am +++ /dev/null @@ -1,11 +0,0 @@ -AM_CPPFLAGS = -I$(top_builddir) \ - -I$(top_srcdir)/src/include -I$(BERKELEY_DB_PATH)/include - -noinst_LTLIBRARIES = libwiredtiger_kvs_bdb.la -libwiredtiger_kvs_bdb_la_SOURCES = kvs_bdb.c -libwiredtiger_kvs_bdb_la_LIBADD = -L$(BERKELEY_DB_PATH)/lib -ldb - 
-# libtool hack: noinst_LTLIBRARIES turns off building shared libraries as well -# as installation, it will only build static libraries. As far as I can tell, -# the "approved" libtool way to turn them back on is by adding -rpath. -libwiredtiger_kvs_bdb_la_LDFLAGS = -avoid-version -module -rpath /nowhere diff --git a/src/third_party/wiredtiger/ext/test/kvs_bdb/kvs_bdb.c b/src/third_party/wiredtiger/ext/test/kvs_bdb/kvs_bdb.c deleted file mode 100644 index d0c176551f4..00000000000 --- a/src/third_party/wiredtiger/ext/test/kvs_bdb/kvs_bdb.c +++ /dev/null @@ -1,1107 +0,0 @@ -/*- - * Public Domain 2014-2019 MongoDB, Inc. - * Public Domain 2008-2014 WiredTiger, Inc. - * - * This is free and unencumbered software released into the public domain. - * - * Anyone is free to copy, modify, publish, use, compile, sell, or - * distribute this software, either in source code form or as a compiled - * binary, for any purpose, commercial or non-commercial, and by any - * means. - * - * In jurisdictions that recognize copyright laws, the author or authors - * of this software dedicate any and all copyright interest in the - * software to the public domain. We make this dedication for the benefit - * of the public at large and to the detriment of our heirs and - * successors. We intend this dedication to be an overt act of - * relinquishment in perpetuity of all present and future rights to this - * software under copyright law. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include <ctype.h> -#include <errno.h> -#include <pthread.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -/* - * Berkeley DB has an #ifdef we need to provide a value for, we'll see an - * undefined error if it's unset during a strict compile. - */ -#ifndef DB_DBM_HSEARCH -#define DB_DBM_HSEARCH 0 -#endif -#include <db.h> -#include <wiredtiger.h> -#include <wiredtiger_ext.h> - -#undef INLINE -#define INLINE inline /* Turn off inline */ - -#ifndef UINT32_MAX /* Maximum 32-bit unsigned */ -#define UINT32_MAX 4294967295U -#endif - -/* - * Macros to output an error message and set or return an error. - * Requires local variables: - * int ret; - */ -#undef ERET -#define ERET(wt_api, session, v, ...) do { \ - (void)wt_api->err_printf(wt_api, session, __VA_ARGS__); \ - return (v); \ -} while (0) -#undef ESET -#define ESET(wt_api, session, v, ...) do { \ - (void)wt_api->err_printf(wt_api, session, __VA_ARGS__); \ - ret = v; \ -} while (0) -#undef ETRET -#define ETRET(a) do { \ - int __ret; \ - if ((__ret = (a)) != 0 && \ - (__ret == WT_PANIC || \ - ret == 0 || ret == WT_DUPLICATE_KEY || ret == WT_NOTFOUND)) \ - ret = __ret; \ -} while (0) - -typedef struct __data_source DATA_SOURCE; - -typedef struct __cursor_source { - WT_CURSOR wtcursor; /* Must come first */ - - WT_EXTENSION_API *wt_api; /* Extension functions */ - - DATA_SOURCE *ds; /* Underlying Berkeley DB */ - - DB *db; /* Berkeley DB handles */ - DBC *dbc; - DBT key, value; - db_recno_t recno; - - int config_append; /* config "append" */ - int config_bitfield; /* config "value_format=#t" */ - int config_overwrite; /* config "overwrite" */ - int config_recno; /* config "key_format=r" */ -} CURSOR_SOURCE; - -struct __data_source { - WT_DATA_SOURCE wtds; /* Must come first */ - - WT_EXTENSION_API *wt_api; /* Extension functions */ - - /* - * We single thread all WT_SESSION methods and return EBUSY if a - * WT_SESSION method is called and there's an open cursor. 
- * - * XXX - * This only works for a single object: if there were more than one - * object in test/format, cursor open would use the passed-in uri to - * find a { lock, cursor-count } pair to reference from each cursor - * object, and each session.XXX method call would have to use the - * appropriate { lock, cursor-count } pair based on their passed-in - * uri. - */ - pthread_rwlock_t rwlock; /* Global lock */ - - DB_ENV *dbenv; /* Berkeley DB environment */ - int open_cursors; /* Open cursor count */ -}; - -/* - * os_errno -- - * Limit our use of errno so it's easy to remove. - */ -static int -os_errno(void) -{ - return (errno); -} - -/* - * lock_init -- - * Initialize an object's lock. - */ -static int -lock_init( - WT_EXTENSION_API *wt_api, WT_SESSION *session, pthread_rwlock_t *lockp) -{ - int ret = 0; - - if ((ret = pthread_rwlock_init(lockp, NULL)) != 0) - ERET(wt_api, session, WT_PANIC, "lock init: %s", strerror(ret)); - return (0); -} - -/* - * lock_destroy -- - * Destroy an object's lock. - */ -static int -lock_destroy( - WT_EXTENSION_API *wt_api, WT_SESSION *session, pthread_rwlock_t *lockp) -{ - int ret = 0; - - if ((ret = pthread_rwlock_destroy(lockp)) != 0) - ERET(wt_api, - session, WT_PANIC, "lock destroy: %s", strerror(ret)); - return (0); -} - -/* - * writelock -- - * Acquire a write lock. - */ -static INLINE int -writelock( - WT_EXTENSION_API *wt_api, WT_SESSION *session, pthread_rwlock_t *lockp) -{ - int ret = 0; - - if ((ret = pthread_rwlock_wrlock(lockp)) != 0) - ERET(wt_api, - session, WT_PANIC, "write-lock: %s", strerror(ret)); - return (0); -} - -/* - * unlock -- - * Release an object's lock. 
- */ -static INLINE int -unlock(WT_EXTENSION_API *wt_api, WT_SESSION *session, pthread_rwlock_t *lockp) -{ - int ret = 0; - - if ((ret = pthread_rwlock_unlock(lockp)) != 0) - ERET(wt_api, session, WT_PANIC, "unlock: %s", strerror(ret)); - return (0); -} - -static int -single_thread( - WT_DATA_SOURCE *wtds, WT_SESSION *session, pthread_rwlock_t *lockp) -{ - DATA_SOURCE *ds; - WT_EXTENSION_API *wt_api; - int ret = 0; - - ds = (DATA_SOURCE *)wtds; - wt_api = ds->wt_api; - - if ((ret = writelock(wt_api, session, lockp)) != 0) - return (ret); - if (ds->open_cursors != 0) { - if ((ret = unlock(wt_api, session, lockp)) != 0) - return (ret); - return (EBUSY); - } - return (0); -} - -static int -uri2name(WT_EXTENSION_API *wt_api, - WT_SESSION *session, const char *uri, const char **namep) -{ - const char *name; - - if ((name = strchr(uri, ':')) == NULL || *++name == '\0') - ERET(wt_api, session, EINVAL, "unsupported object: %s", uri); - *namep = name; - return (0); -} - -static INLINE int -recno_convert(WT_CURSOR *wtcursor, db_recno_t *recnop) -{ - CURSOR_SOURCE *cursor; - WT_EXTENSION_API *wt_api; - WT_SESSION *session; - - session = wtcursor->session; - cursor = (CURSOR_SOURCE *)wtcursor; - wt_api = cursor->wt_api; - - if (wtcursor->recno > UINT32_MAX) - ERET(wt_api, - session, ERANGE, "record number %" PRIuMAX ": %s", - (uintmax_t)wtcursor->recno, strerror(ERANGE)); - - *recnop = (uint32_t)wtcursor->recno; - return (0); -} - -static INLINE int -copyin_key(WT_CURSOR *wtcursor) -{ - CURSOR_SOURCE *cursor; - DBT *key; - int ret = 0; - - cursor = (CURSOR_SOURCE *)wtcursor; - key = &cursor->key; - - if (cursor->config_recno) { - if ((ret = recno_convert(wtcursor, &cursor->recno)) != 0) - return (ret); - key->data = &cursor->recno; - key->size = sizeof(db_recno_t); - } else { - key->data = (char *)wtcursor->key.data; - key->size = (uint32_t)wtcursor->key.size; - } - return (0); -} - -static INLINE void -copyout_key(WT_CURSOR *wtcursor) -{ - CURSOR_SOURCE *cursor; - DBT *key; - 
- cursor = (CURSOR_SOURCE *)wtcursor; - key = &cursor->key; - - if (cursor->config_recno) - wtcursor->recno = *(db_recno_t *)key->data; - else { - wtcursor->key.data = key->data; - wtcursor->key.size = key->size; - } -} - -static INLINE void -copyin_value(WT_CURSOR *wtcursor) -{ - CURSOR_SOURCE *cursor; - DBT *value; - - cursor = (CURSOR_SOURCE *)wtcursor; - value = &cursor->value; - - value->data = (char *)wtcursor->value.data; - value->size = (uint32_t)wtcursor->value.size; -} - -static INLINE void -copyout_value(WT_CURSOR *wtcursor) -{ - CURSOR_SOURCE *cursor; - DBT *value; - - cursor = (CURSOR_SOURCE *)wtcursor; - value = &cursor->value; - - wtcursor->value.data = value->data; - wtcursor->value.size = value->size; -} - -#if 0 -static int -bdb_dump(WT_CURSOR *wtcursor, WT_SESSION *session, const char *tag) -{ - CURSOR_SOURCE *cursor; - DB *db; - DBC *dbc; - DBT *key, *value; - WT_EXTENSION_API *wt_api; - int ret = 0; - - cursor = (CURSOR_SOURCE *)wtcursor; - wt_api = cursor->wt_api; - - db = cursor->db; - key = &cursor->key; - value = &cursor->value; - - if ((ret = db->cursor(db, NULL, &dbc, 0)) != 0) - ERET(wt_api, - session, WT_ERROR, "Db.cursor: %s", db_strerror(ret)); - printf("==> %s\n", tag); - while ((ret = dbc->get(dbc, key, value, DB_NEXT)) == 0) - if (cursor->config_recno) - printf("\t%llu/%.*s\n", - (unsigned long long)*(db_recno_t *)key->data, - (int)value->size, (char *)value->data); - else - printf("\t%.*s/%.*s\n", - (int)key->size, (char *)key->data, - (int)value->size, (char *)value->data); - - if (ret != DB_NOTFOUND) - ERET(wt_api, - session, WT_ERROR, "DbCursor.get: %s", db_strerror(ret)); - - return (0); -} -#endif - -static int -kvs_cursor_next(WT_CURSOR *wtcursor) -{ - CURSOR_SOURCE *cursor; - DBC *dbc; - DBT *key, *value; - WT_EXTENSION_API *wt_api; - WT_SESSION *session; - int ret = 0; - - session = wtcursor->session; - cursor = (CURSOR_SOURCE *)wtcursor; - wt_api = cursor->wt_api; - - dbc = cursor->dbc; - key = &cursor->key; - value = 
&cursor->value; - - if ((ret = dbc->get(dbc, key, value, DB_NEXT)) == 0) { - copyout_key(wtcursor); - copyout_value(wtcursor); - return (0); - } - - if (ret == DB_NOTFOUND || ret == DB_KEYEMPTY) - return (WT_NOTFOUND); - ERET(wt_api, session, WT_ERROR, "DbCursor.get: %s", db_strerror(ret)); -} - -static int -kvs_cursor_prev(WT_CURSOR *wtcursor) -{ - CURSOR_SOURCE *cursor; - DBC *dbc; - DBT *key, *value; - WT_EXTENSION_API *wt_api; - WT_SESSION *session; - int ret = 0; - - session = wtcursor->session; - cursor = (CURSOR_SOURCE *)wtcursor; - wt_api = cursor->wt_api; - - dbc = cursor->dbc; - key = &cursor->key; - value = &cursor->value; - - if ((ret = dbc->get(dbc, key, value, DB_PREV)) == 0) { - copyout_key(wtcursor); - copyout_value(wtcursor); - return (0); - } - - if (ret == DB_NOTFOUND || ret == DB_KEYEMPTY) - return (WT_NOTFOUND); - ERET(wt_api, session, WT_ERROR, "DbCursor.get: %s", db_strerror(ret)); -} - -static int -kvs_cursor_reset(WT_CURSOR *wtcursor) -{ - CURSOR_SOURCE *cursor; - DBC *dbc; - WT_EXTENSION_API *wt_api; - WT_SESSION *session; - int ret = 0; - - session = wtcursor->session; - cursor = (CURSOR_SOURCE *)wtcursor; - wt_api = cursor->wt_api; - - /* Close and re-open the Berkeley DB cursor */ - if ((dbc = cursor->dbc) != NULL) { - cursor->dbc = NULL; - if ((ret = dbc->close(dbc)) != 0) - ERET(wt_api, session, WT_ERROR, - "DbCursor.close: %s", db_strerror(ret)); - - if ((ret = cursor->db->cursor(cursor->db, NULL, &dbc, 0)) != 0) - ERET(wt_api, session, WT_ERROR, - "Db.cursor: %s", db_strerror(ret)); - cursor->dbc = dbc; - } - return (0); -} - -static int -kvs_cursor_search(WT_CURSOR *wtcursor) -{ - CURSOR_SOURCE *cursor; - DBC *dbc; - DBT *key, *value; - WT_EXTENSION_API *wt_api; - WT_SESSION *session; - int ret = 0; - - session = wtcursor->session; - cursor = (CURSOR_SOURCE *)wtcursor; - wt_api = cursor->wt_api; - - dbc = cursor->dbc; - key = &cursor->key; - value = &cursor->value; - - if ((ret = copyin_key(wtcursor)) != 0) - return (ret); - - if 
((ret = dbc->get(dbc, key, value, DB_SET)) == 0) { - copyout_key(wtcursor); - copyout_value(wtcursor); - return (0); - } - - if (ret == DB_NOTFOUND || ret == DB_KEYEMPTY) - return (WT_NOTFOUND); - ERET(wt_api, session, WT_ERROR, "DbCursor.get: %s", db_strerror(ret)); -} - -static int -kvs_cursor_search_near(WT_CURSOR *wtcursor, int *exact) -{ - CURSOR_SOURCE *cursor; - DBC *dbc; - DBT *key, *value; - WT_EXTENSION_API *wt_api; - WT_SESSION *session; - size_t len; - int ret = 0; - - session = wtcursor->session; - cursor = (CURSOR_SOURCE *)wtcursor; - wt_api = cursor->wt_api; - - dbc = cursor->dbc; - key = &cursor->key; - value = &cursor->value; - - if ((ret = copyin_key(wtcursor)) != 0) - return (ret); - -retry: if ((ret = dbc->get(dbc, key, value, DB_SET_RANGE)) == 0) { - /* - * WiredTiger returns the logically adjacent key (which might - * be less than, equal to, or greater than the specified key), - * Berkeley DB returns a key equal to or greater than the - * specified key. Check for an exact match, otherwise Berkeley - * DB must have returned a larger key than the one specified. - */ - if (key->size == wtcursor->key.size && - memcmp(key->data, wtcursor->key.data, key->size) == 0) - *exact = 0; - else - *exact = 1; - copyout_key(wtcursor); - copyout_value(wtcursor); - return (0); - } - - /* - * Berkeley DB only returns keys equal to or greater than the specified - * key, while WiredTiger returns adjacent keys, that is, if there's a - * key smaller than the specified key, it's supposed to be returned. In - * other words, WiredTiger only fails if the store is empty. Read the - * last key in the store, and see if it's less than the specified key, - * in which case we have the right key to return. If it's not less than - * the specified key, we're racing with some other thread, throw up our - * hands and try again. - */ - if ((ret = dbc->get(dbc, key, value, DB_LAST)) == 0) { - len = key->size < wtcursor->key.size ? 
- key->size : wtcursor->key.size; - if (memcmp(key->data, wtcursor->key.data, len) < 0) { - *exact = -1; - copyout_key(wtcursor); - copyout_value(wtcursor); - return (0); - } - goto retry; - } - - if (ret == DB_NOTFOUND || ret == DB_KEYEMPTY) - return (WT_NOTFOUND); - ERET(wt_api, session, WT_ERROR, "DbCursor.get: %s", db_strerror(ret)); -} - -static int -kvs_cursor_insert(WT_CURSOR *wtcursor) -{ - CURSOR_SOURCE *cursor; - DB *db; - DBC *dbc; - DBT *key, *value; - WT_EXTENSION_API *wt_api; - WT_SESSION *session; - int ret = 0; - - session = wtcursor->session; - cursor = (CURSOR_SOURCE *)wtcursor; - wt_api = cursor->wt_api; - - dbc = cursor->dbc; - db = cursor->db; - key = &cursor->key; - value = &cursor->value; - - if ((ret = copyin_key(wtcursor)) != 0) - return (ret); - copyin_value(wtcursor); - - if (cursor->config_append) { - /* - * Berkeley DB cursors have no operation to append/create a - * new record and set the cursor; use the DB handle instead - * then set the cursor explicitly. - * - * When appending, we're allocating and returning a new record - * number. - */ - if ((ret = db->put(db, NULL, key, value, DB_APPEND)) != 0) - ERET(wt_api, - session, WT_ERROR, "Db.put: %s", db_strerror(ret)); - wtcursor->recno = *(db_recno_t *)key->data; - - if ((ret = dbc->get(dbc, key, value, DB_SET)) != 0) - ERET(wt_api, session, WT_ERROR, - "DbCursor.get: %s", db_strerror(ret)); - } else if (cursor->config_overwrite) { - if ((ret = dbc->put(dbc, key, value, DB_KEYFIRST)) != 0) - ERET(wt_api, session, WT_ERROR, - "DbCursor.put: %s", db_strerror(ret)); - } else { - /* - * Berkeley DB cursors don't have a no-overwrite flag; use - * the DB handle instead then set the cursor explicitly. 
- */ - if ((ret = - db->put(db, NULL, key, value, DB_NOOVERWRITE)) != 0) { - if (ret == DB_KEYEXIST) - return (WT_DUPLICATE_KEY); - ERET(wt_api, - session, WT_ERROR, "Db.put: %s", db_strerror(ret)); - } - if ((ret = dbc->get(dbc, key, value, DB_SET)) != 0) - ERET(wt_api, session, WT_ERROR, - "DbCursor.get: %s", db_strerror(ret)); - } - - return (0); -} - -static int -kvs_cursor_update(WT_CURSOR *wtcursor) -{ - CURSOR_SOURCE *cursor; - DBC *dbc; - DBT *key, *value; - WT_EXTENSION_API *wt_api; - WT_SESSION *session; - int ret = 0; - - session = wtcursor->session; - cursor = (CURSOR_SOURCE *)wtcursor; - wt_api = cursor->wt_api; - - dbc = cursor->dbc; - key = &cursor->key; - value = &cursor->value; - - if ((ret = copyin_key(wtcursor)) != 0) - return (ret); - copyin_value(wtcursor); - - if ((ret = dbc->put(dbc, key, value, DB_KEYFIRST)) != 0) - ERET(wt_api, - session, WT_ERROR, "DbCursor.put: %s", db_strerror(ret)); - - return (0); -} - -static int -kvs_cursor_remove(WT_CURSOR *wtcursor) -{ - CURSOR_SOURCE *cursor; - DBC *dbc; - DBT *key, *value; - WT_EXTENSION_API *wt_api; - WT_SESSION *session; - int ret = 0; - - session = wtcursor->session; - cursor = (CURSOR_SOURCE *)wtcursor; - wt_api = cursor->wt_api; - - dbc = cursor->dbc; - key = &cursor->key; - value = &cursor->value; - - /* - * WiredTiger's "remove" of a bitfield is really an update with a value - * of a single byte of zero. 
- */ - if (cursor->config_bitfield) { - wtcursor->value.size = 1; - wtcursor->value.data = "\0"; - return (kvs_cursor_update(wtcursor)); - } - - if ((ret = copyin_key(wtcursor)) != 0) - return (ret); - - if ((ret = dbc->get(dbc, key, value, DB_SET)) != 0) { - if (ret == DB_NOTFOUND || ret == DB_KEYEMPTY) - return (WT_NOTFOUND); - ERET(wt_api, - session, WT_ERROR, "DbCursor.get: %s", db_strerror(ret)); - } - if ((ret = dbc->del(dbc, 0)) != 0) - ERET(wt_api, - session, WT_ERROR, "DbCursor.del: %s", db_strerror(ret)); - - return (0); -} - -static int -kvs_cursor_close(WT_CURSOR *wtcursor) -{ - CURSOR_SOURCE *cursor; - DATA_SOURCE *ds; - DB *db; - DBC *dbc; - WT_EXTENSION_API *wt_api; - WT_SESSION *session; - int ret = 0; - - session = wtcursor->session; - cursor = (CURSOR_SOURCE *)wtcursor; - ds = cursor->ds; - wt_api = cursor->wt_api; - - dbc = cursor->dbc; - cursor->dbc = NULL; - if (dbc != NULL && (ret = dbc->close(dbc)) != 0) - ERET(wt_api, session, WT_ERROR, - "DbCursor.close: %s", db_strerror(ret)); - - db = cursor->db; - cursor->db = NULL; - if (db != NULL && (ret = db->close(db, 0)) != 0) - ERET(wt_api, - session, WT_ERROR, "Db.close: %s", db_strerror(ret)); - free(wtcursor); - - if ((ret = writelock(wt_api, session, &ds->rwlock)) != 0) - return (ret); - --ds->open_cursors; - if ((ret = unlock(wt_api, session, &ds->rwlock)) != 0) - return (ret); - - return (0); -} - -static int -kvs_session_create(WT_DATA_SOURCE *wtds, - WT_SESSION *session, const char *uri, WT_CONFIG_ARG *config) -{ - DATA_SOURCE *ds; - DB *db; - DBTYPE type; - WT_CONFIG_ITEM v; - WT_EXTENSION_API *wt_api; - int ret = 0; - const char *name; - - ds = (DATA_SOURCE *)wtds; - wt_api = ds->wt_api; - /* Get the object name */ - if ((ret = uri2name(wt_api, session, uri, &name)) != 0) - return (ret); - /* Check key/value formats */ - if ((ret = - wt_api->config_get(wt_api, session, config, "key_format", &v)) != 0) - ERET(wt_api, session, ret, - "key_format configuration: %s", - 
wt_api->strerror(wt_api, session, ret)); - type = v.len == 1 && v.str[0] == 'r' ? DB_RECNO : DB_BTREE; - - /* Create the Berkeley DB table */ - if ((ret = db_create(&db, ds->dbenv, 0)) != 0) - ERET(wt_api, - session, WT_ERROR, "db_create: %s", db_strerror(ret)); - if ((ret = db->open(db, NULL, name, NULL, type, DB_CREATE, 0)) != 0) - ERET(wt_api, - session, WT_ERROR, "Db.open: %s", uri, db_strerror(ret)); - if ((ret = db->close(db, 0)) != 0) - ERET(wt_api, session, WT_ERROR, "Db.close", db_strerror(ret)); - - return (0); -} - -static int -kvs_session_drop(WT_DATA_SOURCE *wtds, - WT_SESSION *session, const char *uri, WT_CONFIG_ARG *config) -{ - DB *db; - DATA_SOURCE *ds; - WT_EXTENSION_API *wt_api; - int ret = 0; - const char *name; - - (void)config; /* Unused parameters */ - - ds = (DATA_SOURCE *)wtds; - wt_api = ds->wt_api; - /* Get the object name */ - if ((ret = uri2name(wt_api, session, uri, &name)) != 0) - return (ret); - - if ((ret = single_thread(wtds, session, &ds->rwlock)) != 0) - return (ret); - - if ((ret = db_create(&db, ds->dbenv, 0)) != 0) - ESET(wt_api, - session, WT_ERROR, "db_create: %s", db_strerror(ret)); - else if ((ret = db->remove(db, name, NULL, 0)) != 0) - ESET(wt_api, - session, WT_ERROR, "Db.remove: %s", db_strerror(ret)); - /* db handle is dead */ - - ETRET(unlock(wt_api, session, &ds->rwlock)); - return (ret); -} - -static int -kvs_session_open_cursor(WT_DATA_SOURCE *wtds, WT_SESSION *session, - const char *uri, WT_CONFIG_ARG *config, WT_CURSOR **new_cursor) -{ - CURSOR_SOURCE *cursor; - DATA_SOURCE *ds; - DB *db; - WT_CONFIG_ITEM v; - WT_EXTENSION_API *wt_api; - int locked, ret; - const char *name; - - ds = (DATA_SOURCE *)wtds; - wt_api = ds->wt_api; - locked = 0; - /* Get the object name */ - if ((ret = uri2name(wt_api, session, uri, &name)) != 0) - return (ret); - /* Allocate the cursor */ - if ((cursor = calloc(1, sizeof(CURSOR_SOURCE))) == NULL) - return (os_errno()); - cursor->ds = (DATA_SOURCE *)wtds; - cursor->wt_api = wt_api; - 
/* Parse configuration */ - if ((ret = wt_api->config_get( - wt_api, session, config, "append", &v)) != 0) { - ESET(wt_api, session, ret, - "append configuration: %s", - wt_api->strerror(wt_api, session, ret)); - goto err; - } - cursor->config_append = v.val != 0; - - if ((ret = wt_api->config_get( - wt_api, session, config, "overwrite", &v)) != 0) { - ESET(wt_api, session, ret, - "overwrite configuration: %s", - wt_api->strerror(wt_api, session, ret)); - goto err; - } - cursor->config_overwrite = v.val != 0; - - if ((ret = wt_api->config_get( - wt_api, session, config, "key_format", &v)) != 0) { - ESET(wt_api, session, ret, - "key_format configuration: %s", - wt_api->strerror(wt_api, session, ret)); - goto err; - } - cursor->config_recno = v.len == 1 && v.str[0] == 'r'; - - if ((ret = wt_api->config_get( - wt_api, session, config, "value_format", &v)) != 0) { - ESET(wt_api, session, ret, - "value_format configuration: %s", - wt_api->strerror(wt_api, session, ret)); - goto err; - } - cursor->config_bitfield = - v.len == 2 && isdigit((u_char)v.str[0]) && v.str[1] == 't'; - - if ((ret = writelock(wt_api, session, &ds->rwlock)) != 0) - goto err; - locked = 1; - /* Open the Berkeley DB cursor */ - if ((ret = db_create(&cursor->db, ds->dbenv, 0)) != 0) { - ESET(wt_api, - session, WT_ERROR, "db_create: %s", db_strerror(ret)); - goto err; - } - db = cursor->db; - if ((ret = db->open(db, NULL, name, NULL, - cursor->config_recno ? 
DB_RECNO : DB_BTREE, DB_CREATE, 0)) != 0) { - ESET(wt_api, - session, WT_ERROR, "Db.open: %s", db_strerror(ret)); - goto err; - } - if ((ret = db->cursor(db, NULL, &cursor->dbc, 0)) != 0) { - ESET(wt_api, - session, WT_ERROR, "Db.cursor: %s", db_strerror(ret)); - goto err; - } - - /* Initialize the methods */ - cursor->wtcursor.next = kvs_cursor_next; - cursor->wtcursor.prev = kvs_cursor_prev; - cursor->wtcursor.reset = kvs_cursor_reset; - cursor->wtcursor.search = kvs_cursor_search; - cursor->wtcursor.search_near = kvs_cursor_search_near; - cursor->wtcursor.insert = kvs_cursor_insert; - cursor->wtcursor.update = kvs_cursor_update; - cursor->wtcursor.remove = kvs_cursor_remove; - cursor->wtcursor.close = kvs_cursor_close; - - *new_cursor = (WT_CURSOR *)cursor; - - ++ds->open_cursors; - - if (0) { -err: free(cursor); - } - - if (locked) - ETRET(unlock(wt_api, session, &ds->rwlock)); - return (ret); -} - -static int -kvs_session_rename(WT_DATA_SOURCE *wtds, WT_SESSION *session, - const char *uri, const char *newname, WT_CONFIG_ARG *config) -{ - DATA_SOURCE *ds; - DB *db; - WT_EXTENSION_API *wt_api; - int ret = 0; - const char *name; - - (void)config; /* Unused parameters */ - - ds = (DATA_SOURCE *)wtds; - wt_api = ds->wt_api; - /* Get the object name */ - if ((ret = uri2name(wt_api, session, uri, &name)) != 0) - return (ret); - - if ((ret = single_thread(wtds, session, &ds->rwlock)) != 0) - return (ret); - - if ((ret = db_create(&db, ds->dbenv, 0)) != 0) - ESET(wt_api, - session, WT_ERROR, "db_create: %s", db_strerror(ret)); - else if ((ret = db->rename(db, name, NULL, newname, 0)) != 0) - ESET(wt_api, - session, WT_ERROR, "Db.rename: %s", db_strerror(ret)); - /* db handle is dead */ - - ETRET(unlock(wt_api, session, &ds->rwlock)); - return (ret); -} - -static int -kvs_session_truncate(WT_DATA_SOURCE *wtds, - WT_SESSION *session, const char *uri, WT_CONFIG_ARG *config) -{ - DATA_SOURCE *ds; - DB *db; - WT_EXTENSION_API *wt_api; - int tret, ret = 0; - const char 
*name; - - (void)config; /* Unused parameters */ - - ds = (DATA_SOURCE *)wtds; - wt_api = ds->wt_api; - /* Get the object name */ - if ((ret = uri2name(wt_api, session, uri, &name)) != 0) - return (ret); - - if ((ret = single_thread(wtds, session, &ds->rwlock)) != 0) - return (ret); - - if ((ret = db_create(&db, ds->dbenv, 0)) != 0) - ESET(wt_api, - session, WT_ERROR, "db_create: %s", db_strerror(ret)); - else { - if ((ret = db->open(db, - NULL, name, NULL, DB_UNKNOWN, DB_TRUNCATE, 0)) != 0) - ESET(wt_api, session, WT_ERROR, - "Db.open: %s", db_strerror(ret)); - if ((tret = db->close(db, 0)) != 0) - ESET(wt_api, session, WT_ERROR, - "Db.close: %s", db_strerror(tret)); - } - - ETRET(unlock(wt_api, session, &ds->rwlock)); - return (ret); -} - -static int -kvs_session_verify(WT_DATA_SOURCE *wtds, - WT_SESSION *session, const char *uri, WT_CONFIG_ARG *config) -{ - DATA_SOURCE *ds; - DB *db; - WT_EXTENSION_API *wt_api; - int ret = 0; - const char *name; - - (void)config; /* Unused parameters */ - - ds = (DATA_SOURCE *)wtds; - wt_api = ds->wt_api; - /* Get the object name */ - if ((ret = uri2name(wt_api, session, uri, &name)) != 0) - return (ret); - - if ((ret = single_thread(wtds, session, &ds->rwlock)) != 0) - return (ret); - - if ((ret = db_create(&db, ds->dbenv, 0)) != 0) - ESET(wt_api, - session, WT_ERROR, "db_create: %s", db_strerror(ret)); - else if ((ret = db->verify(db, name, NULL, NULL, 0)) != 0) - ESET(wt_api, session, WT_ERROR, - "Db.verify: %s: %s", uri, db_strerror(ret)); - /* db handle is dead */ - - ETRET(unlock(wt_api, session, &ds->rwlock)); - return (ret); -} - -static int -kvs_terminate(WT_DATA_SOURCE *wtds, WT_SESSION *session) -{ - DB_ENV *dbenv; - DATA_SOURCE *ds; - WT_EXTENSION_API *wt_api; - int ret = 0; - - ds = (DATA_SOURCE *)wtds; - wt_api = ds->wt_api; - dbenv = ds->dbenv; - - if (dbenv != NULL && (ret = dbenv->close(dbenv, 0)) != 0) - ESET(wt_api, - session, WT_ERROR, "DbEnv.close: %s", db_strerror(ret)); - - ETRET(lock_destroy(wt_api, 
session, &ds->rwlock)); - - return (ret); -} - -int -wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config) -{ - /* - * List of the WT_DATA_SOURCE methods -- it's static so it breaks at - * compile-time should the structure change underneath us. - */ - static WT_DATA_SOURCE wtds = { - NULL, /* No session.alter */ - kvs_session_create, /* session.create */ - NULL, /* No session.compaction */ - kvs_session_drop, /* session.drop */ - kvs_session_open_cursor, /* session.open_cursor */ - kvs_session_rename, /* session.rename */ - NULL, /* No session.salvage */ - NULL, /* No session.size */ - kvs_session_truncate, /* session.truncate */ - NULL, /* No range_truncate */ - kvs_session_verify, /* session.verify */ - NULL, /* session.checkpoint */ - kvs_terminate, /* termination */ - NULL /* lsm_pre_merge */ - }; - DATA_SOURCE *ds; - DB_ENV *dbenv; - WT_EXTENSION_API *wt_api; - size_t len; - int ret = 0; - const char *home; - char *path; - - (void)config; /* Unused parameters */ - - ds = NULL; - dbenv = NULL; - path = NULL; - /* Acquire the extension API */ - wt_api = connection->get_extension_api(connection); - - /* Allocate the local data-source structure. 
*/ - if ((ds = calloc(1, sizeof(DATA_SOURCE))) == NULL) - return (os_errno()); - ds->wt_api = wt_api; - /* Configure the global lock */ - if ((ret = lock_init(wt_api, NULL, &ds->rwlock)) != 0) - goto err; - - ds->wtds = wtds; /* Configure the methods */ - - /* Berkeley DB environment */ - if ((ret = db_env_create(&dbenv, 0)) != 0) { - ESET(wt_api, - NULL, WT_ERROR, "db_env_create: %s", db_strerror(ret)); - goto err; - } - dbenv->set_errpfx(dbenv, "bdb"); - dbenv->set_errfile(dbenv, stderr); - - home = connection->get_home(connection); - len = strlen(home) + 10; - if ((path = malloc(len)) == NULL) - goto err; - (void)snprintf(path, len, "%s/KVS", home); - if ((ret = dbenv->open(dbenv, path, - DB_CREATE | DB_INIT_LOCK | DB_INIT_MPOOL | DB_PRIVATE, 0)) != 0) { - ESET(wt_api, - NULL, WT_ERROR, "DbEnv.open: %s", db_strerror(ret)); - goto err; - } - ds->dbenv = dbenv; - - if ((ret = /* Add the data source */ - connection->add_data_source( - connection, "kvsbdb:", (WT_DATA_SOURCE *)ds, NULL)) != 0) { - ESET(wt_api, NULL, ret, "WT_CONNECTION.add_data_source"); - goto err; - } - - if (0) { -err: if (dbenv != NULL) - (void)dbenv->close(dbenv, 0); - free(ds); - } - free(path); - return (ret); -} - -int -wiredtiger_extension_terminate(WT_CONNECTION *connection) -{ - (void)connection; /* Unused parameters */ - - return (0); -} diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 786a70c3e62..51b42587f41 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -1,5 +1,5 @@ { - "commit": "d86b3a8a331a1ec478c4ea75ef1b15856b429790", + "commit": "99e0760cc5f11440b21184874d8dd2ae5bde23d6", "github": "wiredtiger/wiredtiger.git", "vendor": "wiredtiger", "branch": "mongodb-4.2" diff --git a/src/third_party/wiredtiger/lang/python/setup_pip.py b/src/third_party/wiredtiger/lang/python/setup_pip.py index cafa05a9732..ce0fd1b0d77 100755 --- a/src/third_party/wiredtiger/lang/python/setup_pip.py +++ 
b/src/third_party/wiredtiger/lang/python/setup_pip.py @@ -195,6 +195,7 @@ def source_filter(sources): movers = dict() py_dir = os.path.join('lang', 'python') pywt_dir = os.path.join(py_dir, 'wiredtiger') + pywt_build_dir = os.path.join('build_posix', py_dir, 'wiredtiger') pywt_prefix = pywt_dir + os.path.sep for f in sources: if not re.match(source_regex, f): @@ -211,7 +212,7 @@ def source_filter(sources): result.append(dest) # Add SWIG generated files result.append('wiredtiger.py') - movers['wiredtiger.py'] = os.path.join(pywt_dir, '__init__.py') + movers['wiredtiger.py'] = os.path.join(pywt_build_dir, '__init__.py') result.append(os.path.join(py_dir, 'wiredtiger_wrap.c')) return result, movers @@ -302,6 +303,17 @@ cppflags, cflags, ldflags = get_compile_flags(inc_paths, lib_paths) # If we are creating a source distribution, create a staging directory # with just the right sources. Put the result in the python dist directory. if pip_command == 'sdist': + + # Technically, this script can run under Python2, and will do the + # right thing. But if we're running with Python2, chances are we built + # WiredTiger using Python2, and a distribution built that way will + # only run under Python2, not Python3. If we do the WiredTiger configure, + # build and this script all using Python3, we'll end up with a distribution + # that installs and runs under either Python2 or Python3. 
+ python2 = (sys.version_info[0] <= 2) + if python2: + die('Python3 should be used to create a source distribution') + sources, movers = source_filter(get_sources_curdir()) stage_dir = os.path.join(python_rel_dir, 'stage') shutil.rmtree(stage_dir, True) diff --git a/src/third_party/wiredtiger/src/async/async_api.c b/src/third_party/wiredtiger/src/async/async_api.c index e879067bbbb..9f9aa979139 100644 --- a/src/third_party/wiredtiger/src/async/async_api.c +++ b/src/third_party/wiredtiger/src/async/async_api.c @@ -345,7 +345,7 @@ __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]) return (ret); } if (!conn->async_cfg && run) /* Case 2 */ - return (__async_start(session)); + return (__wt_async_create(session, cfg)); if (!conn->async_cfg) /* Case 3 */ return (0); @@ -578,22 +578,25 @@ __async_runtime_config(WT_ASYNC_OP_IMPL *op, const char *cfg[]) */ int __wt_async_new_op(WT_SESSION_IMPL *session, const char *uri, - const char *config, const char *cfg[], WT_ASYNC_CALLBACK *cb, - WT_ASYNC_OP_IMPL **opp) + const char *config, WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP_IMPL **opp) { WT_ASYNC_OP_IMPL *op; WT_CONNECTION_IMPL *conn; WT_DECL_RET; + const char *cfg[] = { S2C(session)->cfg, NULL, NULL }; *opp = NULL; conn = S2C(session); if (!conn->async_cfg) + WT_RET(__wt_async_create(session, cfg)); + if (!conn->async_cfg) WT_RET_MSG( session, ENOTSUP, "Asynchronous operations not configured"); op = NULL; WT_ERR(__async_new_op_alloc(session, uri, config, &op)); + cfg[1] = config; WT_ERR(__async_runtime_config(op, cfg)); op->cb = cb; *opp = op; diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c index db6b458d671..a968370c2a5 100644 --- a/src/third_party/wiredtiger/src/block/block_ckpt.c +++ b/src/third_party/wiredtiger/src/block/block_ckpt.c @@ -140,7 +140,7 @@ err: /* } /* Checkpoints don't need the original information, discard it. 
*/ - if (checkpoint && ci != NULL) + if (checkpoint) __wt_block_ckpt_destroy(session, ci); return (ret); diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c index 6a85ccf6c17..94fe29d468d 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curnext.c +++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c @@ -711,8 +711,10 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) */ if (page != NULL && (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD || - (newpage && cbt->page_deleted_count > 0))) + (newpage && cbt->page_deleted_count > 0))) { __wt_page_evict_soon(session, cbt->ref); + WT_STAT_CONN_INCR(session, cache_eviction_force_delete); + } cbt->page_deleted_count = 0; if (F_ISSET(cbt, WT_CBT_READ_ONCE)) diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c index 1b8df0008b9..5ac53cb2036 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curprev.c +++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c @@ -665,8 +665,10 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) */ if (page != NULL && (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD || - (newpage && cbt->page_deleted_count > 0))) + (newpage && cbt->page_deleted_count > 0))) { __wt_page_evict_soon(session, cbt->ref); + WT_STAT_CONN_INCR(session, cache_eviction_force_delete); + } cbt->page_deleted_count = 0; if (F_ISSET(cbt, WT_CBT_READ_ONCE)) diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c index b7176c74adf..51ccf1d6415 100644 --- a/src/third_party/wiredtiger/src/btree/bt_cursor.c +++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c @@ -1477,9 +1477,11 @@ __wt_btcur_modify(WT_CURSOR_BTREE *cbt, WT_MODIFY *entries, int nentries) * the update if the update chain is too long; third, there's a check * if the updated value is too large to store; fourth, to simplify the * count of bytes being 
added/removed; fifth, we can get into serious - * trouble if we attempt to modify a value that doesn't exist. For the - * fifth reason, verify we're not in a read-uncommitted transaction, - * that implies a value that might disappear out from under us. + * trouble if we attempt to modify a value that doesn't exist or read + * a value that might not exist in the future. For the fifth reason, + * fail if in anything other than a snapshot transaction, read-committed + * and read-uncommitted imply values that might disappear out from under + * us or an inability to repeat point-in-time reads. * * Also, an application might read a value outside of a transaction and * then call modify. For that to work, the read must be part of the @@ -1490,9 +1492,10 @@ __wt_btcur_modify(WT_CURSOR_BTREE *cbt, WT_MODIFY *entries, int nentries) * because it will work most of the time and the failure is unlikely to * be detected. Require explicit transactions for modify operations. */ - if (session->txn.isolation == WT_ISO_READ_UNCOMMITTED) + if (session->txn.isolation != WT_ISO_SNAPSHOT) WT_ERR_MSG(session, ENOTSUP, - "not supported in read-uncommitted transactions"); + "not supported in read-committed or read-uncommitted " + "transactions"); if (F_ISSET(&session->txn, WT_TXN_AUTOCOMMIT)) WT_ERR_MSG(session, ENOTSUP, "not supported in implicit transactions"); diff --git a/src/third_party/wiredtiger/src/btree/bt_discard.c b/src/third_party/wiredtiger/src/btree/bt_discard.c index 44d4e1a63a7..a3cbd6a2101 100644 --- a/src/third_party/wiredtiger/src/btree/bt_discard.c +++ b/src/third_party/wiredtiger/src/btree/bt_discard.c @@ -225,16 +225,6 @@ __free_page_modify(WT_SESSION_IMPL *session, WT_PAGE *page) } /* - * __free_page_int -- - * Discard a WT_PAGE_COL_INT or WT_PAGE_ROW_INT page. 
- */ -static void -__free_page_int(WT_SESSION_IMPL *session, WT_PAGE *page) -{ - __wt_free_ref_index(session, page, WT_INTL_INDEX_GET_SAFE(page), false); -} - -/* * __wt_free_ref -- * Discard the contents of a WT_REF structure (optionally including the * pages it references). @@ -248,9 +238,6 @@ __wt_free_ref( if (ref == NULL) return; - /* Assert there are no hazard pointers. */ - WT_ASSERT(session, __wt_hazard_check_assert(session, ref, false)); - /* * Optionally free the referenced pages. (The path to free referenced * page is used for error cleanup, no instantiated and then discarded @@ -289,7 +276,24 @@ __wt_free_ref( __wt_free(session, ref->page_del); } - __wt_overwrite_and_free(session, ref); + __wt_overwrite_and_free_len(session, ref, WT_REF_CLEAR_SIZE); +} + +/* + * __free_page_int -- + * Discard a WT_PAGE_COL_INT or WT_PAGE_ROW_INT page. + */ +static void +__free_page_int(WT_SESSION_IMPL *session, WT_PAGE *page) +{ + WT_PAGE_INDEX *pindex; + uint32_t i; + + for (pindex = + WT_INTL_INDEX_GET_SAFE(page), i = 0; i < pindex->entries; ++i) + __wt_free_ref(session, pindex->index[i], page->type, false); + + __wt_free(session, pindex); } /* @@ -300,14 +304,24 @@ void __wt_free_ref_index(WT_SESSION_IMPL *session, WT_PAGE *page, WT_PAGE_INDEX *pindex, bool free_pages) { + WT_REF *ref; uint32_t i; if (pindex == NULL) return; - for (i = 0; i < pindex->entries; ++i) - __wt_free_ref( - session, pindex->index[i], page->type, free_pages); + for (i = 0; i < pindex->entries; ++i) { + ref = pindex->index[i]; + + /* + * Used when unrolling splits and other error paths where there + * should never have been a hazard pointer taken. 
+ */ + WT_ASSERT(session, + __wt_hazard_check_assert(session, ref, false)); + + __wt_free_ref(session, ref, page->type, free_pages); + } __wt_free(session, pindex); } diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index 8bd58c17975..c0a48edf639 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -1182,8 +1182,49 @@ err: __split_ref_final(session, &locked); switch (complete) { case WT_ERR_RETURN: + /* + * The replace-index variable is the internal page being split's + * new page index, referencing the first chunk of WT_REFs that + * aren't being moved to other pages. Those WT_REFs survive the + * failure, they're referenced from the page's current index. + * Simply free that memory, but nothing it references. + */ + __wt_free(session, replace_index); + + /* + * The alloc-index variable is the array of new WT_REF entries + * intended to be inserted into the page being split's parent. + * + * Except for the first slot (the original page's WT_REF), it's + * an array of newly allocated combined WT_PAGE_INDEX and WT_REF + * structures, each of which references a newly allocated (and + * modified) child page, each of which references an index of + * WT_REFs from the page being split. Free everything except for + * slot 1 and the WT_REFs in the child page indexes. + * + * First, skip slot 1. Second, we want to free all of the child + * pages referenced from the alloc-index array, but we can't + * just call the usual discard function because the WT_REFs + * referenced by the child pages remain referenced by the + * original page, after error. For each entry, free the child + * page's page index (so the underlying page-free function will + * ignore it), then call the general-purpose discard function. 
+ */ + if (alloc_index == NULL) + break; + alloc_refp = alloc_index->index; + *alloc_refp++ = NULL; + for (i = 1; i < children; ++alloc_refp, ++i) { + ref = *alloc_refp; + if (ref == NULL || ref->page == NULL) + continue; + + child = ref->page; + child_pindex = WT_INTL_INDEX_GET_SAFE(child); + __wt_free(session, child_pindex); + WT_INTL_INDEX_SET(child, NULL); + } __wt_free_ref_index(session, page, alloc_index, true); - __wt_free_ref_index(session, page, replace_index, false); break; case WT_ERR_PANIC: __wt_err(session, ret, @@ -1408,8 +1449,20 @@ err: if (parent != NULL) __split_internal_unlock(session, page); /* A page may have been busy, in which case return without error. */ - WT_RET_BUSY_OK(ret); - return (0); + switch (ret) { + case 0: + case WT_PANIC: + break; + case EBUSY: + ret = 0; + break; + default: + __wt_err(session, ret, + "ignoring not-fatal error during parent page split"); + ret = 0; + break; + } + return (ret); } #ifdef HAVE_DIAGNOSTIC @@ -1625,7 +1678,7 @@ __split_multi_inmem_fail(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_REF *ref) /* * We failed creating new in-memory pages. For error-handling reasons, * we've left the update chains referenced by both the original and - * new pages. Discard the new allocated WT_REF structures and their + * new pages. Discard the newly allocated WT_REF structures and their * pages (setting a flag so the discard code doesn't discard the updates * on the page). 
* diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c index a7cb433b56a..30cdaf2225a 100644 --- a/src/third_party/wiredtiger/src/btree/bt_walk.c +++ b/src/third_party/wiredtiger/src/btree/bt_walk.c @@ -447,6 +447,13 @@ restart: /* WT_ERR_NOTFOUND_OK(ret); __wt_spin_backoff(&swap_yield, &swap_sleep); + if (swap_yield < 1000) + WT_STAT_CONN_INCR(session, + cache_eviction_walk_internal_yield); + if (swap_sleep != 0) + WT_STAT_CONN_INCRV(session, + cache_eviction_walk_internal_wait, + swap_sleep); } /* NOTREACHED */ } @@ -560,6 +567,8 @@ descend: /* * An expected error, so "couple" is unchanged. */ if (ret == WT_NOTFOUND) { + WT_STAT_CONN_INCR(session, + cache_eviction_walk_leaf_notfound); WT_NOT_READ(ret, 0); break; } diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c index e68fb6c15f8..24df42b45f1 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -725,7 +725,9 @@ __conn_async_new_op(WT_CONNECTION *wt_conn, const char *uri, const char *config, conn = (WT_CONNECTION_IMPL *)wt_conn; CONNECTION_API_CALL(conn, session, async_new_op, config, cfg); - WT_ERR(__wt_async_new_op(session, uri, config, cfg, callback, &op)); + WT_UNUSED(cfg); + + WT_ERR(__wt_async_new_op(session, uri, config, callback, &op)); *asyncopp = &op->iface; @@ -1047,22 +1049,13 @@ __conn_close(WT_CONNECTION *wt_conn, const char *config) const char *ckpt_cfg; conn = (WT_CONNECTION_IMPL *)wt_conn; - ckpt_cfg = "use_timestamp=false"; CONNECTION_API_CALL(conn, session, close, config, cfg); - - /* The default session is used to access data handles during close. 
*/ - F_CLR(session, WT_SESSION_NO_DATA_HANDLES); +err: WT_TRET(__wt_config_gets(session, cfg, "leak_memory", &cval)); if (cval.val != 0) F_SET(conn, WT_CONN_LEAK_MEMORY); - WT_TRET(__wt_config_gets(session, cfg, "use_timestamp", &cval)); - if (cval.val != 0) { - ckpt_cfg = "use_timestamp=true"; - if (conn->txn_global.has_stable_timestamp) - F_SET(conn, WT_CONN_CLOSING_TIMESTAMP); - } /* * Ramp the eviction dirty target down to encourage eviction threads to @@ -1071,7 +1064,7 @@ __conn_close(WT_CONNECTION *wt_conn, const char *config) conn->cache->eviction_dirty_trigger = 1.0; conn->cache->eviction_dirty_target = 0.1; -err: /* + /* * Rollback all running transactions. * We do this as a separate pass because an active transaction in one * session could cause trouble when closing a file, even if that @@ -1102,7 +1095,8 @@ err: /* WT_TRET(wt_session->close(wt_session, config)); } - WT_TRET(__wt_async_flush(session)); + /* Wait for in-flight operations to complete. */ + WT_TRET(__wt_txn_activity_drain(session)); /* * Disable lookaside eviction: it doesn't help us shut down and can @@ -1111,8 +1105,24 @@ err: /* */ F_SET(conn, WT_CONN_EVICTION_NO_LOOKASIDE); - /* Wait for in-flight operations to complete. */ - WT_TRET(__wt_txn_activity_drain(session)); + /* + * Clear any pending async operations and shut down the async worker + * threads and system before closing LSM. + */ + WT_TRET(__wt_async_flush(session)); + WT_TRET(__wt_async_destroy(session)); + + WT_TRET(__wt_lsm_manager_destroy(session)); + + /* + * After the async and LSM threads have exited, we shouldn't be opening + * any more files. + */ + F_SET(conn, WT_CONN_CLOSING_NO_MORE_OPENS); + WT_FULL_BARRIER(); + + /* The default session is used to access data handles during close. */ + F_CLR(session, WT_SESSION_NO_DATA_HANDLES); /* * Perform a system-wide checkpoint so that all tables are consistent @@ -1121,6 +1131,13 @@ err: /* * shutting down all the subsystems. 
We have shut down all user * sessions, but send in true for waiting for internal races. */ + WT_TRET(__wt_config_gets(session, cfg, "use_timestamp", &cval)); + ckpt_cfg = "use_timestamp=false"; + if (cval.val != 0) { + ckpt_cfg = "use_timestamp=true"; + if (conn->txn_global.has_stable_timestamp) + F_SET(conn, WT_CONN_CLOSING_TIMESTAMP); + } if (!F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY)) { s = NULL; WT_TRET(__wt_open_internal_session( diff --git a/src/third_party/wiredtiger/src/conn/conn_handle.c b/src/third_party/wiredtiger/src/conn/conn_handle.c index faee6216ed7..6f61d10afbe 100644 --- a/src/third_party/wiredtiger/src/conn/conn_handle.c +++ b/src/third_party/wiredtiger/src/conn/conn_handle.c @@ -65,7 +65,7 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_RET(__wt_rwlock_init(session, &conn->hot_backup_lock)); WT_RWLOCK_INIT_TRACKED(session, &conn->table_lock, table); - /* Setup the spin locks for the LSM manager queues. */ + /* Setup serialization for the LSM manager queues. */ WT_RET(__wt_spin_init(session, &conn->lsm_manager.app_lock, "LSM application queue lock")); WT_RET(__wt_spin_init(session, @@ -130,6 +130,12 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn) __wt_rwlock_destroy(session, &conn->table_lock); __wt_spin_destroy(session, &conn->turtle_lock); + /* Free LSM serialization resources. */ + __wt_spin_destroy(session, &conn->lsm_manager.switch_lock); + __wt_spin_destroy(session, &conn->lsm_manager.app_lock); + __wt_spin_destroy(session, &conn->lsm_manager.manager_lock); + __wt_cond_destroy(session, &conn->lsm_manager.work_cond); + /* Free allocated memory. 
*/ __wt_free(session, conn->cfg); __wt_free(session, conn->debug_ckpt); diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c index 3a1025fea1b..c9d34987df4 100644 --- a/src/third_party/wiredtiger/src/conn/conn_open.c +++ b/src/third_party/wiredtiger/src/conn/conn_open.c @@ -76,32 +76,24 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) wt_conn = &conn->iface; session = conn->default_session; + /* + * The LSM and async services are not shut down in this path (which is + * called when wiredtiger_open hits an error, as well as during normal + * shutdown). Assert they're not running. + */ + WT_ASSERT(session, + !F_ISSET(conn, WT_CONN_SERVER_ASYNC | WT_CONN_SERVER_LSM)); + /* Shut down the subsystems, ensuring workers see the state change. */ F_SET(conn, WT_CONN_CLOSING); WT_FULL_BARRIER(); /* - * Clear any pending async operations and shut down the async worker - * threads and system before closing LSM. - */ - WT_TRET(__wt_async_flush(session)); - WT_TRET(__wt_async_destroy(session)); - - /* * Shut down server threads other than the eviction server, which is * needed later to close btree handles. Some of these threads access * btree handles, so take care in ordering shutdown to make sure they * exit before files are closed. */ - WT_TRET(__wt_lsm_manager_destroy(session)); - - /* - * Once the async and LSM threads exit, we shouldn't be opening any - * more files. - */ - F_SET(conn, WT_CONN_CLOSING_NO_MORE_OPENS); - WT_FULL_BARRIER(); - WT_TRET(__wt_capacity_server_destroy(session)); WT_TRET(__wt_checkpoint_server_destroy(session)); WT_TRET(__wt_statlog_destroy(session, true)); @@ -250,9 +242,6 @@ __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[]) /* Start the handle sweep thread. */ WT_RET(__wt_sweep_create(session)); - /* Start the optional async threads. */ - WT_RET(__wt_async_create(session, cfg)); - /* Start the optional capacity thread. 
*/ WT_RET(__wt_capacity_server_create(session, cfg)); diff --git a/src/third_party/wiredtiger/src/cursor/cur_std.c b/src/third_party/wiredtiger/src/cursor/cur_std.c index 808780b4675..073df6eaaf6 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_std.c +++ b/src/third_party/wiredtiger/src/cursor/cur_std.c @@ -916,11 +916,16 @@ __cursor_modify(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries) /* * The underlying btree code cannot support WT_CURSOR.modify within - * a read-uncommitted transaction. Disallow it here for consistency. + * a read-committed or read-uncommitted transaction, or outside of + * an explicit transaction. Disallow here as well, for consistency. */ - if (session->txn.isolation == WT_ISO_READ_UNCOMMITTED) + if (session->txn.isolation != WT_ISO_SNAPSHOT) WT_ERR_MSG(session, ENOTSUP, - "not supported in read-uncommitted transactions"); + "not supported in read-committed or read-uncommitted " + "transactions"); + if (F_ISSET(&session->txn, WT_TXN_AUTOCOMMIT)) + WT_ERR_MSG(session, ENOTSUP, + "not supported in implicit transactions"); WT_ERR(__cursor_checkkey(cursor)); diff --git a/src/third_party/wiredtiger/src/docs/checkpoint.dox b/src/third_party/wiredtiger/src/docs/checkpoint.dox index 3d636cd17b6..2da3b1df272 100644 --- a/src/third_party/wiredtiger/src/docs/checkpoint.dox +++ b/src/third_party/wiredtiger/src/docs/checkpoint.dox @@ -55,11 +55,13 @@ archiving purposes. @section checkpoint_cursors Checkpoint cursors Cursors are normally opened in the most recent version of a data source. -However, a checkpoint configuration string may be provided to -WT_SESSION::open_cursor, opening a read-only, static view of the data -source. This provides a limited form of time-travel, as the static view -is not changed by subsequent checkpoints, and will persist until the -checkpoint cursor is closed. +However, a checkpoint configuration string may be provided +to WT_SESSION::open_cursor, opening a read-only, static view of the +data source. 
This provides a limited form of time-travel, as the static +view is not changed by subsequent checkpoints and will persist until +the checkpoint cursor is closed. While it is not an error to set a read +timestamp in a transaction including a checkpoint cursor, it also has no +effect on the checkpoint cursor's behavior. @section checkpoint_naming Checkpoint naming diff --git a/src/third_party/wiredtiger/src/docs/custom-data-sources.dox b/src/third_party/wiredtiger/src/docs/custom-data-sources.dox index 9082ee95dea..340f25997a5 100644 --- a/src/third_party/wiredtiger/src/docs/custom-data-sources.dox +++ b/src/third_party/wiredtiger/src/docs/custom-data-sources.dox @@ -6,12 +6,6 @@ support a set of methods for a different URI type (for example, in the same way WiredTiger supports the built-in type "file:", an application data source might support the type "dsrc:"). -The WiredTiger distribution includes an example of a complete custom -data source implementation (based on Oracle's Berkeley DB database -engine), in the file test/format/kvs_bdb.c. This example implementation is -public domain software, please feel free to use this code as a prototype -implementation of other custom data sources. - Applications register their WT_DATA_SOURCE interface implementations with WiredTiger using the WT_CONNECTION::add_data_source method: diff --git a/src/third_party/wiredtiger/src/docs/spell.ok b/src/third_party/wiredtiger/src/docs/spell.ok index cb65c7b1b01..98ec49779a4 100644 --- a/src/third_party/wiredtiger/src/docs/spell.ok +++ b/src/third_party/wiredtiger/src/docs/spell.ok @@ -120,8 +120,6 @@ autogen automake basecfg basho -bdb -bdbmap benchmarking bigram bindir diff --git a/src/third_party/wiredtiger/src/docs/upgrading.dox b/src/third_party/wiredtiger/src/docs/upgrading.dox index b4c7f5b7d2b..c5bb8ded037 100644 --- a/src/third_party/wiredtiger/src/docs/upgrading.dox +++ b/src/third_party/wiredtiger/src/docs/upgrading.dox @@ -1,6 +1,19 @@ /*! 
@page upgrading Upgrading WiredTiger applications </dl><hr> +@section version_321 Upgrading to Version 3.2.1 +<dl> + +<dt>WT_CURSOR::modify transaction requirements</dt> +<dd> +In previous releases of WiredTiger, WT_CURSOR::modify operations could be +performed within <code>read-committed</code> transactions. This implies +point-in-time reads that cannot be repeated. In the 3.2.1 release, +WT_CURSOR::modify operations must be performed in an explicit transaction +at snapshot isolation, and will fail if that's not the case. +</dd> + +</dl><hr> @section version_320 Upgrading to Version 3.2.0 <dl> diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index 931216376b9..b0b29942cb2 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -1373,6 +1373,8 @@ __evict_lru_walk(WT_SESSION_IMPL *session) } } + WT_STAT_CONN_INCRV(session, + cache_eviction_pages_queued_post_lru, queue->evict_candidates); queue->evict_current = queue->evict_queue; __wt_spin_unlock(session, &queue->evict_lock); diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c index 2510815401f..5df9b7ce9bb 100644 --- a/src/third_party/wiredtiger/src/evict/evict_page.c +++ b/src/third_party/wiredtiger/src/evict/evict_page.c @@ -55,16 +55,10 @@ __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) { WT_BTREE *btree; WT_DECL_RET; - WT_PAGE *page; - uint64_t time_start, time_stop; uint32_t evict_flags, previous_state; - bool locked, too_big; + bool locked; btree = S2BT(session); - locked = false; - page = ref->page; - time_start = __wt_clock(session); - evict_flags = LF_ISSET(WT_READ_NO_SPLIT) ? 
WT_EVICT_CALL_NO_SPLIT : 0; /* * This function always releases the hazard pointer - ensure that's @@ -73,47 +67,20 @@ __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) * without first locking the page, it could be evicted in between. */ previous_state = ref->state; - if ((previous_state == WT_REF_MEM || previous_state == WT_REF_LIMBO) && - WT_REF_CAS_STATE(session, ref, previous_state, WT_REF_LOCKED)) - locked = true; + locked = + (previous_state == WT_REF_MEM || previous_state == WT_REF_LIMBO) && + WT_REF_CAS_STATE(session, ref, previous_state, WT_REF_LOCKED); if ((ret = __wt_hazard_clear(session, ref)) != 0 || !locked) { if (locked) WT_REF_SET_STATE(ref, previous_state); return (ret == 0 ? EBUSY : ret); } - (void)__wt_atomic_addv32(&btree->evict_busy, 1); - - too_big = page->memory_footprint >= btree->splitmempage; + evict_flags = LF_ISSET(WT_READ_NO_SPLIT) ? WT_EVICT_CALL_NO_SPLIT : 0; + FLD_SET(evict_flags, WT_EVICT_CALL_URGENT); - /* - * Track how long the call to evict took. If eviction is successful then - * we have one of two pairs of stats to increment. - */ + (void)__wt_atomic_addv32(&btree->evict_busy, 1); ret = __wt_evict(session, ref, previous_state, evict_flags); - time_stop = __wt_clock(session); - if (ret == 0) { - if (too_big) { - WT_STAT_CONN_INCR(session, cache_eviction_force); - WT_STAT_CONN_INCRV(session, cache_eviction_force_time, - WT_CLOCKDIFF_US(time_stop, time_start)); - } else { - /* - * If the page isn't too big, we are evicting it because - * it had a chain of deleted entries that make traversal - * expensive. 
- */ - WT_STAT_CONN_INCR(session, cache_eviction_force_delete); - WT_STAT_CONN_INCRV(session, - cache_eviction_force_delete_time, - WT_CLOCKDIFF_US(time_stop, time_start)); - } - } else { - WT_STAT_CONN_INCR(session, cache_eviction_force_fail); - WT_STAT_CONN_INCRV(session, cache_eviction_force_fail_time, - WT_CLOCKDIFF_US(time_stop, time_start)); - } - (void)__wt_atomic_subv32(&btree->evict_busy, 1); return (ret); @@ -130,12 +97,14 @@ __wt_evict(WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_PAGE *page; + uint64_t time_start, time_stop; bool clean_page, closing, inmem_split, local_gen, tree_dead; conn = S2C(session); page = ref->page; closing = LF_ISSET(WT_EVICT_CALL_CLOSING); local_gen = false; + time_start = time_stop = 0; /* [-Werror=maybe-uninitialized] */ __wt_verbose(session, WT_VERB_EVICT, "page %p (%s)", (void *)page, __wt_page_type_string(page->type)); @@ -155,6 +124,16 @@ __wt_evict(WT_SESSION_IMPL *session, } /* + * Track how long forcible eviction took. Immediately increment the + * forcible eviction counter, we might do an in-memory split and not + * an eviction, which skips the other statistics. + */ + if (LF_ISSET(WT_EVICT_CALL_URGENT)) { + time_start = __wt_clock(session); + WT_STAT_CONN_INCR(session, cache_eviction_force); + } + + /* * Get exclusive access to the page if our caller doesn't have the tree * locked down. 
*/ @@ -225,6 +204,21 @@ __wt_evict(WT_SESSION_IMPL *session, else WT_ERR(__evict_page_dirty_update(session, ref, flags)); + if (LF_ISSET(WT_EVICT_CALL_URGENT)) { + time_stop = __wt_clock(session); + if (clean_page) { + WT_STAT_CONN_INCR(session, cache_eviction_force_clean); + WT_STAT_CONN_INCRV(session, + cache_eviction_force_clean_time, + WT_CLOCKDIFF_US(time_stop, time_start)); + } + else { + WT_STAT_CONN_INCR(session, cache_eviction_force_dirty); + WT_STAT_CONN_INCRV(session, + cache_eviction_force_dirty_time, + WT_CLOCKDIFF_US(time_stop, time_start)); + } + } if (clean_page) { WT_STAT_CONN_INCR(session, cache_eviction_clean); WT_STAT_DATA_INCR(session, cache_eviction_clean); @@ -237,6 +231,14 @@ __wt_evict(WT_SESSION_IMPL *session, err: if (!closing) __evict_exclusive_clear(session, ref, previous_state); + if (LF_ISSET(WT_EVICT_CALL_URGENT)) { + time_stop = __wt_clock(session); + WT_STAT_CONN_INCR(session, cache_eviction_force_fail); + WT_STAT_CONN_INCRV(session, + cache_eviction_force_fail_time, + WT_CLOCKDIFF_US(time_stop, time_start)); + } + WT_STAT_CONN_INCR(session, cache_eviction_fail); WT_STAT_DATA_INCR(session, cache_eviction_fail); } @@ -470,11 +472,46 @@ __evict_child_check(WT_SESSION_IMPL *session, WT_REF *parent) WT_REF *child; bool active; + /* + * There may be cursors in the tree walking the list of child pages. + * The parent is locked, so all we care about is cursors already in the + * child pages, no thread can enter them. Any cursor moving through the + * child pages must be hazard pointer coupling between pages, where the + * page on which it currently has a hazard pointer must be in a state + * other than on-disk. Walk the child list forward, then backward, to + * ensure we don't race with a cursor walking in the opposite direction + * from our check. 
+ */ + WT_INTL_FOREACH_BEGIN(session, parent->page, child) { + switch (child->state) { + case WT_REF_DISK: /* On-disk */ + case WT_REF_DELETED: /* On-disk, deleted */ + case WT_REF_LOOKASIDE: /* On-disk, lookaside */ + break; + default: + return (__wt_set_return(session, EBUSY)); + } + } WT_INTL_FOREACH_END; + WT_INTL_FOREACH_REVERSE_BEGIN(session, parent->page, child) { + switch (child->state) { + case WT_REF_DISK: /* On-disk */ + case WT_REF_DELETED: /* On-disk, deleted */ + case WT_REF_LOOKASIDE: /* On-disk, lookaside */ + break; + default: + return (__wt_set_return(session, EBUSY)); + } + } WT_INTL_FOREACH_END; + + /* + * The fast check is done and there are no cursors in the child pages. + * Make sure the child WT_REF structures pages can be discarded. + */ WT_INTL_FOREACH_BEGIN(session, parent->page, child) { switch (child->state) { case WT_REF_DISK: /* On-disk */ break; - case WT_REF_DELETED: /* Deleted */ + case WT_REF_DELETED: /* On-disk, deleted */ /* * If the child page was part of a truncate, * transaction rollback might switch this page into its @@ -498,7 +535,7 @@ __evict_child_check(WT_SESSION_IMPL *session, WT_REF *parent) if (active) return (__wt_set_return(session, EBUSY)); break; - case WT_REF_LOOKASIDE: + case WT_REF_LOOKASIDE: /* On-disk, lookaside */ /* * If the lookaside history is obsolete, the reference * can be ignored. @@ -520,9 +557,8 @@ __evict_child_check(WT_SESSION_IMPL *session, WT_REF *parent) * for conditions that would block its eviction. 
*/ static int -__evict_review( - WT_SESSION_IMPL *session, WT_REF *ref, uint32_t evict_flags, - bool *inmem_splitp) +__evict_review(WT_SESSION_IMPL *session, + WT_REF *ref, uint32_t evict_flags, bool *inmem_splitp) { WT_CACHE *cache; WT_CONNECTION_IMPL *conn; @@ -646,41 +682,39 @@ __evict_review( if (closing) LF_SET(WT_REC_VISIBILITY_ERR); - else if (!WT_PAGE_IS_INTERNAL(page) && - !F_ISSET(S2BT(session), WT_BTREE_LOOKASIDE)) { - if (F_ISSET(conn, WT_CONN_IN_MEMORY)) - LF_SET(WT_REC_IN_MEMORY | - WT_REC_SCRUB | WT_REC_UPDATE_RESTORE); - else if (WT_SESSION_BTREE_SYNC(session)) - LF_SET(WT_REC_LOOKASIDE); - else if (!WT_IS_METADATA(session->dhandle)) { - LF_SET(WT_REC_UPDATE_RESTORE); + else if (WT_PAGE_IS_INTERNAL(page) || + F_ISSET(S2BT(session), WT_BTREE_LOOKASIDE)) + ; + else if (WT_SESSION_BTREE_SYNC(session)) + LF_SET(WT_REC_LOOKASIDE); + else if (F_ISSET(conn, WT_CONN_IN_MEMORY)) + LF_SET(WT_REC_IN_MEMORY | WT_REC_SCRUB | WT_REC_UPDATE_RESTORE); + else { + LF_SET(WT_REC_UPDATE_RESTORE); - /* - * Scrub if we're supposed to or toss it in sometimes - * if we are in debugging mode. - */ - if (F_ISSET(cache, WT_CACHE_EVICT_SCRUB) || - (F_ISSET(cache, WT_CACHE_EVICT_DEBUG_MODE) && - __wt_random(&session->rnd) % 3 == 0)) - LF_SET(WT_REC_SCRUB); + /* + * Scrub if we're supposed to or toss it in sometimes if we are + * in debugging mode. + */ + if (F_ISSET(cache, WT_CACHE_EVICT_SCRUB) || + (F_ISSET(cache, WT_CACHE_EVICT_DEBUG_MODE) && + __wt_random(&session->rnd) % 3 == 0)) + LF_SET(WT_REC_SCRUB); - /* - * If the cache is under pressure with many updates - * that can't be evicted, check if reconciliation - * suggests trying the lookaside table. 
- */ - if (F_ISSET(cache, WT_CACHE_EVICT_LOOKASIDE) && - !F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE)) { - if (F_ISSET(cache, - WT_CACHE_EVICT_DEBUG_MODE) && - __wt_random(&session->rnd) % 10 == 0) { - LF_CLR(WT_REC_SCRUB | - WT_REC_UPDATE_RESTORE); - LF_SET(WT_REC_LOOKASIDE); - } - lookaside_retryp = &lookaside_retry; + /* + * If the cache is under pressure with many updates that can't + * be evicted, check if reconciliation suggests trying the + * lookaside table. + */ + if (!WT_IS_METADATA(session->dhandle) && + F_ISSET(cache, WT_CACHE_EVICT_LOOKASIDE) && + !F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE)) { + if (F_ISSET(cache, WT_CACHE_EVICT_DEBUG_MODE) && + __wt_random(&session->rnd) % 10 == 0) { + LF_CLR(WT_REC_SCRUB | WT_REC_UPDATE_RESTORE); + LF_SET(WT_REC_LOOKASIDE); } + lookaside_retryp = &lookaside_retry; } } diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index 7729c9887d5..a7c289a7b7f 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -593,6 +593,14 @@ struct __wt_page { for (__refp = __pindex->index, \ __entries = __pindex->entries; __entries > 0; --__entries) {\ (ref) = *__refp++; +#define WT_INTL_FOREACH_REVERSE_BEGIN(session, page, ref) do { \ + WT_PAGE_INDEX *__pindex; \ + WT_REF **__refp; \ + uint32_t __entries; \ + WT_INTL_INDEX_GET(session, page, __pindex); \ + for (__refp = __pindex->index + __pindex->entries, \ + __entries = __pindex->entries; __entries > 0; --__entries) {\ + (ref) = *--__refp; #define WT_INTL_FOREACH_END \ } \ } while (0) @@ -897,43 +905,51 @@ struct __wt_ref { WT_PAGE_DELETED *page_del; /* Deleted page information */ WT_PAGE_LOOKASIDE *page_las; /* Lookaside information */ -/* A macro wrapper allowing us to remember the callers code location */ -#define WT_REF_CAS_STATE(session, ref, old_state, new_state) \ - __wt_ref_cas_state_int((session), (ref), (old_state), (new_state),\ - __FILE__, __LINE__) + 
/* + * In DIAGNOSTIC mode we overwrite the WT_REF on free to force failures. + * Don't clear the history in that case. + */ +#define WT_REF_CLEAR_SIZE (offsetof(WT_REF, hist)) + +#define WT_REF_SAVE_STATE_MAX 3 #ifdef HAVE_DIAGNOSTIC /* Capture history of ref state changes. */ struct __wt_ref_hist { WT_SESSION_IMPL *session; const char *name; - const char *file; - int line; - uint32_t state; - } hist[3]; + const char *func; + uint16_t line; + uint16_t state; + } hist[WT_REF_SAVE_STATE_MAX]; uint64_t histoff; #define WT_REF_SAVE_STATE(ref, s, f, l) do { \ (ref)->hist[(ref)->histoff].session = session; \ (ref)->hist[(ref)->histoff].name = session->name; \ - (ref)->hist[(ref)->histoff].file = (f); \ - (ref)->hist[(ref)->histoff].line = (l); \ - (ref)->hist[(ref)->histoff].state = s; \ + (ref)->hist[(ref)->histoff].func = (f); \ + (ref)->hist[(ref)->histoff].line = (uint16_t)(l); \ + (ref)->hist[(ref)->histoff].state = (uint16_t)(s); \ (ref)->histoff = \ ((ref)->histoff + 1) % WT_ELEMENTS((ref)->hist); \ } while (0) #define WT_REF_SET_STATE(ref, s) do { \ - WT_REF_SAVE_STATE(ref, s, __FILE__, __LINE__); \ + WT_REF_SAVE_STATE(ref, s, __func__, __LINE__); \ WT_PUBLISH((ref)->state, s); \ } while (0) #else #define WT_REF_SET_STATE(ref, s) WT_PUBLISH((ref)->state, s) #endif + +/* A macro wrapper allowing us to remember the callers code location */ +#define WT_REF_CAS_STATE(session, ref, old_state, new_state) \ + __wt_ref_cas_state_int( \ + session, ref, old_state, new_state, __func__, __LINE__) }; /* * WT_REF_SIZE is the expected structure size -- we verify the build to ensure * the compiler hasn't inserted padding which would break the world. 
*/ #ifdef HAVE_DIAGNOSTIC -#define WT_REF_SIZE (56 + 3 * sizeof(WT_REF_HIST) + 8) +#define WT_REF_SIZE (56 + WT_REF_SAVE_STATE_MAX * sizeof(WT_REF_HIST) + 8) #else #define WT_REF_SIZE 56 #endif diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index 906c615057a..5cf67d8600f 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -1508,7 +1508,8 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) * memory_page_max setting, when we see many deleted items, and when we * are attempting to scan without trashing the cache. * - * Fast checks if eviction is disabled for this handle, operation or + * Fast checks if flag indicates no evict, session can't perform slow + * operation, eviction is disabled for this handle, operation or * tree, then perform a general check if eviction will be possible. * * Checkpoint should not queue pages for urgent eviction if it cannot @@ -1517,7 +1518,9 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) * checkpointed, and no other thread can help with that. 
*/ page = ref->page; - if (WT_READGEN_EVICT_SOON(page->read_gen) && + if (!LF_ISSET(WT_READ_NO_EVICT) && + __wt_session_can_wait(session) && + WT_READGEN_EVICT_SOON(page->read_gen) && btree->evict_disabled == 0 && __wt_page_can_evict(session, ref, &inmem_split)) { if (!__wt_page_evict_clean(page) && diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h index 163e0a2753c..46718d9aba2 100644 --- a/src/third_party/wiredtiger/src/include/cache.h +++ b/src/third_party/wiredtiger/src/include/cache.h @@ -294,6 +294,7 @@ struct __wt_cache_pool { /* Flags used with __wt_evict */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_EVICT_CALL_CLOSING 0x1u /* Closing connection or tree */ -#define WT_EVICT_CALL_NO_SPLIT 0x2u /* Splits not allowed */ +#define WT_EVICT_CALL_CLOSING 0x1u /* Closing connection or tree */ +#define WT_EVICT_CALL_NO_SPLIT 0x2u /* Splits not allowed */ +#define WT_EVICT_CALL_URGENT 0x4u /* Urgent eviction */ /* AUTOMATIC FLAG VALUE GENERATION STOP */ diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i index 3c08f808c62..0cb3708a030 100644 --- a/src/third_party/wiredtiger/src/include/cursor.i +++ b/src/third_party/wiredtiger/src/include/cursor.i @@ -200,8 +200,10 @@ __cursor_reset(WT_CURSOR_BTREE *cbt) * If we were scanning and saw a lot of deleted records on this page, * try to evict the page when we release it. 
*/ - if (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD) + if (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD) { __wt_page_evict_soon(session, cbt->ref); + WT_STAT_CONN_INCR(session, cache_eviction_force_delete); + } cbt->page_deleted_count = 0; /* diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index a4706a36796..1c704be9c3e 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -36,7 +36,7 @@ extern int __wt_apply_single_idx(WT_SESSION_IMPL *session, WT_INDEX *idx, WT_CUR extern int __wt_async_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_async_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_async_flush(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_async_new_op(WT_SESSION_IMPL *session, const char *uri, const char *config, const char *cfg[], WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP_IMPL **opp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_async_new_op(WT_SESSION_IMPL *session, const char *uri, const char *config, WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP_IMPL **opp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_async_op_enqueue(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_async_op_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -925,7 +925,7 @@ static inline bool __wt_page_is_empty(WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE( static inline bool __wt_page_is_modified(WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_page_las_active(WT_SESSION_IMPL *session, WT_REF *ref) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_rec_need_split(WT_RECONCILE *r, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -static inline bool __wt_ref_cas_state_int(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t old_state, uint32_t new_state, const char *file, int line) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_ref_cas_state_int(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t old_state, uint32_t new_state, const char *func, int line) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_ref_is_root(WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_row_leaf_key_info(WT_PAGE *page, void *copy, WT_IKEY **ikeyp, WT_CELL **cellp, void *datap, size_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_row_leaf_value(WT_PAGE *page, WT_ROW *rip, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index 26c030009e6..8968c5e1010 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -386,6 +386,9 @@ struct __wt_connection_stats { int64_t cache_eviction_server_evicting; int64_t cache_eviction_server_slept; int64_t cache_eviction_slow; + int64_t cache_eviction_walk_leaf_notfound; + int64_t cache_eviction_walk_internal_wait; + int64_t cache_eviction_walk_internal_yield; int64_t cache_eviction_state; int64_t cache_eviction_target_page_lt10; int64_t cache_eviction_target_page_lt32; @@ -404,11 +407,17 @@ struct __wt_connection_stats { int64_t cache_eviction_worker_evicting; int64_t cache_eviction_worker_removed; int64_t cache_eviction_stable_state_workers; - int64_t cache_eviction_force_fail; - int64_t cache_eviction_force_fail_time; int64_t cache_eviction_walks_active; int64_t cache_eviction_walks_started; int64_t 
cache_eviction_force_retune; + int64_t cache_eviction_force_clean; + int64_t cache_eviction_force_clean_time; + int64_t cache_eviction_force_dirty; + int64_t cache_eviction_force_dirty_time; + int64_t cache_eviction_force_delete; + int64_t cache_eviction_force; + int64_t cache_eviction_force_fail; + int64_t cache_eviction_force_fail_time; int64_t cache_eviction_hazard; int64_t cache_hazard_checks; int64_t cache_hazard_walks; @@ -427,12 +436,9 @@ struct __wt_connection_stats { int64_t cache_eviction_deepen; int64_t cache_write_lookaside; int64_t cache_pages_inuse; - int64_t cache_eviction_force; - int64_t cache_eviction_force_time; - int64_t cache_eviction_force_delete; - int64_t cache_eviction_force_delete_time; int64_t cache_eviction_app; int64_t cache_eviction_pages_queued; + int64_t cache_eviction_pages_queued_post_lru; int64_t cache_eviction_pages_queued_urgent; int64_t cache_eviction_pages_queued_oldest; int64_t cache_read; diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h index 7e73bbacf8e..ac4c4f0e2d8 100644 --- a/src/third_party/wiredtiger/src/include/txn.h +++ b/src/third_party/wiredtiger/src/include/txn.h @@ -251,6 +251,7 @@ struct __wt_txn_op { /* AUTOMATIC FLAG VALUE GENERATION START */ #define WT_TXN_OP_KEY_REPEATED 0x1u +#define WT_TXN_OP_KEY_RESERVED 0x2u /* AUTOMATIC FLAG VALUE GENERATION STOP */ uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index f904e62cc05..abb2b1ccc66 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -13,13 +13,13 @@ */ static inline bool __wt_ref_cas_state_int(WT_SESSION_IMPL *session, WT_REF *ref, - uint32_t old_state, uint32_t new_state, const char *file, int line) + uint32_t old_state, uint32_t new_state, const char *func, int line) { bool cas_result; /* Parameters that are used in a macro for diagnostic builds */ WT_UNUSED(session); 
- WT_UNUSED(file); + WT_UNUSED(func); WT_UNUSED(line); cas_result = __wt_atomic_casv32(&ref->state, old_state, new_state); @@ -31,7 +31,7 @@ __wt_ref_cas_state_int(WT_SESSION_IMPL *session, WT_REF *ref, * updated. */ if (cas_result) - WT_REF_SAVE_STATE(ref, new_state, file, line); + WT_REF_SAVE_STATE(ref, new_state, func, line); #endif return (cas_result); } @@ -1081,7 +1081,8 @@ __wt_txn_search_check(WT_SESSION_IMPL *session) * verify this transaction has one. Same if it should never have * a read timestamp. */ - if (FLD_ISSET(btree->assert_flags, WT_ASSERT_READ_TS_ALWAYS) && + if (!F_ISSET(S2C(session), WT_CONN_RECOVERING) && + FLD_ISSET(btree->assert_flags, WT_ASSERT_READ_TS_ALWAYS) && !F_ISSET(txn, WT_TXN_PUBLIC_TS_READ)) WT_RET_MSG(session, EINVAL, "read_timestamp required and " "none set on this transaction"); diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index 484175dd43e..119d734d01f 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -498,11 +498,7 @@ struct __wt_cursor { * format type \c u). * * The WT_CURSOR::modify method can only be called from within an - * explicit transaction configured at a higher isolation level than - * \c read-uncommitted. Using \c read-committed isolation is allowed, - * but requires caution: reading a value, re-positioning the cursor - * and then modifying the value based on the initial read could lead - * to unexpected results. Using \c snapshot isolation is recommended. + * explicit transaction configured at the snapshot isolation level. * * The WT_CURSOR::modify method stores a change record in cache and * writes a change record to the log instead of the usual complete @@ -2180,7 +2176,10 @@ struct __wt_connection { /*! * Close a connection. * - * Any open sessions will be closed. + * Any open sessions will be closed. 
This will release the resources + * associated with the session handle, including rolling back any + * active transactions and closing any cursors that remain open in the + * session. * * @snippet ex_all.c Close a connection * @@ -5196,735 +5195,738 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CACHE_EVICTION_SERVER_SLEPT 1060 /*! cache: eviction server unable to reach eviction goal */ #define WT_STAT_CONN_CACHE_EVICTION_SLOW 1061 +/*! cache: eviction server waiting for a leaf page */ +#define WT_STAT_CONN_CACHE_EVICTION_WALK_LEAF_NOTFOUND 1062 +/*! cache: eviction server waiting for an internal page sleep (usec) */ +#define WT_STAT_CONN_CACHE_EVICTION_WALK_INTERNAL_WAIT 1063 +/*! cache: eviction server waiting for an internal page yields */ +#define WT_STAT_CONN_CACHE_EVICTION_WALK_INTERNAL_YIELD 1064 /*! cache: eviction state */ -#define WT_STAT_CONN_CACHE_EVICTION_STATE 1062 +#define WT_STAT_CONN_CACHE_EVICTION_STATE 1065 /*! cache: eviction walk target pages histogram - 0-9 */ -#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT10 1063 +#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT10 1066 /*! cache: eviction walk target pages histogram - 10-31 */ -#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT32 1064 +#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT32 1067 /*! cache: eviction walk target pages histogram - 128 and higher */ -#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_GE128 1065 +#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_GE128 1068 /*! cache: eviction walk target pages histogram - 32-63 */ -#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT64 1066 +#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT64 1069 /*! cache: eviction walk target pages histogram - 64-128 */ -#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT128 1067 +#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT128 1070 /*! 
cache: eviction walks abandoned */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ABANDONED 1068 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ABANDONED 1071 /*! cache: eviction walks gave up because they restarted their walk twice */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STOPPED 1069 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STOPPED 1072 /*! * cache: eviction walks gave up because they saw too many pages and * found no candidates */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_NO_TARGETS 1070 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_NO_TARGETS 1073 /*! * cache: eviction walks gave up because they saw too many pages and * found too few candidates */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 1071 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 1074 /*! cache: eviction walks reached end of tree */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ENDED 1072 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ENDED 1075 /*! cache: eviction walks started from root of tree */ -#define WT_STAT_CONN_CACHE_EVICTION_WALK_FROM_ROOT 1073 +#define WT_STAT_CONN_CACHE_EVICTION_WALK_FROM_ROOT 1076 /*! cache: eviction walks started from saved location in tree */ -#define WT_STAT_CONN_CACHE_EVICTION_WALK_SAVED_POS 1074 +#define WT_STAT_CONN_CACHE_EVICTION_WALK_SAVED_POS 1077 /*! cache: eviction worker thread active */ -#define WT_STAT_CONN_CACHE_EVICTION_ACTIVE_WORKERS 1075 +#define WT_STAT_CONN_CACHE_EVICTION_ACTIVE_WORKERS 1078 /*! cache: eviction worker thread created */ -#define WT_STAT_CONN_CACHE_EVICTION_WORKER_CREATED 1076 +#define WT_STAT_CONN_CACHE_EVICTION_WORKER_CREATED 1079 /*! cache: eviction worker thread evicting pages */ -#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1077 +#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1080 /*! cache: eviction worker thread removed */ -#define WT_STAT_CONN_CACHE_EVICTION_WORKER_REMOVED 1078 +#define WT_STAT_CONN_CACHE_EVICTION_WORKER_REMOVED 1081 /*! 
cache: eviction worker thread stable number */ -#define WT_STAT_CONN_CACHE_EVICTION_STABLE_STATE_WORKERS 1079 -/*! - * cache: failed eviction of pages that exceeded the in-memory maximum - * count - */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1080 -/*! - * cache: failed eviction of pages that exceeded the in-memory maximum - * time (usecs) - */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL_TIME 1081 +#define WT_STAT_CONN_CACHE_EVICTION_STABLE_STATE_WORKERS 1082 /*! cache: files with active eviction walks */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1082 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1083 /*! cache: files with new eviction walks started */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1083 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1084 /*! cache: force re-tuning of eviction workers once in a while */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_RETUNE 1084 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_RETUNE 1085 +/*! cache: forced eviction - pages evicted that were clean count */ +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_CLEAN 1086 +/*! cache: forced eviction - pages evicted that were clean time (usecs) */ +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_CLEAN_TIME 1087 +/*! cache: forced eviction - pages evicted that were dirty count */ +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DIRTY 1088 +/*! cache: forced eviction - pages evicted that were dirty time (usecs) */ +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DIRTY_TIME 1089 +/*! + * cache: forced eviction - pages selected because of too many deleted + * items count + */ +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1090 +/*! cache: forced eviction - pages selected count */ +#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1091 +/*! cache: forced eviction - pages selected unable to be evicted count */ +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1092 +/*! 
cache: forced eviction - pages selected unable to be evicted time */ +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL_TIME 1093 /*! cache: hazard pointer blocked page eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1085 +#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1094 /*! cache: hazard pointer check calls */ -#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1086 +#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1095 /*! cache: hazard pointer check entries walked */ -#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1087 +#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1096 /*! cache: hazard pointer maximum array length */ -#define WT_STAT_CONN_CACHE_HAZARD_MAX 1088 +#define WT_STAT_CONN_CACHE_HAZARD_MAX 1097 /*! cache: in-memory page passed criteria to be split */ -#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1089 +#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1098 /*! cache: in-memory page splits */ -#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1090 +#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1099 /*! cache: internal pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1091 +#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1100 /*! cache: internal pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1092 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1101 /*! cache: leaf pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1093 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1102 /*! cache: maximum bytes configured */ -#define WT_STAT_CONN_CACHE_BYTES_MAX 1094 +#define WT_STAT_CONN_CACHE_BYTES_MAX 1103 /*! cache: maximum page size at eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1095 +#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1104 /*! cache: modified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1096 +#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1105 /*! 
cache: modified pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1097 +#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1106 /*! cache: operations timed out waiting for space in cache */ -#define WT_STAT_CONN_CACHE_TIMED_OUT_OPS 1098 +#define WT_STAT_CONN_CACHE_TIMED_OUT_OPS 1107 /*! cache: overflow pages read into cache */ -#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1099 +#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1108 /*! cache: page split during eviction deepened the tree */ -#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1100 +#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1109 /*! cache: page written requiring cache overflow records */ -#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1101 +#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1110 /*! cache: pages currently held in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_INUSE 1102 -/*! cache: pages evicted because they exceeded the in-memory maximum count */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1103 -/*! - * cache: pages evicted because they exceeded the in-memory maximum time - * (usecs) - */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_TIME 1104 -/*! cache: pages evicted because they had chains of deleted items count */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1105 -/*! - * cache: pages evicted because they had chains of deleted items time - * (usecs) - */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE_TIME 1106 +#define WT_STAT_CONN_CACHE_PAGES_INUSE 1111 /*! cache: pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP 1107 +#define WT_STAT_CONN_CACHE_EVICTION_APP 1112 /*! cache: pages queued for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1108 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1113 +/*! cache: pages queued for eviction post lru sorting */ +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_POST_LRU 1114 /*! 
cache: pages queued for urgent eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1109 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1115 /*! cache: pages queued for urgent eviction during walk */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1110 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1116 /*! cache: pages read into cache */ -#define WT_STAT_CONN_CACHE_READ 1111 +#define WT_STAT_CONN_CACHE_READ 1117 /*! cache: pages read into cache after truncate */ -#define WT_STAT_CONN_CACHE_READ_DELETED 1112 +#define WT_STAT_CONN_CACHE_READ_DELETED 1118 /*! cache: pages read into cache after truncate in prepare state */ -#define WT_STAT_CONN_CACHE_READ_DELETED_PREPARED 1113 +#define WT_STAT_CONN_CACHE_READ_DELETED_PREPARED 1119 /*! cache: pages read into cache requiring cache overflow entries */ -#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1114 +#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1120 /*! cache: pages read into cache requiring cache overflow for checkpoint */ -#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_CHECKPOINT 1115 +#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_CHECKPOINT 1121 /*! cache: pages read into cache skipping older cache overflow entries */ -#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_SKIPPED 1116 +#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_SKIPPED 1122 /*! * cache: pages read into cache with skipped cache overflow entries * needed later */ -#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_DELAY 1117 +#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_DELAY 1123 /*! * cache: pages read into cache with skipped cache overflow entries * needed later by checkpoint */ -#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_DELAY_CHECKPOINT 1118 +#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_DELAY_CHECKPOINT 1124 /*! cache: pages requested from the cache */ -#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1119 +#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1125 /*! 
cache: pages seen by eviction walk */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1120 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1126 /*! cache: pages selected for eviction unable to be evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1121 +#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1127 /*! cache: pages walked for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_WALK 1122 +#define WT_STAT_CONN_CACHE_EVICTION_WALK 1128 /*! cache: pages written from cache */ -#define WT_STAT_CONN_CACHE_WRITE 1123 +#define WT_STAT_CONN_CACHE_WRITE 1129 /*! cache: pages written requiring in-memory restoration */ -#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1124 +#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1130 /*! cache: percentage overhead */ -#define WT_STAT_CONN_CACHE_OVERHEAD 1125 +#define WT_STAT_CONN_CACHE_OVERHEAD 1131 /*! cache: tracked bytes belonging to internal pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1126 +#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1132 /*! cache: tracked bytes belonging to leaf pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_LEAF 1127 +#define WT_STAT_CONN_CACHE_BYTES_LEAF 1133 /*! cache: tracked dirty bytes in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1128 +#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1134 /*! cache: tracked dirty pages in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1129 +#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1135 /*! cache: unmodified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1130 +#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1136 /*! capacity: background fsync file handles considered */ -#define WT_STAT_CONN_FSYNC_ALL_FH_TOTAL 1131 +#define WT_STAT_CONN_FSYNC_ALL_FH_TOTAL 1137 /*! capacity: background fsync file handles synced */ -#define WT_STAT_CONN_FSYNC_ALL_FH 1132 +#define WT_STAT_CONN_FSYNC_ALL_FH 1138 /*! capacity: background fsync time (msecs) */ -#define WT_STAT_CONN_FSYNC_ALL_TIME 1133 +#define WT_STAT_CONN_FSYNC_ALL_TIME 1139 /*! 
capacity: bytes read */ -#define WT_STAT_CONN_CAPACITY_BYTES_READ 1134 +#define WT_STAT_CONN_CAPACITY_BYTES_READ 1140 /*! capacity: bytes written for checkpoint */ -#define WT_STAT_CONN_CAPACITY_BYTES_CKPT 1135 +#define WT_STAT_CONN_CAPACITY_BYTES_CKPT 1141 /*! capacity: bytes written for eviction */ -#define WT_STAT_CONN_CAPACITY_BYTES_EVICT 1136 +#define WT_STAT_CONN_CAPACITY_BYTES_EVICT 1142 /*! capacity: bytes written for log */ -#define WT_STAT_CONN_CAPACITY_BYTES_LOG 1137 +#define WT_STAT_CONN_CAPACITY_BYTES_LOG 1143 /*! capacity: bytes written total */ -#define WT_STAT_CONN_CAPACITY_BYTES_WRITTEN 1138 +#define WT_STAT_CONN_CAPACITY_BYTES_WRITTEN 1144 /*! capacity: threshold to call fsync */ -#define WT_STAT_CONN_CAPACITY_THRESHOLD 1139 +#define WT_STAT_CONN_CAPACITY_THRESHOLD 1145 /*! capacity: time waiting due to total capacity (usecs) */ -#define WT_STAT_CONN_CAPACITY_TIME_TOTAL 1140 +#define WT_STAT_CONN_CAPACITY_TIME_TOTAL 1146 /*! capacity: time waiting during checkpoint (usecs) */ -#define WT_STAT_CONN_CAPACITY_TIME_CKPT 1141 +#define WT_STAT_CONN_CAPACITY_TIME_CKPT 1147 /*! capacity: time waiting during eviction (usecs) */ -#define WT_STAT_CONN_CAPACITY_TIME_EVICT 1142 +#define WT_STAT_CONN_CAPACITY_TIME_EVICT 1148 /*! capacity: time waiting during logging (usecs) */ -#define WT_STAT_CONN_CAPACITY_TIME_LOG 1143 +#define WT_STAT_CONN_CAPACITY_TIME_LOG 1149 /*! capacity: time waiting during read (usecs) */ -#define WT_STAT_CONN_CAPACITY_TIME_READ 1144 +#define WT_STAT_CONN_CAPACITY_TIME_READ 1150 /*! connection: auto adjusting condition resets */ -#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1145 +#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1151 /*! connection: auto adjusting condition wait calls */ -#define WT_STAT_CONN_COND_AUTO_WAIT 1146 +#define WT_STAT_CONN_COND_AUTO_WAIT 1152 /*! connection: detected system time went backwards */ -#define WT_STAT_CONN_TIME_TRAVEL 1147 +#define WT_STAT_CONN_TIME_TRAVEL 1153 /*! 
connection: files currently open */ -#define WT_STAT_CONN_FILE_OPEN 1148 +#define WT_STAT_CONN_FILE_OPEN 1154 /*! connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1149 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1155 /*! connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1150 +#define WT_STAT_CONN_MEMORY_FREE 1156 /*! connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1151 +#define WT_STAT_CONN_MEMORY_GROW 1157 /*! connection: pthread mutex condition wait calls */ -#define WT_STAT_CONN_COND_WAIT 1152 +#define WT_STAT_CONN_COND_WAIT 1158 /*! connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1153 +#define WT_STAT_CONN_RWLOCK_READ 1159 /*! connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1154 +#define WT_STAT_CONN_RWLOCK_WRITE 1160 /*! connection: total fsync I/Os */ -#define WT_STAT_CONN_FSYNC_IO 1155 +#define WT_STAT_CONN_FSYNC_IO 1161 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1156 +#define WT_STAT_CONN_READ_IO 1162 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1157 +#define WT_STAT_CONN_WRITE_IO 1163 /*! cursor: cached cursor count */ -#define WT_STAT_CONN_CURSOR_CACHED_COUNT 1158 +#define WT_STAT_CONN_CURSOR_CACHED_COUNT 1164 /*! cursor: cursor bulk loaded cursor insert calls */ -#define WT_STAT_CONN_CURSOR_INSERT_BULK 1159 +#define WT_STAT_CONN_CURSOR_INSERT_BULK 1165 /*! cursor: cursor close calls that result in cache */ -#define WT_STAT_CONN_CURSOR_CACHE 1160 +#define WT_STAT_CONN_CURSOR_CACHE 1166 /*! cursor: cursor create calls */ -#define WT_STAT_CONN_CURSOR_CREATE 1161 +#define WT_STAT_CONN_CURSOR_CREATE 1167 /*! cursor: cursor insert calls */ -#define WT_STAT_CONN_CURSOR_INSERT 1162 +#define WT_STAT_CONN_CURSOR_INSERT 1168 /*! cursor: cursor insert key and value bytes */ -#define WT_STAT_CONN_CURSOR_INSERT_BYTES 1163 +#define WT_STAT_CONN_CURSOR_INSERT_BYTES 1169 /*! 
cursor: cursor modify calls */ -#define WT_STAT_CONN_CURSOR_MODIFY 1164 +#define WT_STAT_CONN_CURSOR_MODIFY 1170 /*! cursor: cursor modify key and value bytes affected */ -#define WT_STAT_CONN_CURSOR_MODIFY_BYTES 1165 +#define WT_STAT_CONN_CURSOR_MODIFY_BYTES 1171 /*! cursor: cursor modify value bytes modified */ -#define WT_STAT_CONN_CURSOR_MODIFY_BYTES_TOUCH 1166 +#define WT_STAT_CONN_CURSOR_MODIFY_BYTES_TOUCH 1172 /*! cursor: cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT 1167 +#define WT_STAT_CONN_CURSOR_NEXT 1173 /*! cursor: cursor operation restarted */ -#define WT_STAT_CONN_CURSOR_RESTART 1168 +#define WT_STAT_CONN_CURSOR_RESTART 1174 /*! cursor: cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV 1169 +#define WT_STAT_CONN_CURSOR_PREV 1175 /*! cursor: cursor remove calls */ -#define WT_STAT_CONN_CURSOR_REMOVE 1170 +#define WT_STAT_CONN_CURSOR_REMOVE 1176 /*! cursor: cursor remove key bytes removed */ -#define WT_STAT_CONN_CURSOR_REMOVE_BYTES 1171 +#define WT_STAT_CONN_CURSOR_REMOVE_BYTES 1177 /*! cursor: cursor reserve calls */ -#define WT_STAT_CONN_CURSOR_RESERVE 1172 +#define WT_STAT_CONN_CURSOR_RESERVE 1178 /*! cursor: cursor reset calls */ -#define WT_STAT_CONN_CURSOR_RESET 1173 +#define WT_STAT_CONN_CURSOR_RESET 1179 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1174 +#define WT_STAT_CONN_CURSOR_SEARCH 1180 /*! cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1175 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1181 /*! cursor: cursor sweep buckets */ -#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1176 +#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1182 /*! cursor: cursor sweep cursors closed */ -#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1177 +#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1183 /*! cursor: cursor sweep cursors examined */ -#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1178 +#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1184 /*! 
cursor: cursor sweeps */ -#define WT_STAT_CONN_CURSOR_SWEEP 1179 +#define WT_STAT_CONN_CURSOR_SWEEP 1185 /*! cursor: cursor truncate calls */ -#define WT_STAT_CONN_CURSOR_TRUNCATE 1180 +#define WT_STAT_CONN_CURSOR_TRUNCATE 1186 /*! cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1181 +#define WT_STAT_CONN_CURSOR_UPDATE 1187 /*! cursor: cursor update key and value bytes */ -#define WT_STAT_CONN_CURSOR_UPDATE_BYTES 1182 +#define WT_STAT_CONN_CURSOR_UPDATE_BYTES 1188 /*! cursor: cursor update value size change */ -#define WT_STAT_CONN_CURSOR_UPDATE_BYTES_CHANGED 1183 +#define WT_STAT_CONN_CURSOR_UPDATE_BYTES_CHANGED 1189 /*! cursor: cursors reused from cache */ -#define WT_STAT_CONN_CURSOR_REOPEN 1184 +#define WT_STAT_CONN_CURSOR_REOPEN 1190 /*! cursor: open cursor count */ -#define WT_STAT_CONN_CURSOR_OPEN_COUNT 1185 +#define WT_STAT_CONN_CURSOR_OPEN_COUNT 1191 /*! data-handle: connection data handle size */ -#define WT_STAT_CONN_DH_CONN_HANDLE_SIZE 1186 +#define WT_STAT_CONN_DH_CONN_HANDLE_SIZE 1192 /*! data-handle: connection data handles currently active */ -#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1187 +#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1193 /*! data-handle: connection sweep candidate became referenced */ -#define WT_STAT_CONN_DH_SWEEP_REF 1188 +#define WT_STAT_CONN_DH_SWEEP_REF 1194 /*! data-handle: connection sweep dhandles closed */ -#define WT_STAT_CONN_DH_SWEEP_CLOSE 1189 +#define WT_STAT_CONN_DH_SWEEP_CLOSE 1195 /*! data-handle: connection sweep dhandles removed from hash list */ -#define WT_STAT_CONN_DH_SWEEP_REMOVE 1190 +#define WT_STAT_CONN_DH_SWEEP_REMOVE 1196 /*! data-handle: connection sweep time-of-death sets */ -#define WT_STAT_CONN_DH_SWEEP_TOD 1191 +#define WT_STAT_CONN_DH_SWEEP_TOD 1197 /*! data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_SWEEPS 1192 +#define WT_STAT_CONN_DH_SWEEPS 1198 /*! 
data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1193 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1199 /*! data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1194 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1200 /*! lock: checkpoint lock acquisitions */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1195 +#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1201 /*! lock: checkpoint lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1196 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1202 /*! lock: checkpoint lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1197 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1203 /*! * lock: commit timestamp queue lock application thread time waiting * (usecs) */ -#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_APPLICATION 1198 +#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_APPLICATION 1204 /*! lock: commit timestamp queue lock internal thread time waiting (usecs) */ -#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_INTERNAL 1199 +#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_INTERNAL 1205 /*! lock: commit timestamp queue read lock acquisitions */ -#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_READ_COUNT 1200 +#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_READ_COUNT 1206 /*! lock: commit timestamp queue write lock acquisitions */ -#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WRITE_COUNT 1201 +#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WRITE_COUNT 1207 /*! lock: dhandle lock application thread time waiting (usecs) */ -#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1202 +#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1208 /*! lock: dhandle lock internal thread time waiting (usecs) */ -#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1203 +#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1209 /*! 
lock: dhandle read lock acquisitions */ -#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1204 +#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1210 /*! lock: dhandle write lock acquisitions */ -#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1205 +#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1211 /*! lock: metadata lock acquisitions */ -#define WT_STAT_CONN_LOCK_METADATA_COUNT 1206 +#define WT_STAT_CONN_LOCK_METADATA_COUNT 1212 /*! lock: metadata lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1207 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1213 /*! lock: metadata lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1208 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1214 /*! * lock: read timestamp queue lock application thread time waiting * (usecs) */ -#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1209 +#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1215 /*! lock: read timestamp queue lock internal thread time waiting (usecs) */ -#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1210 +#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1216 /*! lock: read timestamp queue read lock acquisitions */ -#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1211 +#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1217 /*! lock: read timestamp queue write lock acquisitions */ -#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1212 +#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1218 /*! lock: schema lock acquisitions */ -#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1213 +#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1219 /*! lock: schema lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1214 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1220 /*! 
lock: schema lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1215 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1221 /*! * lock: table lock application thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1216 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1222 /*! * lock: table lock internal thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1217 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1223 /*! lock: table read lock acquisitions */ -#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1218 +#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1224 /*! lock: table write lock acquisitions */ -#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1219 +#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1225 /*! lock: txn global lock application thread time waiting (usecs) */ -#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1220 +#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1226 /*! lock: txn global lock internal thread time waiting (usecs) */ -#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1221 +#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1227 /*! lock: txn global read lock acquisitions */ -#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1222 +#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1228 /*! lock: txn global write lock acquisitions */ -#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1223 +#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1229 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1224 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1230 /*! log: force archive time sleeping (usecs) */ -#define WT_STAT_CONN_LOG_FORCE_ARCHIVE_SLEEP 1225 +#define WT_STAT_CONN_LOG_FORCE_ARCHIVE_SLEEP 1231 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1226 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1232 /*! 
log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1227 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1233 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1228 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1234 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1229 +#define WT_STAT_CONN_LOG_FLUSH 1235 /*! log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1230 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1236 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1231 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1237 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1232 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1238 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1233 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1239 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1234 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1240 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1235 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1241 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1236 +#define WT_STAT_CONN_LOG_SCANS 1242 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1237 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1243 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1238 +#define WT_STAT_CONN_LOG_WRITE_LSN 1244 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1239 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1245 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1240 +#define WT_STAT_CONN_LOG_SYNC 1246 /*! log: log sync time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DURATION 1241 +#define WT_STAT_CONN_LOG_SYNC_DURATION 1247 /*! 
log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1242 +#define WT_STAT_CONN_LOG_SYNC_DIR 1248 /*! log: log sync_dir time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1243 +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1249 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1244 +#define WT_STAT_CONN_LOG_WRITES 1250 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1245 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1251 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1246 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1252 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1247 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1253 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1248 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1254 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1249 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1255 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1250 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1256 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1251 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1257 /*! log: slot close lost race */ -#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1252 +#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1258 /*! log: slot close unbuffered waits */ -#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1253 +#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1259 /*! log: slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1254 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1260 /*! log: slot join atomic update races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1255 +#define WT_STAT_CONN_LOG_SLOT_RACES 1261 /*! log: slot join calls atomic updates raced */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1256 +#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1262 /*! 
log: slot join calls did not yield */ -#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1257 +#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1263 /*! log: slot join calls found active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1258 +#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1264 /*! log: slot join calls slept */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1259 +#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1265 /*! log: slot join calls yielded */ -#define WT_STAT_CONN_LOG_SLOT_YIELD 1260 +#define WT_STAT_CONN_LOG_SLOT_YIELD 1266 /*! log: slot join found active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1261 +#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1267 /*! log: slot joins yield time (usecs) */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1262 +#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1268 /*! log: slot transitions unable to find free slot */ -#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1263 +#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1269 /*! log: slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1264 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1270 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1265 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1271 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1266 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1272 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1267 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1273 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1268 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1274 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1269 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1275 /*! perf: file system read latency histogram (bucket 1) - 10-49ms */ -#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1270 +#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1276 /*! 
perf: file system read latency histogram (bucket 2) - 50-99ms */ -#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1271 +#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1277 /*! perf: file system read latency histogram (bucket 3) - 100-249ms */ -#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1272 +#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1278 /*! perf: file system read latency histogram (bucket 4) - 250-499ms */ -#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1273 +#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1279 /*! perf: file system read latency histogram (bucket 5) - 500-999ms */ -#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1274 +#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1280 /*! perf: file system read latency histogram (bucket 6) - 1000ms+ */ -#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1275 +#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1281 /*! perf: file system write latency histogram (bucket 1) - 10-49ms */ -#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1276 +#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1282 /*! perf: file system write latency histogram (bucket 2) - 50-99ms */ -#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1277 +#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1283 /*! perf: file system write latency histogram (bucket 3) - 100-249ms */ -#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1278 +#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1284 /*! perf: file system write latency histogram (bucket 4) - 250-499ms */ -#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1279 +#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1285 /*! perf: file system write latency histogram (bucket 5) - 500-999ms */ -#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1280 +#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1286 /*! 
perf: file system write latency histogram (bucket 6) - 1000ms+ */ -#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1281 +#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1287 /*! perf: operation read latency histogram (bucket 1) - 100-249us */ -#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1282 +#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1288 /*! perf: operation read latency histogram (bucket 2) - 250-499us */ -#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1283 +#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1289 /*! perf: operation read latency histogram (bucket 3) - 500-999us */ -#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1284 +#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1290 /*! perf: operation read latency histogram (bucket 4) - 1000-9999us */ -#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1285 +#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1291 /*! perf: operation read latency histogram (bucket 5) - 10000us+ */ -#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1286 +#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1292 /*! perf: operation write latency histogram (bucket 1) - 100-249us */ -#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1287 +#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1293 /*! perf: operation write latency histogram (bucket 2) - 250-499us */ -#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1288 +#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1294 /*! perf: operation write latency histogram (bucket 3) - 500-999us */ -#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1289 +#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1295 /*! perf: operation write latency histogram (bucket 4) - 1000-9999us */ -#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1290 +#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1296 /*! 
perf: operation write latency histogram (bucket 5) - 10000us+ */ -#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1291 +#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1297 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1292 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1298 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1293 +#define WT_STAT_CONN_REC_PAGES 1299 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1294 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1300 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1295 +#define WT_STAT_CONN_REC_PAGE_DELETE 1301 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1296 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1302 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1297 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1303 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1298 +#define WT_STAT_CONN_SESSION_OPEN 1304 /*! session: session query timestamp calls */ -#define WT_STAT_CONN_SESSION_QUERY_TS 1299 +#define WT_STAT_CONN_SESSION_QUERY_TS 1305 /*! session: table alter failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1300 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1306 /*! session: table alter successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1301 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1307 /*! session: table alter unchanged and skipped */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1302 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1308 /*! session: table compact failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1303 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1309 /*! 
session: table compact successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1304 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1310 /*! session: table create failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1305 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1311 /*! session: table create successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1306 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1312 /*! session: table drop failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1307 +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1313 /*! session: table drop successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1308 +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1314 /*! session: table import failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_IMPORT_FAIL 1309 +#define WT_STAT_CONN_SESSION_TABLE_IMPORT_FAIL 1315 /*! session: table import successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_IMPORT_SUCCESS 1310 +#define WT_STAT_CONN_SESSION_TABLE_IMPORT_SUCCESS 1316 /*! session: table rebalance failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1311 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1317 /*! session: table rebalance successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1312 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1318 /*! session: table rename failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1313 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1319 /*! session: table rename successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1314 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1320 /*! session: table salvage failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1315 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1321 /*! 
session: table salvage successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1316 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1322 /*! session: table truncate failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1317 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1323 /*! session: table truncate successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1318 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1324 /*! session: table verify failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1319 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1325 /*! session: table verify successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1320 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1326 /*! thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1321 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1327 /*! thread-state: active filesystem read calls */ -#define WT_STAT_CONN_THREAD_READ_ACTIVE 1322 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1328 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1323 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1329 /*! thread-yield: application thread time evicting (usecs) */ -#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1324 +#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1330 /*! thread-yield: application thread time waiting for cache (usecs) */ -#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1325 +#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1331 /*! * thread-yield: connection close blocked waiting for transaction state * stabilization */ -#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1326 +#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1332 /*! thread-yield: connection close yielded for lsm manager shutdown */ -#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1327 +#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1333 /*! 
thread-yield: data handle lock yielded */ -#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1328 +#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1334 /*! * thread-yield: get reference for page index and slot time sleeping * (usecs) */ -#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1329 +#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1335 /*! thread-yield: log server sync yielded for log write */ -#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1330 +#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1336 /*! thread-yield: page access yielded due to prepare state change */ -#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1331 +#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1337 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1332 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1338 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1333 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1339 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1334 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1340 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1335 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1341 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1336 +#define WT_STAT_CONN_PAGE_SLEEP 1342 /*! * thread-yield: page delete rollback time sleeping for state change * (usecs) */ -#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1337 +#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1343 /*! thread-yield: page reconciliation yielded due to child modification */ -#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1338 +#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1344 /*! transaction: Number of prepared updates */ -#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COUNT 1339 +#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COUNT 1345 /*! 
transaction: Number of prepared updates added to cache overflow */ -#define WT_STAT_CONN_TXN_PREPARED_UPDATES_LOOKASIDE_INSERTS 1340 +#define WT_STAT_CONN_TXN_PREPARED_UPDATES_LOOKASIDE_INSERTS 1346 /*! transaction: Number of prepared updates resolved */ -#define WT_STAT_CONN_TXN_PREPARED_UPDATES_RESOLVED 1341 +#define WT_STAT_CONN_TXN_PREPARED_UPDATES_RESOLVED 1347 /*! transaction: commit timestamp queue entries walked */ -#define WT_STAT_CONN_TXN_COMMIT_QUEUE_WALKED 1342 +#define WT_STAT_CONN_TXN_COMMIT_QUEUE_WALKED 1348 /*! transaction: commit timestamp queue insert to empty */ -#define WT_STAT_CONN_TXN_COMMIT_QUEUE_EMPTY 1343 +#define WT_STAT_CONN_TXN_COMMIT_QUEUE_EMPTY 1349 /*! transaction: commit timestamp queue inserts to head */ -#define WT_STAT_CONN_TXN_COMMIT_QUEUE_HEAD 1344 +#define WT_STAT_CONN_TXN_COMMIT_QUEUE_HEAD 1350 /*! transaction: commit timestamp queue inserts total */ -#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1345 +#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1351 /*! transaction: commit timestamp queue length */ -#define WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN 1346 +#define WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN 1352 /*! transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1347 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1353 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1348 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1354 /*! transaction: prepared transactions */ -#define WT_STAT_CONN_TXN_PREPARE 1349 +#define WT_STAT_CONN_TXN_PREPARE 1355 /*! transaction: prepared transactions committed */ -#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1350 +#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1356 /*! transaction: prepared transactions currently active */ -#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1351 +#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1357 /*! 
transaction: prepared transactions rolled back */ -#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1352 +#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1358 /*! transaction: query timestamp calls */ -#define WT_STAT_CONN_TXN_QUERY_TS 1353 +#define WT_STAT_CONN_TXN_QUERY_TS 1359 /*! transaction: read timestamp queue entries walked */ -#define WT_STAT_CONN_TXN_READ_QUEUE_WALKED 1354 +#define WT_STAT_CONN_TXN_READ_QUEUE_WALKED 1360 /*! transaction: read timestamp queue insert to empty */ -#define WT_STAT_CONN_TXN_READ_QUEUE_EMPTY 1355 +#define WT_STAT_CONN_TXN_READ_QUEUE_EMPTY 1361 /*! transaction: read timestamp queue inserts to head */ -#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1356 +#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1362 /*! transaction: read timestamp queue inserts total */ -#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1357 +#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1363 /*! transaction: read timestamp queue length */ -#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1358 +#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1364 /*! transaction: rollback to stable calls */ -#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE 1359 +#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE 1365 /*! transaction: rollback to stable updates aborted */ -#define WT_STAT_CONN_TXN_ROLLBACK_UPD_ABORTED 1360 +#define WT_STAT_CONN_TXN_ROLLBACK_UPD_ABORTED 1366 /*! transaction: rollback to stable updates removed from cache overflow */ -#define WT_STAT_CONN_TXN_ROLLBACK_LAS_REMOVED 1361 +#define WT_STAT_CONN_TXN_ROLLBACK_LAS_REMOVED 1367 /*! transaction: set timestamp calls */ -#define WT_STAT_CONN_TXN_SET_TS 1362 +#define WT_STAT_CONN_TXN_SET_TS 1368 /*! transaction: set timestamp commit calls */ -#define WT_STAT_CONN_TXN_SET_TS_COMMIT 1363 +#define WT_STAT_CONN_TXN_SET_TS_COMMIT 1369 /*! transaction: set timestamp commit updates */ -#define WT_STAT_CONN_TXN_SET_TS_COMMIT_UPD 1364 +#define WT_STAT_CONN_TXN_SET_TS_COMMIT_UPD 1370 /*! 
transaction: set timestamp oldest calls */ -#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1365 +#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1371 /*! transaction: set timestamp oldest updates */ -#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1366 +#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1372 /*! transaction: set timestamp stable calls */ -#define WT_STAT_CONN_TXN_SET_TS_STABLE 1367 +#define WT_STAT_CONN_TXN_SET_TS_STABLE 1373 /*! transaction: set timestamp stable updates */ -#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1368 +#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1374 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1369 +#define WT_STAT_CONN_TXN_BEGIN 1375 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1370 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1376 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1371 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1377 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1372 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1378 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1373 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1379 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1374 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1380 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1375 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1381 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1376 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1382 /*! 
transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1377 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1383 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1378 +#define WT_STAT_CONN_TXN_CHECKPOINT 1384 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1379 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1385 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1380 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1386 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1381 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1387 /*! * transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1382 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1388 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1383 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1389 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1384 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1390 /*! * transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1385 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1391 /*! transaction: transaction range of timestamps currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1386 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1392 /*! transaction: transaction range of timestamps pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1387 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1393 /*! 
* transaction: transaction range of timestamps pinned by the oldest * active read timestamp */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1388 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1394 /*! * transaction: transaction range of timestamps pinned by the oldest * timestamp */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1389 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1395 /*! transaction: transaction read timestamp of the oldest active reader */ -#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1390 +#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1396 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1391 +#define WT_STAT_CONN_TXN_SYNC 1397 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1392 +#define WT_STAT_CONN_TXN_COMMIT 1398 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1393 +#define WT_STAT_CONN_TXN_ROLLBACK 1399 /*! transaction: update conflicts */ -#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1394 +#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1400 /*! 
* @} diff --git a/src/third_party/wiredtiger/src/lsm/lsm_manager.c b/src/third_party/wiredtiger/src/lsm/lsm_manager.c index d1eceaf2afd..aa5f08034d4 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_manager.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_manager.c @@ -339,12 +339,6 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) } WT_STAT_CONN_INCRV(session, lsm_work_units_discarded, removed); - /* Free resources that are allocated in connection initialize */ - __wt_spin_destroy(session, &manager->switch_lock); - __wt_spin_destroy(session, &manager->app_lock); - __wt_spin_destroy(session, &manager->manager_lock); - __wt_cond_destroy(session, &manager->work_cond); - return (ret); } diff --git a/src/third_party/wiredtiger/src/meta/meta_ckpt.c b/src/third_party/wiredtiger/src/meta/meta_ckpt.c index 35b6389bced..49b2d2a2a2d 100644 --- a/src/third_party/wiredtiger/src/meta/meta_ckpt.c +++ b/src/third_party/wiredtiger/src/meta/meta_ckpt.c @@ -351,8 +351,12 @@ __wt_meta_ckptlist_get(WT_SESSION_IMPL *session, __wt_config_getones(session, config, "checkpoint", &v)) == 0) { __wt_config_subinit(session, &ckptconf, &v); for (; __wt_config_next(&ckptconf, &k, &v) == 0; ++slot) { + /* + * Allocate a slot for a new value, plus a slot to mark + * the end. + */ WT_ERR(__wt_realloc_def( - session, &allocated, slot + 1, &ckptbase)); + session, &allocated, slot + 2, &ckptbase)); ckpt = &ckptbase[slot]; WT_ERR(__ckpt_load(session, &k, &v, ckpt)); @@ -367,9 +371,6 @@ __wt_meta_ckptlist_get(WT_SESSION_IMPL *session, if (update) { /* - * Allocate an extra slot for a new value, plus a slot to mark - * mark the end. - * * This isn't clean, but there's necessary cooperation between * the schema layer (that maintains the list of checkpoints), * the btree layer (that knows when the root page is written, @@ -377,6 +378,8 @@ __wt_meta_ckptlist_get(WT_SESSION_IMPL *session, * actually creates the checkpoint). 
All of that cooperation is * handled in the array of checkpoint structures referenced from * the WT_BTREE structure. + * + * Allocate a slot for a new value, plus a slot to mark the end. */ WT_ERR(__wt_realloc_def( session, &allocated, slot + 2, &ckptbase)); diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c index 2766e74f92c..9222272767b 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c @@ -424,9 +424,9 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, r->unstable_txn = first_upd->txnid; if (first_ts_upd != NULL) { WT_ASSERT(session, - first_ts_upd->prepare_state == - WT_PREPARE_INPROGRESS || - first_ts_upd->start_ts <= first_ts_upd->durable_ts); + first_ts_upd->prepare_state == + WT_PREPARE_INPROGRESS || + first_ts_upd->start_ts <= first_ts_upd->durable_ts); if (r->unstable_timestamp < first_ts_upd->start_ts) r->unstable_timestamp = first_ts_upd->start_ts; @@ -452,8 +452,8 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, * both will be set. 
*/ WT_ASSERT(session, - upd->prepare_state == WT_PREPARE_INPROGRESS || - upd->durable_ts >= upd->start_ts); + upd->prepare_state == WT_PREPARE_INPROGRESS || + upd->durable_ts >= upd->start_ts); if (r->unstable_timestamp > upd->start_ts) r->unstable_timestamp = upd->start_ts; diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index 7193e6f2b2c..5a78f0df88d 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -639,8 +639,7 @@ __rec_init(WT_SESSION_IMPL *session, else r->las_skew_newest = LF_ISSET(WT_REC_LOOKASIDE) && LF_ISSET(WT_REC_VISIBLE_ALL); - r->las_skew_newest = - LF_ISSET(WT_REC_LOOKASIDE) && LF_ISSET(WT_REC_VISIBLE_ALL); + if (r->las_skew_newest && !__wt_btree_immediately_durable(session) && txn_global->has_stable_timestamp && diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c index 4e1742a89db..7db62d29738 100644 --- a/src/third_party/wiredtiger/src/session/session_api.c +++ b/src/third_party/wiredtiger/src/session/session_api.c @@ -1127,6 +1127,8 @@ __session_import(WT_SESSION *wt_session, const char *uri, const char *config) WT_UNUSED(config); + value = NULL; + session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL_NOCONF(session, import); @@ -1148,6 +1150,7 @@ err: WT_STAT_CONN_INCR(session, session_table_import_fail); else WT_STAT_CONN_INCR(session, session_table_import_success); + __wt_free(session, value); API_END_RET_NOTFOUND_MAP(session, ret); } diff --git a/src/third_party/wiredtiger/src/support/hazard.c b/src/third_party/wiredtiger/src/support/hazard.c index 52f77b5b232..a44f349e3e5 100644 --- a/src/third_party/wiredtiger/src/support/hazard.c +++ b/src/third_party/wiredtiger/src/support/hazard.c @@ -208,9 +208,7 @@ __wt_hazard_clear(WT_SESSION_IMPL *session, WT_REF *ref) * We don't publish the hazard pointer clear in the * general 
case. It's not required for correctness; * it gives an eviction thread faster access to the - * page were the page selected for eviction, but the - * generation number was just set, it's unlikely the - * page will be selected for eviction. + * page were the page selected for eviction. */ hp->ref = NULL; @@ -330,6 +328,10 @@ __wt_hazard_check(WT_SESSION_IMPL *session, WT_SESSION_IMPL *s; uint32_t i, j, hazard_inuse, max, session_cnt, walk_cnt; + /* If a file can never be evicted, hazard pointers aren't required. */ + if (F_ISSET(S2BT(session), WT_BTREE_IN_MEMORY)) + return (NULL); + conn = S2C(session); WT_STAT_CONN_INCR(session, cache_hazard_checks); diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index e76930c3b38..95e04dd7d7c 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -827,6 +827,9 @@ static const char * const __stats_connection_desc[] = { "cache: eviction server evicting pages", "cache: eviction server slept, because we did not make progress with eviction", "cache: eviction server unable to reach eviction goal", + "cache: eviction server waiting for a leaf page", + "cache: eviction server waiting for an internal page sleep (usec)", + "cache: eviction server waiting for an internal page yields", "cache: eviction state", "cache: eviction walk target pages histogram - 0-9", "cache: eviction walk target pages histogram - 10-31", @@ -845,11 +848,17 @@ static const char * const __stats_connection_desc[] = { "cache: eviction worker thread evicting pages", "cache: eviction worker thread removed", "cache: eviction worker thread stable number", - "cache: failed eviction of pages that exceeded the in-memory maximum count", - "cache: failed eviction of pages that exceeded the in-memory maximum time (usecs)", "cache: files with active eviction walks", "cache: files with new eviction walks started", "cache: force re-tuning of eviction workers once 
in a while", + "cache: forced eviction - pages evicted that were clean count", + "cache: forced eviction - pages evicted that were clean time (usecs)", + "cache: forced eviction - pages evicted that were dirty count", + "cache: forced eviction - pages evicted that were dirty time (usecs)", + "cache: forced eviction - pages selected because of too many deleted items count", + "cache: forced eviction - pages selected count", + "cache: forced eviction - pages selected unable to be evicted count", + "cache: forced eviction - pages selected unable to be evicted time", "cache: hazard pointer blocked page eviction", "cache: hazard pointer check calls", "cache: hazard pointer check entries walked", @@ -868,12 +877,9 @@ static const char * const __stats_connection_desc[] = { "cache: page split during eviction deepened the tree", "cache: page written requiring cache overflow records", "cache: pages currently held in the cache", - "cache: pages evicted because they exceeded the in-memory maximum count", - "cache: pages evicted because they exceeded the in-memory maximum time (usecs)", - "cache: pages evicted because they had chains of deleted items count", - "cache: pages evicted because they had chains of deleted items time (usecs)", "cache: pages evicted by application threads", "cache: pages queued for eviction", + "cache: pages queued for eviction post lru sorting", "cache: pages queued for urgent eviction", "cache: pages queued for urgent eviction during walk", "cache: pages read into cache", @@ -1264,6 +1270,9 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cache_eviction_server_evicting = 0; stats->cache_eviction_server_slept = 0; stats->cache_eviction_slow = 0; + stats->cache_eviction_walk_leaf_notfound = 0; + stats->cache_eviction_walk_internal_wait = 0; + stats->cache_eviction_walk_internal_yield = 0; /* not clearing cache_eviction_state */ stats->cache_eviction_target_page_lt10 = 0; stats->cache_eviction_target_page_lt32 = 0; @@ -1282,11 
+1291,17 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cache_eviction_worker_evicting = 0; stats->cache_eviction_worker_removed = 0; /* not clearing cache_eviction_stable_state_workers */ - stats->cache_eviction_force_fail = 0; - stats->cache_eviction_force_fail_time = 0; /* not clearing cache_eviction_walks_active */ stats->cache_eviction_walks_started = 0; stats->cache_eviction_force_retune = 0; + stats->cache_eviction_force_clean = 0; + stats->cache_eviction_force_clean_time = 0; + stats->cache_eviction_force_dirty = 0; + stats->cache_eviction_force_dirty_time = 0; + stats->cache_eviction_force_delete = 0; + stats->cache_eviction_force = 0; + stats->cache_eviction_force_fail = 0; + stats->cache_eviction_force_fail_time = 0; stats->cache_eviction_hazard = 0; stats->cache_hazard_checks = 0; stats->cache_hazard_walks = 0; @@ -1305,12 +1320,9 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cache_eviction_deepen = 0; stats->cache_write_lookaside = 0; /* not clearing cache_pages_inuse */ - stats->cache_eviction_force = 0; - stats->cache_eviction_force_time = 0; - stats->cache_eviction_force_delete = 0; - stats->cache_eviction_force_delete_time = 0; stats->cache_eviction_app = 0; stats->cache_eviction_pages_queued = 0; + stats->cache_eviction_pages_queued_post_lru = 0; stats->cache_eviction_pages_queued_urgent = 0; stats->cache_eviction_pages_queued_oldest = 0; stats->cache_read = 0; @@ -1704,6 +1716,12 @@ __wt_stat_connection_aggregate( to->cache_eviction_server_slept += WT_STAT_READ(from, cache_eviction_server_slept); to->cache_eviction_slow += WT_STAT_READ(from, cache_eviction_slow); + to->cache_eviction_walk_leaf_notfound += + WT_STAT_READ(from, cache_eviction_walk_leaf_notfound); + to->cache_eviction_walk_internal_wait += + WT_STAT_READ(from, cache_eviction_walk_internal_wait); + to->cache_eviction_walk_internal_yield += + WT_STAT_READ(from, cache_eviction_walk_internal_yield); to->cache_eviction_state += 
WT_STAT_READ(from, cache_eviction_state); to->cache_eviction_target_page_lt10 += WT_STAT_READ(from, cache_eviction_target_page_lt10); @@ -1739,16 +1757,27 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, cache_eviction_worker_removed); to->cache_eviction_stable_state_workers += WT_STAT_READ(from, cache_eviction_stable_state_workers); - to->cache_eviction_force_fail += - WT_STAT_READ(from, cache_eviction_force_fail); - to->cache_eviction_force_fail_time += - WT_STAT_READ(from, cache_eviction_force_fail_time); to->cache_eviction_walks_active += WT_STAT_READ(from, cache_eviction_walks_active); to->cache_eviction_walks_started += WT_STAT_READ(from, cache_eviction_walks_started); to->cache_eviction_force_retune += WT_STAT_READ(from, cache_eviction_force_retune); + to->cache_eviction_force_clean += + WT_STAT_READ(from, cache_eviction_force_clean); + to->cache_eviction_force_clean_time += + WT_STAT_READ(from, cache_eviction_force_clean_time); + to->cache_eviction_force_dirty += + WT_STAT_READ(from, cache_eviction_force_dirty); + to->cache_eviction_force_dirty_time += + WT_STAT_READ(from, cache_eviction_force_dirty_time); + to->cache_eviction_force_delete += + WT_STAT_READ(from, cache_eviction_force_delete); + to->cache_eviction_force += WT_STAT_READ(from, cache_eviction_force); + to->cache_eviction_force_fail += + WT_STAT_READ(from, cache_eviction_force_fail); + to->cache_eviction_force_fail_time += + WT_STAT_READ(from, cache_eviction_force_fail_time); to->cache_eviction_hazard += WT_STAT_READ(from, cache_eviction_hazard); to->cache_hazard_checks += WT_STAT_READ(from, cache_hazard_checks); @@ -1777,16 +1806,11 @@ __wt_stat_connection_aggregate( to->cache_write_lookaside += WT_STAT_READ(from, cache_write_lookaside); to->cache_pages_inuse += WT_STAT_READ(from, cache_pages_inuse); - to->cache_eviction_force += WT_STAT_READ(from, cache_eviction_force); - to->cache_eviction_force_time += - WT_STAT_READ(from, cache_eviction_force_time); - to->cache_eviction_force_delete += 
- WT_STAT_READ(from, cache_eviction_force_delete); - to->cache_eviction_force_delete_time += - WT_STAT_READ(from, cache_eviction_force_delete_time); to->cache_eviction_app += WT_STAT_READ(from, cache_eviction_app); to->cache_eviction_pages_queued += WT_STAT_READ(from, cache_eviction_pages_queued); + to->cache_eviction_pages_queued_post_lru += + WT_STAT_READ(from, cache_eviction_pages_queued_post_lru); to->cache_eviction_pages_queued_urgent += WT_STAT_READ(from, cache_eviction_pages_queued_urgent); to->cache_eviction_pages_queued_oldest += diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index 53bde4a499b..f7c94399710 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -804,13 +804,13 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) int64_t resolved_update_count, visited_update_count; uint32_t fileid; u_int i; - bool locked, prepare, readonly, update_timestamp; + bool locked, prepare, readonly, skip_update_assert, update_timestamp; txn = &session->txn; conn = S2C(session); txn_global = &conn->txn_global; prev_commit_timestamp = 0; /* -Wconditional-uninitialized */ - locked = false; + locked = skip_update_assert = false; resolved_update_count = visited_update_count = 0; WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING)); @@ -843,7 +843,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) "transaction"); WT_ASSERT(session, - txn->prepare_timestamp <= txn->commit_timestamp); + txn->prepare_timestamp <= txn->commit_timestamp); } else { if (F_ISSET(txn, WT_TXN_HAS_TS_PREPARE)) WT_ERR_MSG(session, EINVAL, @@ -987,10 +987,15 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) * it would have happened on a previous * modification in this txn. 
*/ - if (!F_ISSET(op, WT_TXN_OP_KEY_REPEATED)) + if (!F_ISSET(op, WT_TXN_OP_KEY_REPEATED)) { + skip_update_assert = + skip_update_assert || + F_ISSET(op, WT_TXN_OP_KEY_RESERVED); WT_ERR(__wt_txn_resolve_prepared_op( session, op, true, &resolved_update_count)); + } + /* * We should resolve at least one or more * updates each time we call @@ -1015,7 +1020,8 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) __wt_txn_op_free(session, op); } - WT_ASSERT(session, resolved_update_count == visited_update_count); + WT_ASSERT(session, skip_update_assert || + resolved_update_count == visited_update_count); WT_STAT_CONN_INCRV(session, txn_prepared_updates_resolved, resolved_update_count); @@ -1123,8 +1129,14 @@ __wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[]) WT_RET(__wt_session_copy_values(session)); } - /* Prepare updates. */ - for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) { + /* + * Prepare updates, traverse the modification array in reverse order + * so that we visit the update chain in newest to oldest order + * allowing us to set the key repeated flag with reserved updates in + * the chain. + */ + for (i = txn->mod_count; i > 0; i--) { + op = &txn->mod[i - 1]; /* Assert it's not an update to the lookaside file. */ WT_ASSERT(session, S2C(session)->cache->las_fileid == 0 || !F_ISSET(op->btree, WT_BTREE_LOOKASIDE)); @@ -1165,9 +1177,23 @@ __wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[]) * Set the key repeated flag which tells us that we've * got multiple updates to the same key by the same txn. * This is later used in txn commit. + * + * When we see a reserved update we set the + * WT_UPDATE_RESERVED flag instead. We do this as we + * cannot know if our current update should specify the + * key repeated flag as we don't want to traverse the + * entire update chain to find out. i.e. if there is + * an update with our txnid after the reserved update + * we should set key repeated, but if there isn't we + * shouldn't. 
*/ - if (upd->next != NULL && upd->txnid == upd->next->txnid) - F_SET(op, WT_TXN_OP_KEY_REPEATED); + if (upd->next != NULL && + upd->txnid == upd->next->txnid) { + if (upd->next->type == WT_UPDATE_RESERVE) + F_SET(op, WT_TXN_OP_KEY_RESERVED); + else + F_SET(op, WT_TXN_OP_KEY_REPEATED); + } break; case WT_TXN_OP_REF_DELETE: __wt_txn_op_apply_prepare_state( @@ -1209,12 +1235,13 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) WT_UPDATE *upd; int64_t resolved_update_count, visited_update_count; u_int i; - bool readonly; + bool readonly, skip_update_assert; WT_UNUSED(cfg); resolved_update_count = visited_update_count = 0; txn = &session->txn; readonly = txn->mod_count == 0; + skip_update_assert = false; WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING)); /* Rollback notification. */ @@ -1254,10 +1281,14 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) * it would have happened on a previous * modification in this txn. */ - if (!F_ISSET(op, WT_TXN_OP_KEY_REPEATED)) + if (!F_ISSET(op, WT_TXN_OP_KEY_REPEATED)) { + skip_update_assert = + skip_update_assert || + F_ISSET(op, WT_TXN_OP_KEY_RESERVED); WT_RET(__wt_txn_resolve_prepared_op( session, op, false, &resolved_update_count)); + } /* * We should resolve at least one or more * updates each time we call @@ -1290,7 +1321,8 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) __wt_txn_op_free(session, op); } - WT_ASSERT(session, resolved_update_count == visited_update_count); + WT_ASSERT(session, skip_update_assert || + resolved_update_count == visited_update_count); WT_STAT_CONN_INCRV(session, txn_prepared_updates_resolved, resolved_update_count); diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h index 3d4375cd137..087c2d4be19 100644 --- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h +++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h @@ -62,7 +62,7 @@ typedef 
struct { int nworkers; /* Number workers configured */ int ntables; /* Number tables configured */ int ntables_created; /* Number tables opened */ - int running; /* Whether to stop */ + volatile int running; /* Whether to stop */ int status; /* Exit status */ bool sweep_stress; /* Sweep stress test */ u_int ts; /* Current timestamp */ diff --git a/src/third_party/wiredtiger/test/csuite/Makefile.am b/src/third_party/wiredtiger/test/csuite/Makefile.am index f737bc97bed..e2b7233f45b 100644 --- a/src/third_party/wiredtiger/test/csuite/Makefile.am +++ b/src/third_party/wiredtiger/test/csuite/Makefile.am @@ -138,6 +138,10 @@ test_wt4803_cache_overflow_abort_SOURCES = wt4803_cache_overflow_abort/main.c noinst_PROGRAMS += test_wt4803_cache_overflow_abort all_TESTS += test_wt4803_cache_overflow_abort +test_wt4891_meta_ckptlist_get_alloc_SOURCES=wt4891_meta_ckptlist_get_alloc/main.c +noinst_PROGRAMS += test_wt4891_meta_ckptlist_get_alloc +all_TESTS += test_wt4891_meta_ckptlist_get_alloc + # Run this during a "make check" smoke test. 
TESTS = $(all_TESTS) LOG_COMPILER = env top_builddir=$(top_builddir) top_srcdir=$(top_srcdir) $(TEST_WRAPPER) diff --git a/src/third_party/wiredtiger/test/csuite/random_directio/util.c b/src/third_party/wiredtiger/test/csuite/random_directio/util.c index 47369f41843..8bab68ef59c 100644 --- a/src/third_party/wiredtiger/test/csuite/random_directio/util.c +++ b/src/third_party/wiredtiger/test/csuite/random_directio/util.c @@ -100,9 +100,11 @@ copy_directory(const char *fromdir, const char *todir, bool directio) testutil_check(__wt_snprintf(tofile, sizeof(tofile), "%s/%s", todir, dp->d_name)); rfd = open(fromfile, O_RDONLY | openflags, 0); - testutil_assert(rfd >= 0); + testutil_assertfmt(rfd >= 0, + "Open of source %s failed with %d\n", fromfile, errno); wfd = open(tofile, O_WRONLY | O_CREAT, 0666); - testutil_assert(wfd >= 0); + testutil_assertfmt(wfd >= 0, + "Open of dest %s failed with %d\n", tofile, errno); testutil_check(fstat(rfd, &sb)); /* diff --git a/src/third_party/wiredtiger/test/csuite/scope/main.c b/src/third_party/wiredtiger/test/csuite/scope/main.c index b65c9df8790..3a98fbc8fde 100644 --- a/src/third_party/wiredtiger/test/csuite/scope/main.c +++ b/src/third_party/wiredtiger/test/csuite/scope/main.c @@ -91,8 +91,12 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) int exact; bool recno, vstring; - /* Reserve requires a running transaction. */ - testutil_check(session->begin_transaction(session, NULL)); + /* + * Modify and reserve require a transaction, modify requires snapshot + * isolation. 
+ */ + testutil_check( + session->begin_transaction(session, "isolation=snapshot")); cursor = NULL; for (op = ops; op->op != NULL; op++) { diff --git a/src/third_party/wiredtiger/test/csuite/wt4105_large_doc_small_upd/main.c b/src/third_party/wiredtiger/test/csuite/wt4105_large_doc_small_upd/main.c index 0a9a755202d..3e7d52de0a5 100644 --- a/src/third_party/wiredtiger/test/csuite/wt4105_large_doc_small_upd/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt4105_large_doc_small_upd/main.c @@ -136,8 +136,8 @@ main(int argc, char *argv[]) while (++j < MODIFY_COUNT) { for (i = 0; i < NUM_DOCS; i++) { /* Position the cursor. */ - testutil_check( - session2->begin_transaction(session2, NULL)); + testutil_check(session2->begin_transaction( + session2, "isolation=snapshot")); c->set_key(c, i); modify_entry.data.data = "abcdefghijklmnopqrstuvwxyz"; diff --git a/src/third_party/wiredtiger/test/csuite/wt4891_meta_ckptlist_get_alloc/main.c b/src/third_party/wiredtiger/test/csuite/wt4891_meta_ckptlist_get_alloc/main.c new file mode 100644 index 00000000000..4be4d5308ce --- /dev/null +++ b/src/third_party/wiredtiger/test/csuite/wt4891_meta_ckptlist_get_alloc/main.c @@ -0,0 +1,91 @@ +/*- + * Public Domain 2014-2019 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. 
We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" +#define CHECKPOINT_COUNT 10 + +/* + * JIRA ticket reference: WT-4891 + * Test case description: Test wt_meta_ckptlist_get by creating a number of + * checkpoints and then running __wt_verify. + * Failure mode: If the bug still exists then this test will cause an + * error in address sanitized builds. + */ + +int +main(int argc, char *argv[]) +{ + TEST_OPTS *opts, _opts; + WT_CURSOR *cursor, *cursor_ckpt; + WT_SESSION *session; + int i; + + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + testutil_check(testutil_parse_opts(argc, argv, opts)); + + testutil_make_work_dir(opts->home); + + testutil_check(wiredtiger_open( + opts->home, NULL, "create", &opts->conn)); + + testutil_check( + opts->conn->open_session(opts->conn, NULL, NULL, &session)); + + testutil_check( + session->create(session, opts->uri, "key_format=S,value_format=i")); + + testutil_check( + session->open_cursor(session, opts->uri, NULL, NULL, &cursor)); + + /* + * Create checkpoints and keep them active by around by opening a + * checkpoint cursor for each one. 
+ */ + for (i = 0; i < CHECKPOINT_COUNT; ++i) { + testutil_check( + session->begin_transaction(session, "isolation=snapshot")); + cursor->set_key(cursor, "key1"); + cursor->set_value(cursor, i); + testutil_check(cursor->update(cursor)); + testutil_check(session->commit_transaction(session, NULL)); + testutil_check(session->checkpoint(session, NULL)); + testutil_check(session->open_cursor(session, opts->uri, NULL, + "checkpoint=WiredTigerCheckpoint", &cursor_ckpt)); + } + + testutil_check(session->close(session, NULL)); + + testutil_check( + opts->conn->open_session(opts->conn, NULL, NULL, &session)); + + testutil_check(session->verify(session, opts->uri, NULL)); + + testutil_cleanup(opts); + + return (0); +} diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml index 5883b8dc42e..447691f24e2 100755 --- a/src/third_party/wiredtiger/test/evergreen.yml +++ b/src/third_party/wiredtiger/test/evergreen.yml @@ -701,6 +701,20 @@ tasks: ${test_env_vars|} $(pwd)/test/csuite/test_wt4803_cache_overflow_abort 2>&1 + - name: csuite-wt4891-meta-ckptlist-get-alloc-test + depends_on: + - name: compile + commands: + - func: "fetch artifacts" + - command: shell.exec + params: + working_dir: "wiredtiger/build_posix" + script: | + set -o errexit + set -o verbose + + ${test_env_vars|} $(pwd)/test/csuite/test_wt4891_meta_ckptlist_get_alloc 2>&1 + - name: csuite-rwlock-test depends_on: - name: compile @@ -1313,7 +1327,7 @@ buildvariants: display_name: Little-endian (x86) run_on: - ubuntu1404-test - batchtime: 1440 # 1 day + batchtime: 10080 # 7 days expansions: smp_command: -j $(grep -c ^processor /proc/cpuinfo) configure_env_vars: PATH=/opt/mongodbtoolchain/v3/bin:$PATH @@ -1330,7 +1344,7 @@ buildvariants: - enterprise run_on: - ubuntu1604-zseries-small - batchtime: 1440 # 1 day + batchtime: 10080 # 7 days expansions: smp_command: -j $(grep -c ^processor /proc/cpuinfo) configure_env_vars: PATH=/opt/mongodbtoolchain/v3/bin:$PATH diff 
--git a/src/third_party/wiredtiger/test/format/Makefile.am b/src/third_party/wiredtiger/test/format/Makefile.am index 3a74cb0c104..da55ffece4c 100644 --- a/src/third_party/wiredtiger/test/format/Makefile.am +++ b/src/third_party/wiredtiger/test/format/Makefile.am @@ -1,27 +1,14 @@ AM_CPPFLAGS = -I$(top_builddir) AM_CPPFLAGS +=-I$(top_srcdir)/src/include AM_CPPFLAGS +=-I$(top_srcdir)/test/utility -if HAVE_BERKELEY_DB -AM_CPPFLAGS +=-DHAVE_BERKELEY_DB -AM_CPPFLAGS +=-DBERKELEY_DB_PATH=\"$(BERKELEY_DB_PATH)\" -AM_CPPFLAGS +=-I$(BERKELEY_DB_PATH)/include -endif noinst_PROGRAMS = t -noinst_SCRIPTS = s_dumpcmp t_SOURCES =\ backup.c bulk.c compact.c config.c lrt.c ops.c rebalance.c \ - salvage.c t.c util.c wts.c - -if HAVE_BERKELEY_DB -t_SOURCES += bdb.c -endif + salvage.c snap.c t.c util.c wts.c t_LDADD = $(top_builddir)/test/utility/libtest_util.la t_LDADD +=$(top_builddir)/libwiredtiger.la -if HAVE_BERKELEY_DB -t_LDADD += -L$(BERKELEY_DB_PATH)/lib -ldb -endif t_LDFLAGS = -static #noinst_LTLIBRARIES = lzo_compress.la @@ -33,10 +20,6 @@ t_LDFLAGS = -static #lzo_compress_la_LIBADD = -llzo2 -lm #lzo_compress_la_LDFLAGS = -avoid-version -module -rpath /nowhere -s_dumpcmp: $(srcdir)/s_dumpcmp.sh - cp $(srcdir)/s_dumpcmp.sh $@ - chmod +x $@ - backup: rm -rf BACKUP && cp -p -r RUNDIR BACKUP refresh: diff --git a/src/third_party/wiredtiger/test/format/README b/src/third_party/wiredtiger/test/format/README deleted file mode 100644 index eebd4ee7e28..00000000000 --- a/src/third_party/wiredtiger/test/format/README +++ /dev/null @@ -1,11 +0,0 @@ -The test/format program randomly generates WiredTiger databases with -different configurations and different size objects and then does -operations on those files. The goal is to test the WiredTiger file -formats. - -test/format should be linked with a version of Berkeley DB (which it -uses to verify format's results). 
Use the configuration option ---with-berkeleydb=DIR to specify the top-level of an installed Berkeley -DB distribution directory, for example: - - --with-berkeleydb=/usr/local/BerkeleyDB.5.3 diff --git a/src/third_party/wiredtiger/test/format/backup.c b/src/third_party/wiredtiger/test/format/backup.c index 1bddb2e1a13..ba858a28d5b 100644 --- a/src/third_party/wiredtiger/test/format/backup.c +++ b/src/third_party/wiredtiger/test/format/backup.c @@ -104,10 +104,6 @@ backup(void *arg) conn = g.wts_conn; - /* Backups aren't supported for non-standard data sources. */ - if (DATASOURCE("kvsbdb")) - return (WT_THREAD_RET_VALUE); - /* Open a session. */ testutil_check(conn->open_session(conn, NULL, NULL, &session)); diff --git a/src/third_party/wiredtiger/test/format/bdb.c b/src/third_party/wiredtiger/test/format/bdb.c deleted file mode 100644 index f80e1b6d472..00000000000 --- a/src/third_party/wiredtiger/test/format/bdb.c +++ /dev/null @@ -1,281 +0,0 @@ -/*- - * Public Domain 2014-2019 MongoDB, Inc. - * Public Domain 2008-2014 WiredTiger, Inc. - * - * This is free and unencumbered software released into the public domain. - * - * Anyone is free to copy, modify, publish, use, compile, sell, or - * distribute this software, either in source code form or as a compiled - * binary, for any purpose, commercial or non-commercial, and by any - * means. - * - * In jurisdictions that recognize copyright laws, the author or authors - * of this software dedicate any and all copyright interest in the - * software to the public domain. We make this dedication for the benefit - * of the public at large and to the detriment of our heirs and - * successors. We intend this dedication to be an overt act of - * relinquishment in perpetuity of all present and future rights to this - * software under copyright law. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#define BDB 1 /* Berkeley DB header files */ -#include "format.h" - -static DBT key, value; -static WT_ITEM keyitem; - -#define bdb_die(ret, fmt, ...) \ - testutil_die(0, "%s/%d: %s: " fmt, \ - __func__, __LINE__, db_strerror(ret), __VA_ARGS__); - -static int -bdb_compare_reverse(DB *dbp, const DBT *k1, const DBT *k2 -#if DB_VERSION_MAJOR >= 6 - , size_t *locp -#endif -) -{ - size_t len; - int cmp; - - (void)(dbp); -#if DB_VERSION_MAJOR >= 6 - (void)(locp); -#endif - - len = (k1->size < k2->size) ? k1->size : k2->size; - if ((cmp = memcmp(k2->data, k1->data, len)) == 0) - cmp = ((int)k1->size - (int)k2->size); - return (cmp); -} - -void -bdb_open(void) -{ - DB *db; - DBC *dbc; - DB_ENV *dbenv; - - assert(db_env_create(&dbenv, 0) == 0); - dbenv->set_errpfx(dbenv, "bdb"); - dbenv->set_errfile(dbenv, stderr); - assert(dbenv->mutex_set_max(dbenv, 10000) == 0); - assert(dbenv->set_cachesize(dbenv, 0, 50 * 1024 * 1024, 1) == 0); - assert(dbenv->open(dbenv, NULL, - DB_CREATE | DB_INIT_LOCK | DB_INIT_MPOOL | DB_PRIVATE, 0) == 0); - assert(db_create(&db, dbenv, 0) == 0); - - if (g.c_reverse) - assert(db->set_bt_compare(db, bdb_compare_reverse) == 0); - - assert(db->open( - db, NULL, g.home_bdb, NULL, DB_BTREE, DB_CREATE, 0) == 0); - g.bdb = db; - assert(db->cursor(db, NULL, &dbc, 0) == 0); - g.dbc = dbc; - - key_gen_init(&keyitem); -} - -void -bdb_close(void) -{ - DB *db; - DBC *dbc; - DB_ENV *dbenv; - - dbc = g.dbc; - db = g.bdb; - dbenv = db->dbenv; - assert(dbc->close(dbc) == 0); - 
assert(db->close(db, 0) == 0); - assert(dbenv->close(dbenv, 0) == 0); - - key_gen_teardown(&keyitem); -} - -void -bdb_insert( - const void *key_data, size_t key_size, - const void *value_data, size_t value_size) -{ - DBC *dbc; - - key.data = (void *)key_data; - key.size = (u_int32_t)key_size; - value.data = (void *)value_data; - value.size = (u_int32_t)value_size; - - dbc = g.dbc; - - assert(dbc->put(dbc, &key, &value, DB_KEYFIRST) == 0); -} - -void -bdb_np(bool next, - void *keyp, size_t *keysizep, - void *valuep, size_t *valuesizep, int *notfoundp) -{ - DBC *dbc = g.dbc; - int ret; - - *notfoundp = 0; - if ((ret = - dbc->get(dbc, &key, &value, next ? DB_NEXT : DB_PREV)) != 0) { - if (ret != DB_NOTFOUND) - bdb_die(ret, "dbc.get: %s: {%.*s}", - next ? "DB_NEXT" : "DB_PREV", - (int)key.size, (char *)key.data); - *notfoundp = 1; - } else { - *(void **)keyp = key.data; - *keysizep = key.size; - *(void **)valuep = value.data; - *valuesizep = value.size; - } -} - -void -bdb_read(uint64_t keyno, void *valuep, size_t *valuesizep, int *notfoundp) -{ - DBC *dbc = g.dbc; - int ret; - - key_gen(&keyitem, keyno); - key.data = (void *)keyitem.data; - key.size = (u_int32_t)keyitem.size; - - *notfoundp = 0; - if ((ret = dbc->get(dbc, &key, &value, DB_SET)) != 0) { - if (ret != DB_NOTFOUND) - bdb_die(ret, "dbc.get: DB_SET: {%.*s}", - (int)key.size, (char *)key.data); - *notfoundp = 1; - } else { - *(void **)valuep = value.data; - *valuesizep = value.size; - } -} - -void -bdb_update(const void *arg_key, size_t arg_key_size, - const void *arg_value, size_t arg_value_size) -{ - DBC *dbc = g.dbc; - int ret; - - key.data = (void *)arg_key; - key.size = (u_int32_t)arg_key_size; - value.data = (void *)arg_value; - value.size = (u_int32_t)arg_value_size; - - if ((ret = dbc->put(dbc, &key, &value, DB_KEYFIRST)) != 0) - bdb_die(ret, "dbc.put: DB_KEYFIRST: {%.*s}{%.*s}", - (int)key.size, (char *)key.data, - (int)value.size, (char *)value.data); -} - -void -bdb_remove(uint64_t keyno, int 
*notfoundp) -{ - DBC *dbc = g.dbc; - size_t size; - int ret; - - size = 0; - key_gen(&keyitem, keyno); - key.data = (void *)keyitem.data; - key.size = (u_int32_t)keyitem.size; - - bdb_read(keyno, &value.data, &size, notfoundp); - value.size = (u_int32_t)size; - if (*notfoundp) - return; - - /* Deleting a fixed-length item is the same as setting the bits to 0. */ - if (g.type == FIX) - bdb_update(key.data, key.size, "", 1); - else - if ((ret = dbc->del(dbc, 0)) != 0) { - if (ret != DB_NOTFOUND) - bdb_die(ret, "dbc.del: {%.*s}", - (int)key.size, (char *)key.data); - *notfoundp = 1; - } -} - -void -bdb_truncate(uint64_t start, uint64_t stop) -{ - DBC *dbc = g.dbc; - size_t len; - int cmp, ret, notfound; - - /* Deleting a fixed-length item is the same as setting the bits to 0. */ - if (g.type == FIX) { - /* - * If we're deleting from/to the start/end of the database, - * correct for the number of records we have. - */ - if (start == 0) - start = 1; - if (stop == 0) - stop = g.rows; - for (; start <= stop; ++start) - bdb_remove(start, ¬found); - return; - } - - if (start == 0) { - ret = dbc->get(dbc, &key, &value, DB_FIRST); - if (ret != 0 && ret != DB_NOTFOUND) - bdb_die(ret, "%s", "dbc.get: DB_FIRST"); - } else { - key_gen(&keyitem, start); - key.data = (void *)keyitem.data; - key.size = (u_int32_t)keyitem.size; - ret = dbc->get(dbc, &key, &value, DB_SET_RANGE); - if (ret != 0 && ret != DB_NOTFOUND) - bdb_die(ret, "dbc.get: DB_SET: {%.*s}", - (int)key.size, (char *)key.data); - } - if (ret == DB_NOTFOUND) - return; - - if (stop == 0) { - do { - ret = dbc->del(dbc, 0); - if (ret != 0 && ret != DB_NOTFOUND) - bdb_die(ret, "dbc.del: {%.*s}", - (int)key.size, (char *)key.data); - } while ((ret = dbc->get(dbc, &key, &value, DB_NEXT)) == 0); - } else { - key_gen(&keyitem, stop); - do { - len = WT_MIN(key.size, keyitem.size); - cmp = memcmp(key.data, keyitem.data, len); - if (g.c_reverse) { - if (cmp < 0 || - (cmp == 0 && key.size < keyitem.size)) - break; - } else - if (cmp 
> 0 || - (cmp == 0 && key.size > keyitem.size)) - break; - ret = dbc->del(dbc, 0); - if (ret != 0 && ret != DB_NOTFOUND) - bdb_die(ret, "dbc.del: {%.*s}", - (int)key.size, (char *)key.data); - } while ((ret = dbc->get(dbc, &key, &value, DB_NEXT)) == 0); - } - if (ret != 0 && ret != DB_NOTFOUND) - bdb_die(ret, "%s", "dbc.get: DB_NEXT"); -} diff --git a/src/third_party/wiredtiger/test/format/bulk.c b/src/third_party/wiredtiger/test/format/bulk.c index 196cdb6b7ac..550d5f74d38 100644 --- a/src/third_party/wiredtiger/test/format/bulk.c +++ b/src/third_party/wiredtiger/test/format/bulk.c @@ -28,6 +28,49 @@ #include "format.h" +/* + * bulk_begin_transaction -- + * Begin a bulk-load transaction. + */ +static void +bulk_begin_transaction(WT_SESSION *session) +{ + uint64_t ts; + char buf[64]; + + wiredtiger_begin_transaction(session, "isolation=snapshot"); + ts = __wt_atomic_addv64(&g.timestamp, 1); + testutil_check(__wt_snprintf( + buf, sizeof(buf), "read_timestamp=%" PRIx64, ts)); + testutil_check(session->timestamp_transaction(session, buf)); +} + +/* + * bulk_commit_transaction -- + * Commit a bulk-load transaction. + */ +static void +bulk_commit_transaction(WT_SESSION *session) +{ + uint64_t ts; + char buf[64]; + + ts = __wt_atomic_addv64(&g.timestamp, 1); + testutil_check(__wt_snprintf( + buf, sizeof(buf), "commit_timestamp=%" PRIx64, ts)); + testutil_check(session->commit_transaction(session, buf)); +} + +/* + * bulk_rollback_transaction -- + * Rollback a bulk-load transaction. + */ +static void +bulk_rollback_transaction(WT_SESSION *session) +{ + testutil_check(session->rollback_transaction(session, NULL)); +} + void wts_load(void) { @@ -42,19 +85,13 @@ wts_load(void) testutil_check(conn->open_session(conn, NULL, NULL, &session)); - if (g.logging != 0) - (void)g.wt_api->msg_printf(g.wt_api, session, - "=============== bulk load start ==============="); + logop(session, "%s", "=============== bulk load start"); /* - * No bulk load with data-sources. 
- * * No bulk load with custom collators, the order of insertion will not * match the collation order. */ is_bulk = true; - if (DATASOURCE("kvsbdb")) - is_bulk = false; if (g.c_reverse) is_bulk = false; @@ -71,6 +108,9 @@ wts_load(void) key_gen_init(&key); val_gen_init(&value); + if (g.c_txn_timestamps) + bulk_begin_transaction(session); + for (;;) { if (++g.key_cnt > g.c_rows) { g.key_cnt = g.rows = g.c_rows; @@ -78,9 +118,15 @@ wts_load(void) } /* Report on progress every 100 inserts. */ - if (g.key_cnt % 1000 == 0) + if (g.key_cnt % 10000 == 0) { track("bulk load", g.key_cnt, NULL); + if (g.c_txn_timestamps) { + bulk_commit_transaction(session); + bulk_begin_transaction(session); + } + } + key_gen(&key, g.key_cnt); val_gen(NULL, &value, g.key_cnt); @@ -89,34 +135,24 @@ wts_load(void) if (!is_bulk) cursor->set_key(cursor, g.key_cnt); cursor->set_value(cursor, *(uint8_t *)value.data); - if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, session, - "%-10s %" PRIu64 " {0x%02" PRIx8 "}", - "bulk V", - g.key_cnt, ((uint8_t *)value.data)[0]); + logop(session, "%-10s %" PRIu64 " {0x%02" PRIx8 "}", + "bulk", g.key_cnt, ((uint8_t *)value.data)[0]); break; case VAR: if (!is_bulk) cursor->set_key(cursor, g.key_cnt); cursor->set_value(cursor, &value); - if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, session, - "%-10s %" PRIu64 " {%.*s}", "bulk V", - g.key_cnt, - (int)value.size, (char *)value.data); + logop(session, "%-10s %" PRIu64 " {%.*s}", "bulk", + g.key_cnt, (int)value.size, (char *)value.data); break; case ROW: cursor->set_key(cursor, &key); - if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, session, - "%-10s %" PRIu64 " {%.*s}", "bulk K", - g.key_cnt, (int)key.size, (char *)key.data); cursor->set_value(cursor, &value); - if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, session, - "%-10s %" PRIu64 " {%.*s}", "bulk V", - g.key_cnt, - (int)value.size, (char *)value.data); + logop(session, + "%-10s %" PRIu64 
" {%.*s}, {%.*s}", "bulk", + g.key_cnt, + (int)key.size, (char *)key.data, + (int)value.size, (char *)value.data); break; } @@ -132,8 +168,14 @@ wts_load(void) * extra space once the run starts. */ if ((ret = cursor->insert(cursor)) != 0) { - if (ret != WT_CACHE_FULL) - testutil_die(ret, "cursor.insert"); + testutil_assert( + ret == WT_CACHE_FULL || ret == WT_ROLLBACK); + + if (g.c_txn_timestamps) { + bulk_rollback_transaction(session); + bulk_begin_transaction(session); + } + g.rows = --g.key_cnt; g.c_rows = (uint32_t)g.key_cnt; @@ -143,18 +185,14 @@ wts_load(void) g.c_delete_pct += 20; break; } - -#ifdef HAVE_BERKELEY_DB - if (SINGLETHREADED) - bdb_insert(key.data, key.size, value.data, value.size); -#endif } + if (g.c_txn_timestamps) + bulk_commit_transaction(session); + testutil_check(cursor->close(cursor)); - if (g.logging != 0) - (void)g.wt_api->msg_printf(g.wt_api, session, - "=============== bulk load stop ==============="); + logop(session, "%s", "=============== bulk load stop"); testutil_check(session->close(session, NULL)); diff --git a/src/third_party/wiredtiger/test/format/compact.c b/src/third_party/wiredtiger/test/format/compact.c index 95160dc1595..01b43351cd3 100644 --- a/src/third_party/wiredtiger/test/format/compact.c +++ b/src/third_party/wiredtiger/test/format/compact.c @@ -42,10 +42,6 @@ compact(void *arg) (void)(arg); - /* Compaction isn't supported for all data sources. */ - if (DATASOURCE("kvsbdb")) - return (WT_THREAD_RET_VALUE); - /* Open a session. 
*/ conn = g.wts_conn; testutil_check(conn->open_session(conn, NULL, NULL, &session)); diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c index df3222b072a..dd655ac3b8d 100644 --- a/src/third_party/wiredtiger/test/format/config.c +++ b/src/third_party/wiredtiger/test/format/config.c @@ -39,7 +39,6 @@ static bool config_fix(void); static void config_in_memory(void); static void config_in_memory_reset(void); static int config_is_perm(const char *); -static void config_transaction(void); static void config_lrt(void); static void config_lsm_reset(void); static void config_map_checkpoint(const char *, u_int *); @@ -50,6 +49,7 @@ static void config_map_file_type(const char *, u_int *); static void config_map_isolation(const char *, u_int *); static void config_pct(void); static void config_reset(void); +static void config_transaction(void); /* * config_setup -- @@ -73,30 +73,30 @@ config_setup(void) */ if (!config_is_perm("file_type")) { if (config_is_perm("data_source") && DATASOURCE("lsm")) - config_single("file_type=row", 0); + config_single("file_type=row", false); else switch (mmrand(NULL, 1, 10)) { case 1: case 2: case 3: /* 30% */ - config_single("file_type=var", 0); + config_single("file_type=var", false); break; case 4: /* 10% */ if (config_fix()) { - config_single("file_type=fix", 0); + config_single("file_type=fix", false); break; } /* FALLTHROUGH */ /* 60% */ case 5: case 6: case 7: case 8: case 9: case 10: - config_single("file_type=row", 0); + config_single("file_type=row", false); break; } } config_map_file_type(g.c_file_type, &g.type); if (!config_is_perm("data_source")) { - config_single("data_source=table", 0); + config_single("data_source=table", false); switch (mmrand(NULL, 1, 5)) { case 1: /* 20% */ - config_single("data_source=file", 0); + config_single("data_source=file", false); break; case 2: /* 20% */ /* @@ -115,7 +115,7 @@ config_setup(void) break; if (config_is_perm("truncate") && 
g.c_truncate) break; - config_single("data_source=lsm", 0); + config_single("data_source=lsm", false); break; case 3: case 4: case 5: /* 60% */ break; @@ -135,8 +135,8 @@ config_setup(void) /* * Build the top-level object name: we're overloading data_source in - * our configuration, LSM or KVS devices are "tables", but files are - * tested as well. + * our configuration, LSM objects are "tables", but files are tested + * as well. */ g.uri = dmalloc(256); strcpy(g.uri, DATASOURCE("file") ? "file:" : "table:"); @@ -161,34 +161,25 @@ config_setup(void) testutil_check(__wt_snprintf(buf, sizeof(buf), "%s=%" PRIu32, cp->name, mmrand(NULL, cp->min, cp->maxrand))); - config_single(buf, 0); + config_single(buf, false); } - /* Required shared libraries. */ - if (DATASOURCE("kvsbdb") && access(KVS_BDB_PATH, R_OK) != 0) - testutil_die(errno, "kvsbdb shared library: %s", KVS_BDB_PATH); - - /* - * Only row-store tables support collation order. - * Some data-sources don't support user-specified collations. - */ - if (g.type != ROW || DATASOURCE("kvsbdb")) - config_single("reverse=off", 0); + /* Only row-store tables support collation order. */ + if (g.type != ROW) + config_single("reverse=off", false); - /* - * Periodically, run single-threaded so we can compare the results to - * a Berkeley DB copy, as long as the thread-count isn't nailed down. - */ - if (!config_is_perm("threads") && mmrand(NULL, 1, 20) == 1) - g.c_threads = 1; + /* First, transaction configuration, it configures other features. */ + config_transaction(); + /* Simple selection. */ config_checkpoint(); config_checksum(); config_compression("compression"); config_compression("logging_compression"); config_encryption(); config_lrt(); - config_transaction(); + + /* Configuration based on the configuration already chosen. 
*/ config_pct(); config_cache(); @@ -233,12 +224,12 @@ config_setup(void) */ if (config_is_perm("timer")) { if (!config_is_perm("ops")) - config_single("ops=0", 0); + config_single("ops=0", false); } else { if (!config_is_perm("ops")) - config_single("timer=30", 0); + config_single("timer=30", false); else - config_single("timer=360", 0); + config_single("timer=360", false); } /* Reset the key count. */ @@ -317,13 +308,13 @@ config_checkpoint(void) if (!config_is_perm("checkpoints")) switch (mmrand(NULL, 1, 20)) { case 1: case 2: case 3: case 4: /* 20% */ - config_single("checkpoints=wiredtiger", 0); + config_single("checkpoints=wiredtiger", false); break; case 5: /* 5 % */ - config_single("checkpoints=off", 0); + config_single("checkpoints=off", false); break; default: /* 75% */ - config_single("checkpoints=on", 0); + config_single("checkpoints=on", false); break; } } @@ -339,13 +330,13 @@ config_checksum(void) if (!config_is_perm("checksum")) switch (mmrand(NULL, 1, 10)) { case 1: /* 10% */ - config_single("checksum=on", 0); + config_single("checksum=on", false); break; case 2: /* 10% */ - config_single("checksum=off", 0); + config_single("checksum=off", false); break; default: /* 80% */ - config_single("checksum=uncompressed", 0); + config_single("checksum=uncompressed", false); break; } } @@ -372,7 +363,7 @@ config_compression(const char *conf_name) if (strcmp(conf_name, "logging_compression") == 0 && g.c_logging == 0) { testutil_check(__wt_snprintf( confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr)); - config_single(confbuf, 0); + config_single(confbuf, false); return; } @@ -411,7 +402,7 @@ config_compression(const char *conf_name) testutil_check(__wt_snprintf( confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr)); - config_single(confbuf, 0); + config_single(confbuf, false); } /* @@ -437,7 +428,7 @@ config_encryption(void) break; } - config_single(cstr, 0); + config_single(cstr, false); } } @@ -504,30 +495,30 @@ config_in_memory_reset(void) /* Turn off a lot 
of stuff. */ if (!config_is_perm("alter")) - config_single("alter=off", 0); + config_single("alter=off", false); if (!config_is_perm("backups")) - config_single("backups=off", 0); + config_single("backups=off", false); if (!config_is_perm("checkpoints")) - config_single("checkpoints=off", 0); + config_single("checkpoints=off", false); if (!config_is_perm("compression")) - config_single("compression=none", 0); + config_single("compression=none", false); if (!config_is_perm("logging")) - config_single("logging=off", 0); + config_single("logging=off", false); if (!config_is_perm("rebalance")) - config_single("rebalance=off", 0); + config_single("rebalance=off", false); if (!config_is_perm("salvage")) - config_single("salvage=off", 0); + config_single("salvage=off", false); if (!config_is_perm("verify")) - config_single("verify=off", 0); + config_single("verify=off", false); /* * Keep keys/values small, overflow items aren't an issue for in-memory * configurations and it keeps us from overflowing the cache. */ if (!config_is_perm("key_max")) - config_single("key_max=32", 0); + config_single("key_max=32", false); if (!config_is_perm("value_max")) - config_single("value_max=80", 0); + config_single("value_max=80", false); /* * Size the cache relative to the initial data set, use 2x the base @@ -557,7 +548,7 @@ config_lsm_reset(void) * always result in a timeout). */ if (!config_is_perm("truncate")) - config_single("truncate=off", 0); + config_single("truncate=off", false); /* * LSM doesn't currently play nicely with timestamps, don't choose the @@ -567,80 +558,8 @@ config_lsm_reset(void) */ if (!config_is_perm("prepare") && !config_is_perm("transaction_timestamps")) { - config_single("prepare=off", 0); - config_single("transaction_timestamps=off", 0); - } -} - -/* - * config_transaction -- - * Transaction configuration. 
- */ -static void -config_transaction(void) -{ - char buf[256]; - const char *cstr; - bool timestamps; - - /* - * We cannot prepare a transaction if logging is configured, or if - * timestamps are not configured. - * - * Prepare isn't configured often, let it control other features, unless - * they're explicitly set/not-set. - */ - if (g.c_prepare && config_is_perm("prepare")) { - if (g.c_logging && config_is_perm("logging")) - testutil_die(EINVAL, - "prepare is incompatible with logging"); - if (!g.c_txn_timestamps && - config_is_perm("transaction_timestamps")) - testutil_die(EINVAL, - "prepare requires transaction timestamps"); - } - if (g.c_logging && config_is_perm("logging")) - config_single("prepare=off", 0); - if (!g.c_txn_timestamps && config_is_perm("transaction_timestamps")) - config_single("prepare=off", 0); - - if (g.c_prepare) { - config_single("logging=off", 0); - config_single("transaction_timestamps=on", 0); - } - - timestamps = g.c_txn_timestamps; - - /* - * Isolation: choose something if isolation wasn't specified. - * - * Timestamps can only be used with snapshot isolation. - */ - if (!config_is_perm("isolation")) { - /* Avoid "maybe uninitialized" warnings. */ - switch (timestamps ? 0 : mmrand(NULL, 1, 4)) { - case 1: - cstr = "isolation=random"; - break; - case 2: - cstr = "isolation=read-uncommitted"; - break; - case 3: - cstr = "isolation=read-committed"; - break; - case 4: - default: - cstr = "isolation=snapshot"; - break; - } - config_single(cstr, 0); - } - - if (!config_is_perm("transaction-frequency")) { - testutil_check(__wt_snprintf(buf, sizeof(buf), - "transaction-frequency=%" PRIu32, - timestamps ? 
100: mmrand(NULL, 1, 100))); - config_single(buf, 0); + config_single("prepare=off", false); + config_single("transaction_timestamps=off", false); } } @@ -660,7 +579,7 @@ config_lrt(void) testutil_die(EINVAL, "long_running_txn not supported with fixed-length " "column store"); - config_single("long_running_txn=off", 0); + config_single("long_running_txn=off", false); } } @@ -676,7 +595,6 @@ config_pct(void) uint32_t *vp; /* Value store */ u_int order; /* Order of assignment */ } list[] = { -#define CONFIG_DELETE_ENTRY 0 { "delete_pct", &g.c_delete_pct, 0 }, { "insert_pct", &g.c_insert_pct, 0 }, #define CONFIG_MODIFY_ENTRY 2 @@ -711,30 +629,22 @@ config_pct(void) } /* - * Cursor modify isn't possible for read-uncommitted transactions. - * If both forced, it's an error, else, prefer the forced one, else, - * prefer modify operations. + * Cursor modify isn't possible for anything besides snapshot isolation + * transactions. If both forced, it's an error. The run-time operations + * code converts modify operations into updates if we're in some other + * transaction type, but if we're never going to be able to do a modify, + * turn it off in the CONFIG output to avoid misleading debuggers. 
*/ - if (g.c_isolation_flag == ISOLATION_READ_UNCOMMITTED) { - if (config_is_perm("isolation")) { - if (config_is_perm("modify_pct") && g.c_modify_pct != 0) - testutil_die(EINVAL, - "WT_CURSOR.modify not supported with " - "read-uncommitted transactions"); - list[CONFIG_MODIFY_ENTRY].order = 0; - *list[CONFIG_MODIFY_ENTRY].vp = 0; - } else - config_single("isolation=random", 0); - } + if (g.c_isolation_flag == ISOLATION_READ_COMMITTED || + g.c_isolation_flag == ISOLATION_READ_UNCOMMITTED) { + if (config_is_perm("isolation") && + config_is_perm("modify_pct") && g.c_modify_pct != 0) + testutil_die(EINVAL, + "WT_CURSOR.modify only supported with " + "snapshot isolation transactions"); - /* - * If the delete percentage isn't nailed down, periodically set it to - * 0 so salvage gets run and so we can perform stricter sanity checks - * on key ordering. - */ - if (!config_is_perm("delete_pct") && mmrand(NULL, 1, 10) == 1) { - list[CONFIG_DELETE_ENTRY].order = 0; - *list[CONFIG_DELETE_ENTRY].vp = 0; + list[CONFIG_MODIFY_ENTRY].order = 0; + *list[CONFIG_MODIFY_ENTRY].vp = 0; } /* @@ -772,6 +682,94 @@ config_pct(void) } /* + * config_transaction -- + * Transaction configuration. + */ +static void +config_transaction(void) +{ + bool prepare_requires_ts; + + /* + * We can't prepare a transaction if logging is configured or timestamps + * aren't configured. Further, for repeatable reads to work in timestamp + * testing, all updates must be within a snapshot-isolation transaction. + * Check for incompatible configurations, then let prepare and timestamp + * drive the remaining configuration. 
+ */ + prepare_requires_ts = false; + if (g.c_prepare) { + if (config_is_perm("prepare")) { + if (g.c_logging && config_is_perm("logging")) + testutil_die(EINVAL, + "prepare is incompatible with logging"); + if (!g.c_txn_timestamps && + config_is_perm("transaction_timestamps")) + testutil_die(EINVAL, + "prepare requires transaction timestamps"); + } else + if ((g.c_logging && config_is_perm("logging")) || + (!g.c_txn_timestamps && + config_is_perm("transaction_timestamps"))) + config_single("prepare=off", false); + if (g.c_prepare) { + prepare_requires_ts = true; + if (g.c_logging) + config_single("logging=off", false); + if (!g.c_txn_timestamps) + config_single( + "transaction_timestamps=on", false); + } + } + + if (g.c_txn_timestamps) { + if (prepare_requires_ts || + config_is_perm("transaction_timestamps")) { + if (g.c_isolation_flag != ISOLATION_SNAPSHOT && + config_is_perm("isolation")) + testutil_die(EINVAL, + "transaction_timestamps or prepare require " + "isolation=snapshot"); + if (g.c_txn_freq != 100 && + config_is_perm("transaction-frequency")) + testutil_die(EINVAL, + "transaction_timestamps or prepare require " + "transaction-frequency=100"); + } else + if ((g.c_isolation_flag != ISOLATION_SNAPSHOT && + config_is_perm("isolation")) || + (g.c_txn_freq != 100 && + config_is_perm("transaction-frequency"))) + config_single( + "transaction_timestamps=off", false); + } + if (g.c_txn_timestamps) { + if (g.c_isolation_flag != ISOLATION_SNAPSHOT) + config_single("isolation=snapshot", false); + if (g.c_txn_freq != 100) + config_single("transaction-frequency=100", false); + } else + if (!config_is_perm("isolation")) + switch (mmrand(NULL, 1, 4)) { + case 1: + config_single("isolation=random", false); + break; + case 2: + config_single( + "isolation=read-uncommitted", false); + break; + case 3: + config_single( + "isolation=read-committed", false); + break; + case 4: + default: + config_single("isolation=snapshot", false); + break; + } +} + +/* * config_error -- * 
Display configuration information on error. */ @@ -796,7 +794,7 @@ config_error(void) * Print configuration information. */ void -config_print(int error_display) +config_print(bool error_display) { CONFIG *cp; FILE *fp; @@ -846,7 +844,7 @@ config_file(const char *name) *p = '\0'; if (buf[0] == '\0' || buf[0] == '#') continue; - config_single(buf, 1); + config_single(buf, true); } fclose_and_clear(&fp); } @@ -925,7 +923,7 @@ config_find(const char *s, size_t len, bool fatal) * Set a single configuration structure value. */ void -config_single(const char *s, int perm) +config_single(const char *s, bool perm) { CONFIG *cp; long vlong; @@ -968,7 +966,6 @@ config_single(const char *s, int perm) } else if (strncmp(s, "data_source", strlen("data_source")) == 0 && strncmp("file", ep, strlen("file")) != 0 && - strncmp("kvsbdb", ep, strlen("kvsbdb")) != 0 && strncmp("lsm", ep, strlen("lsm")) != 0 && strncmp("table", ep, strlen("table")) != 0) { fprintf(stderr, diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h index 1df810c0702..c1aafcd214e 100644 --- a/src/third_party/wiredtiger/test/format/config.h +++ b/src/third_party/wiredtiger/test/format/config.h @@ -69,6 +69,14 @@ static CONFIG c[] = { "if altering the table is enabled", /* 10% */ C_BOOL, 10, 0, 0, &g.c_alter, NULL }, + { "assert_commit_timestamp", + "if assert commit_timestamp", /* 5% */ + C_BOOL, 5, 0, 0, &g.c_assert_commit_timestamp, NULL }, + + { "assert_read_timestamp", + "if assert read_timestamp", /* 5% */ + C_BOOL, 5, 0, 0, &g.c_assert_read_timestamp, NULL }, + { "auto_throttle", "if LSM inserts are throttled", /* 90% */ C_BOOL, 90, 0, 0, &g.c_auto_throttle, NULL }, @@ -138,7 +146,7 @@ static CONFIG c[] = { C_BOOL, 5, 0, 0, &g.c_data_extend, NULL }, { "data_source", - "data source (file | kvsbdb | lsm | table)", + "data source (file | lsm | table)", C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_data_source }, { "delete_pct", @@ -378,13 +386,13 @@ static 
CONFIG c[] = { "stress splits (#8)", /* 2% */ C_BOOL, 2, 0, 0, &g.c_timing_stress_split_8, NULL }, - { "transaction_timestamps", /* 10% */ + { "transaction_timestamps", /* 70% */ "enable transaction timestamp support", - C_BOOL, 10, 0, 0, &g.c_txn_timestamps, NULL }, + C_BOOL, 70, 0, 0, &g.c_txn_timestamps, NULL }, { "transaction-frequency", "percent operations done inside an explicit transaction", - C_IGNORE, 1, 0, 100, &g.c_txn_freq, NULL }, + 0x0, 1, 100, 100, &g.c_txn_freq, NULL }, { "truncate", /* 100% */ "enable truncation", diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h index 4806f883f72..7cba583b2b4 100644 --- a/src/third_party/wiredtiger/test/format/format.h +++ b/src/third_party/wiredtiger/test/format/format.h @@ -30,18 +30,6 @@ #include <signal.h> -#ifdef BDB -/* - * Berkeley DB has an #ifdef we need to provide a value for, we'll see an - * undefined error if it's unset during a strict compile. - */ -#ifndef DB_DBM_HSEARCH -#define DB_DBM_HSEARCH 0 -#endif -#include <assert.h> -#include <db.h> -#endif - #define EXTPATH "../../ext/" /* Extensions path */ #define LZ4_PATH \ @@ -59,9 +47,6 @@ #define ROTN_PATH \ EXTPATH "encryptors/rotn/.libs/libwiredtiger_rotn.so" -#define KVS_BDB_PATH \ - EXTPATH "test/kvs_bdb/.libs/libwiredtiger_kvs_bdb.so" - #undef M #define M(v) ((v) * WT_MILLION) /* Million */ #undef KILOBYTE @@ -82,7 +67,6 @@ typedef struct { char *home; /* Home directory */ char *home_backup; /* Hot-backup directory */ char *home_backup_init; /* Initialize backup command */ - char *home_bdb; /* BDB directory */ char *home_config; /* Run CONFIG file path */ char *home_init; /* Initialize home command */ char *home_log; /* Operation log file path */ @@ -92,11 +76,6 @@ typedef struct { char wiredtiger_open_config[8 * 1024]; /* Database open config */ -#ifdef HAVE_BERKELEY_DB - void *bdb; /* BDB comparison handle */ - void *dbc; /* BDB cursor handle */ -#endif - WT_CONNECTION *wts_conn; 
WT_EXTENSION_API *wt_api; @@ -105,11 +84,8 @@ typedef struct { uint32_t run_cnt; /* Run counter */ - enum { - LOG_FILE=1, /* Use a log file */ - LOG_OPS=2 /* Log all operations */ - } logging; - FILE *logfp; /* Log file */ + bool logging; /* log operations */ + FILE *logfp; /* log file */ bool replay; /* Replaying a run. */ bool workers_finished; /* Operations completed */ @@ -151,6 +127,8 @@ typedef struct { uint32_t c_abort; /* Config values */ uint32_t c_alter; + uint32_t c_assert_commit_timestamp; + uint32_t c_assert_read_timestamp; uint32_t c_auto_throttle; uint32_t c_backups; uint32_t c_bitcnt; @@ -280,14 +258,36 @@ typedef struct { } GLOBAL; extern GLOBAL g; +/* Worker thread operations. */ +typedef enum { INSERT, MODIFY, READ, REMOVE, TRUNCATE, UPDATE } thread_op; + +/* Worker read operations. */ +typedef enum { NEXT, PREV, SEARCH, SEARCH_NEAR } read_operation; + +typedef struct { + thread_op op; /* Operation */ + uint64_t keyno; /* Row number */ + + uint64_t ts; /* Read/commit timestamp */ + bool repeatable; /* Operation can be repeated */ + + uint64_t last; /* Inclusive end of a truncate range */ + + void *kdata; /* If an insert, the generated key */ + size_t ksize; + size_t kmemsize; + + void *vdata; /* If not a delete, the value */ + size_t vsize; + size_t vmemsize; +} SNAP_OPS; + typedef struct { int id; /* simple thread ID */ wt_thread_t tid; /* thread ID */ WT_RAND_STATE rnd; /* thread RNG state */ - bool prepare_txn; /* is prepare transaction */ - volatile bool quit; /* thread should quit */ uint64_t ops; /* total operations */ @@ -300,6 +300,9 @@ typedef struct { uint64_t truncate; uint64_t update; + WT_SESSION *session; /* WiredTiger session */ + WT_CURSOR *cursor; /* WiredTiger cursor */ + uint64_t keyno; /* key */ WT_ITEM *key, _key; /* key, value */ WT_ITEM *value, _value; @@ -307,6 +310,11 @@ typedef struct { uint64_t last; /* truncate range */ WT_ITEM *lastkey, _lastkey; + bool repeatable_reads; /* if read ops repeatable */ + uint64_t 
read_ts; /* read timestamp */ + uint64_t commit_ts; /* commit timestamp */ + SNAP_OPS *snap, *snap_first, snap_list[512]; + WT_ITEM *tbuf, _tbuf; /* temporary buffer */ #define TINFO_RUNNING 1 /* Running */ @@ -314,17 +322,13 @@ typedef struct { #define TINFO_JOINED 3 /* Resolved */ volatile int state; /* state */ } TINFO; +extern TINFO **tinfo_list; -#ifdef HAVE_BERKELEY_DB -void bdb_close(void); -void bdb_insert(const void *, size_t, const void *, size_t); -void bdb_np(bool, void *, size_t *, void *, size_t *, int *); -void bdb_open(void); -void bdb_read(uint64_t, void *, size_t *, int *); -void bdb_remove(uint64_t, int *); -void bdb_truncate(uint64_t, uint64_t); -void bdb_update(const void *, size_t, const void *, size_t); -#endif +#define logop(wt_session, fmt, ...) do { \ + if (g.logging) \ + testutil_check(g.wt_api->msg_printf( \ + g.wt_api, wt_session, fmt, __VA_ARGS__)); \ +} while (0) WT_THREAD_RET alter(void *); WT_THREAD_RET backup(void *); @@ -333,9 +337,9 @@ WT_THREAD_RET compact(void *); void config_clear(void); void config_error(void); void config_file(const char *); -void config_print(int); +void config_print(bool); void config_setup(void); -void config_single(const char *, int); +void config_single(const char *, bool); void fclose_and_clear(FILE **); void key_gen(WT_ITEM *, uint64_t); void key_gen_init(WT_ITEM *); @@ -348,6 +352,10 @@ void print_item(const char *, WT_ITEM *); void print_item_data(const char *, const uint8_t *, size_t); int read_row_worker(WT_CURSOR *, uint64_t, WT_ITEM *, WT_ITEM *, bool); uint32_t rng(WT_RAND_STATE *); +void snap_repeat_single(WT_CURSOR *, TINFO *); +int snap_repeat_txn(WT_CURSOR *, TINFO *); +void snap_repeat_update(TINFO *, bool); +void snap_track(TINFO *, thread_op); WT_THREAD_RET timestamp(void *); void track(const char *, uint64_t, TINFO *); void val_gen(WT_RAND_STATE *, WT_ITEM *, uint64_t); @@ -356,11 +364,11 @@ void val_gen_teardown(WT_ITEM *); void val_init(void); void val_teardown(void); void 
wts_close(void); -void wts_dump(const char *, int); +void wts_dump(const char *, bool); void wts_init(void); void wts_load(void); void wts_open(const char *, bool, WT_CONNECTION **); -void wts_ops(int); +void wts_ops(bool); void wts_read_scan(void); void wts_rebalance(void); void wts_reopen(void); @@ -368,50 +376,4 @@ void wts_salvage(void); void wts_stats(void); void wts_verify(const char *); -/* - * mmrand -- - * Return a random value between a min/max pair, inclusive. - */ -static inline uint32_t -mmrand(WT_RAND_STATE *rnd, u_int min, u_int max) -{ - uint32_t v; - u_int range; - - /* - * Test runs with small row counts can easily pass a max of 0 (for - * example, "g.rows / 20"). Avoid the problem. - */ - if (max <= min) - return (min); - - v = rng(rnd); - range = (max - min) + 1; - v %= range; - v += min; - return (v); -} - -static inline void -random_sleep(WT_RAND_STATE *rnd, u_int max_seconds) -{ - uint64_t i, micro_seconds; - - /* - * We need a fast way to choose a sleep time. We want to sleep a short - * period most of the time, but occasionally wait longer. Divide the - * maximum period of time into 10 buckets (where bucket 0 doesn't sleep - * at all), and roll dice, advancing to the next bucket 50% of the time. - * That means we'll hit the maximum roughly every 1K calls. - */ - for (i = 0;;) - if (rng(rnd) & 0x1 || ++i > 9) - break; - - if (i == 0) - __wt_yield(); - else { - micro_seconds = (uint64_t)max_seconds * WT_MILLION; - __wt_sleep(0, i * (micro_seconds / 10)); - } -} +#include "format.i" diff --git a/src/third_party/wiredtiger/test/format/format.i b/src/third_party/wiredtiger/test/format/format.i new file mode 100644 index 00000000000..a359a5c3492 --- /dev/null +++ b/src/third_party/wiredtiger/test/format/format.i @@ -0,0 +1,126 @@ +/*- + * Public Domain 2014-2019 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. 
+ * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * read_op -- + * Perform a read operation, waiting out prepare conflicts. + */ +static inline int +read_op(WT_CURSOR *cursor, read_operation op, int *exactp) +{ + WT_DECL_RET; + + /* + * Read operations wait out prepare-conflicts. (As part of the snapshot + * isolation checks, we repeat reads that succeeded before, they should + * be repeatable.) 
+ */ + switch (op) { + case NEXT: + while ((ret = cursor->next(cursor)) == WT_PREPARE_CONFLICT) + __wt_yield(); + break; + case PREV: + while ((ret = cursor->prev(cursor)) == WT_PREPARE_CONFLICT) + __wt_yield(); + break; + case SEARCH: + while ((ret = cursor->search(cursor)) == WT_PREPARE_CONFLICT) + __wt_yield(); + break; + case SEARCH_NEAR: + while ((ret = + cursor->search_near(cursor, exactp)) == WT_PREPARE_CONFLICT) + __wt_yield(); + break; + } + return (ret); +} + +/* + * mmrand -- + * Return a random value between a min/max pair, inclusive. + */ +static inline uint32_t +mmrand(WT_RAND_STATE *rnd, u_int min, u_int max) +{ + uint32_t v; + u_int range; + + /* + * Test runs with small row counts can easily pass a max of 0 (for + * example, "g.rows / 20"). Avoid the problem. + */ + if (max <= min) + return (min); + + v = rng(rnd); + range = (max - min) + 1; + v %= range; + v += min; + return (v); +} + +static inline void +random_sleep(WT_RAND_STATE *rnd, u_int max_seconds) +{ + uint64_t i, micro_seconds; + + /* + * We need a fast way to choose a sleep time. We want to sleep a short + * period most of the time, but occasionally wait longer. Divide the + * maximum period of time into 10 buckets (where bucket 0 doesn't sleep + * at all), and roll dice, advancing to the next bucket 50% of the time. + * That means we'll hit the maximum roughly every 1K calls. + */ + for (i = 0;;) + if (rng(rnd) & 0x1 || ++i > 9) + break; + + if (i == 0) + __wt_yield(); + else { + micro_seconds = (uint64_t)max_seconds * WT_MILLION; + __wt_sleep(0, i * (micro_seconds / 10)); + } +} + +static inline void +wiredtiger_begin_transaction(WT_SESSION *session, const char *config) +{ + WT_DECL_RET; + + /* + * Keep trying to start a new transaction if it's timing out. + * There are no resources pinned, it should succeed eventually. 
+ */ + while ((ret = + session->begin_transaction(session, config)) == WT_CACHE_FULL) + __wt_yield(); + testutil_check(ret); +} diff --git a/src/third_party/wiredtiger/test/format/lrt.c b/src/third_party/wiredtiger/test/format/lrt.c index b36d033345d..58adfc11216 100644 --- a/src/third_party/wiredtiger/test/format/lrt.c +++ b/src/third_party/wiredtiger/test/format/lrt.c @@ -110,15 +110,7 @@ lrt(void *arg) */ testutil_check(session->snapshot(session, "name=test")); __wt_sleep(1, 0); - /* - * Keep trying to start a new transaction if it's - * timing out - we know there aren't any resources - * pinned so it should succeed eventually. - */ - while ((ret = session->begin_transaction( - session, "snapshot=test")) == WT_CACHE_FULL) - ; - testutil_check(ret); + wiredtiger_begin_transaction(session, "snapshot=test"); testutil_check(session->snapshot( session, "drop=(all)")); testutil_check(session->commit_transaction( diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c index 7d0cbd00b0b..a27dec3dd0c 100644 --- a/src/third_party/wiredtiger/test/format/ops.c +++ b/src/third_party/wiredtiger/test/format/ops.c @@ -45,10 +45,6 @@ static int row_truncate(TINFO *, WT_CURSOR *); static int row_update(TINFO *, WT_CURSOR *, bool); static void table_append_init(void); -#ifdef HAVE_BERKELEY_DB -static int notfound_chk(const char *, int, int, uint64_t); -#endif - static char modify_repl[256]; /* @@ -79,14 +75,16 @@ set_alarm(void) #endif } +TINFO **tinfo_list; + /* * wts_ops -- * Perform a number of operations in a set of threads. */ void -wts_ops(int lastrun) +wts_ops(bool lastrun) { - TINFO **tinfo_list, *tinfo, total; + TINFO *tinfo, total; WT_CONNECTION *conn; WT_SESSION *session; wt_thread_t alter_tid, backup_tid, checkpoint_tid, compact_tid, lrt_tid; @@ -143,17 +141,15 @@ wts_ops(int lastrun) g.rand_log_stop = true; /* Logging requires a session. 
*/ - if (g.logging != 0) { + if (g.logging) testutil_check(conn->open_session(conn, NULL, NULL, &session)); - (void)g.wt_api->msg_printf(g.wt_api, session, - "=============== thread ops start ==============="); - } + logop(session, "%s", "=============== thread ops start"); /* * Create the per-thread structures and start the worker threads. * Allocate the thread structures separately to minimize false sharing. */ - tinfo_list = dcalloc((size_t)g.c_threads, sizeof(TINFO *)); + tinfo_list = dcalloc((size_t)g.c_threads + 1, sizeof(TINFO *)); for (i = 0; i < g.c_threads; ++i) { tinfo_list[i] = tinfo = dcalloc(1, sizeof(TINFO)); @@ -289,274 +285,97 @@ wts_ops(int lastrun) testutil_check(__wt_thread_join(NULL, &timestamp_tid)); g.workers_finished = false; - if (g.logging != 0) { - (void)g.wt_api->msg_printf(g.wt_api, session, - "=============== thread ops stop ==============="); + logop(session, "%s", "=============== thread ops stop"); + if (g.logging) testutil_check(session->close(session, NULL)); - } for (i = 0; i < g.c_threads; ++i) free(tinfo_list[i]); free(tinfo_list); } -typedef enum { NEXT, PREV, SEARCH, SEARCH_NEAR } read_operation; - /* - * read_op -- - * Perform a read operation, waiting out prepare conflicts. - */ -static inline int -read_op(WT_CURSOR *cursor, read_operation op, int *exactp) -{ - WT_DECL_RET; - - /* - * Read operations wait out prepare-conflicts. (As part of the snapshot - * isolation checks, we repeat reads that succeeded before, they should - * be repeatable.) 
- */ - switch (op) { - case NEXT: - while ((ret = cursor->next(cursor)) == WT_PREPARE_CONFLICT) - __wt_yield(); - break; - case PREV: - while ((ret = cursor->prev(cursor)) == WT_PREPARE_CONFLICT) - __wt_yield(); - break; - case SEARCH: - while ((ret = cursor->search(cursor)) == WT_PREPARE_CONFLICT) - __wt_yield(); - break; - case SEARCH_NEAR: - while ((ret = - cursor->search_near(cursor, exactp)) == WT_PREPARE_CONFLICT) - __wt_yield(); - break; - } - return (ret); -} - -typedef enum { INSERT, MODIFY, READ, REMOVE, TRUNCATE, UPDATE } thread_op; -typedef struct { - thread_op op; /* Operation */ - uint64_t keyno; /* Row number */ - uint64_t last; /* Inclusive end of a truncate range */ - - void *kdata; /* If an insert, the generated key */ - size_t ksize; - size_t kmemsize; - - void *vdata; /* If not a delete, the value */ - size_t vsize; - size_t vmemsize; -} SNAP_OPS; - -#define SNAP_TRACK(op, tinfo) do { \ - if (snap != NULL && \ - (size_t)(snap - snap_list) < WT_ELEMENTS(snap_list)) \ - snap_track(snap++, op, tinfo); \ -} while (0) - -/* - * snap_track -- - * Add a single snapshot isolation returned value to the list. + * begin_transaction_ts -- + * Begin a timestamped transaction. */ static void -snap_track(SNAP_OPS *snap, thread_op op, TINFO *tinfo) -{ - WT_ITEM *ip; - - snap->op = op; - snap->keyno = tinfo->keyno; - snap->last = op == TRUNCATE ? tinfo->last : 0; - - if (op == INSERT && g.type == ROW) { - ip = tinfo->key; - if (snap->kmemsize < ip->size) { - snap->kdata = drealloc(snap->kdata, ip->size); - snap->kmemsize = ip->size; - } - memcpy(snap->kdata, ip->data, snap->ksize = ip->size); - } - - if (op != REMOVE && op != TRUNCATE) { - ip = tinfo->value; - if (snap->vmemsize < ip->size) { - snap->vdata = drealloc(snap->vdata, ip->size); - snap->vmemsize = ip->size; - } - memcpy(snap->vdata, ip->data, snap->vsize = ip->size); - } -} - -/* - * snap_check -- - * Check snapshot isolation operations are repeatable. 
- */ -static int -snap_check(WT_CURSOR *cursor, - SNAP_OPS *start, SNAP_OPS *stop, WT_ITEM *key, WT_ITEM *value) +begin_transaction_ts(TINFO *tinfo, u_int *iso_configp) { + TINFO **tlp; WT_DECL_RET; - SNAP_OPS *p; - uint8_t bitfield; + WT_SESSION *session; + uint64_t ts; + const char *config; + char buf[64]; - for (; start < stop; ++start) { - /* - * We don't test all of the records in a truncate range, only - * the first because that matches the rest of the isolation - * checks. If a truncate range was from the start of the table, - * switch to the record at the end. - */ - if (start->op == TRUNCATE && start->keyno == 0) { - start->keyno = start->last; - testutil_assert(start->keyno != 0); - } + session = tinfo->session; - /* - * Check for subsequent changes to this record. If we find a - * read, don't treat it was a subsequent change, that way we - * verify the results of the change as well as the results of - * the read. - */ - for (p = start + 1; p < stop; ++p) { - if (p->op == READ) - continue; - if (p->keyno == start->keyno) - break; + config = "isolation=snapshot"; + *iso_configp = ISOLATION_SNAPSHOT; - if (p->op != TRUNCATE) - continue; - if (g.c_reverse && - (p->keyno == 0 || p->keyno >= start->keyno) && - (p->last == 0 || p->last <= start->keyno)) - break; - if (!g.c_reverse && - (p->keyno == 0 || p->keyno <= start->keyno) && - (p->last == 0 || p->last >= start->keyno)) - break; - } - if (p != stop) - continue; + /* + * Transaction reads are normally repeatable, but WiredTiger timestamps + * allow rewriting commits, that is, applications can specify at commit + * time the timestamp at which the commit happens. If that happens, our + * read might no longer be repeatable. Test in both modes: pick a read + * timestamp we know is repeatable (because it's at least as old as the + * oldest resolved commit timestamp in any thread), and pick a current + * timestamp, 50% of the time. 
+ */ + ts = 0; + if (mmrand(&tinfo->rnd, 1, 2) == 1) + for (ts = UINT64_MAX, tlp = tinfo_list; *tlp != NULL; ++tlp) + ts = WT_MIN(ts, (*tlp)->commit_ts); + if (ts != 0) { + wiredtiger_begin_transaction(session, config); /* - * Retrieve the key/value pair by key. Row-store inserts have a - * unique generated key we saved, else generate the key from the - * key number. + * If the timestamp has aged out of the system, we'll get EINVAL + * when we try and set it. That kills the transaction, we have + * to restart. */ - if (start->op == INSERT && g.type == ROW) { - key->data = start->kdata; - key->size = start->ksize; - cursor->set_key(cursor, key); - } else { - switch (g.type) { - case FIX: - case VAR: - cursor->set_key(cursor, start->keyno); - break; - case ROW: - key_gen(key, start->keyno); - cursor->set_key(cursor, key); - break; - } - } - - switch (ret = read_op(cursor, SEARCH, NULL)) { - case 0: - if (g.type == FIX) { - testutil_check( - cursor->get_value(cursor, &bitfield)); - *(uint8_t *)(value->data) = bitfield; - value->size = 1; - } else - testutil_check( - cursor->get_value(cursor, value)); - break; - case WT_NOTFOUND: - break; - default: - return (ret); + testutil_check(__wt_snprintf( + buf, sizeof(buf), "read_timestamp=%" PRIx64, ts)); + ret = session->timestamp_transaction(session, buf); + if (ret == 0) { + tinfo->read_ts = ts; + tinfo->repeatable_reads = true; + logop(session, + "begin snapshot read-ts=%" PRIu64 " (repeatable)", + tinfo->read_ts); + return; } + if (ret != EINVAL) + testutil_check(ret); - /* Check for simple matches. 
*/ - if (ret == 0 && - start->op != REMOVE && start->op != TRUNCATE && - value->size == start->vsize && - memcmp(value->data, start->vdata, value->size) == 0) - continue; - if (ret == WT_NOTFOUND && - (start->op == REMOVE || start->op == TRUNCATE)) - continue; + testutil_check(session->rollback_transaction(session, NULL)); + } - /* - * In fixed length stores, zero values at the end of the key - * space are returned as not-found, and not-found row reads - * are saved as zero values. Map back-and-forth for simplicity. - */ - if (g.type == FIX) { - if (ret == WT_NOTFOUND && - start->vsize == 1 && *(uint8_t *)start->vdata == 0) - continue; - if ((start->op == REMOVE || start->op == TRUNCATE) && - value->size == 1 && *(uint8_t *)value->data == 0) - continue; - } + wiredtiger_begin_transaction(session, config); - /* Things went pear-shaped. */ - switch (g.type) { - case FIX: - testutil_die(ret, - "snapshot-isolation: %" PRIu64 " search: " - "expected {0x%02x}, found {0x%02x}", - start->keyno, - start->op == REMOVE ? 0 : *(uint8_t *)start->vdata, - ret == WT_NOTFOUND ? 0 : *(uint8_t *)value->data); - /* NOTREACHED */ - case ROW: - fprintf(stderr, - "snapshot-isolation %.*s search mismatch\n", - (int)key->size, (const char *)key->data); + /* + * Otherwise, pick a current timestamp. + * + * Prepare returns an error if the prepare timestamp is less + * than any active read timestamp, single-thread transaction + * prepare and begin. + * + * Lock out the oldest timestamp update. 
+ */ + testutil_check(pthread_rwlock_wrlock(&g.ts_lock)); - if (start->op == REMOVE) - fprintf(stderr, "expected {deleted}\n"); - else - print_item_data( - "expected", start->vdata, start->vsize); - if (ret == WT_NOTFOUND) - fprintf(stderr, "found {deleted}\n"); - else - print_item_data( - "found", value->data, value->size); + ts = __wt_atomic_addv64(&g.timestamp, 1); + testutil_check(__wt_snprintf( + buf, sizeof(buf), "read_timestamp=%" PRIx64, ts)); + testutil_check(session->timestamp_transaction(session, buf)); - testutil_die(ret, - "snapshot-isolation: %.*s search mismatch", - (int)key->size, key->data); - /* NOTREACHED */ - case VAR: - fprintf(stderr, - "snapshot-isolation %" PRIu64 " search mismatch\n", - start->keyno); + testutil_check(pthread_rwlock_unlock(&g.ts_lock)); - if (start->op == REMOVE) - fprintf(stderr, "expected {deleted}\n"); - else - print_item_data( - "expected", start->vdata, start->vsize); - if (ret == WT_NOTFOUND) - fprintf(stderr, "found {deleted}\n"); - else - print_item_data( - "found", value->data, value->size); + tinfo->read_ts = ts; + tinfo->repeatable_reads = false; - testutil_die(ret, - "snapshot-isolation: %" PRIu64 " search mismatch", - start->keyno); - /* NOTREACHED */ - } - } - return (0); + logop(session, "begin snapshot read-ts=%" PRIu64 " (not repeatable)", + tinfo->read_ts); } /* @@ -564,58 +383,42 @@ snap_check(WT_CURSOR *cursor, * Choose an isolation configuration and begin a transaction. 
*/ static void -begin_transaction(TINFO *tinfo, WT_SESSION *session, u_int *iso_configp) +begin_transaction(TINFO *tinfo, u_int *iso_configp) { - WT_DECL_RET; + WT_SESSION *session; u_int v; - char buf[64]; - const char *config; + const char *config, *log; + + session = tinfo->session; if ((v = g.c_isolation_flag) == ISOLATION_RANDOM) v = mmrand(&tinfo->rnd, 1, 3); switch (v) { case 1: v = ISOLATION_READ_UNCOMMITTED; + log = "read-uncommitted"; config = "isolation=read-uncommitted"; break; case 2: v = ISOLATION_READ_COMMITTED; + log = "read-committed"; config = "isolation=read-committed"; break; case 3: default: v = ISOLATION_SNAPSHOT; + log = "snapshot"; config = "isolation=snapshot"; break; } *iso_configp = v; - /* - * Keep trying to start a new transaction if it's timing out - we know - * there aren't any resources pinned so it should succeed eventually. - */ - while ((ret = - session->begin_transaction(session, config)) == WT_CACHE_FULL) - ; - testutil_check(ret); + wiredtiger_begin_transaction(session, config); - if (v == ISOLATION_SNAPSHOT && g.c_txn_timestamps) { - /* - * Prepare returns an error if the prepare timestamp is less - * than any active read timestamp, single-thread transaction - * prepare and begin. - * - * Lock out the oldest timestamp update. - */ - testutil_check(pthread_rwlock_wrlock(&g.ts_lock)); - - testutil_check(__wt_snprintf(buf, sizeof(buf), - "read_timestamp=%" PRIx64, - __wt_atomic_addv64(&g.timestamp, 1))); - testutil_check(session->timestamp_transaction(session, buf)); + tinfo->read_ts = WT_TS_NONE; + tinfo->repeatable_reads = false; - testutil_check(pthread_rwlock_unlock(&g.ts_lock)); - } + logop(session, "begin %s", log); } /* @@ -623,13 +426,17 @@ begin_transaction(TINFO *tinfo, WT_SESSION *session, u_int *iso_configp) * Commit a transaction. 
*/ static void -commit_transaction(TINFO *tinfo, WT_SESSION *session) +commit_transaction(TINFO *tinfo, bool prepared) { + WT_SESSION *session; uint64_t ts; char buf[64]; ++tinfo->commit; + session = tinfo->session; + + ts = 0; /* -Wconditional-uninitialized */ if (g.c_txn_timestamps) { /* Lock out the oldest timestamp update. */ testutil_check(pthread_rwlock_wrlock(&g.ts_lock)); @@ -639,7 +446,7 @@ commit_transaction(TINFO *tinfo, WT_SESSION *session) buf, sizeof(buf), "commit_timestamp=%" PRIx64, ts)); testutil_check(session->timestamp_transaction(session, buf)); - if (tinfo->prepare_txn) { + if (prepared) { testutil_check(__wt_snprintf(buf, sizeof(buf), "durable_timestamp=%" PRIx64, ts)); testutil_check( @@ -650,7 +457,12 @@ commit_transaction(TINFO *tinfo, WT_SESSION *session) } testutil_check(session->commit_transaction(session, NULL)); - tinfo->prepare_txn = false; + /* Remember our oldest commit timestamp. */ + tinfo->commit_ts = ts; + + logop(session, + "commit read-ts=%" PRIu64 ", commit-ts=%" PRIu64, + tinfo->read_ts, tinfo->commit_ts); } /* @@ -658,13 +470,18 @@ commit_transaction(TINFO *tinfo, WT_SESSION *session) * Rollback a transaction. */ static void -rollback_transaction(TINFO *tinfo, WT_SESSION *session) +rollback_transaction(TINFO *tinfo) { + WT_SESSION *session; + + session = tinfo->session; + ++tinfo->rollback; testutil_check(session->rollback_transaction(session, NULL)); - tinfo->prepare_txn = false; + logop(session, + "abort read-ts=%" PRIu64, tinfo->read_ts); } /* @@ -672,12 +489,15 @@ rollback_transaction(TINFO *tinfo, WT_SESSION *session) * Prepare a transaction if timestamps are in use. 
*/ static int -prepare_transaction(TINFO *tinfo, WT_SESSION *session) +prepare_transaction(TINFO *tinfo) { WT_DECL_RET; + WT_SESSION *session; uint64_t ts; char buf[64]; + session = tinfo->session; + ++tinfo->prepare; /* @@ -699,7 +519,6 @@ prepare_transaction(TINFO *tinfo, WT_SESSION *session) testutil_check(pthread_rwlock_unlock(&g.ts_lock)); - tinfo->prepare_txn = true; return (ret); } @@ -728,15 +547,85 @@ prepare_transaction(TINFO *tinfo, WT_SESSION *session) } while (0) /* + * When in a transaction on the live table with snapshot isolation, track + * operations for later repetition. + */ +#define SNAP_TRACK(tinfo, op) do { \ + if (intxn && !ckpt_handle && iso_config == ISOLATION_SNAPSHOT) \ + snap_track(tinfo, op); \ +} while (0) + +/* + * ops_open_session -- + * Create a new session/cursor pair for the thread. + */ +static void +ops_open_session(TINFO *tinfo, bool *ckpt_handlep) +{ + WT_CONNECTION *conn; + WT_CURSOR *cursor; + WT_DECL_RET; + WT_SESSION *session; + + conn = g.wts_conn; + + /* Close any open session/cursor. */ + if ((session = tinfo->session) != NULL) + testutil_check(session->close(session, NULL)); + + testutil_check(conn->open_session(conn, NULL, NULL, &session)); + + /* + * 10% of the time, perform some read-only operations from a checkpoint. + * Skip if we are using data-sources or LSM, they don't support reading + * from checkpoints. + */ + cursor = NULL; + if (!DATASOURCE("lsm") && mmrand(&tinfo->rnd, 1, 10) == 1) { + /* + * WT_SESSION.open_cursor can return EBUSY if concurrent with a + * metadata operation, retry. + */ + while ((ret = session->open_cursor(session, g.uri, NULL, + "checkpoint=WiredTigerCheckpoint", &cursor)) == EBUSY) + __wt_yield(); + + /* + * If the checkpoint hasn't been created yet, ignore the error. + */ + if (ret != ENOENT) { + testutil_check(ret); + *ckpt_handlep = true; + } + } + if (cursor == NULL) { + /* + * Configure "append", in the case of column stores, we append + * when inserting new rows. 
+ * + * WT_SESSION.open_cursor can return EBUSY if concurrent with a + * metadata operation, retry. + */ + while ((ret = session->open_cursor(session, + g.uri, NULL, "append", &cursor)) == EBUSY) + __wt_yield(); + + testutil_check(ret); + *ckpt_handlep = false; + } + + tinfo->session = session; + tinfo->cursor = cursor; +} + +/* * ops -- * Per-thread operations. */ static WT_THREAD_RET ops(void *arg) { - SNAP_OPS *snap, snap_list[128]; TINFO *tinfo; - WT_CONNECTION *conn; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION *session; @@ -744,17 +633,15 @@ ops(void *arg) uint64_t reset_op, session_op, truncate_op; uint32_t range, rnd; u_int i, j, iso_config; - bool greater_than, intxn, next, positioned, readonly; + bool ckpt_handle, greater_than, intxn, next, positioned, prepared; tinfo = arg; - conn = g.wts_conn; - readonly = false; /* -Wconditional-uninitialized */ + iso_config = ISOLATION_RANDOM; /* -Wconditional-uninitialized */ + ckpt_handle = false; /* -Wconditional-uninitialized */ - /* Initialize tracking of snapshot isolation transaction returns. */ - snap = NULL; - iso_config = 0; - memset(snap_list, 0, sizeof(snap_list)); + /* Tracking of transactional snapshot isolation operations. */ + tinfo->snap = tinfo->snap_first = tinfo->snap_list; /* Set up the default key and value buffers. */ tinfo->key = &tinfo->_key; @@ -785,74 +672,26 @@ ops(void *arg) * resolve any running transaction. */ if (intxn) { - commit_transaction(tinfo, session); + commit_transaction(tinfo, false); intxn = false; } - if (session != NULL) - testutil_check(session->close(session, NULL)); - testutil_check( - conn->open_session(conn, NULL, NULL, &session)); + ops_open_session(tinfo, &ckpt_handle); /* Pick the next session/cursor close/open. */ session_op += mmrand(&tinfo->rnd, 100, 5000); - /* - * 10% of the time, perform some read-only operations - * from a checkpoint. 
- * - * Skip if single-threaded and doing checks against a - * Berkeley DB database, that won't work because the - * Berkeley DB database won't match the checkpoint. - * - * Skip if we are using data-sources or LSM, they don't - * support reading from checkpoints. - */ - if (!SINGLETHREADED && - !DATASOURCE("kvsbdb") && !DATASOURCE("lsm") && - mmrand(&tinfo->rnd, 1, 10) == 1) { - /* - * open_cursor can return EBUSY if concurrent - * with a metadata operation, retry. - */ - while ((ret = session->open_cursor(session, - g.uri, NULL, - "checkpoint=WiredTigerCheckpoint", - &cursor)) == EBUSY) - __wt_yield(); - /* - * If the checkpoint hasn't been created yet, - * ignore the error. - */ - if (ret == ENOENT) - continue; - testutil_check(ret); - - /* Checkpoints are read-only. */ - readonly = true; - } else { - /* - * Configure "append", in the case of column - * stores, we append when inserting new rows. - * open_cursor can return EBUSY if concurrent - * with a metadata operation, retry. - */ - while ((ret = session->open_cursor(session, - g.uri, NULL, "append", &cursor)) == EBUSY) - __wt_yield(); - testutil_check(ret); - - /* Updates supported. */ - readonly = false; - } + session = tinfo->session; + cursor = tinfo->cursor; } /* - * Reset the session every now and then, just to make sure that - * operation gets tested. Note the test is not for equality, we - * have to do the reset outside of a transaction. + * If not in a transaction, reset the session now and then, just + * to make sure that operation gets tested. The test is not for + * equality, we have to do the reset outside of a transaction so + * we aren't likely to get an exact match. */ - if (tinfo->ops > reset_op && !intxn) { + if (!intxn && tinfo->ops > reset_op) { testutil_check(session->reset(session)); /* Pick the next reset operation. 
*/ @@ -860,24 +699,34 @@ ops(void *arg) } /* - * If we're not single-threaded and not in a transaction, choose - * an isolation level and start a transaction some percentage of + * If not in a transaction, have a live handle and running in a + * timestamp world, occasionally repeat a timestamped operation. + */ + if (!intxn && !ckpt_handle && + g.c_txn_timestamps && mmrand(&tinfo->rnd, 1, 15) == 1) { + ++tinfo->search; + snap_repeat_single(cursor, tinfo); + } + + /* + * If not in a transaction and have a live handle, choose an + * isolation level and start a transaction some percentage of * the time. */ - if (!SINGLETHREADED && - !intxn && mmrand(&tinfo->rnd, 1, 100) <= g.c_txn_freq) { - begin_transaction(tinfo, session, &iso_config); - snap = - iso_config == ISOLATION_SNAPSHOT ? snap_list : NULL; + if (!intxn && (g.c_txn_timestamps || + mmrand(&tinfo->rnd, 1, 100) <= g.c_txn_freq)) { + if (g.c_txn_timestamps) + begin_transaction_ts(tinfo, &iso_config); + else + begin_transaction(tinfo, &iso_config); + + tinfo->snap_first = tinfo->snap; intxn = true; } - /* Select a row. */ - tinfo->keyno = mmrand(&tinfo->rnd, 1, (u_int)g.rows); - /* Select an operation. */ op = READ; - if (!readonly) { + if (!ckpt_handle) { i = mmrand(&tinfo->rnd, 1, 100); if (i < g.c_delete_pct && tinfo->ops > truncate_op) { op = TRUNCATE; @@ -897,6 +746,9 @@ ops(void *arg) op = UPDATE; } + /* Select a row. */ + tinfo->keyno = mmrand(&tinfo->rnd, 1, (u_int)g.rows); + /* * Inserts, removes and updates can be done following a cursor * set-key, or based on a cursor position taken from a previous @@ -909,13 +761,16 @@ ops(void *arg) ret = read_row(tinfo, cursor); if (ret == 0) { positioned = true; - SNAP_TRACK(READ, tinfo); + SNAP_TRACK(tinfo, READ); } else READ_OP_FAILED(true); } - /* Optionally reserve a row. */ - if (!readonly && intxn && mmrand(&tinfo->rnd, 0, 20) == 1) { + /* + * Optionally reserve a row. Reserving a row before a read isn't + * all that sensible, but not unexpected, either. 
+ */ + if (intxn && !ckpt_handle && mmrand(&tinfo->rnd, 0, 20) == 1) { switch (g.type) { case ROW: ret = row_reserve(tinfo, cursor, positioned); @@ -958,17 +813,17 @@ ops(void *arg) positioned = false; if (ret == 0) { ++tinfo->insert; - SNAP_TRACK(INSERT, tinfo); + SNAP_TRACK(tinfo, INSERT); } else WRITE_OP_FAILED(false); break; case MODIFY: /* - * Change modify into update if not in a transaction - * or in a read-uncommitted transaction, modify isn't - * supported in those cases. + * Change modify into update if not part of a snapshot + * isolation transaction, modify isn't supported in + * those cases. */ - if (!intxn || iso_config == ISOLATION_READ_UNCOMMITTED) + if (!intxn || iso_config != ISOLATION_SNAPSHOT) goto update_instead_of_chosen_op; ++tinfo->update; @@ -982,7 +837,7 @@ ops(void *arg) } if (ret == 0) { positioned = true; - SNAP_TRACK(MODIFY, tinfo); + SNAP_TRACK(tinfo, MODIFY); } else WRITE_OP_FAILED(true); break; @@ -991,7 +846,7 @@ ops(void *arg) ret = read_row(tinfo, cursor); if (ret == 0) { positioned = true; - SNAP_TRACK(READ, tinfo); + SNAP_TRACK(tinfo, READ); } else READ_OP_FAILED(true); break; @@ -1012,7 +867,7 @@ remove_instead_of_truncate: * Don't set positioned: it's unchanged from the * previous state, but not necessarily set. */ - SNAP_TRACK(REMOVE, tinfo); + SNAP_TRACK(tinfo, REMOVE); } else WRITE_OP_FAILED(true); break; @@ -1084,7 +939,7 @@ remove_instead_of_truncate: positioned = false; if (ret == 0) { ++tinfo->truncate; - SNAP_TRACK(TRUNCATE, tinfo); + SNAP_TRACK(tinfo, TRUNCATE); } else WRITE_OP_FAILED(false); break; @@ -1102,7 +957,7 @@ update_instead_of_chosen_op: } if (ret == 0) { positioned = true; - SNAP_TRACK(UPDATE, tinfo); + SNAP_TRACK(tinfo, UPDATE); } else WRITE_OP_FAILED(false); break; @@ -1136,12 +991,14 @@ update_instead_of_chosen_op: continue; /* - * Ending the transaction. If in snapshot isolation, repeat the - * operations and confirm they're unchanged. + * Ending a transaction. 
If on a live handle and the transaction + * was configured for snapshot isolation, repeat the operations + * and confirm the results are unchanged. */ - if (snap != NULL) { - ret = snap_check( - cursor, snap_list, snap, tinfo->key, tinfo->value); + if (intxn && !ckpt_handle && iso_config == ISOLATION_SNAPSHOT) { + __wt_yield(); /* Encourage races */ + + ret = snap_repeat_txn(cursor, tinfo); testutil_assert(ret == 0 || ret == WT_ROLLBACK); if (ret == WT_ROLLBACK) goto rollback; @@ -1151,12 +1008,14 @@ update_instead_of_chosen_op: * If prepare configured, prepare the transaction 10% of the * time. */ + prepared = false; if (g.c_prepare && mmrand(&tinfo->rnd, 1, 10) == 1) { - ret = prepare_transaction(tinfo, session); + ret = prepare_transaction(tinfo); if (ret != 0) WRITE_OP_FAILED(false); - __wt_yield(); /* Let other threads proceed. */ + __wt_yield(); /* Encourage races */ + prepared = true; } /* @@ -1165,23 +1024,24 @@ update_instead_of_chosen_op: */ switch (rnd) { case 1: case 2: case 3: case 4: /* 40% */ - commit_transaction(tinfo, session); + commit_transaction(tinfo, prepared); + snap_repeat_update(tinfo, true); break; case 5: /* 10% */ -rollback: rollback_transaction(tinfo, session); +rollback: rollback_transaction(tinfo); + snap_repeat_update(tinfo, false); break; } intxn = false; - snap = NULL; } if (session != NULL) testutil_check(session->close(session, NULL)); - for (i = 0; i < WT_ELEMENTS(snap_list); ++i) { - free(snap_list[i].kdata); - free(snap_list[i].vdata); + for (i = 0; i < WT_ELEMENTS(tinfo->snap_list); ++i) { + free(tinfo->snap_list[i].kdata); + free(tinfo->snap_list[i].vdata); } key_gen_teardown(tinfo->key); val_gen_teardown(tinfo->value); @@ -1208,6 +1068,13 @@ wts_read_scan(void) conn = g.wts_conn; + /* + * We're not configuring transactions or read timestamps, if there's a + * diagnostic check, skip the scan. + */ + if (g.c_assert_read_timestamp) + return; + /* Set up the default key/value buffers. 
*/ key_gen_init(&key); val_gen_init(&value); @@ -1311,36 +1178,20 @@ read_row_worker( } /* Log the operation */ - if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, - session, "%-10s%" PRIu64, "read", keyno); - -#ifdef HAVE_BERKELEY_DB - if (!SINGLETHREADED) - return (ret); - - /* Retrieve the BDB value. */ - { - WT_ITEM bdb_value; - int notfound; - - bdb_read(keyno, &bdb_value.data, &bdb_value.size, ¬found); - - /* Check for not-found status. */ - if (notfound_chk("read_row", ret, notfound, keyno)) - return (ret); + if (ret == 0) + switch (g.type) { + case FIX: + logop(session, "%-10s%" PRIu64 " {0x%02x}", + "read", keyno, ((char *)value->data)[0]); + break; + case ROW: + case VAR: + logop(session, "%-10s%" PRIu64 " {%.*s}", + "read", keyno, + (int)value->size, (char *)value->data); + break; + } - /* Compare the two. */ - if (value->size != bdb_value.size || - memcmp(value->data, bdb_value.data, value->size) != 0) { - fprintf(stderr, - "read_row: value mismatch %" PRIu64 ":\n", keyno); - print_item("bdb", &bdb_value); - print_item(" wt", value); - testutil_die(0, NULL); - } - } -#endif return (ret); } @@ -1372,7 +1223,7 @@ nextprev(TINFO *tinfo, WT_CURSOR *cursor, bool next) bool incrementing, record_gaps; keyno = 0; - which = next ? "WT_CURSOR.next" : "WT_CURSOR.prev"; + which = next ? "next" : "prev"; switch (ret = read_op(cursor, next ? 
NEXT : PREV, NULL)) { case 0: @@ -1482,8 +1333,9 @@ order_error_row: testutil_die(0, "%s returned {%.*s} then {%.*s}", which, - (int)tinfo->key->size, tinfo->key->data, - (int)key.size, key.data); + (int)tinfo->key->size, + (char *)tinfo->key->data, + (int)key.size, (char *)key.data); } testutil_check(__wt_buf_set((WT_SESSION_IMPL *) @@ -1497,69 +1349,24 @@ order_error_row: return (ret); } - if (ret == 0 && g.logging == LOG_OPS) + if (ret == 0) switch (g.type) { case FIX: - (void)g.wt_api->msg_printf(g.wt_api, - cursor->session, "%-10s%" PRIu64 " {0x%02x}", + logop(cursor->session, "%-10s%" PRIu64 " {0x%02x}", which, keyno, ((char *)value.data)[0]); break; case ROW: - (void)g.wt_api->msg_printf(g.wt_api, - cursor->session, "%-10s{%.*s}, {%.*s}", - which, (int)key.size, (char *)key.data, + logop(cursor->session, + "%-10s%" PRIu64 " {%.*s}, {%.*s}", which, keyno, + (int)key.size, (char *)key.data, (int)value.size, (char *)value.data); break; case VAR: - (void)g.wt_api->msg_printf(g.wt_api, - cursor->session, "%-10s%" PRIu64 " {%.*s}", + logop(cursor->session, "%-10s%" PRIu64 " {%.*s}", which, keyno, (int)value.size, (char *)value.data); break; } -#ifdef HAVE_BERKELEY_DB - if (!SINGLETHREADED) - return (ret); - - { - WT_ITEM bdb_key, bdb_value; - int notfound; - char *p; - - /* Retrieve the BDB key/value. */ - bdb_np(next, &bdb_key.data, &bdb_key.size, - &bdb_value.data, &bdb_value.size, ¬found); - if (notfound_chk( - next ? "nextprev(next)" : "nextprev(prev)", ret, notfound, keyno)) - return (ret); - - /* Compare the two. 
*/ - if ((g.type == ROW && - (key.size != bdb_key.size || - memcmp(key.data, bdb_key.data, key.size) != 0)) || - (g.type != ROW && keyno != (uint64_t)atoll(bdb_key.data))) { - fprintf(stderr, "nextprev: %s KEY mismatch:\n", which); - goto mismatch; - } - if (value.size != bdb_value.size || - memcmp(value.data, bdb_value.data, value.size) != 0) { - fprintf(stderr, "nextprev: %s VALUE mismatch:\n", which); -mismatch: if (g.type == ROW) { - print_item("bdb-key", &bdb_key); - print_item(" wt-key", &key); - } else { - if ((p = (char *)strchr(bdb_key.data, '.')) != NULL) - *p = '\0'; - fprintf(stderr, - "\t" "bdb-key %.*s != wt-key %" PRIu64 "\n", - (int)bdb_key.size, (char *)bdb_key.data, keyno); - } - print_item("bdb-value", &bdb_value); - print_item(" wt-value", &value); - testutil_die(0, NULL); - } - } -#endif return (ret); } @@ -1580,10 +1387,9 @@ row_reserve(TINFO *tinfo, WT_CURSOR *cursor, bool positioned) if ((ret = cursor->reserve(cursor)) != 0) return (ret); - if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, cursor->session, - "%-10s{%.*s}", "reserve", - (int)tinfo->key->size, tinfo->key->data); + logop(cursor->session, + "%-10s%" PRIu64 " {%.*s}", "reserve", + tinfo->keyno, (int)tinfo->key->size, (char *)tinfo->key->data); return (0); } @@ -1603,9 +1409,7 @@ col_reserve(TINFO *tinfo, WT_CURSOR *cursor, bool positioned) if ((ret = cursor->reserve(cursor)) != 0) return (ret); - if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, cursor->session, - "%-10s%" PRIu64, "reserve", tinfo->keyno); + logop(cursor->session, "%-10s%" PRIu64, "reserve", tinfo->keyno); return (0); } @@ -1658,21 +1462,11 @@ row_modify(TINFO *tinfo, WT_CURSOR *cursor, bool positioned) testutil_check(cursor->get_value(cursor, tinfo->value)); - if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, cursor->session, - "%-10s{%.*s}, {%.*s}", - "modify", - (int)tinfo->key->size, tinfo->key->data, - (int)tinfo->value->size, tinfo->value->data); + 
logop(cursor->session, "%-10s%" PRIu64 " {%.*s}, {%.*s}", "modify", + tinfo->keyno, + (int)tinfo->key->size, (char *)tinfo->key->data, + (int)tinfo->value->size, (char *)tinfo->value->data); -#ifdef HAVE_BERKELEY_DB - if (!SINGLETHREADED) - return (0); - - bdb_update( - tinfo->key->data, tinfo->key->size, - tinfo->value->data, tinfo->value->size); -#endif return (0); } @@ -1696,22 +1490,9 @@ col_modify(TINFO *tinfo, WT_CURSOR *cursor, bool positioned) testutil_check(cursor->get_value(cursor, tinfo->value)); - if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, cursor->session, - "%-10s%" PRIu64 ", {%.*s}", - "modify", - tinfo->keyno, - (int)tinfo->value->size, tinfo->value->data); - -#ifdef HAVE_BERKELEY_DB - if (!SINGLETHREADED) - return (0); + logop(cursor->session, "%-10s%" PRIu64 ", {%.*s}", "modify", + tinfo->keyno, (int)tinfo->value->size, (char *)tinfo->value->data); - key_gen(tinfo->key, tinfo->keyno); - bdb_update( - tinfo->key->data, tinfo->key->size, - tinfo->value->data, tinfo->value->size); -#endif return (0); } @@ -1759,16 +1540,9 @@ row_truncate(TINFO *tinfo, WT_CURSOR *cursor) if (ret != 0) return (ret); - if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, session, - "%-10s%" PRIu64 ", %" PRIu64, - "truncate", - tinfo->keyno, tinfo->last); + logop(session, "%-10s%" PRIu64 ", %" PRIu64, + "truncate", tinfo->keyno, tinfo->last); -#ifdef HAVE_BERKELEY_DB - if (SINGLETHREADED) - bdb_truncate(tinfo->keyno, tinfo->last); -#endif return (0); } @@ -1811,16 +1585,9 @@ col_truncate(TINFO *tinfo, WT_CURSOR *cursor) if (ret != 0) return (ret); - if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, session, - "%-10s%" PRIu64 "-%" PRIu64, - "truncate", - tinfo->keyno, tinfo->last); + logop(session, + "%-10s%" PRIu64 "-%" PRIu64, "truncate", tinfo->keyno, tinfo->last); -#ifdef HAVE_BERKELEY_DB - if (SINGLETHREADED) - bdb_truncate(tinfo->keyno, tinfo->last); -#endif return (0); } @@ -1843,19 +1610,11 @@ row_update(TINFO 
*tinfo, WT_CURSOR *cursor, bool positioned) if ((ret = cursor->update(cursor)) != 0) return (ret); - if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, cursor->session, - "%-10s{%.*s}, {%.*s}", - "put", - (int)tinfo->key->size, tinfo->key->data, - (int)tinfo->value->size, tinfo->value->data); - -#ifdef HAVE_BERKELEY_DB - if (SINGLETHREADED) - bdb_update( - tinfo->key->data, tinfo->key->size, - tinfo->value->data, tinfo->value->size); -#endif + logop(cursor->session, "%-10s%" PRIu64 " {%.*s}, {%.*s}", "update", + tinfo->keyno, + (int)tinfo->key->size, (char *)tinfo->key->data, + (int)tinfo->value->size, (char *)tinfo->value->data); + return (0); } @@ -1879,28 +1638,14 @@ col_update(TINFO *tinfo, WT_CURSOR *cursor, bool positioned) if ((ret = cursor->update(cursor)) != 0) return (ret); - if (g.logging == LOG_OPS) { - if (g.type == FIX) - (void)g.wt_api->msg_printf(g.wt_api, cursor->session, - "%-10s%" PRIu64 " {0x%02" PRIx8 "}", - "update", tinfo->keyno, - ((uint8_t *)tinfo->value->data)[0]); - else - (void)g.wt_api->msg_printf(g.wt_api, cursor->session, - "%-10s%" PRIu64 " {%.*s}", - "update", tinfo->keyno, - (int)tinfo->value->size, - (char *)tinfo->value->data); - } + if (g.type == FIX) + logop(cursor->session, "%-10s%" PRIu64 " {0x%02" PRIx8 "}", + "update", tinfo->keyno, ((uint8_t *)tinfo->value->data)[0]); + else + logop(cursor->session, "%-10s%" PRIu64 " {%.*s}", + "update", tinfo->keyno, + (int)tinfo->value->size, (char *)tinfo->value->data); -#ifdef HAVE_BERKELEY_DB - if (SINGLETHREADED) { - key_gen(tinfo->key, tinfo->keyno); - bdb_update( - tinfo->key->data, tinfo->key->size, - tinfo->value->data, tinfo->value->size); - } -#endif return (0); } @@ -2027,19 +1772,11 @@ row_insert(TINFO *tinfo, WT_CURSOR *cursor, bool positioned) return (ret); /* Log the operation */ - if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, cursor->session, - "%-10s{%.*s}, {%.*s}", - "insert", - (int)tinfo->key->size, tinfo->key->data, - 
(int)tinfo->value->size, tinfo->value->data); - -#ifdef HAVE_BERKELEY_DB - if (SINGLETHREADED) - bdb_update( - tinfo->key->data, tinfo->key->size, - tinfo->value->data, tinfo->value->size); -#endif + logop(cursor->session, "%-10s%" PRIu64 " {%.*s}, {%.*s}", "insert", + tinfo->keyno, + (int)tinfo->key->size, (char *)tinfo->key->data, + (int)tinfo->value->size, (char *)tinfo->value->data); + return (0); } @@ -2065,28 +1802,14 @@ col_insert(TINFO *tinfo, WT_CURSOR *cursor) table_append(tinfo->keyno); /* Extend the object. */ - if (g.logging == LOG_OPS) { - if (g.type == FIX) - (void)g.wt_api->msg_printf(g.wt_api, cursor->session, - "%-10s%" PRIu64 " {0x%02" PRIx8 "}", - "insert", tinfo->keyno, - ((uint8_t *)tinfo->value->data)[0]); - else - (void)g.wt_api->msg_printf(g.wt_api, cursor->session, - "%-10s%" PRIu64 " {%.*s}", - "insert", tinfo->keyno, - (int)tinfo->value->size, - (char *)tinfo->value->data); - } + if (g.type == FIX) + logop(cursor->session, "%-10s%" PRIu64 " {0x%02" PRIx8 "}", + "insert", tinfo->keyno, ((uint8_t *)tinfo->value->data)[0]); + else + logop(cursor->session, "%-10s%" PRIu64 " {%.*s}", + "insert", tinfo->keyno, + (int)tinfo->value->size, (char *)tinfo->value->data); -#ifdef HAVE_BERKELEY_DB - if (SINGLETHREADED) { - key_gen(tinfo->key, tinfo->keyno); - bdb_update( - tinfo->key->data, tinfo->key->size, - tinfo->value->data, tinfo->value->size); - } -#endif return (0); } @@ -2111,18 +1834,8 @@ row_remove(TINFO *tinfo, WT_CURSOR *cursor, bool positioned) if (ret != 0 && ret != WT_NOTFOUND) return (ret); - if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, - cursor->session, "%-10s%" PRIu64, "remove", tinfo->keyno); - -#ifdef HAVE_BERKELEY_DB - if (SINGLETHREADED) { - int notfound; + logop(cursor->session, "%-10s%" PRIu64, "remove", tinfo->keyno); - bdb_remove(tinfo->keyno, ¬found); - (void)notfound_chk("row_remove", ret, notfound, tinfo->keyno); - } -#endif return (ret); } @@ -2145,49 +1858,7 @@ col_remove(TINFO *tinfo, WT_CURSOR 
*cursor, bool positioned) if (ret != 0 && ret != WT_NOTFOUND) return (ret); - if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, - cursor->session, "%-10s%" PRIu64, "remove", tinfo->keyno); - -#ifdef HAVE_BERKELEY_DB - if (SINGLETHREADED) { - int notfound; + logop(cursor->session, "%-10s%" PRIu64, "remove", tinfo->keyno); - bdb_remove(tinfo->keyno, ¬found); - (void)notfound_chk("col_remove", ret, notfound, tinfo->keyno); - } -#endif return (ret); } - -#ifdef HAVE_BERKELEY_DB -/* - * notfound_chk -- - * Compare notfound returns for consistency. - */ -static int -notfound_chk(const char *f, int wt_ret, int bdb_notfound, uint64_t keyno) -{ - /* Check for not found status. */ - if (bdb_notfound && wt_ret == WT_NOTFOUND) - return (1); - - if (bdb_notfound) { - fprintf(stderr, "%s: %s:", progname, f); - if (keyno != 0) - fprintf(stderr, " row %" PRIu64 ":", keyno); - fprintf(stderr, - " not found in Berkeley DB, found in WiredTiger\n"); - testutil_die(0, NULL); - } - if (wt_ret == WT_NOTFOUND) { - fprintf(stderr, "%s: %s:", progname, f); - if (keyno != 0) - fprintf(stderr, " row %" PRIu64 ":", keyno); - fprintf(stderr, - " found in Berkeley DB, not found in WiredTiger\n"); - testutil_die(0, NULL); - } - return (0); -} -#endif diff --git a/src/third_party/wiredtiger/test/format/rebalance.c b/src/third_party/wiredtiger/test/format/rebalance.c index 6539afd53eb..0a845e1b2fb 100644 --- a/src/third_party/wiredtiger/test/format/rebalance.c +++ b/src/third_party/wiredtiger/test/format/rebalance.c @@ -51,16 +51,12 @@ wts_rebalance(void) wts_reopen(); conn = g.wts_conn; testutil_check(conn->open_session(conn, NULL, NULL, &session)); - if (g.logging != 0) - (void)g.wt_api->msg_printf(g.wt_api, session, - "=============== rebalance start ==============="); + logop(session, "%s", "=============== rebalance start"); testutil_checkfmt( session->rebalance(session, g.uri, NULL), "%s", g.uri); - if (g.logging != 0) - (void)g.wt_api->msg_printf(g.wt_api, session, - 
"=============== rebalance stop ==============="); + logop(session, "%s", "=============== rebalance stop"); testutil_check(session->close(session, NULL)); wts_verify("post-rebalance verify"); diff --git a/src/third_party/wiredtiger/test/format/s_dumpcmp.sh b/src/third_party/wiredtiger/test/format/s_dumpcmp.sh deleted file mode 100755 index 453d42cda32..00000000000 --- a/src/third_party/wiredtiger/test/format/s_dumpcmp.sh +++ /dev/null @@ -1,67 +0,0 @@ -#! /bin/sh - -trap 'exit 1' 1 2 - -top=../.. - -home="RUNDIR" -wturi="file:wt" - -colflag=0 -bdbdir="" -while : - do case "$1" in - # -b means we need to dump the Berkeley DB database - -b) - bdbdir="$2"; - shift ; shift ;; - # -c means it was a column-store. - -c) - colflag=1 - shift ;; - -h) - shift ; - home=$1 - shift;; - -n) - shift ; - wturi=$1 - shift ;; - *) - break ;; - esac -done - -if test $# -ne 0; then - echo 'usage: s_dumpcmp [-bc]' >&2 - exit 1 -fi - -$top/wt -h $home dump $wturi | - sed -e '1,/^Data$/d' > $home/wt_dump - -if test "X$bdbdir" = "X"; then - exit 0 -fi - -if test $colflag -eq 0; then - $bdbdir/bin/db_dump -p $home/bdb | - sed -e '1,/HEADER=END/d' \ - -e '/DATA=END/d' \ - -e 's/^ //' > $home/bdb_dump -else - # Format stores record numbers in Berkeley DB as string keys, - # it's simpler that way. Convert record numbers from strings - # to numbers. - $bdbdir/bin/db_dump -p $home/bdb | - sed -e '1,/HEADER=END/d' \ - -e '/DATA=END/d' \ - -e 's/^ //' | - sed -e 's/^0*//' \ - -e 's/\.00$//' \ - -e N > $home/bdb_dump -fi - -cmp $home/wt_dump $home/bdb_dump > /dev/null - -exit $? diff --git a/src/third_party/wiredtiger/test/format/salvage.c b/src/third_party/wiredtiger/test/format/salvage.c index b7dc9d43201..f6ce1d3ca5c 100644 --- a/src/third_party/wiredtiger/test/format/salvage.c +++ b/src/third_party/wiredtiger/test/format/salvage.c @@ -144,10 +144,6 @@ wts_salvage(void) { WT_DECL_RET; - /* Some data-sources don't support salvage. 
*/ - if (DATASOURCE("kvsbdb")) - return; - if (g.c_salvage == 0) return; @@ -164,15 +160,6 @@ wts_salvage(void) wts_verify("post-salvage verify"); wts_close(); - /* - * If no records were deleted, dump and compare against Berkeley DB. - * (The problem with deleting records is salvage restores deleted - * records if a page splits leaving a deleted record on one side of - * the split, so we cannot depend on correctness in that case.) - */ - if (g.c_delete_pct == 0) - wts_dump("salvage", SINGLETHREADED); - /* Corrupt the file randomly, salvage, then verify. */ if (corrupt()) { wts_open(g.home, true, &g.wts_conn); diff --git a/src/third_party/wiredtiger/test/format/snap.c b/src/third_party/wiredtiger/test/format/snap.c new file mode 100644 index 00000000000..a1853c56db9 --- /dev/null +++ b/src/third_party/wiredtiger/test/format/snap.c @@ -0,0 +1,495 @@ +/*- + * Public Domain 2014-2019 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "format.h" + +/* + * snap_track -- + * Add a single snapshot isolation returned value to the list. + */ +void +snap_track(TINFO *tinfo, thread_op op) +{ + WT_ITEM *ip; + SNAP_OPS *snap; + + snap = tinfo->snap; + snap->op = op; + snap->keyno = tinfo->keyno; + snap->ts = WT_TS_NONE; + snap->repeatable = false; + snap->last = op == TRUNCATE ? tinfo->last : 0; + + if (op == INSERT && g.type == ROW) { + ip = tinfo->key; + if (snap->kmemsize < ip->size) { + snap->kdata = drealloc(snap->kdata, ip->size); + snap->kmemsize = ip->size; + } + memcpy(snap->kdata, ip->data, snap->ksize = ip->size); + } + + if (op != REMOVE && op != TRUNCATE) { + ip = tinfo->value; + if (snap->vmemsize < ip->size) { + snap->vdata = drealloc(snap->vdata, ip->size); + snap->vmemsize = ip->size; + } + memcpy(snap->vdata, ip->data, snap->vsize = ip->size); + } + + /* + * Move to the next slot, wrap at the end of the circular buffer. + * + * It's possible to pass this transaction's buffer starting point and + * start replacing our own entries. That's OK, we just skip earlier + * operations when we check. + */ + if (++tinfo->snap >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)]) + tinfo->snap = tinfo->snap_list; +} + +/* + * snap_verify -- + * Repeat a read and verify the contents. + */ +static int +snap_verify(WT_CURSOR *cursor, TINFO *tinfo, SNAP_OPS *snap) +{ + WT_DECL_RET; + WT_ITEM *key, *value; + uint8_t bitfield; + + key = tinfo->key; + value = tinfo->value; + + /* + * Retrieve the key/value pair by key. Row-store inserts have a unique + * generated key we saved, else generate the key from the key number. 
+ */ + if (snap->op == INSERT && g.type == ROW) { + key->data = snap->kdata; + key->size = snap->ksize; + cursor->set_key(cursor, key); + } else { + switch (g.type) { + case FIX: + case VAR: + cursor->set_key(cursor, snap->keyno); + break; + case ROW: + key_gen(key, snap->keyno); + cursor->set_key(cursor, key); + break; + } + } + + switch (ret = read_op(cursor, SEARCH, NULL)) { + case 0: + if (g.type == FIX) { + testutil_check(cursor->get_value(cursor, &bitfield)); + *(uint8_t *)(value->data) = bitfield; + value->size = 1; + } else + testutil_check(cursor->get_value(cursor, value)); + break; + case WT_NOTFOUND: + break; + default: + return (ret); + } + + /* Check for simple matches. */ + if (ret == 0 && + snap->op != REMOVE && snap->op != TRUNCATE && + value->size == snap->vsize && + memcmp(value->data, snap->vdata, value->size) == 0) + return (0); + if (ret == WT_NOTFOUND && (snap->op == REMOVE || snap->op == TRUNCATE)) + return (0); + + /* + * In fixed length stores, zero values at the end of the key space are + * returned as not-found, and not-found row reads are saved as zero + * values. Map back-and-forth for simplicity. + */ + if (g.type == FIX) { + if (ret == WT_NOTFOUND && + snap->vsize == 1 && *(uint8_t *)snap->vdata == 0) + return (0); + if ((snap->op == REMOVE || snap->op == TRUNCATE) && + value->size == 1 && *(uint8_t *)value->data == 0) + return (0); + } + + /* Things went pear-shaped. */ + switch (g.type) { + case FIX: + testutil_die(ret, + "snapshot-isolation: %" PRIu64 " search: " + "expected {0x%02x}, found {0x%02x}", + snap->keyno, + snap->op == REMOVE ? 0 : *(uint8_t *)snap->vdata, + ret == WT_NOTFOUND ? 
0 : *(uint8_t *)value->data); + /* NOTREACHED */ + case ROW: + fprintf(stderr, + "snapshot-isolation %.*s search mismatch\n", + (int)key->size, (char *)key->data); + + if (snap->op == REMOVE) + fprintf(stderr, "expected {deleted}\n"); + else + print_item_data("expected", snap->vdata, snap->vsize); + if (ret == WT_NOTFOUND) + fprintf(stderr, " found {deleted}\n"); + else + print_item_data(" found", value->data, value->size); + + testutil_die(ret, + "snapshot-isolation: %.*s search mismatch", + (int)key->size, (char *)key->data); + /* NOTREACHED */ + case VAR: + fprintf(stderr, + "snapshot-isolation %" PRIu64 " search mismatch\n", + snap->keyno); + + if (snap->op == REMOVE) + fprintf(stderr, "expected {deleted}\n"); + else + print_item_data("expected", snap->vdata, snap->vsize); + if (ret == WT_NOTFOUND) + fprintf(stderr, " found {deleted}\n"); + else + print_item_data(" found", value->data, value->size); + + testutil_die(ret, + "snapshot-isolation: %" PRIu64 " search mismatch", + snap->keyno); + /* NOTREACHED */ + } + + /* NOTREACHED */ + return (1); +} + +/* + * snap_ts_clear -- + * Clear snapshots at or before a specified timestamp. + */ +static void +snap_ts_clear(TINFO *tinfo, uint64_t ts) +{ + SNAP_OPS *snap; + int count; + + /* Check from the first operation to the last. */ + for (snap = tinfo->snap_list, + count = WT_ELEMENTS(tinfo->snap_list); count > 0; --count, ++snap) + if (snap->repeatable && snap->ts <= ts) + snap->repeatable = false; +} + +/* + * snap_repeat_ok_match -- + * Compare two operations and see if they modified the same record. + */ +static bool +snap_repeat_ok_match(SNAP_OPS *current, SNAP_OPS *a) +{ + /* Reads are never a problem, there's no modification. */ + if (a->op == READ) + return (true); + + /* Check for a matching single record modification. */ + if (a->keyno == current->keyno) + return (false); + + /* Truncates are slightly harder, make sure the ranges don't overlap. 
*/ + if (a->op == TRUNCATE) { + if (g.c_reverse && + (a->keyno == 0 || a->keyno >= current->keyno) && + (a->last == 0 || a->last <= current->keyno)) + return (false); + if (!g.c_reverse && + (a->keyno == 0 || a->keyno <= current->keyno) && + (a->last == 0 || a->last >= current->keyno)) + return (false); + } + + return (true); +} + +/* + * snap_repeat_ok_commit -- + * Return if an operation in the transaction can be repeated, where the + * transaction isn't yet committed (so all locks are in place), or has already + * committed successfully. + */ +static bool +snap_repeat_ok_commit( + TINFO *tinfo, SNAP_OPS *current, SNAP_OPS *first, SNAP_OPS *last) +{ + SNAP_OPS *p; + + /* + * For updates, check for subsequent changes to the record and don't + * repeat the read. For reads, check for either subsequent or previous + * changes to the record and don't repeat the read. (The reads are + * repeatable, but only at the commit timestamp, and the update will + * do the repeatable read in that case.) + */ + for (p = current;;) { + /* + * Wrap at the end of the circular buffer; "last" is the element + * after the last element we want to test. + */ + if (++p >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)]) + p = tinfo->snap_list; + if (p == last) + break; + + if (!snap_repeat_ok_match(current, p)) + return (false); + } + + if (current->op != READ) + return (true); + for (p = current;;) { + /* + * Wrap at the beginning of the circular buffer; "first" is the + * last element we want to test. + */ + if (p == first) + return (true); + if (--p < tinfo->snap_list) + p = &tinfo->snap_list[ + WT_ELEMENTS(tinfo->snap_list) - 1]; + + if (!snap_repeat_ok_match(current, p)) + return (false); + + } + /* NOTREACHED */ +} + +/* + * snap_repeat_ok_rollback -- + * Return if an operation in the transaction can be repeated, after a + * transaction has rolled back. 
+ */ +static bool +snap_repeat_ok_rollback(TINFO *tinfo, SNAP_OPS *current, SNAP_OPS *first) +{ + SNAP_OPS *p; + + /* Ignore update operations, they can't be repeated after rollback. */ + if (current->op != READ) + return (false); + + /* + * Check for previous changes to the record and don't attempt to repeat + * the read in that case. + */ + for (p = current;;) { + /* + * Wrap at the beginning of the circular buffer; "first" is the + * last element we want to test. + */ + if (p == first) + return (true); + if (--p < tinfo->snap_list) + p = &tinfo->snap_list[ + WT_ELEMENTS(tinfo->snap_list) - 1]; + + if (!snap_repeat_ok_match(current, p)) + return (false); + + } + /* NOTREACHED */ +} + +/* + * snap_repeat_txn -- + * Repeat each operation done within a snapshot isolation transaction. + */ +int +snap_repeat_txn(WT_CURSOR *cursor, TINFO *tinfo) +{ + SNAP_OPS *current, *stop; + + /* Check from the first operation we saved to the last. */ + for (current = tinfo->snap_first, stop = tinfo->snap;; ++current) { + /* Wrap at the end of the circular buffer. */ + if (current >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)]) + current = tinfo->snap_list; + if (current == stop) + break; + + /* + * We don't test all of the records in a truncate range, only + * the first because that matches the rest of the isolation + * checks. If a truncate range was from the start of the table, + * switch to the record at the end. This is done in the first + * routine that considers if operations are repeatable, and the + * rest of those functions depend on it already being done. + */ + if (current->op == TRUNCATE && current->keyno == 0) { + current->keyno = current->last; + testutil_assert(current->keyno != 0); + } + + if (snap_repeat_ok_commit( + tinfo, current, tinfo->snap_first, stop)) + WT_RET(snap_verify(cursor, tinfo, current)); + } + + return (0); +} + +/* + * snap_repeat_update -- + * Update the list of snapshot operations based on final transaction + * resolution. 
+ */ +void +snap_repeat_update(TINFO *tinfo, bool committed) +{ + SNAP_OPS *start, *stop; + + /* + * Check from the first operation we saved to the last. It's possible + * to update none at all if we did exactly the number of operations + * in the circular buffer, it will look like we didn't do any. That's + * OK, it's a big enough buffer that it's not going to matter. + */ + for (start = tinfo->snap_first, stop = tinfo->snap;; ++start) { + /* Wrap at the end of the circular buffer. */ + if (start >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)]) + start = tinfo->snap_list; + if (start == stop) + break; + + /* + * First, reads may simply not be repeatable because the read + * timestamp chosen wasn't older than all concurrently running + * uncommitted updates. + */ + if (!tinfo->repeatable_reads && start->op == READ) + continue; + + /* + * Second, check based on the transaction resolution (the rules + * are different if the transaction committed or rolled back). + */ + start->repeatable = committed ? snap_repeat_ok_commit( + tinfo, start, tinfo->snap_first, stop) : + snap_repeat_ok_rollback(tinfo, start, tinfo->snap_first); + + /* + * Repeat reads at the transaction's read timestamp and updates + * at the commit timestamp. + */ + if (start->repeatable) + start->ts = start->op == READ ? + tinfo->read_ts : tinfo->commit_ts; + } +} + +/* + * snap_repeat_single -- + * Repeat an historic operation. + */ +void +snap_repeat_single(WT_CURSOR *cursor, TINFO *tinfo) +{ + SNAP_OPS *snap; + WT_DECL_RET; + WT_SESSION *session; + int count; + u_int v; + char buf[64]; + + session = cursor->session; + + /* + * Start at a random spot in the list of operations and look for a read + * to retry. Stop when we've walked the entire list or found one. + */ + v = mmrand(&tinfo->rnd, 1, WT_ELEMENTS(tinfo->snap_list)) - 1; + for (snap = &tinfo->snap_list[v], + count = WT_ELEMENTS(tinfo->snap_list); count > 0; --count, ++snap) { + /* Wrap at the end of the circular buffer. 
*/ + if (snap >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)]) + snap = tinfo->snap_list; + + if (snap->repeatable) + break; + } + + if (count == 0) + return; + + /* + * Start a new transaction. + * Set the read timestamp. + * Verify the record. + * Discard the transaction. + */ + while ((ret = session->begin_transaction( + session, "isolation=snapshot")) == WT_CACHE_FULL) + __wt_yield(); + testutil_check(ret); + + /* + * If the timestamp has aged out of the system, we'll get EINVAL when we + * try and set it. + */ + testutil_check(__wt_snprintf( + buf, sizeof(buf), "read_timestamp=%" PRIx64, snap->ts)); + + ret = session->timestamp_transaction(session, buf); + if (ret == 0) { + logop(session, "%-10s%" PRIu64 " ts=%" PRIu64 " {%.*s}", + "repeat", snap->keyno, snap->ts, + (int)snap->vsize, (char *)snap->vdata); + + /* The only expected error is rollback. */ + ret = snap_verify(cursor, tinfo, snap); + + if (ret != 0 && ret != WT_ROLLBACK) + testutil_check(ret); + } else if (ret == EINVAL) + snap_ts_clear(tinfo, snap->ts); + else + testutil_check(ret); + + /* Discard the transaction. */ + testutil_check(session->rollback_transaction(session, NULL)); +} diff --git a/src/third_party/wiredtiger/test/format/t.c b/src/third_party/wiredtiger/test/format/t.c index 7cdd9e38a2f..84175ba53d6 100644 --- a/src/third_party/wiredtiger/test/format/t.c +++ b/src/third_party/wiredtiger/test/format/t.c @@ -94,7 +94,7 @@ main(int argc, char *argv[]) home = NULL; onerun = 0; while ((ch = __wt_getopt( - progname, argc, argv, "1C:c:h:Llqrt:")) != EOF) + progname, argc, argv, "1C:c:h:lqrt:")) != EOF) switch (ch) { case '1': /* One run */ onerun = 1; @@ -108,16 +108,8 @@ main(int argc, char *argv[]) case 'h': home = __wt_optarg; break; - case 'L': /* Re-direct output to a log */ - /* - * The -l option is a superset of -L, ignore -L if we - * have already configured logging for operations. 
- */ - if (g.logging == 0) - g.logging = LOG_FILE; - break; - case 'l': /* Turn on operation logging */ - g.logging = LOG_OPS; + case 'l': /* Log operations to a file */ + g.logging = true; break; case 'q': /* Quiet */ g.c_quiet = 1; @@ -162,7 +154,7 @@ main(int argc, char *argv[]) * the base configuration. */ for (; *argv != NULL; ++argv) - config_single(*argv, 1); + config_single(*argv, true); /* * Multithreaded runs can be replayed: it's useful and we'll get the @@ -200,17 +192,13 @@ main(int argc, char *argv[]) startup(); /* Start a run */ config_setup(); /* Run configuration */ - config_print(0); /* Dump run configuration */ + config_print(false); /* Dump run configuration */ key_init(); /* Setup keys/values */ val_init(); start = time(NULL); track("starting up", 0ULL, NULL); -#ifdef HAVE_BERKELEY_DB - if (SINGLETHREADED) - bdb_open(); /* Initial file config */ -#endif wts_open(g.home, true, &g.wts_conn); wts_init(); @@ -248,10 +236,6 @@ main(int argc, char *argv[]) } track("shutting down", 0ULL, NULL); -#ifdef HAVE_BERKELEY_DB - if (SINGLETHREADED) - bdb_close(); -#endif wts_close(); /* @@ -260,13 +244,6 @@ main(int argc, char *argv[]) wts_rebalance(); /* - * If single-threaded, we can dump and compare the WiredTiger - * and Berkeley DB data sets. - */ - if (SINGLETHREADED) - wts_dump("standard", 1); - - /* * Salvage testing. */ wts_salvage(); @@ -286,7 +263,7 @@ main(int argc, char *argv[]) fclose_and_clear(&g.logfp); fclose_and_clear(&g.randfp); - config_print(0); + config_print(false); testutil_check(pthread_rwlock_destroy(&g.append_lock)); testutil_check(pthread_rwlock_destroy(&g.backup_lock)); @@ -316,7 +293,7 @@ startup(void) testutil_die(ret, "home directory initialization failed"); /* Open/truncate the logging file. */ - if (g.logging != 0 && (g.logfp = fopen(g.home_log, "w")) == NULL) + if (g.logging && (g.logfp = fopen(g.home_log, "w")) == NULL) testutil_die(errno, "fopen: %s", g.home_log); /* Open/truncate the random number logging file. 
*/ @@ -331,25 +308,31 @@ startup(void) static void format_die(void) { + /* - * Single-thread error handling, our caller exits after calling - * us - don't release the lock. + * Turn off tracking and logging so we don't obscure the error message. + * The lock we're about to acquire will act as a barrier to flush the + * writes. This is really a "best effort" more than a guarantee, there's + * too much stuff in flight to be sure. */ - (void)pthread_rwlock_wrlock(&g.death_lock); + g.c_quiet = 1; + g.logging = false; - /* Try and turn off tracking so it doesn't obscure the error message. */ - if (!g.c_quiet) { - g.c_quiet = 1; - fprintf(stderr, "\n"); - } + /* + * Single-thread error handling, our caller exits after calling us (we + * never release the lock). + */ + (void)pthread_rwlock_wrlock(&g.death_lock); /* Flush/close any logging information. */ fclose_and_clear(&g.logfp); fclose_and_clear(&g.randfp); + fprintf(stderr, "\n"); + /* Display the configuration that failed. */ if (g.run_cnt) - config_print(1); + config_print(true); } /* @@ -360,7 +343,7 @@ static void usage(void) { fprintf(stderr, - "usage: %s [-1Llqr] [-C wiredtiger-config]\n " + "usage: %s [-1lqr] [-C wiredtiger-config]\n " "[-c config-file] [-h home] [name=value ...]\n", progname); fprintf(stderr, "%s", @@ -368,8 +351,7 @@ usage(void) "\t-C specify wiredtiger_open configuration arguments\n" "\t-c read test program configuration from a file\n" "\t-h home (default 'RUNDIR')\n" - "\t-L output to a log file\n" - "\t-l log operations (implies -L)\n" + "\t-l log operations to a file\n" "\t-q run quietly\n" "\t-r replay the last run\n"); diff --git a/src/third_party/wiredtiger/test/format/util.c b/src/third_party/wiredtiger/test/format/util.c index 874d99cd167..6f7783a3a32 100644 --- a/src/third_party/wiredtiger/test/format/util.c +++ b/src/third_party/wiredtiger/test/format/util.c @@ -104,8 +104,8 @@ key_gen_common(WT_ITEM *key, uint64_t keyno, const char * const suffix) len = 13; /* - * In a 
column-store, the key is only used for Berkeley DB inserts, - * and so it doesn't need a random length. + * In a column-store, the key isn't used, it doesn't need a random + * length. */ if (g.type == ROW) { p[len] = '/'; @@ -348,15 +348,9 @@ path_setup(const char *home) testutil_check(__wt_snprintf( g.home_stats, len, "%s/%s", g.home, "stats")); - /* BDB directory. */ - len = strlen(g.home) + strlen("bdb") + 2; - g.home_bdb = dmalloc(len); - testutil_check(__wt_snprintf(g.home_bdb, len, "%s/%s", g.home, "bdb")); - /* * Home directory initialize command: create the directory if it doesn't - * exist, else remove everything except the RNG log file, create the KVS - * subdirectory. + * exist, else remove everything except the RNG log file. * * Redirect the "cd" command to /dev/null so chatty cd implementations * don't add the new working directory to our output. @@ -366,16 +360,14 @@ path_setup(const char *home) #define CMD "del /q rand.copy & " \ "(IF EXIST %s\\rand copy /y %s\\rand rand.copy) & " \ "(IF EXIST %s rd /s /q %s) & mkdir %s & " \ - "(IF EXIST rand.copy copy rand.copy %s\\rand) & " \ - "cd %s & mkdir KVS" + "(IF EXIST rand.copy copy rand.copy %s\\rand)" len = strlen(g.home) * 7 + strlen(CMD) + 1; g.home_init = dmalloc(len); testutil_check(__wt_snprintf(g.home_init, len, CMD, g.home, g.home, g.home, g.home, g.home, g.home, g.home)); #else #define CMD "test -e %s || mkdir %s; " \ - "cd %s > /dev/null && rm -rf `ls | sed /rand/d`; " \ - "mkdir KVS" + "cd %s > /dev/null && rm -rf `ls | sed /rand/d`" len = strlen(g.home) * 3 + strlen(CMD) + 1; g.home_init = dmalloc(len); testutil_check(__wt_snprintf( @@ -536,7 +528,7 @@ checkpoint(void *arg) */ ckpt_config = NULL; backup_locked = false; - if (!DATASOURCE("kvsbdb") && !DATASOURCE("lsm")) + if (!DATASOURCE("lsm")) switch (mmrand(NULL, 1, 20)) { case 1: /* diff --git a/src/third_party/wiredtiger/test/format/vt.suppress b/src/third_party/wiredtiger/test/format/vt.suppress index 562af6810c0..135d32e56c7 100644 --- 
a/src/third_party/wiredtiger/test/format/vt.suppress +++ b/src/third_party/wiredtiger/test/format/vt.suppress @@ -27,20 +27,3 @@ fun:wts_startup fun:main } -{ - <Berkeley DB uninitialized write> - Memcheck:Param - write(buf) - obj:/lib/libc.so.7 - fun:__os_io - fun:__memp_pgwrite - fun:__memp_bhwrite - fun:__memp_sync_int - fun:__memp_fsync - fun:__db_sync - fun:__db_refresh - fun:__db_close - fun:__db_close_pp - fun:bdb_teardown - fun:main -} diff --git a/src/third_party/wiredtiger/test/format/wts.c b/src/third_party/wiredtiger/test/format/wts.c index 75f43c6922b..9e0a69aa433 100644 --- a/src/third_party/wiredtiger/test/format/wts.c +++ b/src/third_party/wiredtiger/test/format/wts.c @@ -96,6 +96,15 @@ handle_message(WT_EVENT_HANDLER *handler, (void)(handler); (void)(session); + /* + * WiredTiger logs a verbose message when the read timestamp is set to a + * value older than the oldest timestamp. Ignore the message, it happens + * when repeating operations to confirm timestamped values don't change + * underneath us. + */ + if (strstr(message, "less than the oldest timestamp") != NULL) + return (0); + /* Write and flush the message so we're up-to-date on error. */ if (g.logfp == NULL) { out = printf("%p:%s\n", (void *)session, message); @@ -261,14 +270,13 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) /* Extensions. */ CONFIG_APPEND(p, ",extensions=[" - "\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"],", + "\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"],", g.c_reverse ? REVERSE_PATH : "", access(LZ4_PATH, R_OK) == 0 ? LZ4_PATH : "", access(ROTN_PATH, R_OK) == 0 ? ROTN_PATH : "", access(SNAPPY_PATH, R_OK) == 0 ? SNAPPY_PATH : "", access(ZLIB_PATH, R_OK) == 0 ? ZLIB_PATH : "", - access(ZSTD_PATH, R_OK) == 0 ? ZSTD_PATH : "", - DATASOURCE("kvsbdb") ? KVS_BDB_PATH : ""); + access(ZSTD_PATH, R_OK) == 0 ? ZSTD_PATH : ""); /* * Put configuration file configuration options second to last. 
Put @@ -413,10 +421,16 @@ wts_init(void) /* Configure Btree split page percentage. */ CONFIG_APPEND(p, ",split_pct=%" PRIu32, g.c_split_pct); - /* Configure LSM and data-sources. */ - if (DATASOURCE("kvsbdb")) - CONFIG_APPEND(p, ",type=kvsbdb"); + /* + * Assertions. + * Assertions slow down the code for additional diagnostic checking. + */ + if (g.c_txn_timestamps && g.c_assert_commit_timestamp) + CONFIG_APPEND(p, ",assert=(commit_timestamp=key_consistent)"); + if (g.c_txn_timestamps && g.c_assert_read_timestamp) + CONFIG_APPEND(p, ",assert=(read_timestamp=always)"); + /* Configure LSM. */ if (DATASOURCE("lsm")) { CONFIG_APPEND(p, ",type=lsm,lsm=("); CONFIG_APPEND(p, @@ -467,43 +481,6 @@ wts_close(void) } void -wts_dump(const char *tag, int dump_bdb) -{ -#ifdef HAVE_BERKELEY_DB - size_t len; - char *cmd; - - /* - * In-memory configurations and data-sources don't support dump through - * the wt utility. - */ - if (g.c_in_memory != 0) - return; - if (DATASOURCE("kvsbdb")) - return; - - track("dump files and compare", 0ULL, NULL); - - len = strlen(g.home) + strlen(BERKELEY_DB_PATH) + strlen(g.uri) + 100; - cmd = dmalloc(len); - testutil_check(__wt_snprintf(cmd, len, - "sh s_dumpcmp -h %s %s %s %s %s %s", - g.home, - dump_bdb ? "-b " : "", - dump_bdb ? BERKELEY_DB_PATH : "", - g.type == FIX || g.type == VAR ? "-c" : "", - g.uri == NULL ? "" : "-n", - g.uri == NULL ? 
"" : g.uri)); - - testutil_checkfmt(system(cmd), "%s: dump comparison failed", tag); - free(cmd); -#else - (void)tag; /* [-Wunused-variable] */ - (void)dump_bdb; /* [-Wunused-variable] */ -#endif -} - -void wts_verify(const char *tag) { WT_CONNECTION *conn; @@ -517,9 +494,7 @@ wts_verify(const char *tag) track("verify", 0ULL, NULL); testutil_check(conn->open_session(conn, NULL, NULL, &session)); - if (g.logging != 0) - (void)g.wt_api->msg_printf(g.wt_api, session, - "=============== verify start ==============="); + logop(session, "%s", "=============== verify start"); /* * Verify can return EBUSY if the handle isn't available. Don't yield @@ -530,9 +505,7 @@ wts_verify(const char *tag) testutil_assertfmt( ret == 0 || ret == EBUSY, "session.verify: %s: %s", g.uri, tag); - if (g.logging != 0) - (void)g.wt_api->msg_printf(g.wt_api, session, - "=============== verify stop ==============="); + logop(session, "%s", "=============== verify stop"); testutil_check(session->close(session, NULL)); } @@ -557,10 +530,6 @@ wts_stats(void) if (g.c_statistics == 0) return; - /* Some data-sources don't support statistics. */ - if (DATASOURCE("kvsbdb")) - return; - conn = g.wts_conn; track("stat", 0ULL, NULL); diff --git a/src/third_party/wiredtiger/test/py_install/testbase.py b/src/third_party/wiredtiger/test/py_install/testbase.py new file mode 100755 index 00000000000..a7559981513 --- /dev/null +++ b/src/third_party/wiredtiger/test/py_install/testbase.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2019 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. 
+# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +# A quick sanity test of an installation via 'pip install wiredtiger'. + +import wiredtiger, shutil, os +from wiredtiger import wiredtiger_open, wiredtiger_version + +wthome = "WTPY_TEST" +shutil.rmtree(wthome, ignore_errors=True) +os.mkdir(wthome) +conn = wiredtiger_open(wthome, "create") +session = conn.open_session() +session.create('table:foo', 'key_format=S,value_format=i') +c = session.open_cursor('table:foo') +c['A'] = 100 +c['B'] = 200 +c['C'] = 300 +print('Expect 200 = ' + str(c['B'])) +if c['B'] != 200: + raise Exception('BAD RESULT') +c.close() +session.close() +conn.close() + +print(wiredtiger_version()) +print('testbase success.') diff --git a/src/third_party/wiredtiger/test/py_install/testpack.py b/src/third_party/wiredtiger/test/py_install/testpack.py new file mode 100755 index 00000000000..9c00184fc4f --- /dev/null +++ b/src/third_party/wiredtiger/test/py_install/testpack.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2019 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. 
+# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +# A quick sanity test of an installation via 'pip install wiredtiger'. +# This program only uses the packing API. 
+ +import sys +from wiredtiger.packing import unpack, pack + +testval = '8281e420f2fa4a8381e40c5855ca808080808080e22fc0e20fc0' +# Jump through hoops to make code work for Py2 + Py3 +x = bytes(bytearray.fromhex(testval)) + +unpacked = unpack('iiiiiiiiiiiiii',x) +unexpect = [2, 1, 552802954, 3, 1, 207123978, 0, 0, 0, 0, 0, 0, 20480, 12288] +#print(str(unpacked))) +if unpacked != unexpect: + raise Exception('BAD RESULT FOR UNPACK') + +packed = pack('iiii', 1, 2, 3, 4) +expect = b'\x81\x82\x83\x84' +#print(str(packed))) +if packed != expect: + raise Exception('BAD RESULT FOR PACK') + +print('testpack success.') diff --git a/src/third_party/wiredtiger/test/suite/test_assert07.py b/src/third_party/wiredtiger/test/suite/test_assert07.py new file mode 100644 index 00000000000..679b8d6f205 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_assert07.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2019 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# test_assert07.py +# Verify that the resolved update assertion does not get triggerd by having +# reserved updates at different locations in the update chain. +# + +from suite_subprocess import suite_subprocess +import wiredtiger, wttest + +def timestamp_str(t): + return '%x' % t + +class test_assert07(wttest.WiredTigerTestCase, suite_subprocess): + def apply_timestamps(self, timestamp): + self.session.prepare_transaction( + 'prepare_timestamp=' + timestamp_str(timestamp)) + self.session.timestamp_transaction( + 'commit_timestamp=' + timestamp_str(timestamp)) + self.session.timestamp_transaction( + 'durable_timestamp=' + timestamp_str(timestamp)) + + def test_timestamp_alter(self): + base = 'assert07' + uri = 'file:' + base + + # No reserved, single update. + self.session.create(uri, 'key_format=S,value_format=S') + c = self.session.open_cursor(uri) + self.session.begin_transaction('isolation=snapshot') + c['key_ts1'] = 'value1' + self.apply_timestamps(1) + self.session.commit_transaction() + + # Reserved at the start of the chain, with one update. + self.session.begin_transaction('isolation=snapshot') + c.set_key('key_ts1') + c.reserve() + c['key_ts1'] = 'value2' + self.apply_timestamps(2) + self.session.commit_transaction() + + # Reserved at the end of the chain, with one update. + self.session.begin_transaction('isolation=snapshot') + c['key_ts1'] = 'value3' + c.set_key('key_ts1') + c.reserve() + self.apply_timestamps(3) + self.session.commit_transaction() + + # Reserved at the start of the chain, with multiple. 
+ self.session.begin_transaction('isolation=snapshot') + c.set_key('key_ts1') + c.reserve() + c['key_ts1'] = 'value4' + c['key_ts1'] = 'value5' + self.apply_timestamps(4) + self.session.commit_transaction() + + # Reserved at the end of the chain, with multiple updates. + self.session.begin_transaction('isolation=snapshot') + c['key_ts1'] = 'value6' + c['key_ts1'] = 'value7' + c.set_key('key_ts1') + c.reserve() + self.apply_timestamps(5) + self.session.commit_transaction() + + # Reserved between two updates. + self.session.begin_transaction('isolation=snapshot') + c['key_ts1'] = 'value8' + c.set_key('key_ts1') + c.reserve() + c['key_ts1'] = 'value9' + self.apply_timestamps(6) + self.session.commit_transaction() + + # Reserved update with multiple extra updates. + self.session.begin_transaction('isolation=snapshot') + c['key_ts1'] = 'value10' + c.set_key('key_ts1') + c.reserve() + c['key_ts1'] = 'value11' + c['key_ts1'] = 'value12' + c['key_ts1'] = 'value13' + self.apply_timestamps(7) + self.session.commit_transaction() + + # Reserved updates with multiple extra updates. 
+ self.session.begin_transaction('isolation=snapshot') + c['key_ts1'] = 'value14' + c.set_key('key_ts1') + c.reserve() + c['key_ts1'] = 'value15' + c['key_ts1'] = 'value16' + c.set_key('key_ts1') + c.reserve() + c['key_ts1'] = 'value17' + self.apply_timestamps(8) + self.session.commit_transaction() + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_cursor12.py b/src/third_party/wiredtiger/test/suite/test_cursor12.py index 121180430f7..db14237687e 100755 --- a/src/third_party/wiredtiger/test/suite/test_cursor12.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor12.py @@ -227,7 +227,7 @@ class test_cursor12(wttest.WiredTigerTestCase): self.assertEquals(c.update(), 0) c.reset() - self.session.begin_transaction() + self.session.begin_transaction("isolation=snapshot") c.set_key(ds.key(row)) mods = [] for j in i['mods']: @@ -265,6 +265,34 @@ class test_cursor12(wttest.WiredTigerTestCase): row = row + 1 c.close() + # Smoke-test the modify API, anything other than an explicit transaction + # in snapshot isolation fails. 
+ def test_modify_txn_api(self): + ds = SimpleDataSet(self, self.uri, 100, key_format=self.keyfmt, value_format=self.valuefmt) + ds.populate() + + c = self.session.open_cursor(self.uri, None) + c.set_key(ds.key(10)) + msg = '/not supported/' + + self.session.begin_transaction() + mods = [] + mods.append(wiredtiger.Modify('-', 1, 1)) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: c.modify(mods), msg) + self.session.rollback_transaction() + + self.session.begin_transaction("isolation=read-uncommitted") + mods = [] + mods.append(wiredtiger.Modify('-', 1, 1)) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: c.modify(mods), msg) + self.session.rollback_transaction() + + self.session.begin_transaction("isolation=read-committed") + mods = [] + mods.append(wiredtiger.Modify('-', 1, 1)) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: c.modify(mods), msg) + self.session.rollback_transaction() + # Smoke-test the modify API, operating on a group of records. def test_modify_smoke(self): ds = SimpleDataSet(self, @@ -326,7 +354,7 @@ class test_cursor12(wttest.WiredTigerTestCase): ds.populate() c = self.session.open_cursor(self.uri, None) - self.session.begin_transaction() + self.session.begin_transaction("isolation=snapshot") c.set_key(ds.key(10)) orig = self.make_value('abcdefghijklmnopqrstuvwxyz') c.set_value(orig) @@ -356,7 +384,7 @@ class test_cursor12(wttest.WiredTigerTestCase): c.set_key(ds.key(10)) self.assertEquals(c.remove(), 0) - self.session.begin_transaction() + self.session.begin_transaction("isolation=snapshot") mods = [] mod = wiredtiger.Modify('ABCD', 3, 3) mods.append(mod) @@ -374,7 +402,7 @@ class test_cursor12(wttest.WiredTigerTestCase): ds.populate() # Start a transaction. - self.session.begin_transaction() + self.session.begin_transaction("isolation=snapshot") # Insert a new record. 
c = self.session.open_cursor(self.uri, None) @@ -393,7 +421,7 @@ class test_cursor12(wttest.WiredTigerTestCase): # Test that another transaction cannot modify our uncommitted record. xs = self.conn.open_session() xc = xs.open_cursor(self.uri, None) - xs.begin_transaction() + xs.begin_transaction("isolation=snapshot") xc.set_key(ds.key(30)) xc.set_value(ds.value(30)) mods = [] @@ -408,7 +436,7 @@ class test_cursor12(wttest.WiredTigerTestCase): self.session.rollback_transaction() # Test that we can't modify our aborted insert. - self.session.begin_transaction() + self.session.begin_transaction("isolation=snapshot") mods = [] mod = wiredtiger.Modify('ABCD', 3, 3) mods.append(mod) diff --git a/src/third_party/wiredtiger/test/suite/test_prepare04.py b/src/third_party/wiredtiger/test/suite/test_prepare04.py index 648c8fbb178..d65b96adefe 100644 --- a/src/third_party/wiredtiger/test/suite/test_prepare04.py +++ b/src/third_party/wiredtiger/test/suite/test_prepare04.py @@ -106,14 +106,17 @@ class test_prepare04(wttest.WiredTigerTestCase, suite_subprocess): s_other.begin_transaction(self.txn_config + self.ignore_config) c_other.set_key(1) if self.ignore == False and self.after_ts == True: - self.assertRaises(wiredtiger.WiredTigerError, lambda:c_other.search()) + # Make sure we get the expected prepare conflict message. + self.assertRaisesException(wiredtiger.WiredTigerError, lambda:c_other.search(), preparemsg) else: c_other.search() self.assertTrue(c_other.get_value() == 1) + c_other.set_value(3) - self.assertRaises(wiredtiger.WiredTigerError, lambda:c_other.update()) + + # Make sure we detect the conflict between operations. 
+ self.assertRaisesException(wiredtiger.WiredTigerError, lambda:c_other.update(), conflictmsg) s_other.commit_transaction() - #''' self.session.timestamp_transaction('commit_timestamp=' + timestamp_str(300)) self.session.timestamp_transaction('durable_timestamp=' + timestamp_str(300)) diff --git a/src/third_party/wiredtiger/test/suite/valgrind-python.supp b/src/third_party/wiredtiger/test/suite/valgrind-python.supp index 87a582048e5..2b732c49481 100644 --- a/src/third_party/wiredtiger/test/suite/valgrind-python.supp +++ b/src/third_party/wiredtiger/test/suite/valgrind-python.supp @@ -42,11 +42,3 @@ ... fun:__wt_dlsym } - -{ - Berkeley DB: uninitialized writes. - Memcheck:Param - write(buf) - ... - fun:__memp_pgwrite -} |