summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorLuke Chen <luke.chen@mongodb.com>2021-05-11 09:45:01 +1000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-05-10 23:58:13 +0000
commit94b923a9900f81e750e3d69dc9e27ae573fd16e6 (patch)
treed49203307b6b6abbc9c62085449f46ce1978bfec /src
parent521e05db88e1e1c89dfd8313500322bb90828a35 (diff)
downloadmongo-94b923a9900f81e750e3d69dc9e27ae573fd16e6.tar.gz
Import wiredtiger: aadac222429faa9b20d9344e3648a19be97811b9 from branch mongodb-4.4
ref: 03c93998a7..aadac22242 for: 4.4.7 WT-6204 Possible race between backup and checkpoint at file close WT-6362 Ensure that history store operations in user session context are correct WT-6387 Remove unused WT_CURSTD_UPDATE_LOCAL flag WT-6538 Fix onpage prepare visibility check if the start and stop are from the same prepared transaction WT-6893 Disable huffman config in compatibility test WT-6956 Cut WiredTiger 10.0.0 release WT-7076 Data placement for tiered storage in WiredTiger WT-7092 Reduce calls to hash URI when opening/closing cached cursors by one WT-7105 Add recovery error messages to include the URI WT-7133 Fix bug in stat collection when target pages are reduced due to high HS cache pressure WT-7173 Devise object naming scheme for tiered storage WT-7176 Adding Ubuntu 18.04 ASAN variant to wiredtiger build WT-7185 Avoid aborting a transaction if it is force evicting and oldest WT-7186 Correct expected in-memory abort updates in prepare scenario WT-7190 Limit eviction of non-history store pages when checkpoint is operating on history store WT-7191 Replace FNV hash with City hash WT-7229 Align out of order and mixed mode handling WT-7230 CMake build system support for x86 POSIX targets WT-7241 Add asserts to verify if prepared transaction abort mechanism is working as expected WT-7253 Add import functionality to test/format WT-7264 Creating a new configuration for search near that allows it to exit quickly when searching for prefixes WT-7266 Test to validate re-reading files that were closed with active history WT-7281 Add metric to record total sessions scanned WT-7282 Make backup debugging messages into verbose messages WT-7297 Fix search_near assertion WT-7312 Keys/Values updated to String type and save the created keys WT-7315 Implementation of the update thread operation in the test framework WT-7316 Adding operation throttles, and modifying component functionality to separate core loop WT-7329 Add hook capability to Python testing WT-7332 Add 
ability to cycle through create and drop tables in workgen WT-7345 Update incorrect copyright notices format WT-7346 Connect new API changes to local storage extension WT-7355 Create python hooks to validate tiered cursor implementation WT-7356 Implement bulk load for tiered tables WT-7365 Change the configuration file format WT-7367 Do not remove unstable updates of an in-memory database btree page WT-7368 Add WT_STORAGE_SOURCE.customize_file_system in place of locations WT-7374 Add missing branch checking logic for doc-update task WT-7376 Initialize tiered cursor name WT-7380 Fix wiredtiger connection string to clear statistics WT-7384 Fix an assert fire when inserting to the history store WT-7385 Remove 'auth_token' from being reconfigurable WT-7387 Replace cluster/member with hostid WT-7388 Add parens to assignment in conditional WT-7389 Remove on positioned tiered cursor should leave cursor positioned WT-7390 Add --noremove flag to Python test runner WT-7394 Coverity analysis defect 118020: Uninitialized scalar variable WT-7395 Coverity analysis defect 118042: Dereference after null check WT-7400 Set WT_HS_READ_ALL flag for the search before fixing the out of order timestamps WT-7403 Random cursor on empty tiered table loops forever WT-7409 Remove dead code WT-7410 Split session flags into two to accommodate more session flags in future WT-7413 Add an option to wtperf to run a backup operation WT-7414 Create a python test to ensure that all tables that are dropped during backup exist in the backup WT-7415 Add new configuration files with backup option WT-7419 Tiered local storage changes to use WT file system WT-7420 Tiered local storage changed to flush files to bucket directory WT-7423 Clear checkpoint LSN and backup metadata on import WT-7425 Fix for the -C command line option WT-7428 Move bucket storage to tiered structure WT-7429 Set readonly metadata when switching tiered objects WT-7447 Fix the assert fire because onpage out of order update is not
popped from the stack WT-7453 Coverity analysis defect 119968: Continue has no effect WT-7454 Coverity analysis defect 119967: Continue has no effect WT-7455 Coverity analysis defect 119966: Redundant test WT-7456 Coverity analysis defect 119965: Uninitialized pointer read WT-7457 Coverity: fix error in local store rename WT-7458 Coverity analysis defect 119949: Redundant test WT-7459 Coverity analysis defect 119947: Redundant test WT-7463 Use wt_off_t to avoid incompatible pointer types WT-7468 Fix tiered file and object metadata configuration WT-7474 Reset cursor-order checks whenever a search is performed WT-7475 Update format to use the new history-store cursor type WT-7476 Update configuration handling to allow for optional configuration settings WT-7477 Fix coverity bug: possible NULL dereference WT-7478 Fix coverity printf arg type to match format WT-7481 Fix the wrong assert of disk image write gen comparison with btree base write gen WT-7484 Coverity analysis defect 120014: Uninitialized scalar variable WT-7485 Coverity analysis defect 120018: Resource leak WT-7488 Coverity analysis defect 120015: Dereference after null check WT-7489 Avoid running RTS concurrently with checkpoint WT-7496 Add operations each tier can support to data structure WT-7500 Refactor tiered thread start code
Diffstat (limited to 'src')
-rw-r--r--src/third_party/wiredtiger/INSTALL8
-rw-r--r--src/third_party/wiredtiger/NEWS27
-rw-r--r--src/third_party/wiredtiger/README4
-rw-r--r--src/third_party/wiredtiger/RELEASE_INFO2
-rw-r--r--src/third_party/wiredtiger/bench/workgen/runner/many-dhandle-stress.py33
-rw-r--r--src/third_party/wiredtiger/bench/workgen/workgen.cxx212
-rw-r--r--src/third_party/wiredtiger/bench/workgen/workgen.h2
-rw-r--r--src/third_party/wiredtiger/bench/workgen/workgen_int.h2
-rwxr-xr-xsrc/third_party/wiredtiger/bench/workgen/wtperf.py14
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/config.c6
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/runners/500m-btree-50r50u-backup.wtperf27
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-readonly-backup.wtperf13
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/runners/many-dhandle-stress-backup.wtperf26
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/runners/many-dhandle-stress.wtperf2
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/runners/many-table-stress-backup.wtperf21
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/runners/medium-btree-backup.wtperf12
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-read-heavy-stress-backup.wtperf22
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-stress-backup.wtperf18
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/runners/small-btree-backup.wtperf9
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/track.c18
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/wtperf.c110
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/wtperf.h11
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i2
-rw-r--r--src/third_party/wiredtiger/build_cmake/README.md120
-rw-r--r--src/third_party/wiredtiger/build_cmake/configs/auto.cmake304
-rw-r--r--src/third_party/wiredtiger/build_cmake/configs/base.cmake184
-rw-r--r--src/third_party/wiredtiger/build_cmake/configs/compile_test/pthread_cond_monotonic_test.c40
-rw-r--r--src/third_party/wiredtiger/build_cmake/configs/wiredtiger_config.h.in157
-rw-r--r--src/third_party/wiredtiger/build_cmake/configs/x86/darwin/config.cmake11
-rw-r--r--src/third_party/wiredtiger/build_cmake/configs/x86/linux/config.cmake19
-rw-r--r--src/third_party/wiredtiger/build_cmake/helpers.cmake633
-rw-r--r--src/third_party/wiredtiger/build_cmake/install/install.cmake66
-rw-r--r--src/third_party/wiredtiger/build_cmake/install/wiredtiger.pc.in11
-rw-r--r--src/third_party/wiredtiger/build_cmake/strict/clang_strict.cmake50
-rw-r--r--src/third_party/wiredtiger/build_cmake/strict/gcc_strict.cmake79
-rw-r--r--src/third_party/wiredtiger/build_cmake/toolchains/clang.cmake36
-rw-r--r--src/third_party/wiredtiger/build_cmake/toolchains/gcc.cmake31
-rw-r--r--src/third_party/wiredtiger/build_cmake/toolchains/x86/darwin/plat_clang.cmake14
-rw-r--r--src/third_party/wiredtiger/build_cmake/toolchains/x86/darwin/plat_gcc.cmake14
-rw-r--r--src/third_party/wiredtiger/build_cmake/toolchains/x86/linux/plat_clang.cmake14
-rw-r--r--src/third_party/wiredtiger/build_cmake/toolchains/x86/linux/plat_gcc.cmake9
-rw-r--r--src/third_party/wiredtiger/build_posix/aclocal/version-set.m44
-rw-r--r--src/third_party/wiredtiger/build_posix/aclocal/version.m42
-rw-r--r--src/third_party/wiredtiger/dist/api_data.py135
-rw-r--r--src/third_party/wiredtiger/dist/filelist3
-rw-r--r--src/third_party/wiredtiger/dist/s_copyright.list1
-rw-r--r--src/third_party/wiredtiger/dist/s_define.list2
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_install4
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_readme12
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok6
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_void5
-rw-r--r--src/third_party/wiredtiger/dist/stat_data.py10
-rw-r--r--src/third_party/wiredtiger/dist/test_data.py36
-rw-r--r--src/third_party/wiredtiger/examples/c/Makefile.am2
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_col_store.c2
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_storage_source.c1203
-rw-r--r--src/third_party/wiredtiger/ext/storage_sources/local_store/local_store.c1037
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/lang/python/wiredtiger.i104
-rw-r--r--src/third_party/wiredtiger/src/block/block_ckpt.c42
-rw-r--r--src/third_party/wiredtiger/src/block/block_mgr.c26
-rw-r--r--src/third_party/wiredtiger/src/block/block_read.c4
-rw-r--r--src/third_party/wiredtiger/src/block/block_tiered.c99
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_curnext.c39
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_curprev.c39
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_cursor.c50
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_handle.c60
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_io.c23
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_read.c8
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c269
-rw-r--r--src/third_party/wiredtiger/src/config/test_config.c66
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_api.c142
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_cache.c2
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_cache_pool.c2
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_capacity.c3
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_ckpt.c2
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_dhandle.c31
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_log.c10
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_open.c2
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_stat.c2
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_sweep.c4
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_tiered.c225
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_backup.c20
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_backup_incr.c13
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_std.c59
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-transaction.dox8
-rw-r--r--src/third_party/wiredtiger/src/docs/custom-storage-sources.dox12
-rw-r--r--src/third_party/wiredtiger/src/docs/examples.dox3
-rw-r--r--src/third_party/wiredtiger/src/docs/spell.ok3
-rw-r--r--src/third_party/wiredtiger/src/docs/top/main.dox8
-rw-r--r--src/third_party/wiredtiger/src/docs/upgrading.dox53
-rw-r--r--src/third_party/wiredtiger/src/docs/wtperf.dox2
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_lru.c23
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_page.c14
-rw-r--r--src/third_party/wiredtiger/src/history/hs_conn.c3
-rw-r--r--src/third_party/wiredtiger/src/history/hs_cursor.c12
-rw-r--r--src/third_party/wiredtiger/src/history/hs_rec.c565
-rw-r--r--src/third_party/wiredtiger/src/include/block.h1
-rw-r--r--src/third_party/wiredtiger/src/include/btmem.h25
-rw-r--r--src/third_party/wiredtiger/src/include/btree_cmp_inline.h21
-rw-r--r--src/third_party/wiredtiger/src/include/cache.h2
-rw-r--r--src/third_party/wiredtiger/src/include/cache_inline.h9
-rw-r--r--src/third_party/wiredtiger/src/include/config.h14
-rw-r--r--src/third_party/wiredtiger/src/include/connection.h56
-rw-r--r--src/third_party/wiredtiger/src/include/dhandle.h29
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h68
-rw-r--r--src/third_party/wiredtiger/src/include/log.h2
-rw-r--r--src/third_party/wiredtiger/src/include/meta.h2
-rw-r--r--src/third_party/wiredtiger/src/include/os_fs_inline.h24
-rw-r--r--src/third_party/wiredtiger/src/include/schema.h371
-rw-r--r--src/third_party/wiredtiger/src/include/session.h76
-rw-r--r--src/third_party/wiredtiger/src/include/stat.h17
-rw-r--r--src/third_party/wiredtiger/src/include/tiered.h104
-rw-r--r--src/third_party/wiredtiger/src/include/txn.h3
-rw-r--r--src/third_party/wiredtiger/src/include/txn_inline.h18
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in1228
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger_ext.h15
-rw-r--r--src/third_party/wiredtiger/src/include/wt_internal.h10
-rw-r--r--src/third_party/wiredtiger/src/log/log.c8
-rw-r--r--src/third_party/wiredtiger/src/log/log_slot.c8
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_manager.c8
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_tree.c21
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_apply.c2
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_track.c6
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_turtle.c8
-rw-r--r--src/third_party/wiredtiger/src/os_common/os_fhandle.c2
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_dictionary.c2
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_visibility.c6
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_create.c137
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_drop.c11
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_list.c55
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_open.c4
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_truncate.c9
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_util.c4
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_worker.c8
-rw-r--r--src/third_party/wiredtiger/src/session/session_api.c29
-rw-r--r--src/third_party/wiredtiger/src/session/session_dhandle.c2
-rw-r--r--src/third_party/wiredtiger/src/support/modify.c112
-rw-r--r--src/third_party/wiredtiger/src/support/stat.c69
-rw-r--r--src/third_party/wiredtiger/src/support/thread_group.c2
-rw-r--r--src/third_party/wiredtiger/src/support/update_vector.c111
-rw-r--r--src/third_party/wiredtiger/src/tiered/tiered_config.c188
-rw-r--r--src/third_party/wiredtiger/src/tiered/tiered_cursor.c155
-rw-r--r--src/third_party/wiredtiger/src/tiered/tiered_handle.c625
-rw-r--r--src/third_party/wiredtiger/src/txn/txn.c63
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_ckpt.c21
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_recover.c20
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c66
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_timestamp.c3
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/configs/config_example_test_default.txt48
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/configs/config_poc_test_default.txt62
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/configs/config_poc_test_stress.txt39
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/test_harness/connection_manager.h4
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/test_harness/core/component.h (renamed from src/third_party/wiredtiger/test/cppsuite/test_harness/component.h)42
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/test_harness/core/configuration.h (renamed from src/third_party/wiredtiger/test/cppsuite/test_harness/configuration.h)123
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/test_harness/core/throttle.h73
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/test_harness/runtime_monitor.h58
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/test_harness/test.h55
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/test_harness/thread_manager.h7
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/test_harness/timestamp_manager.h84
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/test_harness/util/api_const.h (renamed from src/third_party/wiredtiger/test/cppsuite/test_harness/api_const.h)8
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/test_harness/util/debug_utils.h (renamed from src/third_party/wiredtiger/test/cppsuite/test_harness/debug_utils.h)0
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/test_harness/workload/database_model.h89
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/test_harness/workload/database_operation.h274
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/test_harness/workload/random_generator.h (renamed from src/third_party/wiredtiger/test/cppsuite/test_harness/random_generator.h)0
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/test_harness/workload/thread_context.h (renamed from src/third_party/wiredtiger/test/cppsuite/test_harness/thread_context.h)119
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/test_harness/workload/workload_tracking.h (renamed from src/third_party/wiredtiger/test/cppsuite/test_harness/workload_tracking.h)14
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/test_harness/workload/workload_validation.h (renamed from src/third_party/wiredtiger/test/cppsuite/test_harness/workload_validation.h)349
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/test_harness/workload_generator.h272
-rw-r--r--src/third_party/wiredtiger/test/cppsuite/tests/example_test.cxx55
-rwxr-xr-xsrc/third_party/wiredtiger/test/cppsuite/tests/run.cxx141
-rwxr-xr-xsrc/third_party/wiredtiger/test/evergreen.yml136
-rwxr-xr-xsrc/third_party/wiredtiger/test/evergreen/compatibility_test_for_releases.sh1
-rw-r--r--src/third_party/wiredtiger/test/format/Makefile.am2
-rw-r--r--src/third_party/wiredtiger/test/format/backup.c53
-rw-r--r--src/third_party/wiredtiger/test/format/config.c22
-rw-r--r--src/third_party/wiredtiger/test/format/config.h7
-rw-r--r--src/third_party/wiredtiger/test/format/format.h3
-rw-r--r--src/third_party/wiredtiger/test/format/hs.c27
-rw-r--r--src/third_party/wiredtiger/test/format/import.c223
-rw-r--r--src/third_party/wiredtiger/test/format/ops.c7
-rw-r--r--src/third_party/wiredtiger/test/format/wts.c2
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/hook_demo.py130
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/hook_tiered.py142
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/run.py22
-rw-r--r--src/third_party/wiredtiger/test/suite/test_backup21.py89
-rw-r--r--src/third_party/wiredtiger/test/suite/test_hs21.py200
-rw-r--r--src/third_party/wiredtiger/test/suite/test_hs22.py154
-rw-r--r--src/third_party/wiredtiger/test/suite/test_import10.py4
-rw-r--r--src/third_party/wiredtiger/test/suite/test_prepare14.py104
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/test_rollback_to_stable01.py1
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/test_rollback_to_stable10.py4
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/test_rollback_to_stable14.py23
-rw-r--r--src/third_party/wiredtiger/test/suite/test_rollback_to_stable16.py1
-rw-r--r--src/third_party/wiredtiger/test/suite/test_rollback_to_stable18.py116
-rw-r--r--src/third_party/wiredtiger/test/suite/test_rollback_to_stable19.py20
-rw-r--r--src/third_party/wiredtiger/test/suite/test_search_near01.py330
-rw-r--r--src/third_party/wiredtiger/test/suite/test_tiered01.py78
-rwxr-xr-x[-rw-r--r--]src/third_party/wiredtiger/test/suite/test_tiered02.py78
-rwxr-xr-x[-rw-r--r--]src/third_party/wiredtiger/test/suite/test_tiered04.py90
-rwxr-xr-x[-rw-r--r--]src/third_party/wiredtiger/test/suite/test_tiered05.py6
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/test_tiered06.py319
-rw-r--r--src/third_party/wiredtiger/test/suite/test_txn26.py65
-rw-r--r--src/third_party/wiredtiger/test/suite/test_util21.py2
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/wthooks.py259
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/wttest.py14
-rw-r--r--src/third_party/wiredtiger/test/utility/misc.c46
-rw-r--r--src/third_party/wiredtiger/test/utility/test_util.h2
208 files changed, 10400 insertions, 5069 deletions
diff --git a/src/third_party/wiredtiger/INSTALL b/src/third_party/wiredtiger/INSTALL
index 9b4a6fc20a7..b4437c7e864 100644
--- a/src/third_party/wiredtiger/INSTALL
+++ b/src/third_party/wiredtiger/INSTALL
@@ -1,11 +1,11 @@
-WiredTiger 10.0.0: (April 20, 2020)
+WiredTiger 10.0.1: (April 12, 2021)
-This is version 10.0.0 of WiredTiger.
+This is version 10.0.1 of WiredTiger.
Instructions for configuring, building, and installing WiredTiger are available online.
For Linux, MacOS, and other POSIX systems:
- http://source.wiredtiger.com/develop/build-posix.html
+ https://source.wiredtiger.com/develop/build-posix.html
For Windows:
- http://source.wiredtiger.com/develop/build-windows.html
+ https://source.wiredtiger.com/develop/build-windows.html
diff --git a/src/third_party/wiredtiger/NEWS b/src/third_party/wiredtiger/NEWS
index 46fc48ffc5c..0271bbd656a 100644
--- a/src/third_party/wiredtiger/NEWS
+++ b/src/third_party/wiredtiger/NEWS
@@ -1,6 +1,33 @@
Ticket reference tags refer to tickets in the MongoDB JIRA tracking system:
https://jira.mongodb.org
+WiredTiger release 10.0.0, 2021-04-12
+------------------------------------
+
+See the upgrading documentation for details of API and behavior changes.
+
+Significant changes:
+* WT-4008 Add ARM NEON support for row search operations
+* WT-5083 Add mips64el support
+* WT-6598 Add new API allowing changing dhandle hash bucket size
+* WT-4427 Make WiredTiger timestamps always on and 8 bytes
+* WT-4642 Store transaction IDs durably
+* WT-5225 Create persistent file for History store
+* WT-5500 Implement new history store format
+* WT-6677 Map read committed/uncommitted isolation to read-only transaction
+* WT-6710 Change default transaction isolation to snapshot
+* WT-4833 Drop support for Python2
+* WT-6410 Remove WT_SESSION.rebalance
+* WT-6528 Remove offensive terminology in WiredTiger API and source code
+* WT-6674 Remove Async API code and documentation
+* WT-6675 Remove WiredTiger Java language API and documentation
+* WT-6678 Remove Huffman Encoding support for Keys
+* WT-6713 Remove transaction support from custom data sources
+* WT-7295 Compatibility with older versions of WT
+
+See JIRA changelog for a full listing:
+https://jira.mongodb.org/projects/WT/versions/24082
+
WiredTiger release 3.2.1, 2019-08-27
------------------------------------
diff --git a/src/third_party/wiredtiger/README b/src/third_party/wiredtiger/README
index 6ddef162bd4..57b9e083e23 100644
--- a/src/third_party/wiredtiger/README
+++ b/src/third_party/wiredtiger/README
@@ -1,6 +1,6 @@
-WiredTiger 10.0.0: (March 18, 2020)
+WiredTiger 10.0.1: (April 12, 2021)
-This is version 10.0.0 of WiredTiger.
+This is version 10.0.1 of WiredTiger.
WiredTiger release packages and documentation can be found at:
diff --git a/src/third_party/wiredtiger/RELEASE_INFO b/src/third_party/wiredtiger/RELEASE_INFO
index 37e31bad921..85f216a7850 100644
--- a/src/third_party/wiredtiger/RELEASE_INFO
+++ b/src/third_party/wiredtiger/RELEASE_INFO
@@ -1,6 +1,6 @@
WIREDTIGER_VERSION_MAJOR=10
WIREDTIGER_VERSION_MINOR=0
-WIREDTIGER_VERSION_PATCH=0
+WIREDTIGER_VERSION_PATCH=1
WIREDTIGER_VERSION="$WIREDTIGER_VERSION_MAJOR.$WIREDTIGER_VERSION_MINOR.$WIREDTIGER_VERSION_PATCH"
WIREDTIGER_RELEASE_DATE=`date "+%B %e, %Y"`
diff --git a/src/third_party/wiredtiger/bench/workgen/runner/many-dhandle-stress.py b/src/third_party/wiredtiger/bench/workgen/runner/many-dhandle-stress.py
index 9d5feea9f1b..4fdd88dd88e 100644
--- a/src/third_party/wiredtiger/bench/workgen/runner/many-dhandle-stress.py
+++ b/src/third_party/wiredtiger/bench/workgen/runner/many-dhandle-stress.py
@@ -1,4 +1,32 @@
#/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+
# generated from ../wtperf/runners/many-dhandle-stress.wtperf
# The next lines are unneeded if this script is in the runner directory.
import sys
@@ -38,7 +66,7 @@ sample_rate=1
context = Context()
conn_config = ""
-conn_config += ",cache_size=10G,eviction=(threads_min=4,threads_max=4),file_manager=(close_idle_time=30),session_max=1000" # explicitly added
+conn_config += ",cache_size=10G,eviction=(threads_min=4,threads_max=4),file_manager=(close_idle_time=30),session_max=1000,statistics=[all,clear],statistics_log=(wait=1,json=false,on_close=true)" # explicitly added
conn = context.wiredtiger_open("create," + conn_config)
s = conn.open_session("")
@@ -90,6 +118,9 @@ workload.options.run_time=900
workload.options.max_latency=1000
workload.options.sample_rate=1
workload.options.sample_interval_ms = 5000
+# Uncomment to fail instead of generating a warning
+# workload.options.max_idle_table_cycle_fatal = True
+workload.options.max_idle_table_cycle = 2
workload.run(conn)
latency_filename = context.args.home + "/latency.out"
diff --git a/src/third_party/wiredtiger/bench/workgen/workgen.cxx b/src/third_party/wiredtiger/bench/workgen/workgen.cxx
index c937b4124dd..cc2c93d3f6b 100644
--- a/src/third_party/wiredtiger/bench/workgen/workgen.cxx
+++ b/src/third_party/wiredtiger/bench/workgen/workgen.cxx
@@ -137,13 +137,104 @@ static void *thread_workload(void *arg) {
return (NULL);
}
+static void *thread_idle_table_cycle_workload(void *arg) {
+ WorkloadRunnerConnection *runnerConnection = (WorkloadRunnerConnection *) arg;
+ WT_CONNECTION *connection = runnerConnection->connection;
+ WorkloadRunner *runner = runnerConnection->runner;
+
+ try {
+ runner->start_table_idle_cycle(connection);
+ } catch (WorkgenException &wge) {
+ std::cerr << "Exception while create/drop tables." << std::endl;
+ }
+
+ return (NULL);
+}
+
+int WorkloadRunner::check_timing(const char *name, uint64_t last_interval) {
+ WorkloadOptions *options = &_workload->options;
+ int msg_err;
+ const char *str;
+
+ msg_err = 0;
+
+ if (last_interval > options->max_idle_table_cycle) {
+ if (options->max_idle_table_cycle_fatal) {
+ msg_err = ETIMEDOUT;
+ str = "ERROR";
+ } else {
+ str = "WARNING";
+ }
+ std::cerr << str << ": Cycling idle table failed because " << name << " took " << last_interval << " seconds which is longer than configured acceptable maximum of " << options->max_idle_table_cycle << std::endl;
+ }
+ return (msg_err);
+}
+
+int WorkloadRunner::start_table_idle_cycle(WT_CONNECTION *conn) {
+ WT_SESSION *session;
+ WT_CURSOR *cursor;
+ uint64_t start, stop, last_interval;
+ int ret, cycle_count;
+ char uri[BUF_SIZE];
+
+ cycle_count = 0;
+ if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) {
+ THROW("Error Opening a Session.");
+ }
+
+ for (cycle_count = 0 ; !stopping ; ++cycle_count) {
+ sprintf(uri, "table:test_cycle%04d", cycle_count);
+
+ workgen_clock(&start);
+ /* Create a table. */
+ if ((ret = session->create(session, uri, "key_format=S,value_format=S")) != 0) {
+ if (ret == EBUSY)
+ continue;
+ THROW("Table create failed in start_table_idle_cycle.");
+ }
+ workgen_clock(&stop);
+ last_interval = ns_to_sec(stop - start);
+ if ((ret = check_timing("CREATE", last_interval)) != 0)
+ THROW_ERRNO(ret, "WT_SESSION->create timeout.");
+ start = stop;
+
+ /* Open and close cursor. */
+ if ((ret = session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) {
+ THROW("Cursor open failed.");
+ }
+ if ((ret = cursor->close(cursor)) != 0) {
+ THROW("Cursor close failed.");
+ }
+ workgen_clock(&stop);
+ last_interval = ns_to_sec(stop - start);
+ if ((ret = check_timing("CURSOR", last_interval)) != 0)
+ THROW_ERRNO(ret, "WT_SESSION->open_cursor timeout.");
+ start = stop;
+
+ /*
+ * Drop the table. Keep retrying on EBUSY failure - it is an expected return when
+ * checkpoints are happening.
+ */
+ while ((ret = session->drop(session, uri, "force,checkpoint_wait=false")) == EBUSY)
+ sleep(1);
+
+ if (ret != 0) {
+ THROW("Table drop failed in cycle_idle_tables.");
+ }
+ workgen_clock(&stop);
+ last_interval = ns_to_sec(stop - start);
+ if ((ret = check_timing("DROP", last_interval)) != 0)
+ THROW_ERRNO(ret, "WT_SESSION->drop timeout.");
+ }
+ return 0;
+}
/*
* This function will sleep for "timestamp_advance" seconds, increment and set oldest_timestamp,
* stable_timestamp with the specified lag until stopping is set to true
*/
int WorkloadRunner::increment_timestamp(WT_CONNECTION *conn) {
- char buf[BUF_SIZE];
uint64_t time_us;
+ char buf[BUF_SIZE];
while (!stopping)
{
@@ -1933,9 +2024,9 @@ TableInternal::~TableInternal() {}
WorkloadOptions::WorkloadOptions() : max_latency(0),
report_file("workload.stat"), report_interval(0), run_time(0),
- sample_file("monitor.json"), sample_interval_ms(0), sample_rate(1),
- warmup(0), oldest_timestamp_lag(0.0), stable_timestamp_lag(0.0),
- timestamp_advance(0.0), _options() {
+ sample_file("monitor.json"), sample_interval_ms(0), max_idle_table_cycle(0),
+ sample_rate(1), warmup(0), oldest_timestamp_lag(0.0), stable_timestamp_lag(0.0),
+ timestamp_advance(0.0), max_idle_table_cycle_fatal(false), _options() {
_options.add_int("max_latency", max_latency,
"prints warning if any latency measured exceeds this number of "
"milliseconds. Requires sample_interval to be configured.");
@@ -1954,6 +2045,9 @@ WorkloadOptions::WorkloadOptions() : max_latency(0),
"When set to the empty string, no JSON is emitted.");
_options.add_int("sample_interval_ms", sample_interval_ms,
"performance logging every interval milliseconds, 0 to disable");
+ _options.add_int("max_idle_table_cycle", max_idle_table_cycle,
+ "maximum number of seconds a create or drop is allowed before aborting "
+ "or printing a warning based on max_idle_table_cycle_fatal setting.");
_options.add_int("sample_rate", sample_rate,
"how often the latency of operations is measured. 1 for every operation, "
"2 for every second operation, 3 for every third operation etc.");
@@ -1966,6 +2060,8 @@ WorkloadOptions::WorkloadOptions() : max_latency(0),
_options.add_double("timestamp_advance", timestamp_advance,
"how many seconds to wait before moving oldest and stable"
"timestamp forward");
+ _options.add_bool("max_idle_table_cycle_fatal", max_idle_table_cycle_fatal,
+ "print warning (false) or abort (true) of max_idle_table_cycle failure");
}
WorkloadOptions::WorkloadOptions(const WorkloadOptions &other) :
@@ -2125,13 +2221,15 @@ int WorkloadRunner::run_all(WT_CONNECTION *conn) {
WorkgenException *exception;
WorkloadOptions *options = &_workload->options;
WorkloadRunnerConnection *runnerConnection;
+ WorkloadRunnerConnection *createDropTableCycle;
Monitor monitor(*this);
std::ofstream monitor_out;
std::ofstream monitor_json;
std::ostream &out = *_report_out;
- pthread_t time_thandle;
+ pthread_t time_thandle, idle_table_thandle;
WT_DECL_RET;
+ runnerConnection = createDropTableCycle = NULL;
for (size_t i = 0; i < _trunners.size(); i++)
_trunners[i].get_static_counts(counts);
out << "Starting workload: " << _trunners.size() << " threads, ";
@@ -2188,58 +2286,84 @@ int WorkloadRunner::run_all(WT_CONNECTION *conn) {
std::cerr << "Stopping Time threads." << std::endl;
(void)pthread_join(time_thandle, &status);
delete runnerConnection;
+ runnerConnection = NULL;
+ stopping = true;
}
}
- // Treat warmup separately from report interval so that if we have a
- // warmup period we clear and ignore stats after it ends.
- if (options->warmup != 0)
- sleep((unsigned int)options->warmup);
+ // Start Idle table cycle thread
+ if (options->max_idle_table_cycle > 0) {
- // Clear stats after any warmup period completes.
- for (size_t i = 0; i < _trunners.size(); i++) {
- ThreadRunner *runner = &_trunners[i];
- runner->_stats.clear();
- }
+ createDropTableCycle = new WorkloadRunnerConnection();
+ createDropTableCycle->runner = this;
+ createDropTableCycle->connection = conn;
- workgen_epoch(&_start);
- timespec end = _start + options->run_time;
- timespec next_report = _start + options->report_interval;
+ if ((ret = pthread_create(&idle_table_thandle, NULL, thread_idle_table_cycle_workload,
+ createDropTableCycle)) != 0) {
+ std::cerr << "pthread_create failed err=" << ret << std::endl;
+ std::cerr << "Stopping Create Drop table idle cycle threads." << std::endl;
+ (void)pthread_join(idle_table_thandle, &status);
+ delete createDropTableCycle;
+ createDropTableCycle = NULL;
+ stopping = true;
+ }
+ }
- // Let the test run, reporting as needed.
- Stats curstats(false);
- timespec now = _start;
- while (now < end) {
- timespec sleep_amt;
+ timespec now;
- sleep_amt = end - now;
- if (next_report != 0) {
- timespec next_diff = next_report - now;
- if (next_diff < next_report)
- sleep_amt = next_diff;
+ /* Don't run the test if any of the above pthread_create fails. */
+ if (!stopping && ret == 0)
+ {
+ // Treat warmup separately from report interval so that if we have a
+ // warmup period we clear and ignore stats after it ends.
+ if (options->warmup != 0)
+ sleep((unsigned int)options->warmup);
+
+ // Clear stats after any warmup period completes.
+ for (size_t i = 0; i < _trunners.size(); i++) {
+ ThreadRunner *runner = &_trunners[i];
+ runner->_stats.clear();
}
- if (sleep_amt.tv_sec > 0)
- sleep((unsigned int)sleep_amt.tv_sec);
- else
- usleep((useconds_t)((sleep_amt.tv_nsec + 999)/ 1000));
- workgen_epoch(&now);
- if (now >= next_report && now < end && options->report_interval != 0) {
- report(options->report_interval, (now - _start).tv_sec, &curstats);
- while (now >= next_report)
- next_report += options->report_interval;
+ workgen_epoch(&_start);
+ timespec end = _start + options->run_time;
+ timespec next_report = _start + options->report_interval;
+
+ // Let the test run, reporting as needed.
+ Stats curstats(false);
+ now = _start;
+ while (now < end) {
+ timespec sleep_amt;
+
+ sleep_amt = end - now;
+ if (next_report != 0) {
+ timespec next_diff = next_report - now;
+ if (next_diff < next_report)
+ sleep_amt = next_diff;
+ }
+ if (sleep_amt.tv_sec > 0)
+ sleep((unsigned int)sleep_amt.tv_sec);
+ else
+ usleep((useconds_t)((sleep_amt.tv_nsec + 999)/ 1000));
+
+ workgen_epoch(&now);
+ if (now >= next_report && now < end && options->report_interval != 0) {
+ report(options->report_interval, (now - _start).tv_sec, &curstats);
+ while (now >= next_report)
+ next_report += options->report_interval;
+ }
}
}
- // signal all threads to stop
+ // signal all threads to stop.
if (options->run_time != 0)
for (size_t i = 0; i < _trunners.size(); i++)
_trunners[i]._stop = true;
if (options->sample_interval_ms > 0)
monitor._stop = true;
- if (options->oldest_timestamp_lag > 0 || options->stable_timestamp_lag > 0) {
- stopping = true;
- }
+
+ // Signal timestamp and idle table cycle thread to stop.
+ stopping = true;
// wait for all threads
exception = NULL;
@@ -2255,11 +2379,17 @@ int WorkloadRunner::run_all(WT_CONNECTION *conn) {
}
// Wait for the time increment thread
- if (options->oldest_timestamp_lag > 0 || options->stable_timestamp_lag > 0) {
+ if (runnerConnection != NULL) {
WT_TRET(pthread_join(time_thandle, &status));
delete runnerConnection;
}
+ // Wait for the idle table cycle thread.
+ if (createDropTableCycle != NULL) {
+ WT_TRET(pthread_join(idle_table_thandle, &status));
+ delete createDropTableCycle;
+ }
+
workgen_epoch(&now);
if (options->sample_interval_ms > 0) {
WT_TRET(pthread_join(monitor._handle, &status));
diff --git a/src/third_party/wiredtiger/bench/workgen/workgen.h b/src/third_party/wiredtiger/bench/workgen/workgen.h
index 46f81f6db0c..734b56cb224 100644
--- a/src/third_party/wiredtiger/bench/workgen/workgen.h
+++ b/src/third_party/wiredtiger/bench/workgen/workgen.h
@@ -436,11 +436,13 @@ struct WorkloadOptions {
int run_time;
int sample_interval_ms;
int sample_rate;
+ int max_idle_table_cycle;
std::string sample_file;
int warmup;
double oldest_timestamp_lag;
double stable_timestamp_lag;
double timestamp_advance;
+ bool max_idle_table_cycle_fatal;
WorkloadOptions();
WorkloadOptions(const WorkloadOptions &other);
diff --git a/src/third_party/wiredtiger/bench/workgen/workgen_int.h b/src/third_party/wiredtiger/bench/workgen/workgen_int.h
index 4cfa2047b2e..5e7982b3bea 100644
--- a/src/third_party/wiredtiger/bench/workgen/workgen_int.h
+++ b/src/third_party/wiredtiger/bench/workgen/workgen_int.h
@@ -278,6 +278,8 @@ struct WorkloadRunner {
~WorkloadRunner();
int run(WT_CONNECTION *conn);
int increment_timestamp(WT_CONNECTION *conn);
+ int start_table_idle_cycle(WT_CONNECTION *conn);
+ int check_timing(const char *name, uint64_t last_interval);
private:
int close_all();
diff --git a/src/third_party/wiredtiger/bench/workgen/wtperf.py b/src/third_party/wiredtiger/bench/workgen/wtperf.py
index 71d0f3495c0..54271650750 100755
--- a/src/third_party/wiredtiger/bench/workgen/wtperf.py
+++ b/src/third_party/wiredtiger/bench/workgen/wtperf.py
@@ -85,7 +85,9 @@ class Translator:
'readonly', 'reopen_connection', 'run_ops',
'sample_interval', 'sess_config', 'table_config',
'table_count', 'threads', 'transaction_config',
- 'value_sz' ]
+ 'value_sz',
+ 'max_idle_table_cycle',
+ 'max_idle_table_cycle_fatal' ]
def set_opt(self, optname, val):
if optname not in self.supported_opt_list:
@@ -535,6 +537,8 @@ class Translator:
self.get_string_opt('transaction_config', '')
self.get_boolean_opt('compact', False)
self.get_int_opt('pareto', 0)
+ self.get_int_opt('max_idle_table_cycle', 0)
+ self.get_boolean_opt('max_idle_table_cycle_fatal', False)
opts = self.options
if opts.range_partition and opts.random_range == 0:
self.fatal_error('range_partition requires random_range to be set')
@@ -547,6 +551,14 @@ class Translator:
workloadopts += 'workload.options.sample_interval_ms = ' + \
str(self.options.sample_interval_ms) + '\n'
+ if self.options.max_idle_table_cycle > 0:
+ workloadopts += 'workload.options.max_idle_table_cycle = ' + \
+ str(self.options.max_idle_table_cycle) + '\n'
+
+ if self.options.max_idle_table_cycle_fatal:
+ workloadopts += 'workload.options.max_idle_table_cycle_fatal = ' + \
+ str(self.options.max_idle_table_cycle_fatal) + '\n'
+
s = '#/usr/bin/env python\n'
s += '# generated from ' + self.filename + '\n'
s += self.prefix
diff --git a/src/third_party/wiredtiger/bench/wtperf/config.c b/src/third_party/wiredtiger/bench/wtperf/config.c
index 0b4b111fddf..a6c625e5ef0 100644
--- a/src/third_party/wiredtiger/bench/wtperf/config.c
+++ b/src/third_party/wiredtiger/bench/wtperf/config.c
@@ -722,7 +722,7 @@ config_sanity(WTPERF *wtperf)
if (opts->run_time > 0 &&
((opts->checkpoint_threads != 0 && opts->checkpoint_interval > opts->run_time) ||
opts->report_interval > opts->run_time || opts->sample_interval > opts->run_time ||
- opts->scan_interval > opts->run_time)) {
+ opts->scan_interval > opts->run_time || opts->backup_interval > opts->run_time)) {
fprintf(stderr, "interval value longer than the run-time\n");
return (EINVAL);
}
@@ -966,6 +966,10 @@ config_opt_print(WTPERF *wtperf)
"\t"
"Scan interval: %" PRIu32 "\n",
opts->scan_interval);
+ printf(
+ "\t"
+ "Backup interval: %" PRIu32 "\n",
+ opts->backup_interval);
printf(
"\t"
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/500m-btree-50r50u-backup.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/500m-btree-50r50u-backup.wtperf
new file mode 100644
index 00000000000..fdf6a83e94b
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/500m-btree-50r50u-backup.wtperf
@@ -0,0 +1,27 @@
+# wtperf options file: simulate MongoDB.
+# The configuration for the connection and table is from MongoDB.
+# We use multiple tables to simulate collections and indexes.
+# This test assumes that its correlating populate already completed and exists.
+#
+# Set cache to half of memory of AWS perf instance. Enable logging and
+# checkpoints. Collect wiredtiger stats for ftdc.
+conn_config="cache_size=16G,checkpoint=(wait=60,log_size=2GB),session_max=20000,log=(enabled),eviction=(threads_max=8)"
+create=false
+compression="snappy"
+sess_config="isolation=snapshot"
+table_config="type=file"
+table_count=2
+# close_conn as false allows this test to close/finish faster, but if running
+# as the set, the next test will need to run recovery.
+close_conn=false
+key_sz=40
+value_sz=120
+max_latency=2000
+pareto=20
+report_interval=10
+run_time=7200
+sample_interval=10
+sample_rate=1
+threads=((count=10,reads=1),(count=10,updates=1))
+warmup=120
+backup_interval=300
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-readonly-backup.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-readonly-backup.wtperf
new file mode 100644
index 00000000000..ef170c10201
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-readonly-backup.wtperf
@@ -0,0 +1,13 @@
+# wtperf options file: evict btree configuration
+conn_config="cache_size=50M,eviction=(threads_max=8),mmap=false"
+table_config="type=file"
+icount=10000000
+report_interval=5
+run_time=120
+populate_threads=1
+readonly=true
+threads=((count=16,reads=1))
+# Add throughput/latency monitoring
+max_latency=2000
+sample_interval=5
+backup_interval=40
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/many-dhandle-stress-backup.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/many-dhandle-stress-backup.wtperf
new file mode 100644
index 00000000000..75bb892a54e
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/many-dhandle-stress-backup.wtperf
@@ -0,0 +1,26 @@
+# This workload uses several tens of thousands of tables and the workload is evenly distributed
+# among them. The workload creates, opens and drops tables, and it generates a warning if the time
+# taken is more than the configured max_idle_table_cycle.
+conn_config="cache_size=10G,eviction=(threads_min=4,threads_max=4),file_manager=(close_idle_time=30),session_max=1000,statistics=[all,clear],statistics_log=(wait=1,json=false,on_close=true)"
+table_config="type=file"
+table_count=15000
+max_idle_table_cycle=2
+# Uncomment to fail instead of generating a warning
+# max_idle_table_cycle_fatal=true
+random_range=1500000000
+pareto=10
+range_partition=true
+report_interval=5
+checkpoint_threads=1
+checkpoint_interval=30
+populate_threads=1
+pre_load_data=true
+# Uncomment to skip the populate phase, and use a database from a previous run as the baseline.
+# create=false
+icount=15000000
+run_time=900
+threads=((count=10,inserts=1,throttle=1000),(count=10,reads=1))
+max_latency=1000
+sample_interval=5
+sample_rate=1
+backup_interval=120
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/many-dhandle-stress.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/many-dhandle-stress.wtperf
index 160cc429896..2c6e13a486c 100644
--- a/src/third_party/wiredtiger/bench/wtperf/runners/many-dhandle-stress.wtperf
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/many-dhandle-stress.wtperf
@@ -1,7 +1,7 @@
# This workload uses several tens of thousands of tables and the workload is evenly distributed
# among them. The workload creates, opens and drop tables, and it generates warning if the time
# taken is more than the configured max_idle_table_cycle.
-conn_config="cache_size=10G,eviction=(threads_min=4,threads_max=4),file_manager=(close_idle_time=30),session_max=1000"
+conn_config="cache_size=10G,eviction=(threads_min=4,threads_max=4),file_manager=(close_idle_time=30),session_max=1000,statistics=[all,clear],statistics_log=(wait=1,json=false,on_close=true)"
table_config="type=file"
table_count=15000
max_idle_table_cycle=2
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/many-table-stress-backup.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/many-table-stress-backup.wtperf
new file mode 100644
index 00000000000..87be01a94b2
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/many-table-stress-backup.wtperf
@@ -0,0 +1,21 @@
+# Create a set of tables with uneven distribution of data
+conn_config="cache_size=1G,eviction=(threads_max=8),file_manager=(close_idle_time=100000),checkpoint=(wait=20,log_size=2GB),session_max=1000"
+table_config="type=file"
+table_count=2000
+icount=0
+random_range=1000000000
+pareto=10
+range_partition=true
+report_interval=5
+
+run_ops=1000000
+populate_threads=0
+icount=0
+threads=((count=60,inserts=1))
+
+# Warn if a latency over 1 second is seen
+max_latency=1000
+sample_interval=5
+sample_rate=1
+
+backup_interval=1
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/medium-btree-backup.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/medium-btree-backup.wtperf
new file mode 100644
index 00000000000..39560a23190
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/medium-btree-backup.wtperf
@@ -0,0 +1,12 @@
+# wtperf options file: medium btree configuration
+conn_config="cache_size=1G"
+table_config="type=file"
+icount=50000000
+report_interval=5
+run_time=120
+populate_threads=1
+threads=((count=16,reads=1))
+# Add throughput/latency monitoring
+max_latency=2000
+sample_interval=5
+backup_interval=40
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-read-heavy-stress-backup.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-read-heavy-stress-backup.wtperf
new file mode 100644
index 00000000000..f174a5cab5b
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-read-heavy-stress-backup.wtperf
@@ -0,0 +1,22 @@
+# Drive a constant high workload through, even if WiredTiger isn't keeping
+# up by dividing the workload across a lot of threads. This needs to be
+# tuned to the particular machine so the workload is close to capacity in the
+# steady state, but not overwhelming.
+conn_config="cache_size=20GB,session_max=1000,eviction=(threads_min=4,threads_max=8),log=(enabled=false),transaction_sync=(enabled=false),checkpoint_sync=true,checkpoint=(wait=60)"
+table_config="allocation_size=4k,memory_page_max=10MB,prefix_compression=false,split_pct=90,leaf_page_max=32k,internal_page_max=16k,type=file"
+# Divide original icount by database_count.
+table_count=8
+compression=snappy
+icount=200000000
+populate_threads=1
+reopen_connection=false
+log_like_table=true
+#pareto=5
+report_interval=1
+run_time=3600
+threads=((count=10,throttle=250,inserts=1),(count=10,throttle=250,updates=1),(count=80,throttle=600,reads=1,ops_per_txn=3))
+value_sz=500
+sample_interval=5
+sample_rate=1
+backup_interval=240
+
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-stress-backup.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-stress-backup.wtperf
new file mode 100644
index 00000000000..ec42ec20789
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-stress-backup.wtperf
@@ -0,0 +1,18 @@
+# wtperf options file: multi-database configuration attempting to
+# trigger slow operations by overloading CPU and disk.
+# References Jira WT-2131
+conn_config="cache_size=2GB,eviction=(threads_min=2,threads_max=8),log=(enabled=false),direct_io=(data,checkpoint),buffer_alignment=4096,checkpoint_sync=true,checkpoint=(wait=60)"
+table_config="allocation_size=4k,prefix_compression=false,split_pct=75,leaf_page_max=4k,internal_page_max=16k,leaf_item_max=1433,internal_item_max=3100,type=file"
+# Divide original icount by database_count.
+database_count=5
+icount=50000
+populate_threads=1
+random_range=50000000
+report_interval=5
+run_time=3600
+threads=((count=1,inserts=1),(count=10,reads=1))
+value_sz=100
+max_latency=1000
+sample_interval=5
+sample_rate=1
+backup_interval=240
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/small-btree-backup.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/small-btree-backup.wtperf
new file mode 100644
index 00000000000..5469ad09498
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/small-btree-backup.wtperf
@@ -0,0 +1,9 @@
+# wtperf options file: small btree configuration
+conn_config="cache_size=500MB"
+table_config="type=file"
+icount=500000
+report_interval=5
+run_time=120
+populate_threads=1
+threads=((count=8,reads=1))
+backup_interval=40
diff --git a/src/third_party/wiredtiger/bench/wtperf/track.c b/src/third_party/wiredtiger/bench/wtperf/track.c
index 93c270e8040..14c914393ac 100644
--- a/src/third_party/wiredtiger/bench/wtperf/track.c
+++ b/src/third_party/wiredtiger/bench/wtperf/track.c
@@ -49,6 +49,24 @@ sum_pop_ops(WTPERF *wtperf)
}
/*
+ * Return total backup operations.
+ */
+uint64_t
+sum_backup_ops(WTPERF *wtperf)
+{
+ CONFIG_OPTS *opts;
+ uint64_t total;
+
+ opts = wtperf->opts;
+
+ if (opts->backup_interval > 0)
+ total = wtperf->backupthreads->backup.ops;
+ else
+ total = 0;
+ return (total);
+}
+
+/*
* Return total checkpoint operations.
*/
uint64_t
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.c b/src/third_party/wiredtiger/bench/wtperf/wtperf.c
index a0df4a9acdf..b8a36408962 100644
--- a/src/third_party/wiredtiger/bench/wtperf/wtperf.c
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.c
@@ -1121,13 +1121,13 @@ monitor(void *arg)
cur_updates = (updates - last_updates) / opts->sample_interval;
(void)fprintf(fp,
- "%s,%" PRIu32 ",%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%c,%c,%" PRIu32
+ "%s,%" PRIu32 ",%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%c,%c,%c%" PRIu32
",%" PRIu32 ",%" PRIu32 ",%" PRIu32 ",%" PRIu32 ",%" PRIu32 ",%" PRIu32 ",%" PRIu32
",%" PRIu32 ",%" PRIu32 ",%" PRIu32 ",%" PRIu32 "\n",
buf, wtperf->totalsec, cur_inserts, cur_modifies, cur_reads, cur_updates,
- wtperf->ckpt ? 'Y' : 'N', wtperf->scan ? 'Y' : 'N', insert_avg, insert_min, insert_max,
- modify_avg, modify_min, modify_max, read_avg, read_min, read_max, update_avg, update_min,
- update_max);
+ wtperf->backup ? 'Y' : 'N', wtperf->ckpt ? 'Y' : 'N', wtperf->scan ? 'Y' : 'N',
+ insert_avg, insert_min, insert_max, modify_avg, modify_min, modify_max, read_avg,
+ read_min, read_max, update_avg, update_min, update_max);
if (jfp != NULL) {
buf_size = strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S", &localt);
testutil_assert(buf_size != 0);
@@ -1217,6 +1217,83 @@ err:
}
static WT_THREAD_RET
+backup_worker(void *arg)
+{
+ CONFIG_OPTS *opts;
+ WTPERF *wtperf;
+ WTPERF_THREAD *thread;
+ WT_CONNECTION *conn;
+ WT_CURSOR *backup_cursor;
+ WT_DECL_RET;
+ WT_SESSION *session;
+ const char *key;
+ uint32_t i;
+
+ thread = (WTPERF_THREAD *)arg;
+ wtperf = thread->wtperf;
+ opts = wtperf->opts;
+ conn = wtperf->conn;
+ session = NULL;
+
+ if ((ret = conn->open_session(conn, NULL, opts->sess_config, &session)) != 0) {
+ lprintf(wtperf, ret, 0, "open_session failed in backup thread.");
+ goto err;
+ }
+
+ while (!wtperf->stop) {
+ /* Break the sleep up, so we notice interrupts faster. */
+ for (i = 0; i < opts->backup_interval; i++) {
+ sleep(1);
+ if (wtperf->stop)
+ break;
+ }
+ /* If the workers are done, don't bother with a final call. */
+ if (wtperf->stop)
+ break;
+
+ wtperf->backup = true;
+ /* Cleanup the data from the previous backup and create the backup directories. */
+ testutil_create_backup_directory(wtperf->home);
+
+ /*
+ * open_cursor can return EBUSY if concurrent with a metadata operation, retry in that case.
+ */
+ while (
+ (ret = session->open_cursor(session, "backup:", NULL, NULL, &backup_cursor)) == EBUSY)
+ __wt_yield();
+ if (ret != 0)
+ goto err;
+
+ while ((ret = backup_cursor->next(backup_cursor)) == 0) {
+ testutil_check(backup_cursor->get_key(backup_cursor, &key));
+ testutil_copy_file(session, key);
+ }
+
+ if (ret != WT_NOTFOUND) {
+ testutil_check(backup_cursor->close(backup_cursor));
+ goto err;
+ }
+
+ testutil_check(backup_cursor->close(backup_cursor));
+ wtperf->backup = false;
+ ++thread->backup.ops;
+ }
+
+ if (session != NULL && ((ret = session->close(session, NULL)) != 0)) {
+ lprintf(wtperf, ret, 0, "Error closing session in backup worker.");
+ goto err;
+ }
+
+ /* Notify our caller we failed and shut the system down. */
+ if (0) {
+err:
+ wtperf->error = wtperf->stop = true;
+ }
+
+ return (WT_THREAD_RET_VALUE);
+}
+
+static WT_THREAD_RET
checkpoint_worker(void *arg)
{
CONFIG_OPTS *opts;
@@ -1515,7 +1592,7 @@ execute_workload(WTPERF *wtperf)
WT_CONNECTION *conn;
WT_SESSION **sessions;
wt_thread_t idle_table_cycle_thread;
- uint64_t last_ckpts, last_scans;
+ uint64_t last_backup, last_ckpts, last_scans;
uint64_t last_inserts, last_reads, last_truncates;
uint64_t last_modifies, last_updates;
uint32_t interval, run_ops, run_time;
@@ -1528,7 +1605,7 @@ execute_workload(WTPERF *wtperf)
wtperf->insert_ops = wtperf->read_ops = wtperf->truncate_ops = 0;
wtperf->modify_ops = wtperf->update_ops = 0;
- last_ckpts = last_scans = 0;
+ last_backup = last_ckpts = last_scans = 0;
last_inserts = last_reads = last_truncates = 0;
last_modifies = last_updates = 0;
ret = 0;
@@ -1614,12 +1691,13 @@ execute_workload(WTPERF *wtperf)
lprintf(wtperf, 0, 1,
"%" PRIu64 " inserts, %" PRIu64 " modifies, %" PRIu64 " reads, %" PRIu64
- " truncates, %" PRIu64 " updates, %" PRIu64 " checkpoints, %" PRIu64 " scans in %" PRIu32
- " secs (%" PRIu32 " total secs)",
+ " truncates, %" PRIu64 " updates, %" PRIu64 " backups, %" PRIu64 " checkpoints, %" PRIu64
+ " scans in %" PRIu32 " secs (%" PRIu32 " total secs)",
wtperf->insert_ops - last_inserts, wtperf->modify_ops - last_modifies,
wtperf->read_ops - last_reads, wtperf->truncate_ops - last_truncates,
- wtperf->update_ops - last_updates, wtperf->ckpt_ops - last_ckpts,
- wtperf->scan_ops - last_scans, opts->report_interval, wtperf->totalsec);
+ wtperf->update_ops - last_updates, wtperf->backup_ops - last_backup,
+ wtperf->ckpt_ops - last_ckpts, wtperf->scan_ops - last_scans, opts->report_interval,
+ wtperf->totalsec);
last_inserts = wtperf->insert_ops;
last_modifies = wtperf->modify_ops;
last_reads = wtperf->read_ops;
@@ -1627,6 +1705,7 @@ execute_workload(WTPERF *wtperf)
last_updates = wtperf->update_ops;
last_ckpts = wtperf->ckpt_ops;
last_scans = wtperf->scan_ops;
+ last_backup = wtperf->backup_ops;
}
/* Notify the worker threads they are done. */
@@ -1830,6 +1909,7 @@ wtperf_copy(const WTPERF *src, WTPERF **retp)
dest->uris[i] = dstrdup(src->uris[i]);
}
+ dest->backupthreads = NULL;
dest->ckptthreads = NULL;
dest->scanthreads = NULL;
dest->popthreads = NULL;
@@ -1873,6 +1953,7 @@ wtperf_free(WTPERF *wtperf)
free(wtperf->uris);
}
+ free(wtperf->backupthreads);
free(wtperf->ckptthreads);
free(wtperf->scanthreads);
free(wtperf->popthreads);
@@ -2052,6 +2133,12 @@ start_run(WTPERF *wtperf)
/* Didn't create, set insert count. */
if (opts->create == 0 && opts->random_range == 0 && find_table_count(wtperf) != 0)
goto err;
+ /* Start the backup thread. */
+ if (opts->backup_interval != 0) {
+ lprintf(wtperf, 0, 1, "Starting 1 backup thread");
+ wtperf->backupthreads = dcalloc(1, sizeof(WTPERF_THREAD));
+ start_threads(wtperf, NULL, wtperf->backupthreads, 1, backup_worker);
+ }
/* Start the checkpoint thread. */
if (opts->checkpoint_threads != 0) {
lprintf(
@@ -2079,6 +2166,7 @@ start_run(WTPERF *wtperf)
wtperf->read_ops = sum_read_ops(wtperf);
wtperf->truncate_ops = sum_truncate_ops(wtperf);
wtperf->update_ops = sum_update_ops(wtperf);
+ wtperf->backup_ops = sum_backup_ops(wtperf);
wtperf->ckpt_ops = sum_ckpt_ops(wtperf);
wtperf->scan_ops = sum_scan_ops(wtperf);
total_ops = wtperf->insert_ops + wtperf->modify_ops + wtperf->read_ops + wtperf->update_ops;
@@ -2103,6 +2191,7 @@ start_run(WTPERF *wtperf)
"Executed %" PRIu64 " update operations (%" PRIu64 "%%) %" PRIu64 " ops/sec",
wtperf->update_ops, (wtperf->update_ops * 100) / total_ops,
wtperf->update_ops / run_time);
+ lprintf(wtperf, 0, 1, "Executed %" PRIu64 " backup operations", wtperf->backup_ops);
lprintf(wtperf, 0, 1, "Executed %" PRIu64 " checkpoint operations", wtperf->ckpt_ops);
lprintf(wtperf, 0, 1, "Executed %" PRIu64 " scan operations", wtperf->scan_ops);
@@ -2118,6 +2207,7 @@ err:
/* Notify the worker threads they are done. */
wtperf->stop = true;
+ stop_threads(1, wtperf->backupthreads);
stop_threads(1, wtperf->ckptthreads);
stop_threads(1, wtperf->scanthreads);
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.h b/src/third_party/wiredtiger/bench/wtperf/wtperf.h
index 1a43f6e16da..5cedb438507 100644
--- a/src/third_party/wiredtiger/bench/wtperf/wtperf.h
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.h
@@ -131,9 +131,10 @@ struct __wtperf { /* Per-database structure */
const char *compress_ext; /* Compression extension for conn */
const char *compress_table; /* Compression arg to table create */
- WTPERF_THREAD *ckptthreads; /* Checkpoint threads */
- WTPERF_THREAD *popthreads; /* Populate threads */
- WTPERF_THREAD *scanthreads; /* Scan threads */
+ WTPERF_THREAD *backupthreads; /* Backup threads */
+ WTPERF_THREAD *ckptthreads; /* Checkpoint threads */
+ WTPERF_THREAD *popthreads; /* Populate threads */
+ WTPERF_THREAD *scanthreads; /* Scan threads */
#define WORKLOAD_MAX 50
WTPERF_THREAD *workers; /* Worker threads */
@@ -143,6 +144,7 @@ struct __wtperf { /* Per-database structure */
u_int workload_cnt;
/* State tracking variables. */
+ uint64_t backup_ops; /* backup operations */
uint64_t ckpt_ops; /* checkpoint operations */
uint64_t scan_ops; /* scan operations */
uint64_t insert_ops; /* insert operations */
@@ -154,6 +156,7 @@ struct __wtperf { /* Per-database structure */
uint64_t insert_key; /* insert key */
uint64_t log_like_table_key; /* used to allocate IDs for log table */
+ volatile bool backup; /* backup in progress */
volatile bool ckpt; /* checkpoint in progress */
volatile bool scan; /* scan in progress */
volatile bool error; /* thread error */
@@ -247,6 +250,7 @@ struct __wtperf_thread { /* Per-thread structure */
TRUNCATE_CONFIG trunc_cfg; /* Truncate configuration */
+ TRACK backup; /* Backup operations */
TRACK ckpt; /* Checkpoint operations */
TRACK insert; /* Insert operations */
TRACK modify; /* Modify operations */
@@ -279,6 +283,7 @@ void setup_truncate(WTPERF *, WTPERF_THREAD *, WT_SESSION *);
void start_idle_table_cycle(WTPERF *, wt_thread_t *);
void stop_idle_table_cycle(WTPERF *, wt_thread_t);
void worker_throttle(WTPERF_THREAD *);
+uint64_t sum_backup_ops(WTPERF *);
uint64_t sum_ckpt_ops(WTPERF *);
uint64_t sum_scan_ops(WTPERF *);
uint64_t sum_insert_ops(WTPERF *);
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i
index 0713e666e7e..d971f8e4bae 100644
--- a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i
@@ -81,6 +81,8 @@
* options are appended to existing content, whereas STRING options overwrite.
*/
DEF_OPT_AS_UINT32(
+ backup_interval, 0, "backup the database every interval seconds during the workload phase, 0 to disable")
+DEF_OPT_AS_UINT32(
checkpoint_interval, 120, "checkpoint every interval seconds during the workload phase.")
DEF_OPT_AS_UINT32(checkpoint_stress_rate, 0,
"checkpoint every rate operations during the populate phase in the populate thread(s), 0 to "
diff --git a/src/third_party/wiredtiger/build_cmake/README.md b/src/third_party/wiredtiger/build_cmake/README.md
new file mode 100644
index 00000000000..370df3732c7
--- /dev/null
+++ b/src/third_party/wiredtiger/build_cmake/README.md
@@ -0,0 +1,120 @@
+# Building WiredTiger with CMake
+> *CMake support for building WiredTiger is an active work-in-progress. As of this time CMake can **only** build the WiredTiger library for POSIX platforms (Linux & Darwin) on x86 hosts. We suggest you continue using the autoconf build until further support is added.*
+
+### Build Dependencies
+
+To build with CMake we **require** the following dependencies:
+
+* `cmake` : Official CMake install instructions found here: https://cmake.org/install/
+ * *WiredTiger supports CMake 3.11+*
+* `ninja` : Official ninja install instructions found here: https://ninja-build.org/
+
+We also strongly suggest installing the following dependencies (for improved build times):
+
+* `ccache` : Official ccache download instructions found here: https://ccache.dev/download.html
+
+##### Package Manager Instructions
+
+Alternatively you can use your system's package manager to install the dependencies listed above. Depending on the system, the following commands can be run:
+
+###### Install commands for Ubuntu & Debian (tested on Ubuntu 18.04)
+
+```bash
+sudo apt-get install cmake cmake-curses-gui
+sudo apt-get install ccache
+sudo apt-get install ninja-build
+```
+
+###### Install commands for Mac (using HomeBrew)
+
+```bash
+brew install ninja
+brew install ccache
+brew install cmake
+```
+
+
+
+### Building the WiredTiger Library
+
+> *The below commands are written for Linux and Darwin hosts. Windows instructions coming soon!*
+
+Building the WiredTiger library is relatively straightforward. Navigate to the top level of the WiredTiger repository and run the following commands:
+
+###### Configure your build
+
+```bash
+# Create a new directory to run your build from
+$ mkdir build && cd build
+# Run the cmake configure step. Note: '-G Ninja' tells CMake to generate a ninja build
+$ cmake -G Ninja ../.
+...
+-- Configuring done
+-- Generating done
+-- Build files have been written to: /home/wiredtiger/build
+```
+
+*See [Configuration Options](#configuration-options) for additional configuration options.*
+
+###### Run your build
+
+In the same directory you configured your build, run the `ninja` command to start the build:
+
+```bash
+$ ninja
+...
+[211/211 (100%) 2.464s] Creating library symlink libwiredtiger.so
+```
+
+*Note: Ninja doesn't need a `-j` option; it knows how many cores are available.*
+
+###### Configuration Options
+
+There are a number of additional configuration options you can pass to the CMake configuration step. A summary of some important options you will come to know:
+
+* `-DENABLE_STATIC=1` : Compile WiredTiger as a static library
+* `-DENABLE_LZ4=1` : Build the lz4 compressor extension
+* `-DENABLE_SNAPPY=1` : Build the snappy compressor extension
+* `-DENABLE_ZLIB=1` : Build the zlib compressor extension
+* `-DENABLE_ZSTD=1` : Build the libzstd compressor extension
+* `-DHAVE_DIAGNOSTIC=1` : Enable WiredTiger diagnostics
+* `-DHAVE_ATTACH=1` : Enable to pause for debugger attach on failure
+* `-DENABLE_STRICT=1` : Compile with strict compiler warnings enabled
+* `-DCMAKE_INSTALL_PREFIX=<path-to-install-directory>` : Path to install directory
+
+---
+
+An example of using the above configuration options during the configuration step:
+
+```bash
+$ cmake -DENABLE_STATIC=1 -DENABLE_LZ4=1 -DENABLE_SNAPPY=1 -DENABLE_ZLIB=1 -DENABLE_ZSTD=1 -DHAVE_DIAGNOSTIC=1 -DHAVE_ATTACH=1 -DENABLE_STRICT=1 -G Ninja ../.
+```
+
+---
+
+You can further look at all the available configuration options (and also dynamically change them!) by running `ccmake` in your build directory:
+
+```bash
+$ cd build
+$ ccmake .
+```
+
+*The configuration options can also be viewed in `build_cmake/configs/base.cmake`*.
+
+###### Switching between GCC and Clang
+
+By default CMake will use your default system compiler (`cc`). If you want to use a specific toolchain you can pass a toolchain file! We have provided a toolchain file for both GCC (`build_cmake/toolchains/gcc.cmake`) and Clang (`build_cmake/toolchains/clang.cmake`). To use either toolchain you can pass the `-DCMAKE_TOOLCHAIN_FILE=` to the CMake configuration step. For example:
+
+*Using the GCC Toolchain*
+
+```bash
+$ cd build
+$ cmake -DCMAKE_TOOLCHAIN_FILE=../build_cmake/toolchains/gcc.cmake -G Ninja ../.
+```
+
+*Using the Clang Toolchain*
+
+```bash
+$ cd build
+$ cmake -DCMAKE_TOOLCHAIN_FILE=../build_cmake/toolchains/clang.cmake -G Ninja ../.
+```
diff --git a/src/third_party/wiredtiger/build_cmake/configs/auto.cmake b/src/third_party/wiredtiger/build_cmake/configs/auto.cmake
new file mode 100644
index 00000000000..c3868bbdcd9
--- /dev/null
+++ b/src/third_party/wiredtiger/build_cmake/configs/auto.cmake
@@ -0,0 +1,304 @@
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+# All rights reserved.
+#
+# See the file LICENSE for redistribution information
+#
+
+include(build_cmake/helpers.cmake)
+
+### Auto configure options and checks that we can infer from our toolchain environment.
+
+## Assert type sizes.
+assert_type_size("size_t" 8)
+assert_type_size("ssize_t" 8)
+assert_type_size("time_t" 8)
+assert_type_size("off_t" 0)
+assert_type_size("uintptr_t" 0)
+# Probe for uintmax_t and 'unsigned long long'; an empty result size means the
+# type is not available on this toolchain.
+test_type_size("uintmax_t" u_intmax_size)
+test_type_size("unsigned long long" u_long_long_size)
+set(default_uintmax_def " ")
+# If uintmax_t is missing, typedef it from the widest available unsigned type.
+# Note the expansions are quoted: an unquoted empty variable would leave if()
+# with too few arguments. (The second test previously dereferenced the invalid
+# name '${unsigned long long}' rather than the probed u_long_long_size.)
+if("${u_intmax_size}" STREQUAL "")
+ if("${u_long_long_size}" STREQUAL "")
+ set(default_uintmax_def "typedef unsigned long uintmax_t\\;")
+ else()
+ set(default_uintmax_def "typedef unsigned long long uintmax_t\\;")
+ endif()
+endif()
+
+# Declaration used to map the POSIX off_t type onto WiredTiger's wt_off_t.
+config_string(
+ off_t_decl
+ "off_t type declaration."
+ DEFAULT "typedef off_t wt_off_t\\;"
+ INTERNAL
+)
+
+# NOTE(review): the name 'uintprt_t_decl' looks like a typo of 'uintptr_t_decl',
+# and its default supplies a uintmax_t (not uintptr_t) typedef. Presumably the
+# config-header template consumes these exact spellings -- confirm before
+# renaming either the variable or the typedef.
+config_string(
+ uintprt_t_decl
+ "uintptr_t type declaration."
+ DEFAULT "${default_uintmax_def}"
+ INTERNAL
+)
+
+config_include(
+ HAVE_SYS_TYPES_H
+ "Include header sys/types.h exists."
+ FILE "sys/types.h"
+)
+
+config_include(
+ HAVE_INTTYPES_H
+ "Include header inttypes.h exists."
+ FILE "inttypes.h"
+)
+
+config_include(
+ HAVE_STDARG_H
+ "Include header stdarg.h exists."
+ FILE "stdarg.h"
+)
+
+config_include(
+ HAVE_STDBOOL_H
+ "Include header stdbool.h exists."
+ FILE "stdbool.h"
+)
+
+config_include(
+ HAVE_STDINT_H
+ "Include header stdint.h exists."
+ FILE "stdint.h"
+)
+
+config_include(
+ HAVE_STDLIB_H
+ "Include header stdlib.h exists."
+ FILE "stdlib.h"
+)
+
+config_include(
+ HAVE_STDIO_H
+ "Include header stdio.h exists."
+ FILE "stdio.h"
+)
+
+config_include(
+ HAVE_STRINGS_H
+ "Include header strings.h exists."
+ FILE "strings.h"
+)
+
+config_include(
+ HAVE_STRING_H
+ "Include header string.h exists."
+ FILE "string.h"
+)
+
+config_include(
+ HAVE_SYS_STAT_H
+ "Include header sys/stat.h exists."
+ FILE "sys/stat.h"
+)
+
+config_include(
+ HAVE_UNISTD_H
+ "Include header unistd.h exists."
+ FILE "unistd.h"
+)
+
+config_include(
+ HAVE_X86INTRIN_H
+ "Include header x86intrin.h exists."
+ FILE "x86intrin.h"
+)
+
+config_include(
+ HAVE_DLFCN_H
+ "Include header dlfcn.h exists."
+ FILE "dlfcn.h"
+)
+
+config_include(
+ HAVE_MEMORY_H
+ "Include header memory.h exists."
+ FILE "memory.h"
+)
+
+config_func(
+ HAVE_CLOCK_GETTIME
+ "Function clock_gettime exists."
+ FUNC "clock_gettime"
+ FILES "time.h"
+)
+
+config_func(
+ HAVE_FALLOCATE
+ "Function fallocate exists."
+ FUNC "fallocate"
+ FILES "fcntl.h"
+)
+
+config_func(
+ HAVE_FDATASYNC
+ "Function fdatasync exists."
+ FUNC "fdatasync"
+ FILES "unistd.h"
+ DEPENDS "NOT WT_DARWIN"
+)
+
+config_func(
+ HAVE_FTRUNCATE
+ "Function ftruncate exists."
+ FUNC "ftruncate"
+ FILES "unistd.h;sys/types.h"
+)
+
+config_func(
+ HAVE_GETTIMEOFDAY
+ "Function gettimeofday exists."
+ FUNC "gettimeofday"
+ FILES "sys/time.h"
+)
+
+config_func(
+ HAVE_POSIX_FADVISE
+ "Function posix_fadvise exists."
+ FUNC "posix_fadvise"
+ FILES "fcntl.h"
+)
+
+config_func(
+ HAVE_POSIX_FALLOCATE
+ "Function posix_fallocate exists."
+ FUNC "posix_fallocate"
+ FILES "fcntl.h"
+)
+
+config_func(
+ HAVE_POSIX_MADVISE
+ "Function posix_madvise exists."
+ FUNC "posix_madvise"
+ FILES "sys/mman.h"
+)
+
+config_func(
+ HAVE_POSIX_MEMALIGN
+ "Function posix_memalign exists."
+ FUNC "posix_memalign"
+ FILES "stdlib.h"
+)
+
+config_func(
+ HAVE_SETRLIMIT
+ "Function setrlimit exists."
+ FUNC "setrlimit"
+ FILES "sys/time.h;sys/resource.h"
+)
+
+config_func(
+ HAVE_STRTOUQ
+ "Function strtouq exists."
+ FUNC "strtouq"
+ FILES "stdlib.h"
+)
+
+config_func(
+ HAVE_SYNC_FILE_RANGE
+ "Function sync_file_range exists."
+ FUNC "sync_file_range"
+ FILES "fcntl.h"
+)
+
+config_func(
+ HAVE_TIMER_CREATE
+ "Function timer_create exists."
+ FUNC "timer_create"
+ FILES "signal.h;time.h"
+ LIBS "rt"
+)
+
+config_lib(
+ HAVE_LIBPTHREAD
+ "Pthread library exists."
+ LIB "pthread"
+ FUNC "pthread_create"
+)
+
+config_lib(
+ HAVE_LIBRT
+ "rt library exists."
+ LIB "rt"
+ FUNC "timer_create"
+)
+
+config_lib(
+ HAVE_LIBDL
+ "dl library exists."
+ LIB "dl"
+ FUNC "dlopen"
+)
+
+config_lib(
+ HAVE_LIBLZ4
+ "lz4 library exists."
+ LIB "lz4"
+ FUNC "LZ4_versionNumber"
+)
+
+config_lib(
+ HAVE_LIBSNAPPY
+ "snappy library exists."
+ LIB "snappy"
+ FUNC "snappy_compress"
+)
+
+config_lib(
+ HAVE_LIBZ
+ "zlib library exists."
+ LIB "z"
+ FUNC "zlibVersion"
+)
+
+config_lib(
+ HAVE_LIBZSTD
+ "zstd library exists."
+ LIB "zstd"
+ FUNC "ZSTD_versionString"
+)
+
+config_lib(
+ HAVE_LIBTCMALLOC
+ "tcmalloc library exists."
+ LIB "tcmalloc"
+ FUNC "tc_malloc"
+)
+
+config_compile(
+ HAVE_PTHREAD_COND_MONOTONIC
+ "If pthread condition variables support monotonic clocks."
+ SOURCE "${CMAKE_CURRENT_LIST_DIR}/compile_test/pthread_cond_monotonic_test.c"
+ LIBS "pthread"
+ DEPENDS "HAVE_LIBPTHREAD"
+)
+
+set(wiredtiger_includes_decl)
+if(HAVE_SYS_TYPES_H)
+ list(APPEND wiredtiger_includes_decl "#include <sys/types.h>")
+endif()
+if(HAVE_INTTYPES_H)
+ list(APPEND wiredtiger_includes_decl "#include <inttypes.h>")
+endif()
+if(HAVE_STDARG_H)
+ list(APPEND wiredtiger_includes_decl "#include <stdarg.h>")
+endif()
+if(HAVE_STDBOOL_H)
+ list(APPEND wiredtiger_includes_decl "#include <stdbool.h>")
+endif()
+if(HAVE_STDINT_H)
+ list(APPEND wiredtiger_includes_decl "#include <stdint.h>")
+endif()
+if(HAVE_STDIO_H)
+ list(APPEND wiredtiger_includes_decl "#include <stdio.h>")
+endif()
+string(REGEX REPLACE ";" "\n" wiredtiger_includes_decl "${wiredtiger_includes_decl}")
diff --git a/src/third_party/wiredtiger/build_cmake/configs/base.cmake b/src/third_party/wiredtiger/build_cmake/configs/base.cmake
new file mode 100644
index 00000000000..20ae0a319d0
--- /dev/null
+++ b/src/third_party/wiredtiger/build_cmake/configs/base.cmake
@@ -0,0 +1,184 @@
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+# All rights reserved.
+#
+# See the file LICENSE for redistribution information
+#
+
+include(build_cmake/helpers.cmake)
+
+# WiredTiger-related configuration options.
+
+config_choice(
+ WT_ARCH
+ "Target architecture for WiredTiger"
+ OPTIONS
+ "x86;WT_X86;"
+ "arm64;WT_ARM64;"
+ "ppc64;WT_PPC64;"
+ "zseries;WT_ZSERIES;"
+)
+
+config_choice(
+ WT_OS
+ "Target OS for WiredTiger"
+ OPTIONS
+ "darwin;WT_DARWIN;"
+ "windows;WT_WIN;"
+ "linux;WT_LINUX;"
+)
+
+config_bool(
+ WT_POSIX
+ "Is a posix platform"
+ DEFAULT ON
+ DEPENDS "WT_LINUX OR WT_DARWIN"
+)
+
+# Default buffer alignment; platform configs may override (e.g. 4096 on Linux
+# for O_DIRECT). Fixes "aligment" typo in the user-visible description.
+config_string(
+ WT_BUFFER_ALIGNMENT_DEFAULT
+ "WiredTiger buffer boundary alignment"
+ DEFAULT 0
+)
+
+config_bool(
+ HAVE_DIAGNOSTIC
+ "Enable WiredTiger diagnostics"
+ DEFAULT OFF
+)
+
+config_bool(
+ HAVE_ATTACH
+ "Enable to pause for debugger attach on failure"
+ DEFAULT OFF
+)
+
+config_bool(
+ ENABLE_STATIC
+ "Compile as a static library"
+ DEFAULT OFF
+)
+
+config_bool(
+ ENABLE_STRICT
+ "Compile with strict compiler warnings enabled"
+ DEFAULT ON
+)
+
+config_bool(
+ ENABLE_PYTHON
+ "Configure the python API"
+ DEFAULT OFF
+ DEPENDS "NOT ENABLE_STATIC"
+)
+
+config_bool(
+ WT_STANDALONE_BUILD
+ "Support standalone build"
+ DEFAULT ON
+)
+
+config_bool(
+ HAVE_NO_CRC32_HARDWARE
+ "Disable any crc32 hardware support"
+ DEFAULT OFF
+)
+
+config_choice(
+ SPINLOCK_TYPE
+ "Set a spinlock type"
+ OPTIONS
+ "pthread;SPINLOCK_PTHREAD_MUTEX;HAVE_LIBPTHREAD"
+ "gcc;SPINLOCK_GCC;"
+ "msvc;SPINLOCK_MSVC;WT_WIN"
+ "pthread_adaptive;SPINLOCK_PTHREAD_ADAPTIVE;HAVE_LIBPTHREAD"
+)
+
+config_bool(
+ ENABLE_LZ4
+ "Build the lz4 compressor extension"
+ DEFAULT OFF
+ DEPENDS "HAVE_LIBLZ4"
+ # Specifically throw a fatal error if a user tries to enable the lz4 compressor without
+ # actually having the library available (as opposed to silently defaulting to OFF).
+ DEPENDS_ERROR ON "Failed to find lz4 library"
+)
+
+config_bool(
+ ENABLE_SNAPPY
+ "Build the snappy compressor extension"
+ DEFAULT OFF
+ DEPENDS "HAVE_LIBSNAPPY"
+ # Specifically throw a fatal error if a user tries to enable the snappy compressor without
+ # actually having the library available (as opposed to silently defaulting to OFF).
+ DEPENDS_ERROR ON "Failed to find snappy library"
+)
+
+config_bool(
+ ENABLE_ZLIB
+ "Build the zlib compressor extension"
+ DEFAULT OFF
+ DEPENDS "HAVE_LIBZ"
+ # Specifically throw a fatal error if a user tries to enable the zlib compressor without
+ # actually having the library available (as opposed to silently defaulting to OFF).
+ DEPENDS_ERROR ON "Failed to find zlib library"
+)
+
+config_bool(
+ ENABLE_ZSTD
+ "Build the libzstd compressor extension"
+ DEFAULT OFF
+ DEPENDS "HAVE_LIBZSTD"
+ # Specifically throw a fatal error if a user tries to enable the zstd compressor without
+ # actually having the library available (as opposed to silently defaulting to OFF).
+ DEPENDS_ERROR ON "Failed to find zstd library"
+)
+
+config_bool(
+ ENABLE_TCMALLOC
+ "Use TCMalloc as the backend allocator"
+ DEFAULT OFF
+ DEPENDS "HAVE_LIBTCMALLOC"
+ # Specifically throw a fatal error if a user tries to enable the tcmalloc allocator without
+ # actually having the library available (as opposed to silently defaulting to OFF).
+ DEPENDS_ERROR ON "Failed to find tcmalloc library"
+)
+
+config_string(
+ CC_OPTIMIZE_LEVEL
+ "CC optimization level"
+ DEFAULT "-O3"
+)
+
+config_string(
+ VERSION_MAJOR
+ "Major version number for WiredTiger"
+ DEFAULT 10
+)
+
+config_string(
+ VERSION_MINOR
+ "Minor version number for WiredTiger"
+ DEFAULT 0
+)
+
+# Patch version number ("Path" was a typo in the description).
+config_string(
+ VERSION_PATCH
+ "Patch version number for WiredTiger"
+ DEFAULT 0
+)
+
+
+string(TIMESTAMP config_date "%Y-%m-%d")
+config_string(
+ VERSION_STRING
+ "Version string for WiredTiger"
+ DEFAULT "\"WiredTiger 10.0.0 (${config_date})\""
+)
+
+if(HAVE_DIAGNOSTIC)
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g" CACHE STRING "" FORCE)
+endif()
+
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CC_OPTIMIZE_LEVEL}" CACHE STRING "" FORCE)
diff --git a/src/third_party/wiredtiger/build_cmake/configs/compile_test/pthread_cond_monotonic_test.c b/src/third_party/wiredtiger/build_cmake/configs/compile_test/pthread_cond_monotonic_test.c
new file mode 100644
index 00000000000..64645ec889f
--- /dev/null
+++ b/src/third_party/wiredtiger/build_cmake/configs/compile_test/pthread_cond_monotonic_test.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2014-present MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <time.h>
+
+/*
+ * Configure-time compile-and-run probe: exits 0 only if pthread condition
+ * variables can use CLOCK_MONOTONIC for their timed waits, i.e. if
+ * pthread_condattr_setclock(CLOCK_MONOTONIC) and a subsequent
+ * pthread_cond_timedwait all succeed. CMake's HAVE_PTHREAD_COND_MONOTONIC
+ * check is driven by this program's exit status.
+ */
+int
+main()
+{
+ int ret;
+ pthread_condattr_t condattr;
+ pthread_cond_t cond;
+ pthread_mutex_t mtx;
+ struct timespec ts;
+
+ /* Any setup failure means the feature is unusable on this toolchain. */
+ if ((ret = pthread_condattr_init(&condattr)) != 0)
+ exit(1);
+ if ((ret = pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC)) != 0)
+ exit(1);
+ if ((ret = pthread_cond_init(&cond, &condattr)) != 0)
+ exit(1);
+ if ((ret = pthread_mutex_init(&mtx, NULL)) != 0)
+ exit(1);
+ if ((ret = clock_gettime(CLOCK_MONOTONIC, &ts)) != 0)
+ exit(1);
+ /* Wait at most one second; EINTR/ETIMEDOUT still count as success. */
+ ts.tv_sec += 1;
+ if ((ret = pthread_mutex_lock(&mtx)) != 0)
+ exit(1);
+ if ((ret = pthread_cond_timedwait(&cond, &mtx, &ts)) != 0 && ret != EINTR && ret != ETIMEDOUT)
+ exit(1);
+
+ exit(0);
+}
diff --git a/src/third_party/wiredtiger/build_cmake/configs/wiredtiger_config.h.in b/src/third_party/wiredtiger/build_cmake/configs/wiredtiger_config.h.in
new file mode 100644
index 00000000000..f772594fc45
--- /dev/null
+++ b/src/third_party/wiredtiger/build_cmake/configs/wiredtiger_config.h.in
@@ -0,0 +1,157 @@
+/*-
+ * Copyright (c) 2014-present MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#ifndef __WIREDTIGER_CONFIG_H_
+#define __WIREDTIGER_CONFIG_H_
+
+/* Define to 1 to pause for debugger attach on failure. */
+#cmakedefine HAVE_ATTACH 1
+
+/* LZ4 support automatically loaded. */
+#cmakedefine HAVE_BUILTIN_EXTENSION_LZ4 1
+
+/* Snappy support automatically loaded. */
+#cmakedefine HAVE_BUILTIN_EXTENSION_SNAPPY 1
+
+/* ZLIB support automatically loaded. */
+#cmakedefine HAVE_BUILTIN_EXTENSION_ZLIB 1
+
+/* ZSTD support automatically loaded. */
+#cmakedefine HAVE_BUILTIN_EXTENSION_ZSTD 1
+
+/* Define to 1 if you have the `clock_gettime' function. */
+#cmakedefine HAVE_CLOCK_GETTIME 1
+
+/* Define to 1 for diagnostic tests. */
+#cmakedefine HAVE_DIAGNOSTIC 1
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#cmakedefine HAVE_DLFCN_H 1
+
+/* Define to 1 if you have the `fallocate' function. */
+#cmakedefine HAVE_FALLOCATE 1
+
+/* Define to 1 if you have the `fdatasync' function. */
+#cmakedefine HAVE_FDATASYNC 1
+
+/* Define to 1 if you have the `ftruncate' function. */
+#cmakedefine HAVE_FTRUNCATE 1
+
+/* Define to 1 if you have the `gettimeofday' function. */
+#cmakedefine HAVE_GETTIMEOFDAY 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#cmakedefine HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the `dl' library (-ldl). */
+#cmakedefine HAVE_LIBDL 1
+
+/* Define to 1 if you have the `lz4' library (-llz4). */
+#cmakedefine HAVE_LIBLZ4 1
+
+/* Define to 1 if you have the `pthread' library (-lpthread). */
+#cmakedefine HAVE_LIBPTHREAD 1
+
+/* Define to 1 if you have the `rt' library (-lrt). */
+#cmakedefine HAVE_LIBRT 1
+
+/* Define to 1 if you have the `snappy' library (-lsnappy). */
+#cmakedefine HAVE_LIBSNAPPY 1
+
+/* Define to 1 if you have the `tcmalloc' library (-ltcmalloc). */
+#cmakedefine HAVE_LIBTCMALLOC 1
+
+/* Define to 1 if you have the `z' library (-lz). */
+#cmakedefine HAVE_LIBZ 1
+
+/* Define to 1 if you have the `zstd' library (-lzstd). */
+#cmakedefine HAVE_LIBZSTD 1
+
+/* Define to 1 if you have the <memory.h> header file. */
+#cmakedefine HAVE_MEMORY_H 1
+
+/* Define to 1 to disable any crc32 hardware support. */
+#cmakedefine HAVE_NO_CRC32_HARDWARE
+
+/* Define to 1 if you have the `posix_fadvise' function. */
+#cmakedefine HAVE_POSIX_FADVISE 1
+
+/* Define to 1 if you have the `posix_fallocate' function. */
+#cmakedefine HAVE_POSIX_FALLOCATE 1
+
+/* Define to 1 if you have the `posix_madvise' function. */
+#cmakedefine HAVE_POSIX_MADVISE 1
+
+/* Define to 1 if `posix_memalign' works. */
+#cmakedefine HAVE_POSIX_MEMALIGN 1
+
+/* Define to 1 if pthread condition variables support monotonic clocks. */
+#cmakedefine HAVE_PTHREAD_COND_MONOTONIC 1
+
+/* Define to 1 if you have the `setrlimit' function. */
+#cmakedefine HAVE_SETRLIMIT 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#cmakedefine HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#cmakedefine HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#cmakedefine HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#cmakedefine HAVE_STRING_H 1
+
+/* Define to 1 if you have the `strtouq' function. */
+#cmakedefine HAVE_STRTOUQ 1
+
+/* Define to 1 if you have the `sync_file_range' function. */
+#cmakedefine HAVE_SYNC_FILE_RANGE 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#cmakedefine HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the `timer_create' function. */
+#cmakedefine HAVE_TIMER_CREATE 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#cmakedefine HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the <x86intrin.h> header file. */
+#cmakedefine HAVE_X86INTRIN_H 1
+
+/* Spinlock type from mutex.h. */
+#cmakedefine SPINLOCK_TYPE @SPINLOCK_TYPE_CONFIG_VAR@
+
+/* Version number of package */
+#define VERSION "@VERSION_MAJOR@.@VERSION_MINOR@.@VERSION_PATCH@"
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+ significant byte first (like Motorola and SPARC, unlike Intel). */
+#if defined AC_APPLE_UNIVERSAL_BUILD
+# if defined __BIG_ENDIAN__
+# define WORDS_BIGENDIAN 1
+# endif
+#else
+# ifndef WORDS_BIGENDIAN
+/* # undef WORDS_BIGENDIAN */
+# endif
+#endif
+
+/* Default alignment of buffers used for I/O. */
+#define WT_BUFFER_ALIGNMENT_DEFAULT @WT_BUFFER_ALIGNMENT_DEFAULT@
+
+/* Define to 1 to support standalone build. */
+#cmakedefine WT_STANDALONE_BUILD 1
+
+#ifndef _DARWIN_USE_64_BIT_INODE
+# define _DARWIN_USE_64_BIT_INODE 1
+#endif
+
+#endif
diff --git a/src/third_party/wiredtiger/build_cmake/configs/x86/darwin/config.cmake b/src/third_party/wiredtiger/build_cmake/configs/x86/darwin/config.cmake
new file mode 100644
index 00000000000..2d3f7ead67a
--- /dev/null
+++ b/src/third_party/wiredtiger/build_cmake/configs/x86/darwin/config.cmake
@@ -0,0 +1,11 @@
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+# All rights reserved.
+#
+# See the file LICENSE for redistribution information
+#
+
+set(WT_ARCH "x86" CACHE STRING "")
+set(WT_OS "darwin" CACHE STRING "")
+set(WT_POSIX ON CACHE BOOL "")
diff --git a/src/third_party/wiredtiger/build_cmake/configs/x86/linux/config.cmake b/src/third_party/wiredtiger/build_cmake/configs/x86/linux/config.cmake
new file mode 100644
index 00000000000..e7d5742fb27
--- /dev/null
+++ b/src/third_party/wiredtiger/build_cmake/configs/x86/linux/config.cmake
@@ -0,0 +1,19 @@
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+# All rights reserved.
+#
+# See the file LICENSE for redistribution information
+#
+
+set(WT_ARCH "x86" CACHE STRING "")
+set(WT_OS "linux" CACHE STRING "")
+set(WT_POSIX ON CACHE BOOL "")
+
+# Linux requires '_GNU_SOURCE' to be defined for access to GNU/Linux extension functions
+# e.g. Access to O_DIRECT on Linux. Append this macro to our compiler flags for Linux-based
+# builds.
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_GNU_SOURCE" CACHE STRING "" FORCE)
+
+# Linux requires buffers aligned to 4KB boundaries for O_DIRECT to work.
+set(WT_BUFFER_ALIGNMENT_DEFAULT "4096" CACHE STRING "")
diff --git a/src/third_party/wiredtiger/build_cmake/helpers.cmake b/src/third_party/wiredtiger/build_cmake/helpers.cmake
new file mode 100644
index 00000000000..896518f92f5
--- /dev/null
+++ b/src/third_party/wiredtiger/build_cmake/helpers.cmake
@@ -0,0 +1,633 @@
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+# All rights reserved.
+#
+# See the file LICENSE for redistribution information
+#
+
+include(CheckIncludeFiles)
+include(CheckSymbolExists)
+include(CheckLibraryExists)
+include(CheckTypeSize)
+
+# Helper function for evaluating a list of dependencies. Mostly used by the
+# "config_X" helpers to evaluate the dependencies required to enable the config
+# option.
+# depends - a list (semicolon separated) of dependencies to evaluate.
+# enabled - name of the output variable set with either 'ON' or 'OFF' (based
+# on evaluated dependencies). Output variable is set in the callers scope.
+function(eval_dependency depends enabled)
+ # If no dependencies were given then we default to an enabled state.
+ if(("${depends}" STREQUAL "") OR ("${depends}" STREQUAL "NOTFOUND"))
+ # Write through the *named* output variable. A plain 'set(enabled ...)'
+ # only worked because every current caller happens to name its output
+ # variable 'enabled'.
+ set(${enabled} ON PARENT_SCOPE)
+ return()
+ endif()
+ # Evaluate each dependency; all must hold for the config to be enabled.
+ set(is_enabled ON)
+ foreach(dependency ${depends})
+ # Split space-separated expressions such as "NOT WT_DARWIN" into if() args.
+ string(REGEX REPLACE " +" ";" dependency "${dependency}")
+ if(NOT (${dependency}))
+ set(is_enabled OFF)
+ break()
+ endif()
+ endforeach()
+ set(${enabled} ${is_enabled} PARENT_SCOPE)
+endfunction()
+
+# config_string(config_name description DEFAULT <default string> [DEPENDS <deps>] [INTERNAL])
+# Defines a string configuration option. The configuration option is stored in the cmake cache
+# and can be exported to the wiredtiger config header.
+# config_name - name of the configuration option.
+# description - docstring to describe the configuration option (viewable in the cmake-gui).
+# DEFAULT <default string> - Default value of the configuration string. Used when not manually set
+# by a cmake script or in the cmake-gui.
+# DEPENDS <deps> - list of dependencies (semicolon separated) required for the configuration string
+# to be present and set in the cache. If any of the dependencies aren't met, the
+# configuration value won't be present in the cache.
+# INTERNAL - hides the configuration option from the cmake-gui by default. Useful if you don't want
+# to expose the variable by default to the user i.e keep it internal to the implementation
+# (but still need it in the cache).
+function(config_string config_name description)
+ cmake_parse_arguments(
+ PARSE_ARGV
+ 2
+ "CONFIG_STR"
+ "INTERNAL"
+ "DEFAULT;DEPENDS"
+ ""
+ )
+
+ if (NOT "${CONFIG_STR_UNPARSED_ARGUMENTS}" STREQUAL "")
+ message(FATAL_ERROR "Unknown arguments to config_str: ${CONFIG_STR_UNPARSED_ARGUMENTS}")
+ endif()
+ # We require a default value (not optional).
+ if ("${CONFIG_STR_DEFAULT}" STREQUAL "")
+ message(FATAL_ERROR "No default value passed")
+ endif()
+
+ # Check that the configs dependencies are enabled before setting it to a visible enabled state.
+ eval_dependency("${CONFIG_STR_DEPENDS}" enabled)
+ set(default_value "${CONFIG_STR_DEFAULT}")
+ if(enabled)
+ # Set an internal cache variable "${config_name}_DISABLED" to capture its enabled/disabled state
+ # We want to ensure we capture a transition from a disabled to enabled state when dependencies are met.
+ if(${config_name}_DISABLED)
+ unset(${config_name}_DISABLED CACHE)
+ # FORCE re-seeds the default when the option transitions disabled -> enabled.
+ set(${config_name} ${default_value} CACHE STRING "${description}" FORCE)
+ else()
+ # No FORCE: a value pre-set by the user or an earlier script wins.
+ set(${config_name} ${default_value} CACHE STRING "${description}")
+ endif()
+ if (CONFIG_STR_INTERNAL)
+ # Mark as an advanced variable, hiding it from initial UI's views.
+ mark_as_advanced(FORCE ${config_name})
+ endif()
+ else()
+ # Config doesn't meet dependency requirements, remove it from the cache and flag it as disabled.
+ unset(${config_name} CACHE)
+ set(${config_name}_DISABLED ON CACHE INTERNAL "" FORCE)
+ endif()
+endfunction()
+
+# config_choice(config_name description OPTIONS <opts>)
+# Defines a configuration option, bounded with pre-set toggleable values. The configuration option is stored
+# in the cmake cache and can be exported to the wiredtiger config header. We default to the first *available* option in the
+# list if the config has not been manually set by a cmake script or in the cmake-gui.
+# config_name - name of the configuration option.
+# description - docstring to describe the configuration option (viewable in the cmake-gui).
+# OPTIONS - a list of option values that the configuration option can be set to. Each option is itself a semicolon
+# separated list consisting of "<option-name>;<config-name>;<option-dependencies>".
+# * option-name: name of the given option stored in the ${config_name} cache variable and presented
+# to users in the gui (usually something understandable).
+# * config-name: an additional cached configuration variable that is made available if the option is selected.
+# It is only present if the option is chosen, otherwise it is unset.
+# * option-dependencies: dependencies required for the option to be made available. If its dependencies aren't met
+# the given option will become un-selectable.
+function(config_choice config_name description)
+ cmake_parse_arguments(
+ PARSE_ARGV
+ 2
+ "CONFIG_OPT"
+ ""
+ ""
+ "OPTIONS"
+ )
+
+ if (NOT "${CONFIG_OPT_UNPARSED_ARGUMENTS}" STREQUAL "")
+ message(FATAL_ERROR "Unknown arguments to config_opt: ${CONFIG_OPT_UNPARSED_ARGUMENTS}")
+ endif()
+ # We require option values (not optional)
+ if ("${CONFIG_OPT_OPTIONS}" STREQUAL "")
+ message(FATAL_ERROR "No options passed")
+ endif()
+
+ # found_option is ON while we are still looking for the first selectable
+ # option (which becomes the default); found_pre_set records whether the
+ # config was already set in the cache.
+ set(found_option ON)
+ set(found_pre_set OFF)
+ set(default_config_field "")
+ set(default_config_var "")
+ foreach(curr_option ${CONFIG_OPT_OPTIONS})
+ list(LENGTH curr_option opt_length)
+ if (NOT opt_length EQUAL 3)
+ message(FATAL_ERROR "Invalid option format: ${curr_option}")
+ endif()
+ # We expect three items defined for each option.
+ list(GET curr_option 0 option_config_field)
+ list(GET curr_option 1 option_config_var)
+ list(GET curr_option 2 option_depends)
+ # Check that the options dependencies are enabled before setting it to a selectable state.
+ eval_dependency("${option_depends}" enabled)
+ if(enabled)
+ list(APPEND all_option_config_fields ${option_config_field})
+ # The first valid/selectable option found will be the default config value.
+ if (found_option)
+ set(found_option OFF)
+ set(default_config_field "${option_config_field}")
+ set(default_config_var "${option_config_var}")
+ endif()
+ # Check if the option is already set with this given field. We don't want to override the configs value
+ # with a default value if its already been pre-set in the cache e.g. by early config scripts.
+ if("${${config_name}}" STREQUAL "${option_config_field}")
+ set(${option_config_var} ON CACHE INTERNAL "" FORCE)
+ set(${config_name}_CONFIG_VAR ${option_config_var} CACHE INTERNAL "" FORCE)
+ set(found_pre_set ON)
+ set(found_option OFF)
+ set(default_config_field "${option_config_field}")
+ set(default_config_var "${option_config_var}")
+ else()
+ # Clear the cache of the current set value.
+ set(${option_config_var} OFF CACHE INTERNAL "" FORCE)
+ endif()
+ else()
+ unset(${option_config_var} CACHE)
+ # Check if the option is already set with this given field - we want to clear it if so.
+ if ("${${config_name}_CONFIG_VAR}" STREQUAL "${option_config_var}")
+ unset(${config_name}_CONFIG_VAR CACHE)
+ endif()
+ if("${${config_name}}" STREQUAL "${option_config_field}")
+ unset(${config_name} CACHE)
+ endif()
+ endif()
+ endforeach()
+ # If the config hasn't been set we can load it with the default option found earlier.
+ if(NOT found_pre_set)
+ set(${default_config_var} ON CACHE INTERNAL "" FORCE)
+ set(${config_name} ${default_config_field} CACHE STRING ${description})
+ set(${config_name}_CONFIG_VAR ${default_config_var} CACHE INTERNAL "" FORCE)
+ endif()
+ # Restrict the cmake-gui drop-down to the selectable options only.
+ set_property(CACHE ${config_name} PROPERTY STRINGS ${all_option_config_fields})
+endfunction()
+
+# config_bool(config_name description DEFAULT <default-value> [DEPENDS <deps>] [DEPENDS_ERROR <config-val> <error-string>])
+# Defines a boolean (ON/OFF) configuration option. The configuration option is stored in the cmake cache
+# and can be exported to the wiredtiger config header.
+# config_name - name of the configuration option.
+# description - docstring to describe the configuration option (viewable in the cmake-gui).
+# DEFAULT <default-value> - default value of the configuration bool (ON/OFF). Used when not manually set
+# by a cmake script or in the cmake-gui or when dependencies aren't met.
+# DEPENDS <deps> - list of dependencies (semicolon separated) required for the configuration bool
+# to be set to the desired value. If any of the dependencies aren't met the configuration value
+# will be set to its default value.
+# DEPENDS_ERROR <config-val> <error-string> - specifically throw a fatal error when the configuration option is set to
+# <config-val> despite failing on its dependencies. This is mainly used for commandline-like options where you want
+# to signal a specific error to the caller when dependencies aren't met e.g. toolchain is missing library (as opposed to
+# silently defaulting).
+function(config_bool config_name description)
+ cmake_parse_arguments(
+ PARSE_ARGV
+ 2
+ "CONFIG_BOOL"
+ ""
+ "DEFAULT;DEPENDS"
+ "DEPENDS_ERROR"
+ )
+
+ if(NOT "${CONFIG_BOOL_UNPARSED_ARGUMENTS}" STREQUAL "")
+ message(FATAL_ERROR "Unknown arguments to config_bool: ${CONFIG_BOOL_UNPARSED_ARGUMENTS}")
+ endif()
+ # We require a default value (not optional).
+ if("${CONFIG_BOOL_DEFAULT}" STREQUAL "")
+ message(FATAL_ERROR "No default value passed")
+ endif()
+
+ set(depends_err_value)
+ set(depends_err_message "")
+ # If DEPENDS_ERROR is specifically set, parse the value we want to throw an error on if the dependency fails.
+ if(CONFIG_BOOL_DEPENDS_ERROR)
+ list(LENGTH CONFIG_BOOL_DEPENDS_ERROR depends_error_length)
+ if(NOT depends_error_length EQUAL 2)
+ message(FATAL_ERROR "Invalid usage of DEPENDS_ERROR: requires <Error Value> <Error Message>")
+ else()
+ # Normalize the error-triggering value to "1"/"0" for comparison below.
+ list(GET CONFIG_BOOL_DEPENDS_ERROR 0 err_val)
+ if(err_val)
+ set(depends_err_value "1")
+ else()
+ set(depends_err_value "0")
+ endif()
+ list(GET CONFIG_BOOL_DEPENDS_ERROR 1 depends_err_message)
+ endif()
+ endif()
+
+ # Check that the configs dependencies are enabled before setting it to a visible enabled state.
+ eval_dependency("${CONFIG_BOOL_DEPENDS}" enabled)
+ if(enabled)
+ # Set an internal cache variable "${config_name}_DISABLED" to capture its enabled/disabled state.
+ # We want to ensure we capture a transition from a disabled to enabled state when dependencies are met.
+ if(${config_name}_DISABLED)
+ unset(${config_name}_DISABLED CACHE)
+ set(${config_name} ${CONFIG_BOOL_DEFAULT} CACHE STRING "${description}" FORCE)
+ else()
+ set(${config_name} ${CONFIG_BOOL_DEFAULT} CACHE STRING "${description}")
+ endif()
+ else()
+ # Normalize the user's current value to "1"/"0" for the DEPENDS_ERROR check.
+ set(config_value "0")
+ if (${${config_name}})
+ set(config_value "1")
+ endif()
+ # If the user tries to set the config option to a given value when its dependencies
+ # are not met, throw an error (when DEPENDS_ERROR is explicitly set).
+ if(CONFIG_BOOL_DEPENDS_ERROR)
+ if(${depends_err_value} EQUAL ${config_value})
+ message(FATAL_ERROR "Unable to set ${config_name}: ${depends_err_message}")
+ endif()
+ endif()
+ # Config doesn't meet dependency requirements, set its default state and flag it as disabled.
+ set(${config_name} ${CONFIG_BOOL_DEFAULT} CACHE STRING "${description}" FORCE)
+ set(${config_name}_DISABLED ON CACHE INTERNAL "" FORCE)
+ endif()
+endfunction()
+
+# config_func(config_name description FUNC <function-symbol> FILES <include-headers> [DEPENDS <deps>] [LIBS <library-dependencies>])
+# Defines a boolean (0/1) configuration option based on whether a given function symbol exists.
+# The configuration option is stored in the cmake cache and can be exported to the wiredtiger config header.
+# config_name - name of the configuration option.
+# description - docstring to describe the configuration option (viewable in the cmake-gui).
+# FUNC <function-symbol> - function symbol we want to search for.
+# FILES <include-headers> - headers we expect the function symbol to be declared in e.g std headers.
+# DEPENDS <deps> - list of dependencies (semicolon separated) required for the configuration to be evaluated.
+# If any of the dependencies aren't met the configuration value will be set to '0' (false).
+# LIBS <library-dependencies> - a list of any additional library dependencies needed to successfully link with the function symbol.
+function(config_func config_name description)
+ cmake_parse_arguments(
+ PARSE_ARGV
+ 2
+ "CONFIG_FUNC"
+ ""
+ "FUNC;DEPENDS;FILES;LIBS"
+ ""
+ )
+
+ if (NOT "${CONFIG_FUNC_UNPARSED_ARGUMENTS}" STREQUAL "")
+ message(FATAL_ERROR "Unknown arguments to config_func: ${CONFIG_FUNC_UNPARSED_ARGUMENTS}")
+ endif()
+ # We require an include header (not optional).
+ if ("${CONFIG_FUNC_FILES}" STREQUAL "")
+ message(FATAL_ERROR "No file list passed")
+ endif()
+ # We require a function symbol (not optional).
+ if ("${CONFIG_FUNC_FUNC}" STREQUAL "")
+ message(FATAL_ERROR "No function passed")
+ endif()
+
+ # Check that the configs dependencies are enabled before setting it to a visible enabled state.
+ eval_dependency("${CONFIG_FUNC_DEPENDS}" enabled)
+ if(enabled)
+ set(CMAKE_REQUIRED_LIBRARIES "${CONFIG_FUNC_LIBS}")
+ # Fixed: the second operand previously re-tested WT_ARCH instead of WT_OS.
+ if((NOT "${WT_ARCH}" STREQUAL "") AND (NOT "${WT_OS}" STREQUAL ""))
+ # 'check_symbol_exists' won't use our current cache when test compiling the function symbol.
+ # To get around this we need to ensure we manually forward WT_ARCH and WT_OS as a minimum. This is particularly
+ # needed if 'check_symbol_exists' will leverage one of our toolchain files.
+ set(CMAKE_REQUIRED_FLAGS "-DWT_ARCH=${WT_ARCH} -DWT_OS=${WT_OS}")
+ endif()
+ check_symbol_exists(${CONFIG_FUNC_FUNC} "${CONFIG_FUNC_FILES}" has_symbol_${config_name})
+ # Reset the check_* state so it can't leak into later probes.
+ set(CMAKE_REQUIRED_LIBRARIES)
+ set(CMAKE_REQUIRED_FLAGS)
+ set(has_symbol "0")
+ if(has_symbol_${config_name})
+ set(has_symbol ${has_symbol_${config_name}})
+ endif()
+ # Set an internal cache variable "${config_name}_DISABLED" to capture its enabled/disabled state.
+ # We want to ensure we capture a transition from a disabled to enabled state when dependencies are met.
+ if(${config_name}_DISABLED)
+ unset(${config_name}_DISABLED CACHE)
+ set(${config_name} ${has_symbol} CACHE STRING "${description}" FORCE)
+ else()
+ set(${config_name} ${has_symbol} CACHE STRING "${description}")
+ endif()
+ # 'check_symbol_exists' sets our given temp variable into the cache. Clear this so it doesn't persist between
+ # configuration runs.
+ unset(has_symbol_${config_name} CACHE)
+ else()
+ # Config doesn't meet dependency requirements, set a disabled state.
+ set(${config_name} 0 CACHE INTERNAL "" FORCE)
+ set(${config_name}_DISABLED ON CACHE INTERNAL "" FORCE)
+ endif()
+endfunction()
+
+
+# config_include(config_name description FILE <include-header> [DEPENDS <deps>])
+# Defines a boolean (0/1) configuration option based on whether a given include header exists.
+# The configuration option is stored in the cmake cache and can be exported to the wiredtiger config header.
+# config_name - name of the configuration option.
+# description - docstring to describe the configuration option (viewable in the cmake-gui).
+# FILE <include-header> - header we want to search for e.g a std header.
+# DEPENDS <deps> - list of dependencies (semicolon separated) required for the configuration to be evaluated.
+# If any of the dependencies aren't met the configuration value will be set to '0' (false).
+function(config_include config_name description)
+ cmake_parse_arguments(
+ PARSE_ARGV
+ 2
+ "CONFIG_INCLUDE"
+ ""
+ "FILE;DEPENDS"
+ ""
+ )
+
+ if (NOT "${CONFIG_INCLUDE_UNPARSED_ARGUMENTS}" STREQUAL "")
+ # Report the correct function name (the original copy-pasted "config_func").
+ message(FATAL_ERROR "Unknown arguments to config_include: ${CONFIG_INCLUDE_UNPARSED_ARGUMENTS}")
+ endif()
+ # We require an include header (not optional).
+ if ("${CONFIG_INCLUDE_FILE}" STREQUAL "")
+ message(FATAL_ERROR "No include file passed")
+ endif()
+
+ # Check that the configs dependencies are enabled before setting it to a visible enabled state.
+ eval_dependency("${CONFIG_INCLUDE_DEPENDS}" enabled)
+ if(enabled)
+ # 'check_include_files' won't use our current cache when test compiling the include header.
+ # To get around this we need to ensure we manually forward WT_ARCH and WT_OS as a minimum. This is particularly
+ # needed if 'check_include_files' will leverage one of our toolchain files.
+ # Test both WT_ARCH and WT_OS (the original tested WT_ARCH twice).
+ if((NOT "${WT_ARCH}" STREQUAL "") AND (NOT "${WT_OS}" STREQUAL ""))
+ set(CMAKE_REQUIRED_FLAGS "-DWT_ARCH=${WT_ARCH} -DWT_OS=${WT_OS}")
+ endif()
+ check_include_files(${CONFIG_INCLUDE_FILE} has_include_${config_name})
+ set(CMAKE_REQUIRED_FLAGS)
+ set(has_include "0")
+ if(has_include_${config_name})
+ set(has_include ${has_include_${config_name}})
+ endif()
+ # Set an internal cache variable "${config_name}_DISABLED" to capture its enabled/disabled state.
+ # We want to ensure we capture a transition from a disabled to enabled state when dependencies are met.
+ if(${config_name}_DISABLED)
+ unset(${config_name}_DISABLED CACHE)
+ set(${config_name} ${has_include} CACHE STRING "${description}" FORCE)
+ else()
+ set(${config_name} ${has_include} CACHE STRING "${description}")
+ endif()
+ # 'check_include_files' sets our given temp variable into the cache. Clear this so it doesn't persist between
+ # configuration runs.
+ unset(has_include_${config_name} CACHE)
+ else()
+ # Config doesn't meet dependency requirements, set a disabled state. Use '0'
+ # (not 'OFF') for consistency with the other config_* helpers.
+ set(${config_name} 0 CACHE INTERNAL "" FORCE)
+ set(${config_name}_DISABLED ON CACHE INTERNAL "" FORCE)
+ endif()
+ # Set an internal cache variable with the CPP include statement. We can use this when building out our config header.
+ if (${${config_name}})
+ set(${config_name}_DECL "#include <${CONFIG_INCLUDE_FILE}>" CACHE INTERNAL "")
+ endif()
+endfunction()
+
+# config_lib(config_name description LIB <library> FUNC <function-symbol> [DEPENDS <deps>])
+# Defines a boolean (0/1) configuration option based on whether a given library exists.
+# The configuration option is stored in the cmake cache and can be exported to the wiredtiger config header.
+# config_name - name of the configuration option.
+# description - docstring to describe the configuration option (viewable in the cmake-gui).
+# LIB <library> - library we are searching for (defined as if we are linking against it e.g -lpthread).
+# FUNC <function-symbol> - function symbol we expect to be available to link against within the library.
+# DEPENDS <deps> - list of dependencies (semicolon separated) required for the configuration to be evaluated.
+# If any of the dependencies aren't met the configuration value will be set to '0' (false).
+function(config_lib config_name description)
+ cmake_parse_arguments(
+ PARSE_ARGV
+ 2
+ "CONFIG_LIB"
+ ""
+ "LIB;FUNC;DEPENDS"
+ ""
+ )
+
+ if (NOT "${CONFIG_LIB_UNPARSED_ARGUMENTS}" STREQUAL "")
+ message(FATAL_ERROR "Unknown arguments to config_lib: ${CONFIG_LIB_UNPARSED_ARGUMENTS}")
+ endif()
+ # We require a library (not optional).
+ if ("${CONFIG_LIB_LIB}" STREQUAL "")
+ message(FATAL_ERROR "No library passed")
+ endif()
+ # We require a function within the library (not optional).
+ if ("${CONFIG_LIB_FUNC}" STREQUAL "")
+ message(FATAL_ERROR "No library function passed")
+ endif()
+
+ # Check that the configs dependencies are enabled before setting it to a visible enabled state.
+ eval_dependency("${CONFIG_LIB_DEPENDS}" enabled)
+ if(enabled)
+ # 'check_library_exists' won't use our current cache when test compiling the library.
+ # To get around this we need to ensure we manually forward WT_ARCH and WT_OS as a minimum. This is particularly
+ # needed if 'check_library_exists' will leverage one of our toolchain files.
+ # Test both WT_ARCH and WT_OS (the original tested WT_ARCH twice).
+ if((NOT "${WT_ARCH}" STREQUAL "") AND (NOT "${WT_OS}" STREQUAL ""))
+ set(CMAKE_REQUIRED_FLAGS "-DWT_ARCH=${WT_ARCH} -DWT_OS=${WT_OS}")
+ endif()
+ check_library_exists(${CONFIG_LIB_LIB} ${CONFIG_LIB_FUNC} "" has_lib_${config_name})
+ set(CMAKE_REQUIRED_FLAGS)
+ set(has_lib "0")
+ if(has_lib_${config_name})
+ set(has_lib ${has_lib_${config_name}})
+ endif()
+ # Set an internal cache variable "${config_name}_DISABLED" to capture its enabled/disabled state.
+ # We want to ensure we capture a transition from a disabled to enabled state when dependencies are met.
+ if(${config_name}_DISABLED)
+ unset(${config_name}_DISABLED CACHE)
+ set(${config_name} ${has_lib} CACHE STRING "${description}" FORCE)
+ else()
+ set(${config_name} ${has_lib} CACHE STRING "${description}")
+ endif()
+ # 'check_library_exists' sets our given temp variable into the cache. Clear this so it doesn't persist between
+ # configuration runs.
+ unset(has_lib_${config_name} CACHE)
+ else()
+ # Config doesn't meet dependency requirements, set a disabled state.
+ set(${config_name} 0 CACHE INTERNAL "" FORCE)
+ set(${config_name}_DISABLED ON CACHE INTERNAL "" FORCE)
+ endif()
+endfunction()
+
+# config_compile(config_name description SOURCE <source-file> [DEPENDS <deps>] [LIBS <library-dependencies>])
+# Defines a boolean (0/1) configuration option based on whether a source file can be successfully compiled and run. Used
+# to determine if more fine grained functionality is supported on a given target environment (beyond what function
+# symbols, libraries and headers are available). The configuration option is stored in the cmake cache and can be
+# exported to the wiredtiger config header.
+# config_name - name of the configuration option.
+# description - docstring to describe the configuration option (viewable in the cmake-gui).
+# SOURCE <source-file> - specific source file we want to test compile.
+# DEPENDS <deps> - list of dependencies (semicolon separated) required for the configuration to be evaluated.
+# If any of the dependencies aren't met the configuration value will be set to '0' (false).
+# LIBS <library-dependencies> - a list of any additional library dependencies needed to successfully compile the source.
+function(config_compile config_name description)
+ cmake_parse_arguments(
+ PARSE_ARGV
+ 2
+ "CONFIG_COMPILE"
+ ""
+ "SOURCE;DEPENDS;LIBS"
+ ""
+ )
+
+ if (NOT "${CONFIG_COMPILE_UNPARSED_ARGUMENTS}" STREQUAL "")
+ message(FATAL_ERROR "Unknown arguments to config_compile: ${CONFIG_COMPILE_UNPARSED_ARGUMENTS}")
+ endif()
+ # We require a source file (not optional).
+ if ("${CONFIG_COMPILE_SOURCE}" STREQUAL "")
+ message(FATAL_ERROR "No source passed")
+ endif()
+
+ # Check that the configs dependencies are enabled before setting it to a visible enabled state.
+ eval_dependency("${CONFIG_COMPILE_DEPENDS}" enabled)
+ if(enabled)
+ # Test compile *and execute* the source file. WT_ARCH/WT_OS are forwarded so
+ # a toolchain file used by the try_run sub-build sees the same target config.
+ try_run(
+ can_run_${config_name} can_compile_${config_name}
+ ${CMAKE_CURRENT_BINARY_DIR}
+ ${CONFIG_COMPILE_SOURCE}
+ CMAKE_FLAGS "-DWT_ARCH=${WT_ARCH}" "-DWT_OS=${WT_OS}"
+ LINK_LIBRARIES "${CONFIG_COMPILE_LIBS}"
+ )
+ # Success only when the test program ran and exited with status 0.
+ # NOTE(review): the FAILED_TO_RUN comparison is redundant (a "FAILED_TO_RUN"
+ # value can never also STREQUAL "0"); kept to make the failure mode explicit.
+ set(can_run "0")
+ if((NOT "${can_run_${config_name}}" STREQUAL "FAILED_TO_RUN") AND
+ ("${can_run_${config_name}}" STREQUAL "0"))
+ set(can_run "1")
+ endif()
+ # Set an internal cache variable "${config_name}_DISABLED" to capture its enabled/disabled state.
+ # We want to ensure we capture a transition from a disabled to enabled state when dependencies are met.
+ if(${config_name}_DISABLED)
+ unset(${config_name}_DISABLED CACHE)
+ set(${config_name} ${can_run} CACHE STRING "${description}" FORCE)
+ else()
+ set(${config_name} ${can_run} CACHE STRING "${description}")
+ endif()
+ # 'try_run' sets our given temp variable into the cache. Clear this so it doesn't persist between
+ # configuration runs.
+ unset(can_run_${config_name} CACHE)
+ unset(can_compile_${config_name} CACHE)
+ else()
+ # Config doesn't meet dependency requirements, set a disabled state.
+ set(${config_name} 0 CACHE INTERNAL "" FORCE)
+ set(${config_name}_DISABLED ON CACHE INTERNAL "" FORCE)
+ endif()
+endfunction()
+
+# test_type_size(type output_size)
+# Helper function that tests for a given types size and returns its value if found.
+# type - name of the type to test.
+# output_size - name of the output variable, set with either the types size or "" (empty string)
+# if not found.
+# EXTRA_INCLUDES - extra/optional include files to access the given type e.g. a custom typedef in an include header.
+function(test_type_size type output_size)
+ cmake_parse_arguments(
+ PARSE_ARGV
+ 2
+ "TEST_TYPE"
+ ""
+ ""
+ "EXTRA_INCLUDES"
+ )
+
+ if (NOT "${TEST_TYPE_UNPARSED_ARGUMENTS}" STREQUAL "")
+ # Report the correct function name (the original copy-pasted "assert_type").
+ message(FATAL_ERROR "Unknown arguments to test_type_size: ${TEST_TYPE_UNPARSED_ARGUMENTS}")
+ endif()
+
+ # Forward any extra headers (e.g. for custom typedefs) to the type-size probe,
+ # then clear the helper variable so it doesn't leak into later checks.
+ set(CMAKE_EXTRA_INCLUDE_FILES "${TEST_TYPE_EXTRA_INCLUDES}")
+ check_type_size(${type} TEST_TYPE)
+ set(CMAKE_EXTRA_INCLUDE_FILES)
+
+ # 'check_type_size' sets HAVE_TEST_TYPE when the type exists; return the size
+ # on success and "" (empty string) otherwise.
+ if(NOT HAVE_TEST_TYPE)
+ set(${output_size} "" PARENT_SCOPE)
+ else()
+ set(${output_size} ${TEST_TYPE} PARENT_SCOPE)
+ endif()
+endfunction()
+
+# assert_type_size(type size)
+# Wrapper function around 'test_type_size' that additionally asserts whether the given types meets an expected size.
+# Throws a fatal error if the type is not found or doesn't equal the expected size.
+# type - name of the type to test.
+# size - expected size of the type.
+# EXTRA_INCLUDES - extra/optional include files to access the given type e.g. a custom typedef in an include header.
+function(assert_type_size type size)
+ cmake_parse_arguments(
+ PARSE_ARGV
+ 2
+ "ASSERT_TYPE"
+ ""
+ ""
+ "EXTRA_INCLUDES"
+ )
+
+ if (NOT "${ASSERT_TYPE_UNPARSED_ARGUMENTS}" STREQUAL "")
+ message(FATAL_ERROR "Unknown arguments to assert_type: ${ASSERT_TYPE_UNPARSED_ARGUMENTS}")
+ endif()
+
+ # Forward EXTRA_INCLUDES as a proper argument *list*. The original built a single
+ # "EXTRA_INCLUDES <headers>" string (forwarded as one argument, so never parsed),
+ # and tested presence with 'if(${ASSERT_TYPE_EXTRA_INCLUDES})', which is false
+ # for most non-boolean header names.
+ set(additional_args "")
+ if(NOT "${ASSERT_TYPE_EXTRA_INCLUDES}" STREQUAL "")
+ set(additional_args EXTRA_INCLUDES ${ASSERT_TYPE_EXTRA_INCLUDES})
+ endif()
+ test_type_size(${type} output_type_size ${additional_args})
+
+ # Quoted STREQUAL comparison: 'EQUAL' is numeric-only, and an unquoted empty
+ # expansion would have been a syntax error when the type wasn't found.
+ if("${output_type_size}" STREQUAL "")
+ # Type does not exist.
+ message(FATAL_ERROR "Type assertion failed: ${type} does not exist")
+ endif()
+
+ # A 'size' of 0 means "any size"; otherwise assert the exact size.
+ if((NOT ${size} EQUAL 0) AND (NOT ${output_type_size} EQUAL ${size}))
+ # Type does not meet size assertion.
+ message(FATAL_ERROR "Type assertion failed: ${type} does not equal size ${size}")
+ endif()
+endfunction()
+
+# parse_filelist_source(filelist output_var)
+# A helper function that parses the list of sources (usually found in "dist/filelist"). This returning a list of
+# sources that can then be used to generate the necessary build rules for the wiredtiger library. Additionally
+# uses the config values "WT_ARCH" and "WT_OS" when extracting platform specific sources from the file list.
+# filelist - path to the 'filelist' file.
+# output_var - name of the output variable that will be set with the parsed sources. Output variable is set in
+# the callers scope.
+function(parse_filelist_source filelist output_var)
+ set(arch_host "")
+ set(plat_host "")
+ # Determine architecture host for our filelist parse.
+ if(WT_X86)
+ set(arch_host "X86_HOST")
+ elseif(WT_ARM64)
+ set(arch_host "ARM64_HOST")
+ elseif(WT_PPC64)
+ set(arch_host "POWERPC_HOST")
+ elseif(WT_ZSERIES)
+ set(arch_host "ZSERIES_HOST")
+ endif()
+ # Determine platform host for our filelist parse.
+ if(WT_POSIX)
+ set(plat_host "POSIX_HOST")
+ elseif(WT_WIN)
+ set(plat_host "WINDOWS_HOST")
+ endif()
+
+ # Read file list and parse into list (one filelist line per list element).
+ file(READ "${filelist}" contents NEWLINE_CONSUME)
+ string(REGEX REPLACE "\n" ";" contents "${contents}")
+ set(output_files "")
+ foreach(file ${contents})
+ # Skip comment lines.
+ if(${file} MATCHES "^#.*$")
+ continue()
+ endif()
+ # Split each line on whitespace: either "<file>" (unconditional) or
+ # "<file> <host-group>" (only included for a matching platform/arch host).
+ string(REGEX REPLACE "[ \t\r]+" ";" file_contents ${file})
+ list(LENGTH file_contents file_contents_len)
+ if (file_contents_len EQUAL 1)
+ list(APPEND output_files ${file})
+ elseif(file_contents_len EQUAL 2)
+ list(GET file_contents 0 file_name)
+ list(GET file_contents 1 file_group)
+ if ((${file_group} STREQUAL "${plat_host}") OR (${file_group} STREQUAL "${arch_host}"))
+ list(APPEND output_files ${file_name})
+ endif()
+ else()
+ message(FATAL_ERROR "filelist (${filelist}) has an unexpected format [Invalid Line: \"${file}]\"")
+ endif()
+ endforeach()
+ # Return the parsed source list in the caller's scope.
+ set(${output_var} ${output_files} PARENT_SCOPE)
+endfunction()
diff --git a/src/third_party/wiredtiger/build_cmake/install/install.cmake b/src/third_party/wiredtiger/build_cmake/install/install.cmake
new file mode 100644
index 00000000000..8573b10ea1d
--- /dev/null
+++ b/src/third_party/wiredtiger/build_cmake/install/install.cmake
@@ -0,0 +1,66 @@
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+# All rights reserved.
+#
+# See the file LICENSE for redistribution information
+#
+
+include(GNUInstallDirs)
+
+# Library installs
+
+# Define the public headers for wiredtiger library to be used when installing the target.
+# wiredtiger.h is generated into the build tree; wiredtiger_ext.h ships from source.
+set_property(
+ TARGET wiredtiger
+ PROPERTY PUBLIC_HEADER
+ ${CMAKE_BINARY_DIR}/include/wiredtiger.h
+ ${CMAKE_SOURCE_DIR}/src/include/wiredtiger_ext.h
+)
+# Set the version property of the wiredtiger library so we can export a versioned install.
+set_target_properties(wiredtiger PROPERTIES VERSION "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}")
+
+# Install the wiredtiger library target.
+install(TARGETS wiredtiger
+ LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+)
+
+# Create our wiredtiger pkgconfig (for POSIX builds).
+# configure_file() substitutes the @...@ placeholders in wiredtiger.pc.in.
+if(WT_POSIX)
+ configure_file(${CMAKE_CURRENT_LIST_DIR}/wiredtiger.pc.in wiredtiger.pc @ONLY)
+ install(
+ FILES ${CMAKE_BINARY_DIR}/wiredtiger.pc
+ DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig
+ )
+endif()
+
+# Install our wiredtiger compressor extensions (provided we have enabled/built them).
+# Each extension target only exists when its option was enabled, hence the guards.
+if(HAVE_BUILTIN_EXTENSION_LZ4 OR ENABLE_LZ4)
+ install(TARGETS wiredtiger_lz4
+ LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ )
+endif()
+
+if(HAVE_BUILTIN_EXTENSION_SNAPPY OR ENABLE_SNAPPY)
+ install(TARGETS wiredtiger_snappy
+ LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ )
+endif()
+
+if(HAVE_BUILTIN_EXTENSION_ZLIB OR ENABLE_ZLIB)
+ install(TARGETS wiredtiger_zlib
+ LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ )
+endif()
+
+if(HAVE_BUILTIN_EXTENSION_ZSTD OR ENABLE_ZSTD)
+ install(TARGETS wiredtiger_zstd
+ LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ )
+endif()
diff --git a/src/third_party/wiredtiger/build_cmake/install/wiredtiger.pc.in b/src/third_party/wiredtiger/build_cmake/install/wiredtiger.pc.in
new file mode 100644
index 00000000000..9ed407b7fe4
--- /dev/null
+++ b/src/third_party/wiredtiger/build_cmake/install/wiredtiger.pc.in
@@ -0,0 +1,11 @@
+# pkg-config template for the installed WiredTiger library; the @...@
+# placeholders are substituted by CMake's configure_file().
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=@CMAKE_INSTALL_PREFIX@
+libdir=${exec_prefix}/@CMAKE_INSTALL_LIBDIR@
+includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
+
+Name: WiredTiger
+Description: The WiredTiger Data Engine
+Requires:
+Version: @VERSION_MAJOR@.@VERSION_MINOR@.@VERSION_PATCH@
+Libs: -L${libdir} -lwiredtiger
+Cflags: -I${includedir}
diff --git a/src/third_party/wiredtiger/build_cmake/strict/clang_strict.cmake b/src/third_party/wiredtiger/build_cmake/strict/clang_strict.cmake
new file mode 100644
index 00000000000..4ab3fa76ccb
--- /dev/null
+++ b/src/third_party/wiredtiger/build_cmake/strict/clang_strict.cmake
@@ -0,0 +1,50 @@
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+# All rights reserved.
+#
+# See the file LICENSE for redistribution information.
+#
+
+# Enable everything as errors, then selectively disable the checks that are
+# known to be noisy or incompatible with the WiredTiger code base.
+string(APPEND clang_base_c_flags " -Weverything -Werror")
+string(APPEND clang_base_c_flags " -Wno-cast-align")
+string(APPEND clang_base_c_flags " -Wno-documentation-unknown-command")
+string(APPEND clang_base_c_flags " -Wno-format-nonliteral")
+string(APPEND clang_base_c_flags " -Wno-packed")
+string(APPEND clang_base_c_flags " -Wno-padded")
+string(APPEND clang_base_c_flags " -Wno-reserved-id-macro")
+string(APPEND clang_base_c_flags " -Wno-zero-length-array")
+
+# We should turn on cast-qual, but not as a fatal error: see WT-2690.
+# For now, turn it off.
+# string(APPEND clang_base_c_flags "-Wno-error=cast-qual")
+string(APPEND clang_base_c_flags " -Wno-cast-qual")
+
+# Turn off clang thread-safety-analysis, it doesn't like some of the
+# code patterns in WiredTiger.
+string(APPEND clang_base_c_flags " -Wno-thread-safety-analysis")
+
+# On Centos 7.3.1611, system header files aren't compatible with
+# -Wdisabled-macro-expansion.
+string(APPEND clang_base_c_flags " -Wno-disabled-macro-expansion")
+
+# We occasionally use an extra semicolon to indicate an empty loop or
+# conditional body.
+string(APPEND clang_base_c_flags " -Wno-extra-semi-stmt")
+
+# Ignore unrecognized options.
+string(APPEND clang_base_c_flags " -Wno-unknown-warning-option")
+
+if(WT_DARWIN AND (CMAKE_C_COMPILER_VERSION VERSION_EQUAL 4.1))
+ # Apple clang has its own numbering system, and older OS X
+ # releases need some special love. Turn off some flags for
+ # Apple's clang 4.1:
+ # Apple clang version 4.1
+ # ((tags/Apple/clang-421.11.66) (based on LLVM 3.1svn)
+ string(APPEND clang_base_c_flags " -Wno-attributes")
+ string(APPEND clang_base_c_flags " -Wno-pedantic")
+ string(APPEND clang_base_c_flags " -Wno-unused-command-line-argument")
+endif()
+
+# Set our base clang flags to ensure it propagates to the rest of our build.
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${clang_base_c_flags}" CACHE STRING "" FORCE)
diff --git a/src/third_party/wiredtiger/build_cmake/strict/gcc_strict.cmake b/src/third_party/wiredtiger/build_cmake/strict/gcc_strict.cmake
new file mode 100644
index 00000000000..d417315fc4b
--- /dev/null
+++ b/src/third_party/wiredtiger/build_cmake/strict/gcc_strict.cmake
@@ -0,0 +1,79 @@
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+# All rights reserved.
+#
+# See the file LICENSE for redistribution information.
+#
+
+# Baseline warning set, applied for every supported gcc version.
+string(APPEND gcc_base_c_flags " -Wall")
+string(APPEND gcc_base_c_flags " -Wextra")
+string(APPEND gcc_base_c_flags " -Werror")
+string(APPEND gcc_base_c_flags " -Waggregate-return")
+string(APPEND gcc_base_c_flags " -Wbad-function-cast")
+string(APPEND gcc_base_c_flags " -Wcast-align")
+string(APPEND gcc_base_c_flags " -Wdeclaration-after-statement")
+string(APPEND gcc_base_c_flags " -Wdouble-promotion")
+string(APPEND gcc_base_c_flags " -Wfloat-equal")
+string(APPEND gcc_base_c_flags " -Wformat-nonliteral")
+string(APPEND gcc_base_c_flags " -Wformat-security")
+string(APPEND gcc_base_c_flags " -Wformat=2")
+string(APPEND gcc_base_c_flags " -Winit-self")
+string(APPEND gcc_base_c_flags " -Wjump-misses-init")
+string(APPEND gcc_base_c_flags " -Wmissing-declarations")
+string(APPEND gcc_base_c_flags " -Wmissing-field-initializers")
+string(APPEND gcc_base_c_flags " -Wmissing-prototypes")
+string(APPEND gcc_base_c_flags " -Wnested-externs")
+string(APPEND gcc_base_c_flags " -Wold-style-definition")
+string(APPEND gcc_base_c_flags " -Wpacked")
+string(APPEND gcc_base_c_flags " -Wpointer-arith")
+string(APPEND gcc_base_c_flags " -Wpointer-sign")
+string(APPEND gcc_base_c_flags " -Wredundant-decls")
+string(APPEND gcc_base_c_flags " -Wshadow")
+string(APPEND gcc_base_c_flags " -Wsign-conversion")
+string(APPEND gcc_base_c_flags " -Wstrict-prototypes")
+string(APPEND gcc_base_c_flags " -Wswitch-enum")
+string(APPEND gcc_base_c_flags " -Wundef")
+string(APPEND gcc_base_c_flags " -Wuninitialized")
+string(APPEND gcc_base_c_flags " -Wunreachable-code")
+string(APPEND gcc_base_c_flags " -Wunused")
+string(APPEND gcc_base_c_flags " -Wwrite-strings")
+
+# Non-fatal informational warnings.
+# We don't turn on the unsafe-loop-optimizations warning after gcc7,
+# it's too noisy to tolerate. Regardless, don't fail even when it's
+# configured.
+string(APPEND gcc_base_c_flags " -Wno-error=unsafe-loop-optimizations")
+if(${CMAKE_C_COMPILER_VERSION} VERSION_EQUAL 4.7)
+ string(APPEND gcc_base_c_flags " -Wno-c11-extensions")
+ string(APPEND gcc_base_c_flags " -Wunsafe-loop-optimizations")
+elseif(${CMAKE_C_COMPILER_VERSION} VERSION_EQUAL 5)
+ string(APPEND gcc_base_c_flags " -Wunsafe-loop-optimizations")
+endif()
+
+# Version-gated warnings, only understood by newer gcc releases.
+# (-Wjump-misses-init and -Wredundant-decls are already in the base list
+# above, so the duplicated appends were dropped from this block.)
+if(${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 5)
+ string(APPEND gcc_base_c_flags " -Wformat-signedness")
+ string(APPEND gcc_base_c_flags " -Wunused-macros")
+ string(APPEND gcc_base_c_flags " -Wvariadic-macros")
+endif()
+if(${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 6)
+ string(APPEND gcc_base_c_flags " -Wduplicated-cond")
+ string(APPEND gcc_base_c_flags " -Wlogical-op")
+ string(APPEND gcc_base_c_flags " -Wunused-const-variable=2")
+endif()
+if(${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 7)
+ string(APPEND gcc_base_c_flags " -Walloca")
+ string(APPEND gcc_base_c_flags " -Walloc-zero")
+ string(APPEND gcc_base_c_flags " -Wduplicated-branches")
+ string(APPEND gcc_base_c_flags " -Wformat-overflow=2")
+ string(APPEND gcc_base_c_flags " -Wformat-truncation=2")
+ string(APPEND gcc_base_c_flags " -Wrestrict")
+endif()
+if(${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 8)
+ string(APPEND gcc_base_c_flags " -Wmultistatement-macros")
+endif()
+
+# Set our base gcc flags to ensure it propagates to the rest of our build.
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${gcc_base_c_flags}" CACHE STRING "" FORCE)
diff --git a/src/third_party/wiredtiger/build_cmake/toolchains/clang.cmake b/src/third_party/wiredtiger/build_cmake/toolchains/clang.cmake
new file mode 100644
index 00000000000..252f935976f
--- /dev/null
+++ b/src/third_party/wiredtiger/build_cmake/toolchains/clang.cmake
@@ -0,0 +1,36 @@
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+# All rights reserved.
+#
+# See the file LICENSE for redistribution information.
+#
+
+cmake_minimum_required(VERSION 3.11.0)
+
+set(CMAKE_C_COMPILER "clang")
+set(CMAKE_C_COMPILER_ID "Clang")
+
+# NOTE(review): "Clang++" is not an ID CMake itself ever detects (the C++ clang
+# driver also identifies as "Clang") -- confirm this override is intentional
+# before relying on CMAKE_CXX_COMPILER_ID comparisons elsewhere.
+set(CMAKE_CXX_COMPILER "clang++")
+set(CMAKE_CXX_COMPILER_ID "Clang++")
+
+set(CMAKE_ASM_COMPILER "clang")
+set(CMAKE_ASM_COMPILER_ID "Clang")
+
+if(NOT "${COMPILE_DEFINITIONS}" STREQUAL "")
+ ### Additional check to overcome check_[symbol|include|function]_exists using toolchain file without passing WT_ARCH and WT_OS.
+ # Recover WT_ARCH/WT_OS from the -D definitions forwarded via CMAKE_REQUIRED_FLAGS.
+ string(REGEX MATCH "-DWT_ARCH=([A-Za-z0-9]+) -DWT_OS=([A-Za-z0-9]+)" _ ${COMPILE_DEFINITIONS})
+ set(wt_config_arch ${CMAKE_MATCH_1})
+ set(wt_config_os ${CMAKE_MATCH_2})
+else()
+ set(wt_config_arch ${WT_ARCH})
+ set(wt_config_os ${WT_OS})
+endif()
+
+# Include any platform specific clang configurations and flags e.g. target-tuple, flags.
+if((NOT "${wt_config_arch}" STREQUAL "") AND (NOT "${wt_config_os}" STREQUAL ""))
+ if(NOT EXISTS "${CMAKE_CURRENT_LIST_DIR}/${wt_config_arch}/${wt_config_os}/plat_clang.cmake")
+ message(FATAL_ERROR "(${wt_config_arch}/${wt_config_os}) directory does not have a plat_clang.cmake file")
+ endif()
+ include("${CMAKE_CURRENT_LIST_DIR}/${wt_config_arch}/${wt_config_os}/plat_clang.cmake")
+endif()
diff --git a/src/third_party/wiredtiger/build_cmake/toolchains/gcc.cmake b/src/third_party/wiredtiger/build_cmake/toolchains/gcc.cmake
new file mode 100644
index 00000000000..25333335a2e
--- /dev/null
+++ b/src/third_party/wiredtiger/build_cmake/toolchains/gcc.cmake
@@ -0,0 +1,31 @@
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+# All rights reserved.
+#
+# See the file LICENSE for redistribution information.
+#
+
+cmake_minimum_required(VERSION 3.11.0)
+
+if(NOT "${COMPILE_DEFINITIONS}" STREQUAL "")
+ ### Additional check to overcome check_[symbol|include|function]_exists using toolchain file without passing WT_ARCH and WT_OS.
+ # Recover WT_ARCH/WT_OS from the -D definitions forwarded via CMAKE_REQUIRED_FLAGS.
+ string(REGEX MATCH "-DWT_ARCH=([A-Za-z0-9]+) -DWT_OS=([A-Za-z0-9]+)" _ ${COMPILE_DEFINITIONS})
+ set(wt_config_arch ${CMAKE_MATCH_1})
+ set(wt_config_os ${CMAKE_MATCH_2})
+else()
+ set(wt_config_arch ${WT_ARCH})
+ set(wt_config_os ${WT_OS})
+endif()
+
+# Include any platform specific gcc configurations and flags e.g. target-tuple, flags.
+if((NOT "${wt_config_arch}" STREQUAL "") AND (NOT "${wt_config_os}" STREQUAL ""))
+ if(NOT EXISTS "${CMAKE_CURRENT_LIST_DIR}/${wt_config_arch}/${wt_config_os}/plat_gcc.cmake")
+ message(FATAL_ERROR "(${wt_config_arch}/${wt_config_os}) directory does not have a plat_gcc.cmake file")
+ endif()
+ include("${CMAKE_CURRENT_LIST_DIR}/${wt_config_arch}/${wt_config_os}/plat_gcc.cmake")
+endif()
+
+# The plat_gcc.cmake file included above may set CROSS_COMPILER_PREFIX; pick up
+# the (possibly prefixed) gcc binaries for C, C++ and assembly.
+set(CMAKE_C_COMPILER "${CROSS_COMPILER_PREFIX}gcc")
+set(CMAKE_CXX_COMPILER "${CROSS_COMPILER_PREFIX}g++")
+set(CMAKE_ASM_COMPILER "${CROSS_COMPILER_PREFIX}gcc")
diff --git a/src/third_party/wiredtiger/build_cmake/toolchains/x86/darwin/plat_clang.cmake b/src/third_party/wiredtiger/build_cmake/toolchains/x86/darwin/plat_clang.cmake
new file mode 100644
index 00000000000..da6aa0762b2
--- /dev/null
+++ b/src/third_party/wiredtiger/build_cmake/toolchains/x86/darwin/plat_clang.cmake
@@ -0,0 +1,14 @@
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+# All rights reserved.
+#
+# See the file LICENSE for redistribution information.
+#
+
+set(TRIPLE_TARGET "x86_64-apple-darwin")
+
+# Prefix cross-tool binaries and point clang's code generation at the same
+# target triple for C, C++ and assembly.
+set(CROSS_COMPILER_PREFIX ${TRIPLE_TARGET}-)
+set(CMAKE_C_COMPILER_TARGET "${TRIPLE_TARGET}")
+set(CMAKE_CXX_COMPILER_TARGET "${TRIPLE_TARGET}")
+set(CMAKE_ASM_COMPILER_TARGET "${TRIPLE_TARGET}")
diff --git a/src/third_party/wiredtiger/build_cmake/toolchains/x86/darwin/plat_gcc.cmake b/src/third_party/wiredtiger/build_cmake/toolchains/x86/darwin/plat_gcc.cmake
new file mode 100644
index 00000000000..8ec2f561e54
--- /dev/null
+++ b/src/third_party/wiredtiger/build_cmake/toolchains/x86/darwin/plat_gcc.cmake
@@ -0,0 +1,14 @@
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+# All rights reserved.
+#
+# See the file LICENSE for redistribution information.
+#
+
+# We are not cross-compiling if our system is Darwin, hence the "x86_64-apple-darwin-"
+# prefix is not necessary when we are not cross-compiling. Just default to the host
+# installed 'gcc' binary.
+if (NOT "${CMAKE_HOST_SYSTEM_NAME}" STREQUAL "Darwin")
+ # Cached INTERNAL + FORCE so the including gcc toolchain file sees the prefix.
+ set(CROSS_COMPILER_PREFIX "x86_64-apple-darwin-" CACHE INTERNAL "" FORCE)
+endif()
diff --git a/src/third_party/wiredtiger/build_cmake/toolchains/x86/linux/plat_clang.cmake b/src/third_party/wiredtiger/build_cmake/toolchains/x86/linux/plat_clang.cmake
new file mode 100644
index 00000000000..7eb4615b92d
--- /dev/null
+++ b/src/third_party/wiredtiger/build_cmake/toolchains/x86/linux/plat_clang.cmake
@@ -0,0 +1,14 @@
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+# All rights reserved.
+#
+# See the file LICENSE for redistribution information.
+#
+
+set(TRIPLE_TARGET "x86_64-pc-linux-gnu")
+
+# Prefix cross-tool binaries and point clang's code generation at the same
+# target triple for C, C++ and assembly.
+set(CROSS_COMPILER_PREFIX ${TRIPLE_TARGET}-)
+set(CMAKE_C_COMPILER_TARGET "${TRIPLE_TARGET}")
+set(CMAKE_CXX_COMPILER_TARGET "${TRIPLE_TARGET}")
+set(CMAKE_ASM_COMPILER_TARGET "${TRIPLE_TARGET}")
diff --git a/src/third_party/wiredtiger/build_cmake/toolchains/x86/linux/plat_gcc.cmake b/src/third_party/wiredtiger/build_cmake/toolchains/x86/linux/plat_gcc.cmake
new file mode 100644
index 00000000000..fc1f4307a08
--- /dev/null
+++ b/src/third_party/wiredtiger/build_cmake/toolchains/x86/linux/plat_gcc.cmake
@@ -0,0 +1,9 @@
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+# All rights reserved.
+#
+# See the file LICENSE for redistribution information.
+#
+
+# Select the x86_64 Linux GNU toolchain prefix; cached INTERNAL + FORCE so the
+# including gcc toolchain file sees the prefix.
+set(CROSS_COMPILER_PREFIX "x86_64-linux-gnu-" CACHE INTERNAL "" FORCE)
diff --git a/src/third_party/wiredtiger/build_posix/aclocal/version-set.m4 b/src/third_party/wiredtiger/build_posix/aclocal/version-set.m4
index 3ca21022bfa..f03a1f7819f 100644
--- a/src/third_party/wiredtiger/build_posix/aclocal/version-set.m4
+++ b/src/third_party/wiredtiger/build_posix/aclocal/version-set.m4
@@ -2,8 +2,8 @@ dnl build by dist/s_version
VERSION_MAJOR=10
VERSION_MINOR=0
-VERSION_PATCH=0
-VERSION_STRING='"WiredTiger 10.0.0: (March 18, 2020)"'
+VERSION_PATCH=1
+VERSION_STRING='"WiredTiger 10.0.1: (April 12, 2021)"'
AC_SUBST(VERSION_MAJOR)
AC_SUBST(VERSION_MINOR)
diff --git a/src/third_party/wiredtiger/build_posix/aclocal/version.m4 b/src/third_party/wiredtiger/build_posix/aclocal/version.m4
index e953552c356..c76126e107a 100644
--- a/src/third_party/wiredtiger/build_posix/aclocal/version.m4
+++ b/src/third_party/wiredtiger/build_posix/aclocal/version.m4
@@ -1,2 +1,2 @@
dnl WiredTiger product version for AC_INIT. Maintained by dist/s_version
-10.0.0
+10.0.1
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index 2027298bb2e..953474d404f 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -205,18 +205,37 @@ lsm_config = [
]),
]
-tiered_config = common_runtime_config + [
- Config('tiered', '', r'''
- options only relevant for tiered data sources''',
+tiered_config = [
+ Config('tiered_storage', '', r'''
+ configure a storage source for this table''',
type='category', subconfig=[
- Config('chunk_size', '1GB', r'''
- the maximum size of the hot chunk of tiered tree. This
- limit is soft - it is possible for chunks to be temporarily
- larger than this value''',
- min='1M'),
- Config('tiers', '', r'''
- list of data sources to combine into a tiered storage structure''', type='list')
- ]),
+ Config('name', 'none', r'''
+ permitted values are \c "none"
+ or custom storage source name created with
+ WT_CONNECTION::add_storage_source.
+ See @ref custom_storage_sources for more information'''),
+ Config('auth_token', '', r'''
+ authentication string identifier'''),
+ Config('bucket', '', r'''
+ the bucket indicating the location for this table'''),
+ Config('bucket_prefix', '', r'''
+ the unique bucket prefix for this table'''),
+ Config('local_retention', '300', r'''
+ time in seconds to retain data on tiered storage on the local tier
+ for faster read access''',
+ min='0', max='10000'),
+ Config('object_target_size', '10M', r'''
+ the approximate size of objects before creating them on the
+ tiered storage tier''',
+ min='100K', max='10TB'),
+ ]),
+]
+
+tiered_tree_config = [
+ Config('bucket', '', r'''
+ the bucket indicating the location for this table'''),
+ Config('bucket_prefix', '', r'''
+ the unique bucket prefix for this table'''),
]
file_runtime_config = common_runtime_config + [
@@ -258,7 +277,7 @@ file_runtime_config = common_runtime_config + [
]
# Per-file configuration
-file_config = format_meta + file_runtime_config + [
+file_config = format_meta + file_runtime_config + tiered_config + [
Config('block_allocation', 'best', r'''
configure block allocation. Permitted values are \c "best" or \c "first";
the \c "best" configuration uses a best-fit algorithm,
@@ -413,27 +432,6 @@ file_config = format_meta + file_runtime_config + [
split into smaller pages, where each page is the specified
percentage of the maximum Btree page size''',
min='50', max='100'),
- Config('tiered_storage', '', r'''
- configure a storage source for this table''',
- type='category', subconfig=[
- Config('name', 'none', r'''
- Permitted values are \c "none"
- or custom storage source name created with
- WT_CONNECTION::add_storage_source.
- See @ref custom_storage_sources for more information'''),
- Config('auth_token', '', r'''
- authentication string identifier'''),
- Config('bucket', '', r'''
- The bucket indicating the location for this table'''),
- Config('local_retention', '300', r'''
- time in seconds to retain data on tiered storage on the local tier
- for faster read access''',
- min='0', max='10000'),
- Config('object_target_size', '10M', r'''
- the approximate size of objects before creating them on the
- tiered storage tier''',
- min='100K', max='10TB'),
- ]),
]
# File metadata, including both configurable and non-configurable (internal)
@@ -451,7 +449,7 @@ file_meta = file_config + [
]
lsm_meta = file_config + lsm_config + [
- Config('last', '', r'''
+ Config('last', '0', r'''
the last allocated chunk ID'''),
Config('chunks', '', r'''
active chunks in the LSM tree'''),
@@ -459,7 +457,17 @@ lsm_meta = file_config + lsm_config + [
obsolete chunks in the LSM tree'''),
]
-tiered_meta = tiered_config
+tiered_meta = common_meta + tiered_config + [
+ Config('last', '0', r'''
+ the last allocated object ID'''),
+ Config('tiers', '', r'''
+ list of data sources to combine into a tiered storage structure''', type='list'),
+]
+
+tier_meta = file_meta + tiered_tree_config
+# Objects need to have the readonly setting set and bucket_prefix.
+# The file_meta already contains those pieces.
+object_meta = file_meta
table_only_config = [
Config('colgroups', '', r'''
@@ -742,6 +750,21 @@ connection_runtime_config = [
this will update the value if one is already set''',
min='1MB', max='10TB')
]),
+ Config('statistics', 'none', r'''
+ Maintain database statistics, which may impact performance.
+ Choosing "all" maintains all statistics regardless of cost,
+ "fast" maintains a subset of statistics that are relatively
+ inexpensive, "none" turns off all statistics. The "clear"
+ configuration resets statistics after they are gathered,
+ where appropriate (for example, a cache size statistic is
+ not cleared, while the count of cursor insert operations will
+ be cleared). When "clear" is configured for the database,
+ gathered statistics are reset each time a statistics cursor
+ is used to gather statistics, as well as each time statistics
+ are logged using the \c statistics_log configuration. See
+ @ref statistics for more information''',
+ type='list',
+ choices=['all', 'cache_walk', 'fast', 'none', 'clear', 'tree_walk']),
Config('tiered_manager', '', r'''
tiered storage manager configuration options''',
type='category', undoc=True, subconfig=[
@@ -760,21 +783,6 @@ connection_runtime_config = [
management inside WiredTiger''',
min='0', max='100000'),
]),
- Config('statistics', 'none', r'''
- Maintain database statistics, which may impact performance.
- Choosing "all" maintains all statistics regardless of cost,
- "fast" maintains a subset of statistics that are relatively
- inexpensive, "none" turns off all statistics. The "clear"
- configuration resets statistics after they are gathered,
- where appropriate (for example, a cache size statistic is
- not cleared, while the count of cursor insert operations will
- be cleared). When "clear" is configured for the database,
- gathered statistics are reset each time a statistics cursor
- is used to gather statistics, as well as each time statistics
- are logged using the \c statistics_log configuration. See
- @ref statistics for more information''',
- type='list',
- choices=['all', 'cache_walk', 'fast', 'none', 'clear', 'tree_walk']),
Config('timing_stress_for_test', '', r'''
enable code that interrupts the usual timing of operations with a goal
of uncovering race conditions and unexpected blocking. This option is
@@ -820,6 +828,7 @@ connection_runtime_config = [
'split',
'temporary',
'thread_group',
+ 'tiered',
'timestamp',
'transaction',
'verify',
@@ -951,8 +960,6 @@ connection_reconfigure_statistics_log_configuration = [
]
tiered_storage_configuration_common = [
- Config('auth_token', '', r'''
- authentication token string'''),
Config('local_retention', '300', r'''
time in seconds to retain data on tiered storage on the local tier for
faster read access''',
@@ -979,14 +986,10 @@ wiredtiger_open_tiered_storage_configuration = [
authentication string identifier'''),
Config('bucket', '', r'''
bucket string identifier where the objects should reside'''),
- Config('cluster', '', r'''
- unique string identifier identifying the cluster owning these objects.
- This identifier is used in naming since objects multiple instances can share
- the object storage bucket'''),
- Config('member', '', r'''
- unique string identifier identifying the member within a cluster.
- This identifier is used in naming objects since multiple nodes in a
- cluster could write to the same table in the object storage bucket'''),
+ Config('bucket_prefix', '', r'''
+ unique string prefix to identify our objects in the bucket.
+ Multiple instances can share the storage bucket and this
+ identifier is used in naming objects'''),
Config('name', 'none', r'''
Permitted values are \c "none"
or custom storage name created with
@@ -1242,6 +1245,14 @@ cursor_runtime_config = [
if the record exists, WT_CURSOR::update fails with ::WT_NOTFOUND
if the record does not exist''',
type='boolean'),
+ Config('prefix_search', 'false', r'''
+ when performing a search near for a prefix, if set to true this
+ configuration will allow the search near to exit early if it has left
+ the key range defined by the prefix. This is relevant when the table
+ contains a large number of records which potentially aren't visible to
+ the caller of search near, as such a large number of records could be skipped.
+ The prefix_search configuration provides a fast exit in this scenario.''', type='boolean',
+ undoc=True),
]
methods = {
@@ -1255,8 +1266,12 @@ methods = {
'lsm.meta' : Method(lsm_meta),
+'object.meta' : Method(object_meta),
+
'table.meta' : Method(table_meta),
+'tier.meta' : Method(tier_meta),
+
'tiered.meta' : Method(tiered_meta),
'WT_CURSOR.close' : Method([]),
diff --git a/src/third_party/wiredtiger/dist/filelist b/src/third_party/wiredtiger/dist/filelist
index e3444aaa058..f6bfd0f0fc7 100644
--- a/src/third_party/wiredtiger/dist/filelist
+++ b/src/third_party/wiredtiger/dist/filelist
@@ -12,6 +12,7 @@ src/block/block_open.c
src/block/block_read.c
src/block/block_session.c
src/block/block_slvg.c
+src/block/block_tiered.c
src/block/block_vrfy.c
src/block/block_write.c
src/bloom/bloom.c
@@ -208,6 +209,8 @@ src/support/scratch.c
src/support/stat.c
src/support/thread_group.c
src/support/timestamp.c
+src/support/update_vector.c
+src/tiered/tiered_config.c
src/tiered/tiered_cursor.c
src/tiered/tiered_handle.c
src/txn/txn.c
diff --git a/src/third_party/wiredtiger/dist/s_copyright.list b/src/third_party/wiredtiger/dist/s_copyright.list
index a869daa8835..f1c879a555b 100644
--- a/src/third_party/wiredtiger/dist/s_copyright.list
+++ b/src/third_party/wiredtiger/dist/s_copyright.list
@@ -1,5 +1,6 @@
skip bench/workgen/workgen/workgen.py
skip bench/workgen/workgen_wrap.cxx
+skip build_cmake/install/wiredtiger.pc.in
skip build_win/wiredtiger_config.h
skip dist/api_config.py
skip dist/api_config_gen.py
diff --git a/src/third_party/wiredtiger/dist/s_define.list b/src/third_party/wiredtiger/dist/s_define.list
index 98e39d86683..3e0f6af6581 100644
--- a/src/third_party/wiredtiger/dist/s_define.list
+++ b/src/third_party/wiredtiger/dist/s_define.list
@@ -70,8 +70,6 @@ WT_SESSION_LOCKED_TURTLE
WT_SINGLE_THREAD_CHECK_START
WT_SINGLE_THREAD_CHECK_STOP
WT_SIZE_CHECK
-WT_SS_OPEN_CREATE
-WT_SS_OPEN_READONLY
WT_STATS_FIELD_TO_OFFSET
WT_STATS_SLOT_ID
WT_STAT_CONN_DATA_DECRV
diff --git a/src/third_party/wiredtiger/dist/s_install b/src/third_party/wiredtiger/dist/s_install
index 8b39a2f1e85..8b1f1275ecf 100755
--- a/src/third_party/wiredtiger/dist/s_install
+++ b/src/third_party/wiredtiger/dist/s_install
@@ -34,10 +34,10 @@ This is version $WIREDTIGER_VERSION of WiredTiger.
Instructions for configuring, building, and installing WiredTiger are available online.
For Linux, MacOS, and other POSIX systems:
- http://source.wiredtiger.com/develop/build-posix.html
+ https://source.wiredtiger.com/develop/build-posix.html
For Windows:
- http://source.wiredtiger.com/develop/build-windows.html
+ https://source.wiredtiger.com/develop/build-windows.html
END_TEXT
cmp $t $f > /dev/null 2>&1 ||
diff --git a/src/third_party/wiredtiger/dist/s_readme b/src/third_party/wiredtiger/dist/s_readme
index 316e60773e0..38d6942be4c 100755
--- a/src/third_party/wiredtiger/dist/s_readme
+++ b/src/third_party/wiredtiger/dist/s_readme
@@ -33,29 +33,29 @@ This is version $WIREDTIGER_VERSION of WiredTiger.
WiredTiger release packages and documentation can be found at:
- http://source.wiredtiger.com/
+ https://source.wiredtiger.com/
The documentation for this specific release can be found at:
- http://source.wiredtiger.com/$WIREDTIGER_VERSION/index.html
+ https://source.wiredtiger.com/$WIREDTIGER_VERSION/index.html
The WiredTiger source code can be found at:
- http://github.com/wiredtiger/wiredtiger
+ https://github.com/wiredtiger/wiredtiger
WiredTiger uses JIRA for issue management:
- http://jira.mongodb.org/browse/WT
+ https://jira.mongodb.org/browse/WT
Please do not report issues through GitHub.
WiredTiger licensing information can be found at:
- http://source.wiredtiger.com/license.html
+ https://source.wiredtiger.com/license.html
For general questions and discussion, there's a WiredTiger group:
- http://groups.google.com/group/wiredtiger-users
+ https://groups.google.com/group/wiredtiger-users
END_TEXT
cmp $t $f > /dev/null 2>&1 ||
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index fe984309e96..9c6976641b0 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -21,6 +21,7 @@ Ailamaki
Alakuijala
Alexandrescu's
Alloc
+Async
AsyncOp
Athanassoulis
Athlon
@@ -539,6 +540,7 @@ boolean
br
breakpoint
bs
+bstorage
bswap
bt
btcur
@@ -739,6 +741,7 @@ dup
eee
eg
egrep
+el
emp
encodings
encryptor
@@ -1044,6 +1047,7 @@ metadata's
metafile
mfence
minorp
+mips
mkdir
mmap
mmrand
@@ -1108,6 +1112,7 @@ nuls
num
numSymbols
numbare
+objs
offpage
ok
oldv
@@ -1480,6 +1485,7 @@ wrlsn
ws
wtbench
wti
+wtobj
wtperf
wtperf's
wts
diff --git a/src/third_party/wiredtiger/dist/s_void b/src/third_party/wiredtiger/dist/s_void
index 8e823c1f229..95cf8ec5215 100755
--- a/src/third_party/wiredtiger/dist/s_void
+++ b/src/third_party/wiredtiger/dist/s_void
@@ -78,6 +78,7 @@ func_ok()
-e '/int __wt_stat_dsrc_desc$/d' \
-e '/int __wt_stat_join_desc$/d' \
-e '/int __wt_stat_session_desc/d' \
+ -e '/int __wt_tiered_close/d' \
-e '/int __wt_txn_read_upd_list$/d' \
-e '/int __wt_txn_rollback_required$/d' \
-e '/int __wt_win_directory_list_free$/d' \
@@ -105,11 +106,11 @@ func_ok()
-e '/int index_compare_primary$/d' \
-e '/int index_compare_u$/d' \
-e '/int index_extractor_u$/d' \
+ -e '/int local_directory_list_free$/d' \
-e '/int local_err$/d' \
-e '/int local_file_lock$/d' \
-e '/int local_file_sync$/d' \
- -e '/int local_location_handle_close$/d' \
- -e '/int local_location_list_free$/d' \
+ -e '/int local_fs_terminate$/d' \
-e '/int log_print_err$/d' \
-e '/int lz4_error$/d' \
-e '/int lz4_pre_size$/d' \
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index 44741c7380e..34e5b020a4a 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -223,7 +223,6 @@ connection_stats = [
CacheStat('cache_eviction_force_hs_fail', 'forced eviction - history store pages failed to evict while session has history store cursor open'),
CacheStat('cache_eviction_force_hs_success', 'forced eviction - history store pages successfully evicted while session has history store cursor open'),
CacheStat('cache_eviction_force_retune', 'force re-tuning of eviction workers once in a while'),
- CacheStat('cache_eviction_force_rollback', 'forced eviction - session returned rollback error while force evicting due to being oldest'),
CacheStat('cache_eviction_get_ref', 'eviction calls to get a page'),
CacheStat('cache_eviction_get_ref_empty', 'eviction calls to get a page found queue empty'),
CacheStat('cache_eviction_get_ref_empty2', 'eviction calls to get a page found queue empty after locking'),
@@ -532,6 +531,7 @@ connection_stats = [
TxnStat('txn_checkpoint_prep_running', 'transaction checkpoint prepare currently running', 'no_clear,no_scale'),
TxnStat('txn_checkpoint_prep_total', 'transaction checkpoint prepare total time (msecs)', 'no_clear,no_scale'),
TxnStat('txn_checkpoint_running', 'transaction checkpoint currently running', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_running_hs', 'transaction checkpoint currently running for history store file', 'no_clear,no_scale'),
TxnStat('txn_checkpoint_scrub_target', 'transaction checkpoint scrub dirty target', 'no_clear,no_scale'),
TxnStat('txn_checkpoint_scrub_time', 'transaction checkpoint scrub time (msecs)', 'no_clear,no_scale'),
TxnStat('txn_checkpoint_skipped', 'transaction checkpoints skipped because database was clean'),
@@ -559,6 +559,7 @@ connection_stats = [
TxnStat('txn_rts_pages_visited', 'rollback to stable pages visited'),
TxnStat('txn_rts_tree_walk_skip_pages', 'rollback to stable tree walk skipping pages'),
TxnStat('txn_rts_upd_aborted', 'rollback to stable updates aborted'),
+ TxnStat('txn_sessions_walked', 'sessions scanned in each walk of concurrent sessions'),
TxnStat('txn_set_ts', 'set timestamp calls'),
TxnStat('txn_set_ts_durable', 'set timestamp durable calls'),
TxnStat('txn_set_ts_durable_upd', 'set timestamp durable updates'),
@@ -754,6 +755,7 @@ conn_dsrc_stats = [
CacheStat('cache_bytes_read', 'bytes read into cache', 'size'),
CacheStat('cache_bytes_write', 'bytes written from cache', 'size'),
CacheStat('cache_eviction_checkpoint', 'checkpoint blocked page eviction'),
+ CacheStat('cache_eviction_blocked_checkpoint_hs', 'checkpoint of history store file blocked non-history store page eviction'),
CacheStat('cache_eviction_clean', 'unmodified pages evicted'),
CacheStat('cache_eviction_deepen', 'page split during eviction deepened the tree'),
CacheStat('cache_eviction_dirty', 'modified pages evicted'),
@@ -780,11 +782,10 @@ conn_dsrc_stats = [
CacheStat('cache_hs_insert_restart', 'history store table insert calls that returned restart'),
CacheStat('cache_hs_key_truncate', 'history store table truncation to remove an update'),
CacheStat('cache_hs_key_truncate_onpage_removal', 'history store table truncation to remove range of updates due to key being removed from the data page during reconciliation'),
- CacheStat('cache_hs_key_truncate_non_ts', 'history store table truncation to remove range of updates due to non timestamped update on data page'),
+ CacheStat('cache_hs_order_remove', 'history store table truncation to remove range of updates due to out-of-order timestamp update on data page'),
CacheStat('cache_hs_key_truncate_rts', 'history store table truncation by rollback to stable to remove an update'),
CacheStat('cache_hs_key_truncate_rts_unstable', 'history store table truncation by rollback to stable to remove an unstable update'),
- CacheStat('cache_hs_order_fixup_insert', 'history store table out-of-order updates that were fixed up during insertion'),
- CacheStat('cache_hs_order_fixup_move', 'history store table out-of-order updates that were fixed up by moving existing records'),
+ CacheStat('cache_hs_order_reinsert', 'history store table out-of-order updates that were fixed up by reinserting with the fixed timestamp'),
CacheStat('cache_hs_order_lose_durable_timestamp', 'history store table out-of-order resolved updates that lose their durable timestamp'),
CacheStat('cache_hs_read', 'history store table reads'),
CacheStat('cache_hs_read_miss', 'history store table reads missed'),
@@ -812,6 +813,7 @@ conn_dsrc_stats = [
CursorStat('cursor_prev_skip_ge_100', 'cursor prev calls that skip greater than or equal to 100 entries'),
CursorStat('cursor_prev_skip_lt_100', 'cursor prev calls that skip less than 100 entries'),
CursorStat('cursor_prev_skip_total', 'Total number of entries skipped by cursor prev calls'),
+ CursorStat('cursor_search_near_prefix_fast_paths', 'Total number of times a search near has exited due to prefix config'),
CursorStat('cursor_skip_hs_cur_position', 'Total number of entries skipped to position the history store cursor'),
##########################################
# Checkpoint cleanup statistics
diff --git a/src/third_party/wiredtiger/dist/test_data.py b/src/third_party/wiredtiger/dist/test_data.py
index 863d58bb358..d0c8d36e955 100644
--- a/src/third_party/wiredtiger/dist/test_data.py
+++ b/src/third_party/wiredtiger/dist/test_data.py
@@ -41,19 +41,28 @@ class Config:
def __ge__(self, other):
return self.name >= other.name
+#
+# A generic configuration used by some components to define their tick rate.
+#
+throttle_config = [
+ Config('op_count', 1, r'''
+ The number of operations to be performed within the defined interval, e.g.
+ 20 op_count with an interval of a second is equal to 20 ops per second.''',
+ min=1, max=10000),
+ Config('interval', 's', r'''
+ The interval to considered, either second, minute or hour.
+ The default interval is seconds.''',
+ choices=['s', 'm', 'h'])
+]
#
# Record config specifies the format of the keys and values used in the database
#
-record_config = [
+record_config = throttle_config + [
Config('key_size', 0, r'''
The size of the keys created''', min=0, max=10000),
- Config('key_format', 'i', r'''
- The format of the keys in the database'''),
Config('value_size', 0, r'''
The size of the values created''', min=0, max=1000000000),
- Config('value_format', 'S', r'''
- The format of the values stored in the database.''')
]
#
@@ -67,14 +76,6 @@ populate_config = [
]
#
-# A generic configuration used by some components to define their tick rate.
-#
-throttle_config = [
- Config('rate_per_second',1,r'''
- The number of times an operation should be performed per second''', min=1,max=1000),
-]
-
-#
# A generic configuration used by various other configurations to define whether that component or
# similar is enabled or not.
#
@@ -98,6 +99,8 @@ range_config = [
The maximum a value can be in a range''')
]
+component_config = enable_config + throttle_config
+
transaction_config = [
Config('ops_per_transaction', '', r'''
Defines how many operations a transaction can perform, the range is defined with a minimum
@@ -109,7 +112,7 @@ transaction_config = [
# Configuration that applies to the runtime monitor component, this should be a list of statistics
# that need to be checked by the component.
#
-runtime_monitor = throttle_config + [
+runtime_monitor = component_config + [
Config('stat_cache_size', '', '''
The maximum cache percentage that can be hit while running.''',
type='category', subconfig=limit_stat)
@@ -118,7 +121,7 @@ runtime_monitor = throttle_config + [
#
# Configuration that applies to the timestamp_manager component.
#
-timestamp_manager = enable_config + [
+timestamp_manager = component_config + [
Config('oldest_lag', 0, r'''
The duration between the stable and oldest timestamps''', min=0, max=1000000),
Config('stable_lag', 0, r'''
@@ -133,7 +136,7 @@ workload_tracking = enable_config
#
# Configuration that applies to the workload_generator component.
#
-workload_generator = transaction_config + record_config + populate_config + [
+workload_generator = component_config + transaction_config + record_config + populate_config + [
Config('read_threads', 0, r'''
The number of threads performing read operations''', min=0, max=100),
Config('insert_threads', 0, r'''
@@ -175,5 +178,6 @@ test_config = [
]
methods = {
+ 'example_test' : Method(test_config),
'poc_test' : Method(test_config),
}
diff --git a/src/third_party/wiredtiger/examples/c/Makefile.am b/src/third_party/wiredtiger/examples/c/Makefile.am
index 664e0f33ce7..6c20b31a146 100644
--- a/src/third_party/wiredtiger/examples/c/Makefile.am
+++ b/src/third_party/wiredtiger/examples/c/Makefile.am
@@ -23,7 +23,6 @@ noinst_PROGRAMS = \
ex_pack \
ex_process \
ex_schema \
- ex_storage_source \
ex_smoke \
ex_stat \
ex_sync \
@@ -31,7 +30,6 @@ noinst_PROGRAMS = \
ex_encrypt_LDFLAGS = -rdynamic
ex_file_system_LDFLAGS = -rdynamic
-ex_storage_source_LDFLAGS = -rdynamic
# The examples can be run with no arguments as simple smoke tests
TESTS = $(noinst_PROGRAMS)
diff --git a/src/third_party/wiredtiger/examples/c/ex_col_store.c b/src/third_party/wiredtiger/examples/c/ex_col_store.c
index a1d90ba3698..fba9a68f0b4 100644
--- a/src/third_party/wiredtiger/examples/c/ex_col_store.c
+++ b/src/third_party/wiredtiger/examples/c/ex_col_store.c
@@ -1,5 +1,5 @@
/*-
- * Public Domain 2014-2020 MongoDB, Inc.
+ * Public Domain 2014-present MongoDB, Inc.
* Public Domain 2008-2014 WiredTiger, Inc.
*
* This is free and unencumbered software released into the public domain.
diff --git a/src/third_party/wiredtiger/examples/c/ex_storage_source.c b/src/third_party/wiredtiger/examples/c/ex_storage_source.c
deleted file mode 100644
index 6cfd0cb3e07..00000000000
--- a/src/third_party/wiredtiger/examples/c/ex_storage_source.c
+++ /dev/null
@@ -1,1203 +0,0 @@
-/*-
- * Public Domain 2014-present MongoDB, Inc.
- * Public Domain 2008-2014 WiredTiger, Inc.
- *
- * This is free and unencumbered software released into the public domain.
- *
- * Anyone is free to copy, modify, publish, use, compile, sell, or
- * distribute this software, either in source code form or as a compiled
- * binary, for any purpose, commercial or non-commercial, and by any
- * means.
- *
- * In jurisdictions that recognize copyright laws, the author or authors
- * of this software dedicate any and all copyright interest in the
- * software to the public domain. We make this dedication for the benefit
- * of the public at large and to the detriment of our heirs and
- * successors. We intend this dedication to be an overt act of
- * relinquishment in perpetuity of all present and future rights to this
- * software under copyright law.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * ex_storage_source.c
- * demonstrates how to use the custom storage source interface
- */
-#include <test_util.h>
-
-#ifdef __GNUC__
-#if __GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ > 0)
-/*
- * !!!
- * GCC with -Wformat-truncation complains about calls to snprintf in this file.
- * There's nothing wrong, this makes the warning go away.
- */
-#pragma GCC diagnostic ignored "-Wformat-truncation"
-#endif
-#endif
-
-/*
- * This example code uses pthread functions for portable locking, we ignore errors for simplicity.
- */
-static void
-allocate_storage_source_lock(pthread_rwlock_t *lockp)
-{
- error_check(pthread_rwlock_init(lockp, NULL));
-}
-
-static void
-destroy_storage_source_lock(pthread_rwlock_t *lockp)
-{
- error_check(pthread_rwlock_destroy(lockp));
-}
-
-static void
-lock_storage_source(pthread_rwlock_t *lockp)
-{
- error_check(pthread_rwlock_wrlock(lockp));
-}
-
-static void
-unlock_storage_source(pthread_rwlock_t *lockp)
-{
- error_check(pthread_rwlock_unlock(lockp));
-}
-
-/*
- * Example storage source implementation, using memory buffers to represent objects.
- */
-typedef struct {
- WT_STORAGE_SOURCE iface;
-
- /*
- * WiredTiger performs schema and I/O operations in parallel, all storage sources and file
- * handle access must be thread-safe. This example uses a single, global storage source lock for
- * simplicity; real applications might require finer granularity, for example, a single lock for
- * the storage source handle list and per-handle locks serializing I/O.
- */
- pthread_rwlock_t lock; /* Lock */
-
- int closed_object_count;
- int opened_object_count;
- int opened_unique_object_count;
- int read_ops;
- int write_ops;
-
- /* Queue of file handles */
- TAILQ_HEAD(demo_file_handle_qh, demo_file_handle) fileq;
-
- WT_EXTENSION_API *wtext; /* Extension functions */
-
-} DEMO_STORAGE_SOURCE;
-
-typedef struct demo_file_handle {
- WT_FILE_HANDLE iface;
-
- /*
- * Add custom file handle fields after the interface.
- */
- DEMO_STORAGE_SOURCE *demo_ss; /* Enclosing storage source */
-
- TAILQ_ENTRY(demo_file_handle) q; /* Queue of handles */
- uint32_t ref; /* Reference count */
-
- char *buf; /* In-memory contents */
- size_t bufsize; /* In-memory buffer size */
-
- size_t size; /* Read/write data size */
-} DEMO_FILE_HANDLE;
-
-typedef struct demo_location_handle {
- WT_LOCATION_HANDLE iface;
-
- char *loc_string; /* location as a string. */
-} DEMO_LOCATION_HANDLE;
-
-#define LOCATION_STRING(lh) (((DEMO_LOCATION_HANDLE *)lh)->loc_string)
-
-/*
- * Extension initialization function.
- */
-#ifdef _WIN32
-/*
- * Explicitly export this function so it is visible when loading extensions.
- */
-__declspec(dllexport)
-#endif
- int demo_storage_source_create(WT_CONNECTION *, WT_CONFIG_ARG *);
-
-/*
- * Forward function declarations for storage source API implementation.
- */
-static int demo_ss_exist(
- WT_STORAGE_SOURCE *, WT_SESSION *, WT_LOCATION_HANDLE *, const char *, bool *);
-static int demo_ss_location_handle(
- WT_STORAGE_SOURCE *, WT_SESSION *, const char *, WT_LOCATION_HANDLE **);
-static int demo_ss_location_list(WT_STORAGE_SOURCE *, WT_SESSION *, WT_LOCATION_HANDLE *,
- const char *, uint32_t, char ***, uint32_t *);
-static int demo_ss_location_list_free(WT_STORAGE_SOURCE *, WT_SESSION *, char **, uint32_t);
-static int demo_ss_open(WT_STORAGE_SOURCE *, WT_SESSION *, WT_LOCATION_HANDLE *, const char *,
- uint32_t, WT_FILE_HANDLE **);
-static int demo_ss_remove(
- WT_STORAGE_SOURCE *, WT_SESSION *, WT_LOCATION_HANDLE *, const char *, uint32_t);
-static int demo_ss_size(
- WT_STORAGE_SOURCE *, WT_SESSION *, WT_LOCATION_HANDLE *, const char *, wt_off_t *);
-static int demo_ss_terminate(WT_STORAGE_SOURCE *, WT_SESSION *);
-
-/*
- * Forward function declarations for location API implementation.
- */
-static int demo_location_close(WT_LOCATION_HANDLE *, WT_SESSION *);
-
-/*
- * Forward function declarations for file handle API implementation.
- */
-static int demo_file_close(WT_FILE_HANDLE *, WT_SESSION *);
-static int demo_file_lock(WT_FILE_HANDLE *, WT_SESSION *, bool);
-static int demo_file_read(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, void *);
-static int demo_file_size(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t *);
-static int demo_file_sync(WT_FILE_HANDLE *, WT_SESSION *);
-static int demo_file_truncate(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t);
-static int demo_file_write(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, const void *);
-
-/*
- * Forward function declarations for internal functions.
- */
-static int demo_handle_remove(WT_SESSION *, DEMO_FILE_HANDLE *);
-static DEMO_FILE_HANDLE *demo_handle_search(
- WT_STORAGE_SOURCE *, WT_LOCATION_HANDLE *, const char *);
-
-#define DEMO_FILE_SIZE_INCREMENT 32768
-
-/*
- * string_match --
- * Return if a string matches a byte string of len bytes.
- */
-static bool
-byte_string_match(const char *str, const char *bytes, size_t len)
-{
- return (strncmp(str, bytes, len) == 0 && (str)[(len)] == '\0');
-}
-
-/*
- * demo_storage_source_create --
- * Initialize the demo storage source.
- */
-int
-demo_storage_source_create(WT_CONNECTION *conn, WT_CONFIG_ARG *config)
-{
- DEMO_STORAGE_SOURCE *demo_ss;
- WT_CONFIG_ITEM k, v;
- WT_CONFIG_PARSER *config_parser;
- WT_EXTENSION_API *wtext;
- WT_STORAGE_SOURCE *storage_source;
- int ret = 0;
-
- wtext = conn->get_extension_api(conn);
-
- if ((demo_ss = calloc(1, sizeof(DEMO_STORAGE_SOURCE))) == NULL) {
- (void)wtext->err_printf(
- wtext, NULL, "demo_storage_source_create: %s", wtext->strerror(wtext, NULL, ENOMEM));
- return (ENOMEM);
- }
- demo_ss->wtext = wtext;
- storage_source = (WT_STORAGE_SOURCE *)demo_ss;
-
- /*
- * Applications may have their own configuration information to pass to the underlying
- * filesystem implementation. See the main function for the setup of those configuration
- * strings; here we parse configuration information as passed in by main, through WiredTiger.
- */
- if ((ret = wtext->config_parser_open_arg(wtext, NULL, config, &config_parser)) != 0) {
- (void)wtext->err_printf(wtext, NULL, "WT_EXTENSION_API.config_parser_open: config: %s",
- wtext->strerror(wtext, NULL, ret));
- goto err;
- }
-
- /* Step through our configuration values. */
- printf("Custom storage source configuration\n");
- while ((ret = config_parser->next(config_parser, &k, &v)) == 0) {
- if (byte_string_match("config_string", k.str, k.len)) {
- printf(
- "\t"
- "key %.*s=\"%.*s\"\n",
- (int)k.len, k.str, (int)v.len, v.str);
- continue;
- }
- if (byte_string_match("config_value", k.str, k.len)) {
- printf(
- "\t"
- "key %.*s=%" PRId64 "\n",
- (int)k.len, k.str, v.val);
- continue;
- }
- ret = EINVAL;
- (void)wtext->err_printf(wtext, NULL,
- "WT_CONFIG_PARSER.next: unexpected configuration "
- "information: %.*s=%.*s: %s",
- (int)k.len, k.str, (int)v.len, v.str, wtext->strerror(wtext, NULL, ret));
- goto err;
- }
-
- /* Check for expected parser termination and close the parser. */
- if (ret != WT_NOTFOUND) {
- (void)wtext->err_printf(
- wtext, NULL, "WT_CONFIG_PARSER.next: config: %s", wtext->strerror(wtext, NULL, ret));
- goto err;
- }
- if ((ret = config_parser->close(config_parser)) != 0) {
- (void)wtext->err_printf(
- wtext, NULL, "WT_CONFIG_PARSER.close: config: %s", wtext->strerror(wtext, NULL, ret));
- goto err;
- }
-
- allocate_storage_source_lock(&demo_ss->lock);
-
- /* Initialize the in-memory jump table. */
- storage_source->ss_exist = demo_ss_exist;
- storage_source->ss_location_handle = demo_ss_location_handle;
- storage_source->ss_location_list = demo_ss_location_list;
- storage_source->ss_location_list_free = demo_ss_location_list_free;
- storage_source->ss_open_object = demo_ss_open;
- storage_source->ss_remove = demo_ss_remove;
- storage_source->ss_size = demo_ss_size;
- storage_source->terminate = demo_ss_terminate;
-
- if ((ret = conn->add_storage_source(conn, "demo", storage_source, NULL)) != 0) {
- (void)wtext->err_printf(
- wtext, NULL, "WT_CONNECTION.set_storage_source: %s", wtext->strerror(wtext, NULL, ret));
- goto err;
- }
-
- return (0);
-
-err:
- free(demo_ss);
- /* An error installing the storage source is fatal. */
- exit(1);
-}
-
-/*
- * demo_ss_open --
- * fopen for our demo storage source.
- */
-static int
-demo_ss_open(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *name, uint32_t flags,
- WT_FILE_HANDLE **file_handlep)
-{
- DEMO_FILE_HANDLE *demo_fh;
- DEMO_STORAGE_SOURCE *demo_ss;
- WT_EXTENSION_API *wtext;
- WT_FILE_HANDLE *file_handle;
- const char *location;
- char *full_name;
- size_t name_len;
- int ret = 0;
-
- (void)flags; /* Unused */
-
- *file_handlep = NULL;
-
- demo_ss = (DEMO_STORAGE_SOURCE *)storage_source;
- demo_fh = NULL;
- wtext = demo_ss->wtext;
-
- lock_storage_source(&demo_ss->lock);
- ++demo_ss->opened_object_count;
-
- /*
- * First search the file queue, if we find it, assert there's only a single reference, we only
- * support a single handle on any file.
- */
- demo_fh = demo_handle_search(storage_source, location_handle, name);
- if (demo_fh != NULL) {
- if (demo_fh->ref != 0) {
- (void)wtext->err_printf(wtext, session, "demo_ss_open: %s: file already open", name);
- ret = EBUSY;
- goto err;
- }
-
- demo_fh->ref = 1;
- *file_handlep = (WT_FILE_HANDLE *)demo_fh;
- unlock_storage_source(&demo_ss->lock);
- return (0);
- }
-
- /* The file hasn't been opened before, create a new one. */
- if ((demo_fh = calloc(1, sizeof(DEMO_FILE_HANDLE))) == NULL) {
- ret = ENOMEM;
- goto err;
- }
-
- /* Initialize private information. */
- demo_fh->demo_ss = demo_ss;
- demo_fh->ref = 1;
- if ((demo_fh->buf = calloc(1, DEMO_FILE_SIZE_INCREMENT)) == NULL) {
- ret = ENOMEM;
- goto err;
- }
- demo_fh->bufsize = DEMO_FILE_SIZE_INCREMENT;
- demo_fh->size = 0;
-
- /* Construct the public name. */
- location = LOCATION_STRING(location_handle);
- name_len = strlen(location) + strlen(name) + 1;
- full_name = calloc(1, name_len);
- if (snprintf(full_name, name_len, "%s%s", location, name) != (ssize_t)(name_len - 1)) {
- ret = ENOMEM;
- goto err;
- }
-
- /* Initialize public information. */
- file_handle = (WT_FILE_HANDLE *)demo_fh;
- file_handle->name = full_name;
-
- /*
- * Setup the function call table for our custom storage source. Set the function pointer to NULL
- * where our implementation doesn't support the functionality.
- */
- file_handle->close = demo_file_close;
- file_handle->fh_advise = NULL;
- file_handle->fh_extend = NULL;
- file_handle->fh_extend_nolock = NULL;
- file_handle->fh_lock = demo_file_lock;
- file_handle->fh_map = NULL;
- file_handle->fh_map_discard = NULL;
- file_handle->fh_map_preload = NULL;
- file_handle->fh_read = demo_file_read;
- file_handle->fh_size = demo_file_size;
- file_handle->fh_sync = demo_file_sync;
- file_handle->fh_sync_nowait = NULL;
- file_handle->fh_truncate = demo_file_truncate;
- file_handle->fh_unmap = NULL;
- file_handle->fh_write = demo_file_write;
-
- TAILQ_INSERT_HEAD(&demo_ss->fileq, demo_fh, q);
- ++demo_ss->opened_unique_object_count;
-
- *file_handlep = file_handle;
-
- if (0) {
-err:
- free(demo_fh->buf);
- free(demo_fh);
- }
-
- unlock_storage_source(&demo_ss->lock);
- return (ret);
-}
-
-/*
- * demo_ss_location_handle --
- * Return a location handle from a location string.
- */
-static int
-demo_ss_location_handle(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- const char *location_info, WT_LOCATION_HANDLE **location_handlep)
-{
- DEMO_LOCATION_HANDLE *demo_loc;
- size_t len;
- int ret;
- char *p;
-
- (void)storage_source; /* Unused */
- (void)session; /* Unused */
-
- ret = 0;
- p = NULL;
- demo_loc = NULL;
-
- /*
- * We save the location string we're given followed by a slash delimiter. We won't allow slashes
- * in the location info parameter.
- */
- if (strchr(location_info, '/') != NULL)
- return (EINVAL);
- len = strlen(location_info) + 2;
- p = malloc(len);
- if (snprintf(p, len, "%s/", location_info) != (ssize_t)(len - 1)) {
- ret = ENOMEM;
- goto err;
- }
-
- /*
- * Now create the location handle and save the string.
- */
- if ((demo_loc = calloc(1, sizeof(DEMO_LOCATION_HANDLE))) == NULL) {
- ret = ENOMEM;
- goto err;
- }
-
- /* Initialize private information. */
- demo_loc->loc_string = p;
-
- /* Initialize public information. */
- demo_loc->iface.close = demo_location_close;
-
- *location_handlep = &demo_loc->iface;
-
-err:
- if (ret != 0) {
- free(p);
- free(demo_loc);
- return (ret);
- }
- return (0);
-}
-
-/*
- * demo_ss_location_list --
- * Return a list of object names for the given location.
- */
-static int
-demo_ss_location_list(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *prefix, uint32_t limit, char ***dirlistp,
- uint32_t *countp)
-{
- DEMO_FILE_HANDLE *demo_fh;
- DEMO_STORAGE_SOURCE *demo_ss;
- size_t location_len, prefix_len;
- uint32_t allocated, count;
- int ret = 0;
- const char *location;
- char **entries, *name;
- void *p;
-
- (void)session; /* Unused */
-
- demo_ss = (DEMO_STORAGE_SOURCE *)storage_source;
-
- *dirlistp = NULL;
- *countp = 0;
-
- entries = NULL;
- allocated = count = 0;
- location = LOCATION_STRING(location_handle);
- location_len = strlen(location);
- prefix_len = (prefix == NULL ? 0 : strlen(prefix));
-
- lock_storage_source(&demo_ss->lock);
- TAILQ_FOREACH (demo_fh, &demo_ss->fileq, q) {
- name = demo_fh->iface.name;
- if (strncmp(name, location, location_len) != 0)
- continue;
- name += location_len;
- if (prefix != NULL && strncmp(name, prefix, prefix_len) != 0)
- continue;
-
- /*
- * Increase the list size in groups of 10, it doesn't matter if the list is a bit longer
- * than necessary.
- */
- if (count >= allocated) {
- p = realloc(entries, (allocated + 10) * sizeof(*entries));
- if (p == NULL) {
- ret = ENOMEM;
- goto err;
- }
-
- entries = p;
- memset(entries + allocated * sizeof(*entries), 0, 10 * sizeof(*entries));
- allocated += 10;
- }
- entries[count++] = strdup(name);
- if (limit > 0 && count >= limit)
- break;
- }
-
- *dirlistp = entries;
- *countp = count;
-
-err:
- unlock_storage_source(&demo_ss->lock);
- if (ret == 0)
- return (0);
-
- if (entries != NULL) {
- while (count > 0)
- free(entries[--count]);
- free(entries);
- }
-
- return (ret);
-}
-
-/*
- * demo_ss_location_list_free --
- * Free memory allocated by demo_ss_location_list.
- */
-static int
-demo_ss_location_list_free(
- WT_STORAGE_SOURCE *storage_source, WT_SESSION *session, char **dirlist, uint32_t count)
-{
- (void)storage_source;
- (void)session;
-
- if (dirlist != NULL) {
- while (count > 0)
- free(dirlist[--count]);
- free(dirlist);
- }
- return (0);
-}
-
-/*
- * demo_ss_exist --
- * Return if the file exists.
- */
-static int
-demo_ss_exist(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *name, bool *existp)
-{
- DEMO_STORAGE_SOURCE *demo_ss;
-
- (void)session; /* Unused */
-
- demo_ss = (DEMO_STORAGE_SOURCE *)storage_source;
-
- lock_storage_source(&demo_ss->lock);
- *existp = demo_handle_search(storage_source, location_handle, name) != NULL;
- unlock_storage_source(&demo_ss->lock);
-
- return (0);
-}
-
-/*
- * demo_ss_remove --
- * POSIX remove.
- */
-static int
-demo_ss_remove(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *name, uint32_t flags)
-{
- DEMO_STORAGE_SOURCE *demo_ss;
- DEMO_FILE_HANDLE *demo_fh;
- int ret = 0;
-
- (void)session; /* Unused */
- (void)flags; /* Unused */
-
- demo_ss = (DEMO_STORAGE_SOURCE *)storage_source;
-
- ret = ENOENT;
- lock_storage_source(&demo_ss->lock);
- if ((demo_fh = demo_handle_search(storage_source, location_handle, name)) != NULL)
- ret = demo_handle_remove(session, demo_fh);
- unlock_storage_source(&demo_ss->lock);
-
- return (ret);
-}
-
-/*
- * demo_ss_size --
- * Get the size of a file in bytes, by file name.
- */
-static int
-demo_ss_size(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *name, wt_off_t *sizep)
-{
- DEMO_STORAGE_SOURCE *demo_ss;
- DEMO_FILE_HANDLE *demo_fh;
- int ret = 0;
-
- demo_ss = (DEMO_STORAGE_SOURCE *)storage_source;
-
- ret = ENOENT;
- lock_storage_source(&demo_ss->lock);
- if ((demo_fh = demo_handle_search(storage_source, location_handle, name)) != NULL)
- ret = demo_file_size((WT_FILE_HANDLE *)demo_fh, session, sizep);
- unlock_storage_source(&demo_ss->lock);
-
- return (ret);
-}
-
-/*
- * demo_ss_terminate --
- * Discard any resources on termination.
- */
-static int
-demo_ss_terminate(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session)
-{
- DEMO_FILE_HANDLE *demo_fh, *demo_fh_tmp;
- DEMO_STORAGE_SOURCE *demo_ss;
- int ret = 0, tret;
-
- demo_ss = (DEMO_STORAGE_SOURCE *)storage_source;
-
- TAILQ_FOREACH_SAFE(demo_fh, &demo_ss->fileq, q, demo_fh_tmp)
- if ((tret = demo_handle_remove(session, demo_fh)) != 0 && ret == 0)
- ret = tret;
-
- printf("Custom storage source\n");
- printf("\t%d unique object opens\n", demo_ss->opened_unique_object_count);
- printf("\t%d objects opened\n", demo_ss->opened_object_count);
- printf("\t%d objects closed\n", demo_ss->closed_object_count);
- printf("\t%d reads, %d writes\n", demo_ss->read_ops, demo_ss->write_ops);
-
- destroy_storage_source_lock(&demo_ss->lock);
- free(demo_ss);
-
- return (ret);
-}
-
-/*
- * demo_location_close --
- * Free a location handle created by ss_location_handle.
- */
-static int
-demo_location_close(WT_LOCATION_HANDLE *location_handle, WT_SESSION *session)
-{
- (void)session; /* Unused */
-
- free(LOCATION_STRING(location_handle));
- free(location_handle);
- return (0);
-}
-
-/*
- * demo_file_close --
- * ANSI C close.
- */
-static int
-demo_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *session)
-{
- DEMO_FILE_HANDLE *demo_fh;
- DEMO_STORAGE_SOURCE *demo_ss;
-
- (void)session; /* Unused */
-
- demo_fh = (DEMO_FILE_HANDLE *)file_handle;
- demo_ss = demo_fh->demo_ss;
-
- lock_storage_source(&demo_ss->lock);
- if (--demo_fh->ref == 0)
- ++demo_ss->closed_object_count;
- unlock_storage_source(&demo_ss->lock);
-
- return (0);
-}
-
-/*
- * demo_file_lock --
- * Lock/unlock a file.
- */
-static int
-demo_file_lock(WT_FILE_HANDLE *file_handle, WT_SESSION *session, bool lock)
-{
- /* Locks are always granted. */
- (void)file_handle; /* Unused */
- (void)session; /* Unused */
- (void)lock; /* Unused */
- return (0);
-}
-
-/*
- * demo_file_read --
- * POSIX pread.
- */
-static int
-demo_file_read(
- WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset, size_t len, void *buf)
-{
- DEMO_FILE_HANDLE *demo_fh;
- DEMO_STORAGE_SOURCE *demo_ss;
- WT_EXTENSION_API *wtext;
- size_t off;
- int ret = 0;
-
- demo_fh = (DEMO_FILE_HANDLE *)file_handle;
- demo_ss = demo_fh->demo_ss;
- wtext = demo_ss->wtext;
- off = (size_t)offset;
-
- lock_storage_source(&demo_ss->lock);
- ++demo_ss->read_ops;
- if (off < demo_fh->size) {
- if (len > demo_fh->size - off)
- len = demo_fh->size - off;
- memcpy(buf, (uint8_t *)demo_fh->buf + off, len);
- } else
- ret = EIO; /* EOF */
- unlock_storage_source(&demo_ss->lock);
- if (ret == 0)
- return (0);
-
- (void)wtext->err_printf(wtext, session,
- "%s: handle-read: failed to read %zu bytes at offset %zu: %s", demo_fh->iface.name, len, off,
- wtext->strerror(wtext, NULL, ret));
- return (ret);
-}
-
-/*
- * demo_file_size --
- * Get the size of a file in bytes, by file handle.
- */
-static int
-demo_file_size(WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t *sizep)
-{
- DEMO_FILE_HANDLE *demo_fh;
- DEMO_STORAGE_SOURCE *demo_ss;
-
- (void)session; /* Unused */
-
- demo_fh = (DEMO_FILE_HANDLE *)file_handle;
- demo_ss = demo_fh->demo_ss;
-
- lock_storage_source(&demo_ss->lock);
- *sizep = (wt_off_t)demo_fh->size;
- unlock_storage_source(&demo_ss->lock);
- return (0);
-}
-
-/*
- * demo_file_sync --
- * Ensure the content of the file is stable. This is a no-op in our memory backed storage
- * source.
- */
-static int
-demo_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *session)
-{
- (void)file_handle; /* Unused */
- (void)session; /* Unused */
-
- return (0);
-}
-
-/*
- * demo_buffer_resize --
- * Resize the write buffer.
- */
-static int
-demo_buffer_resize(WT_SESSION *session, DEMO_FILE_HANDLE *demo_fh, wt_off_t offset)
-{
- DEMO_STORAGE_SOURCE *demo_ss;
- WT_EXTENSION_API *wtext;
- size_t off;
- void *p;
-
- demo_ss = demo_fh->demo_ss;
- wtext = demo_ss->wtext;
- off = (size_t)offset;
-
- /* Grow the buffer as necessary and clear any new space in the file. */
- if (demo_fh->bufsize >= off)
- return (0);
-
- if ((p = realloc(demo_fh->buf, off)) == NULL) {
- (void)wtext->err_printf(wtext, session, "%s: failed to resize buffer", demo_fh->iface.name,
- wtext->strerror(wtext, NULL, ENOMEM));
- return (ENOMEM);
- }
- memset((uint8_t *)p + demo_fh->bufsize, 0, off - demo_fh->bufsize);
- demo_fh->buf = p;
- demo_fh->bufsize = off;
-
- return (0);
-}
-
-/*
- * demo_file_truncate --
- * POSIX ftruncate.
- */
-static int
-demo_file_truncate(WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset)
-{
- DEMO_FILE_HANDLE *demo_fh;
- DEMO_STORAGE_SOURCE *demo_ss;
- WT_EXTENSION_API *wtext;
-
- (void)file_handle; /* Unused */
- (void)session; /* Unused */
- (void)offset; /* Unused */
-
- demo_fh = (DEMO_FILE_HANDLE *)file_handle;
- demo_ss = demo_fh->demo_ss;
- wtext = demo_ss->wtext;
-
- (void)wtext->err_printf(wtext, session, "%s: truncate not supported in storage source",
- demo_fh->iface.name, wtext->strerror(wtext, NULL, ENOTSUP));
- return (ENOTSUP);
-}
-
-/*
- * demo_file_write --
- * POSIX pwrite.
- */
-static int
-demo_file_write(
- WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset, size_t len, const void *buf)
-{
- DEMO_FILE_HANDLE *demo_fh;
- DEMO_STORAGE_SOURCE *demo_ss;
- WT_EXTENSION_API *wtext;
- size_t off;
- int ret = 0;
-
- demo_fh = (DEMO_FILE_HANDLE *)file_handle;
- demo_ss = demo_fh->demo_ss;
- wtext = demo_ss->wtext;
- off = (size_t)offset;
-
- lock_storage_source(&demo_ss->lock);
- ++demo_ss->write_ops;
- if ((ret = demo_buffer_resize(
- session, demo_fh, offset + (wt_off_t)(len + DEMO_FILE_SIZE_INCREMENT))) == 0) {
- memcpy((uint8_t *)demo_fh->buf + off, buf, len);
- if (off + len > demo_fh->size)
- demo_fh->size = off + len;
- }
- unlock_storage_source(&demo_ss->lock);
- if (ret == 0)
- return (0);
-
- (void)wtext->err_printf(wtext, session,
- "%s: handle-write: failed to write %zu bytes at offset %zu: %s", demo_fh->iface.name, len,
- off, wtext->strerror(wtext, NULL, ret));
- return (ret);
-}
-
-/*
- * demo_handle_remove --
- * Destroy an in-memory file handle. Should only happen on remove or shutdown.
- */
-static int
-demo_handle_remove(WT_SESSION *session, DEMO_FILE_HANDLE *demo_fh)
-{
- DEMO_STORAGE_SOURCE *demo_ss;
- WT_EXTENSION_API *wtext;
-
- demo_ss = demo_fh->demo_ss;
- wtext = demo_ss->wtext;
-
- if (demo_fh->ref != 0) {
- (void)wtext->err_printf(wtext, session, "demo_handle_remove: %s: file is currently open",
- demo_fh->iface.name, wtext->strerror(wtext, NULL, EBUSY));
- return (EBUSY);
- }
-
- TAILQ_REMOVE(&demo_ss->fileq, demo_fh, q);
-
- /* Clean up private information. */
- free(demo_fh->buf);
-
- /* Clean up public information. */
- free(demo_fh->iface.name);
-
- free(demo_fh);
-
- return (0);
-}
-
-/*
- * demo_handle_search --
- * Return a matching handle, if one exists.
- */
-static DEMO_FILE_HANDLE *
-demo_handle_search(
- WT_STORAGE_SOURCE *storage_source, WT_LOCATION_HANDLE *location_handle, const char *name)
-{
- DEMO_FILE_HANDLE *demo_fh;
- DEMO_STORAGE_SOURCE *demo_ss;
- size_t len;
- const char *location;
-
- demo_ss = (DEMO_STORAGE_SOURCE *)storage_source;
- location = LOCATION_STRING(location_handle);
- len = strlen(location);
-
- TAILQ_FOREACH (demo_fh, &demo_ss->fileq, q)
- if (strncmp(demo_fh->iface.name, location, len) == 0 &&
- strcmp(&demo_fh->iface.name[len], name) == 0)
- break;
- return (demo_fh);
-}
-
-static const char *home;
-
-static int
-demo_test_create(WT_STORAGE_SOURCE *ss, WT_SESSION *session, WT_LOCATION_HANDLE *location,
- const char *objname, const char *content)
-{
- WT_FILE_HANDLE *fh;
- const char *op;
- size_t len;
- int ret, t_ret;
-
- fh = NULL;
- len = strlen(content) + 1;
- op = "open";
- if ((ret = ss->ss_open_object(ss, session, location, objname, WT_SS_OPEN_CREATE, &fh)) != 0)
- goto err;
- op = "write";
- if ((ret = fh->fh_write(fh, session, 0, len, content)) != 0)
- goto err;
-
-err:
- if (fh != NULL && (t_ret = fh->close(fh, session)) != 0 && ret == 0) {
- op = "close";
- ret = t_ret;
- }
- if (ret != 0)
- fprintf(stderr, "demo failed during %s: %s\n", op, wiredtiger_strerror(ret));
- else
- printf("demo succeeded create %s\n", objname);
-
- return (ret);
-}
-
-static int
-demo_test_read(WT_STORAGE_SOURCE *ss, WT_SESSION *session, WT_LOCATION_HANDLE *location,
- const char *objname, const char *content)
-{
- WT_FILE_HANDLE *fh;
- char buf[100];
- const char *op;
- size_t len;
- wt_off_t size;
- int ret, t_ret;
-
- fh = NULL;
- len = strlen(content) + 1;
-
- /* Set the op string so that on error we know what failed. */
- op = "open";
- if ((ret = ss->ss_open_object(ss, session, location, objname, WT_SS_OPEN_READONLY, &fh)) != 0)
- goto err;
- op = "size";
- if ((ret = fh->fh_size(fh, session, &size)) != 0)
- goto err;
- op = "size-compare";
- if ((size_t)size != len || (size_t)size > sizeof(buf)) {
- ret = EINVAL;
- goto err;
- }
- op = "read";
- if ((ret = fh->fh_read(fh, session, 0, len, buf)) != 0)
- goto err;
- op = "read-compare";
- if (strncmp(buf, content, len) != 0) {
- ret = EINVAL;
- goto err;
- }
-
-err:
- if (fh != NULL && (t_ret = fh->close(fh, session)) != 0 && ret == 0) {
- op = "close";
- ret = t_ret;
- }
- if (ret != 0)
- fprintf(stderr, "demo failed during %s: %s\n", op, wiredtiger_strerror(ret));
- else
- printf("demo succeeded read %s\n", objname);
-
- return (ret);
-}
-
-static int
-demo_test_list(WT_STORAGE_SOURCE *ss, WT_SESSION *session, const char *description,
- WT_LOCATION_HANDLE *location, const char *prefix, uint32_t limit, uint32_t expect)
-{
- char **obj_list;
- const char *op;
- uint32_t i, obj_count;
- int ret, t_ret;
-
- obj_list = NULL;
- /* Set the op string so that on error we know what failed. */
- op = "location_list";
- if ((ret = ss->ss_location_list(ss, session, location, prefix, limit, &obj_list, &obj_count)) !=
- 0)
- goto err;
- op = "location_list count";
- if (obj_count != expect) {
- ret = EINVAL;
- goto err;
- }
- printf("list: %s:\n", description);
- for (i = 0; i < obj_count; i++) {
- printf(" %s\n", obj_list[i]);
- }
-
-err:
- if (obj_list != NULL &&
- (t_ret = ss->ss_location_list_free(ss, session, obj_list, obj_count)) != 0 && ret == 0) {
- op = "location_list_free";
- ret = t_ret;
- }
- if (ret != 0)
- fprintf(stderr, "demo failed during %s: %s\n", op, wiredtiger_strerror(ret));
- else
- printf("demo succeeded location_list %s\n", description);
-
- return (ret);
-}
-
-static int
-demo_test_storage_source(WT_STORAGE_SOURCE *ss, WT_SESSION *session)
-{
- WT_LOCATION_HANDLE *location1, *location2;
- const char *op;
- int ret, t_ret;
- bool exist;
-
- location1 = location2 = NULL;
-
- /* Create two locations. Set the op string so that on error we know what failed. */
- op = "location_handle";
- if ((ret = ss->ss_location_handle(ss, session, "location-one", &location1)) != 0)
- goto err;
- if ((ret = ss->ss_location_handle(ss, session, "location-two", &location2)) != 0)
- goto err;
-
- /*
- * Create and existence checks. In location-one, create "A". In location-two, create "A", "B",
- * "AA". We'll do simple lists of both locations, and a list of location-two with a prefix.
- */
- op = "create/exist checks";
- if ((ret = demo_test_create(ss, session, location1, "A", "location-one-A")) != 0)
- goto err;
-
- if ((ret = ss->ss_exist(ss, session, location1, "A", &exist)) != 0)
- goto err;
- if (!exist) {
- fprintf(stderr, "Exist test failed for A\n");
- ret = EINVAL;
- goto err;
- }
- if ((ret = ss->ss_exist(ss, session, location2, "A", &exist)) != 0)
- goto err;
- if (exist) {
- fprintf(stderr, "Exist test failed for A in location2\n");
- ret = EINVAL;
- goto err;
- }
-
- if ((ret = demo_test_create(ss, session, location2, "A", "location-two-A")) != 0)
- goto err;
- if ((ret = demo_test_create(ss, session, location2, "B", "location-two-B")) != 0)
- goto err;
- if ((ret = demo_test_create(ss, session, location2, "AA", "location-two-AA")) != 0)
- goto err;
-
- /* Make sure the objects contain the expected data. */
- op = "read checks";
- if ((ret = demo_test_read(ss, session, location1, "A", "location-one-A")) != 0)
- goto err;
- if ((ret = demo_test_read(ss, session, location2, "A", "location-two-A")) != 0)
- goto err;
- if ((ret = demo_test_read(ss, session, location2, "B", "location-two-B")) != 0)
- goto err;
-
- /*
- * List the locations. For location-one, we expect just one object.
- */
- op = "list checks";
- if ((ret = demo_test_list(ss, session, "location1", location1, NULL, 0, 1)) != 0)
- goto err;
-
- /*
- * For location-two, we expect three objects.
- */
- if ((ret = demo_test_list(ss, session, "location2", location2, NULL, 0, 3)) != 0)
- goto err;
-
- /*
- * If we limit the number of objects received to 2, we should only see 2.
- */
- if ((ret = demo_test_list(ss, session, "location2, limit:2", location2, NULL, 2, 2)) != 0)
- goto err;
-
- /*
- * With a prefix of "A", and no limit, we'll see two objects.
- */
- if ((ret = demo_test_list(ss, session, "location2: A", location2, "A", 0, 2)) != 0)
- goto err;
-
- /*
- * With a prefix of "A", and a limit of one, we'll see just one object.
- */
- if ((ret = demo_test_list(ss, session, "location2: A, limit:1", location2, "A", 1, 1)) != 0)
- goto err;
-
-err:
- if (location1 != NULL && (t_ret = location1->close(location1, session)) != 0 && ret == 0)
- ret = t_ret;
- if (location2 != NULL && (t_ret = location2->close(location2, session)) != 0 && ret == 0)
- ret = t_ret;
- if (ret != 0)
- fprintf(stderr, "demo failed during %s: %s\n", op, wiredtiger_strerror(ret));
-
- return (ret);
-}
-
-int
-main(void)
-{
- WT_CONNECTION *conn;
- WT_SESSION *session;
- WT_STORAGE_SOURCE *storage_source;
- const char *open_config;
- int ret;
-
- fprintf(stderr, "ex_storage_source: starting\n");
- /*
- * Create a clean test directory for this run of the test program if the environment variable
- * isn't already set (as is done by make check).
- */
- if (getenv("WIREDTIGER_HOME") == NULL) {
- home = "WT_HOME";
- if ((ret = system("rm -rf WT_HOME && mkdir WT_HOME")) != 0) {
- fprintf(stderr, "system: directory recreate failed: %s\n", strerror(ret));
- return (EXIT_FAILURE);
- }
- } else
- home = NULL;
-
- /*! [WT_STORAGE_SOURCE register] */
- /*
- * Setup a configuration string that will load our custom storage source. Use the special local
- * extension to indicate that the entry point is in the same executable. Finally, pass in two
- * pieces of configuration information to our initialization function as the "config" value.
- */
- open_config =
- "create,log=(enabled=true),extensions=(local={entry=demo_storage_source_create,"
- "config={config_string=\"demo-storage-source\",config_value=37}})";
- /* Open a connection to the database, creating it if necessary. */
- if ((ret = wiredtiger_open(home, NULL, open_config, &conn)) != 0) {
- fprintf(stderr, "Error connecting to %s: %s\n", home == NULL ? "." : home,
- wiredtiger_strerror(ret));
- return (EXIT_FAILURE);
- }
- /*! [WT_STORAGE_SOURCE register] */
-
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) {
- fprintf(stderr, "WT_CONNECTION.open_session: %s\n", wiredtiger_strerror(ret));
- return (EXIT_FAILURE);
- }
-
- if ((ret = conn->get_storage_source(conn, "demo", &storage_source)) != 0) {
- fprintf(stderr, "WT_CONNECTION.get_storage_source: %s\n", wiredtiger_strerror(ret));
- return (EXIT_FAILURE);
- }
- /*
- * At the moment, the infrastructure within WiredTiger that would use the storage source
- * extension does not exist. So call the interface directly as a demonstration.
- */
- if ((ret = demo_test_storage_source(storage_source, session)) != 0) {
- fprintf(stderr, "storage source test failed: %s\n", wiredtiger_strerror(ret));
- return (EXIT_FAILURE);
- }
- if ((ret = conn->close(conn, NULL)) != 0) {
- fprintf(stderr, "Error closing connection to %s: %s\n", home == NULL ? "." : home,
- wiredtiger_strerror(ret));
- return (EXIT_FAILURE);
- }
-
- return (EXIT_SUCCESS);
-}
diff --git a/src/third_party/wiredtiger/ext/storage_sources/local_store/local_store.c b/src/third_party/wiredtiger/ext/storage_sources/local_store/local_store.c
index 0161ba928fb..00f65988843 100644
--- a/src/third_party/wiredtiger/ext/storage_sources/local_store/local_store.c
+++ b/src/third_party/wiredtiger/ext/storage_sources/local_store/local_store.c
@@ -1,5 +1,5 @@
/*-
- * Public Domain 2014-2020 MongoDB, Inc.
+ * Public Domain 2014-present MongoDB, Inc.
* Public Domain 2008-2014 WiredTiger, Inc.
*
* This is free and unencumbered software released into the public domain.
@@ -91,18 +91,34 @@ typedef struct {
} LOCAL_STORAGE;
+typedef struct {
+ /* Must come first - this is the interface for the file system we are implementing. */
+ WT_FILE_SYSTEM file_system;
+ LOCAL_STORAGE *local_storage;
+
+ /* This is WiredTiger's file system, it is used in implementing the local file system. */
+ WT_FILE_SYSTEM *wt_fs;
+
+ char *auth_token; /* Identifier for key management system */
+ char *bucket_dir; /* Directory that stands in for cloud storage bucket */
+ char *cache_dir; /* Directory for pre-flushed objects and cached objects */
+ char *fs_prefix; /* File system prefix, allowing for a "directory" within a bucket */
+} LOCAL_FILE_SYSTEM;
+
/*
* Indicates a object that has not yet been flushed.
*/
typedef struct local_flush_item {
- char *src_path; /* File name to copy from, object name derived from this */
- char *marker_path; /* Marker name to remove when done */
+ char *src_path; /* File name to copy from, object name and cache name derived from this */
/*
* These fields would be used in performing a flush.
*/
- char *auth_token; /* Identifier for key management system */
- char *bucket; /* Bucket name */
+ char *auth_token; /* Identifier for key management system */
+ char *bucket; /* Bucket name */
+ char *cache_dir; /* Cache directory */
+ char *fs_prefix; /* Prefix for file system */
+ WT_FS_OPEN_FILE_TYPE file_type; /* File type */
TAILQ_ENTRY(local_flush_item) q; /* Queue of items */
} LOCAL_FLUSH_ITEM;
@@ -111,65 +127,59 @@ typedef struct local_file_handle {
WT_FILE_HANDLE iface; /* Must come first */
LOCAL_STORAGE *local; /* Enclosing storage source */
- int fd; /* File descriptor */
+ WT_FILE_HANDLE *fh; /* File handle */
char *path; /* Path name of file */
- char *temp_path; /* Temporary (hidden) name, set if newly created */
LOCAL_FLUSH_ITEM *flush; /* Flush information, set if newly created */
TAILQ_ENTRY(local_file_handle) q; /* Queue of handles */
} LOCAL_FILE_HANDLE;
-typedef struct local_location {
- WT_LOCATION_HANDLE iface; /* Must come first */
-
- char *cluster_prefix; /* Cluster prefix */
- char *auth_token; /* Identifier for key management system */
- char *bucket; /* Actually a directory path for local implementation */
-} LOCAL_LOCATION;
-
/*
* Forward function declarations for internal functions
*/
-static int local_config_dup(
- LOCAL_STORAGE *, WT_SESSION *, WT_CONFIG_ITEM *, const char *, const char *, char **);
static int local_configure(LOCAL_STORAGE *, WT_CONFIG_ARG *);
static int local_configure_int(LOCAL_STORAGE *, WT_CONFIG_ARG *, const char *, uint32_t *);
static int local_delay(LOCAL_STORAGE *);
static int local_err(LOCAL_STORAGE *, WT_SESSION *, int, const char *, ...);
static void local_flush_free(LOCAL_FLUSH_ITEM *);
-static int local_location_decode(LOCAL_STORAGE *, WT_LOCATION_HANDLE *, char **, char **, char **);
-static int local_location_path(
- LOCAL_STORAGE *, WT_LOCATION_HANDLE *, const char *, const char *, char **);
+static int local_get_directory(const char *, ssize_t len, char **);
+static int local_location_path(WT_FILE_SYSTEM *, const char *, char **);
+static int local_writeable(LOCAL_STORAGE *, const char *name, bool *writeable);
/*
* Forward function declarations for storage source API implementation
*/
-static int local_exist(
- WT_STORAGE_SOURCE *, WT_SESSION *, WT_LOCATION_HANDLE *, const char *, bool *);
+static int local_exist(WT_FILE_SYSTEM *, WT_SESSION *, const char *, bool *);
+static int local_customize_file_system(WT_STORAGE_SOURCE *, WT_SESSION *, const char *,
+ const char *, const char *, const char *, WT_FILE_SYSTEM **);
static int local_flush(
- WT_STORAGE_SOURCE *, WT_SESSION *, WT_LOCATION_HANDLE *, const char *, const char *);
+ WT_STORAGE_SOURCE *, WT_SESSION *, WT_FILE_SYSTEM *, const char *, const char *);
static int local_flush_one(LOCAL_STORAGE *, WT_SESSION *, LOCAL_FLUSH_ITEM *);
-static int local_location_handle(
- WT_STORAGE_SOURCE *, WT_SESSION *, const char *, WT_LOCATION_HANDLE **);
-static int local_location_handle_close(WT_LOCATION_HANDLE *, WT_SESSION *);
-static int local_location_list(WT_STORAGE_SOURCE *, WT_SESSION *, WT_LOCATION_HANDLE *,
- const char *, uint32_t, char ***, uint32_t *);
-static int local_location_list_free(WT_STORAGE_SOURCE *, WT_SESSION *, char **, uint32_t);
-static int local_location_list_internal(WT_STORAGE_SOURCE *, WT_SESSION *, WT_LOCATION_HANDLE *,
- const char *, const char *, uint32_t, char ***, uint32_t *);
-static int local_open(WT_STORAGE_SOURCE *, WT_SESSION *, WT_LOCATION_HANDLE *, const char *,
- uint32_t, WT_FILE_HANDLE **);
-static int local_remove(
- WT_STORAGE_SOURCE *, WT_SESSION *, WT_LOCATION_HANDLE *, const char *, uint32_t);
-static int local_size(
- WT_STORAGE_SOURCE *, WT_SESSION *, WT_LOCATION_HANDLE *, const char *, wt_off_t *);
static int local_terminate(WT_STORAGE_SOURCE *, WT_SESSION *);
/*
+ * Forward function declarations for file system API implementation
+ */
+static int local_directory_list(
+ WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, char ***, uint32_t *);
+static int local_directory_list_add(LOCAL_STORAGE *, char ***, const char *, uint32_t, uint32_t *);
+static int local_directory_list_internal(
+ WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, uint32_t, char ***, uint32_t *);
+static int local_directory_list_single(
+ WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, char ***, uint32_t *);
+static int local_directory_list_free(WT_FILE_SYSTEM *, WT_SESSION *, char **, uint32_t);
+static int local_fs_terminate(WT_FILE_SYSTEM *, WT_SESSION *);
+static int local_open(WT_FILE_SYSTEM *, WT_SESSION *, const char *, WT_FS_OPEN_FILE_TYPE file_type,
+ uint32_t, WT_FILE_HANDLE **);
+static int local_remove(WT_FILE_SYSTEM *, WT_SESSION *, const char *, uint32_t);
+static int local_rename(WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, uint32_t);
+static int local_size(WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *);
+
+/*
* Forward function declarations for file handle API implementation
*/
static int local_file_close(WT_FILE_HANDLE *, WT_SESSION *);
-static int local_file_close_internal(LOCAL_STORAGE *, WT_SESSION *, LOCAL_FILE_HANDLE *, bool);
+static int local_file_close_internal(LOCAL_STORAGE *, WT_SESSION *, LOCAL_FILE_HANDLE *);
static int local_file_lock(WT_FILE_HANDLE *, WT_SESSION *, bool);
static int local_file_read(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, void *);
static int local_file_size(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t *);
@@ -180,8 +190,7 @@ static int local_file_write(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, co
* Report an error for a file operation. Note that local_err returns its third argument, and this
* macro will too.
*/
-#define local_file_err(fh, session, ret, str) \
- local_err((fh)->local, session, ret, "\"%s\": %s", fh->iface.name, str)
+#define FS2LOCAL(fs) (((LOCAL_FILE_SYSTEM *)(fs))->local_storage)
#define VERBOSE(local, ...) \
do { \
@@ -191,56 +200,6 @@ static int local_file_write(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, co
#define SHOW_STRING(s) (((s) == NULL) ? "<null>" : (s))
/*
- * Some files are created with "marker" prefixes in their name.
- *
- * When an object is created and the file handle has not been closed, the contents are written into
- * a file marked as temporary. When that file handle closes, the temporary file will be renamed to
- * its final name, without the marker. At that point the object becomes "visible" to other API
- * calls.
- *
- * Additionally, when an object is created, an empty marker file is created that indicates that the
- * file will need to be flushed (transferred to the cloud). That empty marker file is removed when
- * the object has been flushed. We already track in memory what objects need to be flushed, but
- * having a file representation gives us a record of what needs to be done if we were to crash.
- */
-static const char *MARKER_NEED_FLUSH = "FLUSH_";
-static const char *MARKER_TEMPORARY = "TEMP_";
-
-/*
- * local_config_dup --
- * Make a copy of a configuration string as an allocated C string.
- */
-static int
-local_config_dup(LOCAL_STORAGE *local, WT_SESSION *session, WT_CONFIG_ITEM *v, const char *suffix,
- const char *disallowed, char **result)
-{
- size_t len;
- int ret;
- char *p;
-
- if (suffix == NULL)
- suffix = "";
- len = v->len + strlen(suffix) + 1;
- if ((p = malloc(len)) == NULL)
- return (local_err(local, session, ENOMEM, "configuration parsing"));
- (void)snprintf(p, len, "%.*s", (int)v->len, v->str);
-
- /*
- * Check for illegal characters before adding the suffix, as the suffix may contain such
- * characters.
- */
- if (disallowed != NULL && strstr(p, disallowed) != NULL) {
- ret = local_err(local, session, EINVAL,
- "characters \"%s\" disallowed in configuration string \"%s\"", disallowed, p);
- free(p);
- return (ret);
- }
- (void)strcat(p, suffix);
- *result = p;
- return (0);
-}
-
-/*
* local_configure
* Parse the configuration for the keys we care about.
*/
@@ -346,90 +305,223 @@ local_flush_free(LOCAL_FLUSH_ITEM *flush)
if (flush != NULL) {
free(flush->auth_token);
free(flush->bucket);
- free(flush->marker_path);
+ free(flush->cache_dir);
+ free(flush->fs_prefix);
free(flush->src_path);
free(flush);
}
}
/*
- * local_location_decode --
- * Break down a location into component parts.
+ * local_get_directory --
+ * Return a copy of a directory name after verifying that it is a directory.
*/
static int
-local_location_decode(LOCAL_STORAGE *local, WT_LOCATION_HANDLE *location_handle, char **bucket_name,
- char **cluster_prefix, char **auth_token)
+local_get_directory(const char *s, ssize_t len, char **copy)
{
- LOCAL_LOCATION *location;
- char *p;
+ struct stat sb;
+ int ret;
+ char *dirname;
+
+ if (len == -1)
+ len = (ssize_t)strlen(s);
+ dirname = strndup(s, (size_t)len + 1); /* Room for null */
+ if (dirname == NULL)
+ return (ENOMEM);
+ ret = stat(dirname, &sb);
+ if (ret != 0)
+ ret = errno;
+ else if ((sb.st_mode & S_IFMT) != S_IFDIR)
+ ret = EINVAL;
+ if (ret != 0)
+ free(dirname);
+ else
+ *copy = dirname;
+ return (ret);
+}
- location = (LOCAL_LOCATION *)location_handle;
+/*
+ * local_writeable --
+ * Check if a file can be written, or equivalently, check to see that it has not been flushed.
+ * This will be true if it is in the regular file system (not one managed by local_store).
+ */
+static int
+local_writeable(LOCAL_STORAGE *local, const char *name, bool *writeablep)
+{
+ struct stat sb;
+ int ret;
- if (bucket_name != NULL) {
- if ((p = strdup(location->bucket)) == NULL)
- return (local_err(local, NULL, ENOMEM, "local_location_decode"));
- *bucket_name = p;
- }
- if (cluster_prefix != NULL) {
- if ((p = strdup(location->cluster_prefix)) == NULL)
- return (local_err(local, NULL, ENOMEM, "local_location_decode"));
- *cluster_prefix = p;
- }
- if (auth_token != NULL) {
- if ((p = strdup(location->auth_token)) == NULL)
- return (local_err(local, NULL, ENOMEM, "local_location_decode"));
- *auth_token = p;
- }
+ ret = 0;
+ *writeablep = false;
- return (0);
+ if (stat(name, &sb) == 0)
+ *writeablep = true;
+ else if (errno != ENOENT)
+ ret = local_err(local, NULL, errno, "%s: stat", name);
+
+ return (ret);
}
/*
* local_location_path --
- * Construct a pathname from the location and local name.
+ * Construct a pathname from the file system and local name.
*/
int
-local_location_path(LOCAL_STORAGE *local, WT_LOCATION_HANDLE *location_handle, const char *name,
- const char *marker, char **pathp)
+local_location_path(WT_FILE_SYSTEM *file_system, const char *name, char **pathp)
{
- LOCAL_LOCATION *location;
+ LOCAL_FILE_SYSTEM *local_fs;
size_t len;
int ret;
char *p;
ret = 0;
- location = (LOCAL_LOCATION *)location_handle;
-
- /* If this is a marker file, it will be hidden from all namespaces. */
- if (marker == NULL)
- marker = "";
- len = strlen(location->bucket) + strlen(marker) + strlen(location->cluster_prefix) +
- strlen(name) + 2;
+ local_fs = (LOCAL_FILE_SYSTEM *)file_system;
+
+ /* Skip over "./" and variations (".//", ".///./././//") at the beginning of the name. */
+ while (*name == '.') {
+ if (name[1] != '/')
+ break;
+ name += 2;
+ while (*name == '/')
+ name++;
+ }
+ len = strlen(local_fs->cache_dir) + strlen(local_fs->fs_prefix) + strlen(name) + 2;
if ((p = malloc(len)) == NULL)
- return (local_err(local, NULL, ENOMEM, "local_location_path"));
- snprintf(p, len, "%s/%s%s%s", location->bucket, marker, location->cluster_prefix, name);
+ return (local_err(FS2LOCAL(file_system), NULL, ENOMEM, "local_location_path"));
+ snprintf(p, len, "%s/%s%s", local_fs->cache_dir, local_fs->fs_prefix, name);
*pathp = p;
return (ret);
}
/*
+ * local_customize_file_system --
+ * Return a customized file system to access the local storage source objects.
+ */
+static int
+local_customize_file_system(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
+ const char *bucket_name, const char *prefix, const char *auth_token, const char *config,
+ WT_FILE_SYSTEM **file_systemp)
+{
+ LOCAL_STORAGE *local;
+ LOCAL_FILE_SYSTEM *fs;
+ WT_CONFIG_ITEM cachedir;
+ WT_FILE_SYSTEM *wt_fs;
+ int ret;
+ const char *p;
+ char buf[1024];
+
+ local = (LOCAL_STORAGE *)storage_source;
+
+ fs = NULL;
+ ret = 0;
+
+ /* Parse configuration string. */
+ if ((ret = local->wt_api->config_get_string(
+ local->wt_api, session, config, "cache_directory", &cachedir)) != 0) {
+ if (ret == WT_NOTFOUND) {
+ ret = 0;
+ cachedir.len = 0;
+ } else {
+ ret = local_err(local, session, ret, "customize_file_system: config parsing");
+ goto err;
+ }
+ }
+
+ if ((ret = local->wt_api->file_system_get(local->wt_api, session, &wt_fs)) != 0) {
+ ret =
+ local_err(local, session, ret, "local_file_system: cannot get WiredTiger file system");
+ goto err;
+ }
+ if ((fs = calloc(1, sizeof(LOCAL_FILE_SYSTEM))) == NULL) {
+ ret = local_err(local, session, ENOMEM, "local_file_system");
+ goto err;
+ }
+ fs->local_storage = local;
+ fs->wt_fs = wt_fs;
+
+ if ((fs->auth_token = strdup(auth_token)) == NULL) {
+ ret = local_err(local, session, ENOMEM, "local_file_system.auth_token");
+ goto err;
+ }
+ /*
+ * Get the bucket directory and the cache directory.
+ */
+ if ((ret = local_get_directory(bucket_name, -1, &fs->bucket_dir)) != 0) {
+ ret = local_err(local, session, ret, "%s: bucket directory", bucket_name);
+ goto err;
+ }
+
+ /*
+ * The default cache directory is named "cache-<name>", where name is the last component of the
+ * bucket name's path. We'll create it if it doesn't exist.
+ */
+ if (cachedir.len == 0) {
+ if ((p = strrchr(bucket_name, '/')) != NULL)
+ p++;
+ else
+ p = bucket_name;
+ snprintf(buf, sizeof(buf), "cache-%s", p);
+ cachedir.str = buf;
+ cachedir.len = strlen(buf);
+ (void)mkdir(buf, 0777);
+ }
+ if ((ret = local_get_directory(cachedir.str, (ssize_t)cachedir.len, &fs->cache_dir)) != 0) {
+ ret =
+ local_err(local, session, ret, "%*s: cache directory", (int)cachedir.len, cachedir.str);
+ goto err;
+ }
+ if ((fs->fs_prefix = strdup(prefix)) == NULL) {
+ ret = local_err(local, session, ENOMEM, "local_file_system.prefix");
+ goto err;
+ }
+ fs->file_system.fs_directory_list = local_directory_list;
+ fs->file_system.fs_directory_list_single = local_directory_list_single;
+ fs->file_system.fs_directory_list_free = local_directory_list_free;
+ fs->file_system.fs_exist = local_exist;
+ fs->file_system.fs_open_file = local_open;
+ fs->file_system.fs_remove = local_remove;
+ fs->file_system.fs_rename = local_rename;
+ fs->file_system.fs_size = local_size;
+ fs->file_system.terminate = local_fs_terminate;
+
+err:
+ if (ret == 0)
+ *file_systemp = &fs->file_system;
+ else if (fs != NULL) {
+ free(fs->auth_token);
+ free(fs->bucket_dir);
+ free(fs->cache_dir);
+ free(fs->fs_prefix);
+ free(fs);
+ }
+ return (ret);
+}
+
+/*
* local_exist --
* Return if the file exists.
*/
static int
-local_exist(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *name, bool *existp)
+local_exist(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, bool *existp)
{
struct stat sb;
LOCAL_STORAGE *local;
int ret;
char *path;
- local = (LOCAL_STORAGE *)storage_source;
+ local = FS2LOCAL(file_system);
path = NULL;
+ /* If the file exists directly in the file system, it's not yet flushed, and we're done. */
+ ret = stat(name, &sb);
+ if (ret == 0) {
+ *existp = true;
+ return (0);
+ } else if (errno != ENOENT)
+ ret = local_err(local, session, errno, "%s: ss_exist stat", path);
+
local->op_count++;
- if ((ret = local_location_path(local, location_handle, name, NULL, &path)) != 0)
+ if ((ret = local_location_path(file_system, name, &path)) != 0)
goto err;
ret = stat(path, &sb);
@@ -451,8 +543,8 @@ err:
* Return when the files have been flushed.
*/
static int
-local_flush(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *name, const char *config)
+local_flush(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session, WT_FILE_SYSTEM *file_system,
+ const char *name, const char *config)
{
LOCAL_STORAGE *local;
LOCAL_FLUSH_ITEM *flush, *safe_flush;
@@ -469,13 +561,12 @@ local_flush(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
local = (LOCAL_STORAGE *)storage_source;
match = NULL;
- if (location_handle == NULL && name != NULL)
- return local_err(local, session, EINVAL, "flush: cannot specify name without location");
+ if (file_system == NULL && name != NULL)
+ return local_err(local, session, EINVAL, "flush: cannot specify name without file system");
local->op_count++;
- if (location_handle != NULL) {
- if ((ret = local_location_path(
- local, location_handle, name == NULL ? "" : name, NULL, &match)) != 0)
+ if (file_system != NULL) {
+ if ((ret = local_location_path(file_system, name == NULL ? "" : name, &match)) != 0)
goto err;
}
VERBOSE(local, "Flush: match=%s\n", SHOW_STRING(match));
@@ -496,7 +587,7 @@ local_flush(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
/*
* We must match against the bucket and the name if given.
* Our match string is of the form:
- * <bucket_name>/<cluster_prefix><name>
+ * <bucket_name>/<fs_prefix><name>
*
* If name is given, we must match the entire path.
* If name is not given, we must match up to the beginning
@@ -536,145 +627,112 @@ err:
static int
local_flush_one(LOCAL_STORAGE *local, WT_SESSION *session, LOCAL_FLUSH_ITEM *flush)
{
- int ret;
+ WT_FILE_HANDLE *dest, *src;
+ WT_FILE_SYSTEM *wt_fs;
+ wt_off_t copy_size, file_size, left;
+ int ret, t_ret;
char *object_name;
+ char buffer[1024 * 64];
+ char dest_path[1024];
+ ssize_t pos;
ret = 0;
+ src = dest = NULL;
object_name = strrchr(flush->src_path, '/');
- if (object_name == NULL)
+ if (object_name == NULL) {
ret = local_err(local, session, errno, "%s: unexpected src path", flush->src_path);
- else {
- object_name++;
-
- /* Here's where we would copy the file to a cloud object. */
- VERBOSE(local, "Flush object: from=%s, bucket=%s, object=%s, auth_token=%s, \n",
- flush->src_path, flush->bucket, object_name, flush->auth_token);
- local->object_flushes++;
-
- if ((ret = local_delay(local)) != 0)
- return (ret);
+ goto err;
}
- /* When we're done with flushing this file, remove the flush marker file. */
- if (ret == 0 && (ret = unlink(flush->marker_path)) < 0)
- ret = local_err(
- local, session, errno, "%s: unlink flush marker file failed", flush->marker_path);
-
- return (ret);
-}
-
-/*
- * local_location_handle --
- * Return a location handle from a location string.
- */
-static int
-local_location_handle(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- const char *location_info, WT_LOCATION_HANDLE **location_handlep)
-{
- LOCAL_LOCATION *location;
- LOCAL_STORAGE *local;
- WT_CONFIG_ITEM value;
- WT_CONFIG_PARSER *parser;
- WT_EXTENSION_API *wt_api;
- int ret, t_ret;
+ object_name++;
- ret = 0;
- location = NULL;
- local = (LOCAL_STORAGE *)storage_source;
- wt_api = local->wt_api;
- parser = NULL;
-
- local->op_count++;
- if ((ret = wt_api->config_parser_open(
- wt_api, session, location_info, strlen(location_info), &parser)) != 0)
- return (ret);
+ /*
+ * Here's where we flush the file to the cloud. This "local" implementation copies the file to
+ * the bucket directory.
+ */
+ VERBOSE(local, "Flush object: from=%s, bucket=%s, object=%s, auth_token=%s, \n",
+ flush->src_path, flush->bucket, object_name, flush->auth_token);
- if ((location = calloc(1, sizeof(*location))) == NULL) {
- ret = ENOMEM;
+ if ((ret = local_delay(local)) != 0)
goto err;
- }
- if ((ret = parser->get(parser, "bucket", &value)) != 0) {
- if (ret == WT_NOTFOUND)
- ret = local_err(local, session, EINVAL, "ss_location_handle: missing bucket parameter");
- goto err;
- }
- if (value.len == 0) {
+ if ((ret = local->wt_api->file_system_get(local->wt_api, session, &wt_fs)) != 0) {
ret =
- local_err(local, session, EINVAL, "ss_location_handle: bucket_name must be non-empty");
+ local_err(local, session, ret, "local_file_system: cannot get WiredTiger file system");
goto err;
}
- if ((ret = local_config_dup(local, session, &value, NULL, NULL, &location->bucket)) != 0)
- goto err;
+ snprintf(dest_path, sizeof(dest_path), "%s/%s", flush->bucket, object_name);
- if ((ret = parser->get(parser, "cluster", &value)) != 0) {
- if (ret == WT_NOTFOUND)
- ret =
- local_err(local, session, EINVAL, "ss_location_handle: missing cluster parameter");
+ if ((ret = wt_fs->fs_open_file(
+ wt_fs, session, flush->src_path, flush->file_type, WT_FS_OPEN_READONLY, &src)) != 0) {
+ ret = local_err(local, session, ret, "%s: cannot open for read", flush->src_path);
goto err;
}
- if ((ret = local_config_dup(local, session, &value, "_", "_/", &location->cluster_prefix)) != 0)
- goto err;
- if ((ret = parser->get(parser, "auth_token", &value)) != 0) {
- if (ret == WT_NOTFOUND)
- ret =
- local_err(local, session, EINVAL, "ss_location_handle: missing auth_token parameter");
+ if ((ret = wt_fs->fs_open_file(
+ wt_fs, session, dest_path, flush->file_type, WT_FS_OPEN_CREATE, &dest)) != 0) {
+ ret = local_err(local, session, ret, "%s: cannot create", dest_path);
goto err;
}
- if ((ret = local_config_dup(local, session, &value, NULL, NULL, &location->auth_token)) != 0)
+ if ((ret = wt_fs->fs_size(wt_fs, session, flush->src_path, &file_size)) != 0) {
+ ret = local_err(local, session, ret, "%s: cannot get size", flush->src_path);
goto err;
-
- VERBOSE(local, "Location: (bucket=%s,cluster=%s,auth_token=%s)\n",
- SHOW_STRING(location->bucket), SHOW_STRING(location->cluster_prefix),
- SHOW_STRING(location->auth_token));
-
- location->iface.close = local_location_handle_close;
- *location_handlep = &location->iface;
-
- if (0) {
-err:
- (void)local_location_handle_close(&location->iface, session);
}
+ for (pos = 0, left = file_size; left > 0; pos += copy_size, left -= copy_size) {
+ copy_size = left < (wt_off_t)sizeof(buffer) ? left : (wt_off_t)sizeof(buffer);
+ if ((ret = src->fh_read(src, session, pos, (size_t)copy_size, buffer)) != 0) {
+ ret = local_err(local, session, ret, "%s: cannot read", flush->src_path);
+ goto err;
+ }
+ if ((ret = dest->fh_write(dest, session, pos, (size_t)copy_size, buffer)) != 0) {
+ ret = local_err(local, session, ret, "%s: cannot write", dest_path);
+ goto err;
+ }
+ }
+ if ((ret = dest->fh_sync(dest, session)) != 0) {
+ ret = local_err(local, session, ret, "%s: cannot sync", dest_path);
+ goto err;
+ }
+ local->object_flushes++;
- if (parser != NULL)
- if ((t_ret = parser->close(parser)) != 0 && ret == 0)
+err:
+ /* When we're done with flushing this file, set the file to readonly. */
+ if (ret == 0 && (ret = chmod(flush->src_path, 0444)) < 0)
+ ret = local_err(local, session, errno, "%s: chmod flushed file failed", flush->src_path);
+ if (src != NULL && (t_ret = src->close(src, session)) != 0)
+ if (ret == 0)
+ ret = t_ret;
+ if (dest != NULL && (t_ret = dest->close(dest, session)) != 0)
+ if (ret == 0)
ret = t_ret;
return (ret);
}
/*
- * local_location_handle_close --
- * Free a location handle created by ss_location_handle.
+ * local_directory_list --
+ * Return a list of object names for the given location.
*/
static int
-local_location_handle_close(WT_LOCATION_HANDLE *location_handle, WT_SESSION *session)
+local_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *directory,
+ const char *prefix, char ***dirlistp, uint32_t *countp)
{
- LOCAL_LOCATION *location;
-
- (void)session; /* Unused */
-
- location = (LOCAL_LOCATION *)location_handle;
- free(location->auth_token);
- free(location->bucket);
- free(location->cluster_prefix);
- free(location);
- return (0);
+ FS2LOCAL(file_system)->op_count++;
+ return (
+ local_directory_list_internal(file_system, session, directory, prefix, 0, dirlistp, countp));
}
/*
- * local_location_list --
- * Return a list of object names for the given location.
+ * local_directory_list_single --
+ * Return a single file name for the given location.
*/
static int
-local_location_list(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *prefix, uint32_t limit, char ***dirlistp,
- uint32_t *countp)
+local_directory_list_single(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *directory,
+ const char *prefix, char ***dirlistp, uint32_t *countp)
{
- ((LOCAL_STORAGE *)storage_source)->op_count++;
- return (local_location_list_internal(
- storage_source, session, location_handle, NULL, prefix, limit, dirlistp, countp));
+ FS2LOCAL(file_system)->op_count++;
+ return (
+ local_directory_list_internal(file_system, session, directory, prefix, 1, dirlistp, countp));
}
/*
@@ -682,12 +740,12 @@ local_location_list(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
* Free memory allocated by local_location_list.
*/
static int
-local_location_list_free(
- WT_STORAGE_SOURCE *storage_source, WT_SESSION *session, char **dirlist, uint32_t count)
+local_directory_list_free(
+ WT_FILE_SYSTEM *file_system, WT_SESSION *session, char **dirlist, uint32_t count)
{
(void)session;
- ((LOCAL_STORAGE *)storage_source)->op_count++;
+ FS2LOCAL(file_system)->op_count++;
if (dirlist != NULL) {
while (count > 0)
free(dirlist[--count]);
@@ -697,82 +755,120 @@ local_location_list_free(
}
/*
+ * local_directory_list_add --
+ * Add an entry to the directory list, growing as needed.
+ */
+static int
+local_directory_list_add(
+ LOCAL_STORAGE *local, char ***entriesp, const char *s, uint32_t count, uint32_t *allocatedp)
+{
+ size_t alloc_sz;
+ char **entries, **new_entries;
+
+ entries = *entriesp;
+ if (count >= *allocatedp) {
+ *allocatedp += 10;
+ alloc_sz = sizeof(char *) * (*allocatedp);
+ if ((new_entries = realloc(entries, alloc_sz)) == NULL)
+ return (local_err(local, NULL, ENOMEM, "cannot grow directory list"));
+ entries = new_entries;
+ *entriesp = entries;
+ }
+ if ((entries[count] = strdup(s)) == NULL)
+ return (local_err(local, NULL, ENOMEM, "cannot grow directory list"));
+
+ return (0);
+}
+
+/*
* local_location_list_internal --
- * Return a list of object names for the given location, matching the given marker if needed.
+ * Return a list of object names for the given location.
*/
static int
-local_location_list_internal(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *marker, const char *prefix, uint32_t limit,
- char ***dirlistp, uint32_t *countp)
+local_directory_list_internal(WT_FILE_SYSTEM *file_system, WT_SESSION *session,
+ const char *directory, const char *prefix, uint32_t limit, char ***dirlistp, uint32_t *countp)
{
struct dirent *dp;
DIR *dirp;
- LOCAL_LOCATION *location;
+ LOCAL_FILE_SYSTEM *local_fs;
+ LOCAL_FLUSH_ITEM *flush;
LOCAL_STORAGE *local;
- size_t alloc_sz, cluster_len, marker_len, prefix_len;
+ size_t dir_len, fs_prefix_len, prefix_len;
uint32_t allocated, count;
int ret, t_ret;
- char **entries, **new_entries;
+ char **entries;
const char *basename;
- local = (LOCAL_STORAGE *)storage_source;
- location = (LOCAL_LOCATION *)location_handle;
+ local_fs = (LOCAL_FILE_SYSTEM *)file_system;
+ local = local_fs->local_storage;
entries = NULL;
allocated = count = 0;
- cluster_len = strlen(location->cluster_prefix);
- marker_len = (marker == NULL ? 0 : strlen(marker));
+ fs_prefix_len = strlen(local_fs->fs_prefix);
+ dir_len = (directory == NULL ? 0 : strlen(directory));
prefix_len = (prefix == NULL ? 0 : strlen(prefix));
ret = 0;
*dirlistp = NULL;
*countp = 0;
- if ((dirp = opendir(location->bucket)) == NULL) {
+ if ((dirp = opendir(local_fs->cache_dir)) == NULL) {
ret = errno;
if (ret == 0)
ret = EINVAL;
- return (local_err(local, session, ret, "%s: ss_location_list: opendir", location->bucket));
+ return (
+ local_err(local, session, ret, "%s: ss_directory_list: opendir", local_fs->cache_dir));
}
+ /*
+ * We list items in the cache directory as well as items in the "to be flushed" list.
+ */
for (count = 0; (dp = readdir(dirp)) != NULL && (limit == 0 || count < limit);) {
/* Skip . and .. */
basename = dp->d_name;
if (strcmp(basename, ".") == 0 || strcmp(basename, "..") == 0)
continue;
- if (marker_len == 0) {
- /* Skip over any marker files. */
- if (strncmp(basename, MARKER_TEMPORARY, strlen(MARKER_TEMPORARY)) == 0 ||
- strncmp(basename, MARKER_NEED_FLUSH, strlen(MARKER_NEED_FLUSH)) == 0)
- continue;
- } else {
- /* Match only the indicated marker files. */
- if (strncmp(basename, marker, marker_len) != 0)
- continue;
- basename += marker_len;
- }
- /* Skip files not associated with our cluster. */
- if (strncmp(basename, location->cluster_prefix, cluster_len) != 0)
+ /* Match only the indicated directory files. */
+ if (directory != NULL && strncmp(basename, directory, dir_len) != 0)
+ continue;
+ basename += dir_len;
+
+ /* Skip files not associated with our file system prefix. */
+ if (strncmp(basename, local_fs->fs_prefix, fs_prefix_len) != 0)
continue;
- basename += cluster_len;
+ basename += fs_prefix_len;
/* The list of files is optionally filtered by a prefix. */
if (prefix != NULL && strncmp(basename, prefix, prefix_len) != 0)
continue;
- if (count >= allocated) {
- allocated += 10;
- alloc_sz = sizeof(char *) * allocated;
- if ((new_entries = realloc(entries, alloc_sz)) == NULL) {
- ret = ENOMEM;
- goto err;
- }
- entries = new_entries;
- }
- if ((entries[count] = strdup(basename)) == NULL) {
- ret = ENOMEM;
+ if ((ret = local_directory_list_add(local, &entries, basename, count, &allocated)) != 0)
+ goto err;
+ count++;
+ }
+
+ TAILQ_FOREACH (flush, &local->flushq, q) {
+ if (limit != 0 && count >= limit)
+ break;
+
+ /* Skip files not associated with this file system. */
+ if (strcmp(local_fs->bucket_dir, flush->bucket) != 0 ||
+ strcmp(local_fs->cache_dir, flush->cache_dir) != 0 ||
+ strcmp(local_fs->fs_prefix, flush->fs_prefix) != 0)
+ continue;
+
+ basename = strrchr(flush->src_path, '/');
+ if (basename == NULL)
+ basename = flush->src_path;
+ else
+ basename++;
+
+ /* The list of files is optionally filtered by a prefix. */
+ if (prefix != NULL && strncmp(basename, prefix, prefix_len) != 0)
+ continue;
+
+ if ((ret = local_directory_list_add(local, &entries, basename, count, &allocated)) != 0)
goto err;
- }
count++;
}
@@ -782,7 +878,7 @@ local_location_list_internal(WT_STORAGE_SOURCE *storage_source, WT_SESSION *sess
err:
if (closedir(dirp) != 0) {
t_ret =
- local_err(local, session, errno, "%s: ss_location_list: closedir", location->bucket);
+ local_err(local, session, errno, "%s: ss_directory_list: closedir", local_fs->cache_dir);
if (ret == 0)
ret = t_ret;
}
@@ -798,88 +894,141 @@ err:
}
/*
+ * local_fs_terminate --
+ * Discard any resources on termination of the file system
+ */
+static int
+local_fs_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *session)
+{
+ LOCAL_FILE_SYSTEM *local_fs;
+
+ (void)session; /* unused */
+
+ local_fs = (LOCAL_FILE_SYSTEM *)file_system;
+ FS2LOCAL(file_system)->op_count++;
+ free(local_fs->auth_token);
+ free(local_fs->bucket_dir);
+ free(local_fs->cache_dir);
+ free(local_fs->fs_prefix);
+ free(file_system);
+
+ return (0);
+}
+
+/*
* local_open --
* fopen for our local storage source
*/
static int
-local_open(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *name, uint32_t flags,
- WT_FILE_HANDLE **file_handlep)
+local_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name,
+ WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags, WT_FILE_HANDLE **file_handlep)
{
LOCAL_FILE_HANDLE *local_fh;
+ LOCAL_FILE_SYSTEM *local_fs;
LOCAL_FLUSH_ITEM *flush;
LOCAL_STORAGE *local;
- WT_FILE_HANDLE *file_handle;
- int fd, oflags, ret;
- char *open_name;
+ WT_FILE_HANDLE *file_handle, *wt_fh;
+ WT_FILE_SYSTEM *wt_fs;
+ struct stat sb;
+ int ret;
+ bool create, exists;
(void)flags; /* Unused */
- fd = oflags = ret = 0;
+ ret = 0;
*file_handlep = NULL;
local_fh = NULL;
- local = (LOCAL_STORAGE *)storage_source;
+ local_fs = (LOCAL_FILE_SYSTEM *)file_system;
+ local = local_fs->local_storage;
+ wt_fs = local_fs->wt_fs;
- local->op_count++;
- if (flags == WT_SS_OPEN_CREATE)
- oflags = O_WRONLY | O_CREAT;
- else if (flags == WT_SS_OPEN_READONLY)
- oflags = O_RDONLY;
- else {
- ret = local_err(local, session, EINVAL, "open: invalid flags: 0x%x", flags);
- goto err;
- }
+ /*
+ * We expect that the local file system will be used narrowly, like when creating or opening a
+ * data file or turtle file. It would be unexpected to try to open a non-data file (like a log
+ * file) in that narrow part of code, so we make it an error here.
+ *
+ * Relaxing this constraint to allow opening of, say, log files, would be straightforward - we
+ * would not translate the path or do any tracking for flushing. But there's a catch. Other
+ * parts of the API, like remove and rename, have no flag indicating that they are operating on
+ * a log file, so we wouldn't know whether to do path translation. Of course, we could peek at
+ * the name, but that would be bad form.
+ */
+ if (file_type != WT_FS_OPEN_FILE_TYPE_DATA && file_type != WT_FS_OPEN_FILE_TYPE_REGULAR)
+ return (local_err(
+ local, session, EINVAL, "%s: open: only data file and regular types supported", name));
/* Create a new handle. */
if ((local_fh = calloc(1, sizeof(LOCAL_FILE_HANDLE))) == NULL) {
ret = ENOMEM;
goto err;
}
- if ((ret = local_location_path(local, location_handle, name, NULL, &local_fh->path)) != 0)
- goto err;
- if (flags == WT_SS_OPEN_CREATE) {
+ create = ((flags & WT_FS_OPEN_CREATE) != 0);
+ if (!create) {
+ ret = stat(name, &sb);
+ if (ret != 0 && errno != ENOENT) {
+ ret = local_err(local, session, errno, "%s: local_open stat", name);
+ goto err;
+ }
+ exists = (ret == 0);
+ } else
+ exists = false;
+ if (create || exists) {
+ /* The file has not been flushed, use the file directly in the file system. */
+ if ((local_fh->path = strdup(name)) == NULL) {
+ ret = local_err(local, session, ENOMEM, "local_open");
+ goto err;
+ }
+ } else {
+ if ((ret = local_location_path(file_system, name, &local_fh->path)) != 0)
+ goto err;
+ ret = stat(local_fh->path, &sb);
+ if (ret != 0 && errno != ENOENT) {
+ ret = local_err(local, session, errno, "%s: local_open stat", local_fh->path);
+ goto err;
+ }
+ exists = (ret == 0);
+ }
+ /*
+ * TODO: tiered: If the file doesn't exist locally, make a copy of it from the cloud here.
+ *
+ */
+#if 0
+ if ((flags & WT_FS_OPEN_READONLY) != 0 && !exists) {
+ }
+#endif
+
+ if (create && !exists) {
if ((flush = calloc(1, sizeof(LOCAL_FLUSH_ITEM))) == NULL) {
ret = ENOMEM;
goto err;
}
local_fh->flush = flush;
- /*
- * Create a marker file that indicates that the file will need to be flushed.
- */
- if ((ret = local_location_path(
- local, location_handle, name, MARKER_NEED_FLUSH, &flush->marker_path)) != 0)
- goto err;
- if ((fd = open(flush->marker_path, O_WRONLY | O_CREAT, 0666)) < 0) {
- ret = local_err(local, session, errno, "ss_open_object: open: %s", flush->marker_path);
+ if ((flush->auth_token = strdup(local_fs->auth_token)) == NULL) {
+ ret = local_err(local, session, ENOMEM, "open.auth_token");
goto err;
}
- if (close(fd) < 0) {
- ret = local_err(local, session, errno, "ss_open_object: close: %s", flush->marker_path);
+ if ((flush->bucket = strdup(local_fs->bucket_dir)) == NULL) {
+ ret = local_err(local, session, ENOMEM, "open.bucket");
goto err;
}
- if ((ret = local_location_decode(
- local, location_handle, &flush->bucket, NULL, &flush->auth_token)) != 0)
+ if ((flush->cache_dir = strdup(local_fs->cache_dir)) == NULL) {
+ ret = local_err(local, session, ENOMEM, "open.cache_dir");
goto err;
-
- /*
- * For the file handle, we will be writing into a file marked as temporary. When the handle
- * is closed, we'll move it to its final name.
- */
- if ((ret = local_location_path(
- local, location_handle, name, MARKER_TEMPORARY, &local_fh->temp_path)) != 0)
+ }
+ if ((flush->fs_prefix = strdup(local_fs->fs_prefix)) == NULL) {
+ ret = local_err(local, session, ENOMEM, "open.fs_prefix");
goto err;
+ }
+ flush->file_type = file_type;
+ }
- open_name = local_fh->temp_path;
- } else
- open_name = local_fh->path;
-
- /* Set file mode so it can only be reopened as readonly. */
- if ((fd = open(open_name, oflags, 0444)) < 0) {
- ret = local_err(local, session, errno, "ss_open_object: open: %s", open_name);
+ if ((ret = wt_fs->fs_open_file(wt_fs, session, local_fh->path, file_type, flags, &wt_fh)) !=
+ 0) {
+ ret = local_err(local, session, ret, "ss_open_object: open: %s", local_fh->path);
goto err;
}
- local_fh->fd = fd;
+ local_fh->fh = wt_fh;
local_fh->local = local;
/* Initialize public information. */
@@ -921,46 +1070,138 @@ local_open(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
*file_handlep = file_handle;
- VERBOSE(local, "File opened: %s final path=%s, temp path=%s\n", SHOW_STRING(name),
- SHOW_STRING(local_fh->path), SHOW_STRING(local_fh->temp_path));
+ VERBOSE(
+ local, "File opened: %s final path=%s\n", SHOW_STRING(name), SHOW_STRING(local_fh->path));
- if (0) {
err:
+ if (ret != 0) {
if (local_fh != NULL)
- local_file_close_internal(local, session, local_fh, true);
+ local_file_close_internal(local, session, local_fh);
+ }
+ return (ret);
+}
+
+/*
+ * local_rename --
+ * POSIX rename, for files not yet flushed to the cloud. If a file has been flushed, we don't
+ * support this operation. That is because cloud implementations may not support it, and more
+ * importantly, we consider anything in the cloud to be readonly as far as the custom file
+ * system is concerned.
+ */
+static int
+local_rename(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *from, const char *to,
+ uint32_t flags)
+{
+ LOCAL_FILE_SYSTEM *local_fs;
+ LOCAL_FLUSH_ITEM *flush;
+ LOCAL_STORAGE *local;
+ WT_FILE_SYSTEM *wt_fs;
+ int ret, t_ret;
+ char *copy;
+ bool writeable;
+
+ local = FS2LOCAL(file_system);
+ local_fs = (LOCAL_FILE_SYSTEM *)file_system;
+ wt_fs = local_fs->wt_fs;
+
+ local->op_count++;
+ if ((ret = local_writeable(local, from, &writeable)) != 0)
+ goto err;
+ if (!writeable) {
+ ret = local_err(local, session, ENOTSUP, "%s: rename of flushed file not allowed", from);
+ goto err;
}
+
+ if ((ret = wt_fs->fs_rename(wt_fs, session, from, to, flags)) != 0) {
+ ret = local_err(local, session, ret, "fs_rename");
+ goto err;
+ }
+
+ /*
+ * Find any flush entry that matches, and rename that too.
+ */
+ if ((ret = pthread_rwlock_wrlock(&local->flush_lock)) != 0) {
+ ret = local_err(local, session, ret, "ss_remove: pthread_rwlock_wrlock");
+ goto err;
+ }
+
+ TAILQ_FOREACH (flush, &local->flushq, q) {
+ if (strcmp(flush->src_path, from) == 0) {
+ if ((copy = strdup(to)) == NULL)
+ ret = ENOMEM;
+ else {
+ free(flush->src_path);
+ flush->src_path = copy;
+ }
+ break;
+ }
+ }
+
+ if ((t_ret = pthread_rwlock_unlock(&local->flush_lock)) != 0) {
+ (void)local_err(local, session, t_ret, "ss_remove: pthread_rwlock_unlock");
+ if (ret == 0)
+ ret = t_ret;
+ goto err;
+ }
+
+err:
return (ret);
}
/*
* local_remove --
- * POSIX remove.
+ * POSIX remove, for files not yet flushed to the cloud. If a file has been flushed, we don't
+ * support this operation. We consider anything in the cloud to be readonly as far as the custom
+ * file system is concerned.
*/
static int
-local_remove(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *name, uint32_t flags)
+local_remove(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, uint32_t flags)
{
+ LOCAL_FLUSH_ITEM *flush;
LOCAL_STORAGE *local;
int ret;
- char *path;
+ bool writeable;
(void)flags; /* Unused */
- local = (LOCAL_STORAGE *)storage_source;
- path = NULL;
+ local = FS2LOCAL(file_system);
local->op_count++;
- if ((ret = local_location_path(local, location_handle, name, NULL, &path)) != 0)
+ if ((ret = local_writeable(local, name, &writeable)) != 0)
+ goto err;
+ if (!writeable) {
+ ret = local_err(local, session, ENOTSUP, "%s: remove of flushed file not allowed", name);
goto err;
+ }
- ret = unlink(path);
+ ret = unlink(name);
if (ret != 0) {
- ret = local_err(local, session, errno, "%s: ss_remove unlink", path);
+ ret = local_err(local, session, errno, "%s: ss_remove unlink", name);
+ goto err;
+ }
+
+ /*
+ * Find any flush entry that matches, and remove that too.
+ */
+ if ((ret = pthread_rwlock_wrlock(&local->flush_lock)) != 0) {
+ ret = local_err(local, session, ret, "ss_remove: pthread_rwlock_wrlock");
+ goto err;
+ }
+
+ TAILQ_FOREACH (flush, &local->flushq, q) {
+ if (strcmp(flush->src_path, name) == 0) {
+ TAILQ_REMOVE(&local->flushq, flush, q);
+ local_flush_free(flush);
+ break;
+ }
+ }
+
+ if ((ret = pthread_rwlock_unlock(&local->flush_lock)) != 0) {
+ ret = local_err(local, session, ret, "ss_remove: pthread_rwlock_unlock");
goto err;
}
err:
- free(path);
return (ret);
}
@@ -969,22 +1210,29 @@ err:
* Get the size of a file in bytes, by file name.
*/
static int
-local_size(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *name, wt_off_t *sizep)
+local_size(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name, wt_off_t *sizep)
{
struct stat sb;
LOCAL_STORAGE *local;
int ret;
char *path;
- local = (LOCAL_STORAGE *)storage_source;
+ local = FS2LOCAL(file_system);
path = NULL;
local->op_count++;
- if ((ret = local_location_path(local, location_handle, name, NULL, &path)) != 0)
- goto err;
- ret = stat(path, &sb);
+ /* If the file exists directly in the file system, it's not yet flushed, so use it */
+ ret = stat(name, &sb);
+ if (ret == ENOENT) {
+ /* Otherwise, we'll see if it's in the cache directory. */
+ if ((ret = local_location_path(file_system, name, &path)) != 0)
+ goto err;
+
+ ret = stat(path, &sb);
+ /* TODO: tiered: if we still get an ENOENT, then we'd need to ping the cloud to get the
+ * size. */
+ }
if (ret == 0)
*sizep = sb.st_size;
else
@@ -1013,13 +1261,13 @@ local_terminate(WT_STORAGE_SOURCE *storage, WT_SESSION *session)
/*
* We should be single threaded at this point, so it is safe to destroy the lock and access the
- * file handle list without it.
+ * file handle list without locking it.
*/
if ((ret = pthread_rwlock_destroy(&local->file_handle_lock)) != 0)
(void)local_err(local, session, ret, "terminate: pthread_rwlock_destroy");
TAILQ_FOREACH_SAFE(local_fh, &local->fileq, q, safe_fh)
- local_file_close_internal(local, session, local_fh, true);
+ local_file_close_internal(local, session, local_fh);
free(local);
return (ret);
@@ -1073,7 +1321,7 @@ local_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *session)
}
}
- if ((t_ret = local_file_close_internal(local, session, local_fh, false)) != 0) {
+ if ((t_ret = local_file_close_internal(local, session, local_fh)) != 0) {
if (ret == 0)
ret = t_ret;
}
@@ -1086,26 +1334,17 @@ local_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *session)
* Internal file handle close.
*/
static int
-local_file_close_internal(
- LOCAL_STORAGE *local, WT_SESSION *session, LOCAL_FILE_HANDLE *local_fh, bool final)
+local_file_close_internal(LOCAL_STORAGE *local, WT_SESSION *session, LOCAL_FILE_HANDLE *local_fh)
{
int ret;
+ WT_FILE_HANDLE *wt_fh;
ret = 0;
- if ((close(local_fh->fd)) < 0)
- ret = local_err(local, session, errno, "WT_FILE_HANDLE->close: close");
-
- /*
- * If this is a normal close (not a termination cleanup), and this handle creates an object,
- * move the temp file to its final position.
- */
- if (!final && ret == 0 && local_fh->temp_path != NULL) {
- if ((ret = rename(local_fh->temp_path, local_fh->path)) < 0)
- ret = local_err(local, session, errno, "FILE_HANDLE->close: rename");
- }
+ wt_fh = local_fh->fh;
+ if (wt_fh != NULL && (ret = wt_fh->close(wt_fh, session)) != 0)
+ ret = local_err(local, session, ret, "WT_FILE_HANDLE->close: close");
local_flush_free(local_fh->flush);
- free(local_fh->temp_path);
free(local_fh->path);
free(local_fh->iface.name);
free(local_fh);
@@ -1138,25 +1377,13 @@ local_file_read(
WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset, size_t len, void *buf)
{
LOCAL_FILE_HANDLE *local_fh;
- ssize_t nbytes;
- int ret;
- uint8_t *addr;
+ WT_FILE_HANDLE *wt_fh;
local_fh = (LOCAL_FILE_HANDLE *)file_handle;
- ret = 0;
+ wt_fh = local_fh->fh;
local_fh->local->read_ops++;
- for (addr = buf; ret == 0 && len > 0;) {
- nbytes = pread(local_fh->fd, addr, len, offset);
- if (nbytes < 0)
- ret = local_file_err(local_fh, session, errno, "pread");
- else {
- addr += nbytes;
- len -= (size_t)nbytes;
- offset += nbytes;
- }
- }
- return (ret);
+ return (wt_fh->fh_read(wt_fh, session, offset, len, buf));
}
/*
@@ -1166,20 +1393,14 @@ local_file_read(
static int
local_file_size(WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t *sizep)
{
- struct stat sb;
LOCAL_FILE_HANDLE *local_fh;
- int ret;
+ WT_FILE_HANDLE *wt_fh;
local_fh = (LOCAL_FILE_HANDLE *)file_handle;
+ wt_fh = local_fh->fh;
local_fh->local->fh_ops++;
- ret = fstat(local_fh->fd, &sb);
- if (ret == 0)
- *sizep = sb.st_size;
- else
- ret = local_file_err(local_fh, session, ret, "fh_size fstat");
-
- return (ret);
+ return (wt_fh->fh_size(wt_fh, session, sizep));
}
/*
@@ -1190,15 +1411,13 @@ static int
local_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *session)
{
LOCAL_FILE_HANDLE *local_fh;
- int ret;
+ WT_FILE_HANDLE *wt_fh;
local_fh = (LOCAL_FILE_HANDLE *)file_handle;
+ wt_fh = local_fh->fh;
local_fh->local->fh_ops++;
- if ((ret = fsync(local_fh->fd)) < 0)
- ret = local_file_err(local_fh, session, errno, "fsync");
-
- return (ret);
+ return (wt_fh->fh_sync(wt_fh, session));
}
/*
@@ -1210,25 +1429,13 @@ local_file_write(
WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset, size_t len, const void *buf)
{
LOCAL_FILE_HANDLE *local_fh;
- ssize_t nbytes;
- int ret;
- const uint8_t *addr;
+ WT_FILE_HANDLE *wt_fh;
local_fh = (LOCAL_FILE_HANDLE *)file_handle;
- ret = 0;
+ wt_fh = local_fh->fh;
local_fh->local->write_ops++;
- for (addr = buf; ret == 0 && len > 0;) {
- nbytes = pwrite(local_fh->fd, addr, len, offset);
- if (nbytes < 0)
- ret = local_file_err(local_fh, session, errno, "pwrite");
- else {
- addr += nbytes;
- len -= (size_t)nbytes;
- offset += nbytes;
- }
- }
- return (ret);
+ return (wt_fh->fh_write(wt_fh, session, offset, len, buf));
}
/*
@@ -1255,14 +1462,8 @@ wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
* Allocate a local storage structure, with a WT_STORAGE structure as the first field, allowing
* us to treat references to either type of structure as a reference to the other type.
*/
- local->storage_source.ss_exist = local_exist;
+ local->storage_source.ss_customize_file_system = local_customize_file_system;
local->storage_source.ss_flush = local_flush;
- local->storage_source.ss_location_handle = local_location_handle;
- local->storage_source.ss_location_list = local_location_list;
- local->storage_source.ss_location_list_free = local_location_list_free;
- local->storage_source.ss_open_object = local_open;
- local->storage_source.ss_remove = local_remove;
- local->storage_source.ss_size = local_size;
local->storage_source.terminate = local_terminate;
if ((ret = local_configure(local, config)) != 0) {
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index cdef002e2ab..392d1bb1861 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-4.4",
- "commit": "03c93998a738e95d0b95ad7a2188799bad6c1ff6"
+ "commit": "aadac222429faa9b20d9344e3648a19be97811b9"
}
diff --git a/src/third_party/wiredtiger/lang/python/wiredtiger.i b/src/third_party/wiredtiger/lang/python/wiredtiger.i
index 829f58d6ac3..3dbf69d35c2 100644
--- a/src/third_party/wiredtiger/lang/python/wiredtiger.i
+++ b/src/third_party/wiredtiger/lang/python/wiredtiger.i
@@ -75,10 +75,10 @@ from packing import pack, unpack
$1 = &temp;
}
%typemap(in, numinputs=0) WT_FILE_HANDLE ** (WT_FILE_HANDLE *temp = NULL) {
- $1 = &temp;
+ $1 = &temp;
}
-%typemap(in, numinputs=0) WT_LOCATION_HANDLE ** (WT_LOCATION_HANDLE *temp = NULL) {
- $1 = &temp;
+%typemap(in, numinputs=0) WT_FILE_SYSTEM ** (WT_FILE_SYSTEM *temp = NULL) {
+ $1 = &temp;
}
%typemap(in, numinputs=0) WT_STORAGE_SOURCE ** (WT_STORAGE_SOURCE *temp = NULL) {
$1 = &temp;
@@ -190,12 +190,12 @@ from packing import pack, unpack
$1 = &val;
}
-%typemap(in,numinputs=0) (char ***object_list, int *countp) (char **list, uint32_t nentries) {
+%typemap(in,numinputs=0) (char ***dirlist, int *countp) (char **list, uint32_t nentries) {
$1 = &list;
$2 = &nentries;
}
-%typemap(argout) (char ***object_list, int *countp) {
+%typemap(argout) (char ***dirlist, int *countp) {
int i;
char **list;
@@ -203,8 +203,8 @@ from packing import pack, unpack
list = (*$1);
/*
* When we're done with the individual C strings, free them.
- * In theory, we should call the ss_location_list_free() method,
- * but that's awkward, since we don't have the storage_source and session.
+ * In theory, we should call the fs_directory_list_free() method,
+ * but that's awkward, since we don't have the file system and session.
*/
for (i = 0; i < *$2; i++) {
PyObject *o = PyString_InternFromString(list[i]);
@@ -219,8 +219,8 @@ from packing import pack, unpack
$result = SWIG_NewPointerObj(SWIG_as_voidptr(*$1), SWIGTYPE_p___wt_file_handle, 0);
}
-%typemap(argout) WT_LOCATION_HANDLE ** {
- $result = SWIG_NewPointerObj(SWIG_as_voidptr(*$1), SWIGTYPE_p___wt_location_handle, 0);
+%typemap(argout) WT_FILE_SYSTEM ** {
+ $result = SWIG_NewPointerObj(SWIG_as_voidptr(*$1), SWIGTYPE_p___wt_file_system, 0);
}
%typemap(argout) WT_STORAGE_SOURCE ** {
@@ -340,8 +340,8 @@ DESTRUCTOR(__wt_connection, close)
DESTRUCTOR(__wt_cursor, close)
DESTRUCTOR(__wt_file_handle, close)
DESTRUCTOR(__wt_session, close)
-DESTRUCTOR(__wt_storage_source, close)
-DESTRUCTOR(__wt_location_handle, close)
+DESTRUCTOR(__wt_storage_source, ss_terminate)
+DESTRUCTOR(__wt_file_system, fs_terminate)
/*
* OVERRIDE_METHOD must be used when overriding or extending an existing
@@ -518,7 +518,7 @@ SELFHELPER(struct __wt_connection, connection)
SELFHELPER(struct __wt_session, session)
SELFHELPER(struct __wt_cursor, cursor)
SELFHELPER(struct __wt_file_handle, file_handle)
-SELFHELPER(struct __wt_location_handle, location_handle)
+SELFHELPER(struct __wt_file_system, file_system)
SELFHELPER(struct __wt_storage_source, storage_source)
/*
@@ -985,40 +985,42 @@ typedef int int_void;
};
%enddef
-SIDESTEP_METHOD(__wt_storage_source, ss_location_handle,
- (WT_SESSION *session, const char *config, WT_LOCATION_HANDLE **handle),
- (self, session, config, handle))
+SIDESTEP_METHOD(__wt_storage_source, ss_customize_file_system,
+ (WT_SESSION *session, const char *bucket_name, const char *prefix,
+ const char *auth_token, const char *config, WT_FILE_SYSTEM **file_systemp),
+ (self, session, bucket_name, prefix, auth_token, config, file_systemp))
+
+SIDESTEP_METHOD(__wt_storage_source, ss_flush,
+ (WT_SESSION *session, WT_FILE_SYSTEM *file_system,
+ const char *name, const char *config),
+ (self, session, file_system, name, config))
-SIDESTEP_METHOD(__wt_location_handle, close,
+SIDESTEP_METHOD(__wt_storage_source, terminate,
(WT_SESSION *session),
(self, session))
-SIDESTEP_METHOD(__wt_storage_source, ss_exist,
- (WT_SESSION *session, WT_LOCATION_HANDLE *location_handle,
- const char *name, bool *existp),
- (self, session, location_handle, name, existp))
+SIDESTEP_METHOD(__wt_file_system, fs_exist,
+ (WT_SESSION *session, const char *name, bool *existp),
+ (self, session, name, existp))
-SIDESTEP_METHOD(__wt_storage_source, ss_flush,
- (WT_SESSION *session, WT_LOCATION_HANDLE *location_handle,
- const char *name, const char *config),
- (self, session, location_handle, name, config))
+SIDESTEP_METHOD(__wt_file_system, fs_open_file,
+ (WT_SESSION *session, const char *name, WT_FS_OPEN_FILE_TYPE file_type,
+ uint32_t flags, WT_FILE_HANDLE **file_handlep),
+ (self, session, name, file_type, flags, file_handlep))
-SIDESTEP_METHOD(__wt_storage_source, ss_open_object,
- (WT_SESSION *session, WT_LOCATION_HANDLE *location_handle,
- const char *name, uint32_t flags, WT_FILE_HANDLE **file_handlep),
- (self, session, location_handle, name, flags, file_handlep))
+SIDESTEP_METHOD(__wt_file_system, fs_remove,
+ (WT_SESSION *session, const char *name, uint32_t flags),
+ (self, session, name, flags))
-SIDESTEP_METHOD(__wt_storage_source, ss_remove,
- (WT_SESSION *session, WT_LOCATION_HANDLE *location_handle,
- const char *name, uint32_t flags),
- (self, session, location_handle, name, flags))
+SIDESTEP_METHOD(__wt_file_system, fs_rename,
+ (WT_SESSION *session, const char *from, const char *to, uint32_t flags),
+ (self, session, from, to, flags))
-SIDESTEP_METHOD(__wt_storage_source, ss_size,
- (WT_SESSION *session, WT_LOCATION_HANDLE *location_handle,
- const char *name, wt_off_t *sizep),
- (self, session, location_handle, name, sizep))
+SIDESTEP_METHOD(__wt_file_system, fs_size,
+ (WT_SESSION *session, const char *name, wt_off_t *sizep),
+ (self, session, name, sizep))
-SIDESTEP_METHOD(__wt_storage_source, terminate,
+SIDESTEP_METHOD(__wt_file_system, terminate,
(WT_SESSION *session),
(self, session))
@@ -1092,20 +1094,26 @@ SIDESTEP_METHOD(__wt_file_handle, fh_write,
}
};
-%ignore __wt_storage_source::ss_location_list;
-%rename (ss_location_list) __wt_storage_source::_ss_location_list;
-%extend __wt_storage_source {
- int _ss_location_list(WT_SESSION *session, WT_LOCATION_HANDLE *handle, const char *prefix,
- uint32_t limit, char ***object_list, int *countp) {
- return (self->ss_location_list(self, session, handle, prefix, limit, object_list, countp));
+%ignore __wt_file_system::fs_directory_list;
+%ignore __wt_file_system::fs_directory_list_single;
+%rename (fs_directory_list) __wt_file_system::_fs_directory_list;
+%rename (fs_directory_list_single) __wt_file_system::_fs_directory_list_single;
+%extend __wt_file_system {
+ int _fs_directory_list(WT_SESSION *session, const char *directory, const char *prefix,
+ char ***dirlist, int *countp) {
+ return (self->fs_directory_list(self, session, directory, prefix, dirlist, countp));
+ }
+ int _fs_directory_list_single(WT_SESSION *session, const char *directory, const char *prefix,
+ char ***dirlist, int *countp) {
+ return (self->fs_directory_list_single(self, session, directory, prefix, dirlist, countp));
}
};
/*
- * No need for a location_list_free method, as the list and its components
- * are freed immediately after the location_list call.
+ * No need for a directory_list_free method, as the list and its components
+ * are freed immediately after the directory_list call.
*/
-%ignore __wt_storage_source::ss_location_list_free;
+%ignore __wt_file_system::fs_directory_list_free;
%{
int diagnostic_build() {
@@ -1164,7 +1172,7 @@ OVERRIDE_METHOD(__wt_session, WT_SESSION, log_printf, (self, msg))
%rename(Connection) __wt_connection;
%rename(FileHandle) __wt_file_handle;
%rename(StorageSource) __wt_storage_source;
-%rename(LocationHandle) __wt_location_handle;
+%rename(FileSystem) __wt_file_system;
%include "wiredtiger.h"
@@ -1441,7 +1449,7 @@ def _rename_with_prefix(prefix, toclass):
_rename_with_prefix('WT_STAT_CONN_', stat.conn)
_rename_with_prefix('WT_STAT_DSRC_', stat.dsrc)
_rename_with_prefix('WT_STAT_SESSION_', stat.session)
-_rename_with_prefix('WT_SS_', StorageSource)
+_rename_with_prefix('WT_FS_', FileSystem)
_rename_with_prefix('WT_FILE_HANDLE_', FileHandle)
del _rename_with_prefix
%}
diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c
index 176d6570ff3..0f4af019b52 100644
--- a/src/third_party/wiredtiger/src/block/block_ckpt.c
+++ b/src/third_party/wiredtiger/src/block/block_ckpt.c
@@ -98,10 +98,7 @@ __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint
block, &endp, ci->root_logid, ci->root_offset, ci->root_size, ci->root_checksum));
*root_addr_sizep = WT_PTRDIFF(endp, root_addr);
- if (block->log_structured) {
- block->logid = ci->root_logid;
- WT_ERR(__wt_block_newfile(session, block));
- }
+ WT_ERR(__wt_block_tiered_load(session, block, ci));
}
/*
@@ -468,37 +465,6 @@ __ckpt_add_blk_mods_ext(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_BLOCK_CK
}
/*
- * __wt_block_newfile --
- * Switch a log-structured block object to a new file.
- */
-int
-__wt_block_newfile(WT_SESSION_IMPL *session, WT_BLOCK *block)
-{
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- const char *filename;
-
- /* Bump to a new file ID. */
- ++block->logid;
-
- WT_ERR(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__wt_buf_fmt(session, tmp, "%s.%08" PRIu32, block->name, block->logid));
- filename = tmp->data;
- WT_ERR(__wt_close(session, &block->fh));
- WT_ERR(__wt_open(session, filename, WT_FS_OPEN_FILE_TYPE_DATA,
- WT_FS_OPEN_CREATE | block->file_flags, &block->fh));
- WT_ERR(__wt_desc_write(session, block->fh, block->allocsize));
-
- block->size = block->allocsize;
- __wt_block_ckpt_destroy(session, &block->live);
- WT_ERR(__wt_block_ckpt_init(session, &block->live, "live"));
-
-err:
- __wt_scr_free(session, &tmp);
- return (ret);
-}
-
-/*
* __ckpt_process --
* Process the list of checkpoints.
*/
@@ -780,8 +746,12 @@ live_update:
ci->ckpt_discard = ci->discard;
WT_ERR(__wt_block_extlist_init(session, &ci->discard, "live", "discard", false));
+ /*
+ * TODO: tiered: for now we are switching files on a checkpoint, we'll want to do it only on
+ * flush_tier.
+ */
if (block->log_structured)
- WT_ERR(__wt_block_newfile(session, block));
+ WT_ERR(__wt_block_tiered_newfile(session, block));
#ifdef HAVE_DIAGNOSTIC
/*
diff --git a/src/third_party/wiredtiger/src/block/block_mgr.c b/src/third_party/wiredtiger/src/block/block_mgr.c
index 00db82934dd..4be319fe79c 100644
--- a/src/third_party/wiredtiger/src/block/block_mgr.c
+++ b/src/third_party/wiredtiger/src/block/block_mgr.c
@@ -289,6 +289,30 @@ __bm_compact_start_readonly(WT_BM *bm, WT_SESSION_IMPL *session)
}
/*
+ * __bm_flush_tier --
+ * Flush the underlying file to the shared tier.
+ */
+static int
+__bm_flush_tier(WT_BM *bm, WT_SESSION_IMPL *session, uint8_t **flush_cookie, size_t *cookie_size)
+{
+ return (__wt_block_tiered_flush(session, bm->block, flush_cookie, cookie_size));
+}
+
+/*
+ * __bm_flush_tier_readonly --
+ * Flush the underlying file to the shared tier; readonly version.
+ */
+static int
+__bm_flush_tier_readonly(
+ WT_BM *bm, WT_SESSION_IMPL *session, uint8_t **flush_cookie, size_t *cookie_size)
+{
+ WT_UNUSED(flush_cookie);
+ WT_UNUSED(cookie_size);
+
+ return (__bm_readonly(bm, session));
+}
+
+/*
* __bm_free --
* Free a block of space to the underlying file.
*/
@@ -565,6 +589,7 @@ __bm_method_set(WT_BM *bm, bool readonly)
bm->compact_skip = __bm_compact_skip;
bm->compact_start = __bm_compact_start;
bm->corrupt = __wt_bm_corrupt;
+ bm->flush_tier = __bm_flush_tier;
bm->free = __bm_free;
bm->is_mapped = __bm_is_mapped;
bm->map_discard = __bm_map_discard;
@@ -591,6 +616,7 @@ __bm_method_set(WT_BM *bm, bool readonly)
bm->compact_page_skip = __bm_compact_page_skip_readonly;
bm->compact_skip = __bm_compact_skip_readonly;
bm->compact_start = __bm_compact_start_readonly;
+ bm->flush_tier = __bm_flush_tier_readonly;
bm->free = __bm_free_readonly;
bm->salvage_end = __bm_salvage_end_readonly;
bm->salvage_next = __bm_salvage_next_readonly;
diff --git a/src/third_party/wiredtiger/src/block/block_read.c b/src/third_party/wiredtiger/src/block/block_read.c
index 08069728c8c..80dbb3aac21 100644
--- a/src/third_party/wiredtiger/src/block/block_read.c
+++ b/src/third_party/wiredtiger/src/block/block_read.c
@@ -222,11 +222,11 @@ __wt_block_fh(WT_SESSION_IMPL *session, WT_BLOCK *block, uint32_t logid, WT_FH *
return (0);
}
- /* TODO: fh readlock */
+ /* TODO: tiered: fh readlock; we may want a reference count on each file handle given out. */
if (logid * sizeof(WT_FILE_HANDLE *) < block->lfh_alloc && (*fhp = block->lfh[logid]) != NULL)
return (0);
- /* TODO: fh writelock */
+ /* TODO: tiered: fh writelock */
/* Ensure the array goes far enough. */
WT_RET(__wt_realloc_def(session, &block->lfh_alloc, logid + 1, &block->lfh));
if (logid >= block->max_logid)
diff --git a/src/third_party/wiredtiger/src/block/block_tiered.c b/src/third_party/wiredtiger/src/block/block_tiered.c
new file mode 100644
index 00000000000..776b2a127ad
--- /dev/null
+++ b/src/third_party/wiredtiger/src/block/block_tiered.c
@@ -0,0 +1,99 @@
+/*-
+ * Copyright (c) 2014-present MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __wt_block_tiered_flush --
+ * Flush this file, start another file.
+ */
+int
+__wt_block_tiered_flush(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t **flush_cookie, size_t *cookie_size)
+{
+ /* TODO: tiered: fill in the cookie. */
+ (void)flush_cookie;
+ (void)cookie_size;
+
+ return (__wt_block_tiered_newfile(session, block));
+}
+
+/*
+ * __wt_block_tiered_load --
+ * Set up log-structured processing when loading a new root page.
+ */
+int
+__wt_block_tiered_load(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci)
+{
+ /*
+ * TODO: tiered: this call currently advances the object id, that's probably not appropriate for
+ * readonly opens. Perhaps it's also not appropriate for opening at an older checkpoint?
+ */
+ if (block->log_structured) {
+ block->logid = ci->root_logid;
+
+ /* Advance to the next file for future changes. */
+ WT_RET(__wt_block_tiered_newfile(session, block));
+ }
+ return (0);
+}
+
+/*
+ * __wt_block_tiered_newfile --
+ * Switch a log-structured block object to a new file.
+ */
+int
+__wt_block_tiered_newfile(WT_SESSION_IMPL *session, WT_BLOCK *block)
+{
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_STORAGE_SOURCE *storage_source;
+ const char *filename;
+
+ /* Get the old file name again. */
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+
+ /*
+ * TODO: tiered: We will get rid of the log id, and this name generation will be replaced by the
+ * name generated by __tiered_switch.
+ */
+ WT_ERR(__wt_buf_fmt(session, tmp, "%s.%08" PRIu32, block->name, block->logid));
+ filename = tmp->data;
+ WT_ERR(__wt_close(session, &block->fh));
+
+ /*
+ * TODO: tiered: Assert that session->bucket_storage is not NULL. We can't do that while we have
+ * tests that use block_allocation=log without setting up bucket storage. This whole function is
+ * going to look very different when flush_tier is fully integrated.
+ */
+ if (session->bucket_storage != NULL && block->logid != 0) {
+ storage_source = session->bucket_storage->storage_source;
+ WT_ASSERT(session, storage_source != NULL);
+ WT_ERR(storage_source->ss_flush(
+ storage_source, &session->iface, session->bucket_storage->file_system, filename, NULL));
+ }
+ /* Bump to a new file ID. */
+ ++block->logid;
+ WT_ERR(__wt_buf_fmt(session, tmp, "%s.%08" PRIu32, block->name, block->logid));
+ filename = tmp->data;
+
+ WT_WITH_BUCKET_STORAGE(session->bucket_storage, session, {
+ ret = __wt_open(session, filename, WT_FS_OPEN_FILE_TYPE_DATA,
+ WT_FS_OPEN_CREATE | block->file_flags, &block->fh);
+ });
+ WT_ERR(ret);
+ WT_ERR(__wt_desc_write(session, block->fh, block->allocsize));
+
+ block->size = block->allocsize;
+ __wt_block_ckpt_destroy(session, &block->live);
+ WT_ERR(__wt_block_ckpt_init(session, &block->live, "live"));
+
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
+}
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index 18fdef73315..a2b7f161d3e 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -305,7 +305,8 @@ restart_read:
* Move to the next row-store item.
*/
static inline int
-__cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart, size_t *skippedp)
+__cursor_row_next(
+ WT_CURSOR_BTREE *cbt, bool newpage, bool restart, size_t *skippedp, WT_ITEM *prefix)
{
WT_CELL_UNPACK_KV kpack;
WT_INSERT *ins;
@@ -402,6 +403,17 @@ restart_read_insert:
restart_read_page:
rip = &page->pg_row[cbt->slot];
WT_RET(__cursor_row_slot_key_return(cbt, rip, &kpack, &kpack_used));
+ /*
+ * If the cursor has prefix search configured we can early exit here if the key that we are
+ * visiting is after our prefix.
+ */
+ if (F_ISSET(&cbt->iface, WT_CURSTD_PREFIX_SEARCH) && prefix != NULL &&
+ __wt_prefix_match(prefix, &cbt->iface.key) < 0) {
+ /* It is not okay for the user to have a custom collator. */
+ WT_ASSERT(session, CUR2BT(cbt)->collator == NULL);
+ WT_STAT_CONN_DATA_INCR(session, cursor_search_near_prefix_fast_paths);
+ return (WT_NOTFOUND);
+ }
WT_RET(__wt_txn_read(
session, cbt, &cbt->iface.key, WT_RECNO_OOB, WT_ROW_UPDATE(page, rip), NULL));
if (cbt->upd_value->type == WT_UPDATE_INVALID) {
@@ -622,11 +634,12 @@ __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt)
}
/*
- * __wt_btcur_next --
- * Move to the next record in the tree.
+ * __wt_btcur_next_prefix --
+ * Move to the next record in the tree. Taking an optional prefix item for a special case of
+ * search near.
*/
int
-__wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
+__wt_btcur_next_prefix(WT_CURSOR_BTREE *cbt, WT_ITEM *prefix, bool truncating)
{
WT_CURSOR *cursor;
WT_DECL_RET;
@@ -692,8 +705,14 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
total_skipped += skipped;
break;
case WT_PAGE_ROW_LEAF:
- ret = __cursor_row_next(cbt, newpage, restart, &skipped);
+ ret = __cursor_row_next(cbt, newpage, restart, &skipped, prefix);
total_skipped += skipped;
+ /*
+ * We can directly return WT_NOTFOUND here as the caller expects the cursor to be
+ * positioned when traversing keys for prefix search near.
+ */
+ if (ret == WT_NOTFOUND && F_ISSET(&cbt->iface, WT_CURSTD_PREFIX_SEARCH))
+ return (WT_NOTFOUND);
break;
default:
WT_ERR(__wt_illegal_value(session, page->type));
@@ -774,3 +793,13 @@ err:
F_CLR(cbt, WT_CBT_ITERATE_RETRY_PREV);
return (ret);
}
+
+/*
+ * __wt_btcur_next --
+ * Move to the next record in the tree.
+ */
+int
+__wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
+{
+ return (__wt_btcur_next_prefix(cbt, NULL, truncating));
+}
diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c
index 7517eac77d8..867a46201a4 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curprev.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c
@@ -441,10 +441,12 @@ restart_read:
/*
* __cursor_row_prev --
- * Move to the previous row-store item.
+ * Move to the previous row-store item. Taking an optional prefix item for a special case of
+ * search near.
*/
static inline int
-__cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart, size_t *skippedp)
+__cursor_row_prev(
+ WT_CURSOR_BTREE *cbt, bool newpage, bool restart, size_t *skippedp, WT_ITEM *prefix)
{
WT_CELL_UNPACK_KV kpack;
WT_INSERT *ins;
@@ -553,6 +555,17 @@ restart_read_insert:
restart_read_page:
rip = &page->pg_row[cbt->slot];
WT_RET(__cursor_row_slot_key_return(cbt, rip, &kpack, &kpack_used));
+ /*
+ * If the cursor has prefix search configured we can early exit here if the key we are
+ * visiting is before our prefix.
+ */
+ if (F_ISSET(&cbt->iface, WT_CURSTD_PREFIX_SEARCH) && prefix != NULL &&
+ __wt_prefix_match(prefix, &cbt->iface.key) > 0) {
+ /* It is not okay for the user to have a custom collator. */
+ WT_ASSERT(session, CUR2BT(cbt)->collator == NULL);
+ WT_STAT_CONN_DATA_INCR(session, cursor_search_near_prefix_fast_paths);
+ return (WT_NOTFOUND);
+ }
WT_RET(__wt_txn_read(
session, cbt, &cbt->iface.key, WT_RECNO_OOB, WT_ROW_UPDATE(page, rip), NULL));
if (cbt->upd_value->type == WT_UPDATE_INVALID) {
@@ -572,11 +585,11 @@ restart_read_page:
}
/*
- * __wt_btcur_prev --
+ * __wt_btcur_prev_prefix --
* Move to the previous record in the tree.
*/
int
-__wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
+__wt_btcur_prev_prefix(WT_CURSOR_BTREE *cbt, WT_ITEM *prefix, bool truncating)
{
WT_CURSOR *cursor;
WT_DECL_RET;
@@ -653,8 +666,14 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
total_skipped += skipped;
break;
case WT_PAGE_ROW_LEAF:
- ret = __cursor_row_prev(cbt, newpage, restart, &skipped);
+ ret = __cursor_row_prev(cbt, newpage, restart, &skipped, prefix);
total_skipped += skipped;
+ /*
+ * We can directly return WT_NOTFOUND here as the caller will reset the cursor for
+ * us, this way we don't leave the cursor positioned after returning WT_NOTFOUND.
+ */
+ if (ret == WT_NOTFOUND && F_ISSET(&cbt->iface, WT_CURSTD_PREFIX_SEARCH))
+ return (WT_NOTFOUND);
break;
default:
WT_ERR(__wt_illegal_value(session, page->type));
@@ -726,3 +745,13 @@ err:
F_CLR(cbt, WT_CBT_ITERATE_RETRY_NEXT);
return (ret);
}
+
+/*
+ * __wt_btcur_prev --
+ * Move to the previous record in the tree.
+ */
+int
+__wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
+{
+ return (__wt_btcur_prev_prefix(cbt, NULL, truncating));
+}
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index e50327ea193..9449be9603a 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -348,6 +348,15 @@ __cursor_col_search(WT_CURSOR_BTREE *cbt, WT_REF *leaf, bool *leaf_foundp)
WT_SESSION_IMPL *session;
session = CUR2S(cbt);
+
+#ifdef HAVE_DIAGNOSTIC
+ /*
+ * Turn off cursor-order checks in all cases on search. The search/search-near functions turn
+ * them back on after a successful search.
+ */
+ __wt_cursor_key_order_reset(cbt);
+#endif
+
WT_WITH_PAGE_INDEX(
session, ret = __wt_col_search(cbt, cbt->iface.recno, leaf, false, leaf_foundp));
return (ret);
@@ -364,6 +373,15 @@ __cursor_row_search(WT_CURSOR_BTREE *cbt, bool insert, WT_REF *leaf, bool *leaf_
WT_SESSION_IMPL *session;
session = CUR2S(cbt);
+
+#ifdef HAVE_DIAGNOSTIC
+ /*
+ * Turn off cursor-order checks in all cases on search. The search/search-near functions turn
+ * them back on after a successful search.
+ */
+ __wt_cursor_key_order_reset(cbt);
+#endif
+
WT_WITH_PAGE_INDEX(
session, ret = __wt_row_search(cbt, &cbt->iface.key, insert, leaf, false, leaf_foundp));
return (ret);
@@ -690,7 +708,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
* here because at low isolation levels, new records could appear as we are stepping through
* the tree.
*/
- while ((ret = __wt_btcur_next(cbt, false)) != WT_NOTFOUND) {
+ while ((ret = __wt_btcur_next_prefix(cbt, &state.key, false)) != WT_NOTFOUND) {
WT_ERR(ret);
if (btree->type == BTREE_ROW)
WT_ERR(__wt_compare(session, btree->collator, &cursor->key, &state.key, &exact));
@@ -703,7 +721,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
/*
* We walked to the end of the tree without finding a match. Walk backwards instead.
*/
- while ((ret = __wt_btcur_prev(cbt, false)) != WT_NOTFOUND) {
+ while ((ret = __wt_btcur_prev_prefix(cbt, &state.key, false)) != WT_NOTFOUND) {
WT_ERR(ret);
if (btree->type == BTREE_ROW)
WT_ERR(__wt_compare(session, btree->collator, &cursor->key, &state.key, &exact));
@@ -725,6 +743,11 @@ err:
#endif
if (ret != 0) {
+ /*
+ * It is important that this reset is kept as the cursor state is modified in the above prev
+ * and next loops. Those internally do reset the cursor but not when performing a prefix
+ * search near.
+ */
WT_TRET(__cursor_reset(cbt));
__cursor_state_restore(cursor, &state);
}
@@ -1168,7 +1191,7 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
WT_DECL_RET;
WT_SESSION_IMPL *session;
uint64_t yield_count, sleep_usecs;
- bool leaf_found, valid;
+ bool valid;
btree = CUR2BT(cbt);
cursor = &cbt->iface;
@@ -1221,30 +1244,11 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
WT_ERR(__cursor_localvalue(cursor));
__cursor_state_save(cursor, &state);
- /* If our caller configures for a local search and we have a page pinned, do that search. */
- if (F_ISSET(cursor, WT_CURSTD_UPDATE_LOCAL) && __cursor_page_pinned(cbt, true)) {
- __wt_txn_cursor_op(session);
- WT_ERR(__wt_txn_autocommit_check(session));
-
- WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(cbt, true, cbt->ref, &leaf_found) :
- __cursor_col_search(cbt, cbt->ref, &leaf_found));
- /*
- * Only use the pinned page search results if search returns an exact match or a slot other
- * than the page's boundary slots, if that's not the case, the record might belong on an
- * entirely different page. This test is simplistic as we're ignoring append lists (there
- * may be no page slots or we might be legitimately positioned after the last page slot).
- * Ignore those cases, it makes things too complicated.
- */
- if (leaf_found &&
- (cbt->compare == 0 || (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1)))
- goto update_local;
- }
-
retry:
WT_ERR(__cursor_func_init(cbt, true));
WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(cbt, true, NULL, NULL) :
__cursor_col_search(cbt, NULL, NULL));
-update_local:
+
if (btree->type == BTREE_ROW) {
/*
* If not overwriting, check for conflicts and fail if the key does not exist.
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c
index 629f013c4c2..0d36f155f7a 100644
--- a/src/third_party/wiredtiger/src/btree/bt_handle.c
+++ b/src/third_party/wiredtiger/src/btree/bt_handle.c
@@ -115,8 +115,11 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[])
if (!WT_PREFIX_SKIP(filename, "file:"))
WT_ERR_MSG(session, EINVAL, "expected a 'file:' URI");
- WT_ERR(__wt_block_manager_open(session, filename, dhandle->cfg, forced_salvage,
- F_ISSET(btree, WT_BTREE_READONLY), btree->allocsize, &btree->bm));
+ WT_WITH_BUCKET_STORAGE(btree->bstorage, session,
+ ret = __wt_block_manager_open(session, filename, dhandle->cfg, forced_salvage,
+ F_ISSET(btree, WT_BTREE_READONLY), btree->allocsize, &btree->bm));
+ WT_ERR(ret);
+
bm = btree->bm;
/*
@@ -297,56 +300,6 @@ __wt_btree_config_encryptor(
}
/*
- * __btree_config_tiered --
- * Return a bucket storage handle based on the configuration.
- */
-static int
-__btree_config_tiered(WT_SESSION_IMPL *session, const char **cfg, WT_BUCKET_STORAGE **bstoragep)
-{
- WT_BUCKET_STORAGE *bstorage;
- WT_CONFIG_ITEM bucket, cval;
- WT_DECL_RET;
- bool local_free;
-
- /*
- * We do not use __wt_config_gets_none for name because "none" and the empty string have
- * different meanings. The empty string means inherit the system tiered storage setting and
- * "none" means this table is not using tiered storage.
- */
- *bstoragep = NULL;
- local_free = false;
- WT_RET(__wt_config_gets(session, cfg, "tiered_storage.name", &cval));
- if (cval.len == 0)
- *bstoragep = S2C(session)->bstorage;
- else if (!WT_STRING_MATCH("none", cval.str, cval.len)) {
- WT_RET(__wt_config_gets_none(session, cfg, "tiered_storage.bucket", &bucket));
- WT_RET(__wt_tiered_bucket_config(session, &cval, &bucket, bstoragep));
- local_free = true;
- WT_ASSERT(session, *bstoragep != NULL);
- }
- bstorage = *bstoragep;
- if (bstorage != NULL) {
- /*
- * If we get here then we have a valid bucket storage entry. Now see if the config overrides
- * any of the other settings.
- */
- if (bstorage != S2C(session)->bstorage)
- WT_ERR(__wt_tiered_common_config(session, cfg, bstorage));
- WT_STAT_DATA_SET(session, tiered_object_size, bstorage->object_size);
- WT_STAT_DATA_SET(session, tiered_retention, bstorage->retain_secs);
- }
- return (0);
-err:
- /* If the bucket storage was set up with copies of the strings, free them here. */
- if (bstorage != NULL && local_free && F_ISSET(bstorage, WT_BUCKET_FREE)) {
- __wt_free(session, bstorage->auth_token);
- __wt_free(session, bstorage->bucket);
- __wt_free(session, bstorage);
- }
- return (ret);
-}
-
-/*
* __btree_conf --
* Configure a WT_BTREE structure.
*/
@@ -531,9 +484,6 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
F_SET(btree, WT_BTREE_NO_LOGGING);
}
- /* Configure tiered storage. */
- WT_RET(__btree_config_tiered(session, cfg, &btree->bstorage));
-
/* Configure encryption. */
WT_RET(__wt_btree_config_encryptor(session, cfg, &btree->kencryptor));
diff --git a/src/third_party/wiredtiger/src/btree/bt_io.c b/src/third_party/wiredtiger/src/btree/bt_io.c
index 373cc7b71f1..3b763a68172 100644
--- a/src/third_party/wiredtiger/src/btree/bt_io.c
+++ b/src/third_party/wiredtiger/src/btree/bt_io.c
@@ -35,12 +35,16 @@ __wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t
* into the caller's buffer. Else, read directly into the caller's buffer.
*/
if (btree->compressor == NULL && btree->kencryptor == NULL) {
- WT_RET(bm->read(bm, session, buf, addr, addr_size));
+ WT_WITH_BUCKET_STORAGE(
+ btree->bstorage, session, { ret = bm->read(bm, session, buf, addr, addr_size); });
+ WT_RET(ret);
dsk = buf->data;
ip = NULL;
} else {
WT_RET(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(bm->read(bm, session, tmp, addr, addr_size));
+ WT_WITH_BUCKET_STORAGE(
+ btree->bstorage, session, { ret = bm->read(bm, session, tmp, addr, addr_size); });
+ WT_ERR(ret);
dsk = tmp->data;
ip = tmp;
}
@@ -303,7 +307,12 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *add
if (encrypted)
F_SET(dsk, WT_PAGE_ENCRYPTED);
- WT_ASSERT(session, (dsk->write_gen != 0 && dsk->write_gen > btree->base_write_gen));
+ /*
+ * The page image must have a proper write generation number before writing it to disk. The page
+ * images that are created during recovery may have the write generation number less than the
+ * btree base write generation number, so don't verify it.
+ */
+ WT_ASSERT(session, dsk->write_gen != 0);
/*
* Checksum the data if the buffer isn't compressed or checksums are configured.
@@ -324,9 +333,13 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *add
if (timer)
time_start = __wt_clock(session);
- /* Call the block manager to write the block. */
- WT_ERR(checkpoint ? bm->checkpoint(bm, session, ip, btree->ckpt, data_checksum) :
+ WT_WITH_BUCKET_STORAGE(btree->bstorage, session, {
+ /* Call the block manager to write the block. */
+ ret =
+ (checkpoint ? bm->checkpoint(bm, session, ip, btree->ckpt, data_checksum) :
bm->write(bm, session, ip, addr, addr_sizep, data_checksum, checkpoint_io));
+ });
+ WT_ERR(ret);
/* Update some statistics now that the write is done */
if (timer) {
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index ab91af5b21a..dc070d5c700 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -344,14 +344,6 @@ read:
else if (ret == EBUSY) {
WT_NOT_READ(ret, 0);
WT_STAT_CONN_INCR(session, page_forcible_evict_blocked);
- /*
- * Forced eviction failed: check if this transaction is keeping content pinned
- * in cache.
- */
- if (force_attempts > 1 &&
- (ret = __wt_txn_is_blocking(session, true)) == WT_ROLLBACK)
- WT_STAT_CONN_INCR(session, cache_eviction_force_rollback);
- WT_RET(ret);
stalled = true;
break;
}
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index 43bfb1f769f..25cbb0e8b33 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -96,7 +96,6 @@ static const WT_CONFIG_CHECK confchk_tiered_manager_subconfigs[] = {
{"wait", "int", NULL, "min=0,max=100000", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure_tiered_storage_subconfigs[] = {
- {"auth_token", "string", NULL, NULL, NULL, 0},
{"local_retention", "int", NULL, "min=0,max=10000", NULL, 0},
{"object_target_size", "int", NULL, "min=100K,max=10TB", NULL, 0},
{NULL, NULL, NULL, NULL, NULL, 0}};
@@ -135,7 +134,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
confchk_WT_CONNECTION_reconfigure_statistics_log_subconfigs, 5},
{"tiered_manager", "category", NULL, NULL, confchk_tiered_manager_subconfigs, 3},
{"tiered_storage", "category", NULL, NULL,
- confchk_WT_CONNECTION_reconfigure_tiered_storage_subconfigs, 3},
+ confchk_WT_CONNECTION_reconfigure_tiered_storage_subconfigs, 2},
{"timing_stress_for_test", "list", NULL,
"choices=[\"aggressive_sweep\",\"backup_rename\","
"\"checkpoint_slow\",\"history_store_checkpoint_delay\","
@@ -152,8 +151,8 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
"\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
"\"reconcile\",\"recovery\",\"recovery_progress\",\"rts\","
"\"salvage\",\"shared_cache\",\"split\",\"temporary\","
- "\"thread_group\",\"timestamp\",\"transaction\",\"verify\","
- "\"version\",\"write\"]",
+ "\"thread_group\",\"tiered\",\"timestamp\",\"transaction\","
+ "\"verify\",\"version\",\"write\"]",
NULL, 0},
{NULL, NULL, NULL, NULL, NULL, 0}};
@@ -165,7 +164,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_set_timestamp[] = {
static const WT_CONFIG_CHECK confchk_WT_CURSOR_reconfigure[] = {
{"append", "boolean", NULL, NULL, NULL, 0}, {"overwrite", "boolean", NULL, NULL, NULL, 0},
- {NULL, NULL, NULL, NULL, NULL, 0}};
+ {"prefix_search", "boolean", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_assert_subconfigs[] = {
{"commit_timestamp", "string", NULL,
@@ -258,12 +257,9 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_create_lsm_subconfigs[] = {
{"merge_max", "int", NULL, "min=2,max=100", NULL, 0},
{"merge_min", "int", NULL, "max=100", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
-static const WT_CONFIG_CHECK confchk_WT_SESSION_create_tiered_subconfigs[] = {
- {"chunk_size", "int", NULL, "min=1M", NULL, 0}, {"tiers", "list", NULL, NULL, NULL, 0},
- {NULL, NULL, NULL, NULL, NULL, 0}};
-
static const WT_CONFIG_CHECK confchk_WT_SESSION_create_tiered_storage_subconfigs[] = {
{"auth_token", "string", NULL, NULL, NULL, 0}, {"bucket", "string", NULL, NULL, NULL, 0},
+ {"bucket_prefix", "string", NULL, NULL, NULL, 0},
{"local_retention", "int", NULL, "min=0,max=10000", NULL, 0},
{"name", "string", NULL, NULL, NULL, 0},
{"object_target_size", "int", NULL, "min=100K,max=10TB", NULL, 0},
@@ -308,9 +304,8 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_create[] = {
{"split_deepen_min_child", "int", NULL, NULL, NULL, 0},
{"split_deepen_per_child", "int", NULL, NULL, NULL, 0},
{"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
- {"tiered", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_subconfigs, 2},
{"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
- 5},
+ 6},
{"type", "string", NULL, NULL, NULL, 0},
{"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
{"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
@@ -362,9 +357,9 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor[] = {
{"incremental", "category", NULL, NULL, confchk_WT_SESSION_open_cursor_incremental_subconfigs, 7},
{"next_random", "boolean", NULL, NULL, NULL, 0},
{"next_random_sample_size", "string", NULL, NULL, NULL, 0},
- {"overwrite", "boolean", NULL, NULL, NULL, 0}, {"raw", "boolean", NULL, NULL, NULL, 0},
- {"read_once", "boolean", NULL, NULL, NULL, 0}, {"readonly", "boolean", NULL, NULL, NULL, 0},
- {"skip_sort_check", "boolean", NULL, NULL, NULL, 0},
+ {"overwrite", "boolean", NULL, NULL, NULL, 0}, {"prefix_search", "boolean", NULL, NULL, NULL, 0},
+ {"raw", "boolean", NULL, NULL, NULL, 0}, {"read_once", "boolean", NULL, NULL, NULL, 0},
+ {"readonly", "boolean", NULL, NULL, NULL, 0}, {"skip_sort_check", "boolean", NULL, NULL, NULL, 0},
{"statistics", "list", NULL,
"choices=[\"all\",\"cache_walk\",\"fast\",\"clear\","
"\"size\",\"tree_walk\"]",
@@ -460,7 +455,7 @@ static const WT_CONFIG_CHECK confchk_file_config[] = {
{"split_deepen_per_child", "int", NULL, NULL, NULL, 0},
{"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
{"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
- 5},
+ 6},
{"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
{"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
{"write_timestamp_usage", "string", NULL,
@@ -508,7 +503,7 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = {
{"split_deepen_per_child", "int", NULL, NULL, NULL, 0},
{"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
{"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
- 5},
+ 6},
{"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
{"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
{"version", "string", NULL, NULL, NULL, 0},
@@ -572,9 +567,58 @@ static const WT_CONFIG_CHECK confchk_lsm_meta[] = {
{"split_deepen_per_child", "int", NULL, NULL, NULL, 0},
{"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
{"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
- 5},
+ 6},
+ {"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
+ {"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
+ {"write_timestamp_usage", "string", NULL,
+ "choices=[\"always\",\"key_consistent\",\"mixed_mode\","
+ "\"never\",\"none\",\"ordered\"]",
+ NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_object_meta[] = {
+ {"access_pattern_hint", "string", NULL, "choices=[\"none\",\"random\",\"sequential\"]", NULL, 0},
+ {"allocation_size", "int", NULL, "min=512B,max=128MB", NULL, 0},
+ {"app_metadata", "string", NULL, NULL, NULL, 0},
+ {"assert", "category", NULL, NULL, confchk_assert_subconfigs, 4},
+ {"block_allocation", "string", NULL, "choices=[\"best\",\"first\",\"log-structured\"]", NULL, 0},
+ {"block_compressor", "string", NULL, NULL, NULL, 0},
+ {"cache_resident", "boolean", NULL, NULL, NULL, 0}, {"checkpoint", "string", NULL, NULL, NULL, 0},
+ {"checkpoint_backup_info", "string", NULL, NULL, NULL, 0},
+ {"checkpoint_lsn", "string", NULL, NULL, NULL, 0},
+ {"checksum", "string", NULL, "choices=[\"on\",\"off\",\"uncompressed\"]", NULL, 0},
+ {"collator", "string", NULL, NULL, NULL, 0}, {"columns", "list", NULL, NULL, NULL, 0},
+ {"dictionary", "int", NULL, "min=0", NULL, 0},
+ {"encryption", "category", NULL, NULL, confchk_WT_SESSION_create_encryption_subconfigs, 2},
+ {"format", "string", NULL, "choices=[\"btree\"]", NULL, 0},
+ {"huffman_key", "string", NULL, NULL, NULL, 0}, {"huffman_value", "string", NULL, NULL, NULL, 0},
+ {"id", "string", NULL, NULL, NULL, 0},
+ {"ignore_in_memory_cache_size", "boolean", NULL, NULL, NULL, 0},
+ {"internal_item_max", "int", NULL, "min=0", NULL, 0},
+ {"internal_key_max", "int", NULL, "min=0", NULL, 0},
+ {"internal_key_truncate", "boolean", NULL, NULL, NULL, 0},
+ {"internal_page_max", "int", NULL, "min=512B,max=512MB", NULL, 0},
+ {"key_format", "format", __wt_struct_confchk, NULL, NULL, 0},
+ {"key_gap", "int", NULL, "min=0", NULL, 0}, {"leaf_item_max", "int", NULL, "min=0", NULL, 0},
+ {"leaf_key_max", "int", NULL, "min=0", NULL, 0},
+ {"leaf_page_max", "int", NULL, "min=512B,max=512MB", NULL, 0},
+ {"leaf_value_max", "int", NULL, "min=0", NULL, 0},
+ {"log", "category", NULL, NULL, confchk_WT_SESSION_create_log_subconfigs, 1},
+ {"memory_page_image_max", "int", NULL, "min=0", NULL, 0},
+ {"memory_page_max", "int", NULL, "min=512B,max=10TB", NULL, 0},
+ {"os_cache_dirty_max", "int", NULL, "min=0", NULL, 0},
+ {"os_cache_max", "int", NULL, "min=0", NULL, 0},
+ {"prefix_compression", "boolean", NULL, NULL, NULL, 0},
+ {"prefix_compression_min", "int", NULL, "min=0", NULL, 0},
+ {"readonly", "boolean", NULL, NULL, NULL, 0},
+ {"split_deepen_min_child", "int", NULL, NULL, NULL, 0},
+ {"split_deepen_per_child", "int", NULL, NULL, NULL, 0},
+ {"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
+ {"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
+ 6},
{"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
{"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
+ {"version", "string", NULL, NULL, NULL, 0},
{"write_timestamp_usage", "string", NULL,
"choices=[\"always\",\"key_consistent\",\"mixed_mode\","
"\"never\",\"none\",\"ordered\"]",
@@ -595,10 +639,64 @@ static const WT_CONFIG_CHECK confchk_table_meta[] = {
NULL, 0},
{NULL, NULL, NULL, NULL, NULL, 0}};
+static const WT_CONFIG_CHECK confchk_tier_meta[] = {
+ {"access_pattern_hint", "string", NULL, "choices=[\"none\",\"random\",\"sequential\"]", NULL, 0},
+ {"allocation_size", "int", NULL, "min=512B,max=128MB", NULL, 0},
+ {"app_metadata", "string", NULL, NULL, NULL, 0},
+ {"assert", "category", NULL, NULL, confchk_assert_subconfigs, 4},
+ {"block_allocation", "string", NULL, "choices=[\"best\",\"first\",\"log-structured\"]", NULL, 0},
+ {"block_compressor", "string", NULL, NULL, NULL, 0}, {"bucket", "string", NULL, NULL, NULL, 0},
+ {"bucket_prefix", "string", NULL, NULL, NULL, 0},
+ {"cache_resident", "boolean", NULL, NULL, NULL, 0}, {"checkpoint", "string", NULL, NULL, NULL, 0},
+ {"checkpoint_backup_info", "string", NULL, NULL, NULL, 0},
+ {"checkpoint_lsn", "string", NULL, NULL, NULL, 0},
+ {"checksum", "string", NULL, "choices=[\"on\",\"off\",\"uncompressed\"]", NULL, 0},
+ {"collator", "string", NULL, NULL, NULL, 0}, {"columns", "list", NULL, NULL, NULL, 0},
+ {"dictionary", "int", NULL, "min=0", NULL, 0},
+ {"encryption", "category", NULL, NULL, confchk_WT_SESSION_create_encryption_subconfigs, 2},
+ {"format", "string", NULL, "choices=[\"btree\"]", NULL, 0},
+ {"huffman_key", "string", NULL, NULL, NULL, 0}, {"huffman_value", "string", NULL, NULL, NULL, 0},
+ {"id", "string", NULL, NULL, NULL, 0},
+ {"ignore_in_memory_cache_size", "boolean", NULL, NULL, NULL, 0},
+ {"internal_item_max", "int", NULL, "min=0", NULL, 0},
+ {"internal_key_max", "int", NULL, "min=0", NULL, 0},
+ {"internal_key_truncate", "boolean", NULL, NULL, NULL, 0},
+ {"internal_page_max", "int", NULL, "min=512B,max=512MB", NULL, 0},
+ {"key_format", "format", __wt_struct_confchk, NULL, NULL, 0},
+ {"key_gap", "int", NULL, "min=0", NULL, 0}, {"leaf_item_max", "int", NULL, "min=0", NULL, 0},
+ {"leaf_key_max", "int", NULL, "min=0", NULL, 0},
+ {"leaf_page_max", "int", NULL, "min=512B,max=512MB", NULL, 0},
+ {"leaf_value_max", "int", NULL, "min=0", NULL, 0},
+ {"log", "category", NULL, NULL, confchk_WT_SESSION_create_log_subconfigs, 1},
+ {"memory_page_image_max", "int", NULL, "min=0", NULL, 0},
+ {"memory_page_max", "int", NULL, "min=512B,max=10TB", NULL, 0},
+ {"os_cache_dirty_max", "int", NULL, "min=0", NULL, 0},
+ {"os_cache_max", "int", NULL, "min=0", NULL, 0},
+ {"prefix_compression", "boolean", NULL, NULL, NULL, 0},
+ {"prefix_compression_min", "int", NULL, "min=0", NULL, 0},
+ {"readonly", "boolean", NULL, NULL, NULL, 0},
+ {"split_deepen_min_child", "int", NULL, NULL, NULL, 0},
+ {"split_deepen_per_child", "int", NULL, NULL, NULL, 0},
+ {"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
+ {"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
+ 6},
+ {"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
+ {"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
+ {"version", "string", NULL, NULL, NULL, 0},
+ {"write_timestamp_usage", "string", NULL,
+ "choices=[\"always\",\"key_consistent\",\"mixed_mode\","
+ "\"never\",\"none\",\"ordered\"]",
+ NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
+
static const WT_CONFIG_CHECK confchk_tiered_meta[] = {
{"app_metadata", "string", NULL, NULL, NULL, 0},
{"assert", "category", NULL, NULL, confchk_assert_subconfigs, 4},
- {"tiered", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_subconfigs, 2},
+ {"collator", "string", NULL, NULL, NULL, 0}, {"columns", "list", NULL, NULL, NULL, 0},
+ {"last", "string", NULL, NULL, NULL, 0},
+ {"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
+ 6},
+ {"tiers", "list", NULL, NULL, NULL, 0},
{"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
{"write_timestamp_usage", "string", NULL,
"choices=[\"always\",\"key_consistent\",\"mixed_mode\","
@@ -634,10 +732,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_statistics_log_subconfigs[]
{NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_tiered_storage_subconfigs[] = {
- {"auth_token", "string", NULL, NULL, NULL, 0}, {"auth_token", "string", NULL, NULL, NULL, 0},
- {"bucket", "string", NULL, NULL, NULL, 0}, {"cluster", "string", NULL, NULL, NULL, 0},
+ {"auth_token", "string", NULL, NULL, NULL, 0}, {"bucket", "string", NULL, NULL, NULL, 0},
+ {"bucket_prefix", "string", NULL, NULL, NULL, 0},
{"local_retention", "int", NULL, "min=0,max=10000", NULL, 0},
- {"member", "string", NULL, NULL, NULL, 0}, {"name", "string", NULL, NULL, NULL, 0},
+ {"name", "string", NULL, NULL, NULL, 0},
{"object_target_size", "int", NULL, "min=100K,max=10TB", NULL, 0},
{NULL, NULL, NULL, NULL, NULL, 0}};
@@ -696,7 +794,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
NULL, 0},
{"statistics_log", "category", NULL, NULL, confchk_wiredtiger_open_statistics_log_subconfigs, 6},
{"tiered_manager", "category", NULL, NULL, confchk_tiered_manager_subconfigs, 3},
- {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 8},
+ {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 6},
{"timing_stress_for_test", "list", NULL,
"choices=[\"aggressive_sweep\",\"backup_rename\","
"\"checkpoint_slow\",\"history_store_checkpoint_delay\","
@@ -717,8 +815,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
"\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
"\"reconcile\",\"recovery\",\"recovery_progress\",\"rts\","
"\"salvage\",\"shared_cache\",\"split\",\"temporary\","
- "\"thread_group\",\"timestamp\",\"transaction\",\"verify\","
- "\"version\",\"write\"]",
+ "\"thread_group\",\"tiered\",\"timestamp\",\"transaction\","
+ "\"verify\",\"version\",\"write\"]",
NULL, 0},
{"verify_metadata", "boolean", NULL, NULL, NULL, 0},
{"write_through", "list", NULL, "choices=[\"data\",\"log\"]", NULL, 0},
@@ -774,7 +872,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
NULL, 0},
{"statistics_log", "category", NULL, NULL, confchk_wiredtiger_open_statistics_log_subconfigs, 6},
{"tiered_manager", "category", NULL, NULL, confchk_tiered_manager_subconfigs, 3},
- {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 8},
+ {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 6},
{"timing_stress_for_test", "list", NULL,
"choices=[\"aggressive_sweep\",\"backup_rename\","
"\"checkpoint_slow\",\"history_store_checkpoint_delay\","
@@ -795,8 +893,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
"\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
"\"reconcile\",\"recovery\",\"recovery_progress\",\"rts\","
"\"salvage\",\"shared_cache\",\"split\",\"temporary\","
- "\"thread_group\",\"timestamp\",\"transaction\",\"verify\","
- "\"version\",\"write\"]",
+ "\"thread_group\",\"tiered\",\"timestamp\",\"transaction\","
+ "\"verify\",\"version\",\"write\"]",
NULL, 0},
{"verify_metadata", "boolean", NULL, NULL, NULL, 0}, {"version", "string", NULL, NULL, NULL, 0},
{"write_through", "list", NULL, "choices=[\"data\",\"log\"]", NULL, 0},
@@ -849,7 +947,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
NULL, 0},
{"statistics_log", "category", NULL, NULL, confchk_wiredtiger_open_statistics_log_subconfigs, 6},
{"tiered_manager", "category", NULL, NULL, confchk_tiered_manager_subconfigs, 3},
- {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 8},
+ {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 6},
{"timing_stress_for_test", "list", NULL,
"choices=[\"aggressive_sweep\",\"backup_rename\","
"\"checkpoint_slow\",\"history_store_checkpoint_delay\","
@@ -868,8 +966,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
"\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
"\"reconcile\",\"recovery\",\"recovery_progress\",\"rts\","
"\"salvage\",\"shared_cache\",\"split\",\"temporary\","
- "\"thread_group\",\"timestamp\",\"transaction\",\"verify\","
- "\"version\",\"write\"]",
+ "\"thread_group\",\"tiered\",\"timestamp\",\"transaction\","
+ "\"verify\",\"version\",\"write\"]",
NULL, 0},
{"verify_metadata", "boolean", NULL, NULL, NULL, 0}, {"version", "string", NULL, NULL, NULL, 0},
{"write_through", "list", NULL, "choices=[\"data\",\"log\"]", NULL, 0},
@@ -922,7 +1020,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
NULL, 0},
{"statistics_log", "category", NULL, NULL, confchk_wiredtiger_open_statistics_log_subconfigs, 6},
{"tiered_manager", "category", NULL, NULL, confchk_tiered_manager_subconfigs, 3},
- {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 8},
+ {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 6},
{"timing_stress_for_test", "list", NULL,
"choices=[\"aggressive_sweep\",\"backup_rename\","
"\"checkpoint_slow\",\"history_store_checkpoint_delay\","
@@ -941,8 +1039,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
"\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
"\"reconcile\",\"recovery\",\"recovery_progress\",\"rts\","
"\"salvage\",\"shared_cache\",\"split\",\"temporary\","
- "\"thread_group\",\"timestamp\",\"transaction\",\"verify\","
- "\"version\",\"write\"]",
+ "\"thread_group\",\"tiered\",\"timestamp\",\"transaction\","
+ "\"verify\",\"version\",\"write\"]",
NULL, 0},
{"verify_metadata", "boolean", NULL, NULL, NULL, 0},
{"write_through", "list", NULL, "choices=[\"data\",\"log\"]", NULL, 0},
@@ -985,8 +1083,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"statistics=none,statistics_log=(json=false,on_close=false,"
"sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"tiered_manager=(threads_max=8,threads_min=1,wait=0),"
- "tiered_storage=(auth_token=,local_retention=300,"
- "object_target_size=10M),timing_stress_for_test=,verbose=[]",
+ "tiered_storage=(local_retention=300,object_target_size=10M),"
+ "timing_stress_for_test=,verbose=[]",
confchk_WT_CONNECTION_reconfigure, 29},
{"WT_CONNECTION.rollback_to_stable", "", NULL, 0}, {"WT_CONNECTION.set_file_system", "", NULL, 0},
{"WT_CONNECTION.set_timestamp",
@@ -994,7 +1092,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"oldest_timestamp=,stable_timestamp=",
confchk_WT_CONNECTION_set_timestamp, 5},
{"WT_CURSOR.close", "", NULL, 0},
- {"WT_CURSOR.reconfigure", "append=false,overwrite=true", confchk_WT_CURSOR_reconfigure, 2},
+ {"WT_CURSOR.reconfigure", "append=false,overwrite=true,prefix_search=false",
+ confchk_WT_CURSOR_reconfigure, 3},
{"WT_SESSION.alter",
"access_pattern_hint=none,app_metadata=,"
"assert=(commit_timestamp=none,durable_timestamp=none,"
@@ -1036,11 +1135,10 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0,"
"prefix_compression=false,prefix_compression_min=4,readonly=false"
",source=,split_deepen_min_child=0,split_deepen_per_child=0,"
- "split_pct=90,tiered=(chunk_size=1GB,tiers=),"
- "tiered_storage=(auth_token=,bucket=,local_retention=300,name=,"
- "object_target_size=10M),type=file,value_format=u,verbose=[],"
- "write_timestamp_usage=none",
- confchk_WT_SESSION_create, 50},
+ "split_pct=90,tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
+ "local_retention=300,name=,object_target_size=10M),type=file,"
+ "value_format=u,verbose=[],write_timestamp_usage=none",
+ confchk_WT_SESSION_create, 49},
{"WT_SESSION.drop",
"checkpoint_wait=true,force=false,lock_wait=true,"
"remove_files=true",
@@ -1058,9 +1156,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"debug=(release_evict=false),dump=,incremental=(consolidate=false"
",enabled=false,file=,force_stop=false,granularity=16MB,src_id=,"
"this_id=),next_random=false,next_random_sample_size=0,"
- "overwrite=true,raw=false,read_once=false,readonly=false,"
- "skip_sort_check=false,statistics=,target=",
- confchk_WT_SESSION_open_cursor, 16},
+ "overwrite=true,prefix_search=false,raw=false,read_once=false,"
+ "readonly=false,skip_sort_check=false,statistics=,target=",
+ confchk_WT_SESSION_open_cursor, 17},
{"WT_SESSION.prepare_transaction", "prepare_timestamp=", confchk_WT_SESSION_prepare_transaction,
1},
{"WT_SESSION.query_timestamp", "get=read", confchk_WT_SESSION_query_timestamp, 1},
@@ -1104,9 +1202,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false,"
"prefix_compression_min=4,readonly=false,split_deepen_min_child=0"
",split_deepen_per_child=0,split_pct=90,"
- "tiered_storage=(auth_token=,bucket=,local_retention=300,name=,"
- "object_target_size=10M),value_format=u,verbose=[],"
- "write_timestamp_usage=none",
+ "tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
+ "local_retention=300,name=,object_target_size=10M),value_format=u"
+ ",verbose=[],write_timestamp_usage=none",
confchk_file_config, 41},
{"file.meta",
"access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
@@ -1124,9 +1222,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false,"
"prefix_compression_min=4,readonly=false,split_deepen_min_child=0"
",split_deepen_per_child=0,split_pct=90,"
- "tiered_storage=(auth_token=,bucket=,local_retention=300,name=,"
- "object_target_size=10M),value_format=u,verbose=[],"
- "version=(major=0,minor=0),write_timestamp_usage=none",
+ "tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
+ "local_retention=300,name=,object_target_size=10M),value_format=u"
+ ",verbose=[],version=(major=0,minor=0),write_timestamp_usage=none",
confchk_file_meta, 46},
{"index.meta",
"app_metadata=,assert=(commit_timestamp=none,"
@@ -1144,7 +1242,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"name=),format=btree,huffman_key=,huffman_value=,"
"ignore_in_memory_cache_size=false,internal_item_max=0,"
"internal_key_max=0,internal_key_truncate=true,"
- "internal_page_max=4KB,key_format=u,key_gap=10,last=,"
+ "internal_page_max=4KB,key_format=u,key_gap=10,last=0,"
"leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,"
"leaf_value_max=0,log=(enabled=true),lsm=(auto_throttle=true,"
"bloom=true,bloom_bit_count=16,bloom_config=,bloom_hash_count=8,"
@@ -1155,22 +1253,63 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"os_cache_max=0,prefix_compression=false,prefix_compression_min=4"
",readonly=false,split_deepen_min_child=0,"
"split_deepen_per_child=0,split_pct=90,"
- "tiered_storage=(auth_token=,bucket=,local_retention=300,name=,"
- "object_target_size=10M),value_format=u,verbose=[],"
- "write_timestamp_usage=none",
+ "tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
+ "local_retention=300,name=,object_target_size=10M),value_format=u"
+ ",verbose=[],write_timestamp_usage=none",
confchk_lsm_meta, 45},
+ {"object.meta",
+ "access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
+ "assert=(commit_timestamp=none,durable_timestamp=none,"
+ "read_timestamp=none,write_timestamp=off),block_allocation=best,"
+ "block_compressor=,cache_resident=false,checkpoint=,"
+ "checkpoint_backup_info=,checkpoint_lsn=,checksum=uncompressed,"
+ "collator=,columns=,dictionary=0,encryption=(keyid=,name=),"
+ "format=btree,huffman_key=,huffman_value=,id=,"
+ "ignore_in_memory_cache_size=false,internal_item_max=0,"
+ "internal_key_max=0,internal_key_truncate=true,"
+ "internal_page_max=4KB,key_format=u,key_gap=10,leaf_item_max=0,"
+ "leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=0,"
+ "log=(enabled=true),memory_page_image_max=0,memory_page_max=5MB,"
+ "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false,"
+ "prefix_compression_min=4,readonly=false,split_deepen_min_child=0"
+ ",split_deepen_per_child=0,split_pct=90,"
+ "tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
+ "local_retention=300,name=,object_target_size=10M),value_format=u"
+ ",verbose=[],version=(major=0,minor=0),write_timestamp_usage=none",
+ confchk_object_meta, 46},
{"table.meta",
"app_metadata=,assert=(commit_timestamp=none,"
"durable_timestamp=none,read_timestamp=none,write_timestamp=off),"
"colgroups=,collator=,columns=,key_format=u,value_format=u,"
"verbose=[],write_timestamp_usage=none",
confchk_table_meta, 9},
+ {"tier.meta",
+ "access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
+ "assert=(commit_timestamp=none,durable_timestamp=none,"
+ "read_timestamp=none,write_timestamp=off),block_allocation=best,"
+ "block_compressor=,bucket=,bucket_prefix=,cache_resident=false,"
+ "checkpoint=,checkpoint_backup_info=,checkpoint_lsn=,"
+ "checksum=uncompressed,collator=,columns=,dictionary=0,"
+ "encryption=(keyid=,name=),format=btree,huffman_key=,"
+ "huffman_value=,id=,ignore_in_memory_cache_size=false,"
+ "internal_item_max=0,internal_key_max=0,"
+ "internal_key_truncate=true,internal_page_max=4KB,key_format=u,"
+ "key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,"
+ "leaf_value_max=0,log=(enabled=true),memory_page_image_max=0,"
+ "memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0,"
+ "prefix_compression=false,prefix_compression_min=4,readonly=false"
+ ",split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,"
+ "tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
+ "local_retention=300,name=,object_target_size=10M),value_format=u"
+ ",verbose=[],version=(major=0,minor=0),write_timestamp_usage=none",
+ confchk_tier_meta, 48},
{"tiered.meta",
"app_metadata=,assert=(commit_timestamp=none,"
"durable_timestamp=none,read_timestamp=none,write_timestamp=off),"
- "tiered=(chunk_size=1GB,tiers=),verbose=[],"
- "write_timestamp_usage=none",
- confchk_tiered_meta, 5},
+ "collator=,columns=,last=0,tiered_storage=(auth_token=,bucket=,"
+ "bucket_prefix=,local_retention=300,name=,object_target_size=10M)"
+ ",tiers=,verbose=[],write_timestamp_usage=none",
+ confchk_tiered_meta, 9},
{"wiredtiger_open",
"buffer_alignment=-1,builtin_extension_config=,cache_cursors=true"
",cache_max_wait_ms=0,cache_overhead=8,cache_size=100MB,"
@@ -1200,8 +1339,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"reserve=0,size=500MB),statistics=none,statistics_log=(json=false"
",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\""
",wait=0),tiered_manager=(threads_max=8,threads_min=1,wait=0),"
- "tiered_storage=(auth_token=,auth_token=,bucket=,cluster=,"
- "local_retention=300,member=,name=,object_target_size=10M),"
+ "tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
+ "local_retention=300,name=,object_target_size=10M),"
"timing_stress_for_test=,transaction_sync=(enabled=false,"
"method=fsync),use_environment=true,use_environment_priv=false,"
"verbose=[],verify_metadata=false,write_through=",
@@ -1235,8 +1374,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"reserve=0,size=500MB),statistics=none,statistics_log=(json=false"
",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\""
",wait=0),tiered_manager=(threads_max=8,threads_min=1,wait=0),"
- "tiered_storage=(auth_token=,auth_token=,bucket=,cluster=,"
- "local_retention=300,member=,name=,object_target_size=10M),"
+ "tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
+ "local_retention=300,name=,object_target_size=10M),"
"timing_stress_for_test=,transaction_sync=(enabled=false,"
"method=fsync),use_environment=true,use_environment_priv=false,"
"verbose=[],verify_metadata=false,version=(major=0,minor=0),"
@@ -1270,8 +1409,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"statistics=none,statistics_log=(json=false,on_close=false,"
"path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"tiered_manager=(threads_max=8,threads_min=1,wait=0),"
- "tiered_storage=(auth_token=,auth_token=,bucket=,cluster=,"
- "local_retention=300,member=,name=,object_target_size=10M),"
+ "tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
+ "local_retention=300,name=,object_target_size=10M),"
"timing_stress_for_test=,transaction_sync=(enabled=false,"
"method=fsync),verbose=[],verify_metadata=false,version=(major=0,"
"minor=0),write_through=",
@@ -1304,8 +1443,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"statistics=none,statistics_log=(json=false,on_close=false,"
"path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"tiered_manager=(threads_max=8,threads_min=1,wait=0),"
- "tiered_storage=(auth_token=,auth_token=,bucket=,cluster=,"
- "local_retention=300,member=,name=,object_target_size=10M),"
+ "tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
+ "local_retention=300,name=,object_target_size=10M),"
"timing_stress_for_test=,transaction_sync=(enabled=false,"
"method=fsync),verbose=[],verify_metadata=false,write_through=",
confchk_wiredtiger_open_usercfg, 51},
diff --git a/src/third_party/wiredtiger/src/config/test_config.c b/src/third_party/wiredtiger/src/config/test_config.c
index 1b42bbee4d1..bb46c2a1f24 100644
--- a/src/third_party/wiredtiger/src/config/test_config.c
+++ b/src/third_party/wiredtiger/src/config/test_config.c
@@ -7,19 +7,23 @@ static const WT_CONFIG_CHECK confchk_stat_cache_size_subconfigs[] = {
{NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_runtime_monitor_subconfigs[] = {
- {"rate_per_second", "int", NULL, "min=1,max=1000", NULL, 0},
+ {"enabled", "boolean", NULL, NULL, NULL, 0},
+ {"interval", "string", NULL, "choices=[\"s\",\"m\",\"h\"]", NULL, 0},
+ {"op_count", "int", NULL, "min=1,max=10000", NULL, 0},
{"stat_cache_size", "category", NULL, NULL, confchk_stat_cache_size_subconfigs, 2},
{NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_timestamp_manager_subconfigs[] = {
{"enabled", "boolean", NULL, NULL, NULL, 0},
+ {"interval", "string", NULL, "choices=[\"s\",\"m\",\"h\"]", NULL, 0},
{"oldest_lag", "int", NULL, "min=0,max=1000000", NULL, 0},
+ {"op_count", "int", NULL, "min=1,max=10000", NULL, 0},
{"stable_lag", "int", NULL, "min=0,max=1000000", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_insert_config_subconfigs[] = {
- {"key_format", "string", NULL, NULL, NULL, 0},
+ {"interval", "string", NULL, "choices=[\"s\",\"m\",\"h\"]", NULL, 0},
{"key_size", "int", NULL, "min=0,max=10000", NULL, 0},
- {"value_format", "string", NULL, NULL, NULL, 0},
+ {"op_count", "int", NULL, "min=1,max=10000", NULL, 0},
{"value_size", "int", NULL, "min=0,max=1000000000", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_ops_per_transaction_subconfigs[] = {
@@ -27,50 +31,76 @@ static const WT_CONFIG_CHECK confchk_ops_per_transaction_subconfigs[] = {
{NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_update_config_subconfigs[] = {
- {"key_format", "string", NULL, NULL, NULL, 0},
+ {"interval", "string", NULL, "choices=[\"s\",\"m\",\"h\"]", NULL, 0},
{"key_size", "int", NULL, "min=0,max=10000", NULL, 0},
- {"value_format", "string", NULL, NULL, NULL, 0},
+ {"op_count", "int", NULL, "min=1,max=10000", NULL, 0},
{"value_size", "int", NULL, "min=0,max=1000000000", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_workload_generator_subconfigs[] = {
{"collection_count", "int", NULL, "min=0,max=200000", NULL, 0},
+ {"enabled", "boolean", NULL, NULL, NULL, 0},
{"insert_config", "category", NULL, NULL, confchk_insert_config_subconfigs, 4},
{"insert_threads", "int", NULL, "min=0,max=20", NULL, 0},
+ {"interval", "string", NULL, "choices=[\"s\",\"m\",\"h\"]", NULL, 0},
+ {"interval", "string", NULL, "choices=[\"s\",\"m\",\"h\"]", NULL, 0},
{"key_count", "int", NULL, "min=0,max=1000000", NULL, 0},
- {"key_format", "string", NULL, NULL, NULL, 0},
{"key_size", "int", NULL, "min=0,max=10000", NULL, 0},
+ {"op_count", "int", NULL, "min=1,max=10000", NULL, 0},
+ {"op_count", "int", NULL, "min=1,max=10000", NULL, 0},
{"ops_per_transaction", "category", NULL, NULL, confchk_ops_per_transaction_subconfigs, 2},
{"read_threads", "int", NULL, "min=0,max=100", NULL, 0},
{"update_config", "category", NULL, NULL, confchk_update_config_subconfigs, 4},
{"update_threads", "int", NULL, "min=0,max=20", NULL, 0},
- {"value_format", "string", NULL, NULL, NULL, 0},
{"value_size", "int", NULL, "min=0,max=1000000000", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_workload_tracking_subconfigs[] = {
{"enabled", "boolean", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
+static const WT_CONFIG_CHECK confchk_example_test[] = {
+ {"cache_size_mb", "int", NULL, "min=0,max=100000000000", NULL, 0},
+ {"duration_seconds", "int", NULL, "min=0,max=1000000", NULL, 0},
+ {"enable_logging", "boolean", NULL, NULL, NULL, 0},
+ {"runtime_monitor", "category", NULL, NULL, confchk_runtime_monitor_subconfigs, 4},
+ {"timestamp_manager", "category", NULL, NULL, confchk_timestamp_manager_subconfigs, 5},
+ {"workload_generator", "category", NULL, NULL, confchk_workload_generator_subconfigs, 15},
+ {"workload_tracking", "category", NULL, NULL, confchk_workload_tracking_subconfigs, 1},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
+
static const WT_CONFIG_CHECK confchk_poc_test[] = {
{"cache_size_mb", "int", NULL, "min=0,max=100000000000", NULL, 0},
{"duration_seconds", "int", NULL, "min=0,max=1000000", NULL, 0},
{"enable_logging", "boolean", NULL, NULL, NULL, 0},
- {"runtime_monitor", "category", NULL, NULL, confchk_runtime_monitor_subconfigs, 2},
- {"timestamp_manager", "category", NULL, NULL, confchk_timestamp_manager_subconfigs, 3},
- {"workload_generator", "category", NULL, NULL, confchk_workload_generator_subconfigs, 12},
+ {"runtime_monitor", "category", NULL, NULL, confchk_runtime_monitor_subconfigs, 4},
+ {"timestamp_manager", "category", NULL, NULL, confchk_timestamp_manager_subconfigs, 5},
+ {"workload_generator", "category", NULL, NULL, confchk_workload_generator_subconfigs, 15},
{"workload_tracking", "category", NULL, NULL, confchk_workload_tracking_subconfigs, 1},
{NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_ENTRY config_entries[] = {
+ {"example_test",
+ "cache_size_mb=0,duration_seconds=0,enable_logging=true,"
+ "runtime_monitor=(enabled=false,interval=s,op_count=1,"
+ "stat_cache_size=(enabled=false,limit=)),"
+ "timestamp_manager=(enabled=false,interval=s,oldest_lag=0,"
+ "op_count=1,stable_lag=0),workload_generator=(collection_count=1,"
+ "enabled=false,insert_config=(interval=s,key_size=0,op_count=1,"
+ "value_size=0),insert_threads=0,interval=s,interval=s,key_count=0"
+ ",key_size=0,op_count=1,op_count=1,ops_per_transaction=(max=1,"
+ "min=),read_threads=0,update_config=(interval=s,key_size=0,"
+ "op_count=1,value_size=0),update_threads=0,value_size=0),"
+ "workload_tracking=(enabled=false)",
+ confchk_example_test, 7},
{"poc_test",
"cache_size_mb=0,duration_seconds=0,enable_logging=true,"
- "runtime_monitor=(rate_per_second=1,"
+ "runtime_monitor=(enabled=false,interval=s,op_count=1,"
"stat_cache_size=(enabled=false,limit=)),"
- "timestamp_manager=(enabled=false,oldest_lag=0,stable_lag=0),"
- "workload_generator=(collection_count=1,"
- "insert_config=(key_format=i,key_size=0,value_format=S,"
- "value_size=0),insert_threads=0,key_count=0,key_format=i,"
- "key_size=0,ops_per_transaction=(max=1,min=),read_threads=0,"
- "update_config=(key_format=i,key_size=0,value_format=S,"
- "value_size=0),update_threads=0,value_format=S,value_size=0),"
+ "timestamp_manager=(enabled=false,interval=s,oldest_lag=0,"
+ "op_count=1,stable_lag=0),workload_generator=(collection_count=1,"
+ "enabled=false,insert_config=(interval=s,key_size=0,op_count=1,"
+ "value_size=0),insert_threads=0,interval=s,interval=s,key_count=0"
+ ",key_size=0,op_count=1,op_count=1,ops_per_transaction=(max=1,"
+ "min=),read_threads=0,update_config=(interval=s,key_size=0,"
+ "op_count=1,value_size=0),update_threads=0,value_size=0),"
"workload_tracking=(enabled=false)",
confchk_poc_test, 7},
{NULL, NULL, NULL, 0}};
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index 17e40ef84e6..47a28e016f2 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -664,116 +664,6 @@ __wt_conn_remove_extractor(WT_SESSION_IMPL *session)
}
/*
- * __tiered_confchk --
- * Check for a valid tiered storage source.
- */
-static int
-__tiered_confchk(
- WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cname, WT_NAMED_STORAGE_SOURCE **nstoragep)
-{
- WT_CONNECTION_IMPL *conn;
- WT_NAMED_STORAGE_SOURCE *nstorage;
-
- *nstoragep = NULL;
-
- if (cname->len == 0 || WT_STRING_MATCH("none", cname->str, cname->len))
- return (0);
-
- conn = S2C(session);
- TAILQ_FOREACH (nstorage, &conn->storagesrcqh, q)
- if (WT_STRING_MATCH(nstorage->name, cname->str, cname->len)) {
- *nstoragep = nstorage;
- return (0);
- }
- WT_RET_MSG(session, EINVAL, "unknown storage source '%.*s'", (int)cname->len, cname->str);
-}
-
-/*
- * __wt_tiered_bucket_config --
- * Given a configuration, configure the bucket storage.
- */
-int
-__wt_tiered_bucket_config(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, WT_CONFIG_ITEM *bucket,
- WT_BUCKET_STORAGE **bstoragep)
-{
- WT_BUCKET_STORAGE *bstorage, *new;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_NAMED_STORAGE_SOURCE *nstorage;
-#if 0
- WT_STORAGE_SOURCE *custom, *storage;
-#else
- WT_STORAGE_SOURCE *storage;
-#endif
- uint64_t hash_bucket, hash;
-
- *bstoragep = NULL;
-
- bstorage = new = NULL;
- conn = S2C(session);
-
- __wt_spin_lock(session, &conn->storage_lock);
-
- WT_ERR(__tiered_confchk(session, cval, &nstorage));
- if (nstorage == NULL) {
- if (bucket->len != 0)
- WT_ERR_MSG(
- session, EINVAL, "tiered_storage.bucket requires tiered_storage.name to be set");
- goto out;
- }
-
- /*
- * Check if tiered storage is set on the connection. If someone wants tiered storage on a table,
- * it needs to be configured on the database as well.
- */
- if (conn->bstorage == NULL && bstoragep != &conn->bstorage)
- WT_ERR_MSG(
- session, EINVAL, "table tiered storage requires connection tiered storage to be set");
- hash = __wt_hash_city64(bucket->str, bucket->len);
- hash_bucket = hash & (conn->hash_size - 1);
- TAILQ_FOREACH (bstorage, &nstorage->buckethashqh[hash_bucket], q)
- if (WT_STRING_MATCH(bstorage->bucket, bucket->str, bucket->len))
- goto out;
-
- WT_ERR(__wt_calloc_one(session, &new));
- WT_ERR(__wt_strndup(session, bucket->str, bucket->len, &new->bucket));
- storage = nstorage->storage_source;
-#if 0
- if (storage->customize != NULL) {
- custom = NULL;
- WT_ERR(storage->customize(storage, &session->iface, cfg_arg, &custom));
- if (custom != NULL) {
- bstorage->owned = 1;
- storage = custom;
- }
- }
-#endif
- new->storage_source = storage;
- if (bstorage != NULL) {
- new->object_size = bstorage->object_size;
- new->retain_secs = bstorage->retain_secs;
- WT_ERR(__wt_strdup(session, bstorage->auth_token, &new->auth_token));
- }
- TAILQ_INSERT_HEAD(&nstorage->bucketqh, new, q);
- TAILQ_INSERT_HEAD(&nstorage->buckethashqh[hash_bucket], new, hashq);
- F_SET(new, WT_BUCKET_FREE);
-
-out:
- __wt_spin_unlock(session, &conn->storage_lock);
- *bstoragep = new;
- return (0);
-
-err:
- if (bstorage != NULL) {
- __wt_free(session, new->auth_token);
- __wt_free(session, new->bucket);
- __wt_free(session, new);
- }
- __wt_spin_unlock(session, &conn->storage_lock);
- return (ret);
-}
-
-/*
* __conn_add_storage_source --
* WT_CONNECTION->add_storage_source method.
*/
@@ -864,10 +754,6 @@ __wt_conn_remove_storage_source(WT_SESSION_IMPL *session)
while ((bstorage = TAILQ_FIRST(&nstorage->bucketqh)) != NULL) {
/* Remove from the connection's list, free memory. */
TAILQ_REMOVE(&nstorage->bucketqh, bstorage, q);
- storage = bstorage->storage_source;
- WT_ASSERT(session, storage != NULL);
- if (bstorage->owned && storage->terminate != NULL)
- WT_TRET(storage->terminate(storage, (WT_SESSION *)session));
__wt_free(session, bstorage->auth_token);
__wt_free(session, bstorage->bucket);
__wt_free(session, bstorage);
@@ -888,6 +774,25 @@ __wt_conn_remove_storage_source(WT_SESSION_IMPL *session)
}
/*
+ * __conn_ext_file_system_get --
+ * WT_EXTENSION.file_system_get method. Get file system in use.
+ */
+static int
+__conn_ext_file_system_get(
+ WT_EXTENSION_API *wt_api, WT_SESSION *session, WT_FILE_SYSTEM **file_system)
+{
+ WT_FILE_SYSTEM *fs;
+
+ WT_UNUSED(session);
+
+ fs = ((WT_CONNECTION_IMPL *)wt_api->conn)->file_system;
+ if (fs == NULL)
+ return (WT_NOTFOUND);
+ *file_system = fs;
+ return (0);
+}
+
+/*
* __conn_get_extension_api --
* WT_CONNECTION.get_extension_api method.
*/
@@ -911,6 +816,7 @@ __conn_get_extension_api(WT_CONNECTION *wt_conn)
conn->extension_api.config_get_string = __wt_ext_config_get_string;
conn->extension_api.config_parser_open = __wt_ext_config_parser_open;
conn->extension_api.config_parser_open_arg = __wt_ext_config_parser_open_arg;
+ conn->extension_api.file_system_get = __conn_ext_file_system_get;
conn->extension_api.metadata_insert = __wt_ext_metadata_insert;
conn->extension_api.metadata_remove = __wt_ext_metadata_remove;
conn->extension_api.metadata_search = __wt_ext_metadata_search;
@@ -2898,6 +2804,12 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, const char *c
WT_ERR(__conn_load_extensions(session, cfg, false));
/*
+ * Do some early initialization for tiered storage, as this may affect our choice of file system
+ * for some operations.
+ */
+ WT_ERR(__wt_tiered_conn_config(session, cfg, false));
+
+ /*
* The metadata/log encryptor is configured after extensions, since
* extensions may load encryptors. We have to do this before creating
* the metadata file.
@@ -2982,7 +2894,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, const char *c
* FIXME-WT-6682: temporarily disable history store verification.
*/
if (verify_meta) {
- WT_ERR(__wt_open_internal_session(conn, "verify hs", false, 0, &verify_session));
+ WT_ERR(__wt_open_internal_session(conn, "verify hs", false, 0, 0, &verify_session));
ret = __wt_hs_verify(verify_session);
WT_TRET(__wt_session_close_internal(verify_session));
WT_ERR(ret);
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c
index 2634be7105e..53c4d4b1cc1 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache.c
@@ -251,7 +251,7 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
WT_RET(__wt_spin_init(session, &cache->evict_queue_lock, "cache eviction queue"));
WT_RET(__wt_spin_init(session, &cache->evict_walk_lock, "cache walk"));
if ((ret = __wt_open_internal_session(
- conn, "evict pass", false, WT_SESSION_NO_DATA_HANDLES, &cache->walk_session)) != 0)
+ conn, "evict pass", false, WT_SESSION_NO_DATA_HANDLES, 0, &cache->walk_session)) != 0)
WT_RET_MSG(NULL, ret, "Failed to create session for eviction walks");
/* Allocate the LRU eviction queue. */
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
index 149d2eac2d6..d7a317fd09c 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
@@ -241,7 +241,7 @@ __wt_conn_cache_pool_open(WT_SESSION_IMPL *session)
*/
session_flags = WT_SESSION_NO_DATA_HANDLES;
if ((ret = __wt_open_internal_session(
- conn, "cache-pool", false, session_flags, &cache->cp_session)) != 0)
+ conn, "cache-pool", false, session_flags, 0, &cache->cp_session)) != 0)
WT_RET_MSG(NULL, ret, "Failed to create session for cache pool");
/*
diff --git a/src/third_party/wiredtiger/src/conn/conn_capacity.c b/src/third_party/wiredtiger/src/conn/conn_capacity.c
index 3c453a79ede..a12ccec9147 100644
--- a/src/third_party/wiredtiger/src/conn/conn_capacity.c
+++ b/src/third_party/wiredtiger/src/conn/conn_capacity.c
@@ -134,7 +134,8 @@ __capacity_server_start(WT_CONNECTION_IMPL *conn)
/*
* The capacity server gets its own session.
*/
- WT_RET(__wt_open_internal_session(conn, "capacity-server", false, 0, &conn->capacity_session));
+ WT_RET(
+ __wt_open_internal_session(conn, "capacity-server", false, 0, 0, &conn->capacity_session));
session = conn->capacity_session;
WT_RET(__wt_cond_alloc(session, "capacity server", &conn->capacity_cond));
diff --git a/src/third_party/wiredtiger/src/conn/conn_ckpt.c b/src/third_party/wiredtiger/src/conn/conn_ckpt.c
index 8a1f599a18d..7ac53585134 100644
--- a/src/third_party/wiredtiger/src/conn/conn_ckpt.c
+++ b/src/third_party/wiredtiger/src/conn/conn_ckpt.c
@@ -144,7 +144,7 @@ __ckpt_server_start(WT_CONNECTION_IMPL *conn)
*/
session_flags = WT_SESSION_CAN_WAIT;
WT_RET(__wt_open_internal_session(
- conn, "checkpoint-server", true, session_flags, &conn->ckpt_session));
+ conn, "checkpoint-server", true, session_flags, 0, &conn->ckpt_session));
session = conn->ckpt_session;
WT_RET(__wt_cond_alloc(session, "checkpoint server", &conn->ckpt_cond));
diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c
index 73180a119f5..0c39475b207 100644
--- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c
+++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c
@@ -102,6 +102,9 @@ __conn_dhandle_config_set(WT_SESSION_IMPL *session)
case WT_DHANDLE_TYPE_TIERED:
WT_ERR(__wt_strdup(session, WT_CONFIG_BASE(session, tiered_meta), &dhandle->cfg[0]));
break;
+ case WT_DHANDLE_TYPE_TIERED_TREE:
+ WT_ERR(__wt_strdup(session, WT_CONFIG_BASE(session, tier_meta), &dhandle->cfg[0]));
+ break;
}
dhandle->cfg[1] = metaconf;
dhandle->meta_base = base;
@@ -133,6 +136,9 @@ __conn_dhandle_destroy(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle)
case WT_DHANDLE_TYPE_TIERED:
ret = __wt_tiered_close(session, (WT_TIERED *)dhandle);
break;
+ case WT_DHANDLE_TYPE_TIERED_TREE:
+ ret = __wt_tiered_tree_close(session, (WT_TIERED_TREE *)dhandle);
+ break;
}
__wt_rwlock_destroy(session, &dhandle->rwlock);
@@ -157,6 +163,7 @@ __wt_conn_dhandle_alloc(WT_SESSION_IMPL *session, const char *uri, const char *c
WT_DECL_RET;
WT_TABLE *table;
WT_TIERED *tiered;
+ WT_TIERED_TREE *tiered_tree;
uint64_t bucket;
/*
@@ -172,6 +179,10 @@ __wt_conn_dhandle_alloc(WT_SESSION_IMPL *session, const char *uri, const char *c
WT_RET(__wt_calloc_one(session, &table));
dhandle = (WT_DATA_HANDLE *)table;
dhandle->type = WT_DHANDLE_TYPE_TABLE;
+ } else if (WT_PREFIX_MATCH(uri, "tier:")) {
+ WT_RET(__wt_calloc_one(session, &tiered_tree));
+ dhandle = (WT_DATA_HANDLE *)tiered_tree;
+ dhandle->type = WT_DHANDLE_TYPE_TIERED_TREE;
} else if (WT_PREFIX_MATCH(uri, "tiered:")) {
WT_RET(__wt_calloc_one(session, &tiered));
dhandle = (WT_DATA_HANDLE *)tiered;
@@ -234,7 +245,7 @@ __wt_conn_dhandle_find(WT_SESSION_IMPL *session, const char *uri, const char *ch
conn = S2C(session);
/* We must be holding the handle list lock at a higher level. */
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST));
bucket = __wt_hash_city64(uri, strlen(uri)) & (conn->dh_hash_size - 1);
if (checkpoint == NULL) {
@@ -301,9 +312,9 @@ __wt_conn_dhandle_close(WT_SESSION_IMPL *session, bool final, bool mark_dead)
* schema lock we might deadlock with a thread that has the schema lock and wants a handle lock.
*/
no_schema_lock = false;
- if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) {
+ if (!FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA)) {
no_schema_lock = true;
- F_SET(session, WT_SESSION_NO_SCHEMA_LOCK);
+ FLD_SET(session->lock_flags, WT_SESSION_NO_SCHEMA_LOCK);
}
/*
@@ -378,6 +389,9 @@ __wt_conn_dhandle_close(WT_SESSION_IMPL *session, bool final, bool mark_dead)
case WT_DHANDLE_TYPE_TIERED:
WT_TRET(__wt_tiered_close(session, (WT_TIERED *)dhandle));
break;
+ case WT_DHANDLE_TYPE_TIERED_TREE:
+ WT_TRET(__wt_tiered_tree_close(session, (WT_TIERED_TREE *)dhandle));
+ break;
}
/*
@@ -415,7 +429,7 @@ err:
__wt_spin_unlock(session, &dhandle->close_lock);
if (no_schema_lock)
- F_CLR(session, WT_SESSION_NO_SCHEMA_LOCK);
+ FLD_CLR(session->lock_flags, WT_SESSION_NO_SCHEMA_LOCK);
if (is_btree)
__wt_evict_file_exclusive_off(session);
@@ -536,6 +550,9 @@ __wt_conn_dhandle_open(WT_SESSION_IMPL *session, const char *cfg[], uint32_t fla
case WT_DHANDLE_TYPE_TIERED:
WT_ERR(__wt_tiered_open(session, cfg));
break;
+ case WT_DHANDLE_TYPE_TIERED_TREE:
+ WT_ERR(__wt_tiered_tree_open(session, cfg));
+ break;
}
/*
@@ -756,7 +773,7 @@ __wt_conn_dhandle_close_all(WT_SESSION_IMPL *session, const char *uri, bool remo
conn = S2C(session);
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
WT_ASSERT(session, session->dhandle == NULL);
/*
@@ -795,7 +812,7 @@ __conn_dhandle_remove(WT_SESSION_IMPL *session, bool final)
dhandle = session->dhandle;
bucket = dhandle->name_hash & (conn->dh_hash_size - 1);
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
WT_ASSERT(session, dhandle != conn->cache->walk_tree);
/* Check if the handle was reacquired by a session while we waited. */
@@ -833,7 +850,7 @@ __wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, bool final, bool mark
* Kludge: interrupt the eviction server in case it is holding the handle list lock.
*/
set_pass_intr = false;
- if (!F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) {
+ if (!FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST)) {
set_pass_intr = true;
(void)__wt_atomic_addv32(&S2C(session)->cache->pass_intr, 1);
}
diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c
index d913b251050..532975fc571 100644
--- a/src/third_party/wiredtiger/src/conn/conn_log.c
+++ b/src/third_party/wiredtiger/src/conn/conn_log.c
@@ -54,7 +54,7 @@ __logmgr_force_archive(WT_SESSION_IMPL *session, uint32_t lognum)
log = conn->log;
sleep_usecs = yield_cnt = 0;
- WT_RET(__wt_open_internal_session(conn, "compatibility-reconfig", true, 0, &tmp_session));
+ WT_RET(__wt_open_internal_session(conn, "compatibility-reconfig", true, 0, 0, &tmp_session));
while (log->first_lsn.l.file < lognum) {
/*
* Force a checkpoint to be written in the new log file and force the archiving of all
@@ -1043,7 +1043,7 @@ __wt_logmgr_open(WT_SESSION_IMPL *session)
*/
session_flags = WT_SESSION_NO_DATA_HANDLES;
WT_RET(__wt_open_internal_session(
- conn, "log-close-server", false, session_flags, &conn->log_file_session));
+ conn, "log-close-server", false, session_flags, 0, &conn->log_file_session));
WT_RET(__wt_cond_alloc(conn->log_file_session, "log close server", &conn->log_file_cond));
/*
@@ -1058,7 +1058,7 @@ __wt_logmgr_open(WT_SESSION_IMPL *session)
* runs.
*/
WT_RET(__wt_open_internal_session(
- conn, "log-wrlsn-server", false, session_flags, &conn->log_wrlsn_session));
+ conn, "log-wrlsn-server", false, session_flags, 0, &conn->log_wrlsn_session));
WT_RET(__wt_cond_auto_alloc(
conn->log_wrlsn_session, "log write lsn server", 10000, WT_MILLION, &conn->log_wrlsn_cond));
WT_RET(__wt_thread_create(
@@ -1076,8 +1076,8 @@ __wt_logmgr_open(WT_SESSION_IMPL *session)
__wt_cond_signal(session, conn->log_cond);
} else {
/* The log server gets its own session. */
- WT_RET(
- __wt_open_internal_session(conn, "log-server", false, session_flags, &conn->log_session));
+ WT_RET(__wt_open_internal_session(
+ conn, "log-server", false, session_flags, 0, &conn->log_session));
WT_RET(__wt_cond_auto_alloc(
conn->log_session, "log server", 50000, WT_MILLION, &conn->log_cond));
diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c
index 0fa1a53b629..20467b4228b 100644
--- a/src/third_party/wiredtiger/src/conn/conn_open.c
+++ b/src/third_party/wiredtiger/src/conn/conn_open.c
@@ -28,7 +28,7 @@ __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[])
* Open the default session. We open this before starting service threads because those may
* allocate and use session resources that need to get cleaned up on close.
*/
- WT_RET(__wt_open_internal_session(conn, "connection", false, 0, &session));
+ WT_RET(__wt_open_internal_session(conn, "connection", false, 0, 0, &session));
/*
* The connection's default session is originally a static structure, swap that out for a more
diff --git a/src/third_party/wiredtiger/src/conn/conn_stat.c b/src/third_party/wiredtiger/src/conn/conn_stat.c
index 64e67b1acf1..a0473f24d78 100644
--- a/src/third_party/wiredtiger/src/conn/conn_stat.c
+++ b/src/third_party/wiredtiger/src/conn/conn_stat.c
@@ -617,7 +617,7 @@ __statlog_start(WT_CONNECTION_IMPL *conn)
FLD_SET(conn->server_flags, WT_CONN_SERVER_STATISTICS);
/* The statistics log server gets its own session. */
- WT_RET(__wt_open_internal_session(conn, "statlog-server", true, 0, &conn->stat_session));
+ WT_RET(__wt_open_internal_session(conn, "statlog-server", true, 0, 0, &conn->stat_session));
session = conn->stat_session;
WT_RET(__wt_cond_alloc(session, "statistics log server", &conn->stat_cond));
diff --git a/src/third_party/wiredtiger/src/conn/conn_sweep.c b/src/third_party/wiredtiger/src/conn/conn_sweep.c
index 7abd47d626e..90f050e161d 100644
--- a/src/third_party/wiredtiger/src/conn/conn_sweep.c
+++ b/src/third_party/wiredtiger/src/conn/conn_sweep.c
@@ -388,8 +388,8 @@ __wt_sweep_create(WT_SESSION_IMPL *session)
* manager. Sweep should not block due to the cache being full.
*/
session_flags = WT_SESSION_CAN_WAIT | WT_SESSION_IGNORE_CACHE_SIZE;
- WT_RET(
- __wt_open_internal_session(conn, "sweep-server", true, session_flags, &conn->sweep_session));
+ WT_RET(__wt_open_internal_session(
+ conn, "sweep-server", true, session_flags, 0, &conn->sweep_session));
session = conn->sweep_session;
WT_RET(__wt_cond_alloc(session, "handle sweep server", &conn->sweep_cond));
diff --git a/src/third_party/wiredtiger/src/conn/conn_tiered.c b/src/third_party/wiredtiger/src/conn/conn_tiered.c
index 2c0d95542ce..4d8a2ab5958 100644
--- a/src/third_party/wiredtiger/src/conn/conn_tiered.c
+++ b/src/third_party/wiredtiger/src/conn/conn_tiered.c
@@ -23,18 +23,46 @@
* __flush_tier_once --
* Perform one iteration of tiered storage maintenance.
*/
-static void
+static int
__flush_tier_once(WT_SESSION_IMPL *session, bool force)
{
- WT_UNUSED(session);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ const char *key, *value;
+
WT_UNUSED(force);
+ __wt_verbose(session, WT_VERB_TIERED, "%s", "FLUSH_TIER_ONCE: Called");
/*
* - See if there is any "merging" work to do to prepare and create an object that is
* suitable for placing onto tiered storage.
* - Do the work to create said objects.
* - Move the objects.
*/
- return;
+ cursor = NULL;
+ WT_RET(__wt_metadata_cursor(session, &cursor));
+ while (cursor->next(cursor) == 0) {
+ cursor->get_key(cursor, &key);
+ cursor->get_value(cursor, &value);
+ /* For now just switch tiers which just does metadata manipulation. */
+ if (WT_PREFIX_MATCH(key, "tiered:")) {
+ __wt_verbose(session, WT_VERB_TIERED, "FLUSH_TIER_ONCE: %s %s", key, value);
+ WT_ERR(__wt_session_get_dhandle(session, key, NULL, NULL, WT_DHANDLE_EXCLUSIVE));
+ /*
+ * When we call wt_tiered_switch the session->dhandle points to the tiered: entry and
+ * the arg is the config string that is currently in the metadata.
+ */
+ WT_ERR(__wt_tiered_switch(session, value));
+ WT_ERR(__wt_session_release_dhandle(session));
+ }
+ }
+ WT_ERR(__wt_metadata_cursor_release(session, &cursor));
+
+ return (0);
+
+err:
+ WT_TRET(__wt_session_release_dhandle(session));
+ WT_TRET(__wt_metadata_cursor_release(session, &cursor));
+ return (ret);
}
/*
@@ -92,6 +120,24 @@ err:
}
/*
+ * __tier_storage_copy --
+ * Perform one iteration of copying newly flushed objects to the shared storage.
+ */
+static int
+__tier_storage_copy(WT_SESSION_IMPL *session)
+{
+ /*
+ * Walk the work queue and copy file:<name> to shared storage object:<name>. Walk a tiered
+ * table's tiers array and copy it to any tier that allows WT_TIERS_OP_FLUSH.
+ */
+ /* XXX: We don't want to call this here, it is just to quiet the compiler that this function
+ * can return NULL. So it is a placeholder until we have real content here.
+ */
+ WT_RET(__tier_storage_remove_local(session, NULL, 0));
+ return (0);
+}
+
+/*
* __tier_storage_remove --
* Perform one iteration of tiered storage local tier removal.
*/
@@ -117,11 +163,12 @@ int
__wt_flush_tier(WT_SESSION_IMPL *session, const char *config)
{
WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
const char *cfg[3];
bool force;
WT_STAT_CONN_INCR(session, flush_tier);
- if (FLD_ISSET(S2C(session)->server_flags, WT_CONN_SERVER_TIERED))
+ if (FLD_ISSET(S2C(session)->server_flags, WT_CONN_SERVER_TIERED_MGR))
WT_RET_MSG(
session, EINVAL, "Cannot call flush_tier when storage manager thread is configured");
@@ -131,8 +178,8 @@ __wt_flush_tier(WT_SESSION_IMPL *session, const char *config)
WT_RET(__wt_config_gets(session, cfg, "force", &cval));
force = cval.val != 0;
- __flush_tier_once(session, force);
- return (0);
+ WT_WITH_SCHEMA_LOCK(session, ret = __flush_tier_once(session, force));
+ return (ret);
}
/*
@@ -147,7 +194,7 @@ __tiered_manager_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp)
WT_TIERED_MANAGER *mgr;
conn = S2C(session);
- mgr = &conn->tiered_manager;
+ mgr = &conn->tiered_mgr;
/* Only start the server if wait time is non-zero */
WT_RET(__wt_config_gets(session, cfg, "tiered_manager.wait", &cval));
@@ -171,92 +218,79 @@ __tiered_manager_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp)
}
/*
- * __wt_tiered_common_config --
- * Parse configuration options common to connection and btrees.
+ * __tiered_server_run_chk --
+ * Check to decide if the tiered storage server should continue running.
*/
-int
-__wt_tiered_common_config(WT_SESSION_IMPL *session, const char **cfg, WT_BUCKET_STORAGE *bstorage)
+static bool
+__tiered_server_run_chk(WT_SESSION_IMPL *session)
{
- WT_CONFIG_ITEM cval;
-
- WT_RET(__wt_config_gets(session, cfg, "tiered_storage.local_retention", &cval));
- bstorage->retain_secs = (uint64_t)cval.val;
-
- WT_RET(__wt_config_gets(session, cfg, "tiered_storage.object_target_size", &cval));
- bstorage->object_size = (uint64_t)cval.val;
-
- WT_RET(__wt_config_gets(session, cfg, "tiered_storage.auth_token", &cval));
- /*
- * This call is purposely the last configuration processed so we don't need memory management
- * code and an error label to free it. Note this if any code is added after this line.
- */
- WT_RET(__wt_strndup(session, cval.str, cval.len, &bstorage->auth_token));
- return (0);
+ return (FLD_ISSET(S2C(session)->server_flags, WT_CONN_SERVER_TIERED));
}
/*
- * __tiered_config --
- * Parse and setup the storage server options.
+ * __tiered_server --
+ * The tiered storage server thread.
*/
-static int
-__tiered_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp, bool reconfig)
+static WT_THREAD_RET
+__tiered_server(void *arg)
{
- WT_CONFIG_ITEM bucket, cval;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ WT_ITEM path, tmp;
+ WT_SESSION_IMPL *session;
+ session = arg;
conn = S2C(session);
- if (!reconfig) {
- WT_RET(__wt_config_gets(session, cfg, "tiered_storage.name", &cval));
- WT_RET(__wt_config_gets(session, cfg, "tiered_storage.bucket", &bucket));
- WT_RET(__wt_tiered_bucket_config(session, &cval, &bucket, &conn->bstorage));
- }
- /* If the connection is not set up for tiered storage there is nothing more to do. */
- if (conn->bstorage == NULL)
- return (0);
+ WT_CLEAR(path);
+ WT_CLEAR(tmp);
- WT_ASSERT(session, conn->bstorage != NULL);
- WT_RET(__wt_tiered_common_config(session, cfg, conn->bstorage));
- WT_STAT_CONN_SET(session, tiered_object_size, conn->bstorage->object_size);
- WT_STAT_CONN_SET(session, tiered_retention, conn->bstorage->retain_secs);
+ for (;;) {
+ /* Wait until the next event. */
+ __wt_cond_wait(session, conn->tiered_cond, WT_MINUTE, __tiered_server_run_chk);
- /* The strings for unique identification are connection level not per bucket. */
- WT_RET(__wt_config_gets(session, cfg, "tiered_storage.cluster", &cval));
- WT_ERR(__wt_strndup(session, cval.str, cval.len, &conn->tiered_cluster));
- WT_ERR(__wt_config_gets(session, cfg, "tiered_storage.member", &cval));
- WT_ERR(__wt_strndup(session, cval.str, cval.len, &conn->tiered_member));
+ /* Check if we're quitting or being reconfigured. */
+ if (!__tiered_server_run_chk(session))
+ break;
- return (__tiered_manager_config(session, cfg, runp));
+ /*
+ * Here is where we do work. Work we expect to do:
+ * - Copy any files that need moving from a flush tier call.
+ * - Remove any cached objects that are aged out.
+ */
+ WT_ERR(__tier_storage_copy(session));
+ WT_ERR(__tier_storage_remove(session, false));
+ }
+
+ if (0) {
err:
- __wt_free(session, conn->bstorage->auth_token);
- __wt_free(session, conn->bstorage->bucket);
- __wt_free(session, conn->bstorage);
- __wt_free(session, conn->tiered_cluster);
- __wt_free(session, conn->tiered_member);
- return (ret);
+ WT_IGNORE_RET(__wt_panic(session, ret, "storage server error"));
+ }
+ __wt_buf_free(session, &path);
+ __wt_buf_free(session, &tmp);
+ return (WT_THREAD_RET_VALUE);
}
/*
- * __tiered_server_run_chk --
- * Check to decide if the tiered storage server should continue running.
+ * __tiered_mgr_run_chk --
+ * Check to decide if the tiered storage manager should continue running.
*/
static bool
-__tiered_server_run_chk(WT_SESSION_IMPL *session)
+__tiered_mgr_run_chk(WT_SESSION_IMPL *session)
{
WT_CONNECTION_IMPL *conn;
conn = S2C(session);
- return ((FLD_ISSET(conn->server_flags, WT_CONN_SERVER_TIERED)) &&
- !F_ISSET(&conn->tiered_manager, WT_TIERED_MANAGER_SHUTDOWN));
+ return ((FLD_ISSET(conn->server_flags, WT_CONN_SERVER_TIERED_MGR)) &&
+ !F_ISSET(&conn->tiered_mgr, WT_TIERED_MANAGER_SHUTDOWN));
}
/*
- * __tiered_server --
- * The tiered storage server thread.
+ * __tiered_mgr_server --
+ * The tiered storage manager thread.
*/
static WT_THREAD_RET
-__tiered_server(void *arg)
+__tiered_mgr_server(void *arg)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
@@ -266,22 +300,21 @@ __tiered_server(void *arg)
session = arg;
conn = S2C(session);
- mgr = &conn->tiered_manager;
+ mgr = &conn->tiered_mgr;
WT_CLEAR(path);
WT_CLEAR(tmp);
for (;;) {
/* Wait until the next event. */
- __wt_cond_wait(session, conn->tiered_cond, mgr->wait_usecs, __tiered_server_run_chk);
+ __wt_cond_wait(session, conn->tiered_mgr_cond, mgr->wait_usecs, __tiered_mgr_run_chk);
/* Check if we're quitting or being reconfigured. */
- if (!__tiered_server_run_chk(session))
+ if (!__tiered_mgr_run_chk(session))
break;
/*
* Here is where we do work. Work we expect to do:
- *
*/
__flush_tier_once(session, false);
WT_ERR(__tier_storage_remove(session, false));
@@ -295,10 +328,31 @@ err:
__wt_buf_free(session, &tmp);
return (WT_THREAD_RET_VALUE);
}
+/*
+ * __tiered_mgr_start --
+ * Start the tiered manager flush thread.
+ */
+static int
+__tiered_mgr_start(WT_CONNECTION_IMPL *conn)
+{
+ WT_SESSION_IMPL *session;
+
+ FLD_SET(conn->server_flags, WT_CONN_SERVER_TIERED_MGR);
+ WT_RET(__wt_open_internal_session(
+ conn, "storage-mgr-server", true, 0, 0, &conn->tiered_mgr_session));
+ session = conn->tiered_mgr_session;
+
+ WT_RET(__wt_cond_alloc(session, "storage server", &conn->tiered_mgr_cond));
+
+ /* Start the thread. */
+ WT_RET(__wt_thread_create(session, &conn->tiered_mgr_tid, __tiered_mgr_server, session));
+ conn->tiered_mgr_tid_set = true;
+ return (0);
+}
/*
* __wt_tiered_storage_create --
- * Start the tiered storage server thread.
+ * Start the tiered storage subsystem.
*/
int
__wt_tiered_storage_create(WT_SESSION_IMPL *session, const char *cfg[], bool reconfig)
@@ -312,14 +366,14 @@ __wt_tiered_storage_create(WT_SESSION_IMPL *session, const char *cfg[], bool rec
/* Destroy any existing thread since we could be a reconfigure. */
WT_RET(__wt_tiered_storage_destroy(session));
- WT_RET(__tiered_config(session, cfg, &start, reconfig));
- if (!start)
- return (0);
+ if (reconfig)
+ WT_RET(__wt_tiered_conn_config(session, cfg, reconfig));
+ WT_RET(__tiered_manager_config(session, cfg, &start));
- /* Set first, the thread might run before we finish up. */
+ /* Start the internal thread. */
FLD_SET(conn->server_flags, WT_CONN_SERVER_TIERED);
- WT_ERR(__wt_open_internal_session(conn, "storage-server", true, 0, &conn->tiered_session));
+ WT_ERR(__wt_open_internal_session(conn, "storage-server", true, 0, 0, &conn->tiered_session));
session = conn->tiered_session;
WT_ERR(__wt_cond_alloc(session, "storage server", &conn->tiered_cond));
@@ -328,6 +382,10 @@ __wt_tiered_storage_create(WT_SESSION_IMPL *session, const char *cfg[], bool rec
WT_ERR(__wt_thread_create(session, &conn->tiered_tid, __tiered_server, session));
conn->tiered_tid_set = true;
+ /* After starting non-configurable threads, start the tiered manager if needed. */
+ if (start)
+ WT_ERR(__tiered_mgr_start(conn));
+
if (0) {
err:
WT_TRET(__wt_tiered_storage_destroy(session));
@@ -346,23 +404,32 @@ __wt_tiered_storage_destroy(WT_SESSION_IMPL *session)
WT_DECL_RET;
conn = S2C(session);
- __wt_free(session, conn->tiered_cluster);
- __wt_free(session, conn->tiered_member);
- /* Stop the server thread. */
- FLD_CLR(conn->server_flags, WT_CONN_SERVER_TIERED);
+ /* Stop the internal server thread. */
+ FLD_CLR(conn->server_flags, WT_CONN_SERVER_TIERED | WT_CONN_SERVER_TIERED_MGR);
if (conn->tiered_tid_set) {
__wt_cond_signal(session, conn->tiered_cond);
WT_TRET(__wt_thread_join(session, &conn->tiered_tid));
conn->tiered_tid_set = false;
}
__wt_cond_destroy(session, &conn->tiered_cond);
-
- /* Close the server thread's session. */
if (conn->tiered_session != NULL) {
WT_TRET(__wt_session_close_internal(conn->tiered_session));
conn->tiered_session = NULL;
}
+ /* Stop the storage manager thread. */
+ if (conn->tiered_mgr_tid_set) {
+ __wt_cond_signal(session, conn->tiered_mgr_cond);
+ WT_TRET(__wt_thread_join(session, &conn->tiered_mgr_tid));
+ conn->tiered_mgr_tid_set = false;
+ }
+ __wt_cond_destroy(session, &conn->tiered_mgr_cond);
+
+ if (conn->tiered_mgr_session != NULL) {
+ WT_TRET(__wt_session_close_internal(conn->tiered_mgr_session));
+ conn->tiered_mgr_session = NULL;
+ }
+
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c
index 28ffa72c6c8..2d7be4f4af7 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_backup.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c
@@ -335,10 +335,12 @@ __backup_add_id(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval)
blk = NULL;
for (i = 0; i < WT_BLKINCR_MAX; ++i) {
blk = &conn->incr_backups[i];
- __wt_verbose(session, WT_VERB_BACKUP, "blk[%u] flags 0x%" PRIx64, i, blk->flags);
/* If it isn't already in use, we can use it. */
- if (!F_ISSET(blk, WT_BLKINCR_INUSE))
+ if (!F_ISSET(blk, WT_BLKINCR_INUSE)) {
+ __wt_verbose(session, WT_VERB_BACKUP, "Free blk[%u] entry", i);
break;
+ }
+ __wt_verbose(session, WT_VERB_BACKUP, "Entry blk[%u] has flags 0x%" PRIx64, i, blk->flags);
}
/*
* We didn't find an entry. This should not happen.
@@ -364,11 +366,12 @@ __backup_add_id(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval)
/*
* If we don't find any checkpoint, backup files need to be full copy.
*/
- __wt_verbose(session, WT_VERB_BACKUP, "ID %s: Did not find any metadata checkpoint for %s.",
- blk->id_str, WT_METAFILE_URI);
+ __wt_verbose(session, WT_VERB_BACKUP,
+ "Backup id %s: Did not find any metadata checkpoint for %s.", blk->id_str,
+ WT_METAFILE_URI);
F_SET(blk, WT_BLKINCR_FULL);
} else {
- __wt_verbose(session, WT_VERB_BACKUP, "Using backup slot %u for id %s", i, blk->id_str);
+ __wt_verbose(session, WT_VERB_BACKUP, "Backup id %s using backup slot %u", blk->id_str, i);
F_CLR(blk, WT_BLKINCR_FULL);
}
F_SET(blk, WT_BLKINCR_VALID);
@@ -402,11 +405,12 @@ __backup_find_id(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, WT_BLKINCR **in
WT_RET_MSG(session, EINVAL, "Incremental backup structure already in use");
if (incrp != NULL)
*incrp = blk;
- __wt_verbose(session, WT_VERB_BACKUP, "Found backup slot %u for id %s", i, blk->id_str);
+ __wt_verbose(
+ session, WT_VERB_BACKUP, "Found src id %s at backup slot %u", blk->id_str, i);
return (0);
}
}
- __wt_verbose(session, WT_VERB_BACKUP, "Did not find %.*s", (int)cval->len, cval->str);
+ __wt_verbose(session, WT_VERB_BACKUP, "Search %.*s not found", (int)cval->len, cval->str);
return (WT_NOTFOUND);
}
@@ -474,6 +478,8 @@ __backup_config(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *cfg[
if (conn->incr_granularity != 0)
WT_RET_MSG(session, EINVAL, "Cannot change the incremental backup granularity");
conn->incr_granularity = (uint64_t)cval.val;
+ __wt_verbose(session, WT_VERB_BACKUP, "Backup config set granularity value %" PRIu64,
+ conn->incr_granularity);
}
/* Granularity can only be set once at the beginning */
F_SET(conn, WT_CONN_INCR_BACKUP);
diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c b/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c
index 1a1db66520e..9b31214a0ee 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c
@@ -70,6 +70,12 @@ __curbackup_incr_blkmod(WT_SESSION_IMPL *session, WT_BTREE *btree, WT_CURSOR_BAC
cb->nbits = (uint64_t)b.val;
WT_ERR(__wt_config_subgets(session, &v, "offset", &b));
cb->offset = (uint64_t)b.val;
+
+ __wt_verbose(session, WT_VERB_BACKUP,
+ "Found modified incr block gran %" PRIu64 " nbits %" PRIu64 " offset %" PRIu64,
+ cb->granularity, cb->nbits, cb->offset);
+ __wt_verbose(session, WT_VERB_BACKUP, "Modified incr block config: \"%s\"", config);
+
/*
* The rename configuration string component was added later. So don't error if we don't
* find it in the string. If we don't have it, we're not doing a rename.
@@ -144,6 +150,8 @@ __curbackup_incr_next(WT_CURSOR *cursor)
* incremental cursor below and return WT_NOTFOUND.
*/
F_SET(cb, WT_CURBACKUP_INCR_INIT);
+ __wt_verbose(session, WT_VERB_BACKUP, "Set key WT_BACKUP_FILE %s size %" PRIuMAX,
+ cb->incr_file, (uintmax_t)size);
__wt_cursor_set_key(cursor, 0, size, WT_BACKUP_FILE);
} else {
if (!F_ISSET(cb, WT_CURBACKUP_INCR_INIT)) {
@@ -171,6 +179,8 @@ __curbackup_incr_next(WT_CURSOR *cursor)
if (F_ISSET(cb, WT_CURBACKUP_RENAME) ||
(F_ISSET(cb, WT_CURBACKUP_CKPT_FAKE) && F_ISSET(cb, WT_CURBACKUP_HAS_CB_INFO))) {
WT_ERR(__wt_fs_size(session, cb->incr_file, &size));
+ __wt_verbose(session, WT_VERB_BACKUP,
+ "Set key WT_BACKUP_FILE %s size %" PRIuMAX, cb->incr_file, (uintmax_t)size);
__wt_cursor_set_key(cursor, 0, size, WT_BACKUP_FILE);
goto done;
}
@@ -206,6 +216,9 @@ __curbackup_incr_next(WT_CURSOR *cursor)
WT_ERR(WT_NOTFOUND);
WT_ASSERT(session, cb->granularity != 0);
WT_ASSERT(session, total_len != 0);
+ __wt_verbose(session, WT_VERB_BACKUP,
+ "Set key WT_BACKUP_RANGE %s offset %" PRIu64 " length %" PRIu64, cb->incr_file,
+ cb->offset + cb->granularity * start_bitoff, total_len);
__wt_cursor_set_key(
cursor, cb->offset + cb->granularity * start_bitoff, total_len, WT_BACKUP_RANGE);
}
diff --git a/src/third_party/wiredtiger/src/cursor/cur_std.c b/src/third_party/wiredtiger/src/cursor/cur_std.c
index 1e5d2a5dec7..19a50939a7a 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_std.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_std.c
@@ -757,18 +757,35 @@ err:
}
/*
+ * __wt_cursor_get_hash --
+ * Get hash value from the given uri.
+ */
+void
+__wt_cursor_get_hash(
+ WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *to_dup, uint64_t *hash_value)
+{
+ if (to_dup != NULL) {
+ WT_ASSERT(session, uri == NULL);
+ *hash_value = to_dup->uri_hash;
+ } else {
+ WT_ASSERT(session, uri != NULL);
+ *hash_value = __wt_hash_city64(uri, strlen(uri));
+ }
+}
+
+/*
* __wt_cursor_cache_get --
* Open a matching cursor from the cache.
*/
int
-__wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *to_dup,
- const char *cfg[], WT_CURSOR **cursorp)
+__wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri, uint64_t hash_value,
+ WT_CURSOR *to_dup, const char *cfg[], WT_CURSOR **cursorp)
{
WT_CONFIG_ITEM cval;
WT_CURSOR *cursor;
WT_CURSOR_BTREE *cbt;
WT_DECL_RET;
- uint64_t bucket, hash_value;
+ uint64_t bucket;
uint32_t overwrite_flag;
bool have_config;
@@ -818,18 +835,8 @@ __wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *to_d
return (WT_NOTFOUND);
}
- /*
- * Caller guarantees that exactly one of the URI and the duplicate cursor is non-NULL.
- */
- if (to_dup != NULL) {
- WT_ASSERT(session, uri == NULL);
+ if (to_dup != NULL)
uri = to_dup->uri;
- hash_value = to_dup->uri_hash;
- } else {
- WT_ASSERT(session, uri != NULL);
- hash_value = __wt_hash_city64(uri, strlen(uri));
- }
-
/*
* Walk through all cursors, if there is a cached cursor that matches uri and configuration, use
* it.
@@ -848,7 +855,8 @@ __wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *to_d
* For these configuration values, there is no difference in the resulting cursor other
* than flag values, so fix them up according to the given configuration.
*/
- F_CLR(cursor, WT_CURSTD_APPEND | WT_CURSTD_RAW | WT_CURSTD_OVERWRITE);
+ F_CLR(cursor,
+ WT_CURSTD_APPEND | WT_CURSTD_PREFIX_SEARCH | WT_CURSTD_RAW | WT_CURSTD_OVERWRITE);
F_SET(cursor, overwrite_flag);
/*
* If this is a btree cursor, clear its read_once flag.
@@ -1052,6 +1060,22 @@ __wt_cursor_reconfigure(WT_CURSOR *cursor, const char *config)
} else
WT_ERR_NOTFOUND_OK(ret, false);
+ /* Set the prefix search near flag. */
+ if ((ret = __wt_config_getones(session, config, "prefix_key", &cval)) == 0) {
+ if (cval.val) {
+ /* Prefix search near configuration can only be used for row-store. */
+ if (WT_CURSOR_RECNO(cursor))
+ WT_ERR_MSG(
+ session, EINVAL, "cannot use prefix key search near for column store formats");
+ if (CUR2BT(cursor)->collator != NULL)
+ WT_ERR_MSG(
+ session, EINVAL, "cannot use prefix key search near with a custom collator");
+ F_SET(cursor, WT_CURSTD_PREFIX_SEARCH);
+ } else
+ F_CLR(cursor, WT_CURSTD_PREFIX_SEARCH);
+ } else
+ WT_ERR_NOTFOUND_OK(ret, false);
+
WT_ERR(__cursor_config_debug(cursor, cfg));
err:
@@ -1113,8 +1137,11 @@ __wt_cursor_init(
session = CUR2S(cursor);
- if (cursor->internal_uri == NULL)
+ if (cursor->internal_uri == NULL) {
+ /* Various cursor code assumes there is an internal URI, so there better be one to set. */
+ WT_ASSERT(session, uri != NULL);
WT_RET(__wt_strdup(session, uri, &cursor->internal_uri));
+ }
/*
* append The append flag is only relevant to column stores.
diff --git a/src/third_party/wiredtiger/src/docs/arch-transaction.dox b/src/third_party/wiredtiger/src/docs/arch-transaction.dox
index d15a3cbb8d5..bb26829f509 100644
--- a/src/third_party/wiredtiger/src/docs/arch-transaction.dox
+++ b/src/third_party/wiredtiger/src/docs/arch-transaction.dox
@@ -156,14 +156,6 @@ chain, it checks the version on the disk image, which is the version that was ch
to disk in the last reconciliation. If it is still invisible, WiredTiger will search the history
store to check if there is a version visible to the reader there.
-The repeated read guarantee under snapshot isolation may break in one case if the timestamps
-committed to the updates are out of order, e.g,
-
-`U@20 -> U@30 -> U@15`
-
-In the above example, reading with timestamp 15 doesn't guarantee to return the third update. In
-some cases, users may read the second update U@30 if it is moved to the history store.
-
@subsection Durability
WiredTiger transactions support commit level durability and checkpoint level durability. An
diff --git a/src/third_party/wiredtiger/src/docs/custom-storage-sources.dox b/src/third_party/wiredtiger/src/docs/custom-storage-sources.dox
index e1a0b10644e..ffa8ef11783 100644
--- a/src/third_party/wiredtiger/src/docs/custom-storage-sources.dox
+++ b/src/third_party/wiredtiger/src/docs/custom-storage-sources.dox
@@ -63,12 +63,10 @@ It must always be provided when WiredTiger is reopened (again, with the ::wiredt
@section storage_examples Storage source examples
-There are two kinds of example code with overlapping functionality.
-A simple, self contained storage source example is in @ex_ref{ex_storage_source.c}.
-This example includes a small demo storage source that is a no-op and
-simply returns. This example also shows how a storage source is configured
-within an application. The second set of examples are in \c ext/storage. These are
-storage source only (no application level code), showing how a storage source
-might be packaged in a loadable shared library.
+An example of a storage source exists in \c ext/storage_sources/local_store/local_store.c.
+This storage source emulates cloud storage by storing all objects on the local file system.
+This example does not include application level code to call it. By default, WiredTiger builds
+it as a loadable shared library, and it can be loaded during a ::wiredtiger_open call as with
+any other extension, and \c local_store can be specified to be used with tiered storage system.
*/
diff --git a/src/third_party/wiredtiger/src/docs/examples.dox b/src/third_party/wiredtiger/src/docs/examples.dox
index 26167ab0631..d5a5102be61 100644
--- a/src/third_party/wiredtiger/src/docs/examples.dox
+++ b/src/third_party/wiredtiger/src/docs/examples.dox
@@ -52,9 +52,6 @@ Shows how to create column-oriented data and access individual columns.
@example ex_stat.c
Shows how to access database and table statistics.
-@example ex_storage_source.c
-Shows how to extend WiredTiger with a custom storage source implementation.
-
@example ex_thread.c
Shows how to access a database with multiple threads.
diff --git a/src/third_party/wiredtiger/src/docs/spell.ok b/src/third_party/wiredtiger/src/docs/spell.ok
index 8a19d8a5d27..cb5f89ab81d 100644
--- a/src/third_party/wiredtiger/src/docs/spell.ok
+++ b/src/third_party/wiredtiger/src/docs/spell.ok
@@ -121,6 +121,8 @@ WiredTiger
WiredTiger's
WiredTigerCheckpoint
WiredTigerException
+WiredTigerHS
+WiredTigerLAS
WiredTigerLog
WiredTigerPanicException
WiredTigerPreplog
@@ -499,6 +501,7 @@ readonly
realclean
realloc
realloc'd
+rebalance
recno
recnoN
recnum
diff --git a/src/third_party/wiredtiger/src/docs/top/main.dox b/src/third_party/wiredtiger/src/docs/top/main.dox
index 1df794243b6..bcb293f04b8 100644
--- a/src/third_party/wiredtiger/src/docs/top/main.dox
+++ b/src/third_party/wiredtiger/src/docs/top/main.dox
@@ -6,12 +6,12 @@ WiredTiger is an high performance, scalable, production quality, NoSQL,
@section releases Releases
<table>
-@row{<b>WiredTiger 3.2.1</b> (current),
+@row{<b>WiredTiger 10.0.0</b> (current),
+ <a href="releases/wiredtiger-10.0.0.tar.bz2"><b>[Release package]</b></a>,
+ <a href="10.0.0/index.html"><b>[Documentation]</b></a>}
+@row{<b>WiredTiger 3.2.1</b> (previous),
<a href="releases/wiredtiger-3.2.1.tar.bz2"><b>[Release package]</b></a>,
<a href="3.2.1/index.html"><b>[Documentation]</b></a>}
-@row{<b>WiredTiger 3.1.0</b> (previous),
- <a href="releases/wiredtiger-3.1.0.tar.bz2"><b>[Release package]</b></a>,
- <a href="3.1.0/index.html"><b>[Documentation]</b></a>}
@row{<b>Development branch</b>,
<a href="https://github.com/wiredtiger/wiredtiger"><b>[Source code]</b></a>,
<a href="develop/index.html"><b>[Documentation]</b></a>}
diff --git a/src/third_party/wiredtiger/src/docs/upgrading.dox b/src/third_party/wiredtiger/src/docs/upgrading.dox
index 5b072c319fd..e92dde30464 100644
--- a/src/third_party/wiredtiger/src/docs/upgrading.dox
+++ b/src/third_party/wiredtiger/src/docs/upgrading.dox
@@ -1,14 +1,59 @@
/*! @page upgrading Upgrading WiredTiger applications
</dl><hr>
-@section version_322 Upgrading to Version 3.2.2
+@section version_1000 Upgrading to Version 10.0.0
<dl>
+<dt>LAS and HS files</dt>
+<dd>
+The WiredTigerLAS.wt file will no longer be generated by the cache overflow mechanism.
+Instead, a WiredTigerHS.wt file will be generated as a history store for updates.
+Same as other files generated and maintained by WiredTiger storage engine, no manual
+intervention should be performed to the history store file.
+</dd>
+
+<dt>Default transaction isolation level</dt>
+<dd>
+The default transaction isolation level has been changed from "read-committed" to "snapshot"
+in WiredTiger.
+</dd>
+
+<dt>Read committed/uncommitted isolation level</dt>
+<dd>
+If the user provides a read committed/uncommitted isolation, WiredTiger will perform only
+read operations under this isolation. Any write operations will get an error.
+</dd>
+
+<dt>Python 2 support</dt>
+<dd>
+The support for Python 2 has been dropped from WiredTiger.
+</dd>
+
<dt>Asynchronous API</dt>
<dd>
The asynchronous API has been removed from WiredTiger.
</dd>
+<dt>Huffman Encoding support for keys</dt>
+<dd>
+The Huffman Encoding support for keys has been removed from WiredTiger.
+</dd>
+
+<dt>Transaction support for custom data sources</dt>
+<dd>
+The transaction support for custom data sources have been removed from WiredTiger.
+</dd>
+
+<dt>WT_SESSION.rebalance API</dt>
+<dd>
+The WT_SESSION.rebalance API has been removed from WiredTiger.
+</dd>
+
+<dt>Java language API</dt>
+<dd>
+The Java language API has been removed from WiredTiger.
+</dd>
+
<dt>Named snapshots</dt>
<dd>
Named snapshot functionality has been removed from WiredTiger as timestamps offer a better solution
@@ -17,6 +62,12 @@ across sessions. The WT_SESSION.begin_transaction method's \c snapshot configura
WT_SESSION::snapshot method have been removed and are no longer available.
</dd>
+<dt>Btree version and Compatibility with older releases</dt>
+<dd>
+The Btree version WT_BTREE_MAJOR_VERSION_MAX has been bumped in this release. Databases created with
+this release version cannot be downgraded to older versions as the underlying file format has changed.
+</dd>
+
</dl><hr>
@section version_321 Upgrading to Version 3.2.1
<dl>
diff --git a/src/third_party/wiredtiger/src/docs/wtperf.dox b/src/third_party/wiredtiger/src/docs/wtperf.dox
index 809472b3e80..916755ade32 100644
--- a/src/third_party/wiredtiger/src/docs/wtperf.dox
+++ b/src/third_party/wiredtiger/src/docs/wtperf.dox
@@ -128,6 +128,8 @@ configuration options:
DO NOT EDIT: THIS PART OF THE FILE IS GENERATED BY dist/s_docs.
\endif
+@par backup_interval (unsigned int, default=0)
+backup the database every interval seconds during the workload phase, 0 to disable
@par checkpoint_interval (unsigned int, default=120)
checkpoint every interval seconds during the workload phase.
@par checkpoint_stress_rate (unsigned int, default=0)
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 2269a925d3d..189952f0f42 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -292,11 +292,11 @@ __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread)
* set the flag on both sessions because we may call clear_walk when we are walking with the
* walk session, locked.
*/
- F_SET(session, WT_SESSION_LOCKED_PASS);
- F_SET(cache->walk_session, WT_SESSION_LOCKED_PASS);
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_PASS);
+ FLD_SET(cache->walk_session->lock_flags, WT_SESSION_LOCKED_PASS);
ret = __evict_server(session, &did_work);
- F_CLR(cache->walk_session, WT_SESSION_LOCKED_PASS);
- F_CLR(session, WT_SESSION_LOCKED_PASS);
+ FLD_CLR(cache->walk_session->lock_flags, WT_SESSION_LOCKED_PASS);
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_PASS);
was_intr = cache->pass_intr != 0;
__wt_spin_unlock(session, &cache->evict_pass_lock);
WT_ERR(ret);
@@ -733,11 +733,11 @@ __evict_pass(WT_SESSION_IMPL *session)
* race conditions that other threads can enter into the flow of evict server when there
* is already another server is running.
*/
- F_CLR(session, WT_SESSION_LOCKED_PASS);
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_PASS);
__wt_spin_unlock(session, &cache->evict_pass_lock);
ret = __evict_lru_pages(session, true);
__wt_spin_lock(session, &cache->evict_pass_lock);
- F_SET(session, WT_SESSION_LOCKED_PASS);
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_PASS);
WT_RET(ret);
}
@@ -809,7 +809,7 @@ __evict_clear_walk(WT_SESSION_IMPL *session)
btree = S2BT(session);
cache = S2C(session)->cache;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_PASS));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_PASS));
if (session->dhandle == cache->walk_tree)
cache->walk_tree = NULL;
@@ -1741,9 +1741,10 @@ __evict_walk_tree(WT_SESSION_IMPL *session, WT_EVICT_QUEUE *queue, u_int max_ent
*/
if (!WT_IS_HS(btree->dhandle) && __wt_cache_hs_dirty(session)) {
/* If target pages are less than 10, keep it like that. */
- target_pages = target_pages < 10 ? target_pages : target_pages / 10;
- WT_STAT_CONN_INCR(session, cache_eviction_target_page_reduced);
- WT_STAT_DATA_INCR(session, cache_eviction_target_page_reduced);
+ if (target_pages >= 10) {
+ target_pages = target_pages / 10;
+ WT_STAT_CONN_DATA_INCR(session, cache_eviction_target_page_reduced);
+ }
}
/* If we don't want any pages from this tree, move on. */
@@ -2375,7 +2376,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, bool readonly, d
* rolled back. Ignore if in recovery, those transactions can't be rolled back.
*/
if (!F_ISSET(conn, WT_CONN_RECOVERING) && __wt_cache_stuck(session)) {
- ret = __wt_txn_is_blocking(session, false);
+ ret = __wt_txn_is_blocking(session);
if (ret == WT_ROLLBACK) {
--cache->evict_aggressive_score;
WT_STAT_CONN_INCR(session, txn_fail_cache);
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index 5d6954cb594..26b38dc5996 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -576,6 +576,20 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t evict_flags, bool
return (0);
/*
+ * If we are trying to evict a dirty page that does not belong to the history store (HS) and a
+ * checkpoint is processing the HS file, then avoid evicting the dirty non-HS page for now if
+ * the cache is already dominated by dirty HS content.
+ *
+ * Evicting a non-HS dirty page can generate even more HS content. As we can not evict HS pages
+ * while checkpoint is operating on the HS file, we can end up in a situation where we exceed
+ * the cache size limits.
+ */
+ if (conn->txn_global.checkpoint_running_hs && !WT_IS_HS(btree->dhandle) &&
+ __wt_cache_hs_dirty(session) && __wt_cache_full(session)) {
+ WT_STAT_CONN_INCR(session, cache_eviction_blocked_checkpoint_hs);
+ return (__wt_set_return(session, EBUSY));
+ }
+ /*
* If reconciliation is disabled for this thread (e.g., during an eviction that writes to the
* history store), give up.
*/
diff --git a/src/third_party/wiredtiger/src/history/hs_conn.c b/src/third_party/wiredtiger/src/history/hs_conn.c
index 6163d0042c7..25c4d4f695e 100644
--- a/src/third_party/wiredtiger/src/history/hs_conn.c
+++ b/src/third_party/wiredtiger/src/history/hs_conn.c
@@ -15,8 +15,7 @@
static int
__hs_start_internal_session(WT_SESSION_IMPL *session, WT_SESSION_IMPL **int_sessionp)
{
- WT_ASSERT(session, !F_ISSET(session, WT_CONN_HS_OPEN));
- return (__wt_open_internal_session(S2C(session), "hs_access", true, 0, int_sessionp));
+ return (__wt_open_internal_session(S2C(session), "hs_access", true, 0, 0, int_sessionp));
}
/*
diff --git a/src/third_party/wiredtiger/src/history/hs_cursor.c b/src/third_party/wiredtiger/src/history/hs_cursor.c
index 31da7b2cc9b..1799b068e7e 100644
--- a/src/third_party/wiredtiger/src/history/hs_cursor.c
+++ b/src/third_party/wiredtiger/src/history/hs_cursor.c
@@ -113,9 +113,9 @@ __wt_hs_find_upd(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM *key,
WT_DECL_ITEM(orig_hs_value_buf);
WT_DECL_RET;
WT_ITEM hs_key, recno_key;
- WT_MODIFY_VECTOR modifies;
WT_TXN_SHARED *txn_shared;
WT_UPDATE *mod_upd;
+ WT_UPDATE_VECTOR modifies;
wt_timestamp_t durable_timestamp, durable_timestamp_tmp;
wt_timestamp_t hs_stop_durable_ts, hs_stop_durable_ts_tmp, read_timestamp;
uint64_t upd_type_full;
@@ -126,7 +126,7 @@ __wt_hs_find_upd(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM *key,
mod_upd = NULL;
orig_hs_value_buf = NULL;
WT_CLEAR(hs_key);
- __wt_modify_vector_init(session, &modifies);
+ __wt_update_vector_init(session, &modifies);
txn_shared = WT_SESSION_TXN_SHARED(session);
upd_found = false;
@@ -201,7 +201,7 @@ __wt_hs_find_upd(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM *key,
while (upd_type == WT_UPDATE_MODIFY) {
WT_ERR(__wt_upd_alloc(session, hs_value, upd_type, &mod_upd, NULL));
- WT_ERR(__wt_modify_vector_push(&modifies, mod_upd));
+ WT_ERR(__wt_update_vector_push(&modifies, mod_upd));
mod_upd = NULL;
/*
@@ -230,7 +230,7 @@ __wt_hs_find_upd(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM *key,
}
WT_ASSERT(session, upd_type == WT_UPDATE_STANDARD);
while (modifies.size > 0) {
- __wt_modify_vector_pop(&modifies, &mod_upd);
+ __wt_update_vector_pop(&modifies, &mod_upd);
WT_ERR(__wt_modify_apply_item(session, value_format, hs_value, mod_upd->data));
__wt_free_update_list(session, &mod_upd);
}
@@ -258,10 +258,10 @@ err:
__wt_free_update_list(session, &mod_upd);
while (modifies.size > 0) {
- __wt_modify_vector_pop(&modifies, &mod_upd);
+ __wt_update_vector_pop(&modifies, &mod_upd);
__wt_free_update_list(session, &mod_upd);
}
- __wt_modify_vector_free(&modifies);
+ __wt_update_vector_free(&modifies);
if (ret == 0) {
if (upd_found)
diff --git a/src/third_party/wiredtiger/src/history/hs_rec.c b/src/third_party/wiredtiger/src/history/hs_rec.c
index 56573d374bd..0e7e2424c57 100644
--- a/src/third_party/wiredtiger/src/history/hs_rec.c
+++ b/src/third_party/wiredtiger/src/history/hs_rec.c
@@ -8,10 +8,8 @@
#include "wt_internal.h"
-static int __hs_delete_key_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
- uint32_t btree_id, const WT_ITEM *key, bool reinsert);
-static int __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
- WT_BTREE *btree, const WT_ITEM *key, wt_timestamp_t ts, uint64_t *hs_counter);
+static int __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
+ uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, uint64_t *hs_counter);
/*
* __hs_verbose_cache_stats --
@@ -89,6 +87,16 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree,
counter = 0;
/*
+ * We might be entering this code from application thread's context. We should make sure that we
+ * are not using snapshot associated with application session to perform visibility checks on
+ * history store records. Note that the history store cursor performs visibility checks based on
+ * snapshot if none of WT_CURSTD_HS_READ_ALL or WT_CURSTD_HS_READ_COMMITTED flags are set.
+ */
+ WT_ASSERT(session,
+ F_ISSET(session, WT_SESSION_INTERNAL) ||
+ F_ISSET(cursor, WT_CURSTD_HS_READ_ALL | WT_CURSTD_HS_READ_COMMITTED));
+
+ /*
* Keep track if the caller had set WT_CURSTD_HS_READ_ALL flag on the history store cursor. We
* want to preserve the flags set by the caller when we exit from this function. Also, we want
* to explicitly set the flag WT_CURSTD_HS_READ_ALL only for the search_near operations on the
@@ -142,13 +150,14 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree,
&upd_type_full_diag, existing_val));
WT_ERR(__wt_compare(session, NULL, existing_val, hs_value, &cmp));
/*
- * We shouldn't be inserting the same value again for the key unless coming from a
- * different transaction. If the updates are from the same transaction, the start
- * timestamp for each update should be different.
+ * The same value should not be inserted again unless: (1) the previous entry is
+ * already deleted (i.e., the stop timestamp is globally visible), (2) it is from a
+ * different transaction, or (3) it has a different timestamp if from the same transaction.
*/
if (cmp == 0)
WT_ASSERT(session,
- tw->start_txn == WT_TXN_NONE ||
+ __wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw) ||
+ tw->start_txn == WT_TXN_NONE ||
tw->start_txn != hs_cbt->upd_value->tw.start_txn ||
tw->start_ts != hs_cbt->upd_value->tw.start_ts);
counter = hs_counter + 1;
@@ -160,30 +169,25 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree,
}
/*
- * If we're inserting a non-zero timestamp, look ahead for any higher timestamps. If we find
- * updates, we should remove them and reinsert them at the current timestamp.
+ * Look ahead for any higher timestamps. If we find updates, we should remove them and reinsert
+ * them at the current timestamp. If there were no keys equal to or less than our target key, we
+ * would have received WT_NOTFOUND. In that case we need to search again with a higher
+ * timestamp.
*/
- if (tw->start_ts != WT_TS_NONE) {
- /*
- * If there were no keys equal to or less than our target key, we would have received
- * WT_NOTFOUND. In that case we need to search again with a higher timestamp as the cursor
- * would not be positioned correctly.
- */
- if (ret == 0)
- WT_ERR_NOTFOUND_OK(cursor->next(cursor), true);
- else {
- F_SET(cursor, WT_CURSTD_HS_READ_ALL);
+ if (ret == 0)
+ WT_ERR_NOTFOUND_OK(cursor->next(cursor), true);
+ else {
+ F_SET(cursor, WT_CURSTD_HS_READ_ALL);
- cursor->set_key(cursor, 3, btree->id, key, tw->start_ts + 1);
- WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_after(session, cursor), true);
+ cursor->set_key(cursor, 3, btree->id, key, tw->start_ts + 1);
+ WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_after(session, cursor), true);
- if (!hs_read_all_flag)
- F_CLR(cursor, WT_CURSTD_HS_READ_ALL);
- }
- if (ret == 0)
- WT_ERR(__hs_fixup_out_of_order_from_pos(
- session, cursor, btree, key, tw->start_ts, &counter));
+ if (!hs_read_all_flag)
+ F_CLR(cursor, WT_CURSTD_HS_READ_ALL);
}
+ if (ret == 0)
+ WT_ERR(__hs_delete_reinsert_from_pos(
+ session, cursor, btree->id, key, tw->start_ts + 1, true, &counter));
#ifdef HAVE_DIAGNOSTIC
/*
@@ -222,21 +226,21 @@ err:
* Get the next update and its full value.
*/
static inline int
-__hs_next_upd_full_value(WT_SESSION_IMPL *session, WT_MODIFY_VECTOR *modifies,
+__hs_next_upd_full_value(WT_SESSION_IMPL *session, WT_UPDATE_VECTOR *updates,
WT_ITEM *older_full_value, WT_ITEM *full_value, WT_UPDATE **updp)
{
WT_UPDATE *upd;
*updp = NULL;
- __wt_modify_vector_pop(modifies, &upd);
+ __wt_update_vector_pop(updates, &upd);
if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (modifies->size == 0) {
+ if (updates->size == 0) {
WT_ASSERT(session, older_full_value == NULL);
*updp = upd;
return (0);
}
- __wt_modify_vector_pop(modifies, &upd);
+ __wt_update_vector_pop(updates, &upd);
WT_ASSERT(session, upd->type == WT_UPDATE_STANDARD);
full_value->data = upd->data;
full_value->size = upd->size;
@@ -273,19 +277,18 @@ __wt_hs_insert_updates(
/* If the limit is exceeded, we will insert a full update to the history store */
#define MAX_REVERSE_MODIFY_NUM 16
WT_MODIFY entries[MAX_REVERSE_MODIFY_NUM];
- WT_MODIFY_VECTOR modifies;
+ WT_UPDATE_VECTOR updates;
+ WT_UPDATE_VECTOR out_of_order_ts_updates;
WT_SAVE_UPD *list;
- WT_UPDATE *first_globally_visible_upd, *first_non_ts_upd;
+ WT_UPDATE *first_globally_visible_upd, *fix_ts_upd, *min_ts_upd, *out_of_order_ts_upd;
WT_UPDATE *non_aborted_upd, *oldest_upd, *prev_upd, *tombstone, *upd;
WT_TIME_WINDOW tw;
wt_off_t hs_size;
- wt_timestamp_t min_insert_ts;
uint64_t insert_cnt, max_hs_size;
uint32_t i;
uint8_t *p;
int nentries;
- char ts_string[3][WT_TS_INT_STRING_SIZE];
- bool enable_reverse_modify, hs_inserted, squashed, ts_updates_in_hs;
+ bool enable_reverse_modify, hs_inserted, squashed;
*cache_write_hs = false;
btree = S2BT(session);
@@ -296,7 +299,17 @@ __wt_hs_insert_updates(
WT_RET(__wt_curhs_open(session, NULL, &hs_cursor));
F_SET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
- __wt_modify_vector_init(session, &modifies);
+ __wt_update_vector_init(session, &updates);
+ /*
+ * We use another stack to store the out-of-order timestamp updates (including updates without a
+ * timestamp). We walk the update chain from the newest to the oldest. Once an out-of-order
+ * timestamp update is detected, and it has a lower timestamp than the head of the stack, it is
+ * pushed to the stack. When we are inserting updates to the history store, we compare the
+ * update's timestamp with the head of the stack. If it is larger than the out-of-order
+ * timestamp, we fix the timestamp by inserting with the out-of-order timestamp. If the update
+ * we are inserting is the head of the stack, we pop it from the stack.
+ */
+ __wt_update_vector_init(session, &out_of_order_ts_updates);
if (!btree->hs_entries)
btree->hs_entries = true;
@@ -349,12 +362,8 @@ __wt_hs_insert_updates(
WT_ERR(__wt_illegal_value(session, page->type));
}
- first_globally_visible_upd = first_non_ts_upd = NULL;
- ts_updates_in_hs = false;
+ first_globally_visible_upd = min_ts_upd = out_of_order_ts_upd = NULL;
enable_reverse_modify = true;
- min_insert_ts = WT_TS_MAX;
-
- __wt_modify_vector_clear(&modifies);
/*
* The algorithm assumes the oldest update on the update chain in memory is either a full
@@ -390,32 +399,21 @@ __wt_hs_insert_updates(
non_aborted_upd = upd;
- /* If we've seen a smaller timestamp before, use that instead. */
- if (min_insert_ts < upd->start_ts) {
+ /* Detect out of order timestamp update. */
+ if (min_ts_upd != NULL && min_ts_upd->start_ts < upd->start_ts &&
+ out_of_order_ts_upd != min_ts_upd) {
/*
- * Resolved prepared updates will lose their durable timestamp here. This is a
- * wrinkle in our handling of out-of-order updates.
+ * Always insert a full update into the history store if we detect an
+ * out-of-order timestamp update.
*/
- if (upd->start_ts != upd->durable_ts) {
- WT_ASSERT(session, min_insert_ts < upd->durable_ts);
- WT_STAT_CONN_DATA_INCR(session, cache_hs_order_lose_durable_timestamp);
- }
- __wt_verbose(session, WT_VERB_TIMESTAMP,
- "fixing out-of-order updates during insertion; start_ts=%s, durable_start_ts=%s, "
- "min_insert_ts=%s",
- __wt_timestamp_to_string(upd->start_ts, ts_string[0]),
- __wt_timestamp_to_string(upd->durable_ts, ts_string[1]),
- __wt_timestamp_to_string(min_insert_ts, ts_string[2]));
- upd->start_ts = upd->durable_ts = min_insert_ts;
- WT_STAT_CONN_DATA_INCR(session, cache_hs_order_fixup_insert);
- } else if (upd->start_ts != WT_TS_NONE)
- /*
- * Don't reset to WT_TS_NONE as we don't want to clear the timestamps for updates
- * older than the update without timestamp.
- */
- min_insert_ts = upd->start_ts;
+ enable_reverse_modify = false;
+ WT_ERR(__wt_update_vector_push(&out_of_order_ts_updates, min_ts_upd));
+ out_of_order_ts_upd = min_ts_upd;
+ } else if (upd->prepare_state != WT_PREPARE_INPROGRESS &&
+ (min_ts_upd == NULL || upd->start_ts < min_ts_upd->start_ts))
+ min_ts_upd = upd;
- WT_ERR(__wt_modify_vector_push(&modifies, upd));
+ WT_ERR(__wt_update_vector_push(&updates, upd));
/* Track the first update that is globally visible. */
if (first_globally_visible_upd == NULL && __wt_txn_upd_visible_all(session, upd))
@@ -433,19 +431,6 @@ __wt_hs_insert_updates(
prev_upd->start_ts == upd->start_ts)
enable_reverse_modify = false;
- /* Always insert full update to the history store if the timestamps are not in order. */
- if (prev_upd != NULL && prev_upd->start_ts < upd->start_ts)
- enable_reverse_modify = false;
-
- /* Find the first update without timestamp. */
- if (first_non_ts_upd == NULL && upd->start_ts == WT_TS_NONE)
- first_non_ts_upd = upd;
- else if (first_non_ts_upd != NULL && upd->start_ts != WT_TS_NONE) {
- F_SET(upd, WT_UPDATE_BEHIND_MIXED_MODE);
- if (F_ISSET(upd, WT_UPDATE_HS))
- ts_updates_in_hs = true;
- }
-
/*
* No need to continue if we see the first self contained value after the first globally
* visible value.
@@ -464,54 +449,66 @@ __wt_hs_insert_updates(
prev_upd = upd = NULL;
/* Construct the oldest full update. */
- WT_ASSERT(session, modifies.size > 0);
+ WT_ASSERT(session, updates.size > 0);
- __wt_modify_vector_peek(&modifies, &oldest_upd);
+ __wt_update_vector_peek(&updates, &oldest_upd);
WT_ASSERT(session,
oldest_upd->type == WT_UPDATE_STANDARD || oldest_upd->type == WT_UPDATE_TOMBSTONE);
/*
- * Clear the history store here if the oldest update is a tombstone and it is the first
- * update without timestamp on the update chain because we don't have the cursor placed at
- * the correct place to delete the history store records when inserting the first update and
- * it may be skipped if there is nothing to insert to the history store.
+ * Fix the history store record here if the oldest update is a tombstone because we don't
+ * have the cursor placed at the correct place to fix the history store records when
+ * inserting the first update and it may be skipped if there is nothing to insert to the
+ * history store.
*/
- if (oldest_upd->type == WT_UPDATE_TOMBSTONE && oldest_upd == first_non_ts_upd &&
- !F_ISSET(first_non_ts_upd, WT_UPDATE_CLEARED_HS)) {
- /* We can only delete history store entries that have timestamps. */
- WT_ERR(__wt_hs_delete_key_from_ts(session, hs_cursor, btree->id, key, 1, true));
- WT_STAT_CONN_INCR(session, cache_hs_key_truncate_non_ts);
- WT_STAT_DATA_INCR(session, cache_hs_key_truncate_non_ts);
- F_SET(first_non_ts_upd, WT_UPDATE_CLEARED_HS);
- } else if (first_non_ts_upd != NULL && !F_ISSET(first_non_ts_upd, WT_UPDATE_CLEARED_HS) &&
- (list->ins == NULL || ts_updates_in_hs)) {
- WT_ERR(__wt_hs_delete_key_from_ts(session, hs_cursor, btree->id, key, 1, true));
- WT_STAT_CONN_INCR(session, cache_hs_key_truncate_non_ts);
- WT_STAT_DATA_INCR(session, cache_hs_key_truncate_non_ts);
- F_SET(first_non_ts_upd, WT_UPDATE_CLEARED_HS);
+ if (oldest_upd->type == WT_UPDATE_TOMBSTONE) {
+ if (out_of_order_ts_upd != NULL && out_of_order_ts_upd->start_ts < oldest_upd->start_ts)
+ fix_ts_upd = out_of_order_ts_upd;
+ else
+ fix_ts_upd = oldest_upd;
+
+ if (!F_ISSET(fix_ts_upd, WT_UPDATE_FIXED_HS)) {
+ /* Delete and reinsert any update of the key with a higher timestamp.
+ */
+ WT_ERR(__wt_hs_delete_key_from_ts(
+ session, hs_cursor, btree->id, key, fix_ts_upd->start_ts + 1, true));
+ F_SET(fix_ts_upd, WT_UPDATE_FIXED_HS);
+ }
}
- WT_ERR(__hs_next_upd_full_value(session, &modifies, NULL, full_value, &upd));
+ WT_ERR(__hs_next_upd_full_value(session, &updates, NULL, full_value, &upd));
hs_inserted = squashed = false;
/*
* Flush the updates on stack. Stopping once we run out or we reach the onpage upd start
- * time point, we can squash modifies with the same start time point as the onpage upd away.
+ * time point, we can squash updates with the same start time point as the onpage update
+ * away.
*/
- for (; modifies.size > 0 &&
+ for (; updates.size > 0 &&
!(upd->txnid == list->onpage_upd->txnid &&
upd->start_ts == list->onpage_upd->start_ts);
tmp = full_value, full_value = prev_full_value, prev_full_value = tmp,
upd = prev_upd) {
WT_ASSERT(session, upd->type == WT_UPDATE_STANDARD || upd->type == WT_UPDATE_MODIFY);
- tw.durable_start_ts = upd->durable_ts;
- tw.start_ts = upd->start_ts;
- tw.start_txn = upd->txnid;
tombstone = NULL;
- __wt_modify_vector_peek(&modifies, &prev_upd);
+ __wt_update_vector_peek(&updates, &prev_upd);
+
+ if (out_of_order_ts_updates.size > 0) {
+ __wt_update_vector_peek(&out_of_order_ts_updates, &out_of_order_ts_upd);
+ } else
+ out_of_order_ts_upd = NULL;
+
+ if (out_of_order_ts_upd != NULL && out_of_order_ts_upd->start_ts < upd->start_ts) {
+ tw.durable_start_ts = out_of_order_ts_upd->durable_ts;
+ tw.start_ts = out_of_order_ts_upd->start_ts;
+ } else {
+ tw.durable_start_ts = upd->durable_ts;
+ tw.start_ts = upd->start_ts;
+ }
+ tw.start_txn = upd->txnid;
/*
* For any uncommitted prepared updates written to disk, the stop timestamp of the last
@@ -531,8 +528,30 @@ __wt_hs_insert_updates(
* timestamp is globally visible. i.e. durable timestamp of data store version.
*/
WT_ASSERT(session, prev_upd->start_ts <= prev_upd->durable_ts);
- tw.durable_stop_ts = prev_upd->durable_ts;
- tw.stop_ts = prev_upd->start_ts;
+
+ /*
+ * Pop from the out of order timestamp updates stack if the previous update or the
+ * current update is at the head of the stack. We need to check both cases because
+ * if there is a tombstone older than the out of order timestamp, we would not pop
+ * it because we skip the tombstone. Pop it when we are inserting it instead.
+ */
+ if (out_of_order_ts_upd != NULL &&
+ ((out_of_order_ts_upd->txnid == prev_upd->txnid &&
+ out_of_order_ts_upd->start_ts == prev_upd->start_ts) ||
+ (out_of_order_ts_upd->txnid == upd->txnid &&
+ out_of_order_ts_upd->start_ts == upd->start_ts))) {
+ __wt_update_vector_pop(&out_of_order_ts_updates, &out_of_order_ts_upd);
+ out_of_order_ts_upd = NULL;
+ }
+
+ if (out_of_order_ts_upd != NULL &&
+ out_of_order_ts_upd->start_ts < prev_upd->start_ts) {
+ tw.durable_stop_ts = out_of_order_ts_upd->durable_ts;
+ tw.stop_ts = out_of_order_ts_upd->start_ts;
+ } else {
+ tw.durable_stop_ts = prev_upd->durable_ts;
+ tw.stop_ts = prev_upd->start_ts;
+ }
tw.stop_txn = prev_upd->txnid;
if (prev_upd->type == WT_UPDATE_TOMBSTONE)
@@ -540,7 +559,7 @@ __wt_hs_insert_updates(
}
WT_ERR(
- __hs_next_upd_full_value(session, &modifies, full_value, prev_full_value, &prev_upd));
+ __hs_next_upd_full_value(session, &updates, full_value, prev_full_value, &prev_upd));
/* Squash the updates from the same transaction. */
if (upd->start_ts == prev_upd->start_ts && upd->txnid == prev_upd->txnid) {
@@ -557,34 +576,6 @@ __wt_hs_insert_updates(
continue;
}
- /*
- * When we see an update older than a mixed mode update we need to insert it with a zero
- * start and stop timestamp. This means it'll still exist but only use txnid visibility
- * rules. As such older readers should still be able to see it.
- */
- if (F_ISSET(upd, WT_UPDATE_BEHIND_MIXED_MODE)) {
- tw.start_ts = tw.durable_start_ts = WT_TS_NONE;
- tw.stop_ts = tw.durable_stop_ts = WT_TS_NONE;
- }
-
- /*
- * If the time points are out of order (which can happen if the application performs
- * updates with out-of-order timestamps), so this value can never be seen, don't bother
- * inserting it. However if it was made obsolete by a mixed mode operation we still want
- * to insert it, it will be flagged as such.
- *
- * FIXME-WT-6443: We should be able to replace this with an assertion.
- */
- if (!F_ISSET(upd, WT_UPDATE_BEHIND_MIXED_MODE) &&
- (tw.stop_ts < upd->start_ts ||
- (tw.stop_ts == upd->start_ts && tw.stop_txn <= upd->txnid))) {
- __wt_verbose(session, WT_VERB_TIMESTAMP,
- "Warning: fixing out-of-order timestamps %s earlier than previous update %s",
- __wt_timestamp_to_string(tw.stop_ts, ts_string[0]),
- __wt_timestamp_to_string(upd->start_ts, ts_string[1]));
- continue;
- }
-
/* We should never write a prepared update to the history store. */
WT_ASSERT(session,
upd->prepare_state != WT_PREPARE_INPROGRESS &&
@@ -646,8 +637,24 @@ __wt_hs_insert_updates(
}
}
- if (modifies.size > 0)
+ /* If we squash the onpage value, there may be one or more updates left in the stack. */
+ if (updates.size > 0)
WT_STAT_CONN_DATA_INCR(session, cache_hs_write_squash);
+
+ __wt_update_vector_clear(&updates);
+ /*
+ * In the case that the onpage value is an out of order timestamp update and the update
+ * older than it is a tombstone, it remains in the stack. Clean it up.
+ */
+ WT_ASSERT(session, out_of_order_ts_updates.size <= 1);
+#ifdef HAVE_DIAGNOSTIC
+ if (out_of_order_ts_updates.size == 1) {
+ __wt_update_vector_peek(&out_of_order_ts_updates, &upd);
+ WT_ASSERT(session,
+ upd->txnid == list->onpage_upd->txnid && upd->start_ts == list->onpage_upd->start_ts);
+ }
+#endif
+ __wt_update_vector_clear(&out_of_order_ts_updates);
}
WT_ERR(__wt_block_manager_named_size(session, WT_HS_FILE, &hs_size));
@@ -671,7 +678,8 @@ err:
/* modify_value is allocated in __wt_modify_pack. Free it if it is allocated. */
if (modify_value != NULL)
__wt_scr_free(session, &modify_value);
- __wt_modify_vector_free(&modifies);
+ __wt_update_vector_free(&updates);
+ __wt_update_vector_free(&out_of_order_ts_updates);
__wt_scr_free(session, &full_value);
__wt_scr_free(session, &prev_full_value);
@@ -681,44 +689,59 @@ err:
/*
* __wt_hs_delete_key_from_ts --
- * Delete history store content of a given key from a timestamp.
+ *     Delete history store content of a given key from a given timestamp onward, and optionally
+ *     reinsert it with a timestamp of ts-1.
*/
int
__wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btree_id,
const WT_ITEM *key, wt_timestamp_t ts, bool reinsert)
{
WT_DECL_RET;
- bool hs_read_committed;
+ WT_ITEM hs_key;
+ wt_timestamp_t hs_ts;
+ uint64_t hs_counter;
+ uint32_t hs_btree_id;
+ bool hs_read_all_flag;
+
+ /*
+ * If we will delete all the updates of the key from the history store, we should not reinsert
+ * any update.
+ */
+ WT_ASSERT(session, ts > WT_TS_NONE || !reinsert);
- hs_read_committed = F_ISSET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
- if (!hs_read_committed)
- F_SET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
+ hs_read_all_flag = F_ISSET(hs_cursor, WT_CURSTD_HS_READ_ALL);
hs_cursor->set_key(hs_cursor, 3, btree_id, key, ts);
+ F_SET(hs_cursor, WT_CURSTD_HS_READ_ALL);
WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_after(session, hs_cursor), true);
/* Empty history store is fine. */
if (ret == WT_NOTFOUND) {
ret = 0;
goto done;
+ } else {
+ WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_ts, &hs_counter));
+ ++hs_counter;
}
- WT_ERR(__hs_delete_key_from_pos(session, hs_cursor, btree_id, key, reinsert));
+ WT_ERR(
+ __hs_delete_reinsert_from_pos(session, hs_cursor, btree_id, key, ts, reinsert, &hs_counter));
done:
err:
- if (!hs_read_committed)
- F_CLR(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
+ if (!hs_read_all_flag)
+ F_CLR(hs_cursor, WT_CURSTD_HS_READ_ALL);
return (ret);
}
/*
- * __hs_fixup_out_of_order_from_pos --
- * Fixup existing out-of-order updates in the history store. This function works by looking
- * ahead of the current cursor position for entries for the same key, removing them and
- * reinserting them at the timestamp that is currently being inserted.
+ * __hs_delete_reinsert_from_pos --
+ *     Delete updates in the history store whose start timestamp is greater than or equal to the
+ *     specified timestamp, and optionally reinsert them with a timestamp of ts-1. This function
+ *     works by looking ahead of the current cursor position for entries with the same key,
+ *     removing them.
*/
static int
-__hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_BTREE *btree,
- const WT_ITEM *key, wt_timestamp_t ts, uint64_t *counter)
+__hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btree_id,
+ const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, uint64_t *counter)
{
WT_CURSOR *hs_insert_cursor;
WT_CURSOR_BTREE *hs_cbt;
@@ -741,24 +764,21 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
#ifndef HAVE_DIAGNOSTIC
WT_UNUSED(key);
#endif
- /*
- * Position ourselves at the beginning of the key range that we may have to fixup. Prior to
- * getting here, we've positioned our cursor at the end of a key/timestamp range and then done a
- * "next". Normally that would leave us pointing at higher timestamps for the same key (if any)
- * but in the case where our insertion timestamp is the lowest for that key, our cursor may be
- * pointing at the previous key and can potentially race with additional key insertions. We need
- * to keep doing "next" until we've got a key greater than the one we attempted to position
- * ourselves with.
- */
+
+ /* If we are deleting all of the key's updates from the history store, we must not
+  * reinsert any of them. */
+ WT_ASSERT(session, ts > WT_TS_NONE || !reinsert);
+
for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
/* We shouldn't have crossed the btree and user key search space. */
WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_ts, &hs_counter));
- WT_ASSERT(session, hs_btree_id == btree->id);
+ WT_ASSERT(session, hs_btree_id == btree_id);
#ifdef HAVE_DIAGNOSTIC
WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp));
WT_ASSERT(session, cmp == 0);
#endif
- if (hs_ts > ts)
+ /* We found a key with a timestamp greater than or equal to the specified timestamp. */
+ if (hs_ts >= ts)
break;
}
if (ret == WT_NOTFOUND)
@@ -766,7 +786,7 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
WT_ERR(ret);
/*
- * The goal of this fixup function is to move out-of-order content to maintain ordering in the
+ * The goal of this function is to move out-of-order content to maintain ordering in the
* history store. We do this by removing content with higher timestamps and reinserting it
* behind (from search's point of view) the newly inserted update. Even though these updates
* will all have the same timestamp, they cannot be discarded since older readers may need to
@@ -784,11 +804,24 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
* 2 foo 3 1 bbb
* 2 foo 3 2 ccc
* 2 foo 3 3 ddd
+ *
+ * Another example, if we're inserting an update at timestamp 0 with value ddd:
+ * btree key ts counter value
+ * 2 foo 5 0 aaa
+ * 2 foo 6 0 bbb
+ * 2 foo 7 0 ccc
+ *
+ * We want to end up with this:
+ * btree key ts counter value
+ * 2 foo 0 0 aaa
+ * 2 foo 0 1 bbb
+ * 2 foo 0 2 ccc
+ * 2 foo 0 3 ddd
*/
for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
/* We shouldn't have crossed the btree and user key search space. */
WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_ts, &hs_counter));
- WT_ASSERT(session, hs_btree_id == btree->id);
+ WT_ASSERT(session, hs_btree_id == btree_id);
#ifdef HAVE_DIAGNOSTIC
WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp));
WT_ASSERT(session, cmp == 0);
@@ -796,170 +829,70 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
/*
* If we got here, we've got out-of-order updates in the history store.
*
- * Our strategy to rectify this is to remove all records for the same key with a higher
- * timestamp than the one that we're inserting on and reinsert them at the same timestamp
- * that we're inserting with.
+ * Our strategy to rectify this is to remove all records for the same key with a timestamp
+ * higher than or equal to the specified timestamp and reinsert them at the smaller timestamp,
+ * which is the timestamp of the update we are about to insert to the history store.
*/
- WT_ASSERT(session, hs_ts > ts);
-
- /*
- * Don't incur the overhead of opening this new cursor unless we need it. In the regular
- * case, we'll never get here.
- */
- if (hs_insert_cursor == NULL)
- WT_ERR(__wt_curhs_open(session, NULL, &hs_insert_cursor));
-
- /*
- * If these history store records are resolved prepared updates, their durable timestamps
- * will be clobbered by our fix-up process. Keep track of how often this is happening.
- */
- if (hs_cbt->upd_value->tw.start_ts != hs_cbt->upd_value->tw.durable_start_ts ||
- hs_cbt->upd_value->tw.stop_ts != hs_cbt->upd_value->tw.durable_stop_ts)
- WT_STAT_CONN_DATA_INCR(session, cache_hs_order_lose_durable_timestamp);
-
- __wt_verbose(session, WT_VERB_TIMESTAMP,
- "fixing existing out-of-order updates by moving them; start_ts=%s, durable_start_ts=%s, "
- "stop_ts=%s, durable_stop_ts=%s, new_ts=%s",
- __wt_timestamp_to_string(hs_cbt->upd_value->tw.start_ts, ts_string[0]),
- __wt_timestamp_to_string(hs_cbt->upd_value->tw.durable_start_ts, ts_string[1]),
- __wt_timestamp_to_string(hs_cbt->upd_value->tw.stop_ts, ts_string[2]),
- __wt_timestamp_to_string(hs_cbt->upd_value->tw.durable_stop_ts, ts_string[3]),
- __wt_timestamp_to_string(ts, ts_string[4]));
-
- hs_insert_tw.start_ts = hs_insert_tw.durable_start_ts = ts;
- hs_insert_tw.start_txn = hs_cbt->upd_value->tw.start_txn;
-
- /*
- * We're going to be inserting something immediately after with the same timestamp. Either
- * another moved update OR the update itself that triggered the correction. In either case,
- * we should preserve the stop transaction id.
- */
- hs_insert_tw.stop_ts = hs_insert_tw.durable_stop_ts = ts;
- hs_insert_tw.stop_txn = hs_cbt->upd_value->tw.stop_txn;
-
- WT_ASSERT(session, hs_insert_tw.stop_txn >= hs_insert_tw.start_txn);
-
- /* Extract the underlying value for reinsertion. */
- WT_ERR(hs_cursor->get_value(
- hs_cursor, &tw.durable_stop_ts, &tw.durable_start_ts, &hs_upd_type, &hs_value));
-
- /* Insert the value back with different timestamps. */
- hs_insert_cursor->set_key(hs_insert_cursor, 4, btree->id, &hs_key, ts, *counter);
- hs_insert_cursor->set_value(hs_insert_cursor, &hs_insert_tw, hs_insert_tw.durable_stop_ts,
- hs_insert_tw.durable_start_ts, (uint64_t)hs_upd_type, &hs_value);
- WT_ERR(hs_insert_cursor->insert(hs_insert_cursor));
- ++(*counter);
+ WT_ASSERT(session, hs_ts >= ts);
- /* Delete the entry with higher timestamp. */
- WT_ERR(hs_cursor->remove(hs_cursor));
- WT_STAT_CONN_INCR(session, cache_hs_order_fixup_move);
- WT_STAT_DATA_INCR(session, cache_hs_order_fixup_move);
- }
- if (ret == WT_NOTFOUND)
- ret = 0;
-err:
- if (hs_insert_cursor != NULL)
- hs_insert_cursor->close(hs_insert_cursor);
- return (ret);
-}
-
-/*
- * __hs_delete_key_from_pos --
- * Delete an entire key's worth of data in the history store. If we chose to reinsert the values
- * the reinserted values will have 0 start and stop timestamps to ensure that they only use
- * txnid based visibility rules.
- */
-static int
-__hs_delete_key_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btree_id,
- const WT_ITEM *key, bool reinsert)
-{
- WT_CURSOR *hs_insert_cursor;
- WT_CURSOR_BTREE *hs_cbt;
- WT_DECL_RET;
- WT_ITEM hs_key, hs_value;
- WT_TIME_WINDOW hs_insert_tw;
- wt_timestamp_t durable_timestamp, hs_start_ts, hs_stop_durable_ts;
- uint64_t hs_counter, hs_insert_counter, hs_upd_type;
- uint32_t hs_btree_id;
-
- hs_cbt = __wt_curhs_get_cbt(hs_cursor);
- hs_insert_counter = 0;
- WT_CLEAR(hs_key);
- WT_CLEAR(hs_value);
-
- hs_insert_cursor = NULL;
- if (reinsert) {
- /*
- * Determine the starting value of our counter, i.e. highest counter value of the timestamp
- * range for timestamp 0. We'll be inserting at timestamp 0 and don't want to overwrite a
- * currently existing counter.
- *
- * The cursor will also be positioned at the start of the range that we wish to start
- * inserting.
- */
- WT_WITHOUT_DHANDLE(session, ret = __wt_curhs_open(session, NULL, &hs_insert_cursor));
- WT_ERR(ret);
- F_SET(hs_insert_cursor, WT_CURSTD_HS_READ_COMMITTED);
- hs_insert_cursor->set_key(hs_insert_cursor, 4, btree_id, key, WT_TS_NONE, UINT64_MAX);
- WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_before(session, hs_insert_cursor), true);
-
- if (ret == WT_NOTFOUND) {
- hs_insert_counter = 0;
- ret = 0;
- } else {
- WT_ERR(hs_insert_cursor->get_key(
- hs_insert_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_insert_counter));
- WT_ASSERT(session, hs_start_ts == WT_TS_NONE);
+ if (reinsert) {
/*
- * Increment the history store counter that we'll be using to insert with to avoid
- * overwriting the record we just found.
+ * Don't incur the overhead of opening this new cursor unless we need it. In the regular
+ * case, we'll never get here.
*/
- hs_insert_counter++;
- }
- }
-
- /* Begin iterating over the range of entries we expect to replace. */
- for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
- WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter));
+ if (hs_insert_cursor == NULL)
+ WT_ERR(__wt_curhs_open(session, NULL, &hs_insert_cursor));
- if (reinsert) {
- WT_ERR(hs_cursor->get_value(
- hs_cursor, &hs_stop_durable_ts, &durable_timestamp, &hs_upd_type, &hs_value));
-
- /* Reinsert entry with zero timestamp. */
- hs_insert_tw.start_ts = hs_insert_tw.durable_start_ts = WT_TS_NONE;
+ /*
+ * If these history store records are resolved prepared updates, their durable
+ * timestamps will be clobbered by our fix-up process. Keep track of how often this is
+ * happening.
+ */
+ if (hs_cbt->upd_value->tw.start_ts != hs_cbt->upd_value->tw.durable_start_ts ||
+ hs_cbt->upd_value->tw.stop_ts != hs_cbt->upd_value->tw.durable_stop_ts)
+ WT_STAT_CONN_DATA_INCR(session, cache_hs_order_lose_durable_timestamp);
+
+ __wt_verbose(session, WT_VERB_TIMESTAMP,
+ "fixing existing out-of-order updates by moving them; start_ts=%s, "
+ "durable_start_ts=%s, "
+ "stop_ts=%s, durable_stop_ts=%s, new_ts=%s",
+ __wt_timestamp_to_string(hs_cbt->upd_value->tw.start_ts, ts_string[0]),
+ __wt_timestamp_to_string(hs_cbt->upd_value->tw.durable_start_ts, ts_string[1]),
+ __wt_timestamp_to_string(hs_cbt->upd_value->tw.stop_ts, ts_string[2]),
+ __wt_timestamp_to_string(hs_cbt->upd_value->tw.durable_stop_ts, ts_string[3]),
+ __wt_timestamp_to_string(ts, ts_string[4]));
+
+ hs_insert_tw.start_ts = hs_insert_tw.durable_start_ts = ts - 1;
hs_insert_tw.start_txn = hs_cbt->upd_value->tw.start_txn;
- hs_insert_tw.stop_ts = hs_insert_tw.durable_stop_ts = WT_TS_NONE;
+ /*
+ * We're going to insert something immediately after with the smaller timestamp. Either
+ * another moved update OR the update itself that triggered the correction. In either case,
+ * we should preserve the stop transaction id.
+ */
+ hs_insert_tw.stop_ts = hs_insert_tw.durable_stop_ts = ts - 1;
hs_insert_tw.stop_txn = hs_cbt->upd_value->tw.stop_txn;
+ /* Extract the underlying value for reinsertion. */
+ WT_ERR(hs_cursor->get_value(
+ hs_cursor, &tw.durable_stop_ts, &tw.durable_start_ts, &hs_upd_type, &hs_value));
+
+ /* Insert the value back with different timestamps. */
hs_insert_cursor->set_key(
- hs_insert_cursor, 4, btree_id, key, WT_TS_NONE, hs_insert_counter);
- hs_insert_cursor->set_value(hs_insert_cursor, &hs_insert_tw, WT_TS_NONE, WT_TS_NONE,
- (uint64_t)hs_upd_type, &hs_value);
+ hs_insert_cursor, 4, btree_id, &hs_key, hs_insert_tw.start_ts, *counter);
+ hs_insert_cursor->set_value(hs_insert_cursor, &hs_insert_tw,
+ hs_insert_tw.durable_stop_ts, hs_insert_tw.durable_start_ts, (uint64_t)hs_upd_type,
+ &hs_value);
WT_ERR(hs_insert_cursor->insert(hs_insert_cursor));
- WT_STAT_CONN_INCR(session, cache_hs_insert);
- WT_STAT_DATA_INCR(session, cache_hs_insert);
-
- hs_insert_counter++;
+ ++(*counter);
+ WT_STAT_CONN_INCR(session, cache_hs_order_reinsert);
+ WT_STAT_DATA_INCR(session, cache_hs_order_reinsert);
}
- /*
- * Remove the key using history store cursor interface.
- *
- * If anything fails after this point and we're reinserting we need to panic as it will
- * leave our history store in an unexpected state with duplicate entries.
- */
- if ((ret = hs_cursor->remove(hs_cursor)) != 0) {
- if (reinsert)
- WT_ERR_PANIC(session, WT_PANIC,
- "Failed to insert tombstone, history store now "
- " contains duplicate values.");
- else
- WT_ERR(ret);
- }
- WT_STAT_CONN_INCR(session, cache_hs_key_truncate);
- WT_STAT_DATA_INCR(session, cache_hs_key_truncate);
+ /* Delete the out-of-order entry. */
+ WT_ERR(hs_cursor->remove(hs_cursor));
+ WT_STAT_CONN_INCR(session, cache_hs_order_remove);
+ WT_STAT_DATA_INCR(session, cache_hs_order_remove);
}
if (ret == WT_NOTFOUND)
ret = 0;
diff --git a/src/third_party/wiredtiger/src/include/block.h b/src/third_party/wiredtiger/src/include/block.h
index 3b0370f63dd..b8a982e1713 100644
--- a/src/third_party/wiredtiger/src/include/block.h
+++ b/src/third_party/wiredtiger/src/include/block.h
@@ -185,6 +185,7 @@ struct __wt_bm {
int (*compact_skip)(WT_BM *, WT_SESSION_IMPL *, bool *);
int (*compact_start)(WT_BM *, WT_SESSION_IMPL *);
int (*corrupt)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t);
+ int (*flush_tier)(WT_BM *, WT_SESSION_IMPL *, uint8_t **, size_t *);
int (*free)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t);
bool (*is_mapped)(WT_BM *, WT_SESSION_IMPL *);
int (*map_discard)(WT_BM *, WT_SESSION_IMPL *, void *, size_t);
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 79297af1743..24562280ac1 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -1111,14 +1111,13 @@ struct __wt_update {
volatile uint8_t prepare_state; /* prepare state */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_UPDATE_BEHIND_MIXED_MODE 0x01u /* Update that older than a mixed mode update. */
-#define WT_UPDATE_CLEARED_HS 0x02u /* Update that cleared the history store. */
-#define WT_UPDATE_DS 0x04u /* Update has been written to the data store. */
-#define WT_UPDATE_HS 0x08u /* Update has been written to history store. */
-#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x10u /* Prepared update restored from data store. */
-#define WT_UPDATE_RESTORED_FAST_TRUNCATE 0x20u /* Fast truncate instantiation */
-#define WT_UPDATE_RESTORED_FROM_DS 0x40u /* Update restored from data store. */
-#define WT_UPDATE_RESTORED_FROM_HS 0x80u /* Update restored from history store. */
+#define WT_UPDATE_DS 0x01u /* Update has been written to the data store. */
+#define WT_UPDATE_FIXED_HS 0x02u /* Update that fixed the history store. */
+#define WT_UPDATE_HS 0x04u /* Update has been written to history store. */
+#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x08u /* Prepared update restored from data store. */
+#define WT_UPDATE_RESTORED_FAST_TRUNCATE 0x10u /* Fast truncate instantiation */
+#define WT_UPDATE_RESTORED_FROM_DS 0x20u /* Update restored from data store. */
+#define WT_UPDATE_RESTORED_FROM_HS 0x40u /* Update restored from history store. */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint8_t flags;
@@ -1187,17 +1186,17 @@ struct __wt_update_value {
* avoid heap allocation, add a few additional slots to that array.
*/
#define WT_MAX_MODIFY_UPDATE 10
-#define WT_MODIFY_VECTOR_STACK_SIZE (WT_MAX_MODIFY_UPDATE + 10)
+#define WT_UPDATE_VECTOR_STACK_SIZE 20
/*
- * WT_MODIFY_VECTOR --
- * A resizable array for storing modify updates. The allocation strategy is similar to that of
+ * WT_UPDATE_VECTOR --
+ * A resizable array for storing updates. The allocation strategy is similar to that of
* llvm::SmallVector<T> where we keep space on the stack for the regular case but fall back to
* dynamic allocation as needed.
*/
-struct __wt_modify_vector {
+struct __wt_update_vector {
WT_SESSION_IMPL *session;
- WT_UPDATE *list[WT_MODIFY_VECTOR_STACK_SIZE];
+ WT_UPDATE *list[WT_UPDATE_VECTOR_STACK_SIZE];
WT_UPDATE **listp;
size_t allocated_bytes;
size_t size;
diff --git a/src/third_party/wiredtiger/src/include/btree_cmp_inline.h b/src/third_party/wiredtiger/src/include/btree_cmp_inline.h
index 18d8a8e5158..0c7eaf9fdb9 100644
--- a/src/third_party/wiredtiger/src/include/btree_cmp_inline.h
+++ b/src/third_party/wiredtiger/src/include/btree_cmp_inline.h
@@ -23,11 +23,12 @@
* __wt_lex_compare --
* Lexicographic comparison routine. Returns: < 0 if user_item is lexicographically < tree_item
* = 0 if user_item is lexicographically = tree_item > 0 if user_item is lexicographically >
- * tree_item We use the names "user" and "tree" so it's clear in the btree code which the
- * application is looking at when we call its comparison function.
+ * tree_item. We use the names "user" and "tree" so it's clear in the btree code which the
+ * application is looking at when we call its comparison function. If prefix is specified, 0 can
+ * be returned when the user_item is equal to the tree_item for the minimum size.
*/
static inline int
-__wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item)
+__wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item, bool prefix)
{
size_t len, usz, tsz;
const uint8_t *userp, *treep;
@@ -92,7 +93,7 @@ __wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item)
return (*userp < *treep ? -1 : 1);
/* Contents are equal up to the smallest length. */
- return ((usz == tsz) ? 0 : (usz < tsz) ? -1 : 1);
+ return ((usz == tsz || prefix) ? 0 : (usz < tsz) ? -1 : 1);
}
/*
@@ -104,13 +105,23 @@ __wt_compare(WT_SESSION_IMPL *session, WT_COLLATOR *collator, const WT_ITEM *use
const WT_ITEM *tree_item, int *cmpp)
{
if (collator == NULL) {
- *cmpp = __wt_lex_compare(user_item, tree_item);
+ *cmpp = __wt_lex_compare(user_item, tree_item, false);
return (0);
}
return (collator->compare(collator, &session->iface, user_item, tree_item, cmpp));
}
/*
+ * __wt_prefix_match --
+ * Check if the prefix item is equal to the leading bytes of the tree item.
+ */
+static inline int
+__wt_prefix_match(const WT_ITEM *prefix, const WT_ITEM *tree_item)
+{
+ return (__wt_lex_compare(prefix, tree_item, true));
+}
+
+/*
* __wt_lex_compare_skip --
* Lexicographic comparison routine, skipping leading bytes. Returns: < 0 if user_item is
* lexicographically < tree_item = 0 if user_item is lexicographically = tree_item > 0 if
diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h
index f2899eda401..61f1e9f6f1b 100644
--- a/src/third_party/wiredtiger/src/include/cache.h
+++ b/src/third_party/wiredtiger/src/include/cache.h
@@ -246,7 +246,7 @@ struct __wt_cache {
#define WT_WITH_PASS_LOCK(session, op) \
do { \
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_PASS)); \
+ WT_ASSERT(session, !FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_PASS)); \
WT_WITH_LOCK_WAIT(session, &cache->evict_pass_lock, WT_SESSION_LOCKED_PASS, op); \
} while (0)
diff --git a/src/third_party/wiredtiger/src/include/cache_inline.h b/src/third_party/wiredtiger/src/include/cache_inline.h
index 7bfa9dd70cd..866c19d6172 100644
--- a/src/third_party/wiredtiger/src/include/cache_inline.h
+++ b/src/third_party/wiredtiger/src/include/cache_inline.h
@@ -260,7 +260,8 @@ __wt_session_can_wait(WT_SESSION_IMPL *session)
* LSM sets the "ignore cache size" flag when holding the LSM tree lock, in that case, or when
* holding the schema lock, we don't want this thread to block for eviction.
*/
- return (!F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_LOCKED_SCHEMA));
+ return (!(F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE) ||
+ FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA)));
}
/*
@@ -498,9 +499,9 @@ __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool readonly, bo
* holding the handle list, schema or table locks (which can block checkpoints and eviction),
* don't block the thread for eviction.
*/
- if (F_ISSET(session,
- WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_LOCKED_SCHEMA |
- WT_SESSION_LOCKED_TABLE))
+ if (F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE) ||
+ FLD_ISSET(session->lock_flags,
+ WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_LOCKED_SCHEMA | WT_SESSION_LOCKED_TABLE))
return (0);
/* In memory configurations don't block when the cache is full. */
diff --git a/src/third_party/wiredtiger/src/include/config.h b/src/third_party/wiredtiger/src/include/config.h
index 9830801a01e..3b092857ed3 100644
--- a/src/third_party/wiredtiger/src/include/config.h
+++ b/src/third_party/wiredtiger/src/include/config.h
@@ -100,12 +100,14 @@ struct __wt_config_parser_impl {
#define WT_CONFIG_ENTRY_file_meta 46
#define WT_CONFIG_ENTRY_index_meta 47
#define WT_CONFIG_ENTRY_lsm_meta 48
-#define WT_CONFIG_ENTRY_table_meta 49
-#define WT_CONFIG_ENTRY_tiered_meta 50
-#define WT_CONFIG_ENTRY_wiredtiger_open 51
-#define WT_CONFIG_ENTRY_wiredtiger_open_all 52
-#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 53
-#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 54
+#define WT_CONFIG_ENTRY_object_meta 49
+#define WT_CONFIG_ENTRY_table_meta 50
+#define WT_CONFIG_ENTRY_tier_meta 51
+#define WT_CONFIG_ENTRY_tiered_meta 52
+#define WT_CONFIG_ENTRY_wiredtiger_open 53
+#define WT_CONFIG_ENTRY_wiredtiger_open_all 54
+#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 55
+#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 56
/*
* configuration section: END
* DO NOT EDIT: automatically built by dist/flags.py.
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index 61bbe022371..fc40f05e0d5 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -33,10 +33,11 @@ extern WT_PROCESS __wt_process;
/*
* WT_BUCKET_STORAGE --
- * A list entry for a storage source with a unique (name, bucket).
+ * A list entry for a storage source with a unique name (bucket, prefix).
*/
struct __wt_bucket_storage {
- const char *bucket; /* Bucket location */
+ const char *bucket; /* Bucket name */
+ const char *bucket_prefix; /* Bucket prefix */
int owned; /* Storage needs to be terminated */
uint64_t object_size; /* Tiered object size */
uint64_t retain_secs; /* Tiered period */
@@ -53,6 +54,15 @@ struct __wt_bucket_storage {
uint32_t flags;
};
+/* Call a function with the bucket storage and its associated file system. */
+#define WT_WITH_BUCKET_STORAGE(bsto, s, e) \
+ do { \
+ WT_BUCKET_STORAGE *__saved_bstorage = (s)->bucket_storage; \
+ (s)->bucket_storage = ((bsto) == NULL ? S2C(s)->bstorage : (bsto)); \
+ e; \
+ (s)->bucket_storage = __saved_bstorage; \
+ } while (0)
+
/*
* WT_KEYED_ENCRYPTOR --
* A list entry for an encryptor with a unique (name, keyid).
@@ -156,22 +166,22 @@ struct __wt_name_flag {
* Macros to ensure the dhandle is inserted or removed from both the main queue and the hashed
* queue.
*/
-#define WT_CONN_DHANDLE_INSERT(conn, dhandle, bucket) \
- do { \
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \
- TAILQ_INSERT_HEAD(&(conn)->dhqh, dhandle, q); \
- TAILQ_INSERT_HEAD(&(conn)->dhhash[bucket], dhandle, hashq); \
- ++(conn)->dh_bucket_count[bucket]; \
- ++(conn)->dhandle_count; \
+#define WT_CONN_DHANDLE_INSERT(conn, dhandle, bucket) \
+ do { \
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \
+ TAILQ_INSERT_HEAD(&(conn)->dhqh, dhandle, q); \
+ TAILQ_INSERT_HEAD(&(conn)->dhhash[bucket], dhandle, hashq); \
+ ++(conn)->dh_bucket_count[bucket]; \
+ ++(conn)->dhandle_count; \
} while (0)
-#define WT_CONN_DHANDLE_REMOVE(conn, dhandle, bucket) \
- do { \
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \
- TAILQ_REMOVE(&(conn)->dhqh, dhandle, q); \
- TAILQ_REMOVE(&(conn)->dhhash[bucket], dhandle, hashq); \
- --(conn)->dh_bucket_count[bucket]; \
- --(conn)->dhandle_count; \
+#define WT_CONN_DHANDLE_REMOVE(conn, dhandle, bucket) \
+ do { \
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \
+ TAILQ_REMOVE(&(conn)->dhqh, dhandle, q); \
+ TAILQ_REMOVE(&(conn)->dhhash[bucket], dhandle, hashq); \
+ --(conn)->dh_bucket_count[bucket]; \
+ --(conn)->dhandle_count; \
} while (0)
/*
@@ -378,7 +388,8 @@ struct __wt_connection_impl {
WT_LSM_MANAGER lsm_manager; /* LSM worker thread information */
- WT_BUCKET_STORAGE *bstorage; /* Bucket storage for the connection */
+ WT_BUCKET_STORAGE *bstorage; /* Bucket storage for the connection */
+ WT_BUCKET_STORAGE bstorage_none; /* Bucket storage for "none" */
WT_KEYED_ENCRYPTOR *kencryptor; /* Encryptor for metadata and log */
@@ -406,11 +417,13 @@ struct __wt_connection_impl {
wt_thread_t tiered_tid; /* Tiered thread */
bool tiered_tid_set; /* Tiered thread set */
WT_CONDVAR *tiered_cond; /* Tiered wait mutex */
+ bool tiered_server_running; /* Internal tiered server operating */
- const char *tiered_cluster; /* Tiered storage cluster name */
- const char *tiered_member; /* Tiered storage member name */
- WT_TIERED_MANAGER tiered_manager; /* Tiered worker thread information */
- bool tiered_server_running; /* Internal tiered server operating */
+ WT_TIERED_MANAGER tiered_mgr; /* Tiered manager thread information */
+ WT_SESSION_IMPL *tiered_mgr_session; /* Tiered manager thread session */
+ wt_thread_t tiered_mgr_tid; /* Tiered manager thread */
+ bool tiered_mgr_tid_set; /* Tiered manager thread set */
+ WT_CONDVAR *tiered_mgr_cond; /* Tiered manager wait mutex */
uint32_t tiered_threads_max; /* Max tiered threads */
uint32_t tiered_threads_min; /* Min tiered threads */
@@ -613,6 +626,7 @@ struct __wt_connection_impl {
#define WT_CONN_SERVER_STATISTICS 0x10u
#define WT_CONN_SERVER_SWEEP 0x20u
#define WT_CONN_SERVER_TIERED 0x40u
+#define WT_CONN_SERVER_TIERED_MGR 0x80u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint32_t server_flags;
diff --git a/src/third_party/wiredtiger/src/include/dhandle.h b/src/third_party/wiredtiger/src/include/dhandle.h
index 51357bd03c6..967d0b08be4 100644
--- a/src/third_party/wiredtiger/src/include/dhandle.h
+++ b/src/third_party/wiredtiger/src/include/dhandle.h
@@ -42,17 +42,17 @@
#define WT_DHANDLE_RELEASE(dhandle) (void)__wt_atomic_sub32(&(dhandle)->session_ref, 1)
-#define WT_DHANDLE_NEXT(session, dhandle, head, field) \
- do { \
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); \
- if ((dhandle) == NULL) \
- (dhandle) = TAILQ_FIRST(head); \
- else { \
- WT_DHANDLE_RELEASE(dhandle); \
- (dhandle) = TAILQ_NEXT(dhandle, field); \
- } \
- if ((dhandle) != NULL) \
- WT_DHANDLE_ACQUIRE(dhandle); \
+#define WT_DHANDLE_NEXT(session, dhandle, head, field) \
+ do { \
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST)); \
+ if ((dhandle) == NULL) \
+ (dhandle) = TAILQ_FIRST(head); \
+ else { \
+ WT_DHANDLE_RELEASE(dhandle); \
+ (dhandle) = TAILQ_NEXT(dhandle, field); \
+ } \
+ if ((dhandle) != NULL) \
+ WT_DHANDLE_ACQUIRE(dhandle); \
} while (0)
/*
@@ -84,7 +84,12 @@ struct __wt_data_handle {
WT_DATA_SOURCE *dsrc; /* Data source for this handle */
void *handle; /* Generic handle */
- enum { WT_DHANDLE_TYPE_BTREE, WT_DHANDLE_TYPE_TABLE, WT_DHANDLE_TYPE_TIERED } type;
+ enum {
+ WT_DHANDLE_TYPE_BTREE,
+ WT_DHANDLE_TYPE_TABLE,
+ WT_DHANDLE_TYPE_TIERED,
+ WT_DHANDLE_TYPE_TIERED_TREE
+ } type;
bool compact_skip; /* If the handle failed to compact */
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index ef14f73ccfc..fb5c8e361ba 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -175,8 +175,6 @@ extern int __wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mappe
extern int __wt_block_misplaced(WT_SESSION_IMPL *session, WT_BLOCK *block, const char *list,
wt_off_t offset, uint32_t size, bool live, const char *func, int line)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_newfile(WT_SESSION_IMPL *session, WT_BLOCK *block)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_off_free(WT_SESSION_IMPL *session, WT_BLOCK *block, uint32_t logid,
wt_off_t offset, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_off_remove_overlap(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el,
@@ -199,6 +197,12 @@ extern int __wt_block_salvage_valid(WT_SESSION_IMPL *session, WT_BLOCK *block, u
size_t addr_size, bool valid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_size_alloc(WT_SESSION_IMPL *session, WT_SIZE **szp)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_tiered_flush(WT_SESSION_IMPL *session, WT_BLOCK *block,
+ uint8_t **flush_cookie, size_t *cookie_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_tiered_load(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_tiered_newfile(WT_SESSION_IMPL *session, WT_BLOCK *block)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region,
@@ -262,10 +266,14 @@ extern int __wt_btcur_modify(WT_CURSOR_BTREE *cbt, WT_MODIFY *entries, int nentr
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btcur_next_prefix(WT_CURSOR_BTREE *cbt, WT_ITEM *prefix, bool truncating)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btcur_prev_prefix(WT_CURSOR_BTREE *cbt, WT_ITEM *prefix, bool truncating)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_remove(WT_CURSOR_BTREE *cbt, bool positioned)
@@ -518,8 +526,9 @@ extern int __wt_curmetadata_open(WT_SESSION_IMPL *session, const char *uri, WT_C
const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_cursor_cache(WT_CURSOR *cursor, WT_DATA_HANDLE *dhandle)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *to_dup,
- const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri, uint64_t hash_value,
+ WT_CURSOR *to_dup, const char *cfg[], WT_CURSOR **cursorp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_cursor_cache_release(WT_SESSION_IMPL *session, WT_CURSOR *cursor, bool *released)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_cursor_cached(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -1112,8 +1121,6 @@ extern int __wt_modify_pack(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries,
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_modify_reconstruct_from_upd_list(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
WT_UPDATE *upd, WT_UPDATE_VALUE *upd_value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_modify_vector_push(WT_MODIFY_VECTOR *modifies, WT_UPDATE *upd)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_msg(WT_SESSION_IMPL *session, const char *fmt, ...)
WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 2, 3)))
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -1132,8 +1139,8 @@ extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_FS_OPEN_FILE
extern int __wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name,
- bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+ bool open_metadata, uint32_t session_flags, uint32_t session_lock_flags,
+ WT_SESSION_IMPL **sessionp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler,
const char *config, bool open_metadata, WT_SESSION_IMPL **sessionp)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -1285,6 +1292,8 @@ extern int __wt_schema_get_table(WT_SESSION_IMPL *session, const char *name, siz
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_get_table_uri(WT_SESSION_IMPL *session, const char *uri, bool ok_incomplete,
uint32_t flags, WT_TABLE **tablep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_get_tiered_uri(WT_SESSION_IMPL *session, const char *uri, uint32_t flags,
+ WT_TIERED **tieredp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_index_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname,
const char *config, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_internal_session(WT_SESSION_IMPL *session, WT_SESSION_IMPL **int_sessionp)
@@ -1310,6 +1319,8 @@ extern int __wt_schema_range_truncate(WT_SESSION_IMPL *session, WT_CURSOR *start
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_release_table(WT_SESSION_IMPL *session, WT_TABLE **tablep)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_release_tiered(WT_SESSION_IMPL *session, WT_TIERED **tieredp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_rename(WT_SESSION_IMPL *session, const char *uri, const char *newuri,
const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_session_release(WT_SESSION_IMPL *session, WT_SESSION_IMPL *int_session)
@@ -1442,19 +1453,28 @@ extern int __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *
extern int __wt_thread_group_resize(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group,
uint32_t new_min, uint32_t new_max, uint32_t flags)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_tiered_bucket_config(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval,
- WT_CONFIG_ITEM *bucket, WT_BUCKET_STORAGE **bstoragep)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_tiered_bucket_config(WT_SESSION_IMPL *session, const char *cfg[],
+ WT_BUCKET_STORAGE **bstoragep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_tiered_close(WT_SESSION_IMPL *session, WT_TIERED *tiered)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_tiered_common_config(WT_SESSION_IMPL *session, const char **cfg,
- WT_BUCKET_STORAGE *bstorage) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_tiered_conn_config(WT_SESSION_IMPL *session, const char **cfg, bool reconfig)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_tiered_name(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle, uint64_t id,
+ uint32_t flags, const char **retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_tiered_open(WT_SESSION_IMPL *session, const char *cfg[])
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_tiered_storage_create(WT_SESSION_IMPL *session, const char *cfg[], bool reconfig)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_tiered_storage_destroy(WT_SESSION_IMPL *session)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_tiered_switch(WT_SESSION_IMPL *session, const char *config)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_tiered_tree_close(WT_SESSION_IMPL *session, WT_TIERED_TREE *tiered_tree)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_tiered_tree_create(WT_SESSION_IMPL *session, const char *uri, bool exclusive,
+ bool import, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_tiered_tree_open(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_time_aggregate_validate(WT_SESSION_IMPL *session, WT_TIME_AGGREGATE *ta,
WT_TIME_AGGREGATE *parent, bool silent) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_time_value_validate(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw,
@@ -1506,7 +1526,7 @@ extern int __wt_txn_global_shutdown(WT_SESSION_IMPL *session, const char **cfg)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_init(WT_SESSION_IMPL *session, WT_SESSION_IMPL *session_ret)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_is_blocking(WT_SESSION_IMPL *session, bool conservative)
+extern int __wt_txn_is_blocking(WT_SESSION_IMPL *session)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_log_commit(WT_SESSION_IMPL *session, const char *cfg[])
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -1554,6 +1574,8 @@ extern int __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session, bool force
extern int __wt_unexpected_object_type(
WT_SESSION_IMPL *session, const char *uri, const char *expect) WT_GCC_FUNC_DECL_ATTRIBUTE((cold))
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_update_vector_push(WT_UPDATE_VECTOR *updates, WT_UPDATE *upd)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[])
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_value_return(WT_CURSOR_BTREE *cbt, WT_UPDATE_VALUE *upd_value)
@@ -1660,6 +1682,8 @@ extern void __wt_conn_foc_discard(WT_SESSION_IMPL *session);
extern void __wt_conn_stat_init(WT_SESSION_IMPL *session);
extern void __wt_connection_destroy(WT_CONNECTION_IMPL *conn);
extern void __wt_cursor_close(WT_CURSOR *cursor);
+extern void __wt_cursor_get_hash(
+ WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *to_dup, uint64_t *hash_value);
extern void __wt_cursor_key_order_reset(WT_CURSOR_BTREE *cbt);
extern void __wt_cursor_reopen(WT_CURSOR *cursor, WT_DATA_HANDLE *dhandle);
extern void __wt_cursor_set_key(WT_CURSOR *cursor, ...);
@@ -1732,11 +1756,6 @@ extern void __wt_meta_track_discard(WT_SESSION_IMPL *session);
extern void __wt_meta_track_sub_on(WT_SESSION_IMPL *session);
extern void __wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase)
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
-extern void __wt_modify_vector_clear(WT_MODIFY_VECTOR *modifies);
-extern void __wt_modify_vector_free(WT_MODIFY_VECTOR *modifies);
-extern void __wt_modify_vector_init(WT_SESSION_IMPL *session, WT_MODIFY_VECTOR *modifies);
-extern void __wt_modify_vector_peek(WT_MODIFY_VECTOR *modifies, WT_UPDATE **updp);
-extern void __wt_modify_vector_pop(WT_MODIFY_VECTOR *modifies, WT_UPDATE **updp);
extern void __wt_optrack_flush_buffer(WT_SESSION_IMPL *s);
extern void __wt_optrack_record_funcid(
WT_SESSION_IMPL *session, const char *func, uint16_t *func_idp);
@@ -1802,6 +1821,11 @@ extern void __wt_txn_release_resources(WT_SESSION_IMPL *session);
extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session);
extern void __wt_txn_stats_update(WT_SESSION_IMPL *session);
extern void __wt_txn_truncate_end(WT_SESSION_IMPL *session);
+extern void __wt_update_vector_clear(WT_UPDATE_VECTOR *updates);
+extern void __wt_update_vector_free(WT_UPDATE_VECTOR *updates);
+extern void __wt_update_vector_init(WT_SESSION_IMPL *session, WT_UPDATE_VECTOR *updates);
+extern void __wt_update_vector_peek(WT_UPDATE_VECTOR *updates, WT_UPDATE **updp);
+extern void __wt_update_vector_pop(WT_UPDATE_VECTOR *updates, WT_UPDATE **updp);
extern void __wt_verbose_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t ts, const char *msg);
extern void __wt_verbose_worker(WT_SESSION_IMPL *session, const char *fmt, ...)
WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 2, 3))) WT_GCC_FUNC_DECL_ATTRIBUTE((cold));
@@ -1813,6 +1837,8 @@ static inline WT_CELL *__wt_cell_leaf_value_parse(WT_PAGE *page, WT_CELL *cell)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline WT_CURSOR_BTREE *__wt_curhs_get_cbt(WT_CURSOR *cursor)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline WT_FILE_SYSTEM *__wt_fs_file_system(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline WT_IKEY *__wt_ref_key_instantiated(WT_REF *ref)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline WT_VISIBLE_TYPE __wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd)
@@ -1986,7 +2012,7 @@ static inline int __wt_getline(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, WT_IT
static inline int __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp, size_t new_ins_size,
u_int skipdepth, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item)
+static inline int __wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item, bool prefix)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_lex_compare_short(const WT_ITEM *user_item, const WT_ITEM *tree_item)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -2011,6 +2037,8 @@ static inline int __wt_page_swap_func(
const char *func, int line
#endif
) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_prefix_match(const WT_ITEM *prefix, const WT_ITEM *tree_item)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_read(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len,
void *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r,
diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h
index f562e5da383..4f23f98b463 100644
--- a/src/third_party/wiredtiger/src/include/log.h
+++ b/src/third_party/wiredtiger/src/include/log.h
@@ -216,7 +216,7 @@ struct __wt_logslot {
#define WT_WITH_SLOT_LOCK(session, log, op) \
do { \
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SLOT)); \
+ WT_ASSERT(session, !FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SLOT)); \
WT_WITH_LOCK_WAIT(session, &(log)->log_slot_lock, WT_SESSION_LOCKED_SLOT, op); \
} while (0)
diff --git a/src/third_party/wiredtiger/src/include/meta.h b/src/third_party/wiredtiger/src/include/meta.h
index 01f0c6de92a..22b0de65308 100644
--- a/src/third_party/wiredtiger/src/include/meta.h
+++ b/src/third_party/wiredtiger/src/include/meta.h
@@ -70,7 +70,7 @@
*/
#define WT_WITH_TURTLE_LOCK(session, op) \
do { \
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_TURTLE)); \
+ WT_ASSERT(session, !FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TURTLE)); \
WT_WITH_LOCK_WAIT(session, &S2C(session)->turtle_lock, WT_SESSION_LOCKED_TURTLE, op); \
} while (0)
diff --git a/src/third_party/wiredtiger/src/include/os_fs_inline.h b/src/third_party/wiredtiger/src/include/os_fs_inline.h
index 56d0bc2a5f3..2276f096312 100644
--- a/src/third_party/wiredtiger/src/include/os_fs_inline.h
+++ b/src/third_party/wiredtiger/src/include/os_fs_inline.h
@@ -7,6 +7,16 @@
*/
/*
+ * __wt_fs_file_system --
+ * Get the active file system handle.
+ */
+static inline WT_FILE_SYSTEM *
+__wt_fs_file_system(WT_SESSION_IMPL *session)
+{
+ return (S2FS(session));
+}
+
+/*
* __wt_fs_directory_list --
* Return a list of files from a directory.
*/
@@ -27,7 +37,7 @@ __wt_fs_directory_list(
WT_RET(__wt_filename(session, dir, &path));
- file_system = S2C(session)->file_system;
+ file_system = __wt_fs_file_system(session);
wt_session = (WT_SESSION *)session;
ret = file_system->fs_directory_list(file_system, wt_session, path, prefix, dirlistp, countp);
@@ -56,7 +66,7 @@ __wt_fs_directory_list_single(
WT_RET(__wt_filename(session, dir, &path));
- file_system = S2C(session)->file_system;
+ file_system = __wt_fs_file_system(session);
wt_session = (WT_SESSION *)session;
ret = file_system->fs_directory_list_single(
file_system, wt_session, path, prefix, dirlistp, countp);
@@ -77,7 +87,7 @@ __wt_fs_directory_list_free(WT_SESSION_IMPL *session, char ***dirlistp, u_int co
WT_SESSION *wt_session;
if (*dirlistp != NULL) {
- file_system = S2C(session)->file_system;
+ file_system = __wt_fs_file_system(session);
wt_session = (WT_SESSION *)session;
ret = file_system->fs_directory_list_free(file_system, wt_session, *dirlistp, count);
}
@@ -102,7 +112,7 @@ __wt_fs_exist(WT_SESSION_IMPL *session, const char *name, bool *existp)
WT_RET(__wt_filename(session, name, &path));
- file_system = S2C(session)->file_system;
+ file_system = __wt_fs_file_system(session);
wt_session = (WT_SESSION *)session;
ret = file_system->fs_exist(file_system, wt_session, path, existp);
@@ -137,7 +147,7 @@ __wt_fs_remove(WT_SESSION_IMPL *session, const char *name, bool durable)
WT_RET(__wt_filename(session, name, &path));
- file_system = S2C(session)->file_system;
+ file_system = __wt_fs_file_system(session);
wt_session = (WT_SESSION *)session;
ret = file_system->fs_remove(file_system, wt_session, path, durable ? WT_FS_DURABLE : 0);
@@ -176,7 +186,7 @@ __wt_fs_rename(WT_SESSION_IMPL *session, const char *from, const char *to, bool
WT_ERR(__wt_filename(session, from, &from_path));
WT_ERR(__wt_filename(session, to, &to_path));
- file_system = S2C(session)->file_system;
+ file_system = __wt_fs_file_system(session);
wt_session = (WT_SESSION *)session;
ret = file_system->fs_rename(
file_system, wt_session, from_path, to_path, durable ? WT_FS_DURABLE : 0);
@@ -203,7 +213,7 @@ __wt_fs_size(WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep)
WT_RET(__wt_filename(session, name, &path));
- file_system = S2C(session)->file_system;
+ file_system = __wt_fs_file_system(session);
wt_session = (WT_SESSION *)session;
ret = file_system->fs_size(file_system, wt_session, path, sizep);
diff --git a/src/third_party/wiredtiger/src/include/schema.h b/src/third_party/wiredtiger/src/include/schema.h
index ebee4b3ca21..9d2487798b5 100644
--- a/src/third_party/wiredtiger/src/include/schema.h
+++ b/src/third_party/wiredtiger/src/include/schema.h
@@ -87,17 +87,17 @@ struct __wt_table {
* WT_WITH_LOCK_WAIT --
* Wait for a lock, perform an operation, drop the lock.
*/
-#define WT_WITH_LOCK_WAIT(session, lock, flag, op) \
- do { \
- if (F_ISSET(session, (flag))) { \
- op; \
- } else { \
- __wt_spin_lock_track(session, lock); \
- F_SET(session, (flag)); \
- op; \
- F_CLR(session, (flag)); \
- __wt_spin_unlock(session, lock); \
- } \
+#define WT_WITH_LOCK_WAIT(session, lock, flag, op) \
+ do { \
+ if (FLD_ISSET(session->lock_flags, (flag))) { \
+ op; \
+ } else { \
+ __wt_spin_lock_track(session, lock); \
+ FLD_SET(session->lock_flags, (flag)); \
+ op; \
+ FLD_CLR(session->lock_flags, (flag)); \
+ __wt_spin_unlock(session, lock); \
+ } \
} while (0)
/*
@@ -107,12 +107,12 @@ struct __wt_table {
#define WT_WITH_LOCK_NOWAIT(session, ret, lock, flag, op) \
do { \
(ret) = 0; \
- if (F_ISSET(session, (flag))) { \
+ if (FLD_ISSET(session->lock_flags, (flag))) { \
op; \
} else if (((ret) = __wt_spin_trylock_track(session, lock)) == 0) { \
- F_SET(session, (flag)); \
+ FLD_SET(session->lock_flags, (flag)); \
op; \
- F_CLR(session, (flag)); \
+ FLD_CLR(session->lock_flags, (flag)); \
__wt_spin_unlock(session, lock); \
} \
} while (0)
@@ -137,17 +137,17 @@ struct __wt_table {
* discard handles, and we only expect it to be held across short
* operations.
*/
-#define WT_WITH_HANDLE_LIST_READ_LOCK(session, op) \
- do { \
- if (F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) { \
- op; \
- } else { \
- __wt_readlock(session, &S2C(session)->dhandle_lock); \
- F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
- op; \
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
- __wt_readunlock(session, &S2C(session)->dhandle_lock); \
- } \
+#define WT_WITH_HANDLE_LIST_READ_LOCK(session, op) \
+ do { \
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST)) { \
+ op; \
+ } else { \
+ __wt_readlock(session, &S2C(session)->dhandle_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
+ op; \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
+ __wt_readunlock(session, &S2C(session)->dhandle_lock); \
+ } \
} while (0)
/*
@@ -156,18 +156,19 @@ struct __wt_table {
* operation, drop the lock. The handle list lock is a read-write lock so
* the implementation is different to the other lock macros.
*/
-#define WT_WITH_HANDLE_LIST_WRITE_LOCK(session, op) \
- do { \
- if (F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)) { \
- op; \
- } else { \
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ)); \
- __wt_writelock(session, &S2C(session)->dhandle_lock); \
- F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
- op; \
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
- __wt_writeunlock(session, &S2C(session)->dhandle_lock); \
- } \
+#define WT_WITH_HANDLE_LIST_WRITE_LOCK(session, op) \
+ do { \
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)) { \
+ op; \
+ } else { \
+ WT_ASSERT( \
+ session, !FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_READ)); \
+ __wt_writelock(session, &S2C(session)->dhandle_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
+ op; \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
+ __wt_writeunlock(session, &S2C(session)->dhandle_lock); \
+ } \
} while (0)
/*
@@ -186,8 +187,8 @@ struct __wt_table {
#define WT_WITH_SCHEMA_LOCK(session, op) \
do { \
WT_ASSERT(session, \
- F_ISSET(session, WT_SESSION_LOCKED_SCHEMA) || \
- !F_ISSET(session, \
+ FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA) || \
+ !FLD_ISSET(session->lock_flags, \
WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_NO_SCHEMA_LOCK | \
WT_SESSION_LOCKED_TABLE)); \
WT_WITH_LOCK_WAIT(session, &S2C(session)->schema_lock, WT_SESSION_LOCKED_SCHEMA, op); \
@@ -195,8 +196,8 @@ struct __wt_table {
#define WT_WITH_SCHEMA_LOCK_NOWAIT(session, ret, op) \
do { \
WT_ASSERT(session, \
- F_ISSET(session, WT_SESSION_LOCKED_SCHEMA) || \
- !F_ISSET(session, \
+ FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA) || \
+ !FLD_ISSET(session->lock_flags, \
WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_NO_SCHEMA_LOCK | \
WT_SESSION_LOCKED_TABLE)); \
WT_WITH_LOCK_NOWAIT( \
@@ -214,47 +215,49 @@ struct __wt_table {
* to discard handles, and we only expect it to be held across short
* operations.
*/
-#define WT_WITH_TABLE_READ_LOCK(session, op) \
- do { \
- if (F_ISSET(session, WT_SESSION_LOCKED_TABLE)) { \
- op; \
- } else { \
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); \
- __wt_readlock(session, &S2C(session)->table_lock); \
- F_SET(session, WT_SESSION_LOCKED_TABLE_READ); \
- op; \
- F_CLR(session, WT_SESSION_LOCKED_TABLE_READ); \
- __wt_readunlock(session, &S2C(session)->table_lock); \
- } \
- } while (0)
-
-#define WT_WITH_TABLE_WRITE_LOCK(session, op) \
+#define WT_WITH_TABLE_READ_LOCK(session, op) \
do { \
- if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE)) { \
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE)) { \
op; \
} else { \
- WT_ASSERT(session, \
- !F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ | WT_SESSION_LOCKED_HANDLE_LIST)); \
- __wt_writelock(session, &S2C(session)->table_lock); \
- F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \
+ WT_ASSERT(session, !FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST)); \
+ __wt_readlock(session, &S2C(session)->table_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_TABLE_READ); \
op; \
- F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \
- __wt_writeunlock(session, &S2C(session)->table_lock); \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_TABLE_READ); \
+ __wt_readunlock(session, &S2C(session)->table_lock); \
} \
} while (0)
-#define WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret, op) \
- do { \
- WT_ASSERT(session, \
- F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE) || \
- !F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ | WT_SESSION_LOCKED_HANDLE_LIST)); \
- if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE)) { \
- op; \
- } else if (((ret) = __wt_try_writelock(session, &S2C(session)->table_lock)) == 0) { \
- F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \
- op; \
- F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \
- __wt_writeunlock(session, &S2C(session)->table_lock); \
- } \
+
+#define WT_WITH_TABLE_WRITE_LOCK(session, op) \
+ do { \
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE)) { \
+ op; \
+ } else { \
+ WT_ASSERT(session, \
+ !FLD_ISSET(session->lock_flags, \
+ WT_SESSION_LOCKED_TABLE_READ | WT_SESSION_LOCKED_HANDLE_LIST)); \
+ __wt_writelock(session, &S2C(session)->table_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE); \
+ op; \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE); \
+ __wt_writeunlock(session, &S2C(session)->table_lock); \
+ } \
+ } while (0)
+#define WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret, op) \
+ do { \
+ WT_ASSERT(session, \
+ FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE) || \
+ !FLD_ISSET( \
+ session->lock_flags, WT_SESSION_LOCKED_TABLE_READ | WT_SESSION_LOCKED_HANDLE_LIST)); \
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE)) { \
+ op; \
+ } else if (((ret) = __wt_try_writelock(session, &S2C(session)->table_lock)) == 0) { \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE); \
+ op; \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE); \
+ __wt_writeunlock(session, &S2C(session)->table_lock); \
+ } \
} while (0)
/*
@@ -263,47 +266,47 @@ struct __wt_table {
* there is no hot backup in progress. The skipp parameter can be used to
* check whether the operation got skipped or not.
*/
-#define WT_WITH_HOTBACKUP_READ_LOCK(session, op, skipp) \
- do { \
- WT_CONNECTION_IMPL *__conn = S2C(session); \
- if ((skipp) != (bool *)NULL) \
- *(bool *)(skipp) = true; \
- if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP)) { \
- if (__conn->hot_backup_start == 0) { \
- if ((skipp) != (bool *)NULL) \
- *(bool *)(skipp) = false; \
- op; \
- } \
- } else { \
- __wt_readlock(session, &__conn->hot_backup_lock); \
- F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
- if (__conn->hot_backup_start == 0) { \
- if ((skipp) != (bool *)NULL) \
- *(bool *)(skipp) = false; \
- op; \
- } \
- F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
- __wt_readunlock(session, &__conn->hot_backup_lock); \
- } \
+#define WT_WITH_HOTBACKUP_READ_LOCK(session, op, skipp) \
+ do { \
+ WT_CONNECTION_IMPL *__conn = S2C(session); \
+ if ((skipp) != (bool *)NULL) \
+ *(bool *)(skipp) = true; \
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HOTBACKUP)) { \
+ if (__conn->hot_backup_start == 0) { \
+ if ((skipp) != (bool *)NULL) \
+ *(bool *)(skipp) = false; \
+ op; \
+ } \
+ } else { \
+ __wt_readlock(session, &__conn->hot_backup_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_HOTBACKUP_READ); \
+ if (__conn->hot_backup_start == 0) { \
+ if ((skipp) != (bool *)NULL) \
+ *(bool *)(skipp) = false; \
+ op; \
+ } \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_HOTBACKUP_READ); \
+ __wt_readunlock(session, &__conn->hot_backup_lock); \
+ } \
} while (0)
/*
* WT_WITH_HOTBACKUP_WRITE_LOCK --
* Acquire the hot backup write lock and perform an operation.
*/
-#define WT_WITH_HOTBACKUP_WRITE_LOCK(session, op) \
- do { \
- WT_CONNECTION_IMPL *__conn = S2C(session); \
- if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE)) { \
- op; \
- } else { \
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP_READ)); \
- __wt_writelock(session, &__conn->hot_backup_lock); \
- F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE); \
- op; \
- F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE); \
- __wt_writeunlock(session, &__conn->hot_backup_lock); \
- } \
+#define WT_WITH_HOTBACKUP_WRITE_LOCK(session, op) \
+ do { \
+ WT_CONNECTION_IMPL *__conn = S2C(session); \
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HOTBACKUP_WRITE)) { \
+ op; \
+ } else { \
+ WT_ASSERT(session, !FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HOTBACKUP_READ)); \
+ __wt_writelock(session, &__conn->hot_backup_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_HOTBACKUP_WRITE); \
+ op; \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_HOTBACKUP_WRITE); \
+ __wt_writeunlock(session, &__conn->hot_backup_lock); \
+ } \
} while (0)
/*
@@ -314,18 +317,18 @@ struct __wt_table {
* WT_WITH_HOTBACKUP_READ_LOCK which checks that there is no hot backup in
* progress.
*/
-#define WT_WITH_HOTBACKUP_READ_LOCK_UNCOND(session, op) \
- do { \
- WT_CONNECTION_IMPL *__conn = S2C(session); \
- if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP)) { \
- op; \
- } else { \
- __wt_readlock(session, &__conn->hot_backup_lock); \
- F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
- op; \
- F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
- __wt_readunlock(session, &__conn->hot_backup_lock); \
- } \
+#define WT_WITH_HOTBACKUP_READ_LOCK_UNCOND(session, op) \
+ do { \
+ WT_CONNECTION_IMPL *__conn = S2C(session); \
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HOTBACKUP)) { \
+ op; \
+ } else { \
+ __wt_readlock(session, &__conn->hot_backup_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_HOTBACKUP_READ); \
+ op; \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_HOTBACKUP_READ); \
+ __wt_readunlock(session, &__conn->hot_backup_lock); \
+ } \
} while (0)
/*
@@ -333,64 +336,66 @@ struct __wt_table {
* Drop the handle, table and/or schema locks, perform an operation,
* re-acquire the lock(s).
*/
-#define WT_WITHOUT_LOCKS(session, op) \
- do { \
- WT_CONNECTION_IMPL *__conn = S2C(session); \
- bool __checkpoint_locked = F_ISSET(session, WT_SESSION_LOCKED_CHECKPOINT); \
- bool __handle_read_locked = F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
- bool __handle_write_locked = F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
- bool __table_read_locked = F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ); \
- bool __table_write_locked = F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE); \
- bool __schema_locked = F_ISSET(session, WT_SESSION_LOCKED_SCHEMA); \
- if (__handle_read_locked) { \
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
- __wt_readunlock(session, &__conn->dhandle_lock); \
- } \
- if (__handle_write_locked) { \
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
- __wt_writeunlock(session, &__conn->dhandle_lock); \
- } \
- if (__table_read_locked) { \
- F_CLR(session, WT_SESSION_LOCKED_TABLE_READ); \
- __wt_readunlock(session, &__conn->table_lock); \
- } \
- if (__table_write_locked) { \
- F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \
- __wt_writeunlock(session, &__conn->table_lock); \
- } \
- if (__schema_locked) { \
- F_CLR(session, WT_SESSION_LOCKED_SCHEMA); \
- __wt_spin_unlock(session, &__conn->schema_lock); \
- } \
- if (__checkpoint_locked) { \
- F_CLR(session, WT_SESSION_LOCKED_CHECKPOINT); \
- __wt_spin_unlock(session, &__conn->checkpoint_lock); \
- } \
- __wt_yield(); \
- op; \
- __wt_yield(); \
- if (__checkpoint_locked) { \
- __wt_spin_lock(session, &__conn->checkpoint_lock); \
- F_SET(session, WT_SESSION_LOCKED_CHECKPOINT); \
- } \
- if (__schema_locked) { \
- __wt_spin_lock(session, &__conn->schema_lock); \
- F_SET(session, WT_SESSION_LOCKED_SCHEMA); \
- } \
- if (__table_read_locked) { \
- __wt_readlock(session, &__conn->table_lock); \
- F_SET(session, WT_SESSION_LOCKED_TABLE_READ); \
- } \
- if (__table_write_locked) { \
- __wt_writelock(session, &__conn->table_lock); \
- F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \
- } \
- if (__handle_read_locked) { \
- __wt_readlock(session, &__conn->dhandle_lock); \
- F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
- } \
- if (__handle_write_locked) { \
- __wt_writelock(session, &__conn->dhandle_lock); \
- F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
- } \
+#define WT_WITHOUT_LOCKS(session, op) \
+ do { \
+ WT_CONNECTION_IMPL *__conn = S2C(session); \
+ bool __checkpoint_locked = FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_CHECKPOINT); \
+ bool __handle_read_locked = \
+ FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
+ bool __handle_write_locked = \
+ FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
+ bool __table_read_locked = FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE_READ); \
+ bool __table_write_locked = FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE); \
+ bool __schema_locked = FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA); \
+ if (__handle_read_locked) { \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
+ __wt_readunlock(session, &__conn->dhandle_lock); \
+ } \
+ if (__handle_write_locked) { \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
+ __wt_writeunlock(session, &__conn->dhandle_lock); \
+ } \
+ if (__table_read_locked) { \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_TABLE_READ); \
+ __wt_readunlock(session, &__conn->table_lock); \
+ } \
+ if (__table_write_locked) { \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE); \
+ __wt_writeunlock(session, &__conn->table_lock); \
+ } \
+ if (__schema_locked) { \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_SCHEMA); \
+ __wt_spin_unlock(session, &__conn->schema_lock); \
+ } \
+ if (__checkpoint_locked) { \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_CHECKPOINT); \
+ __wt_spin_unlock(session, &__conn->checkpoint_lock); \
+ } \
+ __wt_yield(); \
+ op; \
+ __wt_yield(); \
+ if (__checkpoint_locked) { \
+ __wt_spin_lock(session, &__conn->checkpoint_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_CHECKPOINT); \
+ } \
+ if (__schema_locked) { \
+ __wt_spin_lock(session, &__conn->schema_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA); \
+ } \
+ if (__table_read_locked) { \
+ __wt_readlock(session, &__conn->table_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_TABLE_READ); \
+ } \
+ if (__table_write_locked) { \
+ __wt_writelock(session, &__conn->table_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE); \
+ } \
+ if (__handle_read_locked) { \
+ __wt_readlock(session, &__conn->dhandle_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
+ } \
+ if (__handle_write_locked) { \
+ __wt_writelock(session, &__conn->dhandle_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
+ } \
} while (0)
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index 9d783cede10..f7ec0464a29 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -37,6 +37,11 @@ struct __wt_hazard {
#define S2BT(session) ((WT_BTREE *)(session)->dhandle->handle)
#define S2BT_SAFE(session) ((session)->dhandle == NULL ? NULL : S2BT(session))
+/* Get the file system for a session */
+#define S2FS(session) \
+ ((session)->bucket_storage == NULL ? S2C(session)->file_system : \
+ (session)->bucket_storage->file_system)
+
typedef TAILQ_HEAD(__wt_cursor_list, __wt_cursor) WT_CURSOR_LIST;
/* Number of cursors cached to trigger cursor sweep. */
@@ -68,7 +73,8 @@ struct __wt_session_impl {
uint64_t operation_timeout_us; /* Maximum operation period before rollback */
u_int api_call_counter; /* Depth of api calls */
- WT_DATA_HANDLE *dhandle; /* Current data handle */
+ WT_DATA_HANDLE *dhandle; /* Current data handle */
+ WT_BUCKET_STORAGE *bucket_storage; /* Current bucket storage and file system */
/*
* Each session keeps a cache of data handles. The set of handles can grow quite large so we
@@ -166,38 +172,42 @@ struct __wt_session_impl {
#endif
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_SESSION_BACKUP_CURSOR 0x00000001u
-#define WT_SESSION_BACKUP_DUP 0x00000002u
-#define WT_SESSION_CACHE_CURSORS 0x00000004u
-#define WT_SESSION_CAN_WAIT 0x00000008u
-#define WT_SESSION_EVICTION 0x00000010u
-#define WT_SESSION_IGNORE_CACHE_SIZE 0x00000020u
-#define WT_SESSION_IMPORT 0x00000040u
-#define WT_SESSION_IMPORT_REPAIR 0x00000080u
-#define WT_SESSION_INSTANTIATE_PREPARE 0x00000100u
-#define WT_SESSION_INTERNAL 0x00000200u
-#define WT_SESSION_LOCKED_CHECKPOINT 0x00000400u
-#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x00000800u
-#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x00001000u
-#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x00002000u
-#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x00004000u
-#define WT_SESSION_LOCKED_METADATA 0x00008000u
-#define WT_SESSION_LOCKED_PASS 0x00010000u
-#define WT_SESSION_LOCKED_SCHEMA 0x00020000u
-#define WT_SESSION_LOCKED_SLOT 0x00040000u
-#define WT_SESSION_LOCKED_TABLE_READ 0x00080000u
-#define WT_SESSION_LOCKED_TABLE_WRITE 0x00100000u
-#define WT_SESSION_LOCKED_TURTLE 0x00200000u
-#define WT_SESSION_LOGGING_INMEM 0x00400000u
-#define WT_SESSION_NO_DATA_HANDLES 0x00800000u
-#define WT_SESSION_NO_LOGGING 0x01000000u
-#define WT_SESSION_NO_RECONCILE 0x02000000u
-#define WT_SESSION_NO_SCHEMA_LOCK 0x04000000u
-#define WT_SESSION_QUIET_CORRUPT_FILE 0x08000000u
-#define WT_SESSION_READ_WONT_NEED 0x10000000u
-#define WT_SESSION_RESOLVING_TXN 0x20000000u
-#define WT_SESSION_ROLLBACK_TO_STABLE 0x40000000u
-#define WT_SESSION_SCHEMA_TXN 0x80000000u
+#define WT_SESSION_LOCKED_CHECKPOINT 0x0001u
+#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x0002u
+#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x0004u
+#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x0008u
+#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x0010u
+#define WT_SESSION_LOCKED_METADATA 0x0020u
+#define WT_SESSION_LOCKED_PASS 0x0040u
+#define WT_SESSION_LOCKED_SCHEMA 0x0080u
+#define WT_SESSION_LOCKED_SLOT 0x0100u
+#define WT_SESSION_LOCKED_TABLE_READ 0x0200u
+#define WT_SESSION_LOCKED_TABLE_WRITE 0x0400u
+#define WT_SESSION_LOCKED_TURTLE 0x0800u
+#define WT_SESSION_NO_SCHEMA_LOCK 0x1000u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t lock_flags;
+
+/* AUTOMATIC FLAG VALUE GENERATION START */
+#define WT_SESSION_BACKUP_CURSOR 0x00001u
+#define WT_SESSION_BACKUP_DUP 0x00002u
+#define WT_SESSION_CACHE_CURSORS 0x00004u
+#define WT_SESSION_CAN_WAIT 0x00008u
+#define WT_SESSION_EVICTION 0x00010u
+#define WT_SESSION_IGNORE_CACHE_SIZE 0x00020u
+#define WT_SESSION_IMPORT 0x00040u
+#define WT_SESSION_IMPORT_REPAIR 0x00080u
+#define WT_SESSION_INSTANTIATE_PREPARE 0x00100u
+#define WT_SESSION_INTERNAL 0x00200u
+#define WT_SESSION_LOGGING_INMEM 0x00400u
+#define WT_SESSION_NO_DATA_HANDLES 0x00800u
+#define WT_SESSION_NO_LOGGING 0x01000u
+#define WT_SESSION_NO_RECONCILE 0x02000u
+#define WT_SESSION_QUIET_CORRUPT_FILE 0x04000u
+#define WT_SESSION_READ_WONT_NEED 0x08000u
+#define WT_SESSION_RESOLVING_TXN 0x10000u
+#define WT_SESSION_ROLLBACK_TO_STABLE 0x20000u
+#define WT_SESSION_SCHEMA_TXN 0x40000u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint32_t flags;
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index 7e9ff35c7c9..534d4a1cf40 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -379,7 +379,6 @@ struct __wt_connection_stats {
int64_t cache_eviction_force;
int64_t cache_eviction_force_fail;
int64_t cache_eviction_force_fail_time;
- int64_t cache_eviction_force_rollback;
int64_t cache_hazard_checks;
int64_t cache_hazard_walks;
int64_t cache_hazard_max;
@@ -635,6 +634,7 @@ struct __wt_connection_stats {
int64_t txn_rts_pages_visited;
int64_t txn_rts_tree_walk_skip_pages;
int64_t txn_rts_upd_aborted;
+ int64_t txn_sessions_walked;
int64_t txn_set_ts;
int64_t txn_set_ts_durable;
int64_t txn_set_ts_durable_upd;
@@ -644,6 +644,7 @@ struct __wt_connection_stats {
int64_t txn_set_ts_stable_upd;
int64_t txn_begin;
int64_t txn_checkpoint_running;
+ int64_t txn_checkpoint_running_hs;
int64_t txn_checkpoint_generation;
int64_t txn_hs_ckpt_duration;
int64_t txn_checkpoint_time_max;
@@ -687,6 +688,7 @@ struct __wt_connection_stats {
int64_t cache_bytes_read;
int64_t cache_bytes_write;
int64_t cache_eviction_checkpoint;
+ int64_t cache_eviction_blocked_checkpoint_hs;
int64_t cache_eviction_target_page_lt10;
int64_t cache_eviction_target_page_lt32;
int64_t cache_eviction_target_page_ge128;
@@ -705,8 +707,7 @@ struct __wt_connection_stats {
int64_t cache_hs_insert;
int64_t cache_hs_insert_restart;
int64_t cache_hs_order_lose_durable_timestamp;
- int64_t cache_hs_order_fixup_move;
- int64_t cache_hs_order_fixup_insert;
+ int64_t cache_hs_order_reinsert;
int64_t cache_hs_read;
int64_t cache_hs_read_miss;
int64_t cache_hs_read_squash;
@@ -714,7 +715,7 @@ struct __wt_connection_stats {
int64_t cache_hs_key_truncate_rts;
int64_t cache_hs_key_truncate;
int64_t cache_hs_key_truncate_onpage_removal;
- int64_t cache_hs_key_truncate_non_ts;
+ int64_t cache_hs_order_remove;
int64_t cache_hs_write_squash;
int64_t cache_inmem_splittable;
int64_t cache_inmem_split;
@@ -741,6 +742,7 @@ struct __wt_connection_stats {
int64_t cursor_next_skip_total;
int64_t cursor_prev_skip_total;
int64_t cursor_skip_hs_cur_position;
+ int64_t cursor_search_near_prefix_fast_paths;
int64_t cursor_next_hs_tombstone;
int64_t cursor_next_skip_ge_100;
int64_t cursor_next_skip_lt_100;
@@ -905,6 +907,7 @@ struct __wt_dsrc_stats {
int64_t cache_bytes_read;
int64_t cache_bytes_write;
int64_t cache_eviction_checkpoint;
+ int64_t cache_eviction_blocked_checkpoint_hs;
int64_t cache_eviction_target_page_lt10;
int64_t cache_eviction_target_page_lt32;
int64_t cache_eviction_target_page_ge128;
@@ -923,8 +926,7 @@ struct __wt_dsrc_stats {
int64_t cache_hs_insert;
int64_t cache_hs_insert_restart;
int64_t cache_hs_order_lose_durable_timestamp;
- int64_t cache_hs_order_fixup_move;
- int64_t cache_hs_order_fixup_insert;
+ int64_t cache_hs_order_reinsert;
int64_t cache_hs_read;
int64_t cache_hs_read_miss;
int64_t cache_hs_read_squash;
@@ -932,7 +934,7 @@ struct __wt_dsrc_stats {
int64_t cache_hs_key_truncate_rts;
int64_t cache_hs_key_truncate;
int64_t cache_hs_key_truncate_onpage_removal;
- int64_t cache_hs_key_truncate_non_ts;
+ int64_t cache_hs_order_remove;
int64_t cache_hs_write_squash;
int64_t cache_inmem_splittable;
int64_t cache_inmem_split;
@@ -959,6 +961,7 @@ struct __wt_dsrc_stats {
int64_t cursor_next_skip_total;
int64_t cursor_prev_skip_total;
int64_t cursor_skip_hs_cur_position;
+ int64_t cursor_search_near_prefix_fast_paths;
int64_t cursor_next_hs_tombstone;
int64_t cursor_next_skip_ge_100;
int64_t cursor_next_skip_lt_100;
diff --git a/src/third_party/wiredtiger/src/include/tiered.h b/src/third_party/wiredtiger/src/include/tiered.h
index 06a49c20a59..6cd9162e1c4 100644
--- a/src/third_party/wiredtiger/src/include/tiered.h
+++ b/src/third_party/wiredtiger/src/include/tiered.h
@@ -28,7 +28,7 @@ struct __wt_tiered_manager {
/*
* WT_CURSOR_TIERED --
- * An tiered cursor.
+ * A tiered cursor.
*/
struct __wt_cursor_tiered {
WT_CURSOR iface;
@@ -49,17 +49,111 @@ struct __wt_cursor_tiered {
};
/*
+ * Define the maximum number of tiers for convenience. We expect at most two initially. This can
+ * change if more are needed. It is easier to have the array statically allocated initially than
+ * worrying about the memory management. For now also assign types to slots. Local files in slot 0.
+ * Shared tier top level in slot 1.
+ */
+#define WT_TIERED_INDEX_INVALID (uint32_t) - 1
+#define WT_TIERED_INDEX_LOCAL 0
+#define WT_TIERED_INDEX_SHARED 1
+
+#define WT_TIERED_MAX_TIERS 4
+
+/* Object name types */
+/* AUTOMATIC FLAG VALUE GENERATION START */
+#define WT_TIERED_NAME_LOCAL 0x1u
+#define WT_TIERED_NAME_OBJECT 0x2u
+#define WT_TIERED_NAME_PREFIX 0x4u
+#define WT_TIERED_NAME_SHARED 0x8u
+/* AUTOMATIC FLAG VALUE GENERATION STOP */
+
+/*
+ * WT_TIERED_TIERS --
+ * Information we need to keep about each tier such as its data handle and name.
+ * We define operations that each tier can accept. The local tier should be able to accept
+ * reads and writes. The shared tier can do reads and flushes. Other ideas for future tiers
+ * may include a merge tier that is read only or an archival tier that is flush only.
+ */
+struct __wt_tiered_tiers {
+ WT_DATA_HANDLE *tier; /* Data handle for this tier */
+ const char *name; /* Tier's metadata name */
+/* AUTOMATIC FLAG VALUE GENERATION START */
+#define WT_TIERS_OP_FLUSH 0x1u
+#define WT_TIERS_OP_READ 0x2u
+#define WT_TIERS_OP_WRITE 0x4u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags; /* Flags including operations */
+};
+
+/*
* WT_TIERED --
- * Handle for a tiered data source.
+ * Handle for a tiered data source. This data structure is used as the basis for metadata
+ * as the top level definition of a tiered table. This structure tells us where to find the
+ * parts of the tree and in what order we should look at the tiers. Prior to the first call
+ * to flush_tier after the creation of this table the only tier that exists will be the local
+ * disk represented by a file: URI. Then a second (or more) set of tiers will be where the
+ * tiered data lives. The non-local tier will point to a tier: URI and that is described by a
+ * WT_TIERED_TREE data structure that will encapsulate what the current state of the
+ * individual objects is.
*/
struct __wt_tiered {
WT_DATA_HANDLE iface;
- const char *name, *config, *filename;
+ const char *obj_config; /* Config to use for each object */
const char *key_format, *value_format;
- WT_DATA_HANDLE **tiers;
- u_int ntiers;
+ WT_BUCKET_STORAGE *bstorage;
+
+ WT_TIERED_TIERS tiers[WT_TIERED_MAX_TIERS]; /* Tiers array */
+
+ uint64_t current_id; /* Current object id number */
+ uint64_t next_id; /* Next object number */
WT_COLLATOR *collator; /* TODO: handle custom collation */
+ /* TODO: What about compression, encryption, etc? Do we need to worry about that here? */
+
+/* AUTOMATIC FLAG VALUE GENERATION START */
+#define WT_TIERED_FLAG_UNUSED 0x1u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
+};
+
+/*
+ * WT_TIERED_OBJECT --
+ * Definition of a tiered object. This is a single object in a tiered tree.
+ * This is the lowest level data structure and item that makes
+ * up a tiered table. This structure contains the information needed to construct the name of
+ * this object and how to access it.
+ */
+struct __wt_tiered_object {
+ const char *uri; /* Data source for this object */
+ WT_TIERED_TREE *tree; /* Pointer to tree this object is part of */
+ uint64_t count; /* Approximate count of records */
+ uint64_t size; /* Final size of object */
+ uint64_t switch_txn; /* Largest txn that can write to this object */
+ uint64_t switch_ts; /* Timestamp for switching */
+ uint32_t id; /* This object's id */
+ uint32_t generation; /* Do we need this?? */
+ uint32_t refcnt; /* Number of references */
+
+/* AUTOMATIC FLAG VALUE GENERATION START */
+#define WT_TIERED_OBJ_LOCAL 0x1u /* Local resident also */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
+};
+
+/*
+ * WT_TIERED_TREE --
+ * Definition of the shared tiered portion of a tree.
+ */
+struct __wt_tiered_tree {
+ WT_DATA_HANDLE iface;
+ const char *name, *config;
+ const char *key_format, *value_format;
+
+/* AUTOMATIC FLAG VALUE GENERATION START */
+#define WT_TIERED_TREE_UNUSED 0x1u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
};
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index 23ed483a3fe..7dbc17b9063 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -133,8 +133,6 @@ struct __wt_txn_global {
bool oldest_is_pinned;
bool stable_is_pinned;
- WT_SPINLOCK id_lock;
-
/* Protects the active transaction states. */
WT_RWLOCK rwlock;
@@ -151,6 +149,7 @@ struct __wt_txn_global {
* once checkpoint has finished reading a table, it won't revisit it.
*/
volatile bool checkpoint_running; /* Checkpoint running */
+ volatile bool checkpoint_running_hs; /* Checkpoint running and processing history store file */
volatile uint32_t checkpoint_id; /* Checkpoint's session ID */
WT_TXN_SHARED checkpoint_txn_shared; /* Checkpoint's txn shared state */
wt_timestamp_t checkpoint_timestamp; /* Checkpoint's timestamp */
diff --git a/src/third_party/wiredtiger/src/include/txn_inline.h b/src/third_party/wiredtiger/src/include/txn_inline.h
index 6c89b2024bf..0deaf77a532 100644
--- a/src/third_party/wiredtiger/src/include/txn_inline.h
+++ b/src/third_party/wiredtiger/src/include/txn_inline.h
@@ -631,7 +631,14 @@ __wt_txn_tw_stop_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
static inline bool
__wt_txn_tw_start_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
{
- return ((WT_TIME_WINDOW_HAS_STOP(tw) || !tw->prepare) &&
+ /*
+ * Check the prepared flag if there is no stop time point or the start and stop time points are
+ * from the same transaction.
+ */
+ return (((WT_TIME_WINDOW_HAS_STOP(tw) &&
+ (tw->start_txn != tw->stop_txn || tw->start_ts != tw->stop_ts ||
+ tw->durable_start_ts != tw->durable_stop_ts)) ||
+ !tw->prepare) &&
__wt_txn_visible(session, tw->start_txn, tw->start_ts));
}
@@ -642,7 +649,14 @@ __wt_txn_tw_start_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
static inline bool
__wt_txn_tw_start_visible_all(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
{
- return ((WT_TIME_WINDOW_HAS_STOP(tw) || !tw->prepare) &&
+ /*
+ * Check the prepared flag if there is no stop time point or the start and stop time points are
+ * from the same transaction.
+ */
+ return (((WT_TIME_WINDOW_HAS_STOP(tw) &&
+ (tw->start_txn != tw->stop_txn || tw->start_ts != tw->stop_ts ||
+ tw->durable_start_ts != tw->durable_stop_ts)) ||
+ !tw->prepare) &&
__wt_txn_visible_all(session, tw->start_txn, tw->durable_start_ts));
}
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index fcceedbe660..a4a1b584b35 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -80,7 +80,6 @@ struct __wt_modify; typedef struct __wt_modify WT_MODIFY;
struct __wt_session; typedef struct __wt_session WT_SESSION;
#if !defined(DOXYGEN)
struct __wt_storage_source; typedef struct __wt_storage_source WT_STORAGE_SOURCE;
-struct __wt_location_handle; typedef struct __wt_location_handle WT_LOCATION_HANDLE;
#endif
#if defined(SWIGJAVA)
@@ -716,9 +715,9 @@ struct __wt_cursor {
#define WT_CURSTD_META_INUSE 0x0040000u
#define WT_CURSTD_OPEN 0x0080000u
#define WT_CURSTD_OVERWRITE 0x0100000u
-#define WT_CURSTD_RAW 0x0200000u
-#define WT_CURSTD_RAW_SEARCH 0x0400000u
-#define WT_CURSTD_UPDATE_LOCAL 0x0800000u
+#define WT_CURSTD_PREFIX_SEARCH 0x0200000u
+#define WT_CURSTD_RAW 0x0400000u
+#define WT_CURSTD_RAW_SEARCH 0x0800000u
#define WT_CURSTD_VALUE_EXT 0x1000000u /* Value points out of tree. */
#define WT_CURSTD_VALUE_INT 0x2000000u /* Value points into tree. */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
@@ -1254,31 +1253,23 @@ struct __wt_session {
* size\, that is\, when a Btree page is split\, it will be split into smaller pages\, where
* each page is the specified percentage of the maximum Btree page size., an integer between
* 50 and 100; default \c 90.}
- * @config{tiered = (, options only relevant for tiered data sources., a set of related
- * configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;chunk_size, the
- * maximum size of the hot chunk of tiered tree. This limit is soft - it is possible for
- * chunks to be temporarily larger than this value., an integer greater than or equal to 1M;
- * default \c 1GB.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;tiers, list of data sources to combine
- * into a tiered storage structure., a list of strings; default empty.}
- * @config{ ),,}
* @config{tiered_storage = (, configure a storage source for this table., a set of related
* configuration options defined below.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;auth_token,
* authentication string identifier., a string; default empty.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;bucket, The bucket indicating the location for this
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;bucket, the bucket indicating the location for this
* table., a string; default empty.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;local_retention, time
- * in seconds to retain data on tiered storage on the local tier for faster read access., an
- * integer between 0 and 10000; default \c 300.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;name,
- * Permitted values are \c "none" or custom storage source name created with
- * WT_CONNECTION::add_storage_source. See @ref custom_storage_sources for more
- * information., a string; default \c none.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;
- * object_target_size, the approximate size of objects before creating them on the tiered
- * storage tier., an integer between 100K and 10TB; default \c 10M.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;bucket_prefix, the
+ * unique bucket prefix for this table., a string; default empty.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;local_retention, time in seconds to retain data on tiered
+ * storage on the local tier for faster read access., an integer between 0 and 10000;
+ * default \c 300.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;name, permitted values are \c "none" or
+ * custom storage source name created with WT_CONNECTION::add_storage_source. See @ref
+ * custom_storage_sources for more information., a string; default \c none.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;object_target_size, the approximate size of objects
+ * before creating them on the tiered storage tier., an integer between 100K and 10TB;
+ * default \c 10M.}
* @config{ ),,}
* @config{type, set the type of data source used to store a column group\, index or simple
* table. By default\, a \c "file:" URI is derived from the object name. The \c type
@@ -2238,14 +2229,12 @@ struct __wt_connection {
* @config{tiered_storage = (, enable tiered storage. Enabling tiered storage may use one
* session from the configured session_max., a set of related configuration options defined
* below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;auth_token, authentication token string., a
- * string; default empty.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;local_retention, time in seconds
- * to retain data on tiered storage on the local tier for faster read access., an integer
- * between 0 and 10000; default \c 300.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;object_target_size,
- * the approximate size of objects before creating them on the tiered storage tier., an
- * integer between 100K and 10TB; default \c 10M.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;local_retention, time in seconds to retain data
+ * on tiered storage on the local tier for faster read access., an integer between 0 and
+ * 10000; default \c 300.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;object_target_size, the
+ * approximate size of objects before creating them on the tiered storage tier., an integer
+ * between 100K and 10TB; default \c 10M.}
* @config{ ),,}
* @config{verbose, enable messages for various events. Options are given as a list\, such
* as <code>"verbose=[evictserver\,read]"</code>., a list\, with values chosen from the
@@ -2255,8 +2244,8 @@ struct __wt_connection {
* "handleops"\, \c "log"\, \c "history_store"\, \c "history_store_activity"\, \c "lsm"\, \c
* "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c "read"\, \c "reconcile"\,
* \c "recovery"\, \c "recovery_progress"\, \c "rts"\, \c "salvage"\, \c "shared_cache"\, \c
- * "split"\, \c "temporary"\, \c "thread_group"\, \c "timestamp"\, \c "transaction"\, \c
- * "verify"\, \c "version"\, \c "write"; default \c [].}
+ * "split"\, \c "temporary"\, \c "thread_group"\, \c "tiered"\, \c "timestamp"\, \c
+ * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default \c [].}
* @configend
* @errors
*/
@@ -3009,7 +2998,8 @@ struct __wt_connection {
* "history_store"\, \c "history_store_activity"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c
* "mutex"\, \c "overflow"\, \c "read"\, \c "reconcile"\, \c "recovery"\, \c "recovery_progress"\,
* \c "rts"\, \c "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, \c "thread_group"\,
- * \c "timestamp"\, \c "transaction"\, \c "verify"\, \c "version"\, \c "write"; default \c [].}
+ * \c "tiered"\, \c "timestamp"\, \c "transaction"\, \c "verify"\, \c "version"\, \c "write";
+ * default \c [].}
* @config{verify_metadata, open connection and verify any WiredTiger metadata. This API allows
* verification and detection of corruption in WiredTiger metadata., a boolean flag; default \c
* false.}
@@ -4232,7 +4222,6 @@ struct __wt_extractor {
int (*terminate)(WT_EXTRACTOR *extractor, WT_SESSION *session);
};
-#if !defined(SWIG)
/*! WT_FILE_SYSTEM::open_file file types */
typedef enum {
WT_FS_OPEN_FILE_TYPE_CHECKPOINT,/*!< open a data file checkpoint */
@@ -4444,7 +4433,6 @@ struct __wt_file_system {
*/
int (*terminate)(WT_FILE_SYSTEM *file_system, WT_SESSION *session);
};
-#endif /* !defined(SWIG) */
/*! WT_FILE_HANDLE::fadvise flags: no longer need */
#define WT_FILE_HANDLE_DONTNEED 1
@@ -4735,28 +4723,6 @@ struct __wt_file_handle {
#if !defined(DOXYGEN)
/* This interface is not yet public. */
-/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_SS_OPEN_CREATE 0x1u
-#define WT_SS_OPEN_READONLY 0x2u
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
-
-/*!
- * A location handle, and its encoding is defined by each implementation
- * of the WT_STORAGE_SOURCE interface.
- */
-struct __wt_location_handle {
- /*!
- * Close a location handle, the handle will not be further accessed by
- * WiredTiger.
- *
- * @errors
- *
- * @param location_handle the WT_LOCATION_HANDLE
- * @param session the current WiredTiger session
- */
- int (*close)(WT_LOCATION_HANDLE *location_handle, WT_SESSION *session);
-};
-
/*!
* The interface implemented by applications to provide a storage source
* implementation. This documentation refers to "object" and "bucket"
@@ -4773,66 +4739,56 @@ struct __wt_location_handle {
*/
struct __wt_storage_source {
/*!
- * Return a location handle from a location string.
- * A location string may encode a bucket name, or the equivalent for this
- * storage source, authorization information for that bucket,
- * naming prefixes to be used for objects in that bucket, etc.
+ * Create a customized file system to access the storage source
+ * objects.
+ *
+ * The file system returned behaves as if objects in the specified buckets are
+ * files in the file system. In particular, the fs_open_file method requires
+ * its flags argument to include either WT_FS_OPEN_CREATE or WT_FS_OPEN_READONLY.
+ * Objects being created are not deemed to "exist" and be visible to
+ * WT_FILE_SYSTEM::fs_exist and other file system methods until the new handle has
+ * been closed. Objects once created are immutable. That is, only objects that
+ * do not already exist can be opened with the create flag, and objects that
+ * already exist can only be opened with the readonly flag. Only objects that
+ * exist can be transferred to the underlying shared object storage. This can
+ * happen at any time after an object is created, and can be forced to happen using
+ * WT_STORAGE_SOURCE::ss_flush.
+ *
+ * Additionally file handles returned by the file system behave as file handles to a
+ * local file. For example, WT_FILE_HANDLE::fh_sync synchronizes writes to the
+ * local file, and does not imply any transferring of data to the shared object store.
+ *
+ * The directory argument to the WT_FILE_SYSTEM::fs_directory_list method is normally
+ * the empty string as the cloud equivalent (bucket) has already been given when
+ * customizing the file system. If specified, the directory path is interpreted
+ * as another prefix, which is removed from the results.
+ *
+ * Names used by the file system methods are generally flat. However, in some
+ * implementations of a file system returned by a storage source, "..", ".", "/"
+ * may have a particular meaning, as in a POSIX file system. We suggest that
+ * these constructs be avoided when a caller chooses file names within the returned
+ * file system; they may be rejected by the implementation. Within a bucket name,
+ * these characters may or may not be acceptable. That is implementation dependent.
+ * In the prefix, "/" is specifically allowed, as this may have performance or
+ * administrative benefits. That said, within a prefix, certain combinations
+ * involving "/" may be rejected, for example "/../".
*
* @errors
*
* @param storage_source the WT_STORAGE_SOURCE
* @param session the current WiredTiger session
- * @param location the location string
- * @param[out] location_handle the allocated handle
+ * @param bucket_name the name of the bucket. Use of '/' is implementation dependent.
+ * @param prefix a prefix for each file. If used, the prefix will be added to the
+ * name of each object created or otherwise accessed in the bucket. Also, only
+ * objects with this prefix will be visible, and the prefix will be removed when
+ * listed. Prefixes may contain '/' as a separator.
+ * @param auth_token the authorization identifier.
+ * @param config additional configuration, currently must be NULL.
+ * @param[out] file_system the customized file system returned
*/
- int (*ss_location_handle)(WT_STORAGE_SOURCE *storage_source,
- WT_SESSION *session, const char *location, WT_LOCATION_HANDLE **location_handle);
-
- /*!
- * Return a list of object names for the given location.
- *
- * @errors
- *
- * @param storage_source the WT_STORAGE_SOURCE
- * @param session the current WiredTiger session
- * @param location_handle the location to list
- * @param prefix if not NULL, only files with names matching the prefix
- * are returned
- * @param limit if not 0, limits the number of objects listed to this number.
- * @param[out] object_list the method returns an allocated array of
- * individually allocated strings, one for each object in the location.
- * @param[out] countp the number of entries returned
- */
- int (*ss_location_list)(WT_STORAGE_SOURCE *storage_source,
- WT_SESSION *session, WT_LOCATION_HANDLE *location_handle, const char *prefix,
- uint32_t limit, char ***object_list, uint32_t *countp);
-
- /*!
- * Free memory allocated by WT_STORAGE_SOURCE::location_list.
- *
- * @errors
- *
- * @param storage_source the WT_STORAGE_SOURCE
- * @param session the current WiredTiger session
- * @param object_list array returned by WT_STORAGE_SOURCE::location_list
- * @param count count returned by WT_STORAGE_SOURCE::location_list
- */
- int (*ss_location_list_free)(WT_STORAGE_SOURCE *storage_source,
- WT_SESSION *session, char **object_list, uint32_t count);
-
- /*!
- * Return if the named object exists in the location.
- *
- * @errors
- *
- * @param storage_source the WT_STORAGE_SOURCE
- * @param session the current WiredTiger session
- * @param location_handle the location to search
- * @param name the name of the object
- * @param[out] existp If the named storage source object exists
- */
- int (*ss_exist)(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *name, bool *existp);
+ int (*ss_customize_file_system)(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
+ const char *bucket_name, const char *prefix, const char *auth_token, const char *config,
+ WT_FILE_SYSTEM **file_system);
/*!
* Flush any existing objects that match the location and name from
@@ -4844,85 +4800,13 @@ struct __wt_storage_source {
*
* @param storage_source the WT_STORAGE_SOURCE
* @param session the current WiredTiger session
- * @param location_handle the location to flush (or NULL for all)
+ * @param file_system if NULL, all objects are considered, otherwise only objects
+ * managed by the given file system.
* @param name the name of the object to flush (or NULL for all)
* @param config additional configuration, currently must be NULL
*/
int (*ss_flush)(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *name, const char *config);
-
- /*!
- * Open a handle for a named storage source object.
- *
- * Objects created are not deemed to "exist" and be visible to other APIs
- * like WT_STORAGE_SOURCE::ss_exist until the new handle has been closed.
- * Objects once created are immutable. That is, only objects that do not already
- * exist can be opened with the create flag, and objects that already exist can
- * only be opened with the readonly flag.
- *
- * Only objects that exist can be transferred to and made visible in the underlying
- * shared object store. However, they don't need to be transferred immediately when
- * the created handle is closed. Transfers can be forced with WT_STORAGE_SOURCE::ss_flush.
- *
- * File handles returned behave as file handles to a local file. For example,
- * WT_FILE_HANDLE::fh_sync synchronizes writes to the local file, and does not
- * imply any transferring of data to the shared object store.
- *
- * The method should return ENOENT if the object is not being created and
- * does not exist.
- *
- * The method should return EACCES if the object cannot be opened given
- * permissions by the location.
- *
- * @errors
- *
- * @param storage_source the WT_STORAGE_SOURCE
- * @param session the current WiredTiger session
- * @param location_handle the location where the object will be stored.
- * @param name the name of the object within the location.
- * @param flags flags indicating how to open the object, exactly one of
- * ::WT_SS_OPEN_CREATE, ::WT_SS_OPEN_READONLY.
- * @param[out] file_handlep the handle to the newly opened object. Storage
- * source implementations must allocate memory for the handle and
- * the WT_FILE_HANDLE::name field, and fill in the WT_FILE_HANDLE::
- * fields. Applications wanting to associate private information
- * with the WT_FILE_HANDLE:: structure should declare and allocate
- * their own structure as a superset of a WT_FILE_HANDLE:: structure.
- */
- int (*ss_open_object)(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *name, uint32_t flags,
- WT_FILE_HANDLE **file_handlep);
-
- /*!
- * Remove a named storage source object
- *
- * This method is not required if storage source is configured readonly
- * and should be set to NULL when not required by the storage source implementation.
- *
- * @errors
- *
- * @param storage_source the WT_STORAGE_SOURCE
- * @param session the current WiredTiger session
- * @param location_handle the location containing the object
- * @param name the name of the storage source object
- * @param flags must be 0
- */
- int (*ss_remove)(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *name, uint32_t flags);
-
- /*!
- * Return the size of a named storage source object
- *
- * @errors
- *
- * @param storage_source the WT_STORAGE_SOURCE
- * @param session the current WiredTiger session
- * @param location_handle the location containing the object
- * @param name the name of the storage source object
- * @param[out] sizep the size of the storage source object
- */
- int (*ss_size)(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *name, wt_off_t *sizep);
+ WT_FILE_SYSTEM *file_system, const char *name, const char *config);
/*!
* A callback performed when the storage source is closed and will no
@@ -5193,572 +5077,569 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1064
/*! cache: forced eviction - pages selected unable to be evicted time */
#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL_TIME 1065
-/*!
- * cache: forced eviction - session returned rollback error while force
- * evicting due to being oldest
- */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_ROLLBACK 1066
/*! cache: hazard pointer check calls */
-#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1067
+#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1066
/*! cache: hazard pointer check entries walked */
-#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1068
+#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1067
/*! cache: hazard pointer maximum array length */
-#define WT_STAT_CONN_CACHE_HAZARD_MAX 1069
+#define WT_STAT_CONN_CACHE_HAZARD_MAX 1068
/*! cache: history store score */
-#define WT_STAT_CONN_CACHE_HS_SCORE 1070
+#define WT_STAT_CONN_CACHE_HS_SCORE 1069
/*! cache: history store table max on-disk size */
-#define WT_STAT_CONN_CACHE_HS_ONDISK_MAX 1071
+#define WT_STAT_CONN_CACHE_HS_ONDISK_MAX 1070
/*! cache: history store table on-disk size */
-#define WT_STAT_CONN_CACHE_HS_ONDISK 1072
+#define WT_STAT_CONN_CACHE_HS_ONDISK 1071
/*! cache: internal pages queued for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL_PAGES_QUEUED 1073
+#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL_PAGES_QUEUED 1072
/*! cache: internal pages seen by eviction walk */
-#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL_PAGES_SEEN 1074
+#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL_PAGES_SEEN 1073
/*! cache: internal pages seen by eviction walk that are already queued */
-#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL_PAGES_ALREADY_QUEUED 1075
+#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL_PAGES_ALREADY_QUEUED 1074
/*! cache: maximum bytes configured */
-#define WT_STAT_CONN_CACHE_BYTES_MAX 1076
+#define WT_STAT_CONN_CACHE_BYTES_MAX 1075
/*! cache: maximum page size at eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1077
+#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1076
/*! cache: modified pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1078
+#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1077
/*! cache: operations timed out waiting for space in cache */
-#define WT_STAT_CONN_CACHE_TIMED_OUT_OPS 1079
+#define WT_STAT_CONN_CACHE_TIMED_OUT_OPS 1078
/*! cache: pages currently held in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_INUSE 1080
+#define WT_STAT_CONN_CACHE_PAGES_INUSE 1079
/*! cache: pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP 1081
+#define WT_STAT_CONN_CACHE_EVICTION_APP 1080
/*! cache: pages evicted in parallel with checkpoint */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_IN_PARALLEL_WITH_CHECKPOINT 1082
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_IN_PARALLEL_WITH_CHECKPOINT 1081
/*! cache: pages queued for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1083
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1082
/*! cache: pages queued for eviction post lru sorting */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_POST_LRU 1084
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_POST_LRU 1083
/*! cache: pages queued for urgent eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1085
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1084
/*! cache: pages queued for urgent eviction during walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1086
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1085
/*!
* cache: pages queued for urgent eviction from history store due to high
* dirty content
*/
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT_HS_DIRTY 1087
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT_HS_DIRTY 1086
/*! cache: pages seen by eviction walk that are already queued */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_ALREADY_QUEUED 1088
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_ALREADY_QUEUED 1087
/*! cache: pages selected for eviction unable to be evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1089
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1088
/*!
* cache: pages selected for eviction unable to be evicted as the parent
* page has overflow items
*/
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL_PARENT_HAS_OVERFLOW_ITEMS 1090
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL_PARENT_HAS_OVERFLOW_ITEMS 1089
/*!
* cache: pages selected for eviction unable to be evicted because of
* active children on an internal page
*/
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL_ACTIVE_CHILDREN_ON_AN_INTERNAL_PAGE 1091
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL_ACTIVE_CHILDREN_ON_AN_INTERNAL_PAGE 1090
/*!
* cache: pages selected for eviction unable to be evicted because of
* failure in reconciliation
*/
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL_IN_RECONCILIATION 1092
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL_IN_RECONCILIATION 1091
/*! cache: pages walked for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK 1093
+#define WT_STAT_CONN_CACHE_EVICTION_WALK 1092
/*! cache: percentage overhead */
-#define WT_STAT_CONN_CACHE_OVERHEAD 1094
+#define WT_STAT_CONN_CACHE_OVERHEAD 1093
/*! cache: tracked bytes belonging to internal pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1095
+#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1094
/*! cache: tracked bytes belonging to leaf pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_LEAF 1096
+#define WT_STAT_CONN_CACHE_BYTES_LEAF 1095
/*! cache: tracked dirty pages in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1097
+#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1096
/*! capacity: background fsync file handles considered */
-#define WT_STAT_CONN_FSYNC_ALL_FH_TOTAL 1098
+#define WT_STAT_CONN_FSYNC_ALL_FH_TOTAL 1097
/*! capacity: background fsync file handles synced */
-#define WT_STAT_CONN_FSYNC_ALL_FH 1099
+#define WT_STAT_CONN_FSYNC_ALL_FH 1098
/*! capacity: background fsync time (msecs) */
-#define WT_STAT_CONN_FSYNC_ALL_TIME 1100
+#define WT_STAT_CONN_FSYNC_ALL_TIME 1099
/*! capacity: bytes read */
-#define WT_STAT_CONN_CAPACITY_BYTES_READ 1101
+#define WT_STAT_CONN_CAPACITY_BYTES_READ 1100
/*! capacity: bytes written for checkpoint */
-#define WT_STAT_CONN_CAPACITY_BYTES_CKPT 1102
+#define WT_STAT_CONN_CAPACITY_BYTES_CKPT 1101
/*! capacity: bytes written for eviction */
-#define WT_STAT_CONN_CAPACITY_BYTES_EVICT 1103
+#define WT_STAT_CONN_CAPACITY_BYTES_EVICT 1102
/*! capacity: bytes written for log */
-#define WT_STAT_CONN_CAPACITY_BYTES_LOG 1104
+#define WT_STAT_CONN_CAPACITY_BYTES_LOG 1103
/*! capacity: bytes written total */
-#define WT_STAT_CONN_CAPACITY_BYTES_WRITTEN 1105
+#define WT_STAT_CONN_CAPACITY_BYTES_WRITTEN 1104
/*! capacity: threshold to call fsync */
-#define WT_STAT_CONN_CAPACITY_THRESHOLD 1106
+#define WT_STAT_CONN_CAPACITY_THRESHOLD 1105
/*! capacity: time waiting due to total capacity (usecs) */
-#define WT_STAT_CONN_CAPACITY_TIME_TOTAL 1107
+#define WT_STAT_CONN_CAPACITY_TIME_TOTAL 1106
/*! capacity: time waiting during checkpoint (usecs) */
-#define WT_STAT_CONN_CAPACITY_TIME_CKPT 1108
+#define WT_STAT_CONN_CAPACITY_TIME_CKPT 1107
/*! capacity: time waiting during eviction (usecs) */
-#define WT_STAT_CONN_CAPACITY_TIME_EVICT 1109
+#define WT_STAT_CONN_CAPACITY_TIME_EVICT 1108
/*! capacity: time waiting during logging (usecs) */
-#define WT_STAT_CONN_CAPACITY_TIME_LOG 1110
+#define WT_STAT_CONN_CAPACITY_TIME_LOG 1109
/*! capacity: time waiting during read (usecs) */
-#define WT_STAT_CONN_CAPACITY_TIME_READ 1111
+#define WT_STAT_CONN_CAPACITY_TIME_READ 1110
/*! connection: auto adjusting condition resets */
-#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1112
+#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1111
/*! connection: auto adjusting condition wait calls */
-#define WT_STAT_CONN_COND_AUTO_WAIT 1113
+#define WT_STAT_CONN_COND_AUTO_WAIT 1112
/*!
* connection: auto adjusting condition wait raced to update timeout and
* skipped updating
*/
-#define WT_STAT_CONN_COND_AUTO_WAIT_SKIPPED 1114
+#define WT_STAT_CONN_COND_AUTO_WAIT_SKIPPED 1113
/*! connection: detected system time went backwards */
-#define WT_STAT_CONN_TIME_TRAVEL 1115
+#define WT_STAT_CONN_TIME_TRAVEL 1114
/*! connection: files currently open */
-#define WT_STAT_CONN_FILE_OPEN 1116
+#define WT_STAT_CONN_FILE_OPEN 1115
/*! connection: hash bucket array size for data handles */
-#define WT_STAT_CONN_BUCKETS_DH 1117
+#define WT_STAT_CONN_BUCKETS_DH 1116
/*! connection: hash bucket array size general */
-#define WT_STAT_CONN_BUCKETS 1118
+#define WT_STAT_CONN_BUCKETS 1117
/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1119
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1118
/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1120
+#define WT_STAT_CONN_MEMORY_FREE 1119
/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1121
+#define WT_STAT_CONN_MEMORY_GROW 1120
/*! connection: pthread mutex condition wait calls */
-#define WT_STAT_CONN_COND_WAIT 1122
+#define WT_STAT_CONN_COND_WAIT 1121
/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1123
+#define WT_STAT_CONN_RWLOCK_READ 1122
/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1124
+#define WT_STAT_CONN_RWLOCK_WRITE 1123
/*! connection: total fsync I/Os */
-#define WT_STAT_CONN_FSYNC_IO 1125
+#define WT_STAT_CONN_FSYNC_IO 1124
/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1126
+#define WT_STAT_CONN_READ_IO 1125
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1127
+#define WT_STAT_CONN_WRITE_IO 1126
/*! cursor: cached cursor count */
-#define WT_STAT_CONN_CURSOR_CACHED_COUNT 1128
+#define WT_STAT_CONN_CURSOR_CACHED_COUNT 1127
/*! cursor: cursor bulk loaded cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT_BULK 1129
+#define WT_STAT_CONN_CURSOR_INSERT_BULK 1128
/*! cursor: cursor close calls that result in cache */
-#define WT_STAT_CONN_CURSOR_CACHE 1130
+#define WT_STAT_CONN_CURSOR_CACHE 1129
/*! cursor: cursor create calls */
-#define WT_STAT_CONN_CURSOR_CREATE 1131
+#define WT_STAT_CONN_CURSOR_CREATE 1130
/*! cursor: cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT 1132
+#define WT_STAT_CONN_CURSOR_INSERT 1131
/*! cursor: cursor insert key and value bytes */
-#define WT_STAT_CONN_CURSOR_INSERT_BYTES 1133
+#define WT_STAT_CONN_CURSOR_INSERT_BYTES 1132
/*! cursor: cursor modify calls */
-#define WT_STAT_CONN_CURSOR_MODIFY 1134
+#define WT_STAT_CONN_CURSOR_MODIFY 1133
/*! cursor: cursor modify key and value bytes affected */
-#define WT_STAT_CONN_CURSOR_MODIFY_BYTES 1135
+#define WT_STAT_CONN_CURSOR_MODIFY_BYTES 1134
/*! cursor: cursor modify value bytes modified */
-#define WT_STAT_CONN_CURSOR_MODIFY_BYTES_TOUCH 1136
+#define WT_STAT_CONN_CURSOR_MODIFY_BYTES_TOUCH 1135
/*! cursor: cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT 1137
+#define WT_STAT_CONN_CURSOR_NEXT 1136
/*! cursor: cursor operation restarted */
-#define WT_STAT_CONN_CURSOR_RESTART 1138
+#define WT_STAT_CONN_CURSOR_RESTART 1137
/*! cursor: cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 1139
+#define WT_STAT_CONN_CURSOR_PREV 1138
/*! cursor: cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 1140
+#define WT_STAT_CONN_CURSOR_REMOVE 1139
/*! cursor: cursor remove key bytes removed */
-#define WT_STAT_CONN_CURSOR_REMOVE_BYTES 1141
+#define WT_STAT_CONN_CURSOR_REMOVE_BYTES 1140
/*! cursor: cursor reserve calls */
-#define WT_STAT_CONN_CURSOR_RESERVE 1142
+#define WT_STAT_CONN_CURSOR_RESERVE 1141
/*! cursor: cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 1143
+#define WT_STAT_CONN_CURSOR_RESET 1142
/*! cursor: cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 1144
+#define WT_STAT_CONN_CURSOR_SEARCH 1143
/*! cursor: cursor search history store calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_HS 1145
+#define WT_STAT_CONN_CURSOR_SEARCH_HS 1144
/*! cursor: cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1146
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1145
/*! cursor: cursor sweep buckets */
-#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1147
+#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1146
/*! cursor: cursor sweep cursors closed */
-#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1148
+#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1147
/*! cursor: cursor sweep cursors examined */
-#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1149
+#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1148
/*! cursor: cursor sweeps */
-#define WT_STAT_CONN_CURSOR_SWEEP 1150
+#define WT_STAT_CONN_CURSOR_SWEEP 1149
/*! cursor: cursor truncate calls */
-#define WT_STAT_CONN_CURSOR_TRUNCATE 1151
+#define WT_STAT_CONN_CURSOR_TRUNCATE 1150
/*! cursor: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1152
+#define WT_STAT_CONN_CURSOR_UPDATE 1151
/*! cursor: cursor update key and value bytes */
-#define WT_STAT_CONN_CURSOR_UPDATE_BYTES 1153
+#define WT_STAT_CONN_CURSOR_UPDATE_BYTES 1152
/*! cursor: cursor update value size change */
-#define WT_STAT_CONN_CURSOR_UPDATE_BYTES_CHANGED 1154
+#define WT_STAT_CONN_CURSOR_UPDATE_BYTES_CHANGED 1153
/*! cursor: cursors reused from cache */
-#define WT_STAT_CONN_CURSOR_REOPEN 1155
+#define WT_STAT_CONN_CURSOR_REOPEN 1154
/*! data-handle: connection data handle size */
-#define WT_STAT_CONN_DH_CONN_HANDLE_SIZE 1156
+#define WT_STAT_CONN_DH_CONN_HANDLE_SIZE 1155
/*! data-handle: connection data handles currently active */
-#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1157
+#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1156
/*! data-handle: connection sweep candidate became referenced */
-#define WT_STAT_CONN_DH_SWEEP_REF 1158
+#define WT_STAT_CONN_DH_SWEEP_REF 1157
/*! data-handle: connection sweep dhandles closed */
-#define WT_STAT_CONN_DH_SWEEP_CLOSE 1159
+#define WT_STAT_CONN_DH_SWEEP_CLOSE 1158
/*! data-handle: connection sweep dhandles removed from hash list */
-#define WT_STAT_CONN_DH_SWEEP_REMOVE 1160
+#define WT_STAT_CONN_DH_SWEEP_REMOVE 1159
/*! data-handle: connection sweep time-of-death sets */
-#define WT_STAT_CONN_DH_SWEEP_TOD 1161
+#define WT_STAT_CONN_DH_SWEEP_TOD 1160
/*! data-handle: connection sweeps */
-#define WT_STAT_CONN_DH_SWEEPS 1162
+#define WT_STAT_CONN_DH_SWEEPS 1161
/*!
* data-handle: connection sweeps skipped due to checkpoint gathering
* handles
*/
-#define WT_STAT_CONN_DH_SWEEP_SKIP_CKPT 1163
+#define WT_STAT_CONN_DH_SWEEP_SKIP_CKPT 1162
/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1164
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1163
/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1165
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1164
/*! lock: checkpoint lock acquisitions */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1166
+#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1165
/*! lock: checkpoint lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1167
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1166
/*! lock: checkpoint lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1168
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1167
/*! lock: dhandle lock application thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1169
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1168
/*! lock: dhandle lock internal thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1170
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1169
/*! lock: dhandle read lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1171
+#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1170
/*! lock: dhandle write lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1172
+#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1171
/*!
* lock: durable timestamp queue lock application thread time waiting
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WAIT_APPLICATION 1173
+#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WAIT_APPLICATION 1172
/*!
* lock: durable timestamp queue lock internal thread time waiting
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WAIT_INTERNAL 1174
+#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WAIT_INTERNAL 1173
/*! lock: durable timestamp queue read lock acquisitions */
-#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_READ_COUNT 1175
+#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_READ_COUNT 1174
/*! lock: durable timestamp queue write lock acquisitions */
-#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WRITE_COUNT 1176
+#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WRITE_COUNT 1175
/*! lock: metadata lock acquisitions */
-#define WT_STAT_CONN_LOCK_METADATA_COUNT 1177
+#define WT_STAT_CONN_LOCK_METADATA_COUNT 1176
/*! lock: metadata lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1178
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1177
/*! lock: metadata lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1179
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1178
/*!
* lock: read timestamp queue lock application thread time waiting
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1180
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1179
/*! lock: read timestamp queue lock internal thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1181
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1180
/*! lock: read timestamp queue read lock acquisitions */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1182
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1181
/*! lock: read timestamp queue write lock acquisitions */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1183
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1182
/*! lock: schema lock acquisitions */
-#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1184
+#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1183
/*! lock: schema lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1185
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1184
/*! lock: schema lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1186
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1185
/*!
* lock: table lock application thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1187
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1186
/*!
* lock: table lock internal thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1188
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1187
/*! lock: table read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1189
+#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1188
/*! lock: table write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1190
+#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1189
/*! lock: txn global lock application thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1191
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1190
/*! lock: txn global lock internal thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1192
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1191
/*! lock: txn global read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1193
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1192
/*! lock: txn global write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1194
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1193
/*! log: busy returns attempting to switch slots */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1195
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1194
/*! log: force archive time sleeping (usecs) */
-#define WT_STAT_CONN_LOG_FORCE_ARCHIVE_SLEEP 1196
+#define WT_STAT_CONN_LOG_FORCE_ARCHIVE_SLEEP 1195
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1197
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1196
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1198
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1197
/*! log: log files manually zero-filled */
-#define WT_STAT_CONN_LOG_ZERO_FILLS 1199
+#define WT_STAT_CONN_LOG_ZERO_FILLS 1198
/*! log: log flush operations */
-#define WT_STAT_CONN_LOG_FLUSH 1200
+#define WT_STAT_CONN_LOG_FLUSH 1199
/*! log: log force write operations */
-#define WT_STAT_CONN_LOG_FORCE_WRITE 1201
+#define WT_STAT_CONN_LOG_FORCE_WRITE 1200
/*! log: log force write operations skipped */
-#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1202
+#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1201
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1203
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1202
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1204
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1203
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1205
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1204
/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1206
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1205
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1207
+#define WT_STAT_CONN_LOG_SCANS 1206
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1208
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1207
/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1209
+#define WT_STAT_CONN_LOG_WRITE_LSN 1208
/*! log: log server thread write LSN walk skipped */
-#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1210
+#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1209
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1211
+#define WT_STAT_CONN_LOG_SYNC 1210
/*! log: log sync time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DURATION 1212
+#define WT_STAT_CONN_LOG_SYNC_DURATION 1211
/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1213
+#define WT_STAT_CONN_LOG_SYNC_DIR 1212
/*! log: log sync_dir time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1214
+#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1213
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1215
+#define WT_STAT_CONN_LOG_WRITES 1214
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1216
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1215
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1217
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1216
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1218
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1217
/*! log: pre-allocated log files not ready and missed */
-#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1219
+#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1218
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1220
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1219
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1221
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1220
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1222
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1221
/*! log: slot close lost race */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1223
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1222
/*! log: slot close unbuffered waits */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1224
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1223
/*! log: slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1225
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1224
/*! log: slot join atomic update races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1226
+#define WT_STAT_CONN_LOG_SLOT_RACES 1225
/*! log: slot join calls atomic updates raced */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1227
+#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1226
/*! log: slot join calls did not yield */
-#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1228
+#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1227
/*! log: slot join calls found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1229
+#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1228
/*! log: slot join calls slept */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1230
+#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1229
/*! log: slot join calls yielded */
-#define WT_STAT_CONN_LOG_SLOT_YIELD 1231
+#define WT_STAT_CONN_LOG_SLOT_YIELD 1230
/*! log: slot join found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1232
+#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1231
/*! log: slot joins yield time (usecs) */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1233
+#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1232
/*! log: slot transitions unable to find free slot */
-#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1234
+#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1233
/*! log: slot unbuffered writes */
-#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1235
+#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1234
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1236
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1235
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1237
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1236
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1238
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1237
/*! log: written slots coalesced */
-#define WT_STAT_CONN_LOG_SLOT_COALESCED 1239
+#define WT_STAT_CONN_LOG_SLOT_COALESCED 1238
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1240
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1239
/*! perf: file system read latency histogram (bucket 1) - 10-49ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1241
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1240
/*! perf: file system read latency histogram (bucket 2) - 50-99ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1242
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1241
/*! perf: file system read latency histogram (bucket 3) - 100-249ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1243
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1242
/*! perf: file system read latency histogram (bucket 4) - 250-499ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1244
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1243
/*! perf: file system read latency histogram (bucket 5) - 500-999ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1245
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1244
/*! perf: file system read latency histogram (bucket 6) - 1000ms+ */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1246
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1245
/*! perf: file system write latency histogram (bucket 1) - 10-49ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1247
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1246
/*! perf: file system write latency histogram (bucket 2) - 50-99ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1248
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1247
/*! perf: file system write latency histogram (bucket 3) - 100-249ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1249
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1248
/*! perf: file system write latency histogram (bucket 4) - 250-499ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1250
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1249
/*! perf: file system write latency histogram (bucket 5) - 500-999ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1251
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1250
/*! perf: file system write latency histogram (bucket 6) - 1000ms+ */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1252
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1251
/*! perf: operation read latency histogram (bucket 1) - 100-249us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1253
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1252
/*! perf: operation read latency histogram (bucket 2) - 250-499us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1254
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1253
/*! perf: operation read latency histogram (bucket 3) - 500-999us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1255
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1254
/*! perf: operation read latency histogram (bucket 4) - 1000-9999us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1256
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1255
/*! perf: operation read latency histogram (bucket 5) - 10000us+ */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1257
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1256
/*! perf: operation write latency histogram (bucket 1) - 100-249us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1258
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1257
/*! perf: operation write latency histogram (bucket 2) - 250-499us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1259
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1258
/*! perf: operation write latency histogram (bucket 3) - 500-999us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1260
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1259
/*! perf: operation write latency histogram (bucket 4) - 1000-9999us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1261
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1260
/*! perf: operation write latency histogram (bucket 5) - 10000us+ */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1262
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1261
/*! reconciliation: internal-page overflow keys */
-#define WT_STAT_CONN_REC_OVERFLOW_KEY_INTERNAL 1263
+#define WT_STAT_CONN_REC_OVERFLOW_KEY_INTERNAL 1262
/*! reconciliation: leaf-page overflow keys */
-#define WT_STAT_CONN_REC_OVERFLOW_KEY_LEAF 1264
+#define WT_STAT_CONN_REC_OVERFLOW_KEY_LEAF 1263
/*! reconciliation: maximum seconds spent in a reconciliation call */
-#define WT_STAT_CONN_REC_MAXIMUM_SECONDS 1265
+#define WT_STAT_CONN_REC_MAXIMUM_SECONDS 1264
/*!
* reconciliation: page reconciliation calls that resulted in values with
* prepared transaction metadata
*/
-#define WT_STAT_CONN_REC_PAGES_WITH_PREPARE 1266
+#define WT_STAT_CONN_REC_PAGES_WITH_PREPARE 1265
/*!
* reconciliation: page reconciliation calls that resulted in values with
* timestamps
*/
-#define WT_STAT_CONN_REC_PAGES_WITH_TS 1267
+#define WT_STAT_CONN_REC_PAGES_WITH_TS 1266
/*!
* reconciliation: page reconciliation calls that resulted in values with
* transaction ids
*/
-#define WT_STAT_CONN_REC_PAGES_WITH_TXN 1268
+#define WT_STAT_CONN_REC_PAGES_WITH_TXN 1267
/*! reconciliation: pages written including at least one prepare state */
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_PREPARED 1269
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_PREPARED 1268
/*! reconciliation: pages written including at least one start timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TS 1270
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TS 1269
/*! reconciliation: records written including a prepare state */
-#define WT_STAT_CONN_REC_TIME_WINDOW_PREPARED 1271
+#define WT_STAT_CONN_REC_TIME_WINDOW_PREPARED 1270
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1272
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1271
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1273
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1272
/*! session: flush_tier operation calls */
-#define WT_STAT_CONN_FLUSH_TIER 1274
+#define WT_STAT_CONN_FLUSH_TIER 1273
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1275
+#define WT_STAT_CONN_SESSION_OPEN 1274
/*! session: session query timestamp calls */
-#define WT_STAT_CONN_SESSION_QUERY_TS 1276
+#define WT_STAT_CONN_SESSION_QUERY_TS 1275
/*! session: table alter failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1277
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1276
/*! session: table alter successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1278
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1277
/*! session: table alter unchanged and skipped */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1279
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1278
/*! session: table compact failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1280
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1279
/*! session: table compact successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1281
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1280
/*! session: table create failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1282
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1281
/*! session: table create successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1283
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1282
/*! session: table drop failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1284
+#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1283
/*! session: table drop successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1285
+#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1284
/*! session: table rename failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1286
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1285
/*! session: table rename successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1287
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1286
/*! session: table salvage failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1288
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1287
/*! session: table salvage successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1289
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1288
/*! session: table truncate failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1290
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1289
/*! session: table truncate successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1291
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1290
/*! session: table verify failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1292
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1291
/*! session: table verify successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1293
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1292
/*! thread-state: active filesystem fsync calls */
-#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1294
+#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1293
/*! thread-state: active filesystem read calls */
-#define WT_STAT_CONN_THREAD_READ_ACTIVE 1295
+#define WT_STAT_CONN_THREAD_READ_ACTIVE 1294
/*! thread-state: active filesystem write calls */
-#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1296
+#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1295
/*! thread-yield: application thread time evicting (usecs) */
-#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1297
+#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1296
/*! thread-yield: application thread time waiting for cache (usecs) */
-#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1298
+#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1297
/*!
* thread-yield: connection close blocked waiting for transaction state
* stabilization
*/
-#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1299
+#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1298
/*! thread-yield: connection close yielded for lsm manager shutdown */
-#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1300
+#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1299
/*! thread-yield: data handle lock yielded */
-#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1301
+#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1300
/*!
* thread-yield: get reference for page index and slot time sleeping
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1302
+#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1301
/*! thread-yield: log server sync yielded for log write */
-#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1303
+#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1302
/*! thread-yield: page access yielded due to prepare state change */
-#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1304
+#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1303
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1305
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1304
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1306
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1305
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1307
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1306
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1308
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1307
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1309
+#define WT_STAT_CONN_PAGE_SLEEP 1308
/*!
* thread-yield: page delete rollback time sleeping for state change
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1310
+#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1309
/*! thread-yield: page reconciliation yielded due to child modification */
-#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1311
+#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1310
/*! transaction: Number of prepared updates */
-#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COUNT 1312
+#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COUNT 1311
/*! transaction: prepared transactions */
-#define WT_STAT_CONN_TXN_PREPARE 1313
+#define WT_STAT_CONN_TXN_PREPARE 1312
/*! transaction: prepared transactions committed */
-#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1314
+#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1313
/*! transaction: prepared transactions currently active */
-#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1315
+#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1314
/*! transaction: prepared transactions rolled back */
-#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1316
+#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1315
/*! transaction: query timestamp calls */
-#define WT_STAT_CONN_TXN_QUERY_TS 1317
+#define WT_STAT_CONN_TXN_QUERY_TS 1316
/*! transaction: rollback to stable calls */
-#define WT_STAT_CONN_TXN_RTS 1318
+#define WT_STAT_CONN_TXN_RTS 1317
/*! transaction: rollback to stable pages visited */
-#define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1319
+#define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1318
/*! transaction: rollback to stable tree walk skipping pages */
-#define WT_STAT_CONN_TXN_RTS_TREE_WALK_SKIP_PAGES 1320
+#define WT_STAT_CONN_TXN_RTS_TREE_WALK_SKIP_PAGES 1319
/*! transaction: rollback to stable updates aborted */
-#define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1321
+#define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1320
+/*! transaction: sessions scanned in each walk of concurrent sessions */
+#define WT_STAT_CONN_TXN_SESSIONS_WALKED 1321
/*! transaction: set timestamp calls */
#define WT_STAT_CONN_TXN_SET_TS 1322
/*! transaction: set timestamp durable calls */
@@ -5777,396 +5658,406 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_TXN_BEGIN 1329
/*! transaction: transaction checkpoint currently running */
#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1330
+/*!
+ * transaction: transaction checkpoint currently running for history
+ * store file
+ */
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING_HS 1331
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1331
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1332
/*!
* transaction: transaction checkpoint history store file duration
* (usecs)
*/
-#define WT_STAT_CONN_TXN_HS_CKPT_DURATION 1332
+#define WT_STAT_CONN_TXN_HS_CKPT_DURATION 1333
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1333
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1334
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1334
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1335
/*!
* transaction: transaction checkpoint most recent duration for gathering
* all handles (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION 1335
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION 1336
/*!
* transaction: transaction checkpoint most recent duration for gathering
* applied handles (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_APPLY 1336
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_APPLY 1337
/*!
* transaction: transaction checkpoint most recent duration for gathering
* skipped handles (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_SKIP 1337
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_SKIP 1338
/*! transaction: transaction checkpoint most recent handles applied */
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_APPLIED 1338
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_APPLIED 1339
/*! transaction: transaction checkpoint most recent handles skipped */
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_SKIPPED 1339
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_SKIPPED 1340
/*! transaction: transaction checkpoint most recent handles walked */
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_WALKED 1340
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_WALKED 1341
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1341
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1342
/*! transaction: transaction checkpoint prepare currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING 1342
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING 1343
/*! transaction: transaction checkpoint prepare max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX 1343
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX 1344
/*! transaction: transaction checkpoint prepare min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN 1344
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN 1345
/*! transaction: transaction checkpoint prepare most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT 1345
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT 1346
/*! transaction: transaction checkpoint prepare total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL 1346
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL 1347
/*! transaction: transaction checkpoint scrub dirty target */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1347
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1348
/*! transaction: transaction checkpoint scrub time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1348
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1349
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1349
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1350
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1350
+#define WT_STAT_CONN_TXN_CHECKPOINT 1351
/*!
* transaction: transaction checkpoints skipped because database was
* clean
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1351
+#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1352
/*! transaction: transaction failures due to history store */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1352
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1353
/*!
* transaction: transaction fsync calls for checkpoint after allocating
* the transaction ID
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1353
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1354
/*!
* transaction: transaction fsync duration for checkpoint after
* allocating the transaction ID (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1354
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1355
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1355
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1356
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1356
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1357
/*! transaction: transaction range of timestamps currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1357
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1358
/*! transaction: transaction range of timestamps pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1358
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1359
/*!
* transaction: transaction range of timestamps pinned by the oldest
* active read timestamp
*/
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1359
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1360
/*!
* transaction: transaction range of timestamps pinned by the oldest
* timestamp
*/
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1360
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1361
/*! transaction: transaction read timestamp of the oldest active reader */
-#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1361
+#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1362
/*! transaction: transaction rollback to stable currently running */
-#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE_RUNNING 1362
+#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE_RUNNING 1363
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1363
+#define WT_STAT_CONN_TXN_SYNC 1364
/*! transaction: transaction walk of concurrent sessions */
-#define WT_STAT_CONN_TXN_WALK_SESSIONS 1364
+#define WT_STAT_CONN_TXN_WALK_SESSIONS 1365
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1365
+#define WT_STAT_CONN_TXN_COMMIT 1366
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1366
+#define WT_STAT_CONN_TXN_ROLLBACK 1367
/*! LSM: sleep for LSM checkpoint throttle */
-#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1367
+#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1368
/*! LSM: sleep for LSM merge throttle */
-#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1368
+#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1369
/*! cache: bytes currently in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INUSE 1369
+#define WT_STAT_CONN_CACHE_BYTES_INUSE 1370
/*! cache: bytes dirty in the cache cumulative */
-#define WT_STAT_CONN_CACHE_BYTES_DIRTY_TOTAL 1370
+#define WT_STAT_CONN_CACHE_BYTES_DIRTY_TOTAL 1371
/*! cache: bytes read into cache */
-#define WT_STAT_CONN_CACHE_BYTES_READ 1371
+#define WT_STAT_CONN_CACHE_BYTES_READ 1372
/*! cache: bytes written from cache */
-#define WT_STAT_CONN_CACHE_BYTES_WRITE 1372
+#define WT_STAT_CONN_CACHE_BYTES_WRITE 1373
/*! cache: checkpoint blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1373
+#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1374
+/*!
+ * cache: checkpoint of history store file blocked non-history store page
+ * eviction
+ */
+#define WT_STAT_CONN_CACHE_EVICTION_BLOCKED_CHECKPOINT_HS 1375
/*! cache: eviction walk target pages histogram - 0-9 */
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT10 1374
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT10 1376
/*! cache: eviction walk target pages histogram - 10-31 */
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT32 1375
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT32 1377
/*! cache: eviction walk target pages histogram - 128 and higher */
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_GE128 1376
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_GE128 1378
/*! cache: eviction walk target pages histogram - 32-63 */
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT64 1377
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT64 1379
/*! cache: eviction walk target pages histogram - 64-128 */
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT128 1378
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT128 1380
/*!
* cache: eviction walk target pages reduced due to history store cache
* pressure
*/
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_REDUCED 1379
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_REDUCED 1381
/*! cache: eviction walks abandoned */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ABANDONED 1380
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ABANDONED 1382
/*! cache: eviction walks gave up because they restarted their walk twice */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STOPPED 1381
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STOPPED 1383
/*!
* cache: eviction walks gave up because they saw too many pages and
* found no candidates
*/
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_NO_TARGETS 1382
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_NO_TARGETS 1384
/*!
* cache: eviction walks gave up because they saw too many pages and
* found too few candidates
*/
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 1383
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 1385
/*! cache: eviction walks reached end of tree */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ENDED 1384
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ENDED 1386
/*! cache: eviction walks restarted */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK_RESTART 1385
+#define WT_STAT_CONN_CACHE_EVICTION_WALK_RESTART 1387
/*! cache: eviction walks started from root of tree */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK_FROM_ROOT 1386
+#define WT_STAT_CONN_CACHE_EVICTION_WALK_FROM_ROOT 1388
/*! cache: eviction walks started from saved location in tree */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK_SAVED_POS 1387
+#define WT_STAT_CONN_CACHE_EVICTION_WALK_SAVED_POS 1389
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1388
+#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1390
/*! cache: history store table insert calls */
-#define WT_STAT_CONN_CACHE_HS_INSERT 1389
+#define WT_STAT_CONN_CACHE_HS_INSERT 1391
/*! cache: history store table insert calls that returned restart */
-#define WT_STAT_CONN_CACHE_HS_INSERT_RESTART 1390
+#define WT_STAT_CONN_CACHE_HS_INSERT_RESTART 1392
/*!
* cache: history store table out-of-order resolved updates that lose
* their durable timestamp
*/
-#define WT_STAT_CONN_CACHE_HS_ORDER_LOSE_DURABLE_TIMESTAMP 1391
+#define WT_STAT_CONN_CACHE_HS_ORDER_LOSE_DURABLE_TIMESTAMP 1393
/*!
* cache: history store table out-of-order updates that were fixed up by
- * moving existing records
- */
-#define WT_STAT_CONN_CACHE_HS_ORDER_FIXUP_MOVE 1392
-/*!
- * cache: history store table out-of-order updates that were fixed up
- * during insertion
+ * reinserting with the fixed timestamp
*/
-#define WT_STAT_CONN_CACHE_HS_ORDER_FIXUP_INSERT 1393
+#define WT_STAT_CONN_CACHE_HS_ORDER_REINSERT 1394
/*! cache: history store table reads */
-#define WT_STAT_CONN_CACHE_HS_READ 1394
+#define WT_STAT_CONN_CACHE_HS_READ 1395
/*! cache: history store table reads missed */
-#define WT_STAT_CONN_CACHE_HS_READ_MISS 1395
+#define WT_STAT_CONN_CACHE_HS_READ_MISS 1396
/*! cache: history store table reads requiring squashed modifies */
-#define WT_STAT_CONN_CACHE_HS_READ_SQUASH 1396
+#define WT_STAT_CONN_CACHE_HS_READ_SQUASH 1397
/*!
* cache: history store table truncation by rollback to stable to remove
* an unstable update
*/
-#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_RTS_UNSTABLE 1397
+#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_RTS_UNSTABLE 1398
/*!
* cache: history store table truncation by rollback to stable to remove
* an update
*/
-#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_RTS 1398
+#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_RTS 1399
/*! cache: history store table truncation to remove an update */
-#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE 1399
+#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE 1400
/*!
* cache: history store table truncation to remove range of updates due
* to key being removed from the data page during reconciliation
*/
-#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_ONPAGE_REMOVAL 1400
+#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_ONPAGE_REMOVAL 1401
/*!
* cache: history store table truncation to remove range of updates due
- * to non timestamped update on data page
+ * to out-of-order timestamp update on data page
*/
-#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_NON_TS 1401
+#define WT_STAT_CONN_CACHE_HS_ORDER_REMOVE 1402
/*! cache: history store table writes requiring squashed modifies */
-#define WT_STAT_CONN_CACHE_HS_WRITE_SQUASH 1402
+#define WT_STAT_CONN_CACHE_HS_WRITE_SQUASH 1403
/*! cache: in-memory page passed criteria to be split */
-#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1403
+#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1404
/*! cache: in-memory page splits */
-#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1404
+#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1405
/*! cache: internal pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1405
+#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1406
/*! cache: internal pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1406
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1407
/*! cache: leaf pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1407
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1408
/*! cache: modified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1408
+#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1409
/*! cache: overflow pages read into cache */
-#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1409
+#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1410
/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1410
+#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1411
/*! cache: page written requiring history store records */
-#define WT_STAT_CONN_CACHE_WRITE_HS 1411
+#define WT_STAT_CONN_CACHE_WRITE_HS 1412
/*! cache: pages read into cache */
-#define WT_STAT_CONN_CACHE_READ 1412
+#define WT_STAT_CONN_CACHE_READ 1413
/*! cache: pages read into cache after truncate */
-#define WT_STAT_CONN_CACHE_READ_DELETED 1413
+#define WT_STAT_CONN_CACHE_READ_DELETED 1414
/*! cache: pages read into cache after truncate in prepare state */
-#define WT_STAT_CONN_CACHE_READ_DELETED_PREPARED 1414
+#define WT_STAT_CONN_CACHE_READ_DELETED_PREPARED 1415
/*! cache: pages requested from the cache */
-#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1415
+#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1416
/*! cache: pages seen by eviction walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1416
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1417
/*! cache: pages written from cache */
-#define WT_STAT_CONN_CACHE_WRITE 1417
+#define WT_STAT_CONN_CACHE_WRITE 1418
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1418
+#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1419
/*! cache: tracked dirty bytes in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1419
+#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1420
/*! cache: unmodified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1420
+#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1421
/*! checkpoint-cleanup: pages added for eviction */
-#define WT_STAT_CONN_CC_PAGES_EVICT 1421
+#define WT_STAT_CONN_CC_PAGES_EVICT 1422
/*! checkpoint-cleanup: pages removed */
-#define WT_STAT_CONN_CC_PAGES_REMOVED 1422
+#define WT_STAT_CONN_CC_PAGES_REMOVED 1423
/*! checkpoint-cleanup: pages skipped during tree walk */
-#define WT_STAT_CONN_CC_PAGES_WALK_SKIPPED 1423
+#define WT_STAT_CONN_CC_PAGES_WALK_SKIPPED 1424
/*! checkpoint-cleanup: pages visited */
-#define WT_STAT_CONN_CC_PAGES_VISITED 1424
+#define WT_STAT_CONN_CC_PAGES_VISITED 1425
/*! cursor: Total number of entries skipped by cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT_SKIP_TOTAL 1425
+#define WT_STAT_CONN_CURSOR_NEXT_SKIP_TOTAL 1426
/*! cursor: Total number of entries skipped by cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV_SKIP_TOTAL 1426
+#define WT_STAT_CONN_CURSOR_PREV_SKIP_TOTAL 1427
/*!
* cursor: Total number of entries skipped to position the history store
* cursor
*/
-#define WT_STAT_CONN_CURSOR_SKIP_HS_CUR_POSITION 1427
+#define WT_STAT_CONN_CURSOR_SKIP_HS_CUR_POSITION 1428
+/*!
+ * cursor: Total number of times a search near has exited due to prefix
+ * config
+ */
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR_PREFIX_FAST_PATHS 1429
/*!
* cursor: cursor next calls that skip due to a globally visible history
* store tombstone
*/
-#define WT_STAT_CONN_CURSOR_NEXT_HS_TOMBSTONE 1428
+#define WT_STAT_CONN_CURSOR_NEXT_HS_TOMBSTONE 1430
/*!
* cursor: cursor next calls that skip greater than or equal to 100
* entries
*/
-#define WT_STAT_CONN_CURSOR_NEXT_SKIP_GE_100 1429
+#define WT_STAT_CONN_CURSOR_NEXT_SKIP_GE_100 1431
/*! cursor: cursor next calls that skip less than 100 entries */
-#define WT_STAT_CONN_CURSOR_NEXT_SKIP_LT_100 1430
+#define WT_STAT_CONN_CURSOR_NEXT_SKIP_LT_100 1432
/*!
* cursor: cursor prev calls that skip due to a globally visible history
* store tombstone
*/
-#define WT_STAT_CONN_CURSOR_PREV_HS_TOMBSTONE 1431
+#define WT_STAT_CONN_CURSOR_PREV_HS_TOMBSTONE 1433
/*!
* cursor: cursor prev calls that skip greater than or equal to 100
* entries
*/
-#define WT_STAT_CONN_CURSOR_PREV_SKIP_GE_100 1432
+#define WT_STAT_CONN_CURSOR_PREV_SKIP_GE_100 1434
/*! cursor: cursor prev calls that skip less than 100 entries */
-#define WT_STAT_CONN_CURSOR_PREV_SKIP_LT_100 1433
+#define WT_STAT_CONN_CURSOR_PREV_SKIP_LT_100 1435
/*! cursor: open cursor count */
-#define WT_STAT_CONN_CURSOR_OPEN_COUNT 1434
+#define WT_STAT_CONN_CURSOR_OPEN_COUNT 1436
/*! reconciliation: approximate byte size of timestamps in pages written */
-#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TS 1435
+#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TS 1437
/*!
* reconciliation: approximate byte size of transaction IDs in pages
* written
*/
-#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TXN 1436
+#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TXN 1438
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1437
+#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1439
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1438
+#define WT_STAT_CONN_REC_PAGES 1440
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1439
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1441
/*! reconciliation: pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE 1440
+#define WT_STAT_CONN_REC_PAGE_DELETE 1442
/*!
* reconciliation: pages written including an aggregated newest start
* durable timestamp
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_START_DURABLE_TS 1441
+#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_START_DURABLE_TS 1443
/*!
* reconciliation: pages written including an aggregated newest stop
* durable timestamp
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_DURABLE_TS 1442
+#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_DURABLE_TS 1444
/*!
* reconciliation: pages written including an aggregated newest stop
* timestamp
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TS 1443
+#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TS 1445
/*!
* reconciliation: pages written including an aggregated newest stop
* transaction ID
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TXN 1444
+#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TXN 1446
/*!
* reconciliation: pages written including an aggregated newest
* transaction ID
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_TXN 1445
+#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_TXN 1447
/*!
* reconciliation: pages written including an aggregated oldest start
* timestamp
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_OLDEST_START_TS 1446
+#define WT_STAT_CONN_REC_TIME_AGGR_OLDEST_START_TS 1448
/*! reconciliation: pages written including an aggregated prepare */
-#define WT_STAT_CONN_REC_TIME_AGGR_PREPARED 1447
+#define WT_STAT_CONN_REC_TIME_AGGR_PREPARED 1449
/*!
* reconciliation: pages written including at least one start durable
* timestamp
*/
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_START_TS 1448
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_START_TS 1450
/*!
* reconciliation: pages written including at least one start transaction
* ID
*/
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TXN 1449
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TXN 1451
/*!
* reconciliation: pages written including at least one stop durable
* timestamp
*/
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_STOP_TS 1450
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_STOP_TS 1452
/*! reconciliation: pages written including at least one stop timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TS 1451
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TS 1453
/*!
* reconciliation: pages written including at least one stop transaction
* ID
*/
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TXN 1452
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TXN 1454
/*! reconciliation: records written including a start durable timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_START_TS 1453
+#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_START_TS 1455
/*! reconciliation: records written including a start timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_START_TS 1454
+#define WT_STAT_CONN_REC_TIME_WINDOW_START_TS 1456
/*! reconciliation: records written including a start transaction ID */
-#define WT_STAT_CONN_REC_TIME_WINDOW_START_TXN 1455
+#define WT_STAT_CONN_REC_TIME_WINDOW_START_TXN 1457
/*! reconciliation: records written including a stop durable timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_STOP_TS 1456
+#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_STOP_TS 1458
/*! reconciliation: records written including a stop timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TS 1457
+#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TS 1459
/*! reconciliation: records written including a stop transaction ID */
-#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TXN 1458
+#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TXN 1460
/*! session: tiered storage local retention time (secs) */
-#define WT_STAT_CONN_TIERED_RETENTION 1459
+#define WT_STAT_CONN_TIERED_RETENTION 1461
/*! session: tiered storage object size */
-#define WT_STAT_CONN_TIERED_OBJECT_SIZE 1460
+#define WT_STAT_CONN_TIERED_OBJECT_SIZE 1462
/*! transaction: race to read prepared update retry */
-#define WT_STAT_CONN_TXN_READ_RACE_PREPARE_UPDATE 1461
+#define WT_STAT_CONN_TXN_READ_RACE_PREPARE_UPDATE 1463
/*!
* transaction: rollback to stable history store records with stop
* timestamps older than newer records
*/
-#define WT_STAT_CONN_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 1462
+#define WT_STAT_CONN_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 1464
/*! transaction: rollback to stable inconsistent checkpoint */
-#define WT_STAT_CONN_TXN_RTS_INCONSISTENT_CKPT 1463
+#define WT_STAT_CONN_TXN_RTS_INCONSISTENT_CKPT 1465
/*! transaction: rollback to stable keys removed */
-#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1464
+#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1466
/*! transaction: rollback to stable keys restored */
-#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1465
+#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1467
/*! transaction: rollback to stable restored tombstones from history store */
-#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1466
+#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1468
/*! transaction: rollback to stable restored updates from history store */
-#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_UPDATES 1467
+#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_UPDATES 1469
/*! transaction: rollback to stable sweeping history store keys */
-#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1468
+#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1470
/*! transaction: rollback to stable updates removed from history store */
-#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1469
+#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1471
/*! transaction: transaction checkpoints due to obsolete pages */
-#define WT_STAT_CONN_TXN_CHECKPOINT_OBSOLETE_APPLIED 1470
+#define WT_STAT_CONN_TXN_CHECKPOINT_OBSOLETE_APPLIED 1472
/*! transaction: update conflicts */
-#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1471
+#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1473
/*!
* @}
@@ -6509,64 +6400,64 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2111
/*! cache: checkpoint blocked page eviction */
#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2112
+/*!
+ * cache: checkpoint of history store file blocked non-history store page
+ * eviction
+ */
+#define WT_STAT_DSRC_CACHE_EVICTION_BLOCKED_CHECKPOINT_HS 2113
/*! cache: eviction walk target pages histogram - 0-9 */
-#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT10 2113
+#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT10 2114
/*! cache: eviction walk target pages histogram - 10-31 */
-#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT32 2114
+#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT32 2115
/*! cache: eviction walk target pages histogram - 128 and higher */
-#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_GE128 2115
+#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_GE128 2116
/*! cache: eviction walk target pages histogram - 32-63 */
-#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT64 2116
+#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT64 2117
/*! cache: eviction walk target pages histogram - 64-128 */
-#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT128 2117
+#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT128 2118
/*!
* cache: eviction walk target pages reduced due to history store cache
* pressure
*/
-#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_REDUCED 2118
+#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_REDUCED 2119
/*! cache: eviction walks abandoned */
-#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_ABANDONED 2119
+#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_ABANDONED 2120
/*! cache: eviction walks gave up because they restarted their walk twice */
-#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_STOPPED 2120
+#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_STOPPED 2121
/*!
* cache: eviction walks gave up because they saw too many pages and
* found no candidates
*/
-#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_GAVE_UP_NO_TARGETS 2121
+#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_GAVE_UP_NO_TARGETS 2122
/*!
* cache: eviction walks gave up because they saw too many pages and
* found too few candidates
*/
-#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 2122
+#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 2123
/*! cache: eviction walks reached end of tree */
-#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_ENDED 2123
+#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_ENDED 2124
/*! cache: eviction walks restarted */
-#define WT_STAT_DSRC_CACHE_EVICTION_WALK_RESTART 2124
+#define WT_STAT_DSRC_CACHE_EVICTION_WALK_RESTART 2125
/*! cache: eviction walks started from root of tree */
-#define WT_STAT_DSRC_CACHE_EVICTION_WALK_FROM_ROOT 2125
+#define WT_STAT_DSRC_CACHE_EVICTION_WALK_FROM_ROOT 2126
/*! cache: eviction walks started from saved location in tree */
-#define WT_STAT_DSRC_CACHE_EVICTION_WALK_SAVED_POS 2126
+#define WT_STAT_DSRC_CACHE_EVICTION_WALK_SAVED_POS 2127
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2127
+#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2128
/*! cache: history store table insert calls */
-#define WT_STAT_DSRC_CACHE_HS_INSERT 2128
+#define WT_STAT_DSRC_CACHE_HS_INSERT 2129
/*! cache: history store table insert calls that returned restart */
-#define WT_STAT_DSRC_CACHE_HS_INSERT_RESTART 2129
+#define WT_STAT_DSRC_CACHE_HS_INSERT_RESTART 2130
/*!
* cache: history store table out-of-order resolved updates that lose
* their durable timestamp
*/
-#define WT_STAT_DSRC_CACHE_HS_ORDER_LOSE_DURABLE_TIMESTAMP 2130
+#define WT_STAT_DSRC_CACHE_HS_ORDER_LOSE_DURABLE_TIMESTAMP 2131
/*!
* cache: history store table out-of-order updates that were fixed up by
- * moving existing records
- */
-#define WT_STAT_DSRC_CACHE_HS_ORDER_FIXUP_MOVE 2131
-/*!
- * cache: history store table out-of-order updates that were fixed up
- * during insertion
+ * reinserting with the fixed timestamp
*/
-#define WT_STAT_DSRC_CACHE_HS_ORDER_FIXUP_INSERT 2132
+#define WT_STAT_DSRC_CACHE_HS_ORDER_REINSERT 2132
/*! cache: history store table reads */
#define WT_STAT_DSRC_CACHE_HS_READ 2133
/*! cache: history store table reads missed */
@@ -6592,9 +6483,9 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_DSRC_CACHE_HS_KEY_TRUNCATE_ONPAGE_REMOVAL 2139
/*!
* cache: history store table truncation to remove range of updates due
- * to non timestamped update on data page
+ * to out-of-order timestamp update on data page
*/
-#define WT_STAT_DSRC_CACHE_HS_KEY_TRUNCATE_NON_TS 2140
+#define WT_STAT_DSRC_CACHE_HS_ORDER_REMOVE 2140
/*! cache: history store table writes requiring squashed modifies */
#define WT_STAT_DSRC_CACHE_HS_WRITE_SQUASH 2141
/*! cache: in-memory page passed criteria to be split */
@@ -6651,141 +6542,146 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
*/
#define WT_STAT_DSRC_CURSOR_SKIP_HS_CUR_POSITION 2166
/*!
+ * cursor: Total number of times a search near has exited due to prefix
+ * config
+ */
+#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR_PREFIX_FAST_PATHS 2167
+/*!
* cursor: cursor next calls that skip due to a globally visible history
* store tombstone
*/
-#define WT_STAT_DSRC_CURSOR_NEXT_HS_TOMBSTONE 2167
+#define WT_STAT_DSRC_CURSOR_NEXT_HS_TOMBSTONE 2168
/*!
* cursor: cursor next calls that skip greater than or equal to 100
* entries
*/
-#define WT_STAT_DSRC_CURSOR_NEXT_SKIP_GE_100 2168
+#define WT_STAT_DSRC_CURSOR_NEXT_SKIP_GE_100 2169
/*! cursor: cursor next calls that skip less than 100 entries */
-#define WT_STAT_DSRC_CURSOR_NEXT_SKIP_LT_100 2169
+#define WT_STAT_DSRC_CURSOR_NEXT_SKIP_LT_100 2170
/*!
* cursor: cursor prev calls that skip due to a globally visible history
* store tombstone
*/
-#define WT_STAT_DSRC_CURSOR_PREV_HS_TOMBSTONE 2170
+#define WT_STAT_DSRC_CURSOR_PREV_HS_TOMBSTONE 2171
/*!
* cursor: cursor prev calls that skip greater than or equal to 100
* entries
*/
-#define WT_STAT_DSRC_CURSOR_PREV_SKIP_GE_100 2171
+#define WT_STAT_DSRC_CURSOR_PREV_SKIP_GE_100 2172
/*! cursor: cursor prev calls that skip less than 100 entries */
-#define WT_STAT_DSRC_CURSOR_PREV_SKIP_LT_100 2172
+#define WT_STAT_DSRC_CURSOR_PREV_SKIP_LT_100 2173
/*! cursor: open cursor count */
-#define WT_STAT_DSRC_CURSOR_OPEN_COUNT 2173
+#define WT_STAT_DSRC_CURSOR_OPEN_COUNT 2174
/*! reconciliation: approximate byte size of timestamps in pages written */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_BYTES_TS 2174
+#define WT_STAT_DSRC_REC_TIME_WINDOW_BYTES_TS 2175
/*!
* reconciliation: approximate byte size of transaction IDs in pages
* written
*/
-#define WT_STAT_DSRC_REC_TIME_WINDOW_BYTES_TXN 2175
+#define WT_STAT_DSRC_REC_TIME_WINDOW_BYTES_TXN 2176
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2176
+#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2177
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_DSRC_REC_PAGES 2177
+#define WT_STAT_DSRC_REC_PAGES 2178
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_DSRC_REC_PAGES_EVICTION 2178
+#define WT_STAT_DSRC_REC_PAGES_EVICTION 2179
/*! reconciliation: pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE 2179
+#define WT_STAT_DSRC_REC_PAGE_DELETE 2180
/*!
* reconciliation: pages written including an aggregated newest start
* durable timestamp
*/
-#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_START_DURABLE_TS 2180
+#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_START_DURABLE_TS 2181
/*!
* reconciliation: pages written including an aggregated newest stop
* durable timestamp
*/
-#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_DURABLE_TS 2181
+#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_DURABLE_TS 2182
/*!
* reconciliation: pages written including an aggregated newest stop
* timestamp
*/
-#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_TS 2182
+#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_TS 2183
/*!
* reconciliation: pages written including an aggregated newest stop
* transaction ID
*/
-#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_TXN 2183
+#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_TXN 2184
/*!
* reconciliation: pages written including an aggregated newest
* transaction ID
*/
-#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_TXN 2184
+#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_TXN 2185
/*!
* reconciliation: pages written including an aggregated oldest start
* timestamp
*/
-#define WT_STAT_DSRC_REC_TIME_AGGR_OLDEST_START_TS 2185
+#define WT_STAT_DSRC_REC_TIME_AGGR_OLDEST_START_TS 2186
/*! reconciliation: pages written including an aggregated prepare */
-#define WT_STAT_DSRC_REC_TIME_AGGR_PREPARED 2186
+#define WT_STAT_DSRC_REC_TIME_AGGR_PREPARED 2187
/*!
* reconciliation: pages written including at least one start durable
* timestamp
*/
-#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_DURABLE_START_TS 2187
+#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_DURABLE_START_TS 2188
/*!
* reconciliation: pages written including at least one start transaction
* ID
*/
-#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_START_TXN 2188
+#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_START_TXN 2189
/*!
* reconciliation: pages written including at least one stop durable
* timestamp
*/
-#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_DURABLE_STOP_TS 2189
+#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_DURABLE_STOP_TS 2190
/*! reconciliation: pages written including at least one stop timestamp */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_STOP_TS 2190
+#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_STOP_TS 2191
/*!
* reconciliation: pages written including at least one stop transaction
* ID
*/
-#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_STOP_TXN 2191
+#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_STOP_TXN 2192
/*! reconciliation: records written including a start durable timestamp */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_DURABLE_START_TS 2192
+#define WT_STAT_DSRC_REC_TIME_WINDOW_DURABLE_START_TS 2193
/*! reconciliation: records written including a start timestamp */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_START_TS 2193
+#define WT_STAT_DSRC_REC_TIME_WINDOW_START_TS 2194
/*! reconciliation: records written including a start transaction ID */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_START_TXN 2194
+#define WT_STAT_DSRC_REC_TIME_WINDOW_START_TXN 2195
/*! reconciliation: records written including a stop durable timestamp */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_DURABLE_STOP_TS 2195
+#define WT_STAT_DSRC_REC_TIME_WINDOW_DURABLE_STOP_TS 2196
/*! reconciliation: records written including a stop timestamp */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_STOP_TS 2196
+#define WT_STAT_DSRC_REC_TIME_WINDOW_STOP_TS 2197
/*! reconciliation: records written including a stop transaction ID */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_STOP_TXN 2197
+#define WT_STAT_DSRC_REC_TIME_WINDOW_STOP_TXN 2198
/*! session: tiered storage local retention time (secs) */
-#define WT_STAT_DSRC_TIERED_RETENTION 2198
+#define WT_STAT_DSRC_TIERED_RETENTION 2199
/*! session: tiered storage object size */
-#define WT_STAT_DSRC_TIERED_OBJECT_SIZE 2199
+#define WT_STAT_DSRC_TIERED_OBJECT_SIZE 2200
/*! transaction: race to read prepared update retry */
-#define WT_STAT_DSRC_TXN_READ_RACE_PREPARE_UPDATE 2200
+#define WT_STAT_DSRC_TXN_READ_RACE_PREPARE_UPDATE 2201
/*!
* transaction: rollback to stable history store records with stop
* timestamps older than newer records
*/
-#define WT_STAT_DSRC_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 2201
+#define WT_STAT_DSRC_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 2202
/*! transaction: rollback to stable inconsistent checkpoint */
-#define WT_STAT_DSRC_TXN_RTS_INCONSISTENT_CKPT 2202
+#define WT_STAT_DSRC_TXN_RTS_INCONSISTENT_CKPT 2203
/*! transaction: rollback to stable keys removed */
-#define WT_STAT_DSRC_TXN_RTS_KEYS_REMOVED 2203
+#define WT_STAT_DSRC_TXN_RTS_KEYS_REMOVED 2204
/*! transaction: rollback to stable keys restored */
-#define WT_STAT_DSRC_TXN_RTS_KEYS_RESTORED 2204
+#define WT_STAT_DSRC_TXN_RTS_KEYS_RESTORED 2205
/*! transaction: rollback to stable restored tombstones from history store */
-#define WT_STAT_DSRC_TXN_RTS_HS_RESTORE_TOMBSTONES 2205
+#define WT_STAT_DSRC_TXN_RTS_HS_RESTORE_TOMBSTONES 2206
/*! transaction: rollback to stable restored updates from history store */
-#define WT_STAT_DSRC_TXN_RTS_HS_RESTORE_UPDATES 2206
+#define WT_STAT_DSRC_TXN_RTS_HS_RESTORE_UPDATES 2207
/*! transaction: rollback to stable sweeping history store keys */
-#define WT_STAT_DSRC_TXN_RTS_SWEEP_HS_KEYS 2207
+#define WT_STAT_DSRC_TXN_RTS_SWEEP_HS_KEYS 2208
/*! transaction: rollback to stable updates removed from history store */
-#define WT_STAT_DSRC_TXN_RTS_HS_REMOVED 2208
+#define WT_STAT_DSRC_TXN_RTS_HS_REMOVED 2209
/*! transaction: transaction checkpoints due to obsolete pages */
-#define WT_STAT_DSRC_TXN_CHECKPOINT_OBSOLETE_APPLIED 2209
+#define WT_STAT_DSRC_TXN_CHECKPOINT_OBSOLETE_APPLIED 2210
/*! transaction: update conflicts */
-#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2210
+#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2211
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger_ext.h b/src/third_party/wiredtiger/src/include/wiredtiger_ext.h
index 7251ea7fcc0..0efdc3bfefc 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger_ext.h
+++ b/src/third_party/wiredtiger/src/include/wiredtiger_ext.h
@@ -239,6 +239,21 @@ struct __wt_extension_api {
WT_CONFIG_ARG *config, WT_CONFIG_PARSER **config_parserp);
/*!
+ * Get the file system abstraction used by WiredTiger.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle (or NULL if none available)
+ * @param file_system the returned file system handle.
+ * @errors
+ * If called from an extension's initialization routine, this may
+ * return WT_NOTFOUND if the file system has not yet been established.
+ *
+ * @snippet ex_data_source.c WT_EXTENSION metadata insert
+ */
+ int (*file_system_get)(
+ WT_EXTENSION_API *wt_api, WT_SESSION *session, WT_FILE_SYSTEM **file_system);
+
+ /*!
* Insert a row into the metadata if it does not already exist.
*
* @param wt_api the extension handle
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h
index 79b10aeda22..b29575e0628 100644
--- a/src/third_party/wiredtiger/src/include/wt_internal.h
+++ b/src/third_party/wiredtiger/src/include/wt_internal.h
@@ -239,8 +239,6 @@ struct __wt_lsm_worker_args;
typedef struct __wt_lsm_worker_args WT_LSM_WORKER_ARGS;
struct __wt_lsm_worker_cookie;
typedef struct __wt_lsm_worker_cookie WT_LSM_WORKER_COOKIE;
-struct __wt_modify_vector;
-typedef struct __wt_modify_vector WT_MODIFY_VECTOR;
struct __wt_multi;
typedef struct __wt_multi WT_MULTI;
struct __wt_myslot;
@@ -323,6 +321,12 @@ struct __wt_tiered;
typedef struct __wt_tiered WT_TIERED;
struct __wt_tiered_manager;
typedef struct __wt_tiered_manager WT_TIERED_MANAGER;
+struct __wt_tiered_object;
+typedef struct __wt_tiered_object WT_TIERED_OBJECT;
+struct __wt_tiered_tiers;
+typedef struct __wt_tiered_tiers WT_TIERED_TIERS;
+struct __wt_tiered_tree;
+typedef struct __wt_tiered_tree WT_TIERED_TREE;
struct __wt_time_aggregate;
typedef struct __wt_time_aggregate WT_TIME_AGGREGATE;
struct __wt_time_window;
@@ -341,6 +345,8 @@ struct __wt_update;
typedef struct __wt_update WT_UPDATE;
struct __wt_update_value;
typedef struct __wt_update_value WT_UPDATE_VALUE;
+struct __wt_update_vector;
+typedef struct __wt_update_vector WT_UPDATE_VECTOR;
union __wt_lsn;
typedef union __wt_lsn WT_LSN;
union __wt_rand_state;
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c
index 9a2632da7c9..e95dde65807 100644
--- a/src/third_party/wiredtiger/src/log/log.c
+++ b/src/third_party/wiredtiger/src/log/log.c
@@ -157,7 +157,7 @@ __log_wait_for_earlier_slot(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
* If we're on a locked path and the write LSN is not advancing, unlock in case an earlier
* thread is trying to switch its slot and complete its operation.
*/
- if (F_ISSET(session, WT_SESSION_LOCKED_SLOT))
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SLOT))
__wt_spin_unlock(session, &log->log_slot_lock);
/*
* This may not be initialized if we are starting at an older log file version. So only
@@ -169,7 +169,7 @@ __log_wait_for_earlier_slot(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
__wt_yield();
else
__wt_cond_wait(session, log->log_write_cond, 200, NULL);
- if (F_ISSET(session, WT_SESSION_LOCKED_SLOT))
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SLOT))
__wt_spin_lock(session, &log->log_slot_lock);
}
}
@@ -1129,7 +1129,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created)
* write to the log. If the log file size is small we could fill a log file before the previous
* one is closed. Wait for that to close.
*/
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SLOT));
for (yield_cnt = 0; log->log_close_fh != NULL;) {
WT_STAT_CONN_INCR(session, log_close_yields);
/*
@@ -1334,7 +1334,7 @@ __wt_log_acquire(WT_SESSION_IMPL *session, uint64_t recsize, WT_LOGSLOT *slot)
* the release LSN. That way when log files switch, we're waiting for the correct LSN from
* outstanding writes.
*/
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SLOT));
/*
* We need to set the release LSN earlier, before a log file change.
*/
diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c
index 30c3cd41016..a4583462300 100644
--- a/src/third_party/wiredtiger/src/log/log_slot.c
+++ b/src/third_party/wiredtiger/src/log/log_slot.c
@@ -103,7 +103,7 @@ __log_slot_close(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *releasep, boo
*releasep = false;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SLOT));
conn = S2C(session);
log = conn->log;
if (slot == NULL)
@@ -235,7 +235,7 @@ __log_slot_new(WT_SESSION_IMPL *session)
int count;
#endif
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SLOT));
conn = S2C(session);
log = conn->log;
#ifdef HAVE_DIAGNOSTIC
@@ -318,7 +318,7 @@ __log_slot_switch_internal(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool for
release = false;
slot = myslot->slot;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SLOT));
/*
* If someone else raced us to closing this specific slot, we're done here.
@@ -526,7 +526,7 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT
log = conn->log;
time_start = time_stop = 0;
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SLOT));
+ WT_ASSERT(session, !FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SLOT));
WT_ASSERT(session, mysize != 0);
/*
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_manager.c b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
index 0d5f1eae331..4dfcaa65902 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_manager.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
@@ -208,7 +208,7 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session)
* files. Use read-uncommitted isolation to avoid keeping updates in cache unnecessarily.
*/
for (i = 0; i < WT_LSM_MAX_WORKERS; i++) {
- WT_ERR(__wt_open_internal_session(conn, "lsm-worker", false, 0, &worker_session));
+ WT_ERR(__wt_open_internal_session(conn, "lsm-worker", false, 0, 0, &worker_session));
worker_session->isolation = WT_ISO_READ_UNCOMMITTED;
manager->lsm_worker_cookies[i].session = worker_session;
}
@@ -356,7 +356,7 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session)
if (TAILQ_EMPTY(&conn->lsmqh))
continue;
__wt_readlock(session, &conn->dhandle_lock);
- F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ);
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_READ);
dhandle_locked = true;
TAILQ_FOREACH (lsm_tree, &conn->lsmqh, q) {
if (!lsm_tree->active)
@@ -403,14 +403,14 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session)
}
}
__wt_readunlock(session, &conn->dhandle_lock);
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ);
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_READ);
dhandle_locked = false;
}
err:
if (dhandle_locked) {
__wt_readunlock(session, &conn->dhandle_lock);
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ);
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_READ);
}
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
index 39264213ecc..be9e9dae345 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
@@ -73,7 +73,8 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final)
/* We may be destroying an lsm_tree before it was added. */
if (F_ISSET(lsm_tree, WT_LSM_TREE_OPEN)) {
- WT_ASSERT(session, final || F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
+ WT_ASSERT(
+ session, final || FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
TAILQ_REMOVE(&S2C(session)->lsmqh, lsm_tree, q);
}
@@ -282,7 +283,7 @@ __lsm_tree_cleanup_old(WT_SESSION_IMPL *session, const char *uri)
int
__wt_lsm_tree_setup_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk)
{
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA));
__wt_epoch(session, &chunk->create_time);
WT_RET(__wt_spin_init(session, &chunk->timestamp_spinlock, "LSM chunk timestamp"));
@@ -376,7 +377,7 @@ __lsm_tree_find(WT_SESSION_IMPL *session, const char *uri, bool exclusive, WT_LS
WT_LSM_TREE *lsm_tree;
*treep = NULL;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST));
/* See if the tree is already open. */
TAILQ_FOREACH (lsm_tree, &S2C(session)->lsmqh, q)
@@ -461,7 +462,7 @@ __lsm_tree_open(WT_SESSION_IMPL *session, const char *uri, bool exclusive, WT_LS
conn = S2C(session);
lsm_tree = NULL;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
/* Start the LSM manager thread if it isn't running. */
WT_RET(__wt_lsm_manager_start(session));
@@ -1006,7 +1007,8 @@ __wt_lsm_tree_readlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
* Diagnostic: avoid deadlocks with the schema lock: if we need it for an operation, we should
* already have it.
*/
- F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_SCHEMA_LOCK);
+ F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE);
+ FLD_SET(session->lock_flags, WT_SESSION_NO_SCHEMA_LOCK);
}
/*
@@ -1016,7 +1018,8 @@ __wt_lsm_tree_readlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
void
__wt_lsm_tree_readunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_SCHEMA_LOCK);
+ F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE);
+ FLD_CLR(session->lock_flags, WT_SESSION_NO_SCHEMA_LOCK);
__wt_readunlock(session, &lsm_tree->rwlock);
}
@@ -1034,7 +1037,8 @@ __wt_lsm_tree_writelock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
* Diagnostic: avoid deadlocks with the schema lock: if we need it for an operation, we should
* already have it.
*/
- F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_SCHEMA_LOCK);
+ F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE);
+ FLD_SET(session->lock_flags, WT_SESSION_NO_SCHEMA_LOCK);
}
/*
@@ -1044,7 +1048,8 @@ __wt_lsm_tree_writelock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
void
__wt_lsm_tree_writeunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_SCHEMA_LOCK);
+ F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE);
+ FLD_CLR(session->lock_flags, WT_SESSION_NO_SCHEMA_LOCK);
__wt_writeunlock(session, &lsm_tree->rwlock);
}
diff --git a/src/third_party/wiredtiger/src/meta/meta_apply.c b/src/third_party/wiredtiger/src/meta/meta_apply.c
index 66663f9b4f0..a729ce1d69b 100644
--- a/src/third_party/wiredtiger/src/meta/meta_apply.c
+++ b/src/third_party/wiredtiger/src/meta/meta_apply.c
@@ -73,7 +73,7 @@ __wt_meta_apply_all(WT_SESSION_IMPL *session, int (*file_func)(WT_SESSION_IMPL *
WT_CURSOR *cursor;
WT_DECL_RET;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA));
WT_RET(__wt_metadata_cursor(session, &cursor));
WT_SAVE_DHANDLE(session, ret = __meta_btree_apply(session, cursor, file_func, name_func, cfg));
WT_TRET(__wt_metadata_cursor_release(session, &cursor));
diff --git a/src/third_party/wiredtiger/src/meta/meta_track.c b/src/third_party/wiredtiger/src/meta/meta_track.c
index 95125961485..0739175c3a1 100644
--- a/src/third_party/wiredtiger/src/meta/meta_track.c
+++ b/src/third_party/wiredtiger/src/meta/meta_track.c
@@ -276,14 +276,14 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
WT_WITH_DHANDLE(session, WT_SESSION_META_DHANDLE(session),
ret = __wt_txn_checkpoint_log(session, false, WT_TXN_LOG_CKPT_SYNC, NULL));
else {
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA));
ckpt_session = S2C(session)->meta_ckpt_session;
/*
* If this operation is part of a running transaction, that should be included in the
* checkpoint.
*/
ckpt_session->txn->id = session->txn->id;
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_METADATA));
+ WT_ASSERT(session, !FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_METADATA));
WT_WITH_DHANDLE(ckpt_session, WT_SESSION_META_DHANDLE(session),
WT_WITH_METADATA_LOCK(ckpt_session, ret = __wt_checkpoint(ckpt_session, NULL)));
ckpt_session->txn->id = WT_TXN_NONE;
@@ -515,7 +515,7 @@ __wt_meta_track_init(WT_SESSION_IMPL *session)
conn = S2C(session);
if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) {
WT_RET(__wt_open_internal_session(
- conn, "metadata-ckpt", false, WT_SESSION_NO_DATA_HANDLES, &conn->meta_ckpt_session));
+ conn, "metadata-ckpt", false, WT_SESSION_NO_DATA_HANDLES, 0, &conn->meta_ckpt_session));
/*
* Set session transaction isolation to read-committed isolation, we rely on that for the
diff --git a/src/third_party/wiredtiger/src/meta/meta_turtle.c b/src/third_party/wiredtiger/src/meta/meta_turtle.c
index a3eeae7c295..6c0b432a067 100644
--- a/src/third_party/wiredtiger/src/meta/meta_turtle.c
+++ b/src/third_party/wiredtiger/src/meta/meta_turtle.c
@@ -129,7 +129,9 @@ __metadata_load_bulk(WT_SESSION_IMPL *session)
WT_ERR(cursor->get_value(cursor, &value));
filecfg[1] = value;
WT_ERR(__wt_direct_io_size_check(session, filecfg, "allocation_size", &allocsize));
- WT_ERR(__wt_block_manager_create(session, key, allocsize));
+ WT_WITH_BUCKET_STORAGE(
+ NULL, session, ret = __wt_block_manager_create(session, key, allocsize));
+ WT_ERR(ret);
}
WT_ERR_NOTFOUND_OK(ret, false);
@@ -339,7 +341,7 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep)
*valuep = NULL;
/* Require single-threading. */
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TURTLE));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TURTLE));
/*
* Open the turtle file; there's one case where we won't find the turtle file, yet still
@@ -404,7 +406,7 @@ __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value)
conn = S2C(session);
/* Require single-threading. */
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TURTLE));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TURTLE));
/*
* Create the turtle setup file: we currently re-write it from scratch every time.
diff --git a/src/third_party/wiredtiger/src/os_common/os_fhandle.c b/src/third_party/wiredtiger/src/os_common/os_fhandle.c
index 18024f50ee3..f39fbd599e7 100644
--- a/src/third_party/wiredtiger/src/os_common/os_fhandle.c
+++ b/src/third_party/wiredtiger/src/os_common/os_fhandle.c
@@ -215,7 +215,7 @@ __wt_open(WT_SESSION_IMPL *session, const char *name, WT_FS_OPEN_FILE_TYPE file_
*fhp = NULL;
conn = S2C(session);
- file_system = conn->file_system;
+ file_system = __wt_fs_file_system(session);
fh = NULL;
open_called = false;
path = NULL;
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_dictionary.c b/src/third_party/wiredtiger/src/reconcile/rec_dictionary.c
index 93c748e54bc..2e6c6f832f6 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_dictionary.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_dictionary.c
@@ -156,7 +156,7 @@ __wt_rec_dictionary_lookup(
*dpp = NULL;
/* Search the dictionary, and return any match we find. */
- hash = __wt_hash_fnv64(val->buf.data, val->buf.size);
+ hash = __wt_hash_city64(val->buf.data, val->buf.size);
for (dp = __rec_dictionary_skip_search(r->dictionary_head, hash);
dp != NULL && dp->hash == hash; dp = dp->next[0]) {
WT_RET(
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
index 0848660c455..d1e4d909b50 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
@@ -216,8 +216,10 @@ __rec_need_save_upd(
if (F_ISSET(r, WT_REC_CHECKPOINT) && upd_select->upd == NULL)
return (false);
- return (!__wt_txn_tw_stop_visible_all(session, &upd_select->tw) &&
- !__wt_txn_tw_start_visible_all(session, &upd_select->tw));
+ if (WT_TIME_WINDOW_HAS_STOP(&upd_select->tw))
+ return (!__wt_txn_tw_stop_visible_all(session, &upd_select->tw));
+ else
+ return (!__wt_txn_tw_start_visible_all(session, &upd_select->tw));
}
/*
diff --git a/src/third_party/wiredtiger/src/schema/schema_create.c b/src/third_party/wiredtiger/src/schema/schema_create.c
index de019758a1b..bc76391db43 100644
--- a/src/third_party/wiredtiger/src/schema/schema_create.c
+++ b/src/third_party/wiredtiger/src/schema/schema_create.c
@@ -97,6 +97,29 @@ err:
}
/*
+ * __create_file_block_manager --
+ * Create a new file in the block manager, and track it.
+ */
+static int
+__create_file_block_manager(
+ WT_SESSION_IMPL *session, const char *uri, const char *filename, uint32_t allocsize)
+{
+ WT_RET(__wt_block_manager_create(session, filename, allocsize));
+
+ /*
+ * Track the creation of this file.
+ *
+ * If something down the line fails, we're going to need to roll this back. Specifically do NOT
+ * track the op in the import case since we do not want to wipe a data file just because we fail
+ * to import it.
+ */
+ if (WT_META_TRACKING(session))
+ WT_RET(__wt_meta_track_fileop(session, NULL, uri));
+
+ return (0);
+}
+
+/*
* __create_file --
* Create a new 'file:' object.
*/
@@ -189,29 +212,19 @@ __create_file(
uri);
}
}
- } else {
+ } else
/* Create the file. */
- WT_ERR(__wt_block_manager_create(session, filename, allocsize));
-
- /*
- * Track the creation of this file.
- *
- * If something down the line fails, we're going to need to roll this back. Specifically do
- * NOT track the op in the import case since we do not want to wipe a data file just because
- * we fail to import it.
- */
- if (WT_META_TRACKING(session))
- WT_ERR(__wt_meta_track_fileop(session, NULL, uri));
- }
+ WT_ERR(__create_file_block_manager(session, uri, filename, allocsize));
/*
- * If creating an ordinary file, append the file ID and current version numbers to the passed-in
- * configuration and insert the resulting configuration into the metadata.
+ * If creating an ordinary file, update the file ID and current version numbers and strip the
+ * incremental backup information and checkpoint LSN from the extracted metadata.
*/
if (!is_metadata) {
if (!import_repair) {
WT_ERR(__wt_scr_alloc(session, 0, &val));
- WT_ERR(__wt_buf_fmt(session, val, "id=%" PRIu32 ",version=(major=%d,minor=%d)",
+ WT_ERR(__wt_buf_fmt(session, val,
+ "id=%" PRIu32 ",version=(major=%d,minor=%d),checkpoint_backup_info=,checkpoint_lsn=",
++S2C(session)->next_file_id, WT_BTREE_MAJOR_VERSION_MAX,
WT_BTREE_MINOR_VERSION_MAX));
for (p = filecfg; *p != NULL; ++p)
@@ -272,10 +285,20 @@ __wt_schema_colgroup_source(
prefix = cval.str;
len = cval.len;
suffix = "";
- } else {
+ } else if ((S2C(session)->bstorage == NULL) ||
+ ((ret = __wt_config_getones(session, config, "tiered_storage.name", &cval)) == 0 &&
+ cval.len != 0 && WT_STRING_MATCH("none", cval.str, cval.len))) {
+ /*
+ * If we're using tiered storage, the default is not file unless the user explicitly turns
+ * off using tiered storage for this create. Otherwise the default prefix is tiered.
+ */
prefix = "file";
len = strlen(prefix);
suffix = ".wt";
+ } else {
+ prefix = "tiered";
+ len = strlen(prefix);
+ suffix = "";
}
WT_RET_NOTFOUND_OK(ret);
@@ -676,7 +699,7 @@ __create_table(
cgname = filename = NULL;
table = NULL;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE));
tablename = uri;
WT_PREFIX_SKIP_REQUIRED(session, tablename, "table:");
@@ -768,24 +791,51 @@ err:
}
/*
+ * __create_object --
+ * Create a tiered object for the given name.
+ */
+static int
+__create_object(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const char *config)
+{
+ /* The exclusive flag is unused here; creation is only a metadata insert. */
+ WT_UNUSED(exclusive);
+ WT_RET(__wt_metadata_insert(session, uri, config));
+ return (0);
+}
+
+/*
+ * __wt_tiered_tree_create --
+ * Create a tiered tree structure for the given name.
+ */
+int
+__wt_tiered_tree_create(
+ WT_SESSION_IMPL *session, const char *uri, bool exclusive, bool import, const char *config)
+{
+ /* Neither the exclusive nor the import flag alters the creation path here. */
+ WT_UNUSED(exclusive);
+ WT_UNUSED(import);
+ /* Record the tiered tree in the metadata; no data handle is opened here. */
+ WT_RET(__wt_metadata_insert(session, uri, config));
+ return (0);
+}
+
+/*
* __create_tiered --
* Create a tiered tree structure for the given name.
*/
static int
__create_tiered(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const char *config)
{
- WT_CONFIG cparser;
- WT_CONFIG_ITEM ckey, cval, tierconf;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_ITEM(tmp);
WT_DECL_RET;
- int ntiers;
+ WT_TIERED *tiered;
char *meta_value;
- const char *cfg[] = {WT_CONFIG_BASE(session, tiered_meta), config, NULL};
+ const char *cfg[5] = {WT_CONFIG_BASE(session, tiered_meta), NULL, NULL, NULL, NULL};
const char *metadata;
+ conn = S2C(session);
metadata = NULL;
- ntiers = 0;
+ tiered = NULL;
- /* If it can be opened, it already exists. */
+ /* Check if the tiered table already exists. */
if ((ret = __wt_metadata_search(session, uri, &meta_value)) != WT_NOTFOUND) {
if (exclusive)
WT_TRET(EEXIST);
@@ -793,23 +843,34 @@ __create_tiered(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const
}
WT_RET_NOTFOUND_OK(ret);
- /* A tiered cursor must specify at least one underlying table */
- WT_RET(__wt_config_gets(session, cfg, "tiered.tiers", &tierconf));
- __wt_config_subinit(session, &cparser, &tierconf);
-
- while ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0)
- ++ntiers;
- WT_RET_NOTFOUND_OK(ret);
-
- if (ntiers == 0)
- WT_RET_MSG(session, EINVAL, "tiered table must specify at least one tier");
-
- if (!F_ISSET(S2C(session), WT_CONN_READONLY)) {
+ /*
+ * We're creating a tiered table. Set the initial tiers list to empty. Opening the table will
+ * cause us to create our first file or tiered object.
+ */
+ if (!F_ISSET(conn, WT_CONN_READONLY)) {
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ /*
+ * By default use the connection level bucket and prefix. Then we add in any user
+ * configuration that may override the system one.
+ */
+ WT_ERR(__wt_buf_fmt(session, tmp, ",tiered_storage=(bucket=%s,bucket_prefix=%s)",
+ conn->bstorage->bucket, conn->bstorage->bucket_prefix));
+ cfg[1] = tmp->data;
+ cfg[2] = config;
+ cfg[3] = "tiers=()";
WT_ERR(__wt_config_merge(session, cfg, NULL, &metadata));
WT_ERR(__wt_metadata_insert(session, uri, metadata));
}
+ WT_ERR(__wt_schema_get_tiered_uri(session, uri, WT_DHANDLE_EXCLUSIVE, &tiered));
+ if (WT_META_TRACKING(session)) {
+ WT_WITH_DHANDLE(session, &tiered->iface, ret = __wt_meta_track_handle_lock(session, true));
+ WT_ERR(ret);
+ tiered = NULL;
+ }
err:
+ WT_TRET(__wt_schema_release_tiered(session, &tiered));
+ __wt_scr_free(session, &tmp);
__wt_free(session, meta_value);
__wt_free(session, metadata);
return (ret);
@@ -880,8 +941,12 @@ __schema_create(WT_SESSION_IMPL *session, const char *uri, const char *config)
ret = __wt_lsm_tree_create(session, uri, exclusive, config);
else if (WT_PREFIX_MATCH(uri, "index:"))
ret = __create_index(session, uri, exclusive, config);
+ else if (WT_PREFIX_MATCH(uri, "object:"))
+ ret = __create_object(session, uri, exclusive, config);
else if (WT_PREFIX_MATCH(uri, "table:"))
ret = __create_table(session, uri, exclusive, import, config);
+ else if (WT_PREFIX_MATCH(uri, "tier:"))
+ ret = __wt_tiered_tree_create(session, uri, exclusive, import, config);
else if (WT_PREFIX_MATCH(uri, "tiered:"))
ret = __create_tiered(session, uri, exclusive, config);
else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL)
diff --git a/src/third_party/wiredtiger/src/schema/schema_drop.c b/src/third_party/wiredtiger/src/schema/schema_drop.c
index d5f4264ebc8..8c6226b2dec 100644
--- a/src/third_party/wiredtiger/src/schema/schema_drop.c
+++ b/src/third_party/wiredtiger/src/schema/schema_drop.c
@@ -56,7 +56,7 @@ __drop_colgroup(WT_SESSION_IMPL *session, const char *uri, bool force, const cha
WT_DECL_RET;
WT_TABLE *table;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE));
/* If we can get the colgroup, detach it from the table. */
if ((ret = __wt_schema_get_colgroup(session, uri, force, &table, &colgroup)) == 0) {
@@ -102,7 +102,7 @@ __drop_table(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
const char *name;
bool tracked;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE));
name = uri;
WT_PREFIX_SKIP_REQUIRED(session, name, "table:");
@@ -186,9 +186,10 @@ __drop_tiered(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
tiered = (WT_TIERED *)session->dhandle;
/* Drop the tiers. */
- for (i = 0; i < tiered->ntiers; i++) {
- tier = tiered->tiers[i];
- WT_ERR(__wt_schema_drop(session, tier->name, cfg));
+ for (i = 0; i < WT_TIERED_MAX_TIERS; i++) {
+ tier = tiered->tiers[i].tier;
+ if (tier != NULL)
+ WT_ERR(__wt_schema_drop(session, tier->name, cfg));
}
ret = __wt_metadata_remove(session, uri);
diff --git a/src/third_party/wiredtiger/src/schema/schema_list.c b/src/third_party/wiredtiger/src/schema/schema_list.c
index 341b5e2a65d..3d901b99425 100644
--- a/src/third_party/wiredtiger/src/schema/schema_list.c
+++ b/src/third_party/wiredtiger/src/schema/schema_list.c
@@ -9,6 +9,58 @@
#include "wt_internal.h"
/*
+ * __schema_get_tiered_uri --
+ * Get the tiered handle for the named table. This function overwrites the dhandle.
+ */
+static int
+__schema_get_tiered_uri(
+ WT_SESSION_IMPL *session, const char *uri, uint32_t flags, WT_TIERED **tieredp)
+{
+ WT_DECL_RET;
+ WT_TIERED *tiered;
+
+ /* Clear the caller's pointer so it is NULL on any error path. */
+ *tieredp = NULL;
+
+ /* Acquire the data handle for the URI; this replaces session->dhandle. */
+ WT_ERR(__wt_session_get_dhandle(session, uri, NULL, NULL, flags));
+ tiered = (WT_TIERED *)session->dhandle;
+ *tieredp = tiered;
+err:
+ return (ret);
+}
+/*
+ * __wt_schema_get_tiered_uri --
+ * Get the tiered handle for the named table.
+ */
+int
+__wt_schema_get_tiered_uri(
+ WT_SESSION_IMPL *session, const char *uri, uint32_t flags, WT_TIERED **tieredp)
+{
+ WT_DECL_RET;
+
+ /* Save/restore the session's current dhandle around the lookup. */
+ WT_SAVE_DHANDLE(session, ret = __schema_get_tiered_uri(session, uri, flags, tieredp));
+ return (ret);
+}
+
+/*
+ * __wt_schema_release_tiered --
+ * Release a tiered handle.
+ */
+int
+__wt_schema_release_tiered(WT_SESSION_IMPL *session, WT_TIERED **tieredp)
+{
+ WT_DECL_RET;
+ WT_TIERED *tiered;
+
+ /* A NULL handle is a no-op, so this is safe to call on error paths. */
+ if ((tiered = *tieredp) == NULL)
+ return (0);
+ /* Clear the caller's pointer before releasing to prevent reuse. */
+ *tieredp = NULL;
+
+ WT_WITH_DHANDLE(session, &tiered->iface, ret = __wt_session_release_dhandle(session));
+
+ return (ret);
+}
+
+/*
* __wt_schema_get_table_uri --
* Get the table handle for the named table.
*/
@@ -165,7 +217,8 @@ __wt_schema_close_table(WT_SESSION_IMPL *session, WT_TABLE *table)
table->idx_alloc = 0;
WT_ASSERT(session,
- F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE) || F_ISSET(S2C(session), WT_CONN_CLOSING));
+ FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE) ||
+ F_ISSET(S2C(session), WT_CONN_CLOSING));
table->cg_complete = table->idx_complete = false;
return (ret);
diff --git a/src/third_party/wiredtiger/src/schema/schema_open.c b/src/third_party/wiredtiger/src/schema/schema_open.c
index 8202607fb74..d75d1b2f121 100644
--- a/src/third_party/wiredtiger/src/schema/schema_open.c
+++ b/src/third_party/wiredtiger/src/schema/schema_open.c
@@ -42,7 +42,7 @@ __wt_schema_open_colgroups(WT_SESSION_IMPL *session, WT_TABLE *table)
u_int i;
char *cgconfig;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE));
if (table->cg_complete)
return (0);
@@ -406,7 +406,7 @@ __schema_open_table(WT_SESSION_IMPL *session)
table_cfg = table->iface.cfg;
tablename = table->iface.name;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE));
WT_RET(__wt_config_gets(session, table_cfg, "columns", &cval));
WT_RET(__wt_config_gets(session, table_cfg, "key_format", &cval));
diff --git a/src/third_party/wiredtiger/src/schema/schema_truncate.c b/src/third_party/wiredtiger/src/schema/schema_truncate.c
index 35e13389373..6dbeb264d80 100644
--- a/src/third_party/wiredtiger/src/schema/schema_truncate.c
+++ b/src/third_party/wiredtiger/src/schema/schema_truncate.c
@@ -52,9 +52,12 @@ __truncate_tiered(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
WT_STAT_DATA_INCR(session, cursor_truncate);
- /* Truncate the column groups. */
- for (i = 0; i < tiered->ntiers; i++)
- WT_ERR(__wt_schema_truncate(session, tiered->tiers[i]->name, cfg));
+ /* Truncate the tiered entries. */
+ for (i = 0; i < WT_TIERED_MAX_TIERS; i++) {
+ if (tiered->tiers[i].tier == NULL)
+ continue;
+ WT_ERR(__wt_schema_truncate(session, tiered->tiers[i].name, cfg));
+ }
err:
WT_TRET(__wt_session_release_dhandle(session));
diff --git a/src/third_party/wiredtiger/src/schema/schema_util.c b/src/third_party/wiredtiger/src/schema/schema_util.c
index 1687d51eac4..eb342f32a5a 100644
--- a/src/third_party/wiredtiger/src/schema/schema_util.c
+++ b/src/third_party/wiredtiger/src/schema/schema_util.c
@@ -87,8 +87,8 @@ __wt_schema_internal_session(WT_SESSION_IMPL *session, WT_SESSION_IMPL **int_ses
if (F_ISSET(session->txn, WT_TXN_RUNNING)) {
/* We should not have a schema txn running now. */
WT_ASSERT(session, !F_ISSET(session, WT_SESSION_SCHEMA_TXN));
- WT_RET(
- __wt_open_internal_session(S2C(session), "schema", true, session->flags, int_sessionp));
+ WT_RET(__wt_open_internal_session(
+ S2C(session), "schema", true, session->flags, session->lock_flags, int_sessionp));
}
return (0);
}
diff --git a/src/third_party/wiredtiger/src/schema/schema_worker.c b/src/third_party/wiredtiger/src/schema/schema_worker.c
index 12c1d453742..c8c8ecd7558 100644
--- a/src/third_party/wiredtiger/src/schema/schema_worker.c
+++ b/src/third_party/wiredtiger/src/schema/schema_worker.c
@@ -59,8 +59,10 @@ __wt_schema_tiered_worker(WT_SESSION_IMPL *session, const char *uri,
WT_RET(__wt_session_get_dhandle(session, uri, NULL, NULL, open_flags));
tiered = (WT_TIERED *)session->dhandle;
- for (i = 0; i < tiered->ntiers; i++) {
- dhandle = tiered->tiers[i];
+ for (i = 0; i < WT_TIERED_MAX_TIERS; i++) {
+ dhandle = tiered->tiers[i].tier;
+ if (dhandle == NULL)
+ continue;
WT_SAVE_DHANDLE(session,
ret = __wt_schema_worker(session, dhandle->name, file_func, name_func, cfg, open_flags));
WT_ERR(ret);
@@ -142,7 +144,7 @@ __wt_schema_worker(WT_SESSION_IMPL *session, const char *uri,
* checkpoints, do not. Opening indexes requires the handle write lock, so check whether
* that lock is held when deciding what to do.
*/
- if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE))
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE))
WT_ERR(__wt_schema_open_indices(session, table));
for (i = 0; i < table->nindices; i++) {
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index cb4e12df887..338df444cd2 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -429,7 +429,7 @@ err:
*/
static int
__session_open_cursor_int(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
- WT_CURSOR *other, const char *cfg[], WT_CURSOR **cursorp)
+ WT_CURSOR *other, const char *cfg[], uint64_t hash_value, WT_CURSOR **cursorp)
{
WT_COLGROUP *colgroup;
WT_DATA_SOURCE *dsrc;
@@ -528,6 +528,9 @@ __session_open_cursor_int(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *
*cursorp = NULL;
}
+ if (*cursorp != NULL)
+ (*cursorp)->uri_hash = hash_value;
+
return (ret);
}
@@ -540,18 +543,22 @@ __wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, co
WT_CURSOR **cursorp)
{
WT_DECL_RET;
+ uint64_t hash_value;
+
+ hash_value = 0;
/* We should not open other cursors when there are open history store cursors in the session. */
WT_ASSERT(session, strcmp(uri, WT_HS_URI) == 0 || session->hs_cursor_counter == 0);
/* We do not cache any subordinate tables/files cursors. */
if (owner == NULL) {
- if ((ret = __wt_cursor_cache_get(session, uri, NULL, cfg, cursorp)) == 0)
+ __wt_cursor_get_hash(session, uri, NULL, &hash_value);
+ if ((ret = __wt_cursor_cache_get(session, uri, hash_value, NULL, cfg, cursorp)) == 0)
return (0);
WT_RET_NOTFOUND_OK(ret);
}
- return (__session_open_cursor_int(session, uri, owner, NULL, cfg, cursorp));
+ return (__session_open_cursor_int(session, uri, owner, NULL, cfg, hash_value, cursorp));
}
/*
@@ -565,10 +572,11 @@ __session_open_cursor(WT_SESSION *wt_session, const char *uri, WT_CURSOR *to_dup
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ uint64_t hash_value;
bool dup_backup, statjoin;
cursor = *cursorp = NULL;
-
+ hash_value = 0;
dup_backup = false;
session = (WT_SESSION_IMPL *)wt_session;
SESSION_API_CALL(session, open_cursor, config, cfg);
@@ -579,7 +587,8 @@ __session_open_cursor(WT_SESSION *wt_session, const char *uri, WT_CURSOR *to_dup
WT_ERR_MSG(session, EINVAL,
"should be passed either a URI or a cursor to duplicate, but not both");
- if ((ret = __wt_cursor_cache_get(session, uri, to_dup, cfg, &cursor)) == 0)
+ __wt_cursor_get_hash(session, uri, to_dup, &hash_value);
+ if ((ret = __wt_cursor_cache_get(session, uri, hash_value, to_dup, cfg, &cursor)) == 0)
goto done;
/*
@@ -600,8 +609,11 @@ __session_open_cursor(WT_SESSION *wt_session, const char *uri, WT_CURSOR *to_dup
}
}
+ if (config != NULL && (WT_PREFIX_MATCH(uri, "backup:") || to_dup != NULL))
+ __wt_verbose(session, WT_VERB_BACKUP, "Backup cursor config \"%s\"", config);
+
WT_ERR(__session_open_cursor_int(
- session, uri, NULL, statjoin || dup_backup ? to_dup : NULL, cfg, &cursor));
+ session, uri, NULL, statjoin || dup_backup ? to_dup : NULL, cfg, hash_value, &cursor));
done:
if (to_dup != NULL && !statjoin && !dup_backup)
@@ -2142,7 +2154,7 @@ __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, con
* caller decline this work.
*/
if (open_metadata) {
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
+ WT_ASSERT(session, !FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA));
if ((ret = __wt_metadata_cursor(session, NULL)) != 0) {
WT_TRET(__wt_session_close_internal(session));
return (ret);
@@ -2159,7 +2171,7 @@ __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, con
*/
int
__wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, bool open_metadata,
- uint32_t session_flags, WT_SESSION_IMPL **sessionp)
+ uint32_t session_flags, uint32_t session_lock_flags, WT_SESSION_IMPL **sessionp)
{
WT_SESSION_IMPL *session;
@@ -2175,6 +2187,7 @@ __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, bool open
* during close. Set a flag to avoid this: internal sessions are not closed automatically.
*/
F_SET(session, session_flags | WT_SESSION_INTERNAL);
+ FLD_SET(session->lock_flags, session_lock_flags);
*sessionp = session;
return (0);
diff --git a/src/third_party/wiredtiger/src/session/session_dhandle.c b/src/third_party/wiredtiger/src/session/session_dhandle.c
index 502610d434b..98e58f56aea 100644
--- a/src/third_party/wiredtiger/src/session/session_dhandle.c
+++ b/src/third_party/wiredtiger/src/session/session_dhandle.c
@@ -494,7 +494,7 @@ __wt_session_get_dhandle(WT_SESSION_IMPL *session, const char *uri, const char *
* handles in the meantime. A combination of the schema and handle list locks are used to
* enforce this.
*/
- if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) {
+ if (!FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA)) {
dhandle->excl_session = NULL;
dhandle->excl_ref = 0;
F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE);
diff --git a/src/third_party/wiredtiger/src/support/modify.c b/src/third_party/wiredtiger/src/support/modify.c
index 8e5d955f94d..390e7d2acd7 100644
--- a/src/third_party/wiredtiger/src/support/modify.c
+++ b/src/third_party/wiredtiger/src/support/modify.c
@@ -437,108 +437,6 @@ err:
}
/*
- * __wt_modify_vector_init --
- * Initialize a modify vector.
- */
-void
-__wt_modify_vector_init(WT_SESSION_IMPL *session, WT_MODIFY_VECTOR *modifies)
-{
- WT_CLEAR(*modifies);
- modifies->session = session;
- modifies->listp = modifies->list;
-}
-
-/*
- * __wt_modify_vector_push --
- * Push a modify update pointer to a modify vector. If we exceed the allowed stack space in the
- * vector, we'll be doing malloc here.
- */
-int
-__wt_modify_vector_push(WT_MODIFY_VECTOR *modifies, WT_UPDATE *upd)
-{
- WT_DECL_RET;
- bool migrate_from_stack;
-
- migrate_from_stack = false;
-
- if (modifies->size >= WT_MODIFY_VECTOR_STACK_SIZE) {
- if (modifies->allocated_bytes == 0 && modifies->size == WT_MODIFY_VECTOR_STACK_SIZE) {
- migrate_from_stack = true;
- modifies->listp = NULL;
- }
- WT_ERR(__wt_realloc_def(
- modifies->session, &modifies->allocated_bytes, modifies->size + 1, &modifies->listp));
- if (migrate_from_stack)
- memcpy(modifies->listp, modifies->list, sizeof(modifies->list));
- }
- modifies->listp[modifies->size++] = upd;
- return (0);
-
-err:
- /*
- * This only happens when we're migrating from the stack to the heap but failed to allocate. In
- * that case, point back to the stack allocated memory and set the allocation to zero to
- * indicate that we don't have heap memory to free.
- *
- * If we're already on the heap, we have nothing to do. The realloc call above won't touch the
- * list pointer unless allocation is successful and we won't have incremented the size yet.
- */
- if (modifies->listp == NULL) {
- WT_ASSERT(modifies->session, modifies->size == WT_MODIFY_VECTOR_STACK_SIZE);
- modifies->listp = modifies->list;
- modifies->allocated_bytes = 0;
- }
- return (ret);
-}
-
-/*
- * __wt_modify_vector_pop --
- * Pop an update pointer off a modify vector.
- */
-void
-__wt_modify_vector_pop(WT_MODIFY_VECTOR *modifies, WT_UPDATE **updp)
-{
- WT_ASSERT(modifies->session, modifies->size > 0);
-
- *updp = modifies->listp[--modifies->size];
-}
-
-/*
- * __wt_modify_vector_peek --
- * Peek an update pointer off a modify vector.
- */
-void
-__wt_modify_vector_peek(WT_MODIFY_VECTOR *modifies, WT_UPDATE **updp)
-{
- WT_ASSERT(modifies->session, modifies->size > 0);
-
- *updp = modifies->listp[modifies->size - 1];
-}
-
-/*
- * __wt_modify_vector_clear --
- * Clear a modify vector.
- */
-void
-__wt_modify_vector_clear(WT_MODIFY_VECTOR *modifies)
-{
- modifies->size = 0;
-}
-
-/*
- * __wt_modify_vector_free --
- * Free any resources associated with a modify vector. If we exceeded the allowed stack space on
- * the vector and had to fallback to dynamic allocations, we'll be doing a free here.
- */
-void
-__wt_modify_vector_free(WT_MODIFY_VECTOR *modifies)
-{
- if (modifies->allocated_bytes != 0)
- __wt_free(modifies->session, modifies->listp);
- __wt_modify_vector_init(modifies->session, modifies);
-}
-
-/*
* __wt_modify_reconstruct_from_upd_list --
* Takes an in-memory modify and populates an update value with the reconstructed full value.
*/
@@ -548,8 +446,8 @@ __wt_modify_reconstruct_from_upd_list(
{
WT_CURSOR *cursor;
WT_DECL_RET;
- WT_MODIFY_VECTOR modifies;
WT_TIME_WINDOW tw;
+ WT_UPDATE_VECTOR modifies;
WT_ASSERT(session, upd->type == WT_UPDATE_MODIFY);
@@ -560,7 +458,7 @@ __wt_modify_reconstruct_from_upd_list(
upd_value->tw.start_txn = upd->txnid;
/* Construct full update */
- __wt_modify_vector_init(session, &modifies);
+ __wt_update_vector_init(session, &modifies);
/* Find a complete update. */
for (; upd != NULL; upd = upd->next) {
if (upd->txnid == WT_TXN_ABORTED)
@@ -570,7 +468,7 @@ __wt_modify_reconstruct_from_upd_list(
break;
if (upd->type == WT_UPDATE_MODIFY)
- WT_ERR(__wt_modify_vector_push(&modifies, upd));
+ WT_ERR(__wt_update_vector_push(&modifies, upd));
}
/*
* If there's no full update, the base item is the on-page item. If the update is a tombstone,
@@ -599,11 +497,11 @@ __wt_modify_reconstruct_from_upd_list(
}
/* Once we have a base item, roll forward through any visible modify updates. */
while (modifies.size > 0) {
- __wt_modify_vector_pop(&modifies, &upd);
+ __wt_update_vector_pop(&modifies, &upd);
WT_ERR(__wt_modify_apply_item(session, cursor->value_format, &upd_value->buf, upd->data));
}
upd_value->type = WT_UPDATE_STANDARD;
err:
- __wt_modify_vector_free(&modifies);
+ __wt_update_vector_free(&modifies);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 499781e181e..d622d44589e 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -118,6 +118,7 @@ static const char *const __stats_dsrc_desc[] = {
"cache: bytes read into cache",
"cache: bytes written from cache",
"cache: checkpoint blocked page eviction",
+ "cache: checkpoint of history store file blocked non-history store page eviction",
"cache: eviction walk target pages histogram - 0-9",
"cache: eviction walk target pages histogram - 10-31",
"cache: eviction walk target pages histogram - 128 and higher",
@@ -136,8 +137,8 @@ static const char *const __stats_dsrc_desc[] = {
"cache: history store table insert calls",
"cache: history store table insert calls that returned restart",
"cache: history store table out-of-order resolved updates that lose their durable timestamp",
- "cache: history store table out-of-order updates that were fixed up by moving existing records",
- "cache: history store table out-of-order updates that were fixed up during insertion",
+ "cache: history store table out-of-order updates that were fixed up by reinserting with the "
+ "fixed timestamp",
"cache: history store table reads",
"cache: history store table reads missed",
"cache: history store table reads requiring squashed modifies",
@@ -146,8 +147,8 @@ static const char *const __stats_dsrc_desc[] = {
"cache: history store table truncation to remove an update",
"cache: history store table truncation to remove range of updates due to key being removed from "
"the data page during reconciliation",
- "cache: history store table truncation to remove range of updates due to non timestamped update "
- "on data page",
+ "cache: history store table truncation to remove range of updates due to out-of-order timestamp "
+ "update on data page",
"cache: history store table writes requiring squashed modifies",
"cache: in-memory page passed criteria to be split",
"cache: in-memory page splits",
@@ -174,6 +175,7 @@ static const char *const __stats_dsrc_desc[] = {
"cursor: Total number of entries skipped by cursor next calls",
"cursor: Total number of entries skipped by cursor prev calls",
"cursor: Total number of entries skipped to position the history store cursor",
+ "cursor: Total number of times a search near has exited due to prefix config",
"cursor: cursor next calls that skip due to a globally visible history store tombstone",
"cursor: cursor next calls that skip greater than or equal to 100 entries",
"cursor: cursor next calls that skip less than 100 entries",
@@ -372,6 +374,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->cache_bytes_read = 0;
stats->cache_bytes_write = 0;
stats->cache_eviction_checkpoint = 0;
+ stats->cache_eviction_blocked_checkpoint_hs = 0;
stats->cache_eviction_target_page_lt10 = 0;
stats->cache_eviction_target_page_lt32 = 0;
stats->cache_eviction_target_page_ge128 = 0;
@@ -390,8 +393,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->cache_hs_insert = 0;
stats->cache_hs_insert_restart = 0;
stats->cache_hs_order_lose_durable_timestamp = 0;
- stats->cache_hs_order_fixup_move = 0;
- stats->cache_hs_order_fixup_insert = 0;
+ stats->cache_hs_order_reinsert = 0;
stats->cache_hs_read = 0;
stats->cache_hs_read_miss = 0;
stats->cache_hs_read_squash = 0;
@@ -399,7 +401,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->cache_hs_key_truncate_rts = 0;
stats->cache_hs_key_truncate = 0;
stats->cache_hs_key_truncate_onpage_removal = 0;
- stats->cache_hs_key_truncate_non_ts = 0;
+ stats->cache_hs_order_remove = 0;
stats->cache_hs_write_squash = 0;
stats->cache_inmem_splittable = 0;
stats->cache_inmem_split = 0;
@@ -426,6 +428,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->cursor_next_skip_total = 0;
stats->cursor_prev_skip_total = 0;
stats->cursor_skip_hs_cur_position = 0;
+ stats->cursor_search_near_prefix_fast_paths = 0;
stats->cursor_next_hs_tombstone = 0;
stats->cursor_next_skip_ge_100 = 0;
stats->cursor_next_skip_lt_100 = 0;
@@ -610,6 +613,7 @@ __wt_stat_dsrc_aggregate_single(WT_DSRC_STATS *from, WT_DSRC_STATS *to)
to->cache_bytes_read += from->cache_bytes_read;
to->cache_bytes_write += from->cache_bytes_write;
to->cache_eviction_checkpoint += from->cache_eviction_checkpoint;
+ to->cache_eviction_blocked_checkpoint_hs += from->cache_eviction_blocked_checkpoint_hs;
to->cache_eviction_target_page_lt10 += from->cache_eviction_target_page_lt10;
to->cache_eviction_target_page_lt32 += from->cache_eviction_target_page_lt32;
to->cache_eviction_target_page_ge128 += from->cache_eviction_target_page_ge128;
@@ -628,8 +632,7 @@ __wt_stat_dsrc_aggregate_single(WT_DSRC_STATS *from, WT_DSRC_STATS *to)
to->cache_hs_insert += from->cache_hs_insert;
to->cache_hs_insert_restart += from->cache_hs_insert_restart;
to->cache_hs_order_lose_durable_timestamp += from->cache_hs_order_lose_durable_timestamp;
- to->cache_hs_order_fixup_move += from->cache_hs_order_fixup_move;
- to->cache_hs_order_fixup_insert += from->cache_hs_order_fixup_insert;
+ to->cache_hs_order_reinsert += from->cache_hs_order_reinsert;
to->cache_hs_read += from->cache_hs_read;
to->cache_hs_read_miss += from->cache_hs_read_miss;
to->cache_hs_read_squash += from->cache_hs_read_squash;
@@ -637,7 +640,7 @@ __wt_stat_dsrc_aggregate_single(WT_DSRC_STATS *from, WT_DSRC_STATS *to)
to->cache_hs_key_truncate_rts += from->cache_hs_key_truncate_rts;
to->cache_hs_key_truncate += from->cache_hs_key_truncate;
to->cache_hs_key_truncate_onpage_removal += from->cache_hs_key_truncate_onpage_removal;
- to->cache_hs_key_truncate_non_ts += from->cache_hs_key_truncate_non_ts;
+ to->cache_hs_order_remove += from->cache_hs_order_remove;
to->cache_hs_write_squash += from->cache_hs_write_squash;
to->cache_inmem_splittable += from->cache_inmem_splittable;
to->cache_inmem_split += from->cache_inmem_split;
@@ -664,6 +667,7 @@ __wt_stat_dsrc_aggregate_single(WT_DSRC_STATS *from, WT_DSRC_STATS *to)
to->cursor_next_skip_total += from->cursor_next_skip_total;
to->cursor_prev_skip_total += from->cursor_prev_skip_total;
to->cursor_skip_hs_cur_position += from->cursor_skip_hs_cur_position;
+ to->cursor_search_near_prefix_fast_paths += from->cursor_search_near_prefix_fast_paths;
to->cursor_next_hs_tombstone += from->cursor_next_hs_tombstone;
to->cursor_next_skip_ge_100 += from->cursor_next_skip_ge_100;
to->cursor_next_skip_lt_100 += from->cursor_next_skip_lt_100;
@@ -843,6 +847,8 @@ __wt_stat_dsrc_aggregate(WT_DSRC_STATS **from, WT_DSRC_STATS *to)
to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read);
to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write);
to->cache_eviction_checkpoint += WT_STAT_READ(from, cache_eviction_checkpoint);
+ to->cache_eviction_blocked_checkpoint_hs +=
+ WT_STAT_READ(from, cache_eviction_blocked_checkpoint_hs);
to->cache_eviction_target_page_lt10 += WT_STAT_READ(from, cache_eviction_target_page_lt10);
to->cache_eviction_target_page_lt32 += WT_STAT_READ(from, cache_eviction_target_page_lt32);
to->cache_eviction_target_page_ge128 += WT_STAT_READ(from, cache_eviction_target_page_ge128);
@@ -865,8 +871,7 @@ __wt_stat_dsrc_aggregate(WT_DSRC_STATS **from, WT_DSRC_STATS *to)
to->cache_hs_insert_restart += WT_STAT_READ(from, cache_hs_insert_restart);
to->cache_hs_order_lose_durable_timestamp +=
WT_STAT_READ(from, cache_hs_order_lose_durable_timestamp);
- to->cache_hs_order_fixup_move += WT_STAT_READ(from, cache_hs_order_fixup_move);
- to->cache_hs_order_fixup_insert += WT_STAT_READ(from, cache_hs_order_fixup_insert);
+ to->cache_hs_order_reinsert += WT_STAT_READ(from, cache_hs_order_reinsert);
to->cache_hs_read += WT_STAT_READ(from, cache_hs_read);
to->cache_hs_read_miss += WT_STAT_READ(from, cache_hs_read_miss);
to->cache_hs_read_squash += WT_STAT_READ(from, cache_hs_read_squash);
@@ -876,7 +881,7 @@ __wt_stat_dsrc_aggregate(WT_DSRC_STATS **from, WT_DSRC_STATS *to)
to->cache_hs_key_truncate += WT_STAT_READ(from, cache_hs_key_truncate);
to->cache_hs_key_truncate_onpage_removal +=
WT_STAT_READ(from, cache_hs_key_truncate_onpage_removal);
- to->cache_hs_key_truncate_non_ts += WT_STAT_READ(from, cache_hs_key_truncate_non_ts);
+ to->cache_hs_order_remove += WT_STAT_READ(from, cache_hs_order_remove);
to->cache_hs_write_squash += WT_STAT_READ(from, cache_hs_write_squash);
to->cache_inmem_splittable += WT_STAT_READ(from, cache_inmem_splittable);
to->cache_inmem_split += WT_STAT_READ(from, cache_inmem_split);
@@ -903,6 +908,8 @@ __wt_stat_dsrc_aggregate(WT_DSRC_STATS **from, WT_DSRC_STATS *to)
to->cursor_next_skip_total += WT_STAT_READ(from, cursor_next_skip_total);
to->cursor_prev_skip_total += WT_STAT_READ(from, cursor_prev_skip_total);
to->cursor_skip_hs_cur_position += WT_STAT_READ(from, cursor_skip_hs_cur_position);
+ to->cursor_search_near_prefix_fast_paths +=
+ WT_STAT_READ(from, cursor_search_near_prefix_fast_paths);
to->cursor_next_hs_tombstone += WT_STAT_READ(from, cursor_next_hs_tombstone);
to->cursor_next_skip_ge_100 += WT_STAT_READ(from, cursor_next_skip_ge_100);
to->cursor_next_skip_lt_100 += WT_STAT_READ(from, cursor_next_skip_lt_100);
@@ -1025,8 +1032,6 @@ static const char *const __stats_connection_desc[] = {
"cache: forced eviction - pages selected count",
"cache: forced eviction - pages selected unable to be evicted count",
"cache: forced eviction - pages selected unable to be evicted time",
- "cache: forced eviction - session returned rollback error while force evicting due to being "
- "oldest",
"cache: hazard pointer check calls",
"cache: hazard pointer check entries walked",
"cache: hazard pointer maximum array length",
@@ -1284,6 +1289,7 @@ static const char *const __stats_connection_desc[] = {
"transaction: rollback to stable pages visited",
"transaction: rollback to stable tree walk skipping pages",
"transaction: rollback to stable updates aborted",
+ "transaction: sessions scanned in each walk of concurrent sessions",
"transaction: set timestamp calls",
"transaction: set timestamp durable calls",
"transaction: set timestamp durable updates",
@@ -1293,6 +1299,7 @@ static const char *const __stats_connection_desc[] = {
"transaction: set timestamp stable updates",
"transaction: transaction begins",
"transaction: transaction checkpoint currently running",
+ "transaction: transaction checkpoint currently running for history store file",
"transaction: transaction checkpoint generation",
"transaction: transaction checkpoint history store file duration (usecs)",
"transaction: transaction checkpoint max time (msecs)",
@@ -1337,6 +1344,7 @@ static const char *const __stats_connection_desc[] = {
"cache: bytes read into cache",
"cache: bytes written from cache",
"cache: checkpoint blocked page eviction",
+ "cache: checkpoint of history store file blocked non-history store page eviction",
"cache: eviction walk target pages histogram - 0-9",
"cache: eviction walk target pages histogram - 10-31",
"cache: eviction walk target pages histogram - 128 and higher",
@@ -1355,8 +1363,8 @@ static const char *const __stats_connection_desc[] = {
"cache: history store table insert calls",
"cache: history store table insert calls that returned restart",
"cache: history store table out-of-order resolved updates that lose their durable timestamp",
- "cache: history store table out-of-order updates that were fixed up by moving existing records",
- "cache: history store table out-of-order updates that were fixed up during insertion",
+ "cache: history store table out-of-order updates that were fixed up by reinserting with the "
+ "fixed timestamp",
"cache: history store table reads",
"cache: history store table reads missed",
"cache: history store table reads requiring squashed modifies",
@@ -1365,8 +1373,8 @@ static const char *const __stats_connection_desc[] = {
"cache: history store table truncation to remove an update",
"cache: history store table truncation to remove range of updates due to key being removed from "
"the data page during reconciliation",
- "cache: history store table truncation to remove range of updates due to non timestamped update "
- "on data page",
+ "cache: history store table truncation to remove range of updates due to out-of-order timestamp "
+ "update on data page",
"cache: history store table writes requiring squashed modifies",
"cache: in-memory page passed criteria to be split",
"cache: in-memory page splits",
@@ -1393,6 +1401,7 @@ static const char *const __stats_connection_desc[] = {
"cursor: Total number of entries skipped by cursor next calls",
"cursor: Total number of entries skipped by cursor prev calls",
"cursor: Total number of entries skipped to position the history store cursor",
+ "cursor: Total number of times a search near has exited due to prefix config",
"cursor: cursor next calls that skip due to a globally visible history store tombstone",
"cursor: cursor next calls that skip greater than or equal to 100 entries",
"cursor: cursor next calls that skip less than 100 entries",
@@ -1544,7 +1553,6 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cache_eviction_force = 0;
stats->cache_eviction_force_fail = 0;
stats->cache_eviction_force_fail_time = 0;
- stats->cache_eviction_force_rollback = 0;
stats->cache_hazard_checks = 0;
stats->cache_hazard_walks = 0;
stats->cache_hazard_max = 0;
@@ -1800,6 +1808,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->txn_rts_pages_visited = 0;
stats->txn_rts_tree_walk_skip_pages = 0;
stats->txn_rts_upd_aborted = 0;
+ stats->txn_sessions_walked = 0;
stats->txn_set_ts = 0;
stats->txn_set_ts_durable = 0;
stats->txn_set_ts_durable_upd = 0;
@@ -1809,6 +1818,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->txn_set_ts_stable_upd = 0;
stats->txn_begin = 0;
/* not clearing txn_checkpoint_running */
+ /* not clearing txn_checkpoint_running_hs */
/* not clearing txn_checkpoint_generation */
stats->txn_hs_ckpt_duration = 0;
/* not clearing txn_checkpoint_time_max */
@@ -1852,6 +1862,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cache_bytes_read = 0;
stats->cache_bytes_write = 0;
stats->cache_eviction_checkpoint = 0;
+ stats->cache_eviction_blocked_checkpoint_hs = 0;
stats->cache_eviction_target_page_lt10 = 0;
stats->cache_eviction_target_page_lt32 = 0;
stats->cache_eviction_target_page_ge128 = 0;
@@ -1870,8 +1881,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cache_hs_insert = 0;
stats->cache_hs_insert_restart = 0;
stats->cache_hs_order_lose_durable_timestamp = 0;
- stats->cache_hs_order_fixup_move = 0;
- stats->cache_hs_order_fixup_insert = 0;
+ stats->cache_hs_order_reinsert = 0;
stats->cache_hs_read = 0;
stats->cache_hs_read_miss = 0;
stats->cache_hs_read_squash = 0;
@@ -1879,7 +1889,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cache_hs_key_truncate_rts = 0;
stats->cache_hs_key_truncate = 0;
stats->cache_hs_key_truncate_onpage_removal = 0;
- stats->cache_hs_key_truncate_non_ts = 0;
+ stats->cache_hs_order_remove = 0;
stats->cache_hs_write_squash = 0;
stats->cache_inmem_splittable = 0;
stats->cache_inmem_split = 0;
@@ -1906,6 +1916,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cursor_next_skip_total = 0;
stats->cursor_prev_skip_total = 0;
stats->cursor_skip_hs_cur_position = 0;
+ stats->cursor_search_near_prefix_fast_paths = 0;
stats->cursor_next_hs_tombstone = 0;
stats->cursor_next_skip_ge_100 = 0;
stats->cursor_next_skip_lt_100 = 0;
@@ -2036,7 +2047,6 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
to->cache_eviction_force += WT_STAT_READ(from, cache_eviction_force);
to->cache_eviction_force_fail += WT_STAT_READ(from, cache_eviction_force_fail);
to->cache_eviction_force_fail_time += WT_STAT_READ(from, cache_eviction_force_fail_time);
- to->cache_eviction_force_rollback += WT_STAT_READ(from, cache_eviction_force_rollback);
to->cache_hazard_checks += WT_STAT_READ(from, cache_hazard_checks);
to->cache_hazard_walks += WT_STAT_READ(from, cache_hazard_walks);
if ((v = WT_STAT_READ(from, cache_hazard_max)) > to->cache_hazard_max)
@@ -2309,6 +2319,7 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
to->txn_rts_pages_visited += WT_STAT_READ(from, txn_rts_pages_visited);
to->txn_rts_tree_walk_skip_pages += WT_STAT_READ(from, txn_rts_tree_walk_skip_pages);
to->txn_rts_upd_aborted += WT_STAT_READ(from, txn_rts_upd_aborted);
+ to->txn_sessions_walked += WT_STAT_READ(from, txn_sessions_walked);
to->txn_set_ts += WT_STAT_READ(from, txn_set_ts);
to->txn_set_ts_durable += WT_STAT_READ(from, txn_set_ts_durable);
to->txn_set_ts_durable_upd += WT_STAT_READ(from, txn_set_ts_durable_upd);
@@ -2318,6 +2329,7 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
to->txn_set_ts_stable_upd += WT_STAT_READ(from, txn_set_ts_stable_upd);
to->txn_begin += WT_STAT_READ(from, txn_begin);
to->txn_checkpoint_running += WT_STAT_READ(from, txn_checkpoint_running);
+ to->txn_checkpoint_running_hs += WT_STAT_READ(from, txn_checkpoint_running_hs);
to->txn_checkpoint_generation += WT_STAT_READ(from, txn_checkpoint_generation);
to->txn_hs_ckpt_duration += WT_STAT_READ(from, txn_hs_ckpt_duration);
to->txn_checkpoint_time_max += WT_STAT_READ(from, txn_checkpoint_time_max);
@@ -2364,6 +2376,8 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read);
to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write);
to->cache_eviction_checkpoint += WT_STAT_READ(from, cache_eviction_checkpoint);
+ to->cache_eviction_blocked_checkpoint_hs +=
+ WT_STAT_READ(from, cache_eviction_blocked_checkpoint_hs);
to->cache_eviction_target_page_lt10 += WT_STAT_READ(from, cache_eviction_target_page_lt10);
to->cache_eviction_target_page_lt32 += WT_STAT_READ(from, cache_eviction_target_page_lt32);
to->cache_eviction_target_page_ge128 += WT_STAT_READ(from, cache_eviction_target_page_ge128);
@@ -2386,8 +2400,7 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
to->cache_hs_insert_restart += WT_STAT_READ(from, cache_hs_insert_restart);
to->cache_hs_order_lose_durable_timestamp +=
WT_STAT_READ(from, cache_hs_order_lose_durable_timestamp);
- to->cache_hs_order_fixup_move += WT_STAT_READ(from, cache_hs_order_fixup_move);
- to->cache_hs_order_fixup_insert += WT_STAT_READ(from, cache_hs_order_fixup_insert);
+ to->cache_hs_order_reinsert += WT_STAT_READ(from, cache_hs_order_reinsert);
to->cache_hs_read += WT_STAT_READ(from, cache_hs_read);
to->cache_hs_read_miss += WT_STAT_READ(from, cache_hs_read_miss);
to->cache_hs_read_squash += WT_STAT_READ(from, cache_hs_read_squash);
@@ -2397,7 +2410,7 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
to->cache_hs_key_truncate += WT_STAT_READ(from, cache_hs_key_truncate);
to->cache_hs_key_truncate_onpage_removal +=
WT_STAT_READ(from, cache_hs_key_truncate_onpage_removal);
- to->cache_hs_key_truncate_non_ts += WT_STAT_READ(from, cache_hs_key_truncate_non_ts);
+ to->cache_hs_order_remove += WT_STAT_READ(from, cache_hs_order_remove);
to->cache_hs_write_squash += WT_STAT_READ(from, cache_hs_write_squash);
to->cache_inmem_splittable += WT_STAT_READ(from, cache_inmem_splittable);
to->cache_inmem_split += WT_STAT_READ(from, cache_inmem_split);
@@ -2424,6 +2437,8 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
to->cursor_next_skip_total += WT_STAT_READ(from, cursor_next_skip_total);
to->cursor_prev_skip_total += WT_STAT_READ(from, cursor_prev_skip_total);
to->cursor_skip_hs_cur_position += WT_STAT_READ(from, cursor_skip_hs_cur_position);
+ to->cursor_search_near_prefix_fast_paths +=
+ WT_STAT_READ(from, cursor_search_near_prefix_fast_paths);
to->cursor_next_hs_tombstone += WT_STAT_READ(from, cursor_next_hs_tombstone);
to->cursor_next_skip_ge_100 += WT_STAT_READ(from, cursor_next_skip_ge_100);
to->cursor_next_skip_lt_100 += WT_STAT_READ(from, cursor_next_skip_lt_100);
diff --git a/src/third_party/wiredtiger/src/support/thread_group.c b/src/third_party/wiredtiger/src/support/thread_group.c
index 7b70ef63906..6bac6d5450d 100644
--- a/src/third_party/wiredtiger/src/support/thread_group.c
+++ b/src/third_party/wiredtiger/src/support/thread_group.c
@@ -181,7 +181,7 @@ __thread_group_resize(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, uint32_t
/* Threads get their own session. */
session_flags = LF_ISSET(WT_THREAD_CAN_WAIT) ? WT_SESSION_CAN_WAIT : 0;
WT_ERR(
- __wt_open_internal_session(conn, group->name, false, session_flags, &thread->session));
+ __wt_open_internal_session(conn, group->name, false, session_flags, 0, &thread->session));
if (LF_ISSET(WT_THREAD_PANIC_FAIL))
F_SET(thread, WT_THREAD_PANIC_FAIL);
thread->id = i;
diff --git a/src/third_party/wiredtiger/src/support/update_vector.c b/src/third_party/wiredtiger/src/support/update_vector.c
new file mode 100644
index 00000000000..ef57b28f7f4
--- /dev/null
+++ b/src/third_party/wiredtiger/src/support/update_vector.c
@@ -0,0 +1,111 @@
+/*-
+ * Copyright (c) 2014-present MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __wt_update_vector_init --
+ * Initialize a update vector.
+ */
+void
+__wt_update_vector_init(WT_SESSION_IMPL *session, WT_UPDATE_VECTOR *updates)
+{
+ WT_CLEAR(*updates);
+ updates->session = session;
+ updates->listp = updates->list;
+}
+
/*
 * __wt_update_vector_push --
 *     Push an update pointer onto an update vector. If we exceed the allowed stack space in the
 *     vector, we'll be doing malloc here.
 */
int
__wt_update_vector_push(WT_UPDATE_VECTOR *updates, WT_UPDATE *upd)
{
    WT_DECL_RET;
    bool migrate_from_stack;

    migrate_from_stack = false;

    if (updates->size >= WT_UPDATE_VECTOR_STACK_SIZE) {
        /*
         * First time we spill past the stack array: clear listp so the realloc below allocates
         * fresh heap memory instead of trying to reallocate the stack buffer, then copy the
         * stack contents into it.
         */
        if (updates->allocated_bytes == 0 && updates->size == WT_UPDATE_VECTOR_STACK_SIZE) {
            migrate_from_stack = true;
            updates->listp = NULL;
        }
        WT_ERR(__wt_realloc_def(
          updates->session, &updates->allocated_bytes, updates->size + 1, &updates->listp));
        if (migrate_from_stack)
            memcpy(updates->listp, updates->list, sizeof(updates->list));
    }
    updates->listp[updates->size++] = upd;
    return (0);

err:
    /*
     * This only happens when we're migrating from the stack to the heap but failed to allocate. In
     * that case, point back to the stack allocated memory and set the allocation to zero to
     * indicate that we don't have heap memory to free.
     *
     * If we're already on the heap, we have nothing to do. The realloc call above won't touch the
     * list pointer unless allocation is successful and we won't have incremented the size yet.
     */
    if (updates->listp == NULL) {
        WT_ASSERT(updates->session, updates->size == WT_UPDATE_VECTOR_STACK_SIZE);
        updates->listp = updates->list;
        updates->allocated_bytes = 0;
    }
    return (ret);
}
+
+/*
+ * __wt_update_vector_pop --
+ * Pop an update pointer off a update vector.
+ */
+void
+__wt_update_vector_pop(WT_UPDATE_VECTOR *updates, WT_UPDATE **updp)
+{
+ WT_ASSERT(updates->session, updates->size > 0);
+
+ *updp = updates->listp[--updates->size];
+}
+
+/*
+ * __wt_update_vector_peek --
+ * Peek an update pointer off a update vector.
+ */
+void
+__wt_update_vector_peek(WT_UPDATE_VECTOR *updates, WT_UPDATE **updp)
+{
+ WT_ASSERT(updates->session, updates->size > 0);
+
+ *updp = updates->listp[updates->size - 1];
+}
+
+/*
+ * __wt_update_vector_clear --
+ * Clear a update vector.
+ */
+void
+__wt_update_vector_clear(WT_UPDATE_VECTOR *updates)
+{
+ updates->size = 0;
+}
+
+/*
+ * __wt_update_vector_free --
+ * Free any resources associated with a update vector. If we exceeded the allowed stack space on
+ * the vector and had to fallback to dynamic allocations, we'll be doing a free here.
+ */
+void
+__wt_update_vector_free(WT_UPDATE_VECTOR *updates)
+{
+ if (updates->allocated_bytes != 0)
+ __wt_free(updates->session, updates->listp);
+ __wt_update_vector_init(updates->session, updates);
+}
diff --git a/src/third_party/wiredtiger/src/tiered/tiered_config.c b/src/third_party/wiredtiger/src/tiered/tiered_config.c
new file mode 100644
index 00000000000..23eb24131cc
--- /dev/null
+++ b/src/third_party/wiredtiger/src/tiered/tiered_config.c
@@ -0,0 +1,188 @@
+/*-
+ * Copyright (c) 2014-present MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __tiered_confchk --
+ * Check for a valid tiered storage source.
+ */
+static int
+__tiered_confchk(
+ WT_SESSION_IMPL *session, WT_CONFIG_ITEM *name, WT_NAMED_STORAGE_SOURCE **nstoragep)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_NAMED_STORAGE_SOURCE *nstorage;
+
+ *nstoragep = NULL;
+
+ if (name->len == 0 || WT_STRING_MATCH("none", name->str, name->len))
+ return (0);
+
+ conn = S2C(session);
+ TAILQ_FOREACH (nstorage, &conn->storagesrcqh, q)
+ if (WT_STRING_MATCH(nstorage->name, name->str, name->len)) {
+ *nstoragep = nstorage;
+ return (0);
+ }
+ WT_RET_MSG(session, EINVAL, "unknown storage source '%.*s'", (int)name->len, name->str);
+}
+
+/*
+ * __tiered_common_config --
+ * Parse configuration options common to connection and btrees.
+ */
+static int
+__tiered_common_config(WT_SESSION_IMPL *session, const char **cfg, WT_BUCKET_STORAGE *bstorage)
+{
+ WT_CONFIG_ITEM cval;
+
+ WT_RET(__wt_config_gets(session, cfg, "tiered_storage.local_retention", &cval));
+ bstorage->retain_secs = (uint64_t)cval.val;
+
+ WT_RET(__wt_config_gets(session, cfg, "tiered_storage.object_target_size", &cval));
+ bstorage->object_size = (uint64_t)cval.val;
+
+ return (0);
+}
+
+/*
+ * __wt_tiered_bucket_config --
+ * Given a configuration, (re)configure the bucket storage and return that structure.
+ */
+int
+__wt_tiered_bucket_config(
+ WT_SESSION_IMPL *session, const char *cfg[], WT_BUCKET_STORAGE **bstoragep)
+{
+ WT_BUCKET_STORAGE *bstorage, *new;
+ WT_CONFIG_ITEM auth, bucket, name, prefix;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_NAMED_STORAGE_SOURCE *nstorage;
+ WT_STORAGE_SOURCE *storage;
+ uint64_t hash_bucket, hash;
+
+ *bstoragep = NULL;
+
+ WT_RET(__wt_config_gets(session, cfg, "tiered_storage.name", &name));
+ bstorage = new = NULL;
+ conn = S2C(session);
+
+ __wt_spin_lock(session, &conn->storage_lock);
+
+ WT_ERR(__tiered_confchk(session, &name, &nstorage));
+ if (nstorage == NULL) {
+ WT_ERR(__wt_config_gets(session, cfg, "tiered_storage.bucket", &bucket));
+ if (bucket.len != 0)
+ WT_ERR_MSG(
+ session, EINVAL, "tiered_storage.bucket requires tiered_storage.name to be set");
+ goto done;
+ }
+ /*
+ * Check if tiered storage is set on the connection. If someone wants tiered storage on a table,
+ * it needs to be configured on the database as well.
+ */
+ if (conn->bstorage == NULL && bstoragep != &conn->bstorage)
+ WT_ERR_MSG(
+ session, EINVAL, "table tiered storage requires connection tiered storage to be set");
+ /* A bucket and bucket_prefix are required, auth_token is not. */
+ WT_ERR(__wt_config_gets(session, cfg, "tiered_storage.bucket", &bucket));
+ if (bucket.len == 0)
+ WT_ERR_MSG(session, EINVAL, "table tiered storage requires bucket to be set");
+ WT_ERR(__wt_config_gets(session, cfg, "tiered_storage.bucket_prefix", &prefix));
+ if (prefix.len == 0)
+ WT_ERR_MSG(session, EINVAL, "table tiered storage requires bucket_prefix to be set");
+ WT_ERR(__wt_config_gets(session, cfg, "tiered_storage.auth_token", &auth));
+
+ hash = __wt_hash_city64(bucket.str, bucket.len);
+ hash_bucket = hash & (conn->hash_size - 1);
+ TAILQ_FOREACH (bstorage, &nstorage->buckethashqh[hash_bucket], q) {
+ if (WT_STRING_MATCH(bstorage->bucket, bucket.str, bucket.len) &&
+ (WT_STRING_MATCH(bstorage->bucket_prefix, prefix.str, prefix.len))) {
+ *bstoragep = bstorage;
+ goto done;
+ }
+ }
+
+ WT_ERR(__wt_calloc_one(session, &new));
+ WT_ERR(__wt_strndup(session, auth.str, auth.len, &new->auth_token));
+ WT_ERR(__wt_strndup(session, bucket.str, bucket.len, &new->bucket));
+ WT_ERR(__wt_strndup(session, prefix.str, prefix.len, &new->bucket_prefix));
+
+ storage = nstorage->storage_source;
+ WT_ERR(storage->ss_customize_file_system(storage, &session->iface, new->bucket,
+ new->bucket_prefix, new->auth_token, NULL, &new->file_system));
+ new->storage_source = storage;
+
+ /* If we're creating a new bucket storage, parse the other settings into it. */
+ TAILQ_INSERT_HEAD(&nstorage->bucketqh, new, q);
+ TAILQ_INSERT_HEAD(&nstorage->buckethashqh[hash_bucket], new, hashq);
+ F_SET(new, WT_BUCKET_FREE);
+ WT_ERR(__tiered_common_config(session, cfg, new));
+ *bstoragep = new;
+
+done:
+ if (0) {
+err:
+ if (new != NULL) {
+ __wt_free(session, new->bucket);
+ __wt_free(session, new->bucket_prefix);
+ }
+ __wt_free(session, new);
+ }
+ __wt_spin_unlock(session, &conn->storage_lock);
+ return (ret);
+}
+
/*
 * __wt_tiered_conn_config --
 *     Parse and setup the storage server options for the connection. On initial configuration
 *     this creates the connection's bucket storage; on reconfigure it only refreshes the
 *     tunable settings of the existing one.
 */
int
__wt_tiered_conn_config(WT_SESSION_IMPL *session, const char **cfg, bool reconfig)
{
    WT_CONNECTION_IMPL *conn;
    WT_DECL_RET;

    conn = S2C(session);

    /* Only parse a new bucket configuration at startup; reconfigure reuses the existing one. */
    if (!reconfig)
        WT_RET(__wt_tiered_bucket_config(session, cfg, &conn->bstorage));

    /* If the connection is not set up for tiered storage there is nothing more to do. */
    if (conn->bstorage == NULL)
        return (0);
    __wt_verbose(session, WT_VERB_TIERED, "TIERED_CONFIG: bucket %s", conn->bstorage->bucket);
    __wt_verbose(
      session, WT_VERB_TIERED, "TIERED_CONFIG: prefix %s", conn->bstorage->bucket_prefix);

    /*
     * If reconfiguring, see if the other settings have changed on the system bucket storage.
     */
    WT_ASSERT(session, conn->bstorage != NULL);
    if (reconfig)
        WT_ERR(__tiered_common_config(session, cfg, conn->bstorage));

    /* Publish the configured sizes as connection statistics. */
    WT_STAT_CONN_SET(session, tiered_object_size, conn->bstorage->object_size);
    WT_STAT_CONN_SET(session, tiered_retention, conn->bstorage->retain_secs);

    /*
     * Set up the designated file system for the "none" bucket.
     */
    WT_ASSERT(session, conn->file_system != NULL);
    conn->bstorage_none.file_system = conn->file_system;

    return (0);

err:
    /*
     * NOTE(review): this path is only reachable from the reconfigure branch, yet it frees the
     * connection's live bucket storage and its fields, leaving conn->bstorage dangling for any
     * concurrent user — confirm the callers treat a reconfigure failure as fatal.
     */
    __wt_free(session, conn->bstorage->auth_token);
    __wt_free(session, conn->bstorage->bucket);
    __wt_free(session, conn->bstorage->bucket_prefix);
    __wt_free(session, conn->bstorage);
    return (ret);
}
diff --git a/src/third_party/wiredtiger/src/tiered/tiered_cursor.c b/src/third_party/wiredtiger/src/tiered/tiered_cursor.c
index f0aa30d2023..db45db54f9e 100644
--- a/src/third_party/wiredtiger/src/tiered/tiered_cursor.c
+++ b/src/third_party/wiredtiger/src/tiered/tiered_cursor.c
@@ -8,9 +8,9 @@
#include "wt_internal.h"
-#define WT_FORALL_CURSORS(curtiered, c, i) \
- for ((i) = (curtiered)->tiered->ntiers; (i) > 0;) \
- if (((c) = (curtiered)->cursors[--(i)]) != NULL)
+#define WT_FORALL_CURSORS(curtiered, c, i) \
+ for ((i) = 0; i < WT_TIERED_MAX_TIERS;) \
+ if (((c) = (curtiered)->cursors[(i)++]) != NULL)
#define WT_TIERED_CURCMP(s, tiered, c1, c2, cmp) \
__wt_compare(s, (tiered)->collator, &(c1)->key, &(c2)->key, &(cmp))
@@ -34,8 +34,6 @@ __curtiered_open_cursors(WT_CURSOR_TIERED *curtiered)
dhandle = NULL;
tiered = curtiered->tiered;
- WT_ASSERT(session, tiered->ntiers > 0);
-
/*
* If the key is pointing to memory that is pinned by a tier cursor, take a copy before closing
* cursors.
@@ -46,14 +44,16 @@ __curtiered_open_cursors(WT_CURSOR_TIERED *curtiered)
F_CLR(curtiered, WT_CURTIERED_ITERATE_NEXT | WT_CURTIERED_ITERATE_PREV);
WT_ASSERT(session, curtiered->cursors == NULL);
- WT_ERR(__wt_calloc_def(session, tiered->ntiers, &curtiered->cursors));
+ WT_ERR(__wt_calloc_def(session, WT_TIERED_MAX_TIERS, &curtiered->cursors));
/* Open the cursors for tiers that have changed. */
__wt_verbose(session, WT_VERB_TIERED,
- "tiered opening cursor session(%p):tiered cursor(%p), tiers: %u", (void *)session,
- (void *)curtiered, tiered->ntiers);
- for (i = 0; i != tiered->ntiers; i++) {
- dhandle = tiered->tiers[i];
+ "tiered opening cursor session(%p):tiered cursor(%p), tiers: %d", (void *)session,
+ (void *)curtiered, (int)WT_TIERED_MAX_TIERS);
+ for (i = 0; i < WT_TIERED_MAX_TIERS; i++) {
+ dhandle = tiered->tiers[i].tier;
+ if (dhandle == NULL)
+ continue;
/*
* Read from the checkpoint if the file has been written. Once all cursors switch, the
@@ -87,7 +87,7 @@ __curtiered_close_cursors(WT_SESSION_IMPL *session, WT_CURSOR_TIERED *curtiered)
return (0);
/* Walk the cursors, closing them. */
- for (i = 0; i < curtiered->tiered->ntiers; i++) {
+ for (i = 0; i < WT_TIERED_MAX_TIERS; i++) {
if ((c = (curtiered)->cursors[i]) != NULL) {
curtiered->cursors[i] = NULL;
WT_RET(c->close(c));
@@ -795,15 +795,13 @@ __curtiered_put(WT_CURSOR_TIERED *curtiered, const WT_ITEM *key, const WT_ITEM *
bool position, bool reserve)
{
WT_CURSOR *primary;
- WT_TIERED *tiered;
-
- tiered = curtiered->tiered;
+ int (*func)(WT_CURSOR *);
/*
* Clear the existing cursor position. Don't clear the primary cursor: we're about to use it
* anyway.
*/
- primary = curtiered->cursors[tiered->ntiers - 1];
+ primary = curtiered->cursors[WT_TIERED_INDEX_LOCAL];
WT_RET(__curtiered_reset_cursors(curtiered, primary));
/* If necessary, set the position for future scans. */
@@ -811,14 +809,15 @@ __curtiered_put(WT_CURSOR_TIERED *curtiered, const WT_ITEM *key, const WT_ITEM *
curtiered->current = primary;
primary->set_key(primary, key);
- if (reserve) {
- WT_RET(primary->reserve(primary));
- } else {
- primary->set_value(primary, value);
- WT_RET(primary->insert(primary));
- }
- return (0);
+ /* Our API always leaves the cursor positioned after a reserve call. */
+ WT_ASSERT(CUR2S(curtiered), !reserve || position);
+ func = primary->insert;
+ if (position)
+ func = reserve ? primary->reserve : primary->update;
+ if (!reserve)
+ primary->set_value(primary, value);
+ return (func(primary));
}
/*
@@ -1010,21 +1009,6 @@ err:
}
/*
- * __curtiered_random_tier --
- * Pick a tier at random, weighted by the size of all tiers. Weighting proportional to documents
- * avoids biasing towards small tiers. Then return the cursor on the tier we have picked.
- */
-static void
-__curtiered_random_tier(WT_SESSION_IMPL *session, WT_CURSOR_TIERED *curtiered, WT_CURSOR **cursor)
-{
- u_int i;
-
- /* TODO: make randomness respect tree size. */
- i = __wt_random(&session->rnd) % curtiered->tiered->ntiers;
- *cursor = curtiered->cursors[i];
-}
-
-/*
* __curtiered_next_random --
* WT_CURSOR->next method for the tiered cursor type when configured with next_random.
*/
@@ -1035,6 +1019,7 @@ __curtiered_next_random(WT_CURSOR *cursor)
WT_CURSOR_TIERED *curtiered;
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ u_int i, tier;
int exact;
c = NULL;
@@ -1044,29 +1029,33 @@ __curtiered_next_random(WT_CURSOR *cursor)
__cursor_novalue(cursor);
WT_ERR(__curtiered_enter(curtiered, false));
- for (;;) {
- __curtiered_random_tier(session, curtiered, &c);
- /*
- * This call to next_random on the tier can potentially end in WT_NOTFOUND if the tier we
- * picked is empty. We want to retry in that case.
- */
+ /*
+ * Select a random tier. If it is empty, try the next tier and so on, wrapping around until we
+ * find something or run out of tiers.
+ */
+ tier = __wt_random(&session->rnd) % WT_TIERED_MAX_TIERS;
+ for (i = 0; i < WT_TIERED_MAX_TIERS; i++) {
+ c = curtiered->cursors[tier];
WT_ERR_NOTFOUND_OK(__wt_curfile_next_random(c), true);
- if (ret == WT_NOTFOUND)
+ if (ret == WT_NOTFOUND) {
+ if (++tier == WT_TIERED_MAX_TIERS)
+ tier = 0;
continue;
+ }
F_SET(cursor, WT_CURSTD_KEY_INT);
WT_ERR(c->get_key(c, &cursor->key));
/*
- * Search near the current key to resolve any tombstones and position to a valid document.
- * If we see a WT_NOTFOUND here that is valid, as the tree has no documents visible to us.
+ * Search near the current key to resolve any tombstones and position to a valid record. If
+ * we see a WT_NOTFOUND here that is valid, as the tree has no documents visible to us.
*/
WT_ERR(__curtiered_search_near(cursor, &exact));
break;
}
- /* We have found a valid doc. Set that we are now positioned */
- if (0) {
err:
+ if (ret != 0) {
+ /* We didn't find a valid record. Don't leave cursor positioned */
F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
}
__curtiered_leave(curtiered);
@@ -1074,6 +1063,70 @@ err:
}
/*
+ * __curtiered_insert_bulk --
+ * WT_CURSOR->insert method for tiered bulk cursors.
+ */
+static int
+__curtiered_insert_bulk(WT_CURSOR *cursor)
+{
+ WT_CURSOR *bulk_cursor;
+ WT_CURSOR_TIERED *curtiered;
+ WT_SESSION_IMPL *session;
+
+ curtiered = (WT_CURSOR_TIERED *)cursor;
+ session = CUR2S(curtiered);
+ bulk_cursor = curtiered->cursors[WT_TIERED_INDEX_LOCAL];
+
+ WT_ASSERT(session, bulk_cursor != NULL);
+ bulk_cursor->set_key(bulk_cursor, &cursor->key);
+ bulk_cursor->set_value(bulk_cursor, &cursor->value);
+ WT_RET(bulk_cursor->insert(bulk_cursor));
+
+ return (0);
+}
+
/*
 * __curtiered_open_bulk --
 *     WT_SESSION->open_cursor method for tiered bulk cursors. Bulk loading goes only to the
 *     local tier; all other cursor operations are disabled.
 */
static int
__curtiered_open_bulk(WT_CURSOR_TIERED *curtiered, const char *cfg[])
{
    WT_CURSOR *cursor;
    WT_DATA_HANDLE *dhandle;
    WT_DECL_RET;
    WT_SESSION_IMPL *session;
    WT_TIERED *tiered;

    cursor = &curtiered->iface;
    session = CUR2S(curtiered);
    tiered = curtiered->tiered;

    /* Bulk cursors only support insert and close. */
    __wt_cursor_set_notsup(cursor);
    cursor->insert = __curtiered_insert_bulk;
    cursor->close = __wt_curtiered_close;

    /* Allocate the full tier array even though only the local slot is populated. */
    WT_ASSERT(session, curtiered->cursors == NULL);
    WT_ERR(__wt_calloc_def(session, WT_TIERED_MAX_TIERS, &curtiered->cursors));

    /* Open a bulk cursor on the local tier. */
    dhandle = tiered->tiers[WT_TIERED_INDEX_LOCAL].tier;
    WT_ASSERT(session, dhandle != NULL);
    WT_ERR(__wt_open_cursor(
      session, dhandle->name, cursor, cfg, &curtiered->cursors[WT_TIERED_INDEX_LOCAL]));

    /* Child cursors always use overwrite and raw mode. */
    F_SET(curtiered->cursors[WT_TIERED_INDEX_LOCAL], WT_CURSTD_OVERWRITE | WT_CURSTD_RAW);

    if (0) {
err:
        /* Error path only: release the (possibly partially filled) cursor array. */
        __wt_free(session, curtiered->cursors);
    }
    return (ret);
}
+
+/*
* __wt_curtiered_open --
* WT_SESSION->open_cursor method for tiered cursors.
*/
@@ -1129,7 +1182,7 @@ __wt_curtiered_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
/* Check whether the exclusive open for a bulk load succeeded. */
if (bulk && ret == EBUSY)
- WT_ERR_MSG(session, EINVAL, "bulk-load is only supported on newly created trees");
+ ret = EINVAL;
/* Flag any errors from the tree get. */
WT_ERR(ret);
@@ -1142,7 +1195,7 @@ __wt_curtiered_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
cursor = (WT_CURSOR *)curtiered;
*cursor = iface;
cursor->session = (WT_SESSION *)session;
- WT_ERR(__wt_strdup(session, tiered->name, &cursor->uri));
+ WT_ERR(__wt_strdup(session, tiered->iface.name, &cursor->uri));
cursor->key_format = tiered->key_format;
cursor->value_format = tiered->value_format;
@@ -1159,7 +1212,7 @@ __wt_curtiered_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
WT_ERR(__wt_cursor_init(cursor, cursor->uri, owner, cfg, cursorp));
if (bulk)
- WT_ERR(ENOTSUP); /* TODO */
+ WT_ERR(__curtiered_open_bulk(curtiered, cfg));
if (0) {
err:
diff --git a/src/third_party/wiredtiger/src/tiered/tiered_handle.c b/src/third_party/wiredtiger/src/tiered/tiered_handle.c
index e326e247717..a1bb6bc37a6 100644
--- a/src/third_party/wiredtiger/src/tiered/tiered_handle.c
+++ b/src/third_party/wiredtiger/src/tiered/tiered_handle.c
@@ -9,62 +9,529 @@
#include "wt_internal.h"
/*
+ * __tiered_dhandle_setup --
+ * Given a tiered index and name, set up the dhandle information.
+ */
+static int
+__tiered_dhandle_setup(WT_SESSION_IMPL *session, WT_TIERED *tiered, uint32_t i, const char *name)
+{
+ WT_DECL_RET;
+ WT_TIERED_TIERS *tier;
+ uint32_t id, type;
+
+ WT_RET(__wt_session_get_dhandle(session, name, NULL, NULL, 0));
+ if (i == WT_TIERED_INDEX_INVALID) {
+ type = session->dhandle->type;
+ if (type == WT_DHANDLE_TYPE_BTREE)
+ id = WT_TIERED_INDEX_LOCAL;
+ else if (type == WT_DHANDLE_TYPE_TIERED)
+ id = WT_TIERED_INDEX_SHARED;
+ else
+ WT_ERR_MSG(
+ session, EINVAL, "Unknown or unsupported tiered dhandle type %" PRIu32, type);
+ } else {
+ WT_ASSERT(session, i < WT_TIERED_MAX_TIERS);
+ id = i;
+ }
+ /* Reference the dhandle and set it in the tier array. */
+ tier = &tiered->tiers[id];
+ (void)__wt_atomic_addi32(&session->dhandle->session_inuse, 1);
+ tier->tier = session->dhandle;
+
+ /* The Btree needs to use the bucket storage to do file system operations. */
+ if (session->dhandle->type == WT_DHANDLE_TYPE_BTREE)
+ ((WT_BTREE *)session->dhandle->handle)->bstorage = tiered->bstorage;
+err:
+ WT_RET(__wt_session_release_dhandle(session));
+ return (ret);
+}
+
+/*
+ * __tiered_init_tiers --
+ * Given a tiered table 'tiers' configuration set up the dhandle array.
+ */
+static int
+__tiered_init_tiers(WT_SESSION_IMPL *session, WT_TIERED *tiered, WT_CONFIG_ITEM *tierconf)
+{
+ WT_CONFIG cparser;
+ WT_CONFIG_ITEM ckey, cval;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ __wt_config_subinit(session, &cparser, tierconf);
+ while ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0) {
+ /* Set up the tiers array based on the metadata. */
+ WT_ERR(__wt_buf_fmt(session, tmp, "%.*s", (int)ckey.len, ckey.str));
+ __wt_verbose(
+ session, WT_VERB_TIERED, "INIT_TIERS: tiered URI dhandle %s", (char *)tmp->data);
+ WT_ERR(__tiered_dhandle_setup(
+ session, tiered, WT_TIERED_INDEX_INVALID, (const char *)tmp->data));
+ }
+ WT_ERR_NOTFOUND_OK(ret, false);
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
+}
+
+/*
+ * __tiered_create_local --
+ * Create a new local name for a tiered table. Must be called single threaded.
+ */
+static int
+__tiered_create_local(WT_SESSION_IMPL *session, WT_TIERED *tiered)
+{
+ WT_DECL_RET;
+ WT_TIERED_TIERS *this_tier;
+ const char *cfg[4] = {NULL, NULL, NULL, NULL};
+ const char *config, *name;
+
+ config = name = NULL;
+
+ /* If this ever can be multi-threaded, this would need to be atomic. */
+ tiered->current_id = tiered->next_id++;
+ /* XXX Remove when we have real flags. */
+ F_SET(tiered, WT_TIERED_FLAG_UNUSED);
+ WT_ERR(
+ __wt_tiered_name(session, &tiered->iface, tiered->current_id, WT_TIERED_NAME_LOCAL, &name));
+ __wt_verbose(session, WT_VERB_TIERED, "TIER_CREATE_LOCAL: LOCAL: %s", name);
+ cfg[0] = WT_CONFIG_BASE(session, object_meta);
+ cfg[1] = tiered->obj_config;
+ __wt_verbose(session, WT_VERB_TIERED, "TIER_CREATE_LOCAL: obj_config: %s : %s", name, cfg[1]);
+ WT_ASSERT(session, tiered->obj_config != NULL);
+ WT_ERR(__wt_config_merge(session, cfg, NULL, (const char **)&config));
+ /*
+ * XXX Need to verify user doesn't create a table of the same name. What does LSM do? It
+ * definitely has the same problem with chunks.
+ */
+ __wt_verbose(
+ session, WT_VERB_TIERED, "TIER_CREATE_LOCAL: schema create LOCAL: %s : %s", name, config);
+ WT_ERR(__wt_schema_create(session, name, config));
+ this_tier = &tiered->tiers[WT_TIERED_INDEX_LOCAL];
+ if (this_tier->name != NULL)
+ __wt_free(session, this_tier->name);
+ this_tier->name = name;
+ F_SET(this_tier, WT_TIERS_OP_READ | WT_TIERS_OP_WRITE);
+
+ if (0) {
+err:
+ /* Only free name on error. */
+ __wt_free(session, name);
+ }
+ __wt_free(session, config);
+ return (ret);
+}
+
+/*
+ * __tiered_create_object --
+ * Create an object name of the given number.
+ */
+static int
+__tiered_create_object(WT_SESSION_IMPL *session, WT_TIERED *tiered)
+{
+ WT_DECL_RET;
+ const char *cfg[4] = {NULL, NULL, NULL, NULL};
+ const char *config, *name, *orig_name;
+
+ config = name = orig_name = NULL;
+
+ orig_name = tiered->tiers[WT_TIERED_INDEX_LOCAL].name;
+ /*
+ * If we have an existing local file in the tier, alter the table to indicate this one is now
+ * readonly.
+ */
+ if (orig_name != NULL) {
+ cfg[0] = "readonly=true";
+ WT_WITHOUT_DHANDLE(session, ret = __wt_schema_alter(session, orig_name, cfg));
+ WT_ERR(ret);
+ }
+ /*
+ * Create the name and metadata of the new shared object of the current local object.
+ * The data structure keeps this id so that we don't have to parse and manipulate strings.
+ * I.e. if we have file:example-000000002.wt we want object:example-000000002.wtobj.
+ */
+ WT_ERR(
+ __wt_tiered_name(session, &tiered->iface, tiered->current_id, WT_TIERED_NAME_OBJECT, &name));
+ cfg[0] = WT_CONFIG_BASE(session, object_meta);
+ cfg[1] = tiered->obj_config;
+ cfg[2] = "readonly=true";
+ WT_ASSERT(session, tiered->obj_config != NULL);
+ WT_ERR(__wt_config_merge(session, cfg, NULL, (const char **)&config));
+ __wt_verbose(
+ session, WT_VERB_TIERED, "TIER_CREATE_OBJECT: schema create %s : %s", name, config);
+ /* Create the new shared object. */
+ WT_ERR(__wt_schema_create(session, name, config));
+
+err:
+ __wt_free(session, config);
+ __wt_free(session, name);
+ return (ret);
+}
+
+/*
+ * __tiered_create_tier_tree --
+ * Create a tier name for a tiered table.
+ */
+static int
+__tiered_create_tier_tree(WT_SESSION_IMPL *session, WT_TIERED *tiered)
+{
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_TIERED_TIERS *this_tier;
+ const char *cfg[4] = {NULL, NULL, NULL, NULL};
+ const char *config, *name;
+
+ config = name = NULL;
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+
+ /* Create tier:example for the new tiered tree. */
+ WT_ERR(__wt_tiered_name(session, &tiered->iface, 0, WT_TIERED_NAME_SHARED, &name));
+ cfg[0] = WT_CONFIG_BASE(session, tier_meta);
+ WT_ASSERT(session, tiered->bstorage != NULL);
+ WT_ERR(__wt_buf_fmt(session, tmp, ",readonly=true,tiered_storage=(bucket=%s,bucket_prefix=%s)",
+ tiered->bstorage->bucket, tiered->bstorage->bucket_prefix));
+ cfg[2] = tmp->data;
+ WT_ERR(__wt_config_merge(session, cfg, NULL, &config));
+ /* Set up a tier:example metadata for the first time. */
+ __wt_verbose(session, WT_VERB_TIERED, "CREATE_TIER_TREE: schema create: %s : %s", name, config);
+ WT_ERR(__wt_schema_create(session, name, config));
+ this_tier = &tiered->tiers[WT_TIERED_INDEX_SHARED];
+ WT_ASSERT(session, this_tier->name == NULL);
+ this_tier->name = name;
+ F_SET(this_tier, WT_TIERS_OP_FLUSH | WT_TIERS_OP_READ);
+
+ if (0)
+err:
+ /* Only free on error. */
+ __wt_free(session, name);
+ __wt_free(session, config);
+ __wt_scr_free(session, &tmp);
+ return (ret);
+}
+
+/*
+ * __tiered_update_dhandles --
+ * Update the dhandle list for a tiered structure after object switching.
+ */
+static int
+__tiered_update_dhandles(WT_SESSION_IMPL *session, WT_TIERED *tiered)
+{
+ WT_DECL_RET;
+ uint32_t i;
+
+ /* Now get the dhandle and add it to the array. */
+ for (i = 0; i < WT_TIERED_MAX_TIERS; ++i) {
+ /*
+ * If we have a tiered dhandle we can either skip if it is the same name or we decrement the
+ * old one and get a new one for the new name.
+ */
+ if (tiered->tiers[i].tier != NULL) {
+ WT_ASSERT(session, tiered->tiers[i].name != NULL);
+ if (strcmp(tiered->tiers[i].tier->name, tiered->tiers[i].name) == 0)
+ continue;
+ else
+ (void)__wt_atomic_subi32(&tiered->tiers[i].tier->session_inuse, 1);
+ }
+ if (tiered->tiers[i].name == NULL)
+ continue;
+ __wt_verbose(
+ session, WT_VERB_TIERED, "UPDATE_DH: Get dhandle for %s", tiered->tiers[i].name);
+ WT_ERR(__tiered_dhandle_setup(session, tiered, i, tiered->tiers[i].name));
+ }
+err:
+ __wt_verbose(session, WT_VERB_TIERED, "UPDATE_DH: DONE ret %d", ret);
+ if (ret != 0) {
+ /* Need to undo our dhandles. Close and dereference all. */
+ for (i = 0; i < WT_TIERED_MAX_TIERS; ++i) {
+ if (tiered->tiers[i].tier != NULL)
+ (void)__wt_atomic_subi32(&tiered->tiers[i].tier->session_inuse, 1);
+ __wt_free(session, tiered->tiers[i].name);
+ tiered->tiers[i].tier = NULL;
+ tiered->tiers[i].name = NULL;
+ }
+ }
+ return (ret);
+}
+
+/*
+ * __tiered_update_metadata --
+ * Update the metadata for a tiered structure after object switching.
+ */
+static int
+__tiered_update_metadata(WT_SESSION_IMPL *session, WT_TIERED *tiered, const char *orig_config)
+{
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ uint32_t i;
+ const char *cfg[4] = {NULL, NULL, NULL, NULL};
+ const char *newconfig;
+
+ dhandle = &tiered->iface;
+ newconfig = NULL;
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+
+ WT_RET(__wt_buf_fmt(session, tmp, "last=%" PRIu64 ",tiers=(\"", tiered->current_id));
+ for (i = 0; i < WT_TIERED_MAX_TIERS; ++i) {
+ if (tiered->tiers[i].name == NULL) {
+ __wt_verbose(session, WT_VERB_TIERED, "TIER_UPDATE_META: names[%" PRIu32 "] NULL", i);
+ continue;
+ }
+ __wt_verbose(session, WT_VERB_TIERED, "TIER_UPDATE_META: names[%" PRIu32 "]: %s", i,
+ tiered->tiers[i].name);
+ WT_RET(__wt_buf_catfmt(session, tmp, "%s%s\"", i == 0 ? "" : ",", tiered->tiers[i].name));
+ }
+ WT_RET(__wt_buf_catfmt(session, tmp, ")"));
+
+ cfg[0] = WT_CONFIG_BASE(session, tiered_meta);
+ cfg[1] = orig_config;
+ cfg[2] = tmp->data;
+ WT_ERR(__wt_config_merge(session, cfg, NULL, &newconfig));
+ __wt_verbose(
+ session, WT_VERB_TIERED, "TIER_UPDATE_META: Update TIERED: %s %s", dhandle->name, newconfig);
+ WT_ERR(__wt_metadata_update(session, dhandle->name, newconfig));
+
+err:
+ __wt_free(session, newconfig);
+ __wt_scr_free(session, &tmp);
+ return (ret);
+}
+
+/*
+ * __tiered_switch --
+ * Given a tiered table, make all the metadata updates underneath to switch to the next object.
+ * The switch handles going from nothing to local-only, local-only to both local and shared, and
+ * having shared-only and creating a local object. Must be single threaded.
+ */
+static int
+__tiered_switch(WT_SESSION_IMPL *session, const char *config)
+{
+ WT_DECL_RET;
+ WT_TIERED *tiered;
+ bool need_object, need_tree, tracking;
+
+ tiered = (WT_TIERED *)session->dhandle;
+ __wt_verbose(
+ session, WT_VERB_TIERED, "TIER_SWITCH: called %s %s", session->dhandle->name, config);
+
+ need_object = tiered->tiers[WT_TIERED_INDEX_LOCAL].tier != NULL;
+ need_tree = need_object && tiered->tiers[WT_TIERED_INDEX_SHARED].tier == NULL;
+ /*
+ * There are four possibilities to our tiers configuration. In all of them we need to create
+ * a new local tier file object dhandle and add it as element index zero of the tiers array.
+ * Then we may or may not do other operations depending on the state otherwise. These are
+ * presented in order of increasing amount of work that needs to be done.
+ * 1. tiers=() - New and empty. We only need to add in the local file object.
+ * 2. tiers=("tier:...") - Existing shared tier only. Here too we only need to add
+ * in the local file object.
+ * 3. tiers=("file:...", "tier:...") - Both local and shared tiers exist in the metadata.
+ * We need to create and add the next local file object (N + 1) and create a shared
+ * object in the metadata for the current local file object (N).
+ * 4. tiers=("file:...") - Existing local tier only. We need to do all of the parts listed
+ * in the #3 above, and also create the shared tier metadata entry.
+ *
+ * Step 4 must be done after some order of 1-3.
+ * 1. Create the "object:" entry in metadata if needed.
+ * 2. Create the "tier:" entry in metadata if needed.
+ * 3. Create the new "file:" local entry in metadata.
+ * 4. Update the "tiered:" with new tiers and object number.
+ * 5. Meta tracking off to "commit" all the metadata operations.
+ * 6. Revise the dhandles in the tiered structure to reflect new state of the world.
+ */
+
+ /*
+ * To be implemented with flush_tier:
+ * - Close the current object.
+ * - Copy the current one to the cloud. It also remains in the local store.
+ */
+
+ WT_RET(__wt_meta_track_on(session));
+ tracking = true;
+ /* Create the object: entry in the metadata. */
+ if (need_object)
+ WT_ERR(__tiered_create_object(session, tiered));
+
+ if (need_tree)
+ WT_ERR(__tiered_create_tier_tree(session, tiered));
+
+ /* We always need to create a local object. */
+ WT_ERR(__tiered_create_local(session, tiered));
+
+ /*
+ * Note that removal of overlapping local objects is not in the purview of this function. Some
+ * other mechanism will remove outdated tiers. Here's where it could be done though.
+ */
+
+ /* Update the tiered: metadata to new object number and tiered array. */
+ WT_ERR(__tiered_update_metadata(session, tiered, config));
+ tracking = false;
+ WT_ERR(__wt_meta_track_off(session, true, ret != 0));
+ WT_ERR(__tiered_update_dhandles(session, tiered));
+err:
+ __wt_verbose(session, WT_VERB_TIERED, "TIER_SWITCH: DONE ret %d", ret);
+ if (tracking)
+ WT_RET(__wt_meta_track_off(session, true, ret != 0));
+ return (ret);
+}
+
+/*
+ * __wt_tiered_switch --
+ * Switch metadata, external version.
+ */
+int
+__wt_tiered_switch(WT_SESSION_IMPL *session, const char *config)
+{
+ WT_DECL_RET;
+
+ /*
+ * For now just a wrapper to internal function. Maybe there's more to do externally, like wrap
+ * it in a lock or with a dhandle or walk the dhandle list here rather than higher up.
+ */
+ WT_SAVE_DHANDLE(session, ret = __tiered_switch(session, config));
+ return (ret);
+}
+
+/*
+ * __wt_tiered_name --
+ * Given a dhandle structure and object number generate the URI name of the given type. XXX
+ * Currently this is only used in this file but I anticipate it may be of use outside. If not,
+ * make this static and tiered_name instead.
+ */
+int
+__wt_tiered_name(
+ WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle, uint64_t id, uint32_t flags, const char **retp)
+{
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ const char *name;
+
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ name = dhandle->name;
+ /* Skip the prefix depending on what we're given. */
+ if (dhandle->type == WT_DHANDLE_TYPE_TIERED)
+ WT_PREFIX_SKIP_REQUIRED(session, name, "tiered:");
+ else {
+ WT_ASSERT(session, dhandle->type == WT_DHANDLE_TYPE_TIERED_TREE);
+ WT_ASSERT(session, !LF_ISSET(WT_TIERED_NAME_SHARED));
+ WT_PREFIX_SKIP_REQUIRED(session, name, "tier:");
+ }
+
+ /*
+ * Separate object numbers from the base table name with a dash. Separate from the suffix with a
+ * dot. We generate a different name style based on the type.
+ */
+ if (LF_ISSET(WT_TIERED_NAME_LOCAL)) {
+ if (LF_ISSET(WT_TIERED_NAME_PREFIX))
+ WT_ERR(__wt_buf_fmt(session, tmp, "file:%s-", name));
+ else
+ WT_ERR(__wt_buf_fmt(session, tmp, "file:%s-%010" PRIu64 ".wt", name, id));
+ } else if (LF_ISSET(WT_TIERED_NAME_OBJECT)) {
+ if (LF_ISSET(WT_TIERED_NAME_PREFIX))
+ WT_ERR(__wt_buf_fmt(session, tmp, "object:%s-", name));
+ else
+ WT_ERR(__wt_buf_fmt(session, tmp, "object:%s-%010" PRIu64 ".wtobj", name, id));
+ } else {
+ WT_ASSERT(session, !LF_ISSET(WT_TIERED_NAME_PREFIX));
+ WT_ASSERT(session, LF_ISSET(WT_TIERED_NAME_SHARED));
+ WT_ERR(__wt_buf_fmt(session, tmp, "tier:%s", name));
+ }
+ WT_ERR(__wt_strndup(session, tmp->data, tmp->size, retp));
+ __wt_verbose(session, WT_VERB_TIERED, "Generated tiered name: %s", *retp);
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
+}
+
+/*
* __tiered_open --
* Open a tiered data handle (internal version).
*/
static int
__tiered_open(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONFIG cparser;
- WT_CONFIG_ITEM ckey, cval, tierconf;
+ WT_CONFIG_ITEM cval, tierconf;
WT_DATA_HANDLE *dhandle;
- WT_DECL_ITEM(buf);
+ WT_DECL_ITEM(tmp);
WT_DECL_RET;
WT_TIERED *tiered;
- u_int i;
- const char **tiered_cfg;
+ uint32_t unused;
+ char *metaconf;
+ const char *obj_cfg[] = {WT_CONFIG_BASE(session, object_meta), NULL, NULL};
+ const char **tiered_cfg, *config;
dhandle = session->dhandle;
tiered = (WT_TIERED *)dhandle;
tiered_cfg = dhandle->cfg;
+ config = NULL;
+ metaconf = NULL;
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
WT_UNUSED(cfg);
- WT_RET(__wt_config_gets(session, tiered_cfg, "key_format", &cval));
- WT_RET(__wt_strndup(session, cval.str, cval.len, &tiered->key_format));
- WT_RET(__wt_config_gets(session, tiered_cfg, "value_format", &cval));
- WT_RET(__wt_strndup(session, cval.str, cval.len, &tiered->value_format));
-
- /* Point to some items in the copy to save re-parsing. */
- WT_RET(__wt_config_gets(session, tiered_cfg, "tiered.tiers", &tierconf));
-
- /* Count the number of tiers. */
- __wt_config_subinit(session, &cparser, &tierconf);
- while ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0)
- ++tiered->ntiers;
- WT_RET_NOTFOUND_OK(ret);
-
- WT_ASSERT(session, tiered->ntiers > 0);
-
- WT_RET(__wt_scr_alloc(session, 0, &buf));
- WT_ERR(__wt_calloc_def(session, tiered->ntiers, &tiered->tiers));
-
- __wt_config_subinit(session, &cparser, &tierconf);
- for (i = 0; i < tiered->ntiers; i++) {
- WT_ERR(__wt_config_next(&cparser, &ckey, &cval));
- WT_ERR(__wt_buf_fmt(session, buf, "%.*s", (int)ckey.len, ckey.str));
- WT_ERR(__wt_session_get_dhandle(session, (const char *)buf->data, NULL, cfg, 0));
- (void)__wt_atomic_addi32(&session->dhandle->session_inuse, 1);
- /* Load in reverse order (based on LSM logic). */
- tiered->tiers[(tiered->ntiers - 1) - i] = session->dhandle;
- WT_ERR(__wt_session_release_dhandle(session));
+ /* Set up the bstorage from the configuration first. */
+ WT_RET(__wt_config_gets(session, tiered_cfg, "tiered_storage.name", &cval));
+ if (cval.len == 0)
+ tiered->bstorage = S2C(session)->bstorage;
+ else
+ WT_ERR(__wt_tiered_bucket_config(session, tiered_cfg, &tiered->bstorage));
+ WT_ASSERT(session, tiered->bstorage != NULL);
+ /* Collapse into one string for later use in switch. */
+ WT_ERR(__wt_config_merge(session, tiered_cfg, NULL, &config));
+
+ /*
+ * Pull in any configuration of the original table for the object and file components that may
+ * have been sent in on the create.
+ */
+ obj_cfg[1] = config;
+ WT_ERR(__wt_config_collapse(session, obj_cfg, &metaconf));
+ tiered->obj_config = metaconf;
+ metaconf = NULL;
+ __wt_verbose(session, WT_VERB_TIERED, "TIERED_OPEN: obj_config %s", tiered->obj_config);
+
+ WT_ERR(__wt_config_getones(session, config, "key_format", &cval));
+ WT_ERR(__wt_strndup(session, cval.str, cval.len, &tiered->key_format));
+ WT_ERR(__wt_config_getones(session, config, "value_format", &cval));
+ WT_ERR(__wt_strndup(session, cval.str, cval.len, &tiered->value_format));
+
+ WT_ERR(__wt_config_getones(session, config, "last", &cval));
+ tiered->current_id = (uint64_t)cval.val;
+ tiered->next_id = tiered->current_id + 1;
+ __wt_verbose(session, WT_VERB_TIERED, "TIERED_OPEN: current %d, next %d",
+ (int)tiered->current_id, (int)tiered->next_id);
+
+ ret = __wt_config_getones(session, config, "tiers", &tierconf);
+ WT_ERR_NOTFOUND_OK(ret, true);
+
+ /* Open tiers if we have them, otherwise initialize. */
+ if (tiered->current_id != 0)
+ WT_ERR(__tiered_init_tiers(session, tiered, &tierconf));
+ else {
+ __wt_verbose(
+ session, WT_VERB_TIERED, "TIERED_OPEN: create %s config %s", dhandle->name, config);
+ WT_ERR(__wt_tiered_switch(session, config));
+
+ /* XXX brute force, need to figure out functions to use to do this properly. */
+ /* We need to update the dhandle config entry to reflect the new tiers metadata. */
+ WT_ERR(__wt_metadata_search(session, dhandle->name, &metaconf));
+ __wt_verbose(session, WT_VERB_TIERED, "TIERED_OPEN: after switch meta conf %s %s",
+ dhandle->name, metaconf);
+ __wt_free(session, dhandle->cfg[1]);
+ dhandle->cfg[1] = metaconf;
+ }
+ if (0) {
+ /* Temp code to keep s_all happy. */
+ FLD_SET(unused, WT_TIERED_OBJ_LOCAL | WT_TIERED_TREE_UNUSED);
}
if (0) {
err:
+ __wt_free(session, tiered->obj_config);
__wt_free(session, tiered->tiers);
+ __wt_free(session, metaconf);
}
- __wt_scr_free(session, &buf);
+ __wt_verbose(session, WT_VERB_TIERED, "TIERED_OPEN: Done ret %d", ret);
+ __wt_scr_free(session, &tmp);
+ __wt_free(session, config);
return (ret);
}
@@ -84,22 +551,96 @@ __wt_tiered_open(WT_SESSION_IMPL *session, const char *cfg[])
/*
* __wt_tiered_close --
- * Close a tiered data handle.
+ * Close a tiered data handle. TODO: When this returns an actual meaningful return value, remove
+ * its entry from s_void.
*/
int
__wt_tiered_close(WT_SESSION_IMPL *session, WT_TIERED *tiered)
{
- WT_DECL_RET;
- u_int i;
+#if 0
+ WT_DATA_HANDLE *dhandle;
+#endif
+ uint32_t i;
- ret = 0;
__wt_free(session, tiered->key_format);
__wt_free(session, tiered->value_format);
- if (tiered->tiers != NULL) {
- for (i = 0; i < tiered->ntiers; i++)
- (void)__wt_atomic_subi32(&tiered->tiers[i]->session_inuse, 1);
- __wt_free(session, tiered->tiers);
+ __wt_free(session, tiered->obj_config);
+ __wt_verbose(session, WT_VERB_TIERED, "%s", "TIERED_CLOSE: called");
+ /*
+ * For the moment we don't have anything to return. But all the callers currently expect a real
+ * return value from a close function. And this may become more complex later. During connection
+ * close the other dhandles may be closed and freed before this dhandle. So just free the names.
+ */
+ for (i = 0; i < WT_TIERED_MAX_TIERS; i++) {
+#if 0
+ dhandle = tiered->tiers[i].tier;
+ /*
+ * XXX We cannot decrement on connection close but we need to decrement on sweep close or
+ * other individual close.
+ */
+ (void)__wt_atomic_subi32(&dhandle->session_inuse, 1);
+#endif
+ if (tiered->tiers[i].name != NULL)
+ __wt_free(session, tiered->tiers[i].name);
}
+ return (0);
+}
+
+/*
+ * __wt_tiered_tree_open --
+ * Open a tiered tree data handle.
+ */
+int
+__wt_tiered_tree_open(WT_SESSION_IMPL *session, const char *cfg[])
+{
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ const char *key, *object, *value;
+
+ WT_UNUSED(cfg);
+ object = NULL;
+ /*
+ * Set dhandle->handle with tiered tree structure, initialized.
+ */
+ __wt_verbose(session, WT_VERB_TIERED, "TIERED_TREE_OPEN: Called %s", session->dhandle->name);
+ WT_ASSERT(session, session->dhandle != NULL);
+ WT_RET(__wt_metadata_cursor(session, &cursor));
+ WT_ERR(__wt_tiered_name(
+ session, session->dhandle, 0, WT_TIERED_NAME_OBJECT | WT_TIERED_NAME_PREFIX, &object));
+ /*
+ * Walk looking for our objects.
+ */
+ while (cursor->next(cursor) == 0) {
+ cursor->get_key(cursor, &key);
+ cursor->get_value(cursor, &value);
+ /*
+ * NOTE: Here we do anything we need to do to open or access each shared object.
+ */
+ if (!WT_STRING_MATCH(key, object, strlen(object)))
+ continue;
+ __wt_verbose(
+ session, WT_VERB_TIERED, "TIERED_TREE_OPEN: metadata for %s: %s", object, value);
+ }
+err:
+ WT_TRET(__wt_metadata_cursor_release(session, &cursor));
+ __wt_free(session, object);
+ return (ret);
+}
+
+/*
+ * __wt_tiered_tree_close --
+ * Close a tiered tree data handle.
+ */
+int
+__wt_tiered_tree_close(WT_SESSION_IMPL *session, WT_TIERED_TREE *tiered_tree)
+{
+ WT_DECL_RET;
+
+ __wt_verbose(session, WT_VERB_TIERED, "TIERED_TREE_CLOSE: called %s", tiered_tree->iface.name);
+ ret = 0;
+ __wt_free(session, tiered_tree->key_format);
+ __wt_free(session, tiered_tree->value_format);
+
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 10887763194..9aa2f085386 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -171,6 +171,7 @@ __wt_txn_active(WT_SESSION_IMPL *session, uint64_t txnid)
WT_ORDERED_READ(session_cnt, conn->session_cnt);
WT_STAT_CONN_INCR(session, txn_walk_sessions);
for (i = 0, s = txn_global->txn_shared_list; i < session_cnt; i++, s++) {
+ WT_STAT_CONN_INCR(session, txn_sessions_walked);
/* If the transaction is in the list, it is uncommitted. */
if (s->id == txnid)
goto done;
@@ -243,6 +244,7 @@ __txn_get_snapshot_int(WT_SESSION_IMPL *session, bool publish)
WT_ORDERED_READ(session_cnt, conn->session_cnt);
WT_STAT_CONN_INCR(session, txn_walk_sessions);
for (i = 0, s = txn_global->txn_shared_list; i < session_cnt; i++, s++) {
+ WT_STAT_CONN_INCR(session, txn_sessions_walked);
/*
* Build our snapshot of any concurrent transaction IDs.
*
@@ -344,6 +346,7 @@ __txn_oldest_scan(WT_SESSION_IMPL *session, uint64_t *oldest_idp, uint64_t *last
WT_ORDERED_READ(session_cnt, conn->session_cnt);
WT_STAT_CONN_INCR(session, txn_walk_sessions);
for (i = 0, s = txn_global->txn_shared_list; i < session_cnt; i++, s++) {
+ WT_STAT_CONN_INCR(session, txn_sessions_walked);
/* Update the last running transaction ID. */
while ((id = s->id) != WT_TXN_NONE && WT_TXNID_LE(prev_oldest_id, id) &&
WT_TXNID_LT(id, last_running)) {
@@ -1046,6 +1049,9 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit,
WT_DECL_RET;
WT_TXN *txn;
WT_UPDATE *fix_upd, *tombstone, *upd;
+#ifdef HAVE_DIAGNOSTIC
+ WT_UPDATE *head_upd;
+#endif
size_t not_used;
uint32_t hs_btree_id;
bool upd_appended;
@@ -1057,12 +1063,18 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit,
WT_RET(__txn_search_prepared_op(session, op, cursorp, &upd));
+ __wt_verbose(session, WT_VERB_TRANSACTION,
+ "resolving prepared op for txnid: %" PRIu64 " that %s", txn->id,
+ commit ? "committed" : "roll backed");
/*
* Aborted updates can exist in the update chain of our transaction. Generally this will occur
* due to a reserved update. As such we should skip over these updates.
*/
for (; upd != NULL && upd->txnid == WT_TXN_ABORTED; upd = upd->next)
;
+#ifdef HAVE_DIAGNOSTIC
+ head_upd = upd;
+#endif
/*
* The head of the update chain is not a prepared update, which means all the prepared updates
@@ -1171,6 +1183,28 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit,
if (fix_upd != NULL)
WT_ERR(__txn_fixup_prepared_update(session, hs_cursor, fix_upd, commit));
+#ifdef HAVE_DIAGNOSTIC
+ for (; head_upd != NULL; head_upd = head_upd->next) {
+ /*
+ * Assert if we still have an update from the current transaction that hasn't been aborted.
+ * Only perform this check if aborting the prepared transaction.
+ */
+ WT_ASSERT(
+ session, commit || head_upd->txnid == WT_TXN_ABORTED || head_upd->txnid != txn->id);
+
+ if (head_upd->txnid == WT_TXN_ABORTED)
+ continue;
+
+ /*
+ * If we restored an update from the history store, it should be the last update on the
+ * chain.
+ */
+ if (upd_appended && head_upd->type == WT_UPDATE_STANDARD &&
+ F_ISSET(head_upd, WT_UPDATE_RESTORED_FROM_HS))
+ WT_ASSERT(session, head_upd->next == NULL);
+ }
+#endif
+
err:
if (hs_cursor != NULL)
WT_TRET(hs_cursor->close(hs_cursor));
@@ -1350,7 +1384,7 @@ __txn_mod_compare(const void *a, const void *b)
*/
if (aopt->type == WT_TXN_OP_BASIC_ROW || aopt->type == WT_TXN_OP_INMEM_ROW)
return (aopt->btree->collator == NULL ?
- __wt_lex_compare(&aopt->u.op_row.key, &bopt->u.op_row.key) :
+ __wt_lex_compare(&aopt->u.op_row.key, &bopt->u.op_row.key, false) :
0);
return (aopt->u.op_col.recno < bopt->u.op_col.recno);
}
@@ -2025,7 +2059,6 @@ __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[])
txn_global->current = txn_global->last_running = txn_global->metadata_pinned =
txn_global->oldest_id = WT_TXN_FIRST;
- WT_RET(__wt_spin_init(session, &txn_global->id_lock, "transaction id lock"));
WT_RWLOCK_INIT_TRACKED(session, &txn_global->rwlock, txn_global);
WT_RET(__wt_rwlock_init(session, &txn_global->visibility_rwlock));
@@ -2053,7 +2086,6 @@ __wt_txn_global_destroy(WT_SESSION_IMPL *session)
if (txn_global == NULL)
return;
- __wt_spin_destroy(session, &txn_global->id_lock);
__wt_rwlock_destroy(session, &txn_global->rwlock);
__wt_rwlock_destroy(session, &txn_global->visibility_rwlock);
__wt_free(session, txn_global->txn_shared_list);
@@ -2127,7 +2159,7 @@ __wt_txn_global_shutdown(WT_SESSION_IMPL *session, const char **cfg)
}
s = NULL;
- WT_TRET(__wt_open_internal_session(conn, "close_ckpt", true, 0, &s));
+ WT_TRET(__wt_open_internal_session(conn, "close_ckpt", true, 0, 0, &s));
if (s != NULL) {
const char *checkpoint_cfg[] = {
WT_CONFIG_BASE(session, WT_SESSION_checkpoint), ckpt_cfg, NULL};
@@ -2152,7 +2184,7 @@ __wt_txn_global_shutdown(WT_SESSION_IMPL *session, const char **cfg)
* eviction.
*/
int
-__wt_txn_is_blocking(WT_SESSION_IMPL *session, bool conservative)
+__wt_txn_is_blocking(WT_SESSION_IMPL *session)
{
WT_TXN *txn;
WT_TXN_SHARED *txn_shared;
@@ -2166,10 +2198,6 @@ __wt_txn_is_blocking(WT_SESSION_IMPL *session, bool conservative)
if (F_ISSET(txn, WT_TXN_PREPARE))
return (0);
- /* The checkpoint transaction shouldn't be blocking but if it is don't roll it back. */
- if (WT_SESSION_IS_CHECKPOINT(session))
- return (0);
-
/*
* MongoDB can't (yet) handle rolling back read only transactions. For this reason, don't check
* unless there's at least one update or we're configured to time out thread operations (a way
@@ -2179,22 +2207,6 @@ __wt_txn_is_blocking(WT_SESSION_IMPL *session, bool conservative)
return (0);
/*
- * Be less aggressive about aborting the oldest transaction in the case of trying to make
- * forced eviction successful. Specifically excuse it if:
- * * Hasn't done many updates
- * * Is in the middle of a commit or abort
- *
- * This threshold that we're comparing the number of updates to is related and must be greater
- * than the threshold we use in reconciliation's "need split" helper. If we're going to rollback
- * a transaction, we need to have considered splitting the page in the case that its updates are
- * on a single page.
- */
- if (conservative &&
- (txn->mod_count < (10 + WT_REC_SPLIT_MIN_ITEMS_USE_MEM) ||
- F_ISSET(session, WT_SESSION_RESOLVING_TXN)))
- return (0);
-
- /*
* Check if either the transaction's ID or its pinned ID is equal to the oldest transaction ID.
*/
return (txn_shared->id == global_oldest || txn_shared->pinned_id == global_oldest ?
@@ -2337,6 +2349,7 @@ __wt_verbose_dump_txn(WT_SESSION_IMPL *session)
*/
WT_STAT_CONN_INCR(session, txn_walk_sessions);
for (i = 0, s = txn_global->txn_shared_list; i < session_cnt; i++, s++) {
+ WT_STAT_CONN_INCR(session, txn_sessions_walked);
/* Skip sessions with no active transaction */
if ((id = s->id) == WT_TXN_NONE && s->pinned_id == WT_TXN_NONE)
continue;
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index 5c6026cd176..dba739792a2 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -897,7 +897,13 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
*/
if (F_ISSET(hs_dhandle, WT_DHANDLE_OPEN)) {
time_start_hs = __wt_clock(session);
+ conn->txn_global.checkpoint_running_hs = true;
+ WT_STAT_CONN_SET(session, txn_checkpoint_running_hs, 1);
+
WT_WITH_DHANDLE(session, hs_dhandle, ret = __wt_checkpoint(session, cfg));
+
+ WT_STAT_CONN_SET(session, txn_checkpoint_running_hs, 0);
+ conn->txn_global.checkpoint_running_hs = false;
WT_ERR(ret);
/*
@@ -1870,8 +1876,9 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ASSERT(session, session->dhandle->checkpoint == NULL);
/* We must hold the metadata lock if checkpointing the metadata. */
- WT_ASSERT(
- session, !WT_IS_METADATA(session->dhandle) || F_ISSET(session, WT_SESSION_LOCKED_METADATA));
+ WT_ASSERT(session,
+ !WT_IS_METADATA(session->dhandle) ||
+ FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_METADATA));
WT_RET(__wt_config_gets_def(session, cfg, "force", 0, &cval));
force = cval.val != 0;
@@ -1942,6 +1949,16 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final)
return (__wt_set_return(session, EBUSY));
/*
+ * Make sure there isn't a potential race between backup copying the metadata and a checkpoint
+ * changing the metadata. Backup holds both the checkpoint and schema locks. Checkpoint should
+ * hold those also except on the final checkpoint during close. Confirm the caller either is the
+ * final checkpoint or holds at least one of the locks.
+ */
+ WT_ASSERT(session,
+ final ||
+ (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_CHECKPOINT) ||
+ FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA)));
+ /*
* Turn on metadata tracking if:
* - The session is not already doing metadata tracking.
* - The file was not bulk loaded.
diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c
index 2baf6131d97..a1cbbdc564a 100644
--- a/src/third_party/wiredtiger/src/txn/txn_recover.c
+++ b/src/third_party/wiredtiger/src/txn/txn_recover.c
@@ -558,6 +558,7 @@ __recovery_correct_write_gen(WT_SESSION_IMPL *session)
WT_DECL_RET;
char *config, *uri;
+ uri = NULL;
WT_RET(__wt_metadata_cursor(session, &cursor));
while ((ret = cursor->next(cursor)) == 0) {
WT_ERR(cursor->get_key(cursor, &uri));
@@ -573,6 +574,8 @@ __recovery_correct_write_gen(WT_SESSION_IMPL *session)
WT_ERR_NOTFOUND_OK(ret, false);
err:
+ if (ret != 0 && uri != NULL)
+ __wt_err(session, ret, "unable to correct write gen for %s", uri);
WT_TRET(__wt_metadata_cursor_release(session, &cursor));
return (ret);
}
@@ -586,6 +589,7 @@ static int
__recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config)
{
WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
WT_LSN lsn;
uint32_t fileid, lsnfile, lsnoffset;
@@ -606,7 +610,9 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config)
"metadata corruption: files %s and %s have the same file ID %u", uri,
r->files[fileid].uri, fileid);
WT_RET(__wt_strdup(r->session, uri, &r->files[fileid].uri));
- WT_RET(__wt_config_getones(r->session, config, "checkpoint_lsn", &cval));
+ if ((ret = __wt_config_getones(r->session, config, "checkpoint_lsn", &cval)) != 0)
+ WT_RET_MSG(
+ r->session, ret, "Failed recovery setup for %s: cannot parse config '%s'", uri, config);
/* If there is no checkpoint logged for the file, apply everything. */
if (cval.type != WT_CONFIG_ITEM_STRUCT)
WT_INIT_LSN(&lsn);
@@ -614,8 +620,9 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config)
else if (sscanf(cval.str, "(%" SCNu32 ",%" SCNu32 ")", &lsnfile, &lsnoffset) == 2)
WT_SET_LSN(&lsn, lsnfile, lsnoffset);
else
- WT_RET_MSG(
- r->session, EINVAL, "Failed to parse checkpoint LSN '%.*s'", (int)cval.len, cval.str);
+ WT_RET_MSG(r->session, EINVAL,
+ "Failed recovery setup for %s: cannot parse checkpoint LSN '%.*s'", uri, (int)cval.len,
+ cval.str);
WT_ASSIGN_LSN(&r->files[fileid].ckpt_lsn, &lsn);
__wt_verbose(r->session, WT_VERB_RECOVERY,
@@ -627,7 +634,9 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config)
WT_ASSIGN_LSN(&r->max_ckpt_lsn, &lsn);
/* Update the base write gen based on this file's configuration. */
- return (__wt_metadata_update_base_write_gen(r->session, config));
+ if ((ret = __wt_metadata_update_base_write_gen(r->session, config)) != 0)
+ WT_RET_MSG(r->session, ret, "Failed recovery setup for %s: cannot update write gen", uri);
+ return (0);
}
/*
@@ -786,7 +795,8 @@ __wt_txn_recover(WT_SESSION_IMPL *session, const char *cfg[])
was_backup = F_ISSET(conn, WT_CONN_WAS_BACKUP);
/* We need a real session for recovery. */
- WT_RET(__wt_open_internal_session(conn, "txn-recover", false, WT_SESSION_NO_LOGGING, &session));
+ WT_RET(
+ __wt_open_internal_session(conn, "txn-recover", false, WT_SESSION_NO_LOGGING, 0, &session));
r.session = session;
WT_MAX_LSN(&r.max_ckpt_lsn);
WT_MAX_LSN(&r.max_rec_lsn);
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index 4f4edaec110..de2ff910072 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -377,10 +377,14 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page
* stop timestamp if the original update's commit timestamp is out of order. We may see
* records newer than or equal to the onpage value if eviction runs concurrently with
* checkpoint. In that case, don't verify the first record.
+ *
+ * If we have fixed the out-of-order timestamps, then the newer update reinserted with an
+ * older timestamp may have a durable timestamp that is smaller than the current stop
+ * durable timestamp.
*/
WT_ASSERT(session,
hs_stop_durable_ts <= newer_hs_durable_ts || hs_start_ts == hs_stop_durable_ts ||
- first_record);
+ hs_start_ts == newer_hs_durable_ts || first_record);
if (hs_stop_durable_ts < newer_hs_durable_ts)
WT_STAT_CONN_DATA_INCR(session, txn_rts_hs_stop_older_than_newer_start);
@@ -419,7 +423,7 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page
__wt_timestamp_to_string(hs_durable_ts, ts_string[1]),
__wt_timestamp_to_string(hs_stop_durable_ts, ts_string[2]),
__wt_timestamp_to_string(rollback_timestamp, ts_string[3]), hs_tw->start_txn, type);
- WT_ASSERT(session, hs_tw->start_ts < unpack->tw.start_ts);
+ WT_ASSERT(session, hs_tw->start_ts <= unpack->tw.start_ts);
valid_update_found = true;
break;
}
@@ -1408,7 +1412,7 @@ __rollback_to_stable_btree_apply(WT_SESSION_IMPL *session)
__wt_timestamp_to_string(rollback_timestamp, ts_string[0]),
__wt_timestamp_to_string(txn_global->oldest_timestamp, ts_string[1]));
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA));
WT_RET(__wt_metadata_cursor(session, &cursor));
if (F_ISSET(S2C(session), WT_CONN_RECOVERING))
@@ -1586,39 +1590,58 @@ err:
* Rollback all modifications with timestamps more recent than the passed in timestamp.
*/
static int
-__rollback_to_stable(WT_SESSION_IMPL *session)
+__rollback_to_stable(WT_SESSION_IMPL *session, bool no_ckpt)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ WT_TXN_GLOBAL *txn_global;
conn = S2C(session);
+ txn_global = &conn->txn_global;
+
+ /*
+ * Rollback to stable should ignore tombstones in the history store since it needs to scan the
+ * entire table sequentially.
+ */
+ F_SET(session, WT_SESSION_ROLLBACK_TO_STABLE);
- WT_RET(__rollback_to_stable_check(session));
+ WT_ERR(__rollback_to_stable_check(session));
/*
* Allocate a non-durable btree bitstring. We increment the global value before using it, so the
* current value is already in use, and hence we need to add one here.
*/
conn->stable_rollback_maxfile = conn->next_file_id + 1;
- WT_WITH_SCHEMA_LOCK(session, ret = __rollback_to_stable_btree_apply(session));
+ WT_ERR(__rollback_to_stable_btree_apply(session));
+
+ /* Rollback the global durable timestamp to the stable timestamp. */
+ txn_global->has_durable_timestamp = txn_global->has_stable_timestamp;
+ txn_global->durable_timestamp = txn_global->stable_timestamp;
+
+ /*
+ * If the configuration is not in-memory, forcibly log a checkpoint after rollback to stable to
+ * ensure that both in-memory and on-disk versions are the same unless caller requested for no
+ * checkpoint.
+ */
+ if (!F_ISSET(conn, WT_CONN_IN_MEMORY) && !no_ckpt)
+ WT_ERR(session->iface.checkpoint(&session->iface, "force=1"));
+err:
+ F_CLR(session, WT_SESSION_ROLLBACK_TO_STABLE);
return (ret);
}
/*
* __wt_rollback_to_stable --
- * Rollback all modifications with timestamps more recent than the passed in timestamp.
+ * Rollback the database to the stable timestamp.
*/
int
__wt_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[], bool no_ckpt)
{
WT_DECL_RET;
- WT_TXN_GLOBAL *txn_global;
WT_UNUSED(cfg);
- txn_global = &S2C(session)->txn_global;
-
/*
* Don't use the connection's default session: we are working on data handles and (a) don't want
* to cache all of them forever, plus (b) can't guarantee that no other method will be called
@@ -1626,30 +1649,13 @@ __wt_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[], bool no_ckp
* rollback to stable doesn't generate log records.
*/
WT_RET(__wt_open_internal_session(S2C(session), "txn rollback_to_stable", true,
- F_MASK(session, WT_SESSION_NO_LOGGING), &session));
+ F_MASK(session, WT_SESSION_NO_LOGGING), 0, &session));
- /*
- * Rollback to stable should ignore tombstones in the history store since it needs to scan the
- * entire table sequentially.
- */
WT_STAT_CONN_SET(session, txn_rollback_to_stable_running, 1);
- F_SET(session, WT_SESSION_ROLLBACK_TO_STABLE);
- ret = __rollback_to_stable(session);
- F_CLR(session, WT_SESSION_ROLLBACK_TO_STABLE);
+ WT_WITH_CHECKPOINT_LOCK(
+ session, WT_WITH_SCHEMA_LOCK(session, ret = __rollback_to_stable(session, no_ckpt)));
WT_STAT_CONN_SET(session, txn_rollback_to_stable_running, 0);
- WT_RET(ret);
- /* Rollback the global durable timestamp to the stable timestamp. */
- txn_global->has_durable_timestamp = txn_global->has_stable_timestamp;
- txn_global->durable_timestamp = txn_global->stable_timestamp;
-
- /*
- * If the configuration is not in-memory, forcibly log a checkpoint after rollback to stable to
- * ensure that both in-memory and on-disk versions are the same unless caller requested for no
- * checkpoint.
- */
- if (!F_ISSET(S2C(session), WT_CONN_IN_MEMORY) && !no_ckpt)
- WT_TRET(session->iface.checkpoint(&session->iface, "force=1"));
WT_TRET(__wt_session_close_internal(session));
return (ret);
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index 6b046373187..6acd265fd2d 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -113,6 +113,7 @@ __wt_txn_get_pinned_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *tsp, uin
WT_ORDERED_READ(session_cnt, conn->session_cnt);
WT_STAT_CONN_INCR(session, txn_walk_sessions);
for (i = 0, s = txn_global->txn_shared_list; i < session_cnt; i++, s++) {
+ WT_STAT_CONN_INCR(session, txn_sessions_walked);
__txn_get_read_timestamp(s, &tmp_read_ts);
/*
* A zero timestamp is possible here only when the oldest timestamp is not accounted for.
@@ -172,6 +173,7 @@ __txn_global_query_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *tsp, cons
WT_ORDERED_READ(session_cnt, conn->session_cnt);
WT_STAT_CONN_INCR(session, txn_walk_sessions);
for (i = 0, s = txn_global->txn_shared_list; i < session_cnt; i++, s++) {
+ WT_STAT_CONN_INCR(session, txn_sessions_walked);
__txn_get_durable_timestamp(s, &tmpts);
if (tmpts != WT_TS_NONE && --tmpts < ts)
ts = tmpts;
@@ -504,6 +506,7 @@ __txn_assert_after_reads(WT_SESSION_IMPL *session, const char *op, wt_timestamp_
WT_ORDERED_READ(session_cnt, S2C(session)->session_cnt);
WT_STAT_CONN_INCR(session, txn_walk_sessions);
for (i = 0, s = txn_global->txn_shared_list; i < session_cnt; i++, s++) {
+ WT_STAT_CONN_INCR(session, txn_sessions_walked);
__txn_get_read_timestamp(s, &tmp_timestamp);
if (tmp_timestamp != WT_TS_NONE && tmp_timestamp >= ts) {
__wt_readunlock(session, &txn_global->rwlock);
diff --git a/src/third_party/wiredtiger/test/cppsuite/configs/config_example_test_default.txt b/src/third_party/wiredtiger/test/cppsuite/configs/config_example_test_default.txt
new file mode 100644
index 00000000000..b46fed225eb
--- /dev/null
+++ b/src/third_party/wiredtiger/test/cppsuite/configs/config_example_test_default.txt
@@ -0,0 +1,48 @@
+# Same parameters as config_poc_test_default
+duration_seconds=10,
+cache_size_mb=1000,
+enable_logging=true,
+runtime_monitor=
+(
+ op_count=3,
+ interval=s,
+ stat_cache_size=
+ (
+ enabled=true,
+ limit=100
+ )
+),
+timestamp_manager=
+(
+ enabled=true,
+ oldest_lag=1,
+ stable_lag=1
+),
+workload_generator=
+(
+ collection_count=2,
+ key_count=5,
+ key_size=1,
+ ops_per_transaction=
+ (
+ min=5,
+ max=50
+ ),
+ read_threads=1,
+ update_threads=1,
+ value_size=10,
+ update_config=
+ (
+ op_count=1,
+ interval=s
+ ),
+ insert_config=
+ (
+ op_count=1,
+ interval=s
+ )
+),
+workload_tracking=
+(
+ enabled=true
+)
diff --git a/src/third_party/wiredtiger/test/cppsuite/configs/config_poc_test_default.txt b/src/third_party/wiredtiger/test/cppsuite/configs/config_poc_test_default.txt
index 52f4f536876..c677142234d 100644
--- a/src/third_party/wiredtiger/test/cppsuite/configs/config_poc_test_default.txt
+++ b/src/third_party/wiredtiger/test/cppsuite/configs/config_poc_test_default.txt
@@ -1,40 +1,50 @@
# Sets up a basic database with 2 collections and 5 keys and run thread for 10 seconds.
# All components are enabled.
# Used as a basic test for the framework.
-duration_seconds=10
-cache_size_mb=1000
-enable_logging=true
+duration_seconds=10,
+cache_size_mb=1000,
+enable_logging=true,
runtime_monitor=
-{
- rate_per_second=3
+(
+ op_count=3,
+ interval=s,
stat_cache_size=
- {
- enabled=true
+ (
+ enabled=true,
limit=100
- }
-}
+ )
+),
timestamp_manager=
-{
- enabled=true
- oldest_lag=1
+(
+ enabled=true,
+ oldest_lag=1,
stable_lag=1
-}
+),
workload_generator=
-{
- collection_count=2
- key_count=5
- key_format=i
- key_size=1
+(
+ collection_count=2,
+ key_count=5,
+ key_size=1,
ops_per_transaction=
- {
- min=5
+ (
+ min=5,
max=50
- }
- read_threads=1
+ ),
+ read_threads=1,
+ update_threads=1,
+ update_config=
+ (
+ op_count=1,
+ interval=s
+ ),
+ insert_config=
+ (
+ op_count=1,
+ interval=s
+ ),
value_size=10
- value_format=S
-}
+),
workload_tracking=
-{
+(
enabled=true
-}
+)
diff --git a/src/third_party/wiredtiger/test/cppsuite/configs/config_poc_test_stress.txt b/src/third_party/wiredtiger/test/cppsuite/configs/config_poc_test_stress.txt
new file mode 100644
index 00000000000..6067bea3983
--- /dev/null
+++ b/src/third_party/wiredtiger/test/cppsuite/configs/config_poc_test_stress.txt
@@ -0,0 +1,39 @@
+# Sets up a basic database with 2 collections and 50000 keys and run thread for 10 seconds.
+# All components are enabled.
+# Used as a stress test for the framework.
+duration_seconds=10,
+cache_size_mb=5000,
+enable_logging=true,
+runtime_monitor=
+(
+ rate_per_second=3,
+ stat_cache_size=
+ (
+ enabled=true,
+ limit=100
+ )
+),
+timestamp_manager=
+(
+ enabled=true,
+ oldest_lag=1,
+ stable_lag=1
+),
+workload_generator=
+(
+ collection_count=2,
+ key_count=50000,
+ key_size=10,
+ ops_per_transaction=
+ (
+ min=5,
+ max=50
+ ),
+ read_threads=1,
+ update_threads=1,
+ value_size=2000
+),
+workload_tracking=
+(
+ enabled=true
+)
diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/connection_manager.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/connection_manager.h
index 23df4dfc001..29c76b59a2b 100644
--- a/src/third_party/wiredtiger/test/cppsuite/test_harness/connection_manager.h
+++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/connection_manager.h
@@ -36,8 +36,8 @@ extern "C" {
#include "wiredtiger.h"
}
-#include "api_const.h"
-#include "debug_utils.h"
+#include "util/api_const.h"
+#include "util/debug_utils.h"
namespace test_harness {
/*
diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/component.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/core/component.h
index 466fdbe890e..91b165d8f29 100644
--- a/src/third_party/wiredtiger/test/cppsuite/test_harness/component.h
+++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/core/component.h
@@ -30,6 +30,7 @@
#define COMPONENT_H
#include "configuration.h"
+#include "throttle.h"
namespace test_harness {
/*
@@ -38,7 +39,7 @@ namespace test_harness {
*/
class component {
public:
- component(configuration *config) : _enabled(true), _running(false), _config(config) {}
+ component(const std::string &name, configuration *config) : _name(name), _config(config) {}
~component()
{
@@ -56,13 +57,35 @@ class component {
virtual void
load()
{
- _running = true;
+ debug_print("Loading component: " + _name, DEBUG_INFO);
+ _enabled = _config->get_optional_bool(ENABLED, true);
+ _throttle = throttle(_config);
+ /* If we're not enabled we shouldn't be running. */
+ _running = _enabled;
}
/*
- * The run phase encompasses all operations that occur during the primary phase of the workload.
+ * The run function provides a top level loop that calls the do_work function every X seconds as
+ * defined by the throttle. Each run() method defined by the components is called in its own
+ * thread by the top level test class.
+ *
+ * If a component does not wish to use the standard run function, it can be overloaded.
*/
- virtual void run() = 0;
+ virtual void
+ run()
+ {
+ debug_print("Running component: " + _name, DEBUG_INFO);
+ while (_enabled && _running) {
+ do_work();
+ _throttle.sleep();
+ }
+ }
+
+ virtual void
+ do_work()
+ {
+ /* Not implemented. */
+ }
bool
is_enabled() const
@@ -78,15 +101,18 @@ class component {
virtual void
finish()
{
+ debug_print("Finishing component: " + _name, DEBUG_INFO);
_running = false;
}
- static const std::string name;
-
protected:
- bool _enabled;
- volatile bool _running;
+ bool _enabled = false;
+ volatile bool _running = false;
+ throttle _throttle;
configuration *_config;
+
+ private:
+ std::string _name;
};
} // namespace test_harness
#endif
diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/configuration.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/core/configuration.h
index adae5b1b8c5..c2b9494487f 100644
--- a/src/third_party/wiredtiger/test/cppsuite/test_harness/configuration.h
+++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/core/configuration.h
@@ -35,6 +35,8 @@ extern "C" {
#include "test_util.h"
}
+enum class types { BOOL, INT, STRING, STRUCT };
+
namespace test_harness {
class configuration {
public:
@@ -74,70 +76,105 @@ class configuration {
}
/*
- * Wrapper functions for retrieving basic configuration values. Ideally the tests can avoid
- * using the config item struct provided by wiredtiger. However if they still wish to use it the
- * get and next functions can be used.
+ * Wrapper functions for retrieving basic configuration values. Ideally tests can avoid using
+ * the config item struct provided by wiredtiger.
+ *
+ * When getting a configuration value that may not exist for that configuration string or
+ * component, the optional forms of the functions can be used. In this case a default value must
+ * be passed and it will be set to that value.
*/
- int
- get_string(const std::string &key, std::string &value) const
+ std::string
+ get_string(const std::string &key)
{
- WT_CONFIG_ITEM temp_value;
- testutil_check(_config_parser->get(_config_parser, key.c_str(), &temp_value));
- if (temp_value.type != WT_CONFIG_ITEM::WT_CONFIG_ITEM_STRING ||
- temp_value.type != WT_CONFIG_ITEM::WT_CONFIG_ITEM_ID)
- return (-1);
- value = std::string(temp_value.str, temp_value.len);
- return (0);
+ return get<std::string>(key, false, types::STRING, "", config_item_to_string);
}
- int
- get_bool(const std::string &key, bool &value) const
+ std::string
+ get_optional_string(const std::string &key, const std::string &def)
{
- WT_CONFIG_ITEM temp_value;
- testutil_check(_config_parser->get(_config_parser, key.c_str(), &temp_value));
- if (temp_value.type != WT_CONFIG_ITEM::WT_CONFIG_ITEM_BOOL)
- return (-1);
- value = (temp_value.val != 0);
- return (0);
+ return get<std::string>(key, true, types::STRING, def, config_item_to_string);
}
- int
- get_int(const std::string &key, int64_t &value) const
+ bool
+ get_bool(const std::string &key)
{
- WT_CONFIG_ITEM temp_value;
- testutil_check(_config_parser->get(_config_parser, key.c_str(), &temp_value));
- if (temp_value.type != WT_CONFIG_ITEM::WT_CONFIG_ITEM_NUM)
- return (-1);
- value = temp_value.val;
- return (0);
+ return get<bool>(key, false, types::BOOL, false, config_item_to_bool);
}
- configuration *
- get_subconfig(const std::string &key) const
+ bool
+ get_optional_bool(const std::string &key, const bool def)
{
- WT_CONFIG_ITEM subconfig;
- testutil_check(get(key, &subconfig));
- return new configuration(subconfig);
+ return get<bool>(key, true, types::BOOL, def, config_item_to_bool);
}
- /*
- * Basic configuration parsing helper functions.
- */
- int
- next(WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) const
+ int64_t
+ get_int(const std::string &key)
+ {
+ return get<int64_t>(key, false, types::INT, 0, config_item_to_int);
+ }
+
+ int64_t
+ get_optional_int(const std::string &key, const int64_t def)
{
- return (_config_parser->next(_config_parser, key, value));
+ return get<int64_t>(key, true, types::INT, def, config_item_to_int);
}
- int
- get(const std::string &key, WT_CONFIG_ITEM *value) const
+ configuration *
+ get_subconfig(const std::string &key)
{
- return (_config_parser->get(_config_parser, key.c_str(), value));
+ return get<configuration *>(key, false, types::STRUCT, nullptr,
+ [](WT_CONFIG_ITEM item) { return new configuration(item); });
}
private:
+ static bool
+ config_item_to_bool(const WT_CONFIG_ITEM item)
+ {
+ return (item.val != 0);
+ }
+
+ static int64_t
+ config_item_to_int(const WT_CONFIG_ITEM item)
+ {
+ return (item.val);
+ }
+
+ static std::string
+ config_item_to_string(const WT_CONFIG_ITEM item)
+ {
+ return std::string(item.str, item.len);
+ }
+
+ template <typename T>
+ T
+ get(const std::string &key, bool optional, types type, T def, T (*func)(WT_CONFIG_ITEM item))
+ {
+ WT_DECL_RET;
+ WT_CONFIG_ITEM value = {"", 0, 1, WT_CONFIG_ITEM::WT_CONFIG_ITEM_BOOL};
+ const char *error_msg = "Configuration value doesn't match requested type";
+
+ ret = _config_parser->get(_config_parser, key.c_str(), &value);
+ if (ret == WT_NOTFOUND && optional)
+ return (def);
+ else if (ret != 0)
+ testutil_die(ret, "Error while finding config");
+
+ if (type == types::STRING &&
+ (value.type != WT_CONFIG_ITEM::WT_CONFIG_ITEM_STRING &&
+ value.type != WT_CONFIG_ITEM::WT_CONFIG_ITEM_ID))
+ testutil_die(-1, error_msg);
+ else if (type == types::BOOL && value.type != WT_CONFIG_ITEM::WT_CONFIG_ITEM_BOOL)
+ testutil_die(-1, error_msg);
+ else if (type == types::INT && value.type != WT_CONFIG_ITEM::WT_CONFIG_ITEM_NUM)
+ testutil_die(-1, error_msg);
+ else if (type == types::STRUCT && value.type != WT_CONFIG_ITEM::WT_CONFIG_ITEM_STRUCT)
+ testutil_die(-1, error_msg);
+
+ return func(value);
+ }
+
std::string _config;
- WT_CONFIG_PARSER *_config_parser;
+ WT_CONFIG_PARSER *_config_parser = nullptr;
};
} // namespace test_harness
diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/core/throttle.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/core/throttle.h
new file mode 100644
index 00000000000..bfe5816c70e
--- /dev/null
+++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/core/throttle.h
@@ -0,0 +1,73 @@
+/*-
+ * Public Domain 2014-present MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef THROTTLE_H
+#define THROTTLE_H
+
+#include <thread>
+
+#include "configuration.h"
+
+namespace test_harness {
+class throttle {
+ public:
+ throttle(const int64_t op_count, const char interval)
+ {
+ testutil_assert(op_count > 0);
+ /* Lazily compute the ms for every type. */
+ if (interval == 's')
+ _ms = 1000 / op_count;
+ else if (interval == 'm')
+ _ms = (60 * 1000) / op_count;
+ else if (interval == 'h')
+ _ms = (60 * 60 * 1000) / op_count;
+ else
+ testutil_die(-1, "Specified throttle interval not supported.");
+ }
+
+ throttle(configuration *config)
+ : throttle(
+ config->get_optional_int(OP_COUNT, 1), config->get_optional_string(INTERVAL, "s")[0])
+ {
+ }
+
+ /* Default to a second per operation. */
+ throttle() : throttle(1, 's') {}
+
+ void
+ sleep()
+ {
+ std::this_thread::sleep_for(std::chrono::milliseconds(_ms));
+ }
+
+ private:
+ uint64_t _ms = 1000;
+};
+} // namespace test_harness
+
+#endif
diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/runtime_monitor.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/runtime_monitor.h
index e81a8bfe47b..b7897eb39f1 100644
--- a/src/third_party/wiredtiger/test/cppsuite/test_harness/runtime_monitor.h
+++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/runtime_monitor.h
@@ -29,16 +29,15 @@
#ifndef RUNTIME_MONITOR_H
#define RUNTIME_MONITOR_H
-#include <thread>
-
extern "C" {
#include "wiredtiger.h"
}
-#include "api_const.h"
-#include "component.h"
+#include "util/debug_utils.h"
+#include "util/api_const.h"
+#include "core/component.h"
+#include "core/throttle.h"
#include "connection_manager.h"
-#include "debug_utils.h"
namespace test_harness {
/* Static statistic get function. */
@@ -55,8 +54,7 @@ class statistic {
public:
statistic(configuration *config)
{
- testutil_assert(config != nullptr);
- testutil_check(config->get_bool(ENABLED, _enabled));
+ _enabled = config->get_bool(ENABLED);
}
/* Check that the given statistic is within bounds. */
@@ -79,7 +77,7 @@ class cache_limit_statistic : public statistic {
public:
cache_limit_statistic(configuration *config) : statistic(config)
{
- testutil_check(config->get_int(LIMIT, limit));
+ limit = config->get_int(LIMIT);
}
void
@@ -104,7 +102,7 @@ class cache_limit_statistic : public statistic {
debug_print(error_string, DEBUG_ERROR);
testutil_assert(use_percent < limit);
} else
- debug_print("Usage: " + std::to_string(use_percent), DEBUG_TRACE);
+ debug_print("Cache usage: " + std::to_string(use_percent), DEBUG_TRACE);
}
private:
@@ -117,7 +115,7 @@ class cache_limit_statistic : public statistic {
*/
class runtime_monitor : public component {
public:
- runtime_monitor(configuration *config) : component(config), _ops(1) {}
+ runtime_monitor(configuration *config) : component("runtime_monitor", config) {}
~runtime_monitor()
{
@@ -135,37 +133,35 @@ class runtime_monitor : public component {
{
configuration *sub_config;
std::string statistic_list;
- /* Parse the configuration for the runtime monitor. */
- testutil_check(_config->get_int(RATE_PER_SECOND, _ops));
- /* Load known statistics. */
- sub_config = _config->get_subconfig(STAT_CACHE_SIZE);
- _stats.push_back(new cache_limit_statistic(sub_config));
- delete sub_config;
+ /* Load the general component things. */
component::load();
+
+ if (_enabled) {
+ _session = connection_manager::instance().create_session();
+
+ /* Open our statistic cursor. */
+ _session->open_cursor(_session, STATISTICS_URI, nullptr, nullptr, &_cursor);
+
+ /* Load known statistics. */
+ sub_config = _config->get_subconfig(STAT_CACHE_SIZE);
+ _stats.push_back(new cache_limit_statistic(sub_config));
+ delete sub_config;
+ }
}
void
- run()
+ do_work()
{
- WT_SESSION *session = connection_manager::instance().create_session();
- WT_CURSOR *cursor = nullptr;
-
- /* Open a statistics cursor. */
- testutil_check(session->open_cursor(session, STATISTICS_URI, nullptr, nullptr, &cursor));
-
- while (_running) {
- /* Sleep so that we do x operations per second. To be replaced by throttles. */
- std::this_thread::sleep_for(std::chrono::milliseconds(1000 / _ops));
- for (const auto &it : _stats) {
- if (it->is_enabled())
- it->check(cursor);
- }
+ for (const auto &it : _stats) {
+ if (it->is_enabled())
+ it->check(_cursor);
}
}
private:
- int64_t _ops;
+ WT_CURSOR *_cursor = nullptr;
+ WT_SESSION *_session = nullptr;
std::vector<statistic *> _stats;
};
} // namespace test_harness
diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/test.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/test.h
index e11d17ab51b..a753e131f0f 100644
--- a/src/third_party/wiredtiger/test/cppsuite/test_harness/test.h
+++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/test.h
@@ -38,34 +38,32 @@ extern "C" {
#include "wiredtiger.h"
}
-#include "api_const.h"
-#include "component.h"
-#include "configuration.h"
+#include "util/api_const.h"
+#include "core/component.h"
+#include "core/configuration.h"
#include "connection_manager.h"
#include "runtime_monitor.h"
#include "timestamp_manager.h"
#include "thread_manager.h"
#include "workload_generator.h"
-#include "workload_validation.h"
+#include "workload/workload_validation.h"
namespace test_harness {
/*
* The base class for a test, the standard usage pattern is to just call run().
*/
-class test {
+class test : public database_operation {
public:
test(const std::string &config, const std::string &name)
{
- _configuration = new configuration(name, config);
- _runtime_monitor = new runtime_monitor(_configuration->get_subconfig(RUNTIME_MONITOR));
- _timestamp_manager =
- new timestamp_manager(_configuration->get_subconfig(TIMESTAMP_MANAGER));
- _workload_tracking = new workload_tracking(_configuration->get_subconfig(WORKLOAD_TRACKING),
+ _config = new configuration(name, config);
+ _runtime_monitor = new runtime_monitor(_config->get_subconfig(RUNTIME_MONITOR));
+ _timestamp_manager = new timestamp_manager(_config->get_subconfig(TIMESTAMP_MANAGER));
+ _workload_tracking = new workload_tracking(_config->get_subconfig(WORKLOAD_TRACKING),
OPERATION_TRACKING_TABLE_CONFIG, TABLE_OPERATION_TRACKING, SCHEMA_TRACKING_TABLE_CONFIG,
TABLE_SCHEMA_TRACKING);
- _workload_generator =
- new workload_generator(_configuration->get_subconfig(WORKLOAD_GENERATOR),
- _timestamp_manager, _workload_tracking);
+ _workload_generator = new workload_generator(
+ _config->get_subconfig(WORKLOAD_GENERATOR), this, _timestamp_manager, _workload_tracking);
_thread_manager = new thread_manager();
/*
* Ordering is not important here, any dependencies between components should be resolved
@@ -77,13 +75,13 @@ class test {
~test()
{
- delete _configuration;
+ delete _config;
delete _runtime_monitor;
delete _timestamp_manager;
delete _thread_manager;
delete _workload_generator;
delete _workload_tracking;
- _configuration = nullptr;
+ _config = nullptr;
_runtime_monitor = nullptr;
_timestamp_manager = nullptr;
_thread_manager = nullptr;
@@ -103,15 +101,16 @@ class test {
virtual void
run()
{
- int64_t cache_size_mb = 100, duration_seconds = 0;
+ int64_t cache_size_mb, duration_seconds;
bool enable_logging, is_success = true;
/* Build the database creation config string. */
std::string db_create_config = CONNECTION_CREATE;
- testutil_check(_configuration->get_int(CACHE_SIZE_MB, cache_size_mb));
+ /* Get the cache size, and turn logging on or off. */
+ cache_size_mb = _config->get_int(CACHE_SIZE_MB);
db_create_config += ",statistics=(fast),cache_size=" + std::to_string(cache_size_mb) + "MB";
- testutil_check(_configuration->get_bool(ENABLE_LOGGING, enable_logging));
+ enable_logging = _config->get_bool(ENABLE_LOGGING);
db_create_config += ",log=(enabled=" + std::string(enable_logging ? "true" : "false") + ")";
/* Set up the test environment. */
@@ -125,12 +124,12 @@ class test {
for (const auto &it : _components)
_thread_manager->add_thread(&component::run, it);
- /* Sleep duration seconds. */
- testutil_check(_configuration->get_int(DURATION_SECONDS, duration_seconds));
+ /* The test will run for the duration as defined in the config. */
+ duration_seconds = _config->get_int(DURATION_SECONDS);
testutil_assert(duration_seconds >= 0);
std::this_thread::sleep_for(std::chrono::seconds(duration_seconds));
- /* End the test. */
+ /* End the test by calling finish on all known components. */
for (const auto &it : _components)
it->finish();
_thread_manager->join();
@@ -139,7 +138,7 @@ class test {
if (_workload_tracking->is_enabled()) {
workload_validation wv;
is_success = wv.validate(_workload_tracking->get_operation_table_name(),
- _workload_tracking->get_schema_table_name());
+ _workload_tracking->get_schema_table_name(), _workload_generator->get_database());
}
debug_print(is_success ? "SUCCESS" : "FAILED", DEBUG_INFO);
@@ -177,12 +176,12 @@ class test {
private:
std::string _name;
std::vector<component *> _components;
- configuration *_configuration;
- runtime_monitor *_runtime_monitor;
- thread_manager *_thread_manager;
- timestamp_manager *_timestamp_manager;
- workload_generator *_workload_generator;
- workload_tracking *_workload_tracking;
+ configuration *_config;
+ runtime_monitor *_runtime_monitor = nullptr;
+ thread_manager *_thread_manager = nullptr;
+ timestamp_manager *_timestamp_manager = nullptr;
+ workload_generator *_workload_generator = nullptr;
+ workload_tracking *_workload_tracking = nullptr;
};
} // namespace test_harness
diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/thread_manager.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/thread_manager.h
index 749f5c1d8f3..b7f736c169d 100644
--- a/src/third_party/wiredtiger/test/cppsuite/test_harness/thread_manager.h
+++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/thread_manager.h
@@ -31,7 +31,8 @@
#include <thread>
-#include "thread_context.h"
+#include "workload/database_operation.h"
+#include "workload/thread_context.h"
namespace test_harness {
/* Class that handles threads, from their initialization to their deletion. */
@@ -56,10 +57,10 @@ class thread_manager {
*/
template <typename Callable>
void
- add_thread(thread_context *tc, Callable &&fct)
+ add_thread(thread_context *tc, database_operation *db_operation, Callable &&fct)
{
tc->set_running(true);
- std::thread *t = new std::thread(fct, std::ref(*tc));
+ std::thread *t = new std::thread(fct, std::ref(*tc), std::ref(*db_operation));
_workers.push_back(t);
}
diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/timestamp_manager.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/timestamp_manager.h
index 8a5940c7637..96b5f6bc69c 100644
--- a/src/third_party/wiredtiger/test/cppsuite/test_harness/timestamp_manager.h
+++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/timestamp_manager.h
@@ -34,7 +34,7 @@
#include <sstream>
#include <thread>
-#include "component.h"
+#include "core/component.h"
namespace test_harness {
/*
@@ -44,63 +44,54 @@ namespace test_harness {
*/
class timestamp_manager : public component {
public:
- timestamp_manager(configuration *config)
- : /* _periodic_update_s is hardcoded to 1 second for now. */
- component(config), _increment_ts(0U), _latest_ts(0U), _oldest_lag(0), _oldest_ts(0U),
- _periodic_update_s(1), _stable_lag(0), _stable_ts(0U)
- {
- }
+ timestamp_manager(configuration *config) : component("timestamp_manager", config) {}
void
load()
{
- testutil_assert(_config != nullptr);
- testutil_check(_config->get_int(OLDEST_LAG, _oldest_lag));
+ component::load();
+
+ _oldest_lag = _config->get_int(OLDEST_LAG);
testutil_assert(_oldest_lag >= 0);
- testutil_check(_config->get_int(STABLE_LAG, _stable_lag));
+ _stable_lag = _config->get_int(STABLE_LAG);
testutil_assert(_stable_lag >= 0);
- testutil_check(_config->get_bool(ENABLED, _enabled));
- component::load();
}
void
- run()
+ do_work()
{
std::string config;
/* latest_ts_s represents the time component of the latest timestamp provided. */
wt_timestamp_t latest_ts_s;
- while (_enabled && _running) {
- /* Timestamps are checked periodically. */
- std::this_thread::sleep_for(std::chrono::seconds(_periodic_update_s));
- latest_ts_s = (_latest_ts >> 32);
- /*
- * Keep a time window between the latest and stable ts less than the max defined in the
- * configuration.
- */
- testutil_assert(latest_ts_s >= _stable_ts);
- if ((latest_ts_s - _stable_ts) > _stable_lag) {
- _stable_ts = latest_ts_s - _stable_lag;
- config += std::string(STABLE_TS) + "=" + decimal_to_hex(_stable_ts);
- }
-
- /*
- * Keep a time window between the stable and oldest ts less than the max defined in the
- * configuration.
- */
- testutil_assert(_stable_ts > _oldest_ts);
- if ((_stable_ts - _oldest_ts) > _oldest_lag) {
- _oldest_ts = _stable_ts - _oldest_lag;
- if (!config.empty())
- config += ",";
- config += std::string(OLDEST_TS) + "=" + decimal_to_hex(_oldest_ts);
- }
-
- /* Save the new timestamps. */
- if (!config.empty()) {
- connection_manager::instance().set_timestamp(config);
- config = "";
- }
+ /* Timestamps are checked periodically. */
+ latest_ts_s = (_latest_ts >> 32);
+ /*
+ * Keep a time window between the latest and stable ts less than the max defined in the
+ * configuration.
+ */
+ testutil_assert(latest_ts_s >= _stable_ts);
+ if ((latest_ts_s - _stable_ts) > _stable_lag) {
+ _stable_ts = latest_ts_s - _stable_lag;
+ config += std::string(STABLE_TS) + "=" + decimal_to_hex(_stable_ts);
+ }
+
+ /*
+ * Keep a time window between the stable and oldest ts less than the max defined in the
+ * configuration.
+ */
+ testutil_assert(_stable_ts >= _oldest_ts);
+ if ((_stable_ts - _oldest_ts) > _oldest_lag) {
+ _oldest_ts = _stable_ts - _oldest_lag;
+ if (!config.empty())
+ config += ",";
+ config += std::string(OLDEST_TS) + "=" + decimal_to_hex(_oldest_ts);
+ }
+
+ /* Save the new timestamps. */
+ if (!config.empty()) {
+ connection_manager::instance().set_timestamp(config);
+ config = "";
}
}
@@ -140,14 +131,13 @@ class timestamp_manager : public component {
}
private:
- const wt_timestamp_t _periodic_update_s;
std::atomic<wt_timestamp_t> _increment_ts;
- wt_timestamp_t _latest_ts, _oldest_ts, _stable_ts;
+ wt_timestamp_t _latest_ts = 0U, _oldest_ts = 0U, _stable_ts = 0U;
/*
* _oldest_lag is the time window between the stable and oldest timestamps.
* _stable_lag is the time window between the latest and stable timestamps.
*/
- int64_t _oldest_lag, _stable_lag;
+ int64_t _oldest_lag = 0, _stable_lag = 0;
};
} // namespace test_harness
diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/api_const.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/util/api_const.h
index 46a6a775677..2ea702b4848 100644
--- a/src/third_party/wiredtiger/test/cppsuite/test_harness/api_const.h
+++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/util/api_const.h
@@ -44,16 +44,21 @@ static const char *COLLECTION_COUNT = "collection_count";
static const char *DURATION_SECONDS = "duration_seconds";
static const char *ENABLED = "enabled";
static const char *ENABLE_LOGGING = "enable_logging";
+static const char *INTERVAL = "interval";
+static const char *INSERT_CONFIG = "insert_config";
static const char *KEY_COUNT = "key_count";
+static const char *KEY_SIZE = "key_size";
static const char *LIMIT = "limit";
static const char *MAX = "max";
static const char *MIN = "min";
static const char *OLDEST_LAG = "oldest_lag";
+static const char *OP_COUNT = "op_count";
static const char *OPS_PER_TRANSACTION = "ops_per_transaction";
-static const char *RATE_PER_SECOND = "rate_per_second";
static const char *READ_THREADS = "read_threads";
static const char *STABLE_LAG = "stable_lag";
static const char *STAT_CACHE_SIZE = "stat_cache_size";
+static const char *UPDATE_THREADS = "update_threads";
+static const char *UPDATE_CONFIG = "update_config";
static const char *VALUE_SIZE = "value_size";
/* WiredTiger API consts. */
@@ -63,6 +68,7 @@ static const char *OLDEST_TS = "oldest_timestamp";
static const char *STABLE_TS = "stable_timestamp";
/* Test harness consts. */
+static const char *DEFAULT_FRAMEWORK_SCHEMA = "key_format=S,value_format=S";
static const char *TABLE_OPERATION_TRACKING = "table:operation_tracking";
static const char *TABLE_SCHEMA_TRACKING = "table:schema_tracking";
static const char *STATISTICS_URI = "statistics:";
diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/debug_utils.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/util/debug_utils.h
index da09a08c9d8..da09a08c9d8 100644
--- a/src/third_party/wiredtiger/test/cppsuite/test_harness/debug_utils.h
+++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/util/debug_utils.h
diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/database_model.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/database_model.h
new file mode 100644
index 00000000000..07e7c007ea7
--- /dev/null
+++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/database_model.h
@@ -0,0 +1,89 @@
+/*-
+ * Public Domain 2014-present MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef DATABASE_MODEL_H
+#define DATABASE_MODEL_H
+
+#include <map>
+#include <string>
+
+namespace test_harness {
+
+/* Key/Value type. */
+typedef std::string key_value_t;
+
+/* Representation of key states. */
+struct key_t {
+ bool exists;
+};
+
+/* Iterator type used to iterate over keys that are stored in the data model. */
+typedef std::map<test_harness::key_value_t, test_harness::key_t>::const_iterator keys_iterator_t;
+
+/* Representation of a value. */
+struct value_t {
+ key_value_t value;
+};
+
+/* A collection is made of mapped Key objects. */
+struct collection_t {
+ std::map<key_value_t, key_t> keys;
+ std::map<key_value_t, value_t> *values = {nullptr};
+};
+
+/* Representation of the collections in memory. */
+class database {
+ public:
+ const keys_iterator_t
+ get_collection_keys_begin(const std::string &collection_name) const
+ {
+ return (collections.at(collection_name).keys.begin());
+ }
+
+ const keys_iterator_t
+ get_collection_keys_end(const std::string &collection_name) const
+ {
+ return (collections.at(collection_name).keys.end());
+ }
+
+ const std::vector<std::string>
+ get_collection_names() const
+ {
+ std::vector<std::string> collection_names;
+
+ for (auto const &it : collections)
+ collection_names.push_back(it.first);
+
+ return (collection_names);
+ }
+
+ std::map<std::string, collection_t> collections;
+};
+} // namespace test_harness
+
+#endif
diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/database_operation.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/database_operation.h
new file mode 100644
index 00000000000..7a88ed9b662
--- /dev/null
+++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/database_operation.h
@@ -0,0 +1,274 @@
+/*-
+ * Public Domain 2014-present MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef DATABASE_OPERATION_H
+#define DATABASE_OPERATION_H
+
+#include "database_model.h"
+#include "workload_tracking.h"
+#include "thread_context.h"
+
+namespace test_harness {
+class database_operation {
+ public:
+ /*
+ * Function that performs the following steps using the configuration that is defined by the
+ * test:
+ * - Create the working dir.
+ * - Open a connection.
+ * - Open a session.
+ * - Create n collections as per the configuration.
+ * - Open a cursor on each collection.
+ * - Insert m key/value pairs in each collection. Values are random strings which size is
+ * defined by the configuration.
+ * - Store in memory the created collections and the generated keys that were inserted.
+ */
+ virtual void
+ populate(database &database, timestamp_manager *timestamp_manager, configuration *config,
+ workload_tracking *tracking)
+ {
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ wt_timestamp_t ts;
+ int64_t collection_count, key_count, key_cpt, key_size, value_size;
+ std::string collection_name, cfg, home;
+ key_value_t generated_key, generated_value;
+ bool ts_enabled = timestamp_manager->is_enabled();
+
+ cursor = nullptr;
+ collection_count = key_count = key_size = value_size = 0;
+
+ /* Get a session. */
+ session = connection_manager::instance().create_session();
+ /* Create n collections as per the configuration and store each collection name. */
+ collection_count = config->get_int(COLLECTION_COUNT);
+ for (int i = 0; i < collection_count; ++i) {
+ collection_name = "table:collection" + std::to_string(i);
+ database.collections[collection_name] = {};
+ testutil_check(
+ session->create(session, collection_name.c_str(), DEFAULT_FRAMEWORK_SCHEMA));
+ ts = timestamp_manager->get_next_ts();
+ testutil_check(tracking->save(tracking_operation::CREATE, collection_name, 0, "", ts));
+ }
+ debug_print(std::to_string(collection_count) + " collections created", DEBUG_TRACE);
+
+ /* Open a cursor on each collection and use the configuration to insert key/value pairs. */
+ key_count = config->get_int(KEY_COUNT);
+ value_size = config->get_int(VALUE_SIZE);
+ testutil_assert(value_size > 0);
+ key_size = config->get_int(KEY_SIZE);
+ testutil_assert(key_size > 0);
+ /* Keys must be unique. */
+ testutil_assert(key_count <= pow(10, key_size));
+
+ for (const auto &it_collections : database.collections) {
+ collection_name = it_collections.first;
+ key_cpt = 0;
+ /* WiredTiger lets you open a cursor on a collection using the same pointer. When a
+ * session is closed, WiredTiger APIs close the cursors too. */
+ testutil_check(
+ session->open_cursor(session, collection_name.c_str(), NULL, NULL, &cursor));
+ for (size_t j = 0; j < key_count; ++j) {
+ /* Generation of a unique key. */
+ generated_key = number_to_string(key_size, key_cpt);
+ ++key_cpt;
+ /*
+ * Generation of a random string value using the size defined in the test
+ * configuration.
+ */
+ generated_value =
+ random_generator::random_generator::instance().generate_string(value_size);
+ ts = timestamp_manager->get_next_ts();
+ if (ts_enabled)
+ testutil_check(session->begin_transaction(session, ""));
+ testutil_check(insert(cursor, tracking, collection_name, generated_key.c_str(),
+ generated_value.c_str(), ts));
+ if (ts_enabled) {
+ cfg = std::string(COMMIT_TS) + "=" + timestamp_manager->decimal_to_hex(ts);
+ testutil_check(session->commit_transaction(session, cfg.c_str()));
+ }
+ /* Update the memory representation of the collections. */
+ database.collections[collection_name].keys[generated_key].exists = true;
+ /* Values are not stored here. */
+ database.collections[collection_name].values = nullptr;
+ }
+ }
+ debug_print("Populate stage done", DEBUG_TRACE);
+ }
+
+ /* Basic read operation that walks a cursors across all collections. */
+ virtual void
+ read_operation(thread_context &context, WT_SESSION *session)
+ {
+ WT_CURSOR *cursor;
+ std::vector<WT_CURSOR *> cursors;
+
+ testutil_assert(session != nullptr);
+ /* Get a cursor for each collection in collection_names. */
+ for (const auto &it : context.get_collection_names()) {
+ testutil_check(session->open_cursor(session, it.c_str(), NULL, NULL, &cursor));
+ cursors.push_back(cursor);
+ }
+
+ while (!cursors.empty() && context.is_running()) {
+ /* Walk each cursor. */
+ for (const auto &it : cursors) {
+ if (it->next(it) != 0)
+ it->reset(it);
+ }
+ }
+ }
+
+ /*
+ * Basic update operation that updates all the keys to a random value in each collection.
+ */
+ virtual void
+ update_operation(thread_context &context, WT_SESSION *session)
+ {
+ WT_CURSOR *cursor;
+ wt_timestamp_t ts;
+ std::vector<WT_CURSOR *> cursors;
+ std::string collection_name;
+ std::vector<std::string> collection_names = context.get_collection_names();
+ key_value_t generated_value, key;
+ int64_t cpt, value_size = context.get_value_size();
+
+ testutil_assert(session != nullptr);
+ /* Get a cursor for each collection in collection_names. */
+ for (const auto &it : collection_names) {
+ testutil_check(session->open_cursor(session, it.c_str(), NULL, NULL, &cursor));
+ cursors.push_back(cursor);
+ }
+
+ cpt = 0;
+ /* Walk each cursor. */
+ for (const auto &it : cursors) {
+ collection_name = collection_names[cpt];
+ /* Walk each key. */
+ for (keys_iterator_t iter_key = context.get_collection_keys_begin(collection_name);
+ iter_key != context.get_collection_keys_end(collection_name); ++iter_key) {
+ /* Do not process removed keys. */
+ if (!iter_key->second.exists)
+ continue;
+
+ ts = context.get_timestamp_manager()->get_next_ts();
+
+ /* Start a transaction if possible. */
+ if (!context.is_in_transaction()) {
+ context.begin_transaction(session, "");
+ context.set_commit_timestamp(session, ts);
+ }
+ generated_value =
+ random_generator::random_generator::instance().generate_string(value_size);
+ testutil_check(update(context.get_tracking(), it, collection_name,
+ iter_key->first.c_str(), generated_value.c_str(), ts));
+
+ /* Commit the current transaction if possible. */
+ context.increment_operation_count();
+ if (context.can_commit_transaction())
+ context.commit_transaction(session, "");
+ }
+ ++cpt;
+ }
+
+ /*
+ * The update operations will be later on inside a loop that will be managed through
+ * throttle management.
+ */
+ while (context.is_running())
+ context.sleep();
+
+ /* Make sure the last operation is committed now the work is finished. */
+ if (context.is_in_transaction())
+ context.commit_transaction(session, "");
+ }
+
+ private:
+ /* WiredTiger APIs wrappers for single operations. */
+ template <typename K, typename V>
+ int
+ insert(WT_CURSOR *cursor, workload_tracking *tracking, const std::string &collection_name,
+ const K &key, const V &value, wt_timestamp_t ts)
+ {
+ int error_code;
+
+ testutil_assert(cursor != nullptr);
+ cursor->set_key(cursor, key);
+ cursor->set_value(cursor, value);
+ error_code = cursor->insert(cursor);
+
+ if (error_code == 0) {
+ debug_print("key/value inserted", DEBUG_TRACE);
+ error_code =
+ tracking->save(tracking_operation::INSERT, collection_name, key, value, ts);
+ } else
+ debug_print("key/value insertion failed", DEBUG_ERROR);
+
+ return (error_code);
+ }
+
+ template <typename K, typename V>
+ static int
+ update(workload_tracking *tracking, WT_CURSOR *cursor, const std::string &collection_name,
+ K key, V value, wt_timestamp_t ts)
+ {
+ int error_code;
+
+ testutil_assert(tracking != nullptr);
+ testutil_assert(cursor != nullptr);
+ cursor->set_key(cursor, key);
+ cursor->set_value(cursor, value);
+ error_code = cursor->update(cursor);
+
+ if (error_code == 0) {
+ debug_print("key/value update", DEBUG_TRACE);
+ error_code =
+ tracking->save(tracking_operation::UPDATE, collection_name, key, value, ts);
+ } else
+ debug_print("key/value update failed", DEBUG_ERROR);
+
+ return (error_code);
+ }
+
+ /*
+ * Convert a number to a string. If the resulting string is less than the given length, padding
+ * of '0' is added.
+ */
+ static std::string
+ number_to_string(uint64_t size, uint64_t value)
+ {
+ std::string str, value_str = std::to_string(value);
+ testutil_assert(size >= value_str.size());
+ uint64_t diff = size - value_str.size();
+ std::string s(diff, '0');
+ str = s.append(value_str);
+ return (str);
+ }
+};
+} // namespace test_harness
+#endif
diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/random_generator.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/random_generator.h
index 7df4d7da3fb..7df4d7da3fb 100644
--- a/src/third_party/wiredtiger/test/cppsuite/test_harness/random_generator.h
+++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/random_generator.h
diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/thread_context.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/thread_context.h
index 61ee7e01a88..e5275bc7819 100644
--- a/src/third_party/wiredtiger/test/cppsuite/test_harness/thread_context.h
+++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/thread_context.h
@@ -29,6 +29,8 @@
#ifndef THREAD_CONTEXT_H
#define THREAD_CONTEXT_H
+#include "../core/throttle.h"
+#include "database_model.h"
#include "random_generator.h"
#include "workload_tracking.h"
@@ -48,13 +50,10 @@ enum class thread_operation {
/* Container class for a thread and any data types it may need to interact with the database. */
class thread_context {
public:
- thread_context(timestamp_manager *timestamp_manager, workload_tracking *tracking,
- std::vector<std::string> collection_names, thread_operation type, int64_t max_op,
- int64_t min_op, int64_t value_size)
- : _collection_names(collection_names), _current_op_count(0U), _in_txn(false),
- _running(false), _min_op(min_op), _max_op(max_op), _max_op_count(0),
- _timestamp_manager(timestamp_manager), _type(type), _tracking(tracking),
- _value_size(value_size)
+ thread_context(timestamp_manager *timestamp_manager, workload_tracking *tracking, database &db,
+ thread_operation type, int64_t max_op, int64_t min_op, int64_t value_size, throttle throttle)
+ : _database(db), _min_op(min_op), _max_op(max_op), _timestamp_manager(timestamp_manager),
+ _type(type), _tracking(tracking), _value_size(value_size), _throttle(throttle)
{
}
@@ -64,10 +63,22 @@ class thread_context {
_running = false;
}
- const std::vector<std::string> &
+ const std::vector<std::string>
get_collection_names() const
{
- return (_collection_names);
+ return (_database.get_collection_names());
+ }
+
+ const keys_iterator_t
+ get_collection_keys_begin(const std::string &collection_name) const
+ {
+ return (_database.get_collection_keys_begin(collection_name));
+ }
+
+ const keys_iterator_t
+ get_collection_keys_end(const std::string &collection_name) const
+ {
+ return (_database.get_collection_keys_end(collection_name));
}
thread_operation
@@ -76,6 +87,12 @@ class thread_context {
return (_type);
}
+ timestamp_manager *
+ get_timestamp_manager() const
+ {
+ return (_timestamp_manager);
+ }
+
workload_tracking *
get_tracking() const
{
@@ -94,6 +111,18 @@ class thread_context {
return (_running);
}
+ bool
+ is_in_transaction() const
+ {
+ return (_in_txn);
+ }
+
+ void
+ sleep()
+ {
+ _throttle.sleep();
+ }
+
void
set_running(bool running)
{
@@ -105,7 +134,7 @@ class thread_context {
{
if (!_in_txn && _timestamp_manager->is_enabled()) {
testutil_check(
- session->begin_transaction(session, config.empty() ? NULL : config.c_str()));
+ session->begin_transaction(session, config.empty() ? nullptr : config.c_str()));
/* This randomizes the number of operations to be executed in one transaction. */
_max_op_count = random_generator::instance().generate_integer(_min_op, _max_op);
_current_op_count = 0;
@@ -113,65 +142,69 @@ class thread_context {
}
}
- /* Returns true if the current transaction has been committed. */
+ /*
+ * The current transaction can be committed if:
+ * - The timestamp manager is enabled and
+ * - A transaction has started and
+ * - The thread is done working. This is useful when the test is ended and the thread has
+ * not reached the maximum number of operations per transaction or
+ * - The number of operations executed in the current transaction has exceeded the
+ * threshold.
+ */
bool
- commit_transaction(WT_SESSION *session, const std::string &config)
+ can_commit_transaction() const
{
- if (!_timestamp_manager->is_enabled())
- return (true);
+ return (_timestamp_manager->is_enabled() && _in_txn &&
+ (!_running || (_current_op_count > _max_op_count)));
+ }
+ void
+ commit_transaction(WT_SESSION *session, const std::string &config)
+ {
/* A transaction cannot be committed if not started. */
testutil_assert(_in_txn);
- /* The current transaction should be committed if:
- * - The thread is done working. This is useful when the test is ended and the thread has
- * not reached the maximum number of operations per transaction.
- * - The number of operations executed in the current transaction has exceeded the
- * threshold.
- */
- if (!_running || (++_current_op_count > _max_op_count)) {
- testutil_check(
- session->commit_transaction(session, config.empty() ? nullptr : config.c_str()));
- _in_txn = false;
- }
+ testutil_check(
+ session->commit_transaction(session, config.empty() ? nullptr : config.c_str()));
+ _in_txn = false;
+ }
- return (!_in_txn);
+ void
+ increment_operation_count(uint64_t inc = 1)
+ {
+ _current_op_count += inc;
}
/*
- * Set a commit timestamp if the timestamp manager is enabled and always return the timestamp
- * that should have been used for the commit.
+ * Set a commit timestamp if the timestamp manager is enabled.
*/
- wt_timestamp_t
- set_commit_timestamp(WT_SESSION *session)
+ void
+ set_commit_timestamp(WT_SESSION *session, wt_timestamp_t ts)
{
+ if (!_timestamp_manager->is_enabled())
+ return;
- wt_timestamp_t ts = _timestamp_manager->get_next_ts();
- std::string config;
-
- if (_timestamp_manager->is_enabled()) {
- config = std::string(COMMIT_TS) + "=" + _timestamp_manager->decimal_to_hex(ts);
- testutil_check(session->timestamp_transaction(session, config.c_str()));
- }
-
- return (ts);
+ std::string config = std::string(COMMIT_TS) + "=" + _timestamp_manager->decimal_to_hex(ts);
+ testutil_check(session->timestamp_transaction(session, config.c_str()));
}
private:
- const std::vector<std::string> _collection_names;
+ /* Representation of the collections and their key/value pairs in memory. */
+ database _database;
/*
* _current_op_count is the current number of operations that have been executed in the current
* transaction.
*/
- uint64_t _current_op_count;
- bool _in_txn, _running;
+ uint64_t _current_op_count = 0U;
+ bool _in_txn = false, _running = false;
/*
* _min_op and _max_op are the minimum and maximum number of operations within one transaction.
* _max_op_count is the current maximum number of operations that can be executed in the current
* transaction. _max_op_count will always be <= _max_op.
*/
- int64_t _min_op, _max_op, _max_op_count;
+ int64_t _min_op, _max_op, _max_op_count = 0;
timestamp_manager *_timestamp_manager;
const thread_operation _type;
+ throttle _throttle;
workload_tracking *_tracking;
/* Temporary member that comes from the test configuration. */
int64_t _value_size;
diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/workload_tracking.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/workload_tracking.h
index d1464e60970..4d1b2d755a8 100644
--- a/src/third_party/wiredtiger/test/cppsuite/test_harness/workload_tracking.h
+++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/workload_tracking.h
@@ -33,7 +33,7 @@
* Default schema for tracking operations on collections (key_format: Collection name / Key /
* Timestamp, value_format: Operation type / Value)
*/
-#define OPERATION_TRACKING_KEY_FORMAT WT_UNCHECKED_STRING(Sii)
+#define OPERATION_TRACKING_KEY_FORMAT WT_UNCHECKED_STRING(SSQ)
#define OPERATION_TRACKING_VALUE_FORMAT WT_UNCHECKED_STRING(iS)
#define OPERATION_TRACKING_TABLE_CONFIG \
"key_format=" OPERATION_TRACKING_KEY_FORMAT ",value_format=" OPERATION_TRACKING_VALUE_FORMAT
@@ -42,7 +42,7 @@
* Default schema for tracking schema operations on collections (key_format: Collection name /
* Timestamp, value_format: Operation type)
*/
-#define SCHEMA_TRACKING_KEY_FORMAT WT_UNCHECKED_STRING(Si)
+#define SCHEMA_TRACKING_KEY_FORMAT WT_UNCHECKED_STRING(SQ)
#define SCHEMA_TRACKING_VALUE_FORMAT WT_UNCHECKED_STRING(i)
#define SCHEMA_TRACKING_TABLE_CONFIG \
"key_format=" SCHEMA_TRACKING_KEY_FORMAT ",value_format=" SCHEMA_TRACKING_VALUE_FORMAT
@@ -57,8 +57,7 @@ class workload_tracking : public component {
workload_tracking(configuration *_config, const std::string &operation_table_config,
const std::string &operation_table_name, const std::string &schema_table_config,
const std::string &schema_table_name)
- : component(_config), _cursor_operations(nullptr), _cursor_schema(nullptr),
- _operation_table_config(operation_table_config),
+ : component("workload_tracking", _config), _operation_table_config(operation_table_config),
_operation_table_name(operation_table_name), _schema_table_config(schema_table_config),
_schema_table_name(schema_table_name)
{
@@ -81,7 +80,8 @@ class workload_tracking : public component {
{
WT_SESSION *session;
- testutil_check(_config->get_bool(ENABLED, _enabled));
+ component::load();
+
if (!_enabled)
return;
@@ -145,8 +145,8 @@ class workload_tracking : public component {
}
private:
- WT_CURSOR *_cursor_operations;
- WT_CURSOR *_cursor_schema;
+ WT_CURSOR *_cursor_operations = nullptr;
+ WT_CURSOR *_cursor_schema = nullptr;
const std::string _operation_table_config;
const std::string _operation_table_name;
const std::string _schema_table_config;
diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/workload_validation.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/workload_validation.h
index 86ff567bcc2..5ef7992e773 100644
--- a/src/third_party/wiredtiger/test/cppsuite/test_harness/workload_validation.h
+++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload/workload_validation.h
@@ -35,112 +35,114 @@ extern "C" {
#include "wiredtiger.h"
}
+#include "database_model.h"
+
namespace test_harness {
+
/*
* Class that can validate database state and collection data.
*/
class workload_validation {
public:
/*
- * Validate the on disk data against what has been tracked during the test. The first step is to
- * replay the tracked operations so a representation in memory of the collections is created.
- * This representation is then compared to what is on disk. The second step is to go through
- * what has been saved on disk and make sure the memory representation has the same data.
+ * Validate the on disk data against what has been tracked during the test.
+ * - The first step is to replay the tracked operations so a representation in memory of the
+ * collections is created. This representation is then compared to what is on disk.
+ * - The second step is to go through what has been saved on disk and make sure the memory
+ * representation has the same data.
* operation_table_name is the collection that contains all the operations about the key/value
* pairs in the different collections used during the test. schema_table_name is the collection
* that contains all the operations about the creation or deletion of collections during the
* test.
*/
bool
- validate(const std::string &operation_table_name, const std::string &schema_table_name)
+ validate(const std::string &operation_table_name, const std::string &schema_table_name,
+ database &database)
{
WT_SESSION *session;
std::string collection_name;
- /*
- * Representation in memory of the collections at the end of the test. The first level is a
- * map that contains collection names as keys. The second level is another map that contains
- * the different key/value pairs within a given collection. If a collection yields to a null
- * map of key/value pairs, this means the collection should not be present on disk. If a
- * value from a key/value pair is null, this means the key should not be present in the
- * collection on disk.
- */
- std::map<std::string, std::map<int, std::string *> *> collections;
/* Existing collections after the test. */
- std::vector<std::string> created_collections;
- bool is_valid;
+ std::vector<std::string> created_collections, deleted_collections;
+ bool is_valid = true;
session = connection_manager::instance().create_session();
- /* Retrieve the created collections that need to be checked. */
+ /* Retrieve the collections that were created and deleted during the test. */
collection_name = schema_table_name;
- created_collections = parse_schema_tracking_table(session, collection_name);
+ parse_schema_tracking_table(
+ session, collection_name, created_collections, deleted_collections);
- /* Allocate memory to the operations performed on the created collections. */
+ /* Make sure they exist in memory. */
for (auto const &it : created_collections) {
- std::map<int, std::string *> *map = new std::map<int, std::string *>();
- collections[it] = map;
+ if (database.collections.count(it) == 0) {
+ debug_print("Collection missing in memory: " + it, DEBUG_ERROR);
+ is_valid = false;
+ break;
+ }
}
- /*
- * Build in memory the final state of each created collection according to the tracked
- * operations.
- */
- collection_name = operation_table_name;
- for (auto const &active_collection : created_collections)
- parse_operation_tracking_table(
- session, collection_name, active_collection, collections);
-
- /* Check all tracked operations in memory against the database on disk. */
- is_valid = check_reference(session, collections);
+ if (!is_valid)
+ return (is_valid);
- /* Check what has been saved on disk against what has been tracked. */
- if (is_valid) {
- for (auto const &collection : created_collections) {
- is_valid = check_disk_state(session, collection, collections);
- if (!is_valid) {
- debug_print(
- "check_disk_state failed for collection " + collection, DEBUG_ERROR);
- break;
- }
+ /* Make sure they don't exist in memory nor on disk. */
+ for (auto const &it : deleted_collections) {
+ if (database.collections.count(it) > 0) {
+ debug_print(
+ "Collection present in memory while it has been tracked as deleted: " + it,
+ DEBUG_ERROR);
+ is_valid = false;
+ break;
}
+ if (!verify_collection_state(session, it, false)) {
+ debug_print(
+ "Collection present on disk while it has been tracked as deleted: " + it,
+ DEBUG_ERROR);
+ is_valid = false;
+ break;
+ }
+ }
- } else
- debug_print("check_reference failed!", DEBUG_ERROR);
-
- /* Clean the allocated memory. */
- clean_memory(collections);
-
- return (is_valid);
- }
-
- /* Clean the memory used to represent the collections after the test. */
- void
- clean_memory(std::map<std::string, std::map<int, std::string *> *> &collections)
- {
- for (auto &it_collections : collections) {
- if (it_collections.second == nullptr)
- continue;
+ for (auto const &collection_name : database.get_collection_names()) {
+ if (!is_valid)
+ break;
- for (auto &it_operations : *it_collections.second) {
- delete it_operations.second;
- it_operations.second = nullptr;
+ /* Get the values associated to the different keys in the current collection. */
+ parse_operation_tracking_table(
+ session, operation_table_name, collection_name, database);
+ /* Check all tracked operations in memory against the database on disk. */
+ if (!check_reference(session, collection_name, database)) {
+ debug_print(
+ "check_reference failed for collection " + collection_name, DEBUG_ERROR);
+ is_valid = false;
+ }
+ /* Check what has been saved on disk against what has been tracked. */
+ else if (!check_disk_state(session, collection_name, database)) {
+ debug_print(
+ "check_disk_state failed for collection " + collection_name, DEBUG_ERROR);
+ is_valid = false;
}
- delete it_collections.second;
- it_collections.second = nullptr;
+ /* Clear memory. */
+ delete database.collections[collection_name].values;
+ database.collections[collection_name].values = nullptr;
}
+
+ return (is_valid);
}
+ private:
/*
+ * Read the tracking table to retrieve the created and deleted collections during the test.
* collection_name is the collection that contains the operations on the different collections
* during the test.
*/
- const std::vector<std::string>
- parse_schema_tracking_table(WT_SESSION *session, const std::string &collection_name)
+ void
+ parse_schema_tracking_table(WT_SESSION *session, const std::string &collection_name,
+ std::vector<std::string> &created_collections, std::vector<std::string> &deleted_collections)
{
WT_CURSOR *cursor;
+ wt_timestamp_t key_timestamp;
const char *key_collection_name;
- int key_timestamp, value_operation_type;
- std::vector<std::string> created_collections;
+ int value_operation_type;
testutil_check(session->open_cursor(session, collection_name.c_str(), NULL, NULL, &cursor));
@@ -154,16 +156,18 @@ class workload_validation {
if (static_cast<tracking_operation>(value_operation_type) ==
tracking_operation::CREATE) {
+ deleted_collections.erase(std::remove(deleted_collections.begin(),
+ deleted_collections.end(), key_collection_name),
+ deleted_collections.end());
created_collections.push_back(key_collection_name);
} else if (static_cast<tracking_operation>(value_operation_type) ==
tracking_operation::DELETE_COLLECTION) {
created_collections.erase(std::remove(created_collections.begin(),
created_collections.end(), key_collection_name),
created_collections.end());
+ deleted_collections.push_back(key_collection_name);
}
}
-
- return (created_collections);
}
/*
@@ -174,32 +178,44 @@ class workload_validation {
*/
void
parse_operation_tracking_table(WT_SESSION *session, const std::string &tracking_collection_name,
- const std::string &collection_name,
- std::map<std::string, std::map<int, std::string *> *> &collections)
+ const std::string &collection_name, database &database)
{
WT_CURSOR *cursor;
- int error_code, exact, key, key_timestamp, value_operation_type;
- const char *key_collection_name, *value;
+ wt_timestamp_t key_timestamp;
+ int exact, value_operation_type;
+ const char *key, *key_collection_name, *value;
+ std::vector<key_value_t> collection_keys;
+ std::string key_str;
+
+ /* Retrieve all keys from the given collection. */
+ for (auto const &it : database.collections.at(collection_name).keys)
+ collection_keys.push_back(it.first);
+ /* There must be at least a key. */
+ testutil_assert(!collection_keys.empty());
+ /* Sort keys. */
+ std::sort(collection_keys.begin(), collection_keys.end());
+ /* Use the first key as a parameter for search_near. */
+ key_str = collection_keys[0];
testutil_check(
session->open_cursor(session, tracking_collection_name.c_str(), NULL, NULL, &cursor));
- /* Our keys start at 0. */
- cursor->set_key(cursor, collection_name.c_str(), 0);
- error_code = cursor->search_near(cursor, &exact);
-
+ cursor->set_key(cursor, collection_name.c_str(), key_str.c_str());
+ testutil_check(cursor->search_near(cursor, &exact));
/*
- * As we don't support deletion, the searched collection is expected to be found. Since the
- * timestamp which is part of the key is not provided, exact is expected to be > 0.
+ * Since the timestamp which is part of the key is not provided, exact cannot be 0. If it is
+ * -1, we need to go to the next key.
*/
- testutil_check(exact < 1);
+ testutil_assert(exact != 0);
+ if (exact < 0)
+ testutil_check(cursor->next(cursor));
- while (error_code == 0) {
+ do {
testutil_check(cursor->get_key(cursor, &key_collection_name, &key, &key_timestamp));
testutil_check(cursor->get_value(cursor, &value_operation_type, &value));
debug_print("Collection name is " + std::string(key_collection_name), DEBUG_TRACE);
- debug_print("Key is " + std::to_string(key), DEBUG_TRACE);
+ debug_print("Key is " + std::string(key), DEBUG_TRACE);
debug_print("Timestamp is " + std::to_string(key_timestamp), DEBUG_TRACE);
debug_print("Operation type is " + std::to_string(value_operation_type), DEBUG_TRACE);
debug_print("Value is " + std::string(value), DEBUG_TRACE);
@@ -217,141 +233,138 @@ class workload_validation {
/*
* Operations are parsed from the oldest to the most recent one. It is safe to
* assume the key has been inserted previously in an existing collection and can be
- * deleted safely.
+ * safely deleted.
*/
- delete collections.at(key_collection_name)->at(key);
- collections.at(key_collection_name)->at(key) = nullptr;
+ database.collections.at(key_collection_name).keys.at(std::string(key)).exists =
+ false;
+ delete database.collections.at(key_collection_name).values;
+ database.collections.at(key_collection_name).values = nullptr;
break;
case tracking_operation::INSERT: {
/* Keys are unique, it is safe to assume the key has not been encountered before. */
- std::pair<int, std::string *> pair(key, new std::string(value));
- collections.at(key_collection_name)->insert(pair);
+ database.collections[key_collection_name].keys[std::string(key)].exists = true;
+ if (database.collections[key_collection_name].values == nullptr) {
+ database.collections[key_collection_name].values =
+ new std::map<key_value_t, value_t>();
+ }
+ value_t v;
+ v.value = key_value_t(value);
+ std::pair<key_value_t, value_t> pair(key_value_t(key), v);
+ database.collections[key_collection_name].values->insert(pair);
break;
}
- case tracking_operation::CREATE:
- case tracking_operation::DELETE_COLLECTION:
- testutil_die(DEBUG_ABORT, "Unexpected operation in the tracking table: %d",
- static_cast<tracking_operation>(value_operation_type));
+ case tracking_operation::UPDATE:
+ database.collections[key_collection_name].values->at(key).value =
+ key_value_t(value);
+ break;
default:
- testutil_die(
- DEBUG_ABORT, "tracking operation is unknown : %d", value_operation_type);
+ testutil_die(DEBUG_ABORT, "Unexpected operation in the tracking table: %d",
+ value_operation_type);
break;
}
- error_code = cursor->next(cursor);
- }
+ } while (cursor->next(cursor) == 0);
if (cursor->reset(cursor) != 0)
debug_print("Cursor could not be reset !", DEBUG_ERROR);
}
/*
- * Compare the tracked operations against what has been saved on disk. collections is the
+ * Compare the tracked operations against what has been saved on disk. database is the
* representation in memory of the collections after the test according to the tracking table.
*/
bool
check_reference(
- WT_SESSION *session, std::map<std::string, std::map<int, std::string *> *> &collections)
+ WT_SESSION *session, const std::string &collection_name, const database &database)
{
+ bool is_valid;
+ collection_t collection;
+ key_t key;
+ key_value_t key_str;
+
+ /* Check the collection exists on disk. */
+ is_valid = verify_collection_state(session, collection_name, true);
- bool collection_exists, is_valid = true;
- std::map<int, std::string *> *collection;
- workload_validation wv;
- std::string *value;
-
- for (const auto &it_collections : collections) {
- /* Check the collection is in the correct state. */
- collection_exists = (it_collections.second != nullptr);
- is_valid = wv.verify_database_state(session, it_collections.first, collection_exists);
-
- if (is_valid && collection_exists) {
- collection = it_collections.second;
- for (const auto &it_operations : *collection) {
- value = (*collection)[it_operations.first];
- /* The key/value pair exists. */
- if (value != nullptr)
- is_valid = (wv.is_key_present(
- session, it_collections.first, it_operations.first) == true);
- /* The key has been deleted. */
- else
- is_valid = (wv.is_key_present(
- session, it_collections.first, it_operations.first) == false);
-
- /* Check the associated value is valid. */
- if (is_valid && (value != nullptr)) {
- is_valid = (wv.verify_value(
- session, it_collections.first, it_operations.first, *value));
- }
-
- if (!is_valid) {
- debug_print(
- "check_reference failed for key " + std::to_string(it_operations.first),
- DEBUG_ERROR);
- break;
- }
+ if (is_valid) {
+ collection = database.collections.at(collection_name);
+ /* Walk through each key/value pair of the current collection. */
+ for (const auto &keys : collection.keys) {
+ key_str = keys.first;
+ key = keys.second;
+ /* The key/value pair exists. */
+ if (key.exists)
+ is_valid = (is_key_present(session, collection_name, key_str.c_str()) == true);
+ /* The key has been deleted. */
+ else
+ is_valid = (is_key_present(session, collection_name, key_str.c_str()) == false);
+
+ /* Check the associated value is valid. */
+ if (is_valid && key.exists) {
+ testutil_assert(collection.values != nullptr);
+ is_valid = verify_value(session, collection_name, key_str.c_str(),
+ collection.values->at(key_str).value);
}
- }
- if (!is_valid) {
- debug_print(
- "check_reference failed for collection " + it_collections.first, DEBUG_ERROR);
- break;
+ if (!is_valid) {
+ debug_print("check_reference failed for key " + key_str, DEBUG_ERROR);
+ break;
+ }
}
}
+ if (!is_valid)
+ debug_print("check_reference failed for collection " + collection_name, DEBUG_ERROR);
+
return (is_valid);
}
/* Check what is present on disk against what has been tracked. */
bool
- check_disk_state(WT_SESSION *session, const std::string &collection_name,
- std::map<std::string, std::map<int, std::string *> *> &collections)
+ check_disk_state(
+ WT_SESSION *session, const std::string &collection_name, const database &database)
{
WT_CURSOR *cursor;
- int key;
- const char *value;
- bool is_valid;
- std::string *value_str;
- std::map<int, std::string *> *collection;
+ collection_t collection;
+ bool is_valid = true;
+ /* Key/value pairs on disk. */
+ const char *key_on_disk, *value_on_disk;
+ key_value_t key_str, value_str;
testutil_check(session->open_cursor(session, collection_name.c_str(), NULL, NULL, &cursor));
- /* Check the collection has been tracked and contains data. */
- is_valid =
- ((collections.count(collection_name) > 0) && (collections[collection_name] != nullptr));
-
- if (!is_valid)
- debug_print(
- "Collection " + collection_name + " has not been tracked or has been deleted",
- DEBUG_ERROR);
- else
- collection = collections[collection_name];
+ collection = database.collections.at(collection_name);
/* Read the collection on disk. */
while (is_valid && (cursor->next(cursor) == 0)) {
- testutil_check(cursor->get_key(cursor, &key));
- testutil_check(cursor->get_value(cursor, &value));
+ testutil_check(cursor->get_key(cursor, &key_on_disk));
+ testutil_check(cursor->get_value(cursor, &value_on_disk));
- debug_print("Key is " + std::to_string(key), DEBUG_TRACE);
- debug_print("Value is " + std::string(value), DEBUG_TRACE);
+ key_str = std::string(key_on_disk);
+
+ debug_print("Key on disk is " + key_str, DEBUG_TRACE);
+ debug_print("Value on disk is " + std::string(value_on_disk), DEBUG_TRACE);
- if (collection->count(key) > 0) {
- value_str = collection->at(key);
+ /* Check the key on disk has been saved in memory too. */
+ if ((collection.keys.count(key_str) > 0) && collection.keys.at(key_str).exists) {
+ /* Memory should be allocated for values. */
+ testutil_assert(collection.values != nullptr);
+ value_str = collection.values->at(key_str).value;
/*
* Check the key/value pair on disk matches the one in memory from the tracked
* operations.
*/
- is_valid = (value_str != nullptr) && (*value_str == std::string(value));
+ is_valid = (value_str == key_value_t(value_on_disk));
if (!is_valid)
- debug_print(" Key/Value pair mismatch.\n Disk key: " + std::to_string(key) +
- "\n Disk value: " + std ::string(value) +
- "\n Tracking table key: " + std::to_string(key) +
- "\n Tracking table value: " + (value_str == nullptr ? "NULL" : *value_str),
+ debug_print(" Key/Value pair mismatch.\n Disk key: " + key_str +
+ "\n Disk value: " + std ::string(value_on_disk) +
+ "\n Tracking table key: " + key_str + "\n Tracking table value exists: " +
+ std::to_string(collection.keys.at(key_str).exists) +
+ "\n Tracking table value: " + value_str,
DEBUG_ERROR);
} else {
is_valid = false;
debug_print(
- "The key " + std::to_string(key) + " present on disk has not been tracked",
+ "The key " + std::string(key_on_disk) + " present on disk has not been tracked",
DEBUG_ERROR);
}
}
@@ -364,7 +377,7 @@ class workload_validation {
* needs to be set to true if the collection is expected to be existing, false otherwise.
*/
bool
- verify_database_state(
+ verify_collection_state(
WT_SESSION *session, const std::string &collection_name, bool exists) const
{
WT_CURSOR *cursor;
@@ -396,10 +409,8 @@ class workload_validation {
testutil_check(cursor->search(cursor));
testutil_check(cursor->get_value(cursor, &value));
- return (value == expected_value);
+ return (key_value_t(value) == expected_value);
}
-
- private:
};
} // namespace test_harness
diff --git a/src/third_party/wiredtiger/test/cppsuite/test_harness/workload_generator.h b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload_generator.h
index f9445cd892a..9413834ba31 100644
--- a/src/third_party/wiredtiger/test/cppsuite/test_harness/workload_generator.h
+++ b/src/third_party/wiredtiger/test/cppsuite/test_harness/workload_generator.h
@@ -33,8 +33,11 @@
#include <atomic>
#include <map>
-#include "random_generator.h"
-#include "workload_tracking.h"
+#include "core/throttle.h"
+#include "workload/database_model.h"
+#include "workload/database_operation.h"
+#include "workload/random_generator.h"
+#include "workload/workload_tracking.h"
namespace test_harness {
/*
@@ -42,9 +45,10 @@ namespace test_harness {
*/
class workload_generator : public component {
public:
- workload_generator(configuration *configuration, timestamp_manager *timestamp_manager,
- workload_tracking *tracking)
- : component(configuration), _timestamp_manager(timestamp_manager), _tracking(tracking)
+ workload_generator(configuration *configuration, database_operation *db_operation,
+ timestamp_manager *timestamp_manager, workload_tracking *tracking)
+ : component("workload_generator", configuration), _database_operation(db_operation),
+ _timestamp_manager(timestamp_manager), _tracking(tracking)
{
}
@@ -58,118 +62,73 @@ class workload_generator : public component {
workload_generator(const workload_generator &) = delete;
workload_generator &operator=(const workload_generator &) = delete;
- /*
- * Function that performs the following steps using the configuration that is defined by the
- * test:
- * - Create the working dir.
- * - Open a connection.
- * - Open a session.
- * - Create n collections as per the configuration.
- * - Open a cursor on each collection.
- * - Insert m key/value pairs in each collection. Values are random strings which size is
- * defined by the configuration.
- */
- void
- populate()
- {
- WT_CURSOR *cursor;
- WT_SESSION *session;
- wt_timestamp_t ts;
- int64_t collection_count, key_count, value_size;
- std::string collection_name, config, generated_value, home;
- bool ts_enabled = _timestamp_manager->is_enabled();
-
- cursor = nullptr;
- collection_count = key_count = value_size = 0;
- collection_name = "";
-
- /* Get a session. */
- session = connection_manager::instance().create_session();
- /* Create n collections as per the configuration and store each collection name. */
- testutil_check(_config->get_int(COLLECTION_COUNT, collection_count));
- for (int i = 0; i < collection_count; ++i) {
- collection_name = "table:collection" + std::to_string(i);
- testutil_check(session->create(session, collection_name.c_str(), DEFAULT_TABLE_SCHEMA));
- ts = _timestamp_manager->get_next_ts();
- testutil_check(_tracking->save(tracking_operation::CREATE, collection_name, 0, "", ts));
- _collection_names.push_back(collection_name);
- }
- debug_print(std::to_string(collection_count) + " collections created", DEBUG_TRACE);
-
- /* Open a cursor on each collection and use the configuration to insert key/value pairs. */
- testutil_check(_config->get_int(KEY_COUNT, key_count));
- testutil_check(_config->get_int(VALUE_SIZE, value_size));
- testutil_assert(value_size >= 0);
- for (const auto &collection_name : _collection_names) {
- /* WiredTiger lets you open a cursor on a collection using the same pointer. When a
- * session is closed, WiredTiger APIs close the cursors too. */
- testutil_check(
- session->open_cursor(session, collection_name.c_str(), NULL, NULL, &cursor));
- for (size_t j = 0; j < key_count; ++j) {
- /*
- * Generation of a random string value using the size defined in the test
- * configuration.
- */
- generated_value =
- random_generator::random_generator::instance().generate_string(value_size);
- ts = _timestamp_manager->get_next_ts();
- if (ts_enabled)
- testutil_check(session->begin_transaction(session, ""));
- testutil_check(insert(cursor, collection_name, j + 1, generated_value.c_str(), ts));
- if (ts_enabled) {
- config = std::string(COMMIT_TS) + "=" + _timestamp_manager->decimal_to_hex(ts);
- testutil_check(session->commit_transaction(session, config.c_str()));
- }
- }
- }
- debug_print("Populate stage done", DEBUG_TRACE);
- }
-
/* Do the work of the main part of the workload. */
void
run()
{
- configuration *sub_config;
- int64_t read_threads, min_operation_per_transaction, max_operation_per_transaction,
- value_size;
+ configuration *transaction_config, *update_config, *insert_config;
+ int64_t min_operation_per_transaction, max_operation_per_transaction, read_threads,
+ update_threads, value_size;
/* Populate the database. */
- populate();
+ _database_operation->populate(_database, _timestamp_manager, _config, _tracking);
/* Retrieve useful parameters from the test configuration. */
- testutil_check(_config->get_int(READ_THREADS, read_threads));
- sub_config = _config->get_subconfig(OPS_PER_TRANSACTION);
- testutil_check(sub_config->get_int(MIN, min_operation_per_transaction));
- testutil_check(sub_config->get_int(MAX, max_operation_per_transaction));
+ transaction_config = _config->get_subconfig(OPS_PER_TRANSACTION);
+ update_config = _config->get_subconfig(UPDATE_CONFIG);
+ insert_config = _config->get_subconfig(INSERT_CONFIG);
+ read_threads = _config->get_int(READ_THREADS);
+ update_threads = _config->get_int(UPDATE_THREADS);
+
+ min_operation_per_transaction = transaction_config->get_int(MIN);
+ max_operation_per_transaction = transaction_config->get_int(MAX);
testutil_assert(max_operation_per_transaction >= min_operation_per_transaction);
- testutil_check(_config->get_int(VALUE_SIZE, value_size));
+ value_size = _config->get_int(VALUE_SIZE);
testutil_assert(value_size >= 0);
- delete sub_config;
-
/* Generate threads to execute read operations on the collections. */
for (int i = 0; i < read_threads; ++i) {
- thread_context *tc = new thread_context(_timestamp_manager, _tracking,
- _collection_names, thread_operation::READ, max_operation_per_transaction,
- min_operation_per_transaction, value_size);
+ thread_context *tc = new thread_context(_timestamp_manager, _tracking, _database,
+ thread_operation::READ, max_operation_per_transaction, min_operation_per_transaction,
+ value_size, throttle());
_workers.push_back(tc);
- _thread_manager.add_thread(tc, &execute_operation);
+ _thread_manager.add_thread(tc, _database_operation, &execute_operation);
}
+
+ /* Generate threads to execute update operations on the collections. */
+ for (int i = 0; i < update_threads; ++i) {
+ thread_context *tc = new thread_context(_timestamp_manager, _tracking, _database,
+ thread_operation::UPDATE, max_operation_per_transaction,
+ min_operation_per_transaction, value_size, throttle(update_config));
+ _workers.push_back(tc);
+ _thread_manager.add_thread(tc, _database_operation, &execute_operation);
+ }
+
+ delete transaction_config;
+ delete update_config;
+ delete insert_config;
}
void
finish()
{
- for (const auto &it : _workers) {
+ component::finish();
+
+ for (const auto &it : _workers)
it->finish();
- }
_thread_manager.join();
debug_print("Workload generator: run stage done", DEBUG_TRACE);
}
+ database &
+ get_database()
+ {
+ return _database;
+ }
+
/* Workload threaded operations. */
static void
- execute_operation(thread_context &context)
+ execute_operation(thread_context &context, database_operation &db_operation)
{
WT_SESSION *session;
@@ -177,7 +136,7 @@ class workload_generator : public component {
switch (context.get_thread_operation()) {
case thread_operation::READ:
- read_operation(context, session);
+ db_operation.read_operation(context, session);
break;
case thread_operation::REMOVE:
case thread_operation::INSERT:
@@ -186,7 +145,7 @@ class workload_generator : public component {
std::this_thread::sleep_for(std::chrono::seconds(1));
break;
case thread_operation::UPDATE:
- update_operation(context, session);
+ db_operation.update_operation(context, session);
break;
default:
testutil_die(DEBUG_ABORT, "system: thread_operation is unknown : %d",
@@ -195,134 +154,9 @@ class workload_generator : public component {
}
}
- /*
- * Basic update operation that currently update the same key with a random value in each
- * collection.
- */
- static void
- update_operation(thread_context &context, WT_SESSION *session)
- {
- WT_CURSOR *cursor;
- wt_timestamp_t ts;
- std::vector<WT_CURSOR *> cursors;
- std::vector<std::string> collection_names;
- std::string generated_value;
- bool has_committed = true;
- int64_t cpt, value_size = context.get_value_size();
-
- testutil_assert(session != nullptr);
- /* Get a cursor for each collection in collection_names. */
- for (const auto &it : context.get_collection_names()) {
- testutil_check(session->open_cursor(session, it.c_str(), NULL, NULL, &cursor));
- cursors.push_back(cursor);
- collection_names.push_back(it);
- }
-
- while (context.is_running()) {
- /* Walk each cursor. */
- context.begin_transaction(session, "");
- ts = context.set_commit_timestamp(session);
- cpt = 0;
- for (const auto &it : cursors) {
- generated_value =
- random_generator::random_generator::instance().generate_string(value_size);
- /* Key is hard coded for now. */
- testutil_check(update(context.get_tracking(), it, collection_names[cpt], 1,
- generated_value.c_str(), ts));
- ++cpt;
- }
- has_committed = context.commit_transaction(session, "");
- }
-
- /* Make sure the last operation is committed now the work is finished. */
- if (!has_committed)
- context.commit_transaction(session, "");
- }
-
- /* Basic read operation that walks a cursors across all collections. */
- static void
- read_operation(thread_context &context, WT_SESSION *session)
- {
- WT_CURSOR *cursor;
- std::vector<WT_CURSOR *> cursors;
-
- testutil_assert(session != nullptr);
- /* Get a cursor for each collection in collection_names. */
- for (const auto &it : context.get_collection_names()) {
- testutil_check(session->open_cursor(session, it.c_str(), NULL, NULL, &cursor));
- cursors.push_back(cursor);
- }
-
- while (context.is_running()) {
- /* Walk each cursor. */
- for (const auto &it : cursors) {
- if (it->next(it) != 0)
- it->reset(it);
- }
- }
- }
-
- /* WiredTiger APIs wrappers for single operations. */
- template <typename K, typename V>
- int
- insert(WT_CURSOR *cursor, const std::string &collection_name, K key, V value, wt_timestamp_t ts)
- {
- int error_code;
-
- testutil_assert(cursor != nullptr);
- cursor->set_key(cursor, key);
- cursor->set_value(cursor, value);
- error_code = cursor->insert(cursor);
-
- if (error_code == 0) {
- debug_print("key/value inserted", DEBUG_TRACE);
- error_code =
- _tracking->save(tracking_operation::INSERT, collection_name, key, value, ts);
- } else
- debug_print("key/value insertion failed", DEBUG_ERROR);
-
- return (error_code);
- }
-
- static int
- search(WT_CURSOR *cursor)
- {
- testutil_assert(cursor != nullptr);
- return (cursor->search(cursor));
- }
-
- static int
- search_near(WT_CURSOR *cursor, int *exact)
- {
- testutil_assert(cursor != nullptr);
- return (cursor->search_near(cursor, exact));
- }
-
- template <typename K, typename V>
- static int
- update(workload_tracking *tracking, WT_CURSOR *cursor, const std::string &collection_name,
- K key, V value, wt_timestamp_t ts)
- {
- int error_code;
-
- testutil_assert(tracking != nullptr);
- testutil_assert(cursor != nullptr);
- cursor->set_key(cursor, key);
- cursor->set_value(cursor, value);
- error_code = cursor->update(cursor);
-
- if (error_code == 0) {
- debug_print("key/value update", DEBUG_TRACE);
- error_code =
- tracking->save(tracking_operation::UPDATE, collection_name, key, value, ts);
- } else
- debug_print("key/value update failed", DEBUG_ERROR);
-
- return (error_code);
- }
-
private:
- std::vector<std::string> _collection_names;
+ database _database;
+ database_operation *_database_operation;
thread_manager _thread_manager;
timestamp_manager *_timestamp_manager;
workload_tracking *_tracking;
diff --git a/src/third_party/wiredtiger/test/cppsuite/tests/example_test.cxx b/src/third_party/wiredtiger/test/cppsuite/tests/example_test.cxx
new file mode 100644
index 00000000000..cc08d3d003a
--- /dev/null
+++ b/src/third_party/wiredtiger/test/cppsuite/tests/example_test.cxx
@@ -0,0 +1,55 @@
+/*-
+ * Public Domain 2014-present MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "test_harness/test.h"
+
+/*
+ * Class that defines operations that do nothing as an example.
+ * This shows how database operations can be overriden and customized.
+ */
+class example_test : public test_harness::test {
+ public:
+ example_test(const std::string &config, const std::string &name) : test(config, name) {}
+
+ virtual void
+ populate(test_harness::database &database, test_harness::timestamp_manager *_timestamp_manager,
+ test_harness::configuration *_config, test_harness::workload_tracking *tracking)
+ {
+ std::cout << "populate: nothing done." << std::endl;
+ }
+ virtual void
+ read_operation(test_harness::thread_context &context, WT_SESSION *session)
+ {
+ std::cout << "read_operation: nothing done." << std::endl;
+ }
+ virtual void
+ update_operation(test_harness::thread_context &context, WT_SESSION *session)
+ {
+ std::cout << "update_operation: nothing done." << std::endl;
+ }
+};
diff --git a/src/third_party/wiredtiger/test/cppsuite/tests/run.cxx b/src/third_party/wiredtiger/test/cppsuite/tests/run.cxx
index f9d5902b7ff..5fe6641cc3b 100755
--- a/src/third_party/wiredtiger/test/cppsuite/tests/run.cxx
+++ b/src/third_party/wiredtiger/test/cppsuite/tests/run.cxx
@@ -30,51 +30,25 @@
#include <iostream>
#include <string>
-#include "test_harness/debug_utils.h"
+#include "test_harness/util/debug_utils.h"
#include "test_harness/test.h"
+#include "example_test.cxx"
#include "poc_test.cxx"
std::string
parse_configuration_from_file(const std::string &filename)
{
- std::string cfg, line, prev_line, error;
+ std::string cfg, line, error;
std::ifstream cFile(filename);
if (cFile.is_open()) {
while (getline(cFile, line)) {
-
- if (line[0] == '#' || line.empty())
- continue;
-
/* Whitespaces are only for readability, they can be removed safely. */
line.erase(std::remove_if(line.begin(), line.end(), isspace), line.end());
-
- if (prev_line == line && line != "}") {
- error =
- "Error when parsing configuration. Two consecutive lines are equal to " + line;
- testutil_die(EINVAL, error.c_str());
- break;
- }
-
- /* Start of a sub config. */
- if (line == "{")
- cfg += "(";
- /* End of a sub config. */
- else if (line == "}")
- cfg += ")";
- else {
- /* First line. */
- if (cfg.empty())
- cfg += line;
- /* No comma needed at the start of a subconfig. */
- else if (prev_line == "{")
- cfg += line;
- else
- cfg += "," + line;
- }
-
- prev_line = line;
+ if (line[0] == '#' || line.empty())
+ continue;
+ cfg += line;
}
} else {
@@ -86,31 +60,60 @@ parse_configuration_from_file(const std::string &filename)
}
void
+print_help()
+{
+ std::cout << "NAME" << std::endl;
+ std::cout << "\trun" << std::endl;
+ std::cout << std::endl;
+ std::cout << "SYNOPSIS" << std::endl;
+ std::cout << "\trun [OPTIONS]" << std::endl;
+ std::cout << "\trun -C [CONFIGURATION]" << std::endl;
+ std::cout << "\trun -f [FILE]" << std::endl;
+ std::cout << "\trun -l [TRACEL_LEVEL]" << std::endl;
+ std::cout << "\trun -t [TEST_NAME]" << std::endl;
+ std::cout << std::endl;
+ std::cout << "DESCRIPTION" << std::endl;
+ std::cout << "\trun executes the test framework." << std::endl;
+ std::cout << "\tIf no test is indicated, all tests are executed." << std::endl;
+ std::cout
+ << "\tIf no configuration is indicated, the default configuration for each test will be used."
+ << std::endl;
+ std::cout
+ << "\tIf a configuration is indicated, the given configuration will be used either for "
+ "all tests or the test indicated."
+ << std::endl;
+ std::cout << std::endl;
+ std::cout << "OPTIONS" << std::endl;
+ std::cout << "\t-h Output a usage message and exit." << std::endl;
+ std::cout << "\t-C Configuration. Cannot be used with -f." << std::endl;
+ std::cout << "\t-f File that contains the configuration. Cannot be used with -C." << std::endl;
+ std::cout << "\t-l Trace level from 0 (default) to 2." << std::endl;
+ std::cout << "\t-t Test name to be executed." << std::endl;
+}
+
+void
value_missing_error(const std::string &str)
{
- test_harness::debug_print("Value missing for option " + str, DEBUG_ERROR);
+ test_harness::debug_print(
+ "Value missing for option " + str + ".\nTry './run -h' for more information.", DEBUG_ERROR);
}
/*
* Run a specific test.
- * config_name is the configuration name. The default configuration is used if it is left empty.
+ * test_name: specifies which test to run.
+ * config: defines the configuration used for the test.
*/
int64_t
-run_test(const std::string &test_name, const std::string &config_name = "")
+run_test(const std::string &test_name, const std::string &config)
{
- std::string cfg, cfg_path;
int error_code = 0;
- if (config_name.empty())
- cfg_path = "configs/config_" + test_name + "_default.txt";
- else
- cfg_path = config_name;
- cfg = parse_configuration_from_file(cfg_path);
-
- test_harness::debug_print("Configuration\t: " + cfg, DEBUG_INFO);
+ test_harness::debug_print("Configuration\t:" + config, DEBUG_INFO);
if (test_name == "poc_test")
- poc_test(cfg, test_name).run();
+ poc_test(config, test_name).run();
+ else if (test_name == "example_test")
+ example_test(config, test_name).run();
else {
test_harness::debug_print("Test not found: " + test_name, DEBUG_ERROR);
error_code = -1;
@@ -125,19 +128,24 @@ run_test(const std::string &test_name, const std::string &config_name = "")
int
main(int argc, char *argv[])
{
- std::string cfg, config_name, test_name;
+ std::string cfg, config_filename, test_name, current_test_name;
int64_t error_code = 0;
- const std::vector<std::string> all_tests = {"poc_test"};
+ const std::vector<std::string> all_tests = {"example_test", "poc_test"};
/* Parse args
- * -C : Configuration. Cannot be used with -f.
- * -f : Filename that contains the configuration. Cannot be used with -C.
+ * -C : Configuration. Cannot be used with -f. If no specific test is specified to be run, the
+ * same coniguration will be used for all existing tests.
+ * -f : Filename that contains the configuration. Cannot be used with -C. If no specific test
+ * is specified to be run, the same coniguration will be used for all existing tests.
* -l : Trace level.
* -t : Test to run. All tests are run if not specified.
*/
for (int i = 1; (i < argc) && (error_code == 0); ++i) {
- if (std::string(argv[i]) == "-C") {
- if (!config_name.empty()) {
+ if (std::string(argv[i]) == "-h") {
+ print_help();
+ return 0;
+ } else if (std::string(argv[i]) == "-C") {
+ if (!config_filename.empty()) {
test_harness::debug_print("Option -C cannot be used with -f", DEBUG_ERROR);
error_code = -1;
} else if ((i + 1) < argc)
@@ -151,7 +159,7 @@ main(int argc, char *argv[])
test_harness::debug_print("Option -f cannot be used with -C", DEBUG_ERROR);
error_code = -1;
} else if ((i + 1) < argc)
- config_name = argv[++i];
+ config_filename = argv[++i];
else {
value_missing_error(argv[i]);
error_code = -1;
@@ -180,14 +188,33 @@ main(int argc, char *argv[])
/* Run all tests. */
test_harness::debug_print("Running all tests.", DEBUG_INFO);
for (auto const &it : all_tests) {
- error_code = run_test(it);
- if (error_code != 0) {
- test_harness::debug_print("Test " + it + " failed.", DEBUG_ERROR);
- break;
+ current_test_name = it;
+ /* Configuration parsing. */
+ if (!config_filename.empty())
+ cfg = parse_configuration_from_file(config_filename);
+ else if (cfg.empty()) {
+ config_filename = "configs/config_" + current_test_name + "_default.txt";
+ cfg = parse_configuration_from_file(config_filename);
}
+
+ error_code = run_test(current_test_name, cfg);
+ if (error_code != 0)
+ break;
+ }
+ } else {
+ current_test_name = test_name;
+ /* Configuration parsing. */
+ if (!config_filename.empty())
+ cfg = parse_configuration_from_file(config_filename);
+ else if (cfg.empty()) {
+ config_filename = "configs/config_" + test_name + "_default.txt";
+ cfg = parse_configuration_from_file(config_filename);
}
- } else
- error_code = run_test(test_name, config_name);
+ error_code = run_test(current_test_name, cfg);
+ }
+
+ if (error_code != 0)
+ test_harness::debug_print("Test " + current_test_name + " failed.", DEBUG_ERROR);
}
return (error_code);
diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml
index 1fc83cfe862..b4ab0507399 100755
--- a/src/third_party/wiredtiger/test/evergreen.yml
+++ b/src/third_party/wiredtiger/test/evergreen.yml
@@ -205,13 +205,19 @@ functions:
# Use separate shell.exec with "silent" directive to avoid exposing credentail in task log.
- command: shell.exec
params:
- working_dir: "wiredtiger/wiredtiger.github.com"
+ working_dir: "wiredtiger"
shell: bash
silent: true
script: |
set -o errexit
set -o verbose
+ if [[ "${branch_name}" != "develop" ]]; then
+ echo "We only run the documentation update task on the WiredTiger (develop) Evergreen project."
+ exit 0
+ fi
+
+ cd wiredtiger.github.com
git push https://${doc-update-github-token}@github.com/wiredtiger/wiredtiger.github.com
"make check directory":
command: shell.exec
@@ -300,11 +306,11 @@ functions:
"truncated log test":
command: shell.exec
params:
- working_dir: "wiredtiger/build_posix/test/csuite"
+ working_dir: "wiredtiger/build_posix/"
script: |
set -o errexit
set -o verbose
- ${test_env_vars|} ./test_truncated_log ${truncated_log_args|} 2>&1
+ ${test_env_vars|} ./test/csuite/test_truncated_log ${truncated_log_args|} 2>&1
"recovery stress test script":
command: shell.exec
params:
@@ -554,17 +560,6 @@ tasks:
- func: "upload artifact"
- func: "cleanup"
- - name: compile-asan
- tags: ["pull_request"]
- commands:
- - func: "get project"
- - func: "compile wiredtiger"
- vars:
- configure_env_vars: CC=/opt/mongodbtoolchain/v3/bin/clang CXX=/opt/mongodbtoolchain/v3/bin/clang++ PATH=/opt/mongodbtoolchain/v3/bin:$PATH CFLAGS="-fsanitize=address -fno-omit-frame-pointer -ggdb" CXXFLAGS="-fsanitize=address -fno-omit-frame-pointer -ggdb"
- posix_configure_flags: --enable-silent-rules --enable-strict --enable-diagnostic --disable-static
- - func: "upload artifact"
- - func: "cleanup"
-
- name: compile-msan
commands:
- func: "get project"
@@ -700,22 +695,6 @@ tasks:
smp_command: -j 1
test_env_vars: MSAN_OPTIONS=abort_on_error=1:disable_coredump=0 MSAN_SYMBOLIZER_PATH=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer TESTUTIL_SLOW_MACHINE=1
- - name: make-check-asan-test
- depends_on:
- - name: compile-asan
- commands:
- - func: "fetch artifacts"
- vars:
- dependent_task: compile-asan
- - func: "compile wiredtiger"
- vars:
- configure_env_vars: CC=/opt/mongodbtoolchain/v3/bin/clang CXX=/opt/mongodbtoolchain/v3/bin/clang++ PATH=/opt/mongodbtoolchain/v3/bin:$PATH CFLAGS="-fsanitize=address -fno-omit-frame-pointer -ggdb" CXXFLAGS="-fsanitize=address -fno-omit-frame-pointer -ggdb"
- posix_configure_flags: --enable-silent-rules --enable-strict --enable-diagnostic --disable-static
- - func: "make check all"
- vars:
- smp_command: -j 1
- test_env_vars: ASAN_OPTIONS=detect_leaks=1:abort_on_error=1:disable_coredump=0 ASAN_SYMBOLIZER_PATH=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer TESTUTIL_SLOW_MACHINE=1 TESTUTIL_BYPASS_ASAN=1
-
- name: make-check-linux-no-ftruncate-test
depends_on:
- name: compile-linux-no-ftruncate
@@ -731,7 +710,7 @@ tasks:
# Start of normal make check test tasks
- name: lang-python-test
- tags: ["pull_request"]
+ tags: ["pull_request", "python"]
depends_on:
- name: compile
commands:
@@ -752,23 +731,6 @@ tasks:
vars:
directory: examples/c
- - name: examples-c-asan-test
- tags: ["pull_request"]
- depends_on:
- - name: compile-asan
- commands:
- - func: "fetch artifacts"
- vars:
- dependent_task: compile-asan
- - func: "compile wiredtiger"
- vars:
- configure_env_vars: CC=/opt/mongodbtoolchain/v3/bin/clang CXX=/opt/mongodbtoolchain/v3/bin/clang++ PATH=/opt/mongodbtoolchain/v3/bin:$PATH CFLAGS="-fsanitize=address -ggdb" CXXFLAGS="-fsanitize=address -ggdb"
- posix_configure_flags: --enable-silent-rules --enable-strict --enable-diagnostic --disable-static
- - func: "make check directory"
- vars:
- test_env_vars: ASAN_OPTIONS=detect_leaks=1:abort_on_error=1:disable_coredump=0 ASAN_SYMBOLIZER_PATH=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer
- directory: examples/c
-
- name: examples-c-production-disable-shared-test
tags: ["pull_request"]
depends_on:
@@ -952,6 +914,24 @@ tasks:
set -o verbose
UBSAN_OPTIONS=print_stacktrace=1:halt_on_error=1:abort_on_error=1:disable_coredump=0 ./ex_access
+ # Start of cppsuite test tasks.
+
+ - name: poc-test-cpp
+ tags: ["pull_request"]
+ depends_on:
+ - name: compile
+ commands:
+ - func: "fetch artifacts"
+ - command: shell.exec
+ params:
+ working_dir: "wiredtiger/build_posix/"
+ script: |
+ set -o errexit
+ set -o verbose
+
+ ${test_env_vars|} $(pwd)/test/cppsuite/run -t poc_test -f test/cppsuite/configs/config_poc_test_default.txt -l 1
+
+ # End of cppsuite test tasks.
# Start of csuite test tasks
- name: csuite-incr-backup-test
@@ -1508,6 +1488,7 @@ tasks:
# Start of Python unit test tasks
- name: unit-test
+ tags: ["python"]
depends_on:
- name: compile
commands:
@@ -1515,12 +1496,14 @@ tasks:
- func: "unit test"
- name: unit-test-with-compile
+ tags: ["python"]
commands:
- func: "get project"
- func: "compile wiredtiger"
- func: "unit test"
- name: unit-test-long
+ tags: ["python"]
depends_on:
- name: compile
commands:
@@ -1530,6 +1513,7 @@ tasks:
unit_test_args: -v 2 --long
- name: unit-linux-no-ftruncate-test
+ tags: ["python"]
depends_on:
- name: compile-linux-no-ftruncate
commands:
@@ -1540,6 +1524,7 @@ tasks:
# Run the tests that uses suite_random with a random starting seed
- name: unit-test-random-seed
+ tags: ["python"]
depends_on:
- name: compile
commands:
@@ -1551,7 +1536,7 @@ tasks:
# and we use the -b option of the test/suite/run.py script to split up the tests.
- name: unit-test-bucket00
- tags: ["pull_request", "unit_test"]
+ tags: ["pull_request", "python", "unit_test"]
depends_on:
- name: compile
commands:
@@ -1561,7 +1546,7 @@ tasks:
unit_test_args: -v 2 -b 0/11
- name: unit-test-bucket01
- tags: ["pull_request", "unit_test"]
+ tags: ["pull_request", "python", "unit_test"]
depends_on:
- name: compile
commands:
@@ -1571,7 +1556,7 @@ tasks:
unit_test_args: -v 2 -b 1/11
- name: unit-test-bucket02
- tags: ["pull_request", "unit_test"]
+ tags: ["pull_request", "python", "unit_test"]
depends_on:
- name: compile
commands:
@@ -1581,7 +1566,7 @@ tasks:
unit_test_args: -v 2 -b 2/11
- name: unit-test-bucket03
- tags: ["pull_request", "unit_test"]
+ tags: ["pull_request", "python", "unit_test"]
depends_on:
- name: compile
commands:
@@ -1591,7 +1576,7 @@ tasks:
unit_test_args: -v 2 -b 3/11
- name: unit-test-bucket04
- tags: ["pull_request", "unit_test"]
+ tags: ["pull_request", "python", "unit_test"]
depends_on:
- name: compile
commands:
@@ -1601,7 +1586,7 @@ tasks:
unit_test_args: -v 2 -b 4/11
- name: unit-test-bucket05
- tags: ["pull_request", "unit_test"]
+ tags: ["pull_request", "python", "unit_test"]
depends_on:
- name: compile
commands:
@@ -1611,7 +1596,7 @@ tasks:
unit_test_args: -v 2 -b 5/11
- name: unit-test-bucket06
- tags: ["pull_request", "unit_test"]
+ tags: ["pull_request", "python", "unit_test"]
depends_on:
- name: compile
commands:
@@ -1621,7 +1606,7 @@ tasks:
unit_test_args: -v 2 -b 6/11
- name: unit-test-bucket07
- tags: ["pull_request", "unit_test"]
+ tags: ["pull_request", "python", "unit_test"]
depends_on:
- name: compile
commands:
@@ -1631,7 +1616,7 @@ tasks:
unit_test_args: -v 2 -b 7/11
- name: unit-test-bucket08
- tags: ["pull_request", "unit_test"]
+ tags: ["pull_request", "python", "unit_test"]
depends_on:
- name: compile
commands:
@@ -1641,7 +1626,7 @@ tasks:
unit_test_args: -v 2 -b 8/11
- name: unit-test-bucket09
- tags: ["pull_request", "unit_test"]
+ tags: ["pull_request", "python", "unit_test"]
depends_on:
- name: compile
commands:
@@ -1651,7 +1636,7 @@ tasks:
unit_test_args: -v 2 -b 9/11
- name: unit-test-bucket10
- tags: ["pull_request", "unit_test"]
+ tags: ["pull_request", "python", "unit_test"]
depends_on:
- name: compile
commands:
@@ -1676,7 +1661,7 @@ tasks:
sh s_all -A -E 2>&1
- name: conf-dump-test
- tags: ["pull_request"]
+ tags: ["pull_request", "python"]
depends_on:
- name: compile
commands:
@@ -1735,14 +1720,15 @@ tasks:
- func: "fetch artifacts"
- command: shell.exec
params:
- working_dir: "wiredtiger"
+ working_dir: "wiredtiger/build_posix"
script: |
set -o errexit
set -o verbose
if [ "Windows_NT" = "$OS" ]; then
+ cd ..
cmd.exe /c t_fops.exe
else
- build_posix/test/fops/t
+ ${test_env_vars|} test/fops/t
fi
- name: million-collection-test
@@ -2561,7 +2547,6 @@ buildvariants:
- name: linux-directio
distros: ubuntu1804-build
- name: syscall-linux
- - name: make-check-asan-test
- name: configure-combinations
- name: checkpoint-filetypes-test
- name: unit-test-long
@@ -2575,6 +2560,29 @@ buildvariants:
- name: static-wt-build-test
- name: format-failure-configs-test
+- name: ubuntu1804-asan
+ display_name: "! Ubuntu 18.04 ASAN"
+ run_on:
+ - ubuntu1804-test
+ expansions:
+ configure_env_vars:
+ CC=/opt/mongodbtoolchain/v3/bin/clang
+ CXX=/opt/mongodbtoolchain/v3/bin/clang++
+ PATH=/opt/mongodbtoolchain/v3/bin:$PATH
+ CFLAGS="-fsanitize=address -fno-omit-frame-pointer -ggdb"
+ CXXFLAGS="-fsanitize=address -fno-omit-frame-pointer -ggdb"
+ posix_configure_flags: --enable-silent-rules --enable-strict --enable-diagnostic --disable-static --prefix=$(pwd)/LOCAL_INSTALL
+ smp_command: -j $(grep -c ^processor /proc/cpuinfo)
+ make_command: PATH=/opt/mongodbtoolchain/v3/bin:$PATH make
+ test_env_vars:
+ ASAN_OPTIONS="detect_leaks=1:abort_on_error=1:disable_coredump=0"
+ ASAN_SYMBOLIZER_PATH=/opt/mongodbtoolchain/v3/bin/llvm-symbolizer
+ TESTUTIL_BYPASS_ASAN=1
+ LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libeatmydata.so PATH=/opt/mongodbtoolchain/v3/bin:$PATH LD_LIBRARY_PATH=$(pwd)/.libs top_srcdir=$(pwd)/.. top_builddir=$(pwd)
+ tasks:
+ - name: ".pull_request !.windows_only !.pull_request_compilers !.python"
+ - examples-c-test
+
- name: ubuntu1804-compilers
display_name: "! Ubuntu 18.04 Compilers"
run_on:
@@ -2663,8 +2671,6 @@ buildvariants:
- name: linux-directio
distros: rhel80-build
- name: syscall-linux
- - name: compile-asan
- - name: make-check-asan-test
- name: checkpoint-filetypes-test
- name: unit-test-long
- name: spinlock-gcc-test
diff --git a/src/third_party/wiredtiger/test/evergreen/compatibility_test_for_releases.sh b/src/third_party/wiredtiger/test/evergreen/compatibility_test_for_releases.sh
index 4d5a04bf0e7..4c62d379d38 100755
--- a/src/third_party/wiredtiger/test/evergreen/compatibility_test_for_releases.sh
+++ b/src/third_party/wiredtiger/test/evergreen/compatibility_test_for_releases.sh
@@ -72,6 +72,7 @@ run_format()
args+="checkpoints=1 " # Force periodic writes
args+="compression=snappy " # We only built with snappy, force the choice
args+="data_source=table "
+ args+="huffman_key=0 " # Not supoprted by newer releases
args+="in_memory=0 " # Interested in the on-disk format
args+="leak_memory=1 " # Faster runs
args+="logging=1 " # Test log compatibility
diff --git a/src/third_party/wiredtiger/test/format/Makefile.am b/src/third_party/wiredtiger/test/format/Makefile.am
index aa49dc6d732..771e41fd662 100644
--- a/src/third_party/wiredtiger/test/format/Makefile.am
+++ b/src/third_party/wiredtiger/test/format/Makefile.am
@@ -4,7 +4,7 @@ AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
noinst_PROGRAMS = t
t_SOURCES =\
- alter.c backup.c bulk.c checkpoint.c compact.c config.c config_compat.c hs.c kv.c ops.c \
+ alter.c backup.c bulk.c checkpoint.c compact.c config.c config_compat.c hs.c import.c kv.c ops.c \
random.c salvage.c snap.c t.c trace.c util.c wts.c
t_LDADD = $(top_builddir)/test/utility/libtest_util.la
diff --git a/src/third_party/wiredtiger/test/format/backup.c b/src/third_party/wiredtiger/test/format/backup.c
index afeed3b247a..8ec113dd6d8 100644
--- a/src/third_party/wiredtiger/test/format/backup.c
+++ b/src/third_party/wiredtiger/test/format/backup.c
@@ -266,21 +266,21 @@ copy_blocks(WT_SESSION *session, WT_CURSOR *bkup_c, const char *name)
len = strlen(g.home) + strlen(name) + 10;
tmp = dmalloc(len);
testutil_check(__wt_snprintf(tmp, len, "%s/%s", g.home, name));
- error_sys_check(rfd = open(tmp, O_RDONLY, 0));
+ error_sys_check(rfd = open(tmp, O_RDONLY, 0644));
free(tmp);
tmp = NULL;
len = strlen(g.home) + strlen("BACKUP") + strlen(name) + 10;
tmp = dmalloc(len);
testutil_check(__wt_snprintf(tmp, len, "%s/BACKUP/%s", g.home, name));
- error_sys_check(wfd1 = open(tmp, O_WRONLY | O_CREAT, 0));
+ error_sys_check(wfd1 = open(tmp, O_WRONLY | O_CREAT, 0644));
free(tmp);
tmp = NULL;
len = strlen(g.home) + strlen("BACKUP.copy") + strlen(name) + 10;
tmp = dmalloc(len);
testutil_check(__wt_snprintf(tmp, len, "%s/BACKUP.copy/%s", g.home, name));
- error_sys_check(wfd2 = open(tmp, O_WRONLY | O_CREAT, 0));
+ error_sys_check(wfd2 = open(tmp, O_WRONLY | O_CREAT, 0644));
free(tmp);
tmp = NULL;
@@ -349,39 +349,6 @@ copy_blocks(WT_SESSION *session, WT_CURSOR *bkup_c, const char *name)
free(tmp);
}
-/*
- * copy_file --
- * Copy a single file into the backup directories.
- */
-static void
-copy_file(WT_SESSION *session, const char *name)
-{
- size_t len;
- char *first, *second;
-
- len = strlen("BACKUP") + strlen(name) + 10;
- first = dmalloc(len);
- testutil_check(__wt_snprintf(first, len, "BACKUP/%s", name));
- testutil_check(__wt_copy_and_sync(session, name, first));
-
- /*
- * Save another copy of the original file to make debugging recovery errors easier.
- */
- len = strlen("BACKUP.copy") + strlen(name) + 10;
- second = dmalloc(len);
- testutil_check(__wt_snprintf(second, len, "BACKUP.copy/%s", name));
- testutil_check(__wt_copy_and_sync(session, first, second));
-
- free(first);
- free(second);
-}
-
-/*
- * Backup directory initialize command, remove and re-create the primary backup directory, plus a
- * copy we maintain for recovery testing.
- */
-#define HOME_BACKUP_INIT_CMD "rm -rf %s/BACKUP %s/BACKUP.copy && mkdir %s/BACKUP %s/BACKUP.copy"
-
#define RESTORE_SKIP 1
#define RESTORE_SUCCESS 0
/*
@@ -507,11 +474,10 @@ backup(void *arg)
WT_CURSOR *backup_cursor;
WT_DECL_RET;
WT_SESSION *session;
- size_t len;
u_int incremental, period;
uint64_t src_id, this_id;
const char *config, *key;
- char cfg[512], *cmd;
+ char cfg[512];
bool full, incr_full;
(void)(arg);
@@ -615,12 +581,7 @@ backup(void *arg)
/* If we're taking a full backup, create the backup directories. */
if (full || incremental == 0) {
- len = strlen(g.home) * 4 + strlen(HOME_BACKUP_INIT_CMD) + 1;
- cmd = dmalloc(len);
- testutil_check(
- __wt_snprintf(cmd, len, HOME_BACKUP_INIT_CMD, g.home, g.home, g.home, g.home));
- testutil_checkfmt(system(cmd), "%s", "backup directory creation failed");
- free(cmd);
+ testutil_create_backup_directory(g.home);
}
/*
@@ -636,12 +597,12 @@ backup(void *arg)
testutil_check(backup_cursor->get_key(backup_cursor, &key));
if (g.c_backup_incr_flag == INCREMENTAL_BLOCK) {
if (full)
- copy_file(session, key);
+ testutil_copy_file(session, key);
else
copy_blocks(session, backup_cursor, key);
} else
- copy_file(session, key);
+ testutil_copy_file(session, key);
active_files_add(active_now, key);
}
if (ret != WT_NOTFOUND)
diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c
index 0094cec7c88..20431b3f1ab 100644
--- a/src/third_party/wiredtiger/test/format/config.c
+++ b/src/third_party/wiredtiger/test/format/config.c
@@ -695,11 +695,11 @@ config_in_memory(void)
*/
if (config_is_perm("backup"))
return;
- if (config_is_perm("checkpoint"))
- return;
if (config_is_perm("btree.compression"))
return;
- if (config_is_perm("runs.source") && DATASOURCE("lsm"))
+ if (config_is_perm("checkpoint"))
+ return;
+ if (config_is_perm("import"))
return;
if (config_is_perm("logging"))
return;
@@ -709,6 +709,8 @@ config_in_memory(void)
return;
if (config_is_perm("ops.verify"))
return;
+ if (config_is_perm("runs.source") && DATASOURCE("lsm"))
+ return;
if (!config_is_perm("runs.in_memory") && mmrand(NULL, 1, 20) == 1)
g.c_in_memory = 1;
@@ -724,18 +726,20 @@ config_in_memory_reset(void)
uint32_t cache;
/* Turn off a lot of stuff. */
- if (!config_is_perm("ops.alter"))
- config_single("ops.alter=off", false);
if (!config_is_perm("backup"))
config_single("backup=off", false);
- if (!config_is_perm("checkpoint"))
- config_single("checkpoint=off", false);
if (!config_is_perm("btree.compression"))
config_single("btree.compression=none", false);
- if (!config_is_perm("ops.hs_cursor"))
- config_single("ops.hs_cursor=off", false);
+ if (!config_is_perm("checkpoint"))
+ config_single("checkpoint=off", false);
+ if (!config_is_perm("import"))
+ config_single("import=off", false);
if (!config_is_perm("logging"))
config_single("logging=off", false);
+ if (!config_is_perm("ops.alter"))
+ config_single("ops.alter=off", false);
+ if (!config_is_perm("ops.hs_cursor"))
+ config_single("ops.hs_cursor=off", false);
if (!config_is_perm("ops.salvage"))
config_single("ops.salvage=off", false);
if (!config_is_perm("ops.verify"))
diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h
index 44906866aaa..a06509b0dba 100644
--- a/src/third_party/wiredtiger/test/format/config.h
+++ b/src/third_party/wiredtiger/test/format/config.h
@@ -182,6 +182,13 @@ static CONFIG c[] = {
{"format.major_timeout", "long-running operations timeout (minutes)", C_IGNORE, 0, 0, 1000,
&g.c_major_timeout, NULL},
+ /*
+ * 0%
+ * FIXME-WT-7418 and FIXME-WT-7416: Temporarily disable import until WT_ROLLBACK error and
+ * interaction with backup thread is fixed. Should be 20%
+ */
+ {"import", "import table from newly created database", C_BOOL, 0, 0, 0, &g.c_import, NULL},
+
/* 50% */
{"logging", "configure logging", C_BOOL, 50, 0, 0, &g.c_logging, NULL},
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index 81eec697d4e..7aefc071396 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -172,6 +172,7 @@ typedef struct {
uint32_t c_firstfit;
uint32_t c_hs_cursor;
uint32_t c_huffman_value;
+ uint32_t c_import;
uint32_t c_in_memory;
uint32_t c_independent_thread_rng;
uint32_t c_insert_pct;
@@ -384,6 +385,7 @@ WT_THREAD_RET backup(void *);
WT_THREAD_RET checkpoint(void *);
WT_THREAD_RET compact(void *);
WT_THREAD_RET hs_cursor(void *);
+WT_THREAD_RET import(void *);
WT_THREAD_RET random_kv(void *);
WT_THREAD_RET timestamp(void *);
@@ -395,6 +397,7 @@ void config_final(void);
void config_print(bool);
void config_run(void);
void config_single(const char *, bool);
+void create_database(const char *home, WT_CONNECTION **connp);
void fclose_and_clear(FILE **);
bool fp_readv(FILE *, char *, uint32_t *);
void key_gen_common(WT_ITEM *, uint64_t, const char *);
diff --git a/src/third_party/wiredtiger/test/format/hs.c b/src/third_party/wiredtiger/test/format/hs.c
index e226fe1b3c0..d338a714cf5 100644
--- a/src/third_party/wiredtiger/test/format/hs.c
+++ b/src/third_party/wiredtiger/test/format/hs.c
@@ -48,7 +48,7 @@ hs_cursor(void *arg)
uint32_t hs_btree_id, i;
u_int period;
int exact;
- bool restart;
+ bool next, restart;
(void)(arg); /* Unused parameter */
@@ -69,23 +69,12 @@ hs_cursor(void *arg)
hs_counter = 0; /* [-Wconditional-uninitialized] */
hs_btree_id = 0; /* [-Wconditional-uninitialized] */
for (restart = true;;) {
- /*
- * open_cursor can return EBUSY if concurrent with a metadata operation, retry in that case.
- */
- while ((ret = session->open_cursor(session, WT_HS_URI, NULL, NULL, &cursor)) == EBUSY)
- __wt_yield();
- testutil_check(ret);
-
- /*
- * The history file has mostly tombstones, ignore them and retrieve the underlying values.
- * We don't care about tombstones, but we do want to hit every key rather than skip over
- * them. This is a rollback-to-stable flag we're using for our own purposes.
- */
- F_SET(cursor, WT_CURSTD_IGNORE_TOMBSTONE);
+ testutil_check(__wt_curhs_open((WT_SESSION_IMPL *)session, NULL, &cursor));
+ F_SET(cursor, WT_CURSTD_HS_READ_COMMITTED);
/* Search to the last-known location. */
if (!restart) {
- cursor->set_key(cursor, hs_btree_id, &key, hs_start_ts, hs_counter);
+ cursor->set_key(cursor, 4, hs_btree_id, &key, hs_start_ts, hs_counter);
/*
* Limit expected errors because this is a diagnostic check (the WiredTiger API allows
@@ -99,8 +88,9 @@ hs_cursor(void *arg)
* Get some more key/value pairs. Always retrieve at least one key, that ensures we have a
* valid key when we copy it to start the next run.
*/
+ next = mmrand(NULL, 0, 1) == 1;
for (i = mmrand(NULL, 1, 1000); i > 0; --i) {
- if ((ret = cursor->next(cursor)) == 0) {
+ if ((ret = (next ? cursor->next(cursor) : cursor->prev(cursor))) == 0) {
testutil_check(
cursor->get_key(cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter));
testutil_check(cursor->get_value(
@@ -116,7 +106,8 @@ hs_cursor(void *arg)
* Otherwise, reset so we'll start over.
*/
if (ret == 0) {
- testutil_check(__wt_buf_set(CUR2S(cursor), &key, hs_key.data, hs_key.size));
+ testutil_check(
+ __wt_buf_set((WT_SESSION_IMPL *)session, &key, hs_key.data, hs_key.size));
restart = false;
} else
restart = true;
@@ -130,7 +121,7 @@ hs_cursor(void *arg)
break;
}
- __wt_buf_free(CUR2S(cursor), &key);
+ __wt_buf_free((WT_SESSION_IMPL *)session, &key);
testutil_check(session->close(session, NULL));
#endif
diff --git a/src/third_party/wiredtiger/test/format/import.c b/src/third_party/wiredtiger/test/format/import.c
new file mode 100644
index 00000000000..0b85515b806
--- /dev/null
+++ b/src/third_party/wiredtiger/test/format/import.c
@@ -0,0 +1,223 @@
+/*-
+ * Public Domain 2014-present MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "format.h"
+
+static void copy_file_into_directory(WT_SESSION *, const char *);
+static void get_file_metadata(WT_SESSION *, const char **, const char **);
+static void populate_table(WT_SESSION *);
+static void verify_import(WT_SESSION *);
+
+/*
+ * Import directory initialize command, remove and create import directory, to place new database
+ * connection.
+ */
+#define HOME_IMPORT_INIT_CMD "rm -rf %s/" IMPORT_DIR "&& mkdir %s/" IMPORT_DIR
+#define IMPORT_DIR "IMPORT"
+/*
+ * The number of entries in the import table, primarily used for validating contents after import.
+ * There is no benefit to varying the number of entries in the import table.
+ */
+#define IMPORT_ENTRIES 1000
+#define IMPORT_TABLE_CONFIG "key_format=i,value_format=i"
+#define IMPORT_URI "table:import"
+#define IMPORT_URI_FILE "file:import.wt"
+
+/*
+ * import --
+ * Periodically import table.
+ */
+WT_THREAD_RET
+import(void *arg)
+{
+ WT_CONNECTION *conn, *import_conn;
+ WT_DECL_RET;
+ WT_SESSION *import_session, *session;
+ size_t cmd_len;
+ uint32_t import_value;
+ u_int period;
+ char buf[2048], *cmd;
+ const char *file_config, *table_config;
+
+ WT_UNUSED(arg);
+ conn = g.wts_conn;
+ file_config = table_config = NULL;
+ import_value = 0;
+
+ /*
+ * Create a new database, primarily used for testing import.
+ */
+ cmd_len = strlen(g.home) * 2 + strlen(HOME_IMPORT_INIT_CMD) + 1;
+ cmd = dmalloc(cmd_len);
+ testutil_check(__wt_snprintf(cmd, cmd_len, HOME_IMPORT_INIT_CMD, g.home, g.home));
+ testutil_checkfmt(system(cmd), "%s", "import directory creation failed");
+ free(cmd);
+
+ cmd_len = strlen(g.home) + strlen(IMPORT_DIR) + 10;
+ cmd = dmalloc(cmd_len);
+ testutil_check(__wt_snprintf(cmd, cmd_len, "%s/%s", g.home, IMPORT_DIR));
+ /* Open a connection to the database, creating it if necessary. */
+ create_database(cmd, &import_conn);
+ free(cmd);
+
+ /*
+ * Open two sessions, one for test/format database and one for the import database.
+ */
+ testutil_check(import_conn->open_session(import_conn, NULL, NULL, &import_session));
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+ /* Create new table and populate with data in import database. */
+ testutil_checkfmt(
+ import_session->create(import_session, IMPORT_URI, IMPORT_TABLE_CONFIG), "%s", IMPORT_URI);
+ populate_table(import_session);
+
+ /* Grab metadata information for table from import database connection. */
+ get_file_metadata(import_session, &file_config, &table_config);
+
+ while (!g.workers_finished) {
+ /* Copy table into test/format database directory. */
+ copy_file_into_directory(import_session, "import.wt");
+
+ /* Perform import with either repair or file metadata. */
+ memset(buf, 0, sizeof(buf));
+ import_value = mmrand(NULL, 0, 1);
+ if (import_value == 0) {
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "import=(enabled,repair=true)"));
+ if ((ret = session->create(session, IMPORT_URI, buf)) != 0)
+ testutil_die(ret, "session.import", ret);
+ } else {
+ testutil_check(__wt_snprintf(buf, sizeof(buf),
+ "%s,import=(enabled,repair=false,file_metadata=(%s))", table_config, file_config));
+ if ((ret = session->create(session, IMPORT_URI, buf)) != 0)
+ testutil_die(ret, "session.import", ret);
+ }
+
+ verify_import(session);
+
+ /* Perform checkpoint, to make sure we perform drop */
+ session->checkpoint(session, NULL);
+
+ /* Drop import table, so we can import the table again */
+ while ((ret = session->drop(session, IMPORT_URI, NULL)) == EBUSY) {
+ __wt_yield();
+ }
+ testutil_check(ret);
+
+ period = mmrand(NULL, 1, 10);
+ while (period > 0 && !g.workers_finished) {
+ --period;
+ __wt_sleep(1, 0);
+ }
+ }
+ wts_close(&import_conn, &import_session);
+ testutil_check(session->close(session, NULL));
+ return (WT_THREAD_RET_VALUE);
+}
+
+/*
+ * verify_import --
+ * Verify all the values inside the imported table.
+ */
+static void
+verify_import(WT_SESSION *session)
+{
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ int iteration, key, value;
+
+ iteration = 0;
+ testutil_check(session->open_cursor(session, IMPORT_URI, NULL, NULL, &cursor));
+
+ while ((ret = cursor->next(cursor)) == 0) {
+ error_check(cursor->get_key(cursor, &key));
+ testutil_assert(key == iteration);
+ error_check(cursor->get_value(cursor, &value));
+ testutil_assert(value == iteration);
+ iteration++;
+ }
+ testutil_assert(iteration == IMPORT_ENTRIES);
+ scan_end_check(ret == WT_NOTFOUND);
+ testutil_check(cursor->close(cursor));
+}
+
+/*
+ * populate_table --
+ * Populate the import table with simple data.
+ */
+static void
+populate_table(WT_SESSION *session)
+{
+ WT_CURSOR *cursor;
+ int i;
+
+ testutil_check(session->open_cursor(session, IMPORT_URI, NULL, NULL, &cursor));
+
+ for (i = 0; i < IMPORT_ENTRIES; ++i) {
+ cursor->set_key(cursor, i);
+ cursor->set_value(cursor, i);
+ testutil_check(cursor->insert(cursor));
+ }
+ testutil_check(cursor->close(cursor));
+ testutil_check(session->checkpoint(session, NULL));
+}
+
+/*
+ * get_file_metadata --
+ * Get import file and table metadata information from import database connection.
+ */
+static void
+get_file_metadata(WT_SESSION *session, const char **file_config, const char **table_config)
+{
+ WT_CURSOR *metadata_cursor;
+
+ testutil_check(session->open_cursor(session, "metadata:", NULL, NULL, &metadata_cursor));
+ metadata_cursor->set_key(metadata_cursor, IMPORT_URI);
+ testutil_check(metadata_cursor->search(metadata_cursor));
+ metadata_cursor->get_value(metadata_cursor, table_config);
+
+ metadata_cursor->set_key(metadata_cursor, IMPORT_URI_FILE);
+ testutil_check(metadata_cursor->search(metadata_cursor));
+ metadata_cursor->get_value(metadata_cursor, file_config);
+
+ testutil_check(metadata_cursor->close(metadata_cursor));
+}
+
+/*
+ * copy_file_into_directory --
+ * Copy a single file into the test/format directory.
+ */
+static void
+copy_file_into_directory(WT_SESSION *session, const char *name)
+{
+ size_t buf_len;
+ char to[64];
+
+ buf_len = strlen(name) + 10;
+ testutil_check(__wt_snprintf(to, buf_len, "../%s", name));
+ testutil_check(__wt_copy_and_sync(session, name, to));
+}
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index 7e8a12c5434..0e5f8a30422 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -235,7 +235,7 @@ operations(u_int ops_seconds, bool lastrun)
TINFO *tinfo, total;
WT_CONNECTION *conn;
WT_SESSION *session;
- wt_thread_t alter_tid, backup_tid, checkpoint_tid, compact_tid, hs_tid, random_tid;
+ wt_thread_t alter_tid, backup_tid, checkpoint_tid, compact_tid, hs_tid, import_tid, random_tid;
wt_thread_t timestamp_tid;
int64_t fourths, quit_fourths, thread_ops;
uint32_t i;
@@ -249,6 +249,7 @@ operations(u_int ops_seconds, bool lastrun)
memset(&checkpoint_tid, 0, sizeof(checkpoint_tid));
memset(&compact_tid, 0, sizeof(compact_tid));
memset(&hs_tid, 0, sizeof(hs_tid));
+ memset(&import_tid, 0, sizeof(import_tid));
memset(&random_tid, 0, sizeof(random_tid));
memset(&timestamp_tid, 0, sizeof(timestamp_tid));
@@ -302,6 +303,8 @@ operations(u_int ops_seconds, bool lastrun)
testutil_check(__wt_thread_create(NULL, &compact_tid, compact, NULL));
if (g.c_hs_cursor)
testutil_check(__wt_thread_create(NULL, &hs_tid, hs_cursor, NULL));
+ if (g.c_import)
+ testutil_check(__wt_thread_create(NULL, &import_tid, import, NULL));
if (g.c_random_cursor)
testutil_check(__wt_thread_create(NULL, &random_tid, random_kv, NULL));
if (g.c_txn_timestamps)
@@ -386,6 +389,8 @@ operations(u_int ops_seconds, bool lastrun)
testutil_check(__wt_thread_join(NULL, &compact_tid));
if (g.c_hs_cursor)
testutil_check(__wt_thread_join(NULL, &hs_tid));
+ if (g.c_import)
+ testutil_check(__wt_thread_join(NULL, &import_tid));
if (g.c_random_cursor)
testutil_check(__wt_thread_join(NULL, &random_tid));
if (g.c_txn_timestamps)
diff --git a/src/third_party/wiredtiger/test/format/wts.c b/src/third_party/wiredtiger/test/format/wts.c
index cc489c52623..3b37b3a43d1 100644
--- a/src/third_party/wiredtiger/test/format/wts.c
+++ b/src/third_party/wiredtiger/test/format/wts.c
@@ -148,7 +148,7 @@ static WT_EVENT_HANDLER event_handler = {
* create_database --
* Create a WiredTiger database.
*/
-static void
+void
create_database(const char *home, WT_CONNECTION **connp)
{
WT_CONNECTION *conn;
diff --git a/src/third_party/wiredtiger/test/suite/hook_demo.py b/src/third_party/wiredtiger/test/suite/hook_demo.py
new file mode 100755
index 00000000000..113c427c8b7
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/hook_demo.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# [TEST_TAGS]
+# ignored_file
+# [END_TAGS]
+
+# hook_demo.py
+# Demonstration of hooks. Run via:
+# python run.py --hook demo=N base01
+#
+# These hooks are set up:
+# - alter wiredtiger_open arguments (in a benign way)
+# - report after wiredtiger_open is called.
+# - notify on session.open_cursor
+# - intercept the session.create call
+#
+# With N == 0, the session.create call reports its arguments and calls original session.create.
+# with N == 1, it does an additional session.drop call (which should cause tests to fail);
+# with N == 2, it does an additional session.create after the drop call (which should work).
+#
+# Note that notify hooks don't have to simply report, they can call other methods,
+# set attributes on objects, etc. For example, one can save the open_cursor
+# config string as an attribute on the cursor object, and examine it in another
+# hooked method.
+from __future__ import print_function
+
+import os, sys, wthooks
+from wttest import WiredTigerTestCase
+
+# Print to /dev/tty for debugging, since anything extraneous to stdout/stderr will
+# cause a test error.
+def tty(s):
+ WiredTigerTestCase.tty(s)
+
+# These are the hook functions that are run when particular APIs are called.
+
+# Called to manipulate args for wiredtiger_open
+def wiredtiger_open_args(ignored_self, args):
+ tty('>>> wiredtiger_open, adding cache_size')
+ args = list(args) # convert from a readonly tuple to a writeable list
+ args[-1] += ',,,cache_size=500M,,,' # modify the last arg
+ return args
+
+# Called to notify after successful wiredtiger_open
+def wiredtiger_open_notify(ignored_self, ret, *args):
+ tty('>>> wiredtiger_open({}) returned {}'.format(args, ret))
+
+# Called to notify after successful Session.open_cursor
+def session_open_cursor_notify(self, ret, *args):
+ tty('>>> session.open_cursor({}) returned {}, session is {}'.format(args, ret, self))
+
+# Called to replace Session.create
+# We do different things (described above) as indicated by our command line argument.
+def session_create_replace(arg, orig_session_create, session_self, uri, config):
+ tty('>>> session.create({},{}), session is {}'.format(uri, config, session_self))
+ if arg == 0:
+ # Just do a regular create
+ return orig_session_create(session_self, uri, config)
+ elif arg == 1:
+ # Do a regular create, followed by a drop. This will cause test failures.
+ ret = orig_session_create(session_self, uri, config)
+ # We didn't replace drop, so we can call it as a method
+ tty('>>> session.drop({})'.format(uri))
+ session_self.drop(uri)
+ return ret
+ elif arg == 2:
+ # Do a regular create, followed by a drop, then another create. Should work.
+ ret = orig_session_create(session_self, uri, config)
+ # We didn't replace drop, so we can call it as a method
+ tty('>>> session.drop({})'.format(uri))
+ session_self.drop(uri)
+ tty('>>> session.create({},{})'.format(uri, config))
+ orig_session_create(session_self, uri, config)
+ return ret
+
+# Every hook file must have one or more classes descended from WiredTigerHook
+# This is where the hook functions are 'hooked' to API methods.
+class DemoHookCreator(wthooks.WiredTigerHookCreator):
+ def __init__(self, arg=0):
+ # An argument may alter the test
+ if arg == None:
+ self.arg = 0
+ else:
+ self.arg = int(arg)
+
+ # We have an opportunity to filter the list of tests to be run.
+ # For this demo, we don't filter.
+ def filter_tests(self, tests):
+ print('Filtering: ' + str(tests))
+ return tests
+
+ def setup_hooks(self):
+ tty('>> SETUP HOOKS RUN')
+ orig_session_create = self.Session['create'] # gets original function
+ self.wiredtiger['wiredtiger_open'] = (wthooks.HOOK_ARGS, wiredtiger_open_args)
+ self.wiredtiger['wiredtiger_open'] = (wthooks.HOOK_NOTIFY, wiredtiger_open_notify)
+ self.Session['create'] = (wthooks.HOOK_REPLACE, lambda s, uri, config:
+ session_create_replace(self.arg, orig_session_create, s, uri, config))
+ self.Session['open_cursor'] = (wthooks.HOOK_NOTIFY, session_open_cursor_notify)
+
+# Every hook file must have a top level initialize function,
+# returning a list of WiredTigerHook objects.
+def initialize(arg):
+ return [DemoHookCreator(arg)]
diff --git a/src/third_party/wiredtiger/test/suite/hook_tiered.py b/src/third_party/wiredtiger/test/suite/hook_tiered.py
new file mode 100755
index 00000000000..5bb97ea399b
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/hook_tiered.py
@@ -0,0 +1,142 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# [TEST_TAGS]
+# ignored_file
+# [END_TAGS]
+
+# hook_tiered.py
+#
+# Substitute tiered tables for regular (row-store) tables in Python tests.
+#
+# These hooks can be used to run the existing cursor tests on tiered tables.
+# They identify tests that create row-store tables and create tiered tables
+# instead. The hook takes an optional argument to specify how many tiers
+# to create. The default is 2.
+#
+# To run with 3 tiers per table:
+# ../test/suite/run.py --hooks tiered=3 cursor
+#
+# The hooks work with many other tests in the python suite but also encounter
+# a variety of failures that I haven't tried to sort out.
+from __future__ import print_function
+
+import os, sys, wthooks
+import unittest
+from wttest import WiredTigerTestCase
+
+# These are the hook functions that are run when particular APIs are called.
+
+# Called to replace Session.create
+def session_create_replace(ntiers, orig_session_create, session_self, uri, config):
+ if config == None:
+ base_config = ""
+ else:
+ base_config = config
+
+ # If the test is creating a table (not colstore or lsm), create a tiered table instead,
+ # using arg to determine number of tiers. Otherwise just do the create as normal.
+ #
+ # NOTE: The following code uses the old API for creating tiered tables. As of WT-7173
+ # this no longer works. It will be updated and fixed in WT-7440.
+ if (uri.startswith("table:") and "key_format=r" not in base_config and
+ "type=lsm" not in base_config):
+ tier_string = ""
+ for i in range(ntiers):
+ new_uri = uri.replace('table:', 'file:tier' + str(i) + '_')
+ orig_session_create(session_self, new_uri, config)
+ tier_string = tier_string + '"' + new_uri + '", '
+ tier_config = 'type=tiered,tiered=(tiers=(' + tier_string[0:-2] + ')),' + base_config
+ WiredTigerTestCase.verbose(None, 3,
+ 'Creating tiered table {} with config = \'{}\''.format(uri, tier_config))
+ ret = orig_session_create(session_self, uri, tier_config)
+ else:
+ ret = orig_session_create(session_self, uri, config)
+ return ret
+
+# Called to replace Session.drop
+def session_drop_replace(ntiers, orig_session_drop, session_self, uri, config):
+ # Drop isn't implemented for tiered tables. Only do the delete if this could be a
+ # uri we created a tiered table for. Note this isn't a precise match for when we
+ # did/didn't create a tiered table, but we don't have the create config around to check.
+ ret = 0
+ if not uri.startswith("table:"):
+ ret = orig_session_drop(session_self, uri, config)
+ return ret
+
+# Called to replace Session.verify
+def session_verify_replace(ntiers, orig_session_verify, session_self, uri):
+ return 0
+
+# Every hook file must have one or more classes descended from WiredTigerHook
+# This is where the hook functions are 'hooked' to API methods.
+class TieredHookCreator(wthooks.WiredTigerHookCreator):
+ def __init__(self, ntiers=0):
+ # Argument specifies the number of tiers to test. The default is 2.
+ if ntiers == None:
+ self.ntiers = 2
+ else:
+ self.ntiers = int(ntiers)
+
+ # Is this test one we should skip? We skip tests of features supported on standard
+ # tables but not tiered tables, specifically cursor caching and checkpoint cursors.
+ def skip_test(self, test):
+ skip = ["bulk_backup",
+ "checkpoint",
+ "test_cursor13_big",
+ "test_cursor13_drops",
+ "test_cursor13_dup",
+ "test_cursor13_reopens"]
+ for item in skip:
+ if item in str(test):
+ return True
+ return False
+
+ # Remove tests that won't work on tiered cursors
+ def filter_tests(self, tests):
+ new_tests = unittest.TestSuite()
+ new_tests.addTests([t for t in tests if not self.skip_test(t)])
+ return new_tests
+
+ def setup_hooks(self):
+ orig_session_create = self.Session['create']
+ self.Session['create'] = (wthooks.HOOK_REPLACE, lambda s, uri, config:
+ session_create_replace(self.ntiers, orig_session_create, s, uri, config))
+
+ orig_session_drop = self.Session['drop']
+ self.Session['drop'] = (wthooks.HOOK_REPLACE, lambda s, uri, config:
+ session_drop_replace(self.ntiers, orig_session_drop, s, uri, config))
+
+ orig_session_verify = self.Session['verify']
+ self.Session['verify'] = (wthooks.HOOK_REPLACE, lambda s, uri:
+ session_verify_replace(self.ntiers, orig_session_verify, s, uri))
+
+# Every hook file must have a top level initialize function,
+# returning a list of WiredTigerHook objects.
+def initialize(arg):
+ return [TieredHookCreator(arg)]
diff --git a/src/third_party/wiredtiger/test/suite/run.py b/src/third_party/wiredtiger/test/suite/run.py
index a5ae88fa966..8d74b84259d 100755
--- a/src/third_party/wiredtiger/test/suite/run.py
+++ b/src/third_party/wiredtiger/test/suite/run.py
@@ -119,8 +119,10 @@ Options:\n\
be run without executing any.\n\
-g | --gdb all subprocesses (like calls to wt) use gdb\n\
-h | --help show this message\n\
+ | --hook name[=arg] set up hooks from hook_<name>.py, with optional arg\n\
-j N | --parallel N run all tests in parallel using N processes\n\
-l | --long run the entire test suite\n\
+ | --noremove do not remove WT_TEST or -D target before run\n\
-p | --preserve preserve output files in WT_TEST/<testname>\n\
-r N | --random-sample N randomly sort scenarios to be run, then\n\
execute every Nth (2<=N<=1000) scenario.\n\
@@ -306,6 +308,7 @@ def error(exitval, prefix, msg):
if __name__ == '__main__':
# Turn numbers and ranges into test module names
preserve = timestamp = debug = dryRun = gdbSub = lldbSub = longtest = ignoreStdout = False
+ removeAtStart = True
asan = False
parallel = 0
random_sample = 0
@@ -318,6 +321,7 @@ if __name__ == '__main__':
verbose = 1
args = sys.argv[1:]
testargs = []
+ hook_names = []
while len(args) > 0:
arg = args.pop(0)
from unittest import defaultTestLoader as loader
@@ -367,9 +371,18 @@ if __name__ == '__main__':
if option == '-help' or option == 'h':
usage()
sys.exit(0)
+ if option == '-hook':
+ if len(args) == 0:
+ usage()
+ sys.exit(2)
+ hook_names.append(args.pop(0))
+ continue
if option == '-long' or option == 'l':
longtest = True
continue
+ if option == '-noremove':
+ removeAtStart = False
+ continue
if option == '-random-sample' or option == 'r':
if len(args) == 0:
usage()
@@ -519,11 +532,13 @@ if __name__ == '__main__':
tests = unittest.TestSuite()
from testscenarios.scenarios import generate_scenarios
+ import wthooks
+ hookmgr = wthooks.WiredTigerHookManager(hook_names)
# All global variables should be set before any test classes are loaded.
# That way, verbose printing can be done at the class definition level.
- wttest.WiredTigerTestCase.globalSetup(preserve, timestamp, gdbSub, lldbSub,
- verbose, wt_builddir, dirarg,
- longtest, ignoreStdout, seedw, seedz)
+ wttest.WiredTigerTestCase.globalSetup(preserve, removeAtStart, timestamp, gdbSub, lldbSub,
+ verbose, wt_builddir, dirarg, longtest,
+ ignoreStdout, seedw, seedz, hookmgr)
# Without any tests listed as arguments, do discovery
if len(testargs) == 0:
@@ -542,6 +557,7 @@ if __name__ == '__main__':
for arg in testargs:
testsFromArg(tests, loader, arg, scenario)
+ tests = hookmgr.filter_tests(tests)
# Shuffle the tests and create a new suite containing every Nth test from
# the original suite
if random_sample > 0:
diff --git a/src/third_party/wiredtiger/test/suite/test_backup21.py b/src/third_party/wiredtiger/test/suite/test_backup21.py
new file mode 100644
index 00000000000..42e2405c22c
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_backup21.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import queue, threading, wiredtiger, wttest
+from wtbackup import backup_base
+from wtscenario import make_scenarios
+from wtthread import op_thread
+
+# test_backup21.py
+# Run create/drop operations while backup is ongoing.
+class test_backup21(backup_base):
+ # Backup directory name.
+ dir = 'backup.dir'
+ uri = 'test_backup21'
+ ops = 50
+ key_fmt = "S"
+
+ def test_concurrent_operations_with_backup(self):
+ done = threading.Event()
+ table_uri = 'table:' + self.uri
+
+ # Create and populate the table.
+ self.session.create(table_uri, "key_format=S,value_format=S")
+ self.add_data(table_uri, 'key', 'value', True)
+
+ work_queue = queue.Queue()
+ t = op_thread(self.conn, [table_uri], self.key_fmt, work_queue, done)
+ try:
+ t.start()
+ # Place create or drop operation into work queue.
+ iteration = 0
+ op = 't'
+ for _ in range(0, self.ops):
+ # Open backup cursor.
+ bkup_c = self.session.open_cursor('backup:', None, None)
+ work_queue.put_nowait((op, str(iteration), 'value'))
+
+ all_files = self.take_full_backup(self.dir, bkup_c)
+ if op == 't':
+ # Newly created table shouldn't be present in backup.
+ self.assertTrue(self.uri + str(iteration) + ".wt" not in all_files)
+ iteration = iteration + 1
+ else:
+ # Dropped table should still be present in backup.
+ self.assertTrue(self.uri + str(iteration) + ".wt" in all_files)
+ iteration = iteration + 1
+ bkup_c.close()
+ # Once we reach midway point, start drop operations.
+ if iteration == self.ops/2:
+ iteration = 0
+ op = 'd'
+ except:
+ # Deplete the work queue if there's an error.
+ while not work_queue.empty():
+ work_queue.get()
+ work_queue.task_done()
+ raise
+ finally:
+ work_queue.join()
+ done.set()
+ t.join()
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_hs21.py b/src/third_party/wiredtiger/test/suite/test_hs21.py
new file mode 100644
index 00000000000..e2c8885661f
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_hs21.py
@@ -0,0 +1,200 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import time, re
+import wiredtiger, wttest
+from wtdataset import SimpleDataSet
+from wiredtiger import stat
+
+def timestamp_str(t):
+ return '%x' % t
+
+# test_hs21.py
+# Test we don't lose any data when idle files with an active history are closed/swept.
+# Files with active history, ie content newer than the oldest timestamp can be closed when idle.
+# We want to ensure that when an active history file is idle closed we can continue reading the
+# correct version of data and their base write generation hasn't changed (since we haven't
+# restarted the system).
+class test_hs21(wttest.WiredTigerTestCase):
+ # Configure handle sweeping to occur within a specific amount of time.
+ conn_config = 'file_manager=(close_handle_minimum=0,close_idle_time=2,close_scan_interval=1),' + \
+ 'statistics=(all),operation_tracking=(enabled=false)'
+ session_config = 'isolation=snapshot'
+ file_name = 'test_hs21'
+ numfiles = 10
+ nrows = 10000
+
+ def large_updates(self, uri, value, ds, nrows, commit_ts):
+ # Update a large number of records, we'll hang if the history store table isn't working.
+ session = self.session
+ cursor = session.open_cursor(uri)
+ session.begin_transaction()
+ for i in range(1, nrows + 1):
+ cursor[ds.key(i)] = value
+ session.commit_transaction('commit_timestamp=' + timestamp_str(commit_ts))
+ cursor.close()
+
+ def check(self, session, check_value, uri, nrows, read_ts=-1):
+ # Validate we read an expected value (optionally at a given read timestamp).
+ if read_ts != -1:
+ session.begin_transaction('read_timestamp=' + timestamp_str(read_ts))
+ cursor = session.open_cursor(uri)
+ count = 0
+ for k, v in cursor:
+ self.assertEqual(v, check_value)
+ count += 1
+ if read_ts != -1:
+ session.rollback_transaction()
+ self.assertEqual(count, nrows)
+ cursor.close()
+
+ def parse_run_write_gen(self, uri):
+ meta_cursor = self.session.open_cursor('metadata:')
+ config = meta_cursor[uri]
+ meta_cursor.close()
+ # The search string will look like: 'run_write_gen=<num>'.
+ # Just reverse the string and take the digits from the back until we hit '='.
+ write_gen = re.search('run_write_gen=\d+', config)
+ self.assertTrue(write_gen is not None)
+ write_gen_str = str()
+ for c in reversed(write_gen.group(0)):
+ if not c.isdigit():
+ self.assertEqual(c, '=')
+ break
+ write_gen_str = c + write_gen_str
+ return int(write_gen_str)
+
+ def test_hs(self):
+ active_files = []
+ value1 = 'a' * 500
+ value2 = 'd' * 500
+
+ # Set up 'numfiles' with 'numrows' entries. We want to create a number of files that
+ # contain active history (content newer than the oldest timestamp).
+ for f in range(self.numfiles):
+ table_uri = 'table:%s.%d' % (self.file_name, f)
+ file_uri = 'file:%s.%d.wt' % (self.file_name, f)
+ # Create a small table.
+ ds = SimpleDataSet(
+ self, table_uri, 0, key_format='S', value_format='S', config='log=(enabled=false)')
+ ds.populate()
+ # Checkpoint to ensure we write the file's metadata checkpoint value.
+ self.session.checkpoint()
+ # Get the base write gen of the file so we can compare after the handles get closed.
+ base_write_gen = self.parse_run_write_gen(file_uri)
+ active_files.append((base_write_gen, ds))
+
+ # Pin oldest and stable to timestamp 1.
+ self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(1) +
+ ',stable_timestamp=' + timestamp_str(1))
+
+ # Perform a series of updates over our files at timestamp 2, giving us data we can later assert
+ # against to ensure the history store is working as intended.
+ for (_, ds) in active_files:
+ # Load data at timestamp 2.
+ self.large_updates(ds.uri, value1, ds, self.nrows // 2 , 2)
+
+ # We want to create a long running read transaction in a separate session which we will persist over the closing and
+ # re-opening of handles. We want to ensure the correct data gets read throughout this time period.
+ session_read = self.conn.open_session()
+ session_read.begin_transaction('read_timestamp=' + timestamp_str(2))
+ # Check our initial set of updates are seen at the read timestamp.
+ for (_, ds) in active_files:
+ # Check that all updates at timestamp 2 are seen.
+ self.check(session_read, value1, ds.uri, self.nrows // 2)
+
+ # Perform a series of updates over our files at a later timestamp, checking the history store data is consistent
+ # with old and new timestamps.
+ for (_, ds) in active_files:
+ # Load more data with a later timestamp.
+ self.large_updates(ds.uri, value2, ds, self.nrows, 100)
+ # Check that the new updates are only seen after the update timestamp.
+ self.check(self.session, value1, ds.uri, self.nrows // 2, 2)
+ self.check(self.session, value2, ds.uri, self.nrows, 100)
+
+ # Our sweep scan interval is every 1 second and the amount of idle time needed for a handle to be closed is 2 seconds.
+ # It should take roughly 3 seconds for the sweep server to close our file handles. Let's wait at least double
+ # that to be safe.
+ max = 6
+ sleep = 0
+ # After waiting for the sweep server to remove our idle handles, the only open
+ # handles remaining should be the metadata file, history store file and lock file.
+ final_numfiles = 3
+ # Open the stats cursor to collect the dhandle sweep status.
+ stat_cursor = self.session.open_cursor('statistics:', None, None)
+ while sleep < max:
+ # We continue doing checkpoints which, as a side effect, run the session handle sweep, encouraging the idle
+ # handles to get removed.
+ # Note, though checkpointing blocks sweeping, the checkpoint should be fast and not add too much extra time to the
+ # overall test time.
+ self.session.checkpoint()
+ sleep += 0.5
+ time.sleep(0.5)
+ stat_cursor.reset()
+ curr_files_open = stat_cursor[stat.conn.file_open][2]
+ curr_dhandles_removed = stat_cursor[stat.conn.dh_sweep_remove][2]
+ curr_dhandle_sweep_closes = stat_cursor[stat.conn.dh_sweep_close][2]
+
+ self.printVerbose(3, "==== loop " + str(sleep))
+ self.printVerbose(3, "Number of files open: " + str(curr_files_open))
+ self.printVerbose(3, "Number of connection sweep dhandles closed: " + str(curr_dhandle_sweep_closes))
+ self.printVerbose(3, "Number of connection sweep dhandles removed from hashlist: " + str(curr_dhandles_removed))
+
+ # We've swept all the handles we can if we are left with the number of final dhandles
+ # that we expect to be always open.
+ if curr_files_open == final_numfiles and curr_dhandle_sweep_closes >= self.numfiles:
+ break
+
+ stat_cursor.reset()
+ final_dhandle_sweep_closes = stat_cursor[stat.conn.dh_sweep_close][2]
+ stat_cursor.close()
+ # We want to assert our active history files have all been closed.
+ self.assertGreaterEqual(final_dhandle_sweep_closes, self.numfiles)
+
+ # Using our long running read transaction, we want to now check the correct data can still be read after the
+ # handles have been closed.
+ for (_, ds) in active_files:
+ # Check that all updates at timestamp 2 are seen.
+ self.check(session_read, value1, ds.uri, self.nrows // 2)
+ session_read.rollback_transaction()
+
+ # Perform a series of checks over our files to ensure that our transactions have been written
+ # before the dhandles were closed/swept.
+ # Also, despite the dhandle being re-opened, we don't expect the base write generation
+ # to have changed since we haven't actually restarted the system.
+ for idx, (initial_base_write_gen, ds) in enumerate(active_files):
+ # Check that the most recent transaction has the correct data.
+ self.check(self.session, value2, ds.uri, self.nrows, 100)
+ file_uri = 'file:%s.%d.wt' % (self.file_name, idx)
+ # Get the current base_write_gen and ensure it hasn't changed since being
+ # closed.
+ base_write_gen = self.parse_run_write_gen(file_uri)
+ self.assertEqual(initial_base_write_gen, base_write_gen)
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_hs22.py b/src/third_party/wiredtiger/test/suite/test_hs22.py
new file mode 100644
index 00000000000..cf30767b8bc
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_hs22.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+
+def timestamp_str(t):
+ return '%x' % t
+
+# test_hs22.py
+# Test the case that out of order timestamp
+# update is followed by a tombstone.
+class test_hs22(wttest.WiredTigerTestCase):
+ conn_config = 'cache_size=50MB'
+ session_config = 'isolation=snapshot'
+
+ def test_onpage_out_of_order_timestamp_update(self):
+ uri = 'table:test_hs22'
+ self.session.create(uri, 'key_format=S,value_format=S')
+ cursor = self.session.open_cursor(uri)
+ self.conn.set_timestamp(
+ 'oldest_timestamp=' + timestamp_str(1) + ',stable_timestamp=' + timestamp_str(1))
+
+ value1 = 'a'
+ value2 = 'b'
+
+ # Insert a key.
+ self.session.begin_transaction()
+ cursor[str(0)] = value1
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(10))
+
+ # Remove the key.
+ self.session.begin_transaction()
+ cursor.set_key(str(0))
+ self.assertEqual(cursor.remove(), 0)
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(20))
+
+ # Do an out of order timestamp
+ # update; it is written to the
+ # data store later.
+ self.session.begin_transaction()
+ cursor[str(0)] = value2
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(15))
+
+ # Insert another key.
+ self.session.begin_transaction()
+ cursor[str(1)] = value1
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(20))
+
+ # Update the key.
+ self.session.begin_transaction()
+ cursor[str(1)] = value2
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(30))
+
+ # Do a checkpoint to trigger
+ # history store reconciliation.
+ self.session.checkpoint()
+
+ evict_cursor = self.session.open_cursor(uri, None, "debug=(release_evict)")
+
+ # Search the key to evict it.
+ self.session.begin_transaction("read_timestamp=" + timestamp_str(15))
+ self.assertEqual(evict_cursor[str(0)], value2)
+ self.assertEqual(evict_cursor.reset(), 0)
+ self.session.rollback_transaction()
+
+ # Search the key again to verify the data is still as expected.
+ self.session.begin_transaction("read_timestamp=" + timestamp_str(15))
+ self.assertEqual(cursor[str(0)], value2)
+ self.session.rollback_transaction()
+
+ def test_out_of_order_timestamp_update_newer_than_tombstone(self):
+ uri = 'table:test_hs22'
+ self.session.create(uri, 'key_format=S,value_format=S')
+ cursor = self.session.open_cursor(uri)
+ self.conn.set_timestamp(
+ 'oldest_timestamp=' + timestamp_str(1) + ',stable_timestamp=' + timestamp_str(1))
+
+ value1 = 'a'
+ value2 = 'b'
+
+ # Insert a key.
+ self.session.begin_transaction()
+ cursor[str(0)] = value1
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(10))
+
+ # Remove the key.
+ self.session.begin_transaction()
+ cursor.set_key(str(0))
+ self.assertEqual(cursor.remove(), 0)
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(20))
+
+ # Do an out of order timestamp
+ # update; it is written to the
+ # history store later.
+ self.session.begin_transaction()
+ cursor[str(0)] = value2
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(15))
+
+ # Add another update.
+ self.session.begin_transaction()
+ cursor[str(0)] = value1
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(20))
+
+ # Insert another key.
+ self.session.begin_transaction()
+ cursor[str(1)] = value1
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(20))
+
+ # Update the key.
+ self.session.begin_transaction()
+ cursor[str(1)] = value2
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(30))
+
+ # Do a checkpoint to trigger
+ # history store reconciliation.
+ self.session.checkpoint()
+
+ evict_cursor = self.session.open_cursor(uri, None, "debug=(release_evict)")
+
+ # Search the key to evict it.
+ self.session.begin_transaction("read_timestamp=" + timestamp_str(15))
+ self.assertEqual(evict_cursor[str(0)], value2)
+ self.assertEqual(evict_cursor.reset(), 0)
+ self.session.rollback_transaction()
+
+ # Search the key again to verify the data is still as expected.
+ self.session.begin_transaction("read_timestamp=" + timestamp_str(15))
+ self.assertEqual(cursor[str(0)], value2)
+ self.session.rollback_transaction()
diff --git a/src/third_party/wiredtiger/test/suite/test_import10.py b/src/third_party/wiredtiger/test/suite/test_import10.py
index 0d56f799291..7715a2754ad 100644
--- a/src/third_party/wiredtiger/test/suite/test_import10.py
+++ b/src/third_party/wiredtiger/test/suite/test_import10.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
#
-# Public Domain 2014-2021 MongoDB, Inc.
+# Public Domain 2014-present MongoDB, Inc.
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
@@ -87,7 +87,7 @@ class test_import10(backup_base):
cursor.close()
all_files = self.take_full_backup(self.dir, bkup_c)
- self.assertTrue(self.uri + "wt" not in all_files)
+ self.assertTrue(self.uri + ".wt" not in all_files)
bkup_c.close()
if __name__ == '__main__':
diff --git a/src/third_party/wiredtiger/test/suite/test_prepare14.py b/src/third_party/wiredtiger/test/suite/test_prepare14.py
new file mode 100644
index 00000000000..fb32aefc713
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_prepare14.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wttest
+from wiredtiger import WT_NOTFOUND
+from wtscenario import make_scenarios
+
+def timestamp_str(t):
+ return '%x' % t
+
+# test_prepare14.py
+# Test that the transaction visibility of an on-disk update
+# that has both the start and the stop time points from the
+# same uncommitted prepared transaction.
+class test_prepare14(wttest.WiredTigerTestCase):
+ session_config = 'isolation=snapshot'
+
+ in_memory_values = [
+ ('no_inmem', dict(in_memory=False)),
+ ('inmem', dict(in_memory=True))
+ ]
+
+ key_format_values = [
+ ('column', dict(key_format='r')),
+ ('integer_row', dict(key_format='i')),
+ ]
+
+ scenarios = make_scenarios(in_memory_values, key_format_values)
+
+ def conn_config(self):
+ config = 'cache_size=50MB'
+ if self.in_memory:
+ config += ',in_memory=true'
+ else:
+ config += ',in_memory=false'
+ return config
+
+ def test_prepare14(self):
+ # Prepare transactions for column store table is not yet supported.
+ if self.key_format == 'r':
+ self.skipTest('Prepare transactions for column store table is not yet supported')
+
+ # Create a table.
+ uri = "table:prepare14"
+ create_config = 'allocation_size=512,key_format=S,value_format=S'
+ self.session.create(uri, create_config)
+
+ # Pin oldest and stable timestamps to 10.
+ self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(10) +
+ ',stable_timestamp=' + timestamp_str(10))
+
+ value = 'a'
+
+ # Perform an update and a remove of the same key within one prepared transaction.
+ s = self.conn.open_session()
+ cursor = s.open_cursor(uri)
+ s.begin_transaction()
+ cursor[str(0)] = value
+ cursor.set_key(str(0))
+ cursor.remove()
+ cursor.close()
+ s.prepare_transaction('prepare_timestamp=' + timestamp_str(20))
+
+ # Configure debug behavior on a cursor to evict the page positioned on when the reset API is used.
+ evict_cursor = self.session.open_cursor(uri, None, "debug=(release_evict)")
+
+ # Search for the key so we position our cursor on the page that we want to evict.
+ self.session.begin_transaction("ignore_prepare = true")
+ evict_cursor.set_key(str(0))
+ self.assertEquals(evict_cursor.search(), WT_NOTFOUND)
+ evict_cursor.reset()
+ evict_cursor.close()
+ self.session.commit_transaction()
+
+ self.session.begin_transaction("ignore_prepare = true")
+ cursor2 = self.session.open_cursor(uri)
+ cursor2.set_key(str(0))
+ self.assertEquals(cursor2.search(), WT_NOTFOUND)
+ self.session.commit_transaction()
diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable01.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable01.py
index b3bf62f03ef..da5b6d1ca91 100755
--- a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable01.py
+++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable01.py
@@ -110,6 +110,7 @@ class test_rollback_to_stable_base(wttest.WiredTigerTestCase):
count += 1
session.commit_transaction()
self.assertEqual(count, nrows)
+ cursor.close()
# Test that rollback to stable clears the remove operation.
class test_rollback_to_stable01(test_rollback_to_stable_base):
diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable10.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable10.py
index 50a31085c38..8aefee48b16 100755
--- a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable10.py
+++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable10.py
@@ -207,6 +207,10 @@ class test_rollback_to_stable10(test_rollback_to_stable_base):
def test_rollback_to_stable_prepare(self):
nrows = 1000
+ # Prepare transactions for column store table is not yet supported.
+ if self.prepare and self.key_format == 'r':
+ self.skipTest('Prepare transactions for column store table is not yet supported')
+
# Create a table without logging.
self.pr("create/populate tables")
uri_1 = "table:rollback_to_stable10_1"
diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable14.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable14.py
index 20d0f9f7744..66614938d3c 100755
--- a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable14.py
+++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable14.py
@@ -179,7 +179,10 @@ class test_rollback_to_stable14(test_rollback_to_stable_base):
self.assertEqual(keys_removed, 0)
self.assertEqual(hs_restore_updates, nrows)
self.assertEqual(keys_restored, 0)
- self.assertEqual(upd_aborted, 0)
+ if self.prepare:
+ self.assertGreaterEqual(upd_aborted, 0)
+ else:
+ self.assertEqual(upd_aborted, 0)
self.assertGreater(pages_visited, 0)
self.assertGreaterEqual(hs_removed, nrows)
self.assertGreaterEqual(hs_sweep, 0)
@@ -196,6 +199,10 @@ class test_rollback_to_stable14(test_rollback_to_stable_base):
def test_rollback_to_stable_same_ts(self):
nrows = 1500
+ # Prepare transactions for column store table is not yet supported.
+ if self.prepare and self.key_format == 'r':
+ self.skipTest('Prepare transactions for column store table is not yet supported')
+
# Create a table without logging.
self.pr("create/populate table")
uri = "table:rollback_to_stable14"
@@ -277,7 +284,10 @@ class test_rollback_to_stable14(test_rollback_to_stable_base):
self.assertEqual(keys_removed, 0)
self.assertEqual(hs_restore_updates, nrows)
self.assertEqual(keys_restored, 0)
- self.assertEqual(upd_aborted, 0)
+ if self.prepare:
+ self.assertGreaterEqual(upd_aborted, 0)
+ else:
+ self.assertEqual(upd_aborted, 0)
self.assertGreater(pages_visited, 0)
self.assertGreaterEqual(hs_removed, nrows * 3)
self.assertGreaterEqual(hs_sweep, 0)
@@ -292,6 +302,10 @@ class test_rollback_to_stable14(test_rollback_to_stable_base):
def test_rollback_to_stable_same_ts_append(self):
nrows = 1500
+ # Prepare transactions for column store table is not yet supported.
+ if self.prepare and self.key_format == 'r':
+ self.skipTest('Prepare transactions for column store table is not yet supported')
+
# Create a table without logging.
self.pr("create/populate table")
uri = "table:rollback_to_stable14"
@@ -373,7 +387,10 @@ class test_rollback_to_stable14(test_rollback_to_stable_base):
self.assertEqual(keys_removed, 0)
self.assertEqual(hs_restore_updates, nrows)
self.assertEqual(keys_restored, 0)
- self.assertEqual(upd_aborted, 0)
+ if self.prepare:
+ self.assertGreaterEqual(upd_aborted, 0)
+ else:
+ self.assertEqual(upd_aborted, 0)
self.assertGreater(pages_visited, 0)
self.assertGreaterEqual(hs_removed, nrows * 3)
self.assertGreaterEqual(hs_sweep, 0)
diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable16.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable16.py
index 2539a4b88d0..0c0a3235e94 100644
--- a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable16.py
+++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable16.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
#
# Public Domain 2014-present MongoDB, Inc.
# Public Domain 2008-2014 WiredTiger, Inc.
diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable18.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable18.py
new file mode 100644
index 00000000000..68c2e8d0205
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable18.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import fnmatch, os, shutil, time
+from helper import simulate_crash_restart
+from test_rollback_to_stable01 import test_rollback_to_stable_base
+from wiredtiger import stat
+from wtdataset import SimpleDataSet
+from wtscenario import make_scenarios
+
+def timestamp_str(t):
+ return '%x' % t
+
+# test_rollback_to_stable18.py
+# Test the rollback to stable shouldn't skip any pages that don't have aggregated time window.
+class test_rollback_to_stable18(test_rollback_to_stable_base):
+ session_config = 'isolation=snapshot'
+
+ key_format_values = [
+ ('column', dict(key_format='r')),
+ ('integer_row', dict(key_format='i')),
+ ]
+
+ prepare_values = [
+ ('no_prepare', dict(prepare=False)),
+ ('prepare', dict(prepare=True))
+ ]
+
+ scenarios = make_scenarios(key_format_values, prepare_values)
+
+ def conn_config(self):
+ config = 'cache_size=50MB,in_memory=true,statistics=(all),log=(enabled=false),eviction_dirty_trigger=5,eviction_updates_trigger=5'
+ return config
+
+ def test_rollback_to_stable(self):
+ nrows = 10000
+
+ # Prepare transactions for column store table is not yet supported.
+ if self.prepare and self.key_format == 'r':
+ self.skipTest('Prepare transactions for column store table is not yet supported')
+
+ # Create a table without logging.
+ uri = "table:rollback_to_stable18"
+ ds = SimpleDataSet(
+ self, uri, 0, key_format=self.key_format, value_format="S", config='log=(enabled=false)')
+ ds.populate()
+
+ # Pin oldest and stable to timestamp 10.
+ self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(10) +
+ ',stable_timestamp=' + timestamp_str(10))
+
+ value_a = "aaaaa" * 100
+
+ # Perform several updates.
+ self.large_updates(uri, value_a, ds, nrows, self.prepare, 20)
+
+ # Perform several removes.
+ self.large_removes(uri, ds, nrows, self.prepare, 30)
+
+ # Verify data is visible and correct.
+ self.check(value_a, uri, nrows, 20)
+ self.check(None, uri, 0, 30)
+
+ # Configure debug behavior on a cursor to evict the page positioned on when the reset API is used.
+ evict_cursor = self.session.open_cursor(uri, None, "debug=(release_evict)")
+
+ # Search for the key so we position our cursor on the page that we want to evict.
+ evict_cursor.set_key(1)
+ evict_cursor.search()
+ evict_cursor.reset()
+ evict_cursor.close()
+
+ # Pin stable and oldest to timestamp 30 if prepare otherwise 20.
+ if self.prepare:
+ self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(30) +
+ ',stable_timestamp=' + timestamp_str(30))
+ else:
+ self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(20) +
+ ',stable_timestamp=' + timestamp_str(20))
+
+ # Perform rollback to stable.
+ self.conn.rollback_to_stable()
+
+ # Verify the removes were rolled back and the updated data is visible again.
+ self.check(value_a, uri, nrows, 30)
+
+ stat_cursor = self.session.open_cursor('statistics:', None, None)
+ calls = stat_cursor[stat.conn.txn_rts][2]
+ upd_aborted = stat_cursor[stat.conn.txn_rts_upd_aborted][2]
+ self.assertEqual(calls, 1)
+ self.assertEqual(upd_aborted, nrows)
diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable19.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable19.py
index 3a24113fa32..284499dae64 100644
--- a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable19.py
+++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable19.py
@@ -29,7 +29,7 @@
import fnmatch, os, shutil, time
from helper import simulate_crash_restart
from test_rollback_to_stable01 import test_rollback_to_stable_base
-from wiredtiger import stat
+from wiredtiger import stat, WT_NOTFOUND
from wtdataset import SimpleDataSet
from wtscenario import make_scenarios
@@ -102,11 +102,18 @@ class test_rollback_to_stable19(test_rollback_to_stable_base):
# Search for the key so we position our cursor on the page that we want to evict.
self.session.begin_transaction("ignore_prepare = true")
evict_cursor.set_key(1)
- evict_cursor.search()
+ self.assertEquals(evict_cursor.search(), WT_NOTFOUND)
evict_cursor.reset()
evict_cursor.close()
self.session.commit_transaction()
+ # Search to make sure the data is not visible
+ self.session.begin_transaction("ignore_prepare = true")
+ cursor2 = self.session.open_cursor(uri)
+ cursor2.set_key(1)
+ self.assertEquals(cursor2.search(), WT_NOTFOUND)
+ self.session.commit_transaction()
+
# Pin stable timestamp to 20.
self.conn.set_timestamp('stable_timestamp=' + timestamp_str(20))
if not self.in_memory:
@@ -175,11 +182,18 @@ class test_rollback_to_stable19(test_rollback_to_stable_base):
# Search for the key so we position our cursor on the page that we want to evict.
self.session.begin_transaction("ignore_prepare = true")
evict_cursor.set_key(1)
- evict_cursor.search()
+ self.assertEquals(evict_cursor.search(), WT_NOTFOUND)
evict_cursor.reset()
evict_cursor.close()
self.session.commit_transaction()
+ # Search to make sure the data is not visible
+ self.session.begin_transaction("ignore_prepare = true")
+ cursor2 = self.session.open_cursor(uri)
+ cursor2.set_key(1)
+ self.assertEquals(cursor2.search(), WT_NOTFOUND)
+ self.session.commit_transaction()
+
# Pin stable timestamp to 40.
self.conn.set_timestamp('stable_timestamp=' + timestamp_str(40))
if not self.in_memory:
diff --git a/src/third_party/wiredtiger/test/suite/test_search_near01.py b/src/third_party/wiredtiger/test/suite/test_search_near01.py
new file mode 100644
index 00000000000..2e54671c06c
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_search_near01.py
@@ -0,0 +1,330 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+
+import time, wiredtiger, wttest, unittest
+from wiredtiger import stat
+
+def timestamp_str(t):
+ return '%x' % t
+
+# test_search_near01.py
+# Test various prefix search near scenarios.
+class test_search_near01(wttest.WiredTigerTestCase):
+ conn_config = 'statistics=(all)'
+ session_config = 'isolation=snapshot'
+
+ def get_stat(self, stat, local_session = None):
+ if (local_session != None):
+ stat_cursor = local_session.open_cursor('statistics:')
+ else:
+ stat_cursor = self.session.open_cursor('statistics:')
+ val = stat_cursor[stat][2]
+ stat_cursor.close()
+ return val
+
+ def unique_insert(self, cursor, prefix, id, keys):
+ key = prefix + ',' + str(id)
+ keys.append(key)
+ cursor.set_key(prefix)
+ cursor.set_value(prefix)
+ self.assertEqual(cursor.insert(), 0)
+ cursor.set_key(prefix)
+ self.assertEqual(cursor.remove(), 0)
+ cursor.set_key(prefix)
+ cursor.search_near()
+ cursor.set_key(key)
+ cursor.set_value(key)
+ self.assertEqual(cursor.insert(), 0)
+
+ def test_base_scenario(self):
+ uri = 'table:test_base_scenario'
+ self.session.create(uri, 'key_format=u,value_format=u')
+ cursor = self.session.open_cursor(uri)
+ session2 = self.conn.open_session()
+ cursor3 = self.session.open_cursor(uri, None, "debug=(release_evict=true)")
+
+ # Basic character array.
+ l = "abcdefghijklmnopqrstuvwxyz"
+
+ # Start our older reader.
+ session2.begin_transaction()
+
+ key_count = 26*26*26
+ # Insert keys aaa -> zzz.
+ self.session.begin_transaction()
+ for i in range (0, 26):
+ for j in range (0, 26):
+ for k in range (0, 26):
+ cursor[l[i] + l[j] + l[k]] = l[i] + l[j] + l[k]
+ self.session.commit_transaction()
+
+ # Evict the whole range.
+ for i in range (0, 26):
+ for j in range(0, 26):
+ cursor3.set_key(l[i] + l[j] + 'a')
+ cursor3.search()
+ cursor3.reset()
+
+ # Search near for the "aa" part of the range.
+ cursor2 = session2.open_cursor(uri)
+ cursor2.set_key('aa')
+ cursor2.search_near()
+
+ skip_count = self.get_stat(stat.conn.cursor_next_skip_lt_100)
+ # This should be equal to roughly key_count * 2 as we're going to traverse the whole
+ # range forward, and then the whole range backwards.
+ self.assertGreater(skip_count, key_count * 2)
+
+ cursor2.reconfigure("prefix_key=true")
+ cursor2.set_key('aa')
+ cursor2.search_near()
+
+ prefix_skip_count = self.get_stat(stat.conn.cursor_next_skip_lt_100)
+ # We should've skipped ~26*2 here as we're only looking at the "aa" range * 2.
+ self.assertGreaterEqual(prefix_skip_count - skip_count, 26*2)
+ skip_count = prefix_skip_count
+
+ # The prefix code will have come into play at once as we walked to "aba". The prev
+ # traversal will go off the end of the file and as such we don't expect it to increment
+ # this statistic again.
+ self.assertEqual(self.get_stat(stat.conn.cursor_search_near_prefix_fast_paths), 1)
+
+ # Search for a key not at the start.
+ cursor2.set_key('bb')
+ cursor2.search_near()
+
+ # Assert it to have only incremented the skipped statistic ~26*2 times.
+ prefix_skip_count = self.get_stat(stat.conn.cursor_next_skip_lt_100)
+ self.assertGreaterEqual(prefix_skip_count - skip_count, 26*2)
+ skip_count = prefix_skip_count
+
+ # Here we should've hit the prefix fast path code twice. Plus the time we already did.
+ self.assertEqual(self.get_stat(stat.conn.cursor_search_near_prefix_fast_paths), 2+1)
+
+ cursor2.close()
+ cursor2 = session2.open_cursor(uri)
+ cursor2.set_key('bb')
+ cursor2.search_near()
+ # Assert that we've incremented the stat key_count times, as we closed the cursor and
+ # reopened it.
+ #
+ # This validates cursor caching logic, as if we don't clear the flag correctly this will
+ # fail.
+ #
+        # It should be closer to key_count * 2 but this is an approximation.
+ prefix_skip_count = self.get_stat(stat.conn.cursor_next_skip_lt_100)
+ self.assertGreaterEqual(prefix_skip_count - skip_count, key_count)
+
+ # This test aims to simulate a unique index insertion.
+ def test_unique_index_case(self):
+ uri = 'table:test_unique_index_case'
+ self.session.create(uri, 'key_format=u,value_format=u')
+ cursor = self.session.open_cursor(uri)
+ session2 = self.conn.open_session()
+ cursor3 = self.session.open_cursor(uri, None, "debug=(release_evict=true)")
+ l = "abcdefghijklmnopqrstuvwxyz"
+
+ # A unique index has the following insertion method:
+ # 1. Insert the prefix
+ # 2. Remove the prefix
+ # 3. Search near for the prefix
+ # 4. Insert the full value
+ # All of these operations are wrapped in the same txn, this test attempts to test scenarios
+ # that could arise from this insertion method.
+
+ # A unique index key has the format (prefix, _id), we'll insert keys that look similar.
+
+ # Start our old reader txn.
+ session2.begin_transaction()
+
+ key_count = 26*26
+ id = 0
+ cc_id = 0
+ keys = []
+
+ # Insert keys aa,1 -> zz,N
+ for i in range (0, 26):
+ for j in range (0, 26):
+ # Skip inserting 'c'.
+ if (i == 2 and j == 2):
+ cc_id = id
+ id = id + 1
+ continue
+ self.session.begin_transaction()
+ prefix = l[i] + l[j]
+ self.unique_insert(cursor, prefix, id, keys)
+ id = id + 1
+ self.session.commit_transaction()
+
+ # Evict the whole range.
+ for i in keys:
+ cursor3.set_key(i)
+ cursor3.search()
+ cursor3.reset()
+
+ # Using our older reader attempt to find a value.
+ # Search near for the "cc" prefix.
+ cursor2 = session2.open_cursor(uri)
+ cursor2.set_key('cc')
+ cursor2.search_near()
+
+ skip_count = self.get_stat(stat.conn.cursor_next_skip_lt_100)
+ # This should be equal to roughly key_count * 2 as we're going to traverse most of the
+ # range forward, and then the whole range backwards.
+ self.assertGreater(skip_count, key_count * 2)
+
+ cursor2.reconfigure("prefix_key=true")
+ cursor2.set_key('cc')
+ cursor2.search_near()
+ self.assertEqual(self.get_stat(stat.conn.cursor_search_near_prefix_fast_paths), 2)
+
+ # This still isn't visible to our older reader and as such we expect this statistic to
+ # increment twice.
+ self.unique_insert(cursor2, 'cc', cc_id, keys)
+ self.assertEqual(self.get_stat(stat.conn.cursor_search_near_prefix_fast_paths), 4)
+
+ # In order for prefix key fast pathing to work we rely on some guarantees provided by row
+ # search. Test some of the guarantees.
+ def test_row_search(self):
+ uri = 'table:test_row_search'
+ self.session.create(uri, 'key_format=u,value_format=u')
+ cursor = self.session.open_cursor(uri)
+ session2 = self.conn.open_session()
+ l = "abcdefghijklmnopqrstuvwxyz"
+ # Insert keys a -> z, except c
+ self.session.begin_transaction()
+ for i in range (0, 26):
+ if (i == 2):
+ continue
+ cursor[l[i]] = l[i]
+ self.session.commit_transaction()
+ # Start our older reader transaction.
+ session2.begin_transaction()
+ # Insert a few keys in the 'c' range
+ self.session.begin_transaction()
+ cursor['c'] = 'c'
+ cursor['cc'] = 'cc'
+ cursor['ccc'] = 'ccc'
+ self.session.commit_transaction()
+ # Search_near for 'c' and assert we skip 3 entries. Internally the row search is landing on
+ # 'c'.
+ cursor2 = session2.open_cursor(uri)
+ cursor2.set_key('c')
+ cursor2.search_near()
+
+ skip_count = self.get_stat(stat.conn.cursor_next_skip_lt_100)
+ self.assertEqual(skip_count, 3)
+ session2.commit_transaction()
+
+ # Perform an insertion and removal of a key next to another key, then search for the
+ # removed key.
+ self.session.begin_transaction()
+ cursor.set_key('dd')
+ cursor.set_value('dd')
+ cursor.insert()
+ cursor.set_key('dd')
+ cursor.remove()
+ cursor.set_key('ddd')
+ cursor.set_value('ddd')
+ cursor.insert()
+ cursor.set_key('dd')
+ cursor.search_near()
+ self.session.commit_transaction()
+ skip_count = self.get_stat(stat.conn.cursor_next_skip_lt_100)
+ self.assertEqual(skip_count, 4)
+
+ # Test a basic prepared scenario.
+ def test_prepared(self):
+ uri = 'table:test_base_scenario'
+ self.session.create(uri, 'key_format=u,value_format=u')
+ cursor = self.session.open_cursor(uri)
+ session2 = self.conn.open_session()
+ cursor3 = session2.open_cursor(uri, None, "debug=(release_evict=true)")
+ # Insert an update without timestamp
+ l = "abcdefghijklmnopqrstuvwxyz"
+ session2.begin_transaction()
+
+ key_count = 26*26
+
+ # Insert 'cc'
+ self.session.begin_transaction()
+ cursor['cc'] = 'cc'
+ self.session.commit_transaction()
+
+ # Prepare keys aa -> zz
+ self.session.begin_transaction()
+ for i in range (0, 26):
+ if (i == 2):
+ continue
+ for j in range (0, 26):
+ cursor[l[i] + l[j]] = l[i] + l[j]
+
+ self.session.prepare_transaction('prepare_timestamp=2')
+
+ # Evict the whole range.
+ for i in range (0, 26):
+ for j in range(0, 26):
+ cursor3.set_key(l[i] + l[j])
+ cursor3.search()
+ cursor3.reset()
+
+ # Search near for the "aa" part of the range.
+ cursor2 = session2.open_cursor(uri)
+ cursor2.set_key('c')
+ cursor2.search_near()
+
+ skip_count = self.get_stat(stat.conn.cursor_next_skip_lt_100, session2)
+ # This should be equal to roughly key_count * 2 as we're going to traverse the whole
+ # range forward, and then the whole range backwards.
+ self.assertGreater(skip_count, key_count)
+
+ cursor2.reconfigure("prefix_key=true")
+ cursor2.set_key('c')
+ cursor2.search_near()
+
+ prefix_skip_count = self.get_stat(stat.conn.cursor_next_skip_lt_100, session2)
+ self.assertEqual(prefix_skip_count - skip_count, 3)
+ skip_count = prefix_skip_count
+
+ self.assertEqual(self.get_stat(stat.conn.cursor_search_near_prefix_fast_paths, session2), 2)
+
+ session2.rollback_transaction()
+ session2.begin_transaction('ignore_prepare=true')
+ cursor4 = session2.open_cursor(uri)
+ cursor4.reconfigure("prefix_key=true")
+ cursor4.set_key('c')
+ cursor4.search_near()
+ prefix_skip_count = self.get_stat(stat.conn.cursor_next_skip_lt_100, session2)
+ self.assertEqual(prefix_skip_count - skip_count, 2)
+ skip_count = prefix_skip_count
+
+ cursor4.reconfigure("prefix_key=false")
+ cursor4.set_key('c')
+ cursor4.search_near()
+ self.assertEqual(self.get_stat(stat.conn.cursor_next_skip_lt_100, session2) - skip_count, 2)
diff --git a/src/third_party/wiredtiger/test/suite/test_tiered01.py b/src/third_party/wiredtiger/test/suite/test_tiered01.py
deleted file mode 100644
index 8356f066d81..00000000000
--- a/src/third_party/wiredtiger/test/suite/test_tiered01.py
+++ /dev/null
@@ -1,78 +0,0 @@
-#!/usr/bin/env python
-#
-# Public Domain 2014-present MongoDB, Inc.
-# Public Domain 2008-2014 WiredTiger, Inc.
-#
-# This is free and unencumbered software released into the public domain.
-#
-# Anyone is free to copy, modify, publish, use, compile, sell, or
-# distribute this software, either in source code form or as a compiled
-# binary, for any purpose, commercial or non-commercial, and by any
-# means.
-#
-# In jurisdictions that recognize copyright laws, the author or authors
-# of this software dedicate any and all copyright interest in the
-# software to the public domain. We make this dedication for the benefit
-# of the public at large and to the detriment of our heirs and
-# successors. We intend this dedication to be an overt act of
-# relinquishment in perpetuity of all present and future rights to this
-# software under copyright law.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-
-import wiredtiger, wtscenario, wttest
-from wtdataset import SimpleDataSet
-
-# test_tiered01.py
-# Basic tiered tree test
-class test_tiered01(wttest.WiredTigerTestCase):
- K = 1024
- M = 1024 * K
- G = 1024 * M
- uri = "table:test_tiered01"
-
- # Occasionally add a lot of records.
- record_count_scenarios = wtscenario.quick_scenarios(
- 'nrecs', [10, 10000], [0.9, 0.1])
-
- config_vars = []
-
- scenarios = wtscenario.make_scenarios(record_count_scenarios, prune=100, prunelong=500)
-
- # Test create of an object.
- def test_tiered(self):
- self.session.create('file:first.wt', 'key_format=S')
- self.session.create('file:second.wt', 'key_format=S')
- args = 'type=tiered,key_format=S'
- args += ',tiered=(' # Start the tiered configuration options.
- args += 'tiers=("file:first.wt", "file:second.wt"),'
- # add names to args, e.g. args += ',session_max=30'
- for var in self.config_vars:
- value = getattr(self, 's_' + var)
- if value != None:
- if var == 'verbose':
- value = '[' + str(value) + ']'
- value = {True : 'true', False : 'false'}.get(value, value)
- args += ',' + var + '=' + str(value)
- args += ')' # Close the tiered configuration option group
- self.verbose(3,
- 'Test tiered with config: ' + args + ' count: ' + str(self.nrecs))
- SimpleDataSet(self, self.uri, self.nrecs, config=args).populate()
-
- # self.session.drop(self.uri)
-
- # It is an error to configure a tiered table with no tiers
- def test_no_tiers(self):
- msg = '/tiered table must specify at least one tier/'
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.session.create(self.uri, 'type=tiered,key_format=S,tiered=(tiers=())'),
- msg)
-
-if __name__ == '__main__':
- wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_tiered02.py b/src/third_party/wiredtiger/test/suite/test_tiered02.py
index 3317ecdb5b6..4b638a4015f 100644..100755
--- a/src/third_party/wiredtiger/test/suite/test_tiered02.py
+++ b/src/third_party/wiredtiger/test/suite/test_tiered02.py
@@ -26,7 +26,7 @@
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
-import wiredtiger, wtscenario, wttest
+import os, wiredtiger, wtscenario, wttest
from wtdataset import SimpleDataSet
# test_tiered02.py
@@ -37,27 +37,83 @@ class test_tiered02(wttest.WiredTigerTestCase):
G = 1024 * M
uri = "file:test_tiered02"
- # Occasionally add a lot of records, so that merges (and bloom) happen.
- record_count_scenarios = wtscenario.quick_scenarios(
- 'nrecs', [10, 10000], [0.9, 0.1])
+ auth_token = "test_token"
+ bucket = "mybucket"
+ bucket_prefix = "pfx_"
+ extension_name = "local_store"
- scenarios = wtscenario.make_scenarios(record_count_scenarios, prune=100, prunelong=500)
+ def conn_config(self):
+ os.makedirs(self.bucket, exist_ok=True)
+ return \
+ 'tiered_storage=(auth_token={},bucket={},bucket_prefix={},name={})'.format( \
+ self.auth_token, self.bucket, self.bucket_prefix, self.extension_name)
- # Test drop of an object.
+ # Load the local store extension, but skip the test if it is missing.
+ def conn_extensions(self, extlist):
+ extlist.skip_if_missing = True
+ extlist.extension('storage_sources', self.extension_name)
+
+ def confirm_flush(self, increase=True):
+ # TODO: tiered: flush tests disabled, as the interface
+ # for flushing will be changed.
+ return
+
+ self.flushed_objects
+ got = sorted(list(os.listdir(self.bucket)))
+ self.pr('Flushed objects: ' + str(got))
+ if increase:
+ self.assertGreater(len(got), self.flushed_objects)
+ else:
+ self.assertEqual(len(got), self.flushed_objects)
+ self.flushed_objects = len(got)
+
+ # Test tiered storage with the old prototype way of signaling flushing to the shared
+ # tier via checkpoints. When flush_tier is working, the checkpoint calls can be
+ # replaced with flush_tier.
def test_tiered(self):
+ self.flushed_objects = 0
args = 'key_format=S,block_allocation=log-structured'
- self.verbose(3,
- 'Test log-structured allocation with config: ' + args + ' count: ' + str(self.nrecs))
- #ds = SimpleDataSet(self, self.uri, self.nrecs, config=args)
+ self.verbose(3, 'Test log-structured allocation with config: ' + args)
+
ds = SimpleDataSet(self, self.uri, 10, config=args)
ds.populate()
+ ds.check()
self.session.checkpoint()
- ds = SimpleDataSet(self, self.uri, 10000, config=args)
+    # For some reason, not every checkpoint causes a flush.
+ # As we're about to move to a new model of flushing, we're not going to chase this error.
+ #self.confirm_flush()
+
+ ds = SimpleDataSet(self, self.uri, 50, config=args)
ds.populate()
+ ds.check()
+ self.session.checkpoint()
+ self.confirm_flush()
+
+ ds = SimpleDataSet(self, self.uri, 100, config=args)
+ ds.populate()
+ ds.check()
+ self.session.checkpoint()
+ self.confirm_flush()
+
+ ds = SimpleDataSet(self, self.uri, 200, config=args)
+ ds.populate()
+ ds.check()
+ self.close_conn()
+ self.confirm_flush() # closing the connection does a checkpoint
self.reopen_conn()
- ds = SimpleDataSet(self, self.uri, 1000, config=args)
+ # Check what was there before
+ ds = SimpleDataSet(self, self.uri, 200, config=args)
+ ds.check()
+
+ # Now add some more.
+ ds = SimpleDataSet(self, self.uri, 300, config=args)
ds.populate()
+ ds.check()
+
+ # We haven't done a checkpoint/flush so there should be
+ # nothing extra on the shared tier.
+ self.confirm_flush(increase=False)
if __name__ == '__main__':
wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_tiered04.py b/src/third_party/wiredtiger/test/suite/test_tiered04.py
index f4007f4df49..0347647031f 100644..100755
--- a/src/third_party/wiredtiger/test/suite/test_tiered04.py
+++ b/src/third_party/wiredtiger/test/suite/test_tiered04.py
@@ -33,14 +33,22 @@ StorageSource = wiredtiger.StorageSource # easy access to constants
# test_tiered04.py
# Basic tiered storage API test.
class test_tiered04(wttest.WiredTigerTestCase):
- uri = "table:test_tiered04_sys"
- uri1 = "table:test_tiered04"
+
+ # If the 'uri' changes all the other names must change with it.
+ fileuri = 'file:test_tiered04-0000000001.wt'
+ objuri = 'object:test_tiered04-0000000001.wtobj'
+ tiereduri = "tiered:test_tiered04"
+ uri = "table:test_tiered04"
+
+ uri1 = "table:test_other_tiered04"
uri_none = "table:test_local04"
auth_token = "test_token"
bucket = "mybucket"
bucket1 = "otherbucket"
extension_name = "local_store"
+ prefix = "this_pfx"
+ prefix1 = "other_pfx"
object_sys = "9M"
object_sys_val = 9 * 1024 * 1024
object_uri = "15M"
@@ -48,10 +56,13 @@ class test_tiered04(wttest.WiredTigerTestCase):
retention = 600
retention1 = 350
def conn_config(self):
+ os.mkdir(self.bucket)
+ os.mkdir(self.bucket1)
return \
'statistics=(all),' + \
'tiered_storage=(auth_token=%s,' % self.auth_token + \
'bucket=%s,' % self.bucket + \
+ 'bucket_prefix=%s,' % self.prefix + \
'local_retention=%d,' % self.retention + \
'name=%s,' % self.extension_name + \
'object_target_size=%s)' % self.object_sys
@@ -61,8 +72,18 @@ class test_tiered04(wttest.WiredTigerTestCase):
extlist.skip_if_missing = True
extlist.extension('storage_sources', self.extension_name)
+ # Check for a specific string as part of the uri's metadata.
+ def check_metadata(self, uri, val_str):
+ c = self.session.open_cursor('metadata:')
+ val = c[uri]
+ c.close()
+ self.assertTrue(val_str in val)
+
def get_stat(self, stat, uri):
- stat_cursor = self.session.open_cursor('statistics:' + uri)
+ if uri == None:
+ stat_cursor = self.session.open_cursor('statistics:')
+ else:
+ stat_cursor = self.session.open_cursor('statistics:' + uri)
val = stat_cursor[stat][2]
stat_cursor.close()
return val
@@ -72,48 +93,73 @@ class test_tiered04(wttest.WiredTigerTestCase):
# Create three tables. One using the system tiered storage, one
# specifying its own bucket and object size and one using no
# tiered storage. Use stats to verify correct setup.
- base_create = 'key_format=S'
+ intl_page = 'internal_page_max=16K'
+ base_create = 'key_format=S,' + intl_page
+ self.pr("create sys")
self.session.create(self.uri, base_create)
conf = \
',tiered_storage=(auth_token=%s,' % self.auth_token + \
'bucket=%s,' % self.bucket1 + \
+ 'bucket_prefix=%s,' % self.prefix1 + \
'local_retention=%d,' % self.retention1 + \
'name=%s,' % self.extension_name + \
'object_target_size=%s)' % self.object_uri
+ self.pr("create non-sys tiered")
self.session.create(self.uri1, base_create + conf)
conf = ',tiered_storage=(name=none)'
+ self.pr("create non tiered/local")
self.session.create(self.uri_none, base_create + conf)
- # Verify the table settings.
- obj = self.get_stat(stat.dsrc.tiered_object_size, self.uri)
+ #self.pr("open cursor")
+ #c = self.session.open_cursor(self.uri)
+ self.pr("flush tier")
+ self.session.flush_tier(None)
+
+ self.pr("flush tier again")
+ self.session.flush_tier(None)
+ calls = self.get_stat(stat.conn.flush_tier, None)
+ self.assertEqual(calls, 2)
+ obj = self.get_stat(stat.conn.tiered_object_size, None)
self.assertEqual(obj, self.object_sys_val)
- obj = self.get_stat(stat.dsrc.tiered_object_size, self.uri1)
- self.assertEqual(obj, self.object_uri_val)
- obj = self.get_stat(stat.dsrc.tiered_object_size, self.uri_none)
- self.assertEqual(obj, 0)
- retain = self.get_stat(stat.dsrc.tiered_retention, self.uri)
- self.assertEqual(retain, self.retention)
- retain = self.get_stat(stat.dsrc.tiered_retention, self.uri1)
- self.assertEqual(retain, self.retention1)
- retain = self.get_stat(stat.dsrc.tiered_retention, self.uri_none)
- self.assertEqual(retain, 0)
+ self.check_metadata(self.tiereduri, intl_page)
+ self.check_metadata(self.fileuri, intl_page)
+ self.check_metadata(self.objuri, intl_page)
+
+ #self.pr("verify stats")
+ # Verify the table settings.
+ #obj = self.get_stat(stat.dsrc.tiered_object_size, self.uri)
+ #self.assertEqual(obj, self.object_sys_val)
+ #obj = self.get_stat(stat.dsrc.tiered_object_size, self.uri1)
+ #self.assertEqual(obj, self.object_uri_val)
+ #obj = self.get_stat(stat.dsrc.tiered_object_size, self.uri_none)
+ #self.assertEqual(obj, 0)
+
+ #retain = self.get_stat(stat.dsrc.tiered_retention, self.uri)
+ #self.assertEqual(retain, self.retention)
+ #retain = self.get_stat(stat.dsrc.tiered_retention, self.uri1)
+ #self.assertEqual(retain, self.retention1)
+ #retain = self.get_stat(stat.dsrc.tiered_retention, self.uri_none)
+ #self.assertEqual(retain, 0)
# Now test some connection statistics with operations.
- retain = self.get_stat(stat.conn.tiered_retention, '')
+ retain = self.get_stat(stat.conn.tiered_retention, None)
self.assertEqual(retain, self.retention)
self.session.flush_tier(None)
self.session.flush_tier('force=true')
- calls = self.get_stat(stat.conn.flush_tier, '')
- self.assertEqual(calls, 2)
+ calls = self.get_stat(stat.conn.flush_tier, None)
+ self.assertEqual(calls, 4)
+
+ # Test reconfiguration.
new = self.retention * 2
config = 'tiered_storage=(local_retention=%d)' % new
+ self.pr("reconfigure")
self.conn.reconfigure(config)
self.session.flush_tier(None)
- retain = self.get_stat(stat.conn.tiered_retention, '')
- calls = self.get_stat(stat.conn.flush_tier, '')
+ retain = self.get_stat(stat.conn.tiered_retention, None)
+ calls = self.get_stat(stat.conn.flush_tier, None)
self.assertEqual(retain, new)
- self.assertEqual(calls, 3)
+ self.assertEqual(calls, 5)
if __name__ == '__main__':
wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_tiered05.py b/src/third_party/wiredtiger/test/suite/test_tiered05.py
index 098c079fb3d..5cbfe4366c7 100644..100755
--- a/src/third_party/wiredtiger/test/suite/test_tiered05.py
+++ b/src/third_party/wiredtiger/test/suite/test_tiered05.py
@@ -36,17 +36,23 @@ class test_tiered05(wttest.WiredTigerTestCase):
uri = "table:test_tiered05"
auth_token = "test_token"
+ bucket = "my_bucket"
+ bucket_prefix = "my_prefix"
extension_name = "local_store"
+ bucket = "./objects"
def conn_extensions(self, extlist):
extlist.skip_if_missing = True
extlist.extension('storage_sources', self.extension_name)
def conn_config(self):
+ os.mkdir(self.bucket)
return \
'statistics=(fast),' + \
'tiered_manager=(wait=10),' + \
'tiered_storage=(auth_token=%s,' % self.auth_token + \
+ 'bucket=%s,' % self.bucket + \
+ 'bucket_prefix=%s,' % self.bucket_prefix + \
'name=%s,' % self.extension_name + \
'object_target_size=20M)'
diff --git a/src/third_party/wiredtiger/test/suite/test_tiered06.py b/src/third_party/wiredtiger/test/suite/test_tiered06.py
index aba6b8a81b2..e0614cd8c1b 100755
--- a/src/third_party/wiredtiger/test/suite/test_tiered06.py
+++ b/src/third_party/wiredtiger/test/suite/test_tiered06.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
#
-# Public Domain 2014-2020 MongoDB, Inc.
+# Public Domain 2014-present MongoDB, Inc.
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
import os, wiredtiger, wttest
-StorageSource = wiredtiger.StorageSource # easy access to constants
+FileSystem = wiredtiger.FileSystem # easy access to constants
# test_tiered06.py
# Test the local storage source.
@@ -64,58 +64,79 @@ class test_tiered06(wttest.WiredTigerTestCase):
local = self.get_local_storage_source()
os.mkdir("objects")
- location = local.ss_location_handle(session,
- 'cluster="cluster1",bucket="./objects",auth_token="Secret"')
+ fs = local.ss_customize_file_system(session, "./objects", "cluster1-", "Secret", None)
# The object doesn't exist yet.
- self.assertFalse(local.ss_exist(session, location, 'foobar'))
+ self.assertFalse(fs.fs_exist(session, 'foobar'))
- fh = local.ss_open_object(session, location, 'foobar', StorageSource.open_create)
+ fh = fs.fs_open_file(session, 'foobar', FileSystem.open_file_type_data, FileSystem.open_create)
+
+ # Just like a regular file system, the object exists now.
+ self.assertTrue(fs.fs_exist(session, 'foobar'))
outbytes = ('MORE THAN ENOUGH DATA\n'*100000).encode()
fh.fh_write(session, 0, outbytes)
- # The object doesn't even exist now.
- self.assertFalse(local.ss_exist(session, location, 'foobar'))
-
# The object exists after close
fh.close(session)
- self.assertTrue(local.ss_exist(session, location, 'foobar'))
+ self.assertTrue(fs.fs_exist(session, 'foobar'))
- fh = local.ss_open_object(session, location, 'foobar', StorageSource.open_readonly)
+ fh = fs.fs_open_file(session, 'foobar', FileSystem.open_file_type_data, FileSystem.open_readonly)
inbytes = bytes(1000000) # An empty buffer with a million zero bytes.
fh.fh_read(session, 0, inbytes) # read into the buffer
self.assertEquals(outbytes[0:1000000], inbytes)
- self.assertEquals(local.ss_size(session, location, 'foobar'), len(outbytes))
+ self.assertEquals(fs.fs_size(session, 'foobar'), len(outbytes))
self.assertEquals(fh.fh_size(session), len(outbytes))
fh.close(session)
# The fh_lock call doesn't do anything in the local store implementation.
- fh = local.ss_open_object(session, location, 'foobar', StorageSource.open_readonly)
+ fh = fs.fs_open_file(session, 'foobar', FileSystem.open_file_type_data, FileSystem.open_readonly)
fh.fh_lock(session, True)
fh.fh_lock(session, False)
fh.close(session)
- self.assertEquals(local.ss_location_list(session, location, '', 0), ['foobar'])
+ self.assertEquals(fs.fs_directory_list(session, '', ''), ['foobar'])
+
+ # Newly created objects are in the list.
+ fh = fs.fs_open_file(session, 'zzz', FileSystem.open_file_type_data, FileSystem.open_create)
+
+ # TODO: tiered: the newly created file should be visible, but it is not yet.
+ # self.assertEquals(sorted(fs.fs_directory_list(session, '', '')), ['foobar', 'zzz' ])
- # Make sure any new object is not in the list until it is closed.
- fh = local.ss_open_object(session, location, 'zzz', StorageSource.open_create)
- self.assertEquals(local.ss_location_list(session, location, '', 0), ['foobar'])
# Sync merely syncs to the local disk.
fh.fh_sync(session)
fh.close(session) # zero length
- self.assertEquals(sorted(local.ss_location_list(session, location, '', 0)),
- ['foobar', 'zzz'])
+ self.assertEquals(sorted(fs.fs_directory_list(session, '', '')), ['foobar', 'zzz' ])
+
+ # See that we can rename objects.
+ fs.fs_rename(session, 'zzz', 'yyy', 0)
+ self.assertEquals(sorted(fs.fs_directory_list(session, '', '')), ['foobar', 'yyy' ])
# See that we can remove objects.
- local.ss_remove(session, location, 'zzz', 0)
- self.assertEquals(local.ss_location_list(session, location, '', 0), ['foobar'])
+ fs.fs_remove(session, 'yyy', 0)
+ self.assertEquals(fs.fs_directory_list(session, '', ''), ['foobar'])
+
+ # TODO: tiered: flush tests disabled, as the interface
+ # for flushing will be changed.
+ return
# Flushing doesn't do anything that's visible.
- local.ss_flush(session, location, None, '')
- self.assertEquals(local.ss_location_list(session, location, '', 0), ['foobar'])
+ local.ss_flush(session, fs, None, '')
+ self.assertEquals(fs.fs_directory_list(session, '', ''), ['foobar'])
+
+ # Files that have been flushed cannot be manipulated.
+ with self.expectedStderrPattern('foobar: rename of flushed file not allowed'):
+ self.assertRaisesException(wiredtiger.WiredTigerError,
+ lambda: fs.fs_rename(session, 'foobar', 'barfoo', 0))
+ self.assertEquals(fs.fs_directory_list(session, '', ''), ['foobar'])
- location.close(session)
+ # Files that have been flushed cannot be manipulated through the custom file system.
+ with self.expectedStderrPattern('foobar: remove of flushed file not allowed'):
+ self.assertRaisesException(wiredtiger.WiredTigerError,
+ lambda: fs.fs_remove(session, 'foobar', 0))
+ self.assertEquals(fs.fs_directory_list(session, '', ''), ['foobar'])
+
+ fs.terminate(session)
def test_local_write_read(self):
# Write and read to a file non-sequentially.
@@ -124,14 +145,13 @@ class test_tiered06(wttest.WiredTigerTestCase):
local = self.get_local_storage_source()
os.mkdir("objects")
- location = local.ss_location_handle(session,
- 'cluster="cluster1",bucket="./objects",auth_token="Secret"')
+ fs = local.ss_customize_file_system(session, "./objects", "cluster1-", "Secret", None)
# We call these 4K chunks of data "blocks" for this test, but that doesn't
# necessarily relate to WT block sizing.
nblocks = 1000
block_size = 4096
- fh = local.ss_open_object(session, location, 'abc', StorageSource.open_create)
+ fh = fs.fs_open_file(session, 'abc', FileSystem.open_file_type_data, FileSystem.open_create)
# blocks filled with 'a', etc.
a_block = ('a' * block_size).encode()
@@ -153,7 +173,7 @@ class test_tiered06(wttest.WiredTigerTestCase):
fh.close(session)
in_block = bytes(block_size)
- fh = local.ss_open_object(session, location, 'abc', StorageSource.open_readonly)
+ fh = fs.fs_open_file(session, 'abc', FileSystem.open_file_type_data, FileSystem.open_readonly)
# Do some spot checks, reading non-sequentially
fh.fh_read(session, 500 * block_size, in_block) # divisible by 2, not 3
@@ -176,90 +196,203 @@ class test_tiered06(wttest.WiredTigerTestCase):
self.assertEquals(in_block, a_block)
fh.close(session)
- def create_in_loc(self, loc, objname):
+ def create_with_fs(self, fs, fname):
session = self.session
- fh = self.local.ss_open_object(session, loc, objname, StorageSource.open_create)
+ fh = fs.fs_open_file(session, fname, FileSystem.open_file_type_data, FileSystem.open_create)
fh.fh_write(session, 0, 'some stuff'.encode())
fh.close(session)
- def check(self, loc, prefix, limit, expect):
- # We don't require any sorted output for location lists,
+ objectdir1 = "./objects1"
+ objectdir2 = "./objects2"
+
+ cachedir1 = "./cache1"
+ cachedir2 = "./cache2"
+
+ def check(self, fs, prefix, expect):
+ # We don't require any sorted output for directory lists,
+    # so we'll sort before comparing.
- got = sorted(self.local.ss_location_list(self.session, loc, prefix, limit))
+ got = sorted(fs.fs_directory_list(self.session, '', prefix))
expect = sorted(expect)
self.assertEquals(got, expect)
- def test_local_locations(self):
- # Test using various buckets, clusters
+ # Check that objects are "in the cloud" after a flush.
+ # Using the local storage module, they are actually going to be in either
+ # objectdir1 or objectdir2
+ def check_objects(self, expect1, expect2):
+ got = sorted(list(os.listdir(self.objectdir1)))
+ expect = sorted(expect1)
+ self.assertEquals(got, expect)
+ got = sorted(list(os.listdir(self.objectdir2)))
+ expect = sorted(expect2)
+ self.assertEquals(got, expect)
+
+ def test_local_file_systems(self):
+ # Test using various buckets, hosts
session = self.session
local = self.conn.get_storage_source('local_store')
self.local = local
- os.mkdir("objects1")
- os.mkdir("objects2")
-
- # Any of the activity that happens in the various locations
- # should be independent.
- location1 = local.ss_location_handle(session,
- 'cluster="cluster1",bucket="./objects1",auth_token="k1"')
- location2 = local.ss_location_handle(session,
- 'cluster="cluster1",bucket="./objects2",auth_token="k2"')
- location3 = local.ss_location_handle(session,
- 'cluster="cluster2",bucket="./objects1",auth_token="k3"')
- location4 = local.ss_location_handle(session,
- 'cluster="cluster2",bucket="./objects2",auth_token="k4"')
-
- # Create files in the locations with some name overlap
- self.create_in_loc(location1, 'alpaca')
- self.create_in_loc(location2, 'bear')
- self.create_in_loc(location3, 'crab')
- self.create_in_loc(location4, 'deer')
+ os.mkdir(self.objectdir1)
+ os.mkdir(self.objectdir2)
+ os.mkdir(self.cachedir1)
+ os.mkdir(self.cachedir2)
+ config1 = "cache_directory=" + self.cachedir1
+ config2 = "cache_directory=" + self.cachedir2
+ bad_config = "cache_directory=BAD"
+
+ # Create file system objects. First try some error cases.
+ errmsg = '/No such file or directory/'
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: local.ss_customize_file_system(
+ session, "./objects1", "pre1-", "k1", bad_config), errmsg)
+
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: local.ss_customize_file_system(
+ session, "./objects_BAD", "pre1-", "k1", config1), errmsg)
+
+ # Create an empty file, try to use it as a directory.
+ with open("some_file", "w"):
+ pass
+ errmsg = '/Invalid argument/'
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: local.ss_customize_file_system(
+ session, "some_file", "pre1-", "k1", config1), errmsg)
+
+ # Now create some file systems that should succeed.
+ # Use either different bucket directories or different prefixes,
+ # so activity that happens in the various file systems should be independent.
+ fs1 = local.ss_customize_file_system(session, "./objects1", "pre1-", "k1", config1)
+ fs2 = local.ss_customize_file_system(session, "./objects2", "pre1-", "k2", config2)
+ fs3 = local.ss_customize_file_system(session, "./objects1", "pre2-", "k3", config1)
+ fs4 = local.ss_customize_file_system(session, "./objects2", "pre2-", "k4", config2)
+
+ # Create files in the file systems with some name overlap
+ self.create_with_fs(fs1, 'alpaca')
+ self.create_with_fs(fs2, 'bear')
+ self.create_with_fs(fs3, 'crab')
+ self.create_with_fs(fs4, 'deer')
for a in ['beagle', 'bird', 'bison', 'bat']:
- self.create_in_loc(location1, a)
+ self.create_with_fs(fs1, a)
for a in ['bird', 'bison', 'bat', 'badger']:
- self.create_in_loc(location2, a)
+ self.create_with_fs(fs2, a)
for a in ['bison', 'bat', 'badger', 'baboon']:
- self.create_in_loc(location3, a)
+ self.create_with_fs(fs3, a)
for a in ['bat', 'badger', 'baboon', 'beagle']:
- self.create_in_loc(location4, a)
+ self.create_with_fs(fs4, a)
# Make sure we see the expected file names
- self.check(location1, '', 0, ['alpaca', 'beagle', 'bird', 'bison', 'bat'])
- self.check(location1, 'a', 0, ['alpaca'])
- self.check(location1, 'b', 0, ['beagle', 'bird', 'bison', 'bat'])
- self.check(location1, 'c', 0, [])
- self.check(location1, 'd', 0, [])
-
- self.check(location2, '', 0, ['bear', 'bird', 'bison', 'bat', 'badger'])
- self.check(location2, 'a', 0, [])
- self.check(location2, 'b', 0, ['bear', 'bird', 'bison', 'bat', 'badger'])
- self.check(location2, 'c', 0, [])
- self.check(location2, 'd', 0, [])
-
- self.check(location3, '', 0, ['crab', 'bison', 'bat', 'badger', 'baboon'])
- self.check(location3, 'a', 0, [])
- self.check(location3, 'b', 0, ['bison', 'bat', 'badger', 'baboon'])
- self.check(location3, 'c', 0, ['crab'])
- self.check(location3, 'd', 0, [])
-
- self.check(location4, '', 0, ['deer', 'bat', 'badger', 'baboon', 'beagle'])
- self.check(location4, 'a', 0, [])
- self.check(location4, 'b', 0, ['bat', 'badger', 'baboon', 'beagle'])
- self.check(location4, 'c', 0, [])
- self.check(location4, 'd', 0, ['deer'])
-
- # Flushing doesn't do anything that's visible, but calling it still exercises code paths.
- # At some point, we'll have statistics we can check.
+ self.check(fs1, '', ['alpaca', 'beagle', 'bird', 'bison', 'bat'])
+ self.check(fs1, 'a', ['alpaca'])
+ self.check(fs1, 'b', ['beagle', 'bird', 'bison', 'bat'])
+ self.check(fs1, 'c', [])
+ self.check(fs1, 'd', [])
+
+ self.check(fs2, '', ['bear', 'bird', 'bison', 'bat', 'badger'])
+ self.check(fs2, 'a', [])
+ self.check(fs2, 'b', ['bear', 'bird', 'bison', 'bat', 'badger'])
+ self.check(fs2, 'c', [])
+ self.check(fs2, 'd', [])
+
+ self.check(fs3, '', ['crab', 'bison', 'bat', 'badger', 'baboon'])
+ self.check(fs3, 'a', [])
+ self.check(fs3, 'b', ['bison', 'bat', 'badger', 'baboon'])
+ self.check(fs3, 'c', ['crab'])
+ self.check(fs3, 'd', [])
+
+ self.check(fs4, '', ['deer', 'bat', 'badger', 'baboon', 'beagle'])
+ self.check(fs4, 'a', [])
+ self.check(fs4, 'b', ['bat', 'badger', 'baboon', 'beagle'])
+ self.check(fs4, 'c', [])
+ self.check(fs4, 'd', ['deer'])
+
+ # Flushing copies files to one of the subdirectories:
+ # "./objects1" (for fs1 and fs3)
+ # "./objects2" (for fs2 and fs4)
#
- # For now, we can turn on the verbose config option for the local_store extension to verify.
- local.ss_flush(session, location4, None, '')
- local.ss_flush(session, location3, 'badger', '')
- local.ss_flush(session, location3, 'c', '') # make sure we don't flush prefixes
- local.ss_flush(session, location3, 'b', '') # or suffixes
- local.ss_flush(session, location3, 'crab', '')
- local.ss_flush(session, location3, 'crab', '') # should do nothing
- local.ss_flush(session, None, None, '') # flush everything else
- local.ss_flush(session, None, None, '') # should do nothing
+ # After every flush, we'll check that the right objects appear in the right directory.
+ # check_objects takes two lists: objects expected to be in ./objects1,
+ # and objects expected to be in ./objects2 .
+ self.check_objects([], [])
+
+ # TODO: tiered: flush tests disabled, as the interface
+ # for flushing will be changed.
+ enable_fs_flush_tests = False
+ if enable_fs_flush_tests:
+ local.ss_flush(session, fs4, None, '')
+ self.check_objects([], ['pre2-deer', 'pre2-bat', 'pre2-badger', 'pre2-baboon', 'pre2-beagle'])
+
+ local.ss_flush(session, fs3, 'badger', '')
+ self.check_objects(['pre2-badger'],
+ ['pre2-deer', 'pre2-bat', 'pre2-badger', 'pre2-baboon', 'pre2-beagle'])
+
+ #local.ss_flush(session, fs3, 'c', '') # make sure we don't flush prefixes
+ self.check_objects(['pre2-badger'],
+ ['pre2-deer', 'pre2-bat', 'pre2-badger', 'pre2-baboon', 'pre2-beagle'])
+
+ local.ss_flush(session, fs3, 'b', '') # or suffixes
+ self.check_objects(['pre2-badger'],
+ ['pre2-deer', 'pre2-bat', 'pre2-badger', 'pre2-baboon', 'pre2-beagle'])
+
+ local.ss_flush(session, fs3, 'crab', '')
+ self.check_objects(['pre2-crab', 'pre2-badger'],
+ ['pre2-deer', 'pre2-bat', 'pre2-badger', 'pre2-baboon', 'pre2-beagle'])
+
+ local.ss_flush(session, fs3, 'crab', '') # should do nothing
+ self.check_objects(['pre2-crab', 'pre2-badger'],
+ ['pre2-deer', 'pre2-bat', 'pre2-badger', 'pre2-baboon', 'pre2-beagle'])
+
+ local.ss_flush(session, None, None, '') # flush everything else
+ self.check_objects(['pre1-alpaca', 'pre1-beagle', 'pre1-bird', 'pre1-bison', 'pre1-bat',
+ 'pre2-crab', 'pre2-bison', 'pre2-bat', 'pre2-badger', 'pre2-baboon'],
+ ['pre1-bear', 'pre1-bird', 'pre1-bison', 'pre1-bat', 'pre1-badger',
+ 'pre2-deer', 'pre2-bat', 'pre2-badger', 'pre2-baboon', 'pre2-beagle'])
+
+ local.ss_flush(session, None, None, '') # should do nothing
+ self.check_objects(['pre1-alpaca', 'pre1-beagle', 'pre1-bird', 'pre1-bison', 'pre1-bat',
+ 'pre2-crab', 'pre2-bison', 'pre2-bat', 'pre2-badger', 'pre2-baboon'],
+ ['pre1-bear', 'pre1-bird', 'pre1-bison', 'pre1-bat', 'pre1-badger',
+ 'pre2-deer', 'pre2-bat', 'pre2-badger', 'pre2-baboon', 'pre2-beagle'])
+
+ self.create_with_fs(fs4, 'zebra') # should do nothing in the objects directories
+ self.create_with_fs(fs4, 'yeti') # should do nothing in the objects directories
+ self.check_objects(['pre1-alpaca', 'pre1-beagle', 'pre1-bird', 'pre1-bison', 'pre1-bat',
+ 'pre2-crab', 'pre2-bison', 'pre2-bat', 'pre2-badger', 'pre2-baboon'],
+ ['pre1-bear', 'pre1-bird', 'pre1-bison', 'pre1-bat', 'pre1-badger',
+ 'pre2-deer', 'pre2-bat', 'pre2-badger', 'pre2-baboon', 'pre2-beagle'])
+
+ # Try remove and rename, should be possible until we flush
+ self.check(fs4, '', ['deer', 'bat', 'badger', 'baboon', 'beagle', 'yeti', 'zebra'])
+ fs4.fs_remove(session, 'yeti', 0)
+ self.check(fs4, '', ['deer', 'bat', 'badger', 'baboon', 'beagle', 'zebra'])
+ fs4.fs_rename(session, 'zebra', 'okapi', 0)
+ self.check(fs4, '', ['deer', 'bat', 'badger', 'baboon', 'beagle', 'okapi'])
+ local.ss_flush(session, None, None, '')
+ self.check(fs4, '', ['deer', 'bat', 'badger', 'baboon', 'beagle', 'okapi'])
+ self.check_objects(['pre1-alpaca', 'pre1-beagle', 'pre1-bird', 'pre1-bison', 'pre1-bat',
+ 'pre2-crab', 'pre2-bison', 'pre2-bat', 'pre2-badger', 'pre2-baboon'],
+ ['pre1-bear', 'pre1-bird', 'pre1-bison', 'pre1-bat', 'pre1-badger',
+ 'pre2-deer', 'pre2-bat', 'pre2-badger', 'pre2-baboon', 'pre2-beagle',
+ 'pre2-okapi'])
+
+ errmsg = '/rename of flushed file not allowed/'
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: fs4.fs_rename(session, 'okapi', 'zebra', 0), errmsg)
+
+ # XXX
+ # At the moment, removal of flushed files is not allowed - as flushed files are immutable.
+ # We may need to explicitly evict flushed files from cache directory via the API, if so,
+ # the API to do that might be on the local store object, not the file system.
+ errmsg = '/remove of flushed file not allowed/'
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: fs4.fs_remove(session, 'okapi', 0), errmsg)
+
+ # No change since last time.
+ self.check(fs4, '', ['deer', 'bat', 'badger', 'baboon', 'beagle', 'okapi'])
+ self.check_objects(['pre1-alpaca', 'pre1-beagle', 'pre1-bird', 'pre1-bison', 'pre1-bat',
+ 'pre2-crab', 'pre2-bison', 'pre2-bat', 'pre2-badger', 'pre2-baboon'],
+ ['pre1-bear', 'pre1-bird', 'pre1-bison', 'pre1-bat', 'pre1-badger',
+ 'pre2-deer', 'pre2-bat', 'pre2-badger', 'pre2-baboon', 'pre2-beagle',
+ 'pre2-okapi'])
if __name__ == '__main__':
wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_txn26.py b/src/third_party/wiredtiger/test/suite/test_txn26.py
new file mode 100644
index 00000000000..75633b275e3
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_txn26.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+
+# test_txn26.py
+# Test that commit should fail if the commit timestamp is smaller than or equal to the
+# latest active read timestamp. Our handling of out-of-order timestamps relies on this
+# to ensure that repeated reads work as expected.
+def timestamp_str(t):
+ return '%x' % t
+class test_txn26(wttest.WiredTigerTestCase):
+ conn_config = 'cache_size=50MB'
+ session_config = 'isolation=snapshot'
+
+ def test_commit_larger_than_active_timestamp(self):
+ if not wiredtiger.diagnostic_build():
+ self.skipTest('requires a diagnostic build')
+
+ uri = 'table:test_txn26'
+ self.session.create(uri, 'key_format=S,value_format=S')
+ cursor = self.session.open_cursor(uri)
+ self.conn.set_timestamp(
+ 'oldest_timestamp=' + timestamp_str(1) + ',stable_timestamp=' + timestamp_str(1))
+
+ value = 'a'
+
+ # Start a session with timestamp 10
+ session2 = self.conn.open_session(self.session_config)
+ session2.begin_transaction('read_timestamp=' + timestamp_str(10))
+
+ # Try to commit at timestamp 10
+ self.session.begin_transaction()
+ cursor[str(0)] = value
+ with self.expectedStderrPattern("must be greater than the latest active read timestamp"):
+ try:
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(10))
+ except wiredtiger.WiredTigerError as e:
+ gotException = True
+ self.pr('got expected exception: ' + str(e))
+ self.assertTrue(str(e).find('nvalid argument') >= 0)
+ self.assertTrue(gotException, msg = 'expected exception')
diff --git a/src/third_party/wiredtiger/test/suite/test_util21.py b/src/third_party/wiredtiger/test/suite/test_util21.py
index cdd117649db..2271ad8b312 100644
--- a/src/third_party/wiredtiger/test/suite/test_util21.py
+++ b/src/third_party/wiredtiger/test/suite/test_util21.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
#
-# Public Domain 2014-2021 MongoDB, Inc.
+# Public Domain 2014-present MongoDB, Inc.
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
diff --git a/src/third_party/wiredtiger/test/suite/wthooks.py b/src/third_party/wiredtiger/test/suite/wthooks.py
new file mode 100755
index 00000000000..56827350e29
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/wthooks.py
@@ -0,0 +1,259 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# [TEST_TAGS]
+# ignored_file
+# [END_TAGS]
+#
+# WiredTigerHookManager
+# Manage running of hooks
+#
+from __future__ import print_function
+
+import os, sys
+from importlib import import_module
+from abc import ABC, abstractmethod
+import wiredtiger
+
+# Three kinds of hooks available:
+HOOK_REPLACE = 1 # replace the call with the hook function
+HOOK_NOTIFY = 2 # call the hook function after the function
+HOOK_ARGS = 3 # transform the arg list before the call
+
+# Print to /dev/tty for debugging, since anything extraneous to stdout/stderr will
+# cause a test error.
+def tty(message):
+ from wttest import WiredTigerTestCase
+ WiredTigerTestCase.tty(message)
+
+################
+# Hooks Overview
+#
+# Here are some useful terms to know, with some commentary for each.
+#
+# API functions
+# potentially any WiredTiger API functions that a hook creator wishes to modify (like
+# Session.rename). In Python most everything is an object. Of course an instance of
+# "Session" is an object, but also the "Session" class itself is an object. The Session.rename
+# function is also an object (of a certain form that can be called). Also in Python,
+# attributes on an object don't have to be "pre-declared", they can be created at any time.
+# So it's easy to imagine assigning Session._rename_orig to be (the original value of)
+# Session.rename, and then assigning Session.rename to be some other function object, that
+# knows how to do something and then perhaps calls Session._rename_orig . This is the
+# essence of the hook concept.
+#
+# Hook Creator:
+# A way to attach a set of "behavior modifications" to various API functions. More precisely,
+# a hook creator derives from WiredTigerHookCreator and sets up a number of "hook functions",
+# that are actions that are done either just before, after, or instead of, an API function.
+# A XxxxHookCreator lives in a hook_xxxx.py file. When a HookCreator is loaded, it may be
+# given an optional argument. This argument comes from the original python command line.
+# For example, "python run.py --hook abc" loads hook_abc.py (where it expects to find a hook).
+# "python run.py --hook abc=123" loads hook_abc.py with an argument "123".
+#
+# Hook Function:
+# One function that will be called before, after or instead of, an API function. A hook
+# function will be bound to an API function. It is the job of the HookCreator to set up that
+# binding. It is possible to have multiple hook functions bound to the same API function.
+# A hook function that replaces an API function will have the same args as the function
+# it replaces (but there is a trick to give it additional context if needed -
+# see session_create_replace in hook_demo.py).
+
+# For every API function altered, there is one of these objects
+# stashed in the <class>._<api_name>_hooks attribute.
+class WiredTigerHookInfo(object):
+    def __init__(self):
+        # Hook functions that transform the argument list before the call (HOOK_ARGS).
+        self.arg_funcs = []
+        # Hook functions called after the API function returns (HOOK_NOTIFY).
+        self.notify_funcs = []
+        # The single replacement for the API function itself (HOOK_REPLACE).
+        # At the moment, we can only replace a method once.
+        # If needed, we can think about removing this restriction.
+        self.replace_func = None
+
+# hooked_function -
+#   A helper function for the hook manager: invoked in place of a hooked API
+#   function. It applies the argument hooks, calls the replacement function
+#   (or the original if none), then runs the notify hooks, and returns the
+#   underlying call's result.
+def hooked_function(self, orig_func, hook_info_name, *args):
+    hook_info = getattr(self, hook_info_name)
+
+    # NOTE(review): 'notifies' and 'replace_func' appear unused below
+    # (hook_info.replace_func is read directly) — candidates for removal.
+    notifies = []
+    replace_func = None
+
+    # The three kinds of hooks are acted upon at different times.
+    # Before we call the function, we modify the args as indicated
+    # by hooks. Then we call the function, possibly with a replacement.
+    # Finally, we'll call any notify hooks.
+    #
+    # We only walk through the arg-hook list once, threading the (possibly
+    # rewritten) argument tuple through each hook in turn.
+    for hook_func in hook_info.arg_funcs:
+        args = hook_func(self, args)
+    call_func = hook_info.replace_func
+    if call_func == None:
+        call_func = orig_func
+    # When the hooked "class" is the wiredtiger module itself, there is no
+    # instance to pass as the first argument.
+    if self == wiredtiger:
+        ret = call_func(*args)
+    else:
+        ret = call_func(self, *args)
+    for hook_func in hook_info.notify_funcs:
+        hook_func(ret, self, *args)
+    return ret
+
+# WiredTigerHookManager -
+# The hook manager class. There is only one hook manager. It is responsible for finding all the
+# HookCreators at the beginning of the run, and calling setup_hooks() for each one, to have it bind
+# hook functions to API functions. The hook manager is initialized with a list of hook names. Each
+# name is expanded, for example, "demo" causes the hook manager to load hook_demo.py, and to call
+# the "initialize" global function in that file. We expect "initialize" to return a list of objects
+# (hooks) derived from WiredTigerHook (class defined below). Generally, "initialize" returns a
+# single object (setting up some number of "hook functions") but to allow flexibility for different
+# sorts of packaging, we allow any number of hooks to be returned.
+#
+# A hook can set up any number of "hook functions". See hook_demo.py for a sample hook class.
+class WiredTigerHookManager(object):
+ def __init__(self, hooknames = []):
+ self.hooks = []
+ names_seen = []
+ for name in hooknames:
+ # The hooks are indicated as "somename=arg" or simply "somename".
+ # hook_somename.py will be imported, and initialized with the arg.
+ # Names must be unique, as we stash some info into extra fields
+ # on the connection/session/cursor, these are named using the
+ # unique name of the hook.
+ if '=' in name:
+ name,arg = name.split('=', 1)
+ else:
+ arg = None
+ if name in names_seen:
+ raise Exception(name + ': hook name cannot be used multiple times')
+ names_seen.append(name)
+
+ modname = 'hook_' + name
+ try:
+ imported = import_module(modname)
+ for hook in imported.initialize(arg):
+ hook._initialize(name, self)
+ self.hooks.append(hook)
+ except:
+ print('Cannot import hook: ' + name + ', check file ' + modname + '.py')
+ raise
+ for hook in self.hooks:
+ hook.setup_hooks()
+
+ def add_hook(self, clazz, method_name, hook_type, hook_func):
+ if not hasattr(clazz, method_name):
+ raise Exception('Cannot find method ' + method_name + ' on class ' + str(clazz))
+
+ # We need to set up some extra attributes on the Connection class.
+ # Given that the method name is XXXX, and class is Connection, here's what we're doing:
+ # orig = wiredtiger.Connection.XXXX
+ # wiredtiger.Connection._XXXX_hooks = WiredTigerHookInfo()
+ # wiredtiger.Connection._XXXX_orig = wiredtiger.Connection.XXXX
+ # wiredtiger.Connection.XXXX = lambda self, *args:
+ # hooked_function(self, orig, '_XXXX_hooks', *args)
+ hook_info_name = '_' + method_name + '_hooks'
+ orig_name = '_' + method_name + '_orig'
+ if not hasattr(clazz, hook_info_name):
+ #tty('Setting up hook on ' + str(clazz) + '.' + method_name)
+ orig_func = getattr(clazz, method_name)
+ if orig_func == None:
+ raise Exception('method ' + method_name + ' hook setup: method does not exist')
+ setattr(clazz, hook_info_name, WiredTigerHookInfo())
+
+ # If we're using the wiredtiger module and not a class, we need a slightly different
+ # style of hooked_function, since there is no self. What would be the "self" argument
+ # is in fact the class.
+ if clazz == wiredtiger:
+ f = lambda *args: hooked_function(wiredtiger, orig_func, hook_info_name, *args)
+ else:
+ f = lambda self, *args: hooked_function(self, orig_func, hook_info_name, *args)
+ setattr(clazz, method_name, f)
+ setattr(clazz, orig_name, orig_func)
+
+ # Now add to the list of hook functions
+ # If it's a replace hook, we only allow one of them for a given method name
+ hook_info = getattr(clazz, hook_info_name)
+ if hook_type == HOOK_ARGS:
+ hook_info.arg_funcs.append(hook_func)
+ elif hook_type == HOOK_NOTIFY:
+ hook_info.notify_funcs.append(hook_func)
+ elif hook_type == HOOK_REPLACE:
+ if hook_info.replace_func == None:
+ hook_info.replace_func = hook_func
+ else:
+ raise Exception('method ' + method_name + ' hook setup: trying to replace the same method with two hooks')
+ #tty('Setting up hooks list in ' + str(clazz) + '.' + hook_info_name)
+
+ def get_function(self, clazz, method_name):
+ orig_name = '_' + method_name + '_orig'
+ if hasattr(clazz, orig_name):
+ orig_func = getattr(clazz, orig_name)
+ else:
+ orig_func = getattr(clazz, method_name)
+ return orig_func
+
+ def filter_tests(self, tests):
+ for hook in self.hooks:
+ tests = hook.filter_tests(tests)
+ return tests
+
+class HookCreatorProxy(object):
+    def __init__(self, hookmgr, clazz):
+        self.hookmgr = hookmgr
+        self.clazz = clazz
+
+    # Get the original function/method before any hooks applied
+    def __getitem__(self, name):
+        return self.hookmgr.get_function(self.clazz, name)
+
+    # Install a hook on the named method; value is a (HOOK_xxxx, function) pair.
+    def __setitem__(self, name, value):
+        try:
+            hooktype = int(value[0])
+            fcn = value[1]
+        except:
+            raise ValueError('value must be (HOOK_xxxx, function)')
+        self.hookmgr.add_hook(self.clazz, name, hooktype, fcn)
+
+# Hooks must derive from this class
+class WiredTigerHookCreator(ABC):
+ # This is called right after creation and should not be overridden.
+ def _initialize(self, name, hookmgr):
+ self.name = name
+ self.hookmgr = hookmgr
+ self.wiredtiger = HookCreatorProxy(self.hookmgr, wiredtiger)
+ self.Connection = HookCreatorProxy(self.hookmgr, wiredtiger.Connection)
+ self.Session = HookCreatorProxy(self.hookmgr, wiredtiger.Session)
+ self.Cursor = HookCreatorProxy(self.hookmgr, wiredtiger.Cursor)
+
+ # default version of filter_tests, can be overridden
+ def filter_tests(self, tests):
+ return tests
+
+ @abstractmethod
+ def setup_hooks(self):
+ """Set up all hooks using add_*_hook methods."""
+ return
diff --git a/src/third_party/wiredtiger/test/suite/wttest.py b/src/third_party/wiredtiger/test/suite/wttest.py
index be38c1a748a..87db069fefe 100755
--- a/src/third_party/wiredtiger/test/suite/wttest.py
+++ b/src/third_party/wiredtiger/test/suite/wttest.py
@@ -43,7 +43,7 @@ except ImportError:
from contextlib import contextmanager
import errno, glob, os, re, shutil, sys, time, traceback
-import wiredtiger, wtscenario
+import wiredtiger, wtscenario, wthooks
def shortenWithEllipsis(s, maxlen):
if len(s) > maxlen:
@@ -183,6 +183,7 @@ class ExtensionList(list):
class WiredTigerTestCase(unittest.TestCase):
_globalSetup = False
_printOnceSeen = {}
+ _ttyDescriptor = None # set this early, to allow tty() to be called any time.
# conn_config can be overridden to add to basic connection configuration.
# Can be a string or a callable function or lambda expression.
@@ -200,14 +201,15 @@ class WiredTigerTestCase(unittest.TestCase):
conn_extensions = ()
@staticmethod
- def globalSetup(preserveFiles = False, useTimestamp = False,
+ def globalSetup(preserveFiles = False, removeAtStart = True, useTimestamp = False,
gdbSub = False, lldbSub = False, verbose = 1, builddir = None, dirarg = None,
- longtest = False, ignoreStdout = False, seedw = 0, seedz = 0):
+ longtest = False, ignoreStdout = False, seedw = 0, seedz = 0, hookmgr = None):
WiredTigerTestCase._preserveFiles = preserveFiles
d = 'WT_TEST' if dirarg == None else dirarg
if useTimestamp:
d += '.' + time.strftime('%Y%m%d-%H%M%S', time.localtime())
- shutil.rmtree(d, ignore_errors=True)
+ if removeAtStart:
+ shutil.rmtree(d, ignore_errors=True)
os.makedirs(d)
wtscenario.set_long_run(longtest)
WiredTigerTestCase._parentTestdir = d
@@ -224,9 +226,11 @@ class WiredTigerTestCase(unittest.TestCase):
WiredTigerTestCase._stderr = sys.stderr
WiredTigerTestCase._concurrent = False
WiredTigerTestCase._globalSetup = True
- WiredTigerTestCase._ttyDescriptor = None
WiredTigerTestCase._seeds = [521288629, 362436069]
WiredTigerTestCase._randomseed = False
+ if hookmgr == None:
+ hookmgr = wthooks.WiredTigerHookManager()
+ WiredTigerTestCase._hookmgr = hookmgr
if seedw != 0 and seedz != 0:
WiredTigerTestCase._randomseed = True
WiredTigerTestCase._seeds = [seedw, seedz]
diff --git a/src/third_party/wiredtiger/test/utility/misc.c b/src/third_party/wiredtiger/test/utility/misc.c
index fc0896aced6..674a868c35e 100644
--- a/src/third_party/wiredtiger/test/utility/misc.c
+++ b/src/third_party/wiredtiger/test/utility/misc.c
@@ -35,6 +35,12 @@ void (*custom_die)(void) = NULL;
const char *progname = "program name not set";
/*
+ * Backup directory initialize command, remove and re-create the primary backup directory, plus a
+ * copy we maintain for recovery testing.
+ */
+#define HOME_BACKUP_INIT_CMD "rm -rf %s/BACKUP %s/BACKUP.copy && mkdir %s/BACKUP %s/BACKUP.copy"
+
+/*
* testutil_die --
* Report an error and abort.
*/
@@ -256,6 +262,46 @@ testutil_timestamp_parse(const char *str, uint64_t *tsp)
testutil_assert(p - str <= 16);
}
+/*
+ * testutil_create_backup_directory --
+ *     Remove and re-create the backup directories (BACKUP and BACKUP.copy) under the home
+ *     directory, using the HOME_BACKUP_INIT_CMD shell command.
+ */
+void
+testutil_create_backup_directory(const char *home)
+{
+    size_t len;
+    char *cmd;
+
+    /* The command template references "home" four times, hence the * 4 in the sizing. */
+    len = strlen(home) * 4 + strlen(HOME_BACKUP_INIT_CMD) + 1;
+    cmd = dmalloc(len);
+    testutil_check(__wt_snprintf(cmd, len, HOME_BACKUP_INIT_CMD, home, home, home, home));
+    testutil_checkfmt(system(cmd), "%s", "backup directory creation failed");
+    free(cmd);
+}
+
+/*
+ * testutil_copy_file --
+ *     Copy a single file into the backup directories (BACKUP and BACKUP.copy).
+ */
+void
+testutil_copy_file(WT_SESSION *session, const char *name)
+{
+    size_t len;
+    char *first, *second;
+
+    len = strlen("BACKUP") + strlen(name) + 10;
+    first = dmalloc(len);
+    testutil_check(__wt_snprintf(first, len, "BACKUP/%s", name));
+    testutil_check(__wt_copy_and_sync(session, name, first));
+
+    /*
+     * Save another copy of the original file to make debugging recovery errors easier.
+     */
+    len = strlen("BACKUP.copy") + strlen(name) + 10;
+    second = dmalloc(len);
+    testutil_check(__wt_snprintf(second, len, "BACKUP.copy/%s", name));
+    testutil_check(__wt_copy_and_sync(session, first, second));
+
+    free(first);
+    free(second);
+}
+
/*
* testutil_is_flag_set --
* Return if an environment variable flag is set.
diff --git a/src/third_party/wiredtiger/test/utility/test_util.h b/src/third_party/wiredtiger/test/utility/test_util.h
index be32e7c0206..b657d5717a7 100644
--- a/src/third_party/wiredtiger/test/utility/test_util.h
+++ b/src/third_party/wiredtiger/test/utility/test_util.h
@@ -274,6 +274,8 @@ int testutil_parse_opts(int, char *const *, TEST_OPTS *);
void testutil_print_command_line(int argc, char *const *argv);
void testutil_progress(TEST_OPTS *, const char *);
void testutil_timestamp_parse(const char *, uint64_t *);
+void testutil_create_backup_directory(const char *);
+void testutil_copy_file(WT_SESSION *, const char *);
#ifndef _WIN32
void testutil_sleep_wait(uint32_t, pid_t);
#endif