From c4612847ed88c5e0500f0e9ecb2ecdcb49621cf1 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Thu, 4 Aug 2016 16:10:27 +1000 Subject: Import wiredtiger-wiredtiger-2.8.0-589-ga9e9696.tar.gz from wiredtiger branch mongodb-3.4 ref: d8fb874..a9e9696 for: 3.3.11 SERVER-24971 Excessive memory held by sessions when application threads do evictions WT-1162 Add latency to Jenkins wtperf tests and plots WT-2026 Maximum pages size at eviction too large WT-2239 Make sure LSM cursors read up to date dsk_gen, it was racing with compact WT-2353 Failure to create async threads as part of a wiredtiger_open call will cause a hang WT-2380 Make scripts fail if code doesn't match style WT-2486 Update make check so that it runs faster WT-2578 remove write barriers from the TAILQ_INSERT_XXX macros WT-2648 cache-line alignment for new ports WT-2665 Limit allocator fragmentation in WiredTiger WT-2693 Check open_cursor error paths for consistent handling WT-2708 split child-update race with reconciliation/eviction WT-2711 Change statistics log configuration options WT-2728 Don't re-read log file headers during log_flush WT-2729 Focus eviction walks in largest trees WT-2730 cursor next/prev can return the wrong key/value pair when crossing a page boundary WT-2731 Raw compression can create pages that are larger than expected WT-2732 Coverity analysis defect 99665: Redundant test WT-2737 Scrub dirty pages rather than evicting them WT-2738 Remove the ability to change the default checkpoint name WT-2739 pluggable file systems documentation cleanups WT-2743 Thread count statistics always report 0 WT-2744 partial line even with line buffering set WT-2746 track checkpoint I/O separately from eviction I/O WT-2751 column-store statistics incorrectly calculates the number of entries WT-2752 Fixes to zipfian wtperf workload config WT-2755 flexelint configuration treats size_t as 4B type WT-2756 Upgrade the autoconf archive package to check for swig 3.0 WT-2757 Column tables behave differently when 
column names are provided WT-2759 Releasing the hot-backup lock doesn't require the schema lock. WT-2760 Fix a bug in backup related to directory sync. Change the filesystem API to make durable the default WT-2762 wtstats tool fails if checkpoint runs WT-2763 Unit test test_intpack failing on OSX WT-2764 Optimize checkpoints to reduce throughput disruption WT-2765 wt dump: indices need to be shown in the dump output WT-2767 test suite needs way to run an individual scenario WT-2769 Update documentation to reflect correct limits of memory_page_max WT-2770 Add statistics tracking schema operations WT-2772 Investigate log performance testing weirdness WT-2773 search_near in indexes does not find exact matches WT-2774 minor cleanups/improvements WT-2778 Python test suite: make scenario initialization consistent WT-2779 Raw compression created unexpectedly large pages on disk WT-2781 Enhance bulk cursor option with an option to return immediately on contention WT-2782 Missing a fs_directory_list_free in ex_file_system.c WT-2785 Scrub dirty pages rather than evicting them: single-page reconciliation WT-2791 Enhance OS X Evergreen unit test WT-2793 wtperf config improvements WT-2796 Memory leak in reconciliation uncovered by stress testing WT-2798 Crash vulnerability with nojournal after create during checkpoint WT-2800 Illegal file format in test/format on PPC WT-2801 Crash vulnerability from eviction of metadata during checkpoint WT-2802 Transaction commit causes heap-use-after free WT-2803 Add verbose functionality to WT Evergreen tests WT-2804 Don't read values in a tree without a snapshot WT-2805 Infinite recursion if error streams fail WT-2806 wtperf allocation size off-by-one --- src/third_party/wiredtiger/SConstruct | 2 +- .../wiredtiger/bench/wtperf/Makefile.am | 12 +- .../wiredtiger/bench/wtperf/idle_table_cycle.c | 3 +- .../wtperf/runners/checkpoint_schema_race.wtperf | 20 + .../wtperf/runners/evict-btree-stress-multi.wtperf | 12 + 
.../bench/wtperf/runners/fruit-lsm.wtperf | 22 - .../bench/wtperf/runners/fruit-short.wtperf | 20 - .../bench/wtperf/runners/log-append-large.wtperf | 10 - .../bench/wtperf/runners/log-append-zero.wtperf | 8 - .../bench/wtperf/runners/log-append.wtperf | 8 - .../bench/wtperf/runners/log-nockpt.wtperf | 12 - .../bench/wtperf/runners/log-noprealloc.wtperf | 11 - .../wiredtiger/bench/wtperf/runners/log.wtperf | 27 +- .../runners/multi-btree-zipfian-populate.wtperf | 19 + .../runners/multi-btree-zipfian-workload.wtperf | 18 + .../bench/wtperf/runners/overflow-10k-short.wtperf | 19 - .../bench/wtperf/runners/overflow-10k.wtperf | 16 +- .../wtperf/runners/overflow-130k-short.wtperf | 19 - .../bench/wtperf/runners/overflow-130k.wtperf | 18 +- src/third_party/wiredtiger/bench/wtperf/wtperf.c | 166 ++--- src/third_party/wiredtiger/bench/wtperf/wtperf.h | 84 +-- .../wiredtiger/bench/wtperf/wtperf_opt.i | 1 + .../wiredtiger/bench/wtperf/wtperf_throttle.c | 13 +- .../wiredtiger/build_posix/Make.subdirs | 8 +- .../wiredtiger/build_posix/aclocal/ax_pkg_swig.m4 | 10 +- src/third_party/wiredtiger/dist/api_data.py | 228 ++++--- src/third_party/wiredtiger/dist/flags.py | 5 +- src/third_party/wiredtiger/dist/s_all | 16 + src/third_party/wiredtiger/dist/s_string.ok | 9 + src/third_party/wiredtiger/dist/s_style | 2 +- src/third_party/wiredtiger/dist/stat_data.py | 48 +- src/third_party/wiredtiger/examples/c/ex_all.c | 27 +- .../wiredtiger/examples/c/ex_file_system.c | 39 +- .../java/com/wiredtiger/examples/ex_all.java | 22 +- .../ext/compressors/zlib/zlib_compress.c | 2 +- src/third_party/wiredtiger/src/async/async_api.c | 12 + src/third_party/wiredtiger/src/block/block_ckpt.c | 2 +- src/third_party/wiredtiger/src/block/block_ext.c | 7 +- src/third_party/wiredtiger/src/block/block_mgr.c | 13 +- src/third_party/wiredtiger/src/block/block_open.c | 27 +- .../wiredtiger/src/block/block_session.c | 2 +- src/third_party/wiredtiger/src/block/block_write.c | 50 +- 
src/third_party/wiredtiger/src/btree/bt_curnext.c | 14 +- src/third_party/wiredtiger/src/btree/bt_curprev.c | 6 +- src/third_party/wiredtiger/src/btree/bt_discard.c | 4 +- src/third_party/wiredtiger/src/btree/bt_handle.c | 2 + src/third_party/wiredtiger/src/btree/bt_huffman.c | 3 +- src/third_party/wiredtiger/src/btree/bt_io.c | 9 +- src/third_party/wiredtiger/src/btree/bt_ovfl.c | 2 + src/third_party/wiredtiger/src/btree/bt_page.c | 1 + src/third_party/wiredtiger/src/btree/bt_read.c | 27 +- src/third_party/wiredtiger/src/btree/bt_split.c | 141 +++-- src/third_party/wiredtiger/src/btree/bt_stat.c | 5 +- src/third_party/wiredtiger/src/btree/bt_sync.c | 58 +- src/third_party/wiredtiger/src/btree/bt_walk.c | 20 +- src/third_party/wiredtiger/src/btree/row_srch.c | 2 +- src/third_party/wiredtiger/src/config/config_def.c | 209 ++++--- src/third_party/wiredtiger/src/conn/conn_api.c | 19 +- src/third_party/wiredtiger/src/conn/conn_cache.c | 40 +- src/third_party/wiredtiger/src/conn/conn_ckpt.c | 71 +-- src/third_party/wiredtiger/src/conn/conn_dhandle.c | 10 + src/third_party/wiredtiger/src/conn/conn_log.c | 52 +- src/third_party/wiredtiger/src/conn/conn_stat.c | 159 +++-- src/third_party/wiredtiger/src/cursor/cur_backup.c | 58 +- src/third_party/wiredtiger/src/cursor/cur_config.c | 4 +- src/third_party/wiredtiger/src/cursor/cur_ds.c | 5 +- src/third_party/wiredtiger/src/cursor/cur_dump.c | 4 +- src/third_party/wiredtiger/src/cursor/cur_file.c | 38 +- src/third_party/wiredtiger/src/cursor/cur_index.c | 52 +- src/third_party/wiredtiger/src/cursor/cur_log.c | 26 +- .../wiredtiger/src/cursor/cur_metadata.c | 11 +- src/third_party/wiredtiger/src/cursor/cur_stat.c | 28 +- src/third_party/wiredtiger/src/cursor/cur_table.c | 15 +- src/third_party/wiredtiger/src/docs/security.dox | 21 +- src/third_party/wiredtiger/src/docs/spell.ok | 3 + src/third_party/wiredtiger/src/docs/statistics.dox | 31 +- src/third_party/wiredtiger/src/docs/upgrading.dox | 44 +- 
src/third_party/wiredtiger/src/docs/wtperf.dox | 2 + src/third_party/wiredtiger/src/evict/evict_lru.c | 634 ++++++++++++-------- src/third_party/wiredtiger/src/evict/evict_page.c | 190 ++++-- src/third_party/wiredtiger/src/include/api.h | 2 + src/third_party/wiredtiger/src/include/block.h | 2 +- src/third_party/wiredtiger/src/include/btmem.h | 36 +- src/third_party/wiredtiger/src/include/btree.h | 15 +- src/third_party/wiredtiger/src/include/btree.i | 188 +++--- src/third_party/wiredtiger/src/include/cache.h | 35 +- src/third_party/wiredtiger/src/include/cache.i | 95 ++- .../wiredtiger/src/include/connection.h | 2 +- src/third_party/wiredtiger/src/include/cursor.h | 9 +- src/third_party/wiredtiger/src/include/cursor.i | 7 +- src/third_party/wiredtiger/src/include/extern.h | 23 +- src/third_party/wiredtiger/src/include/flags.h | 13 +- src/third_party/wiredtiger/src/include/hardware.h | 11 +- src/third_party/wiredtiger/src/include/intpack.i | 8 +- .../wiredtiger/src/include/os_fhandle.i | 12 +- src/third_party/wiredtiger/src/include/os_fs.i | 75 +-- .../wiredtiger/src/include/os_fstream.i | 2 +- src/third_party/wiredtiger/src/include/queue.h | 174 ++++-- src/third_party/wiredtiger/src/include/stat.h | 38 +- src/third_party/wiredtiger/src/include/txn.h | 1 + src/third_party/wiredtiger/src/include/txn.i | 14 +- .../wiredtiger/src/include/wiredtiger.in | 667 +++++++++++---------- src/third_party/wiredtiger/src/log/log.c | 59 +- src/third_party/wiredtiger/src/lsm/lsm_cursor.c | 23 +- src/third_party/wiredtiger/src/lsm/lsm_tree.c | 14 +- src/third_party/wiredtiger/src/lsm/lsm_work_unit.c | 2 +- src/third_party/wiredtiger/src/meta/meta_table.c | 3 + src/third_party/wiredtiger/src/meta/meta_track.c | 13 +- src/third_party/wiredtiger/src/meta/meta_turtle.c | 11 +- .../wiredtiger/src/os_common/filename.c | 54 +- .../wiredtiger/src/os_common/os_fhandle.c | 32 +- .../wiredtiger/src/os_common/os_fs_inmemory.c | 12 +- .../wiredtiger/src/os_common/os_fstream.c | 2 +- 
src/third_party/wiredtiger/src/os_posix/os_fs.c | 119 +++- src/third_party/wiredtiger/src/os_win/os_fs.c | 28 +- src/third_party/wiredtiger/src/os_win/os_path.c | 26 +- .../wiredtiger/src/reconcile/rec_write.c | 319 +++++----- .../wiredtiger/src/schema/schema_rename.c | 2 +- .../wiredtiger/src/session/session_api.c | 168 +++++- .../wiredtiger/src/session/session_compact.c | 14 +- src/third_party/wiredtiger/src/support/err.c | 6 + src/third_party/wiredtiger/src/support/stat.c | 121 +++- src/third_party/wiredtiger/src/txn/txn.c | 59 +- src/third_party/wiredtiger/src/txn/txn_ckpt.c | 474 +++++++++++---- src/third_party/wiredtiger/src/txn/txn_log.c | 2 +- .../wiredtiger/src/utilities/util_dump.c | 48 +- .../test/cursor_order/cursor_order_ops.c | 3 +- src/third_party/wiredtiger/test/format/ops.c | 6 +- src/third_party/wiredtiger/test/format/smoke.sh | 2 +- .../wiredtiger/test/manydbs/Makefile.am | 3 +- src/third_party/wiredtiger/test/manydbs/smoke.sh | 18 - src/third_party/wiredtiger/test/mciproject.yml | 32 +- .../wiredtiger/test/recovery/Makefile.am | 3 +- .../wiredtiger/test/recovery/random-abort.c | 18 +- src/third_party/wiredtiger/test/recovery/smoke.sh | 8 + src/third_party/wiredtiger/test/suite/helper.py | 43 ++ src/third_party/wiredtiger/test/suite/run.py | 41 +- .../wiredtiger/test/suite/test_async01.py | 4 +- .../wiredtiger/test/suite/test_async02.py | 4 +- .../wiredtiger/test/suite/test_backup02.py | 4 +- .../wiredtiger/test/suite/test_backup03.py | 36 +- .../wiredtiger/test/suite/test_backup04.py | 4 +- .../wiredtiger/test/suite/test_backup05.py | 1 - .../wiredtiger/test/suite/test_base02.py | 4 +- .../wiredtiger/test/suite/test_base05.py | 4 +- .../wiredtiger/test/suite/test_bug003.py | 4 +- .../wiredtiger/test/suite/test_bug006.py | 4 +- .../wiredtiger/test/suite/test_bug008.py | 4 +- .../wiredtiger/test/suite/test_bug009.py | 1 - .../wiredtiger/test/suite/test_bug011.py | 2 +- .../wiredtiger/test/suite/test_bug016.py | 109 ++++ 
.../wiredtiger/test/suite/test_bulk01.py | 4 +- .../wiredtiger/test/suite/test_bulk02.py | 7 +- .../wiredtiger/test/suite/test_checkpoint01.py | 14 +- .../wiredtiger/test/suite/test_checkpoint02.py | 4 +- .../wiredtiger/test/suite/test_colgap.py | 6 +- .../wiredtiger/test/suite/test_collator.py | 1 - .../wiredtiger/test/suite/test_compact01.py | 4 +- .../wiredtiger/test/suite/test_compact02.py | 7 +- .../wiredtiger/test/suite/test_compress01.py | 4 +- .../wiredtiger/test/suite/test_config03.py | 7 +- .../wiredtiger/test/suite/test_cursor01.py | 4 +- .../wiredtiger/test/suite/test_cursor02.py | 4 +- .../wiredtiger/test/suite/test_cursor03.py | 4 +- .../wiredtiger/test/suite/test_cursor04.py | 4 +- .../wiredtiger/test/suite/test_cursor06.py | 4 +- .../wiredtiger/test/suite/test_cursor07.py | 4 +- .../wiredtiger/test/suite/test_cursor08.py | 12 +- .../wiredtiger/test/suite/test_cursor09.py | 4 +- .../wiredtiger/test/suite/test_cursor_compare.py | 4 +- .../wiredtiger/test/suite/test_cursor_pin.py | 4 +- .../wiredtiger/test/suite/test_cursor_random.py | 8 +- .../wiredtiger/test/suite/test_cursor_random02.py | 4 +- src/third_party/wiredtiger/test/suite/test_drop.py | 4 +- src/third_party/wiredtiger/test/suite/test_dump.py | 11 +- src/third_party/wiredtiger/test/suite/test_dupc.py | 4 +- .../wiredtiger/test/suite/test_durability01.py | 1 - .../wiredtiger/test/suite/test_empty.py | 4 +- .../wiredtiger/test/suite/test_encrypt01.py | 5 +- .../wiredtiger/test/suite/test_encrypt02.py | 4 +- .../wiredtiger/test/suite/test_encrypt03.py | 4 +- .../wiredtiger/test/suite/test_encrypt04.py | 5 +- .../wiredtiger/test/suite/test_encrypt05.py | 5 +- .../wiredtiger/test/suite/test_encrypt06.py | 4 +- .../wiredtiger/test/suite/test_encrypt07.py | 1 - src/third_party/wiredtiger/test/suite/test_excl.py | 4 +- .../wiredtiger/test/suite/test_huffman01.py | 4 +- .../wiredtiger/test/suite/test_huffman02.py | 4 +- .../wiredtiger/test/suite/test_index02.py | 68 +++ 
.../wiredtiger/test/suite/test_inmem01.py | 88 +-- .../wiredtiger/test/suite/test_intpack.py | 35 +- .../wiredtiger/test/suite/test_join01.py | 10 +- .../wiredtiger/test/suite/test_join02.py | 4 +- .../wiredtiger/test/suite/test_join03.py | 1 - .../wiredtiger/test/suite/test_join04.py | 1 - .../wiredtiger/test/suite/test_join05.py | 1 - .../wiredtiger/test/suite/test_join06.py | 4 +- .../wiredtiger/test/suite/test_join07.py | 4 +- .../wiredtiger/test/suite/test_join08.py | 1 - .../wiredtiger/test/suite/test_jsondump01.py | 50 +- .../wiredtiger/test/suite/test_lsm01.py | 8 +- .../test/suite/test_metadata_cursor01.py | 4 +- .../wiredtiger/test/suite/test_nsnap01.py | 1 - .../wiredtiger/test/suite/test_nsnap02.py | 1 - .../wiredtiger/test/suite/test_nsnap03.py | 1 - .../wiredtiger/test/suite/test_nsnap04.py | 1 - .../wiredtiger/test/suite/test_overwrite.py | 4 +- .../wiredtiger/test/suite/test_perf001.py | 4 +- .../wiredtiger/test/suite/test_readonly01.py | 5 +- .../wiredtiger/test/suite/test_rebalance.py | 4 +- .../wiredtiger/test/suite/test_reconfig01.py | 19 +- .../wiredtiger/test/suite/test_reconfig02.py | 33 +- .../wiredtiger/test/suite/test_rename.py | 4 +- .../wiredtiger/test/suite/test_schema02.py | 4 +- .../wiredtiger/test/suite/test_schema03.py | 15 +- .../wiredtiger/test/suite/test_schema04.py | 4 +- .../wiredtiger/test/suite/test_schema05.py | 4 +- .../wiredtiger/test/suite/test_schema06.py | 5 +- .../wiredtiger/test/suite/test_split.py | 1 - .../wiredtiger/test/suite/test_stat01.py | 4 +- .../wiredtiger/test/suite/test_stat02.py | 35 +- .../wiredtiger/test/suite/test_stat03.py | 4 +- .../wiredtiger/test/suite/test_stat04.py | 10 +- .../wiredtiger/test/suite/test_stat05.py | 13 +- .../wiredtiger/test/suite/test_stat_log01.py | 16 +- .../wiredtiger/test/suite/test_sweep01.py | 4 +- .../wiredtiger/test/suite/test_sweep03.py | 4 +- .../wiredtiger/test/suite/test_truncate01.py | 13 +- .../wiredtiger/test/suite/test_truncate02.py | 5 +- 
.../wiredtiger/test/suite/test_txn01.py | 4 +- .../wiredtiger/test/suite/test_txn02.py | 12 +- .../wiredtiger/test/suite/test_txn03.py | 4 +- .../wiredtiger/test/suite/test_txn04.py | 4 +- .../wiredtiger/test/suite/test_txn05.py | 5 +- .../wiredtiger/test/suite/test_txn06.py | 1 - .../wiredtiger/test/suite/test_txn07.py | 5 +- .../wiredtiger/test/suite/test_txn08.py | 1 - .../wiredtiger/test/suite/test_txn09.py | 9 +- .../wiredtiger/test/suite/test_txn10.py | 1 - .../wiredtiger/test/suite/test_txn12.py | 1 - .../wiredtiger/test/suite/test_txn13.py | 4 +- .../wiredtiger/test/suite/test_txn14.py | 4 +- .../wiredtiger/test/suite/test_txn15.py | 4 +- .../wiredtiger/test/suite/test_upgrade.py | 4 +- .../wiredtiger/test/suite/test_util02.py | 4 +- .../wiredtiger/test/suite/test_util03.py | 4 +- .../wiredtiger/test/suite/test_util13.py | 4 +- .../wiredtiger/test/suite/wtscenario.py | 36 +- src/third_party/wiredtiger/test/suite/wttest.py | 6 +- src/third_party/wiredtiger/test/thread/smoke.sh | 6 +- src/third_party/wiredtiger/test/utility/misc.c | 15 + .../wiredtiger/test/utility/test_util.h | 1 + .../wiredtiger/tools/wtstats/stat_data.py | 47 +- .../wiredtiger/tools/wtstats/wtstats.py | 2 + 254 files changed, 4447 insertions(+), 2911 deletions(-) create mode 100644 src/third_party/wiredtiger/bench/wtperf/runners/checkpoint_schema_race.wtperf create mode 100644 src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-stress-multi.wtperf delete mode 100644 src/third_party/wiredtiger/bench/wtperf/runners/fruit-lsm.wtperf delete mode 100644 src/third_party/wiredtiger/bench/wtperf/runners/fruit-short.wtperf delete mode 100644 src/third_party/wiredtiger/bench/wtperf/runners/log-append-large.wtperf delete mode 100644 src/third_party/wiredtiger/bench/wtperf/runners/log-append-zero.wtperf delete mode 100644 src/third_party/wiredtiger/bench/wtperf/runners/log-append.wtperf delete mode 100644 src/third_party/wiredtiger/bench/wtperf/runners/log-nockpt.wtperf delete mode 100644 
src/third_party/wiredtiger/bench/wtperf/runners/log-noprealloc.wtperf create mode 100644 src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-zipfian-populate.wtperf create mode 100644 src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-zipfian-workload.wtperf delete mode 100644 src/third_party/wiredtiger/bench/wtperf/runners/overflow-10k-short.wtperf delete mode 100644 src/third_party/wiredtiger/bench/wtperf/runners/overflow-130k-short.wtperf delete mode 100755 src/third_party/wiredtiger/test/manydbs/smoke.sh create mode 100755 src/third_party/wiredtiger/test/recovery/smoke.sh create mode 100644 src/third_party/wiredtiger/test/suite/test_bug016.py create mode 100644 src/third_party/wiredtiger/test/suite/test_index02.py (limited to 'src/third_party/wiredtiger') diff --git a/src/third_party/wiredtiger/SConstruct b/src/third_party/wiredtiger/SConstruct index a5dd8761d6c..b0ce771e9bd 100644 --- a/src/third_party/wiredtiger/SConstruct +++ b/src/third_party/wiredtiger/SConstruct @@ -484,7 +484,7 @@ t = env.Program("wtperf", [ "bench/wtperf/wtperf_throttle.c", "bench/wtperf/wtperf_truncate.c", ], - LIBS=[wtlib, shim] + wtlibs) + LIBS=[wtlib, shim, testutil] + wtlibs) Default(t) #Build the Examples diff --git a/src/third_party/wiredtiger/bench/wtperf/Makefile.am b/src/third_party/wiredtiger/bench/wtperf/Makefile.am index cc1f84b5406..57792e3887f 100644 --- a/src/third_party/wiredtiger/bench/wtperf/Makefile.am +++ b/src/third_party/wiredtiger/bench/wtperf/Makefile.am @@ -1,13 +1,17 @@ -AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include - -LDADD = $(top_builddir)/libwiredtiger.la -lm +AM_CPPFLAGS = -I$(top_builddir) +AM_CPPFLAGS +=-I$(top_srcdir)/src/include +AM_CPPFLAGS +=-I$(top_srcdir)/test/utility noinst_PROGRAMS = wtperf -wtperf_LDFLAGS = -static wtperf_SOURCES =\ config.c idle_table_cycle.c misc.c track.c wtperf.c \ wtperf.h wtperf_opt.i wtperf_throttle.c wtperf_truncate.c +wtperf_LDADD = $(top_builddir)/test/utility/libtest_util.la 
+wtperf_LDADD +=$(top_builddir)/libwiredtiger.la +wtperf_LDADD +=-lm +wtperf_LDFLAGS = -static + TESTS = smoke.sh AM_TESTS_ENVIRONMENT = rm -rf WT_TEST ; mkdir WT_TEST ; # automake 1.11 compatibility diff --git a/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c b/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c index b699b5b9dd1..3c079bb560f 100644 --- a/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c +++ b/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c @@ -129,7 +129,8 @@ cycle_idle_tables(void *arg) * Drop the table. Keep retrying on EBUSY failure - it is an * expected return when checkpoints are happening. */ - while ((ret = session->drop(session, uri, "force")) == EBUSY) + while ((ret = session->drop( + session, uri, "force,checkpoint_wait=false")) == EBUSY) __wt_sleep(1, 0); if (ret != 0 && ret != EBUSY) { diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/checkpoint_schema_race.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/checkpoint_schema_race.wtperf new file mode 100644 index 00000000000..ade8e88ee9b --- /dev/null +++ b/src/third_party/wiredtiger/bench/wtperf/runners/checkpoint_schema_race.wtperf @@ -0,0 +1,20 @@ +# Check create and drop behavior concurrent with checkpoints (WT-2798). +# Setup a multiple tables and a cache size large enough that checkpoints can +# take a long time. +conn_config="cache_size=8GB,log=(enabled=false),checkpoint=(wait=30)" +table_config="leaf_page_max=4k,internal_page_max=16k,type=file" +icount=10000000 +table_count=100 +table_count_idle=100 +# Turn on create/drop of idle tables, but don't worry if individual operations +# take a long time. 
+idle_table_cycle=120 +populate_threads=5 +checkpoint_threads=0 +report_interval=5 +# 10 million +random_range=10000000 +run_time=300 +# Setup a workload that dirties a lot of the cache +threads=((count=2,reads=1),(count=2,inserts=1),(count=2,updates=1)) +value_sz=500 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-stress-multi.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-stress-multi.wtperf new file mode 100644 index 00000000000..9699b9ae3bb --- /dev/null +++ b/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-stress-multi.wtperf @@ -0,0 +1,12 @@ +conn_config="cache_size=1G,eviction=(threads_max=4),session_max=2000" +table_config="type=file" +table_count=100 +icount=100000000 +report_interval=5 +run_time=600 +populate_threads=1 +threads=((count=100,updates=1,reads=4,ops_per_txn=30)) +# Warn if a latency over a quarter second is seen +max_latency=250 +sample_interval=5 +sample_rate=1 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/fruit-lsm.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/fruit-lsm.wtperf deleted file mode 100644 index e5817554201..00000000000 --- a/src/third_party/wiredtiger/bench/wtperf/runners/fruit-lsm.wtperf +++ /dev/null @@ -1,22 +0,0 @@ -# wtperf options file: simulate riak and its test1 and test2 configuration -# The configuration for the connection and table are from riak and the -# specification of the data (count, size, threads) is from basho_bench. 
-# -conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600),log=(enabled=true),transaction_sync=(enabled=true,method=none),checkpoint=(wait=180),lsm_manager=(worker_thread_max=12)" -#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)" -#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024" -compact=true -sess_config="isolation=snapshot" -table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB),type=lsm,leaf_page_max=16K,leaf_item_max=4K,os_cache_dirty_max=16MB" -icount=25000000 -key_sz=40 -value_sz=800 -#max_latency=2000 -pareto=20 -populate_threads=20 -report_interval=10 -random_value=true -run_time=18000 -sample_interval=10 -table_count=8 -threads=((count=20,read=6,update=1)) diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/fruit-short.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/fruit-short.wtperf deleted file mode 100644 index 10cb423a92d..00000000000 --- a/src/third_party/wiredtiger/bench/wtperf/runners/fruit-short.wtperf +++ /dev/null @@ -1,20 +0,0 @@ -# wtperf options file: simulate riak and its test1 and test2 configuration -# The configuration for the connection and table are from riak and the -# specification of the data (count, size, threads) is from basho_bench. 
-# -#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)" -conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,lsm_manager=(worker_thread_max=6)" -compact=true -sess_config="isolation=snapshot" -table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB),type=lsm,leaf_page_max=16K" -icount=25000000 -key_sz=40 -value_sz=800 -max_latency=2000 -pareto=20 -populate_threads=20 -report_interval=10 -random_value=true -run_time=1800 -sample_interval=10 -threads=((count=20,read=6,update=1)) diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/log-append-large.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/log-append-large.wtperf deleted file mode 100644 index c1364c17c28..00000000000 --- a/src/third_party/wiredtiger/bench/wtperf/runners/log-append-large.wtperf +++ /dev/null @@ -1,10 +0,0 @@ -# wtperf options file: Test a log file with a multi-threaded -# append workload. We want to create a very large number of log file -# switches with fewer records per log file than we have active threads. -conn_config="cache_size=1G,log=(enabled=true,file_max=20MB),checkpoint=(log_size=1G)" -table_config="type=file" -icount=1000 -report_interval=5 -run_time=0 -value_sz=5000000 -populate_threads=8 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/log-append-zero.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/log-append-zero.wtperf deleted file mode 100644 index 973d2cddd0d..00000000000 --- a/src/third_party/wiredtiger/bench/wtperf/runners/log-append-zero.wtperf +++ /dev/null @@ -1,8 +0,0 @@ -# wtperf options file: Test a log file with a multi-threaded -# append workload. 
-conn_config="cache_size=1G,log=(enabled=true,file_max=20MB,zero_fill=true),checkpoint=(log_size=1G)" -table_config="type=file" -icount=50000000 -report_interval=5 -run_time=0 -populate_threads=8 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/log-append.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/log-append.wtperf deleted file mode 100644 index 9d0a78e3c61..00000000000 --- a/src/third_party/wiredtiger/bench/wtperf/runners/log-append.wtperf +++ /dev/null @@ -1,8 +0,0 @@ -# wtperf options file: Test a log file with a multi-threaded -# append workload. -conn_config="cache_size=1G,log=(enabled=true,file_max=20MB),checkpoint=(log_size=1G)" -table_config="type=file" -icount=50000000 -report_interval=5 -run_time=0 -populate_threads=8 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/log-nockpt.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/log-nockpt.wtperf deleted file mode 100644 index a078cead740..00000000000 --- a/src/third_party/wiredtiger/bench/wtperf/runners/log-nockpt.wtperf +++ /dev/null @@ -1,12 +0,0 @@ -# wtperf options file: Test performance with a log file enabled. -# Set the log file reasonably small to catch log-swtich bottle -# necks. -conn_config="cache_size=1G,log=(enabled=true,file_max=20MB)" -table_config="type=file" -icount=50000 -report_interval=5 -run_time=40 -populate_threads=1 -random_range=50000000 -threads=((count=8,inserts=1)) - diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/log-noprealloc.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/log-noprealloc.wtperf deleted file mode 100644 index 66032f599aa..00000000000 --- a/src/third_party/wiredtiger/bench/wtperf/runners/log-noprealloc.wtperf +++ /dev/null @@ -1,11 +0,0 @@ -# wtperf options file: Test performance with a log file enabled. -# Set the log file reasonably small to catch log-swtich bottle -# necks. 
-conn_config="cache_size=1G,log=(enabled=true,file_max=200K,prealloc=false),checkpoint=(log_size=500MB)" -table_config="type=file" -icount=50000 -report_interval=5 -run_time=120 -populate_threads=1 -random_range=50000000 -threads=((count=8,inserts=1)) diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/log.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/log.wtperf index 32a9cc3b0a6..6cf50dfb5a5 100644 --- a/src/third_party/wiredtiger/bench/wtperf/runners/log.wtperf +++ b/src/third_party/wiredtiger/bench/wtperf/runners/log.wtperf @@ -1,10 +1,27 @@ +# # wtperf options file: Test performance with a log file enabled. # Set the log file small to catch log-swtich bottlenecks. -conn_config="cache_size=1G,log=(enabled=true,file_max=200K),checkpoint=(log_size=500MB)" +# +# Perform updates instead of inserts to stress logging not eviction, +# page splits or reconciliation. Have it fit in cache. +# +# We expect this test can and will be run in other forms from the command +# line to change log file size, pre-allocation, zero filling, logging off +# and checkpoint off. +# +# Jenkins runs for perf testing: +# - Config as-is +# - Config + "-C "log=(enabled,file_max=1M)": small log files and switching +# - Config + "-C "log=(enabled,zero_fill=true,file_max=1M)": zero-filling +# - Config + "-C "checkpoint=(wait=0)": no checkpoints +# - Config + "-C "log=(enabled,prealloc=false,file_max=1M)": no pre-allocation +# +conn_config="cache_size=5G,log=(enabled=true),checkpoint=(log_size=500M),eviction=(threads_max=4)" table_config="type=file" -icount=50000 +icount=1000000 report_interval=5 -run_time=120 +run_time=180 populate_threads=1 -random_range=50000000 -threads=((count=8,inserts=1)) +threads=((count=8,updates=1)) +# Warm up the cache for a minute. 
+warmup=60 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-zipfian-populate.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-zipfian-populate.wtperf new file mode 100644 index 00000000000..ddd9c055eac --- /dev/null +++ b/src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-zipfian-populate.wtperf @@ -0,0 +1,19 @@ +# Create a set of tables with uneven distribution of data +conn_config="cache_size=1G,eviction=(threads_max=4),file_manager=(close_idle_time=100000),checkpoint=(wait=60,log_size=2GB),statistics=(fast),statistics_log=(wait=5,json),session_max=1000" +table_config="type=file" +table_count=100 +icount=0 +random_range=1000000000 +pareto=10 +range_partition=true +report_interval=5 + +run_ops=10000000 +populate_threads=0 +icount=0 +threads=((count=20,inserts=1)) + +# Warn if a latency over 1 second is seen +max_latency=1000 +sample_interval=5 +sample_rate=1 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-zipfian-workload.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-zipfian-workload.wtperf new file mode 100644 index 00000000000..380350c88c8 --- /dev/null +++ b/src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-zipfian-workload.wtperf @@ -0,0 +1,18 @@ +# Read from a set of tables with uneven distribution of data +conn_config="cache_size=1G,eviction=(threads_max=4),file_manager=(close_idle_time=100000),checkpoint=(wait=60,log_size=2GB),statistics=(fast),statistics_log=(wait=5,json),session_max=1000" +table_config="type=file" +table_count=100 +icount=0 +random_range=1000000000 +pareto=10 +range_partition=true +report_interval=5 +create=false + +run_time=600 +threads=((count=20,reads=1)) + +# Warn if a latency over 1 second is seen +max_latency=1000 +sample_interval=5 +sample_rate=1 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/overflow-10k-short.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/overflow-10k-short.wtperf deleted file 
mode 100644 index 47228079db8..00000000000 --- a/src/third_party/wiredtiger/bench/wtperf/runners/overflow-10k-short.wtperf +++ /dev/null @@ -1,19 +0,0 @@ -# wtperf options file: simulate riak and a short form of its voxer config. -# The configuration for the connection and table are from riak and the -# specification of the data (count, size, threads) is from basho_bench. -# -#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)" -conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,lsm_manager=(worker_thread_max=6)" -compact=true -compression="snappy" -sess_config="isolation=snapshot" -table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB),type=lsm,leaf_page_max=16K,os_cache_dirty_max=16MB" -icount=15000 -key_sz=40 -value_sz=10000 -max_latency=2000 -populate_threads=1 -report_interval=5 -random_value=true -run_time=300 -threads=((count=10,read=1),(count=10,update=1)) diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/overflow-10k.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/overflow-10k.wtperf index 9b4ed2acaee..5d7eeea9cf2 100644 --- a/src/third_party/wiredtiger/bench/wtperf/runners/overflow-10k.wtperf +++ b/src/third_party/wiredtiger/bench/wtperf/runners/overflow-10k.wtperf @@ -1,9 +1,7 @@ -# wtperf options file: simulate riak and its test1 and test2 configuration -# The configuration for the connection and table are from riak and the -# specification of the data (count, size, threads) is from basho_bench. # -#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)" -conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,lsm_manager=(worker_thread_max=6)" +# Run with overflow items and LSM. 
+# +conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,lsm_manager=(worker_thread_max=6),statistics=(fast),statistics_log=(wait=10)" compact=true compression="snappy" sess_config="isolation=snapshot" @@ -13,8 +11,8 @@ key_sz=40 value_sz=10000 max_latency=2000 populate_threads=1 -report_interval=10 +report_interval=5 random_value=true -run_time=18000 -sample_interval=10 -threads=((count=20,read=1,update=1)) +run_time=300 +threads=((count=10,read=1),(count=10,update=1)) +warmup=30 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/overflow-130k-short.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/overflow-130k-short.wtperf deleted file mode 100644 index 83f67062bf8..00000000000 --- a/src/third_party/wiredtiger/bench/wtperf/runners/overflow-130k-short.wtperf +++ /dev/null @@ -1,19 +0,0 @@ -# wtperf options file: simulate riak and a short form of its voxer config. -# The configuration for the connection and table are from riak and the -# specification of the data (count, size, threads) is from basho_bench. 
-# -#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)" -conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,lsm_manager=(worker_thread_max=6)" -compact=true -compression="snappy" -sess_config="isolation=snapshot" -table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB),type=lsm,leaf_page_max=16K,os_cache_dirty_max=16MB" -icount=15000 -key_sz=40 -value_sz=130000 -max_latency=2000 -populate_threads=1 -report_interval=5 -random_value=true -run_time=300 -threads=((count=10,read=1),(count=10,update=1)) diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/overflow-130k.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/overflow-130k.wtperf index a3439f0c575..2be01afd08a 100644 --- a/src/third_party/wiredtiger/bench/wtperf/runners/overflow-130k.wtperf +++ b/src/third_party/wiredtiger/bench/wtperf/runners/overflow-130k.wtperf @@ -1,20 +1,18 @@ -# wtperf options file: simulate riak and its test1 and test2 configuration -# The configuration for the connection and table are from riak and the -# specification of the data (count, size, threads) is from basho_bench. # -#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)" -conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,lsm_manager=(worker_thread_max=6)" +# Run with very large overflow items and btree. 
+# +conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,statistics=(fast),statistics_log=(wait=10)" compact=true compression="snappy" sess_config="isolation=snapshot" -table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB),type=lsm,leaf_page_max=16K,os_cache_dirty_max=16MB" +table_config="internal_page_max=128K,type=file,leaf_page_max=16K,os_cache_dirty_max=16MB,leaf_value_max=32K" icount=15000 key_sz=40 value_sz=130000 max_latency=2000 populate_threads=1 -report_interval=10 +report_interval=5 random_value=true -run_time=18000 -sample_interval=10 -threads=((count=20,read=1,update=1)) +run_time=300 +threads=((count=10,read=1),(count=10,update=1)) +warmup=30 diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.c b/src/third_party/wiredtiger/bench/wtperf/wtperf.c index 9d35f6fa640..58271106d61 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf.c +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.c @@ -36,7 +36,6 @@ static const CONFIG default_cfg = { NULL, /* reopen config */ NULL, /* base_uri */ NULL, /* uris */ - NULL, /* helium_mount */ NULL, /* conn */ NULL, /* logf */ NULL, /* async */ @@ -73,14 +72,14 @@ static const char * const debug_cconfig = ""; static const char * const debug_tconfig = ""; static void *checkpoint_worker(void *); -static int create_tables(CONFIG *); -static int drop_all_tables(CONFIG *); +static int drop_all_tables(CONFIG *); static int execute_populate(CONFIG *); static int execute_workload(CONFIG *); static int find_table_count(CONFIG *); static void *monitor(void *); static void *populate_thread(void *); static void randomize_value(CONFIG_THREAD *, char *); +static void recreate_dir(const char *); static int start_all_runs(CONFIG *); static int start_run(CONFIG *); static int start_threads(CONFIG *, @@ -93,10 +92,6 @@ static void *worker(void *); static uint64_t wtperf_rand(CONFIG_THREAD *); static uint64_t 
wtperf_value_range(CONFIG *); -#define HELIUM_NAME "dev1" -#define HELIUM_PATH \ - "../../ext/test/helium/.libs/libwiredtiger_helium.so" -#define HELIUM_CONFIG ",type=helium" #define INDEX_COL_NAMES ",columns=(key,val)" /* Retrieve an ID for the next insert operation. */ @@ -154,6 +149,23 @@ randomize_value(CONFIG_THREAD *thread, char *value_buf) vb[i] = ((rand_val >> 16) % 255) + 1; } +/* + * Partition data by key ranges. + */ +static uint32_t +map_key_to_table(CONFIG *cfg, uint64_t k) +{ + if (cfg->range_partition) { + /* Take care to return a result in [0..table_count-1]. */ + if (k > cfg->icount + cfg->random_range) + return (0); + return ((uint32_t)((k - 1) / + ((cfg->icount + cfg->random_range + cfg->table_count - 1) / + cfg->table_count))); + } else + return ((uint32_t)(k % cfg->table_count)); +} + /* * Figure out and extend the size of the value string, used for growing * updates. We know that the value to be updated is in the threads value @@ -393,7 +405,7 @@ worker_async(void *arg) * Then retry to get an async op. */ while ((ret = conn->async_new_op( - conn, cfg->uris[next_val % cfg->table_count], + conn, cfg->uris[map_key_to_table(cfg, next_val)], NULL, &cb, &asyncop)) == EBUSY) (void)usleep(10000); if (ret != 0) @@ -466,7 +478,7 @@ do_range_reads(CONFIG *cfg, WT_CURSOR *cursor) range_key_buf = &buf[0]; /* Save where the first key is for comparisons. */ - cursor->get_key(cursor, &range_key_buf); + testutil_check(cursor->get_key(cursor, &range_key_buf)); extract_key(range_key_buf, &next_val); for (range = 0; range < cfg->read_range; ++range) { @@ -477,7 +489,7 @@ do_range_reads(CONFIG *cfg, WT_CURSOR *cursor) break; /* Retrieve and decode the key */ - cursor->get_key(cursor, &range_key_buf); + testutil_check(cursor->get_key(cursor, &range_key_buf)); extract_key(range_key_buf, &next_val); if (next_val < prev_val) { lprintf(cfg, EINVAL, 0, @@ -547,9 +559,8 @@ worker(void *arg) } } /* Setup the timer for throttling. 
*/ - if (thread->workload->throttle != 0 && - (ret = setup_throttle(thread)) != 0) - goto err; + if (thread->workload->throttle != 0) + setup_throttle(thread); /* Setup for truncate */ if (thread->workload->truncate != 0) @@ -611,7 +622,7 @@ worker(void *arg) /* * Spread the data out around the multiple databases. */ - cursor = cursors[next_val % cfg->table_count]; + cursor = cursors[map_key_to_table(cfg, next_val)]; /* * Skip the first time we do an operation, when trk->ops @@ -1010,7 +1021,7 @@ populate_thread(void *arg) /* * Figure out which table this op belongs to. */ - cursor = cursors[op % cfg->table_count]; + cursor = cursors[map_key_to_table(cfg, op)]; generate_key(cfg, key_buf, op); measure_latency = cfg->sample_interval != 0 && @@ -1148,7 +1159,7 @@ populate_async(void *arg) * Allocate an async op for whichever table. */ while ((ret = conn->async_new_op( - conn, cfg->uris[op % cfg->table_count], + conn, cfg->uris[map_key_to_table(cfg, op)], NULL, &cb, &asyncop)) == EBUSY) (void)usleep(10000); if (ret != 0) @@ -1858,7 +1869,7 @@ create_uris(CONFIG *cfg) base_uri_len = strlen(cfg->base_uri); cfg->uris = dcalloc(cfg->table_count, sizeof(char *)); for (i = 0; i < cfg->table_count; i++) { - uri = cfg->uris[i] = dcalloc(base_uri_len + 5, 1); + uri = cfg->uris[i] = dcalloc(base_uri_len + 6, 1); /* * If there is only one table, just use base name. */ @@ -1877,9 +1888,6 @@ create_tables(CONFIG *cfg) int ret; char buf[512]; - if (cfg->create == 0) - return (0); - if ((ret = cfg->conn->open_session( cfg->conn, NULL, cfg->sess_config, &session)) != 0) { lprintf(cfg, ret, 0, @@ -1971,13 +1979,10 @@ start_all_runs(CONFIG *cfg) if (strcmp(cfg->monitor_dir, cfg->home) == 0) next_cfg->monitor_dir = new_home; - /* Create clean home directories. 
*/ - snprintf(cmd_buf, cmd_len, "rm -rf %s && mkdir %s", - next_cfg->home, next_cfg->home); - if ((ret = system(cmd_buf)) != 0) { - fprintf(stderr, "%s: failed\n", cmd_buf); - goto err; - } + /* If creating the sub-database, recreate its home */ + if (cfg->create != 0) + recreate_dir(next_cfg->home); + if ((ret = pthread_create( &threads[i], NULL, thread_run_wtperf, next_cfg)) != 0) { lprintf(cfg, ret, 0, "Error creating thread"); @@ -2024,8 +2029,8 @@ start_run(CONFIG *cfg) { pthread_t monitor_thread; uint64_t total_ops; + uint32_t run_time; int monitor_created, ret, t_ret; - char helium_buf[256]; monitor_created = ret = 0; /* [-Wconditional-uninitialized] */ @@ -2040,21 +2045,10 @@ start_run(CONFIG *cfg) goto err; } - /* Configure optional Helium volume. */ - if (cfg->helium_mount != NULL) { - snprintf(helium_buf, sizeof(helium_buf), - "entry=wiredtiger_extension_init,config=[" - "%s=[helium_devices=\"he://./%s\"," - "helium_o_volume_truncate=1]]", - HELIUM_NAME, cfg->helium_mount); - if ((ret = cfg->conn->load_extension( - cfg->conn, HELIUM_PATH, helium_buf)) != 0) - lprintf(cfg, - ret, 0, "Error loading Helium: %s", helium_buf); - } - create_uris(cfg); - if ((ret = create_tables(cfg)) != 0) + + /* If creating, create the tables. */ + if (cfg->create != 0 && (ret = create_tables(cfg)) != 0) goto err; /* Start the monitor thread. */ @@ -2083,7 +2077,8 @@ start_run(CONFIG *cfg) goto err; /* Didn't create, set insert count. */ - if (cfg->create == 0 && find_table_count(cfg) != 0) + if (cfg->create == 0 && cfg->random_range == 0 && + find_table_count(cfg) != 0) goto err; /* Start the checkpoint thread. */ if (cfg->checkpoint_threads != 0) { @@ -2108,26 +2103,27 @@ start_run(CONFIG *cfg) cfg->ckpt_ops = sum_ckpt_ops(cfg); total_ops = cfg->read_ops + cfg->insert_ops + cfg->update_ops; + run_time = cfg->run_time == 0 ?
1 : cfg->run_time; lprintf(cfg, 0, 1, "Executed %" PRIu64 " read operations (%" PRIu64 "%%) %" PRIu64 " ops/sec", cfg->read_ops, (cfg->read_ops * 100) / total_ops, - cfg->read_ops / cfg->run_time); + cfg->read_ops / run_time); lprintf(cfg, 0, 1, "Executed %" PRIu64 " insert operations (%" PRIu64 "%%) %" PRIu64 " ops/sec", cfg->insert_ops, (cfg->insert_ops * 100) / total_ops, - cfg->insert_ops / cfg->run_time); + cfg->insert_ops / run_time); lprintf(cfg, 0, 1, "Executed %" PRIu64 " truncate operations (%" PRIu64 "%%) %" PRIu64 " ops/sec", cfg->truncate_ops, (cfg->truncate_ops * 100) / total_ops, - cfg->truncate_ops / cfg->run_time); + cfg->truncate_ops / run_time); lprintf(cfg, 0, 1, "Executed %" PRIu64 " update operations (%" PRIu64 "%%) %" PRIu64 " ops/sec", cfg->update_ops, (cfg->update_ops * 100) / total_ops, - cfg->update_ops / cfg->run_time); + cfg->update_ops / run_time); lprintf(cfg, 0, 1, "Executed %" PRIu64 " checkpoint operations", cfg->ckpt_ops); @@ -2182,18 +2178,21 @@ err: if (ret == 0) extern int __wt_optind, __wt_optreset; extern char *__wt_optarg; +void (*custom_die)(void) = NULL; int main(int argc, char *argv[]) { CONFIG *cfg, _cfg; size_t req_len, sreq_len; - int ch, monitor_set, ret; - const char *opts = "C:H:h:m:O:o:T:"; + bool monitor_set; + int ch, ret; + const char *opts = "C:h:m:O:o:T:"; const char *config_opts; char *cc_buf, *sess_cfg, *tc_buf, *user_cconfig, *user_tconfig; - monitor_set = ret = 0; + monitor_set = false; + ret = 0; config_opts = NULL; cc_buf = sess_cfg = tc_buf = user_cconfig = user_tconfig = NULL; @@ -2219,8 +2218,12 @@ main(int argc, char *argv[]) strcat(user_cconfig, __wt_optarg); } break; - case 'H': - cfg->helium_mount = __wt_optarg; + case 'h': + cfg->home = __wt_optarg; + break; + case 'm': + cfg->monitor_dir = __wt_optarg; + monitor_set = true; break; case 'O': config_opts = __wt_optarg; @@ -2236,15 +2239,7 @@ main(int argc, char *argv[]) strcat(user_tconfig, __wt_optarg); } break; - case 'h': - cfg->home = 
__wt_optarg; - break; - case 'm': - cfg->monitor_dir = __wt_optarg; - monitor_set = 1; - break; case '?': - fprintf(stderr, "Invalid option\n"); usage(); goto einval; } @@ -2300,7 +2295,7 @@ main(int argc, char *argv[]) * to 4096 if needed. */ req_len = strlen(",async=(enabled=true,threads=)") + 4; - cfg->async_config = dcalloc(req_len, 1); + cfg->async_config = dmalloc(req_len); snprintf(cfg->async_config, req_len, ",async=(enabled=true,threads=%" PRIu32 ")", cfg->async_threads); @@ -2321,13 +2316,9 @@ main(int argc, char *argv[]) } /* Build the URI from the table name. */ - req_len = strlen("table:") + - strlen(HELIUM_NAME) + strlen(cfg->table_name) + 2; - cfg->base_uri = dcalloc(req_len, 1); - snprintf(cfg->base_uri, req_len, "table:%s%s%s", - cfg->helium_mount == NULL ? "" : HELIUM_NAME, - cfg->helium_mount == NULL ? "" : "/", - cfg->table_name); + req_len = strlen("table:") + strlen(cfg->table_name) + 2; + cfg->base_uri = dmalloc(req_len); + snprintf(cfg->base_uri, req_len, "table:%s", cfg->table_name); /* Make stdout line buffered, so verbose output appears quickly. */ __wt_stream_set_line_buffer(stdout); @@ -2346,13 +2337,13 @@ main(int argc, char *argv[]) if (cfg->session_count_idle > 0) { sreq_len = strlen(",session_max=") + 6; req_len += sreq_len; - sess_cfg = dcalloc(sreq_len, 1); + sess_cfg = dmalloc(sreq_len); snprintf(sess_cfg, sreq_len, ",session_max=%" PRIu32, cfg->session_count_idle + cfg->workers_cnt + cfg->populate_threads + 10); } - cc_buf = dcalloc(req_len, 1); + cc_buf = dmalloc(req_len); /* * This is getting hard to parse. 
*/ @@ -2368,36 +2359,34 @@ main(int argc, char *argv[]) if ((ret = config_opt_str(cfg, "conn_config", cc_buf)) != 0) goto err; } - if (cfg->verbose > 1 || cfg->index || cfg->helium_mount != NULL || + if (cfg->verbose > 1 || cfg->index || user_tconfig != NULL || cfg->compress_table != NULL) { - req_len = strlen(cfg->table_config) + strlen(HELIUM_CONFIG) + - strlen(debug_tconfig) + 3; + req_len = strlen(cfg->table_config) + strlen(debug_tconfig) + 3; if (user_tconfig != NULL) req_len += strlen(user_tconfig); if (cfg->compress_table != NULL) req_len += strlen(cfg->compress_table); if (cfg->index) req_len += strlen(INDEX_COL_NAMES); - tc_buf = dcalloc(req_len, 1); + tc_buf = dmalloc(req_len); /* * This is getting hard to parse. */ - snprintf(tc_buf, req_len, "%s%s%s%s%s%s%s%s", + snprintf(tc_buf, req_len, "%s%s%s%s%s%s%s", cfg->table_config, cfg->index ? INDEX_COL_NAMES : "", cfg->compress_table ? cfg->compress_table : "", cfg->verbose > 1 ? ",": "", cfg->verbose > 1 ? debug_tconfig : "", user_tconfig ? ",": "", - user_tconfig ? user_tconfig : "", - cfg->helium_mount == NULL ? "" : HELIUM_CONFIG); + user_tconfig ? user_tconfig : ""); if ((ret = config_opt_str(cfg, "table_config", tc_buf)) != 0) goto err; } if (cfg->log_partial && cfg->table_count > 1) { req_len = strlen(cfg->table_config) + strlen(LOG_PARTIAL_CONFIG) + 1; - cfg->partial_config = dcalloc(req_len, 1); + cfg->partial_config = dmalloc(req_len); snprintf(cfg->partial_config, req_len, "%s%s", cfg->table_config, LOG_PARTIAL_CONFIG); } @@ -2410,7 +2399,7 @@ main(int argc, char *argv[]) strlen(READONLY_CONFIG) + 1; else req_len = strlen(cfg->conn_config) + 1; - cfg->reopen_config = dcalloc(req_len, 1); + cfg->reopen_config = dmalloc(req_len); if (cfg->readonly) snprintf(cfg->reopen_config, req_len, "%s%s", cfg->conn_config, READONLY_CONFIG); @@ -2422,6 +2411,10 @@ main(int argc, char *argv[]) if ((ret = config_sanity(cfg)) != 0) goto err; + /* If creating, remove and re-create the home directory. 
*/ + if (cfg->create != 0) + recreate_dir(cfg->home); + /* Write a copy of the config. */ config_to_file(cfg); @@ -2536,6 +2529,19 @@ stop_threads(CONFIG *cfg, u_int num, CONFIG_THREAD *threads) return (0); } +static void +recreate_dir(const char *name) +{ + char *buf; + size_t len; + + len = strlen(name) * 2 + 100; + buf = dmalloc(len); + (void)snprintf(buf, len, "rm -rf %s && mkdir %s", name, name); + testutil_checkfmt(system(buf), "system: %s", buf); + free(buf); +} + static int drop_all_tables(CONFIG *cfg) { @@ -2615,7 +2621,7 @@ wtperf_rand(CONFIG_THREAD *thread) * first item in the table being "hot". */ if (rval > wtperf_value_range(cfg)) - rval = wtperf_value_range(cfg); + rval = 0; } /* * Wrap the key to within the expected range and avoid zero: we never diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.h b/src/third_party/wiredtiger/bench/wtperf/wtperf.h index d874fa4eefe..27c3832d316 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf.h +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.h @@ -29,14 +29,11 @@ #ifndef HAVE_WTPERF_H #define HAVE_WTPERF_H -#include +#include "test_util.h" + #include #include -#ifdef _WIN32 -#include "windows_shim.h" -#endif - #include "config_opt.h" typedef struct __config CONFIG; @@ -83,7 +80,6 @@ typedef struct { typedef struct { uint64_t stone_gap; uint64_t needed_stones; - uint64_t final_stone_gap; uint64_t expected_total; uint64_t total_inserts; uint64_t last_total_inserts; @@ -126,7 +122,6 @@ struct __config { /* Configuration structure */ char *reopen_config; /* Config string for conn reopen */ char *base_uri; /* Object URI */ char **uris; /* URIs if multiple tables */ - const char *helium_mount; /* Optional Helium mount point */ WT_CONNECTION *conn; /* Database connection */ @@ -281,7 +276,7 @@ void latency_print(CONFIG *); int run_truncate( CONFIG *, CONFIG_THREAD *, WT_CURSOR *, WT_SESSION *, int *); int setup_log_file(CONFIG *); -int setup_throttle(CONFIG_THREAD*); +void 
setup_throttle(CONFIG_THREAD*); int setup_truncate(CONFIG *, CONFIG_THREAD *, WT_SESSION *); int start_idle_table_cycle(CONFIG *, pthread_t *); int stop_idle_table_cycle(CONFIG *, pthread_t); @@ -292,7 +287,7 @@ uint64_t sum_read_ops(CONFIG *); uint64_t sum_truncate_ops(CONFIG *); uint64_t sum_update_ops(CONFIG *); void usage(void); -int worker_throttle(CONFIG_THREAD*); +void worker_throttle(CONFIG_THREAD*); void lprintf(const CONFIG *, int err, uint32_t, const char *, ...) #if defined(__GNUC__) @@ -328,75 +323,4 @@ die(int e, const char *str) fprintf(stderr, "Call to %s failed: %s", str, wiredtiger_strerror(e)); exit(EXIT_FAILURE); } - -/* - * dmalloc -- - * Call malloc, dying on failure. - */ -static inline void * -dmalloc(size_t len) -{ - void *p; - - if ((p = malloc(len)) == NULL) - die(errno, "malloc"); - return (p); -} - -/* - * dcalloc -- - * Call calloc, dying on failure. - */ -static inline void * -dcalloc(size_t num, size_t size) -{ - void *p; - - if ((p = calloc(num, size)) == NULL) - die(errno, "calloc"); - return (p); -} - -/* - * drealloc -- - * Call realloc, dying on failure. - */ -static inline void * -drealloc(void *p, size_t len) -{ - void *repl; - - if ((repl = realloc(p, len)) == NULL) - die(errno, "realloc"); - return (repl); -} - -/* - * dstrdup -- - * Call strdup, dying on failure. - */ -static inline char * -dstrdup(const char *str) -{ - char *p; - - if ((p = strdup(str)) == NULL) - die(errno, "strdup"); - return (p); -} - -/* - * dstrndup -- - * Call emulating strndup, dying on failure. Don't use actual strndup here - * as it is not supported within MSVC. 
- */ -static inline char * -dstrndup(const char *str, const size_t len) -{ - char *p; - - p = dcalloc(len + 1, sizeof(char)); - memcpy(p, str, len); - return (p); -} #endif diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i index 2afd20f777f..f6c96febc85 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i @@ -144,6 +144,7 @@ DEF_OPT_AS_UINT32(random_range, 0, "if non zero choose a value from within this range as the key for " "insert operations") DEF_OPT_AS_BOOL(random_value, 0, "generate random content for the value") +DEF_OPT_AS_BOOL(range_partition, 0, "partition data by range (vs hash)") DEF_OPT_AS_UINT32(read_range, 0, "scan a range of keys after each search") DEF_OPT_AS_BOOL(readonly, 0, "reopen the connection between populate and workload phases in readonly " diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c b/src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c index a98fd9b18d7..e49bca00d07 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c @@ -31,7 +31,7 @@ /* * Put the initial config together for running a throttled workload. */ -int +void setup_throttle(CONFIG_THREAD *thread) { THROTTLE_CONFIG *throttle_cfg; @@ -70,15 +70,14 @@ setup_throttle(CONFIG_THREAD *thread) throttle_cfg->ops_count = throttle_cfg->ops_per_increment; /* Set the first timestamp of when we incremented */ - WT_RET(__wt_epoch(NULL, &throttle_cfg->last_increment)); - return (0); + testutil_check(__wt_epoch(NULL, &throttle_cfg->last_increment)); } /* * Run the throttle function. We will sleep if needed and then reload the * counter to perform more operations. 
*/ -int +void worker_throttle(CONFIG_THREAD *thread) { THROTTLE_CONFIG *throttle_cfg; @@ -87,7 +86,7 @@ worker_throttle(CONFIG_THREAD *thread) throttle_cfg = &thread->throttle_cfg; - WT_RET(__wt_epoch(NULL, &now)); + testutil_check(__wt_epoch(NULL, &now)); /* * If we did enough operations in the current interval, sleep for @@ -102,7 +101,7 @@ worker_throttle(CONFIG_THREAD *thread) /* * After sleeping, set the interval to the current time. */ - WT_RET(__wt_epoch(NULL, &throttle_cfg->last_increment)); + testutil_check(__wt_epoch(NULL, &throttle_cfg->last_increment)); } else { throttle_cfg->ops_count = (usecs_delta * throttle_cfg->ops_per_increment) / @@ -115,6 +114,4 @@ worker_throttle(CONFIG_THREAD *thread) */ throttle_cfg->ops_count = WT_MIN(throttle_cfg->ops_count, thread->workload->throttle); - - return (0); } diff --git a/src/third_party/wiredtiger/build_posix/Make.subdirs b/src/third_party/wiredtiger/build_posix/Make.subdirs index 64749378ed1..0b5175e4196 100644 --- a/src/third_party/wiredtiger/build_posix/Make.subdirs +++ b/src/third_party/wiredtiger/build_posix/Make.subdirs @@ -18,14 +18,15 @@ ext/extractors/csv ext/test/kvs_bdb HAVE_BERKELEY_DB . api/leveldb LEVELDB -bench/wtperf examples/c lang/java JAVA examples/java JAVA lang/python PYTHON -# Make the tests +# Test/Benchmark support library. test/utility + +# Test programs. test/bloom test/checkpoint test/csuite @@ -39,3 +40,6 @@ test/readonly test/recovery test/salvage test/thread + +# Benchmark programs. +bench/wtperf diff --git a/src/third_party/wiredtiger/build_posix/aclocal/ax_pkg_swig.m4 b/src/third_party/wiredtiger/build_posix/aclocal/ax_pkg_swig.m4 index 9ebdeb531b9..89941bc3fa9 100644 --- a/src/third_party/wiredtiger/build_posix/aclocal/ax_pkg_swig.m4 +++ b/src/third_party/wiredtiger/build_posix/aclocal/ax_pkg_swig.m4 @@ -32,9 +32,9 @@ # LICENSE # # Copyright (c) 2008 Sebastian Huber -# Copyright (c) 2008 Alan W. Irwin +# Copyright (c) 2008 Alan W. 
Irwin # Copyright (c) 2008 Rafael Laboissiere -# Copyright (c) 2008 Andrew Collier +# Copyright (c) 2008 Andrew Collier # Copyright (c) 2011 Murray Cumming # # This program is free software; you can redistribute it and/or modify it @@ -63,11 +63,11 @@ # modified version of the Autoconf Macro, you may extend this special # exception to the GPL to apply to your modified version as well. -#serial 8 +#serial 11 AC_DEFUN([AX_PKG_SWIG],[ - # Some systems have SWIG 2.0 named "swig2.0" - AC_PATH_PROGS([SWIG],[swig2.0 swig]) + # Ubuntu has swig 2.0 as /usr/bin/swig2.0 + AC_PATH_PROGS([SWIG],[swig swig3.0 swig2.0]) if test -z "$SWIG" ; then m4_ifval([$3],[$3],[:]) elif test -n "$1" ; then diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index 90b1c8378a2..1302247e88e 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -247,8 +247,8 @@ file_config = format_meta + [ Config('memory_page_max', '5MB', r''' the maximum size a page can grow to in memory before being reconciled to disk. The specified size will be adjusted to a lower - bound of 50 * leaf_page_max, and an upper bound of - cache_size / 2. This limit is soft - it is possible + bound of leaf_page_max, and an upper bound of + cache_size / 10. This limit is soft - it is possible for pages to be temporarily larger than this value. This setting is ignored for LSM trees, see \c chunk_size''', min='512B', max='10TB'), @@ -373,8 +373,6 @@ connection_runtime_config = [ periodically checkpoint the database. Enabling the checkpoint server uses a session from the configured session_max''', type='category', subconfig=[ - Config('name', '"WiredTigerCheckpoint"', r''' - the checkpoint name'''), Config('log_size', '0', r''' wait for this amount of log record bytes to be written to the log between each checkpoint. 
A database can configure @@ -388,16 +386,31 @@ connection_runtime_config = [ ]), Config('error_prefix', '', r''' prefix string for error messages'''), - Config('eviction_dirty_target', '80', r''' + Config('eviction', '', r''' + eviction configuration options.''', + type='category', subconfig=[ + Config('threads_max', '1', r''' + maximum number of threads WiredTiger will start to help evict + pages from cache. The number of threads started will vary + depending on the current eviction load. Each eviction worker + thread uses a session from the configured session_max''', + min=1, max=20), + Config('threads_min', '1', r''' + minimum number of threads WiredTiger will start to help evict + pages from cache. The number of threads currently running will + vary depending on the current eviction load''', + min=1, max=20), + ]), + Config('eviction_dirty_target', '5', r''' continue evicting until the cache has less dirty memory than the value, as a percentage of the total cache size. Dirty pages will only be evicted if the cache is full enough to trigger eviction''', - min=5, max=99), - Config('eviction_dirty_trigger', '95', r''' + min=1, max=99), + Config('eviction_dirty_trigger', '20', r''' trigger eviction when the cache is using this much memory for dirty content, as a percentage of the total cache size. This setting only alters behavior if it is lower than eviction_trigger''', - min=5, max=99), + min=1, max=99), Config('eviction_target', '80', r''' continue evicting until the cache has less total memory than the value, as a percentage of the total cache size. Must be less than @@ -420,40 +433,6 @@ connection_runtime_config = [ interval in seconds at which to check for files that are inactive and close them''', min=1, max=100000), ]), - Config('log', '', r''' - enable logging. 
Enabling logging uses three sessions from the - configured session_max''', - type='category', subconfig=[ - Config('archive', 'true', r''' - automatically archive unneeded log files''', - type='boolean'), - Config('compressor', 'none', r''' - configure a compressor for log records. Permitted values are - \c "none" or custom compression engine name created with - WT_CONNECTION::add_compressor. If WiredTiger has builtin support - for \c "snappy", \c "lz4" or \c "zlib" compression, these names - are also available. See @ref compression for more information'''), - Config('enabled', 'false', r''' - enable logging subsystem''', - type='boolean'), - Config('file_max', '100MB', r''' - the maximum size of log files''', - min='100KB', max='2GB'), - Config('path', '"."', r''' - the path to a directory into which the log files are written. - If the value is not an absolute path name, the files are created - relative to the database home'''), - Config('prealloc', 'true', r''' - pre-allocate log files.''', - type='boolean'), - Config('recover', 'on', r''' - run recovery or error if recovery needs to run after an - unclean shutdown.''', - choices=['error','on']), - Config('zero_fill', 'false', r''' - manually write zeroes into log files''', - type='boolean'), - ]), Config('lsm_manager', '', r''' configure database wide options for LSM tree management. The LSM manager is started automatically the first time an LSM tree is opened. @@ -472,21 +451,6 @@ connection_runtime_config = [ Config('lsm_merge', 'true', r''' merge LSM chunks where possible (deprecated)''', type='boolean', undoc=True), - Config('eviction', '', r''' - eviction configuration options.''', - type='category', subconfig=[ - Config('threads_max', '1', r''' - maximum number of threads WiredTiger will start to help evict - pages from cache. The number of threads started will vary - depending on the current eviction load. 
Each eviction worker - thread uses a session from the configured session_max''', - min=1, max=20), - Config('threads_min', '1', r''' - minimum number of threads WiredTiger will start to help evict - pages from cache. The number of threads currently running will - vary depending on the current eviction load''', - min=1, max=20), - ]), Config('shared_cache', '', r''' shared cache configuration options. A database should configure either a cache_size or a shared_cache not both. Enabling a @@ -525,38 +489,6 @@ connection_runtime_config = [ are logged using the \c statistics_log configuration. See @ref statistics for more information''', type='list', choices=['all', 'fast', 'none', 'clear']), - Config('statistics_log', '', r''' - log any statistics the database is configured to maintain, - to a file. See @ref statistics for more information. Enabling - the statistics log server uses a session from the configured - session_max''', - type='category', subconfig=[ - Config('json', 'false', r''' - encode statistics in JSON format''', - type='boolean'), - Config('on_close', 'false', r'''log statistics on database close''', - type='boolean'), - Config('path', '"WiredTigerStat.%d.%H"', r''' - the pathname to a file into which the log records are written, - may contain ISO C standard strftime conversion specifications. - If the value is not an absolute path name, the file is created - relative to the database home'''), - Config('sources', '', r''' - if non-empty, include statistics for the list of data source - URIs, if they are open at the time of the statistics logging. 
- The list may include URIs matching a single data source - ("table:mytable"), or a URI matching all data sources of a - particular type ("table:")''', - type='list'), - Config('timestamp', '"%b %d %H:%M:%S"', r''' - a timestamp prepended to each log record, may contain strftime - conversion specifications, when \c json is configured, defaults - to \c "%FT%Y.000Z"'''), - Config('wait', '0', r''' - seconds to wait between each write of the log records; setting - this value above 0 configures statistics logging''', - min='0', max='100000'), - ]), Config('verbose', '', r''' enable messages for various events. Only available if WiredTiger is configured with --enable-verbose. Options are given as a @@ -590,13 +522,113 @@ connection_runtime_config = [ 'write']), ] +# wiredtiger_open and WT_CONNECTION.reconfigure log configurations. +log_configuration_common = [ + Config('archive', 'true', r''' + automatically archive unneeded log files''', + type='boolean'), + Config('prealloc', 'true', r''' + pre-allocate log files.''', + type='boolean'), + Config('zero_fill', 'false', r''' + manually write zeroes into log files''', + type='boolean') +] +connection_reconfigure_log_configuration = [ + Config('log', '', r''' + enable logging. Enabling logging uses three sessions from the + configured session_max''', + type='category', subconfig= + log_configuration_common) +] +wiredtiger_open_log_configuration = [ + Config('log', '', r''' + enable logging. Enabling logging uses three sessions from the + configured session_max''', + type='category', subconfig= + log_configuration_common + [ + Config('enabled', 'false', r''' + enable logging subsystem''', + type='boolean'), + Config('compressor', 'none', r''' + configure a compressor for log records. Permitted values are + \c "none" or custom compression engine name created with + WT_CONNECTION::add_compressor. If WiredTiger has builtin support + for \c "snappy", \c "lz4" or \c "zlib" compression, these names + are also available. 
See @ref compression for more information'''), + Config('file_max', '100MB', r''' + the maximum size of log files''', + min='100KB', max='2GB'), + Config('path', '"."', r''' + the name of a directory into which log files are written. The + directory must already exist. If the value is not an absolute + path, the path is relative to the database home (see @ref + absolute_path for more information)'''), + Config('recover', 'on', r''' + run recovery or error if recovery needs to run after an + unclean shutdown''', + choices=['error','on']) + ]), +] + +# wiredtiger_open and WT_CONNECTION.reconfigure statistics log configurations. +statistics_log_configuration_common = [ + Config('json', 'false', r''' + encode statistics in JSON format''', + type='boolean'), + Config('on_close', 'false', r'''log statistics on database close''', + type='boolean'), + Config('sources', '', r''' + if non-empty, include statistics for the list of data source + URIs, if they are open at the time of the statistics logging. + The list may include URIs matching a single data source + ("table:mytable"), or a URI matching all data sources of a + particular type ("table:")''', + type='list'), + Config('timestamp', '"%b %d %H:%M:%S"', r''' + a timestamp prepended to each log record, may contain strftime + conversion specifications, when \c json is configured, defaults + to \c "%FT%Y.000Z"'''), + Config('wait', '0', r''' + seconds to wait between each write of the log records; setting + this value above 0 configures statistics logging''', + min='0', max='100000'), +] +connection_reconfigure_statistics_log_configuration = [ + Config('statistics_log', '', r''' + log any statistics the database is configured to maintain, + to a file. See @ref statistics for more information. 
Enabling + the statistics log server uses a session from the configured + session_max''', + type='category', subconfig= + statistics_log_configuration_common) +] +wiredtiger_open_statistics_log_configuration = [ + Config('statistics_log', '', r''' + log any statistics the database is configured to maintain, + to a file. See @ref statistics for more information. Enabling + the statistics log server uses a session from the configured + session_max''', + type='category', subconfig= + statistics_log_configuration_common + [ + Config('path', '"."', r''' + the name of a directory into which statistics files are written. + The directory must already exist. If the value is not an absolute + path, the path is relative to the database home (see @ref + absolute_path for more information)''') + ]) +] + session_config = [ Config('isolation', 'read-committed', r''' the default isolation level for operations in this session''', choices=['read-uncommitted', 'read-committed', 'snapshot']), ] -wiredtiger_open_common = connection_runtime_config + [ +wiredtiger_open_common =\ + connection_runtime_config +\ + wiredtiger_open_log_configuration +\ + wiredtiger_open_statistics_log_configuration + [ Config('buffer_alignment', '-1', r''' in-memory alignment (in bytes) for buffers used for I/O. The default value of -1 indicates a platform-specific alignment value @@ -788,8 +820,9 @@ methods = { 'WT_SESSION.drop' : Method([ Config('checkpoint_wait', 'true', r''' - wait for the checkpoint lock, if \c checkpoint_wait=false, fail if - this lock is not available immediately''', + wait for the checkpoint lock, if \c checkpoint_wait=false, perform + the drop operation without taking a lock, returning EBUSY if the + operation conflicts with a running checkpoint''', type='boolean', undoc=True), Config('force', 'false', r''' return success if the object does not exist''', @@ -870,6 +903,11 @@ methods = { "WiredTigerCheckpoint" opens the most recent internal checkpoint taken for the object). 
The cursor does not support data modification'''), + Config('checkpoint_wait', 'true', r''' + wait for the checkpoint lock, if \c checkpoint_wait=false, open the + cursor without taking a lock, returning EBUSY if the operation + conflicts with a running checkpoint''', + type='boolean', undoc=True), Config('dump', '', r''' configure the cursor for dump format inputs and outputs: "hex" selects a simple hexadecimal format, "json" selects a JSON format @@ -1084,7 +1122,11 @@ methods = { don't free memory during close''', type='boolean'), ]), -'WT_CONNECTION.reconfigure' : Method(connection_runtime_config), +'WT_CONNECTION.reconfigure' : Method( + connection_reconfigure_log_configuration +\ + connection_reconfigure_statistics_log_configuration +\ + connection_runtime_config +), 'WT_CONNECTION.set_file_system' : Method([]), 'WT_CONNECTION.load_extension' : Method([ diff --git a/src/third_party/wiredtiger/dist/flags.py b/src/third_party/wiredtiger/dist/flags.py index b5f36fb707a..8091283a8c0 100644 --- a/src/third_party/wiredtiger/dist/flags.py +++ b/src/third_party/wiredtiger/dist/flags.py @@ -37,10 +37,13 @@ flags = { 'READ_WONT_NEED', ], 'rec_write' : [ + 'CHECKPOINTING', + 'EVICTING', 'EVICT_IN_MEMORY', + 'EVICT_INMEM_SPLIT', 'EVICT_LOOKASIDE', + 'EVICT_SCRUB', 'EVICT_UPDATE_RESTORE', - 'EVICTING', 'VISIBILITY_ERR', ], 'txn_log_checkpoint' : [ diff --git a/src/third_party/wiredtiger/dist/s_all b/src/third_party/wiredtiger/dist/s_all index 46a68864906..33b8f6a76ba 100755 --- a/src/third_party/wiredtiger/dist/s_all +++ b/src/third_party/wiredtiger/dist/s_all @@ -15,6 +15,8 @@ echo 'dist/s_all run started...' force= reconf=0 +errmode=0 +errfound=0 while : do case "$1" in -A) # Reconfigure the library build. 
@@ -23,6 +25,9 @@ while : -f) # Force versions to be updated force="-f" shift;; + -E) # Return an error code on failure + errmode=1 + shift;; *) break;; esac @@ -48,6 +53,14 @@ errchk() echo "#######################" rm -f $2 + + # Some tests shouldn't return an error, we exclude them here. + case "$1" in + *s_export*) + break;; + *) + errfound=1;; + esac } run() @@ -108,3 +121,6 @@ for f in `find . -name ${t_pfx}\*`; do done echo 'dist/s_all run finished' +if test $errmode -ne 0; then + exit $errfound; +fi diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index 7966ff2cf2e..8c5f1e99bff 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -86,6 +86,7 @@ DbEnv Decrement Decrypt DeleteFileA +EACCES EAGAIN EB EBUSY @@ -117,6 +118,7 @@ FLv FNV FORALL FOREACH +FS FULLFSYNC FindClose FindFirstFile @@ -204,6 +206,7 @@ MERCHANTABILITY METADATA MONGODB MSVC +MULTI MULTIBLOCK MUTEX Manos @@ -326,6 +329,7 @@ UID UIDs UINT ULINE +UNC URI URIs UTF @@ -528,6 +532,7 @@ cust customp cv cxa +dT data's database's datalen @@ -557,6 +562,7 @@ dequeued der dereference desc +designator dest destSize dev @@ -932,6 +938,7 @@ prepend prepended prepending presize +presync primary's printf printlog @@ -1065,6 +1072,7 @@ tV tablename tcbench td +tempdir testutil th tid @@ -1091,6 +1099,7 @@ txn txnc txnid txnmin +txt typedef uB uS diff --git a/src/third_party/wiredtiger/dist/s_style b/src/third_party/wiredtiger/dist/s_style index a222c004cc3..e33db5a5fab 100755 --- a/src/third_party/wiredtiger/dist/s_style +++ b/src/third_party/wiredtiger/dist/s_style @@ -33,7 +33,7 @@ else exit 1; fi - egrep -w 'a a|an an|and and|are are|be be|by by|for for|from from|if if|in in|is is|it it|of of|the the|this this|to to|was was|were were|when when|with with|a an|an a|a the|the a' $f > $t + egrep -w 'a a|an an|and and|are are|be be|by by|for for|from from|if if|in in[^-]|is is|it it|of of|the the|this 
this|to to|was was|were were|when when|with with|a an|an a|a the|the a' $f > $t test -s $t && { echo "paired typo" echo "============================" diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index 694ffc86ee4..51cc487f04c 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -81,10 +81,10 @@ class SessionStat(Stat): prefix = 'session' def __init__(self, name, desc, flags=''): Stat.__init__(self, name, SessionStat.prefix, desc, flags) -class ThreadState(Stat): +class ThreadStat(Stat): prefix = 'thread-state' def __init__(self, name, desc, flags=''): - Stat.__init__(self, name, ThreadState.prefix, desc, flags) + Stat.__init__(self, name, ThreadStat.prefix, desc, flags) class TxnStat(Stat): prefix = 'transaction' def __init__(self, name, desc, flags=''): @@ -105,7 +105,7 @@ groups['evict'] = [ BlockStat.prefix, CacheStat.prefix, ConnStat.prefix, - ThreadState.prefix + ThreadStat.prefix ] groups['lsm'] = [LSMStat.prefix, TxnStat.prefix] groups['memory'] = [CacheStat.prefix, ConnStat.prefix, RecStat.prefix] @@ -113,7 +113,7 @@ groups['system'] = [ ConnStat.prefix, DhandleStat.prefix, SessionStat.prefix, - ThreadState.prefix + ThreadStat.prefix ] ########################################## @@ -159,6 +159,7 @@ connection_stats = [ BlockStat('block_byte_map_read', 'mapped bytes read', 'size'), BlockStat('block_byte_read', 'bytes read', 'size'), BlockStat('block_byte_write', 'bytes written', 'size'), + BlockStat('block_byte_write_checkpoint', 'bytes written for checkpoint', 'size'), BlockStat('block_map_read', 'mapped blocks read'), BlockStat('block_preload', 'blocks pre-loaded'), BlockStat('block_read', 'blocks read'), @@ -168,11 +169,12 @@ connection_stats = [ # Cache and eviction statistics ########################################## CacheStat('cache_bytes_dirty', 'tracked dirty bytes in the cache', 'no_clear,no_scale,size'), + 
CacheStat('cache_bytes_image', 'bytes belonging to page images in the cache', 'no_clear,no_scale,size'), CacheStat('cache_bytes_internal', 'tracked bytes belonging to internal pages in the cache', 'no_clear,no_scale,size'), CacheStat('cache_bytes_inuse', 'bytes currently in the cache', 'no_clear,no_scale,size'), CacheStat('cache_bytes_leaf', 'tracked bytes belonging to leaf pages in the cache', 'no_clear,no_scale,size'), CacheStat('cache_bytes_max', 'maximum bytes configured', 'no_clear,no_scale,size'), - CacheStat('cache_bytes_overflow', 'tracked bytes belonging to overflow pages in the cache', 'no_clear,no_scale,size'), + CacheStat('cache_bytes_other', 'bytes not belonging to page images in the cache', 'no_clear,no_scale,size'), CacheStat('cache_bytes_read', 'bytes read into cache', 'size'), CacheStat('cache_bytes_write', 'bytes written from cache', 'size'), CacheStat('cache_eviction_aggressive_set', 'eviction currently operating in aggressive mode', 'no_clear,no_scale'), @@ -193,7 +195,8 @@ connection_stats = [ CacheStat('cache_eviction_internal', 'internal pages evicted'), CacheStat('cache_eviction_maximum_page_size', 'maximum page size at eviction', 'no_clear,no_scale,size'), CacheStat('cache_eviction_pages_queued', 'pages queued for eviction'), - CacheStat('cache_eviction_pages_queued_oldest', 'pages queued for urgent eviction'), + CacheStat('cache_eviction_pages_queued_oldest', 'pages queued for urgent eviction during walk'), + CacheStat('cache_eviction_pages_queued_urgent', 'pages queued for urgent eviction'), CacheStat('cache_eviction_pages_seen', 'pages seen by eviction walk'), CacheStat('cache_eviction_queue_empty', 'eviction server candidate queue empty when topping up'), CacheStat('cache_eviction_queue_not_empty', 'eviction server candidate queue not empty when topping up'), @@ -215,12 +218,14 @@ connection_stats = [ CacheStat('cache_inmem_splittable', 'in-memory page passed criteria to be split'), CacheStat('cache_lookaside_insert', 'lookaside table 
insert calls'), CacheStat('cache_lookaside_remove', 'lookaside table remove calls'), + CacheStat('cache_overflow_value', 'overflow values cached in memory', 'no_scale'), CacheStat('cache_overhead', 'percentage overhead', 'no_clear,no_scale'), CacheStat('cache_pages_dirty', 'tracked dirty pages in the cache', 'no_clear,no_scale'), CacheStat('cache_pages_inuse', 'pages currently held in the cache', 'no_clear,no_scale'), CacheStat('cache_pages_requested', 'pages requested from the cache'), CacheStat('cache_read', 'pages read into cache'), CacheStat('cache_read_lookaside', 'pages read into cache requiring lookaside entries'), + CacheStat('cache_read_overflow', 'overflow pages read into cache'), CacheStat('cache_write', 'pages written from cache'), CacheStat('cache_write_lookaside', 'page written requiring lookaside records'), CacheStat('cache_write_restore', 'pages written requiring in-memory restoration'), @@ -294,11 +299,11 @@ connection_stats = [ TxnStat('txn_begin', 'transaction begins'), TxnStat('txn_checkpoint', 'transaction checkpoints'), TxnStat('txn_checkpoint_fsync_post', 'transaction fsync calls for checkpoint after allocating the transaction ID'), - TxnStat('txn_checkpoint_fsync_post_duration', 'transaction fsync duration for checkpoint after allocating the transaction ID (usecs)'), - TxnStat('txn_checkpoint_fsync_pre', 'transaction fsync calls for checkpoint before allocating the transaction ID'), - TxnStat('txn_checkpoint_fsync_pre_duration', 'transaction fsync duration for checkpoint before allocating the transaction ID (usecs)'), + TxnStat('txn_checkpoint_fsync_post_duration', 'transaction fsync duration for checkpoint after allocating the transaction ID (usecs)', 'no_clear,no_scale'), TxnStat('txn_checkpoint_generation', 'transaction checkpoint generation', 'no_clear,no_scale'), TxnStat('txn_checkpoint_running', 'transaction checkpoint currently running', 'no_clear,no_scale'), + TxnStat('txn_checkpoint_scrub_target', 'transaction checkpoint scrub dirty 
target', 'no_clear,no_scale'), + TxnStat('txn_checkpoint_scrub_time', 'transaction checkpoint scrub time (msecs)', 'no_clear,no_scale'), TxnStat('txn_checkpoint_time_max', 'transaction checkpoint max time (msecs)', 'no_clear,no_scale'), TxnStat('txn_checkpoint_time_min', 'transaction checkpoint min time (msecs)', 'no_clear,no_scale'), TxnStat('txn_checkpoint_time_recent', 'transaction checkpoint most recent time (msecs)', 'no_clear,no_scale'), @@ -332,6 +337,22 @@ connection_stats = [ ########################################## SessionStat('session_cursor_open', 'open cursor count', 'no_clear,no_scale'), SessionStat('session_open', 'open session count', 'no_clear,no_scale'), + SessionStat('session_table_compact_fail', 'table compact failed calls', 'no_clear,no_scale'), + SessionStat('session_table_compact_success', 'table compact successful calls', 'no_clear,no_scale'), + SessionStat('session_table_create_fail', 'table create failed calls', 'no_clear,no_scale'), + SessionStat('session_table_create_success', 'table create successful calls', 'no_clear,no_scale'), + SessionStat('session_table_drop_fail', 'table drop failed calls', 'no_clear,no_scale'), + SessionStat('session_table_drop_success', 'table drop successful calls', 'no_clear,no_scale'), + SessionStat('session_table_rebalance_fail', 'table rebalance failed calls', 'no_clear,no_scale'), + SessionStat('session_table_rebalance_success', 'table rebalance successful calls', 'no_clear,no_scale'), + SessionStat('session_table_rename_fail', 'table rename failed calls', 'no_clear,no_scale'), + SessionStat('session_table_rename_success', 'table rename successful calls', 'no_clear,no_scale'), + SessionStat('session_table_salvage_fail', 'table salvage failed calls', 'no_clear,no_scale'), + SessionStat('session_table_salvage_success', 'table salvage successful calls', 'no_clear,no_scale'), + SessionStat('session_table_truncate_fail', 'table truncate failed calls', 'no_clear,no_scale'), + 
SessionStat('session_table_truncate_success', 'table truncate successful calls', 'no_clear,no_scale'), + SessionStat('session_table_verify_fail', 'table verify failed calls', 'no_clear,no_scale'), + SessionStat('session_table_verify_success', 'table verify successful calls', 'no_clear,no_scale'), ########################################## # Total cursor operations @@ -349,11 +370,11 @@ connection_stats = [ CursorStat('cursor_update', 'cursor update calls'), ########################################## - # Thread State statistics + # Thread Count statistics ########################################## - ThreadState('fsync_active', 'active filesystem fsync calls','no_clear,no_scale'), - ThreadState('read_active', 'active filesystem read calls','no_clear,no_scale'), - ThreadState('write_active', 'active filesystem write calls','no_clear,no_scale'), + ThreadStat('thread_fsync_active', 'active filesystem fsync calls','no_clear,no_scale'), + ThreadStat('thread_read_active', 'active filesystem read calls','no_clear,no_scale'), + ThreadStat('thread_write_active', 'active filesystem write calls','no_clear,no_scale'), ########################################## # Yield statistics @@ -451,6 +472,7 @@ dsrc_stats = [ ########################################## # Cache and eviction statistics ########################################## + CacheStat('cache_bytes_inuse', 'bytes currently in the cache', 'no_clear,no_scale,size'), CacheStat('cache_bytes_read', 'bytes read into cache', 'size'), CacheStat('cache_bytes_write', 'bytes written from cache', 'size'), CacheStat('cache_eviction_checkpoint', 'checkpoint blocked page eviction'), diff --git a/src/third_party/wiredtiger/examples/c/ex_all.c b/src/third_party/wiredtiger/examples/c/ex_all.c index dd807922c10..e8727df3f60 100644 --- a/src/third_party/wiredtiger/examples/c/ex_all.c +++ b/src/third_party/wiredtiger/examples/c/ex_all.c @@ -1160,34 +1160,27 @@ main(void) if (ret == 0) (void)conn->close(conn, NULL); +#ifdef MIGHT_NOT_RUN + /* + 
* Don't run this code, statistics logging doesn't yet support tables. + */ /*! [Statistics logging with a table] */ ret = wiredtiger_open(home, NULL, "create, statistics_log=(" - "sources=(\"lsm:table1\",\"lsm:table2\"), wait=5)", + "sources=(\"table:table1\",\"table:table2\"), wait=5)", &conn); /*! [Statistics logging with a table] */ if (ret == 0) (void)conn->close(conn, NULL); - /*! [Statistics logging with all tables] */ - ret = wiredtiger_open(home, NULL, - "create, statistics_log=(sources=(\"lsm:\"), wait=5)", - &conn); - /*! [Statistics logging with all tables] */ - if (ret == 0) - (void)conn->close(conn, NULL); - -#ifdef MIGHT_NOT_RUN /* - * This example code gets run, and a non-existent log file path might - * cause the open to fail. The documentation requires code snippets, - * use #ifdef's to avoid running it. + * Don't run this code, statistics logging doesn't yet support indexes. */ - /*! [Statistics logging with path] */ + /*! [Statistics logging with a source type] */ ret = wiredtiger_open(home, NULL, - "create," - "statistics_log=(wait=120,path=/log/log.%m.%d.%y)", &conn); - /*! [Statistics logging with path] */ + "create, statistics_log=(sources=(\"index:\"), wait=5)", + &conn); + /*! 
[Statistics logging with a source type] */ if (ret == 0) (void)conn->close(conn, NULL); diff --git a/src/third_party/wiredtiger/examples/c/ex_file_system.c b/src/third_party/wiredtiger/examples/c/ex_file_system.c index 77e8f40480b..55ee20e9331 100644 --- a/src/third_party/wiredtiger/examples/c/ex_file_system.c +++ b/src/third_party/wiredtiger/examples/c/ex_file_system.c @@ -118,18 +118,17 @@ int demo_file_system_create(WT_CONNECTION *, WT_CONFIG_ARG *); /* * Forward function declarations for file system API implementation */ -static int demo_fs_open(WT_FILE_SYSTEM *, - WT_SESSION *, const char *, WT_OPEN_FILE_TYPE, uint32_t, WT_FILE_HANDLE **); +static int demo_fs_open(WT_FILE_SYSTEM *, WT_SESSION *, + const char *, WT_FS_OPEN_FILE_TYPE, uint32_t, WT_FILE_HANDLE **); static int demo_fs_directory_list(WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, char ***, uint32_t *); static int demo_fs_directory_list_free( WT_FILE_SYSTEM *, WT_SESSION *, char **, uint32_t); -static int demo_fs_directory_sync(WT_FILE_SYSTEM *file_system, - WT_SESSION *session, const char *directory); static int demo_fs_exist(WT_FILE_SYSTEM *, WT_SESSION *, const char *, bool *); -static int demo_fs_remove(WT_FILE_SYSTEM *, WT_SESSION *, const char *); +static int demo_fs_remove( + WT_FILE_SYSTEM *, WT_SESSION *, const char *, uint32_t); static int demo_fs_rename( - WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *); + WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, uint32_t); static int demo_fs_size( WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *); static int demo_fs_terminate(WT_FILE_SYSTEM *, WT_SESSION *); @@ -255,7 +254,6 @@ demo_file_system_create(WT_CONNECTION *conn, WT_CONFIG_ARG *config) /* Initialize the in-memory jump table. 
*/ file_system->fs_directory_list = demo_fs_directory_list; file_system->fs_directory_list_free = demo_fs_directory_list_free; - file_system->fs_directory_sync = demo_fs_directory_sync; file_system->fs_exist = demo_fs_exist; file_system->fs_open_file = demo_fs_open; file_system->fs_remove = demo_fs_remove; @@ -282,7 +280,7 @@ err: free(demo_fs); */ static int demo_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session, - const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags, + const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags, WT_FILE_HANDLE **file_handlep) { DEMO_FILE_HANDLE *demo_fh; @@ -468,21 +466,6 @@ demo_fs_directory_list_free(WT_FILE_SYSTEM *file_system, return (0); } -/* - * demo_fs_directory_sync -- - * Directory sync for our demo file system, which is a no-op. - */ -static int -demo_fs_directory_sync(WT_FILE_SYSTEM *file_system, - WT_SESSION *session, const char *directory) -{ - (void)file_system; /* Unused */ - (void)session; /* Unused */ - (void)directory; /* Unused */ - - return (0); -} - /* * demo_fs_exist -- * Return if the file exists. @@ -507,13 +490,15 @@ demo_fs_exist(WT_FILE_SYSTEM *file_system, * POSIX remove. 
*/ static int -demo_fs_remove( - WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name) +demo_fs_remove(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, const char *name, uint32_t flags) { DEMO_FILE_SYSTEM *demo_fs; DEMO_FILE_HANDLE *demo_fh; int ret = 0; + (void)flags; /* Unused */ + demo_fs = (DEMO_FILE_SYSTEM *)file_system; ret = ENOENT; @@ -531,13 +516,15 @@ demo_fs_remove( */ static int demo_fs_rename(WT_FILE_SYSTEM *file_system, - WT_SESSION *session, const char *from, const char *to) + WT_SESSION *session, const char *from, const char *to, uint32_t flags) { DEMO_FILE_HANDLE *demo_fh; DEMO_FILE_SYSTEM *demo_fs; char *copy; int ret = 0; + (void)flags; /* Unused */ + demo_fs = (DEMO_FILE_SYSTEM *)file_system; LOCK_FILE_SYSTEM(session, demo_fs); diff --git a/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_all.java b/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_all.java index 48e85c9fade..83a37e9a6a5 100644 --- a/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_all.java +++ b/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_all.java @@ -988,6 +988,10 @@ allExample() /*! [Statistics logging] */ conn.close(null); + if (false) { // MIGHT_NOT_RUN + /* + * Don't run this code, statistics logging doesn't yet support tables. + */ /*! [Statistics logging with a table] */ conn = wiredtiger.open(home, "create," + @@ -995,23 +999,13 @@ allExample() /*! [Statistics logging with a table] */ conn.close(null); - /*! [Statistics logging with all tables] */ - conn = wiredtiger.open(home, - "create,statistics_log=(sources=(\"table:\"))"); - /*! [Statistics logging with all tables] */ - conn.close(null); - - if (false) { // MIGHT_NOT_RUN /* - * This example code gets run, and a non-existent log file path might - * cause the open to fail. The documentation requires code snippets, - * use if (false) to avoid running it. 
+ * Don't run this code, statistics logging doesn't yet support indexes. */ - /*! [Statistics logging with path] */ + /*! [Statistics logging with a source type] */ conn = wiredtiger.open(home, - "create," + - "statistics_log=(wait=120,path=/log/log.%m.%d.%y)"); - /*! [Statistics logging with path] */ + "create,statistics_log=(sources=(\"index:\"))"); + /*! [Statistics logging with a source type] */ conn.close(null); /* diff --git a/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c b/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c index 9aede2ed907..484df0a6785 100644 --- a/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c +++ b/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c @@ -92,7 +92,7 @@ zalloc(void *cookie, uint32_t number, uint32_t size) opaque = cookie; wt_api = ((ZLIB_COMPRESSOR *)opaque->compressor)->wt_api; return (wt_api->scr_alloc( - wt_api, opaque->session, (size_t)(number * size))); + wt_api, opaque->session, (size_t)number * size)); } /* diff --git a/src/third_party/wiredtiger/src/async/async_api.c b/src/third_party/wiredtiger/src/async/async_api.c index fea8714176b..d53a6c65c1d 100644 --- a/src/third_party/wiredtiger/src/async/async_api.c +++ b/src/third_party/wiredtiger/src/async/async_api.c @@ -490,12 +490,24 @@ __wt_async_flush(WT_SESSION_IMPL *session) WT_ASYNC *async; WT_CONNECTION_IMPL *conn; WT_DECL_RET; + uint32_t i, workers; conn = S2C(session); if (!conn->async_cfg) return (0); async = conn->async; + /* + * Only add a flush operation if there are workers who can process + * it. Otherwise we will wait forever. + */ + workers = 0; + for (i = 0; i < conn->async_workers; ++i) + if (async->worker_tids[i] != 0) + ++workers; + if (workers == 0) + return (0); + WT_STAT_FAST_CONN_INCR(session, async_flush); /* * We have to do several things. 
First we have to prevent diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c index b9f0ec25d53..3584efc7671 100644 --- a/src/third_party/wiredtiger/src/block/block_ckpt.c +++ b/src/third_party/wiredtiger/src/block/block_ckpt.c @@ -252,7 +252,7 @@ __wt_block_checkpoint(WT_SESSION_IMPL *session, } else WT_ERR(__wt_block_write_off(session, block, buf, &ci->root_offset, &ci->root_size, &ci->root_cksum, - data_cksum, false)); + data_cksum, true, false)); /* * Checkpoints are potentially reading/writing/merging lots of blocks, diff --git a/src/third_party/wiredtiger/src/block/block_ext.c b/src/third_party/wiredtiger/src/block/block_ext.c index 0d3e7b54f17..bad4d8d7990 100644 --- a/src/third_party/wiredtiger/src/block/block_ext.c +++ b/src/third_party/wiredtiger/src/block/block_ext.c @@ -1245,8 +1245,7 @@ __wt_block_extlist_write(WT_SESSION_IMPL *session, WT_DECL_RET; WT_EXT *ext; WT_PAGE_HEADER *dsk; - size_t size; - uint32_t entries; + size_t entries, size; uint8_t *p; WT_RET(__block_extlist_dump(session, block, el, "write")); @@ -1311,7 +1310,7 @@ __wt_block_extlist_write(WT_SESSION_IMPL *session, /* Write the extent list to disk. 
*/ WT_ERR(__wt_block_write_off(session, - block, tmp, &el->offset, &el->size, &el->cksum, true, true)); + block, tmp, &el->offset, &el->size, &el->cksum, true, true, true)); /* * Remove the allocated blocks from the system's allocation list, extent @@ -1450,7 +1449,7 @@ __block_extlist_dump( tag, el->name, el->entries, __wt_buf_set_size(session, el->bytes, true, t1))); - if (ret != 0 || el->entries == 0) + if (el->entries == 0) goto done; memset(sizes, 0, sizeof(sizes)); diff --git a/src/third_party/wiredtiger/src/block/block_mgr.c b/src/third_party/wiredtiger/src/block/block_mgr.c index 971fe713f83..eff25f34304 100644 --- a/src/third_party/wiredtiger/src/block/block_mgr.c +++ b/src/third_party/wiredtiger/src/block/block_mgr.c @@ -479,11 +479,11 @@ __bm_verify_start(WT_BM *bm, * Write a buffer into a block, returning the block's address cookie. */ static int -__bm_write(WT_BM *bm, WT_SESSION_IMPL *session, - WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum) +__bm_write(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, + uint8_t *addr, size_t *addr_sizep, bool data_cksum, bool checkpoint_io) { - return (__wt_block_write( - session, bm->block, buf, addr, addr_sizep, data_cksum)); + return (__wt_block_write(session, + bm->block, buf, addr, addr_sizep, data_cksum, checkpoint_io)); } /* @@ -492,13 +492,14 @@ __bm_write(WT_BM *bm, WT_SESSION_IMPL *session, * readonly version. 
*/ static int -__bm_write_readonly(WT_BM *bm, WT_SESSION_IMPL *session, - WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum) +__bm_write_readonly(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, + uint8_t *addr, size_t *addr_sizep, bool data_cksum, bool checkpoint_io) { WT_UNUSED(buf); WT_UNUSED(addr); WT_UNUSED(addr_sizep); WT_UNUSED(data_cksum); + WT_UNUSED(checkpoint_io); return (__bm_readonly(bm, session)); } diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c index 1603b1574e7..7cff7eab629 100644 --- a/src/third_party/wiredtiger/src/block/block_open.c +++ b/src/third_party/wiredtiger/src/block/block_open.c @@ -15,9 +15,10 @@ static int __desc_read(WT_SESSION_IMPL *, WT_BLOCK *); * Drop a file. */ int -__wt_block_manager_drop(WT_SESSION_IMPL *session, const char *filename) +__wt_block_manager_drop( + WT_SESSION_IMPL *session, const char *filename, bool durable) { - return (__wt_remove_if_exists(session, filename)); + return (__wt_remove_if_exists(session, filename, durable)); } /* @@ -43,8 +44,9 @@ __wt_block_manager_create( * in our space. Move any existing files out of the way and complain. */ for (;;) { - if ((ret = __wt_open(session, filename, WT_OPEN_FILE_TYPE_DATA, - WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &fh)) == 0) + if ((ret = __wt_open(session, filename, + WT_FS_OPEN_FILE_TYPE_DATA, WT_FS_OPEN_CREATE | + WT_FS_OPEN_DURABLE | WT_FS_OPEN_EXCLUSIVE, &fh)) == 0) break; WT_ERR_TEST(ret != EEXIST, ret); @@ -56,7 +58,7 @@ __wt_block_manager_create( WT_ERR(__wt_fs_exist(session, tmp->data, &exists)); if (!exists) { WT_ERR(__wt_fs_rename( - session, filename, tmp->data)); + session, filename, tmp->data, false)); WT_ERR(__wt_msg(session, "unexpected file %s found, renamed to %s", filename, (const char *)tmp->data)); @@ -77,16 +79,9 @@ __wt_block_manager_create( /* Close the file handle. 
*/ WT_TRET(__wt_close(session, &fh)); - /* - * Some filesystems require that we sync the directory to be confident - * that the file will appear. - */ - if (ret == 0) - WT_TRET(__wt_fs_directory_sync(session, filename)); - /* Undo any create on error. */ if (ret != 0) - WT_TRET(__wt_fs_remove(session, filename)); + WT_TRET(__wt_fs_remove(session, filename, false)); err: __wt_scr_free(session, &tmp); @@ -207,11 +202,11 @@ __wt_block_open(WT_SESSION_IMPL *session, */ flags = 0; if (readonly && FLD_ISSET(conn->direct_io, WT_DIRECT_IO_CHECKPOINT)) - LF_SET(WT_OPEN_DIRECTIO); + LF_SET(WT_FS_OPEN_DIRECTIO); if (!readonly && FLD_ISSET(conn->direct_io, WT_DIRECT_IO_DATA)) - LF_SET(WT_OPEN_DIRECTIO); + LF_SET(WT_FS_OPEN_DIRECTIO); WT_ERR(__wt_open( - session, filename, WT_OPEN_FILE_TYPE_DATA, flags, &block->fh)); + session, filename, WT_FS_OPEN_FILE_TYPE_DATA, flags, &block->fh)); /* Set the file's size. */ WT_ERR(__wt_filesize(session, block->fh, &block->size)); diff --git a/src/third_party/wiredtiger/src/block/block_session.c b/src/third_party/wiredtiger/src/block/block_session.c index 268adb530cf..6223751effa 100644 --- a/src/third_party/wiredtiger/src/block/block_session.c +++ b/src/third_party/wiredtiger/src/block/block_session.c @@ -28,7 +28,7 @@ __block_ext_alloc(WT_SESSION_IMPL *session, WT_EXT **extp) { WT_EXT *ext; - u_int skipdepth; + size_t skipdepth; skipdepth = __wt_skip_choose_depth(session); WT_RET(__wt_calloc(session, 1, diff --git a/src/third_party/wiredtiger/src/block/block_write.c b/src/third_party/wiredtiger/src/block/block_write.c index 1fefeee09da..30d06e6259a 100644 --- a/src/third_party/wiredtiger/src/block/block_write.c +++ b/src/third_party/wiredtiger/src/block/block_write.c @@ -210,15 +210,15 @@ __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep) * Write a buffer into a block, returning the block's address cookie. 
*/ int -__wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, - WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum) +__wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, + uint8_t *addr, size_t *addr_sizep, bool data_cksum, bool checkpoint_io) { wt_off_t offset; uint32_t size, cksum; uint8_t *endp; - WT_RET(__wt_block_write_off( - session, block, buf, &offset, &size, &cksum, data_cksum, false)); + WT_RET(__wt_block_write_off(session, block, + buf, &offset, &size, &cksum, data_cksum, checkpoint_io, false)); endp = addr; WT_RET(__wt_block_addr_to_buffer(block, &endp, offset, size, cksum)); @@ -228,14 +228,14 @@ __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, } /* - * __wt_block_write_off -- + * __block_write_off -- * Write a buffer into a block, returning the block's offset, size and * checksum. */ -int -__wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, +static int +__block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump, - bool data_cksum, bool caller_locked) + bool data_cksum, bool checkpoint_io, bool caller_locked) { WT_BLOCK_HEADER *blk; WT_DECL_RET; @@ -254,12 +254,6 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, blk = WT_BLOCK_HEADER_REF(buf->mem); memset(blk, 0, sizeof(*blk)); - /* - * Swap the page-header as needed; this doesn't belong here, but it's - * the best place to catch all callers. - */ - __wt_page_header_byteswap(buf->mem); - /* Buffers should be aligned for writing. 
*/ if (!F_ISSET(buf, WT_ITEM_ALIGNED)) { WT_ASSERT(session, F_ISSET(buf, WT_ITEM_ALIGNED)); @@ -380,6 +374,9 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_STAT_FAST_CONN_INCR(session, block_write); WT_STAT_FAST_CONN_INCRV(session, block_byte_write, align_size); + if (checkpoint_io) + WT_STAT_FAST_CONN_INCRV( + session, block_byte_write_checkpoint, align_size); WT_RET(__wt_verbose(session, WT_VERB_WRITE, "off %" PRIuMAX ", size %" PRIuMAX ", cksum %" PRIu32, @@ -391,3 +388,28 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, return (0); } + +/* + * __wt_block_write_off -- + * Write a buffer into a block, returning the block's offset, size and + * checksum. + */ +int +__wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, + WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump, + bool data_cksum, bool checkpoint_io, bool caller_locked) +{ + WT_DECL_RET; + + /* + * Ensure the page header is in little endian order; this doesn't belong + * here, but it's the best place to catch all callers. After the write, + * swap values back to native order so callers never see anything other + * than their original content. + */ + __wt_page_header_byteswap(buf->mem); + ret = __block_write_off(session, block, buf, + offsetp, sizep, cksump, data_cksum, checkpoint_io, caller_locked); + __wt_page_header_byteswap(buf->mem); + return (ret); +} diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c index 70b3ba56e31..e1b097c22a5 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curnext.c +++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c @@ -183,6 +183,7 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage) if (cbt->last_standard_recno == 0) return (WT_NOTFOUND); __cursor_set_recno(cbt, cbt->ref->ref_recno); + cbt->cip_saved = NULL; goto new_page; } @@ -301,12 +302,13 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage) * WT_INSERT_HEAD[0], and so on. 
This means WT_INSERT lists are * odd-numbered slots, and WT_ROW array slots are even-numbered slots. * - * New page configuration. + * Initialize for each new page. */ if (newpage) { cbt->ins_head = WT_ROW_INSERT_SMALLEST(page); cbt->ins = WT_SKIP_FIRST(cbt->ins_head); cbt->row_iteration_slot = 1; + cbt->rip_saved = NULL; goto new_insert; } @@ -517,11 +519,13 @@ __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt) */ F_SET(cbt, WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV); - /* - * Clear the count of deleted items on the page. - */ + /* Clear the count of deleted items on the page. */ cbt->page_deleted_count = 0; + /* Clear saved iteration cursor position information. */ + cbt->cip_saved = NULL; + cbt->rip_saved = NULL; + /* * If we don't have a search page, then we're done, we're starting at * the beginning or end of the tree, not as a result of a search. @@ -661,7 +665,7 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) if (page != NULL && (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD || (newpage && cbt->page_deleted_count > 0))) - __wt_page_evict_soon(page); + WT_ERR(__wt_page_evict_soon(session, cbt->ref)); cbt->page_deleted_count = 0; WT_ERR(__wt_tree_walk(session, &cbt->ref, flags)); diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c index 872f648446c..e39dffa357f 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curprev.c +++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c @@ -329,6 +329,7 @@ __cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage) if (cbt->last_standard_recno == 0) return (WT_NOTFOUND); __cursor_set_recno(cbt, cbt->last_standard_recno); + cbt->cip_saved = NULL; goto new_page; } @@ -447,7 +448,7 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage) * WT_INSERT_HEAD[0], and so on. This means WT_INSERT lists are * odd-numbered slots, and WT_ROW array slots are even-numbered slots. * - * New page configuration. + * Initialize for each new page. 
*/ if (newpage) { /* @@ -464,6 +465,7 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage) WT_ROW_INSERT_SLOT(page, page->pg_row_entries - 1); cbt->ins = WT_SKIP_LAST(cbt->ins_head); cbt->row_iteration_slot = page->pg_row_entries * 2 + 1; + cbt->rip_saved = NULL; goto new_insert; } @@ -619,7 +621,7 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) if (page != NULL && (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD || (newpage && cbt->page_deleted_count > 0))) - __wt_page_evict_soon(page); + WT_ERR(__wt_page_evict_soon(session, cbt->ref)); cbt->page_deleted_count = 0; WT_ERR(__wt_tree_walk(session, &cbt->ref, flags)); diff --git a/src/third_party/wiredtiger/src/btree/bt_discard.c b/src/third_party/wiredtiger/src/btree/bt_discard.c index a00bb7dc2b5..965aec16fc2 100644 --- a/src/third_party/wiredtiger/src/btree/bt_discard.c +++ b/src/third_party/wiredtiger/src/btree/bt_discard.c @@ -131,8 +131,10 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) /* Discard any disk image. */ dsk = (WT_PAGE_HEADER *)page->dsk; - if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC)) + if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC)) { + __wt_cache_page_image_decr(session, dsk->mem_size); __wt_overwrite_and_free_len(session, dsk, dsk->mem_size); + } /* Discard any mapped image. */ if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED)) diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c index c97e05d74a7..cacf1369430 100644 --- a/src/third_party/wiredtiger/src/btree/bt_handle.c +++ b/src/third_party/wiredtiger/src/btree/bt_handle.c @@ -690,6 +690,8 @@ __btree_page_sizes(WT_SESSION_IMPL *session) * Don't let pages grow large compared to the cache size or we can end * up in a situation where nothing can be evicted. Take care getting * the cache size: with a shared cache, it may not have been set. + * Don't forget to update the API documentation if you alter the + * bounds for any of the parameters here. 
*/ WT_RET(__wt_config_gets(session, cfg, "memory_page_max", &cval)); btree->maxmempage = (uint64_t)cval.val; diff --git a/src/third_party/wiredtiger/src/btree/bt_huffman.c b/src/third_party/wiredtiger/src/btree/bt_huffman.c index 9e9d69c342e..918791d9c6e 100644 --- a/src/third_party/wiredtiger/src/btree/bt_huffman.c +++ b/src/third_party/wiredtiger/src/btree/bt_huffman.c @@ -157,7 +157,8 @@ __huffman_confchk_file(WT_SESSION_IMPL *session, /* Check the file exists. */ WT_RET(__wt_strndup(session, v->str + len, v->len - len, &fname)); - WT_ERR(__wt_fopen(session, fname, WT_OPEN_FIXED, WT_STREAM_READ, &fs)); + WT_ERR(__wt_fopen( + session, fname, WT_FS_OPEN_FIXED, WT_STREAM_READ, &fs)); /* Optionally return the file handle. */ if (fsp == NULL) diff --git a/src/third_party/wiredtiger/src/btree/bt_io.c b/src/third_party/wiredtiger/src/btree/bt_io.c index 4339de6f25c..6c2e2f1b3fb 100644 --- a/src/third_party/wiredtiger/src/btree/bt_io.c +++ b/src/third_party/wiredtiger/src/btree/bt_io.c @@ -117,7 +117,7 @@ __wt_bt_read(WT_SESSION_IMPL *session, */ if (ret != 0 || result_len != dsk->mem_size - WT_BLOCK_COMPRESS_SKIP) { - fail_msg = "block decryption failed"; + fail_msg = "block decompression failed"; goto corrupt; } } else @@ -168,7 +168,8 @@ err: __wt_scr_free(session, &tmp); */ int __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, - uint8_t *addr, size_t *addr_sizep, bool checkpoint, bool compressed) + uint8_t *addr, size_t *addr_sizep, + bool checkpoint, bool checkpoint_io, bool compressed) { WT_BM *bm; WT_BTREE *btree; @@ -359,10 +360,12 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, /* Call the block manager to write the block. */ WT_ERR(checkpoint ? 
bm->checkpoint(bm, session, ip, btree->ckpt, data_cksum) : - bm->write(bm, session, ip, addr, addr_sizep, data_cksum)); + bm->write( + bm, session, ip, addr, addr_sizep, data_cksum, checkpoint_io)); WT_STAT_FAST_CONN_INCR(session, cache_write); WT_STAT_FAST_DATA_INCR(session, cache_write); + S2C(session)->cache->bytes_written += dsk->mem_size; WT_STAT_FAST_CONN_INCRV(session, cache_bytes_write, dsk->mem_size); WT_STAT_FAST_DATA_INCRV(session, cache_bytes_write, dsk->mem_size); diff --git a/src/third_party/wiredtiger/src/btree/bt_ovfl.c b/src/third_party/wiredtiger/src/btree/bt_ovfl.c index fbe361e000a..1f080041a23 100644 --- a/src/third_party/wiredtiger/src/btree/bt_ovfl.c +++ b/src/third_party/wiredtiger/src/btree/bt_ovfl.c @@ -33,6 +33,7 @@ __ovfl_read(WT_SESSION_IMPL *session, store->data = WT_PAGE_HEADER_BYTE(btree, dsk); store->size = dsk->u.datalen; + WT_STAT_FAST_CONN_INCR(session, cache_read_overflow); WT_STAT_FAST_DATA_INCR(session, cache_read_overflow); return (0); @@ -208,6 +209,7 @@ __wt_ovfl_cache(WT_SESSION_IMPL *session, */ if (!visible) { WT_RET(__ovfl_cache(session, page, vpack)); + WT_STAT_FAST_CONN_INCR(session, cache_overflow_value); WT_STAT_FAST_DATA_INCR(session, cache_overflow_value); } diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c index 00ec8aa4494..89e5f428628 100644 --- a/src/third_party/wiredtiger/src/btree/bt_page.c +++ b/src/third_party/wiredtiger/src/btree/bt_page.c @@ -219,6 +219,7 @@ __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, /* Update the page's in-memory size and the cache statistics. */ __wt_cache_page_inmem_incr(session, page, size); + __wt_cache_page_image_incr(session, dsk->mem_size); /* Link the new internal page to the parent. 
*/ if (ref != NULL) { diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c index 086500c8b2f..3d396d5ae5b 100644 --- a/src/third_party/wiredtiger/src/btree/bt_read.c +++ b/src/third_party/wiredtiger/src/btree/bt_read.c @@ -296,7 +296,7 @@ err: WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags)); * __evict_force_check -- * Check if a page matches the criteria for forced eviction. */ -static int +static bool __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref) { WT_BTREE *btree; @@ -307,26 +307,26 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref) /* Leaf pages only. */ if (WT_PAGE_IS_INTERNAL(page)) - return (0); + return (false); /* * It's hard to imagine a page with a huge memory footprint that has * never been modified, but check to be sure. */ if (page->modify == NULL) - return (0); + return (false); /* Pages are usually small enough, check that first. */ if (page->memory_footprint < btree->splitmempage) - return (0); + return (false); else if (page->memory_footprint < btree->maxmempage) return (__wt_leaf_page_can_split(session, page)); /* Trigger eviction on the next page release. */ - __wt_page_evict_soon(page); + (void)__wt_page_evict_soon(session, ref); /* Bump the oldest ID, we're about to do some visibility checks. */ - WT_RET(__wt_txn_update_oldest(session, 0)); + (void)__wt_txn_update_oldest(session, 0); /* If eviction cannot succeed, don't try. */ return (__wt_page_can_evict(session, ref, NULL)); @@ -548,10 +548,14 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags * if the page qualifies for forced eviction and update * the page's generation number. If eviction isn't being * done on this file, we're done. + * In-memory split of large pages is allowed while + * no_eviction is set on btree, whereas reconciliation + * is not allowed. 
*/ if (LF_ISSET(WT_READ_NO_EVICT) || F_ISSET(session, WT_SESSION_NO_EVICTION) || - F_ISSET(btree, WT_BTREE_NO_EVICTION)) + (F_ISSET(btree, WT_BTREE_NO_EVICTION) && + !F_ISSET(btree, WT_BTREE_NO_RECONCILE))) goto skip_evict; /* @@ -595,7 +599,14 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags page = ref->page; if (page->read_gen == WT_READGEN_NOTSET) { if (evict_soon) - __wt_page_evict_soon(page); + /* + * Ignore error returns, since the + * evict soon call is advisory and we + * are holding a hazard pointer to the + * page already. + */ + (void)__wt_page_evict_soon( + session, ref); else __wt_cache_read_gen_new(session, page); } else if (!LF_ISSET(WT_READ_NO_GEN)) diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index 7a05a883f83..4f6f300802e 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -298,7 +298,7 @@ static int __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, WT_REF **from_refp, size_t *decrp, WT_REF **to_refp, size_t *incrp) { - WT_ADDR *addr; + WT_ADDR *addr, *ref_addr; WT_CELL_UNPACK unpack; WT_DECL_RET; WT_IKEY *ikey; @@ -345,13 +345,18 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, } /* - * If there's no address (the page has never been written), or the - * address has been instantiated, there's no work to do. Otherwise, - * instantiate the address in-memory, from the on-page cell. + * If there's no address at all (the page has never been written), or + * the address has already been instantiated, there's no work to do. + * Otherwise, the address still references a split page on-page cell, + * instantiate it. We can race with reconciliation and/or eviction of + * the child pages, be cautious: read the address and verify it, and + * only update it if the value is unchanged from the original. 
In the + * case of a race, the address must no longer reference the split page, + * we're done. */ - addr = ref->addr; - if (addr != NULL && !__wt_off_page(from_home, addr)) { - __wt_cell_unpack((WT_CELL *)ref->addr, &unpack); + WT_ORDERED_READ(ref_addr, ref->addr); + if (ref_addr != NULL && !__wt_off_page(from_home, ref_addr)) { + __wt_cell_unpack((WT_CELL *)ref_addr, &unpack); WT_RET(__wt_calloc_one(session, &addr)); if ((ret = __wt_strndup( session, unpack.data, unpack.size, &addr->addr)) != 0) { @@ -371,7 +376,10 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, break; WT_ILLEGAL_VALUE(session); } - ref->addr = addr; + if (!__wt_atomic_cas_ptr(&ref->addr, ref_addr, addr)) { + __wt_free(session, addr->addr); + __wt_free(session, addr); + } } /* And finally, copy the WT_REF pointer itself. */ @@ -786,7 +794,9 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, */ if (result_entries == 0) { empty_parent = true; - __wt_page_evict_soon(parent); + if (!__wt_ref_is_root(parent->pg_intl_parent_ref)) + ret = __wt_page_evict_soon( + session, parent->pg_intl_parent_ref); goto err; } @@ -1462,11 +1472,11 @@ err: if (parent != NULL) /* * __split_multi_inmem -- - * Instantiate a page in a multi-block set. + * Instantiate a page from a disk image. */ static int __split_multi_inmem( - WT_SESSION_IMPL *session, WT_PAGE *orig, WT_REF *ref, WT_MULTI *multi) + WT_SESSION_IMPL *session, WT_PAGE *orig, WT_MULTI *multi, WT_REF *ref) { WT_CURSOR_BTREE cbt; WT_DECL_ITEM(key); @@ -1487,13 +1497,12 @@ __split_multi_inmem( orig->type != WT_PAGE_COL_VAR || ref->ref_recno != 0); /* - * This code re-creates an in-memory page that is part of a set created - * while evicting a large page, and adds references to any unresolved - * update chains to the new page. We get here due to choosing to keep - * the results of a split in memory or because and update could not be - * written when attempting to evict a page. 
+ * This code re-creates an in-memory page from a disk image, and adds + * references to any unresolved update chains to the new page. We get + * here either because an update could not be written when evicting a + * page, or eviction chose to keep a page in memory. * - * Clear the disk image and link the page into the passed-in WT_REF to + * Steal the disk image and link the page into the passed-in WT_REF to * simplify error handling: our caller will not discard the disk image * when discarding the original page, and our caller will discard the * allocated page on error, when discarding the allocated WT_REF. @@ -1503,6 +1512,19 @@ __split_multi_inmem( WT_PAGE_DISK_ALLOC, &page)); multi->disk_image = NULL; + /* + * Put the re-instantiated page in the same LRU queue location as the + * original page, unless this was a forced eviction, in which case we + * leave the new page with the read generation unset. Eviction will + * set the read generation next time it visits this page. + */ + if (orig->read_gen != WT_READGEN_OLDEST) + page->read_gen = orig->read_gen; + + /* If there are no updates to apply to the page, we're done. */ + if (multi->supd_entries == 0) + return (0); + if (orig->type == WT_PAGE_ROW_LEAF) WT_RET(__wt_scr_alloc(session, 0, &key)); @@ -1551,14 +1573,12 @@ __split_multi_inmem( } /* - * If we modified the page above, it will have set the first dirty - * transaction to the last transaction currently running. However, the - * updates we installed may be older than that. Set the first dirty - * transaction to an impossibly old value so this page is never skipped - * in a checkpoint. + * When modifying the page we set the first dirty transaction to the + * last transaction currently running. However, the updates we made + * might be older than that. Set the first dirty transaction to an + * impossibly old value so this page is never skipped in a checkpoint. 
*/ - if (page->modify != NULL) - page->modify->first_dirty_txn = WT_TXN_FIRST; + page->modify->first_dirty_txn = WT_TXN_FIRST; err: /* Free any resources that may have been cached in the cursor. */ WT_TRET(__wt_btcur_close(&cbt, true)); @@ -1629,19 +1649,17 @@ __split_multi_inmem_fail(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_REF *ref) */ int __wt_multi_to_ref(WT_SESSION_IMPL *session, - WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp) + WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing) { WT_ADDR *addr; WT_IKEY *ikey; WT_REF *ref; - size_t incr; - - incr = 0; /* Allocate an underlying WT_REF. */ WT_RET(__wt_calloc_one(session, refp)); ref = *refp; - incr += sizeof(WT_REF); + if (incrp) + *incrp += sizeof(WT_REF); /* * Set the WT_REF key before (optionally) building the page, underlying @@ -1653,21 +1671,34 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session, ikey = multi->key.ikey; WT_RET(__wt_row_ikey( session, 0, WT_IKEY_DATA(ikey), ikey->size, ref)); - incr += sizeof(WT_IKEY) + ikey->size; + if (incrp) + *incrp += sizeof(WT_IKEY) + ikey->size; break; default: ref->ref_recno = multi->key.recno; break; } - /* If there's a disk image, build a page, otherwise set the address. */ - if (multi->disk_image == NULL) { - /* - * Copy the address: we could simply take the buffer, but that - * would complicate error handling, freeing the reference array - * would have to avoid freeing the memory, and it's not worth - * the confusion. - */ + /* There should be an address or a disk image (or both). */ + WT_ASSERT(session, + multi->addr.addr != NULL || multi->disk_image != NULL); + + /* If we're closing the file, there better be an address. */ + WT_ASSERT(session, multi->addr.addr != NULL || !closing); + + /* Verify any disk image we have. 
*/ + WT_ASSERT(session, multi->disk_image == NULL || + __wt_verify_dsk_image(session, + "[page instantiate]", multi->disk_image, 0, false) == 0); + + /* + * If there's an address, the page was written, set it. + * + * Copy the address: we could simply take the buffer, but that would + * complicate error handling, freeing the reference array would have + * to avoid freeing the memory, and it's not worth the confusion. + */ + if (multi->addr.addr != NULL) { WT_RET(__wt_calloc_one(session, &addr)); ref->addr = addr; addr->size = multi->addr.size; @@ -1675,14 +1706,20 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_RET(__wt_strndup(session, multi->addr.addr, addr->size, &addr->addr)); ref->state = WT_REF_DISK; - } else { - WT_RET(__split_multi_inmem(session, page, ref, multi)); + } + + /* + * If we have a disk image and we're not closing the file, + * re-instantiate the page. + * + * Discard any page image we don't use. + */ + if (multi->disk_image != NULL && !closing) { + WT_RET(__split_multi_inmem(session, page, multi, ref)); ref->state = WT_REF_MEM; } + __wt_free(session, multi->disk_image); - /* Optionally return changes in the memory footprint. */ - if (incrp != NULL) - *incrp += incr; return (0); } @@ -2086,8 +2123,8 @@ __split_multi(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) */ WT_RET(__wt_calloc_def(session, new_entries, &ref_new)); for (i = 0; i < new_entries; ++i) - WT_ERR(__wt_multi_to_ref(session, - page, &mod->mod_multi[i], &ref_new[i], &parent_incr)); + WT_ERR(__wt_multi_to_ref(session, page, + &mod->mod_multi[i], &ref_new[i], &parent_incr, closing)); /* * Split into the parent; if we're closing the file, we hold it @@ -2175,15 +2212,13 @@ __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref) * Rewrite an in-memory page with a new version. 
*/ int -__wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref) +__wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi) { WT_DECL_RET; WT_PAGE *page; - WT_PAGE_MODIFY *mod; WT_REF *new; page = ref->page; - mod = page->modify; WT_RET(__wt_verbose( session, WT_VERB_SPLIT, "%p: split-rewrite", ref->page)); @@ -2198,14 +2233,14 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref) * * Build the new page. * - * Allocate a WT_REF because the error path uses routines that will ea - * free memory. The only field we need to set is the record number, as - * it's used by the search routines. + * Allocate a WT_REF, the error path calls routines that free memory. + * The only field we need to set is the record number, as it's used by + * the search routines. */ WT_RET(__wt_calloc_one(session, &new)); new->ref_recno = ref->ref_recno; - WT_ERR(__split_multi_inmem(session, page, new, &mod->mod_multi[0])); + WT_ERR(__split_multi_inmem(session, page, multi, new)); /* * The rewrite succeeded, we can no longer fail. @@ -2213,7 +2248,7 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref) * Finalize the move, discarding moved update lists from the original * page. */ - __split_multi_inmem_final(page, &mod->mod_multi[0]); + __split_multi_inmem_final(page, multi); /* * Discard the original page. diff --git a/src/third_party/wiredtiger/src/btree/bt_stat.c b/src/third_party/wiredtiger/src/btree/bt_stat.c index 3d5abf34147..d3ddf33446e 100644 --- a/src/third_party/wiredtiger/src/btree/bt_stat.c +++ b/src/third_party/wiredtiger/src/btree/bt_stat.c @@ -41,6 +41,9 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) WT_STAT_SET(session, stats, btree_maxleafpage, btree->maxleafpage); WT_STAT_SET(session, stats, btree_maxleafvalue, btree->maxleafvalue); + WT_STAT_SET(session, stats, cache_bytes_inuse, + __wt_btree_bytes_inuse(session)); + /* Everything else is really, really expensive. 
*/ if (!F_ISSET(cst, WT_CONN_STAT_ALL)) return (0); @@ -139,7 +142,7 @@ __stat_page_col_var( } else { orig_deleted = false; __wt_cell_unpack(cell, unpack); - if (unpack->type == WT_CELL_ADDR_DEL) + if (unpack->type == WT_CELL_DEL) orig_deleted = true; else { entry_cnt += __wt_cell_rle(unpack); diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c index da6c53aa316..df794c96cda 100644 --- a/src/third_party/wiredtiger/src/btree/bt_sync.c +++ b/src/third_party/wiredtiger/src/btree/bt_sync.c @@ -84,7 +84,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) WT_ERR(__wt_txn_get_snapshot(session)); leaf_bytes += page->memory_footprint; ++leaf_pages; - WT_ERR(__wt_reconcile(session, walk, NULL, 0)); + WT_ERR(__wt_reconcile( + session, walk, NULL, WT_CHECKPOINTING)); } } break; @@ -92,7 +93,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) /* * If we are flushing a file at read-committed isolation, which * is of particular interest for flushing the metadata to make - * schema-changing operation durable, get a transactional + * a schema-changing operation durable, get a transactional * snapshot now. * * All changes committed up to this point should be included. @@ -126,7 +127,17 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) */ WT_PUBLISH(btree->checkpointing, WT_CKPT_PREPARE); - WT_ERR(__wt_evict_file_exclusive_on(session)); + /* + * Sync for checkpoint allows splits to happen while the queue + * is being drained, but not reconciliation. We need to do this, + * since draining the queue can take long enough for hot pages + * to grow significantly larger than the configured maximum + * size. 
+ */ + F_SET(btree, WT_BTREE_NO_RECONCILE); + ret = __wt_evict_file_exclusive_on(session); + F_CLR(btree, WT_BTREE_NO_RECONCILE); + WT_ERR(ret); __wt_evict_file_exclusive_off(session); WT_PUBLISH(btree->checkpointing, WT_CKPT_RUNNING); @@ -183,7 +194,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) leaf_bytes += page->memory_footprint; ++leaf_pages; } - WT_ERR(__wt_reconcile(session, walk, NULL, 0)); + WT_ERR(__wt_reconcile( + session, walk, NULL, WT_CHECKPOINTING)); } break; case WT_SYNC_CLOSE: @@ -217,41 +229,9 @@ err: /* On error, clear any left-over tree walk. */ saved_snap_min == WT_TXN_NONE) __wt_txn_release_snapshot(session); - if (btree->checkpointing != WT_CKPT_OFF) { - /* - * Update the checkpoint generation for this handle so visible - * updates newer than the checkpoint can be evicted. - * - * This has to be published before eviction is enabled again, - * so that eviction knows that the checkpoint has completed. - */ - WT_PUBLISH(btree->checkpoint_gen, - conn->txn_global.checkpoint_gen); - WT_STAT_FAST_DATA_SET(session, - btree_checkpoint_generation, btree->checkpoint_gen); - - /* - * Clear the checkpoint flag and push the change; not required, - * but publishing the change means stalled eviction gets moving - * as soon as possible. - */ - btree->checkpointing = WT_CKPT_OFF; - WT_FULL_BARRIER(); - - /* - * If this tree was being skipped by the eviction server during - * the checkpoint, clear the wait. - */ - btree->evict_walk_period = 0; - - /* - * Wake the eviction server, in case application threads have - * stalled while the eviction server decided it couldn't make - * progress. Without this, application threads will be stalled - * until the eviction server next wakes. - */ - WT_TRET(__wt_evict_server_wake(session)); - } + /* Clear the checkpoint flag and push the change. 
*/ + if (btree->checkpointing != WT_CKPT_OFF) + WT_PUBLISH(btree->checkpointing, WT_CKPT_OFF); __wt_spin_unlock(session, &btree->flush_lock); diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c index bb8a750d848..17d32d6ed63 100644 --- a/src/third_party/wiredtiger/src/btree/bt_walk.c +++ b/src/third_party/wiredtiger/src/btree/bt_walk.c @@ -380,16 +380,6 @@ restart: /* /* Ascend to the parent. */ __ref_ascend(session, &ref, &pindex, &slot); - /* - * If we got all the way through an internal page and - * all of the child pages were deleted, mark it for - * eviction. - */ - if (empty_internal && pindex->entries > 1) { - __wt_page_evict_soon(ref->page); - empty_internal = false; - } - /* * If at the root and returning internal pages, return * the root page, otherwise we're done. Regardless, no @@ -403,6 +393,16 @@ restart: /* goto done; } + /* + * If we got all the way through an internal page and + * all of the child pages were deleted, mark it for + * eviction. + */ + if (empty_internal && pindex->entries > 1) { + WT_ERR(__wt_page_evict_soon(session, ref)); + empty_internal = false; + } + /* * Optionally return internal pages. Swap our previous * hazard pointer for the page we'll return. We don't diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c index 4afcd74520f..0f70e84de7e 100644 --- a/src/third_party/wiredtiger/src/btree/row_srch.c +++ b/src/third_party/wiredtiger/src/btree/row_srch.c @@ -775,7 +775,7 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) * traversing the skip list each time accumulates to real time. 
*/ if (samples > 5000) - __wt_page_evict_soon(page); + WT_RET(__wt_page_evict_soon(session, cbt->ref)); return (0); } diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index 1b656c5a0aa..192b80bb359 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -42,7 +42,6 @@ static const WT_CONFIG_CHECK static const WT_CONFIG_CHECK confchk_wiredtiger_open_checkpoint_subconfigs[] = { { "log_size", "int", NULL, "min=0,max=2GB", NULL, 0 }, - { "name", "string", NULL, NULL, NULL, 0 }, { "wait", "int", NULL, "min=0,max=100000", NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } }; @@ -67,16 +66,9 @@ static const WT_CONFIG_CHECK }; static const WT_CONFIG_CHECK - confchk_wiredtiger_open_log_subconfigs[] = { + confchk_WT_CONNECTION_reconfigure_log_subconfigs[] = { { "archive", "boolean", NULL, NULL, NULL, 0 }, - { "compressor", "string", NULL, NULL, NULL, 0 }, - { "enabled", "boolean", NULL, NULL, NULL, 0 }, - { "file_max", "int", NULL, "min=100KB,max=2GB", NULL, 0 }, - { "path", "string", NULL, NULL, NULL, 0 }, { "prealloc", "boolean", NULL, NULL, NULL, 0 }, - { "recover", "string", - NULL, "choices=[\"error\",\"on\"]", - NULL, 0 }, { "zero_fill", "boolean", NULL, NULL, NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } }; @@ -99,10 +91,9 @@ static const WT_CONFIG_CHECK }; static const WT_CONFIG_CHECK - confchk_wiredtiger_open_statistics_log_subconfigs[] = { + confchk_WT_CONNECTION_reconfigure_statistics_log_subconfigs[] = { { "json", "boolean", NULL, NULL, NULL, 0 }, { "on_close", "boolean", NULL, NULL, NULL, 0 }, - { "path", "string", NULL, NULL, NULL, 0 }, { "sources", "list", NULL, NULL, NULL, 0 }, { "timestamp", "string", NULL, NULL, NULL, 0 }, { "wait", "int", NULL, "min=0,max=100000", NULL, 0 }, @@ -117,16 +108,16 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { { "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 }, { 
"checkpoint", "category", NULL, NULL, - confchk_wiredtiger_open_checkpoint_subconfigs, 3 }, + confchk_wiredtiger_open_checkpoint_subconfigs, 2 }, { "error_prefix", "string", NULL, NULL, NULL, 0 }, { "eviction", "category", NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2 }, { "eviction_dirty_target", "int", - NULL, "min=5,max=99", + NULL, "min=1,max=99", NULL, 0 }, { "eviction_dirty_trigger", "int", - NULL, "min=5,max=99", + NULL, "min=1,max=99", NULL, 0 }, { "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 }, { "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 }, @@ -135,7 +126,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { confchk_wiredtiger_open_file_manager_subconfigs, 3 }, { "log", "category", NULL, NULL, - confchk_wiredtiger_open_log_subconfigs, 8 }, + confchk_WT_CONNECTION_reconfigure_log_subconfigs, 3 }, { "lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2 }, @@ -148,7 +139,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { NULL, 0 }, { "statistics_log", "category", NULL, NULL, - confchk_wiredtiger_open_statistics_log_subconfigs, 6 }, + confchk_WT_CONNECTION_reconfigure_statistics_log_subconfigs, 5 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," @@ -326,6 +317,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor[] = { { "append", "boolean", NULL, NULL, NULL, 0 }, { "bulk", "string", NULL, NULL, NULL, 0 }, { "checkpoint", "string", NULL, NULL, NULL, 0 }, + { "checkpoint_wait", "boolean", NULL, NULL, NULL, 0 }, { "dump", "string", NULL, "choices=[\"hex\",\"json\",\"print\"]", NULL, 0 }, @@ -607,6 +599,32 @@ static const WT_CONFIG_CHECK { NULL, NULL, NULL, NULL, NULL, 0 } }; +static const WT_CONFIG_CHECK + confchk_wiredtiger_open_log_subconfigs[] = { + { "archive", "boolean", NULL, NULL, NULL, 0 }, + { "compressor", "string", NULL, 
NULL, NULL, 0 }, + { "enabled", "boolean", NULL, NULL, NULL, 0 }, + { "file_max", "int", NULL, "min=100KB,max=2GB", NULL, 0 }, + { "path", "string", NULL, NULL, NULL, 0 }, + { "prealloc", "boolean", NULL, NULL, NULL, 0 }, + { "recover", "string", + NULL, "choices=[\"error\",\"on\"]", + NULL, 0 }, + { "zero_fill", "boolean", NULL, NULL, NULL, 0 }, + { NULL, NULL, NULL, NULL, NULL, 0 } +}; + +static const WT_CONFIG_CHECK + confchk_wiredtiger_open_statistics_log_subconfigs[] = { + { "json", "boolean", NULL, NULL, NULL, 0 }, + { "on_close", "boolean", NULL, NULL, NULL, 0 }, + { "path", "string", NULL, NULL, NULL, 0 }, + { "sources", "list", NULL, NULL, NULL, 0 }, + { "timestamp", "string", NULL, NULL, NULL, 0 }, + { "wait", "int", NULL, "min=0,max=100000", NULL, 0 }, + { NULL, NULL, NULL, NULL, NULL, 0 } +}; + static const WT_CONFIG_CHECK confchk_wiredtiger_open_transaction_sync_subconfigs[] = { { "enabled", "boolean", NULL, NULL, NULL, 0 }, @@ -625,7 +643,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { { "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 }, { "checkpoint", "category", NULL, NULL, - confchk_wiredtiger_open_checkpoint_subconfigs, 3 }, + confchk_wiredtiger_open_checkpoint_subconfigs, 2 }, { "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 }, { "config_base", "boolean", NULL, NULL, NULL, 0 }, { "create", "boolean", NULL, NULL, NULL, 0 }, @@ -640,10 +658,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2 }, { "eviction_dirty_target", "int", - NULL, "min=5,max=99", + NULL, "min=1,max=99", NULL, 0 }, { "eviction_dirty_trigger", "int", - NULL, "min=5,max=99", + NULL, "min=1,max=99", NULL, 0 }, { "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 }, { "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 }, @@ -706,7 +724,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { { "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 }, { 
"checkpoint", "category", NULL, NULL, - confchk_wiredtiger_open_checkpoint_subconfigs, 3 }, + confchk_wiredtiger_open_checkpoint_subconfigs, 2 }, { "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 }, { "config_base", "boolean", NULL, NULL, NULL, 0 }, { "create", "boolean", NULL, NULL, NULL, 0 }, @@ -721,10 +739,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2 }, { "eviction_dirty_target", "int", - NULL, "min=5,max=99", + NULL, "min=1,max=99", NULL, 0 }, { "eviction_dirty_trigger", "int", - NULL, "min=5,max=99", + NULL, "min=1,max=99", NULL, 0 }, { "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 }, { "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 }, @@ -788,7 +806,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { { "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 }, { "checkpoint", "category", NULL, NULL, - confchk_wiredtiger_open_checkpoint_subconfigs, 3 }, + confchk_wiredtiger_open_checkpoint_subconfigs, 2 }, { "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 }, { "direct_io", "list", NULL, "choices=[\"checkpoint\",\"data\",\"log\"]", @@ -801,10 +819,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2 }, { "eviction_dirty_target", "int", - NULL, "min=5,max=99", + NULL, "min=1,max=99", NULL, 0 }, { "eviction_dirty_trigger", "int", - NULL, "min=5,max=99", + NULL, "min=1,max=99", NULL, 0 }, { "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 }, { "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 }, @@ -864,7 +882,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { { "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 }, { "checkpoint", "category", NULL, NULL, - confchk_wiredtiger_open_checkpoint_subconfigs, 3 }, + confchk_wiredtiger_open_checkpoint_subconfigs, 2 }, { "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 }, { 
"direct_io", "list", NULL, "choices=[\"checkpoint\",\"data\",\"log\"]", @@ -877,10 +895,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2 }, { "eviction_dirty_target", "int", - NULL, "min=5,max=99", + NULL, "min=1,max=99", NULL, 0 }, { "eviction_dirty_trigger", "int", - NULL, "min=5,max=99", + NULL, "min=1,max=99", NULL, 0 }, { "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 }, { "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 }, @@ -970,17 +988,14 @@ static const WT_CONFIG_ENTRY config_entries[] = { }, { "WT_CONNECTION.reconfigure", "async=(enabled=0,ops_max=1024,threads=2),cache_overhead=8," - "cache_size=100MB,checkpoint=(log_size=0," - "name=\"WiredTigerCheckpoint\",wait=0),error_prefix=," - "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80," - "eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95" + "cache_size=100MB,checkpoint=(log_size=0,wait=0),error_prefix=," + "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=5," + "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95" ",file_manager=(close_handle_minimum=250,close_idle_time=30," - "close_scan_interval=10),log=(archive=,compressor=,enabled=0," - "file_max=100MB,path=\".\",prealloc=,recover=on,zero_fill=0)," + "close_scan_interval=10),log=(archive=,prealloc=,zero_fill=0)," "lsm_manager=(merge=,worker_thread_max=4),lsm_merge=," "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB)," - "statistics=none,statistics_log=(json=0,on_close=0," - "path=\"WiredTigerStat.%d.%H\",sources=," + "statistics=none,statistics_log=(json=0,on_close=0,sources=," "timestamp=\"%b %d %H:%M:%S\",wait=0),verbose=", confchk_WT_CONNECTION_reconfigure, 18 }, @@ -1052,10 +1067,10 @@ static const WT_CONFIG_ENTRY config_entries[] = { NULL, 0 }, { "WT_SESSION.open_cursor", - "append=0,bulk=0,checkpoint=,dump=,next_random=0," - 
"next_random_sample_size=0,overwrite=,raw=0,readonly=0," + "append=0,bulk=0,checkpoint=,checkpoint_wait=,dump=,next_random=0" + ",next_random_sample_size=0,overwrite=,raw=0,readonly=0," "skip_sort_check=0,statistics=,target=", - confchk_WT_SESSION_open_cursor, 12 + confchk_WT_SESSION_open_cursor, 13 }, { "WT_SESSION.rebalance", "", @@ -1168,21 +1183,20 @@ static const WT_CONFIG_ENTRY config_entries[] = { }, { "wiredtiger_open", "async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1," - "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0," - "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=," - "config_base=,create=0,direct_io=,encryption=(keyid=,name=," - "secretkey=),error_prefix=,eviction=(threads_max=1,threads_min=1)" - ",eviction_dirty_target=80,eviction_dirty_trigger=95," - "eviction_target=80,eviction_trigger=95,exclusive=0,extensions=," - "file_extend=,file_manager=(close_handle_minimum=250," - "close_idle_time=30,close_scan_interval=10),hazard_max=1000," - "in_memory=0,log=(archive=,compressor=,enabled=0,file_max=100MB," - "path=\".\",prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=" - ",worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0" - ",session_max=100,session_scratch_max=2MB," - "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB)," - "statistics=none,statistics_log=(json=0,on_close=0," - "path=\"WiredTigerStat.%d.%H\",sources=," + "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,wait=0)" + ",checkpoint_sync=,config_base=,create=0,direct_io=," + "encryption=(keyid=,name=,secretkey=),error_prefix=," + "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=5," + "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95" + ",exclusive=0,extensions=,file_extend=," + "file_manager=(close_handle_minimum=250,close_idle_time=30," + "close_scan_interval=10),hazard_max=1000,in_memory=0," + "log=(archive=,compressor=,enabled=0,file_max=100MB,path=\".\"," + 
"prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=," + "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0," + "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB" + ",name=,quota=0,reserve=0,size=500MB),statistics=none," + "statistics_log=(json=0,on_close=0,path=\".\",sources=," "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0" ",method=fsync),use_environment=,use_environment_priv=0,verbose=," "write_through=", @@ -1190,21 +1204,20 @@ static const WT_CONFIG_ENTRY config_entries[] = { }, { "wiredtiger_open_all", "async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1," - "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0," - "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=," - "config_base=,create=0,direct_io=,encryption=(keyid=,name=," - "secretkey=),error_prefix=,eviction=(threads_max=1,threads_min=1)" - ",eviction_dirty_target=80,eviction_dirty_trigger=95," - "eviction_target=80,eviction_trigger=95,exclusive=0,extensions=," - "file_extend=,file_manager=(close_handle_minimum=250," - "close_idle_time=30,close_scan_interval=10),hazard_max=1000," - "in_memory=0,log=(archive=,compressor=,enabled=0,file_max=100MB," - "path=\".\",prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=" - ",worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0" - ",session_max=100,session_scratch_max=2MB," - "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB)," - "statistics=none,statistics_log=(json=0,on_close=0," - "path=\"WiredTigerStat.%d.%H\",sources=," + "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,wait=0)" + ",checkpoint_sync=,config_base=,create=0,direct_io=," + "encryption=(keyid=,name=,secretkey=),error_prefix=," + "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=5," + "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95" + ",exclusive=0,extensions=,file_extend=," + "file_manager=(close_handle_minimum=250,close_idle_time=30," + 
"close_scan_interval=10),hazard_max=1000,in_memory=0," + "log=(archive=,compressor=,enabled=0,file_max=100MB,path=\".\"," + "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=," + "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0," + "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB" + ",name=,quota=0,reserve=0,size=500MB),statistics=none," + "statistics_log=(json=0,on_close=0,path=\".\",sources=," "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0" ",method=fsync),use_environment=,use_environment_priv=0,verbose=," "version=(major=0,minor=0),write_through=", @@ -1212,41 +1225,39 @@ static const WT_CONFIG_ENTRY config_entries[] = { }, { "wiredtiger_open_basecfg", "async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1," - "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0," - "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=," - "direct_io=,encryption=(keyid=,name=,secretkey=),error_prefix=," - "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80," - "eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95" - ",extensions=,file_extend=,file_manager=(close_handle_minimum=250" - ",close_idle_time=30,close_scan_interval=10),hazard_max=1000," - "log=(archive=,compressor=,enabled=0,file_max=100MB,path=\".\"," - "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=," - "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0," - "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB" - ",name=,quota=0,reserve=0,size=500MB),statistics=none," - "statistics_log=(json=0,on_close=0,path=\"WiredTigerStat.%d.%H\"," - "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," - "transaction_sync=(enabled=0,method=fsync),verbose=," + "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,wait=0)" + ",checkpoint_sync=,direct_io=,encryption=(keyid=,name=," + "secretkey=),error_prefix=,eviction=(threads_max=1,threads_min=1)" + ",eviction_dirty_target=5,eviction_dirty_trigger=20," + 
"eviction_target=80,eviction_trigger=95,extensions=,file_extend=," + "file_manager=(close_handle_minimum=250,close_idle_time=30," + "close_scan_interval=10),hazard_max=1000,log=(archive=," + "compressor=,enabled=0,file_max=100MB,path=\".\",prealloc=," + "recover=on,zero_fill=0),lsm_manager=(merge=,worker_thread_max=4)" + ",lsm_merge=,mmap=,multiprocess=0,readonly=0,session_max=100," + "session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,quota=0," + "reserve=0,size=500MB),statistics=none,statistics_log=(json=0," + "on_close=0,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\"," + "wait=0),transaction_sync=(enabled=0,method=fsync),verbose=," "version=(major=0,minor=0),write_through=", confchk_wiredtiger_open_basecfg, 33 }, { "wiredtiger_open_usercfg", "async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1," - "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0," - "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=," - "direct_io=,encryption=(keyid=,name=,secretkey=),error_prefix=," - "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80," - "eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95" - ",extensions=,file_extend=,file_manager=(close_handle_minimum=250" - ",close_idle_time=30,close_scan_interval=10),hazard_max=1000," - "log=(archive=,compressor=,enabled=0,file_max=100MB,path=\".\"," - "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=," - "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0," - "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB" - ",name=,quota=0,reserve=0,size=500MB),statistics=none," - "statistics_log=(json=0,on_close=0,path=\"WiredTigerStat.%d.%H\"," - "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," - "transaction_sync=(enabled=0,method=fsync),verbose=," + "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,wait=0)" + ",checkpoint_sync=,direct_io=,encryption=(keyid=,name=," + "secretkey=),error_prefix=,eviction=(threads_max=1,threads_min=1)" + 
",eviction_dirty_target=5,eviction_dirty_trigger=20," + "eviction_target=80,eviction_trigger=95,extensions=,file_extend=," + "file_manager=(close_handle_minimum=250,close_idle_time=30," + "close_scan_interval=10),hazard_max=1000,log=(archive=," + "compressor=,enabled=0,file_max=100MB,path=\".\",prealloc=," + "recover=on,zero_fill=0),lsm_manager=(merge=,worker_thread_max=4)" + ",lsm_merge=,mmap=,multiprocess=0,readonly=0,session_max=100," + "session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,quota=0," + "reserve=0,size=500MB),statistics=none,statistics_log=(json=0," + "on_close=0,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\"," + "wait=0),transaction_sync=(enabled=0,method=fsync),verbose=," "write_through=", confchk_wiredtiger_open_usercfg, 32 }, diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c index 98267eeeb2c..1c6b0c2b500 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -1217,7 +1217,8 @@ __conn_config_file(WT_SESSION_IMPL *session, return (0); /* Open the configuration file. */ - WT_RET(__wt_open(session, filename, WT_OPEN_FILE_TYPE_REGULAR, 0, &fh)); + WT_RET(__wt_open( + session, filename, WT_FS_OPEN_FILE_TYPE_REGULAR, 0, &fh)); WT_ERR(__wt_filesize(session, fh, &size)); if (size == 0) goto err; @@ -1510,8 +1511,8 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) exist = false; if (!is_create) WT_ERR(__wt_fs_exist(session, WT_WIREDTIGER, &exist)); - ret = __wt_open(session, WT_SINGLETHREAD, WT_OPEN_FILE_TYPE_REGULAR, - is_create || exist ? WT_OPEN_CREATE : 0, &conn->lock_fh); + ret = __wt_open(session, WT_SINGLETHREAD, WT_FS_OPEN_FILE_TYPE_REGULAR, + is_create || exist ? 
WT_FS_OPEN_CREATE : 0, &conn->lock_fh); /* * If this is a read-only connection and we cannot grab the lock @@ -1554,7 +1555,7 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) */ #define WT_SINGLETHREAD_STRING "WiredTiger lock file\n" WT_ERR(__wt_filesize(session, conn->lock_fh, &size)); - if (size != strlen(WT_SINGLETHREAD_STRING)) + if ((size_t)size != strlen(WT_SINGLETHREAD_STRING)) WT_ERR(__wt_write(session, conn->lock_fh, (wt_off_t)0, strlen(WT_SINGLETHREAD_STRING), WT_SINGLETHREAD_STRING)); @@ -1563,7 +1564,8 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) /* We own the lock file, optionally create the WiredTiger file. */ ret = __wt_open(session, WT_WIREDTIGER, - WT_OPEN_FILE_TYPE_REGULAR, is_create ? WT_OPEN_CREATE : 0, &fh); + WT_FS_OPEN_FILE_TYPE_REGULAR, is_create ? WT_FS_OPEN_CREATE : 0, + &fh); /* * If we're read-only, check for handled errors. Even if able to open @@ -1784,7 +1786,7 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[]) * runs. This doesn't matter for correctness, it's just cleaning up * random files. */ - WT_RET(__wt_remove_if_exists(session, WT_BASECONFIG_SET)); + WT_RET(__wt_remove_if_exists(session, WT_BASECONFIG_SET, false)); /* * The base configuration file is only written if creating the database, @@ -1809,7 +1811,7 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[]) return (0); WT_RET(__wt_fopen(session, WT_BASECONFIG_SET, - WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs)); + WT_FS_OPEN_CREATE | WT_FS_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs)); WT_ERR(__wt_fprintf(session, fs, "%s\n\n", "# Do not modify this file.\n" @@ -1870,7 +1872,8 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[]) if (0) { /* Close open file handle, remove any temporary file. 
*/ err: WT_TRET(__wt_fclose(session, &fs)); - WT_TRET(__wt_remove_if_exists(session, WT_BASECONFIG_SET)); + WT_TRET( + __wt_remove_if_exists(session, WT_BASECONFIG_SET, false)); } __wt_free(session, base_config); diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c index 9f15db5382b..e8bb7187418 100644 --- a/src/third_party/wiredtiger/src/conn/conn_cache.c +++ b/src/third_party/wiredtiger/src/conn/conn_cache.c @@ -176,6 +176,10 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]) &cache->evict_queues[i].evict_lock, "cache eviction")); } + /* Ensure there is always a non-NULL current queue. */ + cache->evict_current_queue = + &cache->evict_queues[WT_EVICT_URGENT_QUEUE + 1]; + /* * We get/set some values in the cache statistics (rather than have * two copies), configure them. @@ -197,7 +201,7 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session) WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_CONNECTION_STATS **stats; - uint64_t inuse, leaf, used; + uint64_t inuse, leaf; conn = S2C(session); cache = conn->cache; @@ -208,26 +212,29 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session) * There are races updating the different cache tracking values so * be paranoid calculating the leaf byte usage. */ - used = cache->bytes_overflow + cache->bytes_internal; - leaf = inuse > used ? inuse - used : 0; + leaf = inuse > cache->bytes_internal ? 
+ inuse - cache->bytes_internal : 0; WT_STAT_SET(session, stats, cache_bytes_max, conn->cache_size); WT_STAT_SET(session, stats, cache_bytes_inuse, inuse); - WT_STAT_SET(session, stats, cache_overhead, cache->overhead_pct); - WT_STAT_SET( - session, stats, cache_pages_inuse, __wt_cache_pages_inuse(cache)); + WT_STAT_SET( session, stats, cache_bytes_dirty, __wt_cache_dirty_inuse(cache)); - WT_STAT_SET(session, stats, - cache_eviction_maximum_page_size, cache->evict_max_page_size); - WT_STAT_SET(session, stats, cache_pages_dirty, cache->pages_dirty); - WT_STAT_SET( - session, stats, cache_bytes_internal, cache->bytes_internal); + session, stats, cache_bytes_image, __wt_cache_bytes_image(cache)); WT_STAT_SET( - session, stats, cache_bytes_overflow, cache->bytes_overflow); + session, stats, cache_pages_inuse, __wt_cache_pages_inuse(cache)); + WT_STAT_SET( + session, stats, cache_bytes_internal, cache->bytes_internal); WT_STAT_SET(session, stats, cache_bytes_leaf, leaf); + WT_STAT_SET( + session, stats, cache_bytes_other, __wt_cache_bytes_other(cache)); + + WT_STAT_SET(session, stats, + cache_eviction_maximum_page_size, cache->evict_max_page_size); + WT_STAT_SET(session, stats, cache_pages_dirty, + cache->pages_dirty_intl + cache->pages_dirty_leaf); /* * The number of files with active walks ~= number of hazard pointers @@ -235,7 +242,7 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session) */ if (conn->evict_session != NULL) WT_STAT_SET(session, stats, cache_eviction_walks_active, - conn->evict_session->nhazard); + cache->walk_session->nhazard); } /* @@ -267,11 +274,13 @@ __wt_cache_destroy(WT_SESSION_IMPL *session) __wt_errx(session, "cache server: exiting with %" PRIu64 " bytes in memory", cache->bytes_inmem); - if (cache->bytes_dirty != 0 || cache->pages_dirty != 0) + if (cache->bytes_dirty_intl + cache->bytes_dirty_leaf != 0 || + cache->pages_dirty_intl + cache->pages_dirty_leaf != 0) __wt_errx(session, "cache server: exiting with %" PRIu64 " bytes dirty and %" 
PRIu64 " pages dirty", - cache->bytes_dirty, cache->pages_dirty); + cache->bytes_dirty_intl + cache->bytes_dirty_leaf, + cache->pages_dirty_intl + cache->pages_dirty_leaf); WT_TRET(__wt_cond_auto_destroy(session, &cache->evict_cond)); WT_TRET(__wt_cond_destroy(session, &cache->evict_waiter_cond)); @@ -286,6 +295,7 @@ __wt_cache_destroy(WT_SESSION_IMPL *session) __wt_spin_destroy(session, &cache->evict_queues[i].evict_lock); __wt_free(session, cache->evict_queues[i].evict_queue); } + __wt_free(session, conn->cache); return (ret); } diff --git a/src/third_party/wiredtiger/src/conn/conn_ckpt.c b/src/third_party/wiredtiger/src/conn/conn_ckpt.c index a23350a5e46..d54c65c4767 100644 --- a/src/third_party/wiredtiger/src/conn/conn_ckpt.c +++ b/src/third_party/wiredtiger/src/conn/conn_ckpt.c @@ -19,61 +19,38 @@ __ckpt_server_config(WT_SESSION_IMPL *session, const char **cfg, bool *startp) { WT_CONFIG_ITEM cval; WT_CONNECTION_IMPL *conn; - WT_DECL_ITEM(tmp); - WT_DECL_RET; - char *p; + + *startp = false; conn = S2C(session); - /* - * The checkpoint configuration requires a wait time and/or a log - * size -- if one is not set, we're not running at all. - * Checkpoints based on log size also require logging be enabled. - */ WT_RET(__wt_config_gets(session, cfg, "checkpoint.wait", &cval)); conn->ckpt_usecs = (uint64_t)cval.val * WT_MILLION; WT_RET(__wt_config_gets(session, cfg, "checkpoint.log_size", &cval)); conn->ckpt_logsize = (wt_off_t)cval.val; - /* Checkpoints are incompatible with in-memory configuration */ - if (conn->ckpt_usecs != 0 || conn->ckpt_logsize != 0) { + /* + * The checkpoint configuration requires a wait time and/or a log size, + * if neither is set, we're not running at all. Checkpoints based on log + * size also require logging be enabled. 
+ */ + if (conn->ckpt_usecs != 0 || + (conn->ckpt_logsize != 0 && + FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))) { + /* Checkpoints are incompatible with in-memory configuration */ WT_RET(__wt_config_gets(session, cfg, "in_memory", &cval)); if (cval.val != 0) WT_RET_MSG(session, EINVAL, - "In memory configuration incompatible with " - "checkpoints"); - } + "checkpoint configuration incompatible with " + "in-memory configuration"); - __wt_log_written_reset(session); - if ((conn->ckpt_usecs == 0 && conn->ckpt_logsize == 0) || - (conn->ckpt_logsize && conn->ckpt_usecs == 0 && - !FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))) { - *startp = false; - return (0); - } - *startp = true; + __wt_log_written_reset(session); - /* - * The application can specify a checkpoint name, which we ignore if - * it's our default. - */ - WT_RET(__wt_config_gets(session, cfg, "checkpoint.name", &cval)); - if (cval.len != 0 && - !WT_STRING_MATCH(WT_CHECKPOINT, cval.str, cval.len)) { - WT_RET(__wt_checkpoint_name_ok(session, cval.str, cval.len)); - - WT_RET(__wt_scr_alloc(session, cval.len + 20, &tmp)); - WT_ERR(__wt_buf_fmt( - session, tmp, "name=%.*s", (int)cval.len, cval.str)); - WT_ERR(__wt_strdup(session, tmp->data, &p)); - - __wt_free(session, conn->ckpt_config); - conn->ckpt_config = p; + *startp = true; } -err: __wt_scr_free(session, &tmp); - return (ret); + return (0); } /* @@ -103,7 +80,7 @@ __ckpt_server(void *arg) __wt_cond_wait(session, conn->ckpt_cond, conn->ckpt_usecs)); /* Checkpoint the database. */ - WT_ERR(wt_session->checkpoint(wt_session, conn->ckpt_config)); + WT_ERR(wt_session->checkpoint(wt_session, NULL)); /* Reset. */ if (conn->ckpt_logsize) { @@ -179,7 +156,16 @@ __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[]) conn = S2C(session); start = false; - /* If there is already a server running, shut it down. */ + /* + * Stop any server that is already running. 
This means that each time + * reconfigure is called we'll bounce the server even if there are no + * configuration changes. This makes our life easier as the underlying + * configuration routine doesn't have to worry about freeing objects + * in the connection structure (it's guaranteed to always start with a + * blank slate), and we don't have to worry about races where a running + * server is reading configuration information that we're updating, and + * it's not expected that reconfiguration will happen a lot. + */ if (conn->ckpt_session != NULL) WT_RET(__wt_checkpoint_server_destroy(session)); @@ -211,8 +197,6 @@ __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session) } WT_TRET(__wt_cond_destroy(session, &conn->ckpt_cond)); - __wt_free(session, conn->ckpt_config); - /* Close the server thread's session. */ if (conn->ckpt_session != NULL) { wt_session = &conn->ckpt_session->iface; @@ -226,7 +210,6 @@ __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session) conn->ckpt_session = NULL; conn->ckpt_tid_set = false; conn->ckpt_cond = NULL; - conn->ckpt_config = NULL; conn->ckpt_usecs = 0; return (ret); diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c index 08fb2b24468..f52fccc7d1c 100644 --- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c +++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c @@ -63,6 +63,16 @@ __conn_dhandle_alloc(WT_SESSION_IMPL *session, if (strcmp(uri, WT_METAFILE_URI) == 0) F_SET(dhandle, WT_DHANDLE_IS_METADATA); + /* + * We are holding the data handle list lock, which protects most + * threads from seeing the new handle until that lock is released. + * + * However, the sweep server scans the list of handles without holding + * that lock, so we need a write barrier here to ensure the sweep + * server doesn't see a partially filled in structure. 
+ */ + WT_WRITE_BARRIER(); + /* * Prepend the handle to the connection list, assuming we're likely to * need new files again soon, until they are cached by all sessions. diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c index 1ae370ef2fa..18ed71e4688 100644 --- a/src/third_party/wiredtiger/src/conn/conn_log.c +++ b/src/third_party/wiredtiger/src/conn/conn_log.c @@ -51,6 +51,25 @@ __logmgr_config( WT_CONNECTION_IMPL *conn; bool enabled; + /* + * A note on reconfiguration: the standard "is this configuration string + * allowed" checks should fail if reconfiguration has invalid strings, + * for example, "log=(enabled)", or "statistics_log=(path=XXX)", because + * the connection reconfiguration method doesn't allow those strings. + * Additionally, the base configuration values during reconfiguration + * are the currently configured values (so we don't revert to default + * values when repeatedly reconfiguring), and configuration processing + * of a currently set value should not change the currently set value. + * + * In this code path, log server reconfiguration does not stop/restart + * the log server, so there's no point in re-evaluating configuration + * strings that cannot be reconfigured, risking bugs in configuration + * setup, and depending on evaluation of currently set values to always + * result in the currently set value. Skip tests for any configuration + * strings which don't make sense during reconfiguration, but don't + * worry about error reporting because it should never happen. + */ + conn = S2C(session); WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval)); @@ -62,6 +81,8 @@ __logmgr_config( * * If it is off and the user it turning it on, or it is on * and the user is turning it off, return an error. + * + * See above: should never happen. 
*/ if (reconfig && ((enabled && !FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) || @@ -83,6 +104,8 @@ __logmgr_config( * Setup a log path and compression even if logging is disabled in case * we are going to print a log. Only do this on creation. Once a * compressor or log path are set they cannot be changed. + * + * See above: should never happen. */ if (!reconfig) { conn->log_compressor = NULL; @@ -95,6 +118,7 @@ __logmgr_config( WT_RET(__wt_strndup( session, cval.str, cval.len, &conn->log_path)); } + /* We are done if logging isn't enabled. */ if (!*runp) return (0); @@ -103,13 +127,14 @@ __logmgr_config( if (cval.val != 0) FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE); + /* + * The file size cannot be reconfigured. The amount of memory allocated + * to the log slots may be based on the log file size at creation and we + * don't want to re-allocate that memory while running. + * + * See above: should never happen. + */ if (!reconfig) { - /* - * Ignore if the user tries to change the file size. The - * amount of memory allocated to the log slots may be based - * on the log file size at creation and we don't want to - * re-allocate that memory while running. - */ WT_RET(__wt_config_gets(session, cfg, "log.file_max", &cval)); conn->log_file_max = (wt_off_t)cval.val; WT_STAT_FAST_CONN_SET(session, @@ -125,12 +150,17 @@ __logmgr_config( conn->log_prealloc = 1; /* - * Note that it is meaningless to reconfigure this value during - * runtime. It only matters on create before recovery runs. + * Note it's meaningless to reconfigure this value during runtime, it + * only matters on create before recovery runs. + * + * See above: should never happen. 
*/ - WT_RET(__wt_config_gets_def(session, cfg, "log.recover", 0, &cval)); - if (cval.len != 0 && WT_STRING_MATCH("error", cval.str, cval.len)) - FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR); + if (!reconfig) { + WT_RET(__wt_config_gets_def( + session, cfg, "log.recover", 0, &cval)); + if (WT_STRING_MATCH("error", cval.str, cval.len)) + FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR); + } WT_RET(__wt_config_gets(session, cfg, "log.zero_fill", &cval)); if (cval.val != 0) { diff --git a/src/third_party/wiredtiger/src/conn/conn_stat.c b/src/third_party/wiredtiger/src/conn/conn_stat.c index 855ff57808e..4e7cac59c4a 100644 --- a/src/third_party/wiredtiger/src/conn/conn_stat.c +++ b/src/third_party/wiredtiger/src/conn/conn_stat.c @@ -35,6 +35,31 @@ __stat_sources_free(WT_SESSION_IMPL *session, char ***sources) } } +/* + * __stat_config_discard -- + * Discard all statistics-log configuration. + */ +static int +__stat_config_discard(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + + conn = S2C(session); + + /* + * Discard all statistics-log configuration information, called when + * reconfiguring or destroying the statistics logging setup, + */ + __wt_free(session, conn->stat_format); + ret = __wt_fclose(session, &conn->stat_fs); + __wt_free(session, conn->stat_path); + __stat_sources_free(session, &conn->stat_sources); + conn->stat_stamp = NULL; + conn->stat_usecs = 0; + return (ret); +} + /* * __wt_conn_stat_init -- * Initialize the per-connection statistics. 
@@ -73,20 +98,37 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp) WT_CONFIG objectconf; WT_CONFIG_ITEM cval, k, v; WT_CONNECTION_IMPL *conn; + WT_DECL_ITEM(tmp); WT_DECL_RET; int cnt; char **sources; + /* + * A note on reconfiguration: the standard "is this configuration string + * allowed" checks should fail if reconfiguration has invalid strings, + * for example, "log=(enabled)", or "statistics_log=(path=XXX)", because + * the connection reconfiguration method doesn't allow those strings. + * Additionally, the base configuration values during reconfiguration + * are the currently configured values (so we don't revert to default + * values when repeatedly reconfiguring), and configuration processing + * of a currently set value should not change the currently set value. + * + * In this code path, a previous statistics log server reconfiguration + * may have stopped the server (and we're about to restart it). Because + * stopping the server discarded the configured information stored in + * the connection structure, we have to re-evaluate all configuration + * values, reconfiguration can't skip any of them. + */ + conn = S2C(session); sources = NULL; - WT_RET(__wt_config_gets(session, cfg, "statistics_log.wait", &cval)); /* Only start the server if wait time is non-zero */ + WT_RET(__wt_config_gets(session, cfg, "statistics_log.wait", &cval)); *runp = cval.val != 0; conn->stat_usecs = (uint64_t)cval.val * WT_MILLION; - WT_RET(__wt_config_gets( - session, cfg, "statistics_log.json", &cval)); + WT_RET(__wt_config_gets(session, cfg, "statistics_log.json", &cval)); if (cval.val != 0) FLD_SET(conn->stat_flags, WT_CONN_STAT_JSON); @@ -96,24 +138,30 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp) FLD_SET(conn->stat_flags, WT_CONN_STAT_ON_CLOSE); /* - * Statistics logging configuration requires either a wait time or an - * on-close setting. 
- */ - if (!*runp && !FLD_ISSET(conn->stat_flags, WT_CONN_STAT_ON_CLOSE)) - return (0); - - /* - * If any statistics logging is done, this must not be a read-only - * connection. + * We don't allow the log path to be reconfigured for security reasons. + * (Applications passing input strings directly to reconfigure would + * expose themselves to a potential security problem, the utility of + * reconfiguring a statistics log path isn't worth the security risk.) + * + * See above for the details, but during reconfiguration we're loading + * the path value from the saved configuration information, and it's + * required during reconfiguration because we potentially stopped and + * are restarting, the server. */ - WT_RET(__wt_config_gets(session, cfg, "statistics_log.sources", &cval)); - WT_RET(__wt_config_subinit(session, &objectconf, &cval)); + WT_RET(__wt_config_gets(session, cfg, "statistics_log.path", &cval)); + WT_ERR(__wt_scr_alloc(session, 0, &tmp)); + WT_ERR(__wt_buf_fmt(session, + tmp, "%.*s/%s", (int)cval.len, cval.str, WT_STATLOG_FILENAME)); + WT_ERR(__wt_filename(session, tmp->data, &conn->stat_path)); + + WT_ERR(__wt_config_gets(session, cfg, "statistics_log.sources", &cval)); + WT_ERR(__wt_config_subinit(session, &objectconf, &cval)); for (cnt = 0; (ret = __wt_config_next(&objectconf, &k, &v)) == 0; ++cnt) ; - WT_RET_NOTFOUND_OK(ret); + WT_ERR_NOTFOUND_OK(ret); if (cnt != 0) { - WT_RET(__wt_calloc_def(session, cnt + 1, &sources)); - WT_RET(__wt_config_subinit(session, &objectconf, &cval)); + WT_ERR(__wt_calloc_def(session, cnt + 1, &sources)); + WT_ERR(__wt_config_subinit(session, &objectconf, &cval)); for (cnt = 0; (ret = __wt_config_next(&objectconf, &k, &v)) == 0; ++cnt) { /* @@ -138,29 +186,37 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp) sources = NULL; } - WT_ERR(__wt_config_gets(session, cfg, "statistics_log.path", &cval)); - WT_ERR(__wt_nfilename(session, cval.str, cval.len, &conn->stat_path)); - /* * When using JSON 
format, use the same timestamp format as MongoDB by - * default. + * default. This requires caution: the user might have set the timestamp + * in a previous reconfigure call and we don't want to override that, so + * compare the retrieved value with the default value to decide if we + * should use the JSON default. + * + * (This still implies if the user explicitly sets the timestamp to the + * default value, then sets the JSON flag in a separate reconfigure + * call, or vice-versa, we will incorrectly switch to the JSON default + * timestamp. But there's no way to detect that, and this is all a low + * probability path.) + * + * !!! + * Don't rewrite in the compressed "%FT%T.000Z" form, MSVC13 segfaults. */ - if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_JSON)) { - ret = __wt_config_gets( - session, &cfg[1], "statistics_log.timestamp", &cval); - if (ret == WT_NOTFOUND) - WT_ERR(__wt_strdup( - session, "%FT%T.000Z", &conn->stat_format)); - WT_ERR_NOTFOUND_OK(ret); - } - if (conn->stat_format == NULL) { - WT_ERR(__wt_config_gets( - session, cfg, "statistics_log.timestamp", &cval)); +#define WT_TIMESTAMP_DEFAULT "%b %d %H:%M:%S" +#define WT_TIMESTAMP_JSON_DEFAULT "%Y-%m-%dT%H:%M:%S.000Z" + WT_ERR(__wt_config_gets( + session, cfg, "statistics_log.timestamp", &cval)); + if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_JSON) && + WT_STRING_MATCH(WT_TIMESTAMP_DEFAULT, cval.str, cval.len)) + WT_ERR(__wt_strdup( + session, WT_TIMESTAMP_JSON_DEFAULT, &conn->stat_format)); + else WT_ERR(__wt_strndup( session, cval.str, cval.len, &conn->stat_format)); - } err: __stat_sources_free(session, &sources); + __wt_scr_free(session, &tmp); + return (ret); } @@ -373,7 +429,7 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp) if (path != NULL) (void)strcpy(path->mem, tmp->mem); WT_RET(__wt_fopen(session, tmp->mem, - WT_OPEN_CREATE | WT_OPEN_FIXED, WT_STREAM_APPEND, + WT_FS_OPEN_CREATE | WT_FS_OPEN_FIXED, WT_STREAM_APPEND, &log_stream)); } conn->stat_fs = log_stream; 
@@ -538,14 +594,23 @@ __wt_statlog_create(WT_SESSION_IMPL *session, const char *cfg[]) bool start; conn = S2C(session); - start = false; /* * Stop any server that is already running. This means that each time * reconfigure is called we'll bounce the server even if there are no - * configuration changes - but that makes our lives easier. + * configuration changes. This makes our life easier as the underlying + * configuration routine doesn't have to worry about freeing objects + * in the connection structure (it's guaranteed to always start with a + * blank slate), and we don't have to worry about races where a running + * server is reading configuration information that we're updating, and + * it's not expected that reconfiguration will happen a lot. + * + * If there's no server running, discard any configuration information + * so we don't leak memory during reconfiguration. */ - if (conn->stat_session != NULL) + if (conn->stat_session == NULL) + WT_RET(__stat_config_discard(session)); + else WT_RET(__wt_statlog_destroy(session, false)); WT_RET(__statlog_config(session, cfg, &start)); @@ -568,38 +633,28 @@ __wt_statlog_destroy(WT_SESSION_IMPL *session, bool is_close) conn = S2C(session); + /* Stop the server thread. */ F_CLR(conn, WT_CONN_SERVER_STATISTICS); if (conn->stat_tid_set) { WT_TRET(__wt_cond_signal(session, conn->stat_cond)); WT_TRET(__wt_thread_join(session, conn->stat_tid)); conn->stat_tid_set = false; } + WT_TRET(__wt_cond_destroy(session, &conn->stat_cond)); /* Log a set of statistics on shutdown if configured. */ if (is_close) WT_TRET(__wt_statlog_log_one(session)); - WT_TRET(__wt_cond_destroy(session, &conn->stat_cond)); - - __stat_sources_free(session, &conn->stat_sources); - __wt_free(session, conn->stat_path); - __wt_free(session, conn->stat_format); + /* Discard all configuration information. */ + WT_TRET(__stat_config_discard(session)); /* Close the server thread's session. 
*/ if (conn->stat_session != NULL) { wt_session = &conn->stat_session->iface; WT_TRET(wt_session->close(wt_session, NULL)); + conn->stat_session = NULL; } - /* Clear connection settings so reconfigure is reliable. */ - conn->stat_session = NULL; - conn->stat_tid_set = false; - conn->stat_format = NULL; - WT_TRET(__wt_fclose(session, &conn->stat_fs)); - conn->stat_path = NULL; - conn->stat_sources = NULL; - conn->stat_stamp = NULL; - conn->stat_usecs = 0; - return (ret); } diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c index 4ee23008687..63952169566 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_backup.c +++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c @@ -9,13 +9,12 @@ #include "wt_internal.h" static int __backup_all(WT_SESSION_IMPL *); -static int __backup_cleanup_handles(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *); static int __backup_list_append( WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, const char *); static int __backup_list_uri_append(WT_SESSION_IMPL *, const char *, bool *); static int __backup_start( WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, const char *[]); -static int __backup_stop(WT_SESSION_IMPL *); +static int __backup_stop(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *); static int __backup_uri(WT_SESSION_IMPL *, const char *[], bool *, bool *); /* @@ -76,20 +75,26 @@ __curbackup_close(WT_CURSOR *cursor) WT_CURSOR_BACKUP *cb; WT_DECL_RET; WT_SESSION_IMPL *session; - int tret; cb = (WT_CURSOR_BACKUP *)cursor; CURSOR_API_CALL(cursor, session, close, NULL); - WT_TRET(__backup_cleanup_handles(session, cb)); + /* + * When starting a hot backup, we serialize hot backup cursors and set + * the connection's hot-backup flag. Once that's done, we set the + * cursor's backup-locker flag, implying the cursor owns all necessary + * cleanup (including removing temporary files), regardless of error or + * success. 
The cursor's backup-locker flag is never cleared (it's just + * discarded when the cursor is closed), because that cursor will never + * not be responsible for cleanup. + */ + if (F_ISSET(cb, WT_CURBACKUP_LOCKER)) + WT_TRET(__backup_stop(session, cb)); + WT_TRET(__wt_cursor_close(cursor)); session->bkp_cursor = NULL; - WT_WITH_SCHEMA_LOCK(session, tret, - tret = __backup_stop(session)); /* Stop the backup. */ - WT_TRET(tret); - err: API_END_RET(session, ret); } @@ -144,11 +149,11 @@ __wt_curbackup_open(WT_SESSION_IMPL *session, ret = __backup_start(session, cb, cfg))); WT_ERR(ret); - /* __wt_cursor_init is last so we don't have to clean up on error. */ WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp)); if (0) { -err: __wt_free(session, cb); +err: WT_TRET(__curbackup_close(cursor)); + *cursorp = NULL; } return (ret); @@ -226,6 +231,9 @@ __backup_start( conn->hot_backup = true; WT_ERR(__wt_writeunlock(session, conn->hot_backup_lock)); + /* We're the lock holder, we own cleanup. */ + F_SET(cb, WT_CURBACKUP_LOCKER); + /* * Create a temporary backup file. This must be opened before * generating the list of targets in backup_uri. This file will @@ -235,7 +243,7 @@ __backup_start( * doesn't confuse restarting in the source database. */ WT_ERR(__wt_fopen(session, WT_BACKUP_TMP, - WT_OPEN_CREATE, WT_STREAM_WRITE, &cb->bfs)); + WT_FS_OPEN_CREATE, WT_STREAM_WRITE, &cb->bfs)); /* * If a list of targets was specified, work our way through them. * Else, generate a list of all database objects. @@ -261,7 +269,7 @@ __backup_start( */ dest = WT_INCREMENTAL_BACKUP; WT_ERR(__wt_fopen(session, WT_INCREMENTAL_SRC, - WT_OPEN_CREATE, WT_STREAM_WRITE, &srcfs)); + WT_FS_OPEN_CREATE, WT_STREAM_WRITE, &srcfs)); WT_ERR(__backup_list_append( session, cb, WT_INCREMENTAL_BACKUP)); } else { @@ -282,12 +290,9 @@ err: /* Close the hot backup file. 
*/ WT_TRET(__wt_fclose(session, &cb->bfs)); if (srcfs != NULL) WT_TRET(__wt_fclose(session, &srcfs)); - if (ret != 0) { - WT_TRET(__backup_cleanup_handles(session, cb)); - WT_TRET(__backup_stop(session)); - } else { + if (ret == 0) { WT_ASSERT(session, dest != NULL); - WT_TRET(__wt_fs_rename(session, WT_BACKUP_TMP, dest)); + WT_TRET(__wt_fs_rename(session, WT_BACKUP_TMP, dest, false)); } return (ret); @@ -295,9 +300,7 @@ err: /* Close the hot backup file. */ /* * __backup_cleanup_handles -- - * Release and free all btree handles held by the backup. This is kept - * separate from __backup_stop because it can be called without the - * schema lock held. + * Release and free all btree handles held by the backup. */ static int __backup_cleanup_handles(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb) @@ -325,15 +328,18 @@ __backup_cleanup_handles(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb) * Stop a backup. */ static int -__backup_stop(WT_SESSION_IMPL *session) +__backup_stop(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; conn = S2C(session); + /* Release all btree handles held by the backup. */ + WT_TRET(__backup_cleanup_handles(session, cb)); + /* Remove any backup specific file. */ - ret = __wt_backup_file_remove(session); + WT_TRET(__wt_backup_file_remove(session)); /* Checkpoint deletion can proceed, as can the next hot backup. */ WT_TRET(__wt_writelock(session, conn->hot_backup_lock)); @@ -443,10 +449,10 @@ __wt_backup_file_remove(WT_SESSION_IMPL *session) * always know we were a source directory while there's any chance of * an incremental backup file existing. 
*/ - WT_TRET(__wt_remove_if_exists(session, WT_BACKUP_TMP)); - WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_BACKUP)); - WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_SRC)); - WT_TRET(__wt_remove_if_exists(session, WT_METADATA_BACKUP)); + WT_TRET(__wt_remove_if_exists(session, WT_BACKUP_TMP, true)); + WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_BACKUP, true)); + WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_SRC, true)); + WT_TRET(__wt_remove_if_exists(session, WT_METADATA_BACKUP, true)); return (ret); } diff --git a/src/third_party/wiredtiger/src/cursor/cur_config.c b/src/third_party/wiredtiger/src/cursor/cur_config.c index e0d270e4245..2d3f3ffd176 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_config.c +++ b/src/third_party/wiredtiger/src/cursor/cur_config.c @@ -58,11 +58,11 @@ __wt_curconfig_open(WT_SESSION_IMPL *session, cursor->session = &session->iface; cursor->key_format = cursor->value_format = "S"; - /* __wt_cursor_init is last so we don't have to clean up on error. 
*/ WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp)); if (0) { -err: __wt_free(session, cconfig); +err: WT_TRET(__curconfig_close(cursor)); + *cursorp = NULL; } return (ret); } diff --git a/src/third_party/wiredtiger/src/cursor/cur_ds.c b/src/third_party/wiredtiger/src/cursor/cur_ds.c index d2b8d81ab37..8d4b7a9384b 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_ds.c +++ b/src/third_party/wiredtiger/src/cursor/cur_ds.c @@ -518,10 +518,7 @@ __wt_curds_open( source->flags = 0; if (0) { -err: if (F_ISSET(cursor, WT_CURSTD_OPEN)) - WT_TRET(cursor->close(cursor)); - else - __wt_free(session, data_source); +err: WT_TRET(__curds_close(cursor)); *cursorp = NULL; } diff --git a/src/third_party/wiredtiger/src/cursor/cur_dump.c b/src/third_party/wiredtiger/src/cursor/cur_dump.c index 595915df7b7..d7f18bb61ac 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_dump.c +++ b/src/third_party/wiredtiger/src/cursor/cur_dump.c @@ -401,13 +401,13 @@ __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp) cursor->json_private = child->json_private = json; } - /* __wt_cursor_init is last so we don't have to clean up on error. */ cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_open_cursor); cfg[1] = NULL; WT_ERR(__wt_cursor_init(cursor, NULL, owner, cfg, cursorp)); if (0) { -err: __wt_free(session, cursor); +err: WT_TRET(__curdump_close(cursor)); + *cursorp = NULL; } return (ret); } diff --git a/src/third_party/wiredtiger/src/cursor/cur_file.c b/src/third_party/wiredtiger/src/cursor/cur_file.c index fac903b4770..8e7bd4bbea5 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_file.c +++ b/src/third_party/wiredtiger/src/cursor/cur_file.c @@ -388,11 +388,11 @@ err: API_END_RET(session, ret); } /* - * __wt_curfile_create -- + * __curfile_create -- * Open a cursor for a given btree handle. 
*/ -int -__wt_curfile_create(WT_SESSION_IMPL *session, +static int +__curfile_create(WT_SESSION_IMPL *session, WT_CURSOR *owner, const char *cfg[], bool bulk, bool bitmap, WT_CURSOR **cursorp) { @@ -439,6 +439,13 @@ __wt_curfile_create(WT_SESSION_IMPL *session, cursor->value_format = btree->value_format; cbt->btree = btree; + /* + * Increment the data-source's in-use counter; done now because closing + * the cursor will decrement it, and all failure paths from here close + * the cursor. + */ + __wt_cursor_dhandle_incr_use(session); + if (session->dhandle->checkpoint != NULL) F_SET(cbt, WT_CBT_NO_TXN); @@ -478,7 +485,6 @@ __wt_curfile_create(WT_SESSION_IMPL *session, /* Underlying btree initialization. */ __wt_btcur_open(cbt); - /* __wt_cursor_init is last so we don't have to clean up on error. */ WT_ERR(__wt_cursor_init( cursor, cursor->internal_uri, owner, cfg, cursorp)); @@ -486,7 +492,8 @@ __wt_curfile_create(WT_SESSION_IMPL *session, WT_STAT_FAST_DATA_INCR(session, cursor_create); if (0) { -err: __wt_free(session, cbt); +err: WT_TRET(__curfile_close(cursor)); + *cursorp = NULL; } return (ret); @@ -503,9 +510,10 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CONFIG_ITEM cval; WT_DECL_RET; uint32_t flags; - bool bitmap, bulk; + bool bitmap, bulk, checkpoint_wait; bitmap = bulk = false; + checkpoint_wait = true; flags = 0; /* @@ -531,6 +539,12 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, else if (!WT_STRING_MATCH("unordered", cval.str, cval.len)) WT_RET_MSG(session, EINVAL, "Value for 'bulk' must be a boolean or 'bitmap'"); + + if (bulk) { + WT_RET(__wt_config_gets(session, + cfg, "checkpoint_wait", &cval)); + checkpoint_wait = cval.val != 0; + } } /* Bulk handles require exclusive access. */ @@ -540,11 +554,11 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, /* Get the handle and lock it while the cursor is using it. 
*/ if (WT_PREFIX_MATCH(uri, "file:")) { /* - * If we are opening exclusive, get the handle while holding - * the checkpoint lock. This prevents a bulk cursor open - * failing with EBUSY due to a database-wide checkpoint. + * If we are opening exclusive and don't want a bulk cursor + * open to fail with EBUSY due to a database-wide checkpoint, + * get the handle while holding the checkpoint lock. */ - if (LF_ISSET(WT_DHANDLE_EXCLUSIVE)) + if (LF_ISSET(WT_DHANDLE_EXCLUSIVE) && checkpoint_wait) WT_WITH_CHECKPOINT_LOCK(session, ret, ret = __wt_session_get_btree_ckpt( session, uri, cfg, flags)); @@ -555,10 +569,8 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, } else WT_RET(__wt_bad_object_type(session, uri)); - WT_ERR(__wt_curfile_create(session, owner, cfg, bulk, bitmap, cursorp)); + WT_ERR(__curfile_create(session, owner, cfg, bulk, bitmap, cursorp)); - /* Increment the data-source's in-use counter. */ - __wt_cursor_dhandle_incr_use(session); return (0); err: /* If the cursor could not be opened, release the handle. 
*/ diff --git a/src/third_party/wiredtiger/src/cursor/cur_index.c b/src/third_party/wiredtiger/src/cursor/cur_index.c index 6de68d86778..82a27d65ce6 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_index.c +++ b/src/third_party/wiredtiger/src/cursor/cur_index.c @@ -263,19 +263,57 @@ err: F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); static int __curindex_search_near(WT_CURSOR *cursor, int *exact) { + WT_CURSOR *child; WT_CURSOR_INDEX *cindex; WT_DECL_RET; + WT_ITEM found_key; WT_SESSION_IMPL *session; + int cmp; cindex = (WT_CURSOR_INDEX *)cursor; - JOINABLE_CURSOR_API_CALL(cursor, session, search_near, NULL); - __wt_cursor_set_raw_key(cindex->child, &cursor->key); - if ((ret = cindex->child->search_near(cindex->child, exact)) == 0) - ret = __curindex_move(cindex); - else - F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); + child = cindex->child; + JOINABLE_CURSOR_API_CALL(cursor, session, search, NULL); -err: API_END_RET(session, ret); + /* + * We are searching using the application-specified key, which + * (usually) doesn't contain the primary key, so it is just a prefix of + * any matching index key. That said, if there is an exact match, we + * want to find the first matching index entry and set exact equal to + * zero. Do a search_near, step to the next entry if we land on one + * that is too small, then check that the prefix matches. + */ + __wt_cursor_set_raw_key(child, &cursor->key); + WT_ERR(child->search_near(child, &cmp)); + + if (cmp < 0) + WT_ERR(child->next(child)); + + /* + * We expect partial matches, and want the smallest record with a key + * greater than or equal to the search key. + * + * If the key we find is shorter than the search key, it can't possibly + * match. + * + * The only way for the key to be exactly equal is if there is an index + * on the primary key, because otherwise the primary key columns will + * be appended to the index key, but we don't disallow that (odd) case. 
+ */ + found_key = child->key; + if (found_key.size < cursor->key.size) + WT_ERR(WT_NOTFOUND); + found_key.size = cursor->key.size; + + WT_ERR(__wt_compare( + session, cindex->index->collator, &cursor->key, &found_key, exact)); + + WT_ERR(__curindex_move(cindex)); + + if (0) { +err: F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); + } + + API_END_RET(session, ret); } /* diff --git a/src/third_party/wiredtiger/src/cursor/cur_log.c b/src/third_party/wiredtiger/src/cursor/cur_log.c index 0a13803da5d..2adf0c2b8ab 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_log.c +++ b/src/third_party/wiredtiger/src/cursor/cur_log.c @@ -315,16 +315,16 @@ __curlog_close(WT_CURSOR *cursor) WT_CONNECTION_IMPL *conn; WT_CURSOR_LOG *cl; WT_DECL_RET; - WT_LOG *log; WT_SESSION_IMPL *session; CURSOR_API_CALL(cursor, session, close, NULL); cl = (WT_CURSOR_LOG *)cursor; conn = S2C(session); + WT_ASSERT(session, FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)); - log = conn->log; - WT_TRET(__wt_readunlock(session, log->log_archive_lock)); - WT_TRET(__curlog_reset(cursor)); + if (F_ISSET(cl, WT_CURLOG_ARCHIVE_LOCK)) + WT_TRET(__wt_readunlock(session, conn->log->log_archive_lock)); + __wt_free(session, cl->cur_lsn); __wt_free(session, cl->next_lsn); __wt_scr_free(session, &cl->logrec); @@ -332,6 +332,7 @@ __curlog_close(WT_CURSOR *cursor) __wt_scr_free(session, &cl->opvalue); __wt_free(session, cl->packed_key); __wt_free(session, cl->packed_value); + WT_TRET(__wt_cursor_close(cursor)); err: API_END_RET(session, ret); @@ -401,23 +402,10 @@ __wt_curlog_open(WT_SESSION_IMPL *session, /* Log cursors block archiving. 
*/ WT_ERR(__wt_readlock(session, log->log_archive_lock)); + F_SET(cl, WT_CURLOG_ARCHIVE_LOCK); if (0) { -err: if (F_ISSET(cursor, WT_CURSTD_OPEN)) - WT_TRET(cursor->close(cursor)); - else { - __wt_free(session, cl->cur_lsn); - __wt_free(session, cl->next_lsn); - __wt_scr_free(session, &cl->logrec); - __wt_scr_free(session, &cl->opkey); - __wt_scr_free(session, &cl->opvalue); - /* - * NOTE: We cannot get on the error path with the - * readlock held. No need to unlock it unless that - * changes above. - */ - __wt_free(session, cl); - } +err: WT_TRET(__curlog_close(cursor)); *cursorp = NULL; } diff --git a/src/third_party/wiredtiger/src/cursor/cur_metadata.c b/src/third_party/wiredtiger/src/cursor/cur_metadata.c index 3d702e2ea8c..fc63ca13f7c 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_metadata.c +++ b/src/third_party/wiredtiger/src/cursor/cur_metadata.c @@ -475,9 +475,11 @@ __curmetadata_close(WT_CURSOR *cursor) mdc = (WT_CURSOR_METADATA *)cursor; file_cursor = mdc->file_cursor; CURSOR_API_CALL(cursor, session, - close, ((WT_CURSOR_BTREE *)file_cursor)->btree); + close, file_cursor == NULL ? 
+ NULL : ((WT_CURSOR_BTREE *)file_cursor)->btree); - ret = file_cursor->close(file_cursor); + if (file_cursor != NULL) + ret = file_cursor->close(file_cursor); WT_TRET(__wt_cursor_close(cursor)); err: API_END_RET(session, ret); @@ -552,9 +554,8 @@ __wt_curmetadata_open(WT_SESSION_IMPL *session, } if (0) { -err: if (mdc->file_cursor != NULL) - WT_TRET(mdc->file_cursor->close(mdc->file_cursor)); - __wt_free(session, mdc); +err: WT_TRET(__curmetadata_close(cursor)); + *cursorp = NULL; } return (ret); } diff --git a/src/third_party/wiredtiger/src/cursor/cur_stat.c b/src/third_party/wiredtiger/src/cursor/cur_stat.c index f7a8f5fc866..5c9159a4c0b 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_stat.c +++ b/src/third_party/wiredtiger/src/cursor/cur_stat.c @@ -36,22 +36,6 @@ __curstat_print_value(WT_SESSION_IMPL *session, uint64_t v, WT_ITEM *buf) return (0); } -/* - * __curstat_free_config -- - * Free the saved configuration string stack - */ -static void -__curstat_free_config(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) -{ - size_t i; - - if (cst->cfg != NULL) { - for (i = 0; cst->cfg[i] != NULL; ++i) - __wt_free(session, cst->cfg[i]); - __wt_free(session, cst->cfg); - } -} - /* * __curstat_get_key -- * WT_CURSOR->get_key for statistics cursors. @@ -334,11 +318,16 @@ __curstat_close(WT_CURSOR *cursor) WT_CURSOR_STAT *cst; WT_DECL_RET; WT_SESSION_IMPL *session; + size_t i; cst = (WT_CURSOR_STAT *)cursor; CURSOR_API_CALL(cursor, session, close, NULL); - __curstat_free_config(session, cst); + if (cst->cfg != NULL) { + for (i = 0; cst->cfg[i] != NULL; ++i) + __wt_free(session, cst->cfg[i]); + __wt_free(session, cst->cfg); + } __wt_buf_free(session, &cst->pv); __wt_free(session, cst->desc_buf); @@ -691,7 +680,6 @@ __wt_curstat_open(WT_SESSION_IMPL *session, /* The cursor isn't yet positioned. */ cst->notpositioned = true; - /* __wt_cursor_init is last so we don't have to clean up on error. 
*/ WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp)); if (0) { @@ -701,8 +689,8 @@ config_err: WT_ERR_MSG(session, EINVAL, } if (0) { -err: __curstat_free_config(session, cst); - __wt_free(session, cst); +err: WT_TRET(__curstat_close(cursor)); + *cursorp = NULL; } return (ret); diff --git a/src/third_party/wiredtiger/src/cursor/cur_table.c b/src/third_party/wiredtiger/src/cursor/cur_table.c index 6d50523043a..a14b40a1150 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_table.c +++ b/src/third_party/wiredtiger/src/cursor/cur_table.c @@ -525,15 +525,20 @@ __curtable_insert(WT_CURSOR *cursor) } /* - * WT_CURSOR.insert doesn't leave the cursor positioned, and the - * application may want to free the memory used to configure the - * insert; don't read that memory again (matching the underlying - * file object cursor insert semantics). + * Insert is the one cursor operation that doesn't end with the cursor + * pointing to an on-page item (except for column-store appends, where + * we are returning a key). That is, the application's cursor continues + * to reference the application's memory after a successful cursor call, + * which isn't true anywhere else. We don't want to have to explain that + * scoping corner case, so we reset the application's cursor so it can + * free the referenced memory and continue on without risking subsequent + * core dumps. 
*/ F_CLR(primary, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + if (F_ISSET(primary, WT_CURSTD_APPEND)) + F_SET(primary, WT_CURSTD_KEY_INT); err: CURSOR_UPDATE_API_END(session, ret); - return (ret); } diff --git a/src/third_party/wiredtiger/src/docs/security.dox b/src/third_party/wiredtiger/src/docs/security.dox index 331f74d969b..82e13ae7ad3 100644 --- a/src/third_party/wiredtiger/src/docs/security.dox +++ b/src/third_party/wiredtiger/src/docs/security.dox @@ -2,10 +2,23 @@ @section directory_permissions Database directory permissions -All WiredTiger files are stored in the database home directory, and the -WiredTiger database directory should have its permissions set to ensure -database objects are not accessible to users without appropriate -permissions. See @ref home for more information. +By default, WiredTiger files are stored beneath the database home directory. +The WiredTiger database directory should have its permissions set to ensure +database objects are not accessible to users without appropriate permissions. +See @ref home for more information. + +@section absolute_path Absolute paths + +WiredTiger prepends the name of the database home to file names which +do not appear to be absolute paths. (The absolute path test is +simplistic, matching a leading slash character on POSIX systems or a +leading alphabetic character and colon on Windows.) No file path +sanitization or validation is done by WiredTiger, for example, file +paths may match universal naming conventions (UNC), or include \c "../" +(dot dot slash) components. + +Applications are responsible for validating user-supplied file paths as +necessary to prevent directory traversal attacks. 
@section file_permissions File permissions diff --git a/src/third_party/wiredtiger/src/docs/spell.ok b/src/third_party/wiredtiger/src/docs/spell.ok index 96fe04d7426..e08eb7d1447 100644 --- a/src/third_party/wiredtiger/src/docs/spell.ok +++ b/src/third_party/wiredtiger/src/docs/spell.ok @@ -80,6 +80,7 @@ SCons Seward's SiH TXT +UNC URIs WILLNEED WiredTiger @@ -368,6 +369,7 @@ php png posix pre +prepends primary's printf printlog @@ -411,6 +413,7 @@ runtime rwlock sHQ sHq +sanitization scalable scanf schemas diff --git a/src/third_party/wiredtiger/src/docs/statistics.dox b/src/third_party/wiredtiger/src/docs/statistics.dox index 0a29e351e4e..36ce2711dc5 100644 --- a/src/third_party/wiredtiger/src/docs/statistics.dox +++ b/src/third_party/wiredtiger/src/docs/statistics.dox @@ -90,11 +90,20 @@ cursor. @section statistics_log Statistics logging -WiredTiger will optionally log database statistics into a file when the +WiredTiger will optionally log database statistics into files when the ::wiredtiger_open \c statistics_log configuration is set. -The resulting statistics can be displayed using the \c wtstats visualization -tool. For more information, see @ref_single wtstats. +The log files are named \c WiredTiger.%%d.%%H, where \c %%d is replaced +with the day of the month as a decimal number (01-31), and \c %%H +is replaced by the hour (24-hour clock) as a decimal number (00-23). +Each log file contains the statistics for the hour specified in its name. + +The location of the log files may be changed with the \c statistics_log.path +configuration string. + +The resulting statistics can be displayed and interactively examined +using the \c wtstats visualization tool. For more information, see +@ref_single wtstats.
The following example logs statistics every 30 seconds: @@ -120,7 +129,7 @@ Statistics for all underlying data sources of a particular type may be included by adding a partial data source URI to the \c statistics_log configuration string: -@snippet ex_all.c Statistics logging with all tables +@snippet ex_all.c Statistics logging with a source type When database statistics are logged, the database home will be the first space-separated entry for each record in the log file. For example: @@ -151,23 +160,9 @@ currently open in the database, nor will any statistics requiring the traversal of a tree (as if the \c statistics_fast configuration string were set). -The location of the log files may be changed with the \c statistics_log.path -configuration string. The \c path value value may contain ISO C90 standard -strftime conversion specifications. WiredTiger will not create non-existent -directories in the path, they must exist before ::wiredtiger_open is called. - -The following example logs statistics into files named with the month, -day and year: - -@snippet ex_all.c Statistics logging with path - A Python script that parses the default logging output and uses the gnuplot, utility to generate Portable Network Graphics (PNG) format graphs is included in the WiredTiger distribution in the file \c tools/statlog.py. -@m_if{c} -To interactively examine statistics results, see @ref wtstats. -@m_endif - */ diff --git a/src/third_party/wiredtiger/src/docs/upgrading.dox b/src/third_party/wiredtiger/src/docs/upgrading.dox index 5e824fee977..9d3d2239bb4 100644 --- a/src/third_party/wiredtiger/src/docs/upgrading.dox +++ b/src/third_party/wiredtiger/src/docs/upgrading.dox @@ -1,5 +1,48 @@ /*! @page upgrading Upgrading WiredTiger applications +@section version_281 Upgrading to Version 2.8.1 +
+
Cache management defaults
+
+The default values for the \c eviction_dirty_target and \c +eviction_dirty_trigger settings to ::wiredtiger_open have changed to 5 and 20, +respectively. This means that by default, WiredTiger will start writing dirty +pages from cache when it becomes 5% dirty and will throttle activity to keep +the volume of dirty data in cache under 20%. For write-heavy workloads, the +new defaults may result in lower throughput and more threads writing to data +files concurrently. + +These settings also now determine how much work is done at the beginning of a +checkpoint to make the critical section of checkpoints complete more quickly. +
+ +
Checkpoint server created checkpoint names
+
+The ::wiredtiger_open checkpoint configuration no longer supports the +\c name configuration, and checkpoint server created checkpoints will +always be named the default WiredTiger checkpoint name, +"WiredTigerCheckpoint". Applications depending on the ability to set the +checkpoint name for the checkpoint server will require modification. +
+ +
Statistics logging path
+
+The ::wiredtiger_open statistics logging path configuration has been +simplified to be only a path to a directory, and the file name component +of the path may no longer be specified. Applications depending on the +ability to set statistics log file names will require modification. +
+ +
Deprecated statistics field
+
+The connection statistic \c WT_STAT_CONN_CACHE_BYTES_OVERFLOW has been +removed. Overflow information is now available in the +\c WT_STAT_CONN_CACHE_BYTES_OVERFLOW and \c WT_STAT_CONN_CACHE_OVERFLOW_VALUE. +Applications specifically looking for that statistic will require +modification. +
+ +

@section version_280 Upgrading to Version 2.8.0
LSM metadata
@@ -55,7 +98,6 @@ The WiredTiger public API used to define a structure that could encapsulate log sequence numbers. That structure is no longer exposed publicly. -

@section version_270 Upgrading to Version 2.7.0 diff --git a/src/third_party/wiredtiger/src/docs/wtperf.dox b/src/third_party/wiredtiger/src/docs/wtperf.dox index e06272d117c..17b95660f79 100644 --- a/src/third_party/wiredtiger/src/docs/wtperf.dox +++ b/src/third_party/wiredtiger/src/docs/wtperf.dox @@ -210,6 +210,8 @@ if non zero choose a value from within this range as the key for insert operations @par random_value (boolean, default=false) generate random content for the value +@par range_partition (boolean, default=false) +partition data by range (vs hash) @par read_range (unsigned int, default=0) scan a range of keys after each search @par readonly (boolean, default=false) diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index 8ea487bbf83..7d3fd838dcd 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -17,7 +17,7 @@ static int __evict_page(WT_SESSION_IMPL *, bool); static int __evict_pass(WT_SESSION_IMPL *); static int __evict_server(WT_SESSION_IMPL *, bool *); static int __evict_walk(WT_SESSION_IMPL *, uint32_t); -static int __evict_walk_file(WT_SESSION_IMPL *, uint32_t, u_int *); +static int __evict_walk_file(WT_SESSION_IMPL *, uint32_t, u_int, u_int *); /* * __evict_read_gen -- @@ -31,11 +31,6 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry) uint64_t read_gen; btree = entry->btree; - - /* Never prioritize empty slots. */ - if (entry->ref == NULL) - return (UINT64_MAX); - page = entry->ref->page; /* Any page set to the oldest generation should be discarded. */ @@ -70,14 +65,15 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry) * Qsort function: sort the eviction array. 
*/ static int WT_CDECL -__evict_lru_cmp(const void *a, const void *b) +__evict_lru_cmp(const void *a_arg, const void *b_arg) { - uint64_t a_lru, b_lru; + const WT_EVICT_ENTRY *a = a_arg, *b = b_arg; + uint64_t a_score, b_score; - a_lru = __evict_read_gen(a); - b_lru = __evict_read_gen(b); + a_score = (a->ref == NULL ? UINT64_MAX : a->score); + b_score = (b->ref == NULL ? UINT64_MAX : b->score); - return ((a_lru < b_lru) ? -1 : (a_lru == b_lru) ? 0 : 1); + return ((a_score < b_score) ? -1 : (a_score == b_score) ? 0 : 1); } /* @@ -542,54 +538,49 @@ __evict_update_work(WT_SESSION_IMPL *session) cache->evict_max_refs_per_file = WT_MAX(100, WT_MILLION / (conn->open_file_count + 1)); + if (cache->evict_queues[WT_EVICT_URGENT_QUEUE].evict_current != NULL) + FLD_SET(cache->state, WT_EVICT_STATE_URGENT); + /* - * Page eviction overrides the dirty target and other types of eviction, - * that is, we don't care where we are with respect to the dirty target - * if page eviction is configured. + * If we need space in the cache, try to find clean pages to evict. * * Avoid division by zero if the cache size has not yet been set in a * shared cache. */ bytes_max = conn->cache_size + 1; bytes_inuse = __wt_cache_bytes_inuse(cache); - if (bytes_inuse > (cache->eviction_target * bytes_max) / 100) { - FLD_SET(cache->state, WT_EVICT_PASS_ALL); - goto done; - } + if (bytes_inuse > (cache->eviction_target * bytes_max) / 100) + FLD_SET(cache->state, WT_EVICT_STATE_CLEAN); /* - * If the cache has been stuck and is now under control, clear the - * stuck flag. + * Scrub dirty pages and keep them in cache if we are less than half + * way between the cache target and trigger. 
*/ - if (bytes_inuse < bytes_max) - F_CLR(cache, WT_CACHE_STUCK); + if (bytes_inuse < ((cache->eviction_target + cache->eviction_trigger) * + bytes_max) / 200) + FLD_SET(cache->state, WT_EVICT_STATE_SCRUB); - dirty_inuse = __wt_cache_dirty_inuse(cache); - if (dirty_inuse > (cache->eviction_dirty_target * bytes_max) / 100) { - FLD_SET(cache->state, WT_EVICT_PASS_DIRTY); - goto done; - } + dirty_inuse = __wt_cache_dirty_leaf_inuse(cache); + if (dirty_inuse > (cache->eviction_dirty_target * bytes_max) / 100) + FLD_SET(cache->state, WT_EVICT_STATE_DIRTY); /* - * Evict pages with oldest generation (which would otherwise block - * application threads), set regardless of whether we have reached - * the eviction trigger. + * If the cache has been stuck and is now under control, clear the + * stuck flag. */ - if (F_ISSET(cache, WT_CACHE_WOULD_BLOCK)) { - FLD_SET(cache->state, WT_EVICT_PASS_WOULD_BLOCK); - - F_CLR(cache, WT_CACHE_WOULD_BLOCK); - goto done; - } - - return (false); + if (bytes_inuse < bytes_max && + dirty_inuse < (cache->eviction_dirty_trigger * bytes_max) / 100) + F_CLR(cache, WT_CACHE_STUCK); -done: if (F_ISSET(cache, WT_CACHE_STUCK)) { + if (F_ISSET(cache, WT_CACHE_STUCK)) { + WT_ASSERT(session, cache->state != 0); WT_STAT_FAST_CONN_SET(session, cache_eviction_aggressive_set, 1); - FLD_SET(cache->state, WT_EVICT_PASS_AGGRESSIVE); + FLD_SET(cache->state, WT_EVICT_STATE_AGGRESSIVE); } - return (true); + + return (FLD_ISSET(cache->state, + WT_EVICT_STATE_ALL | WT_EVICT_STATE_URGENT)); } /* @@ -603,7 +594,7 @@ __evict_pass(WT_SESSION_IMPL *session) WT_CONNECTION_IMPL *conn; WT_EVICT_WORKER *worker; uint64_t pages_evicted; - int loop; + u_int loop; conn = S2C(session); cache = conn->cache; @@ -647,15 +638,14 @@ __evict_pass(WT_SESSION_IMPL *session) if (loop > 10) { WT_STAT_FAST_CONN_SET(session, cache_eviction_aggressive_set, 1); - FLD_SET(cache->state, WT_EVICT_PASS_AGGRESSIVE); + FLD_SET(cache->state, WT_EVICT_STATE_AGGRESSIVE); } /* * Start a worker if we 
have capacity and we haven't reached * the eviction targets. */ - if (FLD_ISSET(cache->state, WT_EVICT_PASS_ALL | - WT_EVICT_PASS_DIRTY | WT_EVICT_PASS_WOULD_BLOCK) && + if (FLD_ISSET(cache->state, WT_EVICT_STATE_ALL) && conn->evict_workers < conn->evict_workers_max) { WT_RET(__wt_verbose(session, WT_VERB_EVICTSERVER, "Starting evict worker: %"PRIu32"\n", @@ -671,7 +661,8 @@ __evict_pass(WT_SESSION_IMPL *session) WT_RET(__wt_verbose(session, WT_VERB_EVICTSERVER, "Eviction pass with: Max: %" PRIu64 " In use: %" PRIu64 " Dirty: %" PRIu64, - conn->cache_size, cache->bytes_inmem, cache->bytes_dirty)); + conn->cache_size, cache->bytes_inmem, + cache->bytes_dirty_intl + cache->bytes_dirty_leaf)); WT_RET(__evict_lru_walk(session)); WT_RET_NOTFOUND_OK(__evict_lru_pages(session, true)); @@ -682,29 +673,32 @@ __evict_pass(WT_SESSION_IMPL *session) * sleep, it's not something we can fix. */ if (pages_evicted == cache->pages_evict) { - WT_STAT_FAST_CONN_INCR(session, - cache_eviction_server_slept); /* * Back off if we aren't making progress: walks hold - * the handle list lock, which blocks other operations - * that can free space in cache, such as LSM discarding + * the handle list lock, blocking other operations that + * can free space in cache, such as LSM discarding * handles. + * + * Allow this wait to be interrupted (e.g. if a + * checkpoint completes): make sure we wait for a + * non-zero number of microseconds). */ - __wt_sleep(0, WT_THOUSAND * (uint64_t)loop); + WT_STAT_FAST_CONN_INCR(session, + cache_eviction_server_slept); + WT_RET(__wt_cond_wait(session, + cache->evict_cond, WT_THOUSAND * WT_MAX(loop, 1))); + if (loop == 100) { /* * Mark the cache as stuck if we need space * and aren't evicting any pages. 
*/ - if (!FLD_ISSET(cache->state, - WT_EVICT_PASS_WOULD_BLOCK)) { - F_SET(cache, WT_CACHE_STUCK); - WT_STAT_FAST_CONN_INCR( - session, cache_eviction_slow); - WT_RET(__wt_verbose( - session, WT_VERB_EVICTSERVER, - "unable to reach eviction goal")); - } + F_SET(cache, WT_CACHE_STUCK); + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_slow); + WT_RET(__wt_verbose( + session, WT_VERB_EVICTSERVER, + "unable to reach eviction goal")); break; } } else { @@ -927,26 +921,29 @@ __evict_lru_walk(WT_SESSION_IMPL *session) { WT_CACHE *cache; WT_DECL_RET; - WT_EVICT_QUEUE *evict_queue; - uint64_t cutoff, read_gen_oldest; + WT_EVICT_QUEUE *queue; + uint64_t read_gen_oldest; uint32_t candidates, entries, queue_index; cache = S2C(session)->cache; - queue_index = cache->evict_queue_fill++ % WT_EVICT_QUEUE_MAX; - evict_queue = &cache->evict_queues[queue_index]; + /* Fill the next queue (that isn't the urgent queue). */ + queue_index = + 1 + (cache->evict_queue_fill++ % (WT_EVICT_QUEUE_MAX - 1)); + queue = &cache->evict_queues[queue_index]; + /* Get some more pages to consider for eviction. */ if ((ret = __evict_walk(cache->walk_session, queue_index)) != 0) return (ret == EBUSY ? 0 : ret); /* Sort the list into LRU order and restart. 
*/ - __wt_spin_lock(session, &evict_queue->evict_lock); + __wt_spin_lock(session, &queue->evict_lock); - entries = evict_queue->evict_entries; - qsort(evict_queue->evict_queue, + entries = queue->evict_entries; + qsort(queue->evict_queue, entries, sizeof(WT_EVICT_ENTRY), __evict_lru_cmp); - while (entries > 0 && evict_queue->evict_queue[entries - 1].ref == NULL) + while (entries > 0 && queue->evict_queue[entries - 1].ref == NULL) --entries; /* @@ -956,9 +953,9 @@ __evict_lru_walk(WT_SESSION_IMPL *session) */ while (entries > WT_EVICT_WALK_BASE) __evict_list_clear(session, - &evict_queue->evict_queue[--entries]); + &queue->evict_queue[--entries]); - evict_queue->evict_entries = entries; + queue->evict_entries = entries; if (entries == 0) { /* @@ -966,23 +963,19 @@ __evict_lru_walk(WT_SESSION_IMPL *session) * Make sure application threads don't read past the end of the * candidate list, or they may race with the next walk. */ - evict_queue->evict_candidates = 0; - __wt_spin_unlock(session, &evict_queue->evict_lock); - __wt_spin_lock(session, &cache->evict_queue_lock); - cache->evict_current = NULL; - cache->evict_current_queue = NULL; - __wt_spin_unlock(session, &cache->evict_queue_lock); + queue->evict_candidates = 0; + queue->evict_current = NULL; + __wt_spin_unlock(session, &queue->evict_lock); return (0); } /* Decide how many of the candidates we're going to try and evict. */ - if (FLD_ISSET(cache->state, - WT_EVICT_PASS_AGGRESSIVE | WT_EVICT_PASS_WOULD_BLOCK)) { + if (FLD_ISSET(cache->state, WT_EVICT_STATE_AGGRESSIVE)) { /* * Take all candidates if we only gathered pages with an oldest * read generation set. 
*/ - evict_queue->evict_candidates = entries; + queue->evict_candidates = entries; } else { /* * Find the oldest read generation we have in the queue, used @@ -992,8 +985,7 @@ __evict_lru_walk(WT_SESSION_IMPL *session) read_gen_oldest = WT_READGEN_OLDEST; for (candidates = 0; candidates < entries; ++candidates) { read_gen_oldest = - __evict_read_gen( - &evict_queue->evict_queue[candidates]); + queue->evict_queue[candidates].score; if (read_gen_oldest != WT_READGEN_OLDEST) break; } @@ -1002,51 +994,45 @@ __evict_lru_walk(WT_SESSION_IMPL *session) * Take all candidates if we only gathered pages with an oldest * read generation set. * - * We normally never take more than 50% of the entries; if 50% - * of the entries were at the oldest read generation, take them. + * We normally never take more than 50% of the entries but if + * 50% of the entries were at the oldest read generation, take + * all of them. */ if (read_gen_oldest == WT_READGEN_OLDEST) - evict_queue->evict_candidates = entries; - else if (candidates >= entries / 2) - evict_queue->evict_candidates = candidates; + queue->evict_candidates = entries; + else if (candidates > entries / 2) + queue->evict_candidates = candidates; else { - /* Save the calculated oldest generation. */ - cache->read_gen_oldest = read_gen_oldest; - - /* Find the bottom 25% of read generations. */ - cutoff = - (3 * read_gen_oldest + __evict_read_gen( - &evict_queue->evict_queue[entries - 1])) / 4; - /* - * Don't take less than 10% or more than 50% of entries, - * regardless. That said, if there is only one entry, - * which is normal when populating an empty file, don't - * exclude it. + * Take all of the urgent pages plus a third of + * ordinary candidates (which could be expressed as + * WT_EVICT_WALK_INCR / WT_EVICT_WALK_BASE). In the + * steady state, we want to get as many candidates as + * the eviction walk adds to the queue. 
+ * + * That said, if there is only one entry, which is + * normal when populating an empty file, don't exclude + * it. */ - for (candidates = 1 + entries / 10; - candidates < entries / 2; - candidates++) - if (__evict_read_gen( - &evict_queue->evict_queue[candidates]) > - cutoff) - break; - evict_queue->evict_candidates = candidates; + queue->evict_candidates = + 1 + candidates + ((entries - candidates) - 1) / 3; + cache->read_gen_oldest = read_gen_oldest; } } - __wt_spin_unlock(session, &evict_queue->evict_lock); + queue->evict_current = queue->evict_queue; + __wt_spin_unlock(session, &queue->evict_lock); + /* * Now we can set the next queue. */ __wt_spin_lock(session, &cache->evict_queue_lock); - if (cache->evict_current == NULL) + if (cache->evict_current_queue->evict_current == NULL) WT_STAT_FAST_CONN_INCR(session, cache_eviction_queue_empty); else WT_STAT_FAST_CONN_INCR(session, cache_eviction_queue_not_empty); - cache->evict_current = evict_queue->evict_queue; - cache->evict_current_queue = evict_queue; + cache->evict_current_queue = queue; __wt_spin_unlock(session, &cache->evict_queue_lock); /* @@ -1070,9 +1056,8 @@ __evict_walk(WT_SESSION_IMPL *session, uint32_t queue_index) WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle; WT_DECL_RET; - WT_EVICT_QUEUE *evict_queue; - u_int max_entries, prev_slot, retries; - u_int slot, start_slot, spins; + WT_EVICT_QUEUE *queue; + u_int max_entries, prev_slot, retries, slot, start_slot, spins; bool dhandle_locked, incr; conn = S2C(session); @@ -1086,9 +1071,9 @@ __evict_walk(WT_SESSION_IMPL *session, uint32_t queue_index) * Set the starting slot in the queue and the maximum pages added * per walk. 
*/ - evict_queue = &cache->evict_queues[queue_index]; - start_slot = slot = evict_queue->evict_entries; - max_entries = slot + WT_EVICT_WALK_INCR; + queue = &cache->evict_queues[queue_index]; + start_slot = slot = queue->evict_entries; + max_entries = WT_MIN(slot + WT_EVICT_WALK_INCR, cache->evict_slots); retry: while (slot < max_entries && ret == 0) { /* @@ -1158,7 +1143,7 @@ retry: while (slot < max_entries && ret == 0) { */ if ((btree->checkpointing != WT_CKPT_OFF || btree->evict_priority != 0) && - !FLD_ISSET(cache->state, WT_EVICT_PASS_AGGRESSIVE)) + !FLD_ISSET(cache->state, WT_EVICT_STATE_AGGRESSIVE)) continue; /* Skip files if we have used all available hazard pointers. */ @@ -1171,7 +1156,6 @@ retry: while (slot < max_entries && ret == 0) { * useful in the past. */ if (btree->evict_walk_period != 0 && - evict_queue->evict_entries >= WT_EVICT_WALK_INCR && btree->evict_walk_skips++ < btree->evict_walk_period) continue; btree->evict_walk_skips = 0; @@ -1197,8 +1181,8 @@ retry: while (slot < max_entries && ret == 0) { if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) { cache->evict_file_next = dhandle; WT_WITH_DHANDLE(session, dhandle, - ret = __evict_walk_file( - session, queue_index, &slot)); + ret = __evict_walk_file(session, + queue_index, max_entries, &slot)); WT_ASSERT(session, session->split_gen == 0); } __wt_spin_unlock(session, &cache->evict_walk_lock); @@ -1234,39 +1218,49 @@ retry: while (slot < max_entries && ret == 0) { if (cache->pass_intr == 0 && ret == 0 && slot < max_entries && (retries < 2 || (retries < 10 && - !FLD_ISSET(cache->state, WT_EVICT_PASS_WOULD_BLOCK) && - (slot == evict_queue->evict_entries || slot > start_slot)))) { + (slot == queue->evict_entries || slot > start_slot)))) { start_slot = slot; ++retries; goto retry; } - evict_queue->evict_entries = slot; + queue->evict_entries = slot; return (ret); } /* - * __evict_init_candidate -- + * __evict_push_candidate -- * Initialize a WT_EVICT_ENTRY structure with a given page. 
*/ -static void -__evict_init_candidate(WT_SESSION_IMPL *session, - WT_EVICT_QUEUE *evict_queue, WT_EVICT_ENTRY *evict, WT_REF *ref) +static bool +__evict_push_candidate(WT_SESSION_IMPL *session, + WT_EVICT_QUEUE *queue, WT_EVICT_ENTRY *evict, WT_REF *ref) { u_int slot; + uint8_t orig_flags, new_flags; + + /* + * Threads can race to queue a page (e.g., an ordinary LRU walk can + * race with a page being queued for urgent eviction. + */ + orig_flags = new_flags = ref->page->flags_atomic; + FLD_SET(new_flags, WT_PAGE_EVICT_LRU); + if (orig_flags == new_flags || + !__wt_atomic_cas8(&ref->page->flags_atomic, orig_flags, new_flags)) + return (false); /* Keep track of the maximum slot we are using. */ - slot = (u_int)(evict - evict_queue->evict_queue); - if (slot >= evict_queue->evict_max) - evict_queue->evict_max = slot + 1; + slot = (u_int)(evict - queue->evict_queue); + if (slot >= queue->evict_max) + queue->evict_max = slot + 1; if (evict->ref != NULL) __evict_list_clear(session, evict); - evict->ref = ref; - evict->btree = S2BT(session); - /* Mark the page on the list; set last to flush the other updates. */ - F_SET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU); + evict->btree = S2BT(session); + evict->ref = ref; + evict->score = __evict_read_gen(evict); + return (true); } /* @@ -1274,34 +1268,73 @@ __evict_init_candidate(WT_SESSION_IMPL *session, * Get a few page eviction candidates from a single underlying file. 
*/ static int -__evict_walk_file(WT_SESSION_IMPL *session, uint32_t queue_index, u_int *slotp) +__evict_walk_file(WT_SESSION_IMPL *session, + uint32_t queue_index, u_int max_entries, u_int *slotp) { WT_BTREE *btree; WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_EVICT_ENTRY *end, *evict, *start; - WT_EVICT_QUEUE *evict_queue; + WT_EVICT_QUEUE *queue; WT_PAGE *page; WT_PAGE_MODIFY *mod; WT_REF *ref; + uint64_t btree_inuse, bytes_per_slot, cache_inuse; uint64_t pages_seen, refs_walked; - uint32_t walk_flags; + uint32_t remaining_slots, target_pages, total_slots, walk_flags; int internal_pages, restarts; bool enough, modified; conn = S2C(session); btree = S2BT(session); cache = conn->cache; - evict_queue = &cache->evict_queues[queue_index]; + queue = &cache->evict_queues[queue_index]; internal_pages = restarts = 0; enough = false; - start = evict_queue->evict_queue + *slotp; - end = start + WT_EVICT_WALK_PER_FILE; + /* + * Figure out how many slots to fill from this tree. + * Note that some care is taken in the calculation to avoid overflow. + */ + start = queue->evict_queue + *slotp; + remaining_slots = max_entries - *slotp; + btree_inuse = __wt_btree_bytes_inuse(session); + cache_inuse = __wt_cache_bytes_inuse(cache); + total_slots = max_entries - queue->evict_entries; + + /* + * The target number of pages for this tree is proportional to the + * space it is taking up in cache. Round to the nearest number of + * slots so we assign all of the slots to a tree filling 99+% of the + * cache (and only have to walk it once). + */ + bytes_per_slot = cache_inuse / total_slots; + target_pages = (uint32_t)( + (btree_inuse + bytes_per_slot / 2) / bytes_per_slot); + if (target_pages == 0) { + /* + * Randomly walk trees with a tiny fraction of the cache in + * case there are so many trees that none of them use enough of + * the cache to be allocated slots. 
+ * + * Map a random number into the range [0..1], and if the result + * is greater than the fraction of the cache used by this tree, + * give up. In other words, there is a small chance we will + * visit trees that use a small fraction of the cache. Arrange + * this calculation to avoid overflow (e.g., don't multiply + * anything by UINT32_MAX). + */ + if (__wt_random(&session->rnd) / (double)UINT32_MAX > + btree_inuse / (double)cache_inuse) + return (0); + target_pages = 10; + } + if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD) || - end > evict_queue->evict_queue + cache->evict_slots) - end = evict_queue->evict_queue + cache->evict_slots; + target_pages > remaining_slots) + target_pages = remaining_slots; + end = start + target_pages; walk_flags = WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT; @@ -1352,14 +1385,11 @@ __evict_walk_file(WT_SESSION_IMPL *session, uint32_t queue_index, u_int *slotp) /* * It's possible (but unlikely) to visit a page without a read * generation, if we race with the read instantiating the page. - * Ignore those pages, but set the page's read generation here - * to ensure a bug doesn't somehow leave a page without a read - * generation. + * Set the page's read generation here to ensure a bug doesn't + * somehow leave a page without a read generation. */ - if (page->read_gen == WT_READGEN_NOTSET) { + if (page->read_gen == WT_READGEN_NOTSET) __wt_cache_read_gen_new(session, page); - continue; - } /* Pages we no longer need (clean or dirty), are found money. */ if (page->read_gen == WT_READGEN_OLDEST) { @@ -1367,26 +1397,23 @@ __evict_walk_file(WT_SESSION_IMPL *session, uint32_t queue_index, u_int *slotp) session, cache_eviction_pages_queued_oldest); goto fast; } + if (__wt_page_is_empty(page) || - F_ISSET(session->dhandle, WT_DHANDLE_DEAD)) + F_ISSET(session->dhandle, WT_DHANDLE_DEAD) || + FLD_ISSET(cache->state, WT_EVICT_STATE_AGGRESSIVE)) goto fast; /* Skip clean pages if appropriate. 
*/ if (!modified && (F_ISSET(conn, WT_CONN_IN_MEMORY) || - FLD_ISSET(cache->state, WT_EVICT_PASS_DIRTY))) + !FLD_ISSET(cache->state, WT_EVICT_STATE_CLEAN))) continue; - /* - * If we are only trickling out pages marked for definite - * eviction, skip anything that isn't marked. - */ - if (FLD_ISSET(cache->state, WT_EVICT_PASS_WOULD_BLOCK) && - page->memory_footprint < btree->splitmempage) + /* Skip dirty pages if appropriate. */ + if (modified && !FLD_ISSET(cache->state, WT_EVICT_STATE_DIRTY)) continue; - /* Limit internal pages to 50% unless we get aggressive. */ + /* Limit internal pages to 50% of the total. */ if (WT_PAGE_IS_INTERNAL(page) && - !FLD_ISSET(cache->state, WT_EVICT_PASS_AGGRESSIVE) && internal_pages >= (int)(evict - start) / 2) continue; @@ -1410,8 +1437,7 @@ fast: /* If the page can't be evicted, give up. */ * configure lookaside table writes in reconciliation, allowing * us to evict pages we can't usually evict. */ - if (!FLD_ISSET(cache->state, - WT_EVICT_PASS_AGGRESSIVE | WT_EVICT_PASS_WOULD_BLOCK)) { + if (!FLD_ISSET(cache->state, WT_EVICT_STATE_AGGRESSIVE)) { /* * If the page is clean but has modifications that * appear too new to evict, skip it. @@ -1422,7 +1448,8 @@ fast: /* If the page can't be evicted, give up. */ } WT_ASSERT(session, evict->ref == NULL); - __evict_init_candidate(session, evict_queue, evict, ref); + if (!__evict_push_candidate(session, queue, evict, ref)) + continue; ++evict; if (WT_PAGE_IS_INTERNAL(page)) @@ -1479,19 +1506,21 @@ __evict_check_entry_size(WT_SESSION_IMPL *session, WT_EVICT_ENTRY *entry) cache = S2C(session)->cache; - if (cache->pages_evict == 0) + if (cache->pages_evict == 0 || cache->bytes_evict < WT_MEGABYTE) return (true); max = (cache->bytes_evict / cache->pages_evict) * 4; if ((ref = entry->ref) != NULL) { if ((page = ref->page) == NULL) return (true); + /* - * If this page is more than four times the average evicted page - * size then return false. Return true in all other cases. 
- * XXX Should we care here if the page is dirty? Probably... + * If this page is dirty and more than four times the average + * evicted page size then return false. Return true in all + * other cases. */ - if (page->memory_footprint > max) { + if (__wt_page_is_modified(page) && + page->memory_footprint > max) { WT_STAT_FAST_CONN_INCR( session, cache_eviction_server_toobig); return (false); @@ -1510,71 +1539,85 @@ __evict_get_ref( { WT_CACHE *cache; WT_EVICT_ENTRY *evict; - WT_EVICT_QUEUE *evict_queue; + WT_EVICT_QUEUE *queue, *urgent_queue; uint32_t candidates; cache = S2C(session)->cache; + urgent_queue = &cache->evict_queues[WT_EVICT_URGENT_QUEUE]; *btreep = NULL; *refp = NULL; - /* - * Avoid the LRU lock if no pages are available. - */ + /* Avoid the LRU lock if no pages are available. */ WT_STAT_FAST_CONN_INCR(session, cache_eviction_get_ref); - if (cache->evict_current == NULL) { + if (cache->evict_current_queue->evict_current == NULL && + urgent_queue->evict_current == NULL) { WT_STAT_FAST_CONN_INCR(session, cache_eviction_get_ref_empty); return (WT_NOTFOUND); } + __wt_spin_lock(session, &cache->evict_queue_lock); + + /* Check the urgent queue first. */ + queue = urgent_queue->evict_current != NULL && + (FLD_ISSET(cache->state, WT_EVICT_STATE_AGGRESSIVE) || + (F_ISSET(session, WT_SESSION_INTERNAL) && + (!is_server || S2C(session)->evict_workers <= 1))) ? + urgent_queue : cache->evict_current_queue; + + __wt_spin_unlock(session, &cache->evict_queue_lock); + /* - * Verify there are still pages available. + * Only evict half of the pages before looking for more. The remainder + * are left to eviction workers (if configured), or application threads + * if necessary. 
*/ - if (cache->evict_current == NULL) { - __wt_spin_unlock(session, &cache->evict_queue_lock); - WT_STAT_FAST_CONN_INCR(session, cache_eviction_get_ref_empty2); - return (WT_NOTFOUND); - } + candidates = queue->evict_candidates; + if (is_server && queue != urgent_queue && candidates > 1) + candidates /= 2; + /* - * We got the queue lock, which should be fast, and now we want to - * get the lock on the individual queue. We know that the shared - * queue fields cannot change now. + * We got the queue lock, which should be fast, and chose a queue. + * Now we want to get the lock on the individual queue. */ - evict_queue = cache->evict_current_queue; for (;;) { - if (__wt_spin_trylock(session, &evict_queue->evict_lock) == 0) - break; - if (!F_ISSET(session, WT_SESSION_INTERNAL)) { - __wt_spin_unlock(session, &cache->evict_queue_lock); + /* Verify there are still pages available. */ + if (queue->evict_current == NULL || (uint32_t) + (queue->evict_current - queue->evict_queue) >= candidates) { + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_get_ref_empty2); return (WT_NOTFOUND); } - __wt_yield(); + if (!is_server) + __wt_spin_lock(session, &queue->evict_lock); + else if (__wt_spin_trylock(session, &queue->evict_lock) != 0) + continue; + break; } - /* - * Only evict half of the pages before looking for more. The remainder - * are left to eviction workers (if configured), or application threads - * if necessary. - */ - candidates = evict_queue->evict_candidates; - if (is_server && candidates > 1) - candidates /= 2; /* Get the next page queued for eviction. 
*/ - while ((evict = cache->evict_current) != NULL && - evict < evict_queue->evict_queue + candidates && - evict->ref != NULL) { + for (evict = queue->evict_current; + evict >= queue->evict_queue && + evict < queue->evict_queue + candidates; + ++evict) { + if (evict->ref == NULL) + continue; WT_ASSERT(session, evict->btree != NULL); + /* - * If the server is helping out and encounters an entry that - * is too large, it stops helping. Evicting a very large - * page in the server thread could stall eviction from finding - * new work. + * If the server is helping out and encounters an entry that is + * too large, it stops helping. Evicting a very large page in + * the server thread could stall eviction from finding new + * work. + * + * However, we can't skip entries in the urgent queue or they + * may never be found again. */ - if (is_server && S2C(session)->evict_workers > 1 && - !__evict_check_entry_size(session, evict)) + if (is_server && queue != urgent_queue && + S2C(session)->evict_workers > 1 && + !__evict_check_entry_size(session, evict)) { + --evict; break; - - /* Move to the next item. */ - ++cache->evict_current; + } /* * Lock the page while holding the eviction mutex to prevent @@ -1604,11 +1647,14 @@ __evict_get_ref( break; } - /* Clear the current pointer if there are no more candidates. */ - if (evict >= evict_queue->evict_queue + evict_queue->evict_candidates) - cache->evict_current = NULL; - __wt_spin_unlock(session, &evict_queue->evict_lock); - __wt_spin_unlock(session, &cache->evict_queue_lock); + /* Move to the next item. */ + if (evict != NULL && evict + 1 < + queue->evict_queue + queue->evict_candidates) + queue->evict_current = evict + 1; + else /* Clear the current pointer if there are no more candidates. */ + queue->evict_current = NULL; + + __wt_spin_unlock(session, &queue->evict_lock); return ((*refp == NULL) ? 
WT_NOTFOUND : 0); } @@ -1633,16 +1679,14 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server) * An internal session flags either the server itself or an eviction * worker thread. */ - if (F_ISSET(session, WT_SESSION_INTERNAL)) { - if (is_server) { - WT_STAT_FAST_CONN_INCR( - session, cache_eviction_server_evicting); - cache->server_evicts++; - } else { - WT_STAT_FAST_CONN_INCR( - session, cache_eviction_worker_evicting); - cache->worker_evicts++; - } + if (is_server) { + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_server_evicting); + cache->server_evicts++; + } else if (F_ISSET(session, WT_SESSION_INTERNAL)) { + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_worker_evicting); + cache->worker_evicts++; } else { if (__wt_page_is_modified(ref->page)) WT_STAT_FAST_CONN_INCR( @@ -1767,6 +1811,64 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) /* NOTREACHED */ } +/* + * __wt_page_evict_soon -- + * Set a page to be evicted as soon as possible. + */ +int +__wt_page_evict_soon(WT_SESSION_IMPL *session, WT_REF *ref) +{ + WT_CACHE *cache; + WT_EVICT_ENTRY *evict; + WT_EVICT_QUEUE *urgent_queue; + WT_PAGE *page; + bool queued; + + /* Root pages should never be evicted via LRU. */ + WT_ASSERT(session, !__wt_ref_is_root(ref)); + + page = ref->page; + page->read_gen = WT_READGEN_OLDEST; + if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU) || + F_ISSET(S2BT(session), WT_BTREE_NO_EVICTION)) + return (0); + + /* Append to the urgent queue if we can. 
*/ + cache = S2C(session)->cache; + urgent_queue = &cache->evict_queues[WT_EVICT_URGENT_QUEUE]; + queued = false; + + __wt_spin_lock(session, &cache->evict_queue_lock); + if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU) || + F_ISSET(S2BT(session), WT_BTREE_NO_EVICTION)) + goto done; + + __wt_spin_lock(session, &urgent_queue->evict_lock); + if (urgent_queue->evict_current == NULL) { + urgent_queue->evict_current = urgent_queue->evict_queue; + urgent_queue->evict_candidates = 0; + } + evict = urgent_queue->evict_queue + urgent_queue->evict_candidates; + if (evict < urgent_queue->evict_queue + WT_EVICT_QUEUE_MAX && + __evict_push_candidate(session, urgent_queue, evict, ref)) { + ++urgent_queue->evict_candidates; + queued = true; + } + __wt_spin_unlock(session, &urgent_queue->evict_lock); + +done: __wt_spin_unlock(session, &cache->evict_queue_lock); + if (queued) { + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_pages_queued_urgent); + if (S2C(session)->evict_workers > 1) + WT_RET(__wt_cond_signal( + session, cache->evict_waiter_cond)); + else + WT_RET(__wt_evict_server_wake(session)); + } + return (0); +} + /* * __wt_evict_priority_set -- * Set a tree's eviction priority. 
@@ -1801,13 +1903,15 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) WT_DATA_HANDLE *dhandle, *saved_dhandle; WT_PAGE *page; WT_REF *next_walk; - uint64_t dirty_bytes, dirty_pages, intl_bytes, intl_pages; - uint64_t leaf_bytes, leaf_pages; - uint64_t max_dirty_bytes, max_intl_bytes, max_leaf_bytes, total_bytes; + uint64_t intl_bytes, intl_bytes_max, intl_dirty_bytes; + uint64_t intl_dirty_bytes_max, intl_dirty_pages, intl_pages; + uint64_t leaf_bytes, leaf_bytes_max, leaf_dirty_bytes; + uint64_t leaf_dirty_bytes_max, leaf_dirty_pages, leaf_pages; + uint64_t total_bytes, total_dirty_bytes; size_t size; conn = S2C(session); - total_bytes = 0; + total_bytes = total_dirty_bytes = 0; if (ofile == NULL) fp = stderr; @@ -1823,9 +1927,10 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) !F_ISSET(dhandle, WT_DHANDLE_OPEN)) continue; - dirty_bytes = dirty_pages = intl_bytes = intl_pages = 0; - leaf_bytes = leaf_pages = 0; - max_dirty_bytes = max_intl_bytes = max_leaf_bytes = 0; + intl_bytes = intl_bytes_max = intl_dirty_bytes = 0; + intl_dirty_bytes_max = intl_dirty_pages = intl_pages = 0; + leaf_bytes = leaf_bytes_max = leaf_dirty_bytes = 0; + leaf_dirty_bytes_max = leaf_dirty_pages = leaf_pages = 0; next_walk = NULL; session->dhandle = dhandle; @@ -1838,17 +1943,23 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) if (WT_PAGE_IS_INTERNAL(page)) { ++intl_pages; intl_bytes += size; - max_intl_bytes = WT_MAX(max_intl_bytes, size); + intl_bytes_max = WT_MAX(intl_bytes_max, size); + if (__wt_page_is_modified(page)) { + ++intl_dirty_pages; + intl_dirty_bytes += size; + intl_dirty_bytes_max = + WT_MAX(intl_dirty_bytes_max, size); + } } else { ++leaf_pages; leaf_bytes += size; - max_leaf_bytes = WT_MAX(max_leaf_bytes, size); - } - if (__wt_page_is_modified(page)) { - ++dirty_pages; - dirty_bytes += size; - max_dirty_bytes = - WT_MAX(max_dirty_bytes, size); + leaf_bytes_max = WT_MAX(leaf_bytes_max, size); + if 
(__wt_page_is_modified(page)) { + ++leaf_dirty_pages; + leaf_dirty_bytes += size; + leaf_dirty_bytes_max = + WT_MAX(leaf_dirty_bytes_max, size); + } } } session->dhandle = NULL; @@ -1860,21 +1971,41 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) dhandle->name, dhandle->checkpoint); if (intl_pages != 0) (void)fprintf(fp, - "\t" "internal pages: %" PRIu64 " pages, %" PRIu64 - " max, %" PRIu64 "MB total\n", - intl_pages, max_intl_bytes, intl_bytes >> 20); + "\t" "internal: " + "%" PRIu64 " pages, " + "%" PRIu64 "MB, " + "%" PRIu64 "/%" PRIu64 " clean/dirty pages, " + "%" PRIu64 "/%" PRIu64 " clean/dirty MB, " + "%" PRIu64 "MB max page, " + "%" PRIu64 "MB max dirty page\n", + intl_pages, + intl_bytes >> 20, + intl_pages - intl_dirty_pages, + intl_dirty_pages, + (intl_bytes - intl_dirty_bytes) >> 20, + intl_dirty_bytes >> 20, + intl_bytes_max >> 20, + intl_dirty_bytes_max >> 20); if (leaf_pages != 0) (void)fprintf(fp, - "\t" "leaf pages: %" PRIu64 " pages, %" PRIu64 - " max, %" PRIu64 "MB total\n", - leaf_pages, max_leaf_bytes, leaf_bytes >> 20); - if (dirty_pages != 0) - (void)fprintf(fp, - "\t" "dirty pages: %" PRIu64 " pages, %" PRIu64 - " max, %" PRIu64 "MB total\n", - dirty_pages, max_dirty_bytes, dirty_bytes >> 20); + "\t" "leaf: " + "%" PRIu64 " pages, " + "%" PRIu64 "MB, " + "%" PRIu64 "/%" PRIu64 " clean/dirty pages, " + "%" PRIu64 "/%" PRIu64 " clean/dirty MB, " + "%" PRIu64 "MB max page, " + "%" PRIu64 "MB max dirty page\n", + leaf_pages, + leaf_bytes >> 20, + leaf_pages - leaf_dirty_pages, + leaf_dirty_pages, + (leaf_bytes - leaf_dirty_bytes) >> 20, + leaf_dirty_bytes >> 20, + leaf_bytes_max >> 20, + leaf_dirty_bytes_max >> 20); total_bytes += intl_bytes + leaf_bytes; + total_dirty_bytes += intl_dirty_bytes + leaf_dirty_bytes; } session->dhandle = saved_dhandle; @@ -1886,10 +2017,13 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) total_bytes += (total_bytes * (uint64_t)conn->cache->overhead_pct) / 100; (void)fprintf(fp, - 
"cache dump: total found = %" PRIu64 - "MB vs tracked inuse %" PRIu64 "MB\n", - total_bytes >> 20, __wt_cache_bytes_inuse(conn->cache) >> 20); + "cache dump: " + "total found = %" PRIu64 "MB vs tracked inuse %" PRIu64 "MB\n" + "total dirty bytes = %" PRIu64 "MB\n", + total_bytes >> 20, __wt_cache_bytes_inuse(conn->cache) >> 20, + total_dirty_bytes >> 20); (void)fprintf(fp, "==========\n"); + if (ofile != NULL && fclose(fp) != 0) return (EIO); return (0); diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c index 305b81fe69e..d4c4e3e311a 100644 --- a/src/third_party/wiredtiger/src/evict/evict_page.c +++ b/src/third_party/wiredtiger/src/evict/evict_page.c @@ -10,7 +10,7 @@ static int __evict_page_clean_update(WT_SESSION_IMPL *, WT_REF *, bool); static int __evict_page_dirty_update(WT_SESSION_IMPL *, WT_REF *, bool); -static int __evict_review(WT_SESSION_IMPL *, WT_REF *, bool *, bool); +static int __evict_review(WT_SESSION_IMPL *, WT_REF *, uint32_t *, bool); /* * __evict_exclusive_clear -- @@ -45,6 +45,55 @@ __evict_exclusive(WT_SESSION_IMPL *session, WT_REF *ref) return (EBUSY); } +/* + * __wt_page_release_evict -- + * Release a reference to a page, and attempt to immediately evict it. + */ +int +__wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) +{ + WT_BTREE *btree; + WT_DECL_RET; + WT_PAGE *page; + bool locked, too_big; + + btree = S2BT(session); + page = ref->page; + + /* + * Take some care with order of operations: if we release the hazard + * reference without first locking the page, it could be evicted in + * between. + */ + locked = __wt_atomic_casv32(&ref->state, WT_REF_MEM, WT_REF_LOCKED); + if ((ret = __wt_hazard_clear(session, page)) != 0 || !locked) { + if (locked) + ref->state = WT_REF_MEM; + return (ret == 0 ? 
EBUSY : ret); + } + + (void)__wt_atomic_addv32(&btree->evict_busy, 1); + + too_big = page->memory_footprint > btree->splitmempage; + if ((ret = __wt_evict(session, ref, false)) == 0) { + if (too_big) + WT_STAT_FAST_CONN_INCR(session, cache_eviction_force); + else + /* + * If the page isn't too big, we are evicting it because + * it had a chain of deleted entries that make traversal + * expensive. + */ + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_force_delete); + } else + WT_STAT_FAST_CONN_INCR(session, cache_eviction_force_fail); + + (void)__wt_atomic_subv32(&btree->evict_busy, 1); + + return (ret); +} + /* * __wt_evict -- * Evict a page. @@ -56,7 +105,8 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) WT_DECL_RET; WT_PAGE *page; WT_PAGE_MODIFY *mod; - bool clean_page, forced_eviction, inmem_split, tree_dead; + uint32_t flags; + bool clean_page, tree_dead; conn = S2C(session); @@ -64,8 +114,6 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) WT_ASSERT(session, !WT_SESSION_IS_CHECKPOINT(session)); page = ref->page; - forced_eviction = page->read_gen == WT_READGEN_OLDEST; - inmem_split = false; tree_dead = F_ISSET(session->dhandle, WT_DHANDLE_DEAD); WT_RET(__wt_verbose(session, WT_VERB_EVICT, @@ -78,20 +126,14 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) * to make this check for clean pages, too: while unlikely eviction * would choose an internal page with children, it's not disallowed. */ - WT_ERR(__evict_review(session, ref, &inmem_split, closing)); + WT_ERR(__evict_review(session, ref, &flags, closing)); /* * If there was an in-memory split, the tree has been left in the state * we want: there is nothing more to do. */ - if (inmem_split) - goto done; - - /* - * Update the page's modification reference, reconciliation might have - * changed it. - */ - mod = page->modify; + if (LF_ISSET(WT_EVICT_INMEM_SPLIT)) + return (0); /* Count evictions of internal pages during normal operation. 
*/ if (!closing && WT_PAGE_IS_INTERNAL(page)) { @@ -108,12 +150,13 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) conn->cache->evict_max_page_size = page->memory_footprint; /* Figure out whether reconciliation was done on the page */ + mod = page->modify; clean_page = mod == NULL || mod->rec_result == 0; /* Update the reference and discard the page. */ if (__wt_ref_is_root(ref)) __wt_ref_out(session, ref); - else if (tree_dead || (clean_page && !F_ISSET(conn, WT_CONN_IN_MEMORY))) + else if ((clean_page && !LF_ISSET(WT_EVICT_IN_MEMORY)) || tree_dead) /* * Pages that belong to dead trees never write back to disk * and can't support page splits. @@ -139,14 +182,9 @@ err: if (!closing) WT_STAT_FAST_DATA_INCR(session, cache_eviction_fail); } -done: if (((inmem_split && ret == 0) || (forced_eviction && ret == EBUSY)) && - !F_ISSET(conn->cache, WT_CACHE_WOULD_BLOCK)) { - F_SET(conn->cache, WT_CACHE_WOULD_BLOCK); - WT_TRET(__wt_evict_server_wake(session)); - } - return (ret); } + /* * __evict_delete_ref -- * Mark a page reference deleted and check if the parent can reverse @@ -209,13 +247,6 @@ __evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) { WT_DECL_RET; - /* - * If doing normal system eviction, but only in the service of reducing - * the number of dirty pages, leave the clean page in cache. - */ - if (!closing && __wt_eviction_dirty_target(session)) - return (EBUSY); - /* * Discard the page and update the reference structure; if the page has * an address, it's a disk page; if it has no address, it's a deleted @@ -242,6 +273,7 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) WT_ADDR *addr; WT_DECL_RET; WT_PAGE_MODIFY *mod; + WT_MULTI multi; mod = ref->page->modify; @@ -284,23 +316,14 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) * write. Take advantage of the fact we have exclusive access * to the page and rewrite it in memory. 
*/ - if (mod->mod_multi_entries == 1) - WT_RET(__wt_split_rewrite(session, ref)); - else + if (mod->mod_multi_entries == 1) { + WT_ASSERT(session, closing == false); + WT_RET(__wt_split_rewrite( + session, ref, &mod->mod_multi[0])); + } else WT_RET(__wt_split_multi(session, ref, closing)); break; case WT_PM_REC_REPLACE: /* 1-for-1 page swap */ - /* - * If doing normal system eviction, but only in the service of - * reducing the number of dirty pages, leave the clean page in - * cache. Only do this when replacing a page with another one, - * because when a page splits into multiple pages, we want to - * push it out of cache (and read it back in, when needed), we - * would rather have more, smaller pages than fewer large pages. - */ - if (!closing && __wt_eviction_dirty_target(session)) - return (EBUSY); - /* * Update the parent to reference the replacement page. * @@ -311,10 +334,26 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) *addr = mod->mod_replace; mod->mod_replace.addr = NULL; mod->mod_replace.size = 0; - - __wt_ref_out(session, ref); ref->addr = addr; - WT_PUBLISH(ref->state, WT_REF_DISK); + + /* + * Eviction wants to keep this page if we have a disk image, + * re-instantiate the page in memory, else discard the page. + */ + if (mod->mod_disk_image == NULL) { + __wt_ref_out(session, ref); + WT_PUBLISH(ref->state, WT_REF_DISK); + } else { + /* + * The split code works with WT_MULTI structures, build + * one for the disk image. 
+ */ + memset(&multi, 0, sizeof(multi)); + multi.disk_image = mod->mod_disk_image; + + WT_RET(__wt_split_rewrite(session, ref, &multi)); + } + break; WT_ILLEGAL_VALUE(session); } @@ -351,13 +390,17 @@ __evict_child_check(WT_SESSION_IMPL *session, WT_REF *parent) */ static int __evict_review( - WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp, bool closing) + WT_SESSION_IMPL *session, WT_REF *ref, uint32_t *flagsp, bool closing) { + WT_CACHE *cache; WT_DECL_RET; WT_PAGE *page; uint32_t flags; bool modified; + flags = WT_EVICTING; + *flagsp = flags; + /* * Get exclusive access to the page if our caller doesn't have the tree * locked down. @@ -423,8 +466,9 @@ __evict_review( WT_RET(__wt_txn_update_oldest( session, WT_TXN_OLDEST_STRICT)); - if (!__wt_page_can_evict(session, ref, inmem_splitp)) + if (!__wt_page_can_evict(session, ref, flagsp)) return (EBUSY); + flags = *flagsp; /* * Check for an append-only workload needing an in-memory @@ -433,8 +477,12 @@ __evict_review( * the page stays in memory and the tree is left in the desired * state: avoid the usual cleanup. */ - if (*inmem_splitp) + if (LF_ISSET(WT_EVICT_INMEM_SPLIT)) return (__wt_split_insert(session, ref)); + + /* We are done if reconciliation is disabled. */ + if (F_ISSET(S2BT(session), WT_BTREE_NO_RECONCILE)) + return (EBUSY); } /* If the page is clean, we're done and we can evict. */ @@ -447,10 +495,15 @@ __evict_review( * If we have an exclusive lock (we're discarding the tree), assert * there are no updates we cannot read. * - * Otherwise, if the page we're evicting is a leaf page marked for - * forced eviction, set the update-restore flag, so reconciliation will - * write blocks it can write and create a list of skipped updates for - * blocks it cannot write. This is how forced eviction of active, huge + * Don't set any other flags for internal pages: they don't have update + * lists to be saved and restored, nor can we re-create them in memory. 
+ * + * For leaf pages: + * + * If an in-memory configuration or the page is being forcibly evicted, + * set the update-restore flag, so reconciliation will write blocks it + * can write and create a list of skipped updates for blocks it cannot + * write, along with disk images. This is how eviction of active, huge * pages works: we take a big page and reconcile it into blocks, some of * which we write and discard, the rest of which we re-create as smaller * in-memory pages, (restoring the updates that stopped us from writing @@ -461,32 +514,43 @@ __evict_review( * allowing the eviction of pages we'd otherwise have to retain in cache * to support older readers. * - * Don't set the update-restore or lookaside table flags for internal - * pages, they don't have update lists that can be saved and restored. + * Finally, if we don't need to do eviction at the moment, create disk + * images of split pages in order to re-instantiate them. */ - flags = WT_EVICTING; + cache = S2C(session)->cache; if (closing) LF_SET(WT_VISIBILITY_ERR); else if (!WT_PAGE_IS_INTERNAL(page)) { if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) - LF_SET(WT_EVICT_IN_MEMORY | WT_EVICT_UPDATE_RESTORE); - else if (page->read_gen == WT_READGEN_OLDEST) - LF_SET(WT_EVICT_UPDATE_RESTORE); - else if (F_ISSET(S2C(session)->cache, WT_CACHE_STUCK)) + LF_SET(WT_EVICT_IN_MEMORY | + WT_EVICT_SCRUB | WT_EVICT_UPDATE_RESTORE); + else if (F_ISSET(cache, WT_CACHE_STUCK)) LF_SET(WT_EVICT_LOOKASIDE); + else if (!__wt_txn_visible_all( + session, page->modify->update_txn)) + LF_SET(WT_EVICT_UPDATE_RESTORE); + + /* + * If we aren't trying to free space in the cache, scrub the + * page and keep it around. + */ + if (!LF_ISSET(WT_EVICT_LOOKASIDE) && + FLD_ISSET(cache->state, WT_EVICT_STATE_SCRUB)) + LF_SET(WT_EVICT_SCRUB); } + *flagsp = flags; WT_RET(__wt_reconcile(session, ref, NULL, flags)); /* * Success: assert the page is clean or reconciliation was configured - * for an update/restore split. 
If the page is clean, assert that - * reconciliation was configured for a lookaside table, or it's not a - * durable object (currently the lookaside table), or all page updates - * were globally visible. + * for update/restore. If the page is clean, assert that reconciliation + * was configured for a lookaside table, or it's not a durable object + * (currently the lookaside table), or all page updates were globally + * visible. */ WT_ASSERT(session, - LF_ISSET(WT_EVICT_UPDATE_RESTORE) || !__wt_page_is_modified(page)); + !__wt_page_is_modified(page) || LF_ISSET(WT_EVICT_UPDATE_RESTORE)); WT_ASSERT(session, __wt_page_is_modified(page) || LF_ISSET(WT_EVICT_LOOKASIDE) || diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h index 50b2eab83b8..0a4593178dc 100644 --- a/src/third_party/wiredtiger/src/include/api.h +++ b/src/third_party/wiredtiger/src/include/api.h @@ -66,6 +66,8 @@ else if (ret == 0 && !F_ISSET(&(s)->txn, WT_TXN_ERROR)) \ ret = __wt_txn_commit((s), NULL); \ else { \ + if (retry) \ + WT_TRET(__wt_session_copy_values(s)); \ WT_TRET(__wt_txn_rollback((s), NULL)); \ if ((ret == 0 || ret == WT_ROLLBACK) && \ (retry)) { \ diff --git a/src/third_party/wiredtiger/src/include/block.h b/src/third_party/wiredtiger/src/include/block.h index a8080c1651c..3342f9b1e5e 100644 --- a/src/third_party/wiredtiger/src/include/block.h +++ b/src/third_party/wiredtiger/src/include/block.h @@ -192,7 +192,7 @@ struct __wt_bm { int (*verify_start) (WT_BM *, WT_SESSION_IMPL *, WT_CKPT *, const char *[]); int (*write) (WT_BM *, - WT_SESSION_IMPL *, WT_ITEM *, uint8_t *, size_t *, bool); + WT_SESSION_IMPL *, WT_ITEM *, uint8_t *, size_t *, bool, bool); int (*write_size)(WT_BM *, WT_SESSION_IMPL *, size_t *); WT_BLOCK *block; /* Underlying file */ diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index 9700b6f4761..817ce892952 100644 --- 
a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -250,9 +250,19 @@ struct __wt_page_modify { * a replace address and multiple replacement blocks. */ union { - WT_ADDR replace; /* Single, written replacement block */ + struct { /* Single, written replacement block */ + WT_ADDR replace; + + /* + * A disk image that may or may not have been written, used to + * re-instantiate the page in memory. + */ + void *disk_image; + } r; #undef mod_replace -#define mod_replace u1.replace +#define mod_replace u1.r.replace +#undef mod_disk_image +#define mod_disk_image u1.r.disk_image struct { /* Multiple replacement blocks */ struct __wt_multi { @@ -266,14 +276,19 @@ struct __wt_page_modify { } key; /* - * Eviction, but the block wasn't written: either an in-memory - * configuration or unresolved updates prevented the write. - * There may be a list of unresolved updates, there's always an - * associated disk image. + * A disk image that may or may not have been written, used to + * re-instantiate the page in memory. + */ + void *disk_image; + + /* + * List of unresolved updates. Updates are either a WT_INSERT + * or a row-store leaf page entry; when creating lookaside + * records, there is an additional value, the committed item's + * transaction ID. * - * Saved updates are either a WT_INSERT, or a row-store leaf - * page entry; in the case of creating lookaside records, there - * is an additional value, the committed item's transaction ID. + * If there are unresolved updates, the block wasn't written and + * there will always be a disk image. */ struct __wt_save_upd { WT_INSERT *ins; @@ -281,10 +296,9 @@ struct __wt_page_modify { uint64_t onpage_txn; } *supd; uint32_t supd_entries; - void *disk_image; /* - * Block was written: address, size and checksum. + * Disk image was written: address, size and checksum. 
* On subsequent reconciliations of this page, we avoid writing * the block if it's unchanged by comparing size and checksum; * the reuse flag is set when the block is unchanged and we're diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h index fd921677751..432474f9dc1 100644 --- a/src/third_party/wiredtiger/src/include/btree.h +++ b/src/third_party/wiredtiger/src/include/btree.h @@ -126,12 +126,16 @@ struct __wt_btree { u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */ uint64_t checkpoint_gen; /* Checkpoint generation */ + bool include_checkpoint_txn;/* ID checks include checkpoint */ uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */ uint64_t write_gen; /* Write generation */ + uint64_t bytes_inmem; /* Cache bytes in memory. */ + WT_REF *evict_ref; /* Eviction thread's location */ uint64_t evict_priority; /* Relative priority of cached pages */ u_int evict_walk_period; /* Skip this many LRU walks */ + u_int evict_walk_saved; /* Saved walk skips for checkpoints */ u_int evict_walk_skips; /* Number of walks skipped */ u_int evict_disabled; /* Eviction disabled count */ volatile uint32_t evict_busy; /* Count of threads in eviction */ @@ -154,11 +158,12 @@ struct __wt_btree { #define WT_BTREE_NO_CHECKPOINT 0x00800 /* Disable checkpoints */ #define WT_BTREE_NO_EVICTION 0x01000 /* Disable eviction */ #define WT_BTREE_NO_LOGGING 0x02000 /* Disable logging */ -#define WT_BTREE_REBALANCE 0x04000 /* Handle is for rebalance */ -#define WT_BTREE_SALVAGE 0x08000 /* Handle is for salvage */ -#define WT_BTREE_SKIP_CKPT 0x10000 /* Handle skipped checkpoint */ -#define WT_BTREE_UPGRADE 0x20000 /* Handle is for upgrade */ -#define WT_BTREE_VERIFY 0x40000 /* Handle is for verify */ +#define WT_BTREE_NO_RECONCILE 0x04000 /* Allow splits, even with no evict */ +#define WT_BTREE_REBALANCE 0x08000 /* Handle is for rebalance */ +#define WT_BTREE_SALVAGE 0x10000 /* Handle is for salvage */ +#define WT_BTREE_SKIP_CKPT 
0x20000 /* Handle skipped checkpoint */ +#define WT_BTREE_UPGRADE 0x40000 /* Handle is for upgrade */ +#define WT_BTREE_VERIFY 0x80000 /* Handle is for verify */ uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index e0102a11511..3234ad1ed41 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -54,6 +54,27 @@ __wt_btree_block_free( return (bm->free(bm, session, addr, addr_size)); } +/* + * __wt_btree_bytes_inuse -- + * Return the number of bytes in use. + */ +static inline uint64_t +__wt_btree_bytes_inuse(WT_SESSION_IMPL *session) +{ + WT_CACHE *cache; + uint64_t bytes_inuse; + + cache = S2C(session)->cache; + + /* Adjust the cache size to take allocation overhead into account. */ + bytes_inuse = S2BT(session)->bytes_inmem; + if (cache->overhead_pct != 0) + bytes_inuse += + (bytes_inuse * (uint64_t)cache->overhead_pct) / 100; + + return (bytes_inuse); +} + /* * __wt_cache_page_inmem_incr -- * Increment a page's memory footprint in the cache. @@ -66,17 +87,17 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) WT_ASSERT(session, size < WT_EXABYTE); cache = S2C(session)->cache; + (void)__wt_atomic_add64(&S2BT(session)->bytes_inmem, size); (void)__wt_atomic_add64(&cache->bytes_inmem, size); (void)__wt_atomic_addsize(&page->memory_footprint, size); if (__wt_page_is_modified(page)) { - (void)__wt_atomic_add64(&cache->bytes_dirty, size); (void)__wt_atomic_addsize(&page->modify->bytes_dirty, size); + (void)__wt_atomic_add64(WT_PAGE_IS_INTERNAL(page) ? + &cache->bytes_dirty_intl : &cache->bytes_dirty_leaf, size); } - /* Track internal and overflow size in cache. */ + /* Track internal size in cache. 
*/ if (WT_PAGE_IS_INTERNAL(page)) (void)__wt_atomic_add64(&cache->bytes_internal, size); - else if (page->type == WT_PAGE_OVFL) - (void)__wt_atomic_add64(&cache->bytes_overflow, size); } /* @@ -144,10 +165,16 @@ __wt_cache_page_byte_dirty_decr( WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) { WT_CACHE *cache; + const char *destname; + uint64_t *dest; size_t decr, orig; int i; cache = S2C(session)->cache; + dest = WT_PAGE_IS_INTERNAL(page) ? + &cache->bytes_dirty_intl : &cache->bytes_dirty_leaf; + destname = WT_PAGE_IS_INTERNAL(page) ? + "WT_CACHE.bytes_dirty_intl" : "WT_CACHE.bytes_dirty_leaf"; /* * We don't have exclusive access and there are ways of decrementing the @@ -175,8 +202,8 @@ __wt_cache_page_byte_dirty_decr( decr = WT_MIN(size, orig); if (__wt_atomic_cassize( &page->modify->bytes_dirty, orig, orig - decr)) { - __wt_cache_decr_check_uint64(session, - &cache->bytes_dirty, decr, "WT_CACHE.bytes_dirty"); + __wt_cache_decr_check_uint64( + session, dest, decr, destname); break; } } @@ -195,19 +222,18 @@ __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) WT_ASSERT(session, size < WT_EXABYTE); + __wt_cache_decr_check_uint64( + session, &S2BT(session)->bytes_inmem, size, "WT_BTREE.bytes_inmem"); __wt_cache_decr_check_uint64( session, &cache->bytes_inmem, size, "WT_CACHE.bytes_inmem"); __wt_cache_decr_check_size( session, &page->memory_footprint, size, "WT_PAGE.memory_footprint"); if (__wt_page_is_modified(page)) __wt_cache_page_byte_dirty_decr(session, page, size); - /* Track internal and overflow size in cache. */ + /* Track internal size in cache. 
*/ if (WT_PAGE_IS_INTERNAL(page)) __wt_cache_decr_check_uint64(session, &cache->bytes_internal, size, "WT_CACHE.bytes_internal"); - else if (page->type == WT_PAGE_OVFL) - __wt_cache_decr_check_uint64(session, - &cache->bytes_overflow, size, "WT_CACHE.bytes_overflow"); } /* @@ -222,14 +248,16 @@ __wt_cache_dirty_incr(WT_SESSION_IMPL *session, WT_PAGE *page) size_t size; cache = S2C(session)->cache; - (void)__wt_atomic_add64(&cache->pages_dirty, 1); + (void)__wt_atomic_add64(WT_PAGE_IS_INTERNAL(page) ? + &cache->pages_dirty_intl : &cache->pages_dirty_leaf, 1); /* * Take care to read the memory_footprint once in case we are racing * with updates. */ size = page->memory_footprint; - (void)__wt_atomic_add64(&cache->bytes_dirty, size); + (void)__wt_atomic_add64(WT_PAGE_IS_INTERNAL(page) ? + &cache->bytes_dirty_intl : &cache->bytes_dirty_leaf, size); (void)__wt_atomic_addsize(&page->modify->bytes_dirty, size); } @@ -243,16 +271,19 @@ __wt_cache_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page) { WT_CACHE *cache; WT_PAGE_MODIFY *modify; + uint64_t *pages_dirty; cache = S2C(session)->cache; + pages_dirty = WT_PAGE_IS_INTERNAL(page) ? + &cache->pages_dirty_intl : &cache->pages_dirty_leaf; - if (cache->pages_dirty < 1) { + if (*pages_dirty < 1) { __wt_errx(session, "cache eviction dirty-page decrement failed: dirty page" "count went negative"); - cache->pages_dirty = 0; + *pages_dirty = 0; } else - (void)__wt_atomic_sub64(&cache->pages_dirty, 1); + (void)__wt_atomic_sub64(pages_dirty, 1); modify = page->modify; if (modify != NULL && modify->bytes_dirty != 0) @@ -260,6 +291,34 @@ __wt_cache_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page) session, page, modify->bytes_dirty); } +/* + * __wt_cache_page_image_decr -- + * Decrement a page image's size to the cache. 
+ */ +static inline void +__wt_cache_page_image_decr(WT_SESSION_IMPL *session, uint32_t size) +{ + WT_CACHE *cache; + + cache = S2C(session)->cache; + + __wt_cache_decr_check_uint64( + session, &cache->bytes_image, size, "WT_CACHE.bytes_image"); +} + +/* + * __wt_cache_page_image_incr -- + * Add a page image's size to the cache's disk-image byte count. + */ +static inline void +__wt_cache_page_image_incr(WT_SESSION_IMPL *session, uint32_t size) +{ + WT_CACHE *cache; + + cache = S2C(session)->cache; + (void)__wt_atomic_add64(&cache->bytes_image, size); +} + /* * __wt_cache_page_evict -- * Evict pages from the cache. @@ -269,13 +328,20 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) { WT_CACHE *cache; WT_PAGE_MODIFY *modify; + uint64_t *dest; + const char *destname; cache = S2C(session)->cache; + dest = WT_PAGE_IS_INTERNAL(page) ? + &cache->bytes_dirty_intl : &cache->bytes_dirty_leaf; + destname = WT_PAGE_IS_INTERNAL(page) ? + "WT_CACHE.bytes_dirty_intl" : "WT_CACHE.bytes_dirty_leaf"; modify = page->modify; /* Update the bytes in-memory to reflect the eviction. */ - __wt_cache_decr_check_uint64(session, - &cache->bytes_inmem, + __wt_cache_decr_check_uint64(session, &S2BT(session)->bytes_inmem, + page->memory_footprint, "WT_BTREE.bytes_inmem"); + __wt_cache_decr_check_uint64(session, &cache->bytes_inmem, page->memory_footprint, "WT_CACHE.bytes_inmem"); /* Update the bytes_internal value to reflect the eviction */ @@ -286,15 +352,14 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) /* Update the cache's dirty-byte count. 
*/ if (modify != NULL && modify->bytes_dirty != 0) { - if (cache->bytes_dirty < modify->bytes_dirty) { + if ((size_t)*dest < modify->bytes_dirty) { __wt_errx(session, - "cache eviction dirty-bytes decrement failed: " - "dirty byte count went negative"); - cache->bytes_dirty = 0; + "%s decrement failed: " + "dirty byte count went negative", destname); + *dest = 0; } else - __wt_cache_decr_check_uint64(session, - &cache->bytes_dirty, - modify->bytes_dirty, "WT_CACHE.bytes_dirty"); + __wt_cache_decr_check_uint64(session, dest, + modify->bytes_dirty, destname); } /* Update pages and bytes evicted. */ @@ -317,16 +382,6 @@ __wt_update_list_memsize(WT_UPDATE *upd) return (upd_size); } -/* - * __wt_page_evict_soon -- - * Set a page to be evicted as soon as possible. - */ -static inline void -__wt_page_evict_soon(WT_PAGE *page) -{ - page->read_gen = WT_READGEN_OLDEST; -} - /* * __wt_page_modify_init -- * A page is about to be modified, allocate the modification structure. @@ -1099,16 +1154,14 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page) * Check whether a page can be evicted. */ static inline bool -__wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) +__wt_page_can_evict( + WT_SESSION_IMPL *session, WT_REF *ref, uint32_t *evict_flagsp) { WT_BTREE *btree; WT_PAGE *page; WT_PAGE_MODIFY *mod; bool modified; - if (inmem_splitp != NULL) - *inmem_splitp = false; - btree = S2BT(session); page = ref->page; mod = page->modify; @@ -1124,8 +1177,8 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) * won't be written or discarded from the cache. 
*/ if (__wt_leaf_page_can_split(session, page)) { - if (inmem_splitp != NULL) - *inmem_splitp = true; + if (evict_flagsp != NULL) + FLD_SET(*evict_flagsp, WT_EVICT_INMEM_SPLIT); return (true); } @@ -1164,6 +1217,10 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_BLOCK)) return (false); + /* If the cache is stuck, try anything else. */ + if (F_ISSET(S2C(session)->cache, WT_CACHE_STUCK)) + return (true); + /* * If the oldest transaction hasn't changed since the last time * this page was written, it's unlikely we can make progress. @@ -1172,7 +1229,6 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) * attempt to avoid repeated attempts to evict the same page. */ if (modified && - !F_ISSET(S2C(session)->cache, WT_CACHE_STUCK) && (mod->last_oldest_id == __wt_txn_oldest_id(session) || !__wt_txn_visible_all(session, mod->update_txn))) return (false); @@ -1180,56 +1236,6 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) return (true); } -/* - * __wt_page_release_evict -- - * Release a reference to a page, and attempt to immediately evict it. - */ -static inline int -__wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) -{ - WT_BTREE *btree; - WT_DECL_RET; - WT_PAGE *page; - bool locked, too_big; - - btree = S2BT(session); - page = ref->page; - - /* - * Take some care with order of operations: if we release the hazard - * reference without first locking the page, it could be evicted in - * between. - */ - locked = __wt_atomic_casv32( - &ref->state, WT_REF_MEM, WT_REF_LOCKED) ? true : false; - if ((ret = __wt_hazard_clear(session, page)) != 0 || !locked) { - if (locked) - ref->state = WT_REF_MEM; - return (ret == 0 ? 
EBUSY : ret); - } - - (void)__wt_atomic_addv32(&btree->evict_busy, 1); - - too_big = page->memory_footprint > btree->maxmempage; - if ((ret = __wt_evict(session, ref, false)) == 0) { - if (too_big) - WT_STAT_FAST_CONN_INCR(session, cache_eviction_force); - else - /* - * If the page isn't too big, we are evicting it because - * it had a chain of deleted entries that make traversal - * expensive. - */ - WT_STAT_FAST_CONN_INCR( - session, cache_eviction_force_delete); - } else - WT_STAT_FAST_CONN_INCR(session, cache_eviction_force_fail); - - (void)__wt_atomic_subv32(&btree->evict_busy, 1); - - return (ret); -} - /* * __wt_page_release -- * Release a reference to a page. diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h index f4a35de7201..e3a003ccc56 100644 --- a/src/third_party/wiredtiger/src/include/cache.h +++ b/src/third_party/wiredtiger/src/include/cache.h @@ -13,7 +13,6 @@ #define WT_EVICT_INT_SKEW (1<<20) /* Prefer leaf pages over internal pages by this many increments of the read generation. */ -#define WT_EVICT_WALK_PER_FILE 10 /* Pages to queue per file */ #define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */ #define WT_EVICT_WALK_INCR 100 /* Pages added each walk */ @@ -24,9 +23,12 @@ struct __wt_evict_entry { WT_BTREE *btree; /* Enclosing btree object */ WT_REF *ref; /* Page to flush/evict */ + uint64_t score; /* Relative eviction priority */ }; -#define WT_EVICT_QUEUE_MAX 2 +#define WT_EVICT_URGENT_QUEUE 0 /* Urgent queue index */ +#define WT_EVICT_QUEUE_MAX 3 /* Urgent plus two ordinary queues */ + /* * WT_EVICT_QUEUE -- * Encapsulation of an eviction candidate queue. 
@@ -34,6 +36,7 @@ struct __wt_evict_entry { struct __wt_evict_queue { WT_SPINLOCK evict_lock; /* Eviction LRU queue */ WT_EVICT_ENTRY *evict_queue; /* LRU pages being tracked */ + WT_EVICT_ENTRY *evict_current; /* LRU current page to be evicted */ uint32_t evict_candidates; /* LRU list pages to evict */ uint32_t evict_entries; /* LRU entries in the queue */ volatile uint32_t evict_max; /* LRU maximum eviction slot used */ @@ -70,16 +73,19 @@ struct __wt_cache { * be exact, they can't be garbage, we track what comes in and what goes * out and calculate the difference as needed. */ - uint64_t bytes_inmem; /* Bytes/pages in memory */ - uint64_t pages_inmem; - uint64_t bytes_internal; /* Bytes of internal pages */ - uint64_t bytes_overflow; /* Bytes of overflow pages */ + uint64_t bytes_dirty_intl; /* Bytes/pages currently dirty */ + uint64_t pages_dirty_intl; + uint64_t bytes_dirty_leaf; + uint64_t pages_dirty_leaf; uint64_t bytes_evict; /* Bytes/pages discarded by eviction */ uint64_t pages_evict; uint64_t pages_evicted; /* Pages evicted during a pass */ - uint64_t bytes_dirty; /* Bytes/pages currently dirty */ - uint64_t pages_dirty; + uint64_t bytes_image; /* Bytes of disk images */ + uint64_t bytes_inmem; /* Bytes/pages in memory */ + uint64_t pages_inmem; + uint64_t bytes_internal; /* Bytes of internal pages */ uint64_t bytes_read; /* Bytes read into memory */ + uint64_t bytes_written; uint64_t app_waits; /* User threads waited for cache */ uint64_t app_evicts; /* Pages evicted by user threads */ @@ -121,7 +127,6 @@ struct __wt_cache { WT_SPINLOCK evict_queue_lock; /* Eviction current queue lock */ WT_EVICT_QUEUE evict_queues[WT_EVICT_QUEUE_MAX]; WT_EVICT_QUEUE *evict_current_queue;/* LRU current queue in use */ - WT_EVICT_ENTRY *evict_current; /* LRU current page to be evicted */ uint32_t evict_queue_fill; /* LRU eviction queue index to fill */ uint32_t evict_slots; /* LRU list eviction slots */ WT_DATA_HANDLE @@ -145,10 +150,13 @@ struct __wt_cache { /* * Work 
state. */ -#define WT_EVICT_PASS_AGGRESSIVE 0x01 -#define WT_EVICT_PASS_ALL 0x02 -#define WT_EVICT_PASS_DIRTY 0x04 -#define WT_EVICT_PASS_WOULD_BLOCK 0x08 +#define WT_EVICT_STATE_AGGRESSIVE 0x01 /* Eviction isn't making progress: + try harder */ +#define WT_EVICT_STATE_CLEAN 0x02 /* Evict clean pages */ +#define WT_EVICT_STATE_DIRTY 0x04 /* Evict dirty pages */ +#define WT_EVICT_STATE_SCRUB 0x08 /* Scrub dirty pages */ +#define WT_EVICT_STATE_URGENT 0x10 /* Pages are in the urgent queue */ +#define WT_EVICT_STATE_ALL (WT_EVICT_STATE_CLEAN | WT_EVICT_STATE_DIRTY) uint32_t state; /* * Pass interrupt counter. @@ -162,7 +170,6 @@ struct __wt_cache { #define WT_CACHE_POOL_RUN 0x02 /* Cache pool thread running */ #define WT_CACHE_STUCK 0x04 /* Eviction server is stuck */ #define WT_CACHE_WALK_REVERSE 0x08 /* Scan backwards for candidates */ -#define WT_CACHE_WOULD_BLOCK 0x10 /* Pages that would block apps */ uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i index 72c8307756d..b5cb79afb3c 100644 --- a/src/third_party/wiredtiger/src/include/cache.i +++ b/src/third_party/wiredtiger/src/include/cache.i @@ -104,7 +104,7 @@ __wt_cache_dirty_inuse(WT_CACHE *cache) { uint64_t dirty_inuse; - dirty_inuse = cache->bytes_dirty; + dirty_inuse = cache->bytes_dirty_intl + cache->bytes_dirty_leaf; if (cache->overhead_pct != 0) dirty_inuse += (dirty_inuse * (uint64_t)cache->overhead_pct) / 100; @@ -112,6 +112,67 @@ __wt_cache_dirty_inuse(WT_CACHE *cache) return (dirty_inuse); } +/* + * __wt_cache_dirty_leaf_inuse -- + * Return the number of dirty bytes in use by leaf pages.
+ */ +static inline uint64_t +__wt_cache_dirty_leaf_inuse(WT_CACHE *cache) +{ + uint64_t dirty_inuse; + + dirty_inuse = cache->bytes_dirty_leaf; + if (cache->overhead_pct != 0) + dirty_inuse += + (dirty_inuse * (uint64_t)cache->overhead_pct) / 100; + + return (dirty_inuse); +} + +/* + * __wt_cache_bytes_image -- + * Return the number of page image bytes in use. + */ +static inline uint64_t +__wt_cache_bytes_image(WT_CACHE *cache) +{ + uint64_t bytes_image; + + bytes_image = cache->bytes_image; + if (cache->overhead_pct != 0) + bytes_image += + (bytes_image * (uint64_t)cache->overhead_pct) / 100; + + return (bytes_image); +} + +/* + * __wt_cache_bytes_other -- + * Return the number of bytes in use not for page images. + */ +static inline uint64_t +__wt_cache_bytes_other(WT_CACHE *cache) +{ + uint64_t bytes_image, bytes_inmem, bytes_other; + + bytes_image = cache->bytes_image; + bytes_inmem = cache->bytes_inmem; + + /* + * The reads above could race with changes to the values, so protect + * against underflow. + */ + if (bytes_image > bytes_inmem) + return (0); + + bytes_other = bytes_inmem - bytes_image; + if (cache->overhead_pct != 0) + bytes_other += + (bytes_other * (uint64_t)cache->overhead_pct) / 100; + + return (bytes_other); +} + /* * __wt_session_can_wait -- * Return if a session available for a potentially slow operation. @@ -138,21 +199,10 @@ __wt_session_can_wait(WT_SESSION_IMPL *session) return (1); } -/* - * __wt_eviction_dirty_target -- - * Return if the eviction server is running to reduce the number of dirty - * pages (versus running to discard pages from the cache). - */ -static inline bool -__wt_eviction_dirty_target(WT_SESSION_IMPL *session) -{ - return (FLD_ISSET(S2C(session)->cache->state, WT_EVICT_PASS_DIRTY)); -} - /* * __wt_eviction_needed -- * Return if an application thread should do eviction, and the cache full - * percentage as a side-effect. + * percentage as a side-effect. 
*/ static inline bool __wt_eviction_needed(WT_SESSION_IMPL *session, u_int *pct_fullp) @@ -186,22 +236,21 @@ __wt_eviction_needed(WT_SESSION_IMPL *session, u_int *pct_fullp) pct_full = (u_int)((100 * bytes_inuse) / bytes_max); if (pct_fullp != NULL) *pct_fullp = pct_full; - /* - * If the connection is closing we do not need eviction from an - * application thread. The eviction subsystem is already closed. - * We return here because some callers depend on the percent full - * having been filled in. - */ - if (F_ISSET(conn, WT_CONN_CLOSING)) - return (false); if (pct_full > cache->eviction_trigger) return (true); - /* Return if there are too many dirty bytes in cache. */ - if (__wt_cache_dirty_inuse(cache) > + /* + * Check if there are too many dirty bytes in cache. + * + * We try to avoid penalizing read-only operations by only checking the + * dirty limit once a transaction ID has been allocated, or if the last + * transaction did an update. + */ + if (__wt_cache_dirty_leaf_inuse(cache) > (cache->eviction_dirty_trigger * bytes_max) / 100) return (true); + return (false); } diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index 0e0c357279a..a9855e42980 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -271,7 +271,6 @@ struct __wt_connection_impl { wt_thread_t ckpt_tid; /* Checkpoint thread */ bool ckpt_tid_set; /* Checkpoint thread set */ WT_CONDVAR *ckpt_cond; /* Checkpoint wait mutex */ - const char *ckpt_config; /* Checkpoint configuration */ #define WT_CKPT_LOGSIZE(conn) ((conn)->ckpt_logsize != 0) wt_off_t ckpt_logsize; /* Checkpoint log size period */ uint32_t ckpt_signalled;/* Checkpoint signalled */ @@ -314,6 +313,7 @@ struct __wt_connection_impl { uint32_t evict_workers; /* Number of eviction workers */ WT_EVICT_WORKER *evict_workctx; /* Eviction worker context */ +#define WT_STATLOG_FILENAME 
"WiredTigerStat.%d.%H" WT_SESSION_IMPL *stat_session; /* Statistics log session */ wt_thread_t stat_tid; /* Statistics log thread */ bool stat_tid_set; /* Statistics log thread set */ diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h index 6357523a03f..dce24f20844 100644 --- a/src/third_party/wiredtiger/src/include/cursor.h +++ b/src/third_party/wiredtiger/src/include/cursor.h @@ -73,6 +73,9 @@ struct __wt_cursor_backup { WT_CURSOR_BACKUP_ENTRY *list; /* List of files to be copied. */ size_t list_allocated; size_t list_next; + +#define WT_CURBACKUP_LOCKER 0x01 /* Hot-backup started */ + uint8_t flags; }; #define WT_CURSOR_BACKUP_ID(cursor) (((WT_CURSOR_BACKUP *)cursor)->maxid) @@ -413,7 +416,9 @@ struct __wt_cursor_log { uint32_t step_count; /* Intra-record count */ uint32_t rectype; /* Record type */ uint64_t txnid; /* Record txnid */ - uint32_t flags; + +#define WT_CURLOG_ARCHIVE_LOCK 0x01 /* Archive lock held */ + uint8_t flags; }; struct __wt_cursor_metadata { @@ -424,7 +429,7 @@ struct __wt_cursor_metadata { #define WT_MDC_CREATEONLY 0x01 #define WT_MDC_ONMETADATA 0x02 #define WT_MDC_POSITIONED 0x04 - uint32_t flags; + uint8_t flags; }; struct __wt_join_stats_group { diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i index 553dd03f958..76a08138afb 100644 --- a/src/third_party/wiredtiger/src/include/cursor.i +++ b/src/third_party/wiredtiger/src/include/cursor.i @@ -38,9 +38,6 @@ __cursor_pos_clear(WT_CURSOR_BTREE *cbt) cbt->ins_head = NULL; cbt->ins_stack[0] = NULL; - cbt->cip_saved = NULL; - cbt->rip_saved = NULL; - F_CLR(cbt, WT_CBT_POSITION_MASK); } @@ -120,7 +117,7 @@ __curfile_leave(WT_CURSOR_BTREE *cbt) */ if (cbt->ref != NULL && cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD) - __wt_page_evict_soon(cbt->ref->page); + WT_TRET(__wt_page_evict_soon(session, cbt->ref)); cbt->page_deleted_count = 0; /* @@ -130,7 +127,7 @@ 
__curfile_leave(WT_CURSOR_BTREE *cbt) * * Clear the reference regardless, so we don't try the release twice. */ - ret = __wt_page_release(session, cbt->ref, 0); + WT_TRET(__wt_page_release(session, cbt->ref, 0)); cbt->ref = NULL; return (ret); diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index b0c0f6eccad..f3a639ac07f 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -44,7 +44,7 @@ extern void __wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el); extern int __wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_regionp, size_t *lengthp, void *mapped_cookiep); extern int __wt_block_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region, size_t length, void *mapped_cookie); extern int __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp); -extern int __wt_block_manager_drop(WT_SESSION_IMPL *session, const char *filename); +extern int __wt_block_manager_drop( WT_SESSION_IMPL *session, const char *filename, bool durable); extern int __wt_block_manager_create( WT_SESSION_IMPL *session, const char *filename, uint32_t allocsize); extern void __wt_block_configure_first_fit(WT_BLOCK *block, bool on); extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BLOCK **blockp); @@ -76,8 +76,8 @@ extern int __wt_block_verify_addr(WT_SESSION_IMPL *session, WT_BLOCK *block, con extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len); extern int __wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size); extern int __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep); -extern int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK 
*block, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum); -extern int __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump, bool data_cksum, bool caller_locked); +extern int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum, bool checkpoint_io); +extern int __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump, bool data_cksum, bool checkpoint_io, bool caller_locked); extern int __wt_bloom_create( WT_SESSION_IMPL *session, const char *uri, const char *config, uint64_t count, uint32_t factor, uint32_t k, WT_BLOOM **bloomp); extern int __wt_bloom_open(WT_SESSION_IMPL *session, const char *uri, uint32_t factor, uint32_t k, WT_CURSOR *owner, WT_BLOOM **bloomp); extern int __wt_bloom_insert(WT_BLOOM *bloom, WT_ITEM *key); @@ -139,7 +139,7 @@ extern void __wt_btree_evictable(WT_SESSION_IMPL *session, bool on); extern int __wt_btree_huffman_open(WT_SESSION_IMPL *session); extern void __wt_btree_huffman_close(WT_SESSION_IMPL *session); extern int __wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size); -extern int __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool checkpoint, bool compressed); +extern int __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool checkpoint, bool checkpoint_io, bool compressed); extern const char *__wt_page_type_string(u_int type); extern const char *__wt_cell_type_string(uint8_t type); extern const char *__wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf); @@ -161,11 +161,11 @@ extern int __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPD extern int __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]); extern void 
__wt_split_stash_discard(WT_SESSION_IMPL *session); extern void __wt_split_stash_discard_all( WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session); -extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp); +extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing); extern int __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref); extern int __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing); extern int __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref); -extern int __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref); +extern int __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi); extern int __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst); extern int __wt_cache_op(WT_SESSION_IMPL *session, WT_CACHE_OP op); extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[]); @@ -282,7 +282,6 @@ extern int __wt_curconfig_open(WT_SESSION_IMPL *session, const char *uri, const extern int __wt_curds_open( WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_DATA_SOURCE *dsrc, WT_CURSOR **cursorp); extern int __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp); extern int __wt_curfile_update_check(WT_CURSOR *cursor); -extern int __wt_curfile_create(WT_SESSION_IMPL *session, WT_CURSOR *owner, const char *cfg[], bool bulk, bool bitmap, WT_CURSOR **cursorp); extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp); extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp); extern int __wt_curjoin_joined(WT_CURSOR *cursor); @@ -346,9 +345,11 @@ extern int __wt_evict_destroy(WT_SESSION_IMPL *session); extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session); 
extern void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session); extern int __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full); +extern int __wt_page_evict_soon(WT_SESSION_IMPL *session, WT_REF *ref); extern void __wt_evict_priority_set(WT_SESSION_IMPL *session, uint64_t v); extern void __wt_evict_priority_clear(WT_SESSION_IMPL *session); extern int __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile); +extern int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref); extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing); extern int __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn); extern int __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start); @@ -485,8 +486,7 @@ extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **va extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value); extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path); extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path); -extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name); -extern int __wt_rename_and_sync_directory( WT_SESSION_IMPL *session, const char *from, const char *to); +extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name, bool durable); extern int __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to); extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp); @@ -500,7 +500,7 @@ extern int __wt_errno(void); extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen); extern int __wt_ext_map_windows_error( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint32_t windows_error); extern bool __wt_handle_is_open(WT_SESSION_IMPL *session, const 
char *name); -extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp); +extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_FS_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp); extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp); extern int __wt_close_connection_close(WT_SESSION_IMPL *session); extern int __wt_os_inmemory(WT_SESSION_IMPL *session); @@ -585,7 +585,7 @@ extern WT_DATA_SOURCE *__wt_schema_get_source(WT_SESSION_IMPL *session, const ch extern int __wt_str_name_check(WT_SESSION_IMPL *session, const char *str); extern int __wt_name_check(WT_SESSION_IMPL *session, const char *str, size_t len); extern int __wt_schema_worker(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[], uint32_t open_flags); -extern int __wt_session_notsup(WT_SESSION *wt_session); +extern int __wt_session_notsup(WT_SESSION_IMPL *session); extern int __wt_session_reset_cursors(WT_SESSION_IMPL *session, bool free_buffers); extern int __wt_session_copy_values(WT_SESSION_IMPL *session); extern int __wt_session_release_resources(WT_SESSION_IMPL *session); @@ -719,7 +719,6 @@ extern void __wt_txn_stats_update(WT_SESSION_IMPL *session); extern void __wt_txn_destroy(WT_SESSION_IMPL *session); extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_global_destroy(WT_SESSION_IMPL *session); -extern int __wt_checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len); extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]); diff --git a/src/third_party/wiredtiger/src/include/flags.h b/src/third_party/wiredtiger/src/include/flags.h index 
f134af69d29..9346605ed24 100644 --- a/src/third_party/wiredtiger/src/include/flags.h +++ b/src/third_party/wiredtiger/src/include/flags.h @@ -2,6 +2,7 @@ * DO NOT EDIT: automatically built by dist/flags.py. * flags section: BEGIN */ +#define WT_CHECKPOINTING 0x00000001 #define WT_CONN_CACHE_POOL 0x00000001 #define WT_CONN_CKPT_SYNC 0x00000002 #define WT_CONN_CLOSING 0x00000004 @@ -21,10 +22,12 @@ #define WT_CONN_SERVER_STATISTICS 0x00010000 #define WT_CONN_SERVER_SWEEP 0x00020000 #define WT_CONN_WAS_BACKUP 0x00040000 -#define WT_EVICTING 0x00000001 -#define WT_EVICT_IN_MEMORY 0x00000002 -#define WT_EVICT_LOOKASIDE 0x00000004 -#define WT_EVICT_UPDATE_RESTORE 0x00000008 +#define WT_EVICTING 0x00000002 +#define WT_EVICT_INMEM_SPLIT 0x00000004 +#define WT_EVICT_IN_MEMORY 0x00000008 +#define WT_EVICT_LOOKASIDE 0x00000010 +#define WT_EVICT_SCRUB 0x00000020 +#define WT_EVICT_UPDATE_RESTORE 0x00000040 #define WT_LOGSCAN_FIRST 0x00000001 #define WT_LOGSCAN_FROM_CKP 0x00000002 #define WT_LOGSCAN_ONE 0x00000004 @@ -100,7 +103,7 @@ #define WT_VERB_VERIFY 0x00800000 #define WT_VERB_VERSION 0x01000000 #define WT_VERB_WRITE 0x02000000 -#define WT_VISIBILITY_ERR 0x00000010 +#define WT_VISIBILITY_ERR 0x00000080 /* * flags section: END * DO NOT EDIT: automatically built by dist/flags.py. diff --git a/src/third_party/wiredtiger/src/include/hardware.h b/src/third_party/wiredtiger/src/include/hardware.h index 93ed8a868b6..0e52818ae05 100644 --- a/src/third_party/wiredtiger/src/include/hardware.h +++ b/src/third_party/wiredtiger/src/include/hardware.h @@ -45,7 +45,16 @@ &(p)->flags_atomic, __orig, __orig & ~(uint8_t)(mask))); \ } while (0) -#define WT_CACHE_LINE_ALIGNMENT 64 /* Cache line alignment */ +/* + * Cache line alignment. 
+ */ +#if defined(__PPC64__) || defined(PPC64) +#define WT_CACHE_LINE_ALIGNMENT 128 +#elif defined(__s390x__) +#define WT_CACHE_LINE_ALIGNMENT 256 +#else +#define WT_CACHE_LINE_ALIGNMENT 64 +#endif #define WT_CACHE_LINE_ALIGNMENT_VERIFY(session, a) \ WT_ASSERT(session, \ WT_PTRDIFF(&(a)[1], &(a)[0]) >= WT_CACHE_LINE_ALIGNMENT && \ diff --git a/src/third_party/wiredtiger/src/include/intpack.i b/src/third_party/wiredtiger/src/include/intpack.i index b27afd24e6c..e8bea58cede 100644 --- a/src/third_party/wiredtiger/src/include/intpack.i +++ b/src/third_party/wiredtiger/src/include/intpack.i @@ -59,7 +59,7 @@ /* Count the leading zero bytes. */ #if defined(__GNUC__) #define WT_LEADING_ZEROS(x, i) \ - (i = (x == 0) ? (int)sizeof (x) : __builtin_clzll(x) >> 3) + (i = (x == 0) ? (int)sizeof(x) : __builtin_clzll(x) >> 3) #elif defined(_MSC_VER) #define WT_LEADING_ZEROS(x, i) do { \ if (x == 0) i = (int)sizeof(x); \ @@ -89,7 +89,7 @@ __wt_vpack_posint(uint8_t **pp, size_t maxlen, uint64_t x) int len, lz, shift; WT_LEADING_ZEROS(x, lz); - len = (int)sizeof (x) - lz; + len = (int)sizeof(x) - lz; WT_SIZE_CHECK_PACK(len + 1, maxlen); p = *pp; @@ -114,7 +114,7 @@ __wt_vpack_negint(uint8_t **pp, size_t maxlen, uint64_t x) int len, lz, shift; WT_LEADING_ZEROS(~x, lz); - len = (int)sizeof (x) - lz; + len = (int)sizeof(x) - lz; WT_SIZE_CHECK_PACK(len + 1, maxlen); p = *pp; @@ -170,7 +170,7 @@ __wt_vunpack_negint(const uint8_t **pp, size_t maxlen, uint64_t *retp) /* There are four length bits in the first byte. 
*/ p = *pp; - len = (int)sizeof (x) - (*p++ & 0xf); + len = (int)sizeof(x) - (*p++ & 0xf); WT_SIZE_CHECK_UNPACK(len + 1, maxlen); for (x = UINT64_MAX; len != 0; --len) diff --git a/src/third_party/wiredtiger/src/include/os_fhandle.i b/src/third_party/wiredtiger/src/include/os_fhandle.i index 313bf8eca3f..9bf5ce0e60b 100644 --- a/src/third_party/wiredtiger/src/include/os_fhandle.i +++ b/src/third_party/wiredtiger/src/include/os_fhandle.i @@ -26,7 +26,7 @@ __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) * There is no way to check when the non-blocking sync-file-range is * complete, but we track the time taken in the call for completeness. */ - WT_STAT_FAST_CONN_INCR_ATOMIC(session, fsync_active); + WT_STAT_FAST_CONN_INCR_ATOMIC(session, thread_fsync_active); WT_STAT_FAST_CONN_INCR(session, fsync_io); if (block) ret = (handle->fh_sync == NULL ? 0 : @@ -34,7 +34,7 @@ __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) else ret = (handle->fh_sync_nowait == NULL ? 0 : handle->fh_sync_nowait(handle, (WT_SESSION *)session)); - WT_STAT_FAST_CONN_DECR_ATOMIC(session, fsync_active); + WT_STAT_FAST_CONN_DECR_ATOMIC(session, thread_fsync_active); return (ret); } @@ -107,13 +107,13 @@ __wt_read( "%s: handle-read: %" WT_SIZET_FMT " at %" PRIuMAX, fh->handle->name, len, (uintmax_t)offset)); - WT_STAT_FAST_CONN_INCR_ATOMIC(session, read_active); + WT_STAT_FAST_CONN_INCR_ATOMIC(session, thread_read_active); WT_STAT_FAST_CONN_INCR(session, read_io); ret = fh->handle->fh_read( fh->handle, (WT_SESSION *)session, offset, len, buf); - WT_STAT_FAST_CONN_DECR_ATOMIC(session, read_active); + WT_STAT_FAST_CONN_DECR_ATOMIC(session, thread_read_active); return (ret); } @@ -165,12 +165,12 @@ __wt_write(WT_SESSION_IMPL *session, "%s: handle-write: %" WT_SIZET_FMT " at %" PRIuMAX, fh->handle->name, len, (uintmax_t)offset)); - WT_STAT_FAST_CONN_INCR_ATOMIC(session, write_active); + WT_STAT_FAST_CONN_INCR_ATOMIC(session, thread_write_active); WT_STAT_FAST_CONN_INCR(session, 
write_io); ret = fh->handle->fh_write( fh->handle, (WT_SESSION *)session, offset, len, buf); - WT_STAT_FAST_CONN_DECR_ATOMIC(session, write_active); + WT_STAT_FAST_CONN_DECR_ATOMIC(session, thread_write_active); return (ret); } diff --git a/src/third_party/wiredtiger/src/include/os_fs.i b/src/third_party/wiredtiger/src/include/os_fs.i index 88ee71d953a..a3a2fe29b65 100644 --- a/src/third_party/wiredtiger/src/include/os_fs.i +++ b/src/third_party/wiredtiger/src/include/os_fs.i @@ -8,7 +8,7 @@ /* * __wt_fs_directory_list -- - * Get a list of files from a directory. + * Return a list of files from a directory. */ static inline int __wt_fs_directory_list(WT_SESSION_IMPL *session, @@ -60,61 +60,6 @@ __wt_fs_directory_list_free( return (ret); } -/* - * __wt_fs_directory_sync -- - * Flush a directory to ensure file creation is durable. - */ -static inline int -__wt_fs_directory_sync(WT_SESSION_IMPL *session, const char *name) -{ - WT_DECL_RET; - WT_FILE_SYSTEM *file_system; - WT_SESSION *wt_session; - char *copy, *dir; - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - - WT_RET(__wt_verbose( - session, WT_VERB_FILEOPS, "%s: directory-sync", name)); - - /* - * POSIX 1003.1 does not require that fsync of a file handle ensures the - * entry in the directory containing the file has also reached disk (and - * there are historic Linux filesystems requiring it). If the underlying - * filesystem method is set, do an explicit fsync on a file descriptor - * for the directory to be sure. - * - * directory-sync is not a required call, no method means the call isn't - * needed. - */ - file_system = S2C(session)->file_system; - if (file_system->fs_directory_sync == NULL) - return (0); - - copy = NULL; - if (name == NULL || strchr(name, '/') == NULL) - name = S2C(session)->home; - else { - /* - * File name construction should not return a path without any - * slash separator, but caution isn't unreasonable. 
- */ - WT_RET(__wt_filename(session, name, &copy)); - if ((dir = strrchr(copy, '/')) == NULL) - name = S2C(session)->home; - else { - dir[1] = '\0'; - name = copy; - } - } - - wt_session = (WT_SESSION *)session; - ret = file_system->fs_directory_sync(file_system, wt_session, name); - - __wt_free(session, copy); - return (ret); -} - /* * __wt_fs_exist -- * Return if the file exists. @@ -141,10 +86,10 @@ __wt_fs_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) /* * __wt_fs_remove -- - * POSIX remove. + * Remove the file. */ static inline int -__wt_fs_remove(WT_SESSION_IMPL *session, const char *name) +__wt_fs_remove(WT_SESSION_IMPL *session, const char *name, bool durable) { WT_DECL_RET; WT_FILE_SYSTEM *file_system; @@ -169,7 +114,8 @@ __wt_fs_remove(WT_SESSION_IMPL *session, const char *name) file_system = S2C(session)->file_system; wt_session = (WT_SESSION *)session; - ret = file_system->fs_remove(file_system, wt_session, path); + ret = file_system->fs_remove( + file_system, wt_session, path, durable ? WT_FS_DURABLE : 0); __wt_free(session, path); return (ret); @@ -177,10 +123,11 @@ __wt_fs_remove(WT_SESSION_IMPL *session, const char *name) /* * __wt_fs_rename -- - * POSIX rename. + * Rename the file. */ static inline int -__wt_fs_rename(WT_SESSION_IMPL *session, const char *from, const char *to) +__wt_fs_rename( + WT_SESSION_IMPL *session, const char *from, const char *to, bool durable) { WT_DECL_RET; WT_FILE_SYSTEM *file_system; @@ -211,8 +158,8 @@ __wt_fs_rename(WT_SESSION_IMPL *session, const char *from, const char *to) file_system = S2C(session)->file_system; wt_session = (WT_SESSION *)session; - ret = file_system->fs_rename( - file_system, wt_session, from_path, to_path); + ret = file_system->fs_rename(file_system, + wt_session, from_path, to_path, durable ?
WT_FS_DURABLE : 0); err: __wt_free(session, from_path); __wt_free(session, to_path); @@ -221,7 +168,7 @@ err: __wt_free(session, from_path); /* * __wt_fs_size -- - * Get the size of a file in bytes, by file name. + * Return the size of a file in bytes, by file name. */ static inline int __wt_fs_size(WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep) diff --git a/src/third_party/wiredtiger/src/include/os_fstream.i b/src/third_party/wiredtiger/src/include/os_fstream.i index 8c0fdadbdb0..92274431011 100644 --- a/src/third_party/wiredtiger/src/include/os_fstream.i +++ b/src/third_party/wiredtiger/src/include/os_fstream.i @@ -93,5 +93,5 @@ __wt_sync_and_rename(WT_SESSION_IMPL *session, WT_TRET(__wt_fclose(session, &fstr)); WT_RET(ret); - return (__wt_rename_and_sync_directory(session, from, to)); + return (__wt_fs_rename(session, from, to, true)); } diff --git a/src/third_party/wiredtiger/src/include/queue.h b/src/third_party/wiredtiger/src/include/queue.h index 1d494875cf6..e3d4daf0f4c 100644 --- a/src/third_party/wiredtiger/src/include/queue.h +++ b/src/third_party/wiredtiger/src/include/queue.h @@ -1,4 +1,4 @@ -/* +/*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -27,28 +27,18 @@ * SUCH DAMAGE. * * @(#)queue.h 8.5 (Berkeley) 8/20/94 - * $FreeBSD: src/sys/sys/queue.h,v 1.54 2002/08/05 05:18:43 alfred Exp $ + * $FreeBSD$ */ -#ifndef _DB_QUEUE_H_ -#define _DB_QUEUE_H_ - -#if defined(__cplusplus) -extern "C" { -#endif - /* + * This is a stripped-down version of the FreeBSD sys/queue.h include file. + * * WiredTiger only uses the TAILQ macros (we've gotten into trouble in the past * by trying to use simpler queues and subsequently discovering a list we didn't * think would ever get to be large could, under some workloads, become large, * and the linear performance for removal of elements from simpler macros proved * to be more trouble than the memory savings were worth. 
* - * Additionally, we've altered the TAILQ_INSERT_XXX functions to include a write - * barrier, in order to ensure we never insert a partially built structure onto - * a list (this is required because the spinlocks we use don't necessarily imply - * a write barrier). - * * We #undef all of the macros because there are incompatible versions of this * file and these macros on various systems. What makes the problem worse is * they are included and/or defined by system include files which we may have @@ -57,13 +47,28 @@ extern "C" { * several of the LIST_XXX macros. Visual C.NET 7.0 also defines some of these * same macros in Vc7\PlatformSDK\Include\WinNT.h. Make sure we use ours. */ - +#undef QMD_SAVELINK +#undef QMD_TAILQ_CHECK_HEAD +#undef QMD_TAILQ_CHECK_NEXT +#undef QMD_TAILQ_CHECK_PREV +#undef QMD_TAILQ_CHECK_TAIL +#undef QMD_TRACE_ELEM +#undef QMD_TRACE_HEAD +#undef QUEUE_TYPEOF +#undef TAILQ_CLASS_ENTRY +#undef TAILQ_CLASS_HEAD #undef TAILQ_CONCAT #undef TAILQ_EMPTY #undef TAILQ_ENTRY #undef TAILQ_FIRST #undef TAILQ_FOREACH +#undef TAILQ_FOREACH_FROM +#undef TAILQ_FOREACH_FROM_SAFE #undef TAILQ_FOREACH_REVERSE +#undef TAILQ_FOREACH_REVERSE_FROM +#undef TAILQ_FOREACH_REVERSE_FROM_SAFE +#undef TAILQ_FOREACH_REVERSE_SAFE +#undef TAILQ_FOREACH_SAFE #undef TAILQ_HEAD #undef TAILQ_HEAD_INITIALIZER #undef TAILQ_INIT @@ -76,41 +81,25 @@ extern "C" { #undef TAILQ_PREV #undef TAILQ_REMOVE #undef TRACEBUF +#undef TRACEBUF_INITIALIZER #undef TRASHIT +#undef TAILQ_SWAP -#define QUEUE_MACRO_DEBUG 0 -#if QUEUE_MACRO_DEBUG -/* Store the last 2 places the queue element or head was altered */ -struct qm_trace { - char * lastfile; - int lastline; - char * prevfile; - int prevline; -}; - -#define TRACEBUF struct qm_trace trace; -#define TRASHIT(x) do {(x) = (void *)-1;} while (0) - -#define QMD_TRACE_HEAD(head) do { \ - (head)->trace.prevline = (head)->trace.lastline; \ - (head)->trace.prevfile = (head)->trace.lastfile; \ - (head)->trace.lastline = __LINE__; \ - 
(head)->trace.lastfile = __FILE__; \ -} while (0) - -#define QMD_TRACE_ELEM(elem) do { \ - (elem)->trace.prevline = (elem)->trace.lastline; \ - (elem)->trace.prevfile = (elem)->trace.lastfile; \ - (elem)->trace.lastline = __LINE__; \ - (elem)->trace.lastfile = __FILE__; \ -} while (0) - -#else #define QMD_TRACE_ELEM(elem) #define QMD_TRACE_HEAD(head) +#define QMD_SAVELINK(name, link) #define TRACEBUF +#define TRACEBUF_INITIALIZER #define TRASHIT(x) -#endif /* QUEUE_MACRO_DEBUG */ + +#ifdef __cplusplus +/* + * In C++ there can be structure lists and class lists: + */ +#define QUEUE_TYPEOF(type) type +#else +#define QUEUE_TYPEOF(type) struct type +#endif /* * Tail queue declarations. @@ -122,8 +111,15 @@ struct name { \ TRACEBUF \ } +#define TAILQ_CLASS_HEAD(name, type) \ +struct name { \ + class type *tqh_first; /* first element */ \ + class type **tqh_last; /* addr of last next element */ \ + TRACEBUF \ +} + #define TAILQ_HEAD_INITIALIZER(head) \ - { NULL, &(head).tqh_first } + { NULL, &(head).tqh_first, TRACEBUF_INITIALIZER } #define TAILQ_ENTRY(type) \ struct { \ @@ -132,16 +128,28 @@ struct { \ TRACEBUF \ } +#define TAILQ_CLASS_ENTRY(type) \ +struct { \ + class type *tqe_next; /* next element */ \ + class type **tqe_prev; /* address of previous next element */ \ + TRACEBUF \ +} + /* * Tail queue functions. */ +#define QMD_TAILQ_CHECK_HEAD(head, field) +#define QMD_TAILQ_CHECK_TAIL(head, headname) +#define QMD_TAILQ_CHECK_NEXT(elm, field) +#define QMD_TAILQ_CHECK_PREV(elm, field) + #define TAILQ_CONCAT(head1, head2, field) do { \ if (!TAILQ_EMPTY(head2)) { \ *(head1)->tqh_last = (head2)->tqh_first; \ (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \ (head1)->tqh_last = (head2)->tqh_last; \ TAILQ_INIT((head2)); \ - QMD_TRACE_HEAD(head); \ + QMD_TRACE_HEAD(head1); \ QMD_TRACE_HEAD(head2); \ } \ } while (0) @@ -155,11 +163,41 @@ struct { \ (var); \ (var) = TAILQ_NEXT((var), field)) +#define TAILQ_FOREACH_FROM(var, head, field) \ + for ((var) = ((var) ? 
(var) : TAILQ_FIRST((head))); \ + (var); \ + (var) = TAILQ_NEXT((var), field)) + +#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = TAILQ_FIRST((head)); \ + (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define TAILQ_FOREACH_FROM_SAFE(var, head, field, tvar) \ + for ((var) = ((var) ? (var) : TAILQ_FIRST((head))); \ + (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + #define TAILQ_FOREACH_REVERSE(var, head, headname, field) \ for ((var) = TAILQ_LAST((head), headname); \ (var); \ (var) = TAILQ_PREV((var), headname, field)) +#define TAILQ_FOREACH_REVERSE_FROM(var, head, headname, field) \ + for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname)); \ + (var); \ + (var) = TAILQ_PREV((var), headname, field)) + +#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \ + for ((var) = TAILQ_LAST((head), headname); \ + (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \ + (var) = (tvar)) + +#define TAILQ_FOREACH_REVERSE_FROM_SAFE(var, head, headname, field, tvar) \ + for ((var) = ((var) ? 
(var) : TAILQ_LAST((head), headname)); \ + (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \ + (var) = (tvar)) + #define TAILQ_INIT(head) do { \ TAILQ_FIRST((head)) = NULL; \ (head)->tqh_last = &TAILQ_FIRST((head)); \ @@ -167,9 +205,9 @@ struct { \ } while (0) #define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ - WT_WRITE_BARRIER(); \ + QMD_TAILQ_CHECK_NEXT(listelm, field); \ if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\ - TAILQ_NEXT((elm), field)->field.tqe_prev = \ + TAILQ_NEXT((elm), field)->field.tqe_prev = \ &TAILQ_NEXT((elm), field); \ else { \ (head)->tqh_last = &TAILQ_NEXT((elm), field); \ @@ -178,21 +216,21 @@ struct { \ TAILQ_NEXT((listelm), field) = (elm); \ (elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \ QMD_TRACE_ELEM(&(elm)->field); \ - QMD_TRACE_ELEM(&listelm->field); \ + QMD_TRACE_ELEM(&(listelm)->field); \ } while (0) #define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ - WT_WRITE_BARRIER(); \ + QMD_TAILQ_CHECK_PREV(listelm, field); \ (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ TAILQ_NEXT((elm), field) = (listelm); \ *(listelm)->field.tqe_prev = (elm); \ (listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \ QMD_TRACE_ELEM(&(elm)->field); \ - QMD_TRACE_ELEM(&listelm->field); \ + QMD_TRACE_ELEM(&(listelm)->field); \ } while (0) #define TAILQ_INSERT_HEAD(head, elm, field) do { \ - WT_WRITE_BARRIER(); \ + QMD_TAILQ_CHECK_HEAD(head, field); \ if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != NULL) \ TAILQ_FIRST((head))->field.tqe_prev = \ &TAILQ_NEXT((elm), field); \ @@ -205,7 +243,7 @@ struct { \ } while (0) #define TAILQ_INSERT_TAIL(head, elm, field) do { \ - WT_WRITE_BARRIER(); \ + QMD_TAILQ_CHECK_TAIL(head, field); \ TAILQ_NEXT((elm), field) = NULL; \ (elm)->field.tqe_prev = (head)->tqh_last; \ *(head)->tqh_last = (elm); \ @@ -223,20 +261,36 @@ struct { \ (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) #define TAILQ_REMOVE(head, elm, field) do { \ + 
QMD_SAVELINK(oldnext, (elm)->field.tqe_next); \ + QMD_SAVELINK(oldprev, (elm)->field.tqe_prev); \ + QMD_TAILQ_CHECK_NEXT(elm, field); \ + QMD_TAILQ_CHECK_PREV(elm, field); \ if ((TAILQ_NEXT((elm), field)) != NULL) \ - TAILQ_NEXT((elm), field)->field.tqe_prev = \ + TAILQ_NEXT((elm), field)->field.tqe_prev = \ (elm)->field.tqe_prev; \ else { \ (head)->tqh_last = (elm)->field.tqe_prev; \ QMD_TRACE_HEAD(head); \ } \ *(elm)->field.tqe_prev = TAILQ_NEXT((elm), field); \ - TRASHIT((elm)->field.tqe_next); \ - TRASHIT((elm)->field.tqe_prev); \ + TRASHIT(*oldnext); \ + TRASHIT(*oldprev); \ QMD_TRACE_ELEM(&(elm)->field); \ } while (0) -#if defined(__cplusplus) -} -#endif -#endif /* !_DB_QUEUE_H_ */ +#define TAILQ_SWAP(head1, head2, type, field) do { \ + QUEUE_TYPEOF(type) *swap_first = (head1)->tqh_first; \ + QUEUE_TYPEOF(type) **swap_last = (head1)->tqh_last; \ + (head1)->tqh_first = (head2)->tqh_first; \ + (head1)->tqh_last = (head2)->tqh_last; \ + (head2)->tqh_first = swap_first; \ + (head2)->tqh_last = swap_last; \ + if ((swap_first = (head1)->tqh_first) != NULL) \ + swap_first->field.tqe_prev = &(head1)->tqh_first; \ + else \ + (head1)->tqh_last = &(head1)->tqh_first; \ + if ((swap_first = (head2)->tqh_first) != NULL) \ + swap_first->field.tqe_prev = &(head2)->tqh_first; \ + else \ + (head2)->tqh_last = &(head2)->tqh_first; \ +} while (0) diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index 57126af8aa4..1df24382236 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -145,14 +145,14 @@ __wt_stats_clear(void *stats_arg, int slot) #define WT_STAT_DECRV(session, stats, fld, value) \ (stats)[WT_STATS_SLOT_ID(session)]->fld -= (int64_t)(value) #define WT_STAT_DECRV_ATOMIC(session, stats, fld, value) \ - __wt_atomic_addi64( \ + __wt_atomic_subi64( \ &(stats)[WT_STATS_SLOT_ID(session)]->fld, (int64_t)(value)) #define WT_STAT_DECR(session, stats, fld) \ 
WT_STAT_DECRV(session, stats, fld, 1) #define WT_STAT_INCRV(session, stats, fld, value) \ (stats)[WT_STATS_SLOT_ID(session)]->fld += (int64_t)(value) #define WT_STAT_INCRV_ATOMIC(session, stats, fld, value) \ - __wt_atomic_subi64( \ + __wt_atomic_addi64( \ &(stats)[WT_STATS_SLOT_ID(session)]->fld, (int64_t)(value)) #define WT_STAT_INCR(session, stats, fld) \ WT_STAT_INCRV(session, stats, fld, 1) @@ -273,9 +273,12 @@ struct __wt_connection_stats { int64_t block_write; int64_t block_byte_read; int64_t block_byte_write; + int64_t block_byte_write_checkpoint; int64_t block_map_read; int64_t block_byte_map_read; + int64_t cache_bytes_image; int64_t cache_bytes_inuse; + int64_t cache_bytes_other; int64_t cache_bytes_read; int64_t cache_bytes_write; int64_t cache_eviction_checkpoint; @@ -309,6 +312,8 @@ struct __wt_connection_stats { int64_t cache_eviction_maximum_page_size; int64_t cache_eviction_dirty; int64_t cache_eviction_app_dirty; + int64_t cache_read_overflow; + int64_t cache_overflow_value; int64_t cache_eviction_deepen; int64_t cache_write_lookaside; int64_t cache_pages_inuse; @@ -316,6 +321,7 @@ struct __wt_connection_stats { int64_t cache_eviction_force_delete; int64_t cache_eviction_app; int64_t cache_eviction_pages_queued; + int64_t cache_eviction_pages_queued_urgent; int64_t cache_eviction_pages_queued_oldest; int64_t cache_read; int64_t cache_read_lookaside; @@ -328,7 +334,6 @@ struct __wt_connection_stats { int64_t cache_overhead; int64_t cache_bytes_internal; int64_t cache_bytes_leaf; - int64_t cache_bytes_overflow; int64_t cache_bytes_dirty; int64_t cache_pages_dirty; int64_t cache_eviction_clean; @@ -408,9 +413,25 @@ struct __wt_connection_stats { int64_t rec_split_stashed_objects; int64_t session_cursor_open; int64_t session_open; - int64_t fsync_active; - int64_t read_active; - int64_t write_active; + int64_t session_table_compact_fail; + int64_t session_table_compact_success; + int64_t session_table_create_fail; + int64_t 
session_table_create_success; + int64_t session_table_drop_fail; + int64_t session_table_drop_success; + int64_t session_table_rebalance_fail; + int64_t session_table_rebalance_success; + int64_t session_table_rename_fail; + int64_t session_table_rename_success; + int64_t session_table_salvage_fail; + int64_t session_table_salvage_success; + int64_t session_table_truncate_fail; + int64_t session_table_truncate_success; + int64_t session_table_verify_fail; + int64_t session_table_verify_success; + int64_t thread_fsync_active; + int64_t thread_read_active; + int64_t thread_write_active; int64_t page_busy_blocked; int64_t page_forcible_evict_blocked; int64_t page_locked_blocked; @@ -424,13 +445,13 @@ struct __wt_connection_stats { int64_t txn_checkpoint_time_max; int64_t txn_checkpoint_time_min; int64_t txn_checkpoint_time_recent; + int64_t txn_checkpoint_scrub_target; + int64_t txn_checkpoint_scrub_time; int64_t txn_checkpoint_time_total; int64_t txn_checkpoint; int64_t txn_fail_cache; int64_t txn_checkpoint_fsync_post; - int64_t txn_checkpoint_fsync_pre; int64_t txn_checkpoint_fsync_post_duration; - int64_t txn_checkpoint_fsync_pre_duration; int64_t txn_pinned_range; int64_t txn_pinned_checkpoint_range; int64_t txn_pinned_snapshot_range; @@ -484,6 +505,7 @@ struct __wt_dsrc_stats { int64_t btree_compact_rewrite; int64_t btree_row_internal; int64_t btree_row_leaf; + int64_t cache_bytes_inuse; int64_t cache_bytes_read; int64_t cache_bytes_write; int64_t cache_eviction_checkpoint; diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h index d10738cc670..2e41ae8620d 100644 --- a/src/third_party/wiredtiger/src/include/txn.h +++ b/src/third_party/wiredtiger/src/include/txn.h @@ -98,6 +98,7 @@ struct __wt_txn_global { volatile uint32_t checkpoint_id; /* Checkpoint's session ID */ volatile uint64_t checkpoint_gen; volatile uint64_t checkpoint_pinned; + volatile uint64_t checkpoint_txnid; /* Checkpoint's txn ID */ /* Named 
snapshot state. */ WT_RWLOCK *nsnap_rwlock; diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index 96f7426e421..8f0f49d9676 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -105,7 +105,8 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session) { WT_BTREE *btree; WT_TXN_GLOBAL *txn_global; - uint64_t checkpoint_gen, checkpoint_pinned, oldest_id; + uint64_t checkpoint_pinned, oldest_id; + bool include_checkpoint_txn; txn_global = &S2C(session)->txn_global; btree = S2BT_SAFE(session); @@ -117,7 +118,11 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session) * we take the minimum of the other two IDs, which is what we want. */ oldest_id = txn_global->oldest_id; - WT_ORDERED_READ(checkpoint_gen, txn_global->checkpoint_gen); + if (btree == NULL) + include_checkpoint_txn = false; + else + WT_ORDERED_READ( + include_checkpoint_txn, btree->include_checkpoint_txn); checkpoint_pinned = txn_global->checkpoint_pinned; /* @@ -130,10 +135,9 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session) * checkpoint, or this handle is up to date with the active checkpoint * then it's safe to ignore the checkpoint ID in the visibility check. */ - if (checkpoint_pinned == WT_TXN_NONE || + if (!include_checkpoint_txn || checkpoint_pinned == WT_TXN_NONE || WT_TXNID_LT(oldest_id, checkpoint_pinned) || - WT_SESSION_IS_CHECKPOINT(session) || - (btree != NULL && btree->checkpoint_gen == checkpoint_gen)) + WT_SESSION_IS_CHECKPOINT(session)) return (oldest_id); return (checkpoint_pinned); diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index f578f4e6c08..0e022048835 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -131,13 +131,13 @@ struct __wt_item { * The maximum packed size of a 64-bit integer. 
The ::wiredtiger_struct_pack * function will pack single long integers into at most this many bytes. */ -#define WT_INTPACK64_MAXSIZE ((int)sizeof (int64_t) + 1) +#define WT_INTPACK64_MAXSIZE ((int)sizeof(int64_t) + 1) /*! * The maximum packed size of a 32-bit integer. The ::wiredtiger_struct_pack * function will pack single integers into at most this many bytes. */ -#define WT_INTPACK32_MAXSIZE ((int)sizeof (int32_t) + 1) +#define WT_INTPACK32_MAXSIZE ((int)sizeof(int32_t) + 1) /*! * A WT_CURSOR handle is the interface to a cursor. @@ -405,6 +405,12 @@ struct __wt_cursor { * WT_CURSOR::next (WT_CURSOR::prev) method will iterate from the * beginning (end) of the table. * + * If the cursor does not have record number keys or was not configured + * with "append=true", the cursor ends with no key set and a subsequent + * call to the WT_CURSOR::get_key method will fail. The cursor ends with + * no value set and a subsequent call to the WT_CURSOR::get_value method + * will fail. + * * Inserting a new record after the current maximum record in a * fixed-length bit field column-store (that is, a store with an * 'r' type key and 't' type value) may implicitly create the missing @@ -1159,8 +1165,8 @@ struct __wt_session { * @config{ ),,} * @config{memory_page_max, the maximum size a page can grow to in * memory before being reconciled to disk. The specified size will be - * adjusted to a lower bound of 50 * leaf_page_max\, and an - * upper bound of cache_size / 2. This limit is soft - it + * adjusted to a lower bound of leaf_page_max\, and an + * upper bound of cache_size / 10. This limit is soft - it * is possible for pages to be temporarily larger than this value. 
This * setting is ignored for LSM trees\, see \c chunk_size., an integer * between 512B and 10TB; default \c 5MB.} @@ -1783,8 +1789,6 @@ struct __wt_connection { * database can configure both log_size and wait to set an upper bound * for checkpoints; setting this value above 0 configures periodic * checkpoints., an integer between 0 and 2GB; default \c 0.} - * @config{    name, the checkpoint name., a string; - * default \c "WiredTigerCheckpoint".} * @config{    wait, seconds to wait between each * checkpoint; setting this value above 0 configures periodic * checkpoints., an integer between 0 and 100000; default \c 0.} @@ -1806,11 +1810,11 @@ struct __wt_connection { * @config{eviction_dirty_target, continue evicting until the cache has * less dirty memory than the value\, as a percentage of the total cache * size. Dirty pages will only be evicted if the cache is full enough - * to trigger eviction., an integer between 5 and 99; default \c 80.} + * to trigger eviction., an integer between 1 and 99; default \c 5.} * @config{eviction_dirty_trigger, trigger eviction when the cache is * using this much memory for dirty content\, as a percentage of the * total cache size. This setting only alters behavior if it is lower - * than eviction_trigger., an integer between 5 and 99; default \c 95.} + * than eviction_trigger., an integer between 1 and 99; default \c 20.} * @config{eviction_target, continue evicting until the cache has less * total memory than the value\, as a percentage of the total cache * size. Must be less than \c eviction_trigger., an integer between 10 @@ -1836,25 +1840,8 @@ struct __wt_connection { * configuration options defined below.} * @config{    archive, automatically archive * unneeded log files., a boolean flag; default \c true.} - * @config{    compressor, configure a compressor - * for log records. Permitted values are \c "none" or custom - * compression engine name created with WT_CONNECTION::add_compressor. 
- * If WiredTiger has builtin support for \c "snappy"\, \c "lz4" or \c - * "zlib" compression\, these names are also available. See @ref - * compression for more information., a string; default \c none.} - * @config{    enabled, enable logging subsystem., a - * boolean flag; default \c false.} - * @config{    file_max, the maximum size of log - * files., an integer between 100KB and 2GB; default \c 100MB.} - * @config{    path, the path to a directory into - * which the log files are written. If the value is not an absolute - * path name\, the files are created relative to the database home., a - * string; default \c ".".} - * @config{    prealloc, - * pre-allocate log files., a boolean flag; default \c true.} - * @config{    recover, run recovery or error if - * recovery needs to run after an unclean shutdown., a string\, chosen - * from the following options: \c "error"\, \c "on"; default \c on.} + * @config{    prealloc, pre-allocate log files., a + * boolean flag; default \c true.} * @config{    zero_fill, manually write zeroes into * log files., a boolean flag; default \c false.} * @config{ ),,} @@ -1914,11 +1901,6 @@ struct __wt_connection { * statistics in JSON format., a boolean flag; default \c false.} * @config{    on_close, log statistics on database * close., a boolean flag; default \c false.} - * @config{    path, the pathname to a file into - * which the log records are written\, may contain ISO C standard - * strftime conversion specifications. If the value is not an absolute - * path name\, the file is created relative to the database home., a - * string; default \c "WiredTigerStat.%d.%H".} * @config{    sources, if non-empty\, include * statistics for the list of data source URIs\, if they are open at the * time of the statistics logging. 
The list may include URIs matching a @@ -2223,11 +2205,10 @@ struct __wt_connection { * configure both log_size and wait to set an upper bound for checkpoints; * setting this value above 0 configures periodic checkpoints., an integer * between 0 and 2GB; default \c 0.} - * @config{    name, the - * checkpoint name., a string; default \c "WiredTigerCheckpoint".} - * @config{    wait, seconds to wait between each - * checkpoint; setting this value above 0 configures periodic checkpoints., an - * integer between 0 and 100000; default \c 0.} + * @config{    wait, + * seconds to wait between each checkpoint; setting this value above 0 + * configures periodic checkpoints., an integer between 0 and 100000; default \c + * 0.} * @config{ ),,} * @config{checkpoint_sync, flush files to stable storage when closing or * writing checkpoints., a boolean flag; default \c true.} @@ -2288,11 +2269,11 @@ struct __wt_connection { * @config{eviction_dirty_target, continue evicting until the cache has less * dirty memory than the value\, as a percentage of the total cache size. Dirty * pages will only be evicted if the cache is full enough to trigger eviction., - * an integer between 5 and 99; default \c 80.} + * an integer between 1 and 99; default \c 5.} * @config{eviction_dirty_trigger, trigger eviction when the cache is using this * much memory for dirty content\, as a percentage of the total cache size. * This setting only alters behavior if it is lower than eviction_trigger., an - * integer between 5 and 99; default \c 95.} + * integer between 1 and 99; default \c 20.} * @config{eviction_target, continue evicting until the cache has less total * memory than the value\, as a percentage of the total cache size. 
Must be * less than \c eviction_trigger., an integer between 10 and 99; default \c 80.} @@ -2343,9 +2324,10 @@ struct __wt_connection { * subsystem., a boolean flag; default \c false.} * @config{    file_max, the maximum size of log files., an * integer between 100KB and 2GB; default \c 100MB.} - * @config{    path, the path to a directory into which the - * log files are written. If the value is not an absolute path name\, the files - * are created relative to the database home., a string; default \c ".".} + * @config{    path, the name of a directory into which log + * files are written. The directory must already exist. If the value is not an + * absolute path\, the path is relative to the database home (see @ref + * absolute_path for more information)., a string; default \c ".".} * @config{    prealloc, pre-allocate log files., a boolean * flag; default \c true.} * @config{    recover, run recovery @@ -2415,16 +2397,15 @@ struct __wt_connection { * boolean flag; default \c false.} * @config{    on_close, * log statistics on database close., a boolean flag; default \c false.} - * @config{    path, the pathname to a file into which the - * log records are written\, may contain ISO C standard strftime conversion - * specifications. If the value is not an absolute path name\, the file is - * created relative to the database home., a string; default \c - * "WiredTigerStat.%d.%H".} - * @config{    sources, if - * non-empty\, include statistics for the list of data source URIs\, if they are - * open at the time of the statistics logging. The list may include URIs - * matching a single data source ("table:mytable")\, or a URI matching all data - * sources of a particular type ("table:")., a list of strings; default empty.} + * @config{    path, the name of a directory into which + * statistics files are written. The directory must already exist. 
If the + * value is not an absolute path\, the path is relative to the database home + * (see @ref absolute_path for more information)., a string; default \c ".".} + * @config{    sources, if non-empty\, include statistics + * for the list of data source URIs\, if they are open at the time of the + * statistics logging. The list may include URIs matching a single data source + * ("table:mytable")\, or a URI matching all data sources of a particular type + * ("table:")., a list of strings; default empty.} * @config{    timestamp, a timestamp prepended to each log * record\, may contain strftime conversion specifications\, when \c json is * configured\, defaults to \c "%FT%Y.000Z"., a string; default \c "%b %d @@ -3701,24 +3682,34 @@ struct __wt_extractor { #if !defined(SWIG) /*! WT_FILE_SYSTEM::open_file file types */ typedef enum { - WT_OPEN_FILE_TYPE_CHECKPOINT, /*!< open a data file checkpoint */ - WT_OPEN_FILE_TYPE_DATA, /*!< open a data file */ - WT_OPEN_FILE_TYPE_DIRECTORY, /*!< open a directory */ - WT_OPEN_FILE_TYPE_LOG, /*!< open a log file */ - WT_OPEN_FILE_TYPE_REGULAR /*!< open a regular file */ -} WT_OPEN_FILE_TYPE; + WT_FS_OPEN_FILE_TYPE_CHECKPOINT,/*!< open a data file checkpoint */ + WT_FS_OPEN_FILE_TYPE_DATA, /*!< open a data file */ + WT_FS_OPEN_FILE_TYPE_DIRECTORY, /*!< open a directory */ + WT_FS_OPEN_FILE_TYPE_LOG, /*!< open a log file */ + WT_FS_OPEN_FILE_TYPE_REGULAR /*!< open a regular file */ +} WT_FS_OPEN_FILE_TYPE; /*! WT_FILE_SYSTEM::open_file flags: create if does not exist */ -#define WT_OPEN_CREATE 0x001 +#define WT_FS_OPEN_CREATE 0x001 /*! WT_FILE_SYSTEM::open_file flags: direct I/O requested */ -#define WT_OPEN_DIRECTIO 0x002 -/*! WT_FILE_SYSTEM::open_file flags: error if exclusive use not available */ -#define WT_OPEN_EXCLUSIVE 0x004 +#define WT_FS_OPEN_DIRECTIO 0x002 +/*! WT_FILE_SYSTEM::open_file flags: file creation must be durable */ +#define WT_FS_OPEN_DURABLE 0x004 +/*! 
+ * WT_FILE_SYSTEM::open_file flags: return EBUSY if exclusive use not available + */ +#define WT_FS_OPEN_EXCLUSIVE 0x008 #ifndef DOXYGEN -#define WT_OPEN_FIXED 0x008 /* Path not home relative (internal) */ +#define WT_FS_OPEN_FIXED 0x010 /* Path not home relative (internal) */ #endif /*! WT_FILE_SYSTEM::open_file flags: open is read-only */ -#define WT_OPEN_READONLY 0x010 +#define WT_FS_OPEN_READONLY 0x020 + +/*! + * WT_FILE_SYSTEM::remove or WT_FILE_SYSTEM::rename flags: the remove or rename + * operation must be durable + */ +#define WT_FS_DURABLE 0x001 /*! * The interface implemented by applications to provide a custom file system @@ -3748,7 +3739,7 @@ struct __wt_file_system { * @param[out] dirlist the method returns an allocated array of * individually allocated strings, one for each entry in the * directory. - * @param[out] countp the method the number of entries returned + * @param[out] countp the number of entries returned */ int (*fs_directory_list)(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *directory, const char *prefix, @@ -3767,23 +3758,6 @@ struct __wt_file_system { int (*fs_directory_list_free)(WT_FILE_SYSTEM *file_system, WT_SESSION *session, char **dirlist, uint32_t count); - /*! - * Flush the named directory. - * - * This method is not required for readonly file systems or file systems - * where it is not necessary to flush a file's directory to ensure the - * durability of file system operations, and should be set to NULL when - * not required by the file system. - * - * @errors - * - * @param file_system the WT_FILE_SYSTEM - * @param session the current WiredTiger session - * @param directory the name of the directory - */ - int (*fs_directory_sync)(WT_FILE_SYSTEM *file_system, - WT_SESSION *session, const char *directory); - /*! * Return if the named file system object exists. * @@ -3800,6 +3774,16 @@ struct __wt_file_system { /*! 
* Open a handle for a named file system object * + * The method should return ENOENT if the file is not being created and + * does not exist. + * + * The method should return EACCES if the file cannot be opened in the + * requested mode (for example, a file opened for writing in a readonly + * file system). + * + * The method should return EBUSY if ::WT_FS_OPEN_EXCLUSIVE is set and + * the file is in use. + * * @errors * * @param file_system the WT_FILE_SYSTEM @@ -3809,8 +3793,8 @@ struct __wt_file_system { * The file type is provided to allow optimization for different file * access patterns. * @param flags flags indicating how to open the file, one or more of - * ::WT_OPEN_CREATE, ::WT_OPEN_DIRECTIO, ::WT_OPEN_EXCLUSIVE or - * ::WT_OPEN_READONLY. + * ::WT_FS_OPEN_CREATE, ::WT_FS_OPEN_DIRECTIO, ::WT_FS_OPEN_DURABLE, + * ::WT_FS_OPEN_EXCLUSIVE or ::WT_FS_OPEN_READONLY. * @param[out] file_handlep the handle to the newly opened file. File * system implementations must allocate memory for the handle and * the WT_FILE_HANDLE::name field, and fill in the WT_FILE_HANDLE:: @@ -3819,7 +3803,7 @@ struct __wt_file_system { * their own structure as a superset of a WT_FILE_HANDLE:: structure. */ int (*fs_open_file)(WT_FILE_SYSTEM *file_system, WT_SESSION *session, - const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags, + const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags, WT_FILE_HANDLE **file_handlep); /*! @@ -3833,9 +3817,11 @@ struct __wt_file_system { * @param file_system the WT_FILE_SYSTEM * @param session the current WiredTiger session * @param name the name of the file system object + * @param durable if the operation requires durability + * @param flags 0 or ::WT_FS_DURABLE */ - int (*fs_remove)( - WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name); + int (*fs_remove)(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, const char *name, uint32_t flags); /*! 
* Rename a named file system object @@ -3849,9 +3835,10 @@ struct __wt_file_system { * @param session the current WiredTiger session * @param from the original name of the object * @param to the new name for the object + * @param flags 0 or ::WT_FS_DURABLE */ - int (*fs_rename)(WT_FILE_SYSTEM *file_system, - WT_SESSION *session, const char *from, const char *to); + int (*fs_rename)(WT_FILE_SYSTEM *file_system, WT_SESSION *session, + const char *from, const char *to, uint32_t flags); /*! * Return the size of a named file system object @@ -3981,7 +3968,7 @@ struct __wt_file_handle { /*! * Lock/unlock a file from the perspective of other processes running - * in the system. + * in the system, where necessary. * * @errors * @@ -4256,340 +4243,380 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_BLOCK_BYTE_READ 1026 /*! block-manager: bytes written */ #define WT_STAT_CONN_BLOCK_BYTE_WRITE 1027 +/*! block-manager: bytes written for checkpoint */ +#define WT_STAT_CONN_BLOCK_BYTE_WRITE_CHECKPOINT 1028 /*! block-manager: mapped blocks read */ -#define WT_STAT_CONN_BLOCK_MAP_READ 1028 +#define WT_STAT_CONN_BLOCK_MAP_READ 1029 /*! block-manager: mapped bytes read */ -#define WT_STAT_CONN_BLOCK_BYTE_MAP_READ 1029 +#define WT_STAT_CONN_BLOCK_BYTE_MAP_READ 1030 +/*! cache: bytes belonging to page images in the cache */ +#define WT_STAT_CONN_CACHE_BYTES_IMAGE 1031 /*! cache: bytes currently in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_INUSE 1030 +#define WT_STAT_CONN_CACHE_BYTES_INUSE 1032 +/*! cache: bytes not belonging to page images in the cache */ +#define WT_STAT_CONN_CACHE_BYTES_OTHER 1033 /*! cache: bytes read into cache */ -#define WT_STAT_CONN_CACHE_BYTES_READ 1031 +#define WT_STAT_CONN_CACHE_BYTES_READ 1034 /*! cache: bytes written from cache */ -#define WT_STAT_CONN_CACHE_BYTES_WRITE 1032 +#define WT_STAT_CONN_CACHE_BYTES_WRITE 1035 /*! 
cache: checkpoint blocked page eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1033 +#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1036 /*! cache: eviction calls to get a page */ -#define WT_STAT_CONN_CACHE_EVICTION_GET_REF 1034 +#define WT_STAT_CONN_CACHE_EVICTION_GET_REF 1037 /*! cache: eviction calls to get a page found queue empty */ -#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY 1035 +#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY 1038 /*! cache: eviction calls to get a page found queue empty after locking */ -#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY2 1036 +#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY2 1039 /*! cache: eviction currently operating in aggressive mode */ -#define WT_STAT_CONN_CACHE_EVICTION_AGGRESSIVE_SET 1037 +#define WT_STAT_CONN_CACHE_EVICTION_AGGRESSIVE_SET 1040 /*! cache: eviction server candidate queue empty when topping up */ -#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1038 +#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1041 /*! cache: eviction server candidate queue not empty when topping up */ -#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1039 +#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1042 /*! cache: eviction server evicting pages */ -#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1040 +#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1043 /*! cache: eviction server populating queue, but not evicting pages */ -#define WT_STAT_CONN_CACHE_EVICTION_SERVER_NOT_EVICTING 1041 +#define WT_STAT_CONN_CACHE_EVICTION_SERVER_NOT_EVICTING 1044 /*! cache: eviction server skipped very large page */ -#define WT_STAT_CONN_CACHE_EVICTION_SERVER_TOOBIG 1042 +#define WT_STAT_CONN_CACHE_EVICTION_SERVER_TOOBIG 1045 /*! cache: eviction server slept, because we did not make progress with * eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SERVER_SLEPT 1043 +#define WT_STAT_CONN_CACHE_EVICTION_SERVER_SLEPT 1046 /*! 
cache: eviction server unable to reach eviction goal */ -#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1044 +#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1047 /*! cache: eviction worker thread evicting pages */ -#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1045 +#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1048 /*! cache: failed eviction of pages that exceeded the in-memory maximum */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1046 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1049 /*! cache: files with active eviction walks */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1047 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1050 /*! cache: files with new eviction walks started */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1048 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1051 /*! cache: hazard pointer blocked page eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1049 +#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1052 /*! cache: hazard pointer check calls */ -#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1050 +#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1053 /*! cache: hazard pointer check entries walked */ -#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1051 +#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1054 /*! cache: hazard pointer maximum array length */ -#define WT_STAT_CONN_CACHE_HAZARD_MAX 1052 +#define WT_STAT_CONN_CACHE_HAZARD_MAX 1055 /*! cache: in-memory page passed criteria to be split */ -#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1053 +#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1056 /*! cache: in-memory page splits */ -#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1054 +#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1057 /*! cache: internal pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1055 +#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1058 /*! cache: internal pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1056 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1059 /*! 
cache: leaf pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1057 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1060 /*! cache: lookaside table insert calls */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1058 +#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1061 /*! cache: lookaside table remove calls */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1059 +#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1062 /*! cache: maximum bytes configured */ -#define WT_STAT_CONN_CACHE_BYTES_MAX 1060 +#define WT_STAT_CONN_CACHE_BYTES_MAX 1063 /*! cache: maximum page size at eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1061 +#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1064 /*! cache: modified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1062 +#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1065 /*! cache: modified pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1063 +#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1066 +/*! cache: overflow pages read into cache */ +#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1067 +/*! cache: overflow values cached in memory */ +#define WT_STAT_CONN_CACHE_OVERFLOW_VALUE 1068 /*! cache: page split during eviction deepened the tree */ -#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1064 +#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1069 /*! cache: page written requiring lookaside records */ -#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1065 +#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1070 /*! cache: pages currently held in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_INUSE 1066 +#define WT_STAT_CONN_CACHE_PAGES_INUSE 1071 /*! cache: pages evicted because they exceeded the in-memory maximum */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1067 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1072 /*! 
cache: pages evicted because they had chains of deleted items */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1068 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1073 /*! cache: pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP 1069 +#define WT_STAT_CONN_CACHE_EVICTION_APP 1074 /*! cache: pages queued for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1070 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1075 /*! cache: pages queued for urgent eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1071 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1076 +/*! cache: pages queued for urgent eviction during walk */ +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1077 /*! cache: pages read into cache */ -#define WT_STAT_CONN_CACHE_READ 1072 +#define WT_STAT_CONN_CACHE_READ 1078 /*! cache: pages read into cache requiring lookaside entries */ -#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1073 +#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1079 /*! cache: pages requested from the cache */ -#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1074 +#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1080 /*! cache: pages seen by eviction walk */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1075 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1081 /*! cache: pages selected for eviction unable to be evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1076 +#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1082 /*! cache: pages walked for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_WALK 1077 +#define WT_STAT_CONN_CACHE_EVICTION_WALK 1083 /*! cache: pages written from cache */ -#define WT_STAT_CONN_CACHE_WRITE 1078 +#define WT_STAT_CONN_CACHE_WRITE 1084 /*! cache: pages written requiring in-memory restoration */ -#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1079 +#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1085 /*! 
cache: percentage overhead */ -#define WT_STAT_CONN_CACHE_OVERHEAD 1080 +#define WT_STAT_CONN_CACHE_OVERHEAD 1086 /*! cache: tracked bytes belonging to internal pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1081 +#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1087 /*! cache: tracked bytes belonging to leaf pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_LEAF 1082 -/*! cache: tracked bytes belonging to overflow pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_OVERFLOW 1083 +#define WT_STAT_CONN_CACHE_BYTES_LEAF 1088 /*! cache: tracked dirty bytes in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1084 +#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1089 /*! cache: tracked dirty pages in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1085 +#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1090 /*! cache: unmodified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1086 +#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1091 /*! connection: auto adjusting condition resets */ -#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1087 +#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1092 /*! connection: auto adjusting condition wait calls */ -#define WT_STAT_CONN_COND_AUTO_WAIT 1088 +#define WT_STAT_CONN_COND_AUTO_WAIT 1093 /*! connection: files currently open */ -#define WT_STAT_CONN_FILE_OPEN 1089 +#define WT_STAT_CONN_FILE_OPEN 1094 /*! connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1090 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1095 /*! connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1091 +#define WT_STAT_CONN_MEMORY_FREE 1096 /*! connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1092 +#define WT_STAT_CONN_MEMORY_GROW 1097 /*! connection: pthread mutex condition wait calls */ -#define WT_STAT_CONN_COND_WAIT 1093 +#define WT_STAT_CONN_COND_WAIT 1098 /*! connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1094 +#define WT_STAT_CONN_RWLOCK_READ 1099 /*! 
connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1095 +#define WT_STAT_CONN_RWLOCK_WRITE 1100 /*! connection: total fsync I/Os */ -#define WT_STAT_CONN_FSYNC_IO 1096 +#define WT_STAT_CONN_FSYNC_IO 1101 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1097 +#define WT_STAT_CONN_READ_IO 1102 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1098 +#define WT_STAT_CONN_WRITE_IO 1103 /*! cursor: cursor create calls */ -#define WT_STAT_CONN_CURSOR_CREATE 1099 +#define WT_STAT_CONN_CURSOR_CREATE 1104 /*! cursor: cursor insert calls */ -#define WT_STAT_CONN_CURSOR_INSERT 1100 +#define WT_STAT_CONN_CURSOR_INSERT 1105 /*! cursor: cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT 1101 +#define WT_STAT_CONN_CURSOR_NEXT 1106 /*! cursor: cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV 1102 +#define WT_STAT_CONN_CURSOR_PREV 1107 /*! cursor: cursor remove calls */ -#define WT_STAT_CONN_CURSOR_REMOVE 1103 +#define WT_STAT_CONN_CURSOR_REMOVE 1108 /*! cursor: cursor reset calls */ -#define WT_STAT_CONN_CURSOR_RESET 1104 +#define WT_STAT_CONN_CURSOR_RESET 1109 /*! cursor: cursor restarted searches */ -#define WT_STAT_CONN_CURSOR_RESTART 1105 +#define WT_STAT_CONN_CURSOR_RESTART 1110 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1106 +#define WT_STAT_CONN_CURSOR_SEARCH 1111 /*! cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1107 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1112 /*! cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1108 +#define WT_STAT_CONN_CURSOR_UPDATE 1113 /*! cursor: truncate calls */ -#define WT_STAT_CONN_CURSOR_TRUNCATE 1109 +#define WT_STAT_CONN_CURSOR_TRUNCATE 1114 /*! data-handle: connection data handles currently active */ -#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1110 +#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1115 /*! 
data-handle: connection sweep candidate became referenced */ -#define WT_STAT_CONN_DH_SWEEP_REF 1111 +#define WT_STAT_CONN_DH_SWEEP_REF 1116 /*! data-handle: connection sweep dhandles closed */ -#define WT_STAT_CONN_DH_SWEEP_CLOSE 1112 +#define WT_STAT_CONN_DH_SWEEP_CLOSE 1117 /*! data-handle: connection sweep dhandles removed from hash list */ -#define WT_STAT_CONN_DH_SWEEP_REMOVE 1113 +#define WT_STAT_CONN_DH_SWEEP_REMOVE 1118 /*! data-handle: connection sweep time-of-death sets */ -#define WT_STAT_CONN_DH_SWEEP_TOD 1114 +#define WT_STAT_CONN_DH_SWEEP_TOD 1119 /*! data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_SWEEPS 1115 +#define WT_STAT_CONN_DH_SWEEPS 1120 /*! data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1116 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1121 /*! data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1117 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1122 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1118 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1123 /*! log: consolidated slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1119 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1124 /*! log: consolidated slot join races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1120 +#define WT_STAT_CONN_LOG_SLOT_RACES 1125 /*! log: consolidated slot join transitions */ -#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1121 +#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1126 /*! log: consolidated slot joins */ -#define WT_STAT_CONN_LOG_SLOT_JOINS 1122 +#define WT_STAT_CONN_LOG_SLOT_JOINS 1127 /*! log: consolidated slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1123 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1128 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1124 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1129 /*! 
log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1125 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1130 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1126 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1131 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1127 +#define WT_STAT_CONN_LOG_FLUSH 1132 /*! log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1128 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1133 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1129 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1134 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1130 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1135 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1131 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1136 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1132 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1137 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1133 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1138 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1134 +#define WT_STAT_CONN_LOG_SCANS 1139 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1135 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1140 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1136 +#define WT_STAT_CONN_LOG_WRITE_LSN 1141 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1137 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1142 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1138 +#define WT_STAT_CONN_LOG_SYNC 1143 /*! log: log sync time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DURATION 1139 +#define WT_STAT_CONN_LOG_SYNC_DURATION 1144 /*! 
log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1140 +#define WT_STAT_CONN_LOG_SYNC_DIR 1145 /*! log: log sync_dir time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1141 +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1146 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1142 +#define WT_STAT_CONN_LOG_WRITES 1147 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1143 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1148 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1144 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1149 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1145 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1150 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1146 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1151 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1147 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1152 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1148 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1153 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1149 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1154 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1150 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1155 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1151 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1156 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1152 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1157 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1153 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1158 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1154 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1159 /*! 
reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1155 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1160 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1156 +#define WT_STAT_CONN_REC_PAGES 1161 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1157 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1162 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1158 +#define WT_STAT_CONN_REC_PAGE_DELETE 1163 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1159 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1164 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1160 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1165 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1161 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1166 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1162 +#define WT_STAT_CONN_SESSION_OPEN 1167 +/*! session: table compact failed calls */ +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1168 +/*! session: table compact successful calls */ +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1169 +/*! session: table create failed calls */ +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1170 +/*! session: table create successful calls */ +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1171 +/*! session: table drop failed calls */ +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1172 +/*! session: table drop successful calls */ +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1173 +/*! session: table rebalance failed calls */ +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1174 +/*! session: table rebalance successful calls */ +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1175 +/*! 
session: table rename failed calls */ +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1176 +/*! session: table rename successful calls */ +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1177 +/*! session: table salvage failed calls */ +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1178 +/*! session: table salvage successful calls */ +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1179 +/*! session: table truncate failed calls */ +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1180 +/*! session: table truncate successful calls */ +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1181 +/*! session: table verify failed calls */ +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1182 +/*! session: table verify successful calls */ +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1183 /*! thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_FSYNC_ACTIVE 1163 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1184 /*! thread-state: active filesystem read calls */ -#define WT_STAT_CONN_READ_ACTIVE 1164 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1185 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_WRITE_ACTIVE 1165 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1186 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1166 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1187 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1167 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1188 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1168 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1189 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1169 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1190 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1170 +#define WT_STAT_CONN_PAGE_SLEEP 1191 /*! 
transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1171 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1192 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1172 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1193 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1173 +#define WT_STAT_CONN_TXN_BEGIN 1194 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1174 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1195 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1175 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1196 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1176 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1197 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1177 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1198 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1178 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1199 +/*! transaction: transaction checkpoint scrub dirty target */ +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1200 +/*! transaction: transaction checkpoint scrub time (msecs) */ +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1201 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1179 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1202 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1180 +#define WT_STAT_CONN_TXN_CHECKPOINT 1203 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1181 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1204 /*! 
transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1182 -/*! transaction: transaction fsync calls for checkpoint before allocating - * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_PRE 1183 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1205 /*! transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1184 -/*! transaction: transaction fsync duration for checkpoint before - * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_PRE_DURATION 1185 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1206 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1186 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1207 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1187 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1208 /*! transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1188 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1209 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1189 +#define WT_STAT_CONN_TXN_SYNC 1210 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1190 +#define WT_STAT_CONN_TXN_COMMIT 1211 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1191 +#define WT_STAT_CONN_TXN_ROLLBACK 1212 /*! * @} @@ -4678,127 +4705,129 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_DSRC_BTREE_ROW_INTERNAL 2038 /*! btree: row-store leaf pages */ #define WT_STAT_DSRC_BTREE_ROW_LEAF 2039 +/*! cache: bytes currently in the cache */ +#define WT_STAT_DSRC_CACHE_BYTES_INUSE 2040 /*! 
cache: bytes read into cache */ -#define WT_STAT_DSRC_CACHE_BYTES_READ 2040 +#define WT_STAT_DSRC_CACHE_BYTES_READ 2041 /*! cache: bytes written from cache */ -#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2041 +#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2042 /*! cache: checkpoint blocked page eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2042 +#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2043 /*! cache: data source pages selected for eviction unable to be evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2043 +#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2044 /*! cache: hazard pointer blocked page eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2044 +#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2045 /*! cache: in-memory page passed criteria to be split */ -#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2045 +#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2046 /*! cache: in-memory page splits */ -#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2046 +#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2047 /*! cache: internal pages evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2047 +#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2048 /*! cache: internal pages split during eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2048 +#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2049 /*! cache: leaf pages split during eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2049 +#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2050 /*! cache: modified pages evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2050 +#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2051 /*! cache: overflow pages read into cache */ -#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2051 +#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2052 /*! cache: overflow values cached in memory */ -#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2052 +#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2053 /*! 
cache: page split during eviction deepened the tree */ -#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2053 +#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2054 /*! cache: page written requiring lookaside records */ -#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2054 +#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2055 /*! cache: pages read into cache */ -#define WT_STAT_DSRC_CACHE_READ 2055 +#define WT_STAT_DSRC_CACHE_READ 2056 /*! cache: pages read into cache requiring lookaside entries */ -#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2056 +#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2057 /*! cache: pages requested from the cache */ -#define WT_STAT_DSRC_CACHE_PAGES_REQUESTED 2057 +#define WT_STAT_DSRC_CACHE_PAGES_REQUESTED 2058 /*! cache: pages written from cache */ -#define WT_STAT_DSRC_CACHE_WRITE 2058 +#define WT_STAT_DSRC_CACHE_WRITE 2059 /*! cache: pages written requiring in-memory restoration */ -#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2059 +#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2060 /*! cache: unmodified pages evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2060 +#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2061 /*! compression: compressed pages read */ -#define WT_STAT_DSRC_COMPRESS_READ 2061 +#define WT_STAT_DSRC_COMPRESS_READ 2062 /*! compression: compressed pages written */ -#define WT_STAT_DSRC_COMPRESS_WRITE 2062 +#define WT_STAT_DSRC_COMPRESS_WRITE 2063 /*! compression: page written failed to compress */ -#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2063 +#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2064 /*! compression: page written was too small to compress */ -#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2064 +#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2065 /*! compression: raw compression call failed, additional data available */ -#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2065 +#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2066 /*! 
compression: raw compression call failed, no additional data available */ -#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2066 +#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2067 /*! compression: raw compression call succeeded */ -#define WT_STAT_DSRC_COMPRESS_RAW_OK 2067 +#define WT_STAT_DSRC_COMPRESS_RAW_OK 2068 /*! cursor: bulk-loaded cursor-insert calls */ -#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2068 +#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2069 /*! cursor: create calls */ -#define WT_STAT_DSRC_CURSOR_CREATE 2069 +#define WT_STAT_DSRC_CURSOR_CREATE 2070 /*! cursor: cursor-insert key and value bytes inserted */ -#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2070 +#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2071 /*! cursor: cursor-remove key bytes removed */ -#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2071 +#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2072 /*! cursor: cursor-update value bytes updated */ -#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2072 +#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2073 /*! cursor: insert calls */ -#define WT_STAT_DSRC_CURSOR_INSERT 2073 +#define WT_STAT_DSRC_CURSOR_INSERT 2074 /*! cursor: next calls */ -#define WT_STAT_DSRC_CURSOR_NEXT 2074 +#define WT_STAT_DSRC_CURSOR_NEXT 2075 /*! cursor: prev calls */ -#define WT_STAT_DSRC_CURSOR_PREV 2075 +#define WT_STAT_DSRC_CURSOR_PREV 2076 /*! cursor: remove calls */ -#define WT_STAT_DSRC_CURSOR_REMOVE 2076 +#define WT_STAT_DSRC_CURSOR_REMOVE 2077 /*! cursor: reset calls */ -#define WT_STAT_DSRC_CURSOR_RESET 2077 +#define WT_STAT_DSRC_CURSOR_RESET 2078 /*! cursor: restarted searches */ -#define WT_STAT_DSRC_CURSOR_RESTART 2078 +#define WT_STAT_DSRC_CURSOR_RESTART 2079 /*! cursor: search calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH 2079 +#define WT_STAT_DSRC_CURSOR_SEARCH 2080 /*! cursor: search near calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2080 +#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2081 /*! cursor: truncate calls */ -#define WT_STAT_DSRC_CURSOR_TRUNCATE 2081 +#define WT_STAT_DSRC_CURSOR_TRUNCATE 2082 /*! 
cursor: update calls */ -#define WT_STAT_DSRC_CURSOR_UPDATE 2082 +#define WT_STAT_DSRC_CURSOR_UPDATE 2083 /*! reconciliation: dictionary matches */ -#define WT_STAT_DSRC_REC_DICTIONARY 2083 +#define WT_STAT_DSRC_REC_DICTIONARY 2084 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2084 +#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2085 /*! reconciliation: internal page key bytes discarded using suffix * compression */ -#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2085 +#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2086 /*! reconciliation: internal page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2086 +#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2087 /*! reconciliation: internal-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2087 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2088 /*! reconciliation: leaf page key bytes discarded using prefix compression */ -#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2088 +#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2089 /*! reconciliation: leaf page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2089 +#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2090 /*! reconciliation: leaf-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2090 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2091 /*! reconciliation: maximum blocks required for a page */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2091 +#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2092 /*! reconciliation: overflow values written */ -#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2092 +#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2093 /*! reconciliation: page checksum matches */ -#define WT_STAT_DSRC_REC_PAGE_MATCH 2093 +#define WT_STAT_DSRC_REC_PAGE_MATCH 2094 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_DSRC_REC_PAGES 2094 +#define WT_STAT_DSRC_REC_PAGES 2095 /*! 
reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_DSRC_REC_PAGES_EVICTION 2095 +#define WT_STAT_DSRC_REC_PAGES_EVICTION 2096 /*! reconciliation: pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE 2096 +#define WT_STAT_DSRC_REC_PAGE_DELETE 2097 /*! session: object compaction */ -#define WT_STAT_DSRC_SESSION_COMPACT 2097 +#define WT_STAT_DSRC_SESSION_COMPACT 2098 /*! session: open cursor count */ -#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2098 +#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2099 /*! transaction: update conflicts */ -#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2099 +#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2100 /*! * @} diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c index bf83c280d8d..8ec910115ac 100644 --- a/src/third_party/wiredtiger/src/log/log.c +++ b/src/third_party/wiredtiger/src/log/log.c @@ -9,13 +9,17 @@ #include "wt_internal.h" static int __log_openfile( - WT_SESSION_IMPL *, bool, WT_FH **, const char *, uint32_t); + WT_SESSION_IMPL *, WT_FH **, const char *, uint32_t, uint32_t); static int __log_write_internal( WT_SESSION_IMPL *, WT_ITEM *, WT_LSN *, uint32_t); #define WT_LOG_COMPRESS_SKIP (offsetof(WT_LOG_RECORD, record)) #define WT_LOG_ENCRYPT_SKIP (offsetof(WT_LOG_RECORD, record)) +/* Flags to __log_openfile */ +#define WT_LOG_OPEN_CREATE_OK 0x01 +#define WT_LOG_OPEN_VERIFY 0x02 + /* * __wt_log_ckpt -- * Record the given LSN as the checkpoint LSN and signal the archive @@ -146,7 +150,7 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) * file than we want. 
*/ WT_ERR(__log_openfile(session, - false, &log_fh, WT_LOG_FILENAME, min_lsn->l.file)); + &log_fh, WT_LOG_FILENAME, min_lsn->l.file, 0)); WT_ERR(__wt_verbose(session, WT_VERB_LOG, "log_force_sync: sync %s to LSN %" PRIu32 "/%" PRIu32, log_fh->name, min_lsn->l.file, min_lsn->l.offset)); @@ -277,7 +281,8 @@ __log_get_files(WT_SESSION_IMPL *session, /* * __wt_log_get_all_files -- * Retrieve the list of log files, either all of them or only the active - * ones (those that are not candidates for archiving). + * ones (those that are not candidates for archiving). The caller is + * responsible for freeing the directory list returned. */ int __wt_log_get_all_files(WT_SESSION_IMPL *session, @@ -307,6 +312,10 @@ __wt_log_get_all_files(WT_SESSION_IMPL *session, for (max = 0, i = 0; i < count; ) { WT_ERR(__wt_log_extract_lognum(session, files[i], &id)); if (active_only && id < log->ckpt_lsn.l.file) { + /* + * Any files not being returned are individually freed + * and the array adjusted. + */ __wt_free(session, files[i]); files[i] = files[count - 1]; files[--count] = NULL; @@ -321,6 +330,10 @@ __wt_log_get_all_files(WT_SESSION_IMPL *session, *filesp = files; *countp = count; + /* + * Only free on error. The caller is responsible for calling free + * once it is done using the returned list. 
+ */ if (0) { err: WT_TRET(__wt_fs_directory_list_free(session, &files, count)); } @@ -674,7 +687,7 @@ err: __wt_scr_free(session, &buf); */ static int __log_openfile(WT_SESSION_IMPL *session, - bool ok_create, WT_FH **fhp, const char *file_prefix, uint32_t id) + WT_FH **fhp, const char *file_prefix, uint32_t id, uint32_t flags) { WT_CONNECTION_IMPL *conn; WT_DECL_ITEM(buf); @@ -683,7 +696,7 @@ __log_openfile(WT_SESSION_IMPL *session, WT_LOG_DESC *desc; WT_LOG_RECORD *logrec; uint32_t allocsize; - u_int flags; + u_int wtopen_flags; conn = S2C(session); log = conn->log; @@ -695,19 +708,19 @@ __log_openfile(WT_SESSION_IMPL *session, WT_ERR(__log_filename(session, id, file_prefix, buf)); WT_ERR(__wt_verbose(session, WT_VERB_LOG, "opening log %s", (const char *)buf->data)); - flags = 0; - if (ok_create) - LF_SET(WT_OPEN_CREATE); + wtopen_flags = 0; + if (LF_ISSET(WT_LOG_OPEN_CREATE_OK)) + FLD_SET(wtopen_flags, WT_FS_OPEN_CREATE); if (FLD_ISSET(conn->direct_io, WT_DIRECT_IO_LOG)) - LF_SET(WT_OPEN_DIRECTIO); + FLD_SET(wtopen_flags, WT_FS_OPEN_DIRECTIO); WT_ERR(__wt_open( - session, buf->data, WT_OPEN_FILE_TYPE_LOG, flags, fhp)); + session, buf->data, WT_FS_OPEN_FILE_TYPE_LOG, wtopen_flags, fhp)); /* * If we are not creating the log file but opening it for reading, * check that the magic number and versions are correct. */ - if (!ok_create) { + if (LF_ISSET(WT_LOG_OPEN_VERIFY)) { WT_ERR(__wt_buf_grow(session, buf, allocsize)); memset(buf->mem, 0, allocsize); WT_ERR(__wt_read(session, *fhp, 0, allocsize, buf->mem)); @@ -773,7 +786,7 @@ __log_alloc_prealloc(WT_SESSION_IMPL *session, uint32_t to_num) * All file setup, writing the header and pre-allocation was done * before. We only need to rename it. 
*/ - WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data)); + WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data, false)); err: __wt_scr_free(session, &from_path); __wt_scr_free(session, &to_path); @@ -870,7 +883,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) * window where another thread could see a NULL log file handle. */ WT_RET(__log_openfile(session, - false, &log_fh, WT_LOG_FILENAME, log->fileid)); + &log_fh, WT_LOG_FILENAME, log->fileid, 0)); WT_PUBLISH(log->log_fh, log_fh); /* * We need to setup the LSNs. Set the end LSN and alloc LSN to @@ -978,7 +991,7 @@ __log_truncate(WT_SESSION_IMPL *session, * Truncate the log file to the given LSN. */ WT_ERR(__log_openfile(session, - false, &log_fh, file_prefix, lsn->l.file)); + &log_fh, file_prefix, lsn->l.file, 0)); WT_ERR(__wt_ftruncate(session, log_fh, lsn->l.offset)); WT_ERR(__wt_fsync(session, log_fh, true)); WT_ERR(__wt_close(session, &log_fh)); @@ -995,7 +1008,7 @@ __log_truncate(WT_SESSION_IMPL *session, if (lognum > lsn->l.file && lognum < log->trunc_lsn.l.file) { WT_ERR(__log_openfile(session, - false, &log_fh, file_prefix, lognum)); + &log_fh, file_prefix, lognum, 0)); /* * If there are intervening files pre-allocated, * truncate them to the end of the log file header. @@ -1047,7 +1060,8 @@ __wt_log_allocfile( /* * Set up the temporary file. */ - WT_ERR(__log_openfile(session, true, &log_fh, WT_LOG_TMPNAME, tmp_id)); + WT_ERR(__log_openfile(session, + &log_fh, WT_LOG_TMPNAME, tmp_id, WT_LOG_OPEN_CREATE_OK)); WT_ERR(__log_file_header(session, log_fh, NULL, true)); WT_ERR(__log_prealloc(session, log_fh)); WT_ERR(__wt_fsync(session, log_fh, true)); @@ -1058,7 +1072,7 @@ __wt_log_allocfile( /* * Rename it into place and make it available. 
*/ - WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data)); + WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data, false)); err: __wt_scr_free(session, &from_path); __wt_scr_free(session, &to_path); @@ -1081,7 +1095,7 @@ __wt_log_remove(WT_SESSION_IMPL *session, WT_ERR(__log_filename(session, lognum, file_prefix, path)); WT_ERR(__wt_verbose(session, WT_VERB_LOG, "log_remove: remove log %s", (char *)path->data)); - WT_ERR(__wt_fs_remove(session, path->data)); + WT_ERR(__wt_fs_remove(session, path->data, false)); err: __wt_scr_free(session, &path); return (ret); } @@ -1117,7 +1131,7 @@ __wt_log_open(WT_SESSION_IMPL *session) WT_RET(__wt_verbose(session, WT_VERB_LOG, "log_open: open fh to directory %s", conn->log_path)); WT_RET(__wt_open(session, conn->log_path, - WT_OPEN_FILE_TYPE_DIRECTORY, 0, &log->log_dir_fh)); + WT_FS_OPEN_FILE_TYPE_DIRECTORY, 0, &log->log_dir_fh)); } if (!F_ISSET(conn, WT_CONN_READONLY)) { @@ -1587,8 +1601,8 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, WT_ERR( __wt_fs_directory_list_free(session, &logfiles, logcount)); } - WT_ERR(__log_openfile( - session, false, &log_fh, WT_LOG_FILENAME, start_lsn.l.file)); + WT_ERR(__log_openfile(session, + &log_fh, WT_LOG_FILENAME, start_lsn.l.file, WT_LOG_OPEN_VERIFY)); WT_ERR(__wt_filesize(session, log_fh, &log_size)); rd_lsn = start_lsn; @@ -1637,7 +1651,8 @@ advance: if (rd_lsn.l.file > end_lsn.l.file) break; WT_ERR(__log_openfile(session, - false, &log_fh, WT_LOG_FILENAME, rd_lsn.l.file)); + &log_fh, WT_LOG_FILENAME, + rd_lsn.l.file, WT_LOG_OPEN_VERIFY)); WT_ERR(__wt_filesize(session, log_fh, &log_size)); eol = false; continue; diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c index 78235fb6a92..bedef6a8596 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c @@ -205,6 +205,12 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool 
update) WT_RET(__wt_txn_id_check(session)); WT_RET(__clsm_enter_update(clsm)); + /* + * Switching the tree will update the generation before + * updating the switch transaction. We test the + * transaction in clsm_enter_update. Now test the + * disk generation to avoid races. + */ if (clsm->dsk_gen != clsm->lsm_tree->dsk_gen) goto open; @@ -219,13 +225,20 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update) * transaction ID in each chunk: any transaction ID * that overlaps with our snapshot is a potential * conflict. + * + * Note that the global snap_min is correct here: it + * tracks concurrent transactions excluding special + * transactions such as checkpoint (which we can't + * conflict with because checkpoint only writes the + * metadata, which is not an LSM tree). */ clsm->nupdates = 1; if (txn->isolation == WT_ISO_SNAPSHOT && F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) { WT_ASSERT(session, F_ISSET(txn, WT_TXN_HAS_SNAPSHOT)); - snap_min = txn->snap_min; + snap_min = + WT_SESSION_TXN_STATE(session)->snap_min; for (switch_txnp = &clsm->switch_txn[clsm->nchunks - 2]; clsm->nupdates < clsm->nchunks; @@ -1521,6 +1534,8 @@ __wt_clsm_open(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree; bool bulk; + WT_STATIC_ASSERT(offsetof(WT_CURSOR_LSM, iface) == 0); + clsm = NULL; cursor = NULL; lsm_tree = NULL; @@ -1566,6 +1581,7 @@ __wt_clsm_open(WT_SESSION_IMPL *session, cursor->value_format = lsm_tree->value_format; clsm->lsm_tree = lsm_tree; + lsm_tree = NULL; /* * The tree's dsk_gen starts at one, so starting the cursor on zero @@ -1573,7 +1589,6 @@ __wt_clsm_open(WT_SESSION_IMPL *session, */ clsm->dsk_gen = 0; - WT_STATIC_ASSERT(offsetof(WT_CURSOR_LSM, iface) == 0); WT_ERR(__wt_cursor_init(cursor, cursor->uri, owner, cfg, cursorp)); if (bulk) @@ -1585,10 +1600,6 @@ err: if (clsm != NULL) else if (lsm_tree != NULL) __wt_lsm_tree_release(session, lsm_tree); - /* - * We open bulk cursors after setting the returned cursor. - * Fix that here. 
- */ *cursorp = NULL; } diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c index da106ae2089..2ecfb614eee 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c @@ -771,6 +771,11 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) ++lsm_tree->dsk_gen; lsm_tree->modified = true; + /* + * Ensure the updated disk generation is visible to all other threads + * before updating the transaction ID. + */ + WT_FULL_BARRIER(); /* * Set the switch transaction in the previous chunk unless this is @@ -1187,8 +1192,15 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) */ if (lsm_tree->nchunks > 0 && (chunk = lsm_tree->chunk[lsm_tree->nchunks - 1]) != NULL) { - if (chunk->switch_txn == WT_TXN_NONE) + if (chunk->switch_txn == WT_TXN_NONE) { + /* + * Make sure any cursors open on the tree see the + * new switch generation before updating. + */ + ++lsm_tree->dsk_gen; + WT_FULL_BARRIER(); chunk->switch_txn = __wt_txn_id_alloc(session, false); + } /* * If we have a chunk, we want to look for it to be on-disk. * So we need to add a reference to keep it available. 
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c index c19f42327be..0f2a407c70d 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c @@ -526,7 +526,7 @@ __lsm_drop_file(WT_SESSION_IMPL *session, const char *uri) ret = __wt_schema_drop(session, uri, drop_cfg)); if (ret == 0) - ret = __wt_fs_remove(session, uri + strlen("file:")); + ret = __wt_fs_remove(session, uri + strlen("file:"), false); WT_RET(__wt_verbose(session, WT_VERB_LSM, "Dropped %s", uri)); if (ret == EBUSY || ret == ENOENT) diff --git a/src/third_party/wiredtiger/src/meta/meta_table.c b/src/third_party/wiredtiger/src/meta/meta_table.c index 38a2edd7219..d39df163daf 100644 --- a/src/third_party/wiredtiger/src/meta/meta_table.c +++ b/src/third_party/wiredtiger/src/meta/meta_table.c @@ -68,6 +68,9 @@ __wt_metadata_cursor_open( if (F_ISSET(btree, WT_BTREE_NO_LOGGING)) F_CLR(btree, WT_BTREE_NO_LOGGING); + /* The metadata file always uses checkpoint IDs in visibility checks. */ + btree->include_checkpoint_txn = true; + return (0); } diff --git a/src/third_party/wiredtiger/src/meta/meta_track.c b/src/third_party/wiredtiger/src/meta/meta_track.c index eb06b2bed66..3d8b7c46500 100644 --- a/src/third_party/wiredtiger/src/meta/meta_track.c +++ b/src/third_party/wiredtiger/src/meta/meta_track.c @@ -141,7 +141,8 @@ __meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk) ret = bm->checkpoint_resolve(bm, session)); break; case WT_ST_DROP_COMMIT: - if ((ret = __wt_block_manager_drop(session, trk->a)) != 0) + if ((ret = + __wt_block_manager_drop(session, trk->a, false)) != 0) __wt_err(session, ret, "metadata remove dropped file %s", trk->a); break; @@ -188,13 +189,15 @@ __meta_track_unroll(WT_SESSION_IMPL *session, WT_META_TRACK *trk) * For removes, b is NULL. 
*/ if (trk->a != NULL && trk->b != NULL && - (ret = __wt_rename_and_sync_directory(session, - trk->b + strlen("file:"), trk->a + strlen("file:"))) != 0) + (ret = __wt_fs_rename(session, + trk->b + strlen("file:"), trk->a + strlen("file:"), + true)) != 0) __wt_err(session, ret, "metadata unroll rename %s to %s", trk->b, trk->a); - if (trk->a == NULL && (ret = - __wt_fs_remove(session, trk->b + strlen("file:"))) != 0) + if (trk->a == NULL && + (ret = __wt_fs_remove(session, + trk->b + strlen("file:"), false)) != 0) __wt_err(session, ret, "metadata unroll create %s", trk->b); diff --git a/src/third_party/wiredtiger/src/meta/meta_turtle.c b/src/third_party/wiredtiger/src/meta/meta_turtle.c index 4d2b359bbed..ace0fabab48 100644 --- a/src/third_party/wiredtiger/src/meta/meta_turtle.c +++ b/src/third_party/wiredtiger/src/meta/meta_turtle.c @@ -158,7 +158,7 @@ __wt_turtle_init(WT_SESSION_IMPL *session) * Discard any turtle setup file left-over from previous runs. This * doesn't matter for correctness, it's just cleaning up random files. */ - WT_RET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET)); + WT_RET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET, false)); /* * We could die after creating the turtle file and before creating the @@ -197,9 +197,10 @@ __wt_turtle_init(WT_SESSION_IMPL *session) "Both %s and %s exist; recreating metadata from " "backup", WT_METADATA_TURTLE, WT_METADATA_BACKUP)); - WT_RET(__wt_remove_if_exists(session, WT_METAFILE)); + WT_RET( + __wt_remove_if_exists(session, WT_METAFILE, false)); WT_RET(__wt_remove_if_exists( - session, WT_METADATA_TURTLE)); + session, WT_METADATA_TURTLE, false)); load = true; } } else @@ -305,7 +306,7 @@ __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) * every time. 
*/ WT_RET(__wt_fopen(session, WT_METADATA_TURTLE_SET, - WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs)); + WT_FS_OPEN_CREATE | WT_FS_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs)); version = wiredtiger_version(&vmajor, &vminor, &vpatch); WT_ERR(__wt_fprintf(session, fs, @@ -320,7 +321,7 @@ __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) /* Close any file handle left open, remove any temporary file. */ err: WT_TRET(__wt_fclose(session, &fs)); - WT_TRET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET)); + WT_TRET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET, false)); return (ret); } diff --git a/src/third_party/wiredtiger/src/os_common/filename.c b/src/third_party/wiredtiger/src/os_common/filename.c index 5f174288350..8b6c1269829 100644 --- a/src/third_party/wiredtiger/src/os_common/filename.c +++ b/src/third_party/wiredtiger/src/os_common/filename.c @@ -56,54 +56,16 @@ __wt_nfilename( * Remove a file if it exists. */ int -__wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name) +__wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name, bool durable) { bool exist; WT_RET(__wt_fs_exist(session, name, &exist)); if (exist) - WT_RET(__wt_fs_remove(session, name)); + WT_RET(__wt_fs_remove(session, name, durable)); return (0); } -/* - * __wt_rename_and_sync_directory -- - * Rename a file and sync the enclosing directory. - */ -int -__wt_rename_and_sync_directory( - WT_SESSION_IMPL *session, const char *from, const char *to) -{ - const char *fp, *tp; - bool same_directory; - - /* Rename the source file to the target. */ - WT_RET(__wt_fs_rename(session, from, to)); - - /* - * Flush the backing directory to guarantee the rename. 
My reading of - * POSIX 1003.1 is there's no guarantee flushing only one of the from - * or to directories, or flushing a common parent, is sufficient, and - * even if POSIX were to make that guarantee, existing filesystems are - * known to not provide the guarantee or only provide the guarantee - * with specific mount options. Flush both of the from/to directories - * until it's a performance problem. - */ - WT_RET(__wt_fs_directory_sync(session, from)); - - /* - * In almost all cases, we're going to be renaming files in the same - * directory, we can at least fast-path that. - */ - fp = strrchr(from, '/'); - tp = strrchr(to, '/'); - same_directory = (fp == NULL && tp == NULL) || - (fp != NULL && tp != NULL && - fp - from == tp - to && memcmp(from, to, (size_t)(fp - from)) == 0); - - return (same_directory ? 0 : __wt_fs_directory_sync(session, to)); -} - /* * __wt_copy_and_sync -- * Copy a file safely; here to support the wt utility. @@ -134,13 +96,13 @@ __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to) WT_ERR(__wt_scr_alloc(session, 0, &tmp)); WT_ERR(__wt_buf_fmt(session, tmp, "%s.copy", to)); - WT_ERR(__wt_remove_if_exists(session, to)); - WT_ERR(__wt_remove_if_exists(session, tmp->data)); + WT_ERR(__wt_remove_if_exists(session, to, false)); + WT_ERR(__wt_remove_if_exists(session, tmp->data, false)); /* Open the from and temporary file handles. */ - WT_ERR(__wt_open(session, from, WT_OPEN_FILE_TYPE_REGULAR, 0, &ffh)); - WT_ERR(__wt_open(session, tmp->data, WT_OPEN_FILE_TYPE_REGULAR, - WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &tfh)); + WT_ERR(__wt_open(session, from, WT_FS_OPEN_FILE_TYPE_REGULAR, 0, &ffh)); + WT_ERR(__wt_open(session, tmp->data, WT_FS_OPEN_FILE_TYPE_REGULAR, + WT_FS_OPEN_CREATE | WT_FS_OPEN_EXCLUSIVE, &tfh)); /* * Allocate a copy buffer. 
Don't use a scratch buffer, this thing is @@ -162,7 +124,7 @@ __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to) WT_ERR(__wt_fsync(session, tfh, true)); WT_ERR(__wt_close(session, &tfh)); - ret = __wt_rename_and_sync_directory(session, tmp->data, to); + ret = __wt_fs_rename(session, tmp->data, to, true); err: WT_TRET(__wt_close(session, &ffh)); WT_TRET(__wt_close(session, &tfh)); diff --git a/src/third_party/wiredtiger/src/os_common/os_fhandle.c b/src/third_party/wiredtiger/src/os_common/os_fhandle.c index 81e4cc14ccb..184a9df0e72 100644 --- a/src/third_party/wiredtiger/src/os_common/os_fhandle.c +++ b/src/third_party/wiredtiger/src/os_common/os_fhandle.c @@ -150,19 +150,19 @@ __open_verbose( */ switch (file_type) { - case WT_OPEN_FILE_TYPE_CHECKPOINT: + case WT_FS_OPEN_FILE_TYPE_CHECKPOINT: file_type_tag = "checkpoint"; break; - case WT_OPEN_FILE_TYPE_DATA: + case WT_FS_OPEN_FILE_TYPE_DATA: file_type_tag = "data"; break; - case WT_OPEN_FILE_TYPE_DIRECTORY: + case WT_FS_OPEN_FILE_TYPE_DIRECTORY: file_type_tag = "directory"; break; - case WT_OPEN_FILE_TYPE_LOG: + case WT_FS_OPEN_FILE_TYPE_LOG: file_type_tag = "log"; break; - case WT_OPEN_FILE_TYPE_REGULAR: + case WT_FS_OPEN_FILE_TYPE_REGULAR: file_type_tag = "regular"; break; default: @@ -172,18 +172,18 @@ __open_verbose( WT_RET(__wt_scr_alloc(session, 0, &tmp)); sep = " ("; -#define WT_OPEN_VERBOSE_FLAG(f, name) \ +#define WT_FS_OPEN_VERBOSE_FLAG(f, name) \ if (LF_ISSET(f)) { \ WT_ERR(__wt_buf_catfmt( \ session, tmp, "%s%s", sep, name)); \ sep = ", "; \ } - WT_OPEN_VERBOSE_FLAG(WT_OPEN_CREATE, "create"); - WT_OPEN_VERBOSE_FLAG(WT_OPEN_DIRECTIO, "direct-IO"); - WT_OPEN_VERBOSE_FLAG(WT_OPEN_EXCLUSIVE, "exclusive"); - WT_OPEN_VERBOSE_FLAG(WT_OPEN_FIXED, "fixed"); - WT_OPEN_VERBOSE_FLAG(WT_OPEN_READONLY, "readonly"); + WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_CREATE, "create"); + WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_DIRECTIO, "direct-IO"); + WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_EXCLUSIVE, "exclusive"); 
+ WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_FIXED, "fixed"); + WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_READONLY, "readonly"); if (tmp->size != 0) WT_ERR(__wt_buf_catfmt(session, tmp, ")")); @@ -209,7 +209,7 @@ err: __wt_scr_free(session, &tmp); */ int __wt_open(WT_SESSION_IMPL *session, - const char *name, WT_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp) + const char *name, WT_FS_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; @@ -247,12 +247,12 @@ __wt_open(WT_SESSION_IMPL *session, if (F_ISSET(conn, WT_CONN_READONLY)) { lock_file = strcmp(name, WT_SINGLETHREAD) == 0; if (!lock_file) - LF_SET(WT_OPEN_READONLY); - WT_ASSERT(session, lock_file || !LF_ISSET(WT_OPEN_CREATE)); + LF_SET(WT_FS_OPEN_READONLY); + WT_ASSERT(session, lock_file || !LF_ISSET(WT_FS_OPEN_CREATE)); } /* Create the path to the file. */ - if (!LF_ISSET(WT_OPEN_FIXED)) + if (!LF_ISSET(WT_FS_OPEN_FIXED)) WT_ERR(__wt_filename(session, name, &path)); /* Call the underlying open function. */ @@ -261,7 +261,7 @@ __wt_open(WT_SESSION_IMPL *session, open_called = true; WT_ERR(__fhandle_method_finalize( - session, fh->handle, LF_ISSET(WT_OPEN_READONLY))); + session, fh->handle, LF_ISSET(WT_FS_OPEN_READONLY))); /* * Repeat the check for a match: if there's no match, link our newly diff --git a/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c b/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c index 09c2e08db83..178adc1dac8 100644 --- a/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c +++ b/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c @@ -188,14 +188,16 @@ __im_fs_exist(WT_FILE_SYSTEM *file_system, * POSIX remove. 
*/ static int -__im_fs_remove( - WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name) +__im_fs_remove(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, const char *name, uint32_t flags) { WT_DECL_RET; WT_FILE_HANDLE_INMEM *im_fh; WT_FILE_SYSTEM_INMEM *im_fs; WT_SESSION_IMPL *session; + WT_UNUSED(flags); + im_fs = (WT_FILE_SYSTEM_INMEM *)file_system; session = (WT_SESSION_IMPL *)wt_session; @@ -215,7 +217,7 @@ __im_fs_remove( */ static int __im_fs_rename(WT_FILE_SYSTEM *file_system, - WT_SESSION *wt_session, const char *from, const char *to) + WT_SESSION *wt_session, const char *from, const char *to, uint32_t flags) { WT_DECL_RET; WT_FILE_HANDLE_INMEM *im_fh; @@ -224,6 +226,8 @@ __im_fs_rename(WT_FILE_SYSTEM *file_system, uint64_t bucket; char *copy; + WT_UNUSED(flags); + im_fs = (WT_FILE_SYSTEM_INMEM *)file_system; session = (WT_SESSION_IMPL *)wt_session; @@ -463,7 +467,7 @@ err: __wt_spin_unlock(session, &im_fs->lock); */ static int __im_file_open(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, - const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags, + const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags, WT_FILE_HANDLE **file_handlep) { WT_DECL_RET; diff --git a/src/third_party/wiredtiger/src/os_common/os_fstream.c b/src/third_party/wiredtiger/src/os_common/os_fstream.c index 0b199529e19..5a368ea75e6 100644 --- a/src/third_party/wiredtiger/src/os_common/os_fstream.c +++ b/src/third_party/wiredtiger/src/os_common/os_fstream.c @@ -187,7 +187,7 @@ __wt_fopen(WT_SESSION_IMPL *session, fstr = NULL; WT_RET(__wt_open( - session, name, WT_OPEN_FILE_TYPE_REGULAR, open_flags, &fh)); + session, name, WT_FS_OPEN_FILE_TYPE_REGULAR, open_flags, &fh)); WT_ERR(__wt_calloc_one(session, &fstr)); fstr->fh = fh; diff --git a/src/third_party/wiredtiger/src/os_posix/os_fs.c b/src/third_party/wiredtiger/src/os_posix/os_fs.c index 86fa2e8f117..11f38ec063b 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_fs.c +++ 
b/src/third_party/wiredtiger/src/os_posix/os_fs.c @@ -30,7 +30,7 @@ /* * __posix_sync -- - * Underlying support function to flush a file handle. + * Underlying support function to flush a file descriptor. */ static int __posix_sync( @@ -77,33 +77,42 @@ __posix_sync( #ifdef __linux__ /* * __posix_directory_sync -- - * Flush a directory to ensure file creation is durable. + * Flush a directory to ensure file creation, remove or rename is durable. */ static int -__posix_directory_sync( - WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *path) +__posix_directory_sync(WT_SESSION_IMPL *session, const char *path) { + WT_DECL_ITEM(tmp); WT_DECL_RET; - WT_SESSION_IMPL *session; int fd, tret; + char *dir; - WT_UNUSED(file_system); + WT_RET(__wt_scr_alloc(session, 0, &tmp)); + WT_ERR(__wt_buf_setstr(session, tmp, path)); - session = (WT_SESSION_IMPL *)wt_session; + /* + * This layer should never see a path that doesn't include a trailing + * path separator, this code asserts that fact. + */ + dir = tmp->mem; + strrchr(dir, '/')[1] = '\0'; + fd = -1; /* -Wconditional-uninitialized */ WT_SYSCALL_RETRY(( - (fd = open(path, O_RDONLY, 0444)) == -1 ? -1 : 0), ret); + (fd = open(dir, O_RDONLY, 0444)) == -1 ? -1 : 0), ret); if (ret != 0) - WT_RET_MSG(session, ret, "%s: directory-sync: open", path); + WT_ERR_MSG(session, ret, "%s: directory-sync: open", dir); - ret = __posix_sync(session, fd, path, "directory-sync"); + ret = __posix_sync(session, fd, dir, "directory-sync"); WT_SYSCALL(close(fd), tret); if (tret != 0) { - __wt_err(session, tret, "%s: directory-sync: close", path); + __wt_err(session, tret, "%s: directory-sync: close", dir); if (ret == 0) ret = tret; } + +err: __wt_scr_free(session, &tmp); return (ret); } #endif @@ -141,8 +150,8 @@ __posix_fs_exist(WT_FILE_SYSTEM *file_system, * Remove a file. 
*/ static int -__posix_fs_remove( - WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name) +__posix_fs_remove(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, const char *name, uint32_t flags) { WT_DECL_RET; WT_SESSION_IMPL *session; @@ -159,9 +168,17 @@ __posix_fs_remove( * using unlink may be marginally safer. */ WT_SYSCALL(unlink(name), ret); - if (ret == 0) + if (ret != 0) + WT_RET_MSG(session, ret, "%s: file-remove: unlink", name); + + if (!LF_ISSET(WT_FS_DURABLE)) return (0); - WT_RET_MSG(session, ret, "%s: file-remove: unlink", name); + +#ifdef __linux__ + /* Flush the backing directory to guarantee the remove. */ + WT_RET (__posix_directory_sync(session, name)); +#endif + return (0); } /* @@ -170,7 +187,7 @@ __posix_fs_remove( */ static int __posix_fs_rename(WT_FILE_SYSTEM *file_system, - WT_SESSION *wt_session, const char *from, const char *to) + WT_SESSION *wt_session, const char *from, const char *to, uint32_t flags) { WT_DECL_RET; WT_SESSION_IMPL *session; @@ -187,9 +204,43 @@ __posix_fs_rename(WT_FILE_SYSTEM *file_system, * return (if errno is 0), but we've done the best we can. */ WT_SYSCALL(rename(from, to) != 0 ? -1 : 0, ret); - if (ret == 0) + if (ret != 0) + WT_RET_MSG( + session, ret, "%s to %s: file-rename: rename", from, to); + + if (!LF_ISSET(WT_FS_DURABLE)) return (0); - WT_RET_MSG(session, ret, "%s to %s: file-rename: rename", from, to); +#ifdef __linux__ + /* + * Flush the backing directory to guarantee the rename. My reading of + * POSIX 1003.1 is there's no guarantee flushing only one of the from + * or to directories, or flushing a common parent, is sufficient, and + * even if POSIX were to make that guarantee, existing filesystems are + * known to not provide the guarantee or only provide the guarantee + * with specific mount options. Flush both of the from/to directories + * until it's a performance problem. 
+ */ + WT_RET(__posix_directory_sync(session, from)); + + /* + * In almost all cases, we're going to be renaming files in the same + * directory, we can at least fast-path that. + */ + { + bool same_directory; + const char *fp, *tp; + + fp = strrchr(from, '/'); + tp = strrchr(to, '/'); + same_directory = (fp == NULL && tp == NULL) || + (fp != NULL && tp != NULL && + fp - from == tp - to && memcmp(from, to, (size_t)(fp - from)) == 0); + + if (!same_directory) + WT_RET(__posix_directory_sync(session, to)); + } +#endif + return (0); } /* @@ -513,7 +564,7 @@ __posix_open_file_cloexec(WT_SESSION_IMPL *session, int fd, const char *name) */ static int __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, - const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags, + const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags, WT_FILE_HANDLE **file_handlep) { WT_CONNECTION_IMPL *conn; @@ -536,7 +587,7 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, /* Set up error handling. */ pfh->fd = -1; - if (file_type == WT_OPEN_FILE_TYPE_DIRECTORY) { + if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY) { f = O_RDONLY; #ifdef O_CLOEXEC /* @@ -554,10 +605,10 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, goto directory_open; } - f = LF_ISSET(WT_OPEN_READONLY) ? O_RDONLY : O_RDWR; - if (LF_ISSET(WT_OPEN_CREATE)) { + f = LF_ISSET(WT_FS_OPEN_READONLY) ? O_RDONLY : O_RDWR; + if (LF_ISSET(WT_FS_OPEN_CREATE)) { f |= O_CREAT; - if (LF_ISSET(WT_OPEN_EXCLUSIVE)) + if (LF_ISSET(WT_FS_OPEN_EXCLUSIVE)) f |= O_EXCL; mode = 0666; } else @@ -577,7 +628,7 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, #endif #ifdef O_DIRECT /* Direct I/O. 
*/ - if (LF_ISSET(WT_OPEN_DIRECTIO)) { + if (LF_ISSET(WT_FS_OPEN_DIRECTIO)) { f |= O_DIRECT; pfh->direct_io = true; } else @@ -585,11 +636,11 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, #endif #ifdef O_NOATIME /* Avoid updating metadata for read-only workloads. */ - if (file_type == WT_OPEN_FILE_TYPE_DATA) + if (file_type == WT_FS_OPEN_FILE_TYPE_DATA) f |= O_NOATIME; #endif - if (file_type == WT_OPEN_FILE_TYPE_LOG && + if (file_type == WT_FS_OPEN_FILE_TYPE_LOG && FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) { #ifdef O_DSYNC f |= O_DSYNC; @@ -601,6 +652,7 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, #endif } + /* Create/Open the file. */ WT_SYSCALL_RETRY(((pfh->fd = open(name, f, mode)) == -1 ? -1 : 0), ret); if (ret != 0) WT_ERR_MSG(session, ret, @@ -608,6 +660,16 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, "%s: handle-open: open: failed with direct I/O configured, " "some filesystem types do not support direct I/O" : "%s: handle-open: open", name); + +#ifdef __linux__ + /* + * Durability: some filesystems require a directory sync to be confident + * the file will appear. + */ + if (LF_ISSET(WT_FS_OPEN_DURABLE)) + WT_ERR(__posix_directory_sync(session, name)); +#endif + WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name)); #if defined(HAVE_POSIX_FADVISE) @@ -616,7 +678,7 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, * Ignore fadvise when doing direct I/O, the kernel cache isn't * interesting. */ - if (!pfh->direct_io && file_type == WT_OPEN_FILE_TYPE_DATA) { + if (!pfh->direct_io && file_type == WT_FS_OPEN_FILE_TYPE_DATA) { WT_SYSCALL( posix_fadvise(pfh->fd, 0, 0, POSIX_FADV_RANDOM), ret); if (ret != 0) @@ -705,9 +767,6 @@ __wt_os_posix(WT_SESSION_IMPL *session) /* Initialize the POSIX jump table. 
*/ file_system->fs_directory_list = __wt_posix_directory_list; file_system->fs_directory_list_free = __wt_posix_directory_list_free; -#ifdef __linux__ - file_system->fs_directory_sync = __posix_directory_sync; -#endif file_system->fs_exist = __posix_fs_exist; file_system->fs_open_file = __posix_open_file; file_system->fs_remove = __posix_fs_remove; diff --git a/src/third_party/wiredtiger/src/os_win/os_fs.c b/src/third_party/wiredtiger/src/os_win/os_fs.c index 5daba124e90..fc03e0a2595 100644 --- a/src/third_party/wiredtiger/src/os_win/os_fs.c +++ b/src/third_party/wiredtiger/src/os_win/os_fs.c @@ -36,13 +36,14 @@ __win_fs_exist(WT_FILE_SYSTEM *file_system, * Remove a file. */ static int -__win_fs_remove( - WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name) +__win_fs_remove(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, const char *name, uint32_t flags) { DWORD windows_error; WT_SESSION_IMPL *session; WT_UNUSED(file_system); + WT_UNUSED(flags); session = (WT_SESSION_IMPL *)wt_session; @@ -62,12 +63,13 @@ __win_fs_remove( */ static int __win_fs_rename(WT_FILE_SYSTEM *file_system, - WT_SESSION *wt_session, const char *from, const char *to) + WT_SESSION *wt_session, const char *from, const char *to, uint32_t flags) { DWORD windows_error; WT_SESSION_IMPL *session; WT_UNUSED(file_system); + WT_UNUSED(flags); session = (WT_SESSION_IMPL *)wt_session; @@ -426,7 +428,7 @@ __win_file_write(WT_FILE_HANDLE *file_handle, */ static int __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, - const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags, + const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags, WT_FILE_HANDLE **file_handlep) { DWORD dwCreationDisposition, windows_error; @@ -458,11 +460,11 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, * require that functionality: create an empty WT_FH structure with * invalid handles. 
*/ - if (file_type == WT_OPEN_FILE_TYPE_DIRECTORY) + if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY) goto directory_open; desired_access = GENERIC_READ; - if (!LF_ISSET(WT_OPEN_READONLY)) + if (!LF_ISSET(WT_FS_OPEN_READONLY)) desired_access |= GENERIC_WRITE; /* @@ -476,15 +478,15 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, f = FILE_ATTRIBUTE_NORMAL; dwCreationDisposition = 0; - if (LF_ISSET(WT_OPEN_CREATE)) { + if (LF_ISSET(WT_FS_OPEN_CREATE)) { dwCreationDisposition = CREATE_NEW; - if (LF_ISSET(WT_OPEN_EXCLUSIVE)) + if (LF_ISSET(WT_FS_OPEN_EXCLUSIVE)) dwCreationDisposition = CREATE_ALWAYS; } else dwCreationDisposition = OPEN_EXISTING; /* Direct I/O. */ - if (LF_ISSET(WT_OPEN_DIRECTIO)) { + if (LF_ISSET(WT_FS_OPEN_DIRECTIO)) { f |= FILE_FLAG_NO_BUFFERING; win_fh->direct_io = true; } @@ -493,19 +495,19 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, if (FLD_ISSET(conn->write_through, file_type)) f |= FILE_FLAG_WRITE_THROUGH; - if (file_type == WT_OPEN_FILE_TYPE_LOG && + if (file_type == WT_FS_OPEN_FILE_TYPE_LOG && FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) f |= FILE_FLAG_WRITE_THROUGH; /* Disable read-ahead on trees: it slows down random read workloads. */ - if (file_type == WT_OPEN_FILE_TYPE_DATA) + if (file_type == WT_FS_OPEN_FILE_TYPE_DATA) f |= FILE_FLAG_RANDOM_ACCESS; win_fh->filehandle = CreateFileA(name, desired_access, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, dwCreationDisposition, f, NULL); if (win_fh->filehandle == INVALID_HANDLE_VALUE) { - if (LF_ISSET(WT_OPEN_CREATE) && + if (LF_ISSET(WT_FS_OPEN_CREATE) && GetLastError() == ERROR_FILE_EXISTS) win_fh->filehandle = CreateFileA(name, desired_access, FILE_SHARE_READ | FILE_SHARE_WRITE, @@ -528,7 +530,7 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, * concurrently with reads on the file. Writes would also move the file * pointer. 
*/ - if (!LF_ISSET(WT_OPEN_READONLY)) { + if (!LF_ISSET(WT_FS_OPEN_READONLY)) { win_fh->filehandle_secondary = CreateFileA(name, desired_access, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, f, NULL); diff --git a/src/third_party/wiredtiger/src/os_win/os_path.c b/src/third_party/wiredtiger/src/os_win/os_path.c index 220752ce7a1..74050600417 100644 --- a/src/third_party/wiredtiger/src/os_win/os_path.c +++ b/src/third_party/wiredtiger/src/os_win/os_path.c @@ -16,8 +16,30 @@ bool __wt_absolute_path(const char *path) { /* - * Check for a drive name (for example, "D:"), allow both forward and - * backward slashes. + * https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247 + * + * For Windows API functions that manipulate files, file names can often + * be relative to the current directory, while some APIs require a fully + * qualified path. A file name is relative to the current directory if + * it does not begin with one of the following: + * + * -- A UNC name of any format, which always start with two backslash + * characters ("\\"). + * -- A disk designator with a backslash, for example "C:\" or "d:\". + * -- A single backslash, for example, "\directory" or "\file.txt". This + * is also referred to as an absolute path. + * + * If a file name begins with only a disk designator but not the + * backslash after the colon, it is interpreted as a relative path to + * the current directory on the drive with the specified letter. Note + * that the current directory may or may not be the root directory + * depending on what it was set to during the most recent "change + * directory" operation on that disk. + * + * -- "C:tmp.txt" refers to a file named "tmp.txt" in the current + * directory on drive C. + * -- "C:tempdir\tmp.txt" refers to a file in a subdirectory to the + * current directory on drive C. 
*/ if (strlen(path) >= 3 && __wt_isalpha(path[0]) && path[1] == ':') path += 2; diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index b49946bb10e..b96b34594b0 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -115,6 +115,7 @@ typedef struct { */ uint32_t page_size; /* Set page size */ uint32_t page_size_orig; /* Saved set page size */ + uint32_t max_raw_page_size; /* Max page size with raw compression */ /* * Second, the split size: if we're doing the page layout, split to a @@ -159,8 +160,15 @@ typedef struct { WT_ADDR addr; /* Split's written location */ uint32_t size; /* Split's size */ uint32_t cksum; /* Split's checksum */ + void *disk_image; /* Split's disk image */ + /* + * Raw compression, the disk image being written is already + * compressed. + */ + bool already_compressed; + /* * Saved update list, supporting the WT_EVICT_UPDATE_RESTORE and * WT_EVICT_LOOKASIDE configurations. @@ -175,13 +183,6 @@ typedef struct { * column-store key. */ WT_ITEM key; /* Promoted row-store key */ - - /* - * During wrapup, after reconciling the root page, we write a - * final block as part of a checkpoint. If raw compression - * was configured, that block may have already been compressed. - */ - bool already_compressed; } *bnd; /* Saved boundaries */ uint32_t bnd_next; /* Next boundary slot */ uint32_t bnd_next_max; /* Maximum boundary slots used */ @@ -445,17 +446,32 @@ __wt_reconcile(WT_SESSION_IMPL *session, } /* - * Clean up reconciliation resources: some workloads have millions of - * boundary structures, and if associated with an application session - * pulled into doing forced eviction, they won't be discarded for the - * life of the session (or until session.reset is called). Discard all - * of the reconciliation resources if an application thread, not doing - * a checkpoint. 
- */ - __rec_bnd_cleanup(session, r, - F_ISSET(session, WT_SESSION_INTERNAL) || - WT_SESSION_IS_CHECKPOINT(session) ? false : true); + * When application threads perform eviction, don't cache block manager + * or reconciliation structures (even across calls), we can have a + * significant number of application threads doing eviction at the same + * time with large items. We ignore checkpoints, once the checkpoint + * completes, all unnecessary session resources will be discarded. + * + * Even in application threads doing checkpoints or in internal threads + * doing any reconciliation, clean up reconciliation resources. Some + * workloads have millions of boundary structures in a reconciliation + * and we don't want to tie that memory down, even across calls. + */ + if (WT_SESSION_IS_CHECKPOINT(session) || + F_ISSET(session, WT_SESSION_INTERNAL)) + __rec_bnd_cleanup(session, r, false); + else { + /* + * Clean up the underlying block manager memory too: it's not + * reconciliation, but threads discarding reconciliation + * structures want to clean up the block manager's structures + * as well, and there's no obvious place to do that. + */ + if (session->block_manager_cleanup != NULL) + WT_TRET(session->block_manager_cleanup(session)); + WT_TRET(__rec_destroy_session(session)); + } WT_RET(ret); /* @@ -652,7 +668,7 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags) WT_ASSERT(session, mod->mod_multi[i].supd == NULL); WT_ERR(__wt_multi_to_ref(session, - next, &mod->mod_multi[i], &pindex->index[i], NULL)); + next, &mod->mod_multi[i], &pindex->index[i], NULL, false)); pindex->index[i]->home = next; } @@ -1135,8 +1151,20 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, */ if (!skipped && (F_ISSET(btree, WT_BTREE_LOOKASIDE) || - __wt_txn_visible_all(session, max_txn))) + __wt_txn_visible_all(session, max_txn))) { +#ifdef HAVE_DIAGNOSTIC + /* + * The checkpoint transaction is special. 
Make sure we never + * write (metadata) updates from a checkpoint in a concurrent + * session. + */ + txnid = *updp == NULL ? WT_TXN_NONE : (*updp)->txnid; + WT_ASSERT(session, txnid == WT_TXN_NONE || + txnid != S2C(session)->txn_global.checkpoint_txnid || + WT_SESSION_IS_CHECKPOINT(session)); +#endif return (0); + } /* * In some cases, there had better not be skipped updates or updates not @@ -1845,18 +1873,19 @@ __rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd) WT_CLEAR(bnd->addr); bnd->size = 0; bnd->cksum = 0; + __wt_free(session, bnd->disk_image); __wt_free(session, bnd->supd); bnd->supd_next = 0; bnd->supd_allocated = 0; + bnd->already_compressed = false; + /* * Don't touch the key, we re-use that memory in each new * reconciliation. */ - - bnd->already_compressed = false; } /* @@ -1950,10 +1979,19 @@ __rec_split_init(WT_SESSION_IMPL *session, * additional data because we don't know how well it will compress, and * we don't want to increment our way up to the amount of data needed by * the application to successfully compress to the target page size. + * Ideally accumulate data several times the page size without + * approaching the memory page maximum, but at least have data worth + * one page. + * + * There are cases when we grow the page size to accommodate large + * records, in those cases we split the pages once they have crossed + * the maximum size for a page with raw compression. */ r->page_size = r->page_size_orig = max; if (r->raw_compression) - r->page_size *= 10; + r->max_raw_page_size = r->page_size = + (uint32_t)WT_MIN(r->page_size * 10, + WT_MAX(r->page_size, btree->maxmempage / 2)); /* * Ensure the disk image buffer is large enough for the max object, as @@ -2295,7 +2333,7 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) /* Hitting a page boundary resets the dictionary, in all cases. 
*/ __rec_dictionary_reset(r); - inuse = WT_PTRDIFF32(r->first_free, dsk); + inuse = WT_PTRDIFF(r->first_free, dsk); switch (r->bnd_state) { case SPLIT_BOUNDARY: /* @@ -2465,7 +2503,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session, WT_COMPRESSOR *compressor; WT_DECL_RET; WT_ITEM *dst, *write_ref; - WT_PAGE_HEADER *dsk, *dsk_dst; + WT_PAGE_HEADER *dsk, *dsk_dst, *disk_image; WT_SESSION *wt_session; size_t corrected_page_size, extra_skip, len, result_len; uint64_t recno; @@ -2582,11 +2620,9 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session, /* * Don't create an image so large that any future update will - * cause a split in memory. Use half of the maximum size so - * we split very compressible pages that have reached the - * maximum size in memory into two equal blocks. + * cause a split in memory. */ - if (len > (size_t)btree->maxmempage / 2) + if (max_image_slot == 0 && len > (size_t)r->max_raw_page_size) max_image_slot = slots; } @@ -2648,7 +2684,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session, r->page_size_orig, btree->split_pct, WT_BLOCK_COMPRESS_SKIP + extra_skip, (uint8_t *)dsk + WT_BLOCK_COMPRESS_SKIP, r->raw_offsets, - no_more_rows || max_image_slot == 0 ? slots : max_image_slot, + max_image_slot == 0 ? slots : max_image_slot, (uint8_t *)dst->mem + WT_BLOCK_COMPRESS_SKIP, result_len, no_more_rows || max_image_slot != 0, @@ -2751,7 +2787,8 @@ no_slots: if (result_slots != 0) { /* - * We have a block, finalize the header information. + * We have a block, finalize the compressed disk image's header + * information. */ dst->size = result_len + WT_BLOCK_COMPRESS_SKIP; dsk_dst = dst->mem; @@ -2760,6 +2797,26 @@ no_slots: r->raw_offsets[result_slots] + WT_BLOCK_COMPRESS_SKIP; dsk_dst->u.entries = r->raw_entries[result_slots - 1]; + /* + * Optionally keep the disk image in cache. Update the initial + * page-header fields to reflect the actual data being written. 
+ * + * If updates are saved and need to be restored, we have to keep + * a copy of the disk image. Unfortunately, we don't yet know if + * there are updates to restore for the key range covered by the + * disk image just created. If there are any saved updates, take + * a copy of the disk image, it's freed later if not needed. + */ + if (F_ISSET(r, WT_EVICT_SCRUB) || + (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && r->supd_next > 0)) { + WT_RET(__wt_strndup(session, dsk, + dsk_dst->mem_size, &last->disk_image)); + disk_image = last->disk_image; + disk_image->recno = last->recno; + disk_image->mem_size = dsk_dst->mem_size; + disk_image->u.entries = dsk_dst->u.entries; + } + /* * There is likely a remnant in the working buffer that didn't * get compressed; copy it down to the start of the buffer and @@ -2873,48 +2930,6 @@ split_grow: /* return (0); } -/* - * __rec_raw_decompress -- - * Decompress a raw-compressed image. - */ -static int -__rec_raw_decompress( - WT_SESSION_IMPL *session, const void *image, size_t size, void *retp) -{ - WT_BTREE *btree; - WT_DECL_ITEM(tmp); - WT_DECL_RET; - WT_PAGE_HEADER const *dsk; - size_t result_len; - - btree = S2BT(session); - dsk = image; - - /* - * We skipped an update and we can't write a block, but unfortunately, - * the block has already been compressed. Decompress the block so we - * can subsequently re-instantiate it in memory. 
- */ - WT_RET(__wt_scr_alloc(session, dsk->mem_size, &tmp)); - memcpy(tmp->mem, image, WT_BLOCK_COMPRESS_SKIP); - WT_ERR(btree->compressor->decompress(btree->compressor, - &session->iface, - (uint8_t *)image + WT_BLOCK_COMPRESS_SKIP, - size - WT_BLOCK_COMPRESS_SKIP, - (uint8_t *)tmp->mem + WT_BLOCK_COMPRESS_SKIP, - dsk->mem_size - WT_BLOCK_COMPRESS_SKIP, - &result_len)); - if (result_len != dsk->mem_size - WT_BLOCK_COMPRESS_SKIP) - WT_ERR(__wt_illegal_value(session, btree->dhandle->name)); - - WT_ERR(__wt_strndup(session, tmp->data, dsk->mem_size, retp)); - WT_ASSERT(session, __wt_verify_dsk_image(session, - "[raw evict split]", tmp->data, dsk->mem_size, false) == 0); - -err: __wt_scr_free(session, &tmp); - return (ret); -} - /* * __rec_split_raw -- * Raw compression split routine. @@ -3022,7 +3037,7 @@ __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r) if (r->raw_compression && r->entries != 0) { while (r->entries != 0) { data_size = - WT_PTRDIFF32(r->first_free, r->disk_image.mem); + WT_PTRDIFF(r->first_free, r->disk_image.mem); if (data_size <= btree->allocsize) break; WT_RET(__rec_split_raw_worker(session, r, 0, true)); @@ -3145,14 +3160,13 @@ __rec_split_write(WT_SESSION_IMPL *session, uint32_t bnd_slot, i, j; int cmp; uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE]; + bool need_image; btree = S2BT(session); dsk = buf->mem; page = r->page; mod = page->modify; - WT_RET(__wt_scr_alloc(session, 0, &key)); - /* Set the zero-length value flag in the page header. */ if (dsk->type == WT_PAGE_ROW_LEAF) { F_CLR(dsk, WT_PAGE_EMPTY_V_ALL | WT_PAGE_EMPTY_V_NONE); @@ -3163,6 +3177,8 @@ __rec_split_write(WT_SESSION_IMPL *session, F_SET(dsk, WT_PAGE_EMPTY_V_NONE); } + bnd->entries = r->entries; + /* Initialize the address (set the page type for the parent). 
*/ switch (dsk->type) { case WT_PAGE_COL_FIX: @@ -3176,9 +3192,8 @@ __rec_split_write(WT_SESSION_IMPL *session, case WT_PAGE_ROW_INT: bnd->addr.type = WT_ADDR_INT; break; - WT_ILLEGAL_VALUE_ERR(session); + WT_ILLEGAL_VALUE(session); } - bnd->size = (uint32_t)buf->size; bnd->cksum = 0; @@ -3190,6 +3205,8 @@ __rec_split_write(WT_SESSION_IMPL *session, * This code requires a key be filled in for the next block (or the * last block flag be set, if there's no next block). */ + if (page->type == WT_PAGE_ROW_LEAF) + WT_RET(__wt_scr_alloc(session, 0, &key)); for (i = 0, supd = r->supd; i < r->supd_next; ++i, ++supd) { /* The last block gets all remaining saved updates. */ if (last_block) { @@ -3254,33 +3271,11 @@ supd_check_complete: * image, we can't actually write it. Instead, we will re-instantiate * the page using the disk image and any list of updates we skipped. */ - if (F_ISSET(r, WT_EVICT_IN_MEMORY) || - (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL)) { - - /* Statistics tracking that we used update/restore. */ - if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL) - r->cache_write_restore = true; - - /* - * If the buffer is compressed (raw compression was configured), - * we have to decompress it so we can instantiate it later. It's - * a slow and convoluted path, but it's also a rare one and it's - * not worth making it faster. Else, the disk image is ready, - * copy it into place for later. It's possible the disk image - * has no items; we have to flag that for verification, it's a - * special case since read/writing empty pages isn't generally - * allowed. 
- */ - if (bnd->already_compressed) - WT_ERR(__rec_raw_decompress( - session, buf->data, buf->size, &bnd->disk_image)); - else { - WT_ERR(__wt_strndup( - session, buf->data, buf->size, &bnd->disk_image)); - WT_ASSERT(session, __wt_verify_dsk_image(session, - "[evict split]", buf->data, buf->size, true) == 0); - } - goto done; + if (F_ISSET(r, WT_EVICT_IN_MEMORY)) + goto copy_image; + if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL) { + r->cache_write_restore = true; + goto copy_image; } /* @@ -3324,13 +3319,11 @@ supd_check_complete: bnd->addr = multi->addr; WT_STAT_FAST_DATA_INCR(session, rec_page_match); - goto done; + goto copy_image; } } } - bnd->entries = r->entries; - #ifdef HAVE_VERBOSE /* Output a verbose message if we create a page without many entries */ if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT) && r->entries < 6) @@ -3343,8 +3336,8 @@ supd_check_complete: r->bnd_state)); #endif - WT_ERR(__wt_bt_write(session, - buf, addr, &addr_size, false, bnd->already_compressed)); + WT_ERR(__wt_bt_write(session, buf, addr, &addr_size, + false, F_ISSET(r, WT_CHECKPOINTING), bnd->already_compressed)); WT_ERR(__wt_strndup(session, addr, addr_size, &bnd->addr.addr)); bnd->addr.size = (uint8_t)addr_size; @@ -3354,9 +3347,29 @@ supd_check_complete: * the database's lookaside store. */ if (F_ISSET(r, WT_EVICT_LOOKASIDE) && bnd->supd != NULL) - ret = __rec_update_las(session, r, btree->id, bnd); + WT_ERR(__rec_update_las(session, r, btree->id, bnd)); + +copy_image: + /* + * If re-instantiating this page in memory (either because eviction + * wants to, or because we skipped updates to build the disk image), + * save a copy of the disk image. + * + * Raw compression might have already saved a copy of the disk image + * before we could know if we skipped updates to create it, and now + * we know if we're going to need it. + * + * Copy the disk image if we need a copy and don't already have one, + * discard any already saved copy we don't need. 
+ */ + need_image = F_ISSET(r, WT_EVICT_SCRUB) || + (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL); + if (need_image && bnd->disk_image == NULL) + WT_ERR(__wt_strndup( + session, buf->data, buf->size, &bnd->disk_image)); + if (!need_image) + __wt_free(session, bnd->disk_image); -done: err: __wt_scr_free(session, &key); return (ret); } @@ -3556,8 +3569,9 @@ __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_PAGE *parent; WT_RECONCILE *r; - r = cbulk->reconcile; btree = S2BT(session); + if ((r = cbulk->reconcile) == NULL) + return (0); switch (btree->type) { case BTREE_COL_FIX: @@ -5601,9 +5615,10 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_RET(__wt_btree_block_free(session, mod->mod_replace.addr, mod->mod_replace.size)); - /* Discard the replacement page's address. */ + /* Discard the replacement page's address and disk image. */ __wt_free(session, mod->mod_replace.addr); mod->mod_replace.size = 0; + __wt_free(session, mod->mod_disk_image); break; WT_ILLEGAL_VALUE(session); } @@ -5651,26 +5666,33 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) bnd = &r->bnd[0]; /* - * If saving/restoring changes for this page and there's only - * one block, there's nothing to write. This is an in-memory - * configuration or a special case of forced eviction: set up + * If in-memory, or saving/restoring changes for this page and + * there's only one block, there's nothing to write. Set up * a single block as if to split, then use that disk image to - * rewrite the page in memory. + * rewrite the page in memory. This is separate from simple + * replacements where eviction has decided to retain the page + * in memory because the latter can't handle update lists and + * splits can. 
*/ - if (bnd->disk_image != NULL) + if (F_ISSET(r, WT_EVICT_IN_MEMORY) || + (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL)) goto split; /* - * If this is a root page, then we don't have an address and we - * have to create a sync point. The address was cleared when - * we were about to write the buffer so we know what to do here. + * A root page, we don't have an address and we have to create + * a sync point. The address was cleared when we were about to + * write the buffer so we know what to do here. */ if (bnd->addr.addr == NULL) WT_RET(__wt_bt_write(session, &r->disk_image, - NULL, NULL, true, bnd->already_compressed)); + NULL, NULL, true, F_ISSET(r, WT_CHECKPOINTING), + bnd->already_compressed)); else { mod->mod_replace = bnd->addr; bnd->addr.addr = NULL; + + mod->mod_disk_image = bnd->disk_image; + bnd->disk_image = NULL; } mod->rec_result = WT_PM_REC_REPLACE; @@ -5805,19 +5827,26 @@ __rec_split_row(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_RET(__wt_row_ikey_alloc(session, 0, bnd->key.data, bnd->key.size, &multi->key.ikey)); - if (bnd->disk_image == NULL) { - multi->addr = bnd->addr; - multi->addr.reuse = 0; - multi->size = bnd->size; - multi->cksum = bnd->cksum; - bnd->addr.addr = NULL; - } else { + /* + * Copy any disk image. Don't take saved updates without a + * disk image (which happens if they have been saved to the + * lookaside table): they should be discarded along with the + * original page. + */ + multi->disk_image = bnd->disk_image; + bnd->disk_image = NULL; + if (multi->disk_image != NULL) { multi->supd = bnd->supd; multi->supd_entries = bnd->supd_next; bnd->supd = NULL; - multi->disk_image = bnd->disk_image; - bnd->disk_image = NULL; } + + /* Copy any address. 
*/ + multi->addr = bnd->addr; + multi->addr.reuse = 0; + multi->size = bnd->size; + multi->cksum = bnd->cksum; + bnd->addr.addr = NULL; } mod->mod_multi_entries = r->bnd_next; @@ -5845,19 +5874,26 @@ __rec_split_col(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) bnd = r->bnd, i = 0; i < r->bnd_next; ++multi, ++bnd, ++i) { multi->key.recno = bnd->recno; - if (bnd->disk_image == NULL) { - multi->addr = bnd->addr; - multi->addr.reuse = 0; - multi->size = bnd->size; - multi->cksum = bnd->cksum; - bnd->addr.addr = NULL; - } else { + /* + * Copy any disk image. Don't take saved updates without a + * disk image (which happens if they have been saved to the + * lookaside table): they should be discarded along with the + * original page. + */ + multi->disk_image = bnd->disk_image; + bnd->disk_image = NULL; + if (multi->disk_image != NULL) { multi->supd = bnd->supd; multi->supd_entries = bnd->supd_next; bnd->supd = NULL; - multi->disk_image = bnd->disk_image; - bnd->disk_image = NULL; } + + /* Copy any address. */ + multi->addr = bnd->addr; + multi->addr.reuse = 0; + multi->size = bnd->size; + multi->cksum = bnd->cksum; + bnd->addr.addr = NULL; } mod->mod_multi_entries = r->bnd_next; @@ -6133,7 +6169,8 @@ __rec_cell_build_ovfl(WT_SESSION_IMPL *session, /* Write the buffer. */ addr = buf; - WT_ERR(__wt_bt_write(session, tmp, addr, &size, false, false)); + WT_ERR(__wt_bt_write(session, tmp, + addr, &size, false, F_ISSET(r, WT_CHECKPOINTING), false)); /* * Track the overflow record (unless it's a bulk load, which diff --git a/src/third_party/wiredtiger/src/schema/schema_rename.c b/src/third_party/wiredtiger/src/schema/schema_rename.c index 8f4d374fd22..bc92c882117 100644 --- a/src/third_party/wiredtiger/src/schema/schema_rename.c +++ b/src/third_party/wiredtiger/src/schema/schema_rename.c @@ -64,7 +64,7 @@ __rename_file( WT_ERR(__wt_metadata_insert(session, newuri, oldvalue)); /* Rename the underlying file. 
*/ - WT_ERR(__wt_fs_rename(session, filename, newfile)); + WT_ERR(__wt_fs_rename(session, filename, newfile, false)); if (WT_META_TRACKING(session)) WT_ERR(__wt_meta_track_fileop(session, uri, newuri)); diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c index 77d1dc74c84..0072d7e1445 100644 --- a/src/third_party/wiredtiger/src/session/session_api.c +++ b/src/third_party/wiredtiger/src/session/session_api.c @@ -17,12 +17,8 @@ static int __session_rollback_transaction(WT_SESSION *, const char *); * Unsupported session method. */ int -__wt_session_notsup(WT_SESSION *wt_session) +__wt_session_notsup(WT_SESSION_IMPL *session) { - WT_SESSION_IMPL *session; - - session = (WT_SESSION_IMPL *)wt_session; - WT_RET_MSG(session, ENOTSUP, "Unsupported session method"); } @@ -66,6 +62,17 @@ __wt_session_copy_values(WT_SESSION_IMPL *session) TAILQ_FOREACH(cursor, &session->cursors, q) if (F_ISSET(cursor, WT_CURSTD_VALUE_INT)) { +#ifdef HAVE_DIAGNOSTIC + /* + * We have to do this with a transaction ID pinned + * unless the cursor is reading from a checkpoint. 
+ */ + WT_TXN_STATE *txn_state = WT_SESSION_TXN_STATE(session); + WT_ASSERT(session, txn_state->snap_min != WT_TXN_NONE || + (WT_PREFIX_MATCH(cursor->uri, "file:") && + F_ISSET((WT_CURSOR_BTREE *)cursor, WT_CBT_NO_TXN))); +#endif + F_CLR(cursor, WT_CURSTD_VALUE_INT); WT_RET(__wt_buf_set(session, &cursor->value, cursor->value.data, cursor->value.size)); @@ -509,7 +516,11 @@ __session_create(WT_SESSION *wt_session, const char *uri, const char *config) ret = __wt_session_create(session, uri, config); -err: API_END_RET_NOTFOUND_MAP(session, ret); +err: if (ret != 0) + WT_STAT_FAST_CONN_INCR(session, session_table_create_fail); + else + WT_STAT_FAST_CONN_INCR(session, session_table_create_success); + API_END_RET_NOTFOUND_MAP(session, ret); } /* @@ -520,10 +531,18 @@ static int __session_create_readonly( WT_SESSION *wt_session, const char *uri, const char *config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(uri); WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, create); + + WT_STAT_FAST_CONN_INCR(session, session_table_create_fail); + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* @@ -570,9 +589,16 @@ err: API_END_RET(session, ret); static int __session_log_flush_readonly(WT_SESSION *wt_session, const char *config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, log_flush); + + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* @@ -605,9 +631,16 @@ static int __session_log_printf_readonly(WT_SESSION *wt_session, const char *fmt, ...) 
WT_GCC_FUNC_ATTRIBUTE((format (printf, 2, 3))) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(fmt); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, log_printf); + + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* @@ -630,7 +663,12 @@ __session_rebalance(WT_SESSION *wt_session, const char *uri, const char *config) ret = __wt_schema_worker(session, uri, __wt_bt_rebalance, NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_REBALANCE))); -err: API_END_RET_NOTFOUND_MAP(session, ret); +err: if (ret != 0) + WT_STAT_FAST_CONN_INCR(session, session_table_rebalance_fail); + else + WT_STAT_FAST_CONN_INCR(session, + session_table_rebalance_success); + API_END_RET_NOTFOUND_MAP(session, ret); } /* @@ -641,10 +679,18 @@ static int __session_rebalance_readonly( WT_SESSION *wt_session, const char *uri, const char *config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(uri); WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, rebalance); + + WT_STAT_FAST_CONN_INCR(session, session_table_rebalance_fail); + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* @@ -670,7 +716,11 @@ __session_rename(WT_SESSION *wt_session, WT_WITH_TABLE_LOCK(session, ret, ret = __wt_schema_rename(session, uri, newuri, cfg)))); -err: API_END_RET_NOTFOUND_MAP(session, ret); +err: if (ret != 0) + WT_STAT_FAST_CONN_INCR(session, session_table_rename_fail); + else + WT_STAT_FAST_CONN_INCR(session, session_table_rename_success); + API_END_RET_NOTFOUND_MAP(session, ret); } /* @@ -681,11 +731,19 @@ static int __session_rename_readonly(WT_SESSION *wt_session, const char *uri, const char *newuri, const char *config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(uri); WT_UNUSED(newuri); WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL 
*)wt_session; + SESSION_API_CALL_NOCONF(session, rename); + + WT_STAT_FAST_CONN_INCR(session, session_table_rename_fail); + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* @@ -733,8 +791,8 @@ __wt_session_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) F_SET(session, WT_SESSION_LOCK_NO_WAIT); /* - * The checkpoint lock only is needed to avoid a spurious EBUSY error - * return. + * Take the checkpoint lock if there is a need to prevent the drop + * operation from failing with EBUSY due to an ongoing checkpoint. */ if (checkpoint_wait) WT_WITH_CHECKPOINT_LOCK(session, ret, @@ -770,7 +828,12 @@ __session_drop(WT_SESSION *wt_session, const char *uri, const char *config) ret = __wt_session_drop(session, uri, cfg); -err: /* Note: drop operations cannot be unrolled (yet?). */ +err: if (ret != 0) + WT_STAT_FAST_CONN_INCR(session, session_table_drop_fail); + else + WT_STAT_FAST_CONN_INCR(session, session_table_drop_success); + + /* Note: drop operations cannot be unrolled (yet?). 
*/ API_END_RET_NOTFOUND_MAP(session, ret); } @@ -782,10 +845,18 @@ static int __session_drop_readonly( WT_SESSION *wt_session, const char *uri, const char *config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(uri); WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, drop); + + WT_STAT_FAST_CONN_INCR(session, session_table_drop_fail); + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* @@ -943,7 +1014,11 @@ __session_salvage(WT_SESSION *wt_session, const char *uri, const char *config) ret = __wt_schema_worker(session, uri, __wt_salvage, NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_SALVAGE))); -err: API_END_RET_NOTFOUND_MAP(session, ret); +err: if (ret != 0) + WT_STAT_FAST_CONN_INCR(session, session_table_salvage_fail); + else + WT_STAT_FAST_CONN_INCR(session, session_table_salvage_success); + API_END_RET_NOTFOUND_MAP(session, ret); } /* @@ -954,10 +1029,18 @@ static int __session_salvage_readonly( WT_SESSION *wt_session, const char *uri, const char *config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(uri); WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, salvage); + + WT_STAT_FAST_CONN_INCR(session, session_table_salvage_fail); + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* @@ -1135,6 +1218,10 @@ __session_truncate(WT_SESSION *wt_session, err: TXN_API_END_RETRY(session, ret, 0); + if (ret != 0) + WT_STAT_FAST_CONN_INCR(session, session_table_truncate_fail); + else + WT_STAT_FAST_CONN_INCR(session, session_table_truncate_success); /* * Only map WT_NOTFOUND to ENOENT if a URI was specified. 
*/ @@ -1149,12 +1236,20 @@ static int __session_truncate_readonly(WT_SESSION *wt_session, const char *uri, WT_CURSOR *start, WT_CURSOR *stop, const char *config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(uri); WT_UNUSED(start); WT_UNUSED(stop); WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, truncate); + + WT_STAT_FAST_CONN_INCR(session, session_table_truncate_fail); + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* @@ -1187,10 +1282,17 @@ static int __session_upgrade_readonly( WT_SESSION *wt_session, const char *uri, const char *config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(uri); WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, upgrade); + + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* @@ -1216,7 +1318,11 @@ __session_verify(WT_SESSION *wt_session, const char *uri, const char *config) ret = __wt_schema_worker(session, uri, __wt_verify, NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_VERIFY))); -err: API_END_RET_NOTFOUND_MAP(session, ret); +err: if (ret != 0) + WT_STAT_FAST_CONN_INCR(session, session_table_verify_fail); + else + WT_STAT_FAST_CONN_INCR(session, session_table_verify_success); + API_END_RET_NOTFOUND_MAP(session, ret); } /* @@ -1421,9 +1527,16 @@ err: API_END_RET(session, ret); static int __session_transaction_sync_readonly(WT_SESSION *wt_session, const char *config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, transaction_sync); + + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* @@ -1481,9 +1594,16 @@ err: API_END_RET_NOTFOUND_MAP(session, ret); static int __session_checkpoint_readonly(WT_SESSION *wt_session, const char 
*config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, checkpoint); + + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* diff --git a/src/third_party/wiredtiger/src/session/session_compact.c b/src/third_party/wiredtiger/src/session/session_compact.c index 3f7b34d132f..47ed5298304 100644 --- a/src/third_party/wiredtiger/src/session/session_compact.c +++ b/src/third_party/wiredtiger/src/session/session_compact.c @@ -333,6 +333,10 @@ err: session->compact = NULL; */ WT_TRET(__wt_session_release_resources(session)); + if (ret != 0) + WT_STAT_FAST_CONN_INCR(session, session_table_compact_fail); + else + WT_STAT_FAST_CONN_INCR(session, session_table_compact_success); API_END_RET_NOTFOUND_MAP(session, ret); } @@ -344,8 +348,16 @@ int __wt_session_compact_readonly( WT_SESSION *wt_session, const char *uri, const char *config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(uri); WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, compact); + + WT_STAT_FAST_CONN_INCR(session, session_table_compact_fail); + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } diff --git a/src/third_party/wiredtiger/src/support/err.c b/src/third_party/wiredtiger/src/support/err.c index 93c0af37328..60fc53cecd0 100644 --- a/src/third_party/wiredtiger/src/support/err.c +++ b/src/third_party/wiredtiger/src/support/err.c @@ -118,7 +118,13 @@ __handler_failure(WT_SESSION_IMPL *session, handler->handle_error(handler, wt_session, error, s) == 0) return; + /* + * In case there is a failure in the default error handler, make sure + * we don't recursively try to report *that* error. 
+ */ + session->event_handler = &__event_handler_default; (void)__handle_error_default(NULL, wt_session, error, s); + session->event_handler = handler; } /* diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index d972f0c140f..49cb3bebc07 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -43,6 +43,7 @@ static const char * const __stats_dsrc_desc[] = { "btree: pages rewritten by compaction", "btree: row-store internal pages", "btree: row-store leaf pages", + "cache: bytes currently in the cache", "cache: bytes read into cache", "cache: bytes written from cache", "cache: checkpoint blocked page eviction", @@ -173,6 +174,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats) stats->btree_compact_rewrite = 0; stats->btree_row_internal = 0; stats->btree_row_leaf = 0; + /* not clearing cache_bytes_inuse */ stats->cache_bytes_read = 0; stats->cache_bytes_write = 0; stats->cache_eviction_checkpoint = 0; @@ -300,6 +302,7 @@ __wt_stat_dsrc_aggregate_single( to->btree_compact_rewrite += from->btree_compact_rewrite; to->btree_row_internal += from->btree_row_internal; to->btree_row_leaf += from->btree_row_leaf; + to->cache_bytes_inuse += from->cache_bytes_inuse; to->cache_bytes_read += from->cache_bytes_read; to->cache_bytes_write += from->cache_bytes_write; to->cache_eviction_checkpoint += from->cache_eviction_checkpoint; @@ -433,6 +436,7 @@ __wt_stat_dsrc_aggregate( WT_STAT_READ(from, btree_compact_rewrite); to->btree_row_internal += WT_STAT_READ(from, btree_row_internal); to->btree_row_leaf += WT_STAT_READ(from, btree_row_leaf); + to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse); to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read); to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write); to->cache_eviction_checkpoint += @@ -542,9 +546,12 @@ static const char * const __stats_connection_desc[] = { "block-manager: blocks written", 
"block-manager: bytes read", "block-manager: bytes written", + "block-manager: bytes written for checkpoint", "block-manager: mapped blocks read", "block-manager: mapped bytes read", + "cache: bytes belonging to page images in the cache", "cache: bytes currently in the cache", + "cache: bytes not belonging to page images in the cache", "cache: bytes read into cache", "cache: bytes written from cache", "cache: checkpoint blocked page eviction", @@ -578,6 +585,8 @@ static const char * const __stats_connection_desc[] = { "cache: maximum page size at eviction", "cache: modified pages evicted", "cache: modified pages evicted by application threads", + "cache: overflow pages read into cache", + "cache: overflow values cached in memory", "cache: page split during eviction deepened the tree", "cache: page written requiring lookaside records", "cache: pages currently held in the cache", @@ -586,6 +595,7 @@ static const char * const __stats_connection_desc[] = { "cache: pages evicted by application threads", "cache: pages queued for eviction", "cache: pages queued for urgent eviction", + "cache: pages queued for urgent eviction during walk", "cache: pages read into cache", "cache: pages read into cache requiring lookaside entries", "cache: pages requested from the cache", @@ -597,7 +607,6 @@ static const char * const __stats_connection_desc[] = { "cache: percentage overhead", "cache: tracked bytes belonging to internal pages in the cache", "cache: tracked bytes belonging to leaf pages in the cache", - "cache: tracked bytes belonging to overflow pages in the cache", "cache: tracked dirty bytes in the cache", "cache: tracked dirty pages in the cache", "cache: unmodified pages evicted", @@ -677,6 +686,22 @@ static const char * const __stats_connection_desc[] = { "reconciliation: split objects currently awaiting free", "session: open cursor count", "session: open session count", + "session: table compact failed calls", + "session: table compact successful calls", + "session: 
table create failed calls", + "session: table create successful calls", + "session: table drop failed calls", + "session: table drop successful calls", + "session: table rebalance failed calls", + "session: table rebalance successful calls", + "session: table rename failed calls", + "session: table rename successful calls", + "session: table salvage failed calls", + "session: table salvage successful calls", + "session: table truncate failed calls", + "session: table truncate successful calls", + "session: table verify failed calls", + "session: table verify successful calls", "thread-state: active filesystem fsync calls", "thread-state: active filesystem read calls", "thread-state: active filesystem write calls", @@ -693,13 +718,13 @@ static const char * const __stats_connection_desc[] = { "transaction: transaction checkpoint max time (msecs)", "transaction: transaction checkpoint min time (msecs)", "transaction: transaction checkpoint most recent time (msecs)", + "transaction: transaction checkpoint scrub dirty target", + "transaction: transaction checkpoint scrub time (msecs)", "transaction: transaction checkpoint total time (msecs)", "transaction: transaction checkpoints", "transaction: transaction failures due to cache overflow", "transaction: transaction fsync calls for checkpoint after allocating the transaction ID", - "transaction: transaction fsync calls for checkpoint before allocating the transaction ID", "transaction: transaction fsync duration for checkpoint after allocating the transaction ID (usecs)", - "transaction: transaction fsync duration for checkpoint before allocating the transaction ID (usecs)", "transaction: transaction range of IDs currently pinned", "transaction: transaction range of IDs currently pinned by a checkpoint", "transaction: transaction range of IDs currently pinned by named snapshots", @@ -764,9 +789,12 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->block_write = 0; stats->block_byte_read = 0; 
stats->block_byte_write = 0; + stats->block_byte_write_checkpoint = 0; stats->block_map_read = 0; stats->block_byte_map_read = 0; + /* not clearing cache_bytes_image */ /* not clearing cache_bytes_inuse */ + /* not clearing cache_bytes_other */ stats->cache_bytes_read = 0; stats->cache_bytes_write = 0; stats->cache_eviction_checkpoint = 0; @@ -800,6 +828,8 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) /* not clearing cache_eviction_maximum_page_size */ stats->cache_eviction_dirty = 0; stats->cache_eviction_app_dirty = 0; + stats->cache_read_overflow = 0; + stats->cache_overflow_value = 0; stats->cache_eviction_deepen = 0; stats->cache_write_lookaside = 0; /* not clearing cache_pages_inuse */ @@ -807,6 +837,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cache_eviction_force_delete = 0; stats->cache_eviction_app = 0; stats->cache_eviction_pages_queued = 0; + stats->cache_eviction_pages_queued_urgent = 0; stats->cache_eviction_pages_queued_oldest = 0; stats->cache_read = 0; stats->cache_read_lookaside = 0; @@ -819,7 +850,6 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) /* not clearing cache_overhead */ /* not clearing cache_bytes_internal */ /* not clearing cache_bytes_leaf */ - /* not clearing cache_bytes_overflow */ /* not clearing cache_bytes_dirty */ /* not clearing cache_pages_dirty */ stats->cache_eviction_clean = 0; @@ -899,9 +929,25 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) /* not clearing rec_split_stashed_objects */ /* not clearing session_cursor_open */ /* not clearing session_open */ - /* not clearing fsync_active */ - /* not clearing read_active */ - /* not clearing write_active */ + /* not clearing session_table_compact_fail */ + /* not clearing session_table_compact_success */ + /* not clearing session_table_create_fail */ + /* not clearing session_table_create_success */ + /* not clearing session_table_drop_fail */ + /* not clearing session_table_drop_success */ + /* 
not clearing session_table_rebalance_fail */ + /* not clearing session_table_rebalance_success */ + /* not clearing session_table_rename_fail */ + /* not clearing session_table_rename_success */ + /* not clearing session_table_salvage_fail */ + /* not clearing session_table_salvage_success */ + /* not clearing session_table_truncate_fail */ + /* not clearing session_table_truncate_success */ + /* not clearing session_table_verify_fail */ + /* not clearing session_table_verify_success */ + /* not clearing thread_fsync_active */ + /* not clearing thread_read_active */ + /* not clearing thread_write_active */ stats->page_busy_blocked = 0; stats->page_forcible_evict_blocked = 0; stats->page_locked_blocked = 0; @@ -915,13 +961,13 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) /* not clearing txn_checkpoint_time_max */ /* not clearing txn_checkpoint_time_min */ /* not clearing txn_checkpoint_time_recent */ + /* not clearing txn_checkpoint_scrub_target */ + /* not clearing txn_checkpoint_scrub_time */ /* not clearing txn_checkpoint_time_total */ stats->txn_checkpoint = 0; stats->txn_fail_cache = 0; stats->txn_checkpoint_fsync_post = 0; - stats->txn_checkpoint_fsync_pre = 0; - stats->txn_checkpoint_fsync_post_duration = 0; - stats->txn_checkpoint_fsync_pre_duration = 0; + /* not clearing txn_checkpoint_fsync_post_duration */ /* not clearing txn_pinned_range */ /* not clearing txn_pinned_checkpoint_range */ /* not clearing txn_pinned_snapshot_range */ @@ -978,9 +1024,13 @@ __wt_stat_connection_aggregate( to->block_write += WT_STAT_READ(from, block_write); to->block_byte_read += WT_STAT_READ(from, block_byte_read); to->block_byte_write += WT_STAT_READ(from, block_byte_write); + to->block_byte_write_checkpoint += + WT_STAT_READ(from, block_byte_write_checkpoint); to->block_map_read += WT_STAT_READ(from, block_map_read); to->block_byte_map_read += WT_STAT_READ(from, block_byte_map_read); + to->cache_bytes_image += WT_STAT_READ(from, cache_bytes_image); 
to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse); + to->cache_bytes_other += WT_STAT_READ(from, cache_bytes_other); to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read); to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write); to->cache_eviction_checkpoint += @@ -1039,6 +1089,8 @@ __wt_stat_connection_aggregate( to->cache_eviction_dirty += WT_STAT_READ(from, cache_eviction_dirty); to->cache_eviction_app_dirty += WT_STAT_READ(from, cache_eviction_app_dirty); + to->cache_read_overflow += WT_STAT_READ(from, cache_read_overflow); + to->cache_overflow_value += WT_STAT_READ(from, cache_overflow_value); to->cache_eviction_deepen += WT_STAT_READ(from, cache_eviction_deepen); to->cache_write_lookaside += @@ -1050,6 +1102,8 @@ __wt_stat_connection_aggregate( to->cache_eviction_app += WT_STAT_READ(from, cache_eviction_app); to->cache_eviction_pages_queued += WT_STAT_READ(from, cache_eviction_pages_queued); + to->cache_eviction_pages_queued_urgent += + WT_STAT_READ(from, cache_eviction_pages_queued_urgent); to->cache_eviction_pages_queued_oldest += WT_STAT_READ(from, cache_eviction_pages_queued_oldest); to->cache_read += WT_STAT_READ(from, cache_read); @@ -1065,7 +1119,6 @@ __wt_stat_connection_aggregate( to->cache_overhead += WT_STAT_READ(from, cache_overhead); to->cache_bytes_internal += WT_STAT_READ(from, cache_bytes_internal); to->cache_bytes_leaf += WT_STAT_READ(from, cache_bytes_leaf); - to->cache_bytes_overflow += WT_STAT_READ(from, cache_bytes_overflow); to->cache_bytes_dirty += WT_STAT_READ(from, cache_bytes_dirty); to->cache_pages_dirty += WT_STAT_READ(from, cache_pages_dirty); to->cache_eviction_clean += WT_STAT_READ(from, cache_eviction_clean); @@ -1151,9 +1204,41 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, rec_split_stashed_objects); to->session_cursor_open += WT_STAT_READ(from, session_cursor_open); to->session_open += WT_STAT_READ(from, session_open); - to->fsync_active += WT_STAT_READ(from, fsync_active); - to->read_active 
+= WT_STAT_READ(from, read_active); - to->write_active += WT_STAT_READ(from, write_active); + to->session_table_compact_fail += + WT_STAT_READ(from, session_table_compact_fail); + to->session_table_compact_success += + WT_STAT_READ(from, session_table_compact_success); + to->session_table_create_fail += + WT_STAT_READ(from, session_table_create_fail); + to->session_table_create_success += + WT_STAT_READ(from, session_table_create_success); + to->session_table_drop_fail += + WT_STAT_READ(from, session_table_drop_fail); + to->session_table_drop_success += + WT_STAT_READ(from, session_table_drop_success); + to->session_table_rebalance_fail += + WT_STAT_READ(from, session_table_rebalance_fail); + to->session_table_rebalance_success += + WT_STAT_READ(from, session_table_rebalance_success); + to->session_table_rename_fail += + WT_STAT_READ(from, session_table_rename_fail); + to->session_table_rename_success += + WT_STAT_READ(from, session_table_rename_success); + to->session_table_salvage_fail += + WT_STAT_READ(from, session_table_salvage_fail); + to->session_table_salvage_success += + WT_STAT_READ(from, session_table_salvage_success); + to->session_table_truncate_fail += + WT_STAT_READ(from, session_table_truncate_fail); + to->session_table_truncate_success += + WT_STAT_READ(from, session_table_truncate_success); + to->session_table_verify_fail += + WT_STAT_READ(from, session_table_verify_fail); + to->session_table_verify_success += + WT_STAT_READ(from, session_table_verify_success); + to->thread_fsync_active += WT_STAT_READ(from, thread_fsync_active); + to->thread_read_active += WT_STAT_READ(from, thread_read_active); + to->thread_write_active += WT_STAT_READ(from, thread_write_active); to->page_busy_blocked += WT_STAT_READ(from, page_busy_blocked); to->page_forcible_evict_blocked += WT_STAT_READ(from, page_forcible_evict_blocked); @@ -1175,18 +1260,18 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, txn_checkpoint_time_min); to->txn_checkpoint_time_recent += 
WT_STAT_READ(from, txn_checkpoint_time_recent); + to->txn_checkpoint_scrub_target += + WT_STAT_READ(from, txn_checkpoint_scrub_target); + to->txn_checkpoint_scrub_time += + WT_STAT_READ(from, txn_checkpoint_scrub_time); to->txn_checkpoint_time_total += WT_STAT_READ(from, txn_checkpoint_time_total); to->txn_checkpoint += WT_STAT_READ(from, txn_checkpoint); to->txn_fail_cache += WT_STAT_READ(from, txn_fail_cache); to->txn_checkpoint_fsync_post += WT_STAT_READ(from, txn_checkpoint_fsync_post); - to->txn_checkpoint_fsync_pre += - WT_STAT_READ(from, txn_checkpoint_fsync_pre); to->txn_checkpoint_fsync_post_duration += WT_STAT_READ(from, txn_checkpoint_fsync_post_duration); - to->txn_checkpoint_fsync_pre_duration += - WT_STAT_READ(from, txn_checkpoint_fsync_pre_duration); to->txn_pinned_range += WT_STAT_READ(from, txn_pinned_range); to->txn_pinned_checkpoint_range += WT_STAT_READ(from, txn_pinned_checkpoint_range); diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index dd4384d9a9a..87b74433769 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -124,6 +124,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session) txn = &session->txn; txn_global = &conn->txn_global; txn_state = WT_SESSION_TXN_STATE(session); + n = 0; /* * Spin waiting for the lock: the sleeps in our blocking readlock @@ -137,20 +138,26 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session) current_id = snap_min = txn_global->current; prev_oldest_id = txn_global->oldest_id; + /* + * Include the checkpoint transaction, if one is running: we should + * ignore any uncommitted changes the checkpoint has written to the + * metadata. We don't have to keep the checkpoint's changes pinned so + * don't include it in the published snap_min. + */ + if ((id = txn_global->checkpoint_txnid) != WT_TXN_NONE) + txn->snapshot[n++] = id; + /* For pure read-only workloads, avoid scanning.
*/ if (prev_oldest_id == current_id) { txn_state->snap_min = current_id; - __txn_sort_snapshot(session, 0, current_id); - /* Check that the oldest ID has not moved in the meantime. */ WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id); - WT_RET(__wt_readunlock(session, txn_global->scan_rwlock)); - return (0); + goto done; } /* Walk the array of concurrent transactions. */ WT_ORDERED_READ(session_cnt, conn->session_cnt); - for (i = n = 0, s = txn_global->states; i < session_cnt; i++, s++) { + for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) { /* * Build our snapshot of any concurrent transaction IDs. * @@ -178,8 +185,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session) WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id); txn_state->snap_min = snap_min; - WT_RET(__wt_readunlock(session, txn_global->scan_rwlock)); - +done: WT_RET(__wt_readunlock(session, txn_global->scan_rwlock)); __txn_sort_snapshot(session, n, current_id); return (0); } @@ -433,18 +439,22 @@ __wt_txn_release(WT_SESSION_IMPL *session) WT_TXN_STATE *txn_state; txn = &session->txn; - WT_ASSERT(session, txn->mod_count == 0); - txn->notify = NULL; - txn_global = &S2C(session)->txn_global; txn_state = WT_SESSION_TXN_STATE(session); + WT_ASSERT(session, txn->mod_count == 0); + txn->notify = NULL; + /* Clear the transaction's ID from the global table. */ if (WT_SESSION_IS_CHECKPOINT(session)) { WT_ASSERT(session, txn_state->id == WT_TXN_NONE); - txn->id = WT_TXN_NONE; + txn->id = txn_global->checkpoint_txnid = WT_TXN_NONE; - /* Clear the global checkpoint transaction IDs. */ + /* + * Be extra careful to cleanup everything for checkpoints: once + * the global checkpoint ID is cleared, we can no longer tell + * if this session is doing a checkpoint. 
+ */ txn_global->checkpoint_id = 0; txn_global->checkpoint_pinned = WT_TXN_NONE; } else if (F_ISSET(txn, WT_TXN_HAS_ID)) { @@ -470,6 +480,7 @@ __wt_txn_release(WT_SESSION_IMPL *session) */ __wt_txn_release_snapshot(session); txn->isolation = session->isolation; + /* Ensure the transaction flags are cleared on exit */ txn->flags = 0; } @@ -487,10 +498,12 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_TXN *txn; WT_TXN_OP *op; u_int i; + bool did_update; txn = &session->txn; conn = S2C(session); - WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || txn->mod_count == 0); + did_update = txn->mod_count != 0; + WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || !did_update); if (!F_ISSET(txn, WT_TXN_RUNNING)) WT_RET_MSG(session, EINVAL, "No transaction is active"); @@ -540,8 +553,18 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_TRET(txn->notify->notify(txn->notify, (WT_SESSION *)session, txn->id, 1)); + /* + * We are about to release the snapshot: copy values into any + * positioned cursors so they don't point to updates that could be + * freed once we don't have a snapshot. + */ + if (session->ncursors > 0) { + WT_DIAGNOSTIC_YIELD; + WT_RET(__wt_session_copy_values(session)); + } + /* If we are logging, write a commit log record. */ - if (ret == 0 && txn->mod_count > 0 && + if (ret == 0 && did_update && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) && !F_ISSET(session, WT_SESSION_NO_LOGGING)) { /* @@ -569,14 +592,6 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) __wt_txn_op_free(session, op); txn->mod_count = 0; - /* - * We are about to release the snapshot: copy values into any - * positioned cursors so they don't point to updates that could be - * freed once we don't have a transaction ID pinned. 
- */ - if (session->ncursors > 0) - WT_RET(__wt_session_copy_values(session)); - __wt_txn_release(session); return (0); } diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index 51d26b9aed6..c23f293154a 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -10,14 +10,16 @@ static int __checkpoint_lock_tree( WT_SESSION_IMPL *, bool, bool, const char *[]); +static int __checkpoint_mark_deletes(WT_SESSION_IMPL *, const char *[]); +static int __checkpoint_presync(WT_SESSION_IMPL *, const char *[]); static int __checkpoint_tree_helper(WT_SESSION_IMPL *, const char *[]); /* - * __wt_checkpoint_name_ok -- + * __checkpoint_name_ok -- * Complain if the checkpoint name isn't acceptable. */ -int -__wt_checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len) +static int +__checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len) { /* Check for characters we don't want to see in a metadata file. */ WT_RET(__wt_name_check(session, name, len)); @@ -107,7 +109,7 @@ __checkpoint_apply_all(WT_SESSION_IMPL *session, const char *cfg[], WT_RET(__wt_config_gets(session, cfg, "name", &cval)); named = cval.len != 0; if (named) - WT_RET(__wt_checkpoint_name_ok(session, cval.str, cval.len)); + WT_RET(__checkpoint_name_ok(session, cval.str, cval.len)); /* Step through the targets and optionally operate on each one. */ WT_ERR(__wt_config_gets(session, cfg, "target", &cval)); @@ -183,6 +185,8 @@ __checkpoint_apply(WT_SESSION_IMPL *session, const char *cfg[], /* If we have already locked the handles, apply the operation. 
*/ for (i = 0; i < session->ckpt_handle_next; ++i) { + if (session->ckpt_handle[i] == NULL) + continue; WT_WITH_DHANDLE(session, session->ckpt_handle[i], ret = (*op)(session, cfg)); WT_RET(ret); @@ -234,6 +238,7 @@ __checkpoint_data_source(WT_SESSION_IMPL *session, const char *cfg[]) int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) { + WT_BTREE *btree; WT_DECL_RET; const char *name; @@ -258,6 +263,14 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) if ((ret = __wt_session_get_btree(session, name, NULL, NULL, 0)) != 0) return (ret == EBUSY ? 0 : ret); + /* + * Save the current eviction walk setting: checkpoint can interfere + * with eviction and we don't want to unfairly penalize (or promote) + * eviction in trees due to checkpoints. + */ + btree = S2BT(session); + btree->evict_walk_saved = btree->evict_walk_period; + WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_tree(session, true, true, cfg)); if (ret != 0) { @@ -265,20 +278,164 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) return (ret); } + /* + * Flag that the handle is part of a checkpoint for the purposes + * of transaction visibility checks. + */ + WT_PUBLISH(btree->include_checkpoint_txn, true); + session->ckpt_handle[session->ckpt_handle_next++] = session->dhandle; return (0); } /* - * __checkpoint_write_leaves -- - * Write any dirty leaf pages for all checkpoint handles. + * __checkpoint_update_generation -- + * Update the checkpoint generation of the current tree. + * + * This indicates that the tree will not be visited again by the current + * checkpoint. 
+ */ +static void +__checkpoint_update_generation(WT_SESSION_IMPL *session) +{ + WT_BTREE *btree; + + btree = S2BT(session); + if (!WT_IS_METADATA(session, session->dhandle)) + WT_PUBLISH(btree->include_checkpoint_txn, false); + + WT_PUBLISH(btree->checkpoint_gen, + S2C(session)->txn_global.checkpoint_gen); + WT_STAT_FAST_DATA_SET(session, + btree_checkpoint_generation, btree->checkpoint_gen); +} + +/* + * __checkpoint_reduce_dirty_cache -- + * Step down the dirty trigger to reduce dirty data before a checkpoint. */ static int -__checkpoint_write_leaves(WT_SESSION_IMPL *session, const char *cfg[]) +__checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session) { - WT_UNUSED(cfg); + WT_CACHE *cache; + WT_CONNECTION_IMPL *conn; + struct timespec start, last, stop; + u_int current_dirty; + uint64_t bytes_written_last, bytes_written_start, bytes_written_total; + uint64_t current_us, stepdown_us, total_ms; + bool progress; + + conn = S2C(session); + cache = conn->cache; + + WT_RET(__wt_epoch(session, &start)); + last = start; + bytes_written_last = 0; + bytes_written_start = cache->bytes_written; + stepdown_us = 10000; + progress = false; + + /* Step down the dirty target to the eviction trigger */ + for (;;) { + current_dirty = (u_int)((100 * + __wt_cache_dirty_leaf_inuse(cache)) / conn->cache_size); + if (current_dirty <= cache->eviction_dirty_target) + break; + + __wt_sleep(0, stepdown_us / 4); + WT_RET(__wt_epoch(session, &stop)); + current_us = WT_TIMEDIFF_US(stop, last); + total_ms = WT_TIMEDIFF_MS(stop, start); + bytes_written_total = + cache->bytes_written - bytes_written_start; - return (__wt_cache_op(session, WT_SYNC_WRITE_LEAVES)); + /* + * Estimate how long the next step down of 1% of dirty data + * should take. + * + * The calculation here assumes that the system is writing from + * cache as fast as it can, and determines the write throughput + * based on the change in the bytes written from cache since + * the start of the call.
We use that to estimate how long it + * will take to step the dirty target down by 1%. + * + * Take care to avoid dividing by zero. + */ + if (bytes_written_total - bytes_written_last > WT_MEGABYTE && + bytes_written_total > total_ms && total_ms > 0 && + (!progress || + current_dirty <= cache->eviction_dirty_trigger)) { + stepdown_us = (uint64_t)(WT_THOUSAND * ( + (double)(conn->cache_size / 100) / + (double)(bytes_written_total / total_ms))); + if (!progress) + stepdown_us = WT_MIN(stepdown_us, 200000); + } + + bytes_written_last = bytes_written_total; + + if (current_dirty <= cache->eviction_dirty_trigger) { + progress = true; + + /* + * Smooth out step down: try to limit the impact on + * performance to 10% by waiting once we reach the last + * level. + */ + __wt_sleep(0, 10 * stepdown_us); + cache->eviction_dirty_trigger = current_dirty - 1; + WT_STAT_FAST_CONN_SET(session, + txn_checkpoint_scrub_target, current_dirty - 1); + WT_RET(__wt_epoch(session, &last)); + continue; + } + + /* + * We haven't reached the current target. + * + * Don't wait indefinitely: there might be dirty pages that + * can't be evicted. If we can't meet the target, give up + * and start the checkpoint for real. + */ + if (current_us > 10 * stepdown_us) + break; + } + + WT_RET(__wt_epoch(session, &stop)); + total_ms = WT_TIMEDIFF_MS(stop, start); + WT_STAT_FAST_CONN_SET(session, txn_checkpoint_scrub_time, total_ms); + + return (0); +} + +/* + * __checkpoint_release_clean_trees -- + * Release clean trees from the list cached for checkpoints. 
+ */ +static int +__checkpoint_release_clean_trees(WT_SESSION_IMPL *session) +{ + WT_BTREE *btree; + WT_DATA_HANDLE *dhandle; + WT_DECL_RET; + u_int i; + + for (i = 0; i < session->ckpt_handle_next; i++) { + dhandle = session->ckpt_handle[i]; + btree = dhandle->handle; + if (!F_ISSET(btree, WT_BTREE_SKIP_CKPT)) + continue; + __wt_meta_ckptlist_free(session, btree->ckpt); + btree->ckpt = NULL; + WT_WITH_DHANDLE(session, dhandle, + __checkpoint_update_generation(session)); + session->ckpt_handle[i] = NULL; + WT_WITH_DHANDLE(session, dhandle, + ret = __wt_session_release_btree(session)); + WT_RET(ret); + } + + return (0); } /* @@ -352,6 +509,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) { struct timespec fsync_start, fsync_stop; struct timespec start, stop, verb_timer; + WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_TXN *txn; @@ -359,13 +517,15 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_TXN_ISOLATION saved_isolation; WT_TXN_STATE *txn_state; void *saved_meta_next; - u_int i; + u_int i, orig_trigger; uint64_t fsync_duration_usecs; bool full, idle, logging, tracking; const char *txn_cfg[] = { WT_CONFIG_BASE(session, WT_SESSION_begin_transaction), "isolation=snapshot", NULL }; conn = S2C(session); + cache = conn->cache; + orig_trigger = cache->eviction_dirty_trigger; txn = &session->txn; txn_global = &conn->txn_global; txn_state = WT_SESSION_TXN_STATE(session); @@ -384,21 +544,8 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) /* Configure logging only if doing a full checkpoint. */ logging = FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED); - /* Keep track of handles acquired for locking. */ - WT_ERR(__wt_meta_track_on(session)); - tracking = true; - - /* - * Get a list of handles we want to flush; this may pull closed objects - * into the session cache, but we're going to do that eventually anyway. 
- */ - WT_ASSERT(session, session->ckpt_handle_next == 0); - WT_WITH_SCHEMA_LOCK(session, ret, - WT_WITH_TABLE_LOCK(session, ret, - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __checkpoint_apply_all( - session, cfg, __wt_checkpoint_get_handles, NULL)))); - WT_ERR(ret); + /* Reset the maximum page size seen by eviction. */ + conn->cache->evict_max_page_size = 0; /* * Update the global oldest ID so we do all possible cleanup. @@ -412,28 +559,11 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) /* Flush data-sources before we start the checkpoint. */ WT_ERR(__checkpoint_data_source(session, cfg)); - WT_ERR(__wt_epoch(session, &verb_timer)); - WT_ERR(__checkpoint_verbose_track(session, - "starting write leaves", &verb_timer)); - - /* Flush dirty leaf pages before we start the checkpoint. */ - session->isolation = txn->isolation = WT_ISO_READ_COMMITTED; - WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_write_leaves)); - /* - * The underlying flush routine scheduled an asynchronous flush - * after writing the leaf pages, but in order to minimize I/O - * while holding the schema lock, do a flush and wait for the - * completion. Do it after flushing the pages to give the - * asynchronous flush as much time as possible before we wait. + * Try to reduce the amount of dirty data in cache so there is less + * work to do during the critical section of the checkpoint. */ - WT_ERR(__wt_epoch(session, &fsync_start)); - WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync)); - WT_ERR(__wt_epoch(session, &fsync_stop)); - fsync_duration_usecs = WT_TIMEDIFF_US(fsync_stop, fsync_start); - WT_STAT_FAST_CONN_INCR(session, txn_checkpoint_fsync_pre); - WT_STAT_FAST_CONN_INCRV(session, - txn_checkpoint_fsync_pre_duration, fsync_duration_usecs); + WT_ERR(__checkpoint_reduce_dirty_cache(session)); /* Tell logging that we are about to start a database checkpoint.
*/ if (full && logging) @@ -462,6 +592,36 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_STAT_FAST_CONN_SET(session, txn_checkpoint_generation, txn_global->checkpoint_gen); + /* Keep track of handles acquired for locking. */ + WT_ERR(__wt_meta_track_on(session)); + tracking = true; + + /* + * Get a list of handles we want to flush; for named checkpoints this + * may pull closed objects into the session cache. + * + * We want to skip checkpointing clean handles whenever possible. That + * is, when the checkpoint is not named or forced. However, we need to + * take care about ordering with respect to the checkpoint transaction. + * + * If we skip clean handles before starting the transaction, the + * checkpoint can miss updates in trees that become dirty as the + * checkpoint is starting. If we wait until the transaction has + * started before locking a handle, there could be a metadata-changing + * operation in between (e.g., salvage) that will cause a write + * conflict when the checkpoint goes to write the metadata. + * + * First, gather all handles, then start the checkpoint transaction, + * then release any clean handles. + */ + WT_ASSERT(session, session->ckpt_handle_next == 0); + WT_WITH_SCHEMA_LOCK(session, ret, + WT_WITH_TABLE_LOCK(session, ret, + WT_WITH_HANDLE_LIST_LOCK(session, + ret = __checkpoint_apply_all( + session, cfg, __wt_checkpoint_get_handles, NULL)))); + WT_ERR(ret); + /* * Start a snapshot transaction for the checkpoint. * @@ -475,21 +635,22 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(__wt_txn_id_check(session)); /* - * Save the checkpoint session ID. We never do checkpoints in the - * default session (with id zero). + * Save the checkpoint session ID. + * + * We never do checkpoints in the default session (with id zero). 
*/ WT_ASSERT(session, session->id != 0 && txn_global->checkpoint_id == 0); txn_global->checkpoint_id = session->id; - txn_global->checkpoint_pinned = - WT_MIN(txn_state->id, txn_state->snap_min); - /* - * We're about to clear the checkpoint transaction from the global - * state table so the oldest ID can move forward. Make sure everything - * we've done above is scheduled. + * Remove the checkpoint transaction from the global table. + * + * This allows ordinary visibility checks to move forward because + * checkpoints often take a long time and only write to the metadata. */ - WT_FULL_BARRIER(); + WT_ERR(__wt_writelock(session, txn_global->scan_rwlock)); + txn_global->checkpoint_txnid = txn->id; + txn_global->checkpoint_pinned = WT_MIN(txn->id, txn->snap_min); /* * Sanity check that the oldest ID hasn't moved on before we have @@ -507,6 +668,25 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) * details). */ txn_state->id = txn_state->snap_min = WT_TXN_NONE; + WT_ERR(__wt_writeunlock(session, txn_global->scan_rwlock)); + + /* + * Unblock updates -- we can figure out that any updates to clean pages + * after this point are too new to be written in the checkpoint. + */ + cache->eviction_dirty_trigger = orig_trigger; + WT_STAT_FAST_CONN_SET( + session, txn_checkpoint_scrub_target, orig_trigger); + + /* + * Mark old checkpoints that are being deleted and figure out which + * trees we can skip in this checkpoint. + * + * Release clean trees. Any updates made after this point will not + * be visible to the checkpoint transaction. + */ + WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_mark_deletes)); + WT_ERR(__checkpoint_release_clean_trees(session)); /* Tell logging that we have started a database checkpoint. */ if (full && logging) @@ -522,9 +702,13 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) */ session->dhandle = NULL; - /* Release the snapshot so we aren't pinning pages in cache.
*/ + /* Release the snapshot so we aren't pinning updates in cache. */ __wt_txn_release_snapshot(session); + /* Mark all trees as open for business (particularly eviction). */ + WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_presync)); + WT_ERR(__wt_evict_server_wake(session)); + WT_ERR(__checkpoint_verbose_track(session, "committing transaction", &verb_timer)); @@ -587,6 +771,12 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) ret = __wt_txn_checkpoint_log( session, false, WT_TXN_LOG_CKPT_SYNC, NULL)); + /* + * Now that the metadata is stable, re-open the metadata file for + * regular eviction by clearing the checkpoint_pinned flag. + */ + txn_global->checkpoint_pinned = WT_TXN_NONE; + if (full) { WT_ERR(__wt_epoch(session, &stop)); __checkpoint_stats(session, &start, &stop); @@ -609,6 +799,10 @@ err: /* if (tracking) WT_TRET(__wt_meta_track_off(session, false, ret != 0)); + cache->eviction_dirty_trigger = orig_trigger; + WT_STAT_FAST_CONN_SET( + session, txn_checkpoint_scrub_target, orig_trigger); + if (F_ISSET(txn, WT_TXN_RUNNING)) { /* * Clear the dhandle so the visibility check doesn't get @@ -634,9 +828,12 @@ err: /* WT_TXN_LOG_CKPT_STOP : WT_TXN_LOG_CKPT_CLEANUP, NULL)); } - for (i = 0; i < session->ckpt_handle_next; ++i) + for (i = 0; i < session->ckpt_handle_next; ++i) { + if (session->ckpt_handle[i] == NULL) + continue; WT_WITH_DHANDLE(session, session->ckpt_handle[i], WT_TRET(__wt_session_release_btree(session))); + } __wt_free(session, session->ckpt_handle); session->ckpt_handle_allocated = session->ckpt_handle_next = 0; @@ -836,7 +1033,7 @@ __checkpoint_lock_tree(WT_SESSION_IMPL *session, if (cval.len == 0) name = WT_CHECKPOINT; else { - WT_ERR(__wt_checkpoint_name_ok(session, cval.str, cval.len)); + WT_ERR(__checkpoint_name_ok(session, cval.str, cval.len)); WT_ERR(__wt_strndup(session, cval.str, cval.len, &name_alloc)); name = name_alloc; } @@ -851,10 +1048,10 @@ __checkpoint_lock_tree(WT_SESSION_IMPL *session, 
__wt_config_next(&dropconf, &k, &v)) == 0) { /* Disallow unsafe checkpoint names. */ if (v.len == 0) - WT_ERR(__wt_checkpoint_name_ok( + WT_ERR(__checkpoint_name_ok( session, k.str, k.len)); else - WT_ERR(__wt_checkpoint_name_ok( + WT_ERR(__checkpoint_name_ok( session, v.str, v.len)); if (v.len == 0) @@ -986,42 +1183,23 @@ err: if (hot_backup_locked) } /* - * __checkpoint_tree -- - * Checkpoint a single tree. - * Assumes all necessary locks have been acquired by the caller. + * __checkpoint_mark_deletes -- + * Figure out what old checkpoints will be deleted, and whether the + * checkpoint can be skipped entirely. */ static int -__checkpoint_tree( - WT_SESSION_IMPL *session, bool is_checkpoint, const char *cfg[]) +__checkpoint_mark_deletes( + WT_SESSION_IMPL *session, const char *cfg[]) { - WT_BM *bm; WT_BTREE *btree; WT_CKPT *ckpt, *ckptbase; WT_CONFIG_ITEM cval; - WT_CONNECTION_IMPL *conn; - WT_DATA_HANDLE *dhandle; - WT_DECL_RET; - WT_LSN ckptlsn; const char *name; - int deleted, was_modified; - bool fake_ckpt, force; + int deleted; + bool force; btree = S2BT(session); - bm = btree->bm; ckptbase = btree->ckpt; - conn = S2C(session); - dhandle = session->dhandle; - fake_ckpt = false; - was_modified = btree->modified; - - /* - * Set the checkpoint LSN to the maximum LSN so that if logging is - * disabled, recovery will never roll old changes forward over the - * non-logged changes in this checkpoint. If logging is enabled, a - * real checkpoint LSN will be assigned for this checkpoint and - * overwrite this. - */ - WT_MAX_LSN(&ckptlsn); /* * Check for clean objects not requiring a checkpoint. 
@@ -1050,20 +1228,15 @@ __checkpoint_tree( force = false; F_CLR(btree, WT_BTREE_SKIP_CKPT); if (!btree->modified && cfg != NULL) { - ret = __wt_config_gets(session, cfg, "force", &cval); - if (ret != 0 && ret != WT_NOTFOUND) - WT_ERR(ret); - if (ret == 0 && cval.val != 0) - force = true; + WT_RET(__wt_config_gets(session, cfg, "force", &cval)); + force = cval.val != 0; } if (!btree->modified && !force) { - if (!is_checkpoint) - goto nockpt; - deleted = 0; WT_CKPT_FOREACH(ckptbase, ckpt) if (F_ISSET(ckpt, WT_CKPT_DELETE)) ++deleted; + /* * Complicated test: if the tree is clean and last two * checkpoints have the same name (correcting for internal @@ -1077,17 +1250,52 @@ __checkpoint_tree( (strcmp(name, (ckpt - 2)->name) == 0 || (WT_PREFIX_MATCH(name, WT_CHECKPOINT) && WT_PREFIX_MATCH((ckpt - 2)->name, WT_CHECKPOINT)))) { -nockpt: F_SET(btree, WT_BTREE_SKIP_CKPT); - WT_PUBLISH(btree->checkpoint_gen, - S2C(session)->txn_global.checkpoint_gen); - WT_STAT_FAST_DATA_SET(session, - btree_checkpoint_generation, - btree->checkpoint_gen); - ret = 0; - goto err; + F_SET(btree, WT_BTREE_SKIP_CKPT); + return (0); } } + return (0); +} + +/* + * __checkpoint_tree -- + * Checkpoint a single tree. + * Assumes all necessary locks have been acquired by the caller. + */ +static int +__checkpoint_tree( + WT_SESSION_IMPL *session, bool is_checkpoint, const char *cfg[]) +{ + WT_BM *bm; + WT_BTREE *btree; + WT_CKPT *ckpt, *ckptbase; + WT_CONNECTION_IMPL *conn; + WT_DATA_HANDLE *dhandle; + WT_DECL_RET; + WT_LSN ckptlsn; + int was_modified; + bool fake_ckpt; + + WT_UNUSED(cfg); + + btree = S2BT(session); + bm = btree->bm; + ckptbase = btree->ckpt; + conn = S2C(session); + dhandle = session->dhandle; + fake_ckpt = false; + was_modified = btree->modified; + + /* + * Set the checkpoint LSN to the maximum LSN so that if logging is + * disabled, recovery will never roll old changes forward over the + * non-logged changes in this checkpoint. 
If logging is enabled, a + * real checkpoint LSN will be assigned for this checkpoint and + * overwrite this. + */ + WT_MAX_LSN(&ckptlsn); + /* * If an object has never been used (in other words, if it could become * a bulk-loaded file), then we must fake the checkpoint. This is good @@ -1183,10 +1391,10 @@ fake: /* /* * If we wrote a checkpoint (rather than faking one), pages may be - * available for re-use. If tracking enabled, defer making pages - * available until transaction end. The exception is if the handle - * is being discarded, in which case the handle will be gone by the - * time we try to apply or unroll the meta tracking event. + * available for re-use. If tracking is enabled, defer making pages + * available until transaction end. The exception is if the handle is + * being discarded, in which case the handle will be gone by the time + * we try to apply or unroll the meta tracking event. */ if (!fake_ckpt) { if (WT_META_TRACKING(session) && is_checkpoint) @@ -1213,6 +1421,25 @@ err: /* return (ret); } +/* + * __checkpoint_presync -- + * Visit all handles after the checkpoint writes are complete and before + * syncing. At this point, all trees should be completely open for + * business. + */ +static int +__checkpoint_presync(WT_SESSION_IMPL *session, const char *cfg[]) +{ + WT_BTREE *btree; + + WT_UNUSED(cfg); + + btree = S2BT(session); + WT_ASSERT(session, !btree->include_checkpoint_txn); + btree->evict_walk_period = btree->evict_walk_saved; + return (0); +} + /* * __checkpoint_tree_helper -- * Checkpoint a tree (suitable for use in *_apply functions). @@ -1220,7 +1447,34 @@ err: /* static int __checkpoint_tree_helper(WT_SESSION_IMPL *session, const char *cfg[]) { - return (__checkpoint_tree(session, true, cfg)); + WT_BTREE *btree; + WT_DECL_RET; + + btree = S2BT(session); + + ret = __checkpoint_tree(session, true, cfg); + + /* + * Whatever happened, we aren't visiting this tree again in this + * checkpoint. Don't keep updates pinned any longer. 
+ */ + __checkpoint_update_generation(session); + + /* + * In case this tree was being skipped by the eviction server + * during the checkpoint, restore the previous state. + */ + btree->evict_walk_period = btree->evict_walk_saved; + + /* + * Wake the eviction server, in case application threads have + * stalled while the eviction server decided it couldn't make + * progress. Without this, application threads will be stalled + * until the eviction server next wakes. + */ + WT_TRET(__wt_evict_server_wake(session)); + + return (ret); } /* @@ -1242,6 +1496,9 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_tree(session, true, true, cfg)); WT_RET(ret); + WT_SAVE_DHANDLE(session, + ret = __checkpoint_mark_deletes(session, cfg)); + WT_RET(ret); return (__checkpoint_tree(session, true, cfg)); } @@ -1319,6 +1576,11 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_tree(session, false, need_tracking, NULL)); WT_ASSERT(session, ret == 0); + if (ret == 0) { + WT_SAVE_DHANDLE(session, + ret = __checkpoint_mark_deletes(session, NULL)); + WT_ASSERT(session, ret == 0); + } if (ret == 0) ret = __checkpoint_tree(session, false, NULL); diff --git a/src/third_party/wiredtiger/src/txn/txn_log.c b/src/third_party/wiredtiger/src/txn/txn_log.c index 470515244f3..e73ff00f5b7 100644 --- a/src/third_party/wiredtiger/src/txn/txn_log.c +++ b/src/third_party/wiredtiger/src/txn/txn_log.c @@ -329,7 +329,7 @@ __wt_txn_checkpoint_log( case WT_TXN_LOG_CKPT_START: /* Take a copy of the transaction snapshot. 
*/ txn->ckpt_nsnapshot = txn->snapshot_count; - recsize = txn->ckpt_nsnapshot * WT_INTPACK64_MAXSIZE; + recsize = (size_t)txn->ckpt_nsnapshot * WT_INTPACK64_MAXSIZE; WT_ERR(__wt_scr_alloc(session, recsize, &txn->ckpt_snapshot)); p = txn->ckpt_snapshot->mem; end = p + recsize; diff --git a/src/third_party/wiredtiger/src/utilities/util_dump.c b/src/third_party/wiredtiger/src/utilities/util_dump.c index da70aea35be..6344a90dddd 100644 --- a/src/third_party/wiredtiger/src/utilities/util_dump.c +++ b/src/third_party/wiredtiger/src/utilities/util_dump.c @@ -242,6 +242,7 @@ dump_table_config( char *p, **cfg, *_cfg[4] = {NULL, NULL, NULL, NULL}; p = NULL; + srch = NULL; cfg = &_cfg[3]; /* Get the table name. */ @@ -306,32 +307,31 @@ dump_table_config( WT_ERR(print_config(session, uri, cfg, json, true)); - if (complex_table) { - /* - * The underlying table configuration function needs a second - * cursor: open one before calling it, it makes error handling - * hugely simpler. - */ - if ((ret = session->open_cursor( - session, "metadata:", NULL, NULL, &srch)) != 0) - WT_ERR(util_cerr(cursor, "open_cursor", ret)); - - if ((ret = dump_table_config_complex( - session, cursor, srch, name, "colgroup:", json)) == 0) - ret = dump_table_config_complex( - session, cursor, srch, name, "index:", json); - - if ((tret = srch->close(srch)) != 0) { - tret = util_cerr(cursor, "close", tret); - if (ret == 0) - ret = tret; - } - } else if (json && printf( - " \"colgroups\" : [],\n" - " \"indices\" : []\n") < 0) + /* + * The underlying table configuration function needs a second + * cursor: open one before calling it, it makes error handling + * hugely simpler. 
+ */ + if ((ret = session->open_cursor( + session, "metadata:", NULL, NULL, &srch)) != 0) + WT_ERR(util_cerr(cursor, "open_cursor", ret)); + + if (complex_table) + WT_ERR(dump_table_config_complex( + session, cursor, srch, name, "colgroup:", json)); + else if (json && printf( + " \"colgroups\" : [],\n") < 0) WT_ERR(util_cerr(cursor, NULL, EIO)); -err: free(p); + WT_ERR(dump_table_config_complex( + session, cursor, srch, name, "index:", json)); + +err: if (srch != NULL && (tret = srch->close(srch)) != 0) { + tret = util_cerr(cursor, "close", tret); + if (ret == 0) + ret = tret; + } + free(p); free(_cfg[0]); free(_cfg[1]); free(_cfg[2]); diff --git a/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c b/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c index a2185dd123f..58da49b2991 100644 --- a/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c +++ b/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c @@ -130,7 +130,8 @@ ops_start(SHARED_CONFIG *cfg) seconds = (stop.tv_sec - start.tv_sec) + (stop.tv_usec - start.tv_usec) * 1e-6; fprintf(stderr, "timer: %.2lf seconds (%d ops/second)\n", - seconds, (int)(((cfg->reverse_scanners + cfg->append_inserters) * + seconds, (int) + (((double)(cfg->reverse_scanners + cfg->append_inserters) * total_nops) / seconds)); /* Verify the files. */ diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c index c97d82809a1..283e2912daa 100644 --- a/src/third_party/wiredtiger/test/format/ops.c +++ b/src/third_party/wiredtiger/test/format/ops.c @@ -342,7 +342,7 @@ snap_check(WT_CURSOR *cursor, switch (g.type) { case FIX: testutil_die(ret, - "snap_check: %" PRIu64 " search: " + "snapshot-isolation: %" PRIu64 " search: " "expected {0x%02x}, found {0x%02x}", start->keyno, start->deleted ? 
0 : *(uint8_t *)start->vdata, @@ -350,7 +350,7 @@ snap_check(WT_CURSOR *cursor, /* NOTREACHED */ case ROW: testutil_die(ret, - "snap_check: %.*s search: " + "snapshot-isolation: %.*s search: " "expected {%.*s}, found {%.*s}", (int)key->size, key->data, start->deleted ? @@ -362,7 +362,7 @@ snap_check(WT_CURSOR *cursor, /* NOTREACHED */ case VAR: testutil_die(ret, - "snap_check: %" PRIu64 " search: " + "snapshot-isolation: %" PRIu64 " search: " "expected {%.*s}, found {%.*s}", start->keyno, start->deleted ? diff --git a/src/third_party/wiredtiger/test/format/smoke.sh b/src/third_party/wiredtiger/test/format/smoke.sh index 5fbc349f242..0c86b5e57c6 100755 --- a/src/third_party/wiredtiger/test/format/smoke.sh +++ b/src/third_party/wiredtiger/test/format/smoke.sh @@ -3,7 +3,7 @@ set -e # Smoke-test format as part of running "make check". -args="-1 -c "." data_source=table ops=100000 rows=10000 threads=4 compression=none logging_compression=none" +args="-1 -c "." data_source=table ops=50000 rows=10000 threads=4 compression=none logging_compression=none" $TEST_WRAPPER ./t $args file_type=fix $TEST_WRAPPER ./t $args file_type=row diff --git a/src/third_party/wiredtiger/test/manydbs/Makefile.am b/src/third_party/wiredtiger/test/manydbs/Makefile.am index 2bc47ad7f2e..ff5985cf2a4 100644 --- a/src/third_party/wiredtiger/test/manydbs/Makefile.am +++ b/src/third_party/wiredtiger/test/manydbs/Makefile.am @@ -10,7 +10,8 @@ t_LDADD +=$(top_builddir)/libwiredtiger.la t_LDFLAGS = -static # Run this during a "make check" smoke test. -TESTS = smoke.sh +TESTS = $(noinst_PROGRAMS) +LOG_COMPILER = $(TEST_WRAPPER) clean-local: rm -rf WT_TEST *.core diff --git a/src/third_party/wiredtiger/test/manydbs/smoke.sh b/src/third_party/wiredtiger/test/manydbs/smoke.sh deleted file mode 100755 index c0e2976f154..00000000000 --- a/src/third_party/wiredtiger/test/manydbs/smoke.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/sh - -set -e - -# Smoke-test format as part of running "make check". 
-# Run with: -# 1. The defaults -# 2. Set idle flag to turn off operations. -# 3. More dbs. -# -echo "manydbs: default with operations turned on" -$TEST_WRAPPER ./t -echo "manydbs: totally idle databases" -$TEST_WRAPPER ./t -I -echo "manydbs: 40 databases with operations" -$TEST_WRAPPER ./t -D 40 -echo "manydbs: 40 idle databases" -$TEST_WRAPPER ./t -I -D 40 diff --git a/src/third_party/wiredtiger/test/mciproject.yml b/src/third_party/wiredtiger/test/mciproject.yml index 3df1ce5805e..8825bb65052 100644 --- a/src/third_party/wiredtiger/test/mciproject.yml +++ b/src/third_party/wiredtiger/test/mciproject.yml @@ -8,12 +8,12 @@ functions: command: git.get_project params: directory: wiredtiger - "fetch artifacts" : &fetch_artifacts + "fetch binaries" : &fetch_binaries - command: s3.get params: aws_key: ${aws_key} aws_secret: ${aws_secret} - remote_file: wiredtiger/${build_variant}/${revision}/artifacts/${build_id}.tgz + remote_file: wiredtiger/${build_variant}/${revision}/binaries/${build_id}.tgz bucket: build_external extract_to: wiredtiger @@ -23,6 +23,22 @@ pre: script: | rm -rf "wiredtiger" post: + - command: archive.targz_pack + params: + target: "wiredtiger.tgz" + source_dir: "wiredtiger" + include: + - "./**" + - command: s3.put + params: + aws_secret: ${aws_secret} + aws_key: ${aws_key} + local_file: wiredtiger.tgz + bucket: build_external + permissions: public-read + content_type: application/tar + display_name: Artifacts + remote_file: wiredtiger/${build_variant}/${revision}/artifacts/${build_id}.tgz - command: shell.exec params: script: | @@ -49,7 +65,7 @@ tasks: ./build_posix/reconf ${configure_env_vars|} ./configure --enable-diagnostic --enable-python --enable-zlib --enable-strict --enable-verbose ${make_command|make} ${smp_command|} 2>&1 - ${make_command|make} check 2>&1 + ${make_command|make} VERBOSE=1 check 2>&1 fi - command: archive.targz_pack params: @@ -65,14 +81,14 @@ tasks: bucket: build_external permissions: public-read content_type: 
application/tar - display_name: Artifacts - remote_file: wiredtiger/${build_variant}/${revision}/artifacts/${build_id}.tgz + display_name: Binaries + remote_file: wiredtiger/${build_variant}/${revision}/binaries/${build_id}.tgz - name: unit-test depends_on: - name: compile commands: - - func: "fetch artifacts" + - func: "fetch binaries" - command: shell.exec params: working_dir: "wiredtiger" @@ -85,7 +101,7 @@ tasks: depends_on: - name: compile commands: - - func: "fetch artifacts" + - func: "fetch binaries" - command: shell.exec params: working_dir: "wiredtiger" @@ -99,7 +115,7 @@ tasks: depends_on: - name: compile commands: - - func: "fetch artifacts" + - func: "fetch binaries" - command: shell.exec params: working_dir: "wiredtiger" diff --git a/src/third_party/wiredtiger/test/recovery/Makefile.am b/src/third_party/wiredtiger/test/recovery/Makefile.am index 19fc48dce47..3e7fce17d0e 100644 --- a/src/third_party/wiredtiger/test/recovery/Makefile.am +++ b/src/third_party/wiredtiger/test/recovery/Makefile.am @@ -14,8 +14,7 @@ truncated_log_LDADD +=$(top_builddir)/libwiredtiger.la truncated_log_LDFLAGS = -static # Run this during a "make check" smoke test. -TESTS = $(noinst_PROGRAMS) -LOG_COMPILER = $(TEST_WRAPPER) +TESTS = smoke.sh clean-local: rm -rf WT_TEST.* *.core diff --git a/src/third_party/wiredtiger/test/recovery/random-abort.c b/src/third_party/wiredtiger/test/recovery/random-abort.c index 85629eddec4..16065cec29e 100644 --- a/src/third_party/wiredtiger/test/recovery/random-abort.c +++ b/src/third_party/wiredtiger/test/recovery/random-abort.c @@ -91,7 +91,8 @@ thread_run(void *arg) if ((fp = fopen(buf, "w")) == NULL) testutil_die(errno, "fopen"); /* - * Set to no buffering. + * Set to line buffering. But that is advisory only. We've seen + * cases where the result files end up with partial lines. 
*/ __wt_stream_set_line_buffer(fp); if ((ret = td->conn->open_session(td->conn, NULL, NULL, &session)) != 0) @@ -188,7 +189,7 @@ main(int argc, char *argv[]) WT_CURSOR *cursor; WT_SESSION *session; WT_RAND_STATE rnd; - uint64_t key; + uint64_t key, last_key; uint32_t absent, count, i, nth, timeout; int ch, status, ret; pid_t pid; @@ -317,12 +318,23 @@ main(int argc, char *argv[]) * in the table after recovery. Since we did write-no-sync, we * expect every key to have been recovered. */ - for (;; ++count) { + for (last_key = UINT64_MAX;; ++count, last_key = key) { ret = fscanf(fp, "%" SCNu64 "\n", &key); if (ret != EOF && ret != 1) testutil_die(errno, "fscanf"); if (ret == EOF) break; + /* + * If we're unlucky, the last line may be a partially + * written key at the end that can result in a false + * negative error for a missing record. Detect it. + */ + if (last_key != UINT64_MAX && key != last_key + 1) { + printf("%s: Ignore partial record %" PRIu64 + " last valid key %" PRIu64 "\n", + fname, key, last_key); + break; + } snprintf(kname, sizeof(kname), "%" PRIu64, key); cursor->set_key(cursor, kname); if ((ret = cursor->search(cursor)) != 0) { diff --git a/src/third_party/wiredtiger/test/recovery/smoke.sh b/src/third_party/wiredtiger/test/recovery/smoke.sh new file mode 100755 index 00000000000..c7677b64503 --- /dev/null +++ b/src/third_party/wiredtiger/test/recovery/smoke.sh @@ -0,0 +1,8 @@ +#! /bin/sh + +set -e + +# Smoke-test recovery as part of running "make check". 
+ +$TEST_WRAPPER ./random-abort -t 10 -T 5 +$TEST_WRAPPER ./truncated-log diff --git a/src/third_party/wiredtiger/test/suite/helper.py b/src/third_party/wiredtiger/test/suite/helper.py index f85d708880f..9f34b566b3c 100644 --- a/src/third_party/wiredtiger/test/suite/helper.py +++ b/src/third_party/wiredtiger/test/suite/helper.py @@ -179,6 +179,49 @@ def simple_populate_check(self, uri, rows): simple_populate_check_cursor(self, cursor, rows) cursor.close() +# population of a simple object, with a single index +# uri: object +# config: prefix of the session.create configuration string (defaults +# to string value formats) +# rows: entries to insert +def simple_index_populate(self, uri, config, rows): + self.pr('simple_index_populate: ' + uri + ' with ' + str(rows) + ' rows') + self.session.create(uri, 'value_format=S,columns=(key0,value0),' + config) + indxname = 'index:' + uri.split(":")[1] + self.session.create(indxname + ':index1', 'columns=(value0,key0)') + cursor = self.session.open_cursor(uri, None) + for i in range(1, rows + 1): + cursor[key_populate(cursor, i)] = value_populate(cursor, i) + cursor.close() + +def simple_index_populate_check_cursor(self, cursor, rows): + i = 0 + for key,val in cursor: + i += 1 + self.assertEqual(key, key_populate(cursor, i)) + if cursor.value_format == '8t' and val == 0: # deleted + continue + self.assertEqual(val, value_populate(cursor, i)) + self.assertEqual(i, rows) + +def simple_index_populate_check(self, uri, rows): + self.pr('simple_index_populate_check: ' + uri) + + # Check values in the main table. + cursor = self.session.open_cursor(uri, None) + simple_index_populate_check_cursor(self, cursor, rows) + + # Check values in the index. + indxname = 'index:' + uri.split(":")[1] + idxcursor = self.session.open_cursor(indxname + ':index1') + for i in range(1, rows + 1): + k = key_populate(cursor, i) + v = value_populate(cursor, i) + ik = (v,k) # The index key is columns=(v,k). 
+ self.assertEqual(v, idxcursor[ik]) + idxcursor.close() + cursor.close() + # Return the value stored in a complex object. def complex_value_populate(cursor, i): return [str(i) + ': abcdefghijklmnopqrstuvwxyz'[0:i%26], diff --git a/src/third_party/wiredtiger/test/suite/run.py b/src/third_party/wiredtiger/test/suite/run.py index 6e7421b8b96..c37093a2a55 100644 --- a/src/third_party/wiredtiger/test/suite/run.py +++ b/src/third_party/wiredtiger/test/suite/run.py @@ -87,6 +87,7 @@ Options:\n\ -j N | --parallel N run all tests in parallel using N processes\n\ -l | --long run the entire test suite\n\ -p | --preserve preserve output files in WT_TEST/\n\ + -s N | --scenario N use scenario N (N can be number or symbolic)\n\ -t | --timestamp name WT_TEST according to timestamp\n\ -v N | --verbose N set verboseness to N (0<=N<=3, default=1)\n\ \n\ @@ -95,15 +96,27 @@ Tests:\n\ may be a subsuite name (e.g. \'base\' runs test_base*.py)\n\ \n\ When -C or -c are present, there may not be any tests named.\n\ + When -s is present, there must be a test named.\n\ ' # capture the category (AKA 'subsuite') part of a test name, # e.g. 
test_util03 -> util reCatname = re.compile(r"test_([^0-9]+)[0-9]*") -def addScenarioTests(tests, loader, testname): +def restrictScenario(testcases, restrict): + if restrict == '': + return testcases + elif restrict.isdigit(): + s = int(restrict) + return [t for t in testcases + if hasattr(t, 'scenario_number') and t.scenario_number == s] + else: + return [t for t in testcases + if hasattr(t, 'scenario_name') and t.scenario_name == restrict] + +def addScenarioTests(tests, loader, testname, scenario): loaded = loader.loadTestsFromName(testname) - tests.addTests(generate_scenarios(loaded)) + tests.addTests(restrictScenario(generate_scenarios(loaded), scenario)) def configRecord(cmap, tup): """ @@ -195,20 +208,20 @@ def configApply(suites, configfilename, configwrite): json.dump(configmap, f, sort_keys=True, indent=4) return newsuite -def testsFromArg(tests, loader, arg): +def testsFromArg(tests, loader, arg, scenario): # If a group of test is mentioned, do all tests in that group # e.g. 'run.py base' groupedfiles = glob.glob(suitedir + os.sep + 'test_' + arg + '*.py') if len(groupedfiles) > 0: for file in groupedfiles: - testsFromArg(tests, loader, os.path.basename(file)) + testsFromArg(tests, loader, os.path.basename(file), scenario) return # Explicit test class names if not arg[0].isdigit(): if arg.endswith('.py'): arg = arg[:-3] - addScenarioTests(tests, loader, arg) + addScenarioTests(tests, loader, arg, scenario) return # Deal with ranges @@ -217,7 +230,7 @@ def testsFromArg(tests, loader, arg): else: start, end = int(arg), int(arg) for t in xrange(start, end+1): - addScenarioTests(tests, loader, 'test%03d' % t) + addScenarioTests(tests, loader, 'test%03d' % t, scenario) if __name__ == '__main__': tests = unittest.TestSuite() @@ -228,6 +241,7 @@ if __name__ == '__main__': configfile = None configwrite = False dirarg = None + scenario = '' verbose = 1 args = sys.argv[1:] testargs = [] @@ -265,6 +279,12 @@ if __name__ == '__main__': if option == '-preserve' or 
option == 'p': preserve = True continue + if option == '-scenario' or option == 's': + if scenario != '' or len(args) == 0: + usage() + sys.exit(2) + scenario = args.pop(0) + continue if option == '-timestamp' or option == 't': timestamp = True continue @@ -303,15 +323,20 @@ if __name__ == '__main__': # Without any tests listed as arguments, do discovery if len(testargs) == 0: + if scenario != '': + sys.stderr.write( + 'run.py: specifying a scenario requires a test name\n') + usage() + sys.exit(2) from discover import defaultTestLoader as loader suites = loader.discover(suitedir) suites = sorted(suites, key=lambda c: str(list(c)[0])) if configfile != None: suites = configApply(suites, configfile, configwrite) - tests.addTests(generate_scenarios(suites)) + tests.addTests(restrictScenario(generate_scenarios(suites), '')) else: for arg in testargs: - testsFromArg(tests, loader, arg) + testsFromArg(tests, loader, arg, scenario) if debug: import pdb diff --git a/src/third_party/wiredtiger/test/suite/test_async01.py b/src/third_party/wiredtiger/test/suite/test_async01.py index 71a18a68121..9322748c30f 100644 --- a/src/third_party/wiredtiger/test/suite/test_async01.py +++ b/src/third_party/wiredtiger/test/suite/test_async01.py @@ -29,7 +29,7 @@ import sys, threading, wiredtiger, wttest from suite_subprocess import suite_subprocess from wiredtiger import WiredTigerError -from wtscenario import check_scenarios +from wtscenario import make_scenarios # TODO - tmp code def tty_pr(s): @@ -122,7 +122,7 @@ class test_async01(wttest.WiredTigerTestCase, suite_subprocess): async_threads = 3 current = {} - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file-col', dict(tablekind='col',uri='file')), ('file-fix', dict(tablekind='fix',uri='file')), ('file-row', dict(tablekind='row',uri='file')), diff --git a/src/third_party/wiredtiger/test/suite/test_async02.py b/src/third_party/wiredtiger/test/suite/test_async02.py index 7aa1b85a2f3..bc6b389fc27 100644 --- 
a/src/third_party/wiredtiger/test/suite/test_async02.py +++ b/src/third_party/wiredtiger/test/suite/test_async02.py @@ -29,7 +29,7 @@ import sys, threading, wiredtiger, wttest from suite_subprocess import suite_subprocess from wiredtiger import WiredTigerError -from wtscenario import check_scenarios +from wtscenario import make_scenarios class Callback(wiredtiger.AsyncCallback): def __init__(self, current): @@ -119,7 +119,7 @@ class test_async02(wttest.WiredTigerTestCase, suite_subprocess): async_threads = 3 current = {} - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file-col', dict(tablekind='col',uri='file')), ('file-fix', dict(tablekind='fix',uri='file')), ('file-row', dict(tablekind='row',uri='file')), diff --git a/src/third_party/wiredtiger/test/suite/test_backup02.py b/src/third_party/wiredtiger/test/suite/test_backup02.py index 095bfbe404a..398d55abd7a 100644 --- a/src/third_party/wiredtiger/test/suite/test_backup02.py +++ b/src/third_party/wiredtiger/test/suite/test_backup02.py @@ -30,13 +30,13 @@ import Queue import threading, time, wiredtiger, wttest from helper import key_populate, simple_populate from wtthread import backup_thread, checkpoint_thread, op_thread -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_backup02.py # Run background checkpoints and backsups repeatedly while doing inserts # in another thread class test_backup02(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('table', dict(uri='table:test',fmt='L',dsize=100,nops=200,nthreads=1,time=30)), ]) diff --git a/src/third_party/wiredtiger/test/suite/test_backup03.py b/src/third_party/wiredtiger/test/suite/test_backup03.py index e810a2ec714..053009c6edb 100644 --- a/src/third_party/wiredtiger/test/suite/test_backup03.py +++ b/src/third_party/wiredtiger/test/suite/test_backup03.py @@ -28,7 +28,7 @@ import glob, os, shutil, string from suite_subprocess import suite_subprocess -from wtscenario import 
multiply_scenarios, number_scenarios +from wtscenario import make_scenarios import wiredtiger, wttest from helper import compare_files,\ complex_populate, complex_populate_lsm, simple_populate @@ -56,25 +56,25 @@ class test_backup_target(wttest.WiredTigerTestCase, suite_subprocess): ('table:' + pfx + '.4', complex_populate_lsm, 3), ] list = [ - ( '1', dict(big=0,list=[0])), # Target objects individually - ( '2', dict(big=1,list=[1])), - ( '3', dict(big=2,list=[2])), - ( '4', dict(big=3,list=[3])), - ('5a', dict(big=0,list=[0,2])), # Target groups of objects - ('5b', dict(big=2,list=[0,2])), - ('6a', dict(big=1,list=[1,3])), - ('6b', dict(big=3,list=[1,3])), - ('7a', dict(big=0,list=[0,1,2])), - ('7b', dict(big=1,list=[0,1,2])), - ('7c', dict(big=2,list=[0,1,2])), - ('8a', dict(big=0,list=[0,1,2,3])), - ('8b', dict(big=1,list=[0,1,2,3])), - ('8c', dict(big=2,list=[0,1,2,3])), - ('8d', dict(big=3,list=[0,1,2,3])), - ( '9', dict(big=3,list=[])), # Backup everything + ( 'backup_1', dict(big=0,list=[0])), # Target objects individually + ( 'backup_2', dict(big=1,list=[1])), + ( 'backup_3', dict(big=2,list=[2])), + ( 'backup_4', dict(big=3,list=[3])), + ('backup_5a', dict(big=0,list=[0,2])), # Target groups of objects + ('backup_5b', dict(big=2,list=[0,2])), + ('backup_6a', dict(big=1,list=[1,3])), + ('backup_6b', dict(big=3,list=[1,3])), + ('backup_7a', dict(big=0,list=[0,1,2])), + ('backup_7b', dict(big=1,list=[0,1,2])), + ('backup_7c', dict(big=2,list=[0,1,2])), + ('backup_8a', dict(big=0,list=[0,1,2,3])), + ('backup_8b', dict(big=1,list=[0,1,2,3])), + ('backup_8c', dict(big=2,list=[0,1,2,3])), + ('backup_8d', dict(big=3,list=[0,1,2,3])), + ('backup_9', dict(big=3,list=[])), # Backup everything ] - scenarios = number_scenarios(multiply_scenarios('.', list)) + scenarios = make_scenarios(list) # Create a large cache, otherwise this test runs quite slowly. 
conn_config = 'cache_size=1G' diff --git a/src/third_party/wiredtiger/test/suite/test_backup04.py b/src/third_party/wiredtiger/test/suite/test_backup04.py index 852a22c1e0c..866e673dccb 100644 --- a/src/third_party/wiredtiger/test/suite/test_backup04.py +++ b/src/third_party/wiredtiger/test/suite/test_backup04.py @@ -30,7 +30,7 @@ import Queue import threading, time, wiredtiger, wttest import glob, os, shutil from suite_subprocess import suite_subprocess -from wtscenario import check_scenarios +from wtscenario import make_scenarios from wtthread import op_thread from helper import compare_files, key_populate @@ -54,7 +54,7 @@ class test_backup_target(wttest.WiredTigerTestCase, suite_subprocess): # and that is not what we want here. # pfx = 'test_backup' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('table', dict(uri='table:test',dsize=100,nops=2000,nthreads=1,time=30)), ]) diff --git a/src/third_party/wiredtiger/test/suite/test_backup05.py b/src/third_party/wiredtiger/test/suite/test_backup05.py index fbe219d8de8..131732e9a89 100644 --- a/src/third_party/wiredtiger/test/suite/test_backup05.py +++ b/src/third_party/wiredtiger/test/suite/test_backup05.py @@ -35,7 +35,6 @@ import fnmatch, os, shutil, time from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios from helper import copy_wiredtiger_home import wiredtiger, wttest diff --git a/src/third_party/wiredtiger/test/suite/test_base02.py b/src/third_party/wiredtiger/test/suite/test_base02.py index 70117573241..2b51fe1b530 100644 --- a/src/third_party/wiredtiger/test/suite/test_base02.py +++ b/src/third_party/wiredtiger/test/suite/test_base02.py @@ -32,14 +32,14 @@ import json import wiredtiger, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios # Test configuration strings. 
class test_base02(wttest.WiredTigerTestCase): name = 'test_base02a' extra_config = '' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:')), ('table', dict(uri='table:')), ('lsm', dict(uri='lsm:')), diff --git a/src/third_party/wiredtiger/test/suite/test_base05.py b/src/third_party/wiredtiger/test/suite/test_base05.py index f191f23561f..4bee0efcfe2 100644 --- a/src/third_party/wiredtiger/test/suite/test_base05.py +++ b/src/third_party/wiredtiger/test/suite/test_base05.py @@ -27,7 +27,7 @@ # OTHER DEALINGS IN THE SOFTWARE. import wiredtiger, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_base05.py # Cursor operations @@ -40,7 +40,7 @@ class test_base05(wttest.WiredTigerTestCase): table_name1 = 'test_base05a' table_name2 = 'test_base05b' nentries = 1000 - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('no_huffman', dict(extraconfig='')), ('huffman_key', dict(extraconfig='huffman_key="english"')), ('huffman_val', dict(extraconfig='huffman_value="english"')), diff --git a/src/third_party/wiredtiger/test/suite/test_bug003.py b/src/third_party/wiredtiger/test/suite/test_bug003.py index 739279a0141..28d71a534e2 100644 --- a/src/third_party/wiredtiger/test/suite/test_bug003.py +++ b/src/third_party/wiredtiger/test/suite/test_bug003.py @@ -30,7 +30,7 @@ # Regression tests. import wiredtiger, wttest -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Regression tests. class test_bug003(wttest.WiredTigerTestCase): @@ -43,7 +43,7 @@ class test_bug003(wttest.WiredTigerTestCase): ('yes', dict(name=1)), ] - scenarios = number_scenarios(multiply_scenarios('.', types, ckpt)) + scenarios = make_scenarios(types, ckpt) # Confirm bulk-load isn't stopped by checkpoints. 
def test_bug003(self): diff --git a/src/third_party/wiredtiger/test/suite/test_bug006.py b/src/third_party/wiredtiger/test/suite/test_bug006.py index e522cdf96f7..314ba57038f 100644 --- a/src/third_party/wiredtiger/test/suite/test_bug006.py +++ b/src/third_party/wiredtiger/test/suite/test_bug006.py @@ -31,13 +31,13 @@ import wiredtiger, wttest from helper import key_populate, value_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # Check that verify and salvage both raise exceptions if there is an open # cursor. class test_bug006(wttest.WiredTigerTestCase): name = 'test_bug006' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:')), ('table', dict(uri='table:')), ]) diff --git a/src/third_party/wiredtiger/test/suite/test_bug008.py b/src/third_party/wiredtiger/test/suite/test_bug008.py index 0243887e258..c4fa411f55e 100644 --- a/src/third_party/wiredtiger/test/suite/test_bug008.py +++ b/src/third_party/wiredtiger/test/suite/test_bug008.py @@ -31,13 +31,13 @@ import wiredtiger, wttest from helper import simple_populate, key_populate, value_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # Test search/search-near operations, including invisible values and keys # past the end of the table. class test_bug008(wttest.WiredTigerTestCase): uri = 'file:test_bug008' # This is a btree layer test. 
- scenarios = check_scenarios([ + scenarios = make_scenarios([ ('fix', dict(fmt='key_format=r,value_format=8t', empty=1, colvar=0)), ('row', dict(fmt='key_format=S', empty=0, colvar=0)), ('var', dict(fmt='key_format=r', empty=0, colvar=1)) diff --git a/src/third_party/wiredtiger/test/suite/test_bug009.py b/src/third_party/wiredtiger/test/suite/test_bug009.py index 4d10e4391d9..2bdfb7dec52 100644 --- a/src/third_party/wiredtiger/test/suite/test_bug009.py +++ b/src/third_party/wiredtiger/test/suite/test_bug009.py @@ -33,7 +33,6 @@ import wiredtiger, wttest from wiredtiger import stat -from wtscenario import multiply_scenarios, number_scenarios class test_bug009(wttest.WiredTigerTestCase): name = 'test_bug009' diff --git a/src/third_party/wiredtiger/test/suite/test_bug011.py b/src/third_party/wiredtiger/test/suite/test_bug011.py index 50dba1c48be..fceb7a22ddb 100644 --- a/src/third_party/wiredtiger/test/suite/test_bug011.py +++ b/src/third_party/wiredtiger/test/suite/test_bug011.py @@ -42,7 +42,7 @@ class test_bug011(wttest.WiredTigerTestCase): nops = 10000 # Add connection configuration for this test. def conn_config(self, dir): - return 'cache_size=10MB,hazard_max=' + str(self.ntables / 2) + return 'cache_size=10MB,eviction_dirty_target=99,eviction_dirty_trigger=99,hazard_max=' + str(self.ntables / 2) def test_eviction(self): cursors = [] diff --git a/src/third_party/wiredtiger/test/suite/test_bug016.py b/src/third_party/wiredtiger/test/suite/test_bug016.py new file mode 100644 index 00000000000..f7cb3c32559 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_bug016.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. 
+# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import wiredtiger, wttest + +# test_bug016.py +# WT-2757: WT_CURSOR.get_key() fails after WT_CURSOR.insert unless the +# cursor has a record number key with append configured. +class test_bug016(wttest.WiredTigerTestCase): + + # Insert a row into a simple column-store table configured to append. + # WT_CURSOR.get_key should succeed. + def test_simple_column_store_append(self): + uri='file:bug016' + self.session.create(uri, 'key_format=r,value_format=S') + cursor = self.session.open_cursor(uri, None, 'append') + cursor.set_value('value') + cursor.insert() + self.assertEquals(cursor.get_key(), 1) + + # Insert a row into a simple column-store table. + # WT_CURSOR.get_key should fail. 
+ def test_simple_column_store(self): + uri='file:bug016' + self.session.create(uri, 'key_format=r,value_format=S') + cursor = self.session.open_cursor(uri, None) + cursor.set_key(37) + cursor.set_value('value') + cursor.insert() + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: cursor.get_key(), "/requires key be set/") + + # Insert a row into a simple row-store table. + # WT_CURSOR.get_key should fail. + def test_simple_row_store(self): + uri='file:bug016' + self.session.create(uri, 'key_format=S,value_format=S') + cursor = self.session.open_cursor(uri, None) + cursor.set_key('key') + cursor.set_value('value') + cursor.insert() + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: cursor.get_key(), "/requires key be set/") + + # Insert a row into a complex column-store table configured to append. + # WT_CURSOR.get_key should succeed. + def test_complex_column_store_append(self): + uri='table:bug016' + self.session.create( + uri, 'key_format=r,value_format=S,columns=(key,value)') + cursor = self.session.open_cursor(uri, None, 'append') + cursor.set_value('value') + cursor.insert() + self.assertEquals(cursor.get_key(), 1) + + # Insert a row into a complex column-store table. + # WT_CURSOR.get_key should fail. + def test_complex_column_store(self): + uri='table:bug016' + self.session.create( + uri, 'key_format=r,value_format=S,columns=(key,value)') + cursor = self.session.open_cursor(uri, None) + cursor.set_key(37) + cursor.set_value('value') + cursor.insert() + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: cursor.get_key(), "/requires key be set/") + + # Insert a row into a complex row-store table. + # WT_CURSOR.get_key should fail. 
+ def test_complex_row_store(self): + uri='table:bug016' + self.session.create( + uri, 'key_format=S,value_format=S,columns=(key,value)') + cursor = self.session.open_cursor(uri, None) + cursor.set_key('key') + cursor.set_value('value') + cursor.insert() + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: cursor.get_key(), "/requires key be set/") + + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_bulk01.py b/src/third_party/wiredtiger/test/suite/test_bulk01.py index 1add11af26b..5bacfafaa20 100644 --- a/src/third_party/wiredtiger/test/suite/test_bulk01.py +++ b/src/third_party/wiredtiger/test/suite/test_bulk01.py @@ -32,7 +32,7 @@ import wiredtiger, wttest from helper import key_populate, value_populate -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Smoke test bulk-load. class test_bulk_load(wttest.WiredTigerTestCase): @@ -52,7 +52,7 @@ class test_bulk_load(wttest.WiredTigerTestCase): ('integer', dict(valfmt='i')), ('string', dict(valfmt='S')), ] - scenarios = number_scenarios(multiply_scenarios('.', types, keyfmt, valfmt)) + scenarios = make_scenarios(types, keyfmt, valfmt) # Test a simple bulk-load def test_bulk_load(self): diff --git a/src/third_party/wiredtiger/test/suite/test_bulk02.py b/src/third_party/wiredtiger/test/suite/test_bulk02.py index fe8118209f2..af0b6d4485d 100644 --- a/src/third_party/wiredtiger/test/suite/test_bulk02.py +++ b/src/third_party/wiredtiger/test/suite/test_bulk02.py @@ -32,7 +32,7 @@ import shutil, os from helper import confirm_empty, key_populate, value_populate from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios import wiredtiger, wttest # test_bulkload_checkpoint @@ -47,7 +47,7 @@ class test_bulkload_checkpoint(wttest.WiredTigerTestCase, suite_subprocess): ('unnamed', dict(ckpt_type='unnamed')), ] - 
scenarios = number_scenarios(multiply_scenarios('.', types, ckpt_type)) + scenarios = make_scenarios(types, ckpt_type) # Bulk-load handles are skipped by checkpoints. # Named and unnamed checkpoint versions. @@ -90,8 +90,7 @@ class test_bulkload_backup(wttest.WiredTigerTestCase, suite_subprocess): ('different', dict(session_type='different')), ('same', dict(session_type='same')), ] - scenarios = number_scenarios( - multiply_scenarios('.', types, ckpt_type, session_type)) + scenarios = make_scenarios(types, ckpt_type, session_type) # Backup a set of chosen tables/files using the wt backup command. # The only files are bulk-load files, so they shouldn't be copied. diff --git a/src/third_party/wiredtiger/test/suite/test_checkpoint01.py b/src/third_party/wiredtiger/test/suite/test_checkpoint01.py index 6e1ad7814ed..78754dc82fa 100644 --- a/src/third_party/wiredtiger/test/suite/test_checkpoint01.py +++ b/src/third_party/wiredtiger/test/suite/test_checkpoint01.py @@ -28,7 +28,7 @@ import wiredtiger, wttest from helper import key_populate, complex_populate_lsm, simple_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_checkpoint01.py # Checkpoint tests @@ -36,7 +36,7 @@ from wtscenario import check_scenarios # with a set of checkpoints, then confirm the checkpoint's values are correct, # including after other checkpoints are dropped. class test_checkpoint(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:checkpoint',fmt='S')), ('table', dict(uri='table:checkpoint',fmt='S')) ]) @@ -139,7 +139,7 @@ class test_checkpoint(wttest.WiredTigerTestCase): # Check some specific cursor checkpoint combinations. 
class test_checkpoint_cursor(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:checkpoint',fmt='S')), ('table', dict(uri='table:checkpoint',fmt='S')) ]) @@ -205,7 +205,7 @@ class test_checkpoint_cursor(wttest.WiredTigerTestCase): # Check that you can checkpoint targets. class test_checkpoint_target(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:checkpoint',fmt='S')), ('table', dict(uri='table:checkpoint',fmt='S')) ]) @@ -252,7 +252,7 @@ class test_checkpoint_target(wttest.WiredTigerTestCase): # Check that you can't write checkpoint cursors. class test_checkpoint_cursor_update(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file-r', dict(uri='file:checkpoint',fmt='r')), ('file-S', dict(uri='file:checkpoint',fmt='S')), ('table-r', dict(uri='table:checkpoint',fmt='r')), @@ -277,7 +277,7 @@ class test_checkpoint_cursor_update(wttest.WiredTigerTestCase): # Check that WiredTigerCheckpoint works as a checkpoint specifier. 
class test_checkpoint_last(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:checkpoint',fmt='S')), ('table', dict(uri='table:checkpoint',fmt='S')) ]) @@ -343,7 +343,7 @@ class test_checkpoint_lsm_name(wttest.WiredTigerTestCase): class test_checkpoint_empty(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:checkpoint')), ('table', dict(uri='table:checkpoint')), ]) diff --git a/src/third_party/wiredtiger/test/suite/test_checkpoint02.py b/src/third_party/wiredtiger/test/suite/test_checkpoint02.py index 71c8792359c..ac57499a9e4 100644 --- a/src/third_party/wiredtiger/test/suite/test_checkpoint02.py +++ b/src/third_party/wiredtiger/test/suite/test_checkpoint02.py @@ -30,13 +30,13 @@ import Queue import threading, time, wiredtiger, wttest from helper import key_populate, simple_populate from wtthread import checkpoint_thread, op_thread -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_checkpoint02.py # Run background checkpoints repeatedly while doing inserts and other # operations in another thread class test_checkpoint02(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('table-100', dict(uri='table:test',fmt='L',dsize=100,nops=50000,nthreads=10)), ('table-10', dict(uri='table:test',fmt='L',dsize=10,nops=50000,nthreads=30)) ]) diff --git a/src/third_party/wiredtiger/test/suite/test_colgap.py b/src/third_party/wiredtiger/test/suite/test_colgap.py index 46682c23167..5cc363dbd4a 100644 --- a/src/third_party/wiredtiger/test/suite/test_colgap.py +++ b/src/third_party/wiredtiger/test/suite/test_colgap.py @@ -28,7 +28,7 @@ import wiredtiger, wttest from helper import simple_populate, key_populate, value_populate -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_colgap.py # Test variable-length 
column-store gap performance. @@ -149,8 +149,8 @@ class test_colmax(wttest.WiredTigerTestCase): ('not-single', dict(single=0)), ] - scenarios = number_scenarios(multiply_scenarios(\ - '.', types, valfmt, record_number, bulk, reopen, single)) + scenarios = make_scenarios(\ + types, valfmt, record_number, bulk, reopen, single) # Test that variable-length column-store correctly/efficiently handles big # records (if it's not efficient, we'll just hang). diff --git a/src/third_party/wiredtiger/test/suite/test_collator.py b/src/third_party/wiredtiger/test/suite/test_collator.py index 34b5c20247f..a8103fb3671 100644 --- a/src/third_party/wiredtiger/test/suite/test_collator.py +++ b/src/third_party/wiredtiger/test/suite/test_collator.py @@ -28,7 +28,6 @@ import os import wiredtiger, wttest, run -from wtscenario import check_scenarios, number_scenarios # test_collator.py # Test indices using a custom extractor and collator. diff --git a/src/third_party/wiredtiger/test/suite/test_compact01.py b/src/third_party/wiredtiger/test/suite/test_compact01.py index 3af550708ed..183d75f9d31 100644 --- a/src/third_party/wiredtiger/test/suite/test_compact01.py +++ b/src/third_party/wiredtiger/test/suite/test_compact01.py @@ -30,7 +30,7 @@ import wiredtiger, wttest from helper import complex_populate, simple_populate, key_populate from suite_subprocess import suite_subprocess from wiredtiger import stat -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_compact.py # session level compact operation @@ -53,7 +53,7 @@ class test_compact(wttest.WiredTigerTestCase, suite_subprocess): ('method_reopen', dict(utility=0,reopen=1)), ('utility', dict(utility=1,reopen=0)), ] - scenarios = number_scenarios(multiply_scenarios('.', types, compact)) + scenarios = make_scenarios(types, compact) # We want a large cache so that eviction doesn't happen # (which could skew our compaction results). 
conn_config = 'cache_size=250MB,statistics=(all)' diff --git a/src/third_party/wiredtiger/test/suite/test_compact02.py b/src/third_party/wiredtiger/test/suite/test_compact02.py index 7ad05cd2536..eb21817bd90 100644 --- a/src/third_party/wiredtiger/test/suite/test_compact02.py +++ b/src/third_party/wiredtiger/test/suite/test_compact02.py @@ -32,7 +32,7 @@ import wiredtiger, wttest from wiredtiger import stat -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Test basic compression class test_compact02(wttest.WiredTigerTestCase): @@ -57,8 +57,7 @@ class test_compact02(wttest.WiredTigerTestCase): ('64KB', dict(fileConfig='leaf_page_max=64KB')), ('128KB', dict(fileConfig='leaf_page_max=128KB')), ] - scenarios = \ - number_scenarios(multiply_scenarios('.', types, cacheSize, fileConfig)) + scenarios = make_scenarios(types, cacheSize, fileConfig) # We want about 22K records that total about 130Mb. That is an average # of 6196 bytes per record. Half the records should be smaller, about @@ -97,7 +96,7 @@ class test_compact02(wttest.WiredTigerTestCase): self.home = '.' 
conn_params = 'create,' + \ cacheSize + ',error_prefix="%s: ",' % self.shortid() + \ - 'statistics=(fast)' + 'statistics=(fast),eviction_dirty_target=99,eviction_dirty_trigger=99' try: self.conn = wiredtiger.wiredtiger_open(self.home, conn_params) except wiredtiger.WiredTigerError as e: diff --git a/src/third_party/wiredtiger/test/suite/test_compress01.py b/src/third_party/wiredtiger/test/suite/test_compress01.py index 94c748fc3e5..2a7e2a7e1a8 100644 --- a/src/third_party/wiredtiger/test/suite/test_compress01.py +++ b/src/third_party/wiredtiger/test/suite/test_compress01.py @@ -32,7 +32,7 @@ import os, run import wiredtiger, wttest -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Test basic compression class test_compress01(wttest.WiredTigerTestCase): @@ -46,7 +46,7 @@ class test_compress01(wttest.WiredTigerTestCase): ('snappy', dict(compress='snappy')), ('none', dict(compress=None)), ] - scenarios = number_scenarios(multiply_scenarios('.', types, compress)) + scenarios = make_scenarios(types, compress) nrecords = 10000 bigvalue = "abcdefghij" * 1000 diff --git a/src/third_party/wiredtiger/test/suite/test_config03.py b/src/third_party/wiredtiger/test/suite/test_config03.py index e91c5de62f8..88ca6ae3f39 100644 --- a/src/third_party/wiredtiger/test/suite/test_config03.py +++ b/src/third_party/wiredtiger/test/suite/test_config03.py @@ -69,14 +69,11 @@ class test_config03(test_base03.test_base03): 'eviction_trigger', 'hazard_max', 'multiprocess', 'session_max', 'verbose' ] - all_scenarios = wtscenario.multiply_scenarios('_', + scenarios = wtscenario.make_scenarios( cache_size_scenarios, create_scenarios, error_prefix_scenarios, eviction_target_scenarios, eviction_trigger_scenarios, hazard_max_scenarios, multiprocess_scenarios, session_max_scenarios, - transactional_scenarios, verbose_scenarios) - - scenarios = wtscenario.prune_scenarios(all_scenarios, 1000) - scenarios = wtscenario.number_scenarios(scenarios) + 
transactional_scenarios, verbose_scenarios, prune=1000) #wttest.WiredTigerTestCase.printVerbose(2, 'test_config03: running ' + \ # str(len(scenarios)) + ' of ' + \ diff --git a/src/third_party/wiredtiger/test/suite/test_cursor01.py b/src/third_party/wiredtiger/test/suite/test_cursor01.py index cf39d4a4ba4..8c66042eec0 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor01.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor01.py @@ -27,7 +27,7 @@ # OTHER DEALINGS IN THE SOFTWARE. import wiredtiger, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_cursor01.py # Cursor operations @@ -41,7 +41,7 @@ class test_cursor01(wttest.WiredTigerTestCase): table_name1 = 'test_cursor01' nentries = 10 - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file-col', dict(tablekind='col',uri='file')), ('file-fix', dict(tablekind='fix',uri='file')), ('file-row', dict(tablekind='row',uri='file')), diff --git a/src/third_party/wiredtiger/test/suite/test_cursor02.py b/src/third_party/wiredtiger/test/suite/test_cursor02.py index eb1ba4dfc41..a83d30def47 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor02.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor02.py @@ -28,7 +28,7 @@ import wiredtiger from test_cursor_tracker import TestCursorTracker -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_cursor02.py # Cursor operations on small tables. @@ -39,7 +39,7 @@ class test_cursor02(TestCursorTracker): key/value content and to track/verify content after inserts and removes. 
""" - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('row', dict(tablekind='row', uri='table')), ('lsm-row', dict(tablekind='row', uri='lsm')), ('col', dict(tablekind='col', uri='table')), diff --git a/src/third_party/wiredtiger/test/suite/test_cursor03.py b/src/third_party/wiredtiger/test/suite/test_cursor03.py index 63237f942ca..b4598483c12 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor03.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor03.py @@ -28,7 +28,7 @@ import wiredtiger from test_cursor_tracker import TestCursorTracker -from wtscenario import multiply_scenarios +from wtscenario import make_scenarios # test_cursor03.py # Cursor operations on tables of various sizes, with key/values of various @@ -40,7 +40,7 @@ class test_cursor03(TestCursorTracker): key/value content and to track/verify content after inserts and removes. """ - scenarios = multiply_scenarios('.', [ + scenarios = make_scenarios([ ('row', dict(tablekind='row', keysize=None, valsize=None, uri='table')), ('lsm-row', dict(tablekind='row', keysize=None, valsize=None, uri='lsm')), ('col', dict(tablekind='col', keysize=None, valsize=None, uri='table')), diff --git a/src/third_party/wiredtiger/test/suite/test_cursor04.py b/src/third_party/wiredtiger/test/suite/test_cursor04.py index 6576c623f8a..8cbf922b5eb 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor04.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor04.py @@ -27,7 +27,7 @@ # OTHER DEALINGS IN THE SOFTWARE. 
import wiredtiger, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_base04.py # Cursor operations @@ -38,7 +38,7 @@ class test_cursor04(wttest.WiredTigerTestCase): table_name1 = 'test_cursor04' nentries = 20 - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('row', dict(tablekind='row', uri='table')), ('lsm-row', dict(tablekind='row', uri='lsm')), ('col', dict(tablekind='col', uri='table')), diff --git a/src/third_party/wiredtiger/test/suite/test_cursor06.py b/src/third_party/wiredtiger/test/suite/test_cursor06.py index 5545c862dd7..3a6240bc6c7 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor06.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor06.py @@ -29,13 +29,13 @@ import wiredtiger, wttest from helper import key_populate, value_populate, simple_populate from helper import complex_value_populate, complex_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_cursor06.py # Test cursor reconfiguration. 
class test_cursor06(wttest.WiredTigerTestCase): name = 'reconfigure' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file-r', dict(type='file:', config='key_format=r', complex=0)), ('file-S', dict(type='file:', config='key_format=S', complex=0)), ('lsm-S', dict(type='lsm:', config='key_format=S', complex=0)), diff --git a/src/third_party/wiredtiger/test/suite/test_cursor07.py b/src/third_party/wiredtiger/test/suite/test_cursor07.py index d8de0874d7f..d6078183fc1 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor07.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor07.py @@ -33,7 +33,7 @@ import fnmatch, os, shutil, run, time from suite_subprocess import suite_subprocess from wiredtiger import stat -from wtscenario import check_scenarios +from wtscenario import make_scenarios import wttest class test_cursor07(wttest.WiredTigerTestCase, suite_subprocess): @@ -44,7 +44,7 @@ class test_cursor07(wttest.WiredTigerTestCase, suite_subprocess): # test that scenario for log cursors. 
nkeys = 7000 - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('regular', dict(reopen=False)), ('reopen', dict(reopen=True)) ]) diff --git a/src/third_party/wiredtiger/test/suite/test_cursor08.py b/src/third_party/wiredtiger/test/suite/test_cursor08.py index 1a379518224..3f8f50defa7 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor08.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor08.py @@ -33,7 +33,7 @@ import fnmatch, os, shutil, run, time from suite_subprocess import suite_subprocess from wiredtiger import stat, WiredTigerError -from wtscenario import multiply_scenarios, number_scenarios, check_scenarios +from wtscenario import make_scenarios import wttest class test_cursor08(wttest.WiredTigerTestCase, suite_subprocess): @@ -42,17 +42,17 @@ class test_cursor08(wttest.WiredTigerTestCase, suite_subprocess): uri = 'table:' + tablename nkeys = 500 - reopens = check_scenarios([ + reopens = [ ('regular', dict(reopen=False)), ('reopen', dict(reopen=True)) - ]) - compress = check_scenarios([ + ] + compress = [ ('nop', dict(compress='nop')), ('snappy', dict(compress='snappy')), ('zlib', dict(compress='zlib')), ('none', dict(compress='none')), - ]) - scenarios = number_scenarios(multiply_scenarios('.', reopens, compress)) + ] + scenarios = make_scenarios(reopens, compress) # Load the compression extension, and enable it for logging. 
def conn_config(self, dir): return 'log=(archive=false,enabled,file_max=%s,' % self.logmax + \ diff --git a/src/third_party/wiredtiger/test/suite/test_cursor09.py b/src/third_party/wiredtiger/test/suite/test_cursor09.py index b77336bc1d7..a05caea4f1f 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor09.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor09.py @@ -29,12 +29,12 @@ import wiredtiger, wttest from helper import key_populate, value_populate, simple_populate from helper import complex_populate, complex_value_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_cursor09.py # JIRA WT-2217: insert resets key/value "set". class test_cursor09(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file-r', dict(type='file:', config='key_format=r', complex=0)), ('file-S', dict(type='file:', config='key_format=S', complex=0)), ('lsm-S', dict(type='lsm:', config='key_format=S', complex=0)), diff --git a/src/third_party/wiredtiger/test/suite/test_cursor_compare.py b/src/third_party/wiredtiger/test/suite/test_cursor_compare.py index 130f4e8ca96..179e20682d2 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor_compare.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor_compare.py @@ -29,7 +29,7 @@ import wiredtiger, wttest, exceptions from helper import complex_populate, simple_populate, key_populate from helper import complex_populate_index_name -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Test cursor comparisons. 
class test_cursor_comparison(wttest.WiredTigerTestCase): @@ -45,7 +45,7 @@ class test_cursor_comparison(wttest.WiredTigerTestCase): ('recno', dict(keyfmt='r')), ('string', dict(keyfmt='S')) ] - scenarios = number_scenarios(multiply_scenarios('.', types, keyfmt)) + scenarios = make_scenarios(types, keyfmt) def test_cursor_comparison(self): uri = self.type + 'compare' diff --git a/src/third_party/wiredtiger/test/suite/test_cursor_pin.py b/src/third_party/wiredtiger/test/suite/test_cursor_pin.py index 329759d8fc8..1aea49c32b0 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor_pin.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor_pin.py @@ -28,7 +28,7 @@ import wiredtiger, wttest from helper import simple_populate, key_populate, value_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_cursor_pin.py # Smoke-test fast-path searching for pinned pages before re-descending @@ -37,7 +37,7 @@ class test_cursor_pin(wttest.WiredTigerTestCase): uri = 'file:cursor_pin' nentries = 10000 config = 'allocation_size=512,leaf_page_max=512,value_format=S,key_format=' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('recno', dict(keyfmt='r')), ('string', dict(keyfmt='S')), ]) diff --git a/src/third_party/wiredtiger/test/suite/test_cursor_random.py b/src/third_party/wiredtiger/test/suite/test_cursor_random.py index 16ce5cae685..8d7c230043b 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor_random.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor_random.py @@ -29,7 +29,7 @@ import wiredtiger, wttest from helper import complex_populate, simple_populate from helper import key_populate, value_populate -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_cursor_random.py # Cursor next_random operations @@ -42,7 +42,7 @@ class test_cursor_random(wttest.WiredTigerTestCase): ('sample', 
dict(config='next_random=true,next_random_sample_size=35')), ('not-sample', dict(config='next_random=true')) ] - scenarios =number_scenarios(multiply_scenarios('.', types, config)) + scenarios = make_scenarios(types, config) # Check that opening a random cursor on a row-store returns not-supported # for methods other than next, reconfigure and reset, and next returns @@ -136,7 +136,7 @@ class test_cursor_random(wttest.WiredTigerTestCase): # Check that opening a random cursor on column-store returns not-supported. class test_cursor_random_column(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:random')), ('table', dict(uri='table:random')) ]) @@ -159,7 +159,7 @@ class test_cursor_random_invisible(wttest.WiredTigerTestCase): ('sample', dict(config='next_random=true,next_random_sample_size=35')), ('not-sample', dict(config='next_random=true')) ] - scenarios =number_scenarios(multiply_scenarios('.', types, config)) + scenarios = make_scenarios(types, config) def test_cursor_random_invisible_all(self): uri = self.type diff --git a/src/third_party/wiredtiger/test/suite/test_cursor_random02.py b/src/third_party/wiredtiger/test/suite/test_cursor_random02.py index 84ac0279fc4..93aa97f2282 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor_random02.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor_random02.py @@ -29,7 +29,7 @@ import wiredtiger, wttest from helper import complex_populate, simple_populate from helper import key_populate, value_populate -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_cursor_random02.py # Cursor next_random operations @@ -46,7 +46,7 @@ class test_cursor_random02(wttest.WiredTigerTestCase): ('10000', dict(records=10000)), ('50000', dict(records=50000)), ] - scenarios = number_scenarios(multiply_scenarios('.', config, records)) + scenarios = make_scenarios(config, records) # Check that 
next_random works in the presence of a larger set of values, # where the values are in an insert list. diff --git a/src/third_party/wiredtiger/test/suite/test_drop.py b/src/third_party/wiredtiger/test/suite/test_drop.py index 52ea7251ab5..a3e80214295 100644 --- a/src/third_party/wiredtiger/test/suite/test_drop.py +++ b/src/third_party/wiredtiger/test/suite/test_drop.py @@ -30,7 +30,7 @@ import os, time import wiredtiger, wttest from helper import confirm_does_not_exist, complex_populate, \ complex_populate_index_name, simple_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_drop.py # session level drop operation @@ -38,7 +38,7 @@ class test_drop(wttest.WiredTigerTestCase): name = 'test_drop' extra_config = '' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:')), ('table', dict(uri='table:')), ('table-lsm', dict(uri='table:', extra_config=',type=lsm')), diff --git a/src/third_party/wiredtiger/test/suite/test_dump.py b/src/third_party/wiredtiger/test/suite/test_dump.py index 85196174c1b..280d5870359 100644 --- a/src/third_party/wiredtiger/test/suite/test_dump.py +++ b/src/third_party/wiredtiger/test/suite/test_dump.py @@ -30,9 +30,10 @@ import os, shutil import wiredtiger, wttest from helper import \ complex_populate, complex_populate_check, \ - simple_populate, simple_populate_check + simple_populate, simple_populate_check, \ + simple_index_populate, simple_index_populate_check from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_dump.py # Utilities: wt dump @@ -64,6 +65,9 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess): ('table-simple', dict(uri='table:', config='', lsm=False, populate=simple_populate, populate_check=simple_populate_check)), + ('table-index', dict(uri='table:', config='', lsm=False, + populate=simple_index_populate, + 
populate_check=simple_index_populate_check)), ('table-simple-lsm', dict(uri='table:', config='type=lsm', lsm=True, populate=simple_populate, populate_check=simple_populate_check)), @@ -74,8 +78,7 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess): populate=complex_populate, populate_check=complex_populate_check)) ] - scenarios = number_scenarios( - multiply_scenarios('.', types, keyfmt, dumpfmt)) + scenarios = make_scenarios(types, keyfmt, dumpfmt) # Extract the values lines from the dump output. def value_lines(self, fname): diff --git a/src/third_party/wiredtiger/test/suite/test_dupc.py b/src/third_party/wiredtiger/test/suite/test_dupc.py index ec55a36df4c..12b18f1ba79 100644 --- a/src/third_party/wiredtiger/test/suite/test_dupc.py +++ b/src/third_party/wiredtiger/test/suite/test_dupc.py @@ -33,7 +33,7 @@ import os, time import wiredtiger, wttest from helper import complex_populate, key_populate, simple_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # Test session.open_cursor with cursor duplication. 
class test_duplicate_cursor(wttest.WiredTigerTestCase): @@ -42,7 +42,7 @@ class test_duplicate_cursor(wttest.WiredTigerTestCase): config = 'key_format=' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file-r', dict(uri='file:', fmt='r')), ('file-S', dict(uri='file:', fmt='S')), ('table-r', dict(uri='table:', fmt='r')), diff --git a/src/third_party/wiredtiger/test/suite/test_durability01.py b/src/third_party/wiredtiger/test/suite/test_durability01.py index f578a79baf1..32cdd795914 100644 --- a/src/third_party/wiredtiger/test/suite/test_durability01.py +++ b/src/third_party/wiredtiger/test/suite/test_durability01.py @@ -34,7 +34,6 @@ import fnmatch, os, shutil, time from helper import copy_wiredtiger_home from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios import wttest class test_durability01(wttest.WiredTigerTestCase, suite_subprocess): diff --git a/src/third_party/wiredtiger/test/suite/test_empty.py b/src/third_party/wiredtiger/test/suite/test_empty.py index 50b79db70e4..9fe88107412 100644 --- a/src/third_party/wiredtiger/test/suite/test_empty.py +++ b/src/third_party/wiredtiger/test/suite/test_empty.py @@ -29,14 +29,14 @@ import os import wiredtiger, wttest from helper import key_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_empty.py # Test that empty objects don't write anything other than a single sector. 
class test_empty(wttest.WiredTigerTestCase): name = 'test_empty' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file-r', dict(type='file:', fmt='r')), ('file-S', dict(type='file:', fmt='S')), ('table-r', dict(type='table:', fmt='r')), diff --git a/src/third_party/wiredtiger/test/suite/test_encrypt01.py b/src/third_party/wiredtiger/test/suite/test_encrypt01.py index 0f2782204d2..d48605aaa83 100644 --- a/src/third_party/wiredtiger/test/suite/test_encrypt01.py +++ b/src/third_party/wiredtiger/test/suite/test_encrypt01.py @@ -32,7 +32,7 @@ import os, run, random import wiredtiger, wttest -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Test basic encryption class test_encrypt01(wttest.WiredTigerTestCase): @@ -60,8 +60,7 @@ class test_encrypt01(wttest.WiredTigerTestCase): ('none-snappy', dict(log_compress=None, block_compress='snappy')), ('snappy-lz4', dict(log_compress='snappy', block_compress='lz4')), ] - scenarios = number_scenarios(multiply_scenarios('.', types, - encrypt, compress)) + scenarios = make_scenarios(types, encrypt, compress) nrecords = 5000 bigvalue = "abcdefghij" * 1001 # len(bigvalue) = 10010 diff --git a/src/third_party/wiredtiger/test/suite/test_encrypt02.py b/src/third_party/wiredtiger/test/suite/test_encrypt02.py index 0376b3e42e4..648686274c4 100644 --- a/src/third_party/wiredtiger/test/suite/test_encrypt02.py +++ b/src/third_party/wiredtiger/test/suite/test_encrypt02.py @@ -33,7 +33,7 @@ import os, run, random import wiredtiger, wttest from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Test basic encryption class test_encrypt02(wttest.WiredTigerTestCase, suite_subprocess): @@ -48,7 +48,7 @@ class test_encrypt02(wttest.WiredTigerTestCase, suite_subprocess): ('keyid-pass', dict( encrypt='rotn', encrypt_args='name=rotn,keyid=11', secret_arg='ABC')), ] - scenarios = 
number_scenarios(encrypt_type) + scenarios = make_scenarios(encrypt_type) nrecords = 5000 bigvalue = "abcdefghij" * 1001 # len(bigvalue) = 10010 diff --git a/src/third_party/wiredtiger/test/suite/test_encrypt03.py b/src/third_party/wiredtiger/test/suite/test_encrypt03.py index 702d0a2369f..0dc1755d6eb 100644 --- a/src/third_party/wiredtiger/test/suite/test_encrypt03.py +++ b/src/third_party/wiredtiger/test/suite/test_encrypt03.py @@ -32,7 +32,7 @@ import os, run, random import wiredtiger, wttest -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Test basic encryption class test_encrypt03(wttest.WiredTigerTestCase): @@ -48,7 +48,7 @@ class test_encrypt03(wttest.WiredTigerTestCase): #('noname', dict( sys_encrypt='rotn', sys_encrypt_args=',keyid=11', # file_encrypt='none', file_encrypt_args=',keyid=13')), ] - scenarios = number_scenarios(multiply_scenarios('.', types, encrypt)) + scenarios = make_scenarios(types, encrypt) # Override WiredTigerTestCase, we have extensions. 
def setUpConnectionOpen(self, dir): diff --git a/src/third_party/wiredtiger/test/suite/test_encrypt04.py b/src/third_party/wiredtiger/test/suite/test_encrypt04.py index d7c12d2cba8..97d2cee03a0 100644 --- a/src/third_party/wiredtiger/test/suite/test_encrypt04.py +++ b/src/third_party/wiredtiger/test/suite/test_encrypt04.py @@ -32,7 +32,7 @@ import os, run, random import wiredtiger, wttest -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios from suite_subprocess import suite_subprocess # Test basic encryption with mismatched configuration @@ -69,8 +69,7 @@ class test_encrypt04(wttest.WiredTigerTestCase, suite_subprocess): ('rotn11xyz_and_clear', dict( name2='rotn', keyid2='11', secretkey2='XYZ', fileinclear2=True)) ] - scenarios = number_scenarios(multiply_scenarios \ - ('.', encrypt_scen_1, encrypt_scen_2)) + scenarios = make_scenarios(encrypt_scen_1, encrypt_scen_2) nrecords = 5000 bigvalue = "abcdefghij" * 1001 # len(bigvalue) = 10010 diff --git a/src/third_party/wiredtiger/test/suite/test_encrypt05.py b/src/third_party/wiredtiger/test/suite/test_encrypt05.py index afd8a8103f9..19a3522b3d5 100644 --- a/src/third_party/wiredtiger/test/suite/test_encrypt05.py +++ b/src/third_party/wiredtiger/test/suite/test_encrypt05.py @@ -32,7 +32,7 @@ import os, run, random import wiredtiger, wttest -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Test raw compression with encryption class test_encrypt05(wttest.WiredTigerTestCase): @@ -44,8 +44,7 @@ class test_encrypt05(wttest.WiredTigerTestCase): compress = [ ('zlib', dict(log_compress='zlib', block_compress='zlib')), ] - scenarios = number_scenarios(multiply_scenarios('.', - encrypt, compress)) + scenarios = make_scenarios(encrypt, compress) nrecords = 500 bigvalue = 'a' * 500 # we use values that will definitely give compression diff --git a/src/third_party/wiredtiger/test/suite/test_encrypt06.py 
b/src/third_party/wiredtiger/test/suite/test_encrypt06.py index 5c88b698aeb..9300583d099 100644 --- a/src/third_party/wiredtiger/test/suite/test_encrypt06.py +++ b/src/third_party/wiredtiger/test/suite/test_encrypt06.py @@ -32,7 +32,7 @@ import os, run, random import wiredtiger, wttest -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Test encryption, when on, does not leak any information class test_encrypt06(wttest.WiredTigerTestCase): @@ -86,7 +86,7 @@ class test_encrypt06(wttest.WiredTigerTestCase): file0_encrypt='rotn', file0_encrypt_args=key13, encrypt0=True, file1_encrypt='none', file1_encrypt_args='', encrypt1=False)), ] - scenarios = number_scenarios(multiply_scenarios('.', encrypt, storagetype)) + scenarios = make_scenarios(encrypt, storagetype) nrecords = 1000 # Override WiredTigerTestCase, we have extensions. diff --git a/src/third_party/wiredtiger/test/suite/test_encrypt07.py b/src/third_party/wiredtiger/test/suite/test_encrypt07.py index 30f28e096a8..97ab1987d4f 100644 --- a/src/third_party/wiredtiger/test/suite/test_encrypt07.py +++ b/src/third_party/wiredtiger/test/suite/test_encrypt07.py @@ -32,7 +32,6 @@ import os, run, string, codecs import wiredtiger, wttest -from wtscenario import multiply_scenarios, number_scenarios import test_salvage # Run the regular salvage test, but with encryption on diff --git a/src/third_party/wiredtiger/test/suite/test_excl.py b/src/third_party/wiredtiger/test/suite/test_excl.py index 90926f51877..cea5756dfbb 100644 --- a/src/third_party/wiredtiger/test/suite/test_excl.py +++ b/src/third_party/wiredtiger/test/suite/test_excl.py @@ -27,11 +27,11 @@ # OTHER DEALINGS IN THE SOFTWARE. import wiredtiger, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios # Test session.create with the exclusive configuration. 
class test_create_excl(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(type='file:')), ('table', dict(type='table:')) ]) diff --git a/src/third_party/wiredtiger/test/suite/test_huffman01.py b/src/third_party/wiredtiger/test/suite/test_huffman01.py index d71198e3151..be307550f2e 100644 --- a/src/third_party/wiredtiger/test/suite/test_huffman01.py +++ b/src/third_party/wiredtiger/test/suite/test_huffman01.py @@ -28,7 +28,7 @@ import os from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios import wiredtiger, wttest # test_huffman01.py @@ -52,7 +52,7 @@ class test_huffman01(wttest.WiredTigerTestCase, suite_subprocess): ('utf8', dict(huffval=',huffman_value=utf8t8file',vfile='t8file')), ('utf16', dict(huffval=',huffman_value=utf16t16file',vfile='t16file')), ] - scenarios = number_scenarios(multiply_scenarios('.', huffkey, huffval)) + scenarios = make_scenarios(huffkey, huffval) def test_huffman(self): dir = self.conn.get_home() diff --git a/src/third_party/wiredtiger/test/suite/test_huffman02.py b/src/third_party/wiredtiger/test/suite/test_huffman02.py index aa4329415a4..d74704daf58 100644 --- a/src/third_party/wiredtiger/test/suite/test_huffman02.py +++ b/src/third_party/wiredtiger/test/suite/test_huffman02.py @@ -28,7 +28,7 @@ import os from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios import wiredtiger, wttest # test_huffman02.py @@ -48,7 +48,7 @@ class test_huffman02(wttest.WiredTigerTestCase, suite_subprocess): ('file', dict(uri='file:huff')), ('table', dict(uri='table:huff')), ] - scenarios = number_scenarios(multiply_scenarios('.',type,huffkey, huffval)) + scenarios = make_scenarios(type, huffkey, huffval) def test_huffman(self): if self.keybad or self.valbad: diff --git 
a/src/third_party/wiredtiger/test/suite/test_index02.py b/src/third_party/wiredtiger/test/suite/test_index02.py new file mode 100644 index 00000000000..9f39df003b1 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_index02.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+ +import wiredtiger, wttest + +# test_index02.py +# test search_near in indices +class test_index02(wttest.WiredTigerTestCase): + '''Test search_near in indices''' + + basename = 'test_index02' + tablename = 'table:' + basename + indexname = 'index:' + basename + ":inverse" + + def test_search_near(self): + '''Create a table, look for a nonexistent key''' + self.session.create(self.tablename, 'key_format=r,value_format=Q,columns=(k,v)') + self.session.create(self.indexname, 'columns=(v)') + cur = self.session.open_cursor(self.tablename, None, "append") + cur.set_value(1) + cur.insert() + cur.set_value(5) + cur.insert() + cur.set_value(5) + cur.insert() + cur.set_value(5) + cur.insert() + cur.set_value(10) + cur.insert() + + # search near should find a match + cur2 = self.session.open_cursor(self.indexname, None, None) + cur2.set_key(5) + self.assertEqual(cur2.search_near(), 0) + + # Retry after reopening + self.reopen_conn() + cur3 = self.session.open_cursor(self.indexname, None, None) + cur3.set_key(5) + self.assertEqual(cur3.search_near(), 0) + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_inmem01.py b/src/third_party/wiredtiger/test/suite/test_inmem01.py index 875ebb2bfa7..c6ae7ff6c4b 100644 --- a/src/third_party/wiredtiger/test/suite/test_inmem01.py +++ b/src/third_party/wiredtiger/test/suite/test_inmem01.py @@ -30,95 +30,73 @@ import wiredtiger, wttest from time import sleep from helper import simple_populate, simple_populate_check from helper import key_populate, value_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_inmem01.py # Test in-memory configuration. class test_inmem01(wttest.WiredTigerTestCase): - name = 'inmem01' - """ - In memory configuration still creates files on disk, but has limits - in terms of how much data can be written. - Test various scenarios including: - - Add a small amount of data, ensure it is present. 
- - Add more data than would fit into the configured cache. - - Fill the cache with data, remove some data, ensure more data can be - inserted (after a reasonable amount of time for space to be reclaimed) - - Run queries after adding, removing and re-inserting data. - - Try out keeping a cursor open while adding new data. - """ - scenarios = check_scenarios([ - ('col', dict(tablekind='col')), - # Fixed length is very slow, disable it for now - #('fix', dict(tablekind='fix')), - ('row', dict(tablekind='row')) - ]) - - # create an in-memory database - conn_config = 'cache_size=5MB,' + \ - 'file_manager=(close_idle_time=0),in_memory=true' + uri = 'table:inmem01' + conn_config = \ + 'cache_size=5MB,file_manager=(close_idle_time=0),in_memory=true' + table_config = ',memory_page_max=32k,leaf_page_max=4k' - def get_table_config(self): - kf = 'key_format=' - vf = 'value_format=' - if self.tablekind == 'row': - kf = kf + 'S' - else: - kf = kf + 'r' # record format - if self.tablekind == 'fix': - vf = vf + '8t' - else: - vf = vf + 'S' - return 'memory_page_max=32k,leaf_page_max=4k,' + kf + ',' + vf + scenarios = make_scenarios([ + ('col', dict(fmt='key_format=r,value_format=S')), + ('fix', dict(fmt='key_format=r,value_format=8t')), + ('row', dict(fmt='key_format=S,value_format=S')) + ]) + # Smoke-test in-memory configurations, add a small amount of data and + # ensure it's visible. def test_insert(self): - table_config = self.get_table_config() - simple_populate(self, - "table:" + self.name, table_config, 1000) - # Ensure the data is visible. - simple_populate_check(self, 'table:' + self.name, 1000) + config = self.fmt + self.table_config + simple_populate(self, self.uri, config, 1000) + simple_populate_check(self, self.uri, 1000) + # Add more data than fits into the configured cache and verify it fails. 
def test_insert_over_capacity(self): - table_config = self.get_table_config() + config = self.fmt + self.table_config msg = '/WT_CACHE_FULL.*/' self.assertRaisesHavingMessage(wiredtiger.WiredTigerError, - lambda:simple_populate(self, - "table:" + self.name, table_config, 10000000), msg) + lambda:simple_populate(self, self.uri, config, 10000000), msg) - # Figure out the last key we inserted. - cursor = self.session.open_cursor('table:' + self.name, None) + # Figure out the last key we successfully inserted, and check all + # previous inserts are still there. + cursor = self.session.open_cursor(self.uri, None) cursor.prev() last_key = int(cursor.get_key()) - simple_populate_check(self, 'table:' + self.name, last_key) + simple_populate_check(self, self.uri, last_key) + # Fill the cache with data, remove some data, ensure more data can be + # inserted (after a reasonable amount of time for space to be reclaimed). def test_insert_over_delete(self): - table_config = self.get_table_config() + config = self.fmt + self.table_config msg = '/WT_CACHE_FULL.*/' self.assertRaisesHavingMessage(wiredtiger.WiredTigerError, - lambda:simple_populate(self, - "table:" + self.name, table_config, 10000000), msg) + lambda:simple_populate(self, self.uri, config, 10000000), msg) # Now that the database contains as much data as will fit into # the configured cache, verify removes succeed. - cursor = self.session.open_cursor('table:' + self.name, None) + cursor = self.session.open_cursor(self.uri, None) for i in range(1, 100): cursor.set_key(key_populate(cursor, i)) cursor.remove() + # Run queries after adding, removing and re-inserting data. + # Try out keeping a cursor open while adding new data. 
def test_insert_over_delete_replace(self): - table_config = self.get_table_config() + config = self.fmt + self.table_config msg = '/WT_CACHE_FULL.*/' self.assertRaisesHavingMessage(wiredtiger.WiredTigerError, - lambda:simple_populate(self, - "table:" + self.name, table_config, 10000000), msg) + lambda:simple_populate(self, self.uri, config, 10000000), msg) - cursor = self.session.open_cursor('table:' + self.name, None) + cursor = self.session.open_cursor(self.uri, None) cursor.prev() last_key = int(cursor.get_key()) # Now that the database contains as much data as will fit into # the configured cache, verify removes succeed. - cursor = self.session.open_cursor('table:' + self.name, None) + cursor = self.session.open_cursor(self.uri, None) for i in range(1, last_key / 4, 1): cursor.set_key(key_populate(cursor, i)) cursor.remove() diff --git a/src/third_party/wiredtiger/test/suite/test_intpack.py b/src/third_party/wiredtiger/test/suite/test_intpack.py index 187b2d7f579..b0cece09494 100644 --- a/src/third_party/wiredtiger/test/suite/test_intpack.py +++ b/src/third_party/wiredtiger/test/suite/test_intpack.py @@ -31,7 +31,7 @@ # import wiredtiger, wttest -from wtscenario import check_scenarios, number_scenarios +from wtscenario import make_scenarios class PackTester: def __init__(self, formatcode, validlow, validhigh, equals): @@ -126,22 +126,27 @@ class PackTester: class test_intpack(wttest.WiredTigerTestCase): name = 'test_intpack' - scenarios = check_scenarios([ - ('b', dict(formatcode='b', low=-128, high=127, nbits=8)), - ('B', dict(formatcode='B', low=0, high=255, nbits=8)), - ('8t', dict(formatcode='8t', low=0, high=255, nbits=8)), - ('5t', dict(formatcode='5t', low=0, high=31, nbits=5)), - ('h', dict(formatcode='h', low=-32768, high=32767, nbits=16)), - ('H', dict(formatcode='H', low=0, high=65535, nbits=16)), - ('i', dict(formatcode='i', low=-2147483648, high=2147483647, nbits=32)), - ('I', dict(formatcode='I', low=0, high=4294967295, nbits=32)), - ('l', 
dict(formatcode='l', low=-2147483648, high=2147483647, nbits=32)), - ('L', dict(formatcode='L', low=0, high=4294967295, nbits=32)), - ('q', dict(formatcode='q', low=-9223372036854775808, + # We have to be a bit verbose here with naming, as there can be problems with + # case insensitive test names. + + scenarios = make_scenarios([ + ('int8_t_b', dict(formatcode='b', low=-128, high=127, nbits=8)), + ('uint8_t_B', dict(formatcode='B', low=0, high=255, nbits=8)), + ('fix_len_8t', dict(formatcode='8t', low=0, high=255, nbits=8)), + ('fix_len_5t', dict(formatcode='5t', low=0, high=31, nbits=5)), + ('int16_t_h', dict(formatcode='h', low=-32768, high=32767, nbits=16)), + ('uint16_t_H', dict(formatcode='H', low=0, high=65535, nbits=16)), + ('int32_t_i', dict(formatcode='i', low=-2147483648, high=2147483647, + nbits=32)), + ('uint32_t_I', dict(formatcode='I', low=0, high=4294967295, nbits=32)), + ('int32_t_l', dict(formatcode='l', low=-2147483648, high=2147483647, + nbits=32)), + ('uint32_t_L', dict(formatcode='L', low=0, high=4294967295, nbits=32)), + ('int64_t_q', dict(formatcode='q', low=-9223372036854775808, high=9223372036854775807, nbits=64)), - ('Q', dict(formatcode='Q', low=0, high=18446744073709551615, nbits=64)), + ('uint64_t_Q', dict(formatcode='Q', low=0, high=18446744073709551615, + nbits=64)), ]) - scenarios = check_scenarios(number_scenarios(scenarios)) def test_packing(self): pt = PackTester(self.formatcode, self.low, self.high, self.assertEquals) diff --git a/src/third_party/wiredtiger/test/suite/test_join01.py b/src/third_party/wiredtiger/test/suite/test_join01.py index f8d96a2718a..f3b13026896 100644 --- a/src/third_party/wiredtiger/test/suite/test_join01.py +++ b/src/third_party/wiredtiger/test/suite/test_join01.py @@ -27,7 +27,7 @@ # OTHER DEALINGS IN THE SOFTWARE. 
import wiredtiger, wttest -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_join01.py # Join operations @@ -67,11 +67,9 @@ class test_join01(wttest.WiredTigerTestCase): ('order=2', dict(join_order=2)), ('order=3', dict(join_order=3)), ] - scenarios = number_scenarios(multiply_scenarios('.', type_scen, - bloom0_scen, bloom1_scen, - projection_scen, - nested_scen, stats_scen, - order_scen)) + scenarios = make_scenarios(type_scen, bloom0_scen, bloom1_scen, + projection_scen, nested_scen, stats_scen, + order_scen) # We need statistics for these tests. conn_config = 'statistics=(all)' diff --git a/src/third_party/wiredtiger/test/suite/test_join02.py b/src/third_party/wiredtiger/test/suite/test_join02.py index a691c499cf6..db11ed01039 100644 --- a/src/third_party/wiredtiger/test/suite/test_join02.py +++ b/src/third_party/wiredtiger/test/suite/test_join02.py @@ -27,7 +27,7 @@ # OTHER DEALINGS IN THE SOFTWARE. import wiredtiger, wttest, suite_random -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_join02.py # Join operations @@ -48,7 +48,7 @@ class test_join02(wttest.WiredTigerTestCase): ('nobloom', dict(usebloom=False)) ] - scenarios = number_scenarios(multiply_scenarios('.', keyscen, bloomscen)) + scenarios = make_scenarios(keyscen, bloomscen) # Start our range from 1, since WT record numbers start at 1, # it makes things work out nicer. 
diff --git a/src/third_party/wiredtiger/test/suite/test_join03.py b/src/third_party/wiredtiger/test/suite/test_join03.py index 613d2396b07..af19d934d70 100644 --- a/src/third_party/wiredtiger/test/suite/test_join03.py +++ b/src/third_party/wiredtiger/test/suite/test_join03.py @@ -28,7 +28,6 @@ import os import wiredtiger, wttest, run -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios # test_join03.py # Join operations diff --git a/src/third_party/wiredtiger/test/suite/test_join04.py b/src/third_party/wiredtiger/test/suite/test_join04.py index 7e2afb15285..b270cb7a21c 100644 --- a/src/third_party/wiredtiger/test/suite/test_join04.py +++ b/src/third_party/wiredtiger/test/suite/test_join04.py @@ -28,7 +28,6 @@ import os import wiredtiger, wttest, run -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios # test_join04.py # Join operations diff --git a/src/third_party/wiredtiger/test/suite/test_join05.py b/src/third_party/wiredtiger/test/suite/test_join05.py index ef2be4c6460..7dcb3e08911 100644 --- a/src/third_party/wiredtiger/test/suite/test_join05.py +++ b/src/third_party/wiredtiger/test/suite/test_join05.py @@ -27,7 +27,6 @@ # OTHER DEALINGS IN THE SOFTWARE. 
import wiredtiger, wttest -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios # test_join05.py # Tests based on JIRA reports diff --git a/src/third_party/wiredtiger/test/suite/test_join06.py b/src/third_party/wiredtiger/test/suite/test_join06.py index 9af6f93792f..5fedd365712 100644 --- a/src/third_party/wiredtiger/test/suite/test_join06.py +++ b/src/third_party/wiredtiger/test/suite/test_join06.py @@ -28,7 +28,7 @@ import os import wiredtiger, wttest, run -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_join06.py # Join operations @@ -46,7 +46,7 @@ class test_join06(wttest.WiredTigerTestCase): ('nobloom', dict(bloom=False)) ] - scenarios = number_scenarios(multiply_scenarios('.', isoscen, bloomscen)) + scenarios = make_scenarios(isoscen, bloomscen) def gen_values(self, i): s = str(i) # 345 => "345" diff --git a/src/third_party/wiredtiger/test/suite/test_join07.py b/src/third_party/wiredtiger/test/suite/test_join07.py index 36e91361329..2a32e678d72 100644 --- a/src/third_party/wiredtiger/test/suite/test_join07.py +++ b/src/third_party/wiredtiger/test/suite/test_join07.py @@ -28,7 +28,7 @@ import os, re, run import wiredtiger, wttest, suite_random -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios +from wtscenario import make_scenarios class ParseException(Exception): def __init__(self, msg): @@ -198,7 +198,7 @@ class test_join07(wttest.WiredTigerTestCase): ('noextractor', dict(extractor=False)) ] - scenarios = number_scenarios(extractscen) + scenarios = make_scenarios(extractscen) # Return the wiredtiger_open extension argument for a shared library. 
def extensionArg(self, exts): diff --git a/src/third_party/wiredtiger/test/suite/test_join08.py b/src/third_party/wiredtiger/test/suite/test_join08.py index 6d674ab8193..d389fad706b 100644 --- a/src/third_party/wiredtiger/test/suite/test_join08.py +++ b/src/third_party/wiredtiger/test/suite/test_join08.py @@ -27,7 +27,6 @@ # OTHER DEALINGS IN THE SOFTWARE. import wiredtiger, wttest -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios # test_join08.py # Test join error paths diff --git a/src/third_party/wiredtiger/test/suite/test_jsondump01.py b/src/third_party/wiredtiger/test/suite/test_jsondump01.py index 10262edc777..dc8027c2115 100644 --- a/src/third_party/wiredtiger/test/suite/test_jsondump01.py +++ b/src/third_party/wiredtiger/test/suite/test_jsondump01.py @@ -29,10 +29,12 @@ import os, json import wiredtiger, wttest from helper import \ - complex_populate, complex_populate_check_cursor,\ - simple_populate, simple_populate_check_cursor + complex_populate, complex_populate_check, complex_populate_check_cursor,\ + simple_populate, simple_populate_check, simple_populate_check_cursor, \ + simple_index_populate, simple_index_populate_check, \ + simple_index_populate_check_cursor, compare_files from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # A 'fake' cursor based on a set of rows. # It emulates a WT cursor well enough for the *_check_cursor methods. 
@@ -79,25 +81,34 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess): types = [ ('file', dict(uri='file:', config='', lsm=False, populate=simple_populate, - populate_check=simple_populate_check_cursor)), + populate_check=simple_populate_check, + populate_check_cursor=simple_populate_check_cursor)), ('lsm', dict(uri='lsm:', config='', lsm=True, populate=simple_populate, - populate_check=simple_populate_check_cursor)), + populate_check=simple_populate_check, + populate_check_cursor=simple_populate_check_cursor)), ('table-simple', dict(uri='table:', config='', lsm=False, populate=simple_populate, - populate_check=simple_populate_check_cursor)), + populate_check=simple_populate_check, + populate_check_cursor=simple_populate_check_cursor)), + ('table-index', dict(uri='table:', config='', lsm=False, + populate=simple_index_populate, + populate_check=simple_index_populate_check, + populate_check_cursor=simple_index_populate_check_cursor)), ('table-simple-lsm', dict(uri='table:', config='type=lsm', lsm=True, populate=simple_populate, - populate_check=simple_populate_check_cursor)), + populate_check=simple_populate_check, + populate_check_cursor=simple_populate_check_cursor)), ('table-complex', dict(uri='table:', config='', lsm=False, populate=complex_populate, - populate_check=complex_populate_check_cursor)), + populate_check=complex_populate_check, + populate_check_cursor=complex_populate_check_cursor)), ('table-complex-lsm', dict(uri='table:', config='type=lsm', lsm=True, populate=complex_populate, - populate_check=complex_populate_check_cursor)) + populate_check=complex_populate_check, + populate_check_cursor=complex_populate_check_cursor)) ] - scenarios = number_scenarios( - multiply_scenarios('.', types, keyfmt)) + scenarios = make_scenarios(types, keyfmt) # Dump using util, re-load using python's JSON, and do a content comparison. 
def test_jsondump_util(self): @@ -132,7 +143,7 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess): cursor = self.session.open_cursor(uri, None) fake = FakeCursor(cursor.key_format, cursor.value_format, data) cursor.close() - self.populate_check(self, fake, self.nentries) + self.populate_check_cursor(self, fake, self.nentries) # Dump using util, re-load using python's JSON, and do a content comparison. def test_jsonload_util(self): @@ -153,9 +164,18 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess): loadcmd.append('-a') self.runWt(loadcmd) - # check the contents of the data we read. - cursor = self.session.open_cursor(uri2, None) - self.populate_check(self, cursor, self.nentries) + # Check the contents of the data we read. + self.populate_check(self, uri2, self.nentries) + + # Reload into the original uri, and dump into another file. + self.session.drop(uri, None) + self.session.drop(uri2, None) + self.runWt(['load', '-jf', 'jsondump.out']) + self.runWt(['dump', '-j', uri], outfilename='jsondump2.out') + + # Compare the two outputs, and check the content again. 
+ compare_files(self, 'jsondump.out', 'jsondump2.out') + self.populate_check(self, uri, self.nentries) if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_lsm01.py b/src/third_party/wiredtiger/test/suite/test_lsm01.py index 1f89cf38d77..f6cee20e896 100644 --- a/src/third_party/wiredtiger/test/suite/test_lsm01.py +++ b/src/third_party/wiredtiger/test/suite/test_lsm01.py @@ -54,12 +54,10 @@ class test_lsm01(wttest.WiredTigerTestCase): config_vars = [ 'chunk_size', 'merge_max', 'bloom', 'bloom_bit_count', 'bloom_hash_count' ] - all_scenarios = wtscenario.multiply_scenarios('_', + scenarios = wtscenario.make_scenarios( chunk_size_scenarios, merge_max_scenarios, bloom_scenarios, - bloom_bit_scenarios, bloom_hash_scenarios, record_count_scenarios) - - scenarios = wtscenario.prune_scenarios(all_scenarios, 500) - scenarios = wtscenario.number_scenarios(scenarios) + bloom_bit_scenarios, bloom_hash_scenarios, record_count_scenarios, + prune=500) # Test drop of an object. def test_lsm(self): diff --git a/src/third_party/wiredtiger/test/suite/test_metadata_cursor01.py b/src/third_party/wiredtiger/test/suite/test_metadata_cursor01.py index e759c14f846..7802f89f174 100644 --- a/src/third_party/wiredtiger/test/suite/test_metadata_cursor01.py +++ b/src/third_party/wiredtiger/test/suite/test_metadata_cursor01.py @@ -27,7 +27,7 @@ # OTHER DEALINGS IN THE SOFTWARE. 
import wiredtiger, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_metadata_cursor01.py # Metadata cursor operations @@ -39,7 +39,7 @@ class test_metadata_cursor01(wttest.WiredTigerTestCase): """ table_name1 = 'test_metadata_cursor01' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('plain', {'metauri' : 'metadata:'}), ('create', {'metauri' : 'metadata:create'}), ]) diff --git a/src/third_party/wiredtiger/test/suite/test_nsnap01.py b/src/third_party/wiredtiger/test/suite/test_nsnap01.py index 5207b577ba4..7e8951750f8 100644 --- a/src/third_party/wiredtiger/test/suite/test_nsnap01.py +++ b/src/third_party/wiredtiger/test/suite/test_nsnap01.py @@ -30,7 +30,6 @@ # Named snapshots: basic API from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios from helper import simple_populate import wiredtiger, wttest diff --git a/src/third_party/wiredtiger/test/suite/test_nsnap02.py b/src/third_party/wiredtiger/test/suite/test_nsnap02.py index e4ed65ef72a..510c9d421ef 100644 --- a/src/third_party/wiredtiger/test/suite/test_nsnap02.py +++ b/src/third_party/wiredtiger/test/suite/test_nsnap02.py @@ -30,7 +30,6 @@ # Named snapshots: Combinations of dropping snapshots from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios from helper import simple_populate import wiredtiger, wttest diff --git a/src/third_party/wiredtiger/test/suite/test_nsnap03.py b/src/third_party/wiredtiger/test/suite/test_nsnap03.py index 0e853522940..3986c0c1a0a 100644 --- a/src/third_party/wiredtiger/test/suite/test_nsnap03.py +++ b/src/third_party/wiredtiger/test/suite/test_nsnap03.py @@ -30,7 +30,6 @@ # Named snapshots: Access and create from multiple sessions from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios from helper import simple_populate import wiredtiger, wttest diff --git 
a/src/third_party/wiredtiger/test/suite/test_nsnap04.py b/src/third_party/wiredtiger/test/suite/test_nsnap04.py index e8a5c9b6140..f9ef26b5600 100644 --- a/src/third_party/wiredtiger/test/suite/test_nsnap04.py +++ b/src/third_party/wiredtiger/test/suite/test_nsnap04.py @@ -30,7 +30,6 @@ # Named snapshots: Create snapshot from running transaction from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios from helper import simple_populate import wiredtiger, wttest diff --git a/src/third_party/wiredtiger/test/suite/test_overwrite.py b/src/third_party/wiredtiger/test/suite/test_overwrite.py index e22cdab4dea..4972a016bec 100644 --- a/src/third_party/wiredtiger/test/suite/test_overwrite.py +++ b/src/third_party/wiredtiger/test/suite/test_overwrite.py @@ -28,13 +28,13 @@ import wiredtiger, wttest from helper import key_populate, simple_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_overwrite.py # cursor overwrite configuration method class test_overwrite(wttest.WiredTigerTestCase): name = 'overwrite' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file-r', dict(type='file:',keyfmt='r')), ('file-S', dict(type='file:',keyfmt='S')), ('lsm-S', dict(type='lsm:',keyfmt='S')), diff --git a/src/third_party/wiredtiger/test/suite/test_perf001.py b/src/third_party/wiredtiger/test/suite/test_perf001.py index 1280639c9dd..b22ed2baeb0 100644 --- a/src/third_party/wiredtiger/test/suite/test_perf001.py +++ b/src/third_party/wiredtiger/test/suite/test_perf001.py @@ -32,13 +32,13 @@ import wiredtiger, wttest import random from time import clock, time -from wtscenario import check_scenarios +from wtscenario import make_scenarios # Test performance of inserting into a table with an index. 
class test_perf001(wttest.WiredTigerTestCase): table_name = 'test_perf001' - scenarios = check_scenarios([ + scenarios = make_scenarios([ #('file-file', dict(tabletype='file',indextype='file')), ('file-lsm', dict(tabletype='file',indextype='lsm')), #('lsm-file', dict(tabletype='lsm',indextype='file')), diff --git a/src/third_party/wiredtiger/test/suite/test_readonly01.py b/src/third_party/wiredtiger/test/suite/test_readonly01.py index 59e9743ab7e..e4b431ca1da 100644 --- a/src/third_party/wiredtiger/test/suite/test_readonly01.py +++ b/src/third_party/wiredtiger/test/suite/test_readonly01.py @@ -32,7 +32,7 @@ import fnmatch, os, shutil, time from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios +from wtscenario import make_scenarios import wttest class test_readonly01(wttest.WiredTigerTestCase, suite_subprocess): @@ -73,8 +73,7 @@ class test_readonly01(wttest.WiredTigerTestCase, suite_subprocess): create_params = 'key_format=r,value_format=8t')), ] - scenarios = multiply_scenarios('.', - basecfg_list, dir_list, log_list, types) + scenarios = make_scenarios(basecfg_list, dir_list, log_list, types) def conn_config(self, dir): self.home = dir diff --git a/src/third_party/wiredtiger/test/suite/test_rebalance.py b/src/third_party/wiredtiger/test/suite/test_rebalance.py index f2167e864c9..98bd81de602 100644 --- a/src/third_party/wiredtiger/test/suite/test_rebalance.py +++ b/src/third_party/wiredtiger/test/suite/test_rebalance.py @@ -29,7 +29,7 @@ import os, time import wiredtiger, wttest from helper import complex_populate, simple_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_rebalance.py # session level rebalance operation @@ -41,7 +41,7 @@ class test_rebalance(wttest.WiredTigerTestCase): config = 'key_format=S,allocation_size=512,internal_page_max=512' + \ ',leaf_page_max=1k,lsm=(chunk_size=512k,merge_min=10)' - scenarios = check_scenarios([ + scenarios 
= make_scenarios([ ('file', dict(uri='file:')), ('table', dict(uri='table:')), ('lsm', dict(uri='lsm:')) diff --git a/src/third_party/wiredtiger/test/suite/test_reconfig01.py b/src/third_party/wiredtiger/test/suite/test_reconfig01.py index 876de1fe5af..fb3fb7edac6 100644 --- a/src/third_party/wiredtiger/test/suite/test_reconfig01.py +++ b/src/third_party/wiredtiger/test/suite/test_reconfig01.py @@ -92,22 +92,25 @@ class test_reconfig01(wttest.WiredTigerTestCase): self.conn.reconfigure("checkpoint=(wait=5)") self.conn.reconfigure("checkpoint=(log_size=0)") self.conn.reconfigure("checkpoint=(log_size=1M)") - self.conn.reconfigure("checkpoint=(wait=0,name=hi)") - self.conn.reconfigure("checkpoint=(wait=5,name=hi)") - def test_reconfig_stat_log(self): + # Statistics logging: reconfigure the things we can reconfigure. + def test_reconfig_statistics_log_ok(self): self.conn.reconfigure("statistics=[all],statistics_log=(wait=0)") self.conn.reconfigure("statistics_log=(wait=0)") - self.conn.reconfigure("statistics_log=(wait=2)") + self.conn.reconfigure("statistics_log=(wait=2,json=true)") + self.conn.reconfigure("statistics_log=(wait=0)") + self.conn.reconfigure("statistics_log=(wait=2,on_close=true)") self.conn.reconfigure("statistics_log=(wait=0)") self.conn.reconfigure("statistics_log=(wait=2,sources=[lsm:])") self.conn.reconfigure("statistics_log=(wait=0)") self.conn.reconfigure("statistics_log=(wait=2,timestamp=\"t%b %d\")") self.conn.reconfigure("statistics_log=(wait=0)") - self.conn.reconfigure("statistics_log=(wait=2,path=\"wts.%d.%H\")") - self.conn.reconfigure("statistics_log=(wait=0)") - self.conn.reconfigure( - "statistics_log=(wait=2,sources=[lsm:],timestamp=\"%b\")") + + # Statistics logging: reconfigure the things we can't reconfigure. 
+ def test_reconfig_statistics_log_fail(self): + msg = '/unknown configuration key/' + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.conn.reconfigure("log=(path=foo)"), msg) def test_file_manager(self): self.conn.reconfigure("file_manager=(close_scan_interval=3)") diff --git a/src/third_party/wiredtiger/test/suite/test_reconfig02.py b/src/third_party/wiredtiger/test/suite/test_reconfig02.py index 85a9ceb2a34..9d9ac220aa7 100644 --- a/src/third_party/wiredtiger/test/suite/test_reconfig02.py +++ b/src/third_party/wiredtiger/test/suite/test_reconfig02.py @@ -41,24 +41,29 @@ class test_reconfig02(wttest.WiredTigerTestCase): self.conn_config = self.init_config return wttest.WiredTigerTestCase.setUpConnectionOpen(self, dir) - # Call reconfigure for zero filling a file. There is nothing - # we can actually look for to confirm it did anything. - # Also changing the log file size is a no-op, but should not fail. + # Logging: reconfigure the things we can reconfigure. def test_reconfig02_simple(self): + self.conn.reconfigure("log=(archive=false)") + self.conn.reconfigure("log=(prealloc=false)") + self.conn.reconfigure("log=(zero_fill=false)") + + self.conn.reconfigure("log=(archive=true)") + self.conn.reconfigure("log=(prealloc=true)") self.conn.reconfigure("log=(zero_fill=true)") - self.conn.reconfigure("log=(file_max=1MB)") - # Test that we get an error if we try to turn logging off. + # Logging: reconfigure the things we can't reconfigure. 
def test_reconfig02_disable(self): - msg = 'Invalid argument' - gotException = False - try: - self.conn.reconfigure("log=(enabled=false)") - except wiredtiger.WiredTigerError as e: - gotException = True - self.pr('got exception: ' + str(e)) - self.assertTrue(str(e).find(msg) >= 0) - self.assertTrue(gotException) + msg = '/unknown configuration key/' + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.conn.reconfigure("log=(enabled=true)"), msg) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.conn.reconfigure("log=(compressor=foo)"), msg) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.conn.reconfigure("log=(file_max=1MB)"), msg) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.conn.reconfigure("log=(path=foo)"), msg) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.conn.reconfigure("log=(recovery=true)"), msg) # Logging starts on, but prealloc is off. Verify it is off. 
# Reconfigure it on and run again, making sure that log files diff --git a/src/third_party/wiredtiger/test/suite/test_rename.py b/src/third_party/wiredtiger/test/suite/test_rename.py index af968a4a38d..1979bbb802a 100644 --- a/src/third_party/wiredtiger/test/suite/test_rename.py +++ b/src/third_party/wiredtiger/test/suite/test_rename.py @@ -31,7 +31,7 @@ import wiredtiger, wttest from helper import confirm_does_not_exist,\ complex_populate, complex_populate_check,\ simple_populate, simple_populate_check -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_rename.py # session level rename operation @@ -39,7 +39,7 @@ class test_rename(wttest.WiredTigerTestCase): name1 = 'test_rename1' name2 = 'test_rename2' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:')), ('table', dict(uri='table:')) ]) diff --git a/src/third_party/wiredtiger/test/suite/test_schema02.py b/src/third_party/wiredtiger/test/suite/test_schema02.py index b404261c066..bccc7dfc728 100644 --- a/src/third_party/wiredtiger/test/suite/test_schema02.py +++ b/src/third_party/wiredtiger/test/suite/test_schema02.py @@ -27,7 +27,7 @@ # OTHER DEALINGS IN THE SOFTWARE. 
import wiredtiger, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_schema02.py # Columns, column groups, indexes @@ -37,7 +37,7 @@ class test_schema02(wttest.WiredTigerTestCase): """ nentries = 1000 - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('normal', { 'idx_config' : '' }), ('lsm', { 'idx_config' : ',type=lsm' }), ]) diff --git a/src/third_party/wiredtiger/test/suite/test_schema03.py b/src/third_party/wiredtiger/test/suite/test_schema03.py index f48bfdf3cf8..81556393e78 100644 --- a/src/third_party/wiredtiger/test/suite/test_schema03.py +++ b/src/third_party/wiredtiger/test/suite/test_schema03.py @@ -29,7 +29,7 @@ import os import suite_random import wiredtiger, wtscenario, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios try: # Windows does not getrlimit/setrlimit so we must catch the resource @@ -249,7 +249,7 @@ class test_schema03(wttest.WiredTigerTestCase): # but boost it up to this limit anyway. 
OPEN_FILE_LIMIT = 1000 - restart_scenarios = check_scenarios([('table', dict(s_restart=['table'],P=0.3)), + restart_scenarios = [('table', dict(s_restart=['table'],P=0.3)), ('colgroup0', dict(s_restart=['colgroup0'],P=0.3)), ('index0', dict(s_restart=['index0'],P=0.3)), ('colgroup1', dict(s_restart=['colgroup1'],P=0.3)), @@ -259,7 +259,7 @@ class test_schema03(wttest.WiredTigerTestCase): ('populate1', dict(s_restart=['populate1'],P=0.3)), ('ipop', dict(s_restart=['index0','populate0'],P=0.3)), ('all', dict(s_restart=['table','colgroup0','index0','colgroup1','index1','populate0','index2','populate1'],P=1.0)), - ]) + ] ntable_scenarios = wtscenario.quick_scenarios('s_ntable', [1,2,5,8], [1.0,0.4,0.5,0.5]) @@ -272,11 +272,10 @@ class test_schema03(wttest.WiredTigerTestCase): table_args_scenarios = wtscenario.quick_scenarios('s_extra_table_args', ['', ',type=file', ',type=lsm'], [0.5, 0.3, 0.2]) - all_scenarios = wtscenario.multiply_scenarios('_', restart_scenarios, ntable_scenarios, ncolgroup_scenarios, nindex_scenarios, idx_args_scenarios, table_args_scenarios) - - # Prune the scenarios according to the probabilities given above. - scenarios = wtscenario.prune_scenarios(all_scenarios, 30) - scenarios = wtscenario.number_scenarios(scenarios) + scenarios = wtscenario.make_scenarios( + restart_scenarios, ntable_scenarios, ncolgroup_scenarios, + nindex_scenarios, idx_args_scenarios, table_args_scenarios, + prune=30) # Note: the set can be reduced here for debugging, e.g. 
# scenarios = scenarios[40:44] diff --git a/src/third_party/wiredtiger/test/suite/test_schema04.py b/src/third_party/wiredtiger/test/suite/test_schema04.py index cd41138deb0..8ac81690819 100644 --- a/src/third_party/wiredtiger/test/suite/test_schema04.py +++ b/src/third_party/wiredtiger/test/suite/test_schema04.py @@ -28,7 +28,7 @@ import os import wiredtiger, wttest, run -from wtscenario import check_scenarios, number_scenarios +from wtscenario import make_scenarios # test_schema04.py # Test indices with duplicates @@ -47,7 +47,7 @@ class test_schema04(wttest.WiredTigerTestCase): """ nentries = 100 - scenarios = number_scenarios([ + scenarios = make_scenarios([ ('index-before', { 'create_index' : 0 }), ('index-during', { 'create_index' : 1 }), ('index-after', { 'create_index' : 2 }), diff --git a/src/third_party/wiredtiger/test/suite/test_schema05.py b/src/third_party/wiredtiger/test/suite/test_schema05.py index 89722d5f89a..89484cfc7bd 100644 --- a/src/third_party/wiredtiger/test/suite/test_schema05.py +++ b/src/third_party/wiredtiger/test/suite/test_schema05.py @@ -28,7 +28,7 @@ import os import wiredtiger, wttest, run -from wtscenario import check_scenarios, number_scenarios +from wtscenario import make_scenarios # test_schema05.py # Test indices using a custom extractor. @@ -51,7 +51,7 @@ class test_schema05(wttest.WiredTigerTestCase): nentries = 1000 nindices = 6 - scenarios = number_scenarios([ + scenarios = make_scenarios([ ('index-before', { 'create_index' : 0 }), ('index-during', { 'create_index' : 1 }), ('index-after', { 'create_index' : 2 }), diff --git a/src/third_party/wiredtiger/test/suite/test_schema06.py b/src/third_party/wiredtiger/test/suite/test_schema06.py index e72959edf2a..e0eec189137 100644 --- a/src/third_party/wiredtiger/test/suite/test_schema06.py +++ b/src/third_party/wiredtiger/test/suite/test_schema06.py @@ -27,6 +27,7 @@ # OTHER DEALINGS IN THE SOFTWARE. 
import wiredtiger, wttest +from wtscenario import make_scenarios # test_schema06.py # Repeatedly create and drop indices @@ -36,10 +37,10 @@ class test_schema06(wttest.WiredTigerTestCase): """ nentries = 1000 - scenarios = [ + scenarios = make_scenarios([ ('normal', { 'idx_config' : '' }), ('lsm', { 'idx_config' : ',type=lsm' }), - ] + ]) def flip(self, inum, val): """ diff --git a/src/third_party/wiredtiger/test/suite/test_split.py b/src/third_party/wiredtiger/test/suite/test_split.py index d09613e1c52..28bf6bc59b0 100644 --- a/src/third_party/wiredtiger/test/suite/test_split.py +++ b/src/third_party/wiredtiger/test/suite/test_split.py @@ -35,7 +35,6 @@ from wiredtiger import stat from helper import confirm_empty,\ key_populate, value_populate, simple_populate,\ complex_populate, complex_value_populate -from wtscenario import multiply_scenarios, number_scenarios # Test splits class test_split(wttest.WiredTigerTestCase): diff --git a/src/third_party/wiredtiger/test/suite/test_stat01.py b/src/third_party/wiredtiger/test/suite/test_stat01.py index 5c3259696eb..1ad51ee9882 100644 --- a/src/third_party/wiredtiger/test/suite/test_stat01.py +++ b/src/third_party/wiredtiger/test/suite/test_stat01.py @@ -29,7 +29,7 @@ import helper, wiredtiger, wttest from wiredtiger import stat from helper import key_populate, simple_populate -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_stat01.py # Statistics operations @@ -49,7 +49,7 @@ class test_stat01(wttest.WiredTigerTestCase): ('recno', dict(keyfmt='r')), ('string', dict(keyfmt='S')), ] - scenarios = number_scenarios(multiply_scenarios('.', types, keyfmt)) + scenarios = make_scenarios(types, keyfmt) conn_config = 'statistics=(all)' diff --git a/src/third_party/wiredtiger/test/suite/test_stat02.py b/src/third_party/wiredtiger/test/suite/test_stat02.py index 88371947b5b..ef3907e54b1 100644 --- a/src/third_party/wiredtiger/test/suite/test_stat02.py +++ 
b/src/third_party/wiredtiger/test/suite/test_stat02.py @@ -28,7 +28,7 @@ import itertools, wiredtiger, wttest from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios from wiredtiger import stat from helper import complex_populate, complex_populate_lsm, simple_populate @@ -57,8 +57,7 @@ class test_stat_cursor_config(wttest.WiredTigerTestCase): ('size', dict(cursor_config='size')) ] - scenarios = number_scenarios( - multiply_scenarios('.', uri, data_config, cursor_config)) + scenarios = make_scenarios(uri, data_config, cursor_config) # Turn on statistics for this test. def conn_config(self, dir): @@ -106,13 +105,13 @@ class test_stat_cursor_dsrc_clear(wttest.WiredTigerTestCase): pfx = 'test_stat_cursor_dsrc_clear' uri = [ - ('1', dict(uri='file:' + pfx, pop=simple_populate)), - ('2', dict(uri='table:' + pfx, pop=simple_populate)), - ('3', dict(uri='table:' + pfx, pop=complex_populate)), - ('4', dict(uri='table:' + pfx, pop=complex_populate_lsm)) + ('dsrc_clear_1', dict(uri='file:' + pfx, pop=simple_populate)), + ('dsrc_clear_2', dict(uri='table:' + pfx, pop=simple_populate)), + ('dsrc_clear_3', dict(uri='table:' + pfx, pop=complex_populate)), + ('dsrc_clear_4', dict(uri='table:' + pfx, pop=complex_populate_lsm)) ] - scenarios = number_scenarios(multiply_scenarios('.', uri)) + scenarios = make_scenarios(uri) conn_config = 'statistics=(all)' def test_stat_cursor_dsrc_clear(self): @@ -136,13 +135,13 @@ class test_stat_cursor_fast(wttest.WiredTigerTestCase): pfx = 'test_stat_cursor_fast' uri = [ - ('1', dict(uri='file:' + pfx, pop=simple_populate)), - ('2', dict(uri='table:' + pfx, pop=simple_populate)), - ('3', dict(uri='table:' + pfx, pop=complex_populate)), - ('4', dict(uri='table:' + pfx, pop=complex_populate_lsm)) + ('fast_1', dict(uri='file:' + pfx, pop=simple_populate)), + ('fast_2', dict(uri='table:' + pfx, pop=simple_populate)), + ('fast_3', dict(uri='table:' + pfx, 
pop=complex_populate)), + ('fast_4', dict(uri='table:' + pfx, pop=complex_populate_lsm)) ] - scenarios = number_scenarios(multiply_scenarios('.', uri)) + scenarios = make_scenarios(uri) conn_config = 'statistics=(all)' def test_stat_cursor_fast(self): @@ -180,13 +179,13 @@ class test_stat_cursor_dsrc_error(wttest.WiredTigerTestCase): pfx = 'test_stat_cursor_dsrc_error' uri = [ - ('1', dict(uri='file:' + pfx, pop=simple_populate)), - ('2', dict(uri='table:' + pfx, pop=simple_populate)), - ('3', dict(uri='table:' + pfx, pop=complex_populate)), - ('4', dict(uri='table:' + pfx, pop=complex_populate_lsm)) + ('dsrc_error_1', dict(uri='file:' + pfx, pop=simple_populate)), + ('dsrc_error_2', dict(uri='table:' + pfx, pop=simple_populate)), + ('dsrc_error_3', dict(uri='table:' + pfx, pop=complex_populate)), + ('dsrc_error_4', dict(uri='table:' + pfx, pop=complex_populate_lsm)) ] - scenarios = number_scenarios(multiply_scenarios('.', uri)) + scenarios = make_scenarios(uri) conn_config = 'statistics=(all)' def test_stat_cursor_dsrc_error(self): diff --git a/src/third_party/wiredtiger/test/suite/test_stat03.py b/src/third_party/wiredtiger/test/suite/test_stat03.py index 039ad1f7f8d..b17fe6eb91c 100644 --- a/src/third_party/wiredtiger/test/suite/test_stat03.py +++ b/src/third_party/wiredtiger/test/suite/test_stat03.py @@ -34,7 +34,7 @@ from helper import complex_populate, complex_populate_lsm, simple_populate from helper import key_populate, complex_value_populate, value_populate from helper import complex_populate_colgroup_count, complex_populate_index_count from helper import complex_populate_colgroup_name, complex_populate_index_name -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_stat03.py # Statistics reset test. 
@@ -51,7 +51,7 @@ class test_stat_cursor_reset(wttest.WiredTigerTestCase): dict(uri='table:' + pfx, pop=complex_populate_lsm)), ] - scenarios = number_scenarios(multiply_scenarios('.', uri)) + scenarios = make_scenarios(uri) conn_config = 'statistics=(all)' def stat_cursor(self, uri): diff --git a/src/third_party/wiredtiger/test/suite/test_stat04.py b/src/third_party/wiredtiger/test/suite/test_stat04.py index e7c39371f80..b5309efff37 100644 --- a/src/third_party/wiredtiger/test/suite/test_stat04.py +++ b/src/third_party/wiredtiger/test/suite/test_stat04.py @@ -28,7 +28,7 @@ import os, struct from suite_subprocess import suite_subprocess -from wtscenario import number_scenarios, multiply_scenarios +from wtscenario import make_scenarios import wiredtiger, wttest from wiredtiger import stat @@ -49,7 +49,7 @@ class test_stat04(wttest.WiredTigerTestCase, suite_subprocess): ('large', dict(nentries=100000, valuesize=1)), ('jumboval', dict(nentries=100, valuesize=4200000)), ] - scenarios = number_scenarios(multiply_scenarios('.', keyfmt, nentries)) + scenarios = make_scenarios(keyfmt, nentries) conn_config = 'statistics=(all)' def init_test(self): @@ -91,6 +91,7 @@ class test_stat04(wttest.WiredTigerTestCase, suite_subprocess): self.checkcount(uri, count) cursor[self.genkey(i)] = self.genvalue(i) count += 1 + # Remove a number of entries, at each step checking that stats match. for i in range(0, self.nentries / 37): cursor.set_key(self.genkey(i*11 % self.nentries)) @@ -99,5 +100,10 @@ class test_stat04(wttest.WiredTigerTestCase, suite_subprocess): self.checkcount(uri, count) cursor.close() + # Confirm the count is correct after writing to the backing file, + # that tests the on-disk format as well as the in-memory format. 
+ self.reopen_conn() + self.checkcount(uri, count) + if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_stat05.py b/src/third_party/wiredtiger/test/suite/test_stat05.py index 9bcedd65089..62562f78ed6 100644 --- a/src/third_party/wiredtiger/test/suite/test_stat05.py +++ b/src/third_party/wiredtiger/test/suite/test_stat05.py @@ -28,7 +28,7 @@ import itertools, wiredtiger, wttest from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios from wiredtiger import stat from helper import complex_populate, complex_populate_lsm, simple_populate from helper import complex_value_populate, key_populate, value_populate @@ -43,16 +43,18 @@ class test_stat_cursor_config(wttest.WiredTigerTestCase): ('file', dict(uri='file:' + pfx, pop=simple_populate, cfg='')), ('table', dict(uri='table:' + pfx, pop=simple_populate, cfg='')), ('inmem', dict(uri='table:' + pfx, pop=simple_populate, cfg='', - conn_config='in_memory,statistics=(fast)')), + conn_config = 'in_memory,statistics=(fast)')), ('table-lsm', dict(uri='table:' + pfx, pop=simple_populate, - cfg=',type=lsm,lsm=(chunk_size=1MB,merge_min=2)')), + cfg=',type=lsm,lsm=(chunk_size=1MB,merge_min=2)', + conn_config = 'statistics=(fast),eviction_dirty_target=99,eviction_dirty_trigger=99')), ('complex', dict(uri='table:' + pfx, pop=complex_populate, cfg='')), ('complex-lsm', dict(uri='table:' + pfx, pop=complex_populate_lsm, - cfg=',lsm=(chunk_size=1MB,merge_min=2)')), + cfg=',lsm=(chunk_size=1MB,merge_min=2)', + conn_config = 'statistics=(fast),eviction_dirty_target=99,eviction_dirty_trigger=99')), ] - scenarios = number_scenarios(uri) + scenarios = make_scenarios(uri) def openAndWalkStatCursor(self): c = self.session.open_cursor( @@ -62,7 +64,6 @@ class test_stat_cursor_config(wttest.WiredTigerTestCase): count += 1 c.close() - # Open a size-only statistics cursor on various table types. 
Ensure that # the cursor open succeeds. Insert enough data that LSM tables to need to # switch and merge. diff --git a/src/third_party/wiredtiger/test/suite/test_stat_log01.py b/src/third_party/wiredtiger/test/suite/test_stat_log01.py index f6033d940c5..65ce80dfe7d 100644 --- a/src/third_party/wiredtiger/test/suite/test_stat_log01.py +++ b/src/third_party/wiredtiger/test/suite/test_stat_log01.py @@ -51,9 +51,10 @@ class test_stat_log01(wttest.WiredTigerTestCase): None, "create,statistics=(fast),statistics_log=(wait=1)") # Wait for the default interval, to ensure stats have been written. time.sleep(2) - self.check_stats_file("WiredTigerStat") + self.check_stats_file(".") def test_stats_log_name(self): + os.mkdir("foo") self.conn = self.wiredtiger_open( None, "create,statistics=(fast),statistics_log=(wait=1,path=foo)") # Wait for the default interval, to ensure stats have been written. @@ -66,21 +67,18 @@ class test_stat_log01(wttest.WiredTigerTestCase): # Wait for the default interval, to ensure stats have been written. time.sleep(2) self.close_conn() - self.check_stats_file("WiredTigerStat") + self.check_stats_file(".") def test_stats_log_on_close(self): self.conn = self.wiredtiger_open(None, "create,statistics=(fast),statistics_log=(on_close=true)") # Close the connection to ensure the statistics get generated. 
self.close_conn() - self.check_stats_file("WiredTigerStat") + self.check_stats_file(".") - def check_stats_file(self, filename): - if filename == "WiredTigerStat": - files = glob.glob(filename + '.[0-9]*') - self.assertTrue(files) - else: - self.assertTrue(os.path.isfile(filename)) + def check_stats_file(self, dir): + files = glob.glob(dir + '/' + 'WiredTigerStat.[0-9]*') + self.assertTrue(files) if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_sweep01.py b/src/third_party/wiredtiger/test/suite/test_sweep01.py index bccd2bce012..71f8fcb180e 100644 --- a/src/third_party/wiredtiger/test/suite/test_sweep01.py +++ b/src/third_party/wiredtiger/test/suite/test_sweep01.py @@ -33,8 +33,8 @@ import fnmatch, os, shutil, run, time from suite_subprocess import suite_subprocess +from wtscenario import make_scenarios from wiredtiger import stat -from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios import wttest class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess): @@ -55,7 +55,7 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess): create_params = 'key_format=r,value_format=8t')), ] - scenarios = types + scenarios = make_scenarios(types) def test_ops(self): # diff --git a/src/third_party/wiredtiger/test/suite/test_sweep03.py b/src/third_party/wiredtiger/test/suite/test_sweep03.py index 061c2f5b37b..61078fa96b5 100644 --- a/src/third_party/wiredtiger/test/suite/test_sweep03.py +++ b/src/third_party/wiredtiger/test/suite/test_sweep03.py @@ -33,7 +33,7 @@ import fnmatch, os, shutil, run, time from suite_subprocess import suite_subprocess from wiredtiger import stat -from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios +from wtscenario import make_scenarios import wttest class test_sweep03(wttest.WiredTigerTestCase, suite_subprocess): @@ -54,7 +54,7 @@ class test_sweep03(wttest.WiredTigerTestCase, suite_subprocess): create_params = 'key_format=r,value_format=8t')), 
] - scenarios = types + scenarios = make_scenarios(types) def test_disable_idle_timeout1(self): # diff --git a/src/third_party/wiredtiger/test/suite/test_truncate01.py b/src/third_party/wiredtiger/test/suite/test_truncate01.py index 77a476e40c1..9a3518c6984 100644 --- a/src/third_party/wiredtiger/test/suite/test_truncate01.py +++ b/src/third_party/wiredtiger/test/suite/test_truncate01.py @@ -34,13 +34,13 @@ import wiredtiger, wttest from helper import confirm_empty,\ key_populate, value_populate, simple_populate,\ complex_populate, complex_value_populate -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Test truncation arguments. class test_truncate_arguments(wttest.WiredTigerTestCase): name = 'test_truncate' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(type='file:')), ('table', dict(type='table:')) ]) @@ -80,7 +80,7 @@ class test_truncate_arguments(wttest.WiredTigerTestCase): # Test truncation of an object using its URI. class test_truncate_uri(wttest.WiredTigerTestCase): name = 'test_truncate' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(type='file:')), ('table', dict(type='table:')) ]) @@ -115,7 +115,7 @@ class test_truncate_cursor_order(wttest.WiredTigerTestCase): ('recno', dict(keyfmt='r')), ('string', dict(keyfmt='S')), ] - scenarios = number_scenarios(multiply_scenarios('.', types, keyfmt)) + scenarios = make_scenarios(types, keyfmt) # Test an illegal order, then confirm that equal cursors works. def test_truncate_cursor_order(self): @@ -146,7 +146,7 @@ class test_truncate_cursor_end(wttest.WiredTigerTestCase): ('recno', dict(keyfmt='r')), ('string', dict(keyfmt='S')), ] - scenarios = number_scenarios(multiply_scenarios('.', types, keyfmt)) + scenarios = make_scenarios(types, keyfmt) # Test truncation of cursors past the end of the object. 
def test_truncate_cursor_order(self): @@ -205,8 +205,7 @@ class test_truncate_cursor(wttest.WiredTigerTestCase): ('big', dict(nentries=1000,skip=37)), ] - scenarios = number_scenarios( - multiply_scenarios('.', types, keyfmt, size, reopen)) + scenarios = make_scenarios(types, keyfmt, size, reopen) # Set a cursor key. def cursorKey(self, uri, key): diff --git a/src/third_party/wiredtiger/test/suite/test_truncate02.py b/src/third_party/wiredtiger/test/suite/test_truncate02.py index 6c11302787c..e57a65d2f97 100644 --- a/src/third_party/wiredtiger/test/suite/test_truncate02.py +++ b/src/third_party/wiredtiger/test/suite/test_truncate02.py @@ -32,7 +32,7 @@ import wiredtiger, wttest from helper import key_populate, value_populate, simple_populate -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_truncate_fast_delete # When deleting leaf pages that aren't in memory, we set transactional @@ -86,8 +86,7 @@ class test_truncate_fast_delete(wttest.WiredTigerTestCase): ('txn2', dict(commit=False)), ] - scenarios = number_scenarios( - multiply_scenarios('.', types, keyfmt, overflow, reads, writes, txn)) + scenarios = make_scenarios(types, keyfmt, overflow, reads, writes, txn) # Return the number of records visible to the cursor; test both forward # and backward iteration, they are different code paths in this case. diff --git a/src/third_party/wiredtiger/test/suite/test_txn01.py b/src/third_party/wiredtiger/test/suite/test_txn01.py index eb6963791fd..1ba74461088 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn01.py +++ b/src/third_party/wiredtiger/test/suite/test_txn01.py @@ -27,13 +27,13 @@ # OTHER DEALINGS IN THE SOFTWARE. 
import wiredtiger, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_txn01.py # Transactions: basic functionality class test_txn01(wttest.WiredTigerTestCase): nentries = 1000 - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('col-f', dict(uri='file:text_txn01',key_format='r',value_format='S')), ('col-t', dict(uri='table:text_txn01',key_format='r',value_format='S')), ('fix-f', dict(uri='file:text_txn01',key_format='r',value_format='8t')), diff --git a/src/third_party/wiredtiger/test/suite/test_txn02.py b/src/third_party/wiredtiger/test/suite/test_txn02.py index fccf123d3bc..a0c2c12a47c 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn02.py +++ b/src/third_party/wiredtiger/test/suite/test_txn02.py @@ -32,7 +32,7 @@ import fnmatch, os, shutil, time from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios +from wtscenario import make_scenarios import wttest class test_txn02(wttest.WiredTigerTestCase, suite_subprocess): @@ -81,22 +81,18 @@ class test_txn02(wttest.WiredTigerTestCase, suite_subprocess): txn3s = [('t3c', dict(txn3='commit')), ('t3r', dict(txn3='rollback'))] txn4s = [('t4c', dict(txn4='commit')), ('t4r', dict(txn4='rollback'))] - all_scenarios = multiply_scenarios('.', types, - op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s) - # This test generates thousands of potential scenarios. # For default runs, we'll use a small subset of them, for # long runs (when --long is set) we'll set a much larger limit. - scenarios = number_scenarios(prune_scenarios(all_scenarios, 20, 5000)) + scenarios = make_scenarios(types, + op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s, + prune=20, prunelong=5000) # Each check_log() call takes a second, so we don't call it for # every scenario, we'll limit it to the value of checklog_calls. 
checklog_calls = 100 if wttest.islongtest() else 2 checklog_mod = (len(scenarios) / checklog_calls + 1) - # scenarios = number_scenarios(multiply_scenarios('.', types, - # op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s)) [:3] - # Overrides WiredTigerTestCase def setUpConnectionOpen(self, dir): self.home = dir # Cycle through the different transaction_sync values in a diff --git a/src/third_party/wiredtiger/test/suite/test_txn03.py b/src/third_party/wiredtiger/test/suite/test_txn03.py index 97180a75949..18a0e096767 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn03.py +++ b/src/third_party/wiredtiger/test/suite/test_txn03.py @@ -31,7 +31,7 @@ # import wiredtiger, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios class test_txn03(wttest.WiredTigerTestCase): tablename = 'test_txn03' @@ -42,7 +42,7 @@ class test_txn03(wttest.WiredTigerTestCase): data_str2 = "TEST_VAL1" nentries = 1000 - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('var', dict(create_params = "key_format=S,value_format=S")), ]) diff --git a/src/third_party/wiredtiger/test/suite/test_txn04.py b/src/third_party/wiredtiger/test/suite/test_txn04.py index 9d9d2db62c6..ade39272f84 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn04.py +++ b/src/third_party/wiredtiger/test/suite/test_txn04.py @@ -32,7 +32,7 @@ import shutil, os from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios import wttest class test_txn04(wttest.WiredTigerTestCase, suite_subprocess): @@ -62,7 +62,7 @@ class test_txn04(wttest.WiredTigerTestCase, suite_subprocess): ] txn1s = [('t1c', dict(txn1='commit')), ('t1r', dict(txn1='rollback'))] - scenarios = number_scenarios(multiply_scenarios('.', types, op1s, txn1s)) + scenarios = make_scenarios(types, op1s, txn1s) # Overrides WiredTigerTestCase def setUpConnectionOpen(self, dir): self.home = dir diff --git 
a/src/third_party/wiredtiger/test/suite/test_txn05.py b/src/third_party/wiredtiger/test/suite/test_txn05.py index bb68034ca04..9e84fe7d3fe 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn05.py +++ b/src/third_party/wiredtiger/test/suite/test_txn05.py @@ -32,7 +32,7 @@ import fnmatch, os, shutil, time from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios import wttest class test_txn05(wttest.WiredTigerTestCase, suite_subprocess): @@ -63,8 +63,7 @@ class test_txn05(wttest.WiredTigerTestCase, suite_subprocess): ] txn1s = [('t1c', dict(txn1='commit')), ('t1r', dict(txn1='rollback'))] - scenarios = number_scenarios(multiply_scenarios('.', types, op1s, txn1s)) - # scenarios = number_scenarios(multiply_scenarios('.', types, op1s, txn1s))[:3] + scenarios = make_scenarios(types, op1s, txn1s) # Overrides WiredTigerTestCase def setUpConnectionOpen(self, dir): self.home = dir diff --git a/src/third_party/wiredtiger/test/suite/test_txn06.py b/src/third_party/wiredtiger/test/suite/test_txn06.py index 9c1d0335d47..e4636e40e2e 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn06.py +++ b/src/third_party/wiredtiger/test/suite/test_txn06.py @@ -30,7 +30,6 @@ # Transactions: test long-running snapshots from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios from helper import simple_populate import wiredtiger, wttest diff --git a/src/third_party/wiredtiger/test/suite/test_txn07.py b/src/third_party/wiredtiger/test/suite/test_txn07.py index f74120e3590..8dd8238343d 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn07.py +++ b/src/third_party/wiredtiger/test/suite/test_txn07.py @@ -33,7 +33,7 @@ import fnmatch, os, shutil, run, time from suite_subprocess import suite_subprocess from wiredtiger import stat -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios import wttest 
class test_txn07(wttest.WiredTigerTestCase, suite_subprocess): @@ -70,8 +70,7 @@ class test_txn07(wttest.WiredTigerTestCase, suite_subprocess): ('none', dict(compress='')), ] - scenarios = number_scenarios(multiply_scenarios('.', types, op1s, txn1s, - compress)) + scenarios = make_scenarios(types, op1s, txn1s, compress) # Overrides WiredTigerTestCase def setUpConnectionOpen(self, dir): self.home = dir diff --git a/src/third_party/wiredtiger/test/suite/test_txn08.py b/src/third_party/wiredtiger/test/suite/test_txn08.py index 36253856285..f0cdf08df07 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn08.py +++ b/src/third_party/wiredtiger/test/suite/test_txn08.py @@ -33,7 +33,6 @@ import fnmatch, os, shutil, run, time from suite_subprocess import suite_subprocess from wiredtiger import stat -from wtscenario import multiply_scenarios, number_scenarios import wttest class test_txn08(wttest.WiredTigerTestCase, suite_subprocess): diff --git a/src/third_party/wiredtiger/test/suite/test_txn09.py b/src/third_party/wiredtiger/test/suite/test_txn09.py index f536d65205d..cfad8270ab1 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn09.py +++ b/src/third_party/wiredtiger/test/suite/test_txn09.py @@ -32,7 +32,7 @@ import fnmatch, os, shutil, time from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios +from wtscenario import make_scenarios import wttest class test_txn09(wttest.WiredTigerTestCase, suite_subprocess): @@ -73,13 +73,12 @@ class test_txn09(wttest.WiredTigerTestCase, suite_subprocess): txn3s = [('t3c', dict(txn3='commit')), ('t3r', dict(txn3='rollback'))] txn4s = [('t4c', dict(txn4='commit')), ('t4r', dict(txn4='rollback'))] - all_scenarios = multiply_scenarios('.', types, - op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s) - # This test generates thousands of potential scenarios. 
# For default runs, we'll use a small subset of them, for # long runs (when --long is set) we'll set a much larger limit. - scenarios = number_scenarios(prune_scenarios(all_scenarios, 20, 5000)) + scenarios = make_scenarios(types, + op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s, + prune=20, prunelong=5000) # Overrides WiredTigerTestCase def setUpConnectionOpen(self, dir): diff --git a/src/third_party/wiredtiger/test/suite/test_txn10.py b/src/third_party/wiredtiger/test/suite/test_txn10.py index cf9c11dd4ab..a4745e60066 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn10.py +++ b/src/third_party/wiredtiger/test/suite/test_txn10.py @@ -32,7 +32,6 @@ import fnmatch, os, shutil, time from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios import wttest class test_txn10(wttest.WiredTigerTestCase, suite_subprocess): diff --git a/src/third_party/wiredtiger/test/suite/test_txn12.py b/src/third_party/wiredtiger/test/suite/test_txn12.py index 8ae9df33990..32c058bea85 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn12.py +++ b/src/third_party/wiredtiger/test/suite/test_txn12.py @@ -29,7 +29,6 @@ import wiredtiger, wttest from suite_subprocess import suite_subprocess from wiredtiger import stat -from wtscenario import multiply_scenarios, number_scenarios # test_txn12.py # test of commit following failed op in a read only transaction. 
diff --git a/src/third_party/wiredtiger/test/suite/test_txn13.py b/src/third_party/wiredtiger/test/suite/test_txn13.py index dd6a6dbcd6d..ae0250c06e8 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn13.py +++ b/src/third_party/wiredtiger/test/suite/test_txn13.py @@ -33,7 +33,7 @@ #import fnmatch, os, shutil, run, time from suite_subprocess import suite_subprocess -from wtscenario import check_scenarios +from wtscenario import make_scenarios import wiredtiger, wttest class test_txn13(wttest.WiredTigerTestCase, suite_subprocess): @@ -43,7 +43,7 @@ class test_txn13(wttest.WiredTigerTestCase, suite_subprocess): nops = 1024 create_params = 'key_format=i,value_format=S' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('1gb', dict(expect_err=False, valuesize=1048576)), ('2gb', dict(expect_err=False, valuesize=2097152)), ('4gb', dict(expect_err=True, valuesize=4194304)) diff --git a/src/third_party/wiredtiger/test/suite/test_txn14.py b/src/third_party/wiredtiger/test/suite/test_txn14.py index 371f4402567..f9ccabaab8b 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn14.py +++ b/src/third_party/wiredtiger/test/suite/test_txn14.py @@ -32,7 +32,7 @@ import fnmatch, os, shutil, time from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios +from wtscenario import make_scenarios import wttest class test_txn14(wttest.WiredTigerTestCase, suite_subprocess): @@ -47,7 +47,7 @@ class test_txn14(wttest.WiredTigerTestCase, suite_subprocess): ('sync', dict(sync='on')), ('bg', dict(sync='background')), ] - scenarios = multiply_scenarios('.', sync_list) + scenarios = make_scenarios(sync_list) def simulate_crash_restart(self, olddir, newdir): ''' Simulate a crash from olddir and restart in newdir. 
''' diff --git a/src/third_party/wiredtiger/test/suite/test_txn15.py b/src/third_party/wiredtiger/test/suite/test_txn15.py index 809dce4ebfa..c061c093b02 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn15.py +++ b/src/third_party/wiredtiger/test/suite/test_txn15.py @@ -33,7 +33,7 @@ import fnmatch, os, shutil, time from suite_subprocess import suite_subprocess from wiredtiger import stat -from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios +from wtscenario import make_scenarios import wttest class test_txn15(wttest.WiredTigerTestCase, suite_subprocess): @@ -71,7 +71,7 @@ class test_txn15(wttest.WiredTigerTestCase, suite_subprocess): ('c_none', dict(commit_sync=None)), ('c_off', dict(commit_sync='sync=off')), ] - scenarios = multiply_scenarios('.', conn_sync_enabled, conn_sync_method, + scenarios = make_scenarios(conn_sync_enabled, conn_sync_method, begin_sync, commit_sync) # Given the different configuration settings determine if this group diff --git a/src/third_party/wiredtiger/test/suite/test_upgrade.py b/src/third_party/wiredtiger/test/suite/test_upgrade.py index 357e437f14d..e4f92f8f8d8 100644 --- a/src/third_party/wiredtiger/test/suite/test_upgrade.py +++ b/src/third_party/wiredtiger/test/suite/test_upgrade.py @@ -29,14 +29,14 @@ import os, time import wiredtiger, wttest from helper import complex_populate, simple_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_upgrade.py # session level upgrade operation class test_upgrade(wttest.WiredTigerTestCase): name = 'test_upgrade' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:')), ('table', dict(uri='table:')) ]) diff --git a/src/third_party/wiredtiger/test/suite/test_util02.py b/src/third_party/wiredtiger/test/suite/test_util02.py index 475e856052a..421b0104484 100644 --- a/src/third_party/wiredtiger/test/suite/test_util02.py +++ b/src/third_party/wiredtiger/test/suite/test_util02.py @@ -29,7 
+29,7 @@ import string, os import wiredtiger, wttest from suite_subprocess import suite_subprocess -from wtscenario import check_scenarios +from wtscenario import make_scenarios from helper import complex_populate # test_util02.py @@ -44,7 +44,7 @@ class test_util02(wttest.WiredTigerTestCase, suite_subprocess): nentries = 1000 stringclass = ''.__class__ - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('SS', dict(key_format='S',value_format='S')), ('rS', dict(key_format='r',value_format='S')), ('ri', dict(key_format='r',value_format='i')), diff --git a/src/third_party/wiredtiger/test/suite/test_util03.py b/src/third_party/wiredtiger/test/suite/test_util03.py index c3ea48b8f5e..e341c79ff9e 100644 --- a/src/third_party/wiredtiger/test/suite/test_util03.py +++ b/src/third_party/wiredtiger/test/suite/test_util03.py @@ -28,7 +28,7 @@ from suite_subprocess import suite_subprocess import wiredtiger, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_util03.py # Utilities: wt create @@ -36,7 +36,7 @@ class test_util03(wttest.WiredTigerTestCase, suite_subprocess): tablename = 'test_util03.a' nentries = 1000 - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('none', dict(key_format=None,value_format=None)), ('SS', dict(key_format='S',value_format='S')), ('rS', dict(key_format='r',value_format='S')), diff --git a/src/third_party/wiredtiger/test/suite/test_util13.py b/src/third_party/wiredtiger/test/suite/test_util13.py index 222f42cd7f1..9804dc700ba 100644 --- a/src/third_party/wiredtiger/test/suite/test_util13.py +++ b/src/third_party/wiredtiger/test/suite/test_util13.py @@ -33,7 +33,7 @@ import itertools, wiredtiger, wttest from helper import complex_populate_cgconfig, complex_populate_cgconfig_lsm from helper import simple_populate from helper import complex_populate_check, simple_populate_check -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # 
test_util13.py # Utilities: wt dump, as well as the dump cursor @@ -73,7 +73,7 @@ class test_util13(wttest.WiredTigerTestCase, suite_subprocess): cfg='merge_max=5')), ] - scenarios = number_scenarios(multiply_scenarios('.', types)) + scenarios = make_scenarios(types) def compare_config(self, expected_cfg, actual_cfg): # Replace '(' characters so configuration groups don't break parsing. diff --git a/src/third_party/wiredtiger/test/suite/wtscenario.py b/src/third_party/wiredtiger/test/suite/wtscenario.py index 7fad7c228fb..8576b3ac876 100644 --- a/src/third_party/wiredtiger/test/suite/wtscenario.py +++ b/src/third_party/wiredtiger/test/suite/wtscenario.py @@ -64,11 +64,37 @@ def log2chr(val): megabyte = 1024 * 1024 +def make_scenarios(*args, **kwargs): + """ + The standard way to create scenarios for WT tests. + Scenarios can be combined by listing them all as arguments. + A final prune= and/or prunelong= argument may be given that + forces the list of entries in the scenario to be pruned. + The result is a (combined) scenario that has been checked + for name duplicates and has been given names and numbers. 
+ """ + scenes = multiply_scenarios('.', *args) + pruneval = None + prunelong = None + for key in kwargs: + if key == 'prune': + pruneval = kwargs[key] + elif key == 'prunelong': + prunelong = kwargs[key] + else: + raise AssertionError( + 'make_scenarios: unexpected named arg: ' + key) + if pruneval != None or prunelong != None: + pruneval = pruneval if pruneval != None else -1 + prunelong = prunelong if prunelong != None else -1 + scenes = prune_scenarios(scenes, pruneval, prunelong) + return number_scenarios(scenes) + def check_scenarios(scenes): """ - Make sure all scenarios have unique names + Make sure all scenarios have unique case insensitive names """ - assert len(scenes) == len(dict(scenes)) + assert len(scenes) == len(dict((k.lower(), v) for k, v in scenes)) return scenes def multiply_scenarios(sep, *args): @@ -81,8 +107,8 @@ def multiply_scenarios(sep, *args): result = scenes else: total = [] - for scena in scenes: - for scenb in result: + for scena in result: + for scenb in scenes: # Create a merged scenario with a concatenated name name = scena[0] + sep + scenb[0] tdict = {} @@ -235,7 +261,7 @@ class wtscenario: scen.lmax = lmax scen.cache_size = cache s.append((scen.shortName(), dict(session_create_scenario=scen))) - return s + return make_scenarios(s) def shortName(self): """ diff --git a/src/third_party/wiredtiger/test/suite/wttest.py b/src/third_party/wiredtiger/test/suite/wttest.py index 9e430fcdba7..788dd5d0307 100644 --- a/src/third_party/wiredtiger/test/suite/wttest.py +++ b/src/third_party/wiredtiger/test/suite/wttest.py @@ -212,8 +212,8 @@ class WiredTigerTestCase(unittest.TestCase): # help distinguish tests. 
scen = '' if hasattr(self, 'scenario_number') and hasattr(self, 'scenario_name'): - scen = '(scenario ' + str(self.scenario_number) + \ - ': ' + self.scenario_name + ')' + scen = ' -s ' + str(self.scenario_number) + \ + ' (' + self.scenario_name + ')' return self.simpleName() + scen def simpleName(self): @@ -293,6 +293,8 @@ class WiredTigerTestCase(unittest.TestCase): raise Exception(self.testdir + ": cannot remove directory") os.makedirs(self.testdir) os.chdir(self.testdir) + with open('testname.txt', 'w+') as namefile: + namefile.write(str(self) + '\n') self.fdSetUp() # tearDown needs a conn field, set it here in case the open fails. self.conn = None diff --git a/src/third_party/wiredtiger/test/thread/smoke.sh b/src/third_party/wiredtiger/test/thread/smoke.sh index 9a235b1d8e9..aa2f86c1def 100755 --- a/src/third_party/wiredtiger/test/thread/smoke.sh +++ b/src/third_party/wiredtiger/test/thread/smoke.sh @@ -4,10 +4,10 @@ set -e # Smoke-test format as part of running "make check". $TEST_WRAPPER ./t -t f -$TEST_WRAPPER ./t -S -F -t f +$TEST_WRAPPER ./t -S -F -n 1000 -t f $TEST_WRAPPER ./t -t r -$TEST_WRAPPER ./t -S -F -t r +$TEST_WRAPPER ./t -S -F -n 1000 -t r $TEST_WRAPPER ./t -t v -$TEST_WRAPPER ./t -S -F -t v +$TEST_WRAPPER ./t -S -F -n 1000 -t v diff --git a/src/third_party/wiredtiger/test/utility/misc.c b/src/third_party/wiredtiger/test/utility/misc.c index dfc655dec1a..dffd29a5b6a 100644 --- a/src/third_party/wiredtiger/test/utility/misc.c +++ b/src/third_party/wiredtiger/test/utility/misc.c @@ -192,3 +192,18 @@ dstrdup(const void *str) return (p); testutil_die(errno, "strdup"); } + +/* + * dstrndup -- + * Call emulating strndup, dying on failure. Don't use actual strndup here + * as it is not supported within MSVC. 
+ */ +void * +dstrndup(const char *str, size_t len) +{ + char *p; + + p = dcalloc(len + 1, sizeof(char)); + memcpy(p, str, len); + return (p); +} diff --git a/src/third_party/wiredtiger/test/utility/test_util.h b/src/third_party/wiredtiger/test/utility/test_util.h index 66ff8de2d19..821e06084d2 100644 --- a/src/third_party/wiredtiger/test/utility/test_util.h +++ b/src/third_party/wiredtiger/test/utility/test_util.h @@ -115,6 +115,7 @@ void *dcalloc(size_t, size_t); void *dmalloc(size_t); void *drealloc(void *, size_t); void *dstrdup(const void *); +void *dstrndup(const char *, size_t); void testutil_clean_work_dir(char *); void testutil_cleanup(TEST_OPTS *); void testutil_make_work_dir(char *); diff --git a/src/third_party/wiredtiger/tools/wtstats/stat_data.py b/src/third_party/wiredtiger/tools/wtstats/stat_data.py index a79cf1faf5e..b93f2449c63 100644 --- a/src/third_party/wiredtiger/tools/wtstats/stat_data.py +++ b/src/third_party/wiredtiger/tools/wtstats/stat_data.py @@ -3,17 +3,19 @@ no_scale_per_second_list = [ 'async: current work queue length', 'async: maximum work queue length', + 'cache: bytes belonging to page images in the cache', 'cache: bytes currently in the cache', + 'cache: bytes not belonging to page images in the cache', 'cache: eviction currently operating in aggressive mode', 'cache: files with active eviction walks', 'cache: hazard pointer maximum array length', 'cache: maximum bytes configured', 'cache: maximum page size at eviction', + 'cache: overflow values cached in memory', 'cache: pages currently held in the cache', 'cache: percentage overhead', 'cache: tracked bytes belonging to internal pages in the cache', 'cache: tracked bytes belonging to leaf pages in the cache', - 'cache: tracked bytes belonging to overflow pages in the cache', 'cache: tracked dirty bytes in the cache', 'cache: tracked dirty pages in the cache', 'connection: files currently open', @@ -28,6 +30,22 @@ no_scale_per_second_list = [ 'reconciliation: split objects 
currently awaiting free', 'session: open cursor count', 'session: open session count', + 'session: table compact failed calls', + 'session: table compact successful calls', + 'session: table create failed calls', + 'session: table create successful calls', + 'session: table drop failed calls', + 'session: table drop successful calls', + 'session: table rebalance failed calls', + 'session: table rebalance successful calls', + 'session: table rename failed calls', + 'session: table rename successful calls', + 'session: table salvage failed calls', + 'session: table salvage successful calls', + 'session: table truncate failed calls', + 'session: table truncate successful calls', + 'session: table verify failed calls', + 'session: table verify successful calls', 'thread-state: active filesystem fsync calls', 'thread-state: active filesystem read calls', 'thread-state: active filesystem write calls', @@ -36,7 +54,10 @@ no_scale_per_second_list = [ 'transaction: transaction checkpoint max time (msecs)', 'transaction: transaction checkpoint min time (msecs)', 'transaction: transaction checkpoint most recent time (msecs)', + 'transaction: transaction checkpoint scrub dirty target', + 'transaction: transaction checkpoint scrub time (msecs)', 'transaction: transaction checkpoint total time (msecs)', + 'transaction: transaction fsync duration for checkpoint after allocating the transaction ID (usecs)', 'transaction: transaction range of IDs currently pinned', 'transaction: transaction range of IDs currently pinned by a checkpoint', 'transaction: transaction range of IDs currently pinned by named snapshots', @@ -64,6 +85,7 @@ no_scale_per_second_list = [ 'btree: overflow pages', 'btree: row-store internal pages', 'btree: row-store leaf pages', + 'cache: bytes currently in the cache', 'cache: overflow values cached in memory', 'LSM: bloom filters in the LSM tree', 'LSM: chunks in the LSM tree', @@ -74,7 +96,9 @@ no_scale_per_second_list = [ ] no_clear_list = [ 'async: maximum 
work queue length', + 'cache: bytes belonging to page images in the cache', 'cache: bytes currently in the cache', + 'cache: bytes not belonging to page images in the cache', 'cache: eviction currently operating in aggressive mode', 'cache: files with active eviction walks', 'cache: maximum bytes configured', @@ -83,7 +107,6 @@ no_clear_list = [ 'cache: percentage overhead', 'cache: tracked bytes belonging to internal pages in the cache', 'cache: tracked bytes belonging to leaf pages in the cache', - 'cache: tracked bytes belonging to overflow pages in the cache', 'cache: tracked dirty bytes in the cache', 'cache: tracked dirty pages in the cache', 'connection: files currently open', @@ -98,6 +121,22 @@ no_clear_list = [ 'reconciliation: split objects currently awaiting free', 'session: open cursor count', 'session: open session count', + 'session: table compact failed calls', + 'session: table compact successful calls', + 'session: table create failed calls', + 'session: table create successful calls', + 'session: table drop failed calls', + 'session: table drop successful calls', + 'session: table rebalance failed calls', + 'session: table rebalance successful calls', + 'session: table rename failed calls', + 'session: table rename successful calls', + 'session: table salvage failed calls', + 'session: table salvage successful calls', + 'session: table truncate failed calls', + 'session: table truncate successful calls', + 'session: table verify failed calls', + 'session: table verify successful calls', 'thread-state: active filesystem fsync calls', 'thread-state: active filesystem read calls', 'thread-state: active filesystem write calls', @@ -106,11 +145,15 @@ no_clear_list = [ 'transaction: transaction checkpoint max time (msecs)', 'transaction: transaction checkpoint min time (msecs)', 'transaction: transaction checkpoint most recent time (msecs)', + 'transaction: transaction checkpoint scrub dirty target', + 'transaction: transaction checkpoint scrub time 
(msecs)', 'transaction: transaction checkpoint total time (msecs)', + 'transaction: transaction fsync duration for checkpoint after allocating the transaction ID (usecs)', 'transaction: transaction range of IDs currently pinned', 'transaction: transaction range of IDs currently pinned by a checkpoint', 'transaction: transaction range of IDs currently pinned by named snapshots', 'btree: btree checkpoint generation', + 'cache: bytes currently in the cache', 'session: open cursor count', ] prefix_list = [ diff --git a/src/third_party/wiredtiger/tools/wtstats/wtstats.py b/src/third_party/wiredtiger/tools/wtstats/wtstats.py index ff62d99e825..3549031c30f 100755 --- a/src/third_party/wiredtiger/tools/wtstats/wtstats.py +++ b/src/third_party/wiredtiger/tools/wtstats/wtstats.py @@ -137,6 +137,8 @@ def parse_wtperf_file(file, result): for i, v in enumerate(values): if v == 'N': v = 0 + if v == 'Y': + v = 1 # convert us to ms if '(ms)' in headings[i]: v = float(v) / 1000.0 -- cgit v1.2.1