diff options
author | Alex Gorrod <alexander.gorrod@mongodb.com> | 2016-08-04 16:10:27 +1000 |
---|---|---|
committer | Alex Gorrod <alexander.gorrod@mongodb.com> | 2016-08-04 16:20:01 +1000 |
commit | c4612847ed88c5e0500f0e9ecb2ecdcb49621cf1 (patch) | |
tree | aa58b63ad77ea46be39793aa3d9f94a6b0104163 /src/third_party | |
parent | 5e9b3e958ad96db67a8f4f8790947b1b62b6469e (diff) | |
download | mongo-c4612847ed88c5e0500f0e9ecb2ecdcb49621cf1.tar.gz |
Import wiredtiger-wiredtiger-2.8.0-589-ga9e9696.tar.gz from wiredtiger branch mongodb-3.4
ref: d8fb874..a9e9696
for: 3.3.11
SERVER-24971 Excessive memory held by sessions when application threads do evictions
WT-1162 Add latency to Jenkins wtperf tests and plots
WT-2026 Maximum pages size at eviction too large
WT-2239 Make sure LSM cursors read up to date dsk_gen, it was racing with compact
WT-2353 Failure to create async threads as part of a wiredtiger_open call will cause a hang
WT-2380 Make scripts fail if code doesn't match style
WT-2486 Update make check so that it runs faster
WT-2578 remove write barriers from the TAILQ_INSERT_XXX macros
WT-2648 cache-line alignment for new ports
WT-2665 Limit allocator fragmentation in WiredTiger
WT-2693 Check open_cursor error paths for consistent handling
WT-2708 split child-update race with reconciliation/eviction
WT-2711 Change statistics log configuration options
WT-2728 Don't re-read log file headers during log_flush
WT-2729 Focus eviction walks in largest trees
WT-2730 cursor next/prev can return the wrong key/value pair when crossing a page boundary
WT-2731 Raw compression can create pages that are larger than expected
WT-2732 Coverity analysis defect 99665: Redundant test
WT-2737 Scrub dirty pages rather than evicting them
WT-2738 Remove the ability to change the default checkpoint name
WT-2739 pluggable file systems documentation cleanups
WT-2743 Thread count statistics always report 0
WT-2744 partial line even with line buffering set
WT-2746 track checkpoint I/O separately from eviction I/O
WT-2751 column-store statistics incorrectly calculates the number of entries
WT-2752 Fixes to zipfian wtperf workload config
WT-2755 flexelint configuration treats size_t as 4B type
WT-2756 Upgrade the autoconf archive package to check for swig 3.0
WT-2757 Column tables behave differently when column names are provided
WT-2759 Releasing the hot-backup lock doesn't require the schema lock.
WT-2760 Fix a bug in backup related to directory sync. Change the filesystem API to make durable the default
WT-2762 wtstats tool fails if checkpoint runs
WT-2763 Unit test test_intpack failing on OSX
WT-2764 Optimize checkpoints to reduce throughput disruption
WT-2765 wt dump: indices need to be shown in the dump output
WT-2767 test suite needs way to run an individual scenario
WT-2769 Update documentation to reflect correct limits of memory_page_max
WT-2770 Add statistics tracking schema operations
WT-2772 Investigate log performance testing weirdness
WT-2773 search_near in indexes does not find exact matches
WT-2774 minor cleanups/improvements
WT-2778 Python test suite: make scenario initialization consistent
WT-2779 Raw compression created unexpectedly large pages on disk
WT-2781 Enhance bulk cursor option with an option to return immediately on contention
WT-2782 Missing a fs_directory_list_free in ex_file_system.c
WT-2785 Scrub dirty pages rather than evicting them: single-page reconciliation
WT-2791 Enhance OS X Evergreen unit test
WT-2793 wtperf config improvements
WT-2796 Memory leak in reconciliation uncovered by stress testing
WT-2798 Crash vulnerability with nojournal after create during checkpoint
WT-2800 Illegal file format in test/format on PPC
WT-2801 Crash vulnerability from eviction of metadata during checkpoint
WT-2802 Transaction commit causes heap-use-after free
WT-2803 Add verbose functionality to WT Evergreen tests
WT-2804 Don't read values in a tree without a snapshot
WT-2805 Infinite recursion if error streams fail
WT-2806 wtperf allocation size off-by-one
Diffstat (limited to 'src/third_party')
254 files changed, 4447 insertions, 2911 deletions
diff --git a/src/third_party/wiredtiger/SConstruct b/src/third_party/wiredtiger/SConstruct index a5dd8761d6c..b0ce771e9bd 100644 --- a/src/third_party/wiredtiger/SConstruct +++ b/src/third_party/wiredtiger/SConstruct @@ -484,7 +484,7 @@ t = env.Program("wtperf", [ "bench/wtperf/wtperf_throttle.c", "bench/wtperf/wtperf_truncate.c", ], - LIBS=[wtlib, shim] + wtlibs) + LIBS=[wtlib, shim, testutil] + wtlibs) Default(t) #Build the Examples diff --git a/src/third_party/wiredtiger/bench/wtperf/Makefile.am b/src/third_party/wiredtiger/bench/wtperf/Makefile.am index cc1f84b5406..57792e3887f 100644 --- a/src/third_party/wiredtiger/bench/wtperf/Makefile.am +++ b/src/third_party/wiredtiger/bench/wtperf/Makefile.am @@ -1,13 +1,17 @@ -AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include - -LDADD = $(top_builddir)/libwiredtiger.la -lm +AM_CPPFLAGS = -I$(top_builddir) +AM_CPPFLAGS +=-I$(top_srcdir)/src/include +AM_CPPFLAGS +=-I$(top_srcdir)/test/utility noinst_PROGRAMS = wtperf -wtperf_LDFLAGS = -static wtperf_SOURCES =\ config.c idle_table_cycle.c misc.c track.c wtperf.c \ wtperf.h wtperf_opt.i wtperf_throttle.c wtperf_truncate.c +wtperf_LDADD = $(top_builddir)/test/utility/libtest_util.la +wtperf_LDADD +=$(top_builddir)/libwiredtiger.la +wtperf_LDADD +=-lm +wtperf_LDFLAGS = -static + TESTS = smoke.sh AM_TESTS_ENVIRONMENT = rm -rf WT_TEST ; mkdir WT_TEST ; # automake 1.11 compatibility diff --git a/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c b/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c index b699b5b9dd1..3c079bb560f 100644 --- a/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c +++ b/src/third_party/wiredtiger/bench/wtperf/idle_table_cycle.c @@ -129,7 +129,8 @@ cycle_idle_tables(void *arg) * Drop the table. Keep retrying on EBUSY failure - it is an * expected return when checkpoints are happening. 
*/ - while ((ret = session->drop(session, uri, "force")) == EBUSY) + while ((ret = session->drop( + session, uri, "force,checkpoint_wait=false")) == EBUSY) __wt_sleep(1, 0); if (ret != 0 && ret != EBUSY) { diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/checkpoint_schema_race.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/checkpoint_schema_race.wtperf new file mode 100644 index 00000000000..ade8e88ee9b --- /dev/null +++ b/src/third_party/wiredtiger/bench/wtperf/runners/checkpoint_schema_race.wtperf @@ -0,0 +1,20 @@ +# Check create and drop behavior concurrent with checkpoints (WT-2798). +# Setup a multiple tables and a cache size large enough that checkpoints can +# take a long time. +conn_config="cache_size=8GB,log=(enabled=false),checkpoint=(wait=30)" +table_config="leaf_page_max=4k,internal_page_max=16k,type=file" +icount=10000000 +table_count=100 +table_count_idle=100 +# Turn on create/drop of idle tables, but don't worry if individual operations +# take a long time. 
+idle_table_cycle=120 +populate_threads=5 +checkpoint_threads=0 +report_interval=5 +# 100 million +random_range=10000000 +run_time=300 +# Setup a workload that dirties a lot of the cache +threads=((count=2,reads=1),(count=2,inserts=1),(count=2,updates=1)) +value_sz=500 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-stress-multi.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-stress-multi.wtperf new file mode 100644 index 00000000000..9699b9ae3bb --- /dev/null +++ b/src/third_party/wiredtiger/bench/wtperf/runners/evict-btree-stress-multi.wtperf @@ -0,0 +1,12 @@ +conn_config="cache_size=1G,eviction=(threads_max=4),session_max=2000" +table_config="type=file" +table_count=100 +icount=100000000 +report_interval=5 +run_time=600 +populate_threads=1 +threads=((count=100,updates=1,reads=4,ops_per_txn=30)) +# Warn if a latency over a quarter second is seen +max_latency=250 +sample_interval=5 +sample_rate=1 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/fruit-lsm.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/fruit-lsm.wtperf deleted file mode 100644 index e5817554201..00000000000 --- a/src/third_party/wiredtiger/bench/wtperf/runners/fruit-lsm.wtperf +++ /dev/null @@ -1,22 +0,0 @@ -# wtperf options file: simulate riak and its test1 and test2 configuration -# The configuration for the connection and table are from riak and the -# specification of the data (count, size, threads) is from basho_bench. 
-# -conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600),log=(enabled=true),transaction_sync=(enabled=true,method=none),checkpoint=(wait=180),lsm_manager=(worker_thread_max=12)" -#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)" -#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024" -compact=true -sess_config="isolation=snapshot" -table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB),type=lsm,leaf_page_max=16K,leaf_item_max=4K,os_cache_dirty_max=16MB" -icount=25000000 -key_sz=40 -value_sz=800 -#max_latency=2000 -pareto=20 -populate_threads=20 -report_interval=10 -random_value=true -run_time=18000 -sample_interval=10 -table_count=8 -threads=((count=20,read=6,update=1)) diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/fruit-short.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/fruit-short.wtperf deleted file mode 100644 index 10cb423a92d..00000000000 --- a/src/third_party/wiredtiger/bench/wtperf/runners/fruit-short.wtperf +++ /dev/null @@ -1,20 +0,0 @@ -# wtperf options file: simulate riak and its test1 and test2 configuration -# The configuration for the connection and table are from riak and the -# specification of the data (count, size, threads) is from basho_bench. 
-# -#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)" -conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,lsm_manager=(worker_thread_max=6)" -compact=true -sess_config="isolation=snapshot" -table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB),type=lsm,leaf_page_max=16K" -icount=25000000 -key_sz=40 -value_sz=800 -max_latency=2000 -pareto=20 -populate_threads=20 -report_interval=10 -random_value=true -run_time=1800 -sample_interval=10 -threads=((count=20,read=6,update=1)) diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/log-append-large.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/log-append-large.wtperf deleted file mode 100644 index c1364c17c28..00000000000 --- a/src/third_party/wiredtiger/bench/wtperf/runners/log-append-large.wtperf +++ /dev/null @@ -1,10 +0,0 @@ -# wtperf options file: Test a log file with a multi-threaded -# append workload. We want to create a very large number of log file -# switches with fewer records per log file than we have active threads. -conn_config="cache_size=1G,log=(enabled=true,file_max=20MB),checkpoint=(log_size=1G)" -table_config="type=file" -icount=1000 -report_interval=5 -run_time=0 -value_sz=5000000 -populate_threads=8 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/log-append-zero.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/log-append-zero.wtperf deleted file mode 100644 index 973d2cddd0d..00000000000 --- a/src/third_party/wiredtiger/bench/wtperf/runners/log-append-zero.wtperf +++ /dev/null @@ -1,8 +0,0 @@ -# wtperf options file: Test a log file with a multi-threaded -# append workload. 
-conn_config="cache_size=1G,log=(enabled=true,file_max=20MB,zero_fill=true),checkpoint=(log_size=1G)" -table_config="type=file" -icount=50000000 -report_interval=5 -run_time=0 -populate_threads=8 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/log-append.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/log-append.wtperf deleted file mode 100644 index 9d0a78e3c61..00000000000 --- a/src/third_party/wiredtiger/bench/wtperf/runners/log-append.wtperf +++ /dev/null @@ -1,8 +0,0 @@ -# wtperf options file: Test a log file with a multi-threaded -# append workload. -conn_config="cache_size=1G,log=(enabled=true,file_max=20MB),checkpoint=(log_size=1G)" -table_config="type=file" -icount=50000000 -report_interval=5 -run_time=0 -populate_threads=8 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/log-nockpt.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/log-nockpt.wtperf deleted file mode 100644 index a078cead740..00000000000 --- a/src/third_party/wiredtiger/bench/wtperf/runners/log-nockpt.wtperf +++ /dev/null @@ -1,12 +0,0 @@ -# wtperf options file: Test performance with a log file enabled. -# Set the log file reasonably small to catch log-swtich bottle -# necks. -conn_config="cache_size=1G,log=(enabled=true,file_max=20MB)" -table_config="type=file" -icount=50000 -report_interval=5 -run_time=40 -populate_threads=1 -random_range=50000000 -threads=((count=8,inserts=1)) - diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/log-noprealloc.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/log-noprealloc.wtperf deleted file mode 100644 index 66032f599aa..00000000000 --- a/src/third_party/wiredtiger/bench/wtperf/runners/log-noprealloc.wtperf +++ /dev/null @@ -1,11 +0,0 @@ -# wtperf options file: Test performance with a log file enabled. -# Set the log file reasonably small to catch log-swtich bottle -# necks. 
-conn_config="cache_size=1G,log=(enabled=true,file_max=200K,prealloc=false),checkpoint=(log_size=500MB)" -table_config="type=file" -icount=50000 -report_interval=5 -run_time=120 -populate_threads=1 -random_range=50000000 -threads=((count=8,inserts=1)) diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/log.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/log.wtperf index 32a9cc3b0a6..6cf50dfb5a5 100644 --- a/src/third_party/wiredtiger/bench/wtperf/runners/log.wtperf +++ b/src/third_party/wiredtiger/bench/wtperf/runners/log.wtperf @@ -1,10 +1,27 @@ +# # wtperf options file: Test performance with a log file enabled. # Set the log file small to catch log-swtich bottlenecks. -conn_config="cache_size=1G,log=(enabled=true,file_max=200K),checkpoint=(log_size=500MB)" +# +# Perform updates instead of inserts to stress logging not eviction, +# page splits or reconciliation. Have it fit in cache. +# +# We expect this test can and will be run in other forms from the command +# line to change log file size, pre-allocation, zero filling, logging off +# and checkpoint off. +# +# Jenkins runs for perf testing: +# - Config as-is +# - Config + "-C "log=(enabled,file_max=1M)": small log files and switching +# - Config + "-C "log=(enabled,zero_fill=true,file_max=1M)": zero-filling +# - Config + "-C "checkpoint=(wait=0)": no checkpoints +# - Config + "-C "log=(enabled,prealloc=false,file_max=1M)": no pre-allocation +# +conn_config="cache_size=5G,log=(enabled=true),checkpoint=(log_size=500M),eviction=(threads_max=4)" table_config="type=file" -icount=50000 +icount=1000000 report_interval=5 -run_time=120 +run_time=180 populate_threads=1 -random_range=50000000 -threads=((count=8,inserts=1)) +threads=((count=8,updates=1)) +# Warm up the cache for a minute. 
+warmup=60 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-zipfian-populate.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-zipfian-populate.wtperf new file mode 100644 index 00000000000..ddd9c055eac --- /dev/null +++ b/src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-zipfian-populate.wtperf @@ -0,0 +1,19 @@ +# Create a set of tables with uneven distribution of data +conn_config="cache_size=1G,eviction=(threads_max=4),file_manager=(close_idle_time=100000),checkpoint=(wait=60,log_size=2GB),statistics=(fast),statistics_log=(wait=5,json),session_max=1000" +table_config="type=file" +table_count=100 +icount=0 +random_range=1000000000 +pareto=10 +range_partition=true +report_interval=5 + +run_ops=10000000 +populate_threads=0 +icount=0 +threads=((count=20,inserts=1)) + +# Warn if a latency over 1 second is seen +max_latency=1000 +sample_interval=5 +sample_rate=1 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-zipfian-workload.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-zipfian-workload.wtperf new file mode 100644 index 00000000000..380350c88c8 --- /dev/null +++ b/src/third_party/wiredtiger/bench/wtperf/runners/multi-btree-zipfian-workload.wtperf @@ -0,0 +1,18 @@ +# Read from a set of tables with uneven distribution of data +conn_config="cache_size=1G,eviction=(threads_max=4),file_manager=(close_idle_time=100000),checkpoint=(wait=60,log_size=2GB),statistics=(fast),statistics_log=(wait=5,json),session_max=1000" +table_config="type=file" +table_count=100 +icount=0 +random_range=1000000000 +pareto=10 +range_partition=true +report_interval=5 +create=false + +run_time=600 +threads=((count=20,reads=1)) + +# Warn if a latency over 1 second is seen +max_latency=1000 +sample_interval=5 +sample_rate=1 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/overflow-10k-short.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/overflow-10k-short.wtperf deleted file 
mode 100644 index 47228079db8..00000000000 --- a/src/third_party/wiredtiger/bench/wtperf/runners/overflow-10k-short.wtperf +++ /dev/null @@ -1,19 +0,0 @@ -# wtperf options file: simulate riak and a short form of its voxer config. -# The configuration for the connection and table are from riak and the -# specification of the data (count, size, threads) is from basho_bench. -# -#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)" -conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,lsm_manager=(worker_thread_max=6)" -compact=true -compression="snappy" -sess_config="isolation=snapshot" -table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB),type=lsm,leaf_page_max=16K,os_cache_dirty_max=16MB" -icount=15000 -key_sz=40 -value_sz=10000 -max_latency=2000 -populate_threads=1 -report_interval=5 -random_value=true -run_time=300 -threads=((count=10,read=1),(count=10,update=1)) diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/overflow-10k.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/overflow-10k.wtperf index 9b4ed2acaee..5d7eeea9cf2 100644 --- a/src/third_party/wiredtiger/bench/wtperf/runners/overflow-10k.wtperf +++ b/src/third_party/wiredtiger/bench/wtperf/runners/overflow-10k.wtperf @@ -1,9 +1,7 @@ -# wtperf options file: simulate riak and its test1 and test2 configuration -# The configuration for the connection and table are from riak and the -# specification of the data (count, size, threads) is from basho_bench. # -#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)" -conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,lsm_manager=(worker_thread_max=6)" +# Run with overflow items and LSM. 
+# +conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,lsm_manager=(worker_thread_max=6),statistics=(fast),statistics_log=(wait=10)" compact=true compression="snappy" sess_config="isolation=snapshot" @@ -13,8 +11,8 @@ key_sz=40 value_sz=10000 max_latency=2000 populate_threads=1 -report_interval=10 +report_interval=5 random_value=true -run_time=18000 -sample_interval=10 -threads=((count=20,read=1,update=1)) +run_time=300 +threads=((count=10,read=1),(count=10,update=1)) +warmup=30 diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/overflow-130k-short.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/overflow-130k-short.wtperf deleted file mode 100644 index 83f67062bf8..00000000000 --- a/src/third_party/wiredtiger/bench/wtperf/runners/overflow-130k-short.wtperf +++ /dev/null @@ -1,19 +0,0 @@ -# wtperf options file: simulate riak and a short form of its voxer config. -# The configuration for the connection and table are from riak and the -# specification of the data (count, size, threads) is from basho_bench. 
-# -#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)" -conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,lsm_manager=(worker_thread_max=6)" -compact=true -compression="snappy" -sess_config="isolation=snapshot" -table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB),type=lsm,leaf_page_max=16K,os_cache_dirty_max=16MB" -icount=15000 -key_sz=40 -value_sz=130000 -max_latency=2000 -populate_threads=1 -report_interval=5 -random_value=true -run_time=300 -threads=((count=10,read=1),(count=10,update=1)) diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/overflow-130k.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/overflow-130k.wtperf index a3439f0c575..2be01afd08a 100644 --- a/src/third_party/wiredtiger/bench/wtperf/runners/overflow-130k.wtperf +++ b/src/third_party/wiredtiger/bench/wtperf/runners/overflow-130k.wtperf @@ -1,20 +1,18 @@ -# wtperf options file: simulate riak and its test1 and test2 configuration -# The configuration for the connection and table are from riak and the -# specification of the data (count, size, threads) is from basho_bench. # -#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)" -conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,lsm_manager=(worker_thread_max=6)" +# Run with very large overflow items and btree. 
+# +conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,statistics=(fast),statistics_log=(wait=10)" compact=true compression="snappy" sess_config="isolation=snapshot" -table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB),type=lsm,leaf_page_max=16K,os_cache_dirty_max=16MB" +table_config="internal_page_max=128K,type=file,leaf_page_max=16K,os_cache_dirty_max=16MB,leaf_value_max=32K" icount=15000 key_sz=40 value_sz=130000 max_latency=2000 populate_threads=1 -report_interval=10 +report_interval=5 random_value=true -run_time=18000 -sample_interval=10 -threads=((count=20,read=1,update=1)) +run_time=300 +threads=((count=10,read=1),(count=10,update=1)) +warmup=30 diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.c b/src/third_party/wiredtiger/bench/wtperf/wtperf.c index 9d35f6fa640..58271106d61 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf.c +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.c @@ -36,7 +36,6 @@ static const CONFIG default_cfg = { NULL, /* reopen config */ NULL, /* base_uri */ NULL, /* uris */ - NULL, /* helium_mount */ NULL, /* conn */ NULL, /* logf */ NULL, /* async */ @@ -73,14 +72,14 @@ static const char * const debug_cconfig = ""; static const char * const debug_tconfig = ""; static void *checkpoint_worker(void *); -static int create_tables(CONFIG *); -static int drop_all_tables(CONFIG *); +static int drop_all_tables(CONFIG *); static int execute_populate(CONFIG *); static int execute_workload(CONFIG *); static int find_table_count(CONFIG *); static void *monitor(void *); static void *populate_thread(void *); static void randomize_value(CONFIG_THREAD *, char *); +static void recreate_dir(const char *); static int start_all_runs(CONFIG *); static int start_run(CONFIG *); static int start_threads(CONFIG *, @@ -93,10 +92,6 @@ static void *worker(void *); static uint64_t wtperf_rand(CONFIG_THREAD *); static uint64_t 
wtperf_value_range(CONFIG *); -#define HELIUM_NAME "dev1" -#define HELIUM_PATH \ - "../../ext/test/helium/.libs/libwiredtiger_helium.so" -#define HELIUM_CONFIG ",type=helium" #define INDEX_COL_NAMES ",columns=(key,val)" /* Retrieve an ID for the next insert operation. */ @@ -155,6 +150,23 @@ randomize_value(CONFIG_THREAD *thread, char *value_buf) } /* + * Partition data by key ranges. + */ +static uint32_t +map_key_to_table(CONFIG *cfg, uint64_t k) +{ + if (cfg->range_partition) { + /* Take care to return a result in [0..table_count-1]. */ + if (k > cfg->icount + cfg->random_range) + return (0); + return ((uint32_t)((k - 1) / + ((cfg->icount + cfg->random_range + cfg->table_count - 1) / + cfg->table_count))); + } else + return ((uint32_t)(k % cfg->table_count)); +} + +/* * Figure out and extend the size of the value string, used for growing * updates. We know that the value to be updated is in the threads value * scratch buffer. @@ -393,7 +405,7 @@ worker_async(void *arg) * Then retry to get an async op. */ while ((ret = conn->async_new_op( - conn, cfg->uris[next_val % cfg->table_count], + conn, cfg->uris[map_key_to_table(cfg, next_val)], NULL, &cb, &asyncop)) == EBUSY) (void)usleep(10000); if (ret != 0) @@ -466,7 +478,7 @@ do_range_reads(CONFIG *cfg, WT_CURSOR *cursor) range_key_buf = &buf[0]; /* Save where the first key is for comparisons. */ - cursor->get_key(cursor, &range_key_buf); + testutil_check(cursor->get_key(cursor, &range_key_buf)); extract_key(range_key_buf, &next_val); for (range = 0; range < cfg->read_range; ++range) { @@ -477,7 +489,7 @@ do_range_reads(CONFIG *cfg, WT_CURSOR *cursor) break; /* Retrieve and decode the key */ - cursor->get_key(cursor, &range_key_buf); + testutil_check(cursor->get_key(cursor, &range_key_buf)); extract_key(range_key_buf, &next_val); if (next_val < prev_val) { lprintf(cfg, EINVAL, 0, @@ -547,9 +559,8 @@ worker(void *arg) } } /* Setup the timer for throttling. 
*/ - if (thread->workload->throttle != 0 && - (ret = setup_throttle(thread)) != 0) - goto err; + if (thread->workload->throttle != 0) + setup_throttle(thread); /* Setup for truncate */ if (thread->workload->truncate != 0) @@ -611,7 +622,7 @@ worker(void *arg) /* * Spread the data out around the multiple databases. */ - cursor = cursors[next_val % cfg->table_count]; + cursor = cursors[map_key_to_table(cfg, next_val)]; /* * Skip the first time we do an operation, when trk->ops @@ -1010,7 +1021,7 @@ populate_thread(void *arg) /* * Figure out which table this op belongs to. */ - cursor = cursors[op % cfg->table_count]; + cursor = cursors[map_key_to_table(cfg, op)]; generate_key(cfg, key_buf, op); measure_latency = cfg->sample_interval != 0 && @@ -1148,7 +1159,7 @@ populate_async(void *arg) * Allocate an async op for whichever table. */ while ((ret = conn->async_new_op( - conn, cfg->uris[op % cfg->table_count], + conn, cfg->uris[map_key_to_table(cfg, op)], NULL, &cb, &asyncop)) == EBUSY) (void)usleep(10000); if (ret != 0) @@ -1858,7 +1869,7 @@ create_uris(CONFIG *cfg) base_uri_len = strlen(cfg->base_uri); cfg->uris = dcalloc(cfg->table_count, sizeof(char *)); for (i = 0; i < cfg->table_count; i++) { - uri = cfg->uris[i] = dcalloc(base_uri_len + 5, 1); + uri = cfg->uris[i] = dcalloc(base_uri_len + 6, 1); /* * If there is only one table, just use base name. */ @@ -1877,9 +1888,6 @@ create_tables(CONFIG *cfg) int ret; char buf[512]; - if (cfg->create == 0) - return (0); - if ((ret = cfg->conn->open_session( cfg->conn, NULL, cfg->sess_config, &session)) != 0) { lprintf(cfg, ret, 0, @@ -1971,13 +1979,10 @@ start_all_runs(CONFIG *cfg) if (strcmp(cfg->monitor_dir, cfg->home) == 0) next_cfg->monitor_dir = new_home; - /* Create clean home directories. 
*/ - snprintf(cmd_buf, cmd_len, "rm -rf %s && mkdir %s", - next_cfg->home, next_cfg->home); - if ((ret = system(cmd_buf)) != 0) { - fprintf(stderr, "%s: failed\n", cmd_buf); - goto err; - } + /* If creating the sub-database, recreate it's home */ + if (cfg->create != 0) + recreate_dir(next_cfg->home); + if ((ret = pthread_create( &threads[i], NULL, thread_run_wtperf, next_cfg)) != 0) { lprintf(cfg, ret, 0, "Error creating thread"); @@ -2024,8 +2029,8 @@ start_run(CONFIG *cfg) { pthread_t monitor_thread; uint64_t total_ops; + uint32_t run_time; int monitor_created, ret, t_ret; - char helium_buf[256]; monitor_created = ret = 0; /* [-Wconditional-uninitialized] */ @@ -2040,21 +2045,10 @@ start_run(CONFIG *cfg) goto err; } - /* Configure optional Helium volume. */ - if (cfg->helium_mount != NULL) { - snprintf(helium_buf, sizeof(helium_buf), - "entry=wiredtiger_extension_init,config=[" - "%s=[helium_devices=\"he://./%s\"," - "helium_o_volume_truncate=1]]", - HELIUM_NAME, cfg->helium_mount); - if ((ret = cfg->conn->load_extension( - cfg->conn, HELIUM_PATH, helium_buf)) != 0) - lprintf(cfg, - ret, 0, "Error loading Helium: %s", helium_buf); - } - create_uris(cfg); - if ((ret = create_tables(cfg)) != 0) + + /* If creating, create the tables. */ + if (cfg->create != 0 && (ret = create_tables(cfg)) != 0) goto err; /* Start the monitor thread. */ @@ -2083,7 +2077,8 @@ start_run(CONFIG *cfg) goto err; /* Didn't create, set insert count. */ - if (cfg->create == 0 && find_table_count(cfg) != 0) + if (cfg->create == 0 && cfg->random_range == 0 && + find_table_count(cfg) != 0) goto err; /* Start the checkpoint thread. */ if (cfg->checkpoint_threads != 0) { @@ -2108,26 +2103,27 @@ start_run(CONFIG *cfg) cfg->ckpt_ops = sum_ckpt_ops(cfg); total_ops = cfg->read_ops + cfg->insert_ops + cfg->update_ops; + run_time = cfg->run_time == 0 ? 
1 : cfg->run_time; lprintf(cfg, 0, 1, "Executed %" PRIu64 " read operations (%" PRIu64 "%%) %" PRIu64 " ops/sec", cfg->read_ops, (cfg->read_ops * 100) / total_ops, - cfg->read_ops / cfg->run_time); + cfg->read_ops / run_time); lprintf(cfg, 0, 1, "Executed %" PRIu64 " insert operations (%" PRIu64 "%%) %" PRIu64 " ops/sec", cfg->insert_ops, (cfg->insert_ops * 100) / total_ops, - cfg->insert_ops / cfg->run_time); + cfg->insert_ops / run_time); lprintf(cfg, 0, 1, "Executed %" PRIu64 " truncate operations (%" PRIu64 "%%) %" PRIu64 " ops/sec", cfg->truncate_ops, (cfg->truncate_ops * 100) / total_ops, - cfg->truncate_ops / cfg->run_time); + cfg->truncate_ops / run_time); lprintf(cfg, 0, 1, "Executed %" PRIu64 " update operations (%" PRIu64 "%%) %" PRIu64 " ops/sec", cfg->update_ops, (cfg->update_ops * 100) / total_ops, - cfg->update_ops / cfg->run_time); + cfg->update_ops / run_time); lprintf(cfg, 0, 1, "Executed %" PRIu64 " checkpoint operations", cfg->ckpt_ops); @@ -2182,18 +2178,21 @@ err: if (ret == 0) extern int __wt_optind, __wt_optreset; extern char *__wt_optarg; +void (*custom_die)(void) = NULL; int main(int argc, char *argv[]) { CONFIG *cfg, _cfg; size_t req_len, sreq_len; - int ch, monitor_set, ret; - const char *opts = "C:H:h:m:O:o:T:"; + bool monitor_set; + int ch, ret; + const char *opts = "C:h:m:O:o:T:"; const char *config_opts; char *cc_buf, *sess_cfg, *tc_buf, *user_cconfig, *user_tconfig; - monitor_set = ret = 0; + monitor_set = false; + ret = 0; config_opts = NULL; cc_buf = sess_cfg = tc_buf = user_cconfig = user_tconfig = NULL; @@ -2219,8 +2218,12 @@ main(int argc, char *argv[]) strcat(user_cconfig, __wt_optarg); } break; - case 'H': - cfg->helium_mount = __wt_optarg; + case 'h': + cfg->home = __wt_optarg; + break; + case 'm': + cfg->monitor_dir = __wt_optarg; + monitor_set = true; break; case 'O': config_opts = __wt_optarg; @@ -2236,15 +2239,7 @@ main(int argc, char *argv[]) strcat(user_tconfig, __wt_optarg); } break; - case 'h': - cfg->home = 
__wt_optarg; - break; - case 'm': - cfg->monitor_dir = __wt_optarg; - monitor_set = 1; - break; case '?': - fprintf(stderr, "Invalid option\n"); usage(); goto einval; } @@ -2300,7 +2295,7 @@ main(int argc, char *argv[]) * to 4096 if needed. */ req_len = strlen(",async=(enabled=true,threads=)") + 4; - cfg->async_config = dcalloc(req_len, 1); + cfg->async_config = dmalloc(req_len); snprintf(cfg->async_config, req_len, ",async=(enabled=true,threads=%" PRIu32 ")", cfg->async_threads); @@ -2321,13 +2316,9 @@ main(int argc, char *argv[]) } /* Build the URI from the table name. */ - req_len = strlen("table:") + - strlen(HELIUM_NAME) + strlen(cfg->table_name) + 2; - cfg->base_uri = dcalloc(req_len, 1); - snprintf(cfg->base_uri, req_len, "table:%s%s%s", - cfg->helium_mount == NULL ? "" : HELIUM_NAME, - cfg->helium_mount == NULL ? "" : "/", - cfg->table_name); + req_len = strlen("table:") + strlen(cfg->table_name) + 2; + cfg->base_uri = dmalloc(req_len); + snprintf(cfg->base_uri, req_len, "table:%s", cfg->table_name); /* Make stdout line buffered, so verbose output appears quickly. */ __wt_stream_set_line_buffer(stdout); @@ -2346,13 +2337,13 @@ main(int argc, char *argv[]) if (cfg->session_count_idle > 0) { sreq_len = strlen(",session_max=") + 6; req_len += sreq_len; - sess_cfg = dcalloc(sreq_len, 1); + sess_cfg = dmalloc(sreq_len); snprintf(sess_cfg, sreq_len, ",session_max=%" PRIu32, cfg->session_count_idle + cfg->workers_cnt + cfg->populate_threads + 10); } - cc_buf = dcalloc(req_len, 1); + cc_buf = dmalloc(req_len); /* * This is getting hard to parse. 
*/ @@ -2368,36 +2359,34 @@ main(int argc, char *argv[]) if ((ret = config_opt_str(cfg, "conn_config", cc_buf)) != 0) goto err; } - if (cfg->verbose > 1 || cfg->index || cfg->helium_mount != NULL || + if (cfg->verbose > 1 || cfg->index || user_tconfig != NULL || cfg->compress_table != NULL) { - req_len = strlen(cfg->table_config) + strlen(HELIUM_CONFIG) + - strlen(debug_tconfig) + 3; + req_len = strlen(cfg->table_config) + strlen(debug_tconfig) + 3; if (user_tconfig != NULL) req_len += strlen(user_tconfig); if (cfg->compress_table != NULL) req_len += strlen(cfg->compress_table); if (cfg->index) req_len += strlen(INDEX_COL_NAMES); - tc_buf = dcalloc(req_len, 1); + tc_buf = dmalloc(req_len); /* * This is getting hard to parse. */ - snprintf(tc_buf, req_len, "%s%s%s%s%s%s%s%s", + snprintf(tc_buf, req_len, "%s%s%s%s%s%s%s", cfg->table_config, cfg->index ? INDEX_COL_NAMES : "", cfg->compress_table ? cfg->compress_table : "", cfg->verbose > 1 ? ",": "", cfg->verbose > 1 ? debug_tconfig : "", user_tconfig ? ",": "", - user_tconfig ? user_tconfig : "", - cfg->helium_mount == NULL ? "" : HELIUM_CONFIG); + user_tconfig ? user_tconfig : ""); if ((ret = config_opt_str(cfg, "table_config", tc_buf)) != 0) goto err; } if (cfg->log_partial && cfg->table_count > 1) { req_len = strlen(cfg->table_config) + strlen(LOG_PARTIAL_CONFIG) + 1; - cfg->partial_config = dcalloc(req_len, 1); + cfg->partial_config = dmalloc(req_len); snprintf(cfg->partial_config, req_len, "%s%s", cfg->table_config, LOG_PARTIAL_CONFIG); } @@ -2410,7 +2399,7 @@ main(int argc, char *argv[]) strlen(READONLY_CONFIG) + 1; else req_len = strlen(cfg->conn_config) + 1; - cfg->reopen_config = dcalloc(req_len, 1); + cfg->reopen_config = dmalloc(req_len); if (cfg->readonly) snprintf(cfg->reopen_config, req_len, "%s%s", cfg->conn_config, READONLY_CONFIG); @@ -2422,6 +2411,10 @@ main(int argc, char *argv[]) if ((ret = config_sanity(cfg)) != 0) goto err; + /* If creating, remove and re-create the home directory. 
*/ + if (cfg->create != 0) + recreate_dir(cfg->home); + /* Write a copy of the config. */ config_to_file(cfg); @@ -2536,6 +2529,19 @@ stop_threads(CONFIG *cfg, u_int num, CONFIG_THREAD *threads) return (0); } +static void +recreate_dir(const char *name) +{ + char *buf; + size_t len; + + len = strlen(name) * 2 + 100; + buf = dmalloc(len); + (void)snprintf(buf, len, "rm -rf %s && mkdir %s", name, name); + testutil_checkfmt(system(buf), "system: %s", buf); + free(buf); +} + static int drop_all_tables(CONFIG *cfg) { @@ -2615,7 +2621,7 @@ wtperf_rand(CONFIG_THREAD *thread) * first item in the table being "hot". */ if (rval > wtperf_value_range(cfg)) - rval = wtperf_value_range(cfg); + rval = 0; } /* * Wrap the key to within the expected range and avoid zero: we never diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.h b/src/third_party/wiredtiger/bench/wtperf/wtperf.h index d874fa4eefe..27c3832d316 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf.h +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.h @@ -29,14 +29,11 @@ #ifndef HAVE_WTPERF_H #define HAVE_WTPERF_H -#include <wt_internal.h> +#include "test_util.h" + #include <assert.h> #include <math.h> -#ifdef _WIN32 -#include "windows_shim.h" -#endif - #include "config_opt.h" typedef struct __config CONFIG; @@ -83,7 +80,6 @@ typedef struct { typedef struct { uint64_t stone_gap; uint64_t needed_stones; - uint64_t final_stone_gap; uint64_t expected_total; uint64_t total_inserts; uint64_t last_total_inserts; @@ -126,7 +122,6 @@ struct __config { /* Configuration structure */ char *reopen_config; /* Config string for conn reopen */ char *base_uri; /* Object URI */ char **uris; /* URIs if multiple tables */ - const char *helium_mount; /* Optional Helium mount point */ WT_CONNECTION *conn; /* Database connection */ @@ -281,7 +276,7 @@ void latency_print(CONFIG *); int run_truncate( CONFIG *, CONFIG_THREAD *, WT_CURSOR *, WT_SESSION *, int *); int setup_log_file(CONFIG *); -int 
setup_throttle(CONFIG_THREAD*); +void setup_throttle(CONFIG_THREAD*); int setup_truncate(CONFIG *, CONFIG_THREAD *, WT_SESSION *); int start_idle_table_cycle(CONFIG *, pthread_t *); int stop_idle_table_cycle(CONFIG *, pthread_t); @@ -292,7 +287,7 @@ uint64_t sum_read_ops(CONFIG *); uint64_t sum_truncate_ops(CONFIG *); uint64_t sum_update_ops(CONFIG *); void usage(void); -int worker_throttle(CONFIG_THREAD*); +void worker_throttle(CONFIG_THREAD*); void lprintf(const CONFIG *, int err, uint32_t, const char *, ...) #if defined(__GNUC__) @@ -328,75 +323,4 @@ die(int e, const char *str) fprintf(stderr, "Call to %s failed: %s", str, wiredtiger_strerror(e)); exit(EXIT_FAILURE); } - -/* - * dmalloc -- - * Call malloc, dying on failure. - */ -static inline void * -dmalloc(size_t len) -{ - void *p; - - if ((p = malloc(len)) == NULL) - die(errno, "malloc"); - return (p); -} - -/* - * dcalloc -- - * Call calloc, dying on failure. - */ -static inline void * -dcalloc(size_t num, size_t size) -{ - void *p; - - if ((p = calloc(num, size)) == NULL) - die(errno, "calloc"); - return (p); -} - -/* - * drealloc -- - * Call realloc, dying on failure. - */ -static inline void * -drealloc(void *p, size_t len) -{ - void *repl; - - if ((repl = realloc(p, len)) == NULL) - die(errno, "realloc"); - return (repl); -} - -/* - * dstrdup -- - * Call strdup, dying on failure. - */ -static inline char * -dstrdup(const char *str) -{ - char *p; - - if ((p = strdup(str)) == NULL) - die(errno, "strdup"); - return (p); -} - -/* - * dstrndup -- - * Call emulating strndup, dying on failure. Don't use actual strndup here - * as it is not supported within MSVC. 
- */ -static inline char * -dstrndup(const char *str, const size_t len) -{ - char *p; - - p = dcalloc(len + 1, sizeof(char)); - memcpy(p, str, len); - return (p); -} #endif diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i index 2afd20f777f..f6c96febc85 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i @@ -144,6 +144,7 @@ DEF_OPT_AS_UINT32(random_range, 0, "if non zero choose a value from within this range as the key for " "insert operations") DEF_OPT_AS_BOOL(random_value, 0, "generate random content for the value") +DEF_OPT_AS_BOOL(range_partition, 0, "partition data by range (vs hash)") DEF_OPT_AS_UINT32(read_range, 0, "scan a range of keys after each search") DEF_OPT_AS_BOOL(readonly, 0, "reopen the connection between populate and workload phases in readonly " diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c b/src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c index a98fd9b18d7..e49bca00d07 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c @@ -31,7 +31,7 @@ /* * Put the initial config together for running a throttled workload. */ -int +void setup_throttle(CONFIG_THREAD *thread) { THROTTLE_CONFIG *throttle_cfg; @@ -70,15 +70,14 @@ setup_throttle(CONFIG_THREAD *thread) throttle_cfg->ops_count = throttle_cfg->ops_per_increment; /* Set the first timestamp of when we incremented */ - WT_RET(__wt_epoch(NULL, &throttle_cfg->last_increment)); - return (0); + testutil_check(__wt_epoch(NULL, &throttle_cfg->last_increment)); } /* * Run the throttle function. We will sleep if needed and then reload the * counter to perform more operations. 
*/ -int +void worker_throttle(CONFIG_THREAD *thread) { THROTTLE_CONFIG *throttle_cfg; @@ -87,7 +86,7 @@ worker_throttle(CONFIG_THREAD *thread) throttle_cfg = &thread->throttle_cfg; - WT_RET(__wt_epoch(NULL, &now)); + testutil_check(__wt_epoch(NULL, &now)); /* * If we did enough operations in the current interval, sleep for @@ -102,7 +101,7 @@ worker_throttle(CONFIG_THREAD *thread) /* * After sleeping, set the interval to the current time. */ - WT_RET(__wt_epoch(NULL, &throttle_cfg->last_increment)); + testutil_check(__wt_epoch(NULL, &throttle_cfg->last_increment)); } else { throttle_cfg->ops_count = (usecs_delta * throttle_cfg->ops_per_increment) / @@ -115,6 +114,4 @@ worker_throttle(CONFIG_THREAD *thread) */ throttle_cfg->ops_count = WT_MIN(throttle_cfg->ops_count, thread->workload->throttle); - - return (0); } diff --git a/src/third_party/wiredtiger/build_posix/Make.subdirs b/src/third_party/wiredtiger/build_posix/Make.subdirs index 64749378ed1..0b5175e4196 100644 --- a/src/third_party/wiredtiger/build_posix/Make.subdirs +++ b/src/third_party/wiredtiger/build_posix/Make.subdirs @@ -18,14 +18,15 @@ ext/extractors/csv ext/test/kvs_bdb HAVE_BERKELEY_DB . api/leveldb LEVELDB -bench/wtperf examples/c lang/java JAVA examples/java JAVA lang/python PYTHON -# Make the tests +# Test/Benchmark support library. test/utility + +# Test programs. test/bloom test/checkpoint test/csuite @@ -39,3 +40,6 @@ test/readonly test/recovery test/salvage test/thread + +# Benchmark programs. +bench/wtperf diff --git a/src/third_party/wiredtiger/build_posix/aclocal/ax_pkg_swig.m4 b/src/third_party/wiredtiger/build_posix/aclocal/ax_pkg_swig.m4 index 9ebdeb531b9..89941bc3fa9 100644 --- a/src/third_party/wiredtiger/build_posix/aclocal/ax_pkg_swig.m4 +++ b/src/third_party/wiredtiger/build_posix/aclocal/ax_pkg_swig.m4 @@ -32,9 +32,9 @@ # LICENSE # # Copyright (c) 2008 Sebastian Huber <sebastian-huber@web.de> -# Copyright (c) 2008 Alan W. 
Irwin <irwin@beluga.phys.uvic.ca> +# Copyright (c) 2008 Alan W. Irwin # Copyright (c) 2008 Rafael Laboissiere <rafael@laboissiere.net> -# Copyright (c) 2008 Andrew Collier <colliera@ukzn.ac.za> +# Copyright (c) 2008 Andrew Collier # Copyright (c) 2011 Murray Cumming <murrayc@openismus.com> # # This program is free software; you can redistribute it and/or modify it @@ -63,11 +63,11 @@ # modified version of the Autoconf Macro, you may extend this special # exception to the GPL to apply to your modified version as well. -#serial 8 +#serial 11 AC_DEFUN([AX_PKG_SWIG],[ - # Some systems have SWIG 2.0 named "swig2.0" - AC_PATH_PROGS([SWIG],[swig2.0 swig]) + # Ubuntu has swig 2.0 as /usr/bin/swig2.0 + AC_PATH_PROGS([SWIG],[swig swig3.0 swig2.0]) if test -z "$SWIG" ; then m4_ifval([$3],[$3],[:]) elif test -n "$1" ; then diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index 90b1c8378a2..1302247e88e 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -247,8 +247,8 @@ file_config = format_meta + [ Config('memory_page_max', '5MB', r''' the maximum size a page can grow to in memory before being reconciled to disk. The specified size will be adjusted to a lower - bound of <code>50 * leaf_page_max</code>, and an upper bound of - <code>cache_size / 2</code>. This limit is soft - it is possible + bound of <code>leaf_page_max</code>, and an upper bound of + <code>cache_size / 10</code>. This limit is soft - it is possible for pages to be temporarily larger than this value. This setting is ignored for LSM trees, see \c chunk_size''', min='512B', max='10TB'), @@ -373,8 +373,6 @@ connection_runtime_config = [ periodically checkpoint the database. 
Enabling the checkpoint server uses a session from the configured session_max''', type='category', subconfig=[ - Config('name', '"WiredTigerCheckpoint"', r''' - the checkpoint name'''), Config('log_size', '0', r''' wait for this amount of log record bytes to be written to the log between each checkpoint. A database can configure @@ -388,16 +386,31 @@ connection_runtime_config = [ ]), Config('error_prefix', '', r''' prefix string for error messages'''), - Config('eviction_dirty_target', '80', r''' + Config('eviction', '', r''' + eviction configuration options.''', + type='category', subconfig=[ + Config('threads_max', '1', r''' + maximum number of threads WiredTiger will start to help evict + pages from cache. The number of threads started will vary + depending on the current eviction load. Each eviction worker + thread uses a session from the configured session_max''', + min=1, max=20), + Config('threads_min', '1', r''' + minimum number of threads WiredTiger will start to help evict + pages from cache. The number of threads currently running will + vary depending on the current eviction load''', + min=1, max=20), + ]), + Config('eviction_dirty_target', '5', r''' continue evicting until the cache has less dirty memory than the value, as a percentage of the total cache size. Dirty pages will only be evicted if the cache is full enough to trigger eviction''', - min=5, max=99), - Config('eviction_dirty_trigger', '95', r''' + min=1, max=99), + Config('eviction_dirty_trigger', '20', r''' trigger eviction when the cache is using this much memory for dirty content, as a percentage of the total cache size. This setting only alters behavior if it is lower than eviction_trigger''', - min=5, max=99), + min=1, max=99), Config('eviction_target', '80', r''' continue evicting until the cache has less total memory than the value, as a percentage of the total cache size. 
Must be less than @@ -420,40 +433,6 @@ connection_runtime_config = [ interval in seconds at which to check for files that are inactive and close them''', min=1, max=100000), ]), - Config('log', '', r''' - enable logging. Enabling logging uses three sessions from the - configured session_max''', - type='category', subconfig=[ - Config('archive', 'true', r''' - automatically archive unneeded log files''', - type='boolean'), - Config('compressor', 'none', r''' - configure a compressor for log records. Permitted values are - \c "none" or custom compression engine name created with - WT_CONNECTION::add_compressor. If WiredTiger has builtin support - for \c "snappy", \c "lz4" or \c "zlib" compression, these names - are also available. See @ref compression for more information'''), - Config('enabled', 'false', r''' - enable logging subsystem''', - type='boolean'), - Config('file_max', '100MB', r''' - the maximum size of log files''', - min='100KB', max='2GB'), - Config('path', '"."', r''' - the path to a directory into which the log files are written. - If the value is not an absolute path name, the files are created - relative to the database home'''), - Config('prealloc', 'true', r''' - pre-allocate log files.''', - type='boolean'), - Config('recover', 'on', r''' - run recovery or error if recovery needs to run after an - unclean shutdown.''', - choices=['error','on']), - Config('zero_fill', 'false', r''' - manually write zeroes into log files''', - type='boolean'), - ]), Config('lsm_manager', '', r''' configure database wide options for LSM tree management. The LSM manager is started automatically the first time an LSM tree is opened. 
@@ -472,21 +451,6 @@ connection_runtime_config = [ Config('lsm_merge', 'true', r''' merge LSM chunks where possible (deprecated)''', type='boolean', undoc=True), - Config('eviction', '', r''' - eviction configuration options.''', - type='category', subconfig=[ - Config('threads_max', '1', r''' - maximum number of threads WiredTiger will start to help evict - pages from cache. The number of threads started will vary - depending on the current eviction load. Each eviction worker - thread uses a session from the configured session_max''', - min=1, max=20), - Config('threads_min', '1', r''' - minimum number of threads WiredTiger will start to help evict - pages from cache. The number of threads currently running will - vary depending on the current eviction load''', - min=1, max=20), - ]), Config('shared_cache', '', r''' shared cache configuration options. A database should configure either a cache_size or a shared_cache not both. Enabling a @@ -525,38 +489,6 @@ connection_runtime_config = [ are logged using the \c statistics_log configuration. See @ref statistics for more information''', type='list', choices=['all', 'fast', 'none', 'clear']), - Config('statistics_log', '', r''' - log any statistics the database is configured to maintain, - to a file. See @ref statistics for more information. Enabling - the statistics log server uses a session from the configured - session_max''', - type='category', subconfig=[ - Config('json', 'false', r''' - encode statistics in JSON format''', - type='boolean'), - Config('on_close', 'false', r'''log statistics on database close''', - type='boolean'), - Config('path', '"WiredTigerStat.%d.%H"', r''' - the pathname to a file into which the log records are written, - may contain ISO C standard strftime conversion specifications. 
- If the value is not an absolute path name, the file is created - relative to the database home'''), - Config('sources', '', r''' - if non-empty, include statistics for the list of data source - URIs, if they are open at the time of the statistics logging. - The list may include URIs matching a single data source - ("table:mytable"), or a URI matching all data sources of a - particular type ("table:")''', - type='list'), - Config('timestamp', '"%b %d %H:%M:%S"', r''' - a timestamp prepended to each log record, may contain strftime - conversion specifications, when \c json is configured, defaults - to \c "%FT%Y.000Z"'''), - Config('wait', '0', r''' - seconds to wait between each write of the log records; setting - this value above 0 configures statistics logging''', - min='0', max='100000'), - ]), Config('verbose', '', r''' enable messages for various events. Only available if WiredTiger is configured with --enable-verbose. Options are given as a @@ -590,13 +522,113 @@ connection_runtime_config = [ 'write']), ] +# wiredtiger_open and WT_CONNECTION.reconfigure log configurations. +log_configuration_common = [ + Config('archive', 'true', r''' + automatically archive unneeded log files''', + type='boolean'), + Config('prealloc', 'true', r''' + pre-allocate log files.''', + type='boolean'), + Config('zero_fill', 'false', r''' + manually write zeroes into log files''', + type='boolean') +] +connection_reconfigure_log_configuration = [ + Config('log', '', r''' + enable logging. Enabling logging uses three sessions from the + configured session_max''', + type='category', subconfig= + log_configuration_common) +] +wiredtiger_open_log_configuration = [ + Config('log', '', r''' + enable logging. 
Enabling logging uses three sessions from the + configured session_max''', + type='category', subconfig= + log_configuration_common + [ + Config('enabled', 'false', r''' + enable logging subsystem''', + type='boolean'), + Config('compressor', 'none', r''' + configure a compressor for log records. Permitted values are + \c "none" or custom compression engine name created with + WT_CONNECTION::add_compressor. If WiredTiger has builtin support + for \c "snappy", \c "lz4" or \c "zlib" compression, these names + are also available. See @ref compression for more information'''), + Config('file_max', '100MB', r''' + the maximum size of log files''', + min='100KB', max='2GB'), + Config('path', '"."', r''' + the name of a directory into which log files are written. The + directory must already exist. If the value is not an absolute + path, the path is relative to the database home (see @ref + absolute_path for more information)'''), + Config('recover', 'on', r''' + run recovery or error if recovery needs to run after an + unclean shutdown''', + choices=['error','on']) + ]), +] + +# wiredtiger_open and WT_CONNECTION.reconfigure statistics log configurations. +statistics_log_configuration_common = [ + Config('json', 'false', r''' + encode statistics in JSON format''', + type='boolean'), + Config('on_close', 'false', r'''log statistics on database close''', + type='boolean'), + Config('sources', '', r''' + if non-empty, include statistics for the list of data source + URIs, if they are open at the time of the statistics logging. 
+ The list may include URIs matching a single data source + ("table:mytable"), or a URI matching all data sources of a + particular type ("table:")''', + type='list'), + Config('timestamp', '"%b %d %H:%M:%S"', r''' + a timestamp prepended to each log record, may contain strftime + conversion specifications, when \c json is configured, defaults + to \c "%FT%Y.000Z"'''), + Config('wait', '0', r''' + seconds to wait between each write of the log records; setting + this value above 0 configures statistics logging''', + min='0', max='100000'), +] +connection_reconfigure_statistics_log_configuration = [ + Config('statistics_log', '', r''' + log any statistics the database is configured to maintain, + to a file. See @ref statistics for more information. Enabling + the statistics log server uses a session from the configured + session_max''', + type='category', subconfig= + statistics_log_configuration_common) +] +wiredtiger_open_statistics_log_configuration = [ + Config('statistics_log', '', r''' + log any statistics the database is configured to maintain, + to a file. See @ref statistics for more information. Enabling + the statistics log server uses a session from the configured + session_max''', + type='category', subconfig= + statistics_log_configuration_common + [ + Config('path', '"."', r''' + the name of a directory into which statistics files are written. + The directory must already exist. 
If the value is not an absolute + path, the path is relative to the database home (see @ref + absolute_path for more information)''') + ]) +] + session_config = [ Config('isolation', 'read-committed', r''' the default isolation level for operations in this session''', choices=['read-uncommitted', 'read-committed', 'snapshot']), ] -wiredtiger_open_common = connection_runtime_config + [ +wiredtiger_open_common =\ + connection_runtime_config +\ + wiredtiger_open_log_configuration +\ + wiredtiger_open_statistics_log_configuration + [ Config('buffer_alignment', '-1', r''' in-memory alignment (in bytes) for buffers used for I/O. The default value of -1 indicates a platform-specific alignment value @@ -788,8 +820,9 @@ methods = { 'WT_SESSION.drop' : Method([ Config('checkpoint_wait', 'true', r''' - wait for the checkpoint lock, if \c checkpoint_wait=false, fail if - this lock is not available immediately''', + wait for the checkpoint lock, if \c checkpoint_wait=false, perform + the drop operation without taking a lock, returning EBUSY if the + operation conflicts with a running checkpoint''', type='boolean', undoc=True), Config('force', 'false', r''' return success if the object does not exist''', @@ -870,6 +903,11 @@ methods = { "WiredTigerCheckpoint" opens the most recent internal checkpoint taken for the object). 
The cursor does not support data modification'''), + Config('checkpoint_wait', 'true', r''' + wait for the checkpoint lock, if \c checkpoint_wait=false, open the + cursor without taking a lock, returning EBUSY if the operation + conflicts with a running checkpoint''', + type='boolean', undoc=True), Config('dump', '', r''' configure the cursor for dump format inputs and outputs: "hex" selects a simple hexadecimal format, "json" selects a JSON format @@ -1084,7 +1122,11 @@ methods = { don't free memory during close''', type='boolean'), ]), -'WT_CONNECTION.reconfigure' : Method(connection_runtime_config), +'WT_CONNECTION.reconfigure' : Method( + connection_reconfigure_log_configuration +\ + connection_reconfigure_statistics_log_configuration +\ + connection_runtime_config +), 'WT_CONNECTION.set_file_system' : Method([]), 'WT_CONNECTION.load_extension' : Method([ diff --git a/src/third_party/wiredtiger/dist/flags.py b/src/third_party/wiredtiger/dist/flags.py index b5f36fb707a..8091283a8c0 100644 --- a/src/third_party/wiredtiger/dist/flags.py +++ b/src/third_party/wiredtiger/dist/flags.py @@ -37,10 +37,13 @@ flags = { 'READ_WONT_NEED', ], 'rec_write' : [ + 'CHECKPOINTING', + 'EVICTING', 'EVICT_IN_MEMORY', + 'EVICT_INMEM_SPLIT', 'EVICT_LOOKASIDE', + 'EVICT_SCRUB', 'EVICT_UPDATE_RESTORE', - 'EVICTING', 'VISIBILITY_ERR', ], 'txn_log_checkpoint' : [ diff --git a/src/third_party/wiredtiger/dist/s_all b/src/third_party/wiredtiger/dist/s_all index 46a68864906..33b8f6a76ba 100755 --- a/src/third_party/wiredtiger/dist/s_all +++ b/src/third_party/wiredtiger/dist/s_all @@ -15,6 +15,8 @@ echo 'dist/s_all run started...' force= reconf=0 +errmode=0 +errfound=0 while : do case "$1" in -A) # Reconfigure the library build. 
@@ -23,6 +25,9 @@ while : -f) # Force versions to be updated force="-f" shift;; + -E) # Return an error code on failure + errmode=1 + shift;; *) break;; esac @@ -48,6 +53,14 @@ errchk() echo "#######################" rm -f $2 + + # Some tests shouldn't return an error, we exclude them here. + case "$1" in + *s_export*) + break;; + *) + errfound=1;; + esac } run() @@ -108,3 +121,6 @@ for f in `find . -name ${t_pfx}\*`; do done echo 'dist/s_all run finished' +if test $errmode -ne 0; then + exit $errfound; +fi diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index 7966ff2cf2e..8c5f1e99bff 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -86,6 +86,7 @@ DbEnv Decrement Decrypt DeleteFileA +EACCES EAGAIN EB EBUSY @@ -117,6 +118,7 @@ FLv FNV FORALL FOREACH +FS FULLFSYNC FindClose FindFirstFile @@ -204,6 +206,7 @@ MERCHANTABILITY METADATA MONGODB MSVC +MULTI MULTIBLOCK MUTEX Manos @@ -326,6 +329,7 @@ UID UIDs UINT ULINE +UNC URI URIs UTF @@ -528,6 +532,7 @@ cust customp cv cxa +dT data's database's datalen @@ -557,6 +562,7 @@ dequeued der dereference desc +designator dest destSize dev @@ -932,6 +938,7 @@ prepend prepended prepending presize +presync primary's printf printlog @@ -1065,6 +1072,7 @@ tV tablename tcbench td +tempdir testutil th tid @@ -1091,6 +1099,7 @@ txn txnc txnid txnmin +txt typedef uB uS diff --git a/src/third_party/wiredtiger/dist/s_style b/src/third_party/wiredtiger/dist/s_style index a222c004cc3..e33db5a5fab 100755 --- a/src/third_party/wiredtiger/dist/s_style +++ b/src/third_party/wiredtiger/dist/s_style @@ -33,7 +33,7 @@ else exit 1; fi - egrep -w 'a a|an an|and and|are are|be be|by by|for for|from from|if if|in in|is is|it it|of of|the the|this this|to to|was was|were were|when when|with with|a an|an a|a the|the a' $f > $t + egrep -w 'a a|an an|and and|are are|be be|by by|for for|from from|if if|in in[^-]|is is|it it|of of|the the|this 
this|to to|was was|were were|when when|with with|a an|an a|a the|the a' $f > $t test -s $t && { echo "paired typo" echo "============================" diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index 694ffc86ee4..51cc487f04c 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -81,10 +81,10 @@ class SessionStat(Stat): prefix = 'session' def __init__(self, name, desc, flags=''): Stat.__init__(self, name, SessionStat.prefix, desc, flags) -class ThreadState(Stat): +class ThreadStat(Stat): prefix = 'thread-state' def __init__(self, name, desc, flags=''): - Stat.__init__(self, name, ThreadState.prefix, desc, flags) + Stat.__init__(self, name, ThreadStat.prefix, desc, flags) class TxnStat(Stat): prefix = 'transaction' def __init__(self, name, desc, flags=''): @@ -105,7 +105,7 @@ groups['evict'] = [ BlockStat.prefix, CacheStat.prefix, ConnStat.prefix, - ThreadState.prefix + ThreadStat.prefix ] groups['lsm'] = [LSMStat.prefix, TxnStat.prefix] groups['memory'] = [CacheStat.prefix, ConnStat.prefix, RecStat.prefix] @@ -113,7 +113,7 @@ groups['system'] = [ ConnStat.prefix, DhandleStat.prefix, SessionStat.prefix, - ThreadState.prefix + ThreadStat.prefix ] ########################################## @@ -159,6 +159,7 @@ connection_stats = [ BlockStat('block_byte_map_read', 'mapped bytes read', 'size'), BlockStat('block_byte_read', 'bytes read', 'size'), BlockStat('block_byte_write', 'bytes written', 'size'), + BlockStat('block_byte_write_checkpoint', 'bytes written for checkpoint', 'size'), BlockStat('block_map_read', 'mapped blocks read'), BlockStat('block_preload', 'blocks pre-loaded'), BlockStat('block_read', 'blocks read'), @@ -168,11 +169,12 @@ connection_stats = [ # Cache and eviction statistics ########################################## CacheStat('cache_bytes_dirty', 'tracked dirty bytes in the cache', 'no_clear,no_scale,size'), + 
CacheStat('cache_bytes_image', 'bytes belonging to page images in the cache', 'no_clear,no_scale,size'), CacheStat('cache_bytes_internal', 'tracked bytes belonging to internal pages in the cache', 'no_clear,no_scale,size'), CacheStat('cache_bytes_inuse', 'bytes currently in the cache', 'no_clear,no_scale,size'), CacheStat('cache_bytes_leaf', 'tracked bytes belonging to leaf pages in the cache', 'no_clear,no_scale,size'), CacheStat('cache_bytes_max', 'maximum bytes configured', 'no_clear,no_scale,size'), - CacheStat('cache_bytes_overflow', 'tracked bytes belonging to overflow pages in the cache', 'no_clear,no_scale,size'), + CacheStat('cache_bytes_other', 'bytes not belonging to page images in the cache', 'no_clear,no_scale,size'), CacheStat('cache_bytes_read', 'bytes read into cache', 'size'), CacheStat('cache_bytes_write', 'bytes written from cache', 'size'), CacheStat('cache_eviction_aggressive_set', 'eviction currently operating in aggressive mode', 'no_clear,no_scale'), @@ -193,7 +195,8 @@ connection_stats = [ CacheStat('cache_eviction_internal', 'internal pages evicted'), CacheStat('cache_eviction_maximum_page_size', 'maximum page size at eviction', 'no_clear,no_scale,size'), CacheStat('cache_eviction_pages_queued', 'pages queued for eviction'), - CacheStat('cache_eviction_pages_queued_oldest', 'pages queued for urgent eviction'), + CacheStat('cache_eviction_pages_queued_oldest', 'pages queued for urgent eviction during walk'), + CacheStat('cache_eviction_pages_queued_urgent', 'pages queued for urgent eviction'), CacheStat('cache_eviction_pages_seen', 'pages seen by eviction walk'), CacheStat('cache_eviction_queue_empty', 'eviction server candidate queue empty when topping up'), CacheStat('cache_eviction_queue_not_empty', 'eviction server candidate queue not empty when topping up'), @@ -215,12 +218,14 @@ connection_stats = [ CacheStat('cache_inmem_splittable', 'in-memory page passed criteria to be split'), CacheStat('cache_lookaside_insert', 'lookaside table 
insert calls'), CacheStat('cache_lookaside_remove', 'lookaside table remove calls'), + CacheStat('cache_overflow_value', 'overflow values cached in memory', 'no_scale'), CacheStat('cache_overhead', 'percentage overhead', 'no_clear,no_scale'), CacheStat('cache_pages_dirty', 'tracked dirty pages in the cache', 'no_clear,no_scale'), CacheStat('cache_pages_inuse', 'pages currently held in the cache', 'no_clear,no_scale'), CacheStat('cache_pages_requested', 'pages requested from the cache'), CacheStat('cache_read', 'pages read into cache'), CacheStat('cache_read_lookaside', 'pages read into cache requiring lookaside entries'), + CacheStat('cache_read_overflow', 'overflow pages read into cache'), CacheStat('cache_write', 'pages written from cache'), CacheStat('cache_write_lookaside', 'page written requiring lookaside records'), CacheStat('cache_write_restore', 'pages written requiring in-memory restoration'), @@ -294,11 +299,11 @@ connection_stats = [ TxnStat('txn_begin', 'transaction begins'), TxnStat('txn_checkpoint', 'transaction checkpoints'), TxnStat('txn_checkpoint_fsync_post', 'transaction fsync calls for checkpoint after allocating the transaction ID'), - TxnStat('txn_checkpoint_fsync_post_duration', 'transaction fsync duration for checkpoint after allocating the transaction ID (usecs)'), - TxnStat('txn_checkpoint_fsync_pre', 'transaction fsync calls for checkpoint before allocating the transaction ID'), - TxnStat('txn_checkpoint_fsync_pre_duration', 'transaction fsync duration for checkpoint before allocating the transaction ID (usecs)'), + TxnStat('txn_checkpoint_fsync_post_duration', 'transaction fsync duration for checkpoint after allocating the transaction ID (usecs)', 'no_clear,no_scale'), TxnStat('txn_checkpoint_generation', 'transaction checkpoint generation', 'no_clear,no_scale'), TxnStat('txn_checkpoint_running', 'transaction checkpoint currently running', 'no_clear,no_scale'), + TxnStat('txn_checkpoint_scrub_target', 'transaction checkpoint scrub dirty 
target', 'no_clear,no_scale'), + TxnStat('txn_checkpoint_scrub_time', 'transaction checkpoint scrub time (msecs)', 'no_clear,no_scale'), TxnStat('txn_checkpoint_time_max', 'transaction checkpoint max time (msecs)', 'no_clear,no_scale'), TxnStat('txn_checkpoint_time_min', 'transaction checkpoint min time (msecs)', 'no_clear,no_scale'), TxnStat('txn_checkpoint_time_recent', 'transaction checkpoint most recent time (msecs)', 'no_clear,no_scale'), @@ -332,6 +337,22 @@ connection_stats = [ ########################################## SessionStat('session_cursor_open', 'open cursor count', 'no_clear,no_scale'), SessionStat('session_open', 'open session count', 'no_clear,no_scale'), + SessionStat('session_table_compact_fail', 'table compact failed calls', 'no_clear,no_scale'), + SessionStat('session_table_compact_success', 'table compact successful calls', 'no_clear,no_scale'), + SessionStat('session_table_create_fail', 'table create failed calls', 'no_clear,no_scale'), + SessionStat('session_table_create_success', 'table create successful calls', 'no_clear,no_scale'), + SessionStat('session_table_drop_fail', 'table drop failed calls', 'no_clear,no_scale'), + SessionStat('session_table_drop_success', 'table drop successful calls', 'no_clear,no_scale'), + SessionStat('session_table_rebalance_fail', 'table rebalance failed calls', 'no_clear,no_scale'), + SessionStat('session_table_rebalance_success', 'table rebalance successful calls', 'no_clear,no_scale'), + SessionStat('session_table_rename_fail', 'table rename failed calls', 'no_clear,no_scale'), + SessionStat('session_table_rename_success', 'table rename successful calls', 'no_clear,no_scale'), + SessionStat('session_table_salvage_fail', 'table salvage failed calls', 'no_clear,no_scale'), + SessionStat('session_table_salvage_success', 'table salvage successful calls', 'no_clear,no_scale'), + SessionStat('session_table_truncate_fail', 'table truncate failed calls', 'no_clear,no_scale'), + 
SessionStat('session_table_truncate_success', 'table truncate successful calls', 'no_clear,no_scale'), + SessionStat('session_table_verify_fail', 'table verify failed calls', 'no_clear,no_scale'), + SessionStat('session_table_verify_success', 'table verify successful calls', 'no_clear,no_scale'), ########################################## # Total cursor operations @@ -349,11 +370,11 @@ connection_stats = [ CursorStat('cursor_update', 'cursor update calls'), ########################################## - # Thread State statistics + # Thread Count statistics ########################################## - ThreadState('fsync_active', 'active filesystem fsync calls','no_clear,no_scale'), - ThreadState('read_active', 'active filesystem read calls','no_clear,no_scale'), - ThreadState('write_active', 'active filesystem write calls','no_clear,no_scale'), + ThreadStat('thread_fsync_active', 'active filesystem fsync calls','no_clear,no_scale'), + ThreadStat('thread_read_active', 'active filesystem read calls','no_clear,no_scale'), + ThreadStat('thread_write_active', 'active filesystem write calls','no_clear,no_scale'), ########################################## # Yield statistics @@ -451,6 +472,7 @@ dsrc_stats = [ ########################################## # Cache and eviction statistics ########################################## + CacheStat('cache_bytes_inuse', 'bytes currently in the cache', 'no_clear,no_scale,size'), CacheStat('cache_bytes_read', 'bytes read into cache', 'size'), CacheStat('cache_bytes_write', 'bytes written from cache', 'size'), CacheStat('cache_eviction_checkpoint', 'checkpoint blocked page eviction'), diff --git a/src/third_party/wiredtiger/examples/c/ex_all.c b/src/third_party/wiredtiger/examples/c/ex_all.c index dd807922c10..e8727df3f60 100644 --- a/src/third_party/wiredtiger/examples/c/ex_all.c +++ b/src/third_party/wiredtiger/examples/c/ex_all.c @@ -1160,34 +1160,27 @@ main(void) if (ret == 0) (void)conn->close(conn, NULL); +#ifdef MIGHT_NOT_RUN + /* + 
* Don't run this code, statistics logging doesn't yet support tables. + */ /*! [Statistics logging with a table] */ ret = wiredtiger_open(home, NULL, "create, statistics_log=(" - "sources=(\"lsm:table1\",\"lsm:table2\"), wait=5)", + "sources=(\"table:table1\",\"table:table2\"), wait=5)", &conn); /*! [Statistics logging with a table] */ if (ret == 0) (void)conn->close(conn, NULL); - /*! [Statistics logging with all tables] */ - ret = wiredtiger_open(home, NULL, - "create, statistics_log=(sources=(\"lsm:\"), wait=5)", - &conn); - /*! [Statistics logging with all tables] */ - if (ret == 0) - (void)conn->close(conn, NULL); - -#ifdef MIGHT_NOT_RUN /* - * This example code gets run, and a non-existent log file path might - * cause the open to fail. The documentation requires code snippets, - * use #ifdef's to avoid running it. + * Don't run this code, statistics logging doesn't yet support indexes. */ - /*! [Statistics logging with path] */ + /*! [Statistics logging with a source type] */ ret = wiredtiger_open(home, NULL, - "create," - "statistics_log=(wait=120,path=/log/log.%m.%d.%y)", &conn); - /*! [Statistics logging with path] */ + "create, statistics_log=(sources=(\"index:\"), wait=5)", + &conn); + /*! 
[Statistics logging with a source type] */ if (ret == 0) (void)conn->close(conn, NULL); diff --git a/src/third_party/wiredtiger/examples/c/ex_file_system.c b/src/third_party/wiredtiger/examples/c/ex_file_system.c index 77e8f40480b..55ee20e9331 100644 --- a/src/third_party/wiredtiger/examples/c/ex_file_system.c +++ b/src/third_party/wiredtiger/examples/c/ex_file_system.c @@ -118,18 +118,17 @@ int demo_file_system_create(WT_CONNECTION *, WT_CONFIG_ARG *); /* * Forward function declarations for file system API implementation */ -static int demo_fs_open(WT_FILE_SYSTEM *, - WT_SESSION *, const char *, WT_OPEN_FILE_TYPE, uint32_t, WT_FILE_HANDLE **); +static int demo_fs_open(WT_FILE_SYSTEM *, WT_SESSION *, + const char *, WT_FS_OPEN_FILE_TYPE, uint32_t, WT_FILE_HANDLE **); static int demo_fs_directory_list(WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, char ***, uint32_t *); static int demo_fs_directory_list_free( WT_FILE_SYSTEM *, WT_SESSION *, char **, uint32_t); -static int demo_fs_directory_sync(WT_FILE_SYSTEM *file_system, - WT_SESSION *session, const char *directory); static int demo_fs_exist(WT_FILE_SYSTEM *, WT_SESSION *, const char *, bool *); -static int demo_fs_remove(WT_FILE_SYSTEM *, WT_SESSION *, const char *); +static int demo_fs_remove( + WT_FILE_SYSTEM *, WT_SESSION *, const char *, uint32_t); static int demo_fs_rename( - WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *); + WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, uint32_t); static int demo_fs_size( WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *); static int demo_fs_terminate(WT_FILE_SYSTEM *, WT_SESSION *); @@ -255,7 +254,6 @@ demo_file_system_create(WT_CONNECTION *conn, WT_CONFIG_ARG *config) /* Initialize the in-memory jump table. 
*/ file_system->fs_directory_list = demo_fs_directory_list; file_system->fs_directory_list_free = demo_fs_directory_list_free; - file_system->fs_directory_sync = demo_fs_directory_sync; file_system->fs_exist = demo_fs_exist; file_system->fs_open_file = demo_fs_open; file_system->fs_remove = demo_fs_remove; @@ -282,7 +280,7 @@ err: free(demo_fs); */ static int demo_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session, - const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags, + const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags, WT_FILE_HANDLE **file_handlep) { DEMO_FILE_HANDLE *demo_fh; @@ -469,21 +467,6 @@ demo_fs_directory_list_free(WT_FILE_SYSTEM *file_system, } /* - * demo_fs_directory_sync -- - * Directory sync for our demo file system, which is a no-op. - */ -static int -demo_fs_directory_sync(WT_FILE_SYSTEM *file_system, - WT_SESSION *session, const char *directory) -{ - (void)file_system; /* Unused */ - (void)session; /* Unused */ - (void)directory; /* Unused */ - - return (0); -} - -/* * demo_fs_exist -- * Return if the file exists. */ @@ -507,13 +490,15 @@ demo_fs_exist(WT_FILE_SYSTEM *file_system, * POSIX remove. 
*/ static int -demo_fs_remove( - WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name) +demo_fs_remove(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, const char *name, uint32_t flags) { DEMO_FILE_SYSTEM *demo_fs; DEMO_FILE_HANDLE *demo_fh; int ret = 0; + (void)flags; /* Unused */ + demo_fs = (DEMO_FILE_SYSTEM *)file_system; ret = ENOENT; @@ -531,13 +516,15 @@ demo_fs_remove( */ static int demo_fs_rename(WT_FILE_SYSTEM *file_system, - WT_SESSION *session, const char *from, const char *to) + WT_SESSION *session, const char *from, const char *to, uint32_t flags) { DEMO_FILE_HANDLE *demo_fh; DEMO_FILE_SYSTEM *demo_fs; char *copy; int ret = 0; + (void)flags; /* Unused */ + demo_fs = (DEMO_FILE_SYSTEM *)file_system; LOCK_FILE_SYSTEM(session, demo_fs); diff --git a/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_all.java b/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_all.java index 48e85c9fade..83a37e9a6a5 100644 --- a/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_all.java +++ b/src/third_party/wiredtiger/examples/java/com/wiredtiger/examples/ex_all.java @@ -988,6 +988,10 @@ allExample() /*! [Statistics logging] */ conn.close(null); + if (false) { // MIGHT_NOT_RUN + /* + * Don't run this code, statistics logging doesn't yet support tables. + */ /*! [Statistics logging with a table] */ conn = wiredtiger.open(home, "create," + @@ -995,23 +999,13 @@ allExample() /*! [Statistics logging with a table] */ conn.close(null); - /*! [Statistics logging with all tables] */ - conn = wiredtiger.open(home, - "create,statistics_log=(sources=(\"table:\"))"); - /*! [Statistics logging with all tables] */ - conn.close(null); - - if (false) { // MIGHT_NOT_RUN /* - * This example code gets run, and a non-existent log file path might - * cause the open to fail. The documentation requires code snippets, - * use if (false) to avoid running it. 
+ * Don't run this code, statistics logging doesn't yet support indexes. */ - /*! [Statistics logging with path] */ + /*! [Statistics logging with a source type] */ conn = wiredtiger.open(home, - "create," + - "statistics_log=(wait=120,path=/log/log.%m.%d.%y)"); - /*! [Statistics logging with path] */ + "create,statistics_log=(sources=(\"index:\"))"); + /*! [Statistics logging with a source type] */ conn.close(null); /* diff --git a/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c b/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c index 9aede2ed907..484df0a6785 100644 --- a/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c +++ b/src/third_party/wiredtiger/ext/compressors/zlib/zlib_compress.c @@ -92,7 +92,7 @@ zalloc(void *cookie, uint32_t number, uint32_t size) opaque = cookie; wt_api = ((ZLIB_COMPRESSOR *)opaque->compressor)->wt_api; return (wt_api->scr_alloc( - wt_api, opaque->session, (size_t)(number * size))); + wt_api, opaque->session, (size_t)number * size)); } /* diff --git a/src/third_party/wiredtiger/src/async/async_api.c b/src/third_party/wiredtiger/src/async/async_api.c index fea8714176b..d53a6c65c1d 100644 --- a/src/third_party/wiredtiger/src/async/async_api.c +++ b/src/third_party/wiredtiger/src/async/async_api.c @@ -490,12 +490,24 @@ __wt_async_flush(WT_SESSION_IMPL *session) WT_ASYNC *async; WT_CONNECTION_IMPL *conn; WT_DECL_RET; + uint32_t i, workers; conn = S2C(session); if (!conn->async_cfg) return (0); async = conn->async; + /* + * Only add a flush operation if there are workers who can process + * it. Otherwise we will wait forever. + */ + workers = 0; + for (i = 0; i < conn->async_workers; ++i) + if (async->worker_tids[i] != 0) + ++workers; + if (workers == 0) + return (0); + WT_STAT_FAST_CONN_INCR(session, async_flush); /* * We have to do several things. 
First we have to prevent diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c index b9f0ec25d53..3584efc7671 100644 --- a/src/third_party/wiredtiger/src/block/block_ckpt.c +++ b/src/third_party/wiredtiger/src/block/block_ckpt.c @@ -252,7 +252,7 @@ __wt_block_checkpoint(WT_SESSION_IMPL *session, } else WT_ERR(__wt_block_write_off(session, block, buf, &ci->root_offset, &ci->root_size, &ci->root_cksum, - data_cksum, false)); + data_cksum, true, false)); /* * Checkpoints are potentially reading/writing/merging lots of blocks, diff --git a/src/third_party/wiredtiger/src/block/block_ext.c b/src/third_party/wiredtiger/src/block/block_ext.c index 0d3e7b54f17..bad4d8d7990 100644 --- a/src/third_party/wiredtiger/src/block/block_ext.c +++ b/src/third_party/wiredtiger/src/block/block_ext.c @@ -1245,8 +1245,7 @@ __wt_block_extlist_write(WT_SESSION_IMPL *session, WT_DECL_RET; WT_EXT *ext; WT_PAGE_HEADER *dsk; - size_t size; - uint32_t entries; + size_t entries, size; uint8_t *p; WT_RET(__block_extlist_dump(session, block, el, "write")); @@ -1311,7 +1310,7 @@ __wt_block_extlist_write(WT_SESSION_IMPL *session, /* Write the extent list to disk. 
*/ WT_ERR(__wt_block_write_off(session, - block, tmp, &el->offset, &el->size, &el->cksum, true, true)); + block, tmp, &el->offset, &el->size, &el->cksum, true, true, true)); /* * Remove the allocated blocks from the system's allocation list, extent @@ -1450,7 +1449,7 @@ __block_extlist_dump( tag, el->name, el->entries, __wt_buf_set_size(session, el->bytes, true, t1))); - if (ret != 0 || el->entries == 0) + if (el->entries == 0) goto done; memset(sizes, 0, sizeof(sizes)); diff --git a/src/third_party/wiredtiger/src/block/block_mgr.c b/src/third_party/wiredtiger/src/block/block_mgr.c index 971fe713f83..eff25f34304 100644 --- a/src/third_party/wiredtiger/src/block/block_mgr.c +++ b/src/third_party/wiredtiger/src/block/block_mgr.c @@ -479,11 +479,11 @@ __bm_verify_start(WT_BM *bm, * Write a buffer into a block, returning the block's address cookie. */ static int -__bm_write(WT_BM *bm, WT_SESSION_IMPL *session, - WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum) +__bm_write(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, + uint8_t *addr, size_t *addr_sizep, bool data_cksum, bool checkpoint_io) { - return (__wt_block_write( - session, bm->block, buf, addr, addr_sizep, data_cksum)); + return (__wt_block_write(session, + bm->block, buf, addr, addr_sizep, data_cksum, checkpoint_io)); } /* @@ -492,13 +492,14 @@ __bm_write(WT_BM *bm, WT_SESSION_IMPL *session, * readonly version. 
*/ static int -__bm_write_readonly(WT_BM *bm, WT_SESSION_IMPL *session, - WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum) +__bm_write_readonly(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, + uint8_t *addr, size_t *addr_sizep, bool data_cksum, bool checkpoint_io) { WT_UNUSED(buf); WT_UNUSED(addr); WT_UNUSED(addr_sizep); WT_UNUSED(data_cksum); + WT_UNUSED(checkpoint_io); return (__bm_readonly(bm, session)); } diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c index 1603b1574e7..7cff7eab629 100644 --- a/src/third_party/wiredtiger/src/block/block_open.c +++ b/src/third_party/wiredtiger/src/block/block_open.c @@ -15,9 +15,10 @@ static int __desc_read(WT_SESSION_IMPL *, WT_BLOCK *); * Drop a file. */ int -__wt_block_manager_drop(WT_SESSION_IMPL *session, const char *filename) +__wt_block_manager_drop( + WT_SESSION_IMPL *session, const char *filename, bool durable) { - return (__wt_remove_if_exists(session, filename)); + return (__wt_remove_if_exists(session, filename, durable)); } /* @@ -43,8 +44,9 @@ __wt_block_manager_create( * in our space. Move any existing files out of the way and complain. */ for (;;) { - if ((ret = __wt_open(session, filename, WT_OPEN_FILE_TYPE_DATA, - WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &fh)) == 0) + if ((ret = __wt_open(session, filename, + WT_FS_OPEN_FILE_TYPE_DATA, WT_FS_OPEN_CREATE | + WT_FS_OPEN_DURABLE | WT_FS_OPEN_EXCLUSIVE, &fh)) == 0) break; WT_ERR_TEST(ret != EEXIST, ret); @@ -56,7 +58,7 @@ __wt_block_manager_create( WT_ERR(__wt_fs_exist(session, tmp->data, &exists)); if (!exists) { WT_ERR(__wt_fs_rename( - session, filename, tmp->data)); + session, filename, tmp->data, false)); WT_ERR(__wt_msg(session, "unexpected file %s found, renamed to %s", filename, (const char *)tmp->data)); @@ -77,16 +79,9 @@ __wt_block_manager_create( /* Close the file handle. 
*/ WT_TRET(__wt_close(session, &fh)); - /* - * Some filesystems require that we sync the directory to be confident - * that the file will appear. - */ - if (ret == 0) - WT_TRET(__wt_fs_directory_sync(session, filename)); - /* Undo any create on error. */ if (ret != 0) - WT_TRET(__wt_fs_remove(session, filename)); + WT_TRET(__wt_fs_remove(session, filename, false)); err: __wt_scr_free(session, &tmp); @@ -207,11 +202,11 @@ __wt_block_open(WT_SESSION_IMPL *session, */ flags = 0; if (readonly && FLD_ISSET(conn->direct_io, WT_DIRECT_IO_CHECKPOINT)) - LF_SET(WT_OPEN_DIRECTIO); + LF_SET(WT_FS_OPEN_DIRECTIO); if (!readonly && FLD_ISSET(conn->direct_io, WT_DIRECT_IO_DATA)) - LF_SET(WT_OPEN_DIRECTIO); + LF_SET(WT_FS_OPEN_DIRECTIO); WT_ERR(__wt_open( - session, filename, WT_OPEN_FILE_TYPE_DATA, flags, &block->fh)); + session, filename, WT_FS_OPEN_FILE_TYPE_DATA, flags, &block->fh)); /* Set the file's size. */ WT_ERR(__wt_filesize(session, block->fh, &block->size)); diff --git a/src/third_party/wiredtiger/src/block/block_session.c b/src/third_party/wiredtiger/src/block/block_session.c index 268adb530cf..6223751effa 100644 --- a/src/third_party/wiredtiger/src/block/block_session.c +++ b/src/third_party/wiredtiger/src/block/block_session.c @@ -28,7 +28,7 @@ __block_ext_alloc(WT_SESSION_IMPL *session, WT_EXT **extp) { WT_EXT *ext; - u_int skipdepth; + size_t skipdepth; skipdepth = __wt_skip_choose_depth(session); WT_RET(__wt_calloc(session, 1, diff --git a/src/third_party/wiredtiger/src/block/block_write.c b/src/third_party/wiredtiger/src/block/block_write.c index 1fefeee09da..30d06e6259a 100644 --- a/src/third_party/wiredtiger/src/block/block_write.c +++ b/src/third_party/wiredtiger/src/block/block_write.c @@ -210,15 +210,15 @@ __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep) * Write a buffer into a block, returning the block's address cookie. 
*/ int -__wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, - WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum) +__wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, + uint8_t *addr, size_t *addr_sizep, bool data_cksum, bool checkpoint_io) { wt_off_t offset; uint32_t size, cksum; uint8_t *endp; - WT_RET(__wt_block_write_off( - session, block, buf, &offset, &size, &cksum, data_cksum, false)); + WT_RET(__wt_block_write_off(session, block, + buf, &offset, &size, &cksum, data_cksum, checkpoint_io, false)); endp = addr; WT_RET(__wt_block_addr_to_buffer(block, &endp, offset, size, cksum)); @@ -228,14 +228,14 @@ __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, } /* - * __wt_block_write_off -- + * __block_write_off -- * Write a buffer into a block, returning the block's offset, size and * checksum. */ -int -__wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, +static int +__block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump, - bool data_cksum, bool caller_locked) + bool data_cksum, bool checkpoint_io, bool caller_locked) { WT_BLOCK_HEADER *blk; WT_DECL_RET; @@ -254,12 +254,6 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, blk = WT_BLOCK_HEADER_REF(buf->mem); memset(blk, 0, sizeof(*blk)); - /* - * Swap the page-header as needed; this doesn't belong here, but it's - * the best place to catch all callers. - */ - __wt_page_header_byteswap(buf->mem); - /* Buffers should be aligned for writing. 
*/ if (!F_ISSET(buf, WT_ITEM_ALIGNED)) { WT_ASSERT(session, F_ISSET(buf, WT_ITEM_ALIGNED)); @@ -380,6 +374,9 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_STAT_FAST_CONN_INCR(session, block_write); WT_STAT_FAST_CONN_INCRV(session, block_byte_write, align_size); + if (checkpoint_io) + WT_STAT_FAST_CONN_INCRV( + session, block_byte_write_checkpoint, align_size); WT_RET(__wt_verbose(session, WT_VERB_WRITE, "off %" PRIuMAX ", size %" PRIuMAX ", cksum %" PRIu32, @@ -391,3 +388,28 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, return (0); } + +/* + * __wt_block_write_off -- + * Write a buffer into a block, returning the block's offset, size and + * checksum. + */ +int +__wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, + WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump, + bool data_cksum, bool checkpoint_io, bool caller_locked) +{ + WT_DECL_RET; + + /* + * Ensure the page header is in little endian order; this doesn't belong + * here, but it's the best place to catch all callers. After the write, + * swap values back to native order so callers never see anything other + * than their original content. + */ + __wt_page_header_byteswap(buf->mem); + ret = __block_write_off(session, block, buf, + offsetp, sizep, cksump, data_cksum, checkpoint_io, caller_locked); + __wt_page_header_byteswap(buf->mem); + return (ret); +} diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c index 70b3ba56e31..e1b097c22a5 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curnext.c +++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c @@ -183,6 +183,7 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage) if (cbt->last_standard_recno == 0) return (WT_NOTFOUND); __cursor_set_recno(cbt, cbt->ref->ref_recno); + cbt->cip_saved = NULL; goto new_page; } @@ -301,12 +302,13 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage) * WT_INSERT_HEAD[0], and so on. 
This means WT_INSERT lists are * odd-numbered slots, and WT_ROW array slots are even-numbered slots. * - * New page configuration. + * Initialize for each new page. */ if (newpage) { cbt->ins_head = WT_ROW_INSERT_SMALLEST(page); cbt->ins = WT_SKIP_FIRST(cbt->ins_head); cbt->row_iteration_slot = 1; + cbt->rip_saved = NULL; goto new_insert; } @@ -517,11 +519,13 @@ __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt) */ F_SET(cbt, WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV); - /* - * Clear the count of deleted items on the page. - */ + /* Clear the count of deleted items on the page. */ cbt->page_deleted_count = 0; + /* Clear saved iteration cursor position information. */ + cbt->cip_saved = NULL; + cbt->rip_saved = NULL; + /* * If we don't have a search page, then we're done, we're starting at * the beginning or end of the tree, not as a result of a search. @@ -661,7 +665,7 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) if (page != NULL && (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD || (newpage && cbt->page_deleted_count > 0))) - __wt_page_evict_soon(page); + WT_ERR(__wt_page_evict_soon(session, cbt->ref)); cbt->page_deleted_count = 0; WT_ERR(__wt_tree_walk(session, &cbt->ref, flags)); diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c index 872f648446c..e39dffa357f 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curprev.c +++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c @@ -329,6 +329,7 @@ __cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage) if (cbt->last_standard_recno == 0) return (WT_NOTFOUND); __cursor_set_recno(cbt, cbt->last_standard_recno); + cbt->cip_saved = NULL; goto new_page; } @@ -447,7 +448,7 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage) * WT_INSERT_HEAD[0], and so on. This means WT_INSERT lists are * odd-numbered slots, and WT_ROW array slots are even-numbered slots. * - * New page configuration. + * Initialize for each new page. 
*/ if (newpage) { /* @@ -464,6 +465,7 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage) WT_ROW_INSERT_SLOT(page, page->pg_row_entries - 1); cbt->ins = WT_SKIP_LAST(cbt->ins_head); cbt->row_iteration_slot = page->pg_row_entries * 2 + 1; + cbt->rip_saved = NULL; goto new_insert; } @@ -619,7 +621,7 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) if (page != NULL && (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD || (newpage && cbt->page_deleted_count > 0))) - __wt_page_evict_soon(page); + WT_ERR(__wt_page_evict_soon(session, cbt->ref)); cbt->page_deleted_count = 0; WT_ERR(__wt_tree_walk(session, &cbt->ref, flags)); diff --git a/src/third_party/wiredtiger/src/btree/bt_discard.c b/src/third_party/wiredtiger/src/btree/bt_discard.c index a00bb7dc2b5..965aec16fc2 100644 --- a/src/third_party/wiredtiger/src/btree/bt_discard.c +++ b/src/third_party/wiredtiger/src/btree/bt_discard.c @@ -131,8 +131,10 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) /* Discard any disk image. */ dsk = (WT_PAGE_HEADER *)page->dsk; - if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC)) + if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC)) { + __wt_cache_page_image_decr(session, dsk->mem_size); __wt_overwrite_and_free_len(session, dsk, dsk->mem_size); + } /* Discard any mapped image. */ if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED)) diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c index c97e05d74a7..cacf1369430 100644 --- a/src/third_party/wiredtiger/src/btree/bt_handle.c +++ b/src/third_party/wiredtiger/src/btree/bt_handle.c @@ -690,6 +690,8 @@ __btree_page_sizes(WT_SESSION_IMPL *session) * Don't let pages grow large compared to the cache size or we can end * up in a situation where nothing can be evicted. Take care getting * the cache size: with a shared cache, it may not have been set. + * Don't forget to update the API documentation if you alter the + * bounds for any of the parameters here. 
*/ WT_RET(__wt_config_gets(session, cfg, "memory_page_max", &cval)); btree->maxmempage = (uint64_t)cval.val; diff --git a/src/third_party/wiredtiger/src/btree/bt_huffman.c b/src/third_party/wiredtiger/src/btree/bt_huffman.c index 9e9d69c342e..918791d9c6e 100644 --- a/src/third_party/wiredtiger/src/btree/bt_huffman.c +++ b/src/third_party/wiredtiger/src/btree/bt_huffman.c @@ -157,7 +157,8 @@ __huffman_confchk_file(WT_SESSION_IMPL *session, /* Check the file exists. */ WT_RET(__wt_strndup(session, v->str + len, v->len - len, &fname)); - WT_ERR(__wt_fopen(session, fname, WT_OPEN_FIXED, WT_STREAM_READ, &fs)); + WT_ERR(__wt_fopen( + session, fname, WT_FS_OPEN_FIXED, WT_STREAM_READ, &fs)); /* Optionally return the file handle. */ if (fsp == NULL) diff --git a/src/third_party/wiredtiger/src/btree/bt_io.c b/src/third_party/wiredtiger/src/btree/bt_io.c index 4339de6f25c..6c2e2f1b3fb 100644 --- a/src/third_party/wiredtiger/src/btree/bt_io.c +++ b/src/third_party/wiredtiger/src/btree/bt_io.c @@ -117,7 +117,7 @@ __wt_bt_read(WT_SESSION_IMPL *session, */ if (ret != 0 || result_len != dsk->mem_size - WT_BLOCK_COMPRESS_SKIP) { - fail_msg = "block decryption failed"; + fail_msg = "block decompression failed"; goto corrupt; } } else @@ -168,7 +168,8 @@ err: __wt_scr_free(session, &tmp); */ int __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, - uint8_t *addr, size_t *addr_sizep, bool checkpoint, bool compressed) + uint8_t *addr, size_t *addr_sizep, + bool checkpoint, bool checkpoint_io, bool compressed) { WT_BM *bm; WT_BTREE *btree; @@ -359,10 +360,12 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, /* Call the block manager to write the block. */ WT_ERR(checkpoint ? 
bm->checkpoint(bm, session, ip, btree->ckpt, data_cksum) : - bm->write(bm, session, ip, addr, addr_sizep, data_cksum)); + bm->write( + bm, session, ip, addr, addr_sizep, data_cksum, checkpoint_io)); WT_STAT_FAST_CONN_INCR(session, cache_write); WT_STAT_FAST_DATA_INCR(session, cache_write); + S2C(session)->cache->bytes_written += dsk->mem_size; WT_STAT_FAST_CONN_INCRV(session, cache_bytes_write, dsk->mem_size); WT_STAT_FAST_DATA_INCRV(session, cache_bytes_write, dsk->mem_size); diff --git a/src/third_party/wiredtiger/src/btree/bt_ovfl.c b/src/third_party/wiredtiger/src/btree/bt_ovfl.c index fbe361e000a..1f080041a23 100644 --- a/src/third_party/wiredtiger/src/btree/bt_ovfl.c +++ b/src/third_party/wiredtiger/src/btree/bt_ovfl.c @@ -33,6 +33,7 @@ __ovfl_read(WT_SESSION_IMPL *session, store->data = WT_PAGE_HEADER_BYTE(btree, dsk); store->size = dsk->u.datalen; + WT_STAT_FAST_CONN_INCR(session, cache_read_overflow); WT_STAT_FAST_DATA_INCR(session, cache_read_overflow); return (0); @@ -208,6 +209,7 @@ __wt_ovfl_cache(WT_SESSION_IMPL *session, */ if (!visible) { WT_RET(__ovfl_cache(session, page, vpack)); + WT_STAT_FAST_CONN_INCR(session, cache_overflow_value); WT_STAT_FAST_DATA_INCR(session, cache_overflow_value); } diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c index 00ec8aa4494..89e5f428628 100644 --- a/src/third_party/wiredtiger/src/btree/bt_page.c +++ b/src/third_party/wiredtiger/src/btree/bt_page.c @@ -219,6 +219,7 @@ __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, /* Update the page's in-memory size and the cache statistics. */ __wt_cache_page_inmem_incr(session, page, size); + __wt_cache_page_image_incr(session, dsk->mem_size); /* Link the new internal page to the parent. 
*/ if (ref != NULL) { diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c index 086500c8b2f..3d396d5ae5b 100644 --- a/src/third_party/wiredtiger/src/btree/bt_read.c +++ b/src/third_party/wiredtiger/src/btree/bt_read.c @@ -296,7 +296,7 @@ err: WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags)); * __evict_force_check -- * Check if a page matches the criteria for forced eviction. */ -static int +static bool __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref) { WT_BTREE *btree; @@ -307,26 +307,26 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref) /* Leaf pages only. */ if (WT_PAGE_IS_INTERNAL(page)) - return (0); + return (false); /* * It's hard to imagine a page with a huge memory footprint that has * never been modified, but check to be sure. */ if (page->modify == NULL) - return (0); + return (false); /* Pages are usually small enough, check that first. */ if (page->memory_footprint < btree->splitmempage) - return (0); + return (false); else if (page->memory_footprint < btree->maxmempage) return (__wt_leaf_page_can_split(session, page)); /* Trigger eviction on the next page release. */ - __wt_page_evict_soon(page); + (void)__wt_page_evict_soon(session, ref); /* Bump the oldest ID, we're about to do some visibility checks. */ - WT_RET(__wt_txn_update_oldest(session, 0)); + (void)__wt_txn_update_oldest(session, 0); /* If eviction cannot succeed, don't try. */ return (__wt_page_can_evict(session, ref, NULL)); @@ -548,10 +548,14 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags * if the page qualifies for forced eviction and update * the page's generation number. If eviction isn't being * done on this file, we're done. + * In-memory split of large pages is allowed while + * no_eviction is set on btree, whereas reconciliation + * is not allowed. 
*/ if (LF_ISSET(WT_READ_NO_EVICT) || F_ISSET(session, WT_SESSION_NO_EVICTION) || - F_ISSET(btree, WT_BTREE_NO_EVICTION)) + (F_ISSET(btree, WT_BTREE_NO_EVICTION) && + !F_ISSET(btree, WT_BTREE_NO_RECONCILE))) goto skip_evict; /* @@ -595,7 +599,14 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags page = ref->page; if (page->read_gen == WT_READGEN_NOTSET) { if (evict_soon) - __wt_page_evict_soon(page); + /* + * Ignore error returns, since the + * evict soon call is advisory and we + * are holding a hazard pointer to the + * page already. + */ + (void)__wt_page_evict_soon( + session, ref); else __wt_cache_read_gen_new(session, page); } else if (!LF_ISSET(WT_READ_NO_GEN)) diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index 7a05a883f83..4f6f300802e 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -298,7 +298,7 @@ static int __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, WT_REF **from_refp, size_t *decrp, WT_REF **to_refp, size_t *incrp) { - WT_ADDR *addr; + WT_ADDR *addr, *ref_addr; WT_CELL_UNPACK unpack; WT_DECL_RET; WT_IKEY *ikey; @@ -345,13 +345,18 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, } /* - * If there's no address (the page has never been written), or the - * address has been instantiated, there's no work to do. Otherwise, - * instantiate the address in-memory, from the on-page cell. + * If there's no address at all (the page has never been written), or + * the address has already been instantiated, there's no work to do. + * Otherwise, the address still references a split page on-page cell, + * instantiate it. We can race with reconciliation and/or eviction of + * the child pages, be cautious: read the address and verify it, and + * only update it if the value is unchanged from the original. 
In the + * case of a race, the address must no longer reference the split page, + * we're done. */ - addr = ref->addr; - if (addr != NULL && !__wt_off_page(from_home, addr)) { - __wt_cell_unpack((WT_CELL *)ref->addr, &unpack); + WT_ORDERED_READ(ref_addr, ref->addr); + if (ref_addr != NULL && !__wt_off_page(from_home, ref_addr)) { + __wt_cell_unpack((WT_CELL *)ref_addr, &unpack); WT_RET(__wt_calloc_one(session, &addr)); if ((ret = __wt_strndup( session, unpack.data, unpack.size, &addr->addr)) != 0) { @@ -371,7 +376,10 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, break; WT_ILLEGAL_VALUE(session); } - ref->addr = addr; + if (!__wt_atomic_cas_ptr(&ref->addr, ref_addr, addr)) { + __wt_free(session, addr->addr); + __wt_free(session, addr); + } } /* And finally, copy the WT_REF pointer itself. */ @@ -786,7 +794,9 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, */ if (result_entries == 0) { empty_parent = true; - __wt_page_evict_soon(parent); + if (!__wt_ref_is_root(parent->pg_intl_parent_ref)) + ret = __wt_page_evict_soon( + session, parent->pg_intl_parent_ref); goto err; } @@ -1462,11 +1472,11 @@ err: if (parent != NULL) /* * __split_multi_inmem -- - * Instantiate a page in a multi-block set. + * Instantiate a page from a disk image. */ static int __split_multi_inmem( - WT_SESSION_IMPL *session, WT_PAGE *orig, WT_REF *ref, WT_MULTI *multi) + WT_SESSION_IMPL *session, WT_PAGE *orig, WT_MULTI *multi, WT_REF *ref) { WT_CURSOR_BTREE cbt; WT_DECL_ITEM(key); @@ -1487,13 +1497,12 @@ __split_multi_inmem( orig->type != WT_PAGE_COL_VAR || ref->ref_recno != 0); /* - * This code re-creates an in-memory page that is part of a set created - * while evicting a large page, and adds references to any unresolved - * update chains to the new page. We get here due to choosing to keep - * the results of a split in memory or because and update could not be - * written when attempting to evict a page. 
+ * This code re-creates an in-memory page from a disk image, and adds + * references to any unresolved update chains to the new page. We get + * here either because an update could not be written when evicting a + * page, or eviction chose to keep a page in memory. * - * Clear the disk image and link the page into the passed-in WT_REF to + * Steal the disk image and link the page into the passed-in WT_REF to * simplify error handling: our caller will not discard the disk image * when discarding the original page, and our caller will discard the * allocated page on error, when discarding the allocated WT_REF. @@ -1503,6 +1512,19 @@ __split_multi_inmem( WT_PAGE_DISK_ALLOC, &page)); multi->disk_image = NULL; + /* + * Put the re-instantiated page in the same LRU queue location as the + * original page, unless this was a forced eviction, in which case we + * leave the new page with the read generation unset. Eviction will + * set the read generation next time it visits this page. + */ + if (orig->read_gen != WT_READGEN_OLDEST) + page->read_gen = orig->read_gen; + + /* If there are no updates to apply to the page, we're done. */ + if (multi->supd_entries == 0) + return (0); + if (orig->type == WT_PAGE_ROW_LEAF) WT_RET(__wt_scr_alloc(session, 0, &key)); @@ -1551,14 +1573,12 @@ __split_multi_inmem( } /* - * If we modified the page above, it will have set the first dirty - * transaction to the last transaction currently running. However, the - * updates we installed may be older than that. Set the first dirty - * transaction to an impossibly old value so this page is never skipped - * in a checkpoint. + * When modifying the page we set the first dirty transaction to the + * last transaction currently running. However, the updates we made + * might be older than that. Set the first dirty transaction to an + * impossibly old value so this page is never skipped in a checkpoint. 
*/ - if (page->modify != NULL) - page->modify->first_dirty_txn = WT_TXN_FIRST; + page->modify->first_dirty_txn = WT_TXN_FIRST; err: /* Free any resources that may have been cached in the cursor. */ WT_TRET(__wt_btcur_close(&cbt, true)); @@ -1629,19 +1649,17 @@ __split_multi_inmem_fail(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_REF *ref) */ int __wt_multi_to_ref(WT_SESSION_IMPL *session, - WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp) + WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing) { WT_ADDR *addr; WT_IKEY *ikey; WT_REF *ref; - size_t incr; - - incr = 0; /* Allocate an underlying WT_REF. */ WT_RET(__wt_calloc_one(session, refp)); ref = *refp; - incr += sizeof(WT_REF); + if (incrp) + *incrp += sizeof(WT_REF); /* * Set the WT_REF key before (optionally) building the page, underlying @@ -1653,21 +1671,34 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session, ikey = multi->key.ikey; WT_RET(__wt_row_ikey( session, 0, WT_IKEY_DATA(ikey), ikey->size, ref)); - incr += sizeof(WT_IKEY) + ikey->size; + if (incrp) + *incrp += sizeof(WT_IKEY) + ikey->size; break; default: ref->ref_recno = multi->key.recno; break; } - /* If there's a disk image, build a page, otherwise set the address. */ - if (multi->disk_image == NULL) { - /* - * Copy the address: we could simply take the buffer, but that - * would complicate error handling, freeing the reference array - * would have to avoid freeing the memory, and it's not worth - * the confusion. - */ + /* There should be an address or a disk image (or both). */ + WT_ASSERT(session, + multi->addr.addr != NULL || multi->disk_image != NULL); + + /* If we're closing the file, there better be an address. */ + WT_ASSERT(session, multi->addr.addr != NULL || !closing); + + /* Verify any disk image we have. 
*/ + WT_ASSERT(session, multi->disk_image == NULL || + __wt_verify_dsk_image(session, + "[page instantiate]", multi->disk_image, 0, false) == 0); + + /* + * If there's an address, the page was written, set it. + * + * Copy the address: we could simply take the buffer, but that would + * complicate error handling, freeing the reference array would have + * to avoid freeing the memory, and it's not worth the confusion. + */ + if (multi->addr.addr != NULL) { WT_RET(__wt_calloc_one(session, &addr)); ref->addr = addr; addr->size = multi->addr.size; @@ -1675,14 +1706,20 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_RET(__wt_strndup(session, multi->addr.addr, addr->size, &addr->addr)); ref->state = WT_REF_DISK; - } else { - WT_RET(__split_multi_inmem(session, page, ref, multi)); + } + + /* + * If we have a disk image and we're not closing the file, + * re-instantiate the page. + * + * Discard any page image we don't use. + */ + if (multi->disk_image != NULL && !closing) { + WT_RET(__split_multi_inmem(session, page, multi, ref)); ref->state = WT_REF_MEM; } + __wt_free(session, multi->disk_image); - /* Optionally return changes in the memory footprint. */ - if (incrp != NULL) - *incrp += incr; return (0); } @@ -2086,8 +2123,8 @@ __split_multi(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) */ WT_RET(__wt_calloc_def(session, new_entries, &ref_new)); for (i = 0; i < new_entries; ++i) - WT_ERR(__wt_multi_to_ref(session, - page, &mod->mod_multi[i], &ref_new[i], &parent_incr)); + WT_ERR(__wt_multi_to_ref(session, page, + &mod->mod_multi[i], &ref_new[i], &parent_incr, closing)); /* * Split into the parent; if we're closing the file, we hold it @@ -2175,15 +2212,13 @@ __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref) * Rewrite an in-memory page with a new version. 
*/ int -__wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref) +__wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi) { WT_DECL_RET; WT_PAGE *page; - WT_PAGE_MODIFY *mod; WT_REF *new; page = ref->page; - mod = page->modify; WT_RET(__wt_verbose( session, WT_VERB_SPLIT, "%p: split-rewrite", ref->page)); @@ -2198,14 +2233,14 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref) * * Build the new page. * - * Allocate a WT_REF because the error path uses routines that will ea - * free memory. The only field we need to set is the record number, as - * it's used by the search routines. + * Allocate a WT_REF, the error path calls routines that free memory. + * The only field we need to set is the record number, as it's used by + * the search routines. */ WT_RET(__wt_calloc_one(session, &new)); new->ref_recno = ref->ref_recno; - WT_ERR(__split_multi_inmem(session, page, new, &mod->mod_multi[0])); + WT_ERR(__split_multi_inmem(session, page, multi, new)); /* * The rewrite succeeded, we can no longer fail. @@ -2213,7 +2248,7 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref) * Finalize the move, discarding moved update lists from the original * page. */ - __split_multi_inmem_final(page, &mod->mod_multi[0]); + __split_multi_inmem_final(page, multi); /* * Discard the original page. diff --git a/src/third_party/wiredtiger/src/btree/bt_stat.c b/src/third_party/wiredtiger/src/btree/bt_stat.c index 3d5abf34147..d3ddf33446e 100644 --- a/src/third_party/wiredtiger/src/btree/bt_stat.c +++ b/src/third_party/wiredtiger/src/btree/bt_stat.c @@ -41,6 +41,9 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) WT_STAT_SET(session, stats, btree_maxleafpage, btree->maxleafpage); WT_STAT_SET(session, stats, btree_maxleafvalue, btree->maxleafvalue); + WT_STAT_SET(session, stats, cache_bytes_inuse, + __wt_btree_bytes_inuse(session)); + /* Everything else is really, really expensive. 
*/ if (!F_ISSET(cst, WT_CONN_STAT_ALL)) return (0); @@ -139,7 +142,7 @@ __stat_page_col_var( } else { orig_deleted = false; __wt_cell_unpack(cell, unpack); - if (unpack->type == WT_CELL_ADDR_DEL) + if (unpack->type == WT_CELL_DEL) orig_deleted = true; else { entry_cnt += __wt_cell_rle(unpack); diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c index da6c53aa316..df794c96cda 100644 --- a/src/third_party/wiredtiger/src/btree/bt_sync.c +++ b/src/third_party/wiredtiger/src/btree/bt_sync.c @@ -84,7 +84,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) WT_ERR(__wt_txn_get_snapshot(session)); leaf_bytes += page->memory_footprint; ++leaf_pages; - WT_ERR(__wt_reconcile(session, walk, NULL, 0)); + WT_ERR(__wt_reconcile( + session, walk, NULL, WT_CHECKPOINTING)); } } break; @@ -92,7 +93,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) /* * If we are flushing a file at read-committed isolation, which * is of particular interest for flushing the metadata to make - * schema-changing operation durable, get a transactional + * a schema-changing operation durable, get a transactional * snapshot now. * * All changes committed up to this point should be included. @@ -126,7 +127,17 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) */ WT_PUBLISH(btree->checkpointing, WT_CKPT_PREPARE); - WT_ERR(__wt_evict_file_exclusive_on(session)); + /* + * Sync for checkpoint allows splits to happen while the queue + * is being drained, but not reconciliation. We need to do this, + * since draining the queue can take long enough for hot pages + * to grow significantly larger than the configured maximum + * size. 
+ */ + F_SET(btree, WT_BTREE_NO_RECONCILE); + ret = __wt_evict_file_exclusive_on(session); + F_CLR(btree, WT_BTREE_NO_RECONCILE); + WT_ERR(ret); __wt_evict_file_exclusive_off(session); WT_PUBLISH(btree->checkpointing, WT_CKPT_RUNNING); @@ -183,7 +194,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) leaf_bytes += page->memory_footprint; ++leaf_pages; } - WT_ERR(__wt_reconcile(session, walk, NULL, 0)); + WT_ERR(__wt_reconcile( + session, walk, NULL, WT_CHECKPOINTING)); } break; case WT_SYNC_CLOSE: @@ -217,41 +229,9 @@ err: /* On error, clear any left-over tree walk. */ saved_snap_min == WT_TXN_NONE) __wt_txn_release_snapshot(session); - if (btree->checkpointing != WT_CKPT_OFF) { - /* - * Update the checkpoint generation for this handle so visible - * updates newer than the checkpoint can be evicted. - * - * This has to be published before eviction is enabled again, - * so that eviction knows that the checkpoint has completed. - */ - WT_PUBLISH(btree->checkpoint_gen, - conn->txn_global.checkpoint_gen); - WT_STAT_FAST_DATA_SET(session, - btree_checkpoint_generation, btree->checkpoint_gen); - - /* - * Clear the checkpoint flag and push the change; not required, - * but publishing the change means stalled eviction gets moving - * as soon as possible. - */ - btree->checkpointing = WT_CKPT_OFF; - WT_FULL_BARRIER(); - - /* - * If this tree was being skipped by the eviction server during - * the checkpoint, clear the wait. - */ - btree->evict_walk_period = 0; - - /* - * Wake the eviction server, in case application threads have - * stalled while the eviction server decided it couldn't make - * progress. Without this, application threads will be stalled - * until the eviction server next wakes. - */ - WT_TRET(__wt_evict_server_wake(session)); - } + /* Clear the checkpoint flag and push the change. 
*/ + if (btree->checkpointing != WT_CKPT_OFF) + WT_PUBLISH(btree->checkpointing, WT_CKPT_OFF); __wt_spin_unlock(session, &btree->flush_lock); diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c index bb8a750d848..17d32d6ed63 100644 --- a/src/third_party/wiredtiger/src/btree/bt_walk.c +++ b/src/third_party/wiredtiger/src/btree/bt_walk.c @@ -381,16 +381,6 @@ restart: /* __ref_ascend(session, &ref, &pindex, &slot); /* - * If we got all the way through an internal page and - * all of the child pages were deleted, mark it for - * eviction. - */ - if (empty_internal && pindex->entries > 1) { - __wt_page_evict_soon(ref->page); - empty_internal = false; - } - - /* * If at the root and returning internal pages, return * the root page, otherwise we're done. Regardless, no * hazard pointer is required, release the one we hold. @@ -404,6 +394,16 @@ restart: /* } /* + * If we got all the way through an internal page and + * all of the child pages were deleted, mark it for + * eviction. + */ + if (empty_internal && pindex->entries > 1) { + WT_ERR(__wt_page_evict_soon(session, ref)); + empty_internal = false; + } + + /* * Optionally return internal pages. Swap our previous * hazard pointer for the page we'll return. We don't * handle restart or not-found returns, it would require diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c index 4afcd74520f..0f70e84de7e 100644 --- a/src/third_party/wiredtiger/src/btree/row_srch.c +++ b/src/third_party/wiredtiger/src/btree/row_srch.c @@ -775,7 +775,7 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) * traversing the skip list each time accumulates to real time. 
*/ if (samples > 5000) - __wt_page_evict_soon(page); + WT_RET(__wt_page_evict_soon(session, cbt->ref)); return (0); } diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index 1b656c5a0aa..192b80bb359 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -42,7 +42,6 @@ static const WT_CONFIG_CHECK static const WT_CONFIG_CHECK confchk_wiredtiger_open_checkpoint_subconfigs[] = { { "log_size", "int", NULL, "min=0,max=2GB", NULL, 0 }, - { "name", "string", NULL, NULL, NULL, 0 }, { "wait", "int", NULL, "min=0,max=100000", NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } }; @@ -67,16 +66,9 @@ static const WT_CONFIG_CHECK }; static const WT_CONFIG_CHECK - confchk_wiredtiger_open_log_subconfigs[] = { + confchk_WT_CONNECTION_reconfigure_log_subconfigs[] = { { "archive", "boolean", NULL, NULL, NULL, 0 }, - { "compressor", "string", NULL, NULL, NULL, 0 }, - { "enabled", "boolean", NULL, NULL, NULL, 0 }, - { "file_max", "int", NULL, "min=100KB,max=2GB", NULL, 0 }, - { "path", "string", NULL, NULL, NULL, 0 }, { "prealloc", "boolean", NULL, NULL, NULL, 0 }, - { "recover", "string", - NULL, "choices=[\"error\",\"on\"]", - NULL, 0 }, { "zero_fill", "boolean", NULL, NULL, NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } }; @@ -99,10 +91,9 @@ static const WT_CONFIG_CHECK }; static const WT_CONFIG_CHECK - confchk_wiredtiger_open_statistics_log_subconfigs[] = { + confchk_WT_CONNECTION_reconfigure_statistics_log_subconfigs[] = { { "json", "boolean", NULL, NULL, NULL, 0 }, { "on_close", "boolean", NULL, NULL, NULL, 0 }, - { "path", "string", NULL, NULL, NULL, 0 }, { "sources", "list", NULL, NULL, NULL, 0 }, { "timestamp", "string", NULL, NULL, NULL, 0 }, { "wait", "int", NULL, "min=0,max=100000", NULL, 0 }, @@ -117,16 +108,16 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { { "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 }, { 
"checkpoint", "category", NULL, NULL, - confchk_wiredtiger_open_checkpoint_subconfigs, 3 }, + confchk_wiredtiger_open_checkpoint_subconfigs, 2 }, { "error_prefix", "string", NULL, NULL, NULL, 0 }, { "eviction", "category", NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2 }, { "eviction_dirty_target", "int", - NULL, "min=5,max=99", + NULL, "min=1,max=99", NULL, 0 }, { "eviction_dirty_trigger", "int", - NULL, "min=5,max=99", + NULL, "min=1,max=99", NULL, 0 }, { "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 }, { "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 }, @@ -135,7 +126,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { confchk_wiredtiger_open_file_manager_subconfigs, 3 }, { "log", "category", NULL, NULL, - confchk_wiredtiger_open_log_subconfigs, 8 }, + confchk_WT_CONNECTION_reconfigure_log_subconfigs, 3 }, { "lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2 }, @@ -148,7 +139,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { NULL, 0 }, { "statistics_log", "category", NULL, NULL, - confchk_wiredtiger_open_statistics_log_subconfigs, 6 }, + confchk_WT_CONNECTION_reconfigure_statistics_log_subconfigs, 5 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," @@ -326,6 +317,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor[] = { { "append", "boolean", NULL, NULL, NULL, 0 }, { "bulk", "string", NULL, NULL, NULL, 0 }, { "checkpoint", "string", NULL, NULL, NULL, 0 }, + { "checkpoint_wait", "boolean", NULL, NULL, NULL, 0 }, { "dump", "string", NULL, "choices=[\"hex\",\"json\",\"print\"]", NULL, 0 }, @@ -608,6 +600,32 @@ static const WT_CONFIG_CHECK }; static const WT_CONFIG_CHECK + confchk_wiredtiger_open_log_subconfigs[] = { + { "archive", "boolean", NULL, NULL, NULL, 0 }, + { "compressor", "string", NULL, NULL, NULL, 0 }, + { "enabled", 
"boolean", NULL, NULL, NULL, 0 }, + { "file_max", "int", NULL, "min=100KB,max=2GB", NULL, 0 }, + { "path", "string", NULL, NULL, NULL, 0 }, + { "prealloc", "boolean", NULL, NULL, NULL, 0 }, + { "recover", "string", + NULL, "choices=[\"error\",\"on\"]", + NULL, 0 }, + { "zero_fill", "boolean", NULL, NULL, NULL, 0 }, + { NULL, NULL, NULL, NULL, NULL, 0 } +}; + +static const WT_CONFIG_CHECK + confchk_wiredtiger_open_statistics_log_subconfigs[] = { + { "json", "boolean", NULL, NULL, NULL, 0 }, + { "on_close", "boolean", NULL, NULL, NULL, 0 }, + { "path", "string", NULL, NULL, NULL, 0 }, + { "sources", "list", NULL, NULL, NULL, 0 }, + { "timestamp", "string", NULL, NULL, NULL, 0 }, + { "wait", "int", NULL, "min=0,max=100000", NULL, 0 }, + { NULL, NULL, NULL, NULL, NULL, 0 } +}; + +static const WT_CONFIG_CHECK confchk_wiredtiger_open_transaction_sync_subconfigs[] = { { "enabled", "boolean", NULL, NULL, NULL, 0 }, { "method", "string", @@ -625,7 +643,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { { "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 }, { "checkpoint", "category", NULL, NULL, - confchk_wiredtiger_open_checkpoint_subconfigs, 3 }, + confchk_wiredtiger_open_checkpoint_subconfigs, 2 }, { "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 }, { "config_base", "boolean", NULL, NULL, NULL, 0 }, { "create", "boolean", NULL, NULL, NULL, 0 }, @@ -640,10 +658,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2 }, { "eviction_dirty_target", "int", - NULL, "min=5,max=99", + NULL, "min=1,max=99", NULL, 0 }, { "eviction_dirty_trigger", "int", - NULL, "min=5,max=99", + NULL, "min=1,max=99", NULL, 0 }, { "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 }, { "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 }, @@ -706,7 +724,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { { "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 }, { "checkpoint", 
"category", NULL, NULL, - confchk_wiredtiger_open_checkpoint_subconfigs, 3 }, + confchk_wiredtiger_open_checkpoint_subconfigs, 2 }, { "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 }, { "config_base", "boolean", NULL, NULL, NULL, 0 }, { "create", "boolean", NULL, NULL, NULL, 0 }, @@ -721,10 +739,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2 }, { "eviction_dirty_target", "int", - NULL, "min=5,max=99", + NULL, "min=1,max=99", NULL, 0 }, { "eviction_dirty_trigger", "int", - NULL, "min=5,max=99", + NULL, "min=1,max=99", NULL, 0 }, { "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 }, { "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 }, @@ -788,7 +806,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { { "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 }, { "checkpoint", "category", NULL, NULL, - confchk_wiredtiger_open_checkpoint_subconfigs, 3 }, + confchk_wiredtiger_open_checkpoint_subconfigs, 2 }, { "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 }, { "direct_io", "list", NULL, "choices=[\"checkpoint\",\"data\",\"log\"]", @@ -801,10 +819,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2 }, { "eviction_dirty_target", "int", - NULL, "min=5,max=99", + NULL, "min=1,max=99", NULL, 0 }, { "eviction_dirty_trigger", "int", - NULL, "min=5,max=99", + NULL, "min=1,max=99", NULL, 0 }, { "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 }, { "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 }, @@ -864,7 +882,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { { "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 }, { "checkpoint", "category", NULL, NULL, - confchk_wiredtiger_open_checkpoint_subconfigs, 3 }, + confchk_wiredtiger_open_checkpoint_subconfigs, 2 }, { "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 }, { "direct_io", 
"list", NULL, "choices=[\"checkpoint\",\"data\",\"log\"]", @@ -877,10 +895,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { NULL, NULL, confchk_wiredtiger_open_eviction_subconfigs, 2 }, { "eviction_dirty_target", "int", - NULL, "min=5,max=99", + NULL, "min=1,max=99", NULL, 0 }, { "eviction_dirty_trigger", "int", - NULL, "min=5,max=99", + NULL, "min=1,max=99", NULL, 0 }, { "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 }, { "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 }, @@ -970,17 +988,14 @@ static const WT_CONFIG_ENTRY config_entries[] = { }, { "WT_CONNECTION.reconfigure", "async=(enabled=0,ops_max=1024,threads=2),cache_overhead=8," - "cache_size=100MB,checkpoint=(log_size=0," - "name=\"WiredTigerCheckpoint\",wait=0),error_prefix=," - "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80," - "eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95" + "cache_size=100MB,checkpoint=(log_size=0,wait=0),error_prefix=," + "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=5," + "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95" ",file_manager=(close_handle_minimum=250,close_idle_time=30," - "close_scan_interval=10),log=(archive=,compressor=,enabled=0," - "file_max=100MB,path=\".\",prealloc=,recover=on,zero_fill=0)," + "close_scan_interval=10),log=(archive=,prealloc=,zero_fill=0)," "lsm_manager=(merge=,worker_thread_max=4),lsm_merge=," "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB)," - "statistics=none,statistics_log=(json=0,on_close=0," - "path=\"WiredTigerStat.%d.%H\",sources=," + "statistics=none,statistics_log=(json=0,on_close=0,sources=," "timestamp=\"%b %d %H:%M:%S\",wait=0),verbose=", confchk_WT_CONNECTION_reconfigure, 18 }, @@ -1052,10 +1067,10 @@ static const WT_CONFIG_ENTRY config_entries[] = { NULL, 0 }, { "WT_SESSION.open_cursor", - "append=0,bulk=0,checkpoint=,dump=,next_random=0," - "next_random_sample_size=0,overwrite=,raw=0,readonly=0," + 
"append=0,bulk=0,checkpoint=,checkpoint_wait=,dump=,next_random=0" + ",next_random_sample_size=0,overwrite=,raw=0,readonly=0," "skip_sort_check=0,statistics=,target=", - confchk_WT_SESSION_open_cursor, 12 + confchk_WT_SESSION_open_cursor, 13 }, { "WT_SESSION.rebalance", "", @@ -1168,21 +1183,20 @@ static const WT_CONFIG_ENTRY config_entries[] = { }, { "wiredtiger_open", "async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1," - "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0," - "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=," - "config_base=,create=0,direct_io=,encryption=(keyid=,name=," - "secretkey=),error_prefix=,eviction=(threads_max=1,threads_min=1)" - ",eviction_dirty_target=80,eviction_dirty_trigger=95," - "eviction_target=80,eviction_trigger=95,exclusive=0,extensions=," - "file_extend=,file_manager=(close_handle_minimum=250," - "close_idle_time=30,close_scan_interval=10),hazard_max=1000," - "in_memory=0,log=(archive=,compressor=,enabled=0,file_max=100MB," - "path=\".\",prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=" - ",worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0" - ",session_max=100,session_scratch_max=2MB," - "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB)," - "statistics=none,statistics_log=(json=0,on_close=0," - "path=\"WiredTigerStat.%d.%H\",sources=," + "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,wait=0)" + ",checkpoint_sync=,config_base=,create=0,direct_io=," + "encryption=(keyid=,name=,secretkey=),error_prefix=," + "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=5," + "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95" + ",exclusive=0,extensions=,file_extend=," + "file_manager=(close_handle_minimum=250,close_idle_time=30," + "close_scan_interval=10),hazard_max=1000,in_memory=0," + "log=(archive=,compressor=,enabled=0,file_max=100MB,path=\".\"," + "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=," + 
"worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0," + "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB" + ",name=,quota=0,reserve=0,size=500MB),statistics=none," + "statistics_log=(json=0,on_close=0,path=\".\",sources=," "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0" ",method=fsync),use_environment=,use_environment_priv=0,verbose=," "write_through=", @@ -1190,21 +1204,20 @@ static const WT_CONFIG_ENTRY config_entries[] = { }, { "wiredtiger_open_all", "async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1," - "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0," - "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=," - "config_base=,create=0,direct_io=,encryption=(keyid=,name=," - "secretkey=),error_prefix=,eviction=(threads_max=1,threads_min=1)" - ",eviction_dirty_target=80,eviction_dirty_trigger=95," - "eviction_target=80,eviction_trigger=95,exclusive=0,extensions=," - "file_extend=,file_manager=(close_handle_minimum=250," - "close_idle_time=30,close_scan_interval=10),hazard_max=1000," - "in_memory=0,log=(archive=,compressor=,enabled=0,file_max=100MB," - "path=\".\",prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=" - ",worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0" - ",session_max=100,session_scratch_max=2MB," - "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB)," - "statistics=none,statistics_log=(json=0,on_close=0," - "path=\"WiredTigerStat.%d.%H\",sources=," + "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,wait=0)" + ",checkpoint_sync=,config_base=,create=0,direct_io=," + "encryption=(keyid=,name=,secretkey=),error_prefix=," + "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=5," + "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95" + ",exclusive=0,extensions=,file_extend=," + "file_manager=(close_handle_minimum=250,close_idle_time=30," + "close_scan_interval=10),hazard_max=1000,in_memory=0," + 
"log=(archive=,compressor=,enabled=0,file_max=100MB,path=\".\"," + "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=," + "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0," + "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB" + ",name=,quota=0,reserve=0,size=500MB),statistics=none," + "statistics_log=(json=0,on_close=0,path=\".\",sources=," "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0" ",method=fsync),use_environment=,use_environment_priv=0,verbose=," "version=(major=0,minor=0),write_through=", @@ -1212,41 +1225,39 @@ static const WT_CONFIG_ENTRY config_entries[] = { }, { "wiredtiger_open_basecfg", "async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1," - "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0," - "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=," - "direct_io=,encryption=(keyid=,name=,secretkey=),error_prefix=," - "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80," - "eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95" - ",extensions=,file_extend=,file_manager=(close_handle_minimum=250" - ",close_idle_time=30,close_scan_interval=10),hazard_max=1000," - "log=(archive=,compressor=,enabled=0,file_max=100MB,path=\".\"," - "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=," - "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0," - "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB" - ",name=,quota=0,reserve=0,size=500MB),statistics=none," - "statistics_log=(json=0,on_close=0,path=\"WiredTigerStat.%d.%H\"," - "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," - "transaction_sync=(enabled=0,method=fsync),verbose=," + "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,wait=0)" + ",checkpoint_sync=,direct_io=,encryption=(keyid=,name=," + "secretkey=),error_prefix=,eviction=(threads_max=1,threads_min=1)" + ",eviction_dirty_target=5,eviction_dirty_trigger=20," + 
"eviction_target=80,eviction_trigger=95,extensions=,file_extend=," + "file_manager=(close_handle_minimum=250,close_idle_time=30," + "close_scan_interval=10),hazard_max=1000,log=(archive=," + "compressor=,enabled=0,file_max=100MB,path=\".\",prealloc=," + "recover=on,zero_fill=0),lsm_manager=(merge=,worker_thread_max=4)" + ",lsm_merge=,mmap=,multiprocess=0,readonly=0,session_max=100," + "session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,quota=0," + "reserve=0,size=500MB),statistics=none,statistics_log=(json=0," + "on_close=0,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\"," + "wait=0),transaction_sync=(enabled=0,method=fsync),verbose=," "version=(major=0,minor=0),write_through=", confchk_wiredtiger_open_basecfg, 33 }, { "wiredtiger_open_usercfg", "async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1," - "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0," - "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=," - "direct_io=,encryption=(keyid=,name=,secretkey=),error_prefix=," - "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80," - "eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95" - ",extensions=,file_extend=,file_manager=(close_handle_minimum=250" - ",close_idle_time=30,close_scan_interval=10),hazard_max=1000," - "log=(archive=,compressor=,enabled=0,file_max=100MB,path=\".\"," - "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=," - "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0," - "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB" - ",name=,quota=0,reserve=0,size=500MB),statistics=none," - "statistics_log=(json=0,on_close=0,path=\"WiredTigerStat.%d.%H\"," - "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," - "transaction_sync=(enabled=0,method=fsync),verbose=," + "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,wait=0)" + ",checkpoint_sync=,direct_io=,encryption=(keyid=,name=," + "secretkey=),error_prefix=,eviction=(threads_max=1,threads_min=1)" + 
",eviction_dirty_target=5,eviction_dirty_trigger=20," + "eviction_target=80,eviction_trigger=95,extensions=,file_extend=," + "file_manager=(close_handle_minimum=250,close_idle_time=30," + "close_scan_interval=10),hazard_max=1000,log=(archive=," + "compressor=,enabled=0,file_max=100MB,path=\".\",prealloc=," + "recover=on,zero_fill=0),lsm_manager=(merge=,worker_thread_max=4)" + ",lsm_merge=,mmap=,multiprocess=0,readonly=0,session_max=100," + "session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,quota=0," + "reserve=0,size=500MB),statistics=none,statistics_log=(json=0," + "on_close=0,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\"," + "wait=0),transaction_sync=(enabled=0,method=fsync),verbose=," "write_through=", confchk_wiredtiger_open_usercfg, 32 }, diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c index 98267eeeb2c..1c6b0c2b500 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -1217,7 +1217,8 @@ __conn_config_file(WT_SESSION_IMPL *session, return (0); /* Open the configuration file. */ - WT_RET(__wt_open(session, filename, WT_OPEN_FILE_TYPE_REGULAR, 0, &fh)); + WT_RET(__wt_open( + session, filename, WT_FS_OPEN_FILE_TYPE_REGULAR, 0, &fh)); WT_ERR(__wt_filesize(session, fh, &size)); if (size == 0) goto err; @@ -1510,8 +1511,8 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) exist = false; if (!is_create) WT_ERR(__wt_fs_exist(session, WT_WIREDTIGER, &exist)); - ret = __wt_open(session, WT_SINGLETHREAD, WT_OPEN_FILE_TYPE_REGULAR, - is_create || exist ? WT_OPEN_CREATE : 0, &conn->lock_fh); + ret = __wt_open(session, WT_SINGLETHREAD, WT_FS_OPEN_FILE_TYPE_REGULAR, + is_create || exist ? 
WT_FS_OPEN_CREATE : 0, &conn->lock_fh); /* * If this is a read-only connection and we cannot grab the lock @@ -1554,7 +1555,7 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) */ #define WT_SINGLETHREAD_STRING "WiredTiger lock file\n" WT_ERR(__wt_filesize(session, conn->lock_fh, &size)); - if (size != strlen(WT_SINGLETHREAD_STRING)) + if ((size_t)size != strlen(WT_SINGLETHREAD_STRING)) WT_ERR(__wt_write(session, conn->lock_fh, (wt_off_t)0, strlen(WT_SINGLETHREAD_STRING), WT_SINGLETHREAD_STRING)); @@ -1563,7 +1564,8 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) /* We own the lock file, optionally create the WiredTiger file. */ ret = __wt_open(session, WT_WIREDTIGER, - WT_OPEN_FILE_TYPE_REGULAR, is_create ? WT_OPEN_CREATE : 0, &fh); + WT_FS_OPEN_FILE_TYPE_REGULAR, is_create ? WT_FS_OPEN_CREATE : 0, + &fh); /* * If we're read-only, check for handled errors. Even if able to open @@ -1784,7 +1786,7 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[]) * runs. This doesn't matter for correctness, it's just cleaning up * random files. */ - WT_RET(__wt_remove_if_exists(session, WT_BASECONFIG_SET)); + WT_RET(__wt_remove_if_exists(session, WT_BASECONFIG_SET, false)); /* * The base configuration file is only written if creating the database, @@ -1809,7 +1811,7 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[]) return (0); WT_RET(__wt_fopen(session, WT_BASECONFIG_SET, - WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs)); + WT_FS_OPEN_CREATE | WT_FS_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs)); WT_ERR(__wt_fprintf(session, fs, "%s\n\n", "# Do not modify this file.\n" @@ -1870,7 +1872,8 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[]) if (0) { /* Close open file handle, remove any temporary file. 
*/ err: WT_TRET(__wt_fclose(session, &fs)); - WT_TRET(__wt_remove_if_exists(session, WT_BASECONFIG_SET)); + WT_TRET( + __wt_remove_if_exists(session, WT_BASECONFIG_SET, false)); } __wt_free(session, base_config); diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c index 9f15db5382b..e8bb7187418 100644 --- a/src/third_party/wiredtiger/src/conn/conn_cache.c +++ b/src/third_party/wiredtiger/src/conn/conn_cache.c @@ -176,6 +176,10 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]) &cache->evict_queues[i].evict_lock, "cache eviction")); } + /* Ensure there is always a non-NULL current queue. */ + cache->evict_current_queue = + &cache->evict_queues[WT_EVICT_URGENT_QUEUE + 1]; + /* * We get/set some values in the cache statistics (rather than have * two copies), configure them. @@ -197,7 +201,7 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session) WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_CONNECTION_STATS **stats; - uint64_t inuse, leaf, used; + uint64_t inuse, leaf; conn = S2C(session); cache = conn->cache; @@ -208,26 +212,29 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session) * There are races updating the different cache tracking values so * be paranoid calculating the leaf byte usage. */ - used = cache->bytes_overflow + cache->bytes_internal; - leaf = inuse > used ? inuse - used : 0; + leaf = inuse > cache->bytes_internal ? 
+ inuse - cache->bytes_internal : 0; WT_STAT_SET(session, stats, cache_bytes_max, conn->cache_size); WT_STAT_SET(session, stats, cache_bytes_inuse, inuse); - WT_STAT_SET(session, stats, cache_overhead, cache->overhead_pct); - WT_STAT_SET( - session, stats, cache_pages_inuse, __wt_cache_pages_inuse(cache)); + WT_STAT_SET( session, stats, cache_bytes_dirty, __wt_cache_dirty_inuse(cache)); - WT_STAT_SET(session, stats, - cache_eviction_maximum_page_size, cache->evict_max_page_size); - WT_STAT_SET(session, stats, cache_pages_dirty, cache->pages_dirty); - WT_STAT_SET( - session, stats, cache_bytes_internal, cache->bytes_internal); + session, stats, cache_bytes_image, __wt_cache_bytes_image(cache)); WT_STAT_SET( - session, stats, cache_bytes_overflow, cache->bytes_overflow); + session, stats, cache_pages_inuse, __wt_cache_pages_inuse(cache)); + WT_STAT_SET( + session, stats, cache_bytes_internal, cache->bytes_internal); WT_STAT_SET(session, stats, cache_bytes_leaf, leaf); + WT_STAT_SET( + session, stats, cache_bytes_other, __wt_cache_bytes_other(cache)); + + WT_STAT_SET(session, stats, + cache_eviction_maximum_page_size, cache->evict_max_page_size); + WT_STAT_SET(session, stats, cache_pages_dirty, + cache->pages_dirty_intl + cache->pages_dirty_leaf); /* * The number of files with active walks ~= number of hazard pointers @@ -235,7 +242,7 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session) */ if (conn->evict_session != NULL) WT_STAT_SET(session, stats, cache_eviction_walks_active, - conn->evict_session->nhazard); + cache->walk_session->nhazard); } /* @@ -267,11 +274,13 @@ __wt_cache_destroy(WT_SESSION_IMPL *session) __wt_errx(session, "cache server: exiting with %" PRIu64 " bytes in memory", cache->bytes_inmem); - if (cache->bytes_dirty != 0 || cache->pages_dirty != 0) + if (cache->bytes_dirty_intl + cache->bytes_dirty_leaf != 0 || + cache->pages_dirty_intl + cache->pages_dirty_leaf != 0) __wt_errx(session, "cache server: exiting with %" PRIu64 " bytes dirty and %" 
PRIu64 " pages dirty", - cache->bytes_dirty, cache->pages_dirty); + cache->bytes_dirty_intl + cache->bytes_dirty_leaf, + cache->pages_dirty_intl + cache->pages_dirty_leaf); WT_TRET(__wt_cond_auto_destroy(session, &cache->evict_cond)); WT_TRET(__wt_cond_destroy(session, &cache->evict_waiter_cond)); @@ -286,6 +295,7 @@ __wt_cache_destroy(WT_SESSION_IMPL *session) __wt_spin_destroy(session, &cache->evict_queues[i].evict_lock); __wt_free(session, cache->evict_queues[i].evict_queue); } + __wt_free(session, conn->cache); return (ret); } diff --git a/src/third_party/wiredtiger/src/conn/conn_ckpt.c b/src/third_party/wiredtiger/src/conn/conn_ckpt.c index a23350a5e46..d54c65c4767 100644 --- a/src/third_party/wiredtiger/src/conn/conn_ckpt.c +++ b/src/third_party/wiredtiger/src/conn/conn_ckpt.c @@ -19,61 +19,38 @@ __ckpt_server_config(WT_SESSION_IMPL *session, const char **cfg, bool *startp) { WT_CONFIG_ITEM cval; WT_CONNECTION_IMPL *conn; - WT_DECL_ITEM(tmp); - WT_DECL_RET; - char *p; + + *startp = false; conn = S2C(session); - /* - * The checkpoint configuration requires a wait time and/or a log - * size -- if one is not set, we're not running at all. - * Checkpoints based on log size also require logging be enabled. - */ WT_RET(__wt_config_gets(session, cfg, "checkpoint.wait", &cval)); conn->ckpt_usecs = (uint64_t)cval.val * WT_MILLION; WT_RET(__wt_config_gets(session, cfg, "checkpoint.log_size", &cval)); conn->ckpt_logsize = (wt_off_t)cval.val; - /* Checkpoints are incompatible with in-memory configuration */ - if (conn->ckpt_usecs != 0 || conn->ckpt_logsize != 0) { + /* + * The checkpoint configuration requires a wait time and/or a log size, + * if neither is set, we're not running at all. Checkpoints based on log + * size also require logging be enabled. 
+ */ + if (conn->ckpt_usecs != 0 || + (conn->ckpt_logsize != 0 && + FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))) { + /* Checkpoints are incompatible with in-memory configuration */ WT_RET(__wt_config_gets(session, cfg, "in_memory", &cval)); if (cval.val != 0) WT_RET_MSG(session, EINVAL, - "In memory configuration incompatible with " - "checkpoints"); - } + "checkpoint configuration incompatible with " + "in-memory configuration"); - __wt_log_written_reset(session); - if ((conn->ckpt_usecs == 0 && conn->ckpt_logsize == 0) || - (conn->ckpt_logsize && conn->ckpt_usecs == 0 && - !FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))) { - *startp = false; - return (0); - } - *startp = true; + __wt_log_written_reset(session); - /* - * The application can specify a checkpoint name, which we ignore if - * it's our default. - */ - WT_RET(__wt_config_gets(session, cfg, "checkpoint.name", &cval)); - if (cval.len != 0 && - !WT_STRING_MATCH(WT_CHECKPOINT, cval.str, cval.len)) { - WT_RET(__wt_checkpoint_name_ok(session, cval.str, cval.len)); - - WT_RET(__wt_scr_alloc(session, cval.len + 20, &tmp)); - WT_ERR(__wt_buf_fmt( - session, tmp, "name=%.*s", (int)cval.len, cval.str)); - WT_ERR(__wt_strdup(session, tmp->data, &p)); - - __wt_free(session, conn->ckpt_config); - conn->ckpt_config = p; + *startp = true; } -err: __wt_scr_free(session, &tmp); - return (ret); + return (0); } /* @@ -103,7 +80,7 @@ __ckpt_server(void *arg) __wt_cond_wait(session, conn->ckpt_cond, conn->ckpt_usecs)); /* Checkpoint the database. */ - WT_ERR(wt_session->checkpoint(wt_session, conn->ckpt_config)); + WT_ERR(wt_session->checkpoint(wt_session, NULL)); /* Reset. */ if (conn->ckpt_logsize) { @@ -179,7 +156,16 @@ __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[]) conn = S2C(session); start = false; - /* If there is already a server running, shut it down. */ + /* + * Stop any server that is already running. 
This means that each time + * reconfigure is called we'll bounce the server even if there are no + * configuration changes. This makes our life easier as the underlying + * configuration routine doesn't have to worry about freeing objects + * in the connection structure (it's guaranteed to always start with a + * blank slate), and we don't have to worry about races where a running + * server is reading configuration information that we're updating, and + * it's not expected that reconfiguration will happen a lot. + */ if (conn->ckpt_session != NULL) WT_RET(__wt_checkpoint_server_destroy(session)); @@ -211,8 +197,6 @@ __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session) } WT_TRET(__wt_cond_destroy(session, &conn->ckpt_cond)); - __wt_free(session, conn->ckpt_config); - /* Close the server thread's session. */ if (conn->ckpt_session != NULL) { wt_session = &conn->ckpt_session->iface; @@ -226,7 +210,6 @@ __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session) conn->ckpt_session = NULL; conn->ckpt_tid_set = false; conn->ckpt_cond = NULL; - conn->ckpt_config = NULL; conn->ckpt_usecs = 0; return (ret); diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c index 08fb2b24468..f52fccc7d1c 100644 --- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c +++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c @@ -64,6 +64,16 @@ __conn_dhandle_alloc(WT_SESSION_IMPL *session, F_SET(dhandle, WT_DHANDLE_IS_METADATA); /* + * We are holding the data handle list lock, which protects most + * threads from seeing the new handle until that lock is released. + * + * However, the sweep server scans the list of handles without holding + * that lock, so we need a write barrier here to ensure the sweep + * server doesn't see a partially filled in structure. 
+ */ + WT_WRITE_BARRIER(); + + /* * Prepend the handle to the connection list, assuming we're likely to * need new files again soon, until they are cached by all sessions. */ diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c index 1ae370ef2fa..18ed71e4688 100644 --- a/src/third_party/wiredtiger/src/conn/conn_log.c +++ b/src/third_party/wiredtiger/src/conn/conn_log.c @@ -51,6 +51,25 @@ __logmgr_config( WT_CONNECTION_IMPL *conn; bool enabled; + /* + * A note on reconfiguration: the standard "is this configuration string + * allowed" checks should fail if reconfiguration has invalid strings, + * for example, "log=(enabled)", or "statistics_log=(path=XXX)", because + * the connection reconfiguration method doesn't allow those strings. + * Additionally, the base configuration values during reconfiguration + * are the currently configured values (so we don't revert to default + * values when repeatedly reconfiguring), and configuration processing + * of a currently set value should not change the currently set value. + * + * In this code path, log server reconfiguration does not stop/restart + * the log server, so there's no point in re-evaluating configuration + * strings that cannot be reconfigured, risking bugs in configuration + * setup, and depending on evaluation of currently set values to always + * result in the currently set value. Skip tests for any configuration + * strings which don't make sense during reconfiguration, but don't + * worry about error reporting because it should never happen. + */ + conn = S2C(session); WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval)); @@ -62,6 +81,8 @@ __logmgr_config( * * If it is off and the user it turning it on, or it is on * and the user is turning it off, return an error. + * + * See above: should never happen. 
*/ if (reconfig && ((enabled && !FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) || @@ -83,6 +104,8 @@ __logmgr_config( * Setup a log path and compression even if logging is disabled in case * we are going to print a log. Only do this on creation. Once a * compressor or log path are set they cannot be changed. + * + * See above: should never happen. */ if (!reconfig) { conn->log_compressor = NULL; @@ -95,6 +118,7 @@ __logmgr_config( WT_RET(__wt_strndup( session, cval.str, cval.len, &conn->log_path)); } + /* We are done if logging isn't enabled. */ if (!*runp) return (0); @@ -103,13 +127,14 @@ __logmgr_config( if (cval.val != 0) FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE); + /* + * The file size cannot be reconfigured. The amount of memory allocated + * to the log slots may be based on the log file size at creation and we + * don't want to re-allocate that memory while running. + * + * See above: should never happen. + */ if (!reconfig) { - /* - * Ignore if the user tries to change the file size. The - * amount of memory allocated to the log slots may be based - * on the log file size at creation and we don't want to - * re-allocate that memory while running. - */ WT_RET(__wt_config_gets(session, cfg, "log.file_max", &cval)); conn->log_file_max = (wt_off_t)cval.val; WT_STAT_FAST_CONN_SET(session, @@ -125,12 +150,17 @@ __logmgr_config( conn->log_prealloc = 1; /* - * Note that it is meaningless to reconfigure this value during - * runtime. It only matters on create before recovery runs. + * Note it's meaningless to reconfigure this value during runtime, it + * only matters on create before recovery runs. + * + * See above: should never happen. 
*/ - WT_RET(__wt_config_gets_def(session, cfg, "log.recover", 0, &cval)); - if (cval.len != 0 && WT_STRING_MATCH("error", cval.str, cval.len)) - FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR); + if (!reconfig) { + WT_RET(__wt_config_gets_def( + session, cfg, "log.recover", 0, &cval)); + if (WT_STRING_MATCH("error", cval.str, cval.len)) + FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR); + } WT_RET(__wt_config_gets(session, cfg, "log.zero_fill", &cval)); if (cval.val != 0) { diff --git a/src/third_party/wiredtiger/src/conn/conn_stat.c b/src/third_party/wiredtiger/src/conn/conn_stat.c index 855ff57808e..4e7cac59c4a 100644 --- a/src/third_party/wiredtiger/src/conn/conn_stat.c +++ b/src/third_party/wiredtiger/src/conn/conn_stat.c @@ -36,6 +36,31 @@ __stat_sources_free(WT_SESSION_IMPL *session, char ***sources) } /* + * __stat_config_discard -- + * Discard all statistics-log configuration. + */ +static int +__stat_config_discard(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + + conn = S2C(session); + + /* + * Discard all statistics-log configuration information, called when + * reconfiguring or destroying the statistics logging setup, + */ + __wt_free(session, conn->stat_format); + ret = __wt_fclose(session, &conn->stat_fs); + __wt_free(session, conn->stat_path); + __stat_sources_free(session, &conn->stat_sources); + conn->stat_stamp = NULL; + conn->stat_usecs = 0; + return (ret); +} + +/* * __wt_conn_stat_init -- * Initialize the per-connection statistics. 
*/ @@ -73,20 +98,37 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp) WT_CONFIG objectconf; WT_CONFIG_ITEM cval, k, v; WT_CONNECTION_IMPL *conn; + WT_DECL_ITEM(tmp); WT_DECL_RET; int cnt; char **sources; + /* + * A note on reconfiguration: the standard "is this configuration string + * allowed" checks should fail if reconfiguration has invalid strings, + * for example, "log=(enabled)", or "statistics_log=(path=XXX)", because + * the connection reconfiguration method doesn't allow those strings. + * Additionally, the base configuration values during reconfiguration + * are the currently configured values (so we don't revert to default + * values when repeatedly reconfiguring), and configuration processing + * of a currently set value should not change the currently set value. + * + * In this code path, a previous statistics log server reconfiguration + * may have stopped the server (and we're about to restart it). Because + * stopping the server discarded the configured information stored in + * the connection structure, we have to re-evaluate all configuration + * values, reconfiguration can't skip any of them. + */ + conn = S2C(session); sources = NULL; - WT_RET(__wt_config_gets(session, cfg, "statistics_log.wait", &cval)); /* Only start the server if wait time is non-zero */ + WT_RET(__wt_config_gets(session, cfg, "statistics_log.wait", &cval)); *runp = cval.val != 0; conn->stat_usecs = (uint64_t)cval.val * WT_MILLION; - WT_RET(__wt_config_gets( - session, cfg, "statistics_log.json", &cval)); + WT_RET(__wt_config_gets(session, cfg, "statistics_log.json", &cval)); if (cval.val != 0) FLD_SET(conn->stat_flags, WT_CONN_STAT_JSON); @@ -96,24 +138,30 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp) FLD_SET(conn->stat_flags, WT_CONN_STAT_ON_CLOSE); /* - * Statistics logging configuration requires either a wait time or an - * on-close setting. 
- */ - if (!*runp && !FLD_ISSET(conn->stat_flags, WT_CONN_STAT_ON_CLOSE)) - return (0); - - /* - * If any statistics logging is done, this must not be a read-only - * connection. + * We don't allow the log path to be reconfigured for security reasons. + * (Applications passing input strings directly to reconfigure would + * expose themselves to a potential security problem, the utility of + * reconfiguring a statistics log path isn't worth the security risk.) + * + * See above for the details, but during reconfiguration we're loading + * the path value from the saved configuration information, and it's + * required during reconfiguration because we potentially stopped and + * are restarting, the server. */ - WT_RET(__wt_config_gets(session, cfg, "statistics_log.sources", &cval)); - WT_RET(__wt_config_subinit(session, &objectconf, &cval)); + WT_RET(__wt_config_gets(session, cfg, "statistics_log.path", &cval)); + WT_ERR(__wt_scr_alloc(session, 0, &tmp)); + WT_ERR(__wt_buf_fmt(session, + tmp, "%.*s/%s", (int)cval.len, cval.str, WT_STATLOG_FILENAME)); + WT_ERR(__wt_filename(session, tmp->data, &conn->stat_path)); + + WT_ERR(__wt_config_gets(session, cfg, "statistics_log.sources", &cval)); + WT_ERR(__wt_config_subinit(session, &objectconf, &cval)); for (cnt = 0; (ret = __wt_config_next(&objectconf, &k, &v)) == 0; ++cnt) ; - WT_RET_NOTFOUND_OK(ret); + WT_ERR_NOTFOUND_OK(ret); if (cnt != 0) { - WT_RET(__wt_calloc_def(session, cnt + 1, &sources)); - WT_RET(__wt_config_subinit(session, &objectconf, &cval)); + WT_ERR(__wt_calloc_def(session, cnt + 1, &sources)); + WT_ERR(__wt_config_subinit(session, &objectconf, &cval)); for (cnt = 0; (ret = __wt_config_next(&objectconf, &k, &v)) == 0; ++cnt) { /* @@ -138,29 +186,37 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp) sources = NULL; } - WT_ERR(__wt_config_gets(session, cfg, "statistics_log.path", &cval)); - WT_ERR(__wt_nfilename(session, cval.str, cval.len, &conn->stat_path)); - /* * When using JSON 
format, use the same timestamp format as MongoDB by - * default. + * default. This requires caution: the user might have set the timestamp + * in a previous reconfigure call and we don't want to override that, so + * compare the retrieved value with the default value to decide if we + * should use the JSON default. + * + * (This still implies if the user explicitly sets the timestamp to the + * default value, then sets the JSON flag in a separate reconfigure + * call, or vice-versa, we will incorrectly switch to the JSON default + * timestamp. But there's no way to detect that, and this is all a low + * probability path.) + * + * !!! + * Don't rewrite in the compressed "%FT%T.000Z" form, MSVC13 segfaults. */ - if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_JSON)) { - ret = __wt_config_gets( - session, &cfg[1], "statistics_log.timestamp", &cval); - if (ret == WT_NOTFOUND) - WT_ERR(__wt_strdup( - session, "%FT%T.000Z", &conn->stat_format)); - WT_ERR_NOTFOUND_OK(ret); - } - if (conn->stat_format == NULL) { - WT_ERR(__wt_config_gets( - session, cfg, "statistics_log.timestamp", &cval)); +#define WT_TIMESTAMP_DEFAULT "%b %d %H:%M:%S" +#define WT_TIMESTAMP_JSON_DEFAULT "%Y-%m-%dT%H:%M:%S.000Z" + WT_ERR(__wt_config_gets( + session, cfg, "statistics_log.timestamp", &cval)); + if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_JSON) && + WT_STRING_MATCH(WT_TIMESTAMP_DEFAULT, cval.str, cval.len)) + WT_ERR(__wt_strdup( + session, WT_TIMESTAMP_JSON_DEFAULT, &conn->stat_format)); + else WT_ERR(__wt_strndup( session, cval.str, cval.len, &conn->stat_format)); - } err: __stat_sources_free(session, &sources); + __wt_scr_free(session, &tmp); + return (ret); } @@ -373,7 +429,7 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp) if (path != NULL) (void)strcpy(path->mem, tmp->mem); WT_RET(__wt_fopen(session, tmp->mem, - WT_OPEN_CREATE | WT_OPEN_FIXED, WT_STREAM_APPEND, + WT_FS_OPEN_CREATE | WT_FS_OPEN_FIXED, WT_STREAM_APPEND, &log_stream)); } conn->stat_fs = log_stream; 
@@ -538,14 +594,23 @@ __wt_statlog_create(WT_SESSION_IMPL *session, const char *cfg[]) bool start; conn = S2C(session); - start = false; /* * Stop any server that is already running. This means that each time * reconfigure is called we'll bounce the server even if there are no - * configuration changes - but that makes our lives easier. + * configuration changes. This makes our life easier as the underlying + * configuration routine doesn't have to worry about freeing objects + * in the connection structure (it's guaranteed to always start with a + * blank slate), and we don't have to worry about races where a running + * server is reading configuration information that we're updating, and + * it's not expected that reconfiguration will happen a lot. + * + * If there's no server running, discard any configuration information + * so we don't leak memory during reconfiguration. */ - if (conn->stat_session != NULL) + if (conn->stat_session == NULL) + WT_RET(__stat_config_discard(session)); + else WT_RET(__wt_statlog_destroy(session, false)); WT_RET(__statlog_config(session, cfg, &start)); @@ -568,38 +633,28 @@ __wt_statlog_destroy(WT_SESSION_IMPL *session, bool is_close) conn = S2C(session); + /* Stop the server thread. */ F_CLR(conn, WT_CONN_SERVER_STATISTICS); if (conn->stat_tid_set) { WT_TRET(__wt_cond_signal(session, conn->stat_cond)); WT_TRET(__wt_thread_join(session, conn->stat_tid)); conn->stat_tid_set = false; } + WT_TRET(__wt_cond_destroy(session, &conn->stat_cond)); /* Log a set of statistics on shutdown if configured. */ if (is_close) WT_TRET(__wt_statlog_log_one(session)); - WT_TRET(__wt_cond_destroy(session, &conn->stat_cond)); - - __stat_sources_free(session, &conn->stat_sources); - __wt_free(session, conn->stat_path); - __wt_free(session, conn->stat_format); + /* Discard all configuration information. */ + WT_TRET(__stat_config_discard(session)); /* Close the server thread's session. 
*/ if (conn->stat_session != NULL) { wt_session = &conn->stat_session->iface; WT_TRET(wt_session->close(wt_session, NULL)); + conn->stat_session = NULL; } - /* Clear connection settings so reconfigure is reliable. */ - conn->stat_session = NULL; - conn->stat_tid_set = false; - conn->stat_format = NULL; - WT_TRET(__wt_fclose(session, &conn->stat_fs)); - conn->stat_path = NULL; - conn->stat_sources = NULL; - conn->stat_stamp = NULL; - conn->stat_usecs = 0; - return (ret); } diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c index 4ee23008687..63952169566 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_backup.c +++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c @@ -9,13 +9,12 @@ #include "wt_internal.h" static int __backup_all(WT_SESSION_IMPL *); -static int __backup_cleanup_handles(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *); static int __backup_list_append( WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, const char *); static int __backup_list_uri_append(WT_SESSION_IMPL *, const char *, bool *); static int __backup_start( WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, const char *[]); -static int __backup_stop(WT_SESSION_IMPL *); +static int __backup_stop(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *); static int __backup_uri(WT_SESSION_IMPL *, const char *[], bool *, bool *); /* @@ -76,20 +75,26 @@ __curbackup_close(WT_CURSOR *cursor) WT_CURSOR_BACKUP *cb; WT_DECL_RET; WT_SESSION_IMPL *session; - int tret; cb = (WT_CURSOR_BACKUP *)cursor; CURSOR_API_CALL(cursor, session, close, NULL); - WT_TRET(__backup_cleanup_handles(session, cb)); + /* + * When starting a hot backup, we serialize hot backup cursors and set + * the connection's hot-backup flag. Once that's done, we set the + * cursor's backup-locker flag, implying the cursor owns all necessary + * cleanup (including removing temporary files), regardless of error or + * success. 
The cursor's backup-locker flag is never cleared (it's just + * discarded when the cursor is closed), because that cursor will never + * not be responsible for cleanup. + */ + if (F_ISSET(cb, WT_CURBACKUP_LOCKER)) + WT_TRET(__backup_stop(session, cb)); + WT_TRET(__wt_cursor_close(cursor)); session->bkp_cursor = NULL; - WT_WITH_SCHEMA_LOCK(session, tret, - tret = __backup_stop(session)); /* Stop the backup. */ - WT_TRET(tret); - err: API_END_RET(session, ret); } @@ -144,11 +149,11 @@ __wt_curbackup_open(WT_SESSION_IMPL *session, ret = __backup_start(session, cb, cfg))); WT_ERR(ret); - /* __wt_cursor_init is last so we don't have to clean up on error. */ WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp)); if (0) { -err: __wt_free(session, cb); +err: WT_TRET(__curbackup_close(cursor)); + *cursorp = NULL; } return (ret); @@ -226,6 +231,9 @@ __backup_start( conn->hot_backup = true; WT_ERR(__wt_writeunlock(session, conn->hot_backup_lock)); + /* We're the lock holder, we own cleanup. */ + F_SET(cb, WT_CURBACKUP_LOCKER); + /* * Create a temporary backup file. This must be opened before * generating the list of targets in backup_uri. This file will @@ -235,7 +243,7 @@ __backup_start( * doesn't confuse restarting in the source database. */ WT_ERR(__wt_fopen(session, WT_BACKUP_TMP, - WT_OPEN_CREATE, WT_STREAM_WRITE, &cb->bfs)); + WT_FS_OPEN_CREATE, WT_STREAM_WRITE, &cb->bfs)); /* * If a list of targets was specified, work our way through them. * Else, generate a list of all database objects. @@ -261,7 +269,7 @@ __backup_start( */ dest = WT_INCREMENTAL_BACKUP; WT_ERR(__wt_fopen(session, WT_INCREMENTAL_SRC, - WT_OPEN_CREATE, WT_STREAM_WRITE, &srcfs)); + WT_FS_OPEN_CREATE, WT_STREAM_WRITE, &srcfs)); WT_ERR(__backup_list_append( session, cb, WT_INCREMENTAL_BACKUP)); } else { @@ -282,12 +290,9 @@ err: /* Close the hot backup file. 
*/ WT_TRET(__wt_fclose(session, &cb->bfs)); if (srcfs != NULL) WT_TRET(__wt_fclose(session, &srcfs)); - if (ret != 0) { - WT_TRET(__backup_cleanup_handles(session, cb)); - WT_TRET(__backup_stop(session)); - } else { + if (ret == 0) { WT_ASSERT(session, dest != NULL); - WT_TRET(__wt_fs_rename(session, WT_BACKUP_TMP, dest)); + WT_TRET(__wt_fs_rename(session, WT_BACKUP_TMP, dest, false)); } return (ret); @@ -295,9 +300,7 @@ err: /* Close the hot backup file. */ /* * __backup_cleanup_handles -- - * Release and free all btree handles held by the backup. This is kept - * separate from __backup_stop because it can be called without the - * schema lock held. + * Release and free all btree handles held by the backup. */ static int __backup_cleanup_handles(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb) @@ -325,15 +328,18 @@ __backup_cleanup_handles(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb) * Stop a backup. */ static int -__backup_stop(WT_SESSION_IMPL *session) +__backup_stop(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; conn = S2C(session); + /* Release all btree handles held by the backup. */ + WT_TRET(__backup_cleanup_handles(session, cb)); + /* Remove any backup specific file. */ - ret = __wt_backup_file_remove(session); + WT_TRET(__wt_backup_file_remove(session)); /* Checkpoint deletion can proceed, as can the next hot backup. */ WT_TRET(__wt_writelock(session, conn->hot_backup_lock)); @@ -443,10 +449,10 @@ __wt_backup_file_remove(WT_SESSION_IMPL *session) * always know we were a source directory while there's any chance of * an incremental backup file existing. 
*/ - WT_TRET(__wt_remove_if_exists(session, WT_BACKUP_TMP)); - WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_BACKUP)); - WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_SRC)); - WT_TRET(__wt_remove_if_exists(session, WT_METADATA_BACKUP)); + WT_TRET(__wt_remove_if_exists(session, WT_BACKUP_TMP, true)); + WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_BACKUP, true)); + WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_SRC, true)); + WT_TRET(__wt_remove_if_exists(session, WT_METADATA_BACKUP, true)); return (ret); } diff --git a/src/third_party/wiredtiger/src/cursor/cur_config.c b/src/third_party/wiredtiger/src/cursor/cur_config.c index e0d270e4245..2d3f3ffd176 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_config.c +++ b/src/third_party/wiredtiger/src/cursor/cur_config.c @@ -58,11 +58,11 @@ __wt_curconfig_open(WT_SESSION_IMPL *session, cursor->session = &session->iface; cursor->key_format = cursor->value_format = "S"; - /* __wt_cursor_init is last so we don't have to clean up on error. 
*/ WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp)); if (0) { -err: __wt_free(session, cconfig); +err: WT_TRET(__curconfig_close(cursor)); + *cursorp = NULL; } return (ret); } diff --git a/src/third_party/wiredtiger/src/cursor/cur_ds.c b/src/third_party/wiredtiger/src/cursor/cur_ds.c index d2b8d81ab37..8d4b7a9384b 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_ds.c +++ b/src/third_party/wiredtiger/src/cursor/cur_ds.c @@ -518,10 +518,7 @@ __wt_curds_open( source->flags = 0; if (0) { -err: if (F_ISSET(cursor, WT_CURSTD_OPEN)) - WT_TRET(cursor->close(cursor)); - else - __wt_free(session, data_source); +err: WT_TRET(__curds_close(cursor)); *cursorp = NULL; } diff --git a/src/third_party/wiredtiger/src/cursor/cur_dump.c b/src/third_party/wiredtiger/src/cursor/cur_dump.c index 595915df7b7..d7f18bb61ac 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_dump.c +++ b/src/third_party/wiredtiger/src/cursor/cur_dump.c @@ -401,13 +401,13 @@ __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp) cursor->json_private = child->json_private = json; } - /* __wt_cursor_init is last so we don't have to clean up on error. */ cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_open_cursor); cfg[1] = NULL; WT_ERR(__wt_cursor_init(cursor, NULL, owner, cfg, cursorp)); if (0) { -err: __wt_free(session, cursor); +err: WT_TRET(__curdump_close(cursor)); + *cursorp = NULL; } return (ret); } diff --git a/src/third_party/wiredtiger/src/cursor/cur_file.c b/src/third_party/wiredtiger/src/cursor/cur_file.c index fac903b4770..8e7bd4bbea5 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_file.c +++ b/src/third_party/wiredtiger/src/cursor/cur_file.c @@ -388,11 +388,11 @@ err: API_END_RET(session, ret); } /* - * __wt_curfile_create -- + * __curfile_create -- * Open a cursor for a given btree handle. 
*/ -int -__wt_curfile_create(WT_SESSION_IMPL *session, +static int +__curfile_create(WT_SESSION_IMPL *session, WT_CURSOR *owner, const char *cfg[], bool bulk, bool bitmap, WT_CURSOR **cursorp) { @@ -439,6 +439,13 @@ __wt_curfile_create(WT_SESSION_IMPL *session, cursor->value_format = btree->value_format; cbt->btree = btree; + /* + * Increment the data-source's in-use counter; done now because closing + * the cursor will decrement it, and all failure paths from here close + * the cursor. + */ + __wt_cursor_dhandle_incr_use(session); + if (session->dhandle->checkpoint != NULL) F_SET(cbt, WT_CBT_NO_TXN); @@ -478,7 +485,6 @@ __wt_curfile_create(WT_SESSION_IMPL *session, /* Underlying btree initialization. */ __wt_btcur_open(cbt); - /* __wt_cursor_init is last so we don't have to clean up on error. */ WT_ERR(__wt_cursor_init( cursor, cursor->internal_uri, owner, cfg, cursorp)); @@ -486,7 +492,8 @@ __wt_curfile_create(WT_SESSION_IMPL *session, WT_STAT_FAST_DATA_INCR(session, cursor_create); if (0) { -err: __wt_free(session, cbt); +err: WT_TRET(__curfile_close(cursor)); + *cursorp = NULL; } return (ret); @@ -503,9 +510,10 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CONFIG_ITEM cval; WT_DECL_RET; uint32_t flags; - bool bitmap, bulk; + bool bitmap, bulk, checkpoint_wait; bitmap = bulk = false; + checkpoint_wait = true; flags = 0; /* @@ -531,6 +539,12 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, else if (!WT_STRING_MATCH("unordered", cval.str, cval.len)) WT_RET_MSG(session, EINVAL, "Value for 'bulk' must be a boolean or 'bitmap'"); + + if (bulk) { + WT_RET(__wt_config_gets(session, + cfg, "checkpoint_wait", &cval)); + checkpoint_wait = cval.val != 0; + } } /* Bulk handles require exclusive access. */ @@ -540,11 +554,11 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, /* Get the handle and lock it while the cursor is using it. 
*/ if (WT_PREFIX_MATCH(uri, "file:")) { /* - * If we are opening exclusive, get the handle while holding - * the checkpoint lock. This prevents a bulk cursor open - * failing with EBUSY due to a database-wide checkpoint. + * If we are opening exclusive and don't want a bulk cursor + * open to fail with EBUSY due to a database-wide checkpoint, + * get the handle while holding the checkpoint lock. */ - if (LF_ISSET(WT_DHANDLE_EXCLUSIVE)) + if (LF_ISSET(WT_DHANDLE_EXCLUSIVE) && checkpoint_wait) WT_WITH_CHECKPOINT_LOCK(session, ret, ret = __wt_session_get_btree_ckpt( session, uri, cfg, flags)); @@ -555,10 +569,8 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, } else WT_RET(__wt_bad_object_type(session, uri)); - WT_ERR(__wt_curfile_create(session, owner, cfg, bulk, bitmap, cursorp)); + WT_ERR(__curfile_create(session, owner, cfg, bulk, bitmap, cursorp)); - /* Increment the data-source's in-use counter. */ - __wt_cursor_dhandle_incr_use(session); return (0); err: /* If the cursor could not be opened, release the handle. 
*/ diff --git a/src/third_party/wiredtiger/src/cursor/cur_index.c b/src/third_party/wiredtiger/src/cursor/cur_index.c index 6de68d86778..82a27d65ce6 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_index.c +++ b/src/third_party/wiredtiger/src/cursor/cur_index.c @@ -263,19 +263,57 @@ err: F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); static int __curindex_search_near(WT_CURSOR *cursor, int *exact) { + WT_CURSOR *child; WT_CURSOR_INDEX *cindex; WT_DECL_RET; + WT_ITEM found_key; WT_SESSION_IMPL *session; + int cmp; cindex = (WT_CURSOR_INDEX *)cursor; - JOINABLE_CURSOR_API_CALL(cursor, session, search_near, NULL); - __wt_cursor_set_raw_key(cindex->child, &cursor->key); - if ((ret = cindex->child->search_near(cindex->child, exact)) == 0) - ret = __curindex_move(cindex); - else - F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); + child = cindex->child; + JOINABLE_CURSOR_API_CALL(cursor, session, search, NULL); -err: API_END_RET(session, ret); + /* + * We are searching using the application-specified key, which + * (usually) doesn't contain the primary key, so it is just a prefix of + * any matching index key. That said, if there is an exact match, we + * want to find the first matching index entry and set exact equal to + * zero. Do a search_near, step to the next entry if we land on one + * that is too small, then check that the prefix matches. + */ + __wt_cursor_set_raw_key(child, &cursor->key); + WT_ERR(child->search_near(child, &cmp)); + + if (cmp < 0) + WT_ERR(child->next(child)); + + /* + * We expect partial matches, and want the smallest record with a key + * greater than or equal to the search key. + * + * If the key we find is shorter than the search key, it can't possibly + * match. + * + * The only way for the key to be exactly equal is if there is an index + * on the primary key, because otherwise the primary key columns will + * be appended to the index key, but we don't disallow that (odd) case. 
+ */ + found_key = child->key; + if (found_key.size < cursor->key.size) + WT_ERR(WT_NOTFOUND); + found_key.size = cursor->key.size; + + WT_ERR(__wt_compare( + session, cindex->index->collator, &cursor->key, &found_key, exact)); + + WT_ERR(__curindex_move(cindex)); + + if (0) { +err: F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); + } + + API_END_RET(session, ret); } /* diff --git a/src/third_party/wiredtiger/src/cursor/cur_log.c b/src/third_party/wiredtiger/src/cursor/cur_log.c index 0a13803da5d..2adf0c2b8ab 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_log.c +++ b/src/third_party/wiredtiger/src/cursor/cur_log.c @@ -315,16 +315,16 @@ __curlog_close(WT_CURSOR *cursor) WT_CONNECTION_IMPL *conn; WT_CURSOR_LOG *cl; WT_DECL_RET; - WT_LOG *log; WT_SESSION_IMPL *session; CURSOR_API_CALL(cursor, session, close, NULL); cl = (WT_CURSOR_LOG *)cursor; conn = S2C(session); + WT_ASSERT(session, FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)); - log = conn->log; - WT_TRET(__wt_readunlock(session, log->log_archive_lock)); - WT_TRET(__curlog_reset(cursor)); + if (F_ISSET(cl, WT_CURLOG_ARCHIVE_LOCK)) + WT_TRET(__wt_readunlock(session, conn->log->log_archive_lock)); + __wt_free(session, cl->cur_lsn); __wt_free(session, cl->next_lsn); __wt_scr_free(session, &cl->logrec); @@ -332,6 +332,7 @@ __curlog_close(WT_CURSOR *cursor) __wt_scr_free(session, &cl->opvalue); __wt_free(session, cl->packed_key); __wt_free(session, cl->packed_value); + WT_TRET(__wt_cursor_close(cursor)); err: API_END_RET(session, ret); @@ -401,23 +402,10 @@ __wt_curlog_open(WT_SESSION_IMPL *session, /* Log cursors block archiving. 
*/ WT_ERR(__wt_readlock(session, log->log_archive_lock)); + F_SET(cl, WT_CURLOG_ARCHIVE_LOCK); if (0) { -err: if (F_ISSET(cursor, WT_CURSTD_OPEN)) - WT_TRET(cursor->close(cursor)); - else { - __wt_free(session, cl->cur_lsn); - __wt_free(session, cl->next_lsn); - __wt_scr_free(session, &cl->logrec); - __wt_scr_free(session, &cl->opkey); - __wt_scr_free(session, &cl->opvalue); - /* - * NOTE: We cannot get on the error path with the - * readlock held. No need to unlock it unless that - * changes above. - */ - __wt_free(session, cl); - } +err: WT_TRET(__curlog_close(cursor)); *cursorp = NULL; } diff --git a/src/third_party/wiredtiger/src/cursor/cur_metadata.c b/src/third_party/wiredtiger/src/cursor/cur_metadata.c index 3d702e2ea8c..fc63ca13f7c 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_metadata.c +++ b/src/third_party/wiredtiger/src/cursor/cur_metadata.c @@ -475,9 +475,11 @@ __curmetadata_close(WT_CURSOR *cursor) mdc = (WT_CURSOR_METADATA *)cursor; file_cursor = mdc->file_cursor; CURSOR_API_CALL(cursor, session, - close, ((WT_CURSOR_BTREE *)file_cursor)->btree); + close, file_cursor == NULL ? 
+ NULL : ((WT_CURSOR_BTREE *)file_cursor)->btree); - ret = file_cursor->close(file_cursor); + if (file_cursor != NULL) + ret = file_cursor->close(file_cursor); WT_TRET(__wt_cursor_close(cursor)); err: API_END_RET(session, ret); @@ -552,9 +554,8 @@ __wt_curmetadata_open(WT_SESSION_IMPL *session, } if (0) { -err: if (mdc->file_cursor != NULL) - WT_TRET(mdc->file_cursor->close(mdc->file_cursor)); - __wt_free(session, mdc); +err: WT_TRET(__curmetadata_close(cursor)); + *cursorp = NULL; } return (ret); } diff --git a/src/third_party/wiredtiger/src/cursor/cur_stat.c b/src/third_party/wiredtiger/src/cursor/cur_stat.c index f7a8f5fc866..5c9159a4c0b 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_stat.c +++ b/src/third_party/wiredtiger/src/cursor/cur_stat.c @@ -37,22 +37,6 @@ __curstat_print_value(WT_SESSION_IMPL *session, uint64_t v, WT_ITEM *buf) } /* - * __curstat_free_config -- - * Free the saved configuration string stack - */ -static void -__curstat_free_config(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) -{ - size_t i; - - if (cst->cfg != NULL) { - for (i = 0; cst->cfg[i] != NULL; ++i) - __wt_free(session, cst->cfg[i]); - __wt_free(session, cst->cfg); - } -} - -/* * __curstat_get_key -- * WT_CURSOR->get_key for statistics cursors. */ @@ -334,11 +318,16 @@ __curstat_close(WT_CURSOR *cursor) WT_CURSOR_STAT *cst; WT_DECL_RET; WT_SESSION_IMPL *session; + size_t i; cst = (WT_CURSOR_STAT *)cursor; CURSOR_API_CALL(cursor, session, close, NULL); - __curstat_free_config(session, cst); + if (cst->cfg != NULL) { + for (i = 0; cst->cfg[i] != NULL; ++i) + __wt_free(session, cst->cfg[i]); + __wt_free(session, cst->cfg); + } __wt_buf_free(session, &cst->pv); __wt_free(session, cst->desc_buf); @@ -691,7 +680,6 @@ __wt_curstat_open(WT_SESSION_IMPL *session, /* The cursor isn't yet positioned. */ cst->notpositioned = true; - /* __wt_cursor_init is last so we don't have to clean up on error. 
*/ WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp)); if (0) { @@ -701,8 +689,8 @@ config_err: WT_ERR_MSG(session, EINVAL, } if (0) { -err: __curstat_free_config(session, cst); - __wt_free(session, cst); +err: WT_TRET(__curstat_close(cursor)); + *cursorp = NULL; } return (ret); diff --git a/src/third_party/wiredtiger/src/cursor/cur_table.c b/src/third_party/wiredtiger/src/cursor/cur_table.c index 6d50523043a..a14b40a1150 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_table.c +++ b/src/third_party/wiredtiger/src/cursor/cur_table.c @@ -525,15 +525,20 @@ __curtable_insert(WT_CURSOR *cursor) } /* - * WT_CURSOR.insert doesn't leave the cursor positioned, and the - * application may want to free the memory used to configure the - * insert; don't read that memory again (matching the underlying - * file object cursor insert semantics). + * Insert is the one cursor operation that doesn't end with the cursor + * pointing to an on-page item (except for column-store appends, where + * we are returning a key). That is, the application's cursor continues + * to reference the application's memory after a successful cursor call, + * which isn't true anywhere else. We don't want to have to explain that + * scoping corner case, so we reset the application's cursor so it can + * free the referenced memory and continue on without risking subsequent + * core dumps. 
*/ F_CLR(primary, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + if (F_ISSET(primary, WT_CURSTD_APPEND)) + F_SET(primary, WT_CURSTD_KEY_INT); err: CURSOR_UPDATE_API_END(session, ret); - return (ret); } diff --git a/src/third_party/wiredtiger/src/docs/security.dox b/src/third_party/wiredtiger/src/docs/security.dox index 331f74d969b..82e13ae7ad3 100644 --- a/src/third_party/wiredtiger/src/docs/security.dox +++ b/src/third_party/wiredtiger/src/docs/security.dox @@ -2,10 +2,23 @@ @section directory_permissions Database directory permissions -All WiredTiger files are stored in the database home directory, and the -WiredTiger database directory should have its permissions set to ensure -database objects are not accessible to users without appropriate -permissions. See @ref home for more information. +By default, WiredTiger files are stored beneath the database home directory. +The WiredTiger database directory should have its permissions set to ensure +database objects are not accessible to users without appropriate permissions. +See @ref home for more information. + +@section absolute_path Absolute paths + +WiredTiger prepends the name of the database home to file names which +do not appear to be absolute paths. (The absolute path test is +simplistic, matching a leading slash character on POSIX systems or a +leading alphabetic character and colon on Windows.) No file path +sanitization or validation is done by WiredTiger, for example, file +paths may match universal naming conventions (UNC), or include \c "../" +(dot dot slash) components. + +Applications are responsible for validating user-supplied file paths as +necessary to prevent directory traversal attacks. 
@section file_permissions File permissions diff --git a/src/third_party/wiredtiger/src/docs/spell.ok b/src/third_party/wiredtiger/src/docs/spell.ok index 96fe04d7426..e08eb7d1447 100644 --- a/src/third_party/wiredtiger/src/docs/spell.ok +++ b/src/third_party/wiredtiger/src/docs/spell.ok @@ -80,6 +80,7 @@ SCons Seward's SiH TXT +UNC URIs WILLNEED WiredTiger @@ -368,6 +369,7 @@ php png posix pre +prepends primary's printf printlog @@ -411,6 +413,7 @@ runtime rwlock sHQ sHq +sanitization scalable scanf schemas diff --git a/src/third_party/wiredtiger/src/docs/statistics.dox b/src/third_party/wiredtiger/src/docs/statistics.dox index 0a29e351e4e..36ce2711dc5 100644 --- a/src/third_party/wiredtiger/src/docs/statistics.dox +++ b/src/third_party/wiredtiger/src/docs/statistics.dox @@ -90,11 +90,20 @@ cursor. @section statistics_log Statistics logging -WiredTiger will optionally log database statistics into a file when the +WiredTiger will optionally log database statistics into files when the the ::wiredtiger_open \c statistics_log configuration is set. -The resulting statistics can be displayed using the \c wtstats visualization -tool. For more information, see @ref_single wtstats. +The log files are named \c WiredTiger.%%d.%%H, where \c %%d is replaced +with the day of the month as a decimal number (01-31), and \c %%H +is replaced by the hour (24-hour clock) as a decimal number (00-23). +Each log file contains the statistics for the hour specified in its name. + +The location of the log files may be changed with the \c statistics_log.path +configuration string. + +The resulting statistics can be displayed and interactively examined +using the \c wtstats visualization tool. For more information, see +@ref_single wtstats. 
The following example logs statistics every 30 seconds: @@ -120,7 +129,7 @@ Statistics for all underlying data sources of a particular type may be included by adding a partial data source URI to the \c statistics_log configuration string: -@snippet ex_all.c Statistics logging with all tables +@snippet ex_all.c Statistics logging with a source type When database statistics are logged, the database home will be the first space-separated entry for each record in the log file. For example: @@ -151,23 +160,9 @@ currently open in the database, nor will any statistics requiring the traversal of a tree (as if the \c statistics_fast configuration string were set). -The location of the log files may be changed with the \c statistics_log.path -configuration string. The \c path value value may contain ISO C90 standard -strftime conversion specifications. WiredTiger will not create non-existent -directories in the path, they must exist before ::wiredtiger_open is called. - -The following example logs statistics into files named with the month, -day and year: - -@snippet ex_all.c Statistics logging with path - A Python script that parses the default logging output and uses the <a href="http://www.gnuplot.info/">gnuplot</a>, utility to generate Portable Network Graphics (PNG) format graphs is included in the WiredTiger distribution in the file \c tools/statlog.py. -@m_if{c} -To interactively examine statistics results, see @ref wtstats. -@m_endif - */ diff --git a/src/third_party/wiredtiger/src/docs/upgrading.dox b/src/third_party/wiredtiger/src/docs/upgrading.dox index 5e824fee977..9d3d2239bb4 100644 --- a/src/third_party/wiredtiger/src/docs/upgrading.dox +++ b/src/third_party/wiredtiger/src/docs/upgrading.dox @@ -1,5 +1,48 @@ /*! 
@page upgrading Upgrading WiredTiger applications +@section version_281 Upgrading to Version 2.8.1 +<dl> +<dt>Cache management defaults</dt> +<dd> +The default values for the \c eviction_dirty_target and \c +eviction_dirty_trigger settings to ::wiredtiger_open have changed to 5 and 20, +respectively. This means that by default, WiredTiger will start writing dirty +pages from cache when it becomes 5% dirty and will throttle activity to keep +the volume of dirty data in cache under 20%. For write-heavy workloads, the +new defaults may result in lower throughput and more threads writing to data +files concurrently. + +These settings also now determine how much work is done at the beginning of a +checkpoint to make the critical section of checkpoints complete more quickly. +</dd> + +<dt>Checkpoint server created checkpoint names</dt> +<dd> +The ::wiredtiger_open checkpoint configuration no longer supports the +\c name configuration, and checkpoint server created checkpoints will +always be named the default WiredTiger checkpoint name, +"WiredTigerCheckpoint". Applications depending on the ability to set the +checkpoint name for the checkpoint server will require modification. +</dd> + +<dt>Statistics logging path</dt> +<dd> +The ::wiredtiger_open statistics logging path configuration has been +simplified to be only a path to a directory, and the file name component +of the path may no longer be specified. Applications depending on the +ability to set statistics log file names will require modification. +</dd> + +<dt>Deprecated statistics field</dt> +<dd> +The connection statistic \c WT_STAT_CONN_CACHE_BYTES_OVERFLOW has been +removed. Overflow information is now available in the +\c WT_STAT_CONN_CACHE_BYTES_OVERFLOW and \c WT_STAT_CONN_CACHE_OVERFLOW_VALUE. +Applications specifically looking for that statistic will require +modification. 
+</dd> + +</dl><hr> @section version_280 Upgrading to Version 2.8.0 <dl> <dt>LSM metadata</dt> @@ -55,7 +98,6 @@ The WiredTiger public API used to define a structure that could encapsulate log sequence numbers. That structure is no longer exposed publicly. </dd> -<dt> </dl><hr> @section version_270 Upgrading to Version 2.7.0 diff --git a/src/third_party/wiredtiger/src/docs/wtperf.dox b/src/third_party/wiredtiger/src/docs/wtperf.dox index e06272d117c..17b95660f79 100644 --- a/src/third_party/wiredtiger/src/docs/wtperf.dox +++ b/src/third_party/wiredtiger/src/docs/wtperf.dox @@ -210,6 +210,8 @@ if non zero choose a value from within this range as the key for insert operations @par random_value (boolean, default=false) generate random content for the value +@par range_partition (boolean, default=false) +partition data by range (vs hash) @par read_range (unsigned int, default=0) scan a range of keys after each search @par readonly (boolean, default=false) diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index 8ea487bbf83..7d3fd838dcd 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -17,7 +17,7 @@ static int __evict_page(WT_SESSION_IMPL *, bool); static int __evict_pass(WT_SESSION_IMPL *); static int __evict_server(WT_SESSION_IMPL *, bool *); static int __evict_walk(WT_SESSION_IMPL *, uint32_t); -static int __evict_walk_file(WT_SESSION_IMPL *, uint32_t, u_int *); +static int __evict_walk_file(WT_SESSION_IMPL *, uint32_t, u_int, u_int *); /* * __evict_read_gen -- @@ -31,11 +31,6 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry) uint64_t read_gen; btree = entry->btree; - - /* Never prioritize empty slots. */ - if (entry->ref == NULL) - return (UINT64_MAX); - page = entry->ref->page; /* Any page set to the oldest generation should be discarded. 
*/ @@ -70,14 +65,15 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry) * Qsort function: sort the eviction array. */ static int WT_CDECL -__evict_lru_cmp(const void *a, const void *b) +__evict_lru_cmp(const void *a_arg, const void *b_arg) { - uint64_t a_lru, b_lru; + const WT_EVICT_ENTRY *a = a_arg, *b = b_arg; + uint64_t a_score, b_score; - a_lru = __evict_read_gen(a); - b_lru = __evict_read_gen(b); + a_score = (a->ref == NULL ? UINT64_MAX : a->score); + b_score = (b->ref == NULL ? UINT64_MAX : b->score); - return ((a_lru < b_lru) ? -1 : (a_lru == b_lru) ? 0 : 1); + return ((a_score < b_score) ? -1 : (a_score == b_score) ? 0 : 1); } /* @@ -542,54 +538,49 @@ __evict_update_work(WT_SESSION_IMPL *session) cache->evict_max_refs_per_file = WT_MAX(100, WT_MILLION / (conn->open_file_count + 1)); + if (cache->evict_queues[WT_EVICT_URGENT_QUEUE].evict_current != NULL) + FLD_SET(cache->state, WT_EVICT_STATE_URGENT); + /* - * Page eviction overrides the dirty target and other types of eviction, - * that is, we don't care where we are with respect to the dirty target - * if page eviction is configured. + * If we need space in the cache, try to find clean pages to evict. * * Avoid division by zero if the cache size has not yet been set in a * shared cache. */ bytes_max = conn->cache_size + 1; bytes_inuse = __wt_cache_bytes_inuse(cache); - if (bytes_inuse > (cache->eviction_target * bytes_max) / 100) { - FLD_SET(cache->state, WT_EVICT_PASS_ALL); - goto done; - } + if (bytes_inuse > (cache->eviction_target * bytes_max) / 100) + FLD_SET(cache->state, WT_EVICT_STATE_CLEAN); /* - * If the cache has been stuck and is now under control, clear the - * stuck flag. + * Scrub dirty pages and keep them in cache if we are less than half + * way between the cache target and trigger. 
*/ - if (bytes_inuse < bytes_max) - F_CLR(cache, WT_CACHE_STUCK); + if (bytes_inuse < ((cache->eviction_target + cache->eviction_trigger) * + bytes_max) / 200) + FLD_SET(cache->state, WT_EVICT_STATE_SCRUB); - dirty_inuse = __wt_cache_dirty_inuse(cache); - if (dirty_inuse > (cache->eviction_dirty_target * bytes_max) / 100) { - FLD_SET(cache->state, WT_EVICT_PASS_DIRTY); - goto done; - } + dirty_inuse = __wt_cache_dirty_leaf_inuse(cache); + if (dirty_inuse > (cache->eviction_dirty_target * bytes_max) / 100) + FLD_SET(cache->state, WT_EVICT_STATE_DIRTY); /* - * Evict pages with oldest generation (which would otherwise block - * application threads), set regardless of whether we have reached - * the eviction trigger. + * If the cache has been stuck and is now under control, clear the + * stuck flag. */ - if (F_ISSET(cache, WT_CACHE_WOULD_BLOCK)) { - FLD_SET(cache->state, WT_EVICT_PASS_WOULD_BLOCK); - - F_CLR(cache, WT_CACHE_WOULD_BLOCK); - goto done; - } - - return (false); + if (bytes_inuse < bytes_max && + dirty_inuse < (cache->eviction_dirty_trigger * bytes_max) / 100) + F_CLR(cache, WT_CACHE_STUCK); -done: if (F_ISSET(cache, WT_CACHE_STUCK)) { + if (F_ISSET(cache, WT_CACHE_STUCK)) { + WT_ASSERT(session, cache->state != 0); WT_STAT_FAST_CONN_SET(session, cache_eviction_aggressive_set, 1); - FLD_SET(cache->state, WT_EVICT_PASS_AGGRESSIVE); + FLD_SET(cache->state, WT_EVICT_STATE_AGGRESSIVE); } - return (true); + + return (FLD_ISSET(cache->state, + WT_EVICT_STATE_ALL | WT_EVICT_STATE_URGENT)); } /* @@ -603,7 +594,7 @@ __evict_pass(WT_SESSION_IMPL *session) WT_CONNECTION_IMPL *conn; WT_EVICT_WORKER *worker; uint64_t pages_evicted; - int loop; + u_int loop; conn = S2C(session); cache = conn->cache; @@ -647,15 +638,14 @@ __evict_pass(WT_SESSION_IMPL *session) if (loop > 10) { WT_STAT_FAST_CONN_SET(session, cache_eviction_aggressive_set, 1); - FLD_SET(cache->state, WT_EVICT_PASS_AGGRESSIVE); + FLD_SET(cache->state, WT_EVICT_STATE_AGGRESSIVE); } /* * Start a worker if we 
have capacity and we haven't reached * the eviction targets. */ - if (FLD_ISSET(cache->state, WT_EVICT_PASS_ALL | - WT_EVICT_PASS_DIRTY | WT_EVICT_PASS_WOULD_BLOCK) && + if (FLD_ISSET(cache->state, WT_EVICT_STATE_ALL) && conn->evict_workers < conn->evict_workers_max) { WT_RET(__wt_verbose(session, WT_VERB_EVICTSERVER, "Starting evict worker: %"PRIu32"\n", @@ -671,7 +661,8 @@ __evict_pass(WT_SESSION_IMPL *session) WT_RET(__wt_verbose(session, WT_VERB_EVICTSERVER, "Eviction pass with: Max: %" PRIu64 " In use: %" PRIu64 " Dirty: %" PRIu64, - conn->cache_size, cache->bytes_inmem, cache->bytes_dirty)); + conn->cache_size, cache->bytes_inmem, + cache->bytes_dirty_intl + cache->bytes_dirty_leaf)); WT_RET(__evict_lru_walk(session)); WT_RET_NOTFOUND_OK(__evict_lru_pages(session, true)); @@ -682,29 +673,32 @@ __evict_pass(WT_SESSION_IMPL *session) * sleep, it's not something we can fix. */ if (pages_evicted == cache->pages_evict) { - WT_STAT_FAST_CONN_INCR(session, - cache_eviction_server_slept); /* * Back off if we aren't making progress: walks hold - * the handle list lock, which blocks other operations - * that can free space in cache, such as LSM discarding + * the handle list lock, blocking other operations that + * can free space in cache, such as LSM discarding * handles. + * + * Allow this wait to be interrupted (e.g. if a + * checkpoint completes): make sure we wait for a + * non-zero number of microseconds. */ - __wt_sleep(0, WT_THOUSAND * (uint64_t)loop); + WT_STAT_FAST_CONN_INCR(session, + cache_eviction_server_slept); + WT_RET(__wt_cond_wait(session, + cache->evict_cond, WT_THOUSAND * WT_MAX(loop, 1))); + if (loop == 100) { /* * Mark the cache as stuck if we need space * and aren't evicting any pages. 
*/ - if (!FLD_ISSET(cache->state, - WT_EVICT_PASS_WOULD_BLOCK)) { - F_SET(cache, WT_CACHE_STUCK); - WT_STAT_FAST_CONN_INCR( - session, cache_eviction_slow); - WT_RET(__wt_verbose( - session, WT_VERB_EVICTSERVER, - "unable to reach eviction goal")); - } + F_SET(cache, WT_CACHE_STUCK); + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_slow); + WT_RET(__wt_verbose( + session, WT_VERB_EVICTSERVER, + "unable to reach eviction goal")); break; } } else { @@ -927,26 +921,29 @@ __evict_lru_walk(WT_SESSION_IMPL *session) { WT_CACHE *cache; WT_DECL_RET; - WT_EVICT_QUEUE *evict_queue; - uint64_t cutoff, read_gen_oldest; + WT_EVICT_QUEUE *queue; + uint64_t read_gen_oldest; uint32_t candidates, entries, queue_index; cache = S2C(session)->cache; - queue_index = cache->evict_queue_fill++ % WT_EVICT_QUEUE_MAX; - evict_queue = &cache->evict_queues[queue_index]; + /* Fill the next queue (that isn't the urgent queue). */ + queue_index = + 1 + (cache->evict_queue_fill++ % (WT_EVICT_QUEUE_MAX - 1)); + queue = &cache->evict_queues[queue_index]; + /* Get some more pages to consider for eviction. */ if ((ret = __evict_walk(cache->walk_session, queue_index)) != 0) return (ret == EBUSY ? 0 : ret); /* Sort the list into LRU order and restart. 
*/ - __wt_spin_lock(session, &evict_queue->evict_lock); + __wt_spin_lock(session, &queue->evict_lock); - entries = evict_queue->evict_entries; - qsort(evict_queue->evict_queue, + entries = queue->evict_entries; + qsort(queue->evict_queue, entries, sizeof(WT_EVICT_ENTRY), __evict_lru_cmp); - while (entries > 0 && evict_queue->evict_queue[entries - 1].ref == NULL) + while (entries > 0 && queue->evict_queue[entries - 1].ref == NULL) --entries; /* @@ -956,9 +953,9 @@ __evict_lru_walk(WT_SESSION_IMPL *session) */ while (entries > WT_EVICT_WALK_BASE) __evict_list_clear(session, - &evict_queue->evict_queue[--entries]); + &queue->evict_queue[--entries]); - evict_queue->evict_entries = entries; + queue->evict_entries = entries; if (entries == 0) { /* @@ -966,23 +963,19 @@ __evict_lru_walk(WT_SESSION_IMPL *session) * Make sure application threads don't read past the end of the * candidate list, or they may race with the next walk. */ - evict_queue->evict_candidates = 0; - __wt_spin_unlock(session, &evict_queue->evict_lock); - __wt_spin_lock(session, &cache->evict_queue_lock); - cache->evict_current = NULL; - cache->evict_current_queue = NULL; - __wt_spin_unlock(session, &cache->evict_queue_lock); + queue->evict_candidates = 0; + queue->evict_current = NULL; + __wt_spin_unlock(session, &queue->evict_lock); return (0); } /* Decide how many of the candidates we're going to try and evict. */ - if (FLD_ISSET(cache->state, - WT_EVICT_PASS_AGGRESSIVE | WT_EVICT_PASS_WOULD_BLOCK)) { + if (FLD_ISSET(cache->state, WT_EVICT_STATE_AGGRESSIVE)) { /* * Take all candidates if we only gathered pages with an oldest * read generation set. 
*/ - evict_queue->evict_candidates = entries; + queue->evict_candidates = entries; } else { /* * Find the oldest read generation we have in the queue, used @@ -992,8 +985,7 @@ __evict_lru_walk(WT_SESSION_IMPL *session) read_gen_oldest = WT_READGEN_OLDEST; for (candidates = 0; candidates < entries; ++candidates) { read_gen_oldest = - __evict_read_gen( - &evict_queue->evict_queue[candidates]); + queue->evict_queue[candidates].score; if (read_gen_oldest != WT_READGEN_OLDEST) break; } @@ -1002,51 +994,45 @@ __evict_lru_walk(WT_SESSION_IMPL *session) * Take all candidates if we only gathered pages with an oldest * read generation set. * - * We normally never take more than 50% of the entries; if 50% - * of the entries were at the oldest read generation, take them. + * We normally never take more than 50% of the entries but if + * 50% of the entries were at the oldest read generation, take + * all of them. */ if (read_gen_oldest == WT_READGEN_OLDEST) - evict_queue->evict_candidates = entries; - else if (candidates >= entries / 2) - evict_queue->evict_candidates = candidates; + queue->evict_candidates = entries; + else if (candidates > entries / 2) + queue->evict_candidates = candidates; else { - /* Save the calculated oldest generation. */ - cache->read_gen_oldest = read_gen_oldest; - - /* Find the bottom 25% of read generations. */ - cutoff = - (3 * read_gen_oldest + __evict_read_gen( - &evict_queue->evict_queue[entries - 1])) / 4; - /* - * Don't take less than 10% or more than 50% of entries, - * regardless. That said, if there is only one entry, - * which is normal when populating an empty file, don't - * exclude it. + * Take all of the urgent pages plus a third of + * ordinary candidates (which could be expressed as + * WT_EVICT_WALK_INCR / WT_EVICT_WALK_BASE). In the + * steady state, we want to get as many candidates as + * the eviction walk adds to the queue. 
+ * + * That said, if there is only one entry, which is + * normal when populating an empty file, don't exclude + * it. */ - for (candidates = 1 + entries / 10; - candidates < entries / 2; - candidates++) - if (__evict_read_gen( - &evict_queue->evict_queue[candidates]) > - cutoff) - break; - evict_queue->evict_candidates = candidates; + queue->evict_candidates = + 1 + candidates + ((entries - candidates) - 1) / 3; + cache->read_gen_oldest = read_gen_oldest; } } - __wt_spin_unlock(session, &evict_queue->evict_lock); + queue->evict_current = queue->evict_queue; + __wt_spin_unlock(session, &queue->evict_lock); + /* * Now we can set the next queue. */ __wt_spin_lock(session, &cache->evict_queue_lock); - if (cache->evict_current == NULL) + if (cache->evict_current_queue->evict_current == NULL) WT_STAT_FAST_CONN_INCR(session, cache_eviction_queue_empty); else WT_STAT_FAST_CONN_INCR(session, cache_eviction_queue_not_empty); - cache->evict_current = evict_queue->evict_queue; - cache->evict_current_queue = evict_queue; + cache->evict_current_queue = queue; __wt_spin_unlock(session, &cache->evict_queue_lock); /* @@ -1070,9 +1056,8 @@ __evict_walk(WT_SESSION_IMPL *session, uint32_t queue_index) WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle; WT_DECL_RET; - WT_EVICT_QUEUE *evict_queue; - u_int max_entries, prev_slot, retries; - u_int slot, start_slot, spins; + WT_EVICT_QUEUE *queue; + u_int max_entries, prev_slot, retries, slot, start_slot, spins; bool dhandle_locked, incr; conn = S2C(session); @@ -1086,9 +1071,9 @@ __evict_walk(WT_SESSION_IMPL *session, uint32_t queue_index) * Set the starting slot in the queue and the maximum pages added * per walk. 
*/ - evict_queue = &cache->evict_queues[queue_index]; - start_slot = slot = evict_queue->evict_entries; - max_entries = slot + WT_EVICT_WALK_INCR; + queue = &cache->evict_queues[queue_index]; + start_slot = slot = queue->evict_entries; + max_entries = WT_MIN(slot + WT_EVICT_WALK_INCR, cache->evict_slots); retry: while (slot < max_entries && ret == 0) { /* @@ -1158,7 +1143,7 @@ retry: while (slot < max_entries && ret == 0) { */ if ((btree->checkpointing != WT_CKPT_OFF || btree->evict_priority != 0) && - !FLD_ISSET(cache->state, WT_EVICT_PASS_AGGRESSIVE)) + !FLD_ISSET(cache->state, WT_EVICT_STATE_AGGRESSIVE)) continue; /* Skip files if we have used all available hazard pointers. */ @@ -1171,7 +1156,6 @@ retry: while (slot < max_entries && ret == 0) { * useful in the past. */ if (btree->evict_walk_period != 0 && - evict_queue->evict_entries >= WT_EVICT_WALK_INCR && btree->evict_walk_skips++ < btree->evict_walk_period) continue; btree->evict_walk_skips = 0; @@ -1197,8 +1181,8 @@ retry: while (slot < max_entries && ret == 0) { if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) { cache->evict_file_next = dhandle; WT_WITH_DHANDLE(session, dhandle, - ret = __evict_walk_file( - session, queue_index, &slot)); + ret = __evict_walk_file(session, + queue_index, max_entries, &slot)); WT_ASSERT(session, session->split_gen == 0); } __wt_spin_unlock(session, &cache->evict_walk_lock); @@ -1234,39 +1218,49 @@ retry: while (slot < max_entries && ret == 0) { if (cache->pass_intr == 0 && ret == 0 && slot < max_entries && (retries < 2 || (retries < 10 && - !FLD_ISSET(cache->state, WT_EVICT_PASS_WOULD_BLOCK) && - (slot == evict_queue->evict_entries || slot > start_slot)))) { + (slot == queue->evict_entries || slot > start_slot)))) { start_slot = slot; ++retries; goto retry; } - evict_queue->evict_entries = slot; + queue->evict_entries = slot; return (ret); } /* - * __evict_init_candidate -- + * __evict_push_candidate -- * Initialize a WT_EVICT_ENTRY structure with a given page. 
*/ -static void -__evict_init_candidate(WT_SESSION_IMPL *session, - WT_EVICT_QUEUE *evict_queue, WT_EVICT_ENTRY *evict, WT_REF *ref) +static bool +__evict_push_candidate(WT_SESSION_IMPL *session, + WT_EVICT_QUEUE *queue, WT_EVICT_ENTRY *evict, WT_REF *ref) { u_int slot; + uint8_t orig_flags, new_flags; + + /* + * Threads can race to queue a page (e.g., an ordinary LRU walk can + * race with a page being queued for urgent eviction). + */ + orig_flags = new_flags = ref->page->flags_atomic; + FLD_SET(new_flags, WT_PAGE_EVICT_LRU); + if (orig_flags == new_flags || + !__wt_atomic_cas8(&ref->page->flags_atomic, orig_flags, new_flags)) + return (false); /* Keep track of the maximum slot we are using. */ - slot = (u_int)(evict - evict_queue->evict_queue); - if (slot >= evict_queue->evict_max) - evict_queue->evict_max = slot + 1; + slot = (u_int)(evict - queue->evict_queue); + if (slot >= queue->evict_max) + queue->evict_max = slot + 1; if (evict->ref != NULL) __evict_list_clear(session, evict); - evict->ref = ref; - evict->btree = S2BT(session); - /* Mark the page on the list; set last to flush the other updates. */ - F_SET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU); + evict->btree = S2BT(session); + evict->ref = ref; + evict->score = __evict_read_gen(evict); + return (true); } /* @@ -1274,34 +1268,73 @@ __evict_init_candidate(WT_SESSION_IMPL *session, * Get a few page eviction candidates from a single underlying file. 
*/ static int -__evict_walk_file(WT_SESSION_IMPL *session, uint32_t queue_index, u_int *slotp) +__evict_walk_file(WT_SESSION_IMPL *session, + uint32_t queue_index, u_int max_entries, u_int *slotp) { WT_BTREE *btree; WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_EVICT_ENTRY *end, *evict, *start; - WT_EVICT_QUEUE *evict_queue; + WT_EVICT_QUEUE *queue; WT_PAGE *page; WT_PAGE_MODIFY *mod; WT_REF *ref; + uint64_t btree_inuse, bytes_per_slot, cache_inuse; uint64_t pages_seen, refs_walked; - uint32_t walk_flags; + uint32_t remaining_slots, target_pages, total_slots, walk_flags; int internal_pages, restarts; bool enough, modified; conn = S2C(session); btree = S2BT(session); cache = conn->cache; - evict_queue = &cache->evict_queues[queue_index]; + queue = &cache->evict_queues[queue_index]; internal_pages = restarts = 0; enough = false; - start = evict_queue->evict_queue + *slotp; - end = start + WT_EVICT_WALK_PER_FILE; + /* + * Figure out how many slots to fill from this tree. + * Note that some care is taken in the calculation to avoid overflow. + */ + start = queue->evict_queue + *slotp; + remaining_slots = max_entries - *slotp; + btree_inuse = __wt_btree_bytes_inuse(session); + cache_inuse = __wt_cache_bytes_inuse(cache); + total_slots = max_entries - queue->evict_entries; + + /* + * The target number of pages for this tree is proportional to the + * space it is taking up in cache. Round to the nearest number of + * slots so we assign all of the slots to a tree filling 99+% of the + * cache (and only have to walk it once). + */ + bytes_per_slot = cache_inuse / total_slots; + target_pages = (uint32_t)( + (btree_inuse + bytes_per_slot / 2) / bytes_per_slot); + if (target_pages == 0) { + /* + * Randomly walk trees with a tiny fraction of the cache in + * case there are so many trees that none of them use enough of + * the cache to be allocated slots. 
+ * + * Map a random number into the range [0..1], and if the result + * is greater than the fraction of the cache used by this tree, + * give up. In other words, there is a small chance we will + * visit trees that use a small fraction of the cache. Arrange + * this calculation to avoid overflow (e.g., don't multiply + * anything by UINT32_MAX). + */ + if (__wt_random(&session->rnd) / (double)UINT32_MAX > + btree_inuse / (double)cache_inuse) + return (0); + target_pages = 10; + } + if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD) || - end > evict_queue->evict_queue + cache->evict_slots) - end = evict_queue->evict_queue + cache->evict_slots; + target_pages > remaining_slots) + target_pages = remaining_slots; + end = start + target_pages; walk_flags = WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT; @@ -1352,14 +1385,11 @@ __evict_walk_file(WT_SESSION_IMPL *session, uint32_t queue_index, u_int *slotp) /* * It's possible (but unlikely) to visit a page without a read * generation, if we race with the read instantiating the page. - * Ignore those pages, but set the page's read generation here - * to ensure a bug doesn't somehow leave a page without a read - * generation. + * Set the page's read generation here to ensure a bug doesn't + * somehow leave a page without a read generation. */ - if (page->read_gen == WT_READGEN_NOTSET) { + if (page->read_gen == WT_READGEN_NOTSET) __wt_cache_read_gen_new(session, page); - continue; - } /* Pages we no longer need (clean or dirty), are found money. */ if (page->read_gen == WT_READGEN_OLDEST) { @@ -1367,26 +1397,23 @@ __evict_walk_file(WT_SESSION_IMPL *session, uint32_t queue_index, u_int *slotp) session, cache_eviction_pages_queued_oldest); goto fast; } + if (__wt_page_is_empty(page) || - F_ISSET(session->dhandle, WT_DHANDLE_DEAD)) + F_ISSET(session->dhandle, WT_DHANDLE_DEAD) || + FLD_ISSET(cache->state, WT_EVICT_STATE_AGGRESSIVE)) goto fast; /* Skip clean pages if appropriate. 
*/ if (!modified && (F_ISSET(conn, WT_CONN_IN_MEMORY) || - FLD_ISSET(cache->state, WT_EVICT_PASS_DIRTY))) + !FLD_ISSET(cache->state, WT_EVICT_STATE_CLEAN))) continue; - /* - * If we are only trickling out pages marked for definite - * eviction, skip anything that isn't marked. - */ - if (FLD_ISSET(cache->state, WT_EVICT_PASS_WOULD_BLOCK) && - page->memory_footprint < btree->splitmempage) + /* Skip dirty pages if appropriate. */ + if (modified && !FLD_ISSET(cache->state, WT_EVICT_STATE_DIRTY)) continue; - /* Limit internal pages to 50% unless we get aggressive. */ + /* Limit internal pages to 50% of the total. */ if (WT_PAGE_IS_INTERNAL(page) && - !FLD_ISSET(cache->state, WT_EVICT_PASS_AGGRESSIVE) && internal_pages >= (int)(evict - start) / 2) continue; @@ -1410,8 +1437,7 @@ fast: /* If the page can't be evicted, give up. */ * configure lookaside table writes in reconciliation, allowing * us to evict pages we can't usually evict. */ - if (!FLD_ISSET(cache->state, - WT_EVICT_PASS_AGGRESSIVE | WT_EVICT_PASS_WOULD_BLOCK)) { + if (!FLD_ISSET(cache->state, WT_EVICT_STATE_AGGRESSIVE)) { /* * If the page is clean but has modifications that * appear too new to evict, skip it. @@ -1422,7 +1448,8 @@ fast: /* If the page can't be evicted, give up. */ } WT_ASSERT(session, evict->ref == NULL); - __evict_init_candidate(session, evict_queue, evict, ref); + if (!__evict_push_candidate(session, queue, evict, ref)) + continue; ++evict; if (WT_PAGE_IS_INTERNAL(page)) @@ -1479,19 +1506,21 @@ __evict_check_entry_size(WT_SESSION_IMPL *session, WT_EVICT_ENTRY *entry) cache = S2C(session)->cache; - if (cache->pages_evict == 0) + if (cache->pages_evict == 0 || cache->bytes_evict < WT_MEGABYTE) return (true); max = (cache->bytes_evict / cache->pages_evict) * 4; if ((ref = entry->ref) != NULL) { if ((page = ref->page) == NULL) return (true); + /* - * If this page is more than four times the average evicted page - * size then return false. Return true in all other cases. 
- * XXX Should we care here if the page is dirty? Probably... + * If this page is dirty and more than four times the average + * evicted page size then return false. Return true in all + * other cases. */ - if (page->memory_footprint > max) { + if (__wt_page_is_modified(page) && + page->memory_footprint > max) { WT_STAT_FAST_CONN_INCR( session, cache_eviction_server_toobig); return (false); @@ -1510,71 +1539,85 @@ __evict_get_ref( { WT_CACHE *cache; WT_EVICT_ENTRY *evict; - WT_EVICT_QUEUE *evict_queue; + WT_EVICT_QUEUE *queue, *urgent_queue; uint32_t candidates; cache = S2C(session)->cache; + urgent_queue = &cache->evict_queues[WT_EVICT_URGENT_QUEUE]; *btreep = NULL; *refp = NULL; - /* - * Avoid the LRU lock if no pages are available. - */ + /* Avoid the LRU lock if no pages are available. */ WT_STAT_FAST_CONN_INCR(session, cache_eviction_get_ref); - if (cache->evict_current == NULL) { + if (cache->evict_current_queue->evict_current == NULL && + urgent_queue->evict_current == NULL) { WT_STAT_FAST_CONN_INCR(session, cache_eviction_get_ref_empty); return (WT_NOTFOUND); } + __wt_spin_lock(session, &cache->evict_queue_lock); + + /* Check the urgent queue first. */ + queue = urgent_queue->evict_current != NULL && + (FLD_ISSET(cache->state, WT_EVICT_STATE_AGGRESSIVE) || + (F_ISSET(session, WT_SESSION_INTERNAL) && + (!is_server || S2C(session)->evict_workers <= 1))) ? + urgent_queue : cache->evict_current_queue; + + __wt_spin_unlock(session, &cache->evict_queue_lock); + /* - * Verify there are still pages available. + * Only evict half of the pages before looking for more. The remainder + * are left to eviction workers (if configured), or application threads + * if necessary. 
*/ - if (cache->evict_current == NULL) { - __wt_spin_unlock(session, &cache->evict_queue_lock); - WT_STAT_FAST_CONN_INCR(session, cache_eviction_get_ref_empty2); - return (WT_NOTFOUND); - } + candidates = queue->evict_candidates; + if (is_server && queue != urgent_queue && candidates > 1) + candidates /= 2; + /* - * We got the queue lock, which should be fast, and now we want to - * get the lock on the individual queue. We know that the shared - * queue fields cannot change now. + * We got the queue lock, which should be fast, and chose a queue. + * Now we want to get the lock on the individual queue. */ - evict_queue = cache->evict_current_queue; for (;;) { - if (__wt_spin_trylock(session, &evict_queue->evict_lock) == 0) - break; - if (!F_ISSET(session, WT_SESSION_INTERNAL)) { - __wt_spin_unlock(session, &cache->evict_queue_lock); + /* Verify there are still pages available. */ + if (queue->evict_current == NULL || (uint32_t) + (queue->evict_current - queue->evict_queue) >= candidates) { + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_get_ref_empty2); return (WT_NOTFOUND); } - __wt_yield(); + if (!is_server) + __wt_spin_lock(session, &queue->evict_lock); + else if (__wt_spin_trylock(session, &queue->evict_lock) != 0) + continue; + break; } - /* - * Only evict half of the pages before looking for more. The remainder - * are left to eviction workers (if configured), or application threads - * if necessary. - */ - candidates = evict_queue->evict_candidates; - if (is_server && candidates > 1) - candidates /= 2; /* Get the next page queued for eviction. 
*/ - while ((evict = cache->evict_current) != NULL && - evict < evict_queue->evict_queue + candidates && - evict->ref != NULL) { + for (evict = queue->evict_current; + evict >= queue->evict_queue && + evict < queue->evict_queue + candidates; + ++evict) { + if (evict->ref == NULL) + continue; WT_ASSERT(session, evict->btree != NULL); + /* - * If the server is helping out and encounters an entry that - * is too large, it stops helping. Evicting a very large - * page in the server thread could stall eviction from finding - * new work. + * If the server is helping out and encounters an entry that is + * too large, it stops helping. Evicting a very large page in + * the server thread could stall eviction from finding new + * work. + * + * However, we can't skip entries in the urgent queue or they + * may never be found again. */ - if (is_server && S2C(session)->evict_workers > 1 && - !__evict_check_entry_size(session, evict)) + if (is_server && queue != urgent_queue && + S2C(session)->evict_workers > 1 && + !__evict_check_entry_size(session, evict)) { + --evict; break; - - /* Move to the next item. */ - ++cache->evict_current; + } /* * Lock the page while holding the eviction mutex to prevent @@ -1604,11 +1647,14 @@ __evict_get_ref( break; } - /* Clear the current pointer if there are no more candidates. */ - if (evict >= evict_queue->evict_queue + evict_queue->evict_candidates) - cache->evict_current = NULL; - __wt_spin_unlock(session, &evict_queue->evict_lock); - __wt_spin_unlock(session, &cache->evict_queue_lock); + /* Move to the next item. */ + if (evict != NULL && evict + 1 < + queue->evict_queue + queue->evict_candidates) + queue->evict_current = evict + 1; + else /* Clear the current pointer if there are no more candidates. */ + queue->evict_current = NULL; + + __wt_spin_unlock(session, &queue->evict_lock); return ((*refp == NULL) ? 
WT_NOTFOUND : 0); } @@ -1633,16 +1679,14 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server) * An internal session flags either the server itself or an eviction * worker thread. */ - if (F_ISSET(session, WT_SESSION_INTERNAL)) { - if (is_server) { - WT_STAT_FAST_CONN_INCR( - session, cache_eviction_server_evicting); - cache->server_evicts++; - } else { - WT_STAT_FAST_CONN_INCR( - session, cache_eviction_worker_evicting); - cache->worker_evicts++; - } + if (is_server) { + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_server_evicting); + cache->server_evicts++; + } else if (F_ISSET(session, WT_SESSION_INTERNAL)) { + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_worker_evicting); + cache->worker_evicts++; } else { if (__wt_page_is_modified(ref->page)) WT_STAT_FAST_CONN_INCR( @@ -1768,6 +1812,64 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) } /* + * __wt_page_evict_soon -- + * Set a page to be evicted as soon as possible. + */ +int +__wt_page_evict_soon(WT_SESSION_IMPL *session, WT_REF *ref) +{ + WT_CACHE *cache; + WT_EVICT_ENTRY *evict; + WT_EVICT_QUEUE *urgent_queue; + WT_PAGE *page; + bool queued; + + /* Root pages should never be evicted via LRU. */ + WT_ASSERT(session, !__wt_ref_is_root(ref)); + + page = ref->page; + page->read_gen = WT_READGEN_OLDEST; + if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU) || + F_ISSET(S2BT(session), WT_BTREE_NO_EVICTION)) + return (0); + + /* Append to the urgent queue if we can. 
*/ + cache = S2C(session)->cache; + urgent_queue = &cache->evict_queues[WT_EVICT_URGENT_QUEUE]; + queued = false; + + __wt_spin_lock(session, &cache->evict_queue_lock); + if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU) || + F_ISSET(S2BT(session), WT_BTREE_NO_EVICTION)) + goto done; + + __wt_spin_lock(session, &urgent_queue->evict_lock); + if (urgent_queue->evict_current == NULL) { + urgent_queue->evict_current = urgent_queue->evict_queue; + urgent_queue->evict_candidates = 0; + } + evict = urgent_queue->evict_queue + urgent_queue->evict_candidates; + if (evict < urgent_queue->evict_queue + WT_EVICT_QUEUE_MAX && + __evict_push_candidate(session, urgent_queue, evict, ref)) { + ++urgent_queue->evict_candidates; + queued = true; + } + __wt_spin_unlock(session, &urgent_queue->evict_lock); + +done: __wt_spin_unlock(session, &cache->evict_queue_lock); + if (queued) { + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_pages_queued_urgent); + if (S2C(session)->evict_workers > 1) + WT_RET(__wt_cond_signal( + session, cache->evict_waiter_cond)); + else + WT_RET(__wt_evict_server_wake(session)); + } + return (0); +} + +/* * __wt_evict_priority_set -- * Set a tree's eviction priority. 
*/ @@ -1801,13 +1903,15 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) WT_DATA_HANDLE *dhandle, *saved_dhandle; WT_PAGE *page; WT_REF *next_walk; - uint64_t dirty_bytes, dirty_pages, intl_bytes, intl_pages; - uint64_t leaf_bytes, leaf_pages; - uint64_t max_dirty_bytes, max_intl_bytes, max_leaf_bytes, total_bytes; + uint64_t intl_bytes, intl_bytes_max, intl_dirty_bytes; + uint64_t intl_dirty_bytes_max, intl_dirty_pages, intl_pages; + uint64_t leaf_bytes, leaf_bytes_max, leaf_dirty_bytes; + uint64_t leaf_dirty_bytes_max, leaf_dirty_pages, leaf_pages; + uint64_t total_bytes, total_dirty_bytes; size_t size; conn = S2C(session); - total_bytes = 0; + total_bytes = total_dirty_bytes = 0; if (ofile == NULL) fp = stderr; @@ -1823,9 +1927,10 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) !F_ISSET(dhandle, WT_DHANDLE_OPEN)) continue; - dirty_bytes = dirty_pages = intl_bytes = intl_pages = 0; - leaf_bytes = leaf_pages = 0; - max_dirty_bytes = max_intl_bytes = max_leaf_bytes = 0; + intl_bytes = intl_bytes_max = intl_dirty_bytes = 0; + intl_dirty_bytes_max = intl_dirty_pages = intl_pages = 0; + leaf_bytes = leaf_bytes_max = leaf_dirty_bytes = 0; + leaf_dirty_bytes_max = leaf_dirty_pages = leaf_pages = 0; next_walk = NULL; session->dhandle = dhandle; @@ -1838,17 +1943,23 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) if (WT_PAGE_IS_INTERNAL(page)) { ++intl_pages; intl_bytes += size; - max_intl_bytes = WT_MAX(max_intl_bytes, size); + intl_bytes_max = WT_MAX(intl_bytes_max, size); + if (__wt_page_is_modified(page)) { + ++intl_dirty_pages; + intl_dirty_bytes += size; + intl_dirty_bytes_max = + WT_MAX(intl_dirty_bytes_max, size); + } } else { ++leaf_pages; leaf_bytes += size; - max_leaf_bytes = WT_MAX(max_leaf_bytes, size); - } - if (__wt_page_is_modified(page)) { - ++dirty_pages; - dirty_bytes += size; - max_dirty_bytes = - WT_MAX(max_dirty_bytes, size); + leaf_bytes_max = WT_MAX(leaf_bytes_max, size); + if 
(__wt_page_is_modified(page)) { + ++leaf_dirty_pages; + leaf_dirty_bytes += size; + leaf_dirty_bytes_max = + WT_MAX(leaf_dirty_bytes_max, size); + } } } session->dhandle = NULL; @@ -1860,21 +1971,41 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) dhandle->name, dhandle->checkpoint); if (intl_pages != 0) (void)fprintf(fp, - "\t" "internal pages: %" PRIu64 " pages, %" PRIu64 - " max, %" PRIu64 "MB total\n", - intl_pages, max_intl_bytes, intl_bytes >> 20); + "\t" "internal: " + "%" PRIu64 " pages, " + "%" PRIu64 "MB, " + "%" PRIu64 "/%" PRIu64 " clean/dirty pages, " + "%" PRIu64 "/%" PRIu64 " clean/dirty MB, " + "%" PRIu64 "MB max page, " + "%" PRIu64 "MB max dirty page\n", + intl_pages, + intl_bytes >> 20, + intl_pages - intl_dirty_pages, + intl_dirty_pages, + (intl_bytes - intl_dirty_bytes) >> 20, + intl_dirty_bytes >> 20, + intl_bytes_max >> 20, + intl_dirty_bytes_max >> 20); if (leaf_pages != 0) (void)fprintf(fp, - "\t" "leaf pages: %" PRIu64 " pages, %" PRIu64 - " max, %" PRIu64 "MB total\n", - leaf_pages, max_leaf_bytes, leaf_bytes >> 20); - if (dirty_pages != 0) - (void)fprintf(fp, - "\t" "dirty pages: %" PRIu64 " pages, %" PRIu64 - " max, %" PRIu64 "MB total\n", - dirty_pages, max_dirty_bytes, dirty_bytes >> 20); + "\t" "leaf: " + "%" PRIu64 " pages, " + "%" PRIu64 "MB, " + "%" PRIu64 "/%" PRIu64 " clean/dirty pages, " + "%" PRIu64 "/%" PRIu64 " clean/dirty MB, " + "%" PRIu64 "MB max page, " + "%" PRIu64 "MB max dirty page\n", + leaf_pages, + leaf_bytes >> 20, + leaf_pages - leaf_dirty_pages, + leaf_dirty_pages, + (leaf_bytes - leaf_dirty_bytes) >> 20, + leaf_dirty_bytes >> 20, + leaf_bytes_max >> 20, + leaf_dirty_bytes_max >> 20); total_bytes += intl_bytes + leaf_bytes; + total_dirty_bytes += intl_dirty_bytes + leaf_dirty_bytes; } session->dhandle = saved_dhandle; @@ -1886,10 +2017,13 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) total_bytes += (total_bytes * (uint64_t)conn->cache->overhead_pct) / 100; (void)fprintf(fp, - 
"cache dump: total found = %" PRIu64 - "MB vs tracked inuse %" PRIu64 "MB\n", - total_bytes >> 20, __wt_cache_bytes_inuse(conn->cache) >> 20); + "cache dump: " + "total found = %" PRIu64 "MB vs tracked inuse %" PRIu64 "MB\n" + "total dirty bytes = %" PRIu64 "MB\n", + total_bytes >> 20, __wt_cache_bytes_inuse(conn->cache) >> 20, + total_dirty_bytes >> 20); (void)fprintf(fp, "==========\n"); + if (ofile != NULL && fclose(fp) != 0) return (EIO); return (0); diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c index 305b81fe69e..d4c4e3e311a 100644 --- a/src/third_party/wiredtiger/src/evict/evict_page.c +++ b/src/third_party/wiredtiger/src/evict/evict_page.c @@ -10,7 +10,7 @@ static int __evict_page_clean_update(WT_SESSION_IMPL *, WT_REF *, bool); static int __evict_page_dirty_update(WT_SESSION_IMPL *, WT_REF *, bool); -static int __evict_review(WT_SESSION_IMPL *, WT_REF *, bool *, bool); +static int __evict_review(WT_SESSION_IMPL *, WT_REF *, uint32_t *, bool); /* * __evict_exclusive_clear -- @@ -46,6 +46,55 @@ __evict_exclusive(WT_SESSION_IMPL *session, WT_REF *ref) } /* + * __wt_page_release_evict -- + * Release a reference to a page, and attempt to immediately evict it. + */ +int +__wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) +{ + WT_BTREE *btree; + WT_DECL_RET; + WT_PAGE *page; + bool locked, too_big; + + btree = S2BT(session); + page = ref->page; + + /* + * Take some care with order of operations: if we release the hazard + * reference without first locking the page, it could be evicted in + * between. + */ + locked = __wt_atomic_casv32(&ref->state, WT_REF_MEM, WT_REF_LOCKED); + if ((ret = __wt_hazard_clear(session, page)) != 0 || !locked) { + if (locked) + ref->state = WT_REF_MEM; + return (ret == 0 ? 
EBUSY : ret); + } + + (void)__wt_atomic_addv32(&btree->evict_busy, 1); + + too_big = page->memory_footprint > btree->splitmempage; + if ((ret = __wt_evict(session, ref, false)) == 0) { + if (too_big) + WT_STAT_FAST_CONN_INCR(session, cache_eviction_force); + else + /* + * If the page isn't too big, we are evicting it because + * it had a chain of deleted entries that make traversal + * expensive. + */ + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_force_delete); + } else + WT_STAT_FAST_CONN_INCR(session, cache_eviction_force_fail); + + (void)__wt_atomic_subv32(&btree->evict_busy, 1); + + return (ret); +} + +/* * __wt_evict -- * Evict a page. */ @@ -56,7 +105,8 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) WT_DECL_RET; WT_PAGE *page; WT_PAGE_MODIFY *mod; - bool clean_page, forced_eviction, inmem_split, tree_dead; + uint32_t flags; + bool clean_page, tree_dead; conn = S2C(session); @@ -64,8 +114,6 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) WT_ASSERT(session, !WT_SESSION_IS_CHECKPOINT(session)); page = ref->page; - forced_eviction = page->read_gen == WT_READGEN_OLDEST; - inmem_split = false; tree_dead = F_ISSET(session->dhandle, WT_DHANDLE_DEAD); WT_RET(__wt_verbose(session, WT_VERB_EVICT, @@ -78,20 +126,14 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) * to make this check for clean pages, too: while unlikely eviction * would choose an internal page with children, it's not disallowed. */ - WT_ERR(__evict_review(session, ref, &inmem_split, closing)); + WT_ERR(__evict_review(session, ref, &flags, closing)); /* * If there was an in-memory split, the tree has been left in the state * we want: there is nothing more to do. */ - if (inmem_split) - goto done; - - /* - * Update the page's modification reference, reconciliation might have - * changed it. - */ - mod = page->modify; + if (LF_ISSET(WT_EVICT_INMEM_SPLIT)) + return (0); /* Count evictions of internal pages during normal operation. 
*/ if (!closing && WT_PAGE_IS_INTERNAL(page)) { @@ -108,12 +150,13 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) conn->cache->evict_max_page_size = page->memory_footprint; /* Figure out whether reconciliation was done on the page */ + mod = page->modify; clean_page = mod == NULL || mod->rec_result == 0; /* Update the reference and discard the page. */ if (__wt_ref_is_root(ref)) __wt_ref_out(session, ref); - else if (tree_dead || (clean_page && !F_ISSET(conn, WT_CONN_IN_MEMORY))) + else if ((clean_page && !LF_ISSET(WT_EVICT_IN_MEMORY)) || tree_dead) /* * Pages that belong to dead trees never write back to disk * and can't support page splits. @@ -139,14 +182,9 @@ err: if (!closing) WT_STAT_FAST_DATA_INCR(session, cache_eviction_fail); } -done: if (((inmem_split && ret == 0) || (forced_eviction && ret == EBUSY)) && - !F_ISSET(conn->cache, WT_CACHE_WOULD_BLOCK)) { - F_SET(conn->cache, WT_CACHE_WOULD_BLOCK); - WT_TRET(__wt_evict_server_wake(session)); - } - return (ret); } + /* * __evict_delete_ref -- * Mark a page reference deleted and check if the parent can reverse @@ -210,13 +248,6 @@ __evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) WT_DECL_RET; /* - * If doing normal system eviction, but only in the service of reducing - * the number of dirty pages, leave the clean page in cache. - */ - if (!closing && __wt_eviction_dirty_target(session)) - return (EBUSY); - - /* * Discard the page and update the reference structure; if the page has * an address, it's a disk page; if it has no address, it's a deleted * page re-instantiated (for example, by searching) and never written. @@ -242,6 +273,7 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) WT_ADDR *addr; WT_DECL_RET; WT_PAGE_MODIFY *mod; + WT_MULTI multi; mod = ref->page->modify; @@ -284,24 +316,15 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) * write. 
Take advantage of the fact we have exclusive access * to the page and rewrite it in memory. */ - if (mod->mod_multi_entries == 1) - WT_RET(__wt_split_rewrite(session, ref)); - else + if (mod->mod_multi_entries == 1) { + WT_ASSERT(session, closing == false); + WT_RET(__wt_split_rewrite( + session, ref, &mod->mod_multi[0])); + } else WT_RET(__wt_split_multi(session, ref, closing)); break; case WT_PM_REC_REPLACE: /* 1-for-1 page swap */ /* - * If doing normal system eviction, but only in the service of - * reducing the number of dirty pages, leave the clean page in - * cache. Only do this when replacing a page with another one, - * because when a page splits into multiple pages, we want to - * push it out of cache (and read it back in, when needed), we - * would rather have more, smaller pages than fewer large pages. - */ - if (!closing && __wt_eviction_dirty_target(session)) - return (EBUSY); - - /* * Update the parent to reference the replacement page. * * Publish: a barrier to ensure the structure fields are set @@ -311,10 +334,26 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) *addr = mod->mod_replace; mod->mod_replace.addr = NULL; mod->mod_replace.size = 0; - - __wt_ref_out(session, ref); ref->addr = addr; - WT_PUBLISH(ref->state, WT_REF_DISK); + + /* + * Eviction wants to keep this page if we have a disk image, + * re-instantiate the page in memory, else discard the page. + */ + if (mod->mod_disk_image == NULL) { + __wt_ref_out(session, ref); + WT_PUBLISH(ref->state, WT_REF_DISK); + } else { + /* + * The split code works with WT_MULTI structures, build + * one for the disk image. 
+ */ + memset(&multi, 0, sizeof(multi)); + multi.disk_image = mod->mod_disk_image; + + WT_RET(__wt_split_rewrite(session, ref, &multi)); + } + break; WT_ILLEGAL_VALUE(session); } @@ -351,13 +390,17 @@ __evict_child_check(WT_SESSION_IMPL *session, WT_REF *parent) */ static int __evict_review( - WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp, bool closing) + WT_SESSION_IMPL *session, WT_REF *ref, uint32_t *flagsp, bool closing) { + WT_CACHE *cache; WT_DECL_RET; WT_PAGE *page; uint32_t flags; bool modified; + flags = WT_EVICTING; + *flagsp = flags; + /* * Get exclusive access to the page if our caller doesn't have the tree * locked down. @@ -423,8 +466,9 @@ __evict_review( WT_RET(__wt_txn_update_oldest( session, WT_TXN_OLDEST_STRICT)); - if (!__wt_page_can_evict(session, ref, inmem_splitp)) + if (!__wt_page_can_evict(session, ref, flagsp)) return (EBUSY); + flags = *flagsp; /* * Check for an append-only workload needing an in-memory @@ -433,8 +477,12 @@ __evict_review( * the page stays in memory and the tree is left in the desired * state: avoid the usual cleanup. */ - if (*inmem_splitp) + if (LF_ISSET(WT_EVICT_INMEM_SPLIT)) return (__wt_split_insert(session, ref)); + + /* We are done if reconciliation is disabled. */ + if (F_ISSET(S2BT(session), WT_BTREE_NO_RECONCILE)) + return (EBUSY); } /* If the page is clean, we're done and we can evict. */ @@ -447,10 +495,15 @@ __evict_review( * If we have an exclusive lock (we're discarding the tree), assert * there are no updates we cannot read. * - * Otherwise, if the page we're evicting is a leaf page marked for - * forced eviction, set the update-restore flag, so reconciliation will - * write blocks it can write and create a list of skipped updates for - * blocks it cannot write. This is how forced eviction of active, huge + * Don't set any other flags for internal pages: they don't have update + * lists to be saved and restored, nor can we re-create them in memory. 
+ * + * For leaf pages: + * + * If an in-memory configuration or the page is being forcibly evicted, + * set the update-restore flag, so reconciliation will write blocks it + * can write and create a list of skipped updates for blocks it cannot + * write, along with disk images. This is how eviction of active, huge * pages works: we take a big page and reconcile it into blocks, some of * which we write and discard, the rest of which we re-create as smaller * in-memory pages, (restoring the updates that stopped us from writing @@ -461,32 +514,43 @@ __evict_review( * allowing the eviction of pages we'd otherwise have to retain in cache * to support older readers. * - * Don't set the update-restore or lookaside table flags for internal - * pages, they don't have update lists that can be saved and restored. + * Finally, if we don't need to do eviction at the moment, create disk + * images of split pages in order to re-instantiate them. */ - flags = WT_EVICTING; + cache = S2C(session)->cache; if (closing) LF_SET(WT_VISIBILITY_ERR); else if (!WT_PAGE_IS_INTERNAL(page)) { if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) - LF_SET(WT_EVICT_IN_MEMORY | WT_EVICT_UPDATE_RESTORE); - else if (page->read_gen == WT_READGEN_OLDEST) - LF_SET(WT_EVICT_UPDATE_RESTORE); - else if (F_ISSET(S2C(session)->cache, WT_CACHE_STUCK)) + LF_SET(WT_EVICT_IN_MEMORY | + WT_EVICT_SCRUB | WT_EVICT_UPDATE_RESTORE); + else if (F_ISSET(cache, WT_CACHE_STUCK)) LF_SET(WT_EVICT_LOOKASIDE); + else if (!__wt_txn_visible_all( + session, page->modify->update_txn)) + LF_SET(WT_EVICT_UPDATE_RESTORE); + + /* + * If we aren't trying to free space in the cache, scrub the + * page and keep it around. + */ + if (!LF_ISSET(WT_EVICT_LOOKASIDE) && + FLD_ISSET(cache->state, WT_EVICT_STATE_SCRUB)) + LF_SET(WT_EVICT_SCRUB); } + *flagsp = flags; WT_RET(__wt_reconcile(session, ref, NULL, flags)); /* * Success: assert the page is clean or reconciliation was configured - * for an update/restore split. 
If the page is clean, assert that - * reconciliation was configured for a lookaside table, or it's not a - * durable object (currently the lookaside table), or all page updates - * were globally visible. + * for update/restore. If the page is clean, assert that reconciliation + * was configured for a lookaside table, or it's not a durable object + * (currently the lookaside table), or all page updates were globally + * visible. */ WT_ASSERT(session, - LF_ISSET(WT_EVICT_UPDATE_RESTORE) || !__wt_page_is_modified(page)); + !__wt_page_is_modified(page) || LF_ISSET(WT_EVICT_UPDATE_RESTORE)); WT_ASSERT(session, __wt_page_is_modified(page) || LF_ISSET(WT_EVICT_LOOKASIDE) || diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h index 50b2eab83b8..0a4593178dc 100644 --- a/src/third_party/wiredtiger/src/include/api.h +++ b/src/third_party/wiredtiger/src/include/api.h @@ -66,6 +66,8 @@ else if (ret == 0 && !F_ISSET(&(s)->txn, WT_TXN_ERROR)) \ ret = __wt_txn_commit((s), NULL); \ else { \ + if (retry) \ + WT_TRET(__wt_session_copy_values(s)); \ WT_TRET(__wt_txn_rollback((s), NULL)); \ if ((ret == 0 || ret == WT_ROLLBACK) && \ (retry)) { \ diff --git a/src/third_party/wiredtiger/src/include/block.h b/src/third_party/wiredtiger/src/include/block.h index a8080c1651c..3342f9b1e5e 100644 --- a/src/third_party/wiredtiger/src/include/block.h +++ b/src/third_party/wiredtiger/src/include/block.h @@ -192,7 +192,7 @@ struct __wt_bm { int (*verify_start) (WT_BM *, WT_SESSION_IMPL *, WT_CKPT *, const char *[]); int (*write) (WT_BM *, - WT_SESSION_IMPL *, WT_ITEM *, uint8_t *, size_t *, bool); + WT_SESSION_IMPL *, WT_ITEM *, uint8_t *, size_t *, bool, bool); int (*write_size)(WT_BM *, WT_SESSION_IMPL *, size_t *); WT_BLOCK *block; /* Underlying file */ diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index 9700b6f4761..817ce892952 100644 --- 
a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -250,9 +250,19 @@ struct __wt_page_modify { * a replace address and multiple replacement blocks. */ union { - WT_ADDR replace; /* Single, written replacement block */ + struct { /* Single, written replacement block */ + WT_ADDR replace; + + /* + * A disk image that may or may not have been written, used to + * re-instantiate the page in memory. + */ + void *disk_image; + } r; #undef mod_replace -#define mod_replace u1.replace +#define mod_replace u1.r.replace +#undef mod_disk_image +#define mod_disk_image u1.r.disk_image struct { /* Multiple replacement blocks */ struct __wt_multi { @@ -266,14 +276,19 @@ struct __wt_page_modify { } key; /* - * Eviction, but the block wasn't written: either an in-memory - * configuration or unresolved updates prevented the write. - * There may be a list of unresolved updates, there's always an - * associated disk image. + * A disk image that may or may not have been written, used to + * re-instantiate the page in memory. + */ + void *disk_image; + + /* + * List of unresolved updates. Updates are either a WT_INSERT + * or a row-store leaf page entry; when creating lookaside + * records, there is an additional value, the committed item's + * transaction ID. * - * Saved updates are either a WT_INSERT, or a row-store leaf - * page entry; in the case of creating lookaside records, there - * is an additional value, the committed item's transaction ID. + * If there are unresolved updates, the block wasn't written and + * there will always be a disk image. */ struct __wt_save_upd { WT_INSERT *ins; @@ -281,10 +296,9 @@ struct __wt_page_modify { uint64_t onpage_txn; } *supd; uint32_t supd_entries; - void *disk_image; /* - * Block was written: address, size and checksum. + * Disk image was written: address, size and checksum. 
* On subsequent reconciliations of this page, we avoid writing * the block if it's unchanged by comparing size and checksum; * the reuse flag is set when the block is unchanged and we're diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h index fd921677751..432474f9dc1 100644 --- a/src/third_party/wiredtiger/src/include/btree.h +++ b/src/third_party/wiredtiger/src/include/btree.h @@ -126,12 +126,16 @@ struct __wt_btree { u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */ uint64_t checkpoint_gen; /* Checkpoint generation */ + bool include_checkpoint_txn;/* ID checks include checkpoint */ uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */ uint64_t write_gen; /* Write generation */ + uint64_t bytes_inmem; /* Cache bytes in memory. */ + WT_REF *evict_ref; /* Eviction thread's location */ uint64_t evict_priority; /* Relative priority of cached pages */ u_int evict_walk_period; /* Skip this many LRU walks */ + u_int evict_walk_saved; /* Saved walk skips for checkpoints */ u_int evict_walk_skips; /* Number of walks skipped */ u_int evict_disabled; /* Eviction disabled count */ volatile uint32_t evict_busy; /* Count of threads in eviction */ @@ -154,11 +158,12 @@ struct __wt_btree { #define WT_BTREE_NO_CHECKPOINT 0x00800 /* Disable checkpoints */ #define WT_BTREE_NO_EVICTION 0x01000 /* Disable eviction */ #define WT_BTREE_NO_LOGGING 0x02000 /* Disable logging */ -#define WT_BTREE_REBALANCE 0x04000 /* Handle is for rebalance */ -#define WT_BTREE_SALVAGE 0x08000 /* Handle is for salvage */ -#define WT_BTREE_SKIP_CKPT 0x10000 /* Handle skipped checkpoint */ -#define WT_BTREE_UPGRADE 0x20000 /* Handle is for upgrade */ -#define WT_BTREE_VERIFY 0x40000 /* Handle is for verify */ +#define WT_BTREE_NO_RECONCILE 0x04000 /* Allow splits, even with no evict */ +#define WT_BTREE_REBALANCE 0x08000 /* Handle is for rebalance */ +#define WT_BTREE_SALVAGE 0x10000 /* Handle is for salvage */ +#define WT_BTREE_SKIP_CKPT 
0x20000 /* Handle skipped checkpoint */ +#define WT_BTREE_UPGRADE 0x40000 /* Handle is for upgrade */ +#define WT_BTREE_VERIFY 0x80000 /* Handle is for verify */ uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index e0102a11511..3234ad1ed41 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -55,6 +55,27 @@ __wt_btree_block_free( } /* + * __wt_btree_bytes_inuse -- + * Return the number of bytes in use. + */ +static inline uint64_t +__wt_btree_bytes_inuse(WT_SESSION_IMPL *session) +{ + WT_CACHE *cache; + uint64_t bytes_inuse; + + cache = S2C(session)->cache; + + /* Adjust the cache size to take allocation overhead into account. */ + bytes_inuse = S2BT(session)->bytes_inmem; + if (cache->overhead_pct != 0) + bytes_inuse += + (bytes_inuse * (uint64_t)cache->overhead_pct) / 100; + + return (bytes_inuse); +} + +/* * __wt_cache_page_inmem_incr -- * Increment a page's memory footprint in the cache. */ @@ -66,17 +87,17 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) WT_ASSERT(session, size < WT_EXABYTE); cache = S2C(session)->cache; + (void)__wt_atomic_add64(&S2BT(session)->bytes_inmem, size); (void)__wt_atomic_add64(&cache->bytes_inmem, size); (void)__wt_atomic_addsize(&page->memory_footprint, size); if (__wt_page_is_modified(page)) { - (void)__wt_atomic_add64(&cache->bytes_dirty, size); (void)__wt_atomic_addsize(&page->modify->bytes_dirty, size); + (void)__wt_atomic_add64(WT_PAGE_IS_INTERNAL(page) ? + &cache->bytes_dirty_intl : &cache->bytes_dirty_leaf, size); } - /* Track internal and overflow size in cache. */ + /* Track internal size in cache. 
*/ if (WT_PAGE_IS_INTERNAL(page)) (void)__wt_atomic_add64(&cache->bytes_internal, size); - else if (page->type == WT_PAGE_OVFL) - (void)__wt_atomic_add64(&cache->bytes_overflow, size); } /* @@ -144,10 +165,16 @@ __wt_cache_page_byte_dirty_decr( WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) { WT_CACHE *cache; + const char *destname; + uint64_t *dest; size_t decr, orig; int i; cache = S2C(session)->cache; + dest = WT_PAGE_IS_INTERNAL(page) ? + &cache->bytes_dirty_intl : &cache->bytes_dirty_leaf; + destname = WT_PAGE_IS_INTERNAL(page) ? + "WT_CACHE.bytes_dirty_intl" : "WT_CACHE.bytes_dirty_leaf"; /* * We don't have exclusive access and there are ways of decrementing the @@ -175,8 +202,8 @@ __wt_cache_page_byte_dirty_decr( decr = WT_MIN(size, orig); if (__wt_atomic_cassize( &page->modify->bytes_dirty, orig, orig - decr)) { - __wt_cache_decr_check_uint64(session, - &cache->bytes_dirty, decr, "WT_CACHE.bytes_dirty"); + __wt_cache_decr_check_uint64( + session, dest, decr, destname); break; } } @@ -196,18 +223,17 @@ __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) WT_ASSERT(session, size < WT_EXABYTE); __wt_cache_decr_check_uint64( + session, &S2BT(session)->bytes_inmem, size, "WT_BTREE.bytes_inmem"); + __wt_cache_decr_check_uint64( session, &cache->bytes_inmem, size, "WT_CACHE.bytes_inmem"); __wt_cache_decr_check_size( session, &page->memory_footprint, size, "WT_PAGE.memory_footprint"); if (__wt_page_is_modified(page)) __wt_cache_page_byte_dirty_decr(session, page, size); - /* Track internal and overflow size in cache. */ + /* Track internal size in cache. 
*/ if (WT_PAGE_IS_INTERNAL(page)) __wt_cache_decr_check_uint64(session, &cache->bytes_internal, size, "WT_CACHE.bytes_internal"); - else if (page->type == WT_PAGE_OVFL) - __wt_cache_decr_check_uint64(session, - &cache->bytes_overflow, size, "WT_CACHE.bytes_overflow"); } /* @@ -222,14 +248,16 @@ __wt_cache_dirty_incr(WT_SESSION_IMPL *session, WT_PAGE *page) size_t size; cache = S2C(session)->cache; - (void)__wt_atomic_add64(&cache->pages_dirty, 1); + (void)__wt_atomic_add64(WT_PAGE_IS_INTERNAL(page) ? + &cache->pages_dirty_intl : &cache->pages_dirty_leaf, 1); /* * Take care to read the memory_footprint once in case we are racing * with updates. */ size = page->memory_footprint; - (void)__wt_atomic_add64(&cache->bytes_dirty, size); + (void)__wt_atomic_add64(WT_PAGE_IS_INTERNAL(page) ? + &cache->bytes_dirty_intl : &cache->bytes_dirty_leaf, size); (void)__wt_atomic_addsize(&page->modify->bytes_dirty, size); } @@ -243,16 +271,19 @@ __wt_cache_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page) { WT_CACHE *cache; WT_PAGE_MODIFY *modify; + uint64_t *pages_dirty; cache = S2C(session)->cache; + pages_dirty = WT_PAGE_IS_INTERNAL(page) ? + &cache->pages_dirty_intl : &cache->pages_dirty_leaf; - if (cache->pages_dirty < 1) { + if (*pages_dirty < 1) { __wt_errx(session, "cache eviction dirty-page decrement failed: dirty page" "count went negative"); - cache->pages_dirty = 0; + *pages_dirty = 0; } else - (void)__wt_atomic_sub64(&cache->pages_dirty, 1); + (void)__wt_atomic_sub64(pages_dirty, 1); modify = page->modify; if (modify != NULL && modify->bytes_dirty != 0) @@ -261,6 +292,34 @@ __wt_cache_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page) } /* + * __wt_cache_page_image_decr -- + * Decrement a page image's size to the cache. 
+ */ +static inline void +__wt_cache_page_image_decr(WT_SESSION_IMPL *session, uint32_t size) +{ + WT_CACHE *cache; + + cache = S2C(session)->cache; + + __wt_cache_decr_check_uint64( + session, &cache->bytes_image, size, "WT_CACHE.image_inmem"); +} + +/* + * __wt_cache_page_image_incr -- + * Increment a page image's size to the cache. + */ +static inline void +__wt_cache_page_image_incr(WT_SESSION_IMPL *session, uint32_t size) +{ + WT_CACHE *cache; + + cache = S2C(session)->cache; + (void)__wt_atomic_add64(&cache->bytes_image, size); +} + +/* * __wt_cache_page_evict -- * Evict pages from the cache. */ @@ -269,13 +328,20 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) { WT_CACHE *cache; WT_PAGE_MODIFY *modify; + uint64_t *dest; + const char *destname; cache = S2C(session)->cache; + dest = WT_PAGE_IS_INTERNAL(page) ? + &cache->bytes_dirty_intl : &cache->bytes_dirty_leaf; + destname = WT_PAGE_IS_INTERNAL(page) ? + "WT_CACHE.bytes_dirty_intl" : "WT_CACHE.bytes_dirty_leaf"; modify = page->modify; /* Update the bytes in-memory to reflect the eviction. */ - __wt_cache_decr_check_uint64(session, - &cache->bytes_inmem, + __wt_cache_decr_check_uint64(session, &S2BT(session)->bytes_inmem, + page->memory_footprint, "WT_BTREE.bytes_inmem"); + __wt_cache_decr_check_uint64(session, &cache->bytes_inmem, page->memory_footprint, "WT_CACHE.bytes_inmem"); /* Update the bytes_internal value to reflect the eviction */ @@ -286,15 +352,14 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) /* Update the cache's dirty-byte count. 
*/ if (modify != NULL && modify->bytes_dirty != 0) { - if (cache->bytes_dirty < modify->bytes_dirty) { + if ((size_t)*dest < modify->bytes_dirty) { __wt_errx(session, - "cache eviction dirty-bytes decrement failed: " - "dirty byte count went negative"); - cache->bytes_dirty = 0; + "%s decrement failed: " + "dirty byte count went negative", destname); + *dest = 0; } else - __wt_cache_decr_check_uint64(session, - &cache->bytes_dirty, - modify->bytes_dirty, "WT_CACHE.bytes_dirty"); + __wt_cache_decr_check_uint64(session, dest, + modify->bytes_dirty, destname); } /* Update pages and bytes evicted. */ @@ -318,16 +383,6 @@ __wt_update_list_memsize(WT_UPDATE *upd) } /* - * __wt_page_evict_soon -- - * Set a page to be evicted as soon as possible. - */ -static inline void -__wt_page_evict_soon(WT_PAGE *page) -{ - page->read_gen = WT_READGEN_OLDEST; -} - -/* * __wt_page_modify_init -- * A page is about to be modified, allocate the modification structure. */ @@ -1099,16 +1154,14 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page) * Check whether a page can be evicted. */ static inline bool -__wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) +__wt_page_can_evict( + WT_SESSION_IMPL *session, WT_REF *ref, uint32_t *evict_flagsp) { WT_BTREE *btree; WT_PAGE *page; WT_PAGE_MODIFY *mod; bool modified; - if (inmem_splitp != NULL) - *inmem_splitp = false; - btree = S2BT(session); page = ref->page; mod = page->modify; @@ -1124,8 +1177,8 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) * won't be written or discarded from the cache. 
*/ if (__wt_leaf_page_can_split(session, page)) { - if (inmem_splitp != NULL) - *inmem_splitp = true; + if (evict_flagsp != NULL) + FLD_SET(*evict_flagsp, WT_EVICT_INMEM_SPLIT); return (true); } @@ -1164,6 +1217,10 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_BLOCK)) return (false); + /* If the cache is stuck, try anything else. */ + if (F_ISSET(S2C(session)->cache, WT_CACHE_STUCK)) + return (true); + /* * If the oldest transaction hasn't changed since the last time * this page was written, it's unlikely we can make progress. @@ -1172,7 +1229,6 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) * attempt to avoid repeated attempts to evict the same page. */ if (modified && - !F_ISSET(S2C(session)->cache, WT_CACHE_STUCK) && (mod->last_oldest_id == __wt_txn_oldest_id(session) || !__wt_txn_visible_all(session, mod->update_txn))) return (false); @@ -1181,56 +1237,6 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) } /* - * __wt_page_release_evict -- - * Release a reference to a page, and attempt to immediately evict it. - */ -static inline int -__wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) -{ - WT_BTREE *btree; - WT_DECL_RET; - WT_PAGE *page; - bool locked, too_big; - - btree = S2BT(session); - page = ref->page; - - /* - * Take some care with order of operations: if we release the hazard - * reference without first locking the page, it could be evicted in - * between. - */ - locked = __wt_atomic_casv32( - &ref->state, WT_REF_MEM, WT_REF_LOCKED) ? true : false; - if ((ret = __wt_hazard_clear(session, page)) != 0 || !locked) { - if (locked) - ref->state = WT_REF_MEM; - return (ret == 0 ? 
EBUSY : ret); - } - - (void)__wt_atomic_addv32(&btree->evict_busy, 1); - - too_big = page->memory_footprint > btree->maxmempage; - if ((ret = __wt_evict(session, ref, false)) == 0) { - if (too_big) - WT_STAT_FAST_CONN_INCR(session, cache_eviction_force); - else - /* - * If the page isn't too big, we are evicting it because - * it had a chain of deleted entries that make traversal - * expensive. - */ - WT_STAT_FAST_CONN_INCR( - session, cache_eviction_force_delete); - } else - WT_STAT_FAST_CONN_INCR(session, cache_eviction_force_fail); - - (void)__wt_atomic_subv32(&btree->evict_busy, 1); - - return (ret); -} - -/* * __wt_page_release -- * Release a reference to a page. */ diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h index f4a35de7201..e3a003ccc56 100644 --- a/src/third_party/wiredtiger/src/include/cache.h +++ b/src/third_party/wiredtiger/src/include/cache.h @@ -13,7 +13,6 @@ #define WT_EVICT_INT_SKEW (1<<20) /* Prefer leaf pages over internal pages by this many increments of the read generation. */ -#define WT_EVICT_WALK_PER_FILE 10 /* Pages to queue per file */ #define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */ #define WT_EVICT_WALK_INCR 100 /* Pages added each walk */ @@ -24,9 +23,12 @@ struct __wt_evict_entry { WT_BTREE *btree; /* Enclosing btree object */ WT_REF *ref; /* Page to flush/evict */ + uint64_t score; /* Relative eviction priority */ }; -#define WT_EVICT_QUEUE_MAX 2 +#define WT_EVICT_URGENT_QUEUE 0 /* Urgent queue index */ +#define WT_EVICT_QUEUE_MAX 3 /* Urgent plus two ordinary queues */ + /* * WT_EVICT_QUEUE -- * Encapsulation of an eviction candidate queue. 
@@ -34,6 +36,7 @@ struct __wt_evict_entry { struct __wt_evict_queue { WT_SPINLOCK evict_lock; /* Eviction LRU queue */ WT_EVICT_ENTRY *evict_queue; /* LRU pages being tracked */ + WT_EVICT_ENTRY *evict_current; /* LRU current page to be evicted */ uint32_t evict_candidates; /* LRU list pages to evict */ uint32_t evict_entries; /* LRU entries in the queue */ volatile uint32_t evict_max; /* LRU maximum eviction slot used */ @@ -70,16 +73,19 @@ struct __wt_cache { * be exact, they can't be garbage, we track what comes in and what goes * out and calculate the difference as needed. */ - uint64_t bytes_inmem; /* Bytes/pages in memory */ - uint64_t pages_inmem; - uint64_t bytes_internal; /* Bytes of internal pages */ - uint64_t bytes_overflow; /* Bytes of overflow pages */ + uint64_t bytes_dirty_intl; /* Bytes/pages currently dirty */ + uint64_t pages_dirty_intl; + uint64_t bytes_dirty_leaf; + uint64_t pages_dirty_leaf; uint64_t bytes_evict; /* Bytes/pages discarded by eviction */ uint64_t pages_evict; uint64_t pages_evicted; /* Pages evicted during a pass */ - uint64_t bytes_dirty; /* Bytes/pages currently dirty */ - uint64_t pages_dirty; + uint64_t bytes_image; /* Bytes of disk images */ + uint64_t bytes_inmem; /* Bytes/pages in memory */ + uint64_t pages_inmem; + uint64_t bytes_internal; /* Bytes of internal pages */ uint64_t bytes_read; /* Bytes read into memory */ + uint64_t bytes_written; uint64_t app_waits; /* User threads waited for cache */ uint64_t app_evicts; /* Pages evicted by user threads */ @@ -121,7 +127,6 @@ struct __wt_cache { WT_SPINLOCK evict_queue_lock; /* Eviction current queue lock */ WT_EVICT_QUEUE evict_queues[WT_EVICT_QUEUE_MAX]; WT_EVICT_QUEUE *evict_current_queue;/* LRU current queue in use */ - WT_EVICT_ENTRY *evict_current; /* LRU current page to be evicted */ uint32_t evict_queue_fill; /* LRU eviction queue index to fill */ uint32_t evict_slots; /* LRU list eviction slots */ WT_DATA_HANDLE @@ -145,10 +150,13 @@ struct __wt_cache { /* * Work 
state. */ -#define WT_EVICT_PASS_AGGRESSIVE 0x01 -#define WT_EVICT_PASS_ALL 0x02 -#define WT_EVICT_PASS_DIRTY 0x04 -#define WT_EVICT_PASS_WOULD_BLOCK 0x08 +#define WT_EVICT_STATE_AGGRESSIVE 0x01 /* Eviction isn't making progress: + try harder */ +#define WT_EVICT_STATE_CLEAN 0x02 /* Evict clean pages */ +#define WT_EVICT_STATE_DIRTY 0x04 /* Evict dirty pages */ +#define WT_EVICT_STATE_SCRUB 0x08 /* Scrub dirty pages pages */ +#define WT_EVICT_STATE_URGENT 0x10 /* Pages are in the urgent queue */ +#define WT_EVICT_STATE_ALL (WT_EVICT_STATE_CLEAN | WT_EVICT_STATE_DIRTY) uint32_t state; /* * Pass interrupt counter. @@ -162,7 +170,6 @@ struct __wt_cache { #define WT_CACHE_POOL_RUN 0x02 /* Cache pool thread running */ #define WT_CACHE_STUCK 0x04 /* Eviction server is stuck */ #define WT_CACHE_WALK_REVERSE 0x08 /* Scan backwards for candidates */ -#define WT_CACHE_WOULD_BLOCK 0x10 /* Pages that would block apps */ uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i index 72c8307756d..b5cb79afb3c 100644 --- a/src/third_party/wiredtiger/src/include/cache.i +++ b/src/third_party/wiredtiger/src/include/cache.i @@ -104,7 +104,7 @@ __wt_cache_dirty_inuse(WT_CACHE *cache) { uint64_t dirty_inuse; - dirty_inuse = cache->bytes_dirty; + dirty_inuse = cache->bytes_dirty_intl + cache->bytes_dirty_leaf; if (cache->overhead_pct != 0) dirty_inuse += (dirty_inuse * (uint64_t)cache->overhead_pct) / 100; @@ -113,6 +113,67 @@ __wt_cache_dirty_inuse(WT_CACHE *cache) } /* + * __wt_cache_dirty_leaf_inuse -- + * Return the number of dirty bytes in use by leaf pages. 
+ */ +static inline uint64_t +__wt_cache_dirty_leaf_inuse(WT_CACHE *cache) +{ + uint64_t dirty_inuse; + + dirty_inuse = cache->bytes_dirty_leaf; + if (cache->overhead_pct != 0) + dirty_inuse += + (dirty_inuse * (uint64_t)cache->overhead_pct) / 100; + + return (dirty_inuse); +} + +/* + * __wt_cache_bytes_image -- + * Return the number of page image bytes in use. + */ +static inline uint64_t +__wt_cache_bytes_image(WT_CACHE *cache) +{ + uint64_t bytes_image; + + bytes_image = cache->bytes_image; + if (cache->overhead_pct != 0) + bytes_image += + (bytes_image * (uint64_t)cache->overhead_pct) / 100; + + return (bytes_image); +} + +/* + * __wt_cache_bytes_other -- + * Return the number of bytes in use not for page images. + */ +static inline uint64_t +__wt_cache_bytes_other(WT_CACHE *cache) +{ + uint64_t bytes_image, bytes_inmem, bytes_other; + + bytes_image = cache->bytes_image; + bytes_inmem = cache->bytes_inmem; + + /* + * The reads above could race with changes to the values, so protect + * against underflow. + */ + if (bytes_image > bytes_inmem) + return (0); + + bytes_other = bytes_inmem - bytes_image; + if (cache->overhead_pct != 0) + bytes_other += + (bytes_other * (uint64_t)cache->overhead_pct) / 100; + + return (bytes_other); +} + +/* * __wt_session_can_wait -- * Return if a session available for a potentially slow operation. */ @@ -139,20 +200,9 @@ __wt_session_can_wait(WT_SESSION_IMPL *session) } /* - * __wt_eviction_dirty_target -- - * Return if the eviction server is running to reduce the number of dirty - * pages (versus running to discard pages from the cache). - */ -static inline bool -__wt_eviction_dirty_target(WT_SESSION_IMPL *session) -{ - return (FLD_ISSET(S2C(session)->cache->state, WT_EVICT_PASS_DIRTY)); -} - -/* * __wt_eviction_needed -- * Return if an application thread should do eviction, and the cache full - * percentage as a side-effect. + * percentage as a side-effect. 
*/ static inline bool __wt_eviction_needed(WT_SESSION_IMPL *session, u_int *pct_fullp) @@ -186,22 +236,21 @@ __wt_eviction_needed(WT_SESSION_IMPL *session, u_int *pct_fullp) pct_full = (u_int)((100 * bytes_inuse) / bytes_max); if (pct_fullp != NULL) *pct_fullp = pct_full; - /* - * If the connection is closing we do not need eviction from an - * application thread. The eviction subsystem is already closed. - * We return here because some callers depend on the percent full - * having been filled in. - */ - if (F_ISSET(conn, WT_CONN_CLOSING)) - return (false); if (pct_full > cache->eviction_trigger) return (true); - /* Return if there are too many dirty bytes in cache. */ - if (__wt_cache_dirty_inuse(cache) > + /* + * Check if there are too many dirty bytes in cache. + * + * We try to avoid penalizing read-only operations by only checking the + * dirty limit once a transaction ID has been allocated, or if the last + * transaction did an update. + */ + if (__wt_cache_dirty_leaf_inuse(cache) > (cache->eviction_dirty_trigger * bytes_max) / 100) return (true); + return (false); } diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index 0e0c357279a..a9855e42980 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -271,7 +271,6 @@ struct __wt_connection_impl { wt_thread_t ckpt_tid; /* Checkpoint thread */ bool ckpt_tid_set; /* Checkpoint thread set */ WT_CONDVAR *ckpt_cond; /* Checkpoint wait mutex */ - const char *ckpt_config; /* Checkpoint configuration */ #define WT_CKPT_LOGSIZE(conn) ((conn)->ckpt_logsize != 0) wt_off_t ckpt_logsize; /* Checkpoint log size period */ uint32_t ckpt_signalled;/* Checkpoint signalled */ @@ -314,6 +313,7 @@ struct __wt_connection_impl { uint32_t evict_workers; /* Number of eviction workers */ WT_EVICT_WORKER *evict_workctx; /* Eviction worker context */ +#define WT_STATLOG_FILENAME 
"WiredTigerStat.%d.%H" WT_SESSION_IMPL *stat_session; /* Statistics log session */ wt_thread_t stat_tid; /* Statistics log thread */ bool stat_tid_set; /* Statistics log thread set */ diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h index 6357523a03f..dce24f20844 100644 --- a/src/third_party/wiredtiger/src/include/cursor.h +++ b/src/third_party/wiredtiger/src/include/cursor.h @@ -73,6 +73,9 @@ struct __wt_cursor_backup { WT_CURSOR_BACKUP_ENTRY *list; /* List of files to be copied. */ size_t list_allocated; size_t list_next; + +#define WT_CURBACKUP_LOCKER 0x01 /* Hot-backup started */ + uint8_t flags; }; #define WT_CURSOR_BACKUP_ID(cursor) (((WT_CURSOR_BACKUP *)cursor)->maxid) @@ -413,7 +416,9 @@ struct __wt_cursor_log { uint32_t step_count; /* Intra-record count */ uint32_t rectype; /* Record type */ uint64_t txnid; /* Record txnid */ - uint32_t flags; + +#define WT_CURLOG_ARCHIVE_LOCK 0x01 /* Archive lock held */ + uint8_t flags; }; struct __wt_cursor_metadata { @@ -424,7 +429,7 @@ struct __wt_cursor_metadata { #define WT_MDC_CREATEONLY 0x01 #define WT_MDC_ONMETADATA 0x02 #define WT_MDC_POSITIONED 0x04 - uint32_t flags; + uint8_t flags; }; struct __wt_join_stats_group { diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i index 553dd03f958..76a08138afb 100644 --- a/src/third_party/wiredtiger/src/include/cursor.i +++ b/src/third_party/wiredtiger/src/include/cursor.i @@ -38,9 +38,6 @@ __cursor_pos_clear(WT_CURSOR_BTREE *cbt) cbt->ins_head = NULL; cbt->ins_stack[0] = NULL; - cbt->cip_saved = NULL; - cbt->rip_saved = NULL; - F_CLR(cbt, WT_CBT_POSITION_MASK); } @@ -120,7 +117,7 @@ __curfile_leave(WT_CURSOR_BTREE *cbt) */ if (cbt->ref != NULL && cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD) - __wt_page_evict_soon(cbt->ref->page); + WT_TRET(__wt_page_evict_soon(session, cbt->ref)); cbt->page_deleted_count = 0; /* @@ -130,7 +127,7 @@ 
__curfile_leave(WT_CURSOR_BTREE *cbt) * * Clear the reference regardless, so we don't try the release twice. */ - ret = __wt_page_release(session, cbt->ref, 0); + WT_TRET(__wt_page_release(session, cbt->ref, 0)); cbt->ref = NULL; return (ret); diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index b0c0f6eccad..f3a639ac07f 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -44,7 +44,7 @@ extern void __wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el); extern int __wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_regionp, size_t *lengthp, void *mapped_cookiep); extern int __wt_block_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region, size_t length, void *mapped_cookie); extern int __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp); -extern int __wt_block_manager_drop(WT_SESSION_IMPL *session, const char *filename); +extern int __wt_block_manager_drop( WT_SESSION_IMPL *session, const char *filename, bool durable); extern int __wt_block_manager_create( WT_SESSION_IMPL *session, const char *filename, uint32_t allocsize); extern void __wt_block_configure_first_fit(WT_BLOCK *block, bool on); extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BLOCK **blockp); @@ -76,8 +76,8 @@ extern int __wt_block_verify_addr(WT_SESSION_IMPL *session, WT_BLOCK *block, con extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len); extern int __wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size); extern int __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep); -extern int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK 
*block, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum); -extern int __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump, bool data_cksum, bool caller_locked); +extern int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum, bool checkpoint_io); +extern int __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump, bool data_cksum, bool checkpoint_io, bool caller_locked); extern int __wt_bloom_create( WT_SESSION_IMPL *session, const char *uri, const char *config, uint64_t count, uint32_t factor, uint32_t k, WT_BLOOM **bloomp); extern int __wt_bloom_open(WT_SESSION_IMPL *session, const char *uri, uint32_t factor, uint32_t k, WT_CURSOR *owner, WT_BLOOM **bloomp); extern int __wt_bloom_insert(WT_BLOOM *bloom, WT_ITEM *key); @@ -139,7 +139,7 @@ extern void __wt_btree_evictable(WT_SESSION_IMPL *session, bool on); extern int __wt_btree_huffman_open(WT_SESSION_IMPL *session); extern void __wt_btree_huffman_close(WT_SESSION_IMPL *session); extern int __wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size); -extern int __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool checkpoint, bool compressed); +extern int __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool checkpoint, bool checkpoint_io, bool compressed); extern const char *__wt_page_type_string(u_int type); extern const char *__wt_cell_type_string(uint8_t type); extern const char *__wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf); @@ -161,11 +161,11 @@ extern int __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPD extern int __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]); extern void 
__wt_split_stash_discard(WT_SESSION_IMPL *session); extern void __wt_split_stash_discard_all( WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session); -extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp); +extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing); extern int __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref); extern int __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing); extern int __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref); -extern int __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref); +extern int __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi); extern int __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst); extern int __wt_cache_op(WT_SESSION_IMPL *session, WT_CACHE_OP op); extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[]); @@ -282,7 +282,6 @@ extern int __wt_curconfig_open(WT_SESSION_IMPL *session, const char *uri, const extern int __wt_curds_open( WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_DATA_SOURCE *dsrc, WT_CURSOR **cursorp); extern int __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp); extern int __wt_curfile_update_check(WT_CURSOR *cursor); -extern int __wt_curfile_create(WT_SESSION_IMPL *session, WT_CURSOR *owner, const char *cfg[], bool bulk, bool bitmap, WT_CURSOR **cursorp); extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp); extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp); extern int __wt_curjoin_joined(WT_CURSOR *cursor); @@ -346,9 +345,11 @@ extern int __wt_evict_destroy(WT_SESSION_IMPL *session); extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session); 
extern void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session); extern int __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full); +extern int __wt_page_evict_soon(WT_SESSION_IMPL *session, WT_REF *ref); extern void __wt_evict_priority_set(WT_SESSION_IMPL *session, uint64_t v); extern void __wt_evict_priority_clear(WT_SESSION_IMPL *session); extern int __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile); +extern int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref); extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing); extern int __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn); extern int __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start); @@ -485,8 +486,7 @@ extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **va extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value); extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path); extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path); -extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name); -extern int __wt_rename_and_sync_directory( WT_SESSION_IMPL *session, const char *from, const char *to); +extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name, bool durable); extern int __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to); extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp); @@ -500,7 +500,7 @@ extern int __wt_errno(void); extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen); extern int __wt_ext_map_windows_error( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint32_t windows_error); extern bool __wt_handle_is_open(WT_SESSION_IMPL *session, const 
char *name); -extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp); +extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_FS_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp); extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp); extern int __wt_close_connection_close(WT_SESSION_IMPL *session); extern int __wt_os_inmemory(WT_SESSION_IMPL *session); @@ -585,7 +585,7 @@ extern WT_DATA_SOURCE *__wt_schema_get_source(WT_SESSION_IMPL *session, const ch extern int __wt_str_name_check(WT_SESSION_IMPL *session, const char *str); extern int __wt_name_check(WT_SESSION_IMPL *session, const char *str, size_t len); extern int __wt_schema_worker(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[], uint32_t open_flags); -extern int __wt_session_notsup(WT_SESSION *wt_session); +extern int __wt_session_notsup(WT_SESSION_IMPL *session); extern int __wt_session_reset_cursors(WT_SESSION_IMPL *session, bool free_buffers); extern int __wt_session_copy_values(WT_SESSION_IMPL *session); extern int __wt_session_release_resources(WT_SESSION_IMPL *session); @@ -719,7 +719,6 @@ extern void __wt_txn_stats_update(WT_SESSION_IMPL *session); extern void __wt_txn_destroy(WT_SESSION_IMPL *session); extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_global_destroy(WT_SESSION_IMPL *session); -extern int __wt_checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len); extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]); diff --git a/src/third_party/wiredtiger/src/include/flags.h b/src/third_party/wiredtiger/src/include/flags.h index 
f134af69d29..9346605ed24 100644 --- a/src/third_party/wiredtiger/src/include/flags.h +++ b/src/third_party/wiredtiger/src/include/flags.h @@ -2,6 +2,7 @@ * DO NOT EDIT: automatically built by dist/flags.py. * flags section: BEGIN */ +#define WT_CHECKPOINTING 0x00000001 #define WT_CONN_CACHE_POOL 0x00000001 #define WT_CONN_CKPT_SYNC 0x00000002 #define WT_CONN_CLOSING 0x00000004 @@ -21,10 +22,12 @@ #define WT_CONN_SERVER_STATISTICS 0x00010000 #define WT_CONN_SERVER_SWEEP 0x00020000 #define WT_CONN_WAS_BACKUP 0x00040000 -#define WT_EVICTING 0x00000001 -#define WT_EVICT_IN_MEMORY 0x00000002 -#define WT_EVICT_LOOKASIDE 0x00000004 -#define WT_EVICT_UPDATE_RESTORE 0x00000008 +#define WT_EVICTING 0x00000002 +#define WT_EVICT_INMEM_SPLIT 0x00000004 +#define WT_EVICT_IN_MEMORY 0x00000008 +#define WT_EVICT_LOOKASIDE 0x00000010 +#define WT_EVICT_SCRUB 0x00000020 +#define WT_EVICT_UPDATE_RESTORE 0x00000040 #define WT_LOGSCAN_FIRST 0x00000001 #define WT_LOGSCAN_FROM_CKP 0x00000002 #define WT_LOGSCAN_ONE 0x00000004 @@ -100,7 +103,7 @@ #define WT_VERB_VERIFY 0x00800000 #define WT_VERB_VERSION 0x01000000 #define WT_VERB_WRITE 0x02000000 -#define WT_VISIBILITY_ERR 0x00000010 +#define WT_VISIBILITY_ERR 0x00000080 /* * flags section: END * DO NOT EDIT: automatically built by dist/flags.py. diff --git a/src/third_party/wiredtiger/src/include/hardware.h b/src/third_party/wiredtiger/src/include/hardware.h index 93ed8a868b6..0e52818ae05 100644 --- a/src/third_party/wiredtiger/src/include/hardware.h +++ b/src/third_party/wiredtiger/src/include/hardware.h @@ -45,7 +45,16 @@ &(p)->flags_atomic, __orig, __orig & ~(uint8_t)(mask))); \ } while (0) -#define WT_CACHE_LINE_ALIGNMENT 64 /* Cache line alignment */ +/* + * Cache line alignment. 
+ */ +#if defined(__PPC64__) || defined(PPC64) +#define WT_CACHE_LINE_ALIGNMENT 128 +#elif defined(__s390x__) +#define WT_CACHE_LINE_ALIGNMENT 256 +#else +#define WT_CACHE_LINE_ALIGNMENT 64 +#endif #define WT_CACHE_LINE_ALIGNMENT_VERIFY(session, a) \ WT_ASSERT(session, \ WT_PTRDIFF(&(a)[1], &(a)[0]) >= WT_CACHE_LINE_ALIGNMENT && \ diff --git a/src/third_party/wiredtiger/src/include/intpack.i b/src/third_party/wiredtiger/src/include/intpack.i index b27afd24e6c..e8bea58cede 100644 --- a/src/third_party/wiredtiger/src/include/intpack.i +++ b/src/third_party/wiredtiger/src/include/intpack.i @@ -59,7 +59,7 @@ /* Count the leading zero bytes. */ #if defined(__GNUC__) #define WT_LEADING_ZEROS(x, i) \ - (i = (x == 0) ? (int)sizeof (x) : __builtin_clzll(x) >> 3) + (i = (x == 0) ? (int)sizeof(x) : __builtin_clzll(x) >> 3) #elif defined(_MSC_VER) #define WT_LEADING_ZEROS(x, i) do { \ if (x == 0) i = (int)sizeof(x); \ @@ -89,7 +89,7 @@ __wt_vpack_posint(uint8_t **pp, size_t maxlen, uint64_t x) int len, lz, shift; WT_LEADING_ZEROS(x, lz); - len = (int)sizeof (x) - lz; + len = (int)sizeof(x) - lz; WT_SIZE_CHECK_PACK(len + 1, maxlen); p = *pp; @@ -114,7 +114,7 @@ __wt_vpack_negint(uint8_t **pp, size_t maxlen, uint64_t x) int len, lz, shift; WT_LEADING_ZEROS(~x, lz); - len = (int)sizeof (x) - lz; + len = (int)sizeof(x) - lz; WT_SIZE_CHECK_PACK(len + 1, maxlen); p = *pp; @@ -170,7 +170,7 @@ __wt_vunpack_negint(const uint8_t **pp, size_t maxlen, uint64_t *retp) /* There are four length bits in the first byte. 
*/ p = *pp; - len = (int)sizeof (x) - (*p++ & 0xf); + len = (int)sizeof(x) - (*p++ & 0xf); WT_SIZE_CHECK_UNPACK(len + 1, maxlen); for (x = UINT64_MAX; len != 0; --len) diff --git a/src/third_party/wiredtiger/src/include/os_fhandle.i b/src/third_party/wiredtiger/src/include/os_fhandle.i index 313bf8eca3f..9bf5ce0e60b 100644 --- a/src/third_party/wiredtiger/src/include/os_fhandle.i +++ b/src/third_party/wiredtiger/src/include/os_fhandle.i @@ -26,7 +26,7 @@ __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) * There is no way to check when the non-blocking sync-file-range is * complete, but we track the time taken in the call for completeness. */ - WT_STAT_FAST_CONN_INCR_ATOMIC(session, fsync_active); + WT_STAT_FAST_CONN_INCR_ATOMIC(session, thread_fsync_active); WT_STAT_FAST_CONN_INCR(session, fsync_io); if (block) ret = (handle->fh_sync == NULL ? 0 : @@ -34,7 +34,7 @@ __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) else ret = (handle->fh_sync_nowait == NULL ? 0 : handle->fh_sync_nowait(handle, (WT_SESSION *)session)); - WT_STAT_FAST_CONN_DECR_ATOMIC(session, fsync_active); + WT_STAT_FAST_CONN_DECR_ATOMIC(session, thread_fsync_active); return (ret); } @@ -107,13 +107,13 @@ __wt_read( "%s: handle-read: %" WT_SIZET_FMT " at %" PRIuMAX, fh->handle->name, len, (uintmax_t)offset)); - WT_STAT_FAST_CONN_INCR_ATOMIC(session, read_active); + WT_STAT_FAST_CONN_INCR_ATOMIC(session, thread_read_active); WT_STAT_FAST_CONN_INCR(session, read_io); ret = fh->handle->fh_read( fh->handle, (WT_SESSION *)session, offset, len, buf); - WT_STAT_FAST_CONN_DECR_ATOMIC(session, read_active); + WT_STAT_FAST_CONN_DECR_ATOMIC(session, thread_read_active); return (ret); } @@ -165,12 +165,12 @@ __wt_write(WT_SESSION_IMPL *session, "%s: handle-write: %" WT_SIZET_FMT " at %" PRIuMAX, fh->handle->name, len, (uintmax_t)offset)); - WT_STAT_FAST_CONN_INCR_ATOMIC(session, write_active); + WT_STAT_FAST_CONN_INCR_ATOMIC(session, thread_write_active); WT_STAT_FAST_CONN_INCR(session, 
write_io); ret = fh->handle->fh_write( fh->handle, (WT_SESSION *)session, offset, len, buf); - WT_STAT_FAST_CONN_DECR_ATOMIC(session, write_active); + WT_STAT_FAST_CONN_DECR_ATOMIC(session, thread_write_active); return (ret); } diff --git a/src/third_party/wiredtiger/src/include/os_fs.i b/src/third_party/wiredtiger/src/include/os_fs.i index 88ee71d953a..a3a2fe29b65 100644 --- a/src/third_party/wiredtiger/src/include/os_fs.i +++ b/src/third_party/wiredtiger/src/include/os_fs.i @@ -8,7 +8,7 @@ /* * __wt_fs_directory_list -- - * Get a list of files from a directory. + * Return a list of files from a directory. */ static inline int __wt_fs_directory_list(WT_SESSION_IMPL *session, @@ -61,61 +61,6 @@ __wt_fs_directory_list_free( } /* - * __wt_fs_directory_sync -- - * Flush a directory to ensure file creation is durable. - */ -static inline int -__wt_fs_directory_sync(WT_SESSION_IMPL *session, const char *name) -{ - WT_DECL_RET; - WT_FILE_SYSTEM *file_system; - WT_SESSION *wt_session; - char *copy, *dir; - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - - WT_RET(__wt_verbose( - session, WT_VERB_FILEOPS, "%s: directory-sync", name)); - - /* - * POSIX 1003.1 does not require that fsync of a file handle ensures the - * entry in the directory containing the file has also reached disk (and - * there are historic Linux filesystems requiring it). If the underlying - * filesystem method is set, do an explicit fsync on a file descriptor - * for the directory to be sure. - * - * directory-sync is not a required call, no method means the call isn't - * needed. - */ - file_system = S2C(session)->file_system; - if (file_system->fs_directory_sync == NULL) - return (0); - - copy = NULL; - if (name == NULL || strchr(name, '/') == NULL) - name = S2C(session)->home; - else { - /* - * File name construction should not return a path without any - * slash separator, but caution isn't unreasonable. 
- */ - WT_RET(__wt_filename(session, name, &copy)); - if ((dir = strrchr(copy, '/')) == NULL) - name = S2C(session)->home; - else { - dir[1] = '\0'; - name = copy; - } - } - - wt_session = (WT_SESSION *)session; - ret = file_system->fs_directory_sync(file_system, wt_session, name); - - __wt_free(session, copy); - return (ret); -} - -/* - * __wt_fs_exist -- - * Return if the file exists. */ @@ -141,10 +86,10 @@ __wt_fs_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) /* * __wt_fs_remove -- - * POSIX remove. + * Remove the file. */ static inline int -__wt_fs_remove(WT_SESSION_IMPL *session, const char *name) +__wt_fs_remove(WT_SESSION_IMPL *session, const char *name, bool durable) { WT_DECL_RET; WT_FILE_SYSTEM *file_system; @@ -169,7 +114,8 @@ __wt_fs_remove(WT_SESSION_IMPL *session, const char *name) file_system = S2C(session)->file_system; wt_session = (WT_SESSION *)session; - ret = file_system->fs_remove(file_system, wt_session, path); + ret = file_system->fs_remove( + file_system, wt_session, path, durable ? WT_FS_DURABLE : 0); __wt_free(session, path); return (ret); @@ -177,10 +123,11 @@ __wt_fs_remove(WT_SESSION_IMPL *session, const char *name) /* * __wt_fs_rename -- - * POSIX rename. + * Rename the file. */ static inline int -__wt_fs_rename(WT_SESSION_IMPL *session, const char *from, const char *to) +__wt_fs_rename( + WT_SESSION_IMPL *session, const char *from, const char *to, bool durable) { WT_DECL_RET; WT_FILE_SYSTEM *file_system; @@ -211,8 +158,8 @@ __wt_fs_rename(WT_SESSION_IMPL *session, const char *from, const char *to) file_system = S2C(session)->file_system; wt_session = (WT_SESSION *)session; - ret = file_system->fs_rename( - file_system, wt_session, from_path, to_path); + ret = file_system->fs_rename(file_system, + wt_session, from_path, to_path, durable ? 
WT_FS_DURABLE : 0); err: __wt_free(session, from_path); __wt_free(session, to_path); @@ -221,7 +168,7 @@ err: __wt_free(session, from_path); /* * __wt_fs_size -- - * Get the size of a file in bytes, by file name. + * Return the size of a file in bytes, by file name. */ static inline int __wt_fs_size(WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep) diff --git a/src/third_party/wiredtiger/src/include/os_fstream.i b/src/third_party/wiredtiger/src/include/os_fstream.i index 8c0fdadbdb0..92274431011 100644 --- a/src/third_party/wiredtiger/src/include/os_fstream.i +++ b/src/third_party/wiredtiger/src/include/os_fstream.i @@ -93,5 +93,5 @@ __wt_sync_and_rename(WT_SESSION_IMPL *session, WT_TRET(__wt_fclose(session, &fstr)); WT_RET(ret); - return (__wt_rename_and_sync_directory(session, from, to)); + return (__wt_fs_rename(session, from, to, true)); } diff --git a/src/third_party/wiredtiger/src/include/queue.h b/src/third_party/wiredtiger/src/include/queue.h index 1d494875cf6..e3d4daf0f4c 100644 --- a/src/third_party/wiredtiger/src/include/queue.h +++ b/src/third_party/wiredtiger/src/include/queue.h @@ -1,4 +1,4 @@ -/* +/*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * @@ -27,28 +27,18 @@ * SUCH DAMAGE. * * @(#)queue.h 8.5 (Berkeley) 8/20/94 - * $FreeBSD: src/sys/sys/queue.h,v 1.54 2002/08/05 05:18:43 alfred Exp $ + * $FreeBSD$ */ -#ifndef _DB_QUEUE_H_ -#define _DB_QUEUE_H_ - -#if defined(__cplusplus) -extern "C" { -#endif - /* + * This is a stripped-down version of the FreeBSD sys/queue.h include file. + * * WiredTiger only uses the TAILQ macros (we've gotten into trouble in the past * by trying to use simpler queues and subsequently discovering a list we didn't * think would ever get to be large could, under some workloads, become large, * and the linear performance for removal of elements from simpler macros proved * to be more trouble than the memory savings were worth. 
* - * Additionally, we've altered the TAILQ_INSERT_XXX functions to include a write - * barrier, in order to ensure we never insert a partially built structure onto - * a list (this is required because the spinlocks we use don't necessarily imply - * a write barrier). - * * We #undef all of the macros because there are incompatible versions of this * file and these macros on various systems. What makes the problem worse is * they are included and/or defined by system include files which we may have @@ -57,13 +47,28 @@ extern "C" { * several of the LIST_XXX macros. Visual C.NET 7.0 also defines some of these * same macros in Vc7\PlatformSDK\Include\WinNT.h. Make sure we use ours. */ - +#undef QMD_SAVELINK +#undef QMD_TAILQ_CHECK_HEAD +#undef QMD_TAILQ_CHECK_NEXT +#undef QMD_TAILQ_CHECK_PREV +#undef QMD_TAILQ_CHECK_TAIL +#undef QMD_TRACE_ELEM +#undef QMD_TRACE_HEAD +#undef QUEUE_TYPEOF +#undef TAILQ_CLASS_ENTRY +#undef TAILQ_CLASS_HEAD #undef TAILQ_CONCAT #undef TAILQ_EMPTY #undef TAILQ_ENTRY #undef TAILQ_FIRST #undef TAILQ_FOREACH +#undef TAILQ_FOREACH_FROM +#undef TAILQ_FOREACH_FROM_SAFE #undef TAILQ_FOREACH_REVERSE +#undef TAILQ_FOREACH_REVERSE_FROM +#undef TAILQ_FOREACH_REVERSE_FROM_SAFE +#undef TAILQ_FOREACH_REVERSE_SAFE +#undef TAILQ_FOREACH_SAFE #undef TAILQ_HEAD #undef TAILQ_HEAD_INITIALIZER #undef TAILQ_INIT @@ -76,41 +81,25 @@ extern "C" { #undef TAILQ_PREV #undef TAILQ_REMOVE #undef TRACEBUF +#undef TRACEBUF_INITIALIZER #undef TRASHIT +#undef TAILQ_SWAP -#define QUEUE_MACRO_DEBUG 0 -#if QUEUE_MACRO_DEBUG -/* Store the last 2 places the queue element or head was altered */ -struct qm_trace { - char * lastfile; - int lastline; - char * prevfile; - int prevline; -}; - -#define TRACEBUF struct qm_trace trace; -#define TRASHIT(x) do {(x) = (void *)-1;} while (0) - -#define QMD_TRACE_HEAD(head) do { \ - (head)->trace.prevline = (head)->trace.lastline; \ - (head)->trace.prevfile = (head)->trace.lastfile; \ - (head)->trace.lastline = __LINE__; \ - 
(head)->trace.lastfile = __FILE__; \ -} while (0) - -#define QMD_TRACE_ELEM(elem) do { \ - (elem)->trace.prevline = (elem)->trace.lastline; \ - (elem)->trace.prevfile = (elem)->trace.lastfile; \ - (elem)->trace.lastline = __LINE__; \ - (elem)->trace.lastfile = __FILE__; \ -} while (0) - -#else #define QMD_TRACE_ELEM(elem) #define QMD_TRACE_HEAD(head) +#define QMD_SAVELINK(name, link) #define TRACEBUF +#define TRACEBUF_INITIALIZER #define TRASHIT(x) -#endif /* QUEUE_MACRO_DEBUG */ + +#ifdef __cplusplus +/* + * In C++ there can be structure lists and class lists: + */ +#define QUEUE_TYPEOF(type) type +#else +#define QUEUE_TYPEOF(type) struct type +#endif /* * Tail queue declarations. @@ -122,8 +111,15 @@ struct name { \ TRACEBUF \ } +#define TAILQ_CLASS_HEAD(name, type) \ +struct name { \ + class type *tqh_first; /* first element */ \ + class type **tqh_last; /* addr of last next element */ \ + TRACEBUF \ +} + #define TAILQ_HEAD_INITIALIZER(head) \ - { NULL, &(head).tqh_first } + { NULL, &(head).tqh_first, TRACEBUF_INITIALIZER } #define TAILQ_ENTRY(type) \ struct { \ @@ -132,16 +128,28 @@ struct { \ TRACEBUF \ } +#define TAILQ_CLASS_ENTRY(type) \ +struct { \ + class type *tqe_next; /* next element */ \ + class type **tqe_prev; /* address of previous next element */ \ + TRACEBUF \ +} + /* * Tail queue functions. */ +#define QMD_TAILQ_CHECK_HEAD(head, field) +#define QMD_TAILQ_CHECK_TAIL(head, headname) +#define QMD_TAILQ_CHECK_NEXT(elm, field) +#define QMD_TAILQ_CHECK_PREV(elm, field) + #define TAILQ_CONCAT(head1, head2, field) do { \ if (!TAILQ_EMPTY(head2)) { \ *(head1)->tqh_last = (head2)->tqh_first; \ (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \ (head1)->tqh_last = (head2)->tqh_last; \ TAILQ_INIT((head2)); \ - QMD_TRACE_HEAD(head); \ + QMD_TRACE_HEAD(head1); \ QMD_TRACE_HEAD(head2); \ } \ } while (0) @@ -155,11 +163,41 @@ struct { \ (var); \ (var) = TAILQ_NEXT((var), field)) +#define TAILQ_FOREACH_FROM(var, head, field) \ + for ((var) = ((var) ? 
(var) : TAILQ_FIRST((head))); \ + (var); \ + (var) = TAILQ_NEXT((var), field)) + +#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = TAILQ_FIRST((head)); \ + (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define TAILQ_FOREACH_FROM_SAFE(var, head, field, tvar) \ + for ((var) = ((var) ? (var) : TAILQ_FIRST((head))); \ + (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + #define TAILQ_FOREACH_REVERSE(var, head, headname, field) \ for ((var) = TAILQ_LAST((head), headname); \ (var); \ (var) = TAILQ_PREV((var), headname, field)) +#define TAILQ_FOREACH_REVERSE_FROM(var, head, headname, field) \ + for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname)); \ + (var); \ + (var) = TAILQ_PREV((var), headname, field)) + +#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \ + for ((var) = TAILQ_LAST((head), headname); \ + (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \ + (var) = (tvar)) + +#define TAILQ_FOREACH_REVERSE_FROM_SAFE(var, head, headname, field, tvar) \ + for ((var) = ((var) ? 
(var) : TAILQ_LAST((head), headname)); \ + (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \ + (var) = (tvar)) + #define TAILQ_INIT(head) do { \ TAILQ_FIRST((head)) = NULL; \ (head)->tqh_last = &TAILQ_FIRST((head)); \ @@ -167,9 +205,9 @@ struct { \ } while (0) #define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ - WT_WRITE_BARRIER(); \ + QMD_TAILQ_CHECK_NEXT(listelm, field); \ if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\ - TAILQ_NEXT((elm), field)->field.tqe_prev = \ + TAILQ_NEXT((elm), field)->field.tqe_prev = \ &TAILQ_NEXT((elm), field); \ else { \ (head)->tqh_last = &TAILQ_NEXT((elm), field); \ @@ -178,21 +216,21 @@ struct { \ TAILQ_NEXT((listelm), field) = (elm); \ (elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \ QMD_TRACE_ELEM(&(elm)->field); \ - QMD_TRACE_ELEM(&listelm->field); \ + QMD_TRACE_ELEM(&(listelm)->field); \ } while (0) #define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ - WT_WRITE_BARRIER(); \ + QMD_TAILQ_CHECK_PREV(listelm, field); \ (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ TAILQ_NEXT((elm), field) = (listelm); \ *(listelm)->field.tqe_prev = (elm); \ (listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \ QMD_TRACE_ELEM(&(elm)->field); \ - QMD_TRACE_ELEM(&listelm->field); \ + QMD_TRACE_ELEM(&(listelm)->field); \ } while (0) #define TAILQ_INSERT_HEAD(head, elm, field) do { \ - WT_WRITE_BARRIER(); \ + QMD_TAILQ_CHECK_HEAD(head, field); \ if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != NULL) \ TAILQ_FIRST((head))->field.tqe_prev = \ &TAILQ_NEXT((elm), field); \ @@ -205,7 +243,7 @@ struct { \ } while (0) #define TAILQ_INSERT_TAIL(head, elm, field) do { \ - WT_WRITE_BARRIER(); \ + QMD_TAILQ_CHECK_TAIL(head, field); \ TAILQ_NEXT((elm), field) = NULL; \ (elm)->field.tqe_prev = (head)->tqh_last; \ *(head)->tqh_last = (elm); \ @@ -223,20 +261,36 @@ struct { \ (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) #define TAILQ_REMOVE(head, elm, field) do { \ + 
QMD_SAVELINK(oldnext, (elm)->field.tqe_next); \ + QMD_SAVELINK(oldprev, (elm)->field.tqe_prev); \ + QMD_TAILQ_CHECK_NEXT(elm, field); \ + QMD_TAILQ_CHECK_PREV(elm, field); \ if ((TAILQ_NEXT((elm), field)) != NULL) \ - TAILQ_NEXT((elm), field)->field.tqe_prev = \ + TAILQ_NEXT((elm), field)->field.tqe_prev = \ (elm)->field.tqe_prev; \ else { \ (head)->tqh_last = (elm)->field.tqe_prev; \ QMD_TRACE_HEAD(head); \ } \ *(elm)->field.tqe_prev = TAILQ_NEXT((elm), field); \ - TRASHIT((elm)->field.tqe_next); \ - TRASHIT((elm)->field.tqe_prev); \ + TRASHIT(*oldnext); \ + TRASHIT(*oldprev); \ QMD_TRACE_ELEM(&(elm)->field); \ } while (0) -#if defined(__cplusplus) -} -#endif -#endif /* !_DB_QUEUE_H_ */ +#define TAILQ_SWAP(head1, head2, type, field) do { \ + QUEUE_TYPEOF(type) *swap_first = (head1)->tqh_first; \ + QUEUE_TYPEOF(type) **swap_last = (head1)->tqh_last; \ + (head1)->tqh_first = (head2)->tqh_first; \ + (head1)->tqh_last = (head2)->tqh_last; \ + (head2)->tqh_first = swap_first; \ + (head2)->tqh_last = swap_last; \ + if ((swap_first = (head1)->tqh_first) != NULL) \ + swap_first->field.tqe_prev = &(head1)->tqh_first; \ + else \ + (head1)->tqh_last = &(head1)->tqh_first; \ + if ((swap_first = (head2)->tqh_first) != NULL) \ + swap_first->field.tqe_prev = &(head2)->tqh_first; \ + else \ + (head2)->tqh_last = &(head2)->tqh_first; \ +} while (0) diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index 57126af8aa4..1df24382236 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -145,14 +145,14 @@ __wt_stats_clear(void *stats_arg, int slot) #define WT_STAT_DECRV(session, stats, fld, value) \ (stats)[WT_STATS_SLOT_ID(session)]->fld -= (int64_t)(value) #define WT_STAT_DECRV_ATOMIC(session, stats, fld, value) \ - __wt_atomic_addi64( \ + __wt_atomic_subi64( \ &(stats)[WT_STATS_SLOT_ID(session)]->fld, (int64_t)(value)) #define WT_STAT_DECR(session, stats, fld) \ 
WT_STAT_DECRV(session, stats, fld, 1) #define WT_STAT_INCRV(session, stats, fld, value) \ (stats)[WT_STATS_SLOT_ID(session)]->fld += (int64_t)(value) #define WT_STAT_INCRV_ATOMIC(session, stats, fld, value) \ - __wt_atomic_subi64( \ + __wt_atomic_addi64( \ &(stats)[WT_STATS_SLOT_ID(session)]->fld, (int64_t)(value)) #define WT_STAT_INCR(session, stats, fld) \ WT_STAT_INCRV(session, stats, fld, 1) @@ -273,9 +273,12 @@ struct __wt_connection_stats { int64_t block_write; int64_t block_byte_read; int64_t block_byte_write; + int64_t block_byte_write_checkpoint; int64_t block_map_read; int64_t block_byte_map_read; + int64_t cache_bytes_image; int64_t cache_bytes_inuse; + int64_t cache_bytes_other; int64_t cache_bytes_read; int64_t cache_bytes_write; int64_t cache_eviction_checkpoint; @@ -309,6 +312,8 @@ struct __wt_connection_stats { int64_t cache_eviction_maximum_page_size; int64_t cache_eviction_dirty; int64_t cache_eviction_app_dirty; + int64_t cache_read_overflow; + int64_t cache_overflow_value; int64_t cache_eviction_deepen; int64_t cache_write_lookaside; int64_t cache_pages_inuse; @@ -316,6 +321,7 @@ struct __wt_connection_stats { int64_t cache_eviction_force_delete; int64_t cache_eviction_app; int64_t cache_eviction_pages_queued; + int64_t cache_eviction_pages_queued_urgent; int64_t cache_eviction_pages_queued_oldest; int64_t cache_read; int64_t cache_read_lookaside; @@ -328,7 +334,6 @@ struct __wt_connection_stats { int64_t cache_overhead; int64_t cache_bytes_internal; int64_t cache_bytes_leaf; - int64_t cache_bytes_overflow; int64_t cache_bytes_dirty; int64_t cache_pages_dirty; int64_t cache_eviction_clean; @@ -408,9 +413,25 @@ struct __wt_connection_stats { int64_t rec_split_stashed_objects; int64_t session_cursor_open; int64_t session_open; - int64_t fsync_active; - int64_t read_active; - int64_t write_active; + int64_t session_table_compact_fail; + int64_t session_table_compact_success; + int64_t session_table_create_fail; + int64_t 
session_table_create_success; + int64_t session_table_drop_fail; + int64_t session_table_drop_success; + int64_t session_table_rebalance_fail; + int64_t session_table_rebalance_success; + int64_t session_table_rename_fail; + int64_t session_table_rename_success; + int64_t session_table_salvage_fail; + int64_t session_table_salvage_success; + int64_t session_table_truncate_fail; + int64_t session_table_truncate_success; + int64_t session_table_verify_fail; + int64_t session_table_verify_success; + int64_t thread_fsync_active; + int64_t thread_read_active; + int64_t thread_write_active; int64_t page_busy_blocked; int64_t page_forcible_evict_blocked; int64_t page_locked_blocked; @@ -424,13 +445,13 @@ struct __wt_connection_stats { int64_t txn_checkpoint_time_max; int64_t txn_checkpoint_time_min; int64_t txn_checkpoint_time_recent; + int64_t txn_checkpoint_scrub_target; + int64_t txn_checkpoint_scrub_time; int64_t txn_checkpoint_time_total; int64_t txn_checkpoint; int64_t txn_fail_cache; int64_t txn_checkpoint_fsync_post; - int64_t txn_checkpoint_fsync_pre; int64_t txn_checkpoint_fsync_post_duration; - int64_t txn_checkpoint_fsync_pre_duration; int64_t txn_pinned_range; int64_t txn_pinned_checkpoint_range; int64_t txn_pinned_snapshot_range; @@ -484,6 +505,7 @@ struct __wt_dsrc_stats { int64_t btree_compact_rewrite; int64_t btree_row_internal; int64_t btree_row_leaf; + int64_t cache_bytes_inuse; int64_t cache_bytes_read; int64_t cache_bytes_write; int64_t cache_eviction_checkpoint; diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h index d10738cc670..2e41ae8620d 100644 --- a/src/third_party/wiredtiger/src/include/txn.h +++ b/src/third_party/wiredtiger/src/include/txn.h @@ -98,6 +98,7 @@ struct __wt_txn_global { volatile uint32_t checkpoint_id; /* Checkpoint's session ID */ volatile uint64_t checkpoint_gen; volatile uint64_t checkpoint_pinned; + volatile uint64_t checkpoint_txnid; /* Checkpoint's txn ID */ /* Named 
snapshot state. */ WT_RWLOCK *nsnap_rwlock; diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index 96f7426e421..8f0f49d9676 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -105,7 +105,8 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session) { WT_BTREE *btree; WT_TXN_GLOBAL *txn_global; - uint64_t checkpoint_gen, checkpoint_pinned, oldest_id; + uint64_t checkpoint_pinned, oldest_id; + bool include_checkpoint_txn; txn_global = &S2C(session)->txn_global; btree = S2BT_SAFE(session); @@ -117,7 +118,11 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session) * we take the minimum of the other two IDs, which is what we want. */ oldest_id = txn_global->oldest_id; - WT_ORDERED_READ(checkpoint_gen, txn_global->checkpoint_gen); + if (btree == NULL) + include_checkpoint_txn = false; + else + WT_ORDERED_READ( + include_checkpoint_txn, btree->include_checkpoint_txn); checkpoint_pinned = txn_global->checkpoint_pinned; /* @@ -130,10 +135,9 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session) * checkpoint, or this handle is up to date with the active checkpoint * then it's safe to ignore the checkpoint ID in the visibility check. */ - if (checkpoint_pinned == WT_TXN_NONE || + if (!include_checkpoint_txn || checkpoint_pinned == WT_TXN_NONE || WT_TXNID_LT(oldest_id, checkpoint_pinned) || - WT_SESSION_IS_CHECKPOINT(session) || - (btree != NULL && btree->checkpoint_gen == checkpoint_gen)) + WT_SESSION_IS_CHECKPOINT(session)) return (oldest_id); return (checkpoint_pinned); diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index f578f4e6c08..0e022048835 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -131,13 +131,13 @@ struct __wt_item { * The maximum packed size of a 64-bit integer. 
The ::wiredtiger_struct_pack * function will pack single long integers into at most this many bytes. */ -#define WT_INTPACK64_MAXSIZE ((int)sizeof (int64_t) + 1) +#define WT_INTPACK64_MAXSIZE ((int)sizeof(int64_t) + 1) /*! * The maximum packed size of a 32-bit integer. The ::wiredtiger_struct_pack * function will pack single integers into at most this many bytes. */ -#define WT_INTPACK32_MAXSIZE ((int)sizeof (int32_t) + 1) +#define WT_INTPACK32_MAXSIZE ((int)sizeof(int32_t) + 1) /*! * A WT_CURSOR handle is the interface to a cursor. @@ -405,6 +405,12 @@ struct __wt_cursor { * WT_CURSOR::next (WT_CURSOR::prev) method will iterate from the * beginning (end) of the table. * + * If the cursor does not have record number keys or was not configured + * with "append=true", the cursor ends with no key set and a subsequent + * call to the WT_CURSOR::get_key method will fail. The cursor ends with + * no value set and a subsequent call to the WT_CURSOR::get_value method + * will fail. + * * Inserting a new record after the current maximum record in a * fixed-length bit field column-store (that is, a store with an * 'r' type key and 't' type value) may implicitly create the missing @@ -1159,8 +1165,8 @@ struct __wt_session { * @config{ ),,} * @config{memory_page_max, the maximum size a page can grow to in * memory before being reconciled to disk. The specified size will be - * adjusted to a lower bound of <code>50 * leaf_page_max</code>\, and an - * upper bound of <code>cache_size / 2</code>. This limit is soft - it + * adjusted to a lower bound of <code>leaf_page_max</code>\, and an + * upper bound of <code>cache_size / 10</code>. This limit is soft - it * is possible for pages to be temporarily larger than this value. 
This * setting is ignored for LSM trees\, see \c chunk_size., an integer * between 512B and 10TB; default \c 5MB.} @@ -1783,8 +1789,6 @@ struct __wt_connection { * database can configure both log_size and wait to set an upper bound * for checkpoints; setting this value above 0 configures periodic * checkpoints., an integer between 0 and 2GB; default \c 0.} - * @config{ name, the checkpoint name., a string; - * default \c "WiredTigerCheckpoint".} * @config{ wait, seconds to wait between each * checkpoint; setting this value above 0 configures periodic * checkpoints., an integer between 0 and 100000; default \c 0.} @@ -1806,11 +1810,11 @@ struct __wt_connection { * @config{eviction_dirty_target, continue evicting until the cache has * less dirty memory than the value\, as a percentage of the total cache * size. Dirty pages will only be evicted if the cache is full enough - * to trigger eviction., an integer between 5 and 99; default \c 80.} + * to trigger eviction., an integer between 1 and 99; default \c 5.} * @config{eviction_dirty_trigger, trigger eviction when the cache is * using this much memory for dirty content\, as a percentage of the * total cache size. This setting only alters behavior if it is lower - * than eviction_trigger., an integer between 5 and 99; default \c 95.} + * than eviction_trigger., an integer between 1 and 99; default \c 20.} * @config{eviction_target, continue evicting until the cache has less * total memory than the value\, as a percentage of the total cache * size. Must be less than \c eviction_trigger., an integer between 10 @@ -1836,25 +1840,8 @@ struct __wt_connection { * configuration options defined below.} * @config{ archive, automatically archive * unneeded log files., a boolean flag; default \c true.} - * @config{ compressor, configure a compressor - * for log records. Permitted values are \c "none" or custom - * compression engine name created with WT_CONNECTION::add_compressor. 
- * If WiredTiger has builtin support for \c "snappy"\, \c "lz4" or \c - * "zlib" compression\, these names are also available. See @ref - * compression for more information., a string; default \c none.} - * @config{ enabled, enable logging subsystem., a - * boolean flag; default \c false.} - * @config{ file_max, the maximum size of log - * files., an integer between 100KB and 2GB; default \c 100MB.} - * @config{ path, the path to a directory into - * which the log files are written. If the value is not an absolute - * path name\, the files are created relative to the database home., a - * string; default \c ".".} - * @config{ prealloc, - * pre-allocate log files., a boolean flag; default \c true.} - * @config{ recover, run recovery or error if - * recovery needs to run after an unclean shutdown., a string\, chosen - * from the following options: \c "error"\, \c "on"; default \c on.} + * @config{ prealloc, pre-allocate log files., a + * boolean flag; default \c true.} * @config{ zero_fill, manually write zeroes into * log files., a boolean flag; default \c false.} * @config{ ),,} @@ -1914,11 +1901,6 @@ struct __wt_connection { * statistics in JSON format., a boolean flag; default \c false.} * @config{ on_close, log statistics on database * close., a boolean flag; default \c false.} - * @config{ path, the pathname to a file into - * which the log records are written\, may contain ISO C standard - * strftime conversion specifications. If the value is not an absolute - * path name\, the file is created relative to the database home., a - * string; default \c "WiredTigerStat.%d.%H".} * @config{ sources, if non-empty\, include * statistics for the list of data source URIs\, if they are open at the * time of the statistics logging. 
The list may include URIs matching a @@ -2223,11 +2205,10 @@ struct __wt_connection { * configure both log_size and wait to set an upper bound for checkpoints; * setting this value above 0 configures periodic checkpoints., an integer * between 0 and 2GB; default \c 0.} - * @config{ name, the - * checkpoint name., a string; default \c "WiredTigerCheckpoint".} - * @config{ wait, seconds to wait between each - * checkpoint; setting this value above 0 configures periodic checkpoints., an - * integer between 0 and 100000; default \c 0.} + * @config{ wait, + * seconds to wait between each checkpoint; setting this value above 0 + * configures periodic checkpoints., an integer between 0 and 100000; default \c + * 0.} * @config{ ),,} * @config{checkpoint_sync, flush files to stable storage when closing or * writing checkpoints., a boolean flag; default \c true.} @@ -2288,11 +2269,11 @@ struct __wt_connection { * @config{eviction_dirty_target, continue evicting until the cache has less * dirty memory than the value\, as a percentage of the total cache size. Dirty * pages will only be evicted if the cache is full enough to trigger eviction., - * an integer between 5 and 99; default \c 80.} + * an integer between 1 and 99; default \c 5.} * @config{eviction_dirty_trigger, trigger eviction when the cache is using this * much memory for dirty content\, as a percentage of the total cache size. * This setting only alters behavior if it is lower than eviction_trigger., an - * integer between 5 and 99; default \c 95.} + * integer between 1 and 99; default \c 20.} * @config{eviction_target, continue evicting until the cache has less total * memory than the value\, as a percentage of the total cache size. 
Must be * less than \c eviction_trigger., an integer between 10 and 99; default \c 80.} @@ -2343,9 +2324,10 @@ struct __wt_connection { * subsystem., a boolean flag; default \c false.} * @config{ file_max, the maximum size of log files., an * integer between 100KB and 2GB; default \c 100MB.} - * @config{ path, the path to a directory into which the - * log files are written. If the value is not an absolute path name\, the files - * are created relative to the database home., a string; default \c ".".} + * @config{ path, the name of a directory into which log + * files are written. The directory must already exist. If the value is not an + * absolute path\, the path is relative to the database home (see @ref + * absolute_path for more information)., a string; default \c ".".} * @config{ prealloc, pre-allocate log files., a boolean * flag; default \c true.} * @config{ recover, run recovery @@ -2415,16 +2397,15 @@ struct __wt_connection { * boolean flag; default \c false.} * @config{ on_close, * log statistics on database close., a boolean flag; default \c false.} - * @config{ path, the pathname to a file into which the - * log records are written\, may contain ISO C standard strftime conversion - * specifications. If the value is not an absolute path name\, the file is - * created relative to the database home., a string; default \c - * "WiredTigerStat.%d.%H".} - * @config{ sources, if - * non-empty\, include statistics for the list of data source URIs\, if they are - * open at the time of the statistics logging. The list may include URIs - * matching a single data source ("table:mytable")\, or a URI matching all data - * sources of a particular type ("table:")., a list of strings; default empty.} + * @config{ path, the name of a directory into which + * statistics files are written. The directory must already exist. 
If the + * value is not an absolute path\, the path is relative to the database home + * (see @ref absolute_path for more information)., a string; default \c ".".} + * @config{ sources, if non-empty\, include statistics + * for the list of data source URIs\, if they are open at the time of the + * statistics logging. The list may include URIs matching a single data source + * ("table:mytable")\, or a URI matching all data sources of a particular type + * ("table:")., a list of strings; default empty.} * @config{ timestamp, a timestamp prepended to each log * record\, may contain strftime conversion specifications\, when \c json is * configured\, defaults to \c "%FT%Y.000Z"., a string; default \c "%b %d @@ -3701,24 +3682,34 @@ struct __wt_extractor { #if !defined(SWIG) /*! WT_FILE_SYSTEM::open_file file types */ typedef enum { - WT_OPEN_FILE_TYPE_CHECKPOINT, /*!< open a data file checkpoint */ - WT_OPEN_FILE_TYPE_DATA, /*!< open a data file */ - WT_OPEN_FILE_TYPE_DIRECTORY, /*!< open a directory */ - WT_OPEN_FILE_TYPE_LOG, /*!< open a log file */ - WT_OPEN_FILE_TYPE_REGULAR /*!< open a regular file */ -} WT_OPEN_FILE_TYPE; + WT_FS_OPEN_FILE_TYPE_CHECKPOINT,/*!< open a data file checkpoint */ + WT_FS_OPEN_FILE_TYPE_DATA, /*!< open a data file */ + WT_FS_OPEN_FILE_TYPE_DIRECTORY, /*!< open a directory */ + WT_FS_OPEN_FILE_TYPE_LOG, /*!< open a log file */ + WT_FS_OPEN_FILE_TYPE_REGULAR /*!< open a regular file */ +} WT_FS_OPEN_FILE_TYPE; /*! WT_FILE_SYSTEM::open_file flags: create if does not exist */ -#define WT_OPEN_CREATE 0x001 +#define WT_FS_OPEN_CREATE 0x001 /*! WT_FILE_SYSTEM::open_file flags: direct I/O requested */ -#define WT_OPEN_DIRECTIO 0x002 -/*! WT_FILE_SYSTEM::open_file flags: error if exclusive use not available */ -#define WT_OPEN_EXCLUSIVE 0x004 +#define WT_FS_OPEN_DIRECTIO 0x002 +/*! WT_FILE_SYSTEM::open_file flags: file creation must be durable */ +#define WT_FS_OPEN_DURABLE 0x004 +/*! 
+ * WT_FILE_SYSTEM::open_file flags: return EBUSY if exclusive use not available + */ +#define WT_FS_OPEN_EXCLUSIVE 0x008 #ifndef DOXYGEN -#define WT_OPEN_FIXED 0x008 /* Path not home relative (internal) */ +#define WT_FS_OPEN_FIXED 0x010 /* Path not home relative (internal) */ #endif /*! WT_FILE_SYSTEM::open_file flags: open is read-only */ -#define WT_OPEN_READONLY 0x010 +#define WT_FS_OPEN_READONLY 0x020 + +/*! + * WT_FILE_SYSTEM::remove or WT_FILE_SYSTEM::rename flags: the remove or rename + * operation must be durable + */ +#define WT_FS_DURABLE 0x001 /*! * The interface implemented by applications to provide a custom file system @@ -3748,7 +3739,7 @@ struct __wt_file_system { * @param[out] dirlist the method returns an allocated array of * individually allocated strings, one for each entry in the * directory. - * @param[out] countp the method the number of entries returned + * @param[out] countp the number of entries returned */ int (*fs_directory_list)(WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *directory, const char *prefix, @@ -3768,23 +3759,6 @@ struct __wt_file_system { WT_SESSION *session, char **dirlist, uint32_t count); /*! - * Flush the named directory. - * - * This method is not required for readonly file systems or file systems - * where it is not necessary to flush a file's directory to ensure the - * durability of file system operations, and should be set to NULL when - * not required by the file system. - * - * @errors - * - * @param file_system the WT_FILE_SYSTEM - * @param session the current WiredTiger session - * @param directory the name of the directory - */ - int (*fs_directory_sync)(WT_FILE_SYSTEM *file_system, - WT_SESSION *session, const char *directory); - - /*! * Return if the named file system object exists. * * @errors @@ -3800,6 +3774,16 @@ struct __wt_file_system { /*! * Open a handle for a named file system object * + * The method should return ENOENT if the file is not being created and + * does not exist. 
+ * + * The method should return EACCES if the file cannot be opened in the + * requested mode (for example, a file opened for writing in a readonly + * file system). + * + * The method should return EBUSY if ::WT_FS_OPEN_EXCLUSIVE is set and + * the file is in use. + * * @errors * * @param file_system the WT_FILE_SYSTEM @@ -3809,8 +3793,8 @@ struct __wt_file_system { * The file type is provided to allow optimization for different file * access patterns. * @param flags flags indicating how to open the file, one or more of - * ::WT_OPEN_CREATE, ::WT_OPEN_DIRECTIO, ::WT_OPEN_EXCLUSIVE or - * ::WT_OPEN_READONLY. + * ::WT_FS_OPEN_CREATE, ::WT_FS_OPEN_DIRECTIO, ::WT_FS_OPEN_DURABLE, + * ::WT_FS_OPEN_EXCLUSIVE or ::WT_FS_OPEN_READONLY. * @param[out] file_handlep the handle to the newly opened file. File * system implementations must allocate memory for the handle and * the WT_FILE_HANDLE::name field, and fill in the WT_FILE_HANDLE:: @@ -3819,7 +3803,7 @@ struct __wt_file_system { * their own structure as a superset of a WT_FILE_HANDLE:: structure. */ int (*fs_open_file)(WT_FILE_SYSTEM *file_system, WT_SESSION *session, - const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags, + const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags, WT_FILE_HANDLE **file_handlep); /*! @@ -3833,9 +3817,11 @@ struct __wt_file_system { * @param file_system the WT_FILE_SYSTEM * @param session the current WiredTiger session * @param name the name of the file system object + * @param durable if the operation requires durability + * @param flags 0 or ::WT_FS_DURABLE */ - int (*fs_remove)( - WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name); + int (*fs_remove)(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, const char *name, uint32_t flags); /*! 
* Rename a named file system object @@ -3849,9 +3835,10 @@ struct __wt_file_system { * @param session the current WiredTiger session * @param from the original name of the object * @param to the new name for the object + * @param flags 0 or ::WT_FS_DURABLE */ - int (*fs_rename)(WT_FILE_SYSTEM *file_system, - WT_SESSION *session, const char *from, const char *to); + int (*fs_rename)(WT_FILE_SYSTEM *file_system, WT_SESSION *session, + const char *from, const char *to, uint32_t flags); /*! * Return the size of a named file system object @@ -3981,7 +3968,7 @@ struct __wt_file_handle { /*! * Lock/unlock a file from the perspective of other processes running - * in the system. + * in the system, where necessary. * * @errors * @@ -4256,340 +4243,380 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_BLOCK_BYTE_READ 1026 /*! block-manager: bytes written */ #define WT_STAT_CONN_BLOCK_BYTE_WRITE 1027 +/*! block-manager: bytes written for checkpoint */ +#define WT_STAT_CONN_BLOCK_BYTE_WRITE_CHECKPOINT 1028 /*! block-manager: mapped blocks read */ -#define WT_STAT_CONN_BLOCK_MAP_READ 1028 +#define WT_STAT_CONN_BLOCK_MAP_READ 1029 /*! block-manager: mapped bytes read */ -#define WT_STAT_CONN_BLOCK_BYTE_MAP_READ 1029 +#define WT_STAT_CONN_BLOCK_BYTE_MAP_READ 1030 +/*! cache: bytes belonging to page images in the cache */ +#define WT_STAT_CONN_CACHE_BYTES_IMAGE 1031 /*! cache: bytes currently in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_INUSE 1030 +#define WT_STAT_CONN_CACHE_BYTES_INUSE 1032 +/*! cache: bytes not belonging to page images in the cache */ +#define WT_STAT_CONN_CACHE_BYTES_OTHER 1033 /*! cache: bytes read into cache */ -#define WT_STAT_CONN_CACHE_BYTES_READ 1031 +#define WT_STAT_CONN_CACHE_BYTES_READ 1034 /*! cache: bytes written from cache */ -#define WT_STAT_CONN_CACHE_BYTES_WRITE 1032 +#define WT_STAT_CONN_CACHE_BYTES_WRITE 1035 /*! 
cache: checkpoint blocked page eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1033 +#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1036 /*! cache: eviction calls to get a page */ -#define WT_STAT_CONN_CACHE_EVICTION_GET_REF 1034 +#define WT_STAT_CONN_CACHE_EVICTION_GET_REF 1037 /*! cache: eviction calls to get a page found queue empty */ -#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY 1035 +#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY 1038 /*! cache: eviction calls to get a page found queue empty after locking */ -#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY2 1036 +#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY2 1039 /*! cache: eviction currently operating in aggressive mode */ -#define WT_STAT_CONN_CACHE_EVICTION_AGGRESSIVE_SET 1037 +#define WT_STAT_CONN_CACHE_EVICTION_AGGRESSIVE_SET 1040 /*! cache: eviction server candidate queue empty when topping up */ -#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1038 +#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1041 /*! cache: eviction server candidate queue not empty when topping up */ -#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1039 +#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1042 /*! cache: eviction server evicting pages */ -#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1040 +#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1043 /*! cache: eviction server populating queue, but not evicting pages */ -#define WT_STAT_CONN_CACHE_EVICTION_SERVER_NOT_EVICTING 1041 +#define WT_STAT_CONN_CACHE_EVICTION_SERVER_NOT_EVICTING 1044 /*! cache: eviction server skipped very large page */ -#define WT_STAT_CONN_CACHE_EVICTION_SERVER_TOOBIG 1042 +#define WT_STAT_CONN_CACHE_EVICTION_SERVER_TOOBIG 1045 /*! cache: eviction server slept, because we did not make progress with * eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SERVER_SLEPT 1043 +#define WT_STAT_CONN_CACHE_EVICTION_SERVER_SLEPT 1046 /*! 
cache: eviction server unable to reach eviction goal */ -#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1044 +#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1047 /*! cache: eviction worker thread evicting pages */ -#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1045 +#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1048 /*! cache: failed eviction of pages that exceeded the in-memory maximum */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1046 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1049 /*! cache: files with active eviction walks */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1047 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1050 /*! cache: files with new eviction walks started */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1048 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1051 /*! cache: hazard pointer blocked page eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1049 +#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1052 /*! cache: hazard pointer check calls */ -#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1050 +#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1053 /*! cache: hazard pointer check entries walked */ -#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1051 +#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1054 /*! cache: hazard pointer maximum array length */ -#define WT_STAT_CONN_CACHE_HAZARD_MAX 1052 +#define WT_STAT_CONN_CACHE_HAZARD_MAX 1055 /*! cache: in-memory page passed criteria to be split */ -#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1053 +#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1056 /*! cache: in-memory page splits */ -#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1054 +#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1057 /*! cache: internal pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1055 +#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1058 /*! cache: internal pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1056 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1059 /*! 
cache: leaf pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1057 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1060 /*! cache: lookaside table insert calls */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1058 +#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1061 /*! cache: lookaside table remove calls */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1059 +#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1062 /*! cache: maximum bytes configured */ -#define WT_STAT_CONN_CACHE_BYTES_MAX 1060 +#define WT_STAT_CONN_CACHE_BYTES_MAX 1063 /*! cache: maximum page size at eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1061 +#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1064 /*! cache: modified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1062 +#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1065 /*! cache: modified pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1063 +#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1066 +/*! cache: overflow pages read into cache */ +#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1067 +/*! cache: overflow values cached in memory */ +#define WT_STAT_CONN_CACHE_OVERFLOW_VALUE 1068 /*! cache: page split during eviction deepened the tree */ -#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1064 +#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1069 /*! cache: page written requiring lookaside records */ -#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1065 +#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1070 /*! cache: pages currently held in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_INUSE 1066 +#define WT_STAT_CONN_CACHE_PAGES_INUSE 1071 /*! cache: pages evicted because they exceeded the in-memory maximum */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1067 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1072 /*! 
cache: pages evicted because they had chains of deleted items */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1068 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1073 /*! cache: pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP 1069 +#define WT_STAT_CONN_CACHE_EVICTION_APP 1074 /*! cache: pages queued for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1070 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1075 /*! cache: pages queued for urgent eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1071 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1076 +/*! cache: pages queued for urgent eviction during walk */ +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1077 /*! cache: pages read into cache */ -#define WT_STAT_CONN_CACHE_READ 1072 +#define WT_STAT_CONN_CACHE_READ 1078 /*! cache: pages read into cache requiring lookaside entries */ -#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1073 +#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1079 /*! cache: pages requested from the cache */ -#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1074 +#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1080 /*! cache: pages seen by eviction walk */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1075 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1081 /*! cache: pages selected for eviction unable to be evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1076 +#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1082 /*! cache: pages walked for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_WALK 1077 +#define WT_STAT_CONN_CACHE_EVICTION_WALK 1083 /*! cache: pages written from cache */ -#define WT_STAT_CONN_CACHE_WRITE 1078 +#define WT_STAT_CONN_CACHE_WRITE 1084 /*! cache: pages written requiring in-memory restoration */ -#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1079 +#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1085 /*! 
cache: percentage overhead */ -#define WT_STAT_CONN_CACHE_OVERHEAD 1080 +#define WT_STAT_CONN_CACHE_OVERHEAD 1086 /*! cache: tracked bytes belonging to internal pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1081 +#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1087 /*! cache: tracked bytes belonging to leaf pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_LEAF 1082 -/*! cache: tracked bytes belonging to overflow pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_OVERFLOW 1083 +#define WT_STAT_CONN_CACHE_BYTES_LEAF 1088 /*! cache: tracked dirty bytes in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1084 +#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1089 /*! cache: tracked dirty pages in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1085 +#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1090 /*! cache: unmodified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1086 +#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1091 /*! connection: auto adjusting condition resets */ -#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1087 +#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1092 /*! connection: auto adjusting condition wait calls */ -#define WT_STAT_CONN_COND_AUTO_WAIT 1088 +#define WT_STAT_CONN_COND_AUTO_WAIT 1093 /*! connection: files currently open */ -#define WT_STAT_CONN_FILE_OPEN 1089 +#define WT_STAT_CONN_FILE_OPEN 1094 /*! connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1090 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1095 /*! connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1091 +#define WT_STAT_CONN_MEMORY_FREE 1096 /*! connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1092 +#define WT_STAT_CONN_MEMORY_GROW 1097 /*! connection: pthread mutex condition wait calls */ -#define WT_STAT_CONN_COND_WAIT 1093 +#define WT_STAT_CONN_COND_WAIT 1098 /*! connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1094 +#define WT_STAT_CONN_RWLOCK_READ 1099 /*! 
connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1095 +#define WT_STAT_CONN_RWLOCK_WRITE 1100 /*! connection: total fsync I/Os */ -#define WT_STAT_CONN_FSYNC_IO 1096 +#define WT_STAT_CONN_FSYNC_IO 1101 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1097 +#define WT_STAT_CONN_READ_IO 1102 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1098 +#define WT_STAT_CONN_WRITE_IO 1103 /*! cursor: cursor create calls */ -#define WT_STAT_CONN_CURSOR_CREATE 1099 +#define WT_STAT_CONN_CURSOR_CREATE 1104 /*! cursor: cursor insert calls */ -#define WT_STAT_CONN_CURSOR_INSERT 1100 +#define WT_STAT_CONN_CURSOR_INSERT 1105 /*! cursor: cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT 1101 +#define WT_STAT_CONN_CURSOR_NEXT 1106 /*! cursor: cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV 1102 +#define WT_STAT_CONN_CURSOR_PREV 1107 /*! cursor: cursor remove calls */ -#define WT_STAT_CONN_CURSOR_REMOVE 1103 +#define WT_STAT_CONN_CURSOR_REMOVE 1108 /*! cursor: cursor reset calls */ -#define WT_STAT_CONN_CURSOR_RESET 1104 +#define WT_STAT_CONN_CURSOR_RESET 1109 /*! cursor: cursor restarted searches */ -#define WT_STAT_CONN_CURSOR_RESTART 1105 +#define WT_STAT_CONN_CURSOR_RESTART 1110 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1106 +#define WT_STAT_CONN_CURSOR_SEARCH 1111 /*! cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1107 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1112 /*! cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1108 +#define WT_STAT_CONN_CURSOR_UPDATE 1113 /*! cursor: truncate calls */ -#define WT_STAT_CONN_CURSOR_TRUNCATE 1109 +#define WT_STAT_CONN_CURSOR_TRUNCATE 1114 /*! data-handle: connection data handles currently active */ -#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1110 +#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1115 /*! 
data-handle: connection sweep candidate became referenced */ -#define WT_STAT_CONN_DH_SWEEP_REF 1111 +#define WT_STAT_CONN_DH_SWEEP_REF 1116 /*! data-handle: connection sweep dhandles closed */ -#define WT_STAT_CONN_DH_SWEEP_CLOSE 1112 +#define WT_STAT_CONN_DH_SWEEP_CLOSE 1117 /*! data-handle: connection sweep dhandles removed from hash list */ -#define WT_STAT_CONN_DH_SWEEP_REMOVE 1113 +#define WT_STAT_CONN_DH_SWEEP_REMOVE 1118 /*! data-handle: connection sweep time-of-death sets */ -#define WT_STAT_CONN_DH_SWEEP_TOD 1114 +#define WT_STAT_CONN_DH_SWEEP_TOD 1119 /*! data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_SWEEPS 1115 +#define WT_STAT_CONN_DH_SWEEPS 1120 /*! data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1116 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1121 /*! data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1117 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1122 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1118 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1123 /*! log: consolidated slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1119 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1124 /*! log: consolidated slot join races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1120 +#define WT_STAT_CONN_LOG_SLOT_RACES 1125 /*! log: consolidated slot join transitions */ -#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1121 +#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1126 /*! log: consolidated slot joins */ -#define WT_STAT_CONN_LOG_SLOT_JOINS 1122 +#define WT_STAT_CONN_LOG_SLOT_JOINS 1127 /*! log: consolidated slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1123 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1128 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1124 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1129 /*! 
log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1125 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1130 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1126 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1131 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1127 +#define WT_STAT_CONN_LOG_FLUSH 1132 /*! log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1128 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1133 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1129 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1134 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1130 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1135 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1131 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1136 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1132 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1137 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1133 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1138 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1134 +#define WT_STAT_CONN_LOG_SCANS 1139 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1135 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1140 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1136 +#define WT_STAT_CONN_LOG_WRITE_LSN 1141 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1137 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1142 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1138 +#define WT_STAT_CONN_LOG_SYNC 1143 /*! log: log sync time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DURATION 1139 +#define WT_STAT_CONN_LOG_SYNC_DURATION 1144 /*! 
log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1140 +#define WT_STAT_CONN_LOG_SYNC_DIR 1145 /*! log: log sync_dir time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1141 +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1146 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1142 +#define WT_STAT_CONN_LOG_WRITES 1147 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1143 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1148 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1144 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1149 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1145 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1150 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1146 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1151 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1147 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1152 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1148 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1153 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1149 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1154 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1150 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1155 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1151 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1156 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1152 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1157 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1153 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1158 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1154 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1159 /*! 
reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1155 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1160 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1156 +#define WT_STAT_CONN_REC_PAGES 1161 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1157 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1162 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1158 +#define WT_STAT_CONN_REC_PAGE_DELETE 1163 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1159 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1164 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1160 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1165 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1161 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1166 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1162 +#define WT_STAT_CONN_SESSION_OPEN 1167 +/*! session: table compact failed calls */ +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1168 +/*! session: table compact successful calls */ +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1169 +/*! session: table create failed calls */ +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1170 +/*! session: table create successful calls */ +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1171 +/*! session: table drop failed calls */ +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1172 +/*! session: table drop successful calls */ +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1173 +/*! session: table rebalance failed calls */ +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1174 +/*! session: table rebalance successful calls */ +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1175 +/*! 
session: table rename failed calls */ +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1176 +/*! session: table rename successful calls */ +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1177 +/*! session: table salvage failed calls */ +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1178 +/*! session: table salvage successful calls */ +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1179 +/*! session: table truncate failed calls */ +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1180 +/*! session: table truncate successful calls */ +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1181 +/*! session: table verify failed calls */ +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1182 +/*! session: table verify successful calls */ +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1183 /*! thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_FSYNC_ACTIVE 1163 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1184 /*! thread-state: active filesystem read calls */ -#define WT_STAT_CONN_READ_ACTIVE 1164 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1185 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_WRITE_ACTIVE 1165 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1186 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1166 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1187 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1167 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1188 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1168 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1189 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1169 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1190 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1170 +#define WT_STAT_CONN_PAGE_SLEEP 1191 /*! 
transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1171 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1192 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1172 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1193 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1173 +#define WT_STAT_CONN_TXN_BEGIN 1194 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1174 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1195 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1175 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1196 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1176 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1197 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1177 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1198 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1178 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1199 +/*! transaction: transaction checkpoint scrub dirty target */ +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1200 +/*! transaction: transaction checkpoint scrub time (msecs) */ +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1201 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1179 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1202 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1180 +#define WT_STAT_CONN_TXN_CHECKPOINT 1203 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1181 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1204 /*! 
transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1182 -/*! transaction: transaction fsync calls for checkpoint before allocating - * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_PRE 1183 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1205 /*! transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1184 -/*! transaction: transaction fsync duration for checkpoint before - * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_PRE_DURATION 1185 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1206 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1186 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1207 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1187 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1208 /*! transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1188 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1209 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1189 +#define WT_STAT_CONN_TXN_SYNC 1210 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1190 +#define WT_STAT_CONN_TXN_COMMIT 1211 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1191 +#define WT_STAT_CONN_TXN_ROLLBACK 1212 /*! * @} @@ -4678,127 +4705,129 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_DSRC_BTREE_ROW_INTERNAL 2038 /*! btree: row-store leaf pages */ #define WT_STAT_DSRC_BTREE_ROW_LEAF 2039 +/*! cache: bytes currently in the cache */ +#define WT_STAT_DSRC_CACHE_BYTES_INUSE 2040 /*! 
cache: bytes read into cache */ -#define WT_STAT_DSRC_CACHE_BYTES_READ 2040 +#define WT_STAT_DSRC_CACHE_BYTES_READ 2041 /*! cache: bytes written from cache */ -#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2041 +#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2042 /*! cache: checkpoint blocked page eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2042 +#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2043 /*! cache: data source pages selected for eviction unable to be evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2043 +#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2044 /*! cache: hazard pointer blocked page eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2044 +#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2045 /*! cache: in-memory page passed criteria to be split */ -#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2045 +#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2046 /*! cache: in-memory page splits */ -#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2046 +#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2047 /*! cache: internal pages evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2047 +#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2048 /*! cache: internal pages split during eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2048 +#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2049 /*! cache: leaf pages split during eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2049 +#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2050 /*! cache: modified pages evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2050 +#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2051 /*! cache: overflow pages read into cache */ -#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2051 +#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2052 /*! cache: overflow values cached in memory */ -#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2052 +#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2053 /*! 
cache: page split during eviction deepened the tree */ -#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2053 +#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2054 /*! cache: page written requiring lookaside records */ -#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2054 +#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2055 /*! cache: pages read into cache */ -#define WT_STAT_DSRC_CACHE_READ 2055 +#define WT_STAT_DSRC_CACHE_READ 2056 /*! cache: pages read into cache requiring lookaside entries */ -#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2056 +#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2057 /*! cache: pages requested from the cache */ -#define WT_STAT_DSRC_CACHE_PAGES_REQUESTED 2057 +#define WT_STAT_DSRC_CACHE_PAGES_REQUESTED 2058 /*! cache: pages written from cache */ -#define WT_STAT_DSRC_CACHE_WRITE 2058 +#define WT_STAT_DSRC_CACHE_WRITE 2059 /*! cache: pages written requiring in-memory restoration */ -#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2059 +#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2060 /*! cache: unmodified pages evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2060 +#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2061 /*! compression: compressed pages read */ -#define WT_STAT_DSRC_COMPRESS_READ 2061 +#define WT_STAT_DSRC_COMPRESS_READ 2062 /*! compression: compressed pages written */ -#define WT_STAT_DSRC_COMPRESS_WRITE 2062 +#define WT_STAT_DSRC_COMPRESS_WRITE 2063 /*! compression: page written failed to compress */ -#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2063 +#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2064 /*! compression: page written was too small to compress */ -#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2064 +#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2065 /*! compression: raw compression call failed, additional data available */ -#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2065 +#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2066 /*! 
compression: raw compression call failed, no additional data available */ -#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2066 +#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2067 /*! compression: raw compression call succeeded */ -#define WT_STAT_DSRC_COMPRESS_RAW_OK 2067 +#define WT_STAT_DSRC_COMPRESS_RAW_OK 2068 /*! cursor: bulk-loaded cursor-insert calls */ -#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2068 +#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2069 /*! cursor: create calls */ -#define WT_STAT_DSRC_CURSOR_CREATE 2069 +#define WT_STAT_DSRC_CURSOR_CREATE 2070 /*! cursor: cursor-insert key and value bytes inserted */ -#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2070 +#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2071 /*! cursor: cursor-remove key bytes removed */ -#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2071 +#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2072 /*! cursor: cursor-update value bytes updated */ -#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2072 +#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2073 /*! cursor: insert calls */ -#define WT_STAT_DSRC_CURSOR_INSERT 2073 +#define WT_STAT_DSRC_CURSOR_INSERT 2074 /*! cursor: next calls */ -#define WT_STAT_DSRC_CURSOR_NEXT 2074 +#define WT_STAT_DSRC_CURSOR_NEXT 2075 /*! cursor: prev calls */ -#define WT_STAT_DSRC_CURSOR_PREV 2075 +#define WT_STAT_DSRC_CURSOR_PREV 2076 /*! cursor: remove calls */ -#define WT_STAT_DSRC_CURSOR_REMOVE 2076 +#define WT_STAT_DSRC_CURSOR_REMOVE 2077 /*! cursor: reset calls */ -#define WT_STAT_DSRC_CURSOR_RESET 2077 +#define WT_STAT_DSRC_CURSOR_RESET 2078 /*! cursor: restarted searches */ -#define WT_STAT_DSRC_CURSOR_RESTART 2078 +#define WT_STAT_DSRC_CURSOR_RESTART 2079 /*! cursor: search calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH 2079 +#define WT_STAT_DSRC_CURSOR_SEARCH 2080 /*! cursor: search near calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2080 +#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2081 /*! cursor: truncate calls */ -#define WT_STAT_DSRC_CURSOR_TRUNCATE 2081 +#define WT_STAT_DSRC_CURSOR_TRUNCATE 2082 /*! 
cursor: update calls */ -#define WT_STAT_DSRC_CURSOR_UPDATE 2082 +#define WT_STAT_DSRC_CURSOR_UPDATE 2083 /*! reconciliation: dictionary matches */ -#define WT_STAT_DSRC_REC_DICTIONARY 2083 +#define WT_STAT_DSRC_REC_DICTIONARY 2084 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2084 +#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2085 /*! reconciliation: internal page key bytes discarded using suffix * compression */ -#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2085 +#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2086 /*! reconciliation: internal page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2086 +#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2087 /*! reconciliation: internal-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2087 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2088 /*! reconciliation: leaf page key bytes discarded using prefix compression */ -#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2088 +#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2089 /*! reconciliation: leaf page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2089 +#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2090 /*! reconciliation: leaf-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2090 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2091 /*! reconciliation: maximum blocks required for a page */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2091 +#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2092 /*! reconciliation: overflow values written */ -#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2092 +#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2093 /*! reconciliation: page checksum matches */ -#define WT_STAT_DSRC_REC_PAGE_MATCH 2093 +#define WT_STAT_DSRC_REC_PAGE_MATCH 2094 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_DSRC_REC_PAGES 2094 +#define WT_STAT_DSRC_REC_PAGES 2095 /*! 
reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_DSRC_REC_PAGES_EVICTION 2095 +#define WT_STAT_DSRC_REC_PAGES_EVICTION 2096 /*! reconciliation: pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE 2096 +#define WT_STAT_DSRC_REC_PAGE_DELETE 2097 /*! session: object compaction */ -#define WT_STAT_DSRC_SESSION_COMPACT 2097 +#define WT_STAT_DSRC_SESSION_COMPACT 2098 /*! session: open cursor count */ -#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2098 +#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2099 /*! transaction: update conflicts */ -#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2099 +#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2100 /*! * @} diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c index bf83c280d8d..8ec910115ac 100644 --- a/src/third_party/wiredtiger/src/log/log.c +++ b/src/third_party/wiredtiger/src/log/log.c @@ -9,13 +9,17 @@ #include "wt_internal.h" static int __log_openfile( - WT_SESSION_IMPL *, bool, WT_FH **, const char *, uint32_t); + WT_SESSION_IMPL *, WT_FH **, const char *, uint32_t, uint32_t); static int __log_write_internal( WT_SESSION_IMPL *, WT_ITEM *, WT_LSN *, uint32_t); #define WT_LOG_COMPRESS_SKIP (offsetof(WT_LOG_RECORD, record)) #define WT_LOG_ENCRYPT_SKIP (offsetof(WT_LOG_RECORD, record)) +/* Flags to __log_openfile */ +#define WT_LOG_OPEN_CREATE_OK 0x01 +#define WT_LOG_OPEN_VERIFY 0x02 + /* * __wt_log_ckpt -- * Record the given LSN as the checkpoint LSN and signal the archive @@ -146,7 +150,7 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) * file than we want. 
*/ WT_ERR(__log_openfile(session, - false, &log_fh, WT_LOG_FILENAME, min_lsn->l.file)); + &log_fh, WT_LOG_FILENAME, min_lsn->l.file, 0)); WT_ERR(__wt_verbose(session, WT_VERB_LOG, "log_force_sync: sync %s to LSN %" PRIu32 "/%" PRIu32, log_fh->name, min_lsn->l.file, min_lsn->l.offset)); @@ -277,7 +281,8 @@ __log_get_files(WT_SESSION_IMPL *session, /* * __wt_log_get_all_files -- * Retrieve the list of log files, either all of them or only the active - * ones (those that are not candidates for archiving). + * ones (those that are not candidates for archiving). The caller is + * responsible for freeing the directory list returned. */ int __wt_log_get_all_files(WT_SESSION_IMPL *session, @@ -307,6 +312,10 @@ __wt_log_get_all_files(WT_SESSION_IMPL *session, for (max = 0, i = 0; i < count; ) { WT_ERR(__wt_log_extract_lognum(session, files[i], &id)); if (active_only && id < log->ckpt_lsn.l.file) { + /* + * Any files not being returned are individually freed + * and the array adjusted. + */ __wt_free(session, files[i]); files[i] = files[count - 1]; files[--count] = NULL; @@ -321,6 +330,10 @@ __wt_log_get_all_files(WT_SESSION_IMPL *session, *filesp = files; *countp = count; + /* + * Only free on error. The caller is responsible for calling free + * once it is done using the returned list. 
+ */ if (0) { err: WT_TRET(__wt_fs_directory_list_free(session, &files, count)); } @@ -674,7 +687,7 @@ err: __wt_scr_free(session, &buf); */ static int __log_openfile(WT_SESSION_IMPL *session, - bool ok_create, WT_FH **fhp, const char *file_prefix, uint32_t id) + WT_FH **fhp, const char *file_prefix, uint32_t id, uint32_t flags) { WT_CONNECTION_IMPL *conn; WT_DECL_ITEM(buf); @@ -683,7 +696,7 @@ __log_openfile(WT_SESSION_IMPL *session, WT_LOG_DESC *desc; WT_LOG_RECORD *logrec; uint32_t allocsize; - u_int flags; + u_int wtopen_flags; conn = S2C(session); log = conn->log; @@ -695,19 +708,19 @@ __log_openfile(WT_SESSION_IMPL *session, WT_ERR(__log_filename(session, id, file_prefix, buf)); WT_ERR(__wt_verbose(session, WT_VERB_LOG, "opening log %s", (const char *)buf->data)); - flags = 0; - if (ok_create) - LF_SET(WT_OPEN_CREATE); + wtopen_flags = 0; + if (LF_ISSET(WT_LOG_OPEN_CREATE_OK)) + FLD_SET(wtopen_flags, WT_FS_OPEN_CREATE); if (FLD_ISSET(conn->direct_io, WT_DIRECT_IO_LOG)) - LF_SET(WT_OPEN_DIRECTIO); + FLD_SET(wtopen_flags, WT_FS_OPEN_DIRECTIO); WT_ERR(__wt_open( - session, buf->data, WT_OPEN_FILE_TYPE_LOG, flags, fhp)); + session, buf->data, WT_FS_OPEN_FILE_TYPE_LOG, wtopen_flags, fhp)); /* * If we are not creating the log file but opening it for reading, * check that the magic number and versions are correct. */ - if (!ok_create) { + if (LF_ISSET(WT_LOG_OPEN_VERIFY)) { WT_ERR(__wt_buf_grow(session, buf, allocsize)); memset(buf->mem, 0, allocsize); WT_ERR(__wt_read(session, *fhp, 0, allocsize, buf->mem)); @@ -773,7 +786,7 @@ __log_alloc_prealloc(WT_SESSION_IMPL *session, uint32_t to_num) * All file setup, writing the header and pre-allocation was done * before. We only need to rename it. 
*/ - WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data)); + WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data, false)); err: __wt_scr_free(session, &from_path); __wt_scr_free(session, &to_path); @@ -870,7 +883,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) * window where another thread could see a NULL log file handle. */ WT_RET(__log_openfile(session, - false, &log_fh, WT_LOG_FILENAME, log->fileid)); + &log_fh, WT_LOG_FILENAME, log->fileid, 0)); WT_PUBLISH(log->log_fh, log_fh); /* * We need to setup the LSNs. Set the end LSN and alloc LSN to @@ -978,7 +991,7 @@ __log_truncate(WT_SESSION_IMPL *session, * Truncate the log file to the given LSN. */ WT_ERR(__log_openfile(session, - false, &log_fh, file_prefix, lsn->l.file)); + &log_fh, file_prefix, lsn->l.file, 0)); WT_ERR(__wt_ftruncate(session, log_fh, lsn->l.offset)); WT_ERR(__wt_fsync(session, log_fh, true)); WT_ERR(__wt_close(session, &log_fh)); @@ -995,7 +1008,7 @@ __log_truncate(WT_SESSION_IMPL *session, if (lognum > lsn->l.file && lognum < log->trunc_lsn.l.file) { WT_ERR(__log_openfile(session, - false, &log_fh, file_prefix, lognum)); + &log_fh, file_prefix, lognum, 0)); /* * If there are intervening files pre-allocated, * truncate them to the end of the log file header. @@ -1047,7 +1060,8 @@ __wt_log_allocfile( /* * Set up the temporary file. */ - WT_ERR(__log_openfile(session, true, &log_fh, WT_LOG_TMPNAME, tmp_id)); + WT_ERR(__log_openfile(session, + &log_fh, WT_LOG_TMPNAME, tmp_id, WT_LOG_OPEN_CREATE_OK)); WT_ERR(__log_file_header(session, log_fh, NULL, true)); WT_ERR(__log_prealloc(session, log_fh)); WT_ERR(__wt_fsync(session, log_fh, true)); @@ -1058,7 +1072,7 @@ __wt_log_allocfile( /* * Rename it into place and make it available. 
*/ - WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data)); + WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data, false)); err: __wt_scr_free(session, &from_path); __wt_scr_free(session, &to_path); @@ -1081,7 +1095,7 @@ __wt_log_remove(WT_SESSION_IMPL *session, WT_ERR(__log_filename(session, lognum, file_prefix, path)); WT_ERR(__wt_verbose(session, WT_VERB_LOG, "log_remove: remove log %s", (char *)path->data)); - WT_ERR(__wt_fs_remove(session, path->data)); + WT_ERR(__wt_fs_remove(session, path->data, false)); err: __wt_scr_free(session, &path); return (ret); } @@ -1117,7 +1131,7 @@ __wt_log_open(WT_SESSION_IMPL *session) WT_RET(__wt_verbose(session, WT_VERB_LOG, "log_open: open fh to directory %s", conn->log_path)); WT_RET(__wt_open(session, conn->log_path, - WT_OPEN_FILE_TYPE_DIRECTORY, 0, &log->log_dir_fh)); + WT_FS_OPEN_FILE_TYPE_DIRECTORY, 0, &log->log_dir_fh)); } if (!F_ISSET(conn, WT_CONN_READONLY)) { @@ -1587,8 +1601,8 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, WT_ERR( __wt_fs_directory_list_free(session, &logfiles, logcount)); } - WT_ERR(__log_openfile( - session, false, &log_fh, WT_LOG_FILENAME, start_lsn.l.file)); + WT_ERR(__log_openfile(session, + &log_fh, WT_LOG_FILENAME, start_lsn.l.file, WT_LOG_OPEN_VERIFY)); WT_ERR(__wt_filesize(session, log_fh, &log_size)); rd_lsn = start_lsn; @@ -1637,7 +1651,8 @@ advance: if (rd_lsn.l.file > end_lsn.l.file) break; WT_ERR(__log_openfile(session, - false, &log_fh, WT_LOG_FILENAME, rd_lsn.l.file)); + &log_fh, WT_LOG_FILENAME, + rd_lsn.l.file, WT_LOG_OPEN_VERIFY)); WT_ERR(__wt_filesize(session, log_fh, &log_size)); eol = false; continue; diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c index 78235fb6a92..bedef6a8596 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c @@ -205,6 +205,12 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool 
update) WT_RET(__wt_txn_id_check(session)); WT_RET(__clsm_enter_update(clsm)); + /* + * Switching the tree will update the generation before + * updating the switch transaction. We test the + * transaction in clsm_enter_update. Now test the + * disk generation to avoid races. + */ if (clsm->dsk_gen != clsm->lsm_tree->dsk_gen) goto open; @@ -219,13 +225,20 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update) * transaction ID in each chunk: any transaction ID * that overlaps with our snapshot is a potential * conflict. + * + * Note that the global snap_min is correct here: it + * tracks concurrent transactions excluding special + * transactions such as checkpoint (which we can't + * conflict with because checkpoint only writes the + * metadata, which is not an LSM tree). */ clsm->nupdates = 1; if (txn->isolation == WT_ISO_SNAPSHOT && F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) { WT_ASSERT(session, F_ISSET(txn, WT_TXN_HAS_SNAPSHOT)); - snap_min = txn->snap_min; + snap_min = + WT_SESSION_TXN_STATE(session)->snap_min; for (switch_txnp = &clsm->switch_txn[clsm->nchunks - 2]; clsm->nupdates < clsm->nchunks; @@ -1521,6 +1534,8 @@ __wt_clsm_open(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree; bool bulk; + WT_STATIC_ASSERT(offsetof(WT_CURSOR_LSM, iface) == 0); + clsm = NULL; cursor = NULL; lsm_tree = NULL; @@ -1566,6 +1581,7 @@ __wt_clsm_open(WT_SESSION_IMPL *session, cursor->value_format = lsm_tree->value_format; clsm->lsm_tree = lsm_tree; + lsm_tree = NULL; /* * The tree's dsk_gen starts at one, so starting the cursor on zero @@ -1573,7 +1589,6 @@ __wt_clsm_open(WT_SESSION_IMPL *session, */ clsm->dsk_gen = 0; - WT_STATIC_ASSERT(offsetof(WT_CURSOR_LSM, iface) == 0); WT_ERR(__wt_cursor_init(cursor, cursor->uri, owner, cfg, cursorp)); if (bulk) @@ -1585,10 +1600,6 @@ err: if (clsm != NULL) else if (lsm_tree != NULL) __wt_lsm_tree_release(session, lsm_tree); - /* - * We open bulk cursors after setting the returned cursor. - * Fix that here. 
- */ *cursorp = NULL; } diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c index da106ae2089..2ecfb614eee 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c @@ -771,6 +771,11 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) ++lsm_tree->dsk_gen; lsm_tree->modified = true; + /* + * Ensure the updated disk generation is visible to all other threads + * before updating the transaction ID. + */ + WT_FULL_BARRIER(); /* * Set the switch transaction in the previous chunk unless this is @@ -1187,8 +1192,15 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) */ if (lsm_tree->nchunks > 0 && (chunk = lsm_tree->chunk[lsm_tree->nchunks - 1]) != NULL) { - if (chunk->switch_txn == WT_TXN_NONE) + if (chunk->switch_txn == WT_TXN_NONE) { + /* + * Make sure any cursors open on the tree see the + * new switch generation before updating. + */ + ++lsm_tree->dsk_gen; + WT_FULL_BARRIER(); chunk->switch_txn = __wt_txn_id_alloc(session, false); + } /* * If we have a chunk, we want to look for it to be on-disk. * So we need to add a reference to keep it available. 
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c index c19f42327be..0f2a407c70d 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c @@ -526,7 +526,7 @@ __lsm_drop_file(WT_SESSION_IMPL *session, const char *uri) ret = __wt_schema_drop(session, uri, drop_cfg)); if (ret == 0) - ret = __wt_fs_remove(session, uri + strlen("file:")); + ret = __wt_fs_remove(session, uri + strlen("file:"), false); WT_RET(__wt_verbose(session, WT_VERB_LSM, "Dropped %s", uri)); if (ret == EBUSY || ret == ENOENT) diff --git a/src/third_party/wiredtiger/src/meta/meta_table.c b/src/third_party/wiredtiger/src/meta/meta_table.c index 38a2edd7219..d39df163daf 100644 --- a/src/third_party/wiredtiger/src/meta/meta_table.c +++ b/src/third_party/wiredtiger/src/meta/meta_table.c @@ -68,6 +68,9 @@ __wt_metadata_cursor_open( if (F_ISSET(btree, WT_BTREE_NO_LOGGING)) F_CLR(btree, WT_BTREE_NO_LOGGING); + /* The metadata file always uses checkpoint IDs in visibility checks. */ + btree->include_checkpoint_txn = true; + return (0); } diff --git a/src/third_party/wiredtiger/src/meta/meta_track.c b/src/third_party/wiredtiger/src/meta/meta_track.c index eb06b2bed66..3d8b7c46500 100644 --- a/src/third_party/wiredtiger/src/meta/meta_track.c +++ b/src/third_party/wiredtiger/src/meta/meta_track.c @@ -141,7 +141,8 @@ __meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk) ret = bm->checkpoint_resolve(bm, session)); break; case WT_ST_DROP_COMMIT: - if ((ret = __wt_block_manager_drop(session, trk->a)) != 0) + if ((ret = + __wt_block_manager_drop(session, trk->a, false)) != 0) __wt_err(session, ret, "metadata remove dropped file %s", trk->a); break; @@ -188,13 +189,15 @@ __meta_track_unroll(WT_SESSION_IMPL *session, WT_META_TRACK *trk) * For removes, b is NULL. 
*/ if (trk->a != NULL && trk->b != NULL && - (ret = __wt_rename_and_sync_directory(session, - trk->b + strlen("file:"), trk->a + strlen("file:"))) != 0) + (ret = __wt_fs_rename(session, + trk->b + strlen("file:"), trk->a + strlen("file:"), + true)) != 0) __wt_err(session, ret, "metadata unroll rename %s to %s", trk->b, trk->a); - if (trk->a == NULL && (ret = - __wt_fs_remove(session, trk->b + strlen("file:"))) != 0) + if (trk->a == NULL && + (ret = __wt_fs_remove(session, + trk->b + strlen("file:"), false)) != 0) __wt_err(session, ret, "metadata unroll create %s", trk->b); diff --git a/src/third_party/wiredtiger/src/meta/meta_turtle.c b/src/third_party/wiredtiger/src/meta/meta_turtle.c index 4d2b359bbed..ace0fabab48 100644 --- a/src/third_party/wiredtiger/src/meta/meta_turtle.c +++ b/src/third_party/wiredtiger/src/meta/meta_turtle.c @@ -158,7 +158,7 @@ __wt_turtle_init(WT_SESSION_IMPL *session) * Discard any turtle setup file left-over from previous runs. This * doesn't matter for correctness, it's just cleaning up random files. */ - WT_RET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET)); + WT_RET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET, false)); /* * We could die after creating the turtle file and before creating the @@ -197,9 +197,10 @@ __wt_turtle_init(WT_SESSION_IMPL *session) "Both %s and %s exist; recreating metadata from " "backup", WT_METADATA_TURTLE, WT_METADATA_BACKUP)); - WT_RET(__wt_remove_if_exists(session, WT_METAFILE)); + WT_RET( + __wt_remove_if_exists(session, WT_METAFILE, false)); WT_RET(__wt_remove_if_exists( - session, WT_METADATA_TURTLE)); + session, WT_METADATA_TURTLE, false)); load = true; } } else @@ -305,7 +306,7 @@ __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) * every time. 
*/ WT_RET(__wt_fopen(session, WT_METADATA_TURTLE_SET, - WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs)); + WT_FS_OPEN_CREATE | WT_FS_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs)); version = wiredtiger_version(&vmajor, &vminor, &vpatch); WT_ERR(__wt_fprintf(session, fs, @@ -320,7 +321,7 @@ __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) /* Close any file handle left open, remove any temporary file. */ err: WT_TRET(__wt_fclose(session, &fs)); - WT_TRET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET)); + WT_TRET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET, false)); return (ret); } diff --git a/src/third_party/wiredtiger/src/os_common/filename.c b/src/third_party/wiredtiger/src/os_common/filename.c index 5f174288350..8b6c1269829 100644 --- a/src/third_party/wiredtiger/src/os_common/filename.c +++ b/src/third_party/wiredtiger/src/os_common/filename.c @@ -56,55 +56,17 @@ __wt_nfilename( * Remove a file if it exists. */ int -__wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name) +__wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name, bool durable) { bool exist; WT_RET(__wt_fs_exist(session, name, &exist)); if (exist) - WT_RET(__wt_fs_remove(session, name)); + WT_RET(__wt_fs_remove(session, name, durable)); return (0); } /* - * __wt_rename_and_sync_directory -- - * Rename a file and sync the enclosing directory. - */ -int -__wt_rename_and_sync_directory( - WT_SESSION_IMPL *session, const char *from, const char *to) -{ - const char *fp, *tp; - bool same_directory; - - /* Rename the source file to the target. */ - WT_RET(__wt_fs_rename(session, from, to)); - - /* - * Flush the backing directory to guarantee the rename. 
My reading of - * POSIX 1003.1 is there's no guarantee flushing only one of the from - * or to directories, or flushing a common parent, is sufficient, and - * even if POSIX were to make that guarantee, existing filesystems are - * known to not provide the guarantee or only provide the guarantee - * with specific mount options. Flush both of the from/to directories - * until it's a performance problem. - */ - WT_RET(__wt_fs_directory_sync(session, from)); - - /* - * In almost all cases, we're going to be renaming files in the same - * directory, we can at least fast-path that. - */ - fp = strrchr(from, '/'); - tp = strrchr(to, '/'); - same_directory = (fp == NULL && tp == NULL) || - (fp != NULL && tp != NULL && - fp - from == tp - to && memcmp(from, to, (size_t)(fp - from)) == 0); - - return (same_directory ? 0 : __wt_fs_directory_sync(session, to)); -} - -/* * __wt_copy_and_sync -- * Copy a file safely; here to support the wt utility. */ @@ -134,13 +96,13 @@ __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to) WT_ERR(__wt_scr_alloc(session, 0, &tmp)); WT_ERR(__wt_buf_fmt(session, tmp, "%s.copy", to)); - WT_ERR(__wt_remove_if_exists(session, to)); - WT_ERR(__wt_remove_if_exists(session, tmp->data)); + WT_ERR(__wt_remove_if_exists(session, to, false)); + WT_ERR(__wt_remove_if_exists(session, tmp->data, false)); /* Open the from and temporary file handles. */ - WT_ERR(__wt_open(session, from, WT_OPEN_FILE_TYPE_REGULAR, 0, &ffh)); - WT_ERR(__wt_open(session, tmp->data, WT_OPEN_FILE_TYPE_REGULAR, - WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &tfh)); + WT_ERR(__wt_open(session, from, WT_FS_OPEN_FILE_TYPE_REGULAR, 0, &ffh)); + WT_ERR(__wt_open(session, tmp->data, WT_FS_OPEN_FILE_TYPE_REGULAR, + WT_FS_OPEN_CREATE | WT_FS_OPEN_EXCLUSIVE, &tfh)); /* * Allocate a copy buffer. 
Don't use a scratch buffer, this thing is @@ -162,7 +124,7 @@ __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to) WT_ERR(__wt_fsync(session, tfh, true)); WT_ERR(__wt_close(session, &tfh)); - ret = __wt_rename_and_sync_directory(session, tmp->data, to); + ret = __wt_fs_rename(session, tmp->data, to, true); err: WT_TRET(__wt_close(session, &ffh)); WT_TRET(__wt_close(session, &tfh)); diff --git a/src/third_party/wiredtiger/src/os_common/os_fhandle.c b/src/third_party/wiredtiger/src/os_common/os_fhandle.c index 81e4cc14ccb..184a9df0e72 100644 --- a/src/third_party/wiredtiger/src/os_common/os_fhandle.c +++ b/src/third_party/wiredtiger/src/os_common/os_fhandle.c @@ -150,19 +150,19 @@ __open_verbose( */ switch (file_type) { - case WT_OPEN_FILE_TYPE_CHECKPOINT: + case WT_FS_OPEN_FILE_TYPE_CHECKPOINT: file_type_tag = "checkpoint"; break; - case WT_OPEN_FILE_TYPE_DATA: + case WT_FS_OPEN_FILE_TYPE_DATA: file_type_tag = "data"; break; - case WT_OPEN_FILE_TYPE_DIRECTORY: + case WT_FS_OPEN_FILE_TYPE_DIRECTORY: file_type_tag = "directory"; break; - case WT_OPEN_FILE_TYPE_LOG: + case WT_FS_OPEN_FILE_TYPE_LOG: file_type_tag = "log"; break; - case WT_OPEN_FILE_TYPE_REGULAR: + case WT_FS_OPEN_FILE_TYPE_REGULAR: file_type_tag = "regular"; break; default: @@ -172,18 +172,18 @@ __open_verbose( WT_RET(__wt_scr_alloc(session, 0, &tmp)); sep = " ("; -#define WT_OPEN_VERBOSE_FLAG(f, name) \ +#define WT_FS_OPEN_VERBOSE_FLAG(f, name) \ if (LF_ISSET(f)) { \ WT_ERR(__wt_buf_catfmt( \ session, tmp, "%s%s", sep, name)); \ sep = ", "; \ } - WT_OPEN_VERBOSE_FLAG(WT_OPEN_CREATE, "create"); - WT_OPEN_VERBOSE_FLAG(WT_OPEN_DIRECTIO, "direct-IO"); - WT_OPEN_VERBOSE_FLAG(WT_OPEN_EXCLUSIVE, "exclusive"); - WT_OPEN_VERBOSE_FLAG(WT_OPEN_FIXED, "fixed"); - WT_OPEN_VERBOSE_FLAG(WT_OPEN_READONLY, "readonly"); + WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_CREATE, "create"); + WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_DIRECTIO, "direct-IO"); + WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_EXCLUSIVE, "exclusive"); 
+ WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_FIXED, "fixed"); + WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_READONLY, "readonly"); if (tmp->size != 0) WT_ERR(__wt_buf_catfmt(session, tmp, ")")); @@ -209,7 +209,7 @@ err: __wt_scr_free(session, &tmp); */ int __wt_open(WT_SESSION_IMPL *session, - const char *name, WT_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp) + const char *name, WT_FS_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; @@ -247,12 +247,12 @@ __wt_open(WT_SESSION_IMPL *session, if (F_ISSET(conn, WT_CONN_READONLY)) { lock_file = strcmp(name, WT_SINGLETHREAD) == 0; if (!lock_file) - LF_SET(WT_OPEN_READONLY); - WT_ASSERT(session, lock_file || !LF_ISSET(WT_OPEN_CREATE)); + LF_SET(WT_FS_OPEN_READONLY); + WT_ASSERT(session, lock_file || !LF_ISSET(WT_FS_OPEN_CREATE)); } /* Create the path to the file. */ - if (!LF_ISSET(WT_OPEN_FIXED)) + if (!LF_ISSET(WT_FS_OPEN_FIXED)) WT_ERR(__wt_filename(session, name, &path)); /* Call the underlying open function. */ @@ -261,7 +261,7 @@ __wt_open(WT_SESSION_IMPL *session, open_called = true; WT_ERR(__fhandle_method_finalize( - session, fh->handle, LF_ISSET(WT_OPEN_READONLY))); + session, fh->handle, LF_ISSET(WT_FS_OPEN_READONLY))); /* * Repeat the check for a match: if there's no match, link our newly diff --git a/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c b/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c index 09c2e08db83..178adc1dac8 100644 --- a/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c +++ b/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c @@ -188,14 +188,16 @@ __im_fs_exist(WT_FILE_SYSTEM *file_system, * POSIX remove. 
*/ static int -__im_fs_remove( - WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name) +__im_fs_remove(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, const char *name, uint32_t flags) { WT_DECL_RET; WT_FILE_HANDLE_INMEM *im_fh; WT_FILE_SYSTEM_INMEM *im_fs; WT_SESSION_IMPL *session; + WT_UNUSED(flags); + im_fs = (WT_FILE_SYSTEM_INMEM *)file_system; session = (WT_SESSION_IMPL *)wt_session; @@ -215,7 +217,7 @@ __im_fs_remove( */ static int __im_fs_rename(WT_FILE_SYSTEM *file_system, - WT_SESSION *wt_session, const char *from, const char *to) + WT_SESSION *wt_session, const char *from, const char *to, uint32_t flags) { WT_DECL_RET; WT_FILE_HANDLE_INMEM *im_fh; @@ -224,6 +226,8 @@ __im_fs_rename(WT_FILE_SYSTEM *file_system, uint64_t bucket; char *copy; + WT_UNUSED(flags); + im_fs = (WT_FILE_SYSTEM_INMEM *)file_system; session = (WT_SESSION_IMPL *)wt_session; @@ -463,7 +467,7 @@ err: __wt_spin_unlock(session, &im_fs->lock); */ static int __im_file_open(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, - const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags, + const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags, WT_FILE_HANDLE **file_handlep) { WT_DECL_RET; diff --git a/src/third_party/wiredtiger/src/os_common/os_fstream.c b/src/third_party/wiredtiger/src/os_common/os_fstream.c index 0b199529e19..5a368ea75e6 100644 --- a/src/third_party/wiredtiger/src/os_common/os_fstream.c +++ b/src/third_party/wiredtiger/src/os_common/os_fstream.c @@ -187,7 +187,7 @@ __wt_fopen(WT_SESSION_IMPL *session, fstr = NULL; WT_RET(__wt_open( - session, name, WT_OPEN_FILE_TYPE_REGULAR, open_flags, &fh)); + session, name, WT_FS_OPEN_FILE_TYPE_REGULAR, open_flags, &fh)); WT_ERR(__wt_calloc_one(session, &fstr)); fstr->fh = fh; diff --git a/src/third_party/wiredtiger/src/os_posix/os_fs.c b/src/third_party/wiredtiger/src/os_posix/os_fs.c index 86fa2e8f117..11f38ec063b 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_fs.c +++ 
b/src/third_party/wiredtiger/src/os_posix/os_fs.c @@ -30,7 +30,7 @@ /* * __posix_sync -- - * Underlying support function to flush a file handle. + * Underlying support function to flush a file descriptor. */ static int __posix_sync( @@ -77,33 +77,42 @@ __posix_sync( #ifdef __linux__ /* * __posix_directory_sync -- - * Flush a directory to ensure file creation is durable. + * Flush a directory to ensure file creation, remove or rename is durable. */ static int -__posix_directory_sync( - WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *path) +__posix_directory_sync(WT_SESSION_IMPL *session, const char *path) { + WT_DECL_ITEM(tmp); WT_DECL_RET; - WT_SESSION_IMPL *session; int fd, tret; + char *dir; - WT_UNUSED(file_system); + WT_RET(__wt_scr_alloc(session, 0, &tmp)); + WT_ERR(__wt_buf_setstr(session, tmp, path)); - session = (WT_SESSION_IMPL *)wt_session; + /* + * This layer should never see a path that doesn't include a trailing + * path separator, this code asserts that fact. + */ + dir = tmp->mem; + strrchr(dir, '/')[1] = '\0'; + fd = -1; /* -Wconditional-uninitialized */ WT_SYSCALL_RETRY(( - (fd = open(path, O_RDONLY, 0444)) == -1 ? -1 : 0), ret); + (fd = open(dir, O_RDONLY, 0444)) == -1 ? -1 : 0), ret); if (ret != 0) - WT_RET_MSG(session, ret, "%s: directory-sync: open", path); + WT_ERR_MSG(session, ret, "%s: directory-sync: open", dir); - ret = __posix_sync(session, fd, path, "directory-sync"); + ret = __posix_sync(session, fd, dir, "directory-sync"); WT_SYSCALL(close(fd), tret); if (tret != 0) { - __wt_err(session, tret, "%s: directory-sync: close", path); + __wt_err(session, tret, "%s: directory-sync: close", dir); if (ret == 0) ret = tret; } + +err: __wt_scr_free(session, &tmp); return (ret); } #endif @@ -141,8 +150,8 @@ __posix_fs_exist(WT_FILE_SYSTEM *file_system, * Remove a file. 
*/ static int -__posix_fs_remove( - WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name) +__posix_fs_remove(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, const char *name, uint32_t flags) { WT_DECL_RET; WT_SESSION_IMPL *session; @@ -159,9 +168,17 @@ __posix_fs_remove( * using unlink may be marginally safer. */ WT_SYSCALL(unlink(name), ret); - if (ret == 0) + if (ret != 0) + WT_RET_MSG(session, ret, "%s: file-remove: unlink", name); + + if (!LF_ISSET(WT_FS_DURABLE)) return (0); - WT_RET_MSG(session, ret, "%s: file-remove: unlink", name); + +#ifdef __linux__ + /* Flush the backing directory to guarantee the remove. */ + WT_RET (__posix_directory_sync(session, name)); +#endif + return (0); } /* @@ -170,7 +187,7 @@ __posix_fs_remove( */ static int __posix_fs_rename(WT_FILE_SYSTEM *file_system, - WT_SESSION *wt_session, const char *from, const char *to) + WT_SESSION *wt_session, const char *from, const char *to, uint32_t flags) { WT_DECL_RET; WT_SESSION_IMPL *session; @@ -187,9 +204,43 @@ __posix_fs_rename(WT_FILE_SYSTEM *file_system, * return (if errno is 0), but we've done the best we can. */ WT_SYSCALL(rename(from, to) != 0 ? -1 : 0, ret); - if (ret == 0) + if (ret != 0) + WT_RET_MSG( + session, ret, "%s to %s: file-rename: rename", from, to); + + if (!LF_ISSET(WT_FS_DURABLE)) return (0); - WT_RET_MSG(session, ret, "%s to %s: file-rename: rename", from, to); +#ifdef __linux__ + /* + * Flush the backing directory to guarantee the rename. My reading of + * POSIX 1003.1 is there's no guarantee flushing only one of the from + * or to directories, or flushing a common parent, is sufficient, and + * even if POSIX were to make that guarantee, existing filesystems are + * known to not provide the guarantee or only provide the guarantee + * with specific mount options. Flush both of the from/to directories + * until it's a performance problem. 
+ */ + WT_RET(__posix_directory_sync(session, from)); + + /* + * In almost all cases, we're going to be renaming files in the same + * directory, we can at least fast-path that. + */ + { + bool same_directory; + const char *fp, *tp; + + fp = strrchr(from, '/'); + tp = strrchr(to, '/'); + same_directory = (fp == NULL && tp == NULL) || + (fp != NULL && tp != NULL && + fp - from == tp - to && memcmp(from, to, (size_t)(fp - from)) == 0); + + if (!same_directory) + WT_RET(__posix_directory_sync(session, to)); + } +#endif + return (0); } /* @@ -513,7 +564,7 @@ __posix_open_file_cloexec(WT_SESSION_IMPL *session, int fd, const char *name) */ static int __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, - const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags, + const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags, WT_FILE_HANDLE **file_handlep) { WT_CONNECTION_IMPL *conn; @@ -536,7 +587,7 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, /* Set up error handling. */ pfh->fd = -1; - if (file_type == WT_OPEN_FILE_TYPE_DIRECTORY) { + if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY) { f = O_RDONLY; #ifdef O_CLOEXEC /* @@ -554,10 +605,10 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, goto directory_open; } - f = LF_ISSET(WT_OPEN_READONLY) ? O_RDONLY : O_RDWR; - if (LF_ISSET(WT_OPEN_CREATE)) { + f = LF_ISSET(WT_FS_OPEN_READONLY) ? O_RDONLY : O_RDWR; + if (LF_ISSET(WT_FS_OPEN_CREATE)) { f |= O_CREAT; - if (LF_ISSET(WT_OPEN_EXCLUSIVE)) + if (LF_ISSET(WT_FS_OPEN_EXCLUSIVE)) f |= O_EXCL; mode = 0666; } else @@ -577,7 +628,7 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, #endif #ifdef O_DIRECT /* Direct I/O. 
*/ - if (LF_ISSET(WT_OPEN_DIRECTIO)) { + if (LF_ISSET(WT_FS_OPEN_DIRECTIO)) { f |= O_DIRECT; pfh->direct_io = true; } else @@ -585,11 +636,11 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, #endif #ifdef O_NOATIME /* Avoid updating metadata for read-only workloads. */ - if (file_type == WT_OPEN_FILE_TYPE_DATA) + if (file_type == WT_FS_OPEN_FILE_TYPE_DATA) f |= O_NOATIME; #endif - if (file_type == WT_OPEN_FILE_TYPE_LOG && + if (file_type == WT_FS_OPEN_FILE_TYPE_LOG && FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) { #ifdef O_DSYNC f |= O_DSYNC; @@ -601,6 +652,7 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, #endif } + /* Create/Open the file. */ WT_SYSCALL_RETRY(((pfh->fd = open(name, f, mode)) == -1 ? -1 : 0), ret); if (ret != 0) WT_ERR_MSG(session, ret, @@ -608,6 +660,16 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, "%s: handle-open: open: failed with direct I/O configured, " "some filesystem types do not support direct I/O" : "%s: handle-open: open", name); + +#ifdef __linux__ + /* + * Durability: some filesystems require a directory sync to be confident + * the file will appear. + */ + if (LF_ISSET(WT_FS_OPEN_DURABLE)) + WT_ERR(__posix_directory_sync(session, name)); +#endif + WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name)); #if defined(HAVE_POSIX_FADVISE) @@ -616,7 +678,7 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, * Ignore fadvise when doing direct I/O, the kernel cache isn't * interesting. */ - if (!pfh->direct_io && file_type == WT_OPEN_FILE_TYPE_DATA) { + if (!pfh->direct_io && file_type == WT_FS_OPEN_FILE_TYPE_DATA) { WT_SYSCALL( posix_fadvise(pfh->fd, 0, 0, POSIX_FADV_RANDOM), ret); if (ret != 0) @@ -705,9 +767,6 @@ __wt_os_posix(WT_SESSION_IMPL *session) /* Initialize the POSIX jump table. 
*/ file_system->fs_directory_list = __wt_posix_directory_list; file_system->fs_directory_list_free = __wt_posix_directory_list_free; -#ifdef __linux__ - file_system->fs_directory_sync = __posix_directory_sync; -#endif file_system->fs_exist = __posix_fs_exist; file_system->fs_open_file = __posix_open_file; file_system->fs_remove = __posix_fs_remove; diff --git a/src/third_party/wiredtiger/src/os_win/os_fs.c b/src/third_party/wiredtiger/src/os_win/os_fs.c index 5daba124e90..fc03e0a2595 100644 --- a/src/third_party/wiredtiger/src/os_win/os_fs.c +++ b/src/third_party/wiredtiger/src/os_win/os_fs.c @@ -36,13 +36,14 @@ __win_fs_exist(WT_FILE_SYSTEM *file_system, * Remove a file. */ static int -__win_fs_remove( - WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name) +__win_fs_remove(WT_FILE_SYSTEM *file_system, + WT_SESSION *wt_session, const char *name, uint32_t flags) { DWORD windows_error; WT_SESSION_IMPL *session; WT_UNUSED(file_system); + WT_UNUSED(flags); session = (WT_SESSION_IMPL *)wt_session; @@ -62,12 +63,13 @@ __win_fs_remove( */ static int __win_fs_rename(WT_FILE_SYSTEM *file_system, - WT_SESSION *wt_session, const char *from, const char *to) + WT_SESSION *wt_session, const char *from, const char *to, uint32_t flags) { DWORD windows_error; WT_SESSION_IMPL *session; WT_UNUSED(file_system); + WT_UNUSED(flags); session = (WT_SESSION_IMPL *)wt_session; @@ -426,7 +428,7 @@ __win_file_write(WT_FILE_HANDLE *file_handle, */ static int __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, - const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags, + const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags, WT_FILE_HANDLE **file_handlep) { DWORD dwCreationDisposition, windows_error; @@ -458,11 +460,11 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, * require that functionality: create an empty WT_FH structure with * invalid handles. 
*/ - if (file_type == WT_OPEN_FILE_TYPE_DIRECTORY) + if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY) goto directory_open; desired_access = GENERIC_READ; - if (!LF_ISSET(WT_OPEN_READONLY)) + if (!LF_ISSET(WT_FS_OPEN_READONLY)) desired_access |= GENERIC_WRITE; /* @@ -476,15 +478,15 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, f = FILE_ATTRIBUTE_NORMAL; dwCreationDisposition = 0; - if (LF_ISSET(WT_OPEN_CREATE)) { + if (LF_ISSET(WT_FS_OPEN_CREATE)) { dwCreationDisposition = CREATE_NEW; - if (LF_ISSET(WT_OPEN_EXCLUSIVE)) + if (LF_ISSET(WT_FS_OPEN_EXCLUSIVE)) dwCreationDisposition = CREATE_ALWAYS; } else dwCreationDisposition = OPEN_EXISTING; /* Direct I/O. */ - if (LF_ISSET(WT_OPEN_DIRECTIO)) { + if (LF_ISSET(WT_FS_OPEN_DIRECTIO)) { f |= FILE_FLAG_NO_BUFFERING; win_fh->direct_io = true; } @@ -493,19 +495,19 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, if (FLD_ISSET(conn->write_through, file_type)) f |= FILE_FLAG_WRITE_THROUGH; - if (file_type == WT_OPEN_FILE_TYPE_LOG && + if (file_type == WT_FS_OPEN_FILE_TYPE_LOG && FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) f |= FILE_FLAG_WRITE_THROUGH; /* Disable read-ahead on trees: it slows down random read workloads. */ - if (file_type == WT_OPEN_FILE_TYPE_DATA) + if (file_type == WT_FS_OPEN_FILE_TYPE_DATA) f |= FILE_FLAG_RANDOM_ACCESS; win_fh->filehandle = CreateFileA(name, desired_access, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, dwCreationDisposition, f, NULL); if (win_fh->filehandle == INVALID_HANDLE_VALUE) { - if (LF_ISSET(WT_OPEN_CREATE) && + if (LF_ISSET(WT_FS_OPEN_CREATE) && GetLastError() == ERROR_FILE_EXISTS) win_fh->filehandle = CreateFileA(name, desired_access, FILE_SHARE_READ | FILE_SHARE_WRITE, @@ -528,7 +530,7 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, * concurrently with reads on the file. Writes would also move the file * pointer. 
*/ - if (!LF_ISSET(WT_OPEN_READONLY)) { + if (!LF_ISSET(WT_FS_OPEN_READONLY)) { win_fh->filehandle_secondary = CreateFileA(name, desired_access, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, f, NULL); diff --git a/src/third_party/wiredtiger/src/os_win/os_path.c b/src/third_party/wiredtiger/src/os_win/os_path.c index 220752ce7a1..74050600417 100644 --- a/src/third_party/wiredtiger/src/os_win/os_path.c +++ b/src/third_party/wiredtiger/src/os_win/os_path.c @@ -16,8 +16,30 @@ bool __wt_absolute_path(const char *path) { /* - * Check for a drive name (for example, "D:"), allow both forward and - * backward slashes. + * https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247 + * + * For Windows API functions that manipulate files, file names can often + * be relative to the current directory, while some APIs require a fully + * qualified path. A file name is relative to the current directory if + * it does not begin with one of the following: + * + * -- A UNC name of any format, which always start with two backslash + * characters ("\\"). + * -- A disk designator with a backslash, for example "C:\" or "d:\". + * -- A single backslash, for example, "\directory" or "\file.txt". This + * is also referred to as an absolute path. + * + * If a file name begins with only a disk designator but not the + * backslash after the colon, it is interpreted as a relative path to + * the current directory on the drive with the specified letter. Note + * that the current directory may or may not be the root directory + * depending on what it was set to during the most recent "change + * directory" operation on that disk. + * + * -- "C:tmp.txt" refers to a file named "tmp.txt" in the current + * directory on drive C. + * -- "C:tempdir\tmp.txt" refers to a file in a subdirectory to the + * current directory on drive C. 
*/ if (strlen(path) >= 3 && __wt_isalpha(path[0]) && path[1] == ':') path += 2; diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index b49946bb10e..b96b34594b0 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -115,6 +115,7 @@ typedef struct { */ uint32_t page_size; /* Set page size */ uint32_t page_size_orig; /* Saved set page size */ + uint32_t max_raw_page_size; /* Max page size with raw compression */ /* * Second, the split size: if we're doing the page layout, split to a @@ -159,9 +160,16 @@ typedef struct { WT_ADDR addr; /* Split's written location */ uint32_t size; /* Split's size */ uint32_t cksum; /* Split's checksum */ + void *disk_image; /* Split's disk image */ /* + * Raw compression, the disk image being written is already + * compressed. + */ + bool already_compressed; + + /* * Saved update list, supporting the WT_EVICT_UPDATE_RESTORE and * WT_EVICT_LOOKASIDE configurations. */ @@ -175,13 +183,6 @@ typedef struct { * column-store key. */ WT_ITEM key; /* Promoted row-store key */ - - /* - * During wrapup, after reconciling the root page, we write a - * final block as part of a checkpoint. If raw compression - * was configured, that block may have already been compressed. - */ - bool already_compressed; } *bnd; /* Saved boundaries */ uint32_t bnd_next; /* Next boundary slot */ uint32_t bnd_next_max; /* Maximum boundary slots used */ @@ -445,17 +446,32 @@ __wt_reconcile(WT_SESSION_IMPL *session, } /* - * Clean up reconciliation resources: some workloads have millions of - * boundary structures, and if associated with an application session - * pulled into doing forced eviction, they won't be discarded for the - * life of the session (or until session.reset is called). Discard all - * of the reconciliation resources if an application thread, not doing - * a checkpoint. 
- */ - __rec_bnd_cleanup(session, r, - F_ISSET(session, WT_SESSION_INTERNAL) || - WT_SESSION_IS_CHECKPOINT(session) ? false : true); + * When application threads perform eviction, don't cache block manager + * or reconciliation structures (even across calls), we can have a + * significant number of application threads doing eviction at the same + * time with large items. We ignore checkpoints, once the checkpoint + * completes, all unnecessary session resources will be discarded. + * + * Even in application threads doing checkpoints or in internal threads + * doing any reconciliation, clean up reconciliation resources. Some + * workloads have millions of boundary structures in a reconciliation + * and we don't want to tie that memory down, even across calls. + */ + if (WT_SESSION_IS_CHECKPOINT(session) || + F_ISSET(session, WT_SESSION_INTERNAL)) + __rec_bnd_cleanup(session, r, false); + else { + /* + * Clean up the underlying block manager memory too: it's not + * reconciliation, but threads discarding reconciliation + * structures want to clean up the block manager's structures + * as well, and there's no obvious place to do that. + */ + if (session->block_manager_cleanup != NULL) + WT_TRET(session->block_manager_cleanup(session)); + WT_TRET(__rec_destroy_session(session)); + } WT_RET(ret); /* @@ -652,7 +668,7 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags) WT_ASSERT(session, mod->mod_multi[i].supd == NULL); WT_ERR(__wt_multi_to_ref(session, - next, &mod->mod_multi[i], &pindex->index[i], NULL)); + next, &mod->mod_multi[i], &pindex->index[i], NULL, false)); pindex->index[i]->home = next; } @@ -1135,8 +1151,20 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, */ if (!skipped && (F_ISSET(btree, WT_BTREE_LOOKASIDE) || - __wt_txn_visible_all(session, max_txn))) + __wt_txn_visible_all(session, max_txn))) { +#ifdef HAVE_DIAGNOSTIC + /* + * The checkpoint transaction is special. 
Make sure we never + * write (metadata) updates from a checkpoint in a concurrent + * session. + */ + txnid = *updp == NULL ? WT_TXN_NONE : (*updp)->txnid; + WT_ASSERT(session, txnid == WT_TXN_NONE || + txnid != S2C(session)->txn_global.checkpoint_txnid || + WT_SESSION_IS_CHECKPOINT(session)); +#endif return (0); + } /* * In some cases, there had better not be skipped updates or updates not @@ -1845,18 +1873,19 @@ __rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd) WT_CLEAR(bnd->addr); bnd->size = 0; bnd->cksum = 0; + __wt_free(session, bnd->disk_image); __wt_free(session, bnd->supd); bnd->supd_next = 0; bnd->supd_allocated = 0; + bnd->already_compressed = false; + /* * Don't touch the key, we re-use that memory in each new * reconciliation. */ - - bnd->already_compressed = false; } /* @@ -1950,10 +1979,19 @@ __rec_split_init(WT_SESSION_IMPL *session, * additional data because we don't know how well it will compress, and * we don't want to increment our way up to the amount of data needed by * the application to successfully compress to the target page size. + * Ideally accumulate data several times the page size without + * approaching the memory page maximum, but at least have data worth + * one page. + * + * There are cases when we grow the page size to accommodate large + * records, in those cases we split the pages once they have crossed + * the maximum size for a page with raw compression. */ r->page_size = r->page_size_orig = max; if (r->raw_compression) - r->page_size *= 10; + r->max_raw_page_size = r->page_size = + (uint32_t)WT_MIN(r->page_size * 10, + WT_MAX(r->page_size, btree->maxmempage / 2)); /* * Ensure the disk image buffer is large enough for the max object, as @@ -2295,7 +2333,7 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) /* Hitting a page boundary resets the dictionary, in all cases. 
*/ __rec_dictionary_reset(r); - inuse = WT_PTRDIFF32(r->first_free, dsk); + inuse = WT_PTRDIFF(r->first_free, dsk); switch (r->bnd_state) { case SPLIT_BOUNDARY: /* @@ -2465,7 +2503,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session, WT_COMPRESSOR *compressor; WT_DECL_RET; WT_ITEM *dst, *write_ref; - WT_PAGE_HEADER *dsk, *dsk_dst; + WT_PAGE_HEADER *dsk, *dsk_dst, *disk_image; WT_SESSION *wt_session; size_t corrected_page_size, extra_skip, len, result_len; uint64_t recno; @@ -2582,11 +2620,9 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session, /* * Don't create an image so large that any future update will - * cause a split in memory. Use half of the maximum size so - * we split very compressible pages that have reached the - * maximum size in memory into two equal blocks. + * cause a split in memory. */ - if (len > (size_t)btree->maxmempage / 2) + if (max_image_slot == 0 && len > (size_t)r->max_raw_page_size) max_image_slot = slots; } @@ -2648,7 +2684,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session, r->page_size_orig, btree->split_pct, WT_BLOCK_COMPRESS_SKIP + extra_skip, (uint8_t *)dsk + WT_BLOCK_COMPRESS_SKIP, r->raw_offsets, - no_more_rows || max_image_slot == 0 ? slots : max_image_slot, + max_image_slot == 0 ? slots : max_image_slot, (uint8_t *)dst->mem + WT_BLOCK_COMPRESS_SKIP, result_len, no_more_rows || max_image_slot != 0, @@ -2751,7 +2787,8 @@ no_slots: if (result_slots != 0) { /* - * We have a block, finalize the header information. + * We have a block, finalize the compressed disk image's header + * information. */ dst->size = result_len + WT_BLOCK_COMPRESS_SKIP; dsk_dst = dst->mem; @@ -2761,6 +2798,26 @@ no_slots: dsk_dst->u.entries = r->raw_entries[result_slots - 1]; /* + * Optionally keep the disk image in cache. Update the initial + * page-header fields to reflect the actual data being written. + * + * If updates are saved and need to be restored, we have to keep + * a copy of the disk image. 
Unfortunately, we don't yet know if + * there are updates to restore for the key range covered by the + * disk image just created. If there are any saved updates, take + * a copy of the disk image, it's freed later if not needed. + */ + if (F_ISSET(r, WT_EVICT_SCRUB) || + (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && r->supd_next > 0)) { + WT_RET(__wt_strndup(session, dsk, + dsk_dst->mem_size, &last->disk_image)); + disk_image = last->disk_image; + disk_image->recno = last->recno; + disk_image->mem_size = dsk_dst->mem_size; + disk_image->u.entries = dsk_dst->u.entries; + } + + /* * There is likely a remnant in the working buffer that didn't * get compressed; copy it down to the start of the buffer and * update the starting record number, free space and so on. @@ -2874,48 +2931,6 @@ split_grow: /* } /* - * __rec_raw_decompress -- - * Decompress a raw-compressed image. - */ -static int -__rec_raw_decompress( - WT_SESSION_IMPL *session, const void *image, size_t size, void *retp) -{ - WT_BTREE *btree; - WT_DECL_ITEM(tmp); - WT_DECL_RET; - WT_PAGE_HEADER const *dsk; - size_t result_len; - - btree = S2BT(session); - dsk = image; - - /* - * We skipped an update and we can't write a block, but unfortunately, - * the block has already been compressed. Decompress the block so we - * can subsequently re-instantiate it in memory. 
- */ - WT_RET(__wt_scr_alloc(session, dsk->mem_size, &tmp)); - memcpy(tmp->mem, image, WT_BLOCK_COMPRESS_SKIP); - WT_ERR(btree->compressor->decompress(btree->compressor, - &session->iface, - (uint8_t *)image + WT_BLOCK_COMPRESS_SKIP, - size - WT_BLOCK_COMPRESS_SKIP, - (uint8_t *)tmp->mem + WT_BLOCK_COMPRESS_SKIP, - dsk->mem_size - WT_BLOCK_COMPRESS_SKIP, - &result_len)); - if (result_len != dsk->mem_size - WT_BLOCK_COMPRESS_SKIP) - WT_ERR(__wt_illegal_value(session, btree->dhandle->name)); - - WT_ERR(__wt_strndup(session, tmp->data, dsk->mem_size, retp)); - WT_ASSERT(session, __wt_verify_dsk_image(session, - "[raw evict split]", tmp->data, dsk->mem_size, false) == 0); - -err: __wt_scr_free(session, &tmp); - return (ret); -} - -/* * __rec_split_raw -- * Raw compression split routine. */ @@ -3022,7 +3037,7 @@ __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r) if (r->raw_compression && r->entries != 0) { while (r->entries != 0) { data_size = - WT_PTRDIFF32(r->first_free, r->disk_image.mem); + WT_PTRDIFF(r->first_free, r->disk_image.mem); if (data_size <= btree->allocsize) break; WT_RET(__rec_split_raw_worker(session, r, 0, true)); @@ -3145,14 +3160,13 @@ __rec_split_write(WT_SESSION_IMPL *session, uint32_t bnd_slot, i, j; int cmp; uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE]; + bool need_image; btree = S2BT(session); dsk = buf->mem; page = r->page; mod = page->modify; - WT_RET(__wt_scr_alloc(session, 0, &key)); - /* Set the zero-length value flag in the page header. */ if (dsk->type == WT_PAGE_ROW_LEAF) { F_CLR(dsk, WT_PAGE_EMPTY_V_ALL | WT_PAGE_EMPTY_V_NONE); @@ -3163,6 +3177,8 @@ __rec_split_write(WT_SESSION_IMPL *session, F_SET(dsk, WT_PAGE_EMPTY_V_NONE); } + bnd->entries = r->entries; + /* Initialize the address (set the page type for the parent). 
*/ switch (dsk->type) { case WT_PAGE_COL_FIX: @@ -3176,9 +3192,8 @@ __rec_split_write(WT_SESSION_IMPL *session, case WT_PAGE_ROW_INT: bnd->addr.type = WT_ADDR_INT; break; - WT_ILLEGAL_VALUE_ERR(session); + WT_ILLEGAL_VALUE(session); } - bnd->size = (uint32_t)buf->size; bnd->cksum = 0; @@ -3190,6 +3205,8 @@ __rec_split_write(WT_SESSION_IMPL *session, * This code requires a key be filled in for the next block (or the * last block flag be set, if there's no next block). */ + if (page->type == WT_PAGE_ROW_LEAF) + WT_RET(__wt_scr_alloc(session, 0, &key)); for (i = 0, supd = r->supd; i < r->supd_next; ++i, ++supd) { /* The last block gets all remaining saved updates. */ if (last_block) { @@ -3254,33 +3271,11 @@ supd_check_complete: * image, we can't actually write it. Instead, we will re-instantiate * the page using the disk image and any list of updates we skipped. */ - if (F_ISSET(r, WT_EVICT_IN_MEMORY) || - (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL)) { - - /* Statistics tracking that we used update/restore. */ - if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL) - r->cache_write_restore = true; - - /* - * If the buffer is compressed (raw compression was configured), - * we have to decompress it so we can instantiate it later. It's - * a slow and convoluted path, but it's also a rare one and it's - * not worth making it faster. Else, the disk image is ready, - * copy it into place for later. It's possible the disk image - * has no items; we have to flag that for verification, it's a - * special case since read/writing empty pages isn't generally - * allowed. 
- */ - if (bnd->already_compressed) - WT_ERR(__rec_raw_decompress( - session, buf->data, buf->size, &bnd->disk_image)); - else { - WT_ERR(__wt_strndup( - session, buf->data, buf->size, &bnd->disk_image)); - WT_ASSERT(session, __wt_verify_dsk_image(session, - "[evict split]", buf->data, buf->size, true) == 0); - } - goto done; + if (F_ISSET(r, WT_EVICT_IN_MEMORY)) + goto copy_image; + if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL) { + r->cache_write_restore = true; + goto copy_image; } /* @@ -3324,13 +3319,11 @@ supd_check_complete: bnd->addr = multi->addr; WT_STAT_FAST_DATA_INCR(session, rec_page_match); - goto done; + goto copy_image; } } } - bnd->entries = r->entries; - #ifdef HAVE_VERBOSE /* Output a verbose message if we create a page without many entries */ if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT) && r->entries < 6) @@ -3343,8 +3336,8 @@ supd_check_complete: r->bnd_state)); #endif - WT_ERR(__wt_bt_write(session, - buf, addr, &addr_size, false, bnd->already_compressed)); + WT_ERR(__wt_bt_write(session, buf, addr, &addr_size, + false, F_ISSET(r, WT_CHECKPOINTING), bnd->already_compressed)); WT_ERR(__wt_strndup(session, addr, addr_size, &bnd->addr.addr)); bnd->addr.size = (uint8_t)addr_size; @@ -3354,9 +3347,29 @@ supd_check_complete: * the database's lookaside store. */ if (F_ISSET(r, WT_EVICT_LOOKASIDE) && bnd->supd != NULL) - ret = __rec_update_las(session, r, btree->id, bnd); + WT_ERR(__rec_update_las(session, r, btree->id, bnd)); + +copy_image: + /* + * If re-instantiating this page in memory (either because eviction + * wants to, or because we skipped updates to build the disk image), + * save a copy of the disk image. + * + * Raw compression might have already saved a copy of the disk image + * before we could know if we skipped updates to create it, and now + * we know if we're going to need it. + * + * Copy the disk image if we need a copy and don't already have one, + * discard any already saved copy we don't need. 
+ */ + need_image = F_ISSET(r, WT_EVICT_SCRUB) || + (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL); + if (need_image && bnd->disk_image == NULL) + WT_ERR(__wt_strndup( + session, buf->data, buf->size, &bnd->disk_image)); + if (!need_image) + __wt_free(session, bnd->disk_image); -done: err: __wt_scr_free(session, &key); return (ret); } @@ -3556,8 +3569,9 @@ __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_PAGE *parent; WT_RECONCILE *r; - r = cbulk->reconcile; btree = S2BT(session); + if ((r = cbulk->reconcile) == NULL) + return (0); switch (btree->type) { case BTREE_COL_FIX: @@ -5601,9 +5615,10 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_RET(__wt_btree_block_free(session, mod->mod_replace.addr, mod->mod_replace.size)); - /* Discard the replacement page's address. */ + /* Discard the replacement page's address and disk image. */ __wt_free(session, mod->mod_replace.addr); mod->mod_replace.size = 0; + __wt_free(session, mod->mod_disk_image); break; WT_ILLEGAL_VALUE(session); } @@ -5651,26 +5666,33 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) bnd = &r->bnd[0]; /* - * If saving/restoring changes for this page and there's only - * one block, there's nothing to write. This is an in-memory - * configuration or a special case of forced eviction: set up + * If in-memory, or saving/restoring changes for this page and + * there's only one block, there's nothing to write. Set up * a single block as if to split, then use that disk image to - * rewrite the page in memory. + * rewrite the page in memory. This is separate from simple + * replacements where eviction has decided to retain the page + * in memory because the latter can't handle update lists and + * splits can. 
*/ - if (bnd->disk_image != NULL) + if (F_ISSET(r, WT_EVICT_IN_MEMORY) || + (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL)) goto split; /* - * If this is a root page, then we don't have an address and we - * have to create a sync point. The address was cleared when - * we were about to write the buffer so we know what to do here. + * A root page, we don't have an address and we have to create + * a sync point. The address was cleared when we were about to + * write the buffer so we know what to do here. */ if (bnd->addr.addr == NULL) WT_RET(__wt_bt_write(session, &r->disk_image, - NULL, NULL, true, bnd->already_compressed)); + NULL, NULL, true, F_ISSET(r, WT_CHECKPOINTING), + bnd->already_compressed)); else { mod->mod_replace = bnd->addr; bnd->addr.addr = NULL; + + mod->mod_disk_image = bnd->disk_image; + bnd->disk_image = NULL; } mod->rec_result = WT_PM_REC_REPLACE; @@ -5805,19 +5827,26 @@ __rec_split_row(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_RET(__wt_row_ikey_alloc(session, 0, bnd->key.data, bnd->key.size, &multi->key.ikey)); - if (bnd->disk_image == NULL) { - multi->addr = bnd->addr; - multi->addr.reuse = 0; - multi->size = bnd->size; - multi->cksum = bnd->cksum; - bnd->addr.addr = NULL; - } else { + /* + * Copy any disk image. Don't take saved updates without a + * disk image (which happens if they have been saved to the + * lookaside table): they should be discarded along with the + * original page. + */ + multi->disk_image = bnd->disk_image; + bnd->disk_image = NULL; + if (multi->disk_image != NULL) { multi->supd = bnd->supd; multi->supd_entries = bnd->supd_next; bnd->supd = NULL; - multi->disk_image = bnd->disk_image; - bnd->disk_image = NULL; } + + /* Copy any address. 
*/ + multi->addr = bnd->addr; + multi->addr.reuse = 0; + multi->size = bnd->size; + multi->cksum = bnd->cksum; + bnd->addr.addr = NULL; } mod->mod_multi_entries = r->bnd_next; @@ -5845,19 +5874,26 @@ __rec_split_col(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) bnd = r->bnd, i = 0; i < r->bnd_next; ++multi, ++bnd, ++i) { multi->key.recno = bnd->recno; - if (bnd->disk_image == NULL) { - multi->addr = bnd->addr; - multi->addr.reuse = 0; - multi->size = bnd->size; - multi->cksum = bnd->cksum; - bnd->addr.addr = NULL; - } else { + /* + * Copy any disk image. Don't take saved updates without a + * disk image (which happens if they have been saved to the + * lookaside table): they should be discarded along with the + * original page. + */ + multi->disk_image = bnd->disk_image; + bnd->disk_image = NULL; + if (multi->disk_image != NULL) { multi->supd = bnd->supd; multi->supd_entries = bnd->supd_next; bnd->supd = NULL; - multi->disk_image = bnd->disk_image; - bnd->disk_image = NULL; } + + /* Copy any address. */ + multi->addr = bnd->addr; + multi->addr.reuse = 0; + multi->size = bnd->size; + multi->cksum = bnd->cksum; + bnd->addr.addr = NULL; } mod->mod_multi_entries = r->bnd_next; @@ -6133,7 +6169,8 @@ __rec_cell_build_ovfl(WT_SESSION_IMPL *session, /* Write the buffer. */ addr = buf; - WT_ERR(__wt_bt_write(session, tmp, addr, &size, false, false)); + WT_ERR(__wt_bt_write(session, tmp, + addr, &size, false, F_ISSET(r, WT_CHECKPOINTING), false)); /* * Track the overflow record (unless it's a bulk load, which diff --git a/src/third_party/wiredtiger/src/schema/schema_rename.c b/src/third_party/wiredtiger/src/schema/schema_rename.c index 8f4d374fd22..bc92c882117 100644 --- a/src/third_party/wiredtiger/src/schema/schema_rename.c +++ b/src/third_party/wiredtiger/src/schema/schema_rename.c @@ -64,7 +64,7 @@ __rename_file( WT_ERR(__wt_metadata_insert(session, newuri, oldvalue)); /* Rename the underlying file. 
*/ - WT_ERR(__wt_fs_rename(session, filename, newfile)); + WT_ERR(__wt_fs_rename(session, filename, newfile, false)); if (WT_META_TRACKING(session)) WT_ERR(__wt_meta_track_fileop(session, uri, newuri)); diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c index 77d1dc74c84..0072d7e1445 100644 --- a/src/third_party/wiredtiger/src/session/session_api.c +++ b/src/third_party/wiredtiger/src/session/session_api.c @@ -17,12 +17,8 @@ static int __session_rollback_transaction(WT_SESSION *, const char *); * Unsupported session method. */ int -__wt_session_notsup(WT_SESSION *wt_session) +__wt_session_notsup(WT_SESSION_IMPL *session) { - WT_SESSION_IMPL *session; - - session = (WT_SESSION_IMPL *)wt_session; - WT_RET_MSG(session, ENOTSUP, "Unsupported session method"); } @@ -66,6 +62,17 @@ __wt_session_copy_values(WT_SESSION_IMPL *session) TAILQ_FOREACH(cursor, &session->cursors, q) if (F_ISSET(cursor, WT_CURSTD_VALUE_INT)) { +#ifdef HAVE_DIAGNOSTIC + /* + * We have to do this with a transaction ID pinned + * unless the cursor is reading from a checkpoint. 
+ */ + WT_TXN_STATE *txn_state = WT_SESSION_TXN_STATE(session); + WT_ASSERT(session, txn_state->snap_min != WT_TXN_NONE || + (WT_PREFIX_MATCH(cursor->uri, "file:") && + F_ISSET((WT_CURSOR_BTREE *)cursor, WT_CBT_NO_TXN))); +#endif + F_CLR(cursor, WT_CURSTD_VALUE_INT); WT_RET(__wt_buf_set(session, &cursor->value, cursor->value.data, cursor->value.size)); @@ -509,7 +516,11 @@ __session_create(WT_SESSION *wt_session, const char *uri, const char *config) ret = __wt_session_create(session, uri, config); -err: API_END_RET_NOTFOUND_MAP(session, ret); +err: if (ret != 0) + WT_STAT_FAST_CONN_INCR(session, session_table_create_fail); + else + WT_STAT_FAST_CONN_INCR(session, session_table_create_success); + API_END_RET_NOTFOUND_MAP(session, ret); } /* @@ -520,10 +531,18 @@ static int __session_create_readonly( WT_SESSION *wt_session, const char *uri, const char *config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(uri); WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, create); + + WT_STAT_FAST_CONN_INCR(session, session_table_create_fail); + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* @@ -570,9 +589,16 @@ err: API_END_RET(session, ret); static int __session_log_flush_readonly(WT_SESSION *wt_session, const char *config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, log_flush); + + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* @@ -605,9 +631,16 @@ static int __session_log_printf_readonly(WT_SESSION *wt_session, const char *fmt, ...) 
WT_GCC_FUNC_ATTRIBUTE((format (printf, 2, 3))) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(fmt); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, log_printf); + + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* @@ -630,7 +663,12 @@ __session_rebalance(WT_SESSION *wt_session, const char *uri, const char *config) ret = __wt_schema_worker(session, uri, __wt_bt_rebalance, NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_REBALANCE))); -err: API_END_RET_NOTFOUND_MAP(session, ret); +err: if (ret != 0) + WT_STAT_FAST_CONN_INCR(session, session_table_rebalance_fail); + else + WT_STAT_FAST_CONN_INCR(session, + session_table_rebalance_success); + API_END_RET_NOTFOUND_MAP(session, ret); } /* @@ -641,10 +679,18 @@ static int __session_rebalance_readonly( WT_SESSION *wt_session, const char *uri, const char *config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(uri); WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, rebalance); + + WT_STAT_FAST_CONN_INCR(session, session_table_rebalance_fail); + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* @@ -670,7 +716,11 @@ __session_rename(WT_SESSION *wt_session, WT_WITH_TABLE_LOCK(session, ret, ret = __wt_schema_rename(session, uri, newuri, cfg)))); -err: API_END_RET_NOTFOUND_MAP(session, ret); +err: if (ret != 0) + WT_STAT_FAST_CONN_INCR(session, session_table_rename_fail); + else + WT_STAT_FAST_CONN_INCR(session, session_table_rename_success); + API_END_RET_NOTFOUND_MAP(session, ret); } /* @@ -681,11 +731,19 @@ static int __session_rename_readonly(WT_SESSION *wt_session, const char *uri, const char *newuri, const char *config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(uri); WT_UNUSED(newuri); WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL 
*)wt_session; + SESSION_API_CALL_NOCONF(session, rename); + + WT_STAT_FAST_CONN_INCR(session, session_table_rename_fail); + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* @@ -733,8 +791,8 @@ __wt_session_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) F_SET(session, WT_SESSION_LOCK_NO_WAIT); /* - * The checkpoint lock only is needed to avoid a spurious EBUSY error - * return. + * Take the checkpoint lock if there is a need to prevent the drop + * operation from failing with EBUSY due to an ongoing checkpoint. */ if (checkpoint_wait) WT_WITH_CHECKPOINT_LOCK(session, ret, @@ -770,7 +828,12 @@ __session_drop(WT_SESSION *wt_session, const char *uri, const char *config) ret = __wt_session_drop(session, uri, cfg); -err: /* Note: drop operations cannot be unrolled (yet?). */ +err: if (ret != 0) + WT_STAT_FAST_CONN_INCR(session, session_table_drop_fail); + else + WT_STAT_FAST_CONN_INCR(session, session_table_drop_success); + + /* Note: drop operations cannot be unrolled (yet?). 
*/ API_END_RET_NOTFOUND_MAP(session, ret); } @@ -782,10 +845,18 @@ static int __session_drop_readonly( WT_SESSION *wt_session, const char *uri, const char *config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(uri); WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, drop); + + WT_STAT_FAST_CONN_INCR(session, session_table_drop_fail); + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* @@ -943,7 +1014,11 @@ __session_salvage(WT_SESSION *wt_session, const char *uri, const char *config) ret = __wt_schema_worker(session, uri, __wt_salvage, NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_SALVAGE))); -err: API_END_RET_NOTFOUND_MAP(session, ret); +err: if (ret != 0) + WT_STAT_FAST_CONN_INCR(session, session_table_salvage_fail); + else + WT_STAT_FAST_CONN_INCR(session, session_table_salvage_success); + API_END_RET_NOTFOUND_MAP(session, ret); } /* @@ -954,10 +1029,18 @@ static int __session_salvage_readonly( WT_SESSION *wt_session, const char *uri, const char *config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(uri); WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, salvage); + + WT_STAT_FAST_CONN_INCR(session, session_table_salvage_fail); + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* @@ -1135,6 +1218,10 @@ __session_truncate(WT_SESSION *wt_session, err: TXN_API_END_RETRY(session, ret, 0); + if (ret != 0) + WT_STAT_FAST_CONN_INCR(session, session_table_truncate_fail); + else + WT_STAT_FAST_CONN_INCR(session, session_table_truncate_success); /* * Only map WT_NOTFOUND to ENOENT if a URI was specified. 
*/ @@ -1149,12 +1236,20 @@ static int __session_truncate_readonly(WT_SESSION *wt_session, const char *uri, WT_CURSOR *start, WT_CURSOR *stop, const char *config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(uri); WT_UNUSED(start); WT_UNUSED(stop); WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, truncate); + + WT_STAT_FAST_CONN_INCR(session, session_table_truncate_fail); + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* @@ -1187,10 +1282,17 @@ static int __session_upgrade_readonly( WT_SESSION *wt_session, const char *uri, const char *config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(uri); WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, upgrade); + + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* @@ -1216,7 +1318,11 @@ __session_verify(WT_SESSION *wt_session, const char *uri, const char *config) ret = __wt_schema_worker(session, uri, __wt_verify, NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_VERIFY))); -err: API_END_RET_NOTFOUND_MAP(session, ret); +err: if (ret != 0) + WT_STAT_FAST_CONN_INCR(session, session_table_verify_fail); + else + WT_STAT_FAST_CONN_INCR(session, session_table_verify_success); + API_END_RET_NOTFOUND_MAP(session, ret); } /* @@ -1421,9 +1527,16 @@ err: API_END_RET(session, ret); static int __session_transaction_sync_readonly(WT_SESSION *wt_session, const char *config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, transaction_sync); + + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* @@ -1481,9 +1594,16 @@ err: API_END_RET_NOTFOUND_MAP(session, ret); static int __session_checkpoint_readonly(WT_SESSION *wt_session, const char 
*config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, checkpoint); + + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } /* diff --git a/src/third_party/wiredtiger/src/session/session_compact.c b/src/third_party/wiredtiger/src/session/session_compact.c index 3f7b34d132f..47ed5298304 100644 --- a/src/third_party/wiredtiger/src/session/session_compact.c +++ b/src/third_party/wiredtiger/src/session/session_compact.c @@ -333,6 +333,10 @@ err: session->compact = NULL; */ WT_TRET(__wt_session_release_resources(session)); + if (ret != 0) + WT_STAT_FAST_CONN_INCR(session, session_table_compact_fail); + else + WT_STAT_FAST_CONN_INCR(session, session_table_compact_success); API_END_RET_NOTFOUND_MAP(session, ret); } @@ -344,8 +348,16 @@ int __wt_session_compact_readonly( WT_SESSION *wt_session, const char *uri, const char *config) { + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UNUSED(uri); WT_UNUSED(config); - return (__wt_session_notsup(wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL_NOCONF(session, compact); + + WT_STAT_FAST_CONN_INCR(session, session_table_compact_fail); + ret = __wt_session_notsup(session); +err: API_END_RET(session, ret); } diff --git a/src/third_party/wiredtiger/src/support/err.c b/src/third_party/wiredtiger/src/support/err.c index 93c0af37328..60fc53cecd0 100644 --- a/src/third_party/wiredtiger/src/support/err.c +++ b/src/third_party/wiredtiger/src/support/err.c @@ -118,7 +118,13 @@ __handler_failure(WT_SESSION_IMPL *session, handler->handle_error(handler, wt_session, error, s) == 0) return; + /* + * In case there is a failure in the default error handler, make sure + * we don't recursively try to report *that* error. 
+ */ + session->event_handler = &__event_handler_default; (void)__handle_error_default(NULL, wt_session, error, s); + session->event_handler = handler; } /* diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index d972f0c140f..49cb3bebc07 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -43,6 +43,7 @@ static const char * const __stats_dsrc_desc[] = { "btree: pages rewritten by compaction", "btree: row-store internal pages", "btree: row-store leaf pages", + "cache: bytes currently in the cache", "cache: bytes read into cache", "cache: bytes written from cache", "cache: checkpoint blocked page eviction", @@ -173,6 +174,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats) stats->btree_compact_rewrite = 0; stats->btree_row_internal = 0; stats->btree_row_leaf = 0; + /* not clearing cache_bytes_inuse */ stats->cache_bytes_read = 0; stats->cache_bytes_write = 0; stats->cache_eviction_checkpoint = 0; @@ -300,6 +302,7 @@ __wt_stat_dsrc_aggregate_single( to->btree_compact_rewrite += from->btree_compact_rewrite; to->btree_row_internal += from->btree_row_internal; to->btree_row_leaf += from->btree_row_leaf; + to->cache_bytes_inuse += from->cache_bytes_inuse; to->cache_bytes_read += from->cache_bytes_read; to->cache_bytes_write += from->cache_bytes_write; to->cache_eviction_checkpoint += from->cache_eviction_checkpoint; @@ -433,6 +436,7 @@ __wt_stat_dsrc_aggregate( WT_STAT_READ(from, btree_compact_rewrite); to->btree_row_internal += WT_STAT_READ(from, btree_row_internal); to->btree_row_leaf += WT_STAT_READ(from, btree_row_leaf); + to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse); to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read); to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write); to->cache_eviction_checkpoint += @@ -542,9 +546,12 @@ static const char * const __stats_connection_desc[] = { "block-manager: blocks written", 
"block-manager: bytes read", "block-manager: bytes written", + "block-manager: bytes written for checkpoint", "block-manager: mapped blocks read", "block-manager: mapped bytes read", + "cache: bytes belonging to page images in the cache", "cache: bytes currently in the cache", + "cache: bytes not belonging to page images in the cache", "cache: bytes read into cache", "cache: bytes written from cache", "cache: checkpoint blocked page eviction", @@ -578,6 +585,8 @@ static const char * const __stats_connection_desc[] = { "cache: maximum page size at eviction", "cache: modified pages evicted", "cache: modified pages evicted by application threads", + "cache: overflow pages read into cache", + "cache: overflow values cached in memory", "cache: page split during eviction deepened the tree", "cache: page written requiring lookaside records", "cache: pages currently held in the cache", @@ -586,6 +595,7 @@ static const char * const __stats_connection_desc[] = { "cache: pages evicted by application threads", "cache: pages queued for eviction", "cache: pages queued for urgent eviction", + "cache: pages queued for urgent eviction during walk", "cache: pages read into cache", "cache: pages read into cache requiring lookaside entries", "cache: pages requested from the cache", @@ -597,7 +607,6 @@ static const char * const __stats_connection_desc[] = { "cache: percentage overhead", "cache: tracked bytes belonging to internal pages in the cache", "cache: tracked bytes belonging to leaf pages in the cache", - "cache: tracked bytes belonging to overflow pages in the cache", "cache: tracked dirty bytes in the cache", "cache: tracked dirty pages in the cache", "cache: unmodified pages evicted", @@ -677,6 +686,22 @@ static const char * const __stats_connection_desc[] = { "reconciliation: split objects currently awaiting free", "session: open cursor count", "session: open session count", + "session: table compact failed calls", + "session: table compact successful calls", + "session: 
table create failed calls", + "session: table create successful calls", + "session: table drop failed calls", + "session: table drop successful calls", + "session: table rebalance failed calls", + "session: table rebalance successful calls", + "session: table rename failed calls", + "session: table rename successful calls", + "session: table salvage failed calls", + "session: table salvage successful calls", + "session: table truncate failed calls", + "session: table truncate successful calls", + "session: table verify failed calls", + "session: table verify successful calls", "thread-state: active filesystem fsync calls", "thread-state: active filesystem read calls", "thread-state: active filesystem write calls", @@ -693,13 +718,13 @@ static const char * const __stats_connection_desc[] = { "transaction: transaction checkpoint max time (msecs)", "transaction: transaction checkpoint min time (msecs)", "transaction: transaction checkpoint most recent time (msecs)", + "transaction: transaction checkpoint scrub dirty target", + "transaction: transaction checkpoint scrub time (msecs)", "transaction: transaction checkpoint total time (msecs)", "transaction: transaction checkpoints", "transaction: transaction failures due to cache overflow", "transaction: transaction fsync calls for checkpoint after allocating the transaction ID", - "transaction: transaction fsync calls for checkpoint before allocating the transaction ID", "transaction: transaction fsync duration for checkpoint after allocating the transaction ID (usecs)", - "transaction: transaction fsync duration for checkpoint before allocating the transaction ID (usecs)", "transaction: transaction range of IDs currently pinned", "transaction: transaction range of IDs currently pinned by a checkpoint", "transaction: transaction range of IDs currently pinned by named snapshots", @@ -764,9 +789,12 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->block_write = 0; stats->block_byte_read = 0; 
stats->block_byte_write = 0; + stats->block_byte_write_checkpoint = 0; stats->block_map_read = 0; stats->block_byte_map_read = 0; + /* not clearing cache_bytes_image */ /* not clearing cache_bytes_inuse */ + /* not clearing cache_bytes_other */ stats->cache_bytes_read = 0; stats->cache_bytes_write = 0; stats->cache_eviction_checkpoint = 0; @@ -800,6 +828,8 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) /* not clearing cache_eviction_maximum_page_size */ stats->cache_eviction_dirty = 0; stats->cache_eviction_app_dirty = 0; + stats->cache_read_overflow = 0; + stats->cache_overflow_value = 0; stats->cache_eviction_deepen = 0; stats->cache_write_lookaside = 0; /* not clearing cache_pages_inuse */ @@ -807,6 +837,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cache_eviction_force_delete = 0; stats->cache_eviction_app = 0; stats->cache_eviction_pages_queued = 0; + stats->cache_eviction_pages_queued_urgent = 0; stats->cache_eviction_pages_queued_oldest = 0; stats->cache_read = 0; stats->cache_read_lookaside = 0; @@ -819,7 +850,6 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) /* not clearing cache_overhead */ /* not clearing cache_bytes_internal */ /* not clearing cache_bytes_leaf */ - /* not clearing cache_bytes_overflow */ /* not clearing cache_bytes_dirty */ /* not clearing cache_pages_dirty */ stats->cache_eviction_clean = 0; @@ -899,9 +929,25 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) /* not clearing rec_split_stashed_objects */ /* not clearing session_cursor_open */ /* not clearing session_open */ - /* not clearing fsync_active */ - /* not clearing read_active */ - /* not clearing write_active */ + /* not clearing session_table_compact_fail */ + /* not clearing session_table_compact_success */ + /* not clearing session_table_create_fail */ + /* not clearing session_table_create_success */ + /* not clearing session_table_drop_fail */ + /* not clearing session_table_drop_success */ + /* 
not clearing session_table_rebalance_fail */ + /* not clearing session_table_rebalance_success */ + /* not clearing session_table_rename_fail */ + /* not clearing session_table_rename_success */ + /* not clearing session_table_salvage_fail */ + /* not clearing session_table_salvage_success */ + /* not clearing session_table_truncate_fail */ + /* not clearing session_table_truncate_success */ + /* not clearing session_table_verify_fail */ + /* not clearing session_table_verify_success */ + /* not clearing thread_fsync_active */ + /* not clearing thread_read_active */ + /* not clearing thread_write_active */ stats->page_busy_blocked = 0; stats->page_forcible_evict_blocked = 0; stats->page_locked_blocked = 0; @@ -915,13 +961,13 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) /* not clearing txn_checkpoint_time_max */ /* not clearing txn_checkpoint_time_min */ /* not clearing txn_checkpoint_time_recent */ + /* not clearing txn_checkpoint_scrub_target */ + /* not clearing txn_checkpoint_scrub_time */ /* not clearing txn_checkpoint_time_total */ stats->txn_checkpoint = 0; stats->txn_fail_cache = 0; stats->txn_checkpoint_fsync_post = 0; - stats->txn_checkpoint_fsync_pre = 0; - stats->txn_checkpoint_fsync_post_duration = 0; - stats->txn_checkpoint_fsync_pre_duration = 0; + /* not clearing txn_checkpoint_fsync_post_duration */ /* not clearing txn_pinned_range */ /* not clearing txn_pinned_checkpoint_range */ /* not clearing txn_pinned_snapshot_range */ @@ -978,9 +1024,13 @@ __wt_stat_connection_aggregate( to->block_write += WT_STAT_READ(from, block_write); to->block_byte_read += WT_STAT_READ(from, block_byte_read); to->block_byte_write += WT_STAT_READ(from, block_byte_write); + to->block_byte_write_checkpoint += + WT_STAT_READ(from, block_byte_write_checkpoint); to->block_map_read += WT_STAT_READ(from, block_map_read); to->block_byte_map_read += WT_STAT_READ(from, block_byte_map_read); + to->cache_bytes_image += WT_STAT_READ(from, cache_bytes_image); 
to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse); + to->cache_bytes_other += WT_STAT_READ(from, cache_bytes_other); to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read); to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write); to->cache_eviction_checkpoint += @@ -1039,6 +1089,8 @@ __wt_stat_connection_aggregate( to->cache_eviction_dirty += WT_STAT_READ(from, cache_eviction_dirty); to->cache_eviction_app_dirty += WT_STAT_READ(from, cache_eviction_app_dirty); + to->cache_read_overflow += WT_STAT_READ(from, cache_read_overflow); + to->cache_overflow_value += WT_STAT_READ(from, cache_overflow_value); to->cache_eviction_deepen += WT_STAT_READ(from, cache_eviction_deepen); to->cache_write_lookaside += @@ -1050,6 +1102,8 @@ __wt_stat_connection_aggregate( to->cache_eviction_app += WT_STAT_READ(from, cache_eviction_app); to->cache_eviction_pages_queued += WT_STAT_READ(from, cache_eviction_pages_queued); + to->cache_eviction_pages_queued_urgent += + WT_STAT_READ(from, cache_eviction_pages_queued_urgent); to->cache_eviction_pages_queued_oldest += WT_STAT_READ(from, cache_eviction_pages_queued_oldest); to->cache_read += WT_STAT_READ(from, cache_read); @@ -1065,7 +1119,6 @@ __wt_stat_connection_aggregate( to->cache_overhead += WT_STAT_READ(from, cache_overhead); to->cache_bytes_internal += WT_STAT_READ(from, cache_bytes_internal); to->cache_bytes_leaf += WT_STAT_READ(from, cache_bytes_leaf); - to->cache_bytes_overflow += WT_STAT_READ(from, cache_bytes_overflow); to->cache_bytes_dirty += WT_STAT_READ(from, cache_bytes_dirty); to->cache_pages_dirty += WT_STAT_READ(from, cache_pages_dirty); to->cache_eviction_clean += WT_STAT_READ(from, cache_eviction_clean); @@ -1151,9 +1204,41 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, rec_split_stashed_objects); to->session_cursor_open += WT_STAT_READ(from, session_cursor_open); to->session_open += WT_STAT_READ(from, session_open); - to->fsync_active += WT_STAT_READ(from, fsync_active); - to->read_active 
+= WT_STAT_READ(from, read_active); - to->write_active += WT_STAT_READ(from, write_active); + to->session_table_compact_fail += + WT_STAT_READ(from, session_table_compact_fail); + to->session_table_compact_success += + WT_STAT_READ(from, session_table_compact_success); + to->session_table_create_fail += + WT_STAT_READ(from, session_table_create_fail); + to->session_table_create_success += + WT_STAT_READ(from, session_table_create_success); + to->session_table_drop_fail += + WT_STAT_READ(from, session_table_drop_fail); + to->session_table_drop_success += + WT_STAT_READ(from, session_table_drop_success); + to->session_table_rebalance_fail += + WT_STAT_READ(from, session_table_rebalance_fail); + to->session_table_rebalance_success += + WT_STAT_READ(from, session_table_rebalance_success); + to->session_table_rename_fail += + WT_STAT_READ(from, session_table_rename_fail); + to->session_table_rename_success += + WT_STAT_READ(from, session_table_rename_success); + to->session_table_salvage_fail += + WT_STAT_READ(from, session_table_salvage_fail); + to->session_table_salvage_success += + WT_STAT_READ(from, session_table_salvage_success); + to->session_table_truncate_fail += + WT_STAT_READ(from, session_table_truncate_fail); + to->session_table_truncate_success += + WT_STAT_READ(from, session_table_truncate_success); + to->session_table_verify_fail += + WT_STAT_READ(from, session_table_verify_fail); + to->session_table_verify_success += + WT_STAT_READ(from, session_table_verify_success); + to->thread_fsync_active += WT_STAT_READ(from, thread_fsync_active); + to->thread_read_active += WT_STAT_READ(from, thread_read_active); + to->thread_write_active += WT_STAT_READ(from, thread_write_active); to->page_busy_blocked += WT_STAT_READ(from, page_busy_blocked); to->page_forcible_evict_blocked += WT_STAT_READ(from, page_forcible_evict_blocked); @@ -1175,18 +1260,18 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, txn_checkpoint_time_min); to->txn_checkpoint_time_recent += 
WT_STAT_READ(from, txn_checkpoint_time_recent); + to->txn_checkpoint_scrub_target += + WT_STAT_READ(from, txn_checkpoint_scrub_target); + to->txn_checkpoint_scrub_time += + WT_STAT_READ(from, txn_checkpoint_scrub_time); to->txn_checkpoint_time_total += WT_STAT_READ(from, txn_checkpoint_time_total); to->txn_checkpoint += WT_STAT_READ(from, txn_checkpoint); to->txn_fail_cache += WT_STAT_READ(from, txn_fail_cache); to->txn_checkpoint_fsync_post += WT_STAT_READ(from, txn_checkpoint_fsync_post); - to->txn_checkpoint_fsync_pre += - WT_STAT_READ(from, txn_checkpoint_fsync_pre); to->txn_checkpoint_fsync_post_duration += WT_STAT_READ(from, txn_checkpoint_fsync_post_duration); - to->txn_checkpoint_fsync_pre_duration += - WT_STAT_READ(from, txn_checkpoint_fsync_pre_duration); to->txn_pinned_range += WT_STAT_READ(from, txn_pinned_range); to->txn_pinned_checkpoint_range += WT_STAT_READ(from, txn_pinned_checkpoint_range); diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index dd4384d9a9a..87b74433769 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -124,6 +124,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session) txn = &session->txn; txn_global = &conn->txn_global; txn_state = WT_SESSION_TXN_STATE(session); + n = 0; /* * Spin waiting for the lock: the sleeps in our blocking readlock @@ -137,20 +138,26 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session) current_id = snap_min = txn_global->current; prev_oldest_id = txn_global->oldest_id; + /* + * Include the checkpoint transaction, if one is running: we should + * ignore any uncommitted changes the checkpoint has written to the + * metadata. We don't have to keep the checkpoint's changes pinned so + * don't include it in the published snap_min. + */ + if ((id = txn_global->checkpoint_txnid) != WT_TXN_NONE) + txn->snapshot[n++] = id; + /* For pure read-only workloads, avoid scanning.
*/ if (prev_oldest_id == current_id) { txn_state->snap_min = current_id; - __txn_sort_snapshot(session, 0, current_id); - /* Check that the oldest ID has not moved in the meantime. */ WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id); - WT_RET(__wt_readunlock(session, txn_global->scan_rwlock)); - return (0); + goto done; } /* Walk the array of concurrent transactions. */ WT_ORDERED_READ(session_cnt, conn->session_cnt); - for (i = n = 0, s = txn_global->states; i < session_cnt; i++, s++) { + for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) { /* * Build our snapshot of any concurrent transaction IDs. * @@ -178,8 +185,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session) WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id); txn_state->snap_min = snap_min; - WT_RET(__wt_readunlock(session, txn_global->scan_rwlock)); - +done: WT_RET(__wt_readunlock(session, txn_global->scan_rwlock)); __txn_sort_snapshot(session, n, current_id); return (0); } @@ -433,18 +439,22 @@ __wt_txn_release(WT_SESSION_IMPL *session) WT_TXN_STATE *txn_state; txn = &session->txn; - WT_ASSERT(session, txn->mod_count == 0); - txn->notify = NULL; - txn_global = &S2C(session)->txn_global; txn_state = WT_SESSION_TXN_STATE(session); + WT_ASSERT(session, txn->mod_count == 0); + txn->notify = NULL; + /* Clear the transaction's ID from the global table. */ if (WT_SESSION_IS_CHECKPOINT(session)) { WT_ASSERT(session, txn_state->id == WT_TXN_NONE); - txn->id = WT_TXN_NONE; + txn->id = txn_global->checkpoint_txnid = WT_TXN_NONE; - /* Clear the global checkpoint transaction IDs. */ + /* + * Be extra careful to cleanup everything for checkpoints: once + * the global checkpoint ID is cleared, we can no longer tell + * if this session is doing a checkpoint. 
+ */ txn_global->checkpoint_id = 0; txn_global->checkpoint_pinned = WT_TXN_NONE; } else if (F_ISSET(txn, WT_TXN_HAS_ID)) { @@ -470,6 +480,7 @@ __wt_txn_release(WT_SESSION_IMPL *session) */ __wt_txn_release_snapshot(session); txn->isolation = session->isolation; + /* Ensure the transaction flags are cleared on exit */ txn->flags = 0; } @@ -487,10 +498,12 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_TXN *txn; WT_TXN_OP *op; u_int i; + bool did_update; txn = &session->txn; conn = S2C(session); - WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || txn->mod_count == 0); + did_update = txn->mod_count != 0; + WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || !did_update); if (!F_ISSET(txn, WT_TXN_RUNNING)) WT_RET_MSG(session, EINVAL, "No transaction is active"); @@ -540,8 +553,18 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_TRET(txn->notify->notify(txn->notify, (WT_SESSION *)session, txn->id, 1)); + /* + * We are about to release the snapshot: copy values into any + * positioned cursors so they don't point to updates that could be + * freed once we don't have a snapshot. + */ + if (session->ncursors > 0) { + WT_DIAGNOSTIC_YIELD; + WT_RET(__wt_session_copy_values(session)); + } + /* If we are logging, write a commit log record. */ - if (ret == 0 && txn->mod_count > 0 && + if (ret == 0 && did_update && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) && !F_ISSET(session, WT_SESSION_NO_LOGGING)) { /* @@ -569,14 +592,6 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) __wt_txn_op_free(session, op); txn->mod_count = 0; - /* - * We are about to release the snapshot: copy values into any - * positioned cursors so they don't point to updates that could be - * freed once we don't have a transaction ID pinned. 
- */ - if (session->ncursors > 0) - WT_RET(__wt_session_copy_values(session)); - __wt_txn_release(session); return (0); } diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index 51d26b9aed6..c23f293154a 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -10,14 +10,16 @@ static int __checkpoint_lock_tree( WT_SESSION_IMPL *, bool, bool, const char *[]); +static int __checkpoint_mark_deletes(WT_SESSION_IMPL *, const char *[]); +static int __checkpoint_presync(WT_SESSION_IMPL *, const char *[]); static int __checkpoint_tree_helper(WT_SESSION_IMPL *, const char *[]); /* - * __wt_checkpoint_name_ok -- + * __checkpoint_name_ok -- * Complain if the checkpoint name isn't acceptable. */ -int -__wt_checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len) +static int +__checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len) { /* Check for characters we don't want to see in a metadata file. */ WT_RET(__wt_name_check(session, name, len)); @@ -107,7 +109,7 @@ __checkpoint_apply_all(WT_SESSION_IMPL *session, const char *cfg[], WT_RET(__wt_config_gets(session, cfg, "name", &cval)); named = cval.len != 0; if (named) - WT_RET(__wt_checkpoint_name_ok(session, cval.str, cval.len)); + WT_RET(__checkpoint_name_ok(session, cval.str, cval.len)); /* Step through the targets and optionally operate on each one. */ WT_ERR(__wt_config_gets(session, cfg, "target", &cval)); @@ -183,6 +185,8 @@ __checkpoint_apply(WT_SESSION_IMPL *session, const char *cfg[], /* If we have already locked the handles, apply the operation. 
*/ for (i = 0; i < session->ckpt_handle_next; ++i) { + if (session->ckpt_handle[i] == NULL) + continue; WT_WITH_DHANDLE(session, session->ckpt_handle[i], ret = (*op)(session, cfg)); WT_RET(ret); @@ -234,6 +238,7 @@ __checkpoint_data_source(WT_SESSION_IMPL *session, const char *cfg[]) int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) { + WT_BTREE *btree; WT_DECL_RET; const char *name; @@ -258,6 +263,14 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) if ((ret = __wt_session_get_btree(session, name, NULL, NULL, 0)) != 0) return (ret == EBUSY ? 0 : ret); + /* + * Save the current eviction walk setting: checkpoint can interfere + * with eviction and we don't want to unfairly penalize (or promote) + * eviction in trees due to checkpoints. + */ + btree = S2BT(session); + btree->evict_walk_saved = btree->evict_walk_period; + WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_tree(session, true, true, cfg)); if (ret != 0) { @@ -265,20 +278,164 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) return (ret); } + /* + * Flag that the handle is part of a checkpoint for the purposes + * of transaction visibility checks. + */ + WT_PUBLISH(btree->include_checkpoint_txn, true); + session->ckpt_handle[session->ckpt_handle_next++] = session->dhandle; return (0); } /* - * __checkpoint_write_leaves -- - * Write any dirty leaf pages for all checkpoint handles. + * __checkpoint_update_generation -- + * Update the checkpoint generation of the current tree. + * + * This indicates that the tree will not be visited again by the current + * checkpoint. 
+ */ +static void +__checkpoint_update_generation(WT_SESSION_IMPL *session) +{ + WT_BTREE *btree; + + btree = S2BT(session); + if (!WT_IS_METADATA(session, session->dhandle)) + WT_PUBLISH(btree->include_checkpoint_txn, false); + + WT_PUBLISH(btree->checkpoint_gen, + S2C(session)->txn_global.checkpoint_gen); + WT_STAT_FAST_DATA_SET(session, + btree_checkpoint_generation, btree->checkpoint_gen); +} + +/* + * __checkpoint_reduce_dirty_cache -- + * Reduce the amount of dirty data in cache before the checkpoint starts. */ static int -__checkpoint_write_leaves(WT_SESSION_IMPL *session, const char *cfg[]) +__checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session) { - WT_UNUSED(cfg); + WT_CACHE *cache; + WT_CONNECTION_IMPL *conn; + struct timespec start, last, stop; + u_int current_dirty; + uint64_t bytes_written_last, bytes_written_start, bytes_written_total; + uint64_t current_us, stepdown_us, total_ms; + bool progress; + + conn = S2C(session); + cache = conn->cache; + + WT_RET(__wt_epoch(session, &start)); + last = start; + bytes_written_last = 0; + bytes_written_start = cache->bytes_written; + stepdown_us = 10000; + progress = false; + + /* Step down the dirty target to the eviction trigger */ + for (;;) { + current_dirty = (u_int)((100 * + __wt_cache_dirty_leaf_inuse(cache)) / conn->cache_size); + if (current_dirty <= cache->eviction_dirty_target) + break; + + __wt_sleep(0, stepdown_us / 4); + WT_RET(__wt_epoch(session, &stop)); + current_us = WT_TIMEDIFF_US(stop, last); + total_ms = WT_TIMEDIFF_MS(stop, start); + bytes_written_total = + cache->bytes_written - bytes_written_start; - return (__wt_cache_op(session, WT_SYNC_WRITE_LEAVES)); + /* + * Estimate how long the next step down of 1% of dirty data + * should take. + * + * The calculation here assumes that the system is writing from + * cache as fast as it can, and determines the write throughput + * based on the change in the bytes written from cache since + * the start of the call.
We use that to estimate how long it + * will take to step the dirty target down by 1%. + * + * Take care to avoid dividing by zero. + */ + if (bytes_written_total - bytes_written_last > WT_MEGABYTE && + bytes_written_total > total_ms && total_ms > 0 && + (!progress || + current_dirty <= cache->eviction_dirty_trigger)) { + stepdown_us = (uint64_t)(WT_THOUSAND * ( + (double)(conn->cache_size / 100) / + (double)(bytes_written_total / total_ms))); + if (!progress) + stepdown_us = WT_MIN(stepdown_us, 200000); + } + + bytes_written_last = bytes_written_total; + + if (current_dirty <= cache->eviction_dirty_trigger) { + progress = true; + + /* + * Smooth out step down: try to limit the impact on + * performance to 10% by waiting once we reach the last + * level. + */ + __wt_sleep(0, 10 * stepdown_us); + cache->eviction_dirty_trigger = current_dirty - 1; + WT_STAT_FAST_CONN_SET(session, + txn_checkpoint_scrub_target, current_dirty - 1); + WT_RET(__wt_epoch(session, &last)); + continue; + } + + /* + * We haven't reached the current target. + * + * Don't wait indefinitely: there might be dirty pages that + * can't be evicted. If we can't meet the target, give up + * and start the checkpoint for real. + */ + if (current_us > 10 * stepdown_us) + break; + } + + WT_RET(__wt_epoch(session, &stop)); + total_ms = WT_TIMEDIFF_MS(stop, start); + WT_STAT_FAST_CONN_SET(session, txn_checkpoint_scrub_time, total_ms); + + return (0); +} + +/* + * __checkpoint_release_clean_trees -- + * Release clean trees from the list cached for checkpoints. 
+ */ +static int +__checkpoint_release_clean_trees(WT_SESSION_IMPL *session) +{ + WT_BTREE *btree; + WT_DATA_HANDLE *dhandle; + WT_DECL_RET; + u_int i; + + for (i = 0; i < session->ckpt_handle_next; i++) { + dhandle = session->ckpt_handle[i]; + btree = dhandle->handle; + if (!F_ISSET(btree, WT_BTREE_SKIP_CKPT)) + continue; + __wt_meta_ckptlist_free(session, btree->ckpt); + btree->ckpt = NULL; + WT_WITH_DHANDLE(session, dhandle, + __checkpoint_update_generation(session)); + session->ckpt_handle[i] = NULL; + WT_WITH_DHANDLE(session, dhandle, + ret = __wt_session_release_btree(session)); + WT_RET(ret); + } + + return (0); } /* @@ -352,6 +509,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) { struct timespec fsync_start, fsync_stop; struct timespec start, stop, verb_timer; + WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_TXN *txn; @@ -359,13 +517,15 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_TXN_ISOLATION saved_isolation; WT_TXN_STATE *txn_state; void *saved_meta_next; - u_int i; + u_int i, orig_trigger; uint64_t fsync_duration_usecs; bool full, idle, logging, tracking; const char *txn_cfg[] = { WT_CONFIG_BASE(session, WT_SESSION_begin_transaction), "isolation=snapshot", NULL }; conn = S2C(session); + cache = conn->cache; + orig_trigger = cache->eviction_dirty_trigger; txn = &session->txn; txn_global = &conn->txn_global; txn_state = WT_SESSION_TXN_STATE(session); @@ -384,21 +544,8 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) /* Configure logging only if doing a full checkpoint. */ logging = FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED); - /* Keep track of handles acquired for locking. */ - WT_ERR(__wt_meta_track_on(session)); - tracking = true; - - /* - * Get a list of handles we want to flush; this may pull closed objects - * into the session cache, but we're going to do that eventually anyway. 
- */ - WT_ASSERT(session, session->ckpt_handle_next == 0); - WT_WITH_SCHEMA_LOCK(session, ret, - WT_WITH_TABLE_LOCK(session, ret, - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __checkpoint_apply_all( - session, cfg, __wt_checkpoint_get_handles, NULL)))); - WT_ERR(ret); + /* Reset the maximum page size seen by eviction. */ + conn->cache->evict_max_page_size = 0; /* * Update the global oldest ID so we do all possible cleanup. @@ -412,28 +559,11 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) /* Flush data-sources before we start the checkpoint. */ WT_ERR(__checkpoint_data_source(session, cfg)); - WT_ERR(__wt_epoch(session, &verb_timer)); - WT_ERR(__checkpoint_verbose_track(session, - "starting write leaves", &verb_timer)); - - /* Flush dirty leaf pages before we start the checkpoint. */ - session->isolation = txn->isolation = WT_ISO_READ_COMMITTED; - WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_write_leaves)); - /* - * The underlying flush routine scheduled an asynchronous flush - * after writing the leaf pages, but in order to minimize I/O - * while holding the schema lock, do a flush and wait for the - * completion. Do it after flushing the pages to give the - * asynchronous flush as much time as possible before we wait. + * Try to reduce the amount of dirty data in cache so there is less + * work to do during the critical section of the checkpoint. */ - WT_ERR(__wt_epoch(session, &fsync_start)); - WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync)); - WT_ERR(__wt_epoch(session, &fsync_stop)); - fsync_duration_usecs = WT_TIMEDIFF_US(fsync_stop, fsync_start); - WT_STAT_FAST_CONN_INCR(session, txn_checkpoint_fsync_pre); - WT_STAT_FAST_CONN_INCRV(session, - txn_checkpoint_fsync_pre_duration, fsync_duration_usecs); + WT_ERR(__checkpoint_reduce_dirty_cache(session)); /* Tell logging that we are about to start a database checkpoint.
*/ if (full && logging) @@ -462,6 +592,36 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_STAT_FAST_CONN_SET(session, txn_checkpoint_generation, txn_global->checkpoint_gen); + /* Keep track of handles acquired for locking. */ + WT_ERR(__wt_meta_track_on(session)); + tracking = true; + + /* + * Get a list of handles we want to flush; for named checkpoints this + * may pull closed objects into the session cache. + * + * We want to skip checkpointing clean handles whenever possible. That + * is, when the checkpoint is not named or forced. However, we need to + * take care about ordering with respect to the checkpoint transaction. + * + * If we skip clean handles before starting the transaction, the + * checkpoint can miss updates in trees that become dirty as the + * checkpoint is starting. If we wait until the transaction has + * started before locking a handle, there could be a metadata-changing + * operation in between (e.g., salvage) that will cause a write + * conflict when the checkpoint goes to write the metadata. + * + * First, gather all handles, then start the checkpoint transaction, + * then release any clean handles. + */ + WT_ASSERT(session, session->ckpt_handle_next == 0); + WT_WITH_SCHEMA_LOCK(session, ret, + WT_WITH_TABLE_LOCK(session, ret, + WT_WITH_HANDLE_LIST_LOCK(session, + ret = __checkpoint_apply_all( + session, cfg, __wt_checkpoint_get_handles, NULL)))); + WT_ERR(ret); + /* * Start a snapshot transaction for the checkpoint. * @@ -475,21 +635,22 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(__wt_txn_id_check(session)); /* - * Save the checkpoint session ID. We never do checkpoints in the - * default session (with id zero). + * Save the checkpoint session ID. + * + * We never do checkpoints in the default session (with id zero). 
*/ WT_ASSERT(session, session->id != 0 && txn_global->checkpoint_id == 0); txn_global->checkpoint_id = session->id; - txn_global->checkpoint_pinned = - WT_MIN(txn_state->id, txn_state->snap_min); - /* - * We're about to clear the checkpoint transaction from the global - * state table so the oldest ID can move forward. Make sure everything - * we've done above is scheduled. + * Remove the checkpoint transaction from the global table. + * + * This allows ordinary visibility checks to move forward because + * checkpoints often take a long time and only write to the metadata. */ - WT_FULL_BARRIER(); + WT_ERR(__wt_writelock(session, txn_global->scan_rwlock)); + txn_global->checkpoint_txnid = txn->id; + txn_global->checkpoint_pinned = WT_MIN(txn->id, txn->snap_min); /* * Sanity check that the oldest ID hasn't moved on before we have @@ -507,6 +668,25 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) * details). */ txn_state->id = txn_state->snap_min = WT_TXN_NONE; + WT_ERR(__wt_writeunlock(session, txn_global->scan_rwlock)); + + /* + * Unblock updates -- we can figure out that any updates to clean pages + * after this point are too new to be written in the checkpoint. + */ + cache->eviction_dirty_trigger = orig_trigger; + WT_STAT_FAST_CONN_SET( + session, txn_checkpoint_scrub_target, orig_trigger); + + /* + * Mark old checkpoints that are being deleted and figure out which + * trees we can skip in this checkpoint. + * + * Release clean trees. Any updates made after this point will not + * be visible to the checkpoint transaction. + */ + WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_mark_deletes)); + WT_ERR(__checkpoint_release_clean_trees(session)); /* Tell logging that we have started a database checkpoint. */ if (full && logging) @@ -522,9 +702,13 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) */ session->dhandle = NULL; - /* Release the snapshot so we aren't pinning pages in cache.
*/ + /* Release the snapshot so we aren't pinning updates in cache. */ __wt_txn_release_snapshot(session); + /* Mark all trees as open for business (particularly eviction). */ + WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_presync)); + WT_ERR(__wt_evict_server_wake(session)); + WT_ERR(__checkpoint_verbose_track(session, "committing transaction", &verb_timer)); @@ -587,6 +771,12 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) ret = __wt_txn_checkpoint_log( session, false, WT_TXN_LOG_CKPT_SYNC, NULL)); + /* + * Now that the metadata is stable, re-open the metadata file for + * regular eviction by clearing the checkpoint_pinned flag. + */ + txn_global->checkpoint_pinned = WT_TXN_NONE; + if (full) { WT_ERR(__wt_epoch(session, &stop)); __checkpoint_stats(session, &start, &stop); @@ -609,6 +799,10 @@ err: /* if (tracking) WT_TRET(__wt_meta_track_off(session, false, ret != 0)); + cache->eviction_dirty_trigger = orig_trigger; + WT_STAT_FAST_CONN_SET( + session, txn_checkpoint_scrub_target, orig_trigger); + if (F_ISSET(txn, WT_TXN_RUNNING)) { /* * Clear the dhandle so the visibility check doesn't get @@ -634,9 +828,12 @@ err: /* WT_TXN_LOG_CKPT_STOP : WT_TXN_LOG_CKPT_CLEANUP, NULL)); } - for (i = 0; i < session->ckpt_handle_next; ++i) + for (i = 0; i < session->ckpt_handle_next; ++i) { + if (session->ckpt_handle[i] == NULL) + continue; WT_WITH_DHANDLE(session, session->ckpt_handle[i], WT_TRET(__wt_session_release_btree(session))); + } __wt_free(session, session->ckpt_handle); session->ckpt_handle_allocated = session->ckpt_handle_next = 0; @@ -836,7 +1033,7 @@ __checkpoint_lock_tree(WT_SESSION_IMPL *session, if (cval.len == 0) name = WT_CHECKPOINT; else { - WT_ERR(__wt_checkpoint_name_ok(session, cval.str, cval.len)); + WT_ERR(__checkpoint_name_ok(session, cval.str, cval.len)); WT_ERR(__wt_strndup(session, cval.str, cval.len, &name_alloc)); name = name_alloc; } @@ -851,10 +1048,10 @@ __checkpoint_lock_tree(WT_SESSION_IMPL *session, 
__wt_config_next(&dropconf, &k, &v)) == 0) { /* Disallow unsafe checkpoint names. */ if (v.len == 0) - WT_ERR(__wt_checkpoint_name_ok( + WT_ERR(__checkpoint_name_ok( session, k.str, k.len)); else - WT_ERR(__wt_checkpoint_name_ok( + WT_ERR(__checkpoint_name_ok( session, v.str, v.len)); if (v.len == 0) @@ -986,42 +1183,23 @@ err: if (hot_backup_locked) } /* - * __checkpoint_tree -- - * Checkpoint a single tree. - * Assumes all necessary locks have been acquired by the caller. + * __checkpoint_mark_deletes -- + * Figure out what old checkpoints will be deleted, and whether the + * checkpoint can be skipped entirely. */ static int -__checkpoint_tree( - WT_SESSION_IMPL *session, bool is_checkpoint, const char *cfg[]) +__checkpoint_mark_deletes( + WT_SESSION_IMPL *session, const char *cfg[]) { - WT_BM *bm; WT_BTREE *btree; WT_CKPT *ckpt, *ckptbase; WT_CONFIG_ITEM cval; - WT_CONNECTION_IMPL *conn; - WT_DATA_HANDLE *dhandle; - WT_DECL_RET; - WT_LSN ckptlsn; const char *name; - int deleted, was_modified; - bool fake_ckpt, force; + int deleted; + bool force; btree = S2BT(session); - bm = btree->bm; ckptbase = btree->ckpt; - conn = S2C(session); - dhandle = session->dhandle; - fake_ckpt = false; - was_modified = btree->modified; - - /* - * Set the checkpoint LSN to the maximum LSN so that if logging is - * disabled, recovery will never roll old changes forward over the - * non-logged changes in this checkpoint. If logging is enabled, a - * real checkpoint LSN will be assigned for this checkpoint and - * overwrite this. - */ - WT_MAX_LSN(&ckptlsn); /* * Check for clean objects not requiring a checkpoint. 
@@ -1050,20 +1228,15 @@ __checkpoint_tree( force = false; F_CLR(btree, WT_BTREE_SKIP_CKPT); if (!btree->modified && cfg != NULL) { - ret = __wt_config_gets(session, cfg, "force", &cval); - if (ret != 0 && ret != WT_NOTFOUND) - WT_ERR(ret); - if (ret == 0 && cval.val != 0) - force = true; + WT_RET(__wt_config_gets(session, cfg, "force", &cval)); + force = cval.val != 0; } if (!btree->modified && !force) { - if (!is_checkpoint) - goto nockpt; - deleted = 0; WT_CKPT_FOREACH(ckptbase, ckpt) if (F_ISSET(ckpt, WT_CKPT_DELETE)) ++deleted; + /* * Complicated test: if the tree is clean and last two * checkpoints have the same name (correcting for internal @@ -1077,17 +1250,52 @@ __checkpoint_tree( (strcmp(name, (ckpt - 2)->name) == 0 || (WT_PREFIX_MATCH(name, WT_CHECKPOINT) && WT_PREFIX_MATCH((ckpt - 2)->name, WT_CHECKPOINT)))) { -nockpt: F_SET(btree, WT_BTREE_SKIP_CKPT); - WT_PUBLISH(btree->checkpoint_gen, - S2C(session)->txn_global.checkpoint_gen); - WT_STAT_FAST_DATA_SET(session, - btree_checkpoint_generation, - btree->checkpoint_gen); - ret = 0; - goto err; + F_SET(btree, WT_BTREE_SKIP_CKPT); + return (0); } } + return (0); +} + +/* + * __checkpoint_tree -- + * Checkpoint a single tree. + * Assumes all necessary locks have been acquired by the caller. + */ +static int +__checkpoint_tree( + WT_SESSION_IMPL *session, bool is_checkpoint, const char *cfg[]) +{ + WT_BM *bm; + WT_BTREE *btree; + WT_CKPT *ckpt, *ckptbase; + WT_CONNECTION_IMPL *conn; + WT_DATA_HANDLE *dhandle; + WT_DECL_RET; + WT_LSN ckptlsn; + int was_modified; + bool fake_ckpt; + + WT_UNUSED(cfg); + + btree = S2BT(session); + bm = btree->bm; + ckptbase = btree->ckpt; + conn = S2C(session); + dhandle = session->dhandle; + fake_ckpt = false; + was_modified = btree->modified; + + /* + * Set the checkpoint LSN to the maximum LSN so that if logging is + * disabled, recovery will never roll old changes forward over the + * non-logged changes in this checkpoint. 
If logging is enabled, a + * real checkpoint LSN will be assigned for this checkpoint and + * overwrite this. + */ + WT_MAX_LSN(&ckptlsn); + /* * If an object has never been used (in other words, if it could become * a bulk-loaded file), then we must fake the checkpoint. This is good @@ -1183,10 +1391,10 @@ fake: /* /* * If we wrote a checkpoint (rather than faking one), pages may be - * available for re-use. If tracking enabled, defer making pages - * available until transaction end. The exception is if the handle - * is being discarded, in which case the handle will be gone by the - * time we try to apply or unroll the meta tracking event. + * available for re-use. If tracking is enabled, defer making pages + * available until transaction end. The exception is if the handle is + * being discarded, in which case the handle will be gone by the time + * we try to apply or unroll the meta tracking event. */ if (!fake_ckpt) { if (WT_META_TRACKING(session) && is_checkpoint) @@ -1214,13 +1422,59 @@ err: /* } /* + * __checkpoint_presync -- + * Visit all handles after the checkpoint writes are complete and before + * syncing. At this point, all trees should be completely open for + * business. + */ +static int +__checkpoint_presync(WT_SESSION_IMPL *session, const char *cfg[]) +{ + WT_BTREE *btree; + + WT_UNUSED(cfg); + + btree = S2BT(session); + WT_ASSERT(session, !btree->include_checkpoint_txn); + btree->evict_walk_period = btree->evict_walk_saved; + return (0); +} + +/* * __checkpoint_tree_helper -- * Checkpoint a tree (suitable for use in *_apply functions). */ static int __checkpoint_tree_helper(WT_SESSION_IMPL *session, const char *cfg[]) { - return (__checkpoint_tree(session, true, cfg)); + WT_BTREE *btree; + WT_DECL_RET; + + btree = S2BT(session); + + ret = __checkpoint_tree(session, true, cfg); + + /* + * Whatever happened, we aren't visiting this tree again in this + * checkpoint. Don't keep updates pinned any longer. 
+ */ + __checkpoint_update_generation(session); + + /* + * In case this tree was being skipped by the eviction server + * during the checkpoint, restore the previous state. + */ + btree->evict_walk_period = btree->evict_walk_saved; + + /* + * Wake the eviction server, in case application threads have + * stalled while the eviction server decided it couldn't make + * progress. Without this, application threads will be stalled + * until the eviction server next wakes. + */ + WT_TRET(__wt_evict_server_wake(session)); + + return (ret); } /* @@ -1242,6 +1496,9 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_tree(session, true, true, cfg)); WT_RET(ret); + WT_SAVE_DHANDLE(session, + ret = __checkpoint_mark_deletes(session, cfg)); + WT_RET(ret); return (__checkpoint_tree(session, true, cfg)); } @@ -1319,6 +1576,11 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_tree(session, false, need_tracking, NULL)); WT_ASSERT(session, ret == 0); + if (ret == 0) { + WT_SAVE_DHANDLE(session, + ret = __checkpoint_mark_deletes(session, NULL)); + WT_ASSERT(session, ret == 0); + } if (ret == 0) ret = __checkpoint_tree(session, false, NULL); diff --git a/src/third_party/wiredtiger/src/txn/txn_log.c b/src/third_party/wiredtiger/src/txn/txn_log.c index 470515244f3..e73ff00f5b7 100644 --- a/src/third_party/wiredtiger/src/txn/txn_log.c +++ b/src/third_party/wiredtiger/src/txn/txn_log.c @@ -329,7 +329,7 @@ __wt_txn_checkpoint_log( case WT_TXN_LOG_CKPT_START: /* Take a copy of the transaction snapshot. 
*/ txn->ckpt_nsnapshot = txn->snapshot_count; - recsize = txn->ckpt_nsnapshot * WT_INTPACK64_MAXSIZE; + recsize = (size_t)txn->ckpt_nsnapshot * WT_INTPACK64_MAXSIZE; WT_ERR(__wt_scr_alloc(session, recsize, &txn->ckpt_snapshot)); p = txn->ckpt_snapshot->mem; end = p + recsize; diff --git a/src/third_party/wiredtiger/src/utilities/util_dump.c b/src/third_party/wiredtiger/src/utilities/util_dump.c index da70aea35be..6344a90dddd 100644 --- a/src/third_party/wiredtiger/src/utilities/util_dump.c +++ b/src/third_party/wiredtiger/src/utilities/util_dump.c @@ -242,6 +242,7 @@ dump_table_config( char *p, **cfg, *_cfg[4] = {NULL, NULL, NULL, NULL}; p = NULL; + srch = NULL; cfg = &_cfg[3]; /* Get the table name. */ @@ -306,32 +307,31 @@ dump_table_config( WT_ERR(print_config(session, uri, cfg, json, true)); - if (complex_table) { - /* - * The underlying table configuration function needs a second - * cursor: open one before calling it, it makes error handling - * hugely simpler. - */ - if ((ret = session->open_cursor( - session, "metadata:", NULL, NULL, &srch)) != 0) - WT_ERR(util_cerr(cursor, "open_cursor", ret)); - - if ((ret = dump_table_config_complex( - session, cursor, srch, name, "colgroup:", json)) == 0) - ret = dump_table_config_complex( - session, cursor, srch, name, "index:", json); - - if ((tret = srch->close(srch)) != 0) { - tret = util_cerr(cursor, "close", tret); - if (ret == 0) - ret = tret; - } - } else if (json && printf( - " \"colgroups\" : [],\n" - " \"indices\" : []\n") < 0) + /* + * The underlying table configuration function needs a second + * cursor: open one before calling it, it makes error handling + * hugely simpler. 
+ */ + if ((ret = session->open_cursor( + session, "metadata:", NULL, NULL, &srch)) != 0) + WT_ERR(util_cerr(cursor, "open_cursor", ret)); + + if (complex_table) + WT_ERR(dump_table_config_complex( + session, cursor, srch, name, "colgroup:", json)); + else if (json && printf( + " \"colgroups\" : [],\n") < 0) WT_ERR(util_cerr(cursor, NULL, EIO)); -err: free(p); + WT_ERR(dump_table_config_complex( + session, cursor, srch, name, "index:", json)); + +err: if (srch != NULL && (tret = srch->close(srch)) != 0) { + tret = util_cerr(cursor, "close", tret); + if (ret == 0) + ret = tret; + } + free(p); free(_cfg[0]); free(_cfg[1]); free(_cfg[2]); diff --git a/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c b/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c index a2185dd123f..58da49b2991 100644 --- a/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c +++ b/src/third_party/wiredtiger/test/cursor_order/cursor_order_ops.c @@ -130,7 +130,8 @@ ops_start(SHARED_CONFIG *cfg) seconds = (stop.tv_sec - start.tv_sec) + (stop.tv_usec - start.tv_usec) * 1e-6; fprintf(stderr, "timer: %.2lf seconds (%d ops/second)\n", - seconds, (int)(((cfg->reverse_scanners + cfg->append_inserters) * + seconds, (int) + (((double)(cfg->reverse_scanners + cfg->append_inserters) * total_nops) / seconds)); /* Verify the files. */ diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c index c97d82809a1..283e2912daa 100644 --- a/src/third_party/wiredtiger/test/format/ops.c +++ b/src/third_party/wiredtiger/test/format/ops.c @@ -342,7 +342,7 @@ snap_check(WT_CURSOR *cursor, switch (g.type) { case FIX: testutil_die(ret, - "snap_check: %" PRIu64 " search: " + "snapshot-isolation: %" PRIu64 " search: " "expected {0x%02x}, found {0x%02x}", start->keyno, start->deleted ? 
0 : *(uint8_t *)start->vdata, @@ -350,7 +350,7 @@ snap_check(WT_CURSOR *cursor, /* NOTREACHED */ case ROW: testutil_die(ret, - "snap_check: %.*s search: " + "snapshot-isolation: %.*s search: " "expected {%.*s}, found {%.*s}", (int)key->size, key->data, start->deleted ? @@ -362,7 +362,7 @@ snap_check(WT_CURSOR *cursor, /* NOTREACHED */ case VAR: testutil_die(ret, - "snap_check: %" PRIu64 " search: " + "snapshot-isolation: %" PRIu64 " search: " "expected {%.*s}, found {%.*s}", start->keyno, start->deleted ? diff --git a/src/third_party/wiredtiger/test/format/smoke.sh b/src/third_party/wiredtiger/test/format/smoke.sh index 5fbc349f242..0c86b5e57c6 100755 --- a/src/third_party/wiredtiger/test/format/smoke.sh +++ b/src/third_party/wiredtiger/test/format/smoke.sh @@ -3,7 +3,7 @@ set -e # Smoke-test format as part of running "make check". -args="-1 -c "." data_source=table ops=100000 rows=10000 threads=4 compression=none logging_compression=none" +args="-1 -c "." data_source=table ops=50000 rows=10000 threads=4 compression=none logging_compression=none" $TEST_WRAPPER ./t $args file_type=fix $TEST_WRAPPER ./t $args file_type=row diff --git a/src/third_party/wiredtiger/test/manydbs/Makefile.am b/src/third_party/wiredtiger/test/manydbs/Makefile.am index 2bc47ad7f2e..ff5985cf2a4 100644 --- a/src/third_party/wiredtiger/test/manydbs/Makefile.am +++ b/src/third_party/wiredtiger/test/manydbs/Makefile.am @@ -10,7 +10,8 @@ t_LDADD +=$(top_builddir)/libwiredtiger.la t_LDFLAGS = -static # Run this during a "make check" smoke test. -TESTS = smoke.sh +TESTS = $(noinst_PROGRAMS) +LOG_COMPILER = $(TEST_WRAPPER) clean-local: rm -rf WT_TEST *.core diff --git a/src/third_party/wiredtiger/test/manydbs/smoke.sh b/src/third_party/wiredtiger/test/manydbs/smoke.sh deleted file mode 100755 index c0e2976f154..00000000000 --- a/src/third_party/wiredtiger/test/manydbs/smoke.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/sh - -set -e - -# Smoke-test format as part of running "make check". 
-# Run with: -# 1. The defaults -# 2. Set idle flag to turn off operations. -# 3. More dbs. -# -echo "manydbs: default with operations turned on" -$TEST_WRAPPER ./t -echo "manydbs: totally idle databases" -$TEST_WRAPPER ./t -I -echo "manydbs: 40 databases with operations" -$TEST_WRAPPER ./t -D 40 -echo "manydbs: 40 idle databases" -$TEST_WRAPPER ./t -I -D 40 diff --git a/src/third_party/wiredtiger/test/mciproject.yml b/src/third_party/wiredtiger/test/mciproject.yml index 3df1ce5805e..8825bb65052 100644 --- a/src/third_party/wiredtiger/test/mciproject.yml +++ b/src/third_party/wiredtiger/test/mciproject.yml @@ -8,12 +8,12 @@ functions: command: git.get_project params: directory: wiredtiger - "fetch artifacts" : &fetch_artifacts + "fetch binaries" : &fetch_binaries - command: s3.get params: aws_key: ${aws_key} aws_secret: ${aws_secret} - remote_file: wiredtiger/${build_variant}/${revision}/artifacts/${build_id}.tgz + remote_file: wiredtiger/${build_variant}/${revision}/binaries/${build_id}.tgz bucket: build_external extract_to: wiredtiger @@ -23,6 +23,22 @@ pre: script: | rm -rf "wiredtiger" post: + - command: archive.targz_pack + params: + target: "wiredtiger.tgz" + source_dir: "wiredtiger" + include: + - "./**" + - command: s3.put + params: + aws_secret: ${aws_secret} + aws_key: ${aws_key} + local_file: wiredtiger.tgz + bucket: build_external + permissions: public-read + content_type: application/tar + display_name: Artifacts + remote_file: wiredtiger/${build_variant}/${revision}/artifacts/${build_id}.tgz - command: shell.exec params: script: | @@ -49,7 +65,7 @@ tasks: ./build_posix/reconf ${configure_env_vars|} ./configure --enable-diagnostic --enable-python --enable-zlib --enable-strict --enable-verbose ${make_command|make} ${smp_command|} 2>&1 - ${make_command|make} check 2>&1 + ${make_command|make} VERBOSE=1 check 2>&1 fi - command: archive.targz_pack params: @@ -65,14 +81,14 @@ tasks: bucket: build_external permissions: public-read content_type: 
application/tar - display_name: Artifacts - remote_file: wiredtiger/${build_variant}/${revision}/artifacts/${build_id}.tgz + display_name: Binaries + remote_file: wiredtiger/${build_variant}/${revision}/binaries/${build_id}.tgz - name: unit-test depends_on: - name: compile commands: - - func: "fetch artifacts" + - func: "fetch binaries" - command: shell.exec params: working_dir: "wiredtiger" @@ -85,7 +101,7 @@ tasks: depends_on: - name: compile commands: - - func: "fetch artifacts" + - func: "fetch binaries" - command: shell.exec params: working_dir: "wiredtiger" @@ -99,7 +115,7 @@ tasks: depends_on: - name: compile commands: - - func: "fetch artifacts" + - func: "fetch binaries" - command: shell.exec params: working_dir: "wiredtiger" diff --git a/src/third_party/wiredtiger/test/recovery/Makefile.am b/src/third_party/wiredtiger/test/recovery/Makefile.am index 19fc48dce47..3e7fce17d0e 100644 --- a/src/third_party/wiredtiger/test/recovery/Makefile.am +++ b/src/third_party/wiredtiger/test/recovery/Makefile.am @@ -14,8 +14,7 @@ truncated_log_LDADD +=$(top_builddir)/libwiredtiger.la truncated_log_LDFLAGS = -static # Run this during a "make check" smoke test. -TESTS = $(noinst_PROGRAMS) -LOG_COMPILER = $(TEST_WRAPPER) +TESTS = smoke.sh clean-local: rm -rf WT_TEST.* *.core diff --git a/src/third_party/wiredtiger/test/recovery/random-abort.c b/src/third_party/wiredtiger/test/recovery/random-abort.c index 85629eddec4..16065cec29e 100644 --- a/src/third_party/wiredtiger/test/recovery/random-abort.c +++ b/src/third_party/wiredtiger/test/recovery/random-abort.c @@ -91,7 +91,8 @@ thread_run(void *arg) if ((fp = fopen(buf, "w")) == NULL) testutil_die(errno, "fopen"); /* - * Set to no buffering. + * Set to line buffering. But that is advisory only. We've seen + * cases where the result files end up with partial lines. 
*/ __wt_stream_set_line_buffer(fp); if ((ret = td->conn->open_session(td->conn, NULL, NULL, &session)) != 0) @@ -188,7 +189,7 @@ main(int argc, char *argv[]) WT_CURSOR *cursor; WT_SESSION *session; WT_RAND_STATE rnd; - uint64_t key; + uint64_t key, last_key; uint32_t absent, count, i, nth, timeout; int ch, status, ret; pid_t pid; @@ -317,12 +318,23 @@ main(int argc, char *argv[]) * in the table after recovery. Since we did write-no-sync, we * expect every key to have been recovered. */ - for (;; ++count) { + for (last_key = UINT64_MAX;; ++count, last_key = key) { ret = fscanf(fp, "%" SCNu64 "\n", &key); if (ret != EOF && ret != 1) testutil_die(errno, "fscanf"); if (ret == EOF) break; + /* + * If we're unlucky, the last line may be a partially + * written key at the end that can result in a false + * negative error for a missing record. Detect it. + */ + if (last_key != UINT64_MAX && key != last_key + 1) { + printf("%s: Ignore partial record %" PRIu64 + " last valid key %" PRIu64 "\n", + fname, key, last_key); + break; + } snprintf(kname, sizeof(kname), "%" PRIu64, key); cursor->set_key(cursor, kname); if ((ret = cursor->search(cursor)) != 0) { diff --git a/src/third_party/wiredtiger/test/recovery/smoke.sh b/src/third_party/wiredtiger/test/recovery/smoke.sh new file mode 100755 index 00000000000..c7677b64503 --- /dev/null +++ b/src/third_party/wiredtiger/test/recovery/smoke.sh @@ -0,0 +1,8 @@ +#! /bin/sh + +set -e + +# Smoke-test recovery as part of running "make check". 
+ +$TEST_WRAPPER ./random-abort -t 10 -T 5 +$TEST_WRAPPER ./truncated-log diff --git a/src/third_party/wiredtiger/test/suite/helper.py b/src/third_party/wiredtiger/test/suite/helper.py index f85d708880f..9f34b566b3c 100644 --- a/src/third_party/wiredtiger/test/suite/helper.py +++ b/src/third_party/wiredtiger/test/suite/helper.py @@ -179,6 +179,49 @@ def simple_populate_check(self, uri, rows): simple_populate_check_cursor(self, cursor, rows) cursor.close() +# population of a simple object, with a single index +# uri: object +# config: prefix of the session.create configuration string (defaults +# to string value formats) +# rows: entries to insert +def simple_index_populate(self, uri, config, rows): + self.pr('simple_index_populate: ' + uri + ' with ' + str(rows) + ' rows') + self.session.create(uri, 'value_format=S,columns=(key0,value0),' + config) + indxname = 'index:' + uri.split(":")[1] + self.session.create(indxname + ':index1', 'columns=(value0,key0)') + cursor = self.session.open_cursor(uri, None) + for i in range(1, rows + 1): + cursor[key_populate(cursor, i)] = value_populate(cursor, i) + cursor.close() + +def simple_index_populate_check_cursor(self, cursor, rows): + i = 0 + for key,val in cursor: + i += 1 + self.assertEqual(key, key_populate(cursor, i)) + if cursor.value_format == '8t' and val == 0: # deleted + continue + self.assertEqual(val, value_populate(cursor, i)) + self.assertEqual(i, rows) + +def simple_index_populate_check(self, uri, rows): + self.pr('simple_index_populate_check: ' + uri) + + # Check values in the main table. + cursor = self.session.open_cursor(uri, None) + simple_index_populate_check_cursor(self, cursor, rows) + + # Check values in the index. + indxname = 'index:' + uri.split(":")[1] + idxcursor = self.session.open_cursor(indxname + ':index1') + for i in range(1, rows + 1): + k = key_populate(cursor, i) + v = value_populate(cursor, i) + ik = (v,k) # The index key is columns=(v,k). 
+ self.assertEqual(v, idxcursor[ik]) + idxcursor.close() + cursor.close() + # Return the value stored in a complex object. def complex_value_populate(cursor, i): return [str(i) + ': abcdefghijklmnopqrstuvwxyz'[0:i%26], diff --git a/src/third_party/wiredtiger/test/suite/run.py b/src/third_party/wiredtiger/test/suite/run.py index 6e7421b8b96..c37093a2a55 100644 --- a/src/third_party/wiredtiger/test/suite/run.py +++ b/src/third_party/wiredtiger/test/suite/run.py @@ -87,6 +87,7 @@ Options:\n\ -j N | --parallel N run all tests in parallel using N processes\n\ -l | --long run the entire test suite\n\ -p | --preserve preserve output files in WT_TEST/<testname>\n\ + -s N | --scenario N use scenario N (N can be number or symbolic)\n\ -t | --timestamp name WT_TEST according to timestamp\n\ -v N | --verbose N set verboseness to N (0<=N<=3, default=1)\n\ \n\ @@ -95,15 +96,27 @@ Tests:\n\ may be a subsuite name (e.g. \'base\' runs test_base*.py)\n\ \n\ When -C or -c are present, there may not be any tests named.\n\ + When -s is present, there must be a test named.\n\ ' # capture the category (AKA 'subsuite') part of a test name, # e.g. 
test_util03 -> util reCatname = re.compile(r"test_([^0-9]+)[0-9]*") -def addScenarioTests(tests, loader, testname): +def restrictScenario(testcases, restrict): + if restrict == '': + return testcases + elif restrict.isdigit(): + s = int(restrict) + return [t for t in testcases + if hasattr(t, 'scenario_number') and t.scenario_number == s] + else: + return [t for t in testcases + if hasattr(t, 'scenario_name') and t.scenario_name == restrict] + +def addScenarioTests(tests, loader, testname, scenario): loaded = loader.loadTestsFromName(testname) - tests.addTests(generate_scenarios(loaded)) + tests.addTests(restrictScenario(generate_scenarios(loaded), scenario)) def configRecord(cmap, tup): """ @@ -195,20 +208,20 @@ def configApply(suites, configfilename, configwrite): json.dump(configmap, f, sort_keys=True, indent=4) return newsuite -def testsFromArg(tests, loader, arg): +def testsFromArg(tests, loader, arg, scenario): # If a group of test is mentioned, do all tests in that group # e.g. 'run.py base' groupedfiles = glob.glob(suitedir + os.sep + 'test_' + arg + '*.py') if len(groupedfiles) > 0: for file in groupedfiles: - testsFromArg(tests, loader, os.path.basename(file)) + testsFromArg(tests, loader, os.path.basename(file), scenario) return # Explicit test class names if not arg[0].isdigit(): if arg.endswith('.py'): arg = arg[:-3] - addScenarioTests(tests, loader, arg) + addScenarioTests(tests, loader, arg, scenario) return # Deal with ranges @@ -217,7 +230,7 @@ def testsFromArg(tests, loader, arg): else: start, end = int(arg), int(arg) for t in xrange(start, end+1): - addScenarioTests(tests, loader, 'test%03d' % t) + addScenarioTests(tests, loader, 'test%03d' % t, scenario) if __name__ == '__main__': tests = unittest.TestSuite() @@ -228,6 +241,7 @@ if __name__ == '__main__': configfile = None configwrite = False dirarg = None + scenario = '' verbose = 1 args = sys.argv[1:] testargs = [] @@ -265,6 +279,12 @@ if __name__ == '__main__': if option == '-preserve' or 
option == 'p': preserve = True continue + if option == '-scenario' or option == 's': + if scenario != '' or len(args) == 0: + usage() + sys.exit(2) + scenario = args.pop(0) + continue if option == '-timestamp' or option == 't': timestamp = True continue @@ -303,15 +323,20 @@ if __name__ == '__main__': # Without any tests listed as arguments, do discovery if len(testargs) == 0: + if scenario != '': + sys.stderr.write( + 'run.py: specifying a scenario requires a test name\n') + usage() + sys.exit(2) from discover import defaultTestLoader as loader suites = loader.discover(suitedir) suites = sorted(suites, key=lambda c: str(list(c)[0])) if configfile != None: suites = configApply(suites, configfile, configwrite) - tests.addTests(generate_scenarios(suites)) + tests.addTests(restrictScenario(generate_scenarios(suites), '')) else: for arg in testargs: - testsFromArg(tests, loader, arg) + testsFromArg(tests, loader, arg, scenario) if debug: import pdb diff --git a/src/third_party/wiredtiger/test/suite/test_async01.py b/src/third_party/wiredtiger/test/suite/test_async01.py index 71a18a68121..9322748c30f 100644 --- a/src/third_party/wiredtiger/test/suite/test_async01.py +++ b/src/third_party/wiredtiger/test/suite/test_async01.py @@ -29,7 +29,7 @@ import sys, threading, wiredtiger, wttest from suite_subprocess import suite_subprocess from wiredtiger import WiredTigerError -from wtscenario import check_scenarios +from wtscenario import make_scenarios # TODO - tmp code def tty_pr(s): @@ -122,7 +122,7 @@ class test_async01(wttest.WiredTigerTestCase, suite_subprocess): async_threads = 3 current = {} - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file-col', dict(tablekind='col',uri='file')), ('file-fix', dict(tablekind='fix',uri='file')), ('file-row', dict(tablekind='row',uri='file')), diff --git a/src/third_party/wiredtiger/test/suite/test_async02.py b/src/third_party/wiredtiger/test/suite/test_async02.py index 7aa1b85a2f3..bc6b389fc27 100644 --- 
a/src/third_party/wiredtiger/test/suite/test_async02.py +++ b/src/third_party/wiredtiger/test/suite/test_async02.py @@ -29,7 +29,7 @@ import sys, threading, wiredtiger, wttest from suite_subprocess import suite_subprocess from wiredtiger import WiredTigerError -from wtscenario import check_scenarios +from wtscenario import make_scenarios class Callback(wiredtiger.AsyncCallback): def __init__(self, current): @@ -119,7 +119,7 @@ class test_async02(wttest.WiredTigerTestCase, suite_subprocess): async_threads = 3 current = {} - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file-col', dict(tablekind='col',uri='file')), ('file-fix', dict(tablekind='fix',uri='file')), ('file-row', dict(tablekind='row',uri='file')), diff --git a/src/third_party/wiredtiger/test/suite/test_backup02.py b/src/third_party/wiredtiger/test/suite/test_backup02.py index 095bfbe404a..398d55abd7a 100644 --- a/src/third_party/wiredtiger/test/suite/test_backup02.py +++ b/src/third_party/wiredtiger/test/suite/test_backup02.py @@ -30,13 +30,13 @@ import Queue import threading, time, wiredtiger, wttest from helper import key_populate, simple_populate from wtthread import backup_thread, checkpoint_thread, op_thread -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_backup02.py # Run background checkpoints and backsups repeatedly while doing inserts # in another thread class test_backup02(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('table', dict(uri='table:test',fmt='L',dsize=100,nops=200,nthreads=1,time=30)), ]) diff --git a/src/third_party/wiredtiger/test/suite/test_backup03.py b/src/third_party/wiredtiger/test/suite/test_backup03.py index e810a2ec714..053009c6edb 100644 --- a/src/third_party/wiredtiger/test/suite/test_backup03.py +++ b/src/third_party/wiredtiger/test/suite/test_backup03.py @@ -28,7 +28,7 @@ import glob, os, shutil, string from suite_subprocess import suite_subprocess -from wtscenario import 
multiply_scenarios, number_scenarios +from wtscenario import make_scenarios import wiredtiger, wttest from helper import compare_files,\ complex_populate, complex_populate_lsm, simple_populate @@ -56,25 +56,25 @@ class test_backup_target(wttest.WiredTigerTestCase, suite_subprocess): ('table:' + pfx + '.4', complex_populate_lsm, 3), ] list = [ - ( '1', dict(big=0,list=[0])), # Target objects individually - ( '2', dict(big=1,list=[1])), - ( '3', dict(big=2,list=[2])), - ( '4', dict(big=3,list=[3])), - ('5a', dict(big=0,list=[0,2])), # Target groups of objects - ('5b', dict(big=2,list=[0,2])), - ('6a', dict(big=1,list=[1,3])), - ('6b', dict(big=3,list=[1,3])), - ('7a', dict(big=0,list=[0,1,2])), - ('7b', dict(big=1,list=[0,1,2])), - ('7c', dict(big=2,list=[0,1,2])), - ('8a', dict(big=0,list=[0,1,2,3])), - ('8b', dict(big=1,list=[0,1,2,3])), - ('8c', dict(big=2,list=[0,1,2,3])), - ('8d', dict(big=3,list=[0,1,2,3])), - ( '9', dict(big=3,list=[])), # Backup everything + ( 'backup_1', dict(big=0,list=[0])), # Target objects individually + ( 'backup_2', dict(big=1,list=[1])), + ( 'backup_3', dict(big=2,list=[2])), + ( 'backup_4', dict(big=3,list=[3])), + ('backup_5a', dict(big=0,list=[0,2])), # Target groups of objects + ('backup_5b', dict(big=2,list=[0,2])), + ('backup_6a', dict(big=1,list=[1,3])), + ('backup_6b', dict(big=3,list=[1,3])), + ('backup_7a', dict(big=0,list=[0,1,2])), + ('backup_7b', dict(big=1,list=[0,1,2])), + ('backup_7c', dict(big=2,list=[0,1,2])), + ('backup_8a', dict(big=0,list=[0,1,2,3])), + ('backup_8b', dict(big=1,list=[0,1,2,3])), + ('backup_8c', dict(big=2,list=[0,1,2,3])), + ('backup_8d', dict(big=3,list=[0,1,2,3])), + ('backup_9', dict(big=3,list=[])), # Backup everything ] - scenarios = number_scenarios(multiply_scenarios('.', list)) + scenarios = make_scenarios(list) # Create a large cache, otherwise this test runs quite slowly. 
conn_config = 'cache_size=1G' diff --git a/src/third_party/wiredtiger/test/suite/test_backup04.py b/src/third_party/wiredtiger/test/suite/test_backup04.py index 852a22c1e0c..866e673dccb 100644 --- a/src/third_party/wiredtiger/test/suite/test_backup04.py +++ b/src/third_party/wiredtiger/test/suite/test_backup04.py @@ -30,7 +30,7 @@ import Queue import threading, time, wiredtiger, wttest import glob, os, shutil from suite_subprocess import suite_subprocess -from wtscenario import check_scenarios +from wtscenario import make_scenarios from wtthread import op_thread from helper import compare_files, key_populate @@ -54,7 +54,7 @@ class test_backup_target(wttest.WiredTigerTestCase, suite_subprocess): # and that is not what we want here. # pfx = 'test_backup' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('table', dict(uri='table:test',dsize=100,nops=2000,nthreads=1,time=30)), ]) diff --git a/src/third_party/wiredtiger/test/suite/test_backup05.py b/src/third_party/wiredtiger/test/suite/test_backup05.py index fbe219d8de8..131732e9a89 100644 --- a/src/third_party/wiredtiger/test/suite/test_backup05.py +++ b/src/third_party/wiredtiger/test/suite/test_backup05.py @@ -35,7 +35,6 @@ import fnmatch, os, shutil, time from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios from helper import copy_wiredtiger_home import wiredtiger, wttest diff --git a/src/third_party/wiredtiger/test/suite/test_base02.py b/src/third_party/wiredtiger/test/suite/test_base02.py index 70117573241..2b51fe1b530 100644 --- a/src/third_party/wiredtiger/test/suite/test_base02.py +++ b/src/third_party/wiredtiger/test/suite/test_base02.py @@ -32,14 +32,14 @@ import json import wiredtiger, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios # Test configuration strings. 
class test_base02(wttest.WiredTigerTestCase): name = 'test_base02a' extra_config = '' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:')), ('table', dict(uri='table:')), ('lsm', dict(uri='lsm:')), diff --git a/src/third_party/wiredtiger/test/suite/test_base05.py b/src/third_party/wiredtiger/test/suite/test_base05.py index f191f23561f..4bee0efcfe2 100644 --- a/src/third_party/wiredtiger/test/suite/test_base05.py +++ b/src/third_party/wiredtiger/test/suite/test_base05.py @@ -27,7 +27,7 @@ # OTHER DEALINGS IN THE SOFTWARE. import wiredtiger, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_base05.py # Cursor operations @@ -40,7 +40,7 @@ class test_base05(wttest.WiredTigerTestCase): table_name1 = 'test_base05a' table_name2 = 'test_base05b' nentries = 1000 - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('no_huffman', dict(extraconfig='')), ('huffman_key', dict(extraconfig='huffman_key="english"')), ('huffman_val', dict(extraconfig='huffman_value="english"')), diff --git a/src/third_party/wiredtiger/test/suite/test_bug003.py b/src/third_party/wiredtiger/test/suite/test_bug003.py index 739279a0141..28d71a534e2 100644 --- a/src/third_party/wiredtiger/test/suite/test_bug003.py +++ b/src/third_party/wiredtiger/test/suite/test_bug003.py @@ -30,7 +30,7 @@ # Regression tests. import wiredtiger, wttest -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Regression tests. class test_bug003(wttest.WiredTigerTestCase): @@ -43,7 +43,7 @@ class test_bug003(wttest.WiredTigerTestCase): ('yes', dict(name=1)), ] - scenarios = number_scenarios(multiply_scenarios('.', types, ckpt)) + scenarios = make_scenarios(types, ckpt) # Confirm bulk-load isn't stopped by checkpoints. 
def test_bug003(self): diff --git a/src/third_party/wiredtiger/test/suite/test_bug006.py b/src/third_party/wiredtiger/test/suite/test_bug006.py index e522cdf96f7..314ba57038f 100644 --- a/src/third_party/wiredtiger/test/suite/test_bug006.py +++ b/src/third_party/wiredtiger/test/suite/test_bug006.py @@ -31,13 +31,13 @@ import wiredtiger, wttest from helper import key_populate, value_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # Check that verify and salvage both raise exceptions if there is an open # cursor. class test_bug006(wttest.WiredTigerTestCase): name = 'test_bug006' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:')), ('table', dict(uri='table:')), ]) diff --git a/src/third_party/wiredtiger/test/suite/test_bug008.py b/src/third_party/wiredtiger/test/suite/test_bug008.py index 0243887e258..c4fa411f55e 100644 --- a/src/third_party/wiredtiger/test/suite/test_bug008.py +++ b/src/third_party/wiredtiger/test/suite/test_bug008.py @@ -31,13 +31,13 @@ import wiredtiger, wttest from helper import simple_populate, key_populate, value_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # Test search/search-near operations, including invisible values and keys # past the end of the table. class test_bug008(wttest.WiredTigerTestCase): uri = 'file:test_bug008' # This is a btree layer test. 
- scenarios = check_scenarios([ + scenarios = make_scenarios([ ('fix', dict(fmt='key_format=r,value_format=8t', empty=1, colvar=0)), ('row', dict(fmt='key_format=S', empty=0, colvar=0)), ('var', dict(fmt='key_format=r', empty=0, colvar=1)) diff --git a/src/third_party/wiredtiger/test/suite/test_bug009.py b/src/third_party/wiredtiger/test/suite/test_bug009.py index 4d10e4391d9..2bdfb7dec52 100644 --- a/src/third_party/wiredtiger/test/suite/test_bug009.py +++ b/src/third_party/wiredtiger/test/suite/test_bug009.py @@ -33,7 +33,6 @@ import wiredtiger, wttest from wiredtiger import stat -from wtscenario import multiply_scenarios, number_scenarios class test_bug009(wttest.WiredTigerTestCase): name = 'test_bug009' diff --git a/src/third_party/wiredtiger/test/suite/test_bug011.py b/src/third_party/wiredtiger/test/suite/test_bug011.py index 50dba1c48be..fceb7a22ddb 100644 --- a/src/third_party/wiredtiger/test/suite/test_bug011.py +++ b/src/third_party/wiredtiger/test/suite/test_bug011.py @@ -42,7 +42,7 @@ class test_bug011(wttest.WiredTigerTestCase): nops = 10000 # Add connection configuration for this test. def conn_config(self, dir): - return 'cache_size=10MB,hazard_max=' + str(self.ntables / 2) + return 'cache_size=10MB,eviction_dirty_target=99,eviction_dirty_trigger=99,hazard_max=' + str(self.ntables / 2) def test_eviction(self): cursors = [] diff --git a/src/third_party/wiredtiger/test/suite/test_bug016.py b/src/third_party/wiredtiger/test/suite/test_bug016.py new file mode 100644 index 00000000000..f7cb3c32559 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_bug016.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. 
+# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import wiredtiger, wttest + +# test_bug016.py +# WT-2757: WT_CURSOR.get_key() fails after WT_CURSOR.insert unless the +# cursor has a record number key with append configured. +class test_bug016(wttest.WiredTigerTestCase): + + # Insert a row into a simple column-store table configured to append. + # WT_CURSOR.get_key should succeed. + def test_simple_column_store_append(self): + uri='file:bug016' + self.session.create(uri, 'key_format=r,value_format=S') + cursor = self.session.open_cursor(uri, None, 'append') + cursor.set_value('value') + cursor.insert() + self.assertEquals(cursor.get_key(), 1) + + # Insert a row into a simple column-store table. + # WT_CURSOR.get_key should fail. 
+ def test_simple_column_store(self): + uri='file:bug016' + self.session.create(uri, 'key_format=r,value_format=S') + cursor = self.session.open_cursor(uri, None) + cursor.set_key(37) + cursor.set_value('value') + cursor.insert() + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: cursor.get_key(), "/requires key be set/") + + # Insert a row into a simple row-store table. + # WT_CURSOR.get_key should fail. + def test_simple_row_store(self): + uri='file:bug016' + self.session.create(uri, 'key_format=S,value_format=S') + cursor = self.session.open_cursor(uri, None) + cursor.set_key('key') + cursor.set_value('value') + cursor.insert() + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: cursor.get_key(), "/requires key be set/") + + # Insert a row into a complex column-store table configured to append. + # WT_CURSOR.get_key should succeed. + def test_complex_column_store_append(self): + uri='table:bug016' + self.session.create( + uri, 'key_format=r,value_format=S,columns=(key,value)') + cursor = self.session.open_cursor(uri, None, 'append') + cursor.set_value('value') + cursor.insert() + self.assertEquals(cursor.get_key(), 1) + + # Insert a row into a complex column-store table. + # WT_CURSOR.get_key should fail. + def test_complex_column_store(self): + uri='table:bug016' + self.session.create( + uri, 'key_format=r,value_format=S,columns=(key,value)') + cursor = self.session.open_cursor(uri, None) + cursor.set_key(37) + cursor.set_value('value') + cursor.insert() + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: cursor.get_key(), "/requires key be set/") + + # Insert a row into a complex row-store table. + # WT_CURSOR.get_key should fail. 
+ def test_complex_row_store(self): + uri='table:bug016' + self.session.create( + uri, 'key_format=S,value_format=S,columns=(key,value)') + cursor = self.session.open_cursor(uri, None) + cursor.set_key('key') + cursor.set_value('value') + cursor.insert() + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: cursor.get_key(), "/requires key be set/") + + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_bulk01.py b/src/third_party/wiredtiger/test/suite/test_bulk01.py index 1add11af26b..5bacfafaa20 100644 --- a/src/third_party/wiredtiger/test/suite/test_bulk01.py +++ b/src/third_party/wiredtiger/test/suite/test_bulk01.py @@ -32,7 +32,7 @@ import wiredtiger, wttest from helper import key_populate, value_populate -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Smoke test bulk-load. class test_bulk_load(wttest.WiredTigerTestCase): @@ -52,7 +52,7 @@ class test_bulk_load(wttest.WiredTigerTestCase): ('integer', dict(valfmt='i')), ('string', dict(valfmt='S')), ] - scenarios = number_scenarios(multiply_scenarios('.', types, keyfmt, valfmt)) + scenarios = make_scenarios(types, keyfmt, valfmt) # Test a simple bulk-load def test_bulk_load(self): diff --git a/src/third_party/wiredtiger/test/suite/test_bulk02.py b/src/third_party/wiredtiger/test/suite/test_bulk02.py index fe8118209f2..af0b6d4485d 100644 --- a/src/third_party/wiredtiger/test/suite/test_bulk02.py +++ b/src/third_party/wiredtiger/test/suite/test_bulk02.py @@ -32,7 +32,7 @@ import shutil, os from helper import confirm_empty, key_populate, value_populate from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios import wiredtiger, wttest # test_bulkload_checkpoint @@ -47,7 +47,7 @@ class test_bulkload_checkpoint(wttest.WiredTigerTestCase, suite_subprocess): ('unnamed', dict(ckpt_type='unnamed')), ] - 
scenarios = number_scenarios(multiply_scenarios('.', types, ckpt_type)) + scenarios = make_scenarios(types, ckpt_type) # Bulk-load handles are skipped by checkpoints. # Named and unnamed checkpoint versions. @@ -90,8 +90,7 @@ class test_bulkload_backup(wttest.WiredTigerTestCase, suite_subprocess): ('different', dict(session_type='different')), ('same', dict(session_type='same')), ] - scenarios = number_scenarios( - multiply_scenarios('.', types, ckpt_type, session_type)) + scenarios = make_scenarios(types, ckpt_type, session_type) # Backup a set of chosen tables/files using the wt backup command. # The only files are bulk-load files, so they shouldn't be copied. diff --git a/src/third_party/wiredtiger/test/suite/test_checkpoint01.py b/src/third_party/wiredtiger/test/suite/test_checkpoint01.py index 6e1ad7814ed..78754dc82fa 100644 --- a/src/third_party/wiredtiger/test/suite/test_checkpoint01.py +++ b/src/third_party/wiredtiger/test/suite/test_checkpoint01.py @@ -28,7 +28,7 @@ import wiredtiger, wttest from helper import key_populate, complex_populate_lsm, simple_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_checkpoint01.py # Checkpoint tests @@ -36,7 +36,7 @@ from wtscenario import check_scenarios # with a set of checkpoints, then confirm the checkpoint's values are correct, # including after other checkpoints are dropped. class test_checkpoint(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:checkpoint',fmt='S')), ('table', dict(uri='table:checkpoint',fmt='S')) ]) @@ -139,7 +139,7 @@ class test_checkpoint(wttest.WiredTigerTestCase): # Check some specific cursor checkpoint combinations. 
class test_checkpoint_cursor(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:checkpoint',fmt='S')), ('table', dict(uri='table:checkpoint',fmt='S')) ]) @@ -205,7 +205,7 @@ class test_checkpoint_cursor(wttest.WiredTigerTestCase): # Check that you can checkpoint targets. class test_checkpoint_target(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:checkpoint',fmt='S')), ('table', dict(uri='table:checkpoint',fmt='S')) ]) @@ -252,7 +252,7 @@ class test_checkpoint_target(wttest.WiredTigerTestCase): # Check that you can't write checkpoint cursors. class test_checkpoint_cursor_update(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file-r', dict(uri='file:checkpoint',fmt='r')), ('file-S', dict(uri='file:checkpoint',fmt='S')), ('table-r', dict(uri='table:checkpoint',fmt='r')), @@ -277,7 +277,7 @@ class test_checkpoint_cursor_update(wttest.WiredTigerTestCase): # Check that WiredTigerCheckpoint works as a checkpoint specifier. 
class test_checkpoint_last(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:checkpoint',fmt='S')), ('table', dict(uri='table:checkpoint',fmt='S')) ]) @@ -343,7 +343,7 @@ class test_checkpoint_lsm_name(wttest.WiredTigerTestCase): class test_checkpoint_empty(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:checkpoint')), ('table', dict(uri='table:checkpoint')), ]) diff --git a/src/third_party/wiredtiger/test/suite/test_checkpoint02.py b/src/third_party/wiredtiger/test/suite/test_checkpoint02.py index 71c8792359c..ac57499a9e4 100644 --- a/src/third_party/wiredtiger/test/suite/test_checkpoint02.py +++ b/src/third_party/wiredtiger/test/suite/test_checkpoint02.py @@ -30,13 +30,13 @@ import Queue import threading, time, wiredtiger, wttest from helper import key_populate, simple_populate from wtthread import checkpoint_thread, op_thread -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_checkpoint02.py # Run background checkpoints repeatedly while doing inserts and other # operations in another thread class test_checkpoint02(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('table-100', dict(uri='table:test',fmt='L',dsize=100,nops=50000,nthreads=10)), ('table-10', dict(uri='table:test',fmt='L',dsize=10,nops=50000,nthreads=30)) ]) diff --git a/src/third_party/wiredtiger/test/suite/test_colgap.py b/src/third_party/wiredtiger/test/suite/test_colgap.py index 46682c23167..5cc363dbd4a 100644 --- a/src/third_party/wiredtiger/test/suite/test_colgap.py +++ b/src/third_party/wiredtiger/test/suite/test_colgap.py @@ -28,7 +28,7 @@ import wiredtiger, wttest from helper import simple_populate, key_populate, value_populate -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_colgap.py # Test variable-length 
column-store gap performance. @@ -149,8 +149,8 @@ class test_colmax(wttest.WiredTigerTestCase): ('not-single', dict(single=0)), ] - scenarios = number_scenarios(multiply_scenarios(\ - '.', types, valfmt, record_number, bulk, reopen, single)) + scenarios = make_scenarios(\ + types, valfmt, record_number, bulk, reopen, single) # Test that variable-length column-store correctly/efficiently handles big # records (if it's not efficient, we'll just hang). diff --git a/src/third_party/wiredtiger/test/suite/test_collator.py b/src/third_party/wiredtiger/test/suite/test_collator.py index 34b5c20247f..a8103fb3671 100644 --- a/src/third_party/wiredtiger/test/suite/test_collator.py +++ b/src/third_party/wiredtiger/test/suite/test_collator.py @@ -28,7 +28,6 @@ import os import wiredtiger, wttest, run -from wtscenario import check_scenarios, number_scenarios # test_collator.py # Test indices using a custom extractor and collator. diff --git a/src/third_party/wiredtiger/test/suite/test_compact01.py b/src/third_party/wiredtiger/test/suite/test_compact01.py index 3af550708ed..183d75f9d31 100644 --- a/src/third_party/wiredtiger/test/suite/test_compact01.py +++ b/src/third_party/wiredtiger/test/suite/test_compact01.py @@ -30,7 +30,7 @@ import wiredtiger, wttest from helper import complex_populate, simple_populate, key_populate from suite_subprocess import suite_subprocess from wiredtiger import stat -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_compact.py # session level compact operation @@ -53,7 +53,7 @@ class test_compact(wttest.WiredTigerTestCase, suite_subprocess): ('method_reopen', dict(utility=0,reopen=1)), ('utility', dict(utility=1,reopen=0)), ] - scenarios = number_scenarios(multiply_scenarios('.', types, compact)) + scenarios = make_scenarios(types, compact) # We want a large cache so that eviction doesn't happen # (which could skew our compaction results). 
conn_config = 'cache_size=250MB,statistics=(all)' diff --git a/src/third_party/wiredtiger/test/suite/test_compact02.py b/src/third_party/wiredtiger/test/suite/test_compact02.py index 7ad05cd2536..eb21817bd90 100644 --- a/src/third_party/wiredtiger/test/suite/test_compact02.py +++ b/src/third_party/wiredtiger/test/suite/test_compact02.py @@ -32,7 +32,7 @@ import wiredtiger, wttest from wiredtiger import stat -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Test basic compression class test_compact02(wttest.WiredTigerTestCase): @@ -57,8 +57,7 @@ class test_compact02(wttest.WiredTigerTestCase): ('64KB', dict(fileConfig='leaf_page_max=64KB')), ('128KB', dict(fileConfig='leaf_page_max=128KB')), ] - scenarios = \ - number_scenarios(multiply_scenarios('.', types, cacheSize, fileConfig)) + scenarios = make_scenarios(types, cacheSize, fileConfig) # We want about 22K records that total about 130Mb. That is an average # of 6196 bytes per record. Half the records should be smaller, about @@ -97,7 +96,7 @@ class test_compact02(wttest.WiredTigerTestCase): self.home = '.' 
conn_params = 'create,' + \ cacheSize + ',error_prefix="%s: ",' % self.shortid() + \ - 'statistics=(fast)' + 'statistics=(fast),eviction_dirty_target=99,eviction_dirty_trigger=99' try: self.conn = wiredtiger.wiredtiger_open(self.home, conn_params) except wiredtiger.WiredTigerError as e: diff --git a/src/third_party/wiredtiger/test/suite/test_compress01.py b/src/third_party/wiredtiger/test/suite/test_compress01.py index 94c748fc3e5..2a7e2a7e1a8 100644 --- a/src/third_party/wiredtiger/test/suite/test_compress01.py +++ b/src/third_party/wiredtiger/test/suite/test_compress01.py @@ -32,7 +32,7 @@ import os, run import wiredtiger, wttest -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Test basic compression class test_compress01(wttest.WiredTigerTestCase): @@ -46,7 +46,7 @@ class test_compress01(wttest.WiredTigerTestCase): ('snappy', dict(compress='snappy')), ('none', dict(compress=None)), ] - scenarios = number_scenarios(multiply_scenarios('.', types, compress)) + scenarios = make_scenarios(types, compress) nrecords = 10000 bigvalue = "abcdefghij" * 1000 diff --git a/src/third_party/wiredtiger/test/suite/test_config03.py b/src/third_party/wiredtiger/test/suite/test_config03.py index e91c5de62f8..88ca6ae3f39 100644 --- a/src/third_party/wiredtiger/test/suite/test_config03.py +++ b/src/third_party/wiredtiger/test/suite/test_config03.py @@ -69,14 +69,11 @@ class test_config03(test_base03.test_base03): 'eviction_trigger', 'hazard_max', 'multiprocess', 'session_max', 'verbose' ] - all_scenarios = wtscenario.multiply_scenarios('_', + scenarios = wtscenario.make_scenarios( cache_size_scenarios, create_scenarios, error_prefix_scenarios, eviction_target_scenarios, eviction_trigger_scenarios, hazard_max_scenarios, multiprocess_scenarios, session_max_scenarios, - transactional_scenarios, verbose_scenarios) - - scenarios = wtscenario.prune_scenarios(all_scenarios, 1000) - scenarios = wtscenario.number_scenarios(scenarios) + 
transactional_scenarios, verbose_scenarios, prune=1000) #wttest.WiredTigerTestCase.printVerbose(2, 'test_config03: running ' + \ # str(len(scenarios)) + ' of ' + \ diff --git a/src/third_party/wiredtiger/test/suite/test_cursor01.py b/src/third_party/wiredtiger/test/suite/test_cursor01.py index cf39d4a4ba4..8c66042eec0 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor01.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor01.py @@ -27,7 +27,7 @@ # OTHER DEALINGS IN THE SOFTWARE. import wiredtiger, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_cursor01.py # Cursor operations @@ -41,7 +41,7 @@ class test_cursor01(wttest.WiredTigerTestCase): table_name1 = 'test_cursor01' nentries = 10 - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file-col', dict(tablekind='col',uri='file')), ('file-fix', dict(tablekind='fix',uri='file')), ('file-row', dict(tablekind='row',uri='file')), diff --git a/src/third_party/wiredtiger/test/suite/test_cursor02.py b/src/third_party/wiredtiger/test/suite/test_cursor02.py index eb1ba4dfc41..a83d30def47 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor02.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor02.py @@ -28,7 +28,7 @@ import wiredtiger from test_cursor_tracker import TestCursorTracker -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_cursor02.py # Cursor operations on small tables. @@ -39,7 +39,7 @@ class test_cursor02(TestCursorTracker): key/value content and to track/verify content after inserts and removes. 
""" - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('row', dict(tablekind='row', uri='table')), ('lsm-row', dict(tablekind='row', uri='lsm')), ('col', dict(tablekind='col', uri='table')), diff --git a/src/third_party/wiredtiger/test/suite/test_cursor03.py b/src/third_party/wiredtiger/test/suite/test_cursor03.py index 63237f942ca..b4598483c12 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor03.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor03.py @@ -28,7 +28,7 @@ import wiredtiger from test_cursor_tracker import TestCursorTracker -from wtscenario import multiply_scenarios +from wtscenario import make_scenarios # test_cursor03.py # Cursor operations on tables of various sizes, with key/values of various @@ -40,7 +40,7 @@ class test_cursor03(TestCursorTracker): key/value content and to track/verify content after inserts and removes. """ - scenarios = multiply_scenarios('.', [ + scenarios = make_scenarios([ ('row', dict(tablekind='row', keysize=None, valsize=None, uri='table')), ('lsm-row', dict(tablekind='row', keysize=None, valsize=None, uri='lsm')), ('col', dict(tablekind='col', keysize=None, valsize=None, uri='table')), diff --git a/src/third_party/wiredtiger/test/suite/test_cursor04.py b/src/third_party/wiredtiger/test/suite/test_cursor04.py index 6576c623f8a..8cbf922b5eb 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor04.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor04.py @@ -27,7 +27,7 @@ # OTHER DEALINGS IN THE SOFTWARE. 
import wiredtiger, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_base04.py # Cursor operations @@ -38,7 +38,7 @@ class test_cursor04(wttest.WiredTigerTestCase): table_name1 = 'test_cursor04' nentries = 20 - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('row', dict(tablekind='row', uri='table')), ('lsm-row', dict(tablekind='row', uri='lsm')), ('col', dict(tablekind='col', uri='table')), diff --git a/src/third_party/wiredtiger/test/suite/test_cursor06.py b/src/third_party/wiredtiger/test/suite/test_cursor06.py index 5545c862dd7..3a6240bc6c7 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor06.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor06.py @@ -29,13 +29,13 @@ import wiredtiger, wttest from helper import key_populate, value_populate, simple_populate from helper import complex_value_populate, complex_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_cursor06.py # Test cursor reconfiguration. 
class test_cursor06(wttest.WiredTigerTestCase): name = 'reconfigure' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file-r', dict(type='file:', config='key_format=r', complex=0)), ('file-S', dict(type='file:', config='key_format=S', complex=0)), ('lsm-S', dict(type='lsm:', config='key_format=S', complex=0)), diff --git a/src/third_party/wiredtiger/test/suite/test_cursor07.py b/src/third_party/wiredtiger/test/suite/test_cursor07.py index d8de0874d7f..d6078183fc1 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor07.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor07.py @@ -33,7 +33,7 @@ import fnmatch, os, shutil, run, time from suite_subprocess import suite_subprocess from wiredtiger import stat -from wtscenario import check_scenarios +from wtscenario import make_scenarios import wttest class test_cursor07(wttest.WiredTigerTestCase, suite_subprocess): @@ -44,7 +44,7 @@ class test_cursor07(wttest.WiredTigerTestCase, suite_subprocess): # test that scenario for log cursors. 
nkeys = 7000 - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('regular', dict(reopen=False)), ('reopen', dict(reopen=True)) ]) diff --git a/src/third_party/wiredtiger/test/suite/test_cursor08.py b/src/third_party/wiredtiger/test/suite/test_cursor08.py index 1a379518224..3f8f50defa7 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor08.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor08.py @@ -33,7 +33,7 @@ import fnmatch, os, shutil, run, time from suite_subprocess import suite_subprocess from wiredtiger import stat, WiredTigerError -from wtscenario import multiply_scenarios, number_scenarios, check_scenarios +from wtscenario import make_scenarios import wttest class test_cursor08(wttest.WiredTigerTestCase, suite_subprocess): @@ -42,17 +42,17 @@ class test_cursor08(wttest.WiredTigerTestCase, suite_subprocess): uri = 'table:' + tablename nkeys = 500 - reopens = check_scenarios([ + reopens = [ ('regular', dict(reopen=False)), ('reopen', dict(reopen=True)) - ]) - compress = check_scenarios([ + ] + compress = [ ('nop', dict(compress='nop')), ('snappy', dict(compress='snappy')), ('zlib', dict(compress='zlib')), ('none', dict(compress='none')), - ]) - scenarios = number_scenarios(multiply_scenarios('.', reopens, compress)) + ] + scenarios = make_scenarios(reopens, compress) # Load the compression extension, and enable it for logging. 
def conn_config(self, dir): return 'log=(archive=false,enabled,file_max=%s,' % self.logmax + \ diff --git a/src/third_party/wiredtiger/test/suite/test_cursor09.py b/src/third_party/wiredtiger/test/suite/test_cursor09.py index b77336bc1d7..a05caea4f1f 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor09.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor09.py @@ -29,12 +29,12 @@ import wiredtiger, wttest from helper import key_populate, value_populate, simple_populate from helper import complex_populate, complex_value_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_cursor09.py # JIRA WT-2217: insert resets key/value "set". class test_cursor09(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file-r', dict(type='file:', config='key_format=r', complex=0)), ('file-S', dict(type='file:', config='key_format=S', complex=0)), ('lsm-S', dict(type='lsm:', config='key_format=S', complex=0)), diff --git a/src/third_party/wiredtiger/test/suite/test_cursor_compare.py b/src/third_party/wiredtiger/test/suite/test_cursor_compare.py index 130f4e8ca96..179e20682d2 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor_compare.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor_compare.py @@ -29,7 +29,7 @@ import wiredtiger, wttest, exceptions from helper import complex_populate, simple_populate, key_populate from helper import complex_populate_index_name -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Test cursor comparisons. 
class test_cursor_comparison(wttest.WiredTigerTestCase): @@ -45,7 +45,7 @@ class test_cursor_comparison(wttest.WiredTigerTestCase): ('recno', dict(keyfmt='r')), ('string', dict(keyfmt='S')) ] - scenarios = number_scenarios(multiply_scenarios('.', types, keyfmt)) + scenarios = make_scenarios(types, keyfmt) def test_cursor_comparison(self): uri = self.type + 'compare' diff --git a/src/third_party/wiredtiger/test/suite/test_cursor_pin.py b/src/third_party/wiredtiger/test/suite/test_cursor_pin.py index 329759d8fc8..1aea49c32b0 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor_pin.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor_pin.py @@ -28,7 +28,7 @@ import wiredtiger, wttest from helper import simple_populate, key_populate, value_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_cursor_pin.py # Smoke-test fast-path searching for pinned pages before re-descending @@ -37,7 +37,7 @@ class test_cursor_pin(wttest.WiredTigerTestCase): uri = 'file:cursor_pin' nentries = 10000 config = 'allocation_size=512,leaf_page_max=512,value_format=S,key_format=' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('recno', dict(keyfmt='r')), ('string', dict(keyfmt='S')), ]) diff --git a/src/third_party/wiredtiger/test/suite/test_cursor_random.py b/src/third_party/wiredtiger/test/suite/test_cursor_random.py index 16ce5cae685..8d7c230043b 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor_random.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor_random.py @@ -29,7 +29,7 @@ import wiredtiger, wttest from helper import complex_populate, simple_populate from helper import key_populate, value_populate -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_cursor_random.py # Cursor next_random operations @@ -42,7 +42,7 @@ class test_cursor_random(wttest.WiredTigerTestCase): ('sample', 
dict(config='next_random=true,next_random_sample_size=35')), ('not-sample', dict(config='next_random=true')) ] - scenarios =number_scenarios(multiply_scenarios('.', types, config)) + scenarios = make_scenarios(types, config) # Check that opening a random cursor on a row-store returns not-supported # for methods other than next, reconfigure and reset, and next returns @@ -136,7 +136,7 @@ class test_cursor_random(wttest.WiredTigerTestCase): # Check that opening a random cursor on column-store returns not-supported. class test_cursor_random_column(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:random')), ('table', dict(uri='table:random')) ]) @@ -159,7 +159,7 @@ class test_cursor_random_invisible(wttest.WiredTigerTestCase): ('sample', dict(config='next_random=true,next_random_sample_size=35')), ('not-sample', dict(config='next_random=true')) ] - scenarios =number_scenarios(multiply_scenarios('.', types, config)) + scenarios = make_scenarios(types, config) def test_cursor_random_invisible_all(self): uri = self.type diff --git a/src/third_party/wiredtiger/test/suite/test_cursor_random02.py b/src/third_party/wiredtiger/test/suite/test_cursor_random02.py index 84ac0279fc4..93aa97f2282 100644 --- a/src/third_party/wiredtiger/test/suite/test_cursor_random02.py +++ b/src/third_party/wiredtiger/test/suite/test_cursor_random02.py @@ -29,7 +29,7 @@ import wiredtiger, wttest from helper import complex_populate, simple_populate from helper import key_populate, value_populate -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_cursor_random02.py # Cursor next_random operations @@ -46,7 +46,7 @@ class test_cursor_random02(wttest.WiredTigerTestCase): ('10000', dict(records=10000)), ('50000', dict(records=50000)), ] - scenarios = number_scenarios(multiply_scenarios('.', config, records)) + scenarios = make_scenarios(config, records) # Check that 
next_random works in the presence of a larger set of values, # where the values are in an insert list. diff --git a/src/third_party/wiredtiger/test/suite/test_drop.py b/src/third_party/wiredtiger/test/suite/test_drop.py index 52ea7251ab5..a3e80214295 100644 --- a/src/third_party/wiredtiger/test/suite/test_drop.py +++ b/src/third_party/wiredtiger/test/suite/test_drop.py @@ -30,7 +30,7 @@ import os, time import wiredtiger, wttest from helper import confirm_does_not_exist, complex_populate, \ complex_populate_index_name, simple_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_drop.py # session level drop operation @@ -38,7 +38,7 @@ class test_drop(wttest.WiredTigerTestCase): name = 'test_drop' extra_config = '' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:')), ('table', dict(uri='table:')), ('table-lsm', dict(uri='table:', extra_config=',type=lsm')), diff --git a/src/third_party/wiredtiger/test/suite/test_dump.py b/src/third_party/wiredtiger/test/suite/test_dump.py index 85196174c1b..280d5870359 100644 --- a/src/third_party/wiredtiger/test/suite/test_dump.py +++ b/src/third_party/wiredtiger/test/suite/test_dump.py @@ -30,9 +30,10 @@ import os, shutil import wiredtiger, wttest from helper import \ complex_populate, complex_populate_check, \ - simple_populate, simple_populate_check + simple_populate, simple_populate_check, \ + simple_index_populate, simple_index_populate_check from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_dump.py # Utilities: wt dump @@ -64,6 +65,9 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess): ('table-simple', dict(uri='table:', config='', lsm=False, populate=simple_populate, populate_check=simple_populate_check)), + ('table-index', dict(uri='table:', config='', lsm=False, + populate=simple_index_populate, + 
populate_check=simple_index_populate_check)), ('table-simple-lsm', dict(uri='table:', config='type=lsm', lsm=True, populate=simple_populate, populate_check=simple_populate_check)), @@ -74,8 +78,7 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess): populate=complex_populate, populate_check=complex_populate_check)) ] - scenarios = number_scenarios( - multiply_scenarios('.', types, keyfmt, dumpfmt)) + scenarios = make_scenarios(types, keyfmt, dumpfmt) # Extract the values lines from the dump output. def value_lines(self, fname): diff --git a/src/third_party/wiredtiger/test/suite/test_dupc.py b/src/third_party/wiredtiger/test/suite/test_dupc.py index ec55a36df4c..12b18f1ba79 100644 --- a/src/third_party/wiredtiger/test/suite/test_dupc.py +++ b/src/third_party/wiredtiger/test/suite/test_dupc.py @@ -33,7 +33,7 @@ import os, time import wiredtiger, wttest from helper import complex_populate, key_populate, simple_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # Test session.open_cursor with cursor duplication. 
class test_duplicate_cursor(wttest.WiredTigerTestCase): @@ -42,7 +42,7 @@ class test_duplicate_cursor(wttest.WiredTigerTestCase): config = 'key_format=' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file-r', dict(uri='file:', fmt='r')), ('file-S', dict(uri='file:', fmt='S')), ('table-r', dict(uri='table:', fmt='r')), diff --git a/src/third_party/wiredtiger/test/suite/test_durability01.py b/src/third_party/wiredtiger/test/suite/test_durability01.py index f578a79baf1..32cdd795914 100644 --- a/src/third_party/wiredtiger/test/suite/test_durability01.py +++ b/src/third_party/wiredtiger/test/suite/test_durability01.py @@ -34,7 +34,6 @@ import fnmatch, os, shutil, time from helper import copy_wiredtiger_home from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios import wttest class test_durability01(wttest.WiredTigerTestCase, suite_subprocess): diff --git a/src/third_party/wiredtiger/test/suite/test_empty.py b/src/third_party/wiredtiger/test/suite/test_empty.py index 50b79db70e4..9fe88107412 100644 --- a/src/third_party/wiredtiger/test/suite/test_empty.py +++ b/src/third_party/wiredtiger/test/suite/test_empty.py @@ -29,14 +29,14 @@ import os import wiredtiger, wttest from helper import key_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_empty.py # Test that empty objects don't write anything other than a single sector. 
class test_empty(wttest.WiredTigerTestCase): name = 'test_empty' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file-r', dict(type='file:', fmt='r')), ('file-S', dict(type='file:', fmt='S')), ('table-r', dict(type='table:', fmt='r')), diff --git a/src/third_party/wiredtiger/test/suite/test_encrypt01.py b/src/third_party/wiredtiger/test/suite/test_encrypt01.py index 0f2782204d2..d48605aaa83 100644 --- a/src/third_party/wiredtiger/test/suite/test_encrypt01.py +++ b/src/third_party/wiredtiger/test/suite/test_encrypt01.py @@ -32,7 +32,7 @@ import os, run, random import wiredtiger, wttest -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Test basic encryption class test_encrypt01(wttest.WiredTigerTestCase): @@ -60,8 +60,7 @@ class test_encrypt01(wttest.WiredTigerTestCase): ('none-snappy', dict(log_compress=None, block_compress='snappy')), ('snappy-lz4', dict(log_compress='snappy', block_compress='lz4')), ] - scenarios = number_scenarios(multiply_scenarios('.', types, - encrypt, compress)) + scenarios = make_scenarios(types, encrypt, compress) nrecords = 5000 bigvalue = "abcdefghij" * 1001 # len(bigvalue) = 10010 diff --git a/src/third_party/wiredtiger/test/suite/test_encrypt02.py b/src/third_party/wiredtiger/test/suite/test_encrypt02.py index 0376b3e42e4..648686274c4 100644 --- a/src/third_party/wiredtiger/test/suite/test_encrypt02.py +++ b/src/third_party/wiredtiger/test/suite/test_encrypt02.py @@ -33,7 +33,7 @@ import os, run, random import wiredtiger, wttest from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Test basic encryption class test_encrypt02(wttest.WiredTigerTestCase, suite_subprocess): @@ -48,7 +48,7 @@ class test_encrypt02(wttest.WiredTigerTestCase, suite_subprocess): ('keyid-pass', dict( encrypt='rotn', encrypt_args='name=rotn,keyid=11', secret_arg='ABC')), ] - scenarios = 
number_scenarios(encrypt_type) + scenarios = make_scenarios(encrypt_type) nrecords = 5000 bigvalue = "abcdefghij" * 1001 # len(bigvalue) = 10010 diff --git a/src/third_party/wiredtiger/test/suite/test_encrypt03.py b/src/third_party/wiredtiger/test/suite/test_encrypt03.py index 702d0a2369f..0dc1755d6eb 100644 --- a/src/third_party/wiredtiger/test/suite/test_encrypt03.py +++ b/src/third_party/wiredtiger/test/suite/test_encrypt03.py @@ -32,7 +32,7 @@ import os, run, random import wiredtiger, wttest -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Test basic encryption class test_encrypt03(wttest.WiredTigerTestCase): @@ -48,7 +48,7 @@ class test_encrypt03(wttest.WiredTigerTestCase): #('noname', dict( sys_encrypt='rotn', sys_encrypt_args=',keyid=11', # file_encrypt='none', file_encrypt_args=',keyid=13')), ] - scenarios = number_scenarios(multiply_scenarios('.', types, encrypt)) + scenarios = make_scenarios(types, encrypt) # Override WiredTigerTestCase, we have extensions. 
def setUpConnectionOpen(self, dir): diff --git a/src/third_party/wiredtiger/test/suite/test_encrypt04.py b/src/third_party/wiredtiger/test/suite/test_encrypt04.py index d7c12d2cba8..97d2cee03a0 100644 --- a/src/third_party/wiredtiger/test/suite/test_encrypt04.py +++ b/src/third_party/wiredtiger/test/suite/test_encrypt04.py @@ -32,7 +32,7 @@ import os, run, random import wiredtiger, wttest -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios from suite_subprocess import suite_subprocess # Test basic encryption with mismatched configuration @@ -69,8 +69,7 @@ class test_encrypt04(wttest.WiredTigerTestCase, suite_subprocess): ('rotn11xyz_and_clear', dict( name2='rotn', keyid2='11', secretkey2='XYZ', fileinclear2=True)) ] - scenarios = number_scenarios(multiply_scenarios \ - ('.', encrypt_scen_1, encrypt_scen_2)) + scenarios = make_scenarios(encrypt_scen_1, encrypt_scen_2) nrecords = 5000 bigvalue = "abcdefghij" * 1001 # len(bigvalue) = 10010 diff --git a/src/third_party/wiredtiger/test/suite/test_encrypt05.py b/src/third_party/wiredtiger/test/suite/test_encrypt05.py index afd8a8103f9..19a3522b3d5 100644 --- a/src/third_party/wiredtiger/test/suite/test_encrypt05.py +++ b/src/third_party/wiredtiger/test/suite/test_encrypt05.py @@ -32,7 +32,7 @@ import os, run, random import wiredtiger, wttest -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Test raw compression with encryption class test_encrypt05(wttest.WiredTigerTestCase): @@ -44,8 +44,7 @@ class test_encrypt05(wttest.WiredTigerTestCase): compress = [ ('zlib', dict(log_compress='zlib', block_compress='zlib')), ] - scenarios = number_scenarios(multiply_scenarios('.', - encrypt, compress)) + scenarios = make_scenarios(encrypt, compress) nrecords = 500 bigvalue = 'a' * 500 # we use values that will definitely give compression diff --git a/src/third_party/wiredtiger/test/suite/test_encrypt06.py 
b/src/third_party/wiredtiger/test/suite/test_encrypt06.py index 5c88b698aeb..9300583d099 100644 --- a/src/third_party/wiredtiger/test/suite/test_encrypt06.py +++ b/src/third_party/wiredtiger/test/suite/test_encrypt06.py @@ -32,7 +32,7 @@ import os, run, random import wiredtiger, wttest -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Test encryption, when on, does not leak any information class test_encrypt06(wttest.WiredTigerTestCase): @@ -86,7 +86,7 @@ class test_encrypt06(wttest.WiredTigerTestCase): file0_encrypt='rotn', file0_encrypt_args=key13, encrypt0=True, file1_encrypt='none', file1_encrypt_args='', encrypt1=False)), ] - scenarios = number_scenarios(multiply_scenarios('.', encrypt, storagetype)) + scenarios = make_scenarios(encrypt, storagetype) nrecords = 1000 # Override WiredTigerTestCase, we have extensions. diff --git a/src/third_party/wiredtiger/test/suite/test_encrypt07.py b/src/third_party/wiredtiger/test/suite/test_encrypt07.py index 30f28e096a8..97ab1987d4f 100644 --- a/src/third_party/wiredtiger/test/suite/test_encrypt07.py +++ b/src/third_party/wiredtiger/test/suite/test_encrypt07.py @@ -32,7 +32,6 @@ import os, run, string, codecs import wiredtiger, wttest -from wtscenario import multiply_scenarios, number_scenarios import test_salvage # Run the regular salvage test, but with encryption on diff --git a/src/third_party/wiredtiger/test/suite/test_excl.py b/src/third_party/wiredtiger/test/suite/test_excl.py index 90926f51877..cea5756dfbb 100644 --- a/src/third_party/wiredtiger/test/suite/test_excl.py +++ b/src/third_party/wiredtiger/test/suite/test_excl.py @@ -27,11 +27,11 @@ # OTHER DEALINGS IN THE SOFTWARE. import wiredtiger, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios # Test session.create with the exclusive configuration. 
class test_create_excl(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(type='file:')), ('table', dict(type='table:')) ]) diff --git a/src/third_party/wiredtiger/test/suite/test_huffman01.py b/src/third_party/wiredtiger/test/suite/test_huffman01.py index d71198e3151..be307550f2e 100644 --- a/src/third_party/wiredtiger/test/suite/test_huffman01.py +++ b/src/third_party/wiredtiger/test/suite/test_huffman01.py @@ -28,7 +28,7 @@ import os from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios import wiredtiger, wttest # test_huffman01.py @@ -52,7 +52,7 @@ class test_huffman01(wttest.WiredTigerTestCase, suite_subprocess): ('utf8', dict(huffval=',huffman_value=utf8t8file',vfile='t8file')), ('utf16', dict(huffval=',huffman_value=utf16t16file',vfile='t16file')), ] - scenarios = number_scenarios(multiply_scenarios('.', huffkey, huffval)) + scenarios = make_scenarios(huffkey, huffval) def test_huffman(self): dir = self.conn.get_home() diff --git a/src/third_party/wiredtiger/test/suite/test_huffman02.py b/src/third_party/wiredtiger/test/suite/test_huffman02.py index aa4329415a4..d74704daf58 100644 --- a/src/third_party/wiredtiger/test/suite/test_huffman02.py +++ b/src/third_party/wiredtiger/test/suite/test_huffman02.py @@ -28,7 +28,7 @@ import os from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios import wiredtiger, wttest # test_huffman02.py @@ -48,7 +48,7 @@ class test_huffman02(wttest.WiredTigerTestCase, suite_subprocess): ('file', dict(uri='file:huff')), ('table', dict(uri='table:huff')), ] - scenarios = number_scenarios(multiply_scenarios('.',type,huffkey, huffval)) + scenarios = make_scenarios(type, huffkey, huffval) def test_huffman(self): if self.keybad or self.valbad: diff --git 
a/src/third_party/wiredtiger/test/suite/test_index02.py b/src/third_party/wiredtiger/test/suite/test_index02.py new file mode 100644 index 00000000000..9f39df003b1 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_index02.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+ +import wiredtiger, wttest + +# test_index02.py +# test search_near in indices +class test_index02(wttest.WiredTigerTestCase): + '''Test search_near in indices''' + + basename = 'test_index02' + tablename = 'table:' + basename + indexname = 'index:' + basename + ":inverse" + + def test_search_near(self): + '''Create a table, look for a nonexistent key''' + self.session.create(self.tablename, 'key_format=r,value_format=Q,columns=(k,v)') + self.session.create(self.indexname, 'columns=(v)') + cur = self.session.open_cursor(self.tablename, None, "append") + cur.set_value(1) + cur.insert() + cur.set_value(5) + cur.insert() + cur.set_value(5) + cur.insert() + cur.set_value(5) + cur.insert() + cur.set_value(10) + cur.insert() + + # search near should find a match + cur2 = self.session.open_cursor(self.indexname, None, None) + cur2.set_key(5) + self.assertEqual(cur2.search_near(), 0) + + # Retry after reopening + self.reopen_conn() + cur3 = self.session.open_cursor(self.indexname, None, None) + cur3.set_key(5) + self.assertEqual(cur3.search_near(), 0) + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_inmem01.py b/src/third_party/wiredtiger/test/suite/test_inmem01.py index 875ebb2bfa7..c6ae7ff6c4b 100644 --- a/src/third_party/wiredtiger/test/suite/test_inmem01.py +++ b/src/third_party/wiredtiger/test/suite/test_inmem01.py @@ -30,95 +30,73 @@ import wiredtiger, wttest from time import sleep from helper import simple_populate, simple_populate_check from helper import key_populate, value_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_inmem01.py # Test in-memory configuration. class test_inmem01(wttest.WiredTigerTestCase): - name = 'inmem01' - """ - In memory configuration still creates files on disk, but has limits - in terms of how much data can be written. - Test various scenarios including: - - Add a small amount of data, ensure it is present. 
- - Add more data than would fit into the configured cache. - - Fill the cache with data, remove some data, ensure more data can be - inserted (after a reasonable amount of time for space to be reclaimed) - - Run queries after adding, removing and re-inserting data. - - Try out keeping a cursor open while adding new data. - """ - scenarios = check_scenarios([ - ('col', dict(tablekind='col')), - # Fixed length is very slow, disable it for now - #('fix', dict(tablekind='fix')), - ('row', dict(tablekind='row')) - ]) - - # create an in-memory database - conn_config = 'cache_size=5MB,' + \ - 'file_manager=(close_idle_time=0),in_memory=true' + uri = 'table:inmem01' + conn_config = \ + 'cache_size=5MB,file_manager=(close_idle_time=0),in_memory=true' + table_config = ',memory_page_max=32k,leaf_page_max=4k' - def get_table_config(self): - kf = 'key_format=' - vf = 'value_format=' - if self.tablekind == 'row': - kf = kf + 'S' - else: - kf = kf + 'r' # record format - if self.tablekind == 'fix': - vf = vf + '8t' - else: - vf = vf + 'S' - return 'memory_page_max=32k,leaf_page_max=4k,' + kf + ',' + vf + scenarios = make_scenarios([ + ('col', dict(fmt='key_format=r,value_format=S')), + ('fix', dict(fmt='key_format=r,value_format=8t')), + ('row', dict(fmt='key_format=S,value_format=S')) + ]) + # Smoke-test in-memory configurations, add a small amount of data and + # ensure it's visible. def test_insert(self): - table_config = self.get_table_config() - simple_populate(self, - "table:" + self.name, table_config, 1000) - # Ensure the data is visible. - simple_populate_check(self, 'table:' + self.name, 1000) + config = self.fmt + self.table_config + simple_populate(self, self.uri, config, 1000) + simple_populate_check(self, self.uri, 1000) + # Add more data than fits into the configured cache and verify it fails. 
def test_insert_over_capacity(self): - table_config = self.get_table_config() + config = self.fmt + self.table_config msg = '/WT_CACHE_FULL.*/' self.assertRaisesHavingMessage(wiredtiger.WiredTigerError, - lambda:simple_populate(self, - "table:" + self.name, table_config, 10000000), msg) + lambda:simple_populate(self, self.uri, config, 10000000), msg) - # Figure out the last key we inserted. - cursor = self.session.open_cursor('table:' + self.name, None) + # Figure out the last key we successfully inserted, and check all + # previous inserts are still there. + cursor = self.session.open_cursor(self.uri, None) cursor.prev() last_key = int(cursor.get_key()) - simple_populate_check(self, 'table:' + self.name, last_key) + simple_populate_check(self, self.uri, last_key) + # Fill the cache with data, remove some data, ensure more data can be + # inserted (after a reasonable amount of time for space to be reclaimed). def test_insert_over_delete(self): - table_config = self.get_table_config() + config = self.fmt + self.table_config msg = '/WT_CACHE_FULL.*/' self.assertRaisesHavingMessage(wiredtiger.WiredTigerError, - lambda:simple_populate(self, - "table:" + self.name, table_config, 10000000), msg) + lambda:simple_populate(self, self.uri, config, 10000000), msg) # Now that the database contains as much data as will fit into # the configured cache, verify removes succeed. - cursor = self.session.open_cursor('table:' + self.name, None) + cursor = self.session.open_cursor(self.uri, None) for i in range(1, 100): cursor.set_key(key_populate(cursor, i)) cursor.remove() + # Run queries after adding, removing and re-inserting data. + # Try out keeping a cursor open while adding new data. 
def test_insert_over_delete_replace(self): - table_config = self.get_table_config() + config = self.fmt + self.table_config msg = '/WT_CACHE_FULL.*/' self.assertRaisesHavingMessage(wiredtiger.WiredTigerError, - lambda:simple_populate(self, - "table:" + self.name, table_config, 10000000), msg) + lambda:simple_populate(self, self.uri, config, 10000000), msg) - cursor = self.session.open_cursor('table:' + self.name, None) + cursor = self.session.open_cursor(self.uri, None) cursor.prev() last_key = int(cursor.get_key()) # Now that the database contains as much data as will fit into # the configured cache, verify removes succeed. - cursor = self.session.open_cursor('table:' + self.name, None) + cursor = self.session.open_cursor(self.uri, None) for i in range(1, last_key / 4, 1): cursor.set_key(key_populate(cursor, i)) cursor.remove() diff --git a/src/third_party/wiredtiger/test/suite/test_intpack.py b/src/third_party/wiredtiger/test/suite/test_intpack.py index 187b2d7f579..b0cece09494 100644 --- a/src/third_party/wiredtiger/test/suite/test_intpack.py +++ b/src/third_party/wiredtiger/test/suite/test_intpack.py @@ -31,7 +31,7 @@ # import wiredtiger, wttest -from wtscenario import check_scenarios, number_scenarios +from wtscenario import make_scenarios class PackTester: def __init__(self, formatcode, validlow, validhigh, equals): @@ -126,22 +126,27 @@ class PackTester: class test_intpack(wttest.WiredTigerTestCase): name = 'test_intpack' - scenarios = check_scenarios([ - ('b', dict(formatcode='b', low=-128, high=127, nbits=8)), - ('B', dict(formatcode='B', low=0, high=255, nbits=8)), - ('8t', dict(formatcode='8t', low=0, high=255, nbits=8)), - ('5t', dict(formatcode='5t', low=0, high=31, nbits=5)), - ('h', dict(formatcode='h', low=-32768, high=32767, nbits=16)), - ('H', dict(formatcode='H', low=0, high=65535, nbits=16)), - ('i', dict(formatcode='i', low=-2147483648, high=2147483647, nbits=32)), - ('I', dict(formatcode='I', low=0, high=4294967295, nbits=32)), - ('l', 
dict(formatcode='l', low=-2147483648, high=2147483647, nbits=32)), - ('L', dict(formatcode='L', low=0, high=4294967295, nbits=32)), - ('q', dict(formatcode='q', low=-9223372036854775808, + # We have to be a bit verbose here with naming, as there can be problems with + # case insensitive test names. + + scenarios = make_scenarios([ + ('int8_t_b', dict(formatcode='b', low=-128, high=127, nbits=8)), + ('uint8_t_B', dict(formatcode='B', low=0, high=255, nbits=8)), + ('fix_len_8t', dict(formatcode='8t', low=0, high=255, nbits=8)), + ('fix_len_5t', dict(formatcode='5t', low=0, high=31, nbits=5)), + ('int16_t_h', dict(formatcode='h', low=-32768, high=32767, nbits=16)), + ('uint16_t_H', dict(formatcode='H', low=0, high=65535, nbits=16)), + ('int32_t_i', dict(formatcode='i', low=-2147483648, high=2147483647, + nbits=32)), + ('uint32_t_I', dict(formatcode='I', low=0, high=4294967295, nbits=32)), + ('int32_t_l', dict(formatcode='l', low=-2147483648, high=2147483647, + nbits=32)), + ('uint32_t_L', dict(formatcode='L', low=0, high=4294967295, nbits=32)), + ('int64_t_q', dict(formatcode='q', low=-9223372036854775808, high=9223372036854775807, nbits=64)), - ('Q', dict(formatcode='Q', low=0, high=18446744073709551615, nbits=64)), + ('uint64_t_Q', dict(formatcode='Q', low=0, high=18446744073709551615, + nbits=64)), ]) - scenarios = check_scenarios(number_scenarios(scenarios)) def test_packing(self): pt = PackTester(self.formatcode, self.low, self.high, self.assertEquals) diff --git a/src/third_party/wiredtiger/test/suite/test_join01.py b/src/third_party/wiredtiger/test/suite/test_join01.py index f8d96a2718a..f3b13026896 100644 --- a/src/third_party/wiredtiger/test/suite/test_join01.py +++ b/src/third_party/wiredtiger/test/suite/test_join01.py @@ -27,7 +27,7 @@ # OTHER DEALINGS IN THE SOFTWARE. 
import wiredtiger, wttest -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_join01.py # Join operations @@ -67,11 +67,9 @@ class test_join01(wttest.WiredTigerTestCase): ('order=2', dict(join_order=2)), ('order=3', dict(join_order=3)), ] - scenarios = number_scenarios(multiply_scenarios('.', type_scen, - bloom0_scen, bloom1_scen, - projection_scen, - nested_scen, stats_scen, - order_scen)) + scenarios = make_scenarios(type_scen, bloom0_scen, bloom1_scen, + projection_scen, nested_scen, stats_scen, + order_scen) # We need statistics for these tests. conn_config = 'statistics=(all)' diff --git a/src/third_party/wiredtiger/test/suite/test_join02.py b/src/third_party/wiredtiger/test/suite/test_join02.py index a691c499cf6..db11ed01039 100644 --- a/src/third_party/wiredtiger/test/suite/test_join02.py +++ b/src/third_party/wiredtiger/test/suite/test_join02.py @@ -27,7 +27,7 @@ # OTHER DEALINGS IN THE SOFTWARE. import wiredtiger, wttest, suite_random -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_join02.py # Join operations @@ -48,7 +48,7 @@ class test_join02(wttest.WiredTigerTestCase): ('nobloom', dict(usebloom=False)) ] - scenarios = number_scenarios(multiply_scenarios('.', keyscen, bloomscen)) + scenarios = make_scenarios(keyscen, bloomscen) # Start our range from 1, since WT record numbers start at 1, # it makes things work out nicer. 
diff --git a/src/third_party/wiredtiger/test/suite/test_join03.py b/src/third_party/wiredtiger/test/suite/test_join03.py index 613d2396b07..af19d934d70 100644 --- a/src/third_party/wiredtiger/test/suite/test_join03.py +++ b/src/third_party/wiredtiger/test/suite/test_join03.py @@ -28,7 +28,6 @@ import os import wiredtiger, wttest, run -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios # test_join03.py # Join operations diff --git a/src/third_party/wiredtiger/test/suite/test_join04.py b/src/third_party/wiredtiger/test/suite/test_join04.py index 7e2afb15285..b270cb7a21c 100644 --- a/src/third_party/wiredtiger/test/suite/test_join04.py +++ b/src/third_party/wiredtiger/test/suite/test_join04.py @@ -28,7 +28,6 @@ import os import wiredtiger, wttest, run -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios # test_join04.py # Join operations diff --git a/src/third_party/wiredtiger/test/suite/test_join05.py b/src/third_party/wiredtiger/test/suite/test_join05.py index ef2be4c6460..7dcb3e08911 100644 --- a/src/third_party/wiredtiger/test/suite/test_join05.py +++ b/src/third_party/wiredtiger/test/suite/test_join05.py @@ -27,7 +27,6 @@ # OTHER DEALINGS IN THE SOFTWARE. 
import wiredtiger, wttest -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios # test_join05.py # Tests based on JIRA reports diff --git a/src/third_party/wiredtiger/test/suite/test_join06.py b/src/third_party/wiredtiger/test/suite/test_join06.py index 9af6f93792f..5fedd365712 100644 --- a/src/third_party/wiredtiger/test/suite/test_join06.py +++ b/src/third_party/wiredtiger/test/suite/test_join06.py @@ -28,7 +28,7 @@ import os import wiredtiger, wttest, run -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_join06.py # Join operations @@ -46,7 +46,7 @@ class test_join06(wttest.WiredTigerTestCase): ('nobloom', dict(bloom=False)) ] - scenarios = number_scenarios(multiply_scenarios('.', isoscen, bloomscen)) + scenarios = make_scenarios(isoscen, bloomscen) def gen_values(self, i): s = str(i) # 345 => "345" diff --git a/src/third_party/wiredtiger/test/suite/test_join07.py b/src/third_party/wiredtiger/test/suite/test_join07.py index 36e91361329..2a32e678d72 100644 --- a/src/third_party/wiredtiger/test/suite/test_join07.py +++ b/src/third_party/wiredtiger/test/suite/test_join07.py @@ -28,7 +28,7 @@ import os, re, run import wiredtiger, wttest, suite_random -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios +from wtscenario import make_scenarios class ParseException(Exception): def __init__(self, msg): @@ -198,7 +198,7 @@ class test_join07(wttest.WiredTigerTestCase): ('noextractor', dict(extractor=False)) ] - scenarios = number_scenarios(extractscen) + scenarios = make_scenarios(extractscen) # Return the wiredtiger_open extension argument for a shared library. 
def extensionArg(self, exts): diff --git a/src/third_party/wiredtiger/test/suite/test_join08.py b/src/third_party/wiredtiger/test/suite/test_join08.py index 6d674ab8193..d389fad706b 100644 --- a/src/third_party/wiredtiger/test/suite/test_join08.py +++ b/src/third_party/wiredtiger/test/suite/test_join08.py @@ -27,7 +27,6 @@ # OTHER DEALINGS IN THE SOFTWARE. import wiredtiger, wttest -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios # test_join08.py # Test join error paths diff --git a/src/third_party/wiredtiger/test/suite/test_jsondump01.py b/src/third_party/wiredtiger/test/suite/test_jsondump01.py index 10262edc777..dc8027c2115 100644 --- a/src/third_party/wiredtiger/test/suite/test_jsondump01.py +++ b/src/third_party/wiredtiger/test/suite/test_jsondump01.py @@ -29,10 +29,12 @@ import os, json import wiredtiger, wttest from helper import \ - complex_populate, complex_populate_check_cursor,\ - simple_populate, simple_populate_check_cursor + complex_populate, complex_populate_check, complex_populate_check_cursor,\ + simple_populate, simple_populate_check, simple_populate_check_cursor, \ + simple_index_populate, simple_index_populate_check, \ + simple_index_populate_check_cursor, compare_files from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # A 'fake' cursor based on a set of rows. # It emulates a WT cursor well enough for the *_check_cursor methods. 
@@ -79,25 +81,34 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess): types = [ ('file', dict(uri='file:', config='', lsm=False, populate=simple_populate, - populate_check=simple_populate_check_cursor)), + populate_check=simple_populate_check, + populate_check_cursor=simple_populate_check_cursor)), ('lsm', dict(uri='lsm:', config='', lsm=True, populate=simple_populate, - populate_check=simple_populate_check_cursor)), + populate_check=simple_populate_check, + populate_check_cursor=simple_populate_check_cursor)), ('table-simple', dict(uri='table:', config='', lsm=False, populate=simple_populate, - populate_check=simple_populate_check_cursor)), + populate_check=simple_populate_check, + populate_check_cursor=simple_populate_check_cursor)), + ('table-index', dict(uri='table:', config='', lsm=False, + populate=simple_index_populate, + populate_check=simple_index_populate_check, + populate_check_cursor=simple_index_populate_check_cursor)), ('table-simple-lsm', dict(uri='table:', config='type=lsm', lsm=True, populate=simple_populate, - populate_check=simple_populate_check_cursor)), + populate_check=simple_populate_check, + populate_check_cursor=simple_populate_check_cursor)), ('table-complex', dict(uri='table:', config='', lsm=False, populate=complex_populate, - populate_check=complex_populate_check_cursor)), + populate_check=complex_populate_check, + populate_check_cursor=complex_populate_check_cursor)), ('table-complex-lsm', dict(uri='table:', config='type=lsm', lsm=True, populate=complex_populate, - populate_check=complex_populate_check_cursor)) + populate_check=complex_populate_check, + populate_check_cursor=complex_populate_check_cursor)) ] - scenarios = number_scenarios( - multiply_scenarios('.', types, keyfmt)) + scenarios = make_scenarios(types, keyfmt) # Dump using util, re-load using python's JSON, and do a content comparison. 
def test_jsondump_util(self): @@ -132,7 +143,7 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess): cursor = self.session.open_cursor(uri, None) fake = FakeCursor(cursor.key_format, cursor.value_format, data) cursor.close() - self.populate_check(self, fake, self.nentries) + self.populate_check_cursor(self, fake, self.nentries) # Dump using util, re-load using python's JSON, and do a content comparison. def test_jsonload_util(self): @@ -153,9 +164,18 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess): loadcmd.append('-a') self.runWt(loadcmd) - # check the contents of the data we read. - cursor = self.session.open_cursor(uri2, None) - self.populate_check(self, cursor, self.nentries) + # Check the contents of the data we read. + self.populate_check(self, uri2, self.nentries) + + # Reload into the original uri, and dump into another file. + self.session.drop(uri, None) + self.session.drop(uri2, None) + self.runWt(['load', '-jf', 'jsondump.out']) + self.runWt(['dump', '-j', uri], outfilename='jsondump2.out') + + # Compare the two outputs, and check the content again. 
+ compare_files(self, 'jsondump.out', 'jsondump2.out') + self.populate_check(self, uri, self.nentries) if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_lsm01.py b/src/third_party/wiredtiger/test/suite/test_lsm01.py index 1f89cf38d77..f6cee20e896 100644 --- a/src/third_party/wiredtiger/test/suite/test_lsm01.py +++ b/src/third_party/wiredtiger/test/suite/test_lsm01.py @@ -54,12 +54,10 @@ class test_lsm01(wttest.WiredTigerTestCase): config_vars = [ 'chunk_size', 'merge_max', 'bloom', 'bloom_bit_count', 'bloom_hash_count' ] - all_scenarios = wtscenario.multiply_scenarios('_', + scenarios = wtscenario.make_scenarios( chunk_size_scenarios, merge_max_scenarios, bloom_scenarios, - bloom_bit_scenarios, bloom_hash_scenarios, record_count_scenarios) - - scenarios = wtscenario.prune_scenarios(all_scenarios, 500) - scenarios = wtscenario.number_scenarios(scenarios) + bloom_bit_scenarios, bloom_hash_scenarios, record_count_scenarios, + prune=500) # Test drop of an object. def test_lsm(self): diff --git a/src/third_party/wiredtiger/test/suite/test_metadata_cursor01.py b/src/third_party/wiredtiger/test/suite/test_metadata_cursor01.py index e759c14f846..7802f89f174 100644 --- a/src/third_party/wiredtiger/test/suite/test_metadata_cursor01.py +++ b/src/third_party/wiredtiger/test/suite/test_metadata_cursor01.py @@ -27,7 +27,7 @@ # OTHER DEALINGS IN THE SOFTWARE. 
import wiredtiger, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_metadata_cursor01.py # Metadata cursor operations @@ -39,7 +39,7 @@ class test_metadata_cursor01(wttest.WiredTigerTestCase): """ table_name1 = 'test_metadata_cursor01' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('plain', {'metauri' : 'metadata:'}), ('create', {'metauri' : 'metadata:create'}), ]) diff --git a/src/third_party/wiredtiger/test/suite/test_nsnap01.py b/src/third_party/wiredtiger/test/suite/test_nsnap01.py index 5207b577ba4..7e8951750f8 100644 --- a/src/third_party/wiredtiger/test/suite/test_nsnap01.py +++ b/src/third_party/wiredtiger/test/suite/test_nsnap01.py @@ -30,7 +30,6 @@ # Named snapshots: basic API from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios from helper import simple_populate import wiredtiger, wttest diff --git a/src/third_party/wiredtiger/test/suite/test_nsnap02.py b/src/third_party/wiredtiger/test/suite/test_nsnap02.py index e4ed65ef72a..510c9d421ef 100644 --- a/src/third_party/wiredtiger/test/suite/test_nsnap02.py +++ b/src/third_party/wiredtiger/test/suite/test_nsnap02.py @@ -30,7 +30,6 @@ # Named snapshots: Combinations of dropping snapshots from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios from helper import simple_populate import wiredtiger, wttest diff --git a/src/third_party/wiredtiger/test/suite/test_nsnap03.py b/src/third_party/wiredtiger/test/suite/test_nsnap03.py index 0e853522940..3986c0c1a0a 100644 --- a/src/third_party/wiredtiger/test/suite/test_nsnap03.py +++ b/src/third_party/wiredtiger/test/suite/test_nsnap03.py @@ -30,7 +30,6 @@ # Named snapshots: Access and create from multiple sessions from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios from helper import simple_populate import wiredtiger, wttest diff --git 
a/src/third_party/wiredtiger/test/suite/test_nsnap04.py b/src/third_party/wiredtiger/test/suite/test_nsnap04.py index e8a5c9b6140..f9ef26b5600 100644 --- a/src/third_party/wiredtiger/test/suite/test_nsnap04.py +++ b/src/third_party/wiredtiger/test/suite/test_nsnap04.py @@ -30,7 +30,6 @@ # Named snapshots: Create snapshot from running transaction from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios from helper import simple_populate import wiredtiger, wttest diff --git a/src/third_party/wiredtiger/test/suite/test_overwrite.py b/src/third_party/wiredtiger/test/suite/test_overwrite.py index e22cdab4dea..4972a016bec 100644 --- a/src/third_party/wiredtiger/test/suite/test_overwrite.py +++ b/src/third_party/wiredtiger/test/suite/test_overwrite.py @@ -28,13 +28,13 @@ import wiredtiger, wttest from helper import key_populate, simple_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_overwrite.py # cursor overwrite configuration method class test_overwrite(wttest.WiredTigerTestCase): name = 'overwrite' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file-r', dict(type='file:',keyfmt='r')), ('file-S', dict(type='file:',keyfmt='S')), ('lsm-S', dict(type='lsm:',keyfmt='S')), diff --git a/src/third_party/wiredtiger/test/suite/test_perf001.py b/src/third_party/wiredtiger/test/suite/test_perf001.py index 1280639c9dd..b22ed2baeb0 100644 --- a/src/third_party/wiredtiger/test/suite/test_perf001.py +++ b/src/third_party/wiredtiger/test/suite/test_perf001.py @@ -32,13 +32,13 @@ import wiredtiger, wttest import random from time import clock, time -from wtscenario import check_scenarios +from wtscenario import make_scenarios # Test performance of inserting into a table with an index. 
class test_perf001(wttest.WiredTigerTestCase): table_name = 'test_perf001' - scenarios = check_scenarios([ + scenarios = make_scenarios([ #('file-file', dict(tabletype='file',indextype='file')), ('file-lsm', dict(tabletype='file',indextype='lsm')), #('lsm-file', dict(tabletype='lsm',indextype='file')), diff --git a/src/third_party/wiredtiger/test/suite/test_readonly01.py b/src/third_party/wiredtiger/test/suite/test_readonly01.py index 59e9743ab7e..e4b431ca1da 100644 --- a/src/third_party/wiredtiger/test/suite/test_readonly01.py +++ b/src/third_party/wiredtiger/test/suite/test_readonly01.py @@ -32,7 +32,7 @@ import fnmatch, os, shutil, time from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios +from wtscenario import make_scenarios import wttest class test_readonly01(wttest.WiredTigerTestCase, suite_subprocess): @@ -73,8 +73,7 @@ class test_readonly01(wttest.WiredTigerTestCase, suite_subprocess): create_params = 'key_format=r,value_format=8t')), ] - scenarios = multiply_scenarios('.', - basecfg_list, dir_list, log_list, types) + scenarios = make_scenarios(basecfg_list, dir_list, log_list, types) def conn_config(self, dir): self.home = dir diff --git a/src/third_party/wiredtiger/test/suite/test_rebalance.py b/src/third_party/wiredtiger/test/suite/test_rebalance.py index f2167e864c9..98bd81de602 100644 --- a/src/third_party/wiredtiger/test/suite/test_rebalance.py +++ b/src/third_party/wiredtiger/test/suite/test_rebalance.py @@ -29,7 +29,7 @@ import os, time import wiredtiger, wttest from helper import complex_populate, simple_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_rebalance.py # session level rebalance operation @@ -41,7 +41,7 @@ class test_rebalance(wttest.WiredTigerTestCase): config = 'key_format=S,allocation_size=512,internal_page_max=512' + \ ',leaf_page_max=1k,lsm=(chunk_size=512k,merge_min=10)' - scenarios = check_scenarios([ + scenarios 
= make_scenarios([ ('file', dict(uri='file:')), ('table', dict(uri='table:')), ('lsm', dict(uri='lsm:')) diff --git a/src/third_party/wiredtiger/test/suite/test_reconfig01.py b/src/third_party/wiredtiger/test/suite/test_reconfig01.py index 876de1fe5af..fb3fb7edac6 100644 --- a/src/third_party/wiredtiger/test/suite/test_reconfig01.py +++ b/src/third_party/wiredtiger/test/suite/test_reconfig01.py @@ -92,22 +92,25 @@ class test_reconfig01(wttest.WiredTigerTestCase): self.conn.reconfigure("checkpoint=(wait=5)") self.conn.reconfigure("checkpoint=(log_size=0)") self.conn.reconfigure("checkpoint=(log_size=1M)") - self.conn.reconfigure("checkpoint=(wait=0,name=hi)") - self.conn.reconfigure("checkpoint=(wait=5,name=hi)") - def test_reconfig_stat_log(self): + # Statistics logging: reconfigure the things we can reconfigure. + def test_reconfig_statistics_log_ok(self): self.conn.reconfigure("statistics=[all],statistics_log=(wait=0)") self.conn.reconfigure("statistics_log=(wait=0)") - self.conn.reconfigure("statistics_log=(wait=2)") + self.conn.reconfigure("statistics_log=(wait=2,json=true)") + self.conn.reconfigure("statistics_log=(wait=0)") + self.conn.reconfigure("statistics_log=(wait=2,on_close=true)") self.conn.reconfigure("statistics_log=(wait=0)") self.conn.reconfigure("statistics_log=(wait=2,sources=[lsm:])") self.conn.reconfigure("statistics_log=(wait=0)") self.conn.reconfigure("statistics_log=(wait=2,timestamp=\"t%b %d\")") self.conn.reconfigure("statistics_log=(wait=0)") - self.conn.reconfigure("statistics_log=(wait=2,path=\"wts.%d.%H\")") - self.conn.reconfigure("statistics_log=(wait=0)") - self.conn.reconfigure( - "statistics_log=(wait=2,sources=[lsm:],timestamp=\"%b\")") + + # Statistics logging: reconfigure the things we can't reconfigure. 
+ def test_reconfig_statistics_log_fail(self): + msg = '/unknown configuration key/' + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.conn.reconfigure("log=(path=foo)"), msg) def test_file_manager(self): self.conn.reconfigure("file_manager=(close_scan_interval=3)") diff --git a/src/third_party/wiredtiger/test/suite/test_reconfig02.py b/src/third_party/wiredtiger/test/suite/test_reconfig02.py index 85a9ceb2a34..9d9ac220aa7 100644 --- a/src/third_party/wiredtiger/test/suite/test_reconfig02.py +++ b/src/third_party/wiredtiger/test/suite/test_reconfig02.py @@ -41,24 +41,29 @@ class test_reconfig02(wttest.WiredTigerTestCase): self.conn_config = self.init_config return wttest.WiredTigerTestCase.setUpConnectionOpen(self, dir) - # Call reconfigure for zero filling a file. There is nothing - # we can actually look for to confirm it did anything. - # Also changing the log file size is a no-op, but should not fail. + # Logging: reconfigure the things we can reconfigure. def test_reconfig02_simple(self): + self.conn.reconfigure("log=(archive=false)") + self.conn.reconfigure("log=(prealloc=false)") + self.conn.reconfigure("log=(zero_fill=false)") + + self.conn.reconfigure("log=(archive=true)") + self.conn.reconfigure("log=(prealloc=true)") self.conn.reconfigure("log=(zero_fill=true)") - self.conn.reconfigure("log=(file_max=1MB)") - # Test that we get an error if we try to turn logging off. + # Logging: reconfigure the things we can't reconfigure. 
def test_reconfig02_disable(self): - msg = 'Invalid argument' - gotException = False - try: - self.conn.reconfigure("log=(enabled=false)") - except wiredtiger.WiredTigerError as e: - gotException = True - self.pr('got exception: ' + str(e)) - self.assertTrue(str(e).find(msg) >= 0) - self.assertTrue(gotException) + msg = '/unknown configuration key/' + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.conn.reconfigure("log=(enabled=true)"), msg) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.conn.reconfigure("log=(compressor=foo)"), msg) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.conn.reconfigure("log=(file_max=1MB)"), msg) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.conn.reconfigure("log=(path=foo)"), msg) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.conn.reconfigure("log=(recovery=true)"), msg) # Logging starts on, but prealloc is off. Verify it is off. 
# Reconfigure it on and run again, making sure that log files diff --git a/src/third_party/wiredtiger/test/suite/test_rename.py b/src/third_party/wiredtiger/test/suite/test_rename.py index af968a4a38d..1979bbb802a 100644 --- a/src/third_party/wiredtiger/test/suite/test_rename.py +++ b/src/third_party/wiredtiger/test/suite/test_rename.py @@ -31,7 +31,7 @@ import wiredtiger, wttest from helper import confirm_does_not_exist,\ complex_populate, complex_populate_check,\ simple_populate, simple_populate_check -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_rename.py # session level rename operation @@ -39,7 +39,7 @@ class test_rename(wttest.WiredTigerTestCase): name1 = 'test_rename1' name2 = 'test_rename2' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:')), ('table', dict(uri='table:')) ]) diff --git a/src/third_party/wiredtiger/test/suite/test_schema02.py b/src/third_party/wiredtiger/test/suite/test_schema02.py index b404261c066..bccc7dfc728 100644 --- a/src/third_party/wiredtiger/test/suite/test_schema02.py +++ b/src/third_party/wiredtiger/test/suite/test_schema02.py @@ -27,7 +27,7 @@ # OTHER DEALINGS IN THE SOFTWARE. 
import wiredtiger, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_schema02.py # Columns, column groups, indexes @@ -37,7 +37,7 @@ class test_schema02(wttest.WiredTigerTestCase): """ nentries = 1000 - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('normal', { 'idx_config' : '' }), ('lsm', { 'idx_config' : ',type=lsm' }), ]) diff --git a/src/third_party/wiredtiger/test/suite/test_schema03.py b/src/third_party/wiredtiger/test/suite/test_schema03.py index f48bfdf3cf8..81556393e78 100644 --- a/src/third_party/wiredtiger/test/suite/test_schema03.py +++ b/src/third_party/wiredtiger/test/suite/test_schema03.py @@ -29,7 +29,7 @@ import os import suite_random import wiredtiger, wtscenario, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios try: # Windows does not getrlimit/setrlimit so we must catch the resource @@ -249,7 +249,7 @@ class test_schema03(wttest.WiredTigerTestCase): # but boost it up to this limit anyway. 
OPEN_FILE_LIMIT = 1000 - restart_scenarios = check_scenarios([('table', dict(s_restart=['table'],P=0.3)), + restart_scenarios = [('table', dict(s_restart=['table'],P=0.3)), ('colgroup0', dict(s_restart=['colgroup0'],P=0.3)), ('index0', dict(s_restart=['index0'],P=0.3)), ('colgroup1', dict(s_restart=['colgroup1'],P=0.3)), @@ -259,7 +259,7 @@ class test_schema03(wttest.WiredTigerTestCase): ('populate1', dict(s_restart=['populate1'],P=0.3)), ('ipop', dict(s_restart=['index0','populate0'],P=0.3)), ('all', dict(s_restart=['table','colgroup0','index0','colgroup1','index1','populate0','index2','populate1'],P=1.0)), - ]) + ] ntable_scenarios = wtscenario.quick_scenarios('s_ntable', [1,2,5,8], [1.0,0.4,0.5,0.5]) @@ -272,11 +272,10 @@ class test_schema03(wttest.WiredTigerTestCase): table_args_scenarios = wtscenario.quick_scenarios('s_extra_table_args', ['', ',type=file', ',type=lsm'], [0.5, 0.3, 0.2]) - all_scenarios = wtscenario.multiply_scenarios('_', restart_scenarios, ntable_scenarios, ncolgroup_scenarios, nindex_scenarios, idx_args_scenarios, table_args_scenarios) - - # Prune the scenarios according to the probabilities given above. - scenarios = wtscenario.prune_scenarios(all_scenarios, 30) - scenarios = wtscenario.number_scenarios(scenarios) + scenarios = wtscenario.make_scenarios( + restart_scenarios, ntable_scenarios, ncolgroup_scenarios, + nindex_scenarios, idx_args_scenarios, table_args_scenarios, + prune=30) # Note: the set can be reduced here for debugging, e.g. 
# scenarios = scenarios[40:44] diff --git a/src/third_party/wiredtiger/test/suite/test_schema04.py b/src/third_party/wiredtiger/test/suite/test_schema04.py index cd41138deb0..8ac81690819 100644 --- a/src/third_party/wiredtiger/test/suite/test_schema04.py +++ b/src/third_party/wiredtiger/test/suite/test_schema04.py @@ -28,7 +28,7 @@ import os import wiredtiger, wttest, run -from wtscenario import check_scenarios, number_scenarios +from wtscenario import make_scenarios # test_schema04.py # Test indices with duplicates @@ -47,7 +47,7 @@ class test_schema04(wttest.WiredTigerTestCase): """ nentries = 100 - scenarios = number_scenarios([ + scenarios = make_scenarios([ ('index-before', { 'create_index' : 0 }), ('index-during', { 'create_index' : 1 }), ('index-after', { 'create_index' : 2 }), diff --git a/src/third_party/wiredtiger/test/suite/test_schema05.py b/src/third_party/wiredtiger/test/suite/test_schema05.py index 89722d5f89a..89484cfc7bd 100644 --- a/src/third_party/wiredtiger/test/suite/test_schema05.py +++ b/src/third_party/wiredtiger/test/suite/test_schema05.py @@ -28,7 +28,7 @@ import os import wiredtiger, wttest, run -from wtscenario import check_scenarios, number_scenarios +from wtscenario import make_scenarios # test_schema05.py # Test indices using a custom extractor. @@ -51,7 +51,7 @@ class test_schema05(wttest.WiredTigerTestCase): nentries = 1000 nindices = 6 - scenarios = number_scenarios([ + scenarios = make_scenarios([ ('index-before', { 'create_index' : 0 }), ('index-during', { 'create_index' : 1 }), ('index-after', { 'create_index' : 2 }), diff --git a/src/third_party/wiredtiger/test/suite/test_schema06.py b/src/third_party/wiredtiger/test/suite/test_schema06.py index e72959edf2a..e0eec189137 100644 --- a/src/third_party/wiredtiger/test/suite/test_schema06.py +++ b/src/third_party/wiredtiger/test/suite/test_schema06.py @@ -27,6 +27,7 @@ # OTHER DEALINGS IN THE SOFTWARE. 
import wiredtiger, wttest +from wtscenario import make_scenarios # test_schema06.py # Repeatedly create and drop indices @@ -36,10 +37,10 @@ class test_schema06(wttest.WiredTigerTestCase): """ nentries = 1000 - scenarios = [ + scenarios = make_scenarios([ ('normal', { 'idx_config' : '' }), ('lsm', { 'idx_config' : ',type=lsm' }), - ] + ]) def flip(self, inum, val): """ diff --git a/src/third_party/wiredtiger/test/suite/test_split.py b/src/third_party/wiredtiger/test/suite/test_split.py index d09613e1c52..28bf6bc59b0 100644 --- a/src/third_party/wiredtiger/test/suite/test_split.py +++ b/src/third_party/wiredtiger/test/suite/test_split.py @@ -35,7 +35,6 @@ from wiredtiger import stat from helper import confirm_empty,\ key_populate, value_populate, simple_populate,\ complex_populate, complex_value_populate -from wtscenario import multiply_scenarios, number_scenarios # Test splits class test_split(wttest.WiredTigerTestCase): diff --git a/src/third_party/wiredtiger/test/suite/test_stat01.py b/src/third_party/wiredtiger/test/suite/test_stat01.py index 5c3259696eb..1ad51ee9882 100644 --- a/src/third_party/wiredtiger/test/suite/test_stat01.py +++ b/src/third_party/wiredtiger/test/suite/test_stat01.py @@ -29,7 +29,7 @@ import helper, wiredtiger, wttest from wiredtiger import stat from helper import key_populate, simple_populate -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_stat01.py # Statistics operations @@ -49,7 +49,7 @@ class test_stat01(wttest.WiredTigerTestCase): ('recno', dict(keyfmt='r')), ('string', dict(keyfmt='S')), ] - scenarios = number_scenarios(multiply_scenarios('.', types, keyfmt)) + scenarios = make_scenarios(types, keyfmt) conn_config = 'statistics=(all)' diff --git a/src/third_party/wiredtiger/test/suite/test_stat02.py b/src/third_party/wiredtiger/test/suite/test_stat02.py index 88371947b5b..ef3907e54b1 100644 --- a/src/third_party/wiredtiger/test/suite/test_stat02.py +++ 
b/src/third_party/wiredtiger/test/suite/test_stat02.py @@ -28,7 +28,7 @@ import itertools, wiredtiger, wttest from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios from wiredtiger import stat from helper import complex_populate, complex_populate_lsm, simple_populate @@ -57,8 +57,7 @@ class test_stat_cursor_config(wttest.WiredTigerTestCase): ('size', dict(cursor_config='size')) ] - scenarios = number_scenarios( - multiply_scenarios('.', uri, data_config, cursor_config)) + scenarios = make_scenarios(uri, data_config, cursor_config) # Turn on statistics for this test. def conn_config(self, dir): @@ -106,13 +105,13 @@ class test_stat_cursor_dsrc_clear(wttest.WiredTigerTestCase): pfx = 'test_stat_cursor_dsrc_clear' uri = [ - ('1', dict(uri='file:' + pfx, pop=simple_populate)), - ('2', dict(uri='table:' + pfx, pop=simple_populate)), - ('3', dict(uri='table:' + pfx, pop=complex_populate)), - ('4', dict(uri='table:' + pfx, pop=complex_populate_lsm)) + ('dsrc_clear_1', dict(uri='file:' + pfx, pop=simple_populate)), + ('dsrc_clear_2', dict(uri='table:' + pfx, pop=simple_populate)), + ('dsrc_clear_3', dict(uri='table:' + pfx, pop=complex_populate)), + ('dsrc_clear_4', dict(uri='table:' + pfx, pop=complex_populate_lsm)) ] - scenarios = number_scenarios(multiply_scenarios('.', uri)) + scenarios = make_scenarios(uri) conn_config = 'statistics=(all)' def test_stat_cursor_dsrc_clear(self): @@ -136,13 +135,13 @@ class test_stat_cursor_fast(wttest.WiredTigerTestCase): pfx = 'test_stat_cursor_fast' uri = [ - ('1', dict(uri='file:' + pfx, pop=simple_populate)), - ('2', dict(uri='table:' + pfx, pop=simple_populate)), - ('3', dict(uri='table:' + pfx, pop=complex_populate)), - ('4', dict(uri='table:' + pfx, pop=complex_populate_lsm)) + ('fast_1', dict(uri='file:' + pfx, pop=simple_populate)), + ('fast_2', dict(uri='table:' + pfx, pop=simple_populate)), + ('fast_3', dict(uri='table:' + pfx, 
pop=complex_populate)), + ('fast_4', dict(uri='table:' + pfx, pop=complex_populate_lsm)) ] - scenarios = number_scenarios(multiply_scenarios('.', uri)) + scenarios = make_scenarios(uri) conn_config = 'statistics=(all)' def test_stat_cursor_fast(self): @@ -180,13 +179,13 @@ class test_stat_cursor_dsrc_error(wttest.WiredTigerTestCase): pfx = 'test_stat_cursor_dsrc_error' uri = [ - ('1', dict(uri='file:' + pfx, pop=simple_populate)), - ('2', dict(uri='table:' + pfx, pop=simple_populate)), - ('3', dict(uri='table:' + pfx, pop=complex_populate)), - ('4', dict(uri='table:' + pfx, pop=complex_populate_lsm)) + ('dsrc_error_1', dict(uri='file:' + pfx, pop=simple_populate)), + ('dsrc_error_2', dict(uri='table:' + pfx, pop=simple_populate)), + ('dsrc_error_3', dict(uri='table:' + pfx, pop=complex_populate)), + ('dsrc_error_4', dict(uri='table:' + pfx, pop=complex_populate_lsm)) ] - scenarios = number_scenarios(multiply_scenarios('.', uri)) + scenarios = make_scenarios(uri) conn_config = 'statistics=(all)' def test_stat_cursor_dsrc_error(self): diff --git a/src/third_party/wiredtiger/test/suite/test_stat03.py b/src/third_party/wiredtiger/test/suite/test_stat03.py index 039ad1f7f8d..b17fe6eb91c 100644 --- a/src/third_party/wiredtiger/test/suite/test_stat03.py +++ b/src/third_party/wiredtiger/test/suite/test_stat03.py @@ -34,7 +34,7 @@ from helper import complex_populate, complex_populate_lsm, simple_populate from helper import key_populate, complex_value_populate, value_populate from helper import complex_populate_colgroup_count, complex_populate_index_count from helper import complex_populate_colgroup_name, complex_populate_index_name -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_stat03.py # Statistics reset test. 
@@ -51,7 +51,7 @@ class test_stat_cursor_reset(wttest.WiredTigerTestCase): dict(uri='table:' + pfx, pop=complex_populate_lsm)), ] - scenarios = number_scenarios(multiply_scenarios('.', uri)) + scenarios = make_scenarios(uri) conn_config = 'statistics=(all)' def stat_cursor(self, uri): diff --git a/src/third_party/wiredtiger/test/suite/test_stat04.py b/src/third_party/wiredtiger/test/suite/test_stat04.py index e7c39371f80..b5309efff37 100644 --- a/src/third_party/wiredtiger/test/suite/test_stat04.py +++ b/src/third_party/wiredtiger/test/suite/test_stat04.py @@ -28,7 +28,7 @@ import os, struct from suite_subprocess import suite_subprocess -from wtscenario import number_scenarios, multiply_scenarios +from wtscenario import make_scenarios import wiredtiger, wttest from wiredtiger import stat @@ -49,7 +49,7 @@ class test_stat04(wttest.WiredTigerTestCase, suite_subprocess): ('large', dict(nentries=100000, valuesize=1)), ('jumboval', dict(nentries=100, valuesize=4200000)), ] - scenarios = number_scenarios(multiply_scenarios('.', keyfmt, nentries)) + scenarios = make_scenarios(keyfmt, nentries) conn_config = 'statistics=(all)' def init_test(self): @@ -91,6 +91,7 @@ class test_stat04(wttest.WiredTigerTestCase, suite_subprocess): self.checkcount(uri, count) cursor[self.genkey(i)] = self.genvalue(i) count += 1 + # Remove a number of entries, at each step checking that stats match. for i in range(0, self.nentries / 37): cursor.set_key(self.genkey(i*11 % self.nentries)) @@ -99,5 +100,10 @@ class test_stat04(wttest.WiredTigerTestCase, suite_subprocess): self.checkcount(uri, count) cursor.close() + # Confirm the count is correct after writing to the backing file, + # that tests the on-disk format as well as the in-memory format. 
+ self.reopen_conn() + self.checkcount(uri, count) + if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_stat05.py b/src/third_party/wiredtiger/test/suite/test_stat05.py index 9bcedd65089..62562f78ed6 100644 --- a/src/third_party/wiredtiger/test/suite/test_stat05.py +++ b/src/third_party/wiredtiger/test/suite/test_stat05.py @@ -28,7 +28,7 @@ import itertools, wiredtiger, wttest from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios from wiredtiger import stat from helper import complex_populate, complex_populate_lsm, simple_populate from helper import complex_value_populate, key_populate, value_populate @@ -43,16 +43,18 @@ class test_stat_cursor_config(wttest.WiredTigerTestCase): ('file', dict(uri='file:' + pfx, pop=simple_populate, cfg='')), ('table', dict(uri='table:' + pfx, pop=simple_populate, cfg='')), ('inmem', dict(uri='table:' + pfx, pop=simple_populate, cfg='', - conn_config='in_memory,statistics=(fast)')), + conn_config = 'in_memory,statistics=(fast)')), ('table-lsm', dict(uri='table:' + pfx, pop=simple_populate, - cfg=',type=lsm,lsm=(chunk_size=1MB,merge_min=2)')), + cfg=',type=lsm,lsm=(chunk_size=1MB,merge_min=2)', + conn_config = 'statistics=(fast),eviction_dirty_target=99,eviction_dirty_trigger=99')), ('complex', dict(uri='table:' + pfx, pop=complex_populate, cfg='')), ('complex-lsm', dict(uri='table:' + pfx, pop=complex_populate_lsm, - cfg=',lsm=(chunk_size=1MB,merge_min=2)')), + cfg=',lsm=(chunk_size=1MB,merge_min=2)', + conn_config = 'statistics=(fast),eviction_dirty_target=99,eviction_dirty_trigger=99')), ] - scenarios = number_scenarios(uri) + scenarios = make_scenarios(uri) def openAndWalkStatCursor(self): c = self.session.open_cursor( @@ -62,7 +64,6 @@ class test_stat_cursor_config(wttest.WiredTigerTestCase): count += 1 c.close() - # Open a size-only statistics cursor on various table types. 
Ensure that # the cursor open succeeds. Insert enough data that LSM tables to need to # switch and merge. diff --git a/src/third_party/wiredtiger/test/suite/test_stat_log01.py b/src/third_party/wiredtiger/test/suite/test_stat_log01.py index f6033d940c5..65ce80dfe7d 100644 --- a/src/third_party/wiredtiger/test/suite/test_stat_log01.py +++ b/src/third_party/wiredtiger/test/suite/test_stat_log01.py @@ -51,9 +51,10 @@ class test_stat_log01(wttest.WiredTigerTestCase): None, "create,statistics=(fast),statistics_log=(wait=1)") # Wait for the default interval, to ensure stats have been written. time.sleep(2) - self.check_stats_file("WiredTigerStat") + self.check_stats_file(".") def test_stats_log_name(self): + os.mkdir("foo") self.conn = self.wiredtiger_open( None, "create,statistics=(fast),statistics_log=(wait=1,path=foo)") # Wait for the default interval, to ensure stats have been written. @@ -66,21 +67,18 @@ class test_stat_log01(wttest.WiredTigerTestCase): # Wait for the default interval, to ensure stats have been written. time.sleep(2) self.close_conn() - self.check_stats_file("WiredTigerStat") + self.check_stats_file(".") def test_stats_log_on_close(self): self.conn = self.wiredtiger_open(None, "create,statistics=(fast),statistics_log=(on_close=true)") # Close the connection to ensure the statistics get generated. 
self.close_conn() - self.check_stats_file("WiredTigerStat") + self.check_stats_file(".") - def check_stats_file(self, filename): - if filename == "WiredTigerStat": - files = glob.glob(filename + '.[0-9]*') - self.assertTrue(files) - else: - self.assertTrue(os.path.isfile(filename)) + def check_stats_file(self, dir): + files = glob.glob(dir + '/' + 'WiredTigerStat.[0-9]*') + self.assertTrue(files) if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_sweep01.py b/src/third_party/wiredtiger/test/suite/test_sweep01.py index bccd2bce012..71f8fcb180e 100644 --- a/src/third_party/wiredtiger/test/suite/test_sweep01.py +++ b/src/third_party/wiredtiger/test/suite/test_sweep01.py @@ -33,8 +33,8 @@ import fnmatch, os, shutil, run, time from suite_subprocess import suite_subprocess +from wtscenario import make_scenarios from wiredtiger import stat -from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios import wttest class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess): @@ -55,7 +55,7 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess): create_params = 'key_format=r,value_format=8t')), ] - scenarios = types + scenarios = make_scenarios(types) def test_ops(self): # diff --git a/src/third_party/wiredtiger/test/suite/test_sweep03.py b/src/third_party/wiredtiger/test/suite/test_sweep03.py index 061c2f5b37b..61078fa96b5 100644 --- a/src/third_party/wiredtiger/test/suite/test_sweep03.py +++ b/src/third_party/wiredtiger/test/suite/test_sweep03.py @@ -33,7 +33,7 @@ import fnmatch, os, shutil, run, time from suite_subprocess import suite_subprocess from wiredtiger import stat -from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios +from wtscenario import make_scenarios import wttest class test_sweep03(wttest.WiredTigerTestCase, suite_subprocess): @@ -54,7 +54,7 @@ class test_sweep03(wttest.WiredTigerTestCase, suite_subprocess): create_params = 'key_format=r,value_format=8t')), 
] - scenarios = types + scenarios = make_scenarios(types) def test_disable_idle_timeout1(self): # diff --git a/src/third_party/wiredtiger/test/suite/test_truncate01.py b/src/third_party/wiredtiger/test/suite/test_truncate01.py index 77a476e40c1..9a3518c6984 100644 --- a/src/third_party/wiredtiger/test/suite/test_truncate01.py +++ b/src/third_party/wiredtiger/test/suite/test_truncate01.py @@ -34,13 +34,13 @@ import wiredtiger, wttest from helper import confirm_empty,\ key_populate, value_populate, simple_populate,\ complex_populate, complex_value_populate -from wtscenario import check_scenarios, multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # Test truncation arguments. class test_truncate_arguments(wttest.WiredTigerTestCase): name = 'test_truncate' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(type='file:')), ('table', dict(type='table:')) ]) @@ -80,7 +80,7 @@ class test_truncate_arguments(wttest.WiredTigerTestCase): # Test truncation of an object using its URI. class test_truncate_uri(wttest.WiredTigerTestCase): name = 'test_truncate' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(type='file:')), ('table', dict(type='table:')) ]) @@ -115,7 +115,7 @@ class test_truncate_cursor_order(wttest.WiredTigerTestCase): ('recno', dict(keyfmt='r')), ('string', dict(keyfmt='S')), ] - scenarios = number_scenarios(multiply_scenarios('.', types, keyfmt)) + scenarios = make_scenarios(types, keyfmt) # Test an illegal order, then confirm that equal cursors works. def test_truncate_cursor_order(self): @@ -146,7 +146,7 @@ class test_truncate_cursor_end(wttest.WiredTigerTestCase): ('recno', dict(keyfmt='r')), ('string', dict(keyfmt='S')), ] - scenarios = number_scenarios(multiply_scenarios('.', types, keyfmt)) + scenarios = make_scenarios(types, keyfmt) # Test truncation of cursors past the end of the object. 
def test_truncate_cursor_order(self): @@ -205,8 +205,7 @@ class test_truncate_cursor(wttest.WiredTigerTestCase): ('big', dict(nentries=1000,skip=37)), ] - scenarios = number_scenarios( - multiply_scenarios('.', types, keyfmt, size, reopen)) + scenarios = make_scenarios(types, keyfmt, size, reopen) # Set a cursor key. def cursorKey(self, uri, key): diff --git a/src/third_party/wiredtiger/test/suite/test_truncate02.py b/src/third_party/wiredtiger/test/suite/test_truncate02.py index 6c11302787c..e57a65d2f97 100644 --- a/src/third_party/wiredtiger/test/suite/test_truncate02.py +++ b/src/third_party/wiredtiger/test/suite/test_truncate02.py @@ -32,7 +32,7 @@ import wiredtiger, wttest from helper import key_populate, value_populate, simple_populate -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # test_truncate_fast_delete # When deleting leaf pages that aren't in memory, we set transactional @@ -86,8 +86,7 @@ class test_truncate_fast_delete(wttest.WiredTigerTestCase): ('txn2', dict(commit=False)), ] - scenarios = number_scenarios( - multiply_scenarios('.', types, keyfmt, overflow, reads, writes, txn)) + scenarios = make_scenarios(types, keyfmt, overflow, reads, writes, txn) # Return the number of records visible to the cursor; test both forward # and backward iteration, they are different code paths in this case. diff --git a/src/third_party/wiredtiger/test/suite/test_txn01.py b/src/third_party/wiredtiger/test/suite/test_txn01.py index eb6963791fd..1ba74461088 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn01.py +++ b/src/third_party/wiredtiger/test/suite/test_txn01.py @@ -27,13 +27,13 @@ # OTHER DEALINGS IN THE SOFTWARE. 
import wiredtiger, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_txn01.py # Transactions: basic functionality class test_txn01(wttest.WiredTigerTestCase): nentries = 1000 - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('col-f', dict(uri='file:text_txn01',key_format='r',value_format='S')), ('col-t', dict(uri='table:text_txn01',key_format='r',value_format='S')), ('fix-f', dict(uri='file:text_txn01',key_format='r',value_format='8t')), diff --git a/src/third_party/wiredtiger/test/suite/test_txn02.py b/src/third_party/wiredtiger/test/suite/test_txn02.py index fccf123d3bc..a0c2c12a47c 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn02.py +++ b/src/third_party/wiredtiger/test/suite/test_txn02.py @@ -32,7 +32,7 @@ import fnmatch, os, shutil, time from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios +from wtscenario import make_scenarios import wttest class test_txn02(wttest.WiredTigerTestCase, suite_subprocess): @@ -81,22 +81,18 @@ class test_txn02(wttest.WiredTigerTestCase, suite_subprocess): txn3s = [('t3c', dict(txn3='commit')), ('t3r', dict(txn3='rollback'))] txn4s = [('t4c', dict(txn4='commit')), ('t4r', dict(txn4='rollback'))] - all_scenarios = multiply_scenarios('.', types, - op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s) - # This test generates thousands of potential scenarios. # For default runs, we'll use a small subset of them, for # long runs (when --long is set) we'll set a much larger limit. - scenarios = number_scenarios(prune_scenarios(all_scenarios, 20, 5000)) + scenarios = make_scenarios(types, + op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s, + prune=20, prunelong=5000) # Each check_log() call takes a second, so we don't call it for # every scenario, we'll limit it to the value of checklog_calls. 
checklog_calls = 100 if wttest.islongtest() else 2 checklog_mod = (len(scenarios) / checklog_calls + 1) - # scenarios = number_scenarios(multiply_scenarios('.', types, - # op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s)) [:3] - # Overrides WiredTigerTestCase def setUpConnectionOpen(self, dir): self.home = dir # Cycle through the different transaction_sync values in a diff --git a/src/third_party/wiredtiger/test/suite/test_txn03.py b/src/third_party/wiredtiger/test/suite/test_txn03.py index 97180a75949..18a0e096767 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn03.py +++ b/src/third_party/wiredtiger/test/suite/test_txn03.py @@ -31,7 +31,7 @@ # import wiredtiger, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios class test_txn03(wttest.WiredTigerTestCase): tablename = 'test_txn03' @@ -42,7 +42,7 @@ class test_txn03(wttest.WiredTigerTestCase): data_str2 = "TEST_VAL1" nentries = 1000 - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('var', dict(create_params = "key_format=S,value_format=S")), ]) diff --git a/src/third_party/wiredtiger/test/suite/test_txn04.py b/src/third_party/wiredtiger/test/suite/test_txn04.py index 9d9d2db62c6..ade39272f84 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn04.py +++ b/src/third_party/wiredtiger/test/suite/test_txn04.py @@ -32,7 +32,7 @@ import shutil, os from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios import wttest class test_txn04(wttest.WiredTigerTestCase, suite_subprocess): @@ -62,7 +62,7 @@ class test_txn04(wttest.WiredTigerTestCase, suite_subprocess): ] txn1s = [('t1c', dict(txn1='commit')), ('t1r', dict(txn1='rollback'))] - scenarios = number_scenarios(multiply_scenarios('.', types, op1s, txn1s)) + scenarios = make_scenarios(types, op1s, txn1s) # Overrides WiredTigerTestCase def setUpConnectionOpen(self, dir): self.home = dir diff --git 
a/src/third_party/wiredtiger/test/suite/test_txn05.py b/src/third_party/wiredtiger/test/suite/test_txn05.py index bb68034ca04..9e84fe7d3fe 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn05.py +++ b/src/third_party/wiredtiger/test/suite/test_txn05.py @@ -32,7 +32,7 @@ import fnmatch, os, shutil, time from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios import wttest class test_txn05(wttest.WiredTigerTestCase, suite_subprocess): @@ -63,8 +63,7 @@ class test_txn05(wttest.WiredTigerTestCase, suite_subprocess): ] txn1s = [('t1c', dict(txn1='commit')), ('t1r', dict(txn1='rollback'))] - scenarios = number_scenarios(multiply_scenarios('.', types, op1s, txn1s)) - # scenarios = number_scenarios(multiply_scenarios('.', types, op1s, txn1s))[:3] + scenarios = make_scenarios(types, op1s, txn1s) # Overrides WiredTigerTestCase def setUpConnectionOpen(self, dir): self.home = dir diff --git a/src/third_party/wiredtiger/test/suite/test_txn06.py b/src/third_party/wiredtiger/test/suite/test_txn06.py index 9c1d0335d47..e4636e40e2e 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn06.py +++ b/src/third_party/wiredtiger/test/suite/test_txn06.py @@ -30,7 +30,6 @@ # Transactions: test long-running snapshots from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios from helper import simple_populate import wiredtiger, wttest diff --git a/src/third_party/wiredtiger/test/suite/test_txn07.py b/src/third_party/wiredtiger/test/suite/test_txn07.py index f74120e3590..8dd8238343d 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn07.py +++ b/src/third_party/wiredtiger/test/suite/test_txn07.py @@ -33,7 +33,7 @@ import fnmatch, os, shutil, run, time from suite_subprocess import suite_subprocess from wiredtiger import stat -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios import wttest 
class test_txn07(wttest.WiredTigerTestCase, suite_subprocess): @@ -70,8 +70,7 @@ class test_txn07(wttest.WiredTigerTestCase, suite_subprocess): ('none', dict(compress='')), ] - scenarios = number_scenarios(multiply_scenarios('.', types, op1s, txn1s, - compress)) + scenarios = make_scenarios(types, op1s, txn1s, compress) # Overrides WiredTigerTestCase def setUpConnectionOpen(self, dir): self.home = dir diff --git a/src/third_party/wiredtiger/test/suite/test_txn08.py b/src/third_party/wiredtiger/test/suite/test_txn08.py index 36253856285..f0cdf08df07 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn08.py +++ b/src/third_party/wiredtiger/test/suite/test_txn08.py @@ -33,7 +33,6 @@ import fnmatch, os, shutil, run, time from suite_subprocess import suite_subprocess from wiredtiger import stat -from wtscenario import multiply_scenarios, number_scenarios import wttest class test_txn08(wttest.WiredTigerTestCase, suite_subprocess): diff --git a/src/third_party/wiredtiger/test/suite/test_txn09.py b/src/third_party/wiredtiger/test/suite/test_txn09.py index f536d65205d..cfad8270ab1 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn09.py +++ b/src/third_party/wiredtiger/test/suite/test_txn09.py @@ -32,7 +32,7 @@ import fnmatch, os, shutil, time from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios +from wtscenario import make_scenarios import wttest class test_txn09(wttest.WiredTigerTestCase, suite_subprocess): @@ -73,13 +73,12 @@ class test_txn09(wttest.WiredTigerTestCase, suite_subprocess): txn3s = [('t3c', dict(txn3='commit')), ('t3r', dict(txn3='rollback'))] txn4s = [('t4c', dict(txn4='commit')), ('t4r', dict(txn4='rollback'))] - all_scenarios = multiply_scenarios('.', types, - op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s) - # This test generates thousands of potential scenarios. 
# For default runs, we'll use a small subset of them, for # long runs (when --long is set) we'll set a much larger limit. - scenarios = number_scenarios(prune_scenarios(all_scenarios, 20, 5000)) + scenarios = make_scenarios(types, + op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s, + prune=20, prunelong=5000) # Overrides WiredTigerTestCase def setUpConnectionOpen(self, dir): diff --git a/src/third_party/wiredtiger/test/suite/test_txn10.py b/src/third_party/wiredtiger/test/suite/test_txn10.py index cf9c11dd4ab..a4745e60066 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn10.py +++ b/src/third_party/wiredtiger/test/suite/test_txn10.py @@ -32,7 +32,6 @@ import fnmatch, os, shutil, time from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios import wttest class test_txn10(wttest.WiredTigerTestCase, suite_subprocess): diff --git a/src/third_party/wiredtiger/test/suite/test_txn12.py b/src/third_party/wiredtiger/test/suite/test_txn12.py index 8ae9df33990..32c058bea85 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn12.py +++ b/src/third_party/wiredtiger/test/suite/test_txn12.py @@ -29,7 +29,6 @@ import wiredtiger, wttest from suite_subprocess import suite_subprocess from wiredtiger import stat -from wtscenario import multiply_scenarios, number_scenarios # test_txn12.py # test of commit following failed op in a read only transaction. 
diff --git a/src/third_party/wiredtiger/test/suite/test_txn13.py b/src/third_party/wiredtiger/test/suite/test_txn13.py index dd6a6dbcd6d..ae0250c06e8 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn13.py +++ b/src/third_party/wiredtiger/test/suite/test_txn13.py @@ -33,7 +33,7 @@ #import fnmatch, os, shutil, run, time from suite_subprocess import suite_subprocess -from wtscenario import check_scenarios +from wtscenario import make_scenarios import wiredtiger, wttest class test_txn13(wttest.WiredTigerTestCase, suite_subprocess): @@ -43,7 +43,7 @@ class test_txn13(wttest.WiredTigerTestCase, suite_subprocess): nops = 1024 create_params = 'key_format=i,value_format=S' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('1gb', dict(expect_err=False, valuesize=1048576)), ('2gb', dict(expect_err=False, valuesize=2097152)), ('4gb', dict(expect_err=True, valuesize=4194304)) diff --git a/src/third_party/wiredtiger/test/suite/test_txn14.py b/src/third_party/wiredtiger/test/suite/test_txn14.py index 371f4402567..f9ccabaab8b 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn14.py +++ b/src/third_party/wiredtiger/test/suite/test_txn14.py @@ -32,7 +32,7 @@ import fnmatch, os, shutil, time from suite_subprocess import suite_subprocess -from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios +from wtscenario import make_scenarios import wttest class test_txn14(wttest.WiredTigerTestCase, suite_subprocess): @@ -47,7 +47,7 @@ class test_txn14(wttest.WiredTigerTestCase, suite_subprocess): ('sync', dict(sync='on')), ('bg', dict(sync='background')), ] - scenarios = multiply_scenarios('.', sync_list) + scenarios = make_scenarios(sync_list) def simulate_crash_restart(self, olddir, newdir): ''' Simulate a crash from olddir and restart in newdir. 
''' diff --git a/src/third_party/wiredtiger/test/suite/test_txn15.py b/src/third_party/wiredtiger/test/suite/test_txn15.py index 809dce4ebfa..c061c093b02 100644 --- a/src/third_party/wiredtiger/test/suite/test_txn15.py +++ b/src/third_party/wiredtiger/test/suite/test_txn15.py @@ -33,7 +33,7 @@ import fnmatch, os, shutil, time from suite_subprocess import suite_subprocess from wiredtiger import stat -from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios +from wtscenario import make_scenarios import wttest class test_txn15(wttest.WiredTigerTestCase, suite_subprocess): @@ -71,7 +71,7 @@ class test_txn15(wttest.WiredTigerTestCase, suite_subprocess): ('c_none', dict(commit_sync=None)), ('c_off', dict(commit_sync='sync=off')), ] - scenarios = multiply_scenarios('.', conn_sync_enabled, conn_sync_method, + scenarios = make_scenarios(conn_sync_enabled, conn_sync_method, begin_sync, commit_sync) # Given the different configuration settings determine if this group diff --git a/src/third_party/wiredtiger/test/suite/test_upgrade.py b/src/third_party/wiredtiger/test/suite/test_upgrade.py index 357e437f14d..e4f92f8f8d8 100644 --- a/src/third_party/wiredtiger/test/suite/test_upgrade.py +++ b/src/third_party/wiredtiger/test/suite/test_upgrade.py @@ -29,14 +29,14 @@ import os, time import wiredtiger, wttest from helper import complex_populate, simple_populate -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_upgrade.py # session level upgrade operation class test_upgrade(wttest.WiredTigerTestCase): name = 'test_upgrade' - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('file', dict(uri='file:')), ('table', dict(uri='table:')) ]) diff --git a/src/third_party/wiredtiger/test/suite/test_util02.py b/src/third_party/wiredtiger/test/suite/test_util02.py index 475e856052a..421b0104484 100644 --- a/src/third_party/wiredtiger/test/suite/test_util02.py +++ b/src/third_party/wiredtiger/test/suite/test_util02.py @@ -29,7 
+29,7 @@ import string, os import wiredtiger, wttest from suite_subprocess import suite_subprocess -from wtscenario import check_scenarios +from wtscenario import make_scenarios from helper import complex_populate # test_util02.py @@ -44,7 +44,7 @@ class test_util02(wttest.WiredTigerTestCase, suite_subprocess): nentries = 1000 stringclass = ''.__class__ - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('SS', dict(key_format='S',value_format='S')), ('rS', dict(key_format='r',value_format='S')), ('ri', dict(key_format='r',value_format='i')), diff --git a/src/third_party/wiredtiger/test/suite/test_util03.py b/src/third_party/wiredtiger/test/suite/test_util03.py index c3ea48b8f5e..e341c79ff9e 100644 --- a/src/third_party/wiredtiger/test/suite/test_util03.py +++ b/src/third_party/wiredtiger/test/suite/test_util03.py @@ -28,7 +28,7 @@ from suite_subprocess import suite_subprocess import wiredtiger, wttest -from wtscenario import check_scenarios +from wtscenario import make_scenarios # test_util03.py # Utilities: wt create @@ -36,7 +36,7 @@ class test_util03(wttest.WiredTigerTestCase, suite_subprocess): tablename = 'test_util03.a' nentries = 1000 - scenarios = check_scenarios([ + scenarios = make_scenarios([ ('none', dict(key_format=None,value_format=None)), ('SS', dict(key_format='S',value_format='S')), ('rS', dict(key_format='r',value_format='S')), diff --git a/src/third_party/wiredtiger/test/suite/test_util13.py b/src/third_party/wiredtiger/test/suite/test_util13.py index 222f42cd7f1..9804dc700ba 100644 --- a/src/third_party/wiredtiger/test/suite/test_util13.py +++ b/src/third_party/wiredtiger/test/suite/test_util13.py @@ -33,7 +33,7 @@ import itertools, wiredtiger, wttest from helper import complex_populate_cgconfig, complex_populate_cgconfig_lsm from helper import simple_populate from helper import complex_populate_check, simple_populate_check -from wtscenario import multiply_scenarios, number_scenarios +from wtscenario import make_scenarios # 
test_util13.py # Utilities: wt dump, as well as the dump cursor @@ -73,7 +73,7 @@ class test_util13(wttest.WiredTigerTestCase, suite_subprocess): cfg='merge_max=5')), ] - scenarios = number_scenarios(multiply_scenarios('.', types)) + scenarios = make_scenarios(types) def compare_config(self, expected_cfg, actual_cfg): # Replace '(' characters so configuration groups don't break parsing. diff --git a/src/third_party/wiredtiger/test/suite/wtscenario.py b/src/third_party/wiredtiger/test/suite/wtscenario.py index 7fad7c228fb..8576b3ac876 100644 --- a/src/third_party/wiredtiger/test/suite/wtscenario.py +++ b/src/third_party/wiredtiger/test/suite/wtscenario.py @@ -64,11 +64,37 @@ def log2chr(val): megabyte = 1024 * 1024 +def make_scenarios(*args, **kwargs): + """ + The standard way to create scenarios for WT tests. + Scenarios can be combined by listing them all as arguments. + A final prune= and/or prunelong= argument may be given that + forces the list of entries in the scenario to be pruned. + The result is a (combined) scenario that has been checked + for name duplicates and has been given names and numbers. 
+ """ + scenes = multiply_scenarios('.', *args) + pruneval = None + prunelong = None + for key in kwargs: + if key == 'prune': + pruneval = kwargs[key] + elif key == 'prunelong': + prunelong = kwargs[key] + else: + raise AssertionError( + 'make_scenarios: unexpected named arg: ' + key) + if pruneval != None or prunelong != None: + pruneval = pruneval if pruneval != None else -1 + prunelong = prunelong if prunelong != None else -1 + scenes = prune_scenarios(scenes, pruneval, prunelong) + return number_scenarios(scenes) + def check_scenarios(scenes): """ - Make sure all scenarios have unique names + Make sure all scenarios have unique case insensitive names """ - assert len(scenes) == len(dict(scenes)) + assert len(scenes) == len(dict((k.lower(), v) for k, v in scenes)) return scenes def multiply_scenarios(sep, *args): @@ -81,8 +107,8 @@ def multiply_scenarios(sep, *args): result = scenes else: total = [] - for scena in scenes: - for scenb in result: + for scena in result: + for scenb in scenes: # Create a merged scenario with a concatenated name name = scena[0] + sep + scenb[0] tdict = {} @@ -235,7 +261,7 @@ class wtscenario: scen.lmax = lmax scen.cache_size = cache s.append((scen.shortName(), dict(session_create_scenario=scen))) - return s + return make_scenarios(s) def shortName(self): """ diff --git a/src/third_party/wiredtiger/test/suite/wttest.py b/src/third_party/wiredtiger/test/suite/wttest.py index 9e430fcdba7..788dd5d0307 100644 --- a/src/third_party/wiredtiger/test/suite/wttest.py +++ b/src/third_party/wiredtiger/test/suite/wttest.py @@ -212,8 +212,8 @@ class WiredTigerTestCase(unittest.TestCase): # help distinguish tests. 
scen = '' if hasattr(self, 'scenario_number') and hasattr(self, 'scenario_name'): - scen = '(scenario ' + str(self.scenario_number) + \ - ': ' + self.scenario_name + ')' + scen = ' -s ' + str(self.scenario_number) + \ + ' (' + self.scenario_name + ')' return self.simpleName() + scen def simpleName(self): @@ -293,6 +293,8 @@ class WiredTigerTestCase(unittest.TestCase): raise Exception(self.testdir + ": cannot remove directory") os.makedirs(self.testdir) os.chdir(self.testdir) + with open('testname.txt', 'w+') as namefile: + namefile.write(str(self) + '\n') self.fdSetUp() # tearDown needs a conn field, set it here in case the open fails. self.conn = None diff --git a/src/third_party/wiredtiger/test/thread/smoke.sh b/src/third_party/wiredtiger/test/thread/smoke.sh index 9a235b1d8e9..aa2f86c1def 100755 --- a/src/third_party/wiredtiger/test/thread/smoke.sh +++ b/src/third_party/wiredtiger/test/thread/smoke.sh @@ -4,10 +4,10 @@ set -e # Smoke-test format as part of running "make check". $TEST_WRAPPER ./t -t f -$TEST_WRAPPER ./t -S -F -t f +$TEST_WRAPPER ./t -S -F -n 1000 -t f $TEST_WRAPPER ./t -t r -$TEST_WRAPPER ./t -S -F -t r +$TEST_WRAPPER ./t -S -F -n 1000 -t r $TEST_WRAPPER ./t -t v -$TEST_WRAPPER ./t -S -F -t v +$TEST_WRAPPER ./t -S -F -n 1000 -t v diff --git a/src/third_party/wiredtiger/test/utility/misc.c b/src/third_party/wiredtiger/test/utility/misc.c index dfc655dec1a..dffd29a5b6a 100644 --- a/src/third_party/wiredtiger/test/utility/misc.c +++ b/src/third_party/wiredtiger/test/utility/misc.c @@ -192,3 +192,18 @@ dstrdup(const void *str) return (p); testutil_die(errno, "strdup"); } + +/* + * dstrndup -- + * Call emulating strndup, dying on failure. Don't use actual strndup here + * as it is not supported within MSVC. 
+ */ +void * +dstrndup(const char *str, size_t len) +{ + char *p; + + p = dcalloc(len + 1, sizeof(char)); + memcpy(p, str, len); + return (p); +} diff --git a/src/third_party/wiredtiger/test/utility/test_util.h b/src/third_party/wiredtiger/test/utility/test_util.h index 66ff8de2d19..821e06084d2 100644 --- a/src/third_party/wiredtiger/test/utility/test_util.h +++ b/src/third_party/wiredtiger/test/utility/test_util.h @@ -115,6 +115,7 @@ void *dcalloc(size_t, size_t); void *dmalloc(size_t); void *drealloc(void *, size_t); void *dstrdup(const void *); +void *dstrndup(const char *, size_t); void testutil_clean_work_dir(char *); void testutil_cleanup(TEST_OPTS *); void testutil_make_work_dir(char *); diff --git a/src/third_party/wiredtiger/tools/wtstats/stat_data.py b/src/third_party/wiredtiger/tools/wtstats/stat_data.py index a79cf1faf5e..b93f2449c63 100644 --- a/src/third_party/wiredtiger/tools/wtstats/stat_data.py +++ b/src/third_party/wiredtiger/tools/wtstats/stat_data.py @@ -3,17 +3,19 @@ no_scale_per_second_list = [ 'async: current work queue length', 'async: maximum work queue length', + 'cache: bytes belonging to page images in the cache', 'cache: bytes currently in the cache', + 'cache: bytes not belonging to page images in the cache', 'cache: eviction currently operating in aggressive mode', 'cache: files with active eviction walks', 'cache: hazard pointer maximum array length', 'cache: maximum bytes configured', 'cache: maximum page size at eviction', + 'cache: overflow values cached in memory', 'cache: pages currently held in the cache', 'cache: percentage overhead', 'cache: tracked bytes belonging to internal pages in the cache', 'cache: tracked bytes belonging to leaf pages in the cache', - 'cache: tracked bytes belonging to overflow pages in the cache', 'cache: tracked dirty bytes in the cache', 'cache: tracked dirty pages in the cache', 'connection: files currently open', @@ -28,6 +30,22 @@ no_scale_per_second_list = [ 'reconciliation: split objects 
currently awaiting free', 'session: open cursor count', 'session: open session count', + 'session: table compact failed calls', + 'session: table compact successful calls', + 'session: table create failed calls', + 'session: table create successful calls', + 'session: table drop failed calls', + 'session: table drop successful calls', + 'session: table rebalance failed calls', + 'session: table rebalance successful calls', + 'session: table rename failed calls', + 'session: table rename successful calls', + 'session: table salvage failed calls', + 'session: table salvage successful calls', + 'session: table truncate failed calls', + 'session: table truncate successful calls', + 'session: table verify failed calls', + 'session: table verify successful calls', 'thread-state: active filesystem fsync calls', 'thread-state: active filesystem read calls', 'thread-state: active filesystem write calls', @@ -36,7 +54,10 @@ no_scale_per_second_list = [ 'transaction: transaction checkpoint max time (msecs)', 'transaction: transaction checkpoint min time (msecs)', 'transaction: transaction checkpoint most recent time (msecs)', + 'transaction: transaction checkpoint scrub dirty target', + 'transaction: transaction checkpoint scrub time (msecs)', 'transaction: transaction checkpoint total time (msecs)', + 'transaction: transaction fsync duration for checkpoint after allocating the transaction ID (usecs)', 'transaction: transaction range of IDs currently pinned', 'transaction: transaction range of IDs currently pinned by a checkpoint', 'transaction: transaction range of IDs currently pinned by named snapshots', @@ -64,6 +85,7 @@ no_scale_per_second_list = [ 'btree: overflow pages', 'btree: row-store internal pages', 'btree: row-store leaf pages', + 'cache: bytes currently in the cache', 'cache: overflow values cached in memory', 'LSM: bloom filters in the LSM tree', 'LSM: chunks in the LSM tree', @@ -74,7 +96,9 @@ no_scale_per_second_list = [ ] no_clear_list = [ 'async: maximum 
work queue length', + 'cache: bytes belonging to page images in the cache', 'cache: bytes currently in the cache', + 'cache: bytes not belonging to page images in the cache', 'cache: eviction currently operating in aggressive mode', 'cache: files with active eviction walks', 'cache: maximum bytes configured', @@ -83,7 +107,6 @@ no_clear_list = [ 'cache: percentage overhead', 'cache: tracked bytes belonging to internal pages in the cache', 'cache: tracked bytes belonging to leaf pages in the cache', - 'cache: tracked bytes belonging to overflow pages in the cache', 'cache: tracked dirty bytes in the cache', 'cache: tracked dirty pages in the cache', 'connection: files currently open', @@ -98,6 +121,22 @@ no_clear_list = [ 'reconciliation: split objects currently awaiting free', 'session: open cursor count', 'session: open session count', + 'session: table compact failed calls', + 'session: table compact successful calls', + 'session: table create failed calls', + 'session: table create successful calls', + 'session: table drop failed calls', + 'session: table drop successful calls', + 'session: table rebalance failed calls', + 'session: table rebalance successful calls', + 'session: table rename failed calls', + 'session: table rename successful calls', + 'session: table salvage failed calls', + 'session: table salvage successful calls', + 'session: table truncate failed calls', + 'session: table truncate successful calls', + 'session: table verify failed calls', + 'session: table verify successful calls', 'thread-state: active filesystem fsync calls', 'thread-state: active filesystem read calls', 'thread-state: active filesystem write calls', @@ -106,11 +145,15 @@ no_clear_list = [ 'transaction: transaction checkpoint max time (msecs)', 'transaction: transaction checkpoint min time (msecs)', 'transaction: transaction checkpoint most recent time (msecs)', + 'transaction: transaction checkpoint scrub dirty target', + 'transaction: transaction checkpoint scrub time 
(msecs)', 'transaction: transaction checkpoint total time (msecs)', + 'transaction: transaction fsync duration for checkpoint after allocating the transaction ID (usecs)', 'transaction: transaction range of IDs currently pinned', 'transaction: transaction range of IDs currently pinned by a checkpoint', 'transaction: transaction range of IDs currently pinned by named snapshots', 'btree: btree checkpoint generation', + 'cache: bytes currently in the cache', 'session: open cursor count', ] prefix_list = [ diff --git a/src/third_party/wiredtiger/tools/wtstats/wtstats.py b/src/third_party/wiredtiger/tools/wtstats/wtstats.py index ff62d99e825..3549031c30f 100755 --- a/src/third_party/wiredtiger/tools/wtstats/wtstats.py +++ b/src/third_party/wiredtiger/tools/wtstats/wtstats.py @@ -137,6 +137,8 @@ def parse_wtperf_file(file, result): for i, v in enumerate(values): if v == 'N': v = 0 + if v == 'Y': + v = 1 # convert us to ms if '(ms)' in headings[i]: v = float(v) / 1000.0 |