summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/third_party/wiredtiger/bench/workgen/runner/read_write_heavy.wtperf20
-rw-r--r--src/third_party/wiredtiger/bench/workgen/runner/read_write_storms.py115
-rw-r--r--src/third_party/wiredtiger/bench/workgen/workgen.cxx10
-rwxr-xr-xsrc/third_party/wiredtiger/bench/workgen/wtperf.py60
-rw-r--r--src/third_party/wiredtiger/dist/api_data.py30
-rw-r--r--src/third_party/wiredtiger/dist/filelist1
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok2
-rw-r--r--src/third_party/wiredtiger/dist/stat_data.py24
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_all.c6
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/src/block/block_mgr.c2
-rw-r--r--src/third_party/wiredtiger/src/block/block_read.c1
-rw-r--r--src/third_party/wiredtiger/src/block/block_write.c4
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_compact.c4
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_curnext.c2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_curprev.c2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_debug.c19
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_delete.c4
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_handle.c11
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_misc.c2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_ovfl.c2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_page.c12
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_read.c6
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_rebalance.c4
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_ret.c4
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_slvg.c12
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_split.c5
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_stat.c11
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_vrfy.c65
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c81
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_walk.c6
-rw-r--r--src/third_party/wiredtiger/src/btree/row_key.c5
-rw-r--r--src/third_party/wiredtiger/src/cache/cache_las.c4
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c159
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_api.c2
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_cache_pool.c17
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_capacity.c474
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_open.c5
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_reconfig.c1
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_sweep.c26
-rw-r--r--src/third_party/wiredtiger/src/docs/programming.dox1
-rw-r--r--src/third_party/wiredtiger/src/docs/tune-capacity.dox38
-rw-r--r--src/third_party/wiredtiger/src/include/block.h1
-rw-r--r--src/third_party/wiredtiger/src/include/btmem.h14
-rw-r--r--src/third_party/wiredtiger/src/include/btree.i14
-rw-r--r--src/third_party/wiredtiger/src/include/capacity.h74
-rw-r--r--src/third_party/wiredtiger/src/include/cell.i59
-rw-r--r--src/third_party/wiredtiger/src/include/connection.h40
-rw-r--r--src/third_party/wiredtiger/src/include/cursor.i4
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h5
-rw-r--r--src/third_party/wiredtiger/src/include/os.h3
-rw-r--r--src/third_party/wiredtiger/src/include/os_fhandle.i5
-rw-r--r--src/third_party/wiredtiger/src/include/stat.h15
-rw-r--r--src/third_party/wiredtiger/src/include/txn.i20
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in559
-rw-r--r--src/third_party/wiredtiger/src/include/wt_internal.h3
-rw-r--r--src/third_party/wiredtiger/src/log/log.c28
-rw-r--r--src/third_party/wiredtiger/src/log/log_slot.c1
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_work_unit.c65
-rw-r--r--src/third_party/wiredtiger/src/os_common/os_fhandle.c130
-rw-r--r--src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c1
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_fs.c5
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_fs.c2
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_track.c2
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_write.c389
-rw-r--r--src/third_party/wiredtiger/src/support/stat.c46
-rw-r--r--src/third_party/wiredtiger/src/txn/txn.c8
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c8
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_main.c3
-rw-r--r--src/third_party/wiredtiger/test/csuite/random_directio/main.c3
-rw-r--r--src/third_party/wiredtiger/test/evergreen.yml33
-rwxr-xr-xsrc/third_party/wiredtiger/test/evergreen/compatibility_test_for_mongodb_releases.sh138
-rw-r--r--src/third_party/wiredtiger/test/format/config.h4
-rw-r--r--src/third_party/wiredtiger/test/format/format.h1
-rw-r--r--src/third_party/wiredtiger/test/format/wts.c2
-rwxr-xr-x[-rw-r--r--]src/third_party/wiredtiger/test/suite/test_baseconfig.py2
-rw-r--r--src/third_party/wiredtiger/test/suite/test_dictionary.py10
-rw-r--r--src/third_party/wiredtiger/test/suite/test_reconfig01.py7
-rw-r--r--src/third_party/wiredtiger/test/suite/test_split.py8
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/test_txn19.py2
-rwxr-xr-x[-rw-r--r--]src/third_party/wiredtiger/test/suite/wttest.py9
81 files changed, 2265 insertions, 719 deletions
diff --git a/src/third_party/wiredtiger/bench/workgen/runner/read_write_heavy.wtperf b/src/third_party/wiredtiger/bench/workgen/runner/read_write_heavy.wtperf
new file mode 100644
index 00000000000..f05ed62c5b4
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/workgen/runner/read_write_heavy.wtperf
@@ -0,0 +1,20 @@
+# Warning: This config includes unwritten, implicit configuration defaults.
+# Changes to those values may cause differences in behavior.
+conn_config="cache_size=2GB,eviction=(threads_max=8),log=(enabled=true),session_max=250,statistics=(fast),statistics_log=(wait=1,json),io_capacity=(total=30M)"
+checkpoint_interval=60
+checkpoint_threads=1
+compression="snappy"
+create=true
+close_conn=false
+icount=4000000
+log_like_table=true
+populate_threads=4
+report_interval=1
+run_time=900
+sample_interval=1
+sample_rate=1
+table_count=100
+threads=((count=80,updates=1,throttle=11,throttle_burst=0),(count=80,reads=1,throttle=60,throttle_burst=0))
+value_sz=7000
+warmup=0
+table_config="memory_page_max=10m,leaf_value_max=64MB,checksum=on,split_pct=90,type=file,log=(enabled=false),leaf_page_max=32k,block_compressor=snappy"
diff --git a/src/third_party/wiredtiger/bench/workgen/runner/read_write_storms.py b/src/third_party/wiredtiger/bench/workgen/runner/read_write_storms.py
new file mode 100644
index 00000000000..2f774d0c902
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/workgen/runner/read_write_storms.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+# generated from runner/read_write_heavy.wtperf originally, then hand edited.
+
+from runner import *
+from wiredtiger import *
+from workgen import *
+
+context = Context()
+conn_config = ""
+conn_config += ",cache_size=2GB,eviction=(threads_max=8),log=(enabled=true),session_max=250,statistics=(fast),statistics_log=(wait=1,json),io_capacity=(total=30M)" # explicitly added
+conn = wiredtiger_open("WT_TEST", "create," + conn_config)
+s = conn.open_session("")
+
+wtperf_table_config = "key_format=S,value_format=S,type=lsm," +\
+ "exclusive=true,allocation_size=4kb," +\
+ "internal_page_max=64kb,leaf_page_max=4kb,split_pct=100,"
+compress_table_config = "block_compressor=snappy,"
+table_config = "memory_page_max=10m,leaf_value_max=64MB,checksum=on,split_pct=90,type=file,log=(enabled=false),leaf_page_max=32k,block_compressor=snappy"
+tables = []
+table_count = 100
+for i in range(0, table_count):
+ tname = "table:test" + str(i)
+ table = Table(tname)
+ s.create(tname, wtperf_table_config +\
+ compress_table_config + table_config)
+ table.options.key_size = 20
+ table.options.value_size = 7000
+ tables.append(table)
+
+populate_threads = 4
+icount = 4000000
+# There are multiple tables to be filled during populate,
+# the icount is split between them all.
+pop_ops = Operation(Operation.OP_INSERT, tables[0])
+pop_ops = op_multi_table(pop_ops, tables)
+nops_per_thread = icount / (populate_threads * table_count)
+pop_thread = Thread(pop_ops * nops_per_thread)
+pop_workload = Workload(context, populate_threads * pop_thread)
+pop_workload.run(conn)
+print('populate complete')
+
+# Log like file, requires that logging be enabled in the connection config.
+log_name = "table:log"
+s.create(log_name, wtperf_table_config + "key_format=S,value_format=S," + compress_table_config + table_config + ",log=(enabled=true)")
+log_table = Table(log_name)
+
+ops = Operation(Operation.OP_UPDATE, tables[0])
+ops = op_multi_table(ops, tables, False)
+ops = op_log_like(ops, log_table, 0)
+thread0 = Thread(ops)
+# These operations include log_like operations, which will increase the number
+# of insert/update operations by a factor of 2.0. This may cause the
+# actual operations performed to be above the throttle.
+thread0.options.throttle=11
+thread0.options.throttle_burst=0
+
+ops = Operation(Operation.OP_SEARCH, tables[0])
+ops = op_multi_table(ops, tables, False)
+ops = op_log_like(ops, log_table, 0)
+thread1 = Thread(ops)
+thread1.options.throttle=60
+thread1.options.throttle_burst=0
+
+ops = Operation(Operation.OP_SLEEP, "60") + \
+ Operation(Operation.OP_CHECKPOINT, "")
+checkpoint_thread = Thread(ops)
+
+ops = Operation(Operation.OP_SLEEP, "0.1") + \
+ Operation(Operation.OP_LOG_FLUSH, "")
+logging_thread = Thread(ops)
+
+############################################################################
+# This part was added to the generated file.
+# Add threads that do a bunch of operations and sleep, all in a loop.
+# At the beginning of the run the threads will tend to be synchronized,
+# but that effect will dissipate over time.
+
+ops = Operation(Operation.OP_UPDATE, tables[0])
+ops = op_multi_table(ops, tables, False)
+ops = op_log_like(ops, log_table, 0)
+ops = ops * 10000 + Operation(Operation.OP_SLEEP, "10")
+thread_big_10 = Thread(ops)
+
+ops = Operation(Operation.OP_UPDATE, tables[0])
+ops = op_multi_table(ops, tables, False)
+ops = op_log_like(ops, log_table, 0)
+ops = ops * 80000 + Operation(Operation.OP_SLEEP, "20")
+thread_big_20 = Thread(ops)
+
+ops = Operation(Operation.OP_SEARCH, tables[0])
+ops = op_multi_table(ops, tables, False)
+ops = op_log_like(ops, log_table, 0)
+ops = ops * 10000 + Operation(Operation.OP_SLEEP, "8")
+thread_bigread_8 = Thread(ops)
+
+ops = Operation(Operation.OP_SEARCH, tables[0])
+ops = op_multi_table(ops, tables, False)
+ops = op_log_like(ops, log_table, 0)
+ops = ops * 80000 + Operation(Operation.OP_SLEEP, "16")
+thread_bigread_16 = Thread(ops)
+
+# End of added section.
+# The new threads will also be added to the workload below.
+############################################################################
+
+workload = Workload(context, 80 * thread0 + 80 * thread1 + checkpoint_thread + logging_thread + 10 * thread_big_10 + 10 * thread_big_20 + 10 * thread_bigread_8 + 10 * thread_bigread_16)
+workload.options.report_interval=1
+workload.options.run_time=900
+workload.options.sample_rate=1
+workload.options.warmup=0
+workload.options.sample_interval_ms = 1000
+workload.run(conn)
+
+latency_filename = "WT_TEST/latency.out"
+latency.workload_latency(workload, latency_filename)
diff --git a/src/third_party/wiredtiger/bench/workgen/workgen.cxx b/src/third_party/wiredtiger/bench/workgen/workgen.cxx
index 776e7cf6098..9bfa29e3136 100644
--- a/src/third_party/wiredtiger/bench/workgen/workgen.cxx
+++ b/src/third_party/wiredtiger/bench/workgen/workgen.cxx
@@ -948,7 +948,8 @@ int Throttle::throttle(uint64_t op_count, uint64_t *op_limit) {
_next_div = ts_add_ms(now, _ms_per_div);
_started = true;
} else {
- _ops_delta += (op_count - _ops_prev);
+ if (_burst != 0.0)
+ _ops_delta += (op_count - _ops_prev);
// Sleep until the next division, but potentially with some randomness.
if (now < _next_div) {
@@ -961,7 +962,12 @@ int Throttle::throttle(uint64_t op_count, uint64_t *op_limit) {
}
_next_div = ts_add_ms(_next_div, _ms_per_div);
}
- ops = _ops_per_div;
+
+ if (_burst == 0.0)
+ ops = _ops_left_this_second;
+ else
+ ops = _ops_per_div;
+
if (_ops_delta < (int64_t)ops) {
ops -= _ops_delta;
_ops_delta = 0;
diff --git a/src/third_party/wiredtiger/bench/workgen/wtperf.py b/src/third_party/wiredtiger/bench/workgen/wtperf.py
index 9ce1b84a663..e4ce0393276 100755
--- a/src/third_party/wiredtiger/bench/workgen/wtperf.py
+++ b/src/third_party/wiredtiger/bench/workgen/wtperf.py
@@ -34,7 +34,7 @@
# See also the usage() function.
#
from __future__ import print_function
-import os, shutil, sys, tempfile
+import os, shutil, sys, subprocess, tempfile
def eprint(*args, **kwargs):
print(*args, file=sys.stderr, **kwargs)
@@ -155,8 +155,6 @@ class Translator:
# "(abc=123,def=234,ghi=(hi=1,bye=2))" would return 3 items.
def split_config_parens(self, s):
if s[0:1] != '(':
- import pdb
- pdb.set_trace()
self.fatal_error('missing left paren', 'config parse error')
if s[-1:] != ')':
self.fatal_error('missing right paren', 'config parse error')
@@ -201,16 +199,21 @@ class Translator:
result += ' '
return result
+ def copy_file(self, srcname, destdir, destname):
+ dest_fullname = os.path.join(destdir, destname)
+ suffix = 0
+ while os.path.exists(dest_fullname):
+ suffix += 1
+ dest_fullname = os.path.join(destdir, destname + str(suffix))
+ shutil.copyfile(srcname, dest_fullname)
+
def copy_config(self):
# Note: If we add the capability of setting options on the command
# line, we won't be able to do a simple copy.
- config_save = os.path.join(self.homedir, 'CONFIG.wtperf')
- suffix = 0
- while os.path.exists(config_save):
- suffix += 1
- config_save = os.path.join(self.homedir, \
- 'CONFIG.wtperf.' + str(suffix))
- shutil.copyfile(self.filename, config_save)
+ self.copy_file(self.filename, self.homedir, 'CONFIG.wtperf')
+
+ def copy_python_source(self, srcname):
+ self.copy_file(srcname, self.homedir, 'RUN.py')
# Wtperf's throttle is based on the number of regular operations,
# not including log_like operations. Workgen counts all operations,
@@ -271,6 +274,9 @@ class Translator:
topts.read = 0
topts.reads = 0
topts.throttle = 0
+ # Workgen's throttle_burst variable has a default of 1.0. Since we
+ # are always explicitly setting it, set our own value to the same.
+ topts.throttle_burst = 1.0
topts.update = 0
topts.updates = 0
topts.random_range = 0
@@ -333,8 +339,11 @@ class Translator:
if topts.throttle > 0:
(throttle, comment) = self.calc_throttle(topts, log_like_table)
tdecls += comment
- tdecls += self.assign_str(thread_name + '.options.throttle',
- throttle)
+ tdecls += self.assign_str(
+ thread_name + '.options.throttle', throttle)
+ tdecls += self.assign_str(
+ thread_name + '.options.throttle_burst',
+ topts.throttle_burst)
tdecls += '\n'
if topts.count > 1:
tnames += str(topts.count) + ' * '
@@ -504,9 +513,11 @@ class Translator:
def translate_inner(self):
workloadopts = ''
+ input_as_string = ''
with open(self.filename) as fin:
for line in fin:
self.linenum += 1
+ input_as_string += line
commentpos = line.find('#')
if commentpos >= 0:
line = line[0:commentpos]
@@ -563,6 +574,11 @@ class Translator:
s += 'from wiredtiger import *\n'
s += 'from workgen import *\n'
s += '\n'
+ s += '\'\'\' The original wtperf input file follows:\n'
+ s += input_as_string
+ if not input_as_string.endswith('\n'):
+ s += '\n'
+ s += '\'\'\'\n\n'
async_config = ''
if opts.compact and opts.async_threads == 0:
opts.async_threads = 2;
@@ -585,6 +601,7 @@ class Translator:
s += ' return op_ret\n'
s += '\n'
s += 'context = Context()\n'
+ s += 'homedir = "' + self.homedir + '"\n'
extra_config = ''
s += 'conn_config = ""\n'
@@ -599,8 +616,7 @@ class Translator:
s += 'conn_config += extensions_config(["compressors/' + \
compression + '"])\n'
compression = 'block_compressor=' + compression + ','
- s += 'conn = wiredtiger_open("' + self.homedir + \
- '", "create," + conn_config)\n'
+ s += 'conn = wiredtiger_open(homedir, "create," + conn_config)\n'
s += 's = conn.open_session("' + sess_config + '")\n'
s += '\n'
s += self.translate_table_create()
@@ -618,8 +634,8 @@ class Translator:
s += 'conn.close()\n'
if readonly:
'conn_config += ",readonly=true"\n'
- s += 'conn = wiredtiger_open(' + \
- '"' + self.homedir + '", "create," + conn_config)\n'
+ s += 'conn = wiredtiger_open(homedir, ' + \
+ '"create," + conn_config)\n'
s += '\n'
s += 'workload = Workload(context, ' + t_var + ')\n'
s += workloadopts
@@ -627,7 +643,7 @@ class Translator:
if self.verbose > 0:
s += 'print("workload:")\n'
s += 'workload.run(conn)\n\n'
- s += 'latency_filename = "' + self.homedir + '/latency.out"\n'
+ s += 'latency_filename = homedir + "/latency.out"\n'
s += 'latency.workload_latency(workload, latency_filename)\n'
if close_conn:
@@ -684,16 +700,22 @@ for arg in sys.argv[1:]:
# directory after the run, because the wiredtiger_open
# in the generated code will clean out the directory first.
raised = None
+ ret = 0
try:
- execfile(tmpfile)
- except Exception, exception:
+ # Run python on the generated script
+ ret = subprocess.call([sys.executable, tmpfile])
+ except (KeyboardInterrupt, Exception), exception:
raised = exception
if not os.path.isdir(homedir):
os.makedirs(homedir)
translator.copy_config()
+ translator.copy_python_source(tmpfile)
os.remove(tmpfile)
if raised != None:
raise raised
+ if ret != 0:
+ raise Exception('Running generated program returned ' +
+ str(ret))
else:
usage()
sys.exit(1)
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index 44d7d8f9886..4db94e19cf3 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -519,6 +519,17 @@ connection_runtime_config = [
interval in seconds at which to check for files that are
inactive and close them''', min=1, max=100000),
]),
+ Config('io_capacity', '', r'''
+ control how many bytes per second are written and read. Exceeding
+ the capacity results in throttling.''',
+ type='category', subconfig=[
+ Config('total', '0', r'''
+ number of bytes per second available to all subsystems in total.
+ When set, decisions about what subsystems are throttled, and in
+ what proportion, are made internally. The minimum non-zero setting
+ is 1MB.''',
+ min='0', max='1TB'),
+ ]),
Config('lsm_manager', '', r'''
configure database wide options for LSM tree management. The LSM
manager is started automatically the first time an LSM tree is opened.
@@ -597,8 +608,9 @@ connection_runtime_config = [
intended for use with internal stress testing of WiredTiger.''',
type='list', undoc=True,
choices=[
- 'checkpoint_slow', 'lookaside_sweep_race', 'split_1', 'split_2',
- 'split_3', 'split_4', 'split_5', 'split_6', 'split_7', 'split_8']),
+ 'aggressive_sweep', 'checkpoint_slow', 'lookaside_sweep_race',
+ 'split_1', 'split_2', 'split_3', 'split_4', 'split_5', 'split_6',
+ 'split_7', 'split_8']),
Config('verbose', '', r'''
enable messages for various events. Options are given as a
list, such as <code>"verbose=[evictserver,read]"</code>''',
@@ -1471,13 +1483,15 @@ methods = {
Config('get', 'all_committed', r'''
specify which timestamp to query: \c all_committed returns the largest
timestamp such that all timestamps up to that value have committed,
- \c oldest returns the most recent \c oldest_timestamp set with
- WT_CONNECTION::set_timestamp, \c oldest_reader returns the
+ \c last_checkpoint returns the timestamp of the most recent stable
+ checkpoint, \c oldest returns the most recent \c oldest_timestamp set
+ with WT_CONNECTION::set_timestamp, \c oldest_reader returns the
minimum of the read timestamps of all active readers \c pinned returns
- the minimum of the\c oldest_timestamp and the read timestamps of all
- active readers, and \c stable returns the most recent
- \c stable_timestamp set with WT_CONNECTION::set_timestamp. See
- @ref transaction_timestamps''',
+ the minimum of the \c oldest_timestamp and the read timestamps of all
+ active readers, \c recovery returns the timestamp of the most recent
+ stable checkpoint taken prior to a shutdown and \c stable returns the
+ most recent \c stable_timestamp set with WT_CONNECTION::set_timestamp.
+ See @ref transaction_timestamps''',
choices=['all_committed','last_checkpoint',
'oldest','oldest_reader','pinned','recovery','stable']),
]),
diff --git a/src/third_party/wiredtiger/dist/filelist b/src/third_party/wiredtiger/dist/filelist
index 1bbeeb3c7a3..73fa6819e94 100644
--- a/src/third_party/wiredtiger/dist/filelist
+++ b/src/third_party/wiredtiger/dist/filelist
@@ -68,6 +68,7 @@ src/conn/api_version.c
src/conn/conn_api.c
src/conn/conn_cache.c
src/conn/conn_cache_pool.c
+src/conn/conn_capacity.c
src/conn/conn_ckpt.c
src/conn/conn_dhandle.c
src/conn/conn_handle.c
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index b93c99a9f99..c93229014c8 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -1175,6 +1175,7 @@ scalability
sched
scr
sd
+second's
secretkey
sed
sessionp
@@ -1230,6 +1231,7 @@ subinit
sublicense
subone
suboptimal
+subsystem's
subtest
subtree
sunique
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index 5184bbcb9a2..9bca52f402a 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -52,6 +52,10 @@ class CacheWalkStat(Stat):
def __init__(self, name, desc, flags=''):
flags += ',cache_walk'
Stat.__init__(self, name, CacheWalkStat.prefix, desc, flags)
+class CapacityStat(Stat):
+ prefix = 'capacity'
+ def __init__(self, name, desc, flags=''):
+ Stat.__init__(self, name, CapacityStat.prefix, desc, flags)
class CompressStat(Stat):
prefix = 'compression'
def __init__(self, name, desc, flags=''):
@@ -134,6 +138,7 @@ groups['memory'] = [
ConnStat.prefix,
RecStat.prefix]
groups['system'] = [
+ CapacityStat.prefix,
ConnStat.prefix,
DhandleStat.prefix,
PerfHistStat.prefix,
@@ -294,6 +299,24 @@ connection_stats = [
CacheStat('cache_write_restore', 'pages written requiring in-memory restoration'),
##########################################
+ # Capacity statistics
+ ##########################################
+ CapacityStat('capacity_bytes_ckpt', 'throttled bytes written for checkpoint'),
+ CapacityStat('capacity_bytes_evict', 'throttled bytes written for eviction'),
+ CapacityStat('capacity_bytes_log', 'throttled bytes written for log'),
+ CapacityStat('capacity_bytes_read', 'throttled bytes read'),
+ CapacityStat('capacity_bytes_written', 'throttled bytes written total'),
+ CapacityStat('capacity_threshold', 'threshold to call fsync'),
+ CapacityStat('capacity_time_ckpt', 'time waiting during checkpoint (usecs)'),
+ CapacityStat('capacity_time_evict', 'time waiting during eviction (usecs)'),
+ CapacityStat('capacity_time_log', 'time waiting during logging (usecs)'),
+ CapacityStat('capacity_time_read', 'time waiting during read (usecs)'),
+ CapacityStat('capacity_time_total', 'time waiting due to total capacity (usecs)'),
+ CapacityStat('fsync_all_fh', 'background fsync file handles synced'),
+ CapacityStat('fsync_all_fh_total', 'background fsync file handles considered'),
+ CapacityStat('fsync_all_time', 'background fsync time (msecs)', 'no_clear,no_scale'),
+
+ ##########################################
# Cursor operations
##########################################
CursorStat('cursor_open_count', 'open cursor count', 'no_clear,no_scale'),
@@ -333,6 +356,7 @@ connection_stats = [
# Dhandle statistics
##########################################
DhandleStat('dh_conn_handle_count', 'connection data handles currently active', 'no_clear,no_scale'),
+ DhandleStat('dh_conn_handle_size', 'connection data handle size', 'no_clear,no_scale,size'),
DhandleStat('dh_session_handles', 'session dhandles swept'),
DhandleStat('dh_session_sweeps', 'session sweep attempts'),
DhandleStat('dh_sweep_close', 'connection sweep dhandles closed'),
diff --git a/src/third_party/wiredtiger/examples/c/ex_all.c b/src/third_party/wiredtiger/examples/c/ex_all.c
index 445a92ba5f8..3bf66a876fd 100644
--- a/src/third_party/wiredtiger/examples/c/ex_all.c
+++ b/src/third_party/wiredtiger/examples/c/ex_all.c
@@ -1284,6 +1284,12 @@ main(int argc, char *argv[])
/*! [Configure file_extend] */
error_check(conn->close(conn, NULL));
+ /*! [Configure capacity] */
+ error_check(wiredtiger_open(
+ home, NULL, "create,io_capacity=(total=40MB)", &conn));
+ /*! [Configure capacity] */
+ error_check(conn->close(conn, NULL));
+
/*! [Eviction configuration] */
/*
* Configure eviction to begin at 90% full, and run until the cache
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index b45b085a227..b522dcbe4b9 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "92719d6bc9a83ce45c337db6a67adcc1354cca32",
+ "commit": "0c6ba8d8be02dd34a46c3e9533971f1739b6ad8e",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-4.2"
diff --git a/src/third_party/wiredtiger/src/block/block_mgr.c b/src/third_party/wiredtiger/src/block/block_mgr.c
index 2107fd85a7f..7211e5cfa24 100644
--- a/src/third_party/wiredtiger/src/block/block_mgr.c
+++ b/src/third_party/wiredtiger/src/block/block_mgr.c
@@ -505,6 +505,8 @@ static int
__bm_write(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf,
uint8_t *addr, size_t *addr_sizep, bool data_checksum, bool checkpoint_io)
{
+ __wt_capacity_throttle(session, buf->size,
+ checkpoint_io ? WT_THROTTLE_CKPT : WT_THROTTLE_EVICT);
return (__wt_block_write(session,
bm->block, buf, addr, addr_sizep, data_checksum, checkpoint_io));
}
diff --git a/src/third_party/wiredtiger/src/block/block_read.c b/src/third_party/wiredtiger/src/block/block_read.c
index 977fb165b84..9614e1c2810 100644
--- a/src/third_party/wiredtiger/src/block/block_read.c
+++ b/src/third_party/wiredtiger/src/block/block_read.c
@@ -98,6 +98,7 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session,
block, "read", offset, size, bm->is_live, __func__, __LINE__));
#endif
/* Read the block. */
+ __wt_capacity_throttle(session, size, WT_THROTTLE_READ);
WT_RET(
__wt_block_read_off(session, block, buf, offset, size, checksum));
diff --git a/src/third_party/wiredtiger/src/block/block_write.c b/src/third_party/wiredtiger/src/block/block_write.c
index 4de128494d0..9edc4e0108b 100644
--- a/src/third_party/wiredtiger/src/block/block_write.c
+++ b/src/third_party/wiredtiger/src/block/block_write.c
@@ -351,9 +351,9 @@ __block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
* cache, but only if the current session can wait.
*/
if (block->os_cache_dirty_max != 0 &&
- (block->os_cache_dirty += align_size) > block->os_cache_dirty_max &&
+ fh->written > block->os_cache_dirty_max &&
__wt_session_can_wait(session)) {
- block->os_cache_dirty = 0;
+ fh->written = 0;
if ((ret = __wt_fsync(session, fh, false)) != 0) {
/*
* Ignore ENOTSUP, but don't try again.
diff --git a/src/third_party/wiredtiger/src/btree/bt_compact.c b/src/third_party/wiredtiger/src/btree/bt_compact.c
index d861276a843..37ee36634ff 100644
--- a/src/third_party/wiredtiger/src/btree/bt_compact.c
+++ b/src/third_party/wiredtiger/src/btree/bt_compact.c
@@ -30,7 +30,7 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
/* If the page is clean, test the original addresses. */
if (__wt_page_evict_clean(page)) {
- __wt_ref_info(ref, &addr, &addr_size, NULL);
+ __wt_ref_info(session, ref, &addr, &addr_size, NULL);
if (addr == NULL)
return (0);
return (
@@ -249,7 +249,7 @@ __wt_compact_page_skip(
* if it's useful to rewrite leaf pages, don't do the I/O if a rewrite
* won't help.
*/
- __wt_ref_info(ref, &addr, &addr_size, &type);
+ __wt_ref_info(session, ref, &addr, &addr_size, &type);
WT_ASSERT(session, addr != NULL);
if (addr != NULL && type != WT_CELL_ADDR_INT) {
bm = S2BT(session)->bm;
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index b8f99a03df9..d12548b008e 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -229,7 +229,7 @@ new_page: /* Find the matching WT_COL slot. */
if (cbt->cip_saved != cip) {
if ((cell = WT_COL_PTR(page, cip)) == NULL)
continue;
- __wt_cell_unpack(page, cell, &unpack);
+ __wt_cell_unpack(session, page, cell, &unpack);
if (unpack.type == WT_CELL_DEL) {
if ((rle = __wt_cell_rle(&unpack)) == 1)
continue;
diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c
index 7ecebc0f9d4..32310b8a341 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curprev.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c
@@ -376,7 +376,7 @@ new_page: if (cbt->recno < cbt->ref->ref_recno)
if (cbt->cip_saved != cip) {
if ((cell = WT_COL_PTR(page, cip)) == NULL)
continue;
- __wt_cell_unpack(page, cell, &unpack);
+ __wt_cell_unpack(session, page, cell, &unpack);
if (unpack.type == WT_CELL_DEL) {
if (__wt_cell_rle(&unpack) == 1)
continue;
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index 9395490b165..1ce403dba7f 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -141,7 +141,7 @@ __debug_item_key(WT_DBG *ds, const char *tag, const void *data_arg, size_t size)
return (ds->f(ds, "\t%s%s{%s}\n",
tag == NULL ? "" : tag, tag == NULL ? "" : " ",
__wt_buf_set_printable_format(
- ds->session, data_arg, size, ds->key_format, ds->t1)));
+ session, data_arg, size, ds->key_format, ds->t1)));
}
/*
@@ -170,7 +170,7 @@ __debug_item_value(
return (ds->f(ds, "\t%s%s{%s}\n",
tag == NULL ? "" : tag, tag == NULL ? "" : " ",
__wt_buf_set_printable_format(
- ds->session, data_arg, size, ds->value_format, ds->t1)));
+ session, data_arg, size, ds->value_format, ds->t1)));
}
/*
@@ -527,7 +527,7 @@ __debug_dsk_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk)
btree = S2BT(ds->session);
- WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, false) {
+ WT_CELL_FOREACH_BEGIN(ds->session, btree, dsk, unpack, false) {
WT_RET(__debug_cell(ds, dsk, &unpack));
} WT_CELL_FOREACH_END;
return (0);
@@ -997,7 +997,7 @@ __debug_page_col_var(WT_DBG *ds, WT_REF *ref)
unpack = NULL;
rle = 1;
} else {
- __wt_cell_unpack(page, cell, unpack);
+ __wt_cell_unpack(ds->session, page, cell, unpack);
rle = __wt_cell_rle(unpack);
}
WT_RET(__wt_snprintf(
@@ -1081,7 +1081,7 @@ __debug_page_row_leaf(WT_DBG *ds, WT_PAGE *page)
WT_ERR(__wt_row_leaf_key(session, page, rip, key, false));
WT_ERR(__debug_item_key(ds, "K", key->data, key->size));
- __wt_row_leaf_value_cell(page, rip, NULL, unpack);
+ __wt_row_leaf_value_cell(session, page, rip, NULL, unpack);
WT_ERR(__debug_cell_data(
ds, page, WT_PAGE_ROW_LEAF, "V", unpack));
@@ -1205,8 +1205,11 @@ __debug_update(WT_DBG *ds, WT_UPDATE *upd, bool hexbyte)
else
WT_RET(ds->f(ds, "\t" "txn id %" PRIu64, upd->txnid));
__wt_timestamp_to_string(
- upd->timestamp, ts_string, sizeof(ts_string));
- WT_RET(ds->f(ds, ", ts %s", ts_string));
+ upd->start_ts, ts_string, sizeof(ts_string));
+ WT_RET(ds->f(ds, ", start_ts %s", ts_string));
+ __wt_timestamp_to_string(
+ upd->stop_ts, ts_string, sizeof(ts_string));
+ WT_RET(ds->f(ds, ", stop_ts %s", ts_string));
WT_RET(ds->f(ds, "\n"));
}
return (0);
@@ -1253,7 +1256,7 @@ __debug_ref(WT_DBG *ds, WT_REF *ref)
break;
}
- __wt_ref_info(ref, &addr, &addr_size, NULL);
+ __wt_ref_info(session, ref, &addr, &addr_size, NULL);
return (ds->f(ds, "\t" "%p %s %s\n", (void *)ref,
state, __wt_addr_string(session, addr, addr_size, ds->t1)));
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_delete.c b/src/third_party/wiredtiger/src/btree/bt_delete.c
index 7168c8475da..405f00a7259 100644
--- a/src/third_party/wiredtiger/src/btree/bt_delete.c
+++ b/src/third_party/wiredtiger/src/btree/bt_delete.c
@@ -323,8 +323,8 @@ __tombstone_update_alloc(WT_SESSION_IMPL *session,
*/
if (page_del != NULL) {
upd->txnid = page_del->txnid;
- upd->timestamp = page_del->timestamp;
- upd->durable_timestamp = page_del->timestamp;
+ upd->start_ts = page_del->timestamp;
+ upd->durable_ts = page_del->timestamp;
upd->prepare_state = page_del->prepare_state;
}
*updp = upd;
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c
index 38e11837e2c..6d96c2537b3 100644
--- a/src/third_party/wiredtiger/src/btree/bt_handle.c
+++ b/src/third_party/wiredtiger/src/btree/bt_handle.c
@@ -488,15 +488,12 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
* Don't do compression adjustment for fixed-size column store, the
* leaf page sizes don't change. (We could adjust internal pages but not
* leaf pages, but that seems an unlikely use case.)
- * XXX
- * Don't do compression adjustment of snappy-compressed blocks.
*/
btree->intlpage_compadjust = false;
btree->maxintlpage_precomp = btree->maxintlpage;
btree->leafpage_compadjust = false;
btree->maxleafpage_precomp = btree->maxleafpage;
if (btree->compressor != NULL && btree->compressor->compress != NULL &&
- !WT_STRING_MATCH("snappy", cval.str, cval.len) &&
btree->type != BTREE_COL_FIX) {
/*
* Don't do compression adjustment when on-disk page sizes are
@@ -611,6 +608,12 @@ __wt_btree_tree_open(
F_SET(session, WT_SESSION_QUIET_CORRUPT_FILE);
if ((ret = __wt_bt_read(session, &dsk, addr, addr_size)) == 0)
ret = __wt_verify_dsk(session, tmp->data, &dsk);
+ /*
+ * Flag any failed read or verification: if we're in startup, it may
+ * be fatal.
+ */
+ if (ret != 0)
+ F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
F_CLR(session, WT_SESSION_QUIET_CORRUPT_FILE);
if (ret != 0)
__wt_err(session, ret,
@@ -783,7 +786,7 @@ __btree_preload(WT_SESSION_IMPL *session)
/* Pre-load the second-level internal pages. */
WT_INTL_FOREACH_BEGIN(session, btree->root.page, ref) {
- __wt_ref_info(ref, &addr, &addr_size, NULL);
+ __wt_ref_info(session, ref, &addr, &addr_size, NULL);
if (addr != NULL)
WT_RET(bm->preload(bm, session, addr, addr_size));
} WT_INTL_FOREACH_END;
diff --git a/src/third_party/wiredtiger/src/btree/bt_misc.c b/src/third_party/wiredtiger/src/btree/bt_misc.c
index 51eb68aa51f..434dd579c5f 100644
--- a/src/third_party/wiredtiger/src/btree/bt_misc.c
+++ b/src/third_party/wiredtiger/src/btree/bt_misc.c
@@ -102,7 +102,7 @@ __wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf)
return (buf->data);
}
- __wt_ref_info(ref, &addr, &addr_size, NULL);
+ __wt_ref_info(session, ref, &addr, &addr_size, NULL);
return (__wt_addr_string(session, addr, addr_size, buf));
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_ovfl.c b/src/third_party/wiredtiger/src/btree/bt_ovfl.c
index 9e608114672..e254a9acf7d 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ovfl.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ovfl.c
@@ -217,7 +217,7 @@ __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell)
bm = btree->bm;
unpack = &_unpack;
- __wt_cell_unpack(page, cell, unpack);
+ __wt_cell_unpack(session, page, cell, unpack);
/*
* Finally remove overflow key/value objects, called when reconciliation
diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c
index db096ab12c9..bd30d3218c2 100644
--- a/src/third_party/wiredtiger/src/btree/bt_page.c
+++ b/src/third_party/wiredtiger/src/btree/bt_page.c
@@ -286,7 +286,7 @@ __inmem_col_int(WT_SESSION_IMPL *session, WT_PAGE *page)
pindex = WT_INTL_INDEX_GET_SAFE(page);
refp = pindex->index;
hint = 0;
- WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, true) {
+ WT_CELL_FOREACH_BEGIN(session, btree, page->dsk, unpack, true) {
ref = *refp++;
ref->home = page;
ref->pindex_hint = hint++;
@@ -310,7 +310,7 @@ __inmem_col_var_repeats(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t *np)
btree = S2BT(session);
/* Walk the page, counting entries for the repeats array. */
- WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, true) {
+ WT_CELL_FOREACH_BEGIN(session, btree, page->dsk, unpack, true) {
if (__wt_cell_rle(&unpack) > 1)
++*np;
} WT_CELL_FOREACH_END;
@@ -346,7 +346,7 @@ __inmem_col_var(
*/
indx = 0;
cip = page->pg_var;
- WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, true) {
+ WT_CELL_FOREACH_BEGIN(session, btree, page->dsk, unpack, true) {
WT_COL_PTR_SET(cip, WT_PAGE_DISK_OFFSET(page, unpack.cell));
cip++;
@@ -409,7 +409,7 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
refp = pindex->index;
overflow_keys = false;
hint = 0;
- WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, true) {
+ WT_CELL_FOREACH_BEGIN(session, btree, page->dsk, unpack, true) {
ref = *refp;
ref->home = page;
ref->pindex_hint = hint++;
@@ -522,7 +522,7 @@ __inmem_row_leaf_entries(
* single on-page (WT_CELL_VALUE) or overflow (WT_CELL_VALUE_OVFL) item.
*/
nindx = 0;
- WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, true) {
+ WT_CELL_FOREACH_BEGIN(session, btree, dsk, unpack, true) {
switch (unpack.type) {
case WT_CELL_KEY:
case WT_CELL_KEY_OVFL:
@@ -554,7 +554,7 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
/* Walk the page, building indices. */
rip = page->pg_row;
- WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, true) {
+ WT_CELL_FOREACH_BEGIN(session, btree, page->dsk, unpack, true) {
switch (unpack.type) {
case WT_CELL_KEY_OVFL:
__wt_row_leaf_key_set_cell(page, rip, unpack.cell);
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index 413e94377d3..c0933d4c4f8 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -188,9 +188,9 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
session, &las_value, &upd, &incr, upd_type));
total_incr += incr;
upd->txnid = las_txnid;
- upd->timestamp = las_timestamp;
+ upd->start_ts = las_timestamp;
+ upd->durable_ts = durable_timestamp;
upd->prepare_state = prepare_state;
- upd->durable_timestamp = durable_timestamp;
switch (page->type) {
case WT_PAGE_COL_FIX:
@@ -478,7 +478,7 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
* only lookaside entries, and a subsequent search or insert is forcing
* re-creation of the name space.
*/
- __wt_ref_info(ref, &addr, &addr_size, NULL);
+ __wt_ref_info(session, ref, &addr, &addr_size, NULL);
if (addr == NULL) {
WT_ASSERT(session, previous_state != WT_REF_DISK);
diff --git a/src/third_party/wiredtiger/src/btree/bt_rebalance.c b/src/third_party/wiredtiger/src/btree/bt_rebalance.c
index 45dac75a56a..46dc96aedce 100644
--- a/src/third_party/wiredtiger/src/btree/bt_rebalance.c
+++ b/src/third_party/wiredtiger/src/btree/bt_rebalance.c
@@ -215,7 +215,7 @@ __rebalance_col_walk(
* location cookie pairs. Keys are on-page/overflow items and location
* cookies are WT_CELL_ADDR_XXX items.
*/
- WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, true) {
+ WT_CELL_FOREACH_BEGIN(session, btree, dsk, unpack, true) {
switch (unpack.type) {
case WT_CELL_ADDR_INT:
/* An internal page: read it and recursively walk it. */
@@ -301,7 +301,7 @@ __rebalance_row_walk(
* cookies are WT_CELL_ADDR_XXX items.
*/
first_cell = true;
- WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, true) {
+ WT_CELL_FOREACH_BEGIN(session, btree, dsk, unpack, true) {
switch (unpack.type) {
case WT_CELL_KEY:
key = unpack;
diff --git a/src/third_party/wiredtiger/src/btree/bt_ret.c b/src/third_party/wiredtiger/src/btree/bt_ret.c
index b25a5932284..61351c26e36 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ret.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ret.c
@@ -101,7 +101,7 @@ __value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
return (0);
/* Take the value from the original page cell. */
- __wt_row_leaf_value_cell(page, rip, NULL, &unpack);
+ __wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
return (__wt_page_cell_data_ref(
session, page, &unpack, &cursor->value));
@@ -110,7 +110,7 @@ __value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
if (page->type == WT_PAGE_COL_VAR) {
/* Take the value from the original page cell. */
cell = WT_COL_PTR(page, &page->pg_var[cbt->slot]);
- __wt_cell_unpack(page, cell, &unpack);
+ __wt_cell_unpack(session, page, cell, &unpack);
return (__wt_page_cell_data_ref(
session, page, &unpack, &cursor->value));
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c
index e98835c2eb3..a03cfb6405d 100644
--- a/src/third_party/wiredtiger/src/btree/bt_slvg.c
+++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c
@@ -571,7 +571,7 @@ __slvg_trk_leaf(WT_SESSION_IMPL *session,
* the page.
*/
stop_recno = dsk->recno;
- WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, true) {
+ WT_CELL_FOREACH_BEGIN(session, btree, dsk, unpack, true) {
stop_recno += __wt_cell_rle(&unpack);
} WT_CELL_FOREACH_END;
@@ -687,7 +687,7 @@ __slvg_trk_leaf_walk(
/* Determine page min/max timestamps, count page overflow items. */
ovfl_cnt = 0;
- WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, true) {
+ WT_CELL_FOREACH_BEGIN(session, btree, dsk, unpack, true) {
if (unpack.ovfl)
++ovfl_cnt;
__slvg_trk_leaf_ts(trk, &unpack);
@@ -703,7 +703,7 @@ __slvg_trk_leaf_walk(
trk->trk_ovfl_cnt = ovfl_cnt;
ovfl_cnt = 0;
- WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, true) {
+ WT_CELL_FOREACH_BEGIN(session, btree, dsk, unpack, true) {
if (unpack.ovfl) {
WT_RET(__wt_memdup(session, unpack.data,
unpack.size, &trk->trk_ovfl_addr[ovfl_cnt].addr));
@@ -1390,7 +1390,7 @@ __slvg_col_ovfl(WT_SESSION_IMPL *session, WT_TRACK *trk,
WT_COL_FOREACH(page, cip, i) {
cell = WT_COL_PTR(page, cip);
- __wt_cell_unpack(page, cell, &unpack);
+ __wt_cell_unpack(session, page, cell, &unpack);
recno += __wt_cell_rle(&unpack);
/*
@@ -2127,10 +2127,10 @@ __slvg_row_ovfl(WT_SESSION_IMPL *session,
(void)__wt_row_leaf_key_info(
page, copy, NULL, &cell, NULL, NULL);
if (cell != NULL) {
- __wt_cell_unpack(page, cell, &unpack);
+ __wt_cell_unpack(session, page, cell, &unpack);
WT_RET(__slvg_row_ovfl_single(session, trk, &unpack));
}
- __wt_row_leaf_value_cell(page, rip, NULL, &unpack);
+ __wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
WT_RET(__slvg_row_ovfl_single(session, trk, &unpack));
}
return (0);
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index 62d96d79ba8..f0407ce71b1 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -185,7 +185,7 @@ __split_ovfl_key_cleanup(WT_SESSION_IMPL *session, WT_PAGE *page, WT_REF *ref)
ikey->cell_offset = 0;
cell = WT_PAGE_REF_OFFSET(page, cell_offset);
- __wt_cell_unpack(page, cell, &kpack);
+ __wt_cell_unpack(session, page, cell, &kpack);
if (kpack.ovfl && kpack.raw != WT_CELL_KEY_OVFL_RM)
WT_RET(__wt_ovfl_discard(session, page, cell));
@@ -260,7 +260,8 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home,
*/
WT_ORDERED_READ(ref_addr, ref->addr);
if (ref_addr != NULL && !__wt_off_page(from_home, ref_addr)) {
- __wt_cell_unpack(from_home, (WT_CELL *)ref_addr, &unpack);
+ __wt_cell_unpack(
+ session, from_home, (WT_CELL *)ref_addr, &unpack);
WT_RET(__wt_calloc_one(session, &addr));
addr->oldest_start_ts = unpack.oldest_start_ts;
addr->newest_start_ts = unpack.newest_start_ts;
diff --git a/src/third_party/wiredtiger/src/btree/bt_stat.c b/src/third_party/wiredtiger/src/btree/bt_stat.c
index fd73a84da5d..c201d9af73a 100644
--- a/src/third_party/wiredtiger/src/btree/bt_stat.c
+++ b/src/third_party/wiredtiger/src/btree/bt_stat.c
@@ -165,7 +165,7 @@ __stat_page_col_var(
++deleted_cnt;
} else {
orig_deleted = false;
- __wt_cell_unpack(page, cell, unpack);
+ __wt_cell_unpack(session, page, cell, unpack);
if (unpack->type == WT_CELL_DEL)
orig_deleted = true;
else {
@@ -244,7 +244,8 @@ __stat_page_row_int(
* a reference to the original cell.
*/
if (page->dsk != NULL)
- WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, false) {
+ WT_CELL_FOREACH_BEGIN(
+ session, btree, page->dsk, unpack, false) {
if (__wt_cell_type(unpack.cell) == WT_CELL_KEY_OVFL)
++ovfl_cnt;
} WT_CELL_FOREACH_END;
@@ -293,7 +294,8 @@ __stat_page_row_leaf(
upd->type != WT_UPDATE_TOMBSTONE))
++entry_cnt;
if (upd == NULL) {
- __wt_row_leaf_value_cell(page, rip, NULL, &unpack);
+ __wt_row_leaf_value_cell(
+ session, page, rip, NULL, &unpack);
if (unpack.type == WT_CELL_VALUE_OVFL)
++ovfl_cnt;
}
@@ -316,7 +318,8 @@ __stat_page_row_leaf(
*/
if (page->dsk != NULL) {
key = false;
- WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, false) {
+ WT_CELL_FOREACH_BEGIN(
+ session, btree, page->dsk, unpack, false) {
switch (__wt_cell_type(unpack.cell)) {
case WT_CELL_KEY_OVFL:
++ovfl_cnt;
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index c238669efd4..e47d210cc93 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -236,9 +236,9 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[])
* includes it.
*/
memset(&addr_unpack, 0, sizeof(addr_unpack));
- addr_unpack.oldest_start_ts =
- addr_unpack.newest_start_ts = WT_TS_NONE;
- addr_unpack.newest_stop_ts = WT_TS_MAX;
+ addr_unpack.oldest_start_ts = WT_TS_NONE;
+ addr_unpack.newest_start_ts =
+ addr_unpack.newest_stop_ts = WT_TS_MAX;
addr_unpack.raw = WT_CELL_ADDR_INT;
/* Verify the tree. */
@@ -326,6 +326,11 @@ static int
__verify_addr_ts(WT_SESSION_IMPL *session,
WT_REF *ref, WT_CELL_UNPACK *unpack, WT_VSTUFF *vs)
{
+ if (unpack->newest_stop_ts == WT_TS_NONE)
+ WT_RET_MSG(session, WT_ERROR,
+ "internal page reference at %s has a newest stop "
+ "timestamp of 0",
+ __wt_page_addr_string(session, ref, vs->tmp1));
if (unpack->oldest_start_ts > unpack->newest_start_ts)
WT_RET_MSG(session, WT_ERROR,
"internal page reference at %s has an oldest start "
@@ -447,7 +452,7 @@ recno_chk: if (recno != vs->record_total + 1)
if ((cell = WT_COL_PTR(page, cip)) == NULL)
++recno;
else {
- __wt_cell_unpack(page, cell, unpack);
+ __wt_cell_unpack(session, page, cell, unpack);
recno += __wt_cell_rle(unpack);
}
vs->record_total += recno;
@@ -534,7 +539,7 @@ celltype_err: WT_RET_MSG(session, WT_ERROR,
/* Unpack the address block and check timestamps */
__wt_cell_unpack(
- child_ref->home, child_ref->addr, unpack);
+ session, child_ref->home, child_ref->addr, unpack);
WT_RET(__verify_addr_ts(
session, child_ref, unpack, vs));
@@ -569,7 +574,7 @@ celltype_err: WT_RET_MSG(session, WT_ERROR,
/* Unpack the address block and check timestamps */
__wt_cell_unpack(
- child_ref->home, child_ref->addr, unpack);
+ session, child_ref->home, child_ref->addr, unpack);
WT_RET(__verify_addr_ts(
session, child_ref, unpack, vs));
@@ -810,7 +815,7 @@ __verify_page_cell(WT_SESSION_IMPL *session,
/* Walk the page, tracking timestamps and verifying overflow pages. */
cell_num = 0;
- WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, false) {
+ WT_CELL_FOREACH_BEGIN(session, btree, dsk, unpack, false) {
++cell_num;
switch (unpack.type) {
case WT_CELL_KEY_OVFL:
@@ -839,20 +844,29 @@ __verify_page_cell(WT_SESSION_IMPL *session,
case WT_CELL_ADDR_INT:
case WT_CELL_ADDR_LEAF:
case WT_CELL_ADDR_LEAF_NO:
+ if (unpack.newest_stop_ts == WT_TS_NONE)
+ WT_RET_MSG(session, WT_ERROR,
+ "cell %" PRIu32 " on page at %s has a "
+ "newest stop timestamp of 0",
+ cell_num - 1,
+ __wt_page_addr_string(
+ session, ref, vs->tmp1));
if (unpack.oldest_start_ts > unpack.newest_start_ts)
WT_RET_MSG(session, WT_ERROR,
- "cell %" PRIu32 " on page at %s has an oldest "
- "start timestamp newer than its newest start "
- "timestamp",
- cell_num - 1,
- __wt_page_addr_string(session, ref, vs->tmp1));
+ "cell %" PRIu32 " on page at %s has an "
+ "oldest start timestamp newer than its "
+ "newest start timestamp",
+ cell_num - 1,
+ __wt_page_addr_string(
+ session, ref, vs->tmp1));
if (unpack.newest_start_ts > unpack.newest_stop_ts)
WT_RET_MSG(session, WT_ERROR,
- "cell %" PRIu32 " on page at %s has a newest "
- "start timestamp newer than its newest stop "
- "timestamp",
- cell_num - 1,
- __wt_page_addr_string(session, ref, vs->tmp1));
+ "cell %" PRIu32 " on page at %s has a "
+ "newest start timestamp newer than its "
+ "newest stop timestamp",
+ cell_num - 1,
+ __wt_page_addr_string(
+ session, ref, vs->tmp1));
WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1,
"oldest start", unpack.oldest_start_ts,
@@ -872,12 +886,21 @@ __verify_page_cell(WT_SESSION_IMPL *session,
case WT_CELL_VALUE_COPY:
case WT_CELL_VALUE_OVFL:
case WT_CELL_VALUE_SHORT:
+ if (unpack.stop_ts == WT_TS_NONE)
+ WT_RET_MSG(session, WT_ERROR,
+ "cell %" PRIu32 " on page at %s has a stop "
+ "timestamp of 0",
+ cell_num - 1,
+ __wt_page_addr_string(
+ session, ref, vs->tmp1));
if (unpack.start_ts > unpack.stop_ts)
WT_RET_MSG(session, WT_ERROR,
- "cell %" PRIu32 " on page at %s has a start "
- "timestamp newer than its stop timestamp ",
- cell_num - 1,
- __wt_page_addr_string(session, ref, vs->tmp1));
+ "cell %" PRIu32 " on page at %s has a "
+ "start timestamp newer than its stop "
+ "timestamp ",
+ cell_num - 1,
+ __wt_page_addr_string(
+ session, ref, vs->tmp1));
WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1,
"start", unpack.start_ts,
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
index e6bd252d67f..5896852c1bf 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
@@ -34,7 +34,6 @@ static int __verify_dsk_row(
#define WT_RET_VRFY(session, ...) do { \
if (!(F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))) \
__wt_errx(session, __VA_ARGS__); \
- F_SET(S2C(session), WT_CONN_DATA_CORRUPTION); \
return (WT_ERROR); \
} while (0)
@@ -279,16 +278,22 @@ __verify_dsk_ts(WT_SESSION_IMPL *session,
case WT_CELL_ADDR_INT:
case WT_CELL_ADDR_LEAF:
case WT_CELL_ADDR_LEAF_NO:
+ if (unpack->newest_stop_ts == WT_TS_NONE)
+ WT_RET_VRFY(session,
+ "cell %" PRIu32 " on page at %s has a newest stop "
+ "timestamp of 0",
+ cell_num - 1, tag);
if (unpack->oldest_start_ts > unpack->newest_start_ts)
WT_RET_VRFY(session,
- "cell %" PRIu32 " on page at %s has an oldest start "
- "timestamp newer than its newest start timestamp",
- cell_num - 1, tag);
+ "cell %" PRIu32 " on page at %s has an oldest "
+ "start timestamp newer than its newest start "
+ "timestamp",
+ cell_num - 1, tag);
if (unpack->newest_start_ts > unpack->newest_stop_ts)
WT_RET_VRFY(session,
- "cell %" PRIu32 " on page at %s has a newest start "
- "timestamp newer than its newest stop timestamp",
- cell_num - 1, tag);
+ "cell %" PRIu32 " on page at %s has a newest start "
+ "timestamp newer than its newest stop timestamp",
+ cell_num - 1, tag);
if (addr == NULL)
break;
@@ -311,11 +316,16 @@ __verify_dsk_ts(WT_SESSION_IMPL *session,
case WT_CELL_VALUE_OVFL:
case WT_CELL_VALUE_OVFL_RM:
case WT_CELL_VALUE_SHORT:
+ if (unpack->stop_ts == WT_TS_NONE)
+ WT_RET_VRFY(session,
+ "cell %" PRIu32 " on page at %s has a stop "
+ "timestamp of 0",
+ cell_num - 1, tag);
if (unpack->start_ts > unpack->stop_ts)
WT_RET_VRFY(session,
- "cell %" PRIu32 " on page at %s has a start timestamp "
- "newer than its stop timestamp ",
- cell_num - 1, tag);
+ "cell %" PRIu32 " on page at %s has a start "
+ "timestamp newer than its stop timestamp ",
+ cell_num - 1, tag);
if (addr == NULL)
break;
@@ -384,7 +394,8 @@ __verify_dsk_row(WT_SESSION_IMPL *session,
++cell_num;
/* Carefully unpack the cell. */
- if (__wt_cell_unpack_safe(dsk, cell, unpack, end) != 0) {
+ if (__wt_cell_unpack_safe(
+ session, dsk, cell, unpack, end) != 0) {
ret = __err_cell_corrupt(session, cell_num, tag);
goto err;
}
@@ -660,7 +671,7 @@ __verify_dsk_col_int(WT_SESSION_IMPL *session,
++cell_num;
/* Carefully unpack the cell. */
- if (__wt_cell_unpack_safe(dsk, cell, unpack, end) != 0)
+ if (__wt_cell_unpack_safe(session, dsk, cell, unpack, end) != 0)
return (__err_cell_corrupt(session, cell_num, tag));
/* Check the raw and collapsed cell types. */
@@ -709,32 +720,36 @@ static int
__verify_dsk_col_var(WT_SESSION_IMPL *session,
const char *tag, const WT_PAGE_HEADER *dsk, WT_ADDR *addr)
{
+ struct {
+ const void *data;
+ size_t size;
+ wt_timestamp_t start_ts, stop_ts;
+ bool deleted;
+ } last;
WT_BM *bm;
WT_BTREE *btree;
WT_CELL *cell;
WT_CELL_UNPACK *unpack, _unpack;
WT_DECL_RET;
- size_t last_size;
uint32_t cell_num, cell_type, i;
uint8_t *end;
- const uint8_t *last_data;
- bool last_deleted;
btree = S2BT(session);
bm = btree->bm;
unpack = &_unpack;
end = (uint8_t *)dsk + dsk->mem_size;
- last_data = NULL;
- last_size = 0;
- last_deleted = false;
+ last.data = NULL;
+ last.size = 0;
+ last.start_ts = last.stop_ts = WT_TS_NONE;
+ last.deleted = false;
cell_num = 0;
WT_CELL_FOREACH_VRFY(btree, dsk, cell, unpack, i) {
++cell_num;
/* Carefully unpack the cell. */
- if (__wt_cell_unpack_safe(dsk, cell, unpack, end) != 0)
+ if (__wt_cell_unpack_safe(session, dsk, cell, unpack, end) != 0)
return (__err_cell_corrupt(session, cell_num, tag));
/* Check the raw and collapsed cell types. */
@@ -762,33 +777,38 @@ __verify_dsk_col_var(WT_SESSION_IMPL *session,
* a chance for RLE encoding. We don't have to care about data
* encoding or anything else, a byte comparison is enough.
*/
- if (last_deleted) {
+ if (unpack->start_ts != last.start_ts ||
+ unpack->stop_ts != last.stop_ts)
+ ;
+ else if (last.deleted) {
if (cell_type == WT_CELL_DEL)
goto match_err;
} else
if (cell_type == WT_CELL_VALUE &&
- last_data != NULL &&
- last_size == unpack->size &&
- memcmp(last_data, unpack->data, last_size) == 0)
+ last.data != NULL &&
+ last.size == unpack->size &&
+ memcmp(last.data, unpack->data, last.size) == 0)
match_err: WT_RET_VRFY(session,
"data entries %" PRIu32 " and %" PRIu32
" on page at %s are identical and should "
"have been run-length encoded",
cell_num - 1, cell_num, tag);
+ last.start_ts = unpack->start_ts;
+ last.stop_ts = unpack->stop_ts;
switch (cell_type) {
case WT_CELL_DEL:
- last_deleted = true;
- last_data = NULL;
+ last.data = NULL;
+ last.deleted = true;
break;
case WT_CELL_VALUE_OVFL:
- last_deleted = false;
- last_data = NULL;
+ last.data = NULL;
+ last.deleted = false;
break;
case WT_CELL_VALUE:
- last_deleted = false;
- last_data = unpack->data;
- last_size = unpack->size;
+ last.data = unpack->data;
+ last.size = unpack->size;
+ last.deleted = false;
break;
}
}
@@ -863,7 +883,6 @@ static int
__err_cell_corrupt(
WT_SESSION_IMPL *session, uint32_t entry_num, const char *tag)
{
- F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
WT_RET_VRFY(session,
"item %" PRIu32 " on page at %s is a corrupted cell",
entry_num, tag);
diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c
index acf6643bcc5..a7cb433b56a 100644
--- a/src/third_party/wiredtiger/src/btree/bt_walk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_walk.c
@@ -85,7 +85,7 @@ found: WT_ASSERT(session, pindex->index[slot] == ref);
* Check if a reference is for a leaf page.
*/
static inline bool
-__ref_is_leaf(WT_REF *ref)
+__ref_is_leaf(WT_SESSION_IMPL *session, WT_REF *ref)
{
size_t addr_size;
const uint8_t *addr;
@@ -96,7 +96,7 @@ __ref_is_leaf(WT_REF *ref)
* this page is a leaf page or not. If there's no address, the page
* isn't on disk and we don't know the page type.
*/
- __wt_ref_info(ref, &addr, &addr_size, &type);
+ __wt_ref_info(session, ref, &addr, &addr_size, &type);
return (addr == NULL ?
false : type == WT_CELL_ADDR_LEAF || type == WT_CELL_ADDR_LEAF_NO);
}
@@ -650,7 +650,7 @@ __tree_walk_skip_count_callback(
if (ref->state == WT_REF_DELETED &&
__wt_delete_page_skip(session, ref, false))
*skipp = true;
- else if (*skipleafcntp > 0 && __ref_is_leaf(ref)) {
+ else if (*skipleafcntp > 0 && __ref_is_leaf(session, ref)) {
--*skipleafcntp;
*skipp = true;
} else
diff --git a/src/third_party/wiredtiger/src/btree/row_key.c b/src/third_party/wiredtiger/src/btree/row_key.c
index e67c36e6661..38aea173e8c 100644
--- a/src/third_party/wiredtiger/src/btree/row_key.c
+++ b/src/third_party/wiredtiger/src/btree/row_key.c
@@ -262,7 +262,7 @@ switch_and_jump: /* Switching to a forward roll. */
/*
* It must be an on-page cell, unpack it.
*/
- __wt_cell_unpack(page, cell, unpack);
+ __wt_cell_unpack(session, page, cell, unpack);
/* 3: the test for an on-page reference to an overflow key. */
if (unpack->type == WT_CELL_KEY_OVFL) {
@@ -286,7 +286,8 @@ switch_and_jump: /* Switching to a forward roll. */
copy = WT_ROW_KEY_COPY(rip);
if (!__wt_row_leaf_key_info(page, copy,
NULL, &cell, &keyb->data, &keyb->size)) {
- __wt_cell_unpack(page, cell, unpack);
+ __wt_cell_unpack(
+ session, page, cell, unpack);
ret = __wt_dsk_cell_data_ref(session,
WT_PAGE_ROW_LEAF, unpack, keyb);
}
diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c
index cd185cc75cc..a68c706ad95 100644
--- a/src/third_party/wiredtiger/src/cache/cache_las.c
+++ b/src/third_party/wiredtiger/src/cache/cache_las.c
@@ -727,12 +727,12 @@ __wt_las_insert_block(WT_CURSOR *cursor,
upd->type == WT_UPDATE_MODIFY)) {
las_value.size = 0;
cursor->set_value(cursor, upd->txnid,
- upd->timestamp, upd->durable_timestamp,
+ upd->start_ts, upd->durable_ts,
upd->prepare_state, WT_UPDATE_BIRTHMARK,
&las_value);
} else
cursor->set_value(cursor, upd->txnid,
- upd->timestamp, upd->durable_timestamp,
+ upd->start_ts, upd->durable_ts,
upd->prepare_state, upd->type, &las_value);
/*
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index 32939a97c72..90b1dc023ec 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -93,6 +93,12 @@ static const WT_CONFIG_CHECK
};
static const WT_CONFIG_CHECK
+ confchk_wiredtiger_open_io_capacity_subconfigs[] = {
+ { "total", "int", NULL, "min=0,max=1TB", NULL, 0 },
+ { NULL, NULL, NULL, NULL, NULL, 0 }
+};
+
+static const WT_CONFIG_CHECK
confchk_WT_CONNECTION_reconfigure_log_subconfigs[] = {
{ "archive", "boolean", NULL, NULL, NULL, 0 },
{ "os_cache_dirty_pct", "int",
@@ -170,6 +176,9 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
{ "file_manager", "category",
NULL, NULL,
confchk_wiredtiger_open_file_manager_subconfigs, 3 },
+ { "io_capacity", "category",
+ NULL, NULL,
+ confchk_wiredtiger_open_io_capacity_subconfigs, 1 },
{ "log", "category",
NULL, NULL,
confchk_WT_CONNECTION_reconfigure_log_subconfigs, 4 },
@@ -191,9 +200,9 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
NULL, NULL,
confchk_WT_CONNECTION_reconfigure_statistics_log_subconfigs, 5 },
{ "timing_stress_for_test", "list",
- NULL, "choices=[\"checkpoint_slow\",\"lookaside_sweep_race\","
- "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
- "\"split_6\",\"split_7\",\"split_8\"]",
+ NULL, "choices=[\"aggressive_sweep\",\"checkpoint_slow\","
+ "\"lookaside_sweep_race\",\"split_1\",\"split_2\",\"split_3\","
+ "\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]",
NULL, 0 },
{ "verbose", "list",
NULL, "choices=[\"api\",\"block\",\"checkpoint\","
@@ -876,6 +885,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
confchk_wiredtiger_open_file_manager_subconfigs, 3 },
{ "hazard_max", "int", NULL, "min=15", NULL, 0 },
{ "in_memory", "boolean", NULL, NULL, NULL, 0 },
+ { "io_capacity", "category",
+ NULL, NULL,
+ confchk_wiredtiger_open_io_capacity_subconfigs, 1 },
{ "log", "category",
NULL, NULL,
confchk_wiredtiger_open_log_subconfigs, 9 },
@@ -904,9 +916,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
NULL, NULL,
confchk_wiredtiger_open_statistics_log_subconfigs, 6 },
{ "timing_stress_for_test", "list",
- NULL, "choices=[\"checkpoint_slow\",\"lookaside_sweep_race\","
- "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
- "\"split_6\",\"split_7\",\"split_8\"]",
+ NULL, "choices=[\"aggressive_sweep\",\"checkpoint_slow\","
+ "\"lookaside_sweep_race\",\"split_1\",\"split_2\",\"split_3\","
+ "\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]",
NULL, 0 },
{ "transaction_sync", "category",
NULL, NULL,
@@ -982,6 +994,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
confchk_wiredtiger_open_file_manager_subconfigs, 3 },
{ "hazard_max", "int", NULL, "min=15", NULL, 0 },
{ "in_memory", "boolean", NULL, NULL, NULL, 0 },
+ { "io_capacity", "category",
+ NULL, NULL,
+ confchk_wiredtiger_open_io_capacity_subconfigs, 1 },
{ "log", "category",
NULL, NULL,
confchk_wiredtiger_open_log_subconfigs, 9 },
@@ -1010,9 +1025,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
NULL, NULL,
confchk_wiredtiger_open_statistics_log_subconfigs, 6 },
{ "timing_stress_for_test", "list",
- NULL, "choices=[\"checkpoint_slow\",\"lookaside_sweep_race\","
- "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
- "\"split_6\",\"split_7\",\"split_8\"]",
+ NULL, "choices=[\"aggressive_sweep\",\"checkpoint_slow\","
+ "\"lookaside_sweep_race\",\"split_1\",\"split_2\",\"split_3\","
+ "\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]",
NULL, 0 },
{ "transaction_sync", "category",
NULL, NULL,
@@ -1085,6 +1100,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
NULL, NULL,
confchk_wiredtiger_open_file_manager_subconfigs, 3 },
{ "hazard_max", "int", NULL, "min=15", NULL, 0 },
+ { "io_capacity", "category",
+ NULL, NULL,
+ confchk_wiredtiger_open_io_capacity_subconfigs, 1 },
{ "log", "category",
NULL, NULL,
confchk_wiredtiger_open_log_subconfigs, 9 },
@@ -1113,9 +1131,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
NULL, NULL,
confchk_wiredtiger_open_statistics_log_subconfigs, 6 },
{ "timing_stress_for_test", "list",
- NULL, "choices=[\"checkpoint_slow\",\"lookaside_sweep_race\","
- "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
- "\"split_6\",\"split_7\",\"split_8\"]",
+ NULL, "choices=[\"aggressive_sweep\",\"checkpoint_slow\","
+ "\"lookaside_sweep_race\",\"split_1\",\"split_2\",\"split_3\","
+ "\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]",
NULL, 0 },
{ "transaction_sync", "category",
NULL, NULL,
@@ -1186,6 +1204,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
NULL, NULL,
confchk_wiredtiger_open_file_manager_subconfigs, 3 },
{ "hazard_max", "int", NULL, "min=15", NULL, 0 },
+ { "io_capacity", "category",
+ NULL, NULL,
+ confchk_wiredtiger_open_io_capacity_subconfigs, 1 },
{ "log", "category",
NULL, NULL,
confchk_wiredtiger_open_log_subconfigs, 9 },
@@ -1214,9 +1235,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
NULL, NULL,
confchk_wiredtiger_open_statistics_log_subconfigs, 6 },
{ "timing_stress_for_test", "list",
- NULL, "choices=[\"checkpoint_slow\",\"lookaside_sweep_race\","
- "\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
- "\"split_6\",\"split_7\",\"split_8\"]",
+ NULL, "choices=[\"aggressive_sweep\",\"checkpoint_slow\","
+ "\"lookaside_sweep_race\",\"split_1\",\"split_2\",\"split_3\","
+ "\"split_4\",\"split_5\",\"split_6\",\"split_7\",\"split_8\"]",
NULL, 0 },
{ "transaction_sync", "category",
NULL, NULL,
@@ -1294,15 +1315,15 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"eviction_checkpoint_target=1,eviction_dirty_target=5,"
"eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
",file_manager=(close_handle_minimum=250,close_idle_time=30,"
- "close_scan_interval=10),log=(archive=true,os_cache_dirty_pct=0,"
- "prealloc=true,zero_fill=false),lsm_manager=(merge=true,"
- "worker_thread_max=4),lsm_merge=true,"
+ "close_scan_interval=10),io_capacity=(total=0),log=(archive=true,"
+ "os_cache_dirty_pct=0,prealloc=true,zero_fill=false),"
+ "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
"operation_tracking=(enabled=false,path=\".\"),"
"shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
"statistics=none,statistics_log=(json=false,on_close=false,"
"sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"timing_stress_for_test=,verbose=",
- confchk_WT_CONNECTION_reconfigure, 23
+ confchk_WT_CONNECTION_reconfigure, 24
},
{ "WT_CONNECTION.rollback_to_stable",
"",
@@ -1548,19 +1569,20 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"eviction_target=80,eviction_trigger=95,exclusive=false,"
"extensions=,file_extend=,file_manager=(close_handle_minimum=250,"
"close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
- "in_memory=false,log=(archive=true,compressor=,enabled=false,"
- "file_max=100MB,os_cache_dirty_pct=0,path=\".\",prealloc=true,"
- "recover=on,zero_fill=false),lsm_manager=(merge=true,"
- "worker_thread_max=4),lsm_merge=true,mmap=true,multiprocess=false"
- ",operation_tracking=(enabled=false,path=\".\"),readonly=false,"
- "salvage=false,session_max=100,session_scratch_max=2MB,"
- "session_table_cache=true,shared_cache=(chunk=10MB,name=,quota=0,"
- "reserve=0,size=500MB),statistics=none,statistics_log=(json=false"
- ",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\""
- ",wait=0),timing_stress_for_test=,transaction_sync=(enabled=false"
- ",method=fsync),use_environment=true,use_environment_priv=false,"
+ "in_memory=false,io_capacity=(total=0),log=(archive=true,"
+ "compressor=,enabled=false,file_max=100MB,os_cache_dirty_pct=0,"
+ "path=\".\",prealloc=true,recover=on,zero_fill=false),"
+ "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
+ "mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
+ "path=\".\"),readonly=false,salvage=false,session_max=100,"
+ "session_scratch_max=2MB,session_table_cache=true,"
+ "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
+ "statistics=none,statistics_log=(json=false,on_close=false,"
+ "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
+ "timing_stress_for_test=,transaction_sync=(enabled=false,"
+ "method=fsync),use_environment=true,use_environment_priv=false,"
"verbose=,write_through=",
- confchk_wiredtiger_open, 47
+ confchk_wiredtiger_open, 48
},
{ "wiredtiger_open_all",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
@@ -1575,19 +1597,20 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"eviction_target=80,eviction_trigger=95,exclusive=false,"
"extensions=,file_extend=,file_manager=(close_handle_minimum=250,"
"close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
- "in_memory=false,log=(archive=true,compressor=,enabled=false,"
- "file_max=100MB,os_cache_dirty_pct=0,path=\".\",prealloc=true,"
- "recover=on,zero_fill=false),lsm_manager=(merge=true,"
- "worker_thread_max=4),lsm_merge=true,mmap=true,multiprocess=false"
- ",operation_tracking=(enabled=false,path=\".\"),readonly=false,"
- "salvage=false,session_max=100,session_scratch_max=2MB,"
- "session_table_cache=true,shared_cache=(chunk=10MB,name=,quota=0,"
- "reserve=0,size=500MB),statistics=none,statistics_log=(json=false"
- ",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\""
- ",wait=0),timing_stress_for_test=,transaction_sync=(enabled=false"
- ",method=fsync),use_environment=true,use_environment_priv=false,"
+ "in_memory=false,io_capacity=(total=0),log=(archive=true,"
+ "compressor=,enabled=false,file_max=100MB,os_cache_dirty_pct=0,"
+ "path=\".\",prealloc=true,recover=on,zero_fill=false),"
+ "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
+ "mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
+ "path=\".\"),readonly=false,salvage=false,session_max=100,"
+ "session_scratch_max=2MB,session_table_cache=true,"
+ "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
+ "statistics=none,statistics_log=(json=false,on_close=false,"
+ "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
+ "timing_stress_for_test=,transaction_sync=(enabled=false,"
+ "method=fsync),use_environment=true,use_environment_priv=false,"
"verbose=,version=(major=0,minor=0),write_through=",
- confchk_wiredtiger_open_all, 48
+ confchk_wiredtiger_open_all, 49
},
{ "wiredtiger_open_basecfg",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
@@ -1601,18 +1624,19 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
",extensions=,file_extend=,file_manager=(close_handle_minimum=250"
",close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
- "log=(archive=true,compressor=,enabled=false,file_max=100MB,"
- "os_cache_dirty_pct=0,path=\".\",prealloc=true,recover=on,"
- "zero_fill=false),lsm_manager=(merge=true,worker_thread_max=4),"
- "lsm_merge=true,mmap=true,multiprocess=false,"
- "operation_tracking=(enabled=false,path=\".\"),readonly=false,"
- "salvage=false,session_max=100,session_scratch_max=2MB,"
- "session_table_cache=true,shared_cache=(chunk=10MB,name=,quota=0,"
- "reserve=0,size=500MB),statistics=none,statistics_log=(json=false"
- ",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\""
- ",wait=0),timing_stress_for_test=,transaction_sync=(enabled=false"
- ",method=fsync),verbose=,version=(major=0,minor=0),write_through=",
- confchk_wiredtiger_open_basecfg, 42
+ "io_capacity=(total=0),log=(archive=true,compressor=,"
+ "enabled=false,file_max=100MB,os_cache_dirty_pct=0,path=\".\","
+ "prealloc=true,recover=on,zero_fill=false),"
+ "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
+ "mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
+ "path=\".\"),readonly=false,salvage=false,session_max=100,"
+ "session_scratch_max=2MB,session_table_cache=true,"
+ "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
+ "statistics=none,statistics_log=(json=false,on_close=false,"
+ "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
+ "timing_stress_for_test=,transaction_sync=(enabled=false,"
+ "method=fsync),verbose=,version=(major=0,minor=0),write_through=",
+ confchk_wiredtiger_open_basecfg, 43
},
{ "wiredtiger_open_usercfg",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
@@ -1626,18 +1650,19 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
",extensions=,file_extend=,file_manager=(close_handle_minimum=250"
",close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
- "log=(archive=true,compressor=,enabled=false,file_max=100MB,"
- "os_cache_dirty_pct=0,path=\".\",prealloc=true,recover=on,"
- "zero_fill=false),lsm_manager=(merge=true,worker_thread_max=4),"
- "lsm_merge=true,mmap=true,multiprocess=false,"
- "operation_tracking=(enabled=false,path=\".\"),readonly=false,"
- "salvage=false,session_max=100,session_scratch_max=2MB,"
- "session_table_cache=true,shared_cache=(chunk=10MB,name=,quota=0,"
- "reserve=0,size=500MB),statistics=none,statistics_log=(json=false"
- ",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\""
- ",wait=0),timing_stress_for_test=,transaction_sync=(enabled=false"
- ",method=fsync),verbose=,write_through=",
- confchk_wiredtiger_open_usercfg, 41
+ "io_capacity=(total=0),log=(archive=true,compressor=,"
+ "enabled=false,file_max=100MB,os_cache_dirty_pct=0,path=\".\","
+ "prealloc=true,recover=on,zero_fill=false),"
+ "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
+ "mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
+ "path=\".\"),readonly=false,salvage=false,session_max=100,"
+ "session_scratch_max=2MB,session_table_cache=true,"
+ "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
+ "statistics=none,statistics_log=(json=false,on_close=false,"
+ "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
+ "timing_stress_for_test=,transaction_sync=(enabled=false,"
+ "method=fsync),verbose=,write_through=",
+ confchk_wiredtiger_open_usercfg, 42
},
{ NULL, NULL, NULL, 0 }
};
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index 43d2ee47afd..0630bdb3711 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -2019,6 +2019,7 @@ __wt_timing_stress_config(WT_SESSION_IMPL *session, const char *cfg[])
* conditions aren't encountered.
*/
static const WT_NAME_FLAG stress_types[] = {
+ { "aggressive_sweep", WT_TIMING_STRESS_AGGRESSIVE_SWEEP },
{ "checkpoint_slow", WT_TIMING_STRESS_CHECKPOINT_SLOW },
{ "lookaside_sweep_race",WT_TIMING_STRESS_LOOKASIDE_SWEEP },
{ "split_1", WT_TIMING_STRESS_SPLIT_1 },
@@ -2138,6 +2139,7 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[])
"config_base=,"
"create=,"
"encryption=(secretkey=),"
+ "error_prefix=,"
"exclusive=,"
"in_memory=,"
"log=(recover=),"
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
index ed01390955b..0e15841c59a 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
@@ -52,10 +52,10 @@ __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg)
WT_DECL_RET;
uint64_t chunk, quota, reserve, size, used_cache;
char *pool_name;
- bool created, updating;
+ bool cp_locked, created, updating;
conn = S2C(session);
- created = updating = false;
+ cp_locked = created = updating = false;
pool_name = NULL;
cp = NULL;
@@ -117,7 +117,16 @@ __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg)
"Attempting to join a cache pool that does not exist: %s",
pool_name);
+ /*
+ * At this point we have a cache pool to use. We need to take its
+ * lock. We need to drop the process lock first to avoid deadlock
+ * and acquire in the proper order.
+ */
+ __wt_spin_unlock(session, &__wt_process.spinlock);
cp = __wt_process.cache_pool;
+ __wt_spin_lock(session, &cp->cache_pool_lock);
+ cp_locked = true;
+ __wt_spin_lock(session, &__wt_process.spinlock);
/*
* The cache pool requires a reference count to avoid a race between
@@ -209,6 +218,8 @@ __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg)
conn->cache->cp_reserved = reserve;
conn->cache->cp_quota = quota;
+ __wt_spin_unlock(session, &cp->cache_pool_lock);
+ cp_locked = false;
/* Wake up the cache pool server so any changes are noticed. */
if (updating)
@@ -221,6 +232,8 @@ __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg)
F_SET(conn, WT_CONN_CACHE_POOL);
err: __wt_spin_unlock(session, &__wt_process.spinlock);
+ if (cp_locked)
+ __wt_spin_unlock(session, &cp->cache_pool_lock);
__wt_free(session, pool_name);
if (ret != 0 && created) {
__wt_free(session, cp->name);
diff --git a/src/third_party/wiredtiger/src/conn/conn_capacity.c b/src/third_party/wiredtiger/src/conn/conn_capacity.c
new file mode 100644
index 00000000000..0dd6a8c3c6d
--- /dev/null
+++ b/src/third_party/wiredtiger/src/conn/conn_capacity.c
@@ -0,0 +1,474 @@
+/*
+ * Copyright (c) 2014-2019 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * Compute the time in nanoseconds that must be reserved to represent a number
+ * of bytes in a subsystem with a given capacity per second (must be non-zero).
+ */
+#define WT_RESERVATION_NS(bytes, capacity) \
+ (((bytes) * WT_BILLION) / (capacity))
+
+/*
+ * The fraction (one sixteenth) of a second's worth of capacity that is stolen
+ * at a time. The number of bytes this represents may differ between
+ * subsystems, since each subsystem has its own capacity per second.
+ */
+#define WT_STEAL_FRACTION(x) ((x) / 16)
+
+/*
+ * __capacity_config --
+ * Set I/O capacity configuration; a non-zero total below WT_THROTTLE_MIN is EINVAL.
+ */
+static int
+__capacity_config(WT_SESSION_IMPL *session, const char *cfg[])
+{
+ WT_CAPACITY *cap;
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ uint64_t total;
+
+ conn = S2C(session);
+
+ WT_RET(__wt_config_gets(session, cfg, "io_capacity.total", &cval));
+ if (cval.val != 0 && cval.val < WT_THROTTLE_MIN)
+ WT_RET_MSG(session, EINVAL,
+ "total I/O capacity value %" PRId64 " below minimum %d",
+ cval.val, WT_THROTTLE_MIN);
+
+ cap = &conn->capacity;
+ cap->total = total = (uint64_t)cval.val;
+ if (cval.val != 0) {
+ /*
+ * We've been given a total capacity, set the capacity of all the
+ * subsystems (they are left untouched when the total is zero).
+ */
+ cap->ckpt = WT_CAPACITY_SYS(total, WT_CAP_CKPT);
+ cap->evict = WT_CAPACITY_SYS(total, WT_CAP_EVICT);
+ cap->log = WT_CAPACITY_SYS(total, WT_CAP_LOG);
+ cap->read = WT_CAPACITY_SYS(total, WT_CAP_READ);
+
+ /*
+ * Set the flush threshold to a percentage of the combined write capacity
+ * (checkpoint, eviction, log; reads excluded), with a minimum floor.
+ */
+ cap->threshold = ((cap->ckpt + cap->evict + cap->log) /
+ 100) * WT_CAPACITY_PCT;
+ if (cap->threshold < WT_CAPACITY_MIN_THRESHOLD)
+ cap->threshold = WT_CAPACITY_MIN_THRESHOLD;
+ WT_STAT_CONN_SET(session, capacity_threshold, cap->threshold);
+ } else
+ WT_STAT_CONN_SET(session, capacity_threshold, 0);
+
+ return (0);
+}
+
+/*
+ * __capacity_server_run_chk --
+ * Check if the capacity server should continue running (WT_CONN_SERVER_CAPACITY set).
+ */
+static bool
+__capacity_server_run_chk(WT_SESSION_IMPL *session)
+{
+ return (F_ISSET(S2C(session), WT_CONN_SERVER_CAPACITY));
+}
+
+/*
+ * __capacity_server --
+ * The capacity server thread: background-flush once enough has been written.
+ */
+static WT_THREAD_RET
+__capacity_server(void *arg)
+{
+ WT_CAPACITY *cap;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ uint64_t start, stop, time_ms;
+
+ session = arg;
+ conn = S2C(session);
+ cap = &conn->capacity;
+ for (;;) {
+ /*
+ * Wait until signalled but check once per second in case
+ * the signal was missed.
+ */
+ __wt_cond_wait(session,
+ conn->capacity_cond, WT_MILLION, __capacity_server_run_chk);
+
+ /* Check if we're quitting or being reconfigured. */
+ if (!__capacity_server_run_chk(session))
+ break;
+
+ cap->signalled = false; /* let __capacity_signal wake us again */
+ if (cap->written < cap->threshold)
+ continue;
+
+ start = __wt_clock(session);
+ WT_ERR(__wt_fsync_background(session));
+ stop = __wt_clock(session);
+ time_ms = WT_CLOCKDIFF_MS(stop, start);
+ WT_STAT_CONN_SET(session, fsync_all_time, time_ms);
+ cap->written = 0; /* start counting toward the next flush */
+ }
+
+ if (0) {
+err: WT_PANIC_MSG(session, ret, "capacity server error");
+ }
+ return (WT_THREAD_RET_VALUE);
+}
+
+/*
+ * __capacity_server_start --
+ * Start the capacity server thread with its own session and condition variable.
+ */
+static int
+__capacity_server_start(WT_CONNECTION_IMPL *conn)
+{
+ WT_SESSION_IMPL *session;
+
+ F_SET(conn, WT_CONN_SERVER_CAPACITY);
+
+ /*
+ * The capacity server gets its own session (closed again by destroy).
+ */
+ WT_RET(__wt_open_internal_session(conn,
+ "capacity-server", false, 0, &conn->capacity_session));
+ session = conn->capacity_session;
+
+ WT_RET(__wt_cond_alloc(session,
+ "capacity server", &conn->capacity_cond));
+
+ /*
+ * Start the thread and remember that it needs to be joined.
+ */
+ WT_RET(__wt_thread_create(
+ session, &conn->capacity_tid, __capacity_server, session));
+ conn->capacity_tid_set = true;
+
+ return (0);
+}
+
+/*
+ * __wt_capacity_server_create --
+ * Configure and start the capacity server.
+ */
+int
+__wt_capacity_server_create(WT_SESSION_IMPL *session, const char *cfg[])
+{
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+
+ /*
+ * Stop any server that is already running. This means that each time
+ * reconfigure is called we'll bounce the server even if there are no
+ * configuration changes. This makes our life easier as the underlying
+ * configuration routine doesn't have to worry about freeing objects
+ * in the connection structure (it's guaranteed to always start with a
+ * blank slate), and we don't have to worry about races where a running
+ * server is reading configuration information that we're updating, and
+ * it's not expected that reconfiguration will happen a lot.
+ */
+ if (conn->capacity_session != NULL)
+ WT_RET(__wt_capacity_server_destroy(session));
+ WT_RET(__capacity_config(session, cfg));
+
+ /*
+ * If the connection is in-memory or read-only, or if background fsync
+ * is not supported, then there is no server to start.
+ */
+ if (F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY) ||
+ !__wt_fsync_background_chk(session))
+ return (0);
+
+ if (conn->capacity.total != 0) /* a zero total means no limit */
+ WT_RET(__capacity_server_start(conn));
+
+ return (0);
+}
+
+/*
+ * __wt_capacity_server_destroy --
+ * Stop the capacity server thread and release its condition variable and session.
+ */
+int
+__wt_capacity_server_destroy(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+
+ conn = S2C(session);
+
+ F_CLR(conn, WT_CONN_SERVER_CAPACITY); /* makes the server's run check fail */
+ if (conn->capacity_tid_set) {
+ __wt_cond_signal(session, conn->capacity_cond);
+ WT_TRET(__wt_thread_join(session, &conn->capacity_tid));
+ conn->capacity_tid_set = false;
+ }
+ __wt_cond_destroy(session, &conn->capacity_cond);
+
+ /* Close the server thread's session. */
+ if (conn->capacity_session != NULL) {
+ wt_session = &conn->capacity_session->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ }
+
+ /*
+ * Ensure capacity settings are cleared so that a later call to
+ * __wt_capacity_server_create (e.g., on reconfigure) isn't confused.
+ */
+ conn->capacity_session = NULL;
+ conn->capacity_tid_set = false;
+ conn->capacity_cond = NULL;
+
+ return (ret);
+}
+
+/*
+ * __capacity_signal --
+ * Signal the capacity thread, at most once per wakeup, if sufficient data has been written.
+ */
+static void
+__capacity_signal(WT_SESSION_IMPL *session)
+{
+ WT_CAPACITY *cap;
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+ cap = &conn->capacity;
+ if (cap->written >= cap->threshold && !cap->signalled) {
+ __wt_cond_signal(session, conn->capacity_cond); /* NOTE(review): cond may be NULL if no server runs; confirm */
+ cap->signalled = true; /* cleared by __capacity_server after it wakes */
+ }
+}
+
+/*
+ * __capacity_reserve --
+ * Make a reservation for the given number of bytes against the capacity of
+ * the subsystem; *result is the reserved time, or now_ns if capacity is zero.
+ */
+static void
+__capacity_reserve(uint64_t *reservation, uint64_t bytes, uint64_t capacity,
+ uint64_t now_ns, uint64_t *result)
+{
+ uint64_t res_len, res_value;
+
+ if (capacity != 0) {
+ res_len = WT_RESERVATION_NS(bytes, capacity);
+ res_value = __wt_atomic_add64(reservation, res_len);
+ if (now_ns > res_value && now_ns - res_value > WT_BILLION)
+ /*
+ * If the reservation clock is more than a second behind, bring it to
+ * within a second of the current time; *result keeps the old value.
+ */
+ (void)__wt_atomic_store64(reservation,
+ (now_ns - WT_BILLION) + res_len);
+ } else
+ res_value = now_ns;
+
+ *result = res_value;
+}
+
+/*
+ * __wt_capacity_throttle --
+ * Reserve a time to perform a read or write operation for the subsystem,
+ * and wait until that time.
+ *
+ * The concept is that each I/O in a subsystem reserves a time slot
+ * to do its work, and atomically adjusts the reservation marker to
+ * point past the reserved slot. The size of the adjustment (i.e. the
+ * length of time represented by the slot in nanoseconds) is chosen to
+ * be proportional to the number of bytes to be transferred, and the
+ * proportion is a simple calculation so that we can fit reservations for
+ * exactly the configured capacity in a second. Reservation times are
+ * in nanoseconds since the epoch.
+ */
+void
+__wt_capacity_throttle(WT_SESSION_IMPL *session, uint64_t bytes,
+ WT_THROTTLE_TYPE type)
+{
+ struct timespec now;
+ WT_CAPACITY *cap;
+ WT_CONNECTION_IMPL *conn;
+ uint64_t best_res, capacity, new_res, now_ns, sleep_us, res_total_value;
+ uint64_t res_value, steal_capacity, stolen_bytes, this_res;
+ uint64_t *reservation, *steal;
+ uint64_t total_capacity;
+
+ conn = S2C(session);
+ cap = &conn->capacity;
+ /* If not using capacity there's nothing to do. */
+ if (cap->total == 0)
+ return;
+
+ capacity = steal_capacity = 0;
+ reservation = steal = NULL;
+ switch (type) {
+ case WT_THROTTLE_CKPT:
+ capacity = cap->ckpt;
+ reservation = &cap->reservation_ckpt;
+ WT_STAT_CONN_INCRV(session, capacity_bytes_ckpt, bytes);
+ break;
+ case WT_THROTTLE_EVICT:
+ capacity = cap->evict;
+ reservation = &cap->reservation_evict;
+ WT_STAT_CONN_INCRV(session, capacity_bytes_evict, bytes);
+ break;
+ case WT_THROTTLE_LOG:
+ capacity = cap->log;
+ reservation = &cap->reservation_log;
+ WT_STAT_CONN_INCRV(session, capacity_bytes_log, bytes);
+ break;
+ case WT_THROTTLE_READ:
+ capacity = cap->read;
+ reservation = &cap->reservation_read;
+ WT_STAT_CONN_INCRV(session, capacity_bytes_read, bytes);
+ break;
+ }
+ total_capacity = cap->total;
+
+ /*
+ * Right now no subsystem can be individually turned off, but it is
+ * certainly a possibility to consider one subsystem may be turned off
+ * at some point in the future. If this subsystem is not throttled,
+ * or the connection is recovering, there's nothing to do.
+ */
+ if (capacity == 0 || F_ISSET(conn, WT_CONN_RECOVERING))
+ return;
+
+ /*
+ * There may in fact be some reads done under the umbrella of log
+ * I/O, but they are mostly done under recovery. And if we are
+ * recovering, we don't reach this code.
+ */
+ if (type != WT_THROTTLE_READ) {
+ (void)__wt_atomic_addv64(&cap->written, bytes);
+ WT_STAT_CONN_INCRV(session, capacity_bytes_written, bytes);
+ __capacity_signal(session);
+ }
+
+ /* If we get sizes larger than this, later calculations may overflow. */
+ WT_ASSERT(session, bytes < 16 * (uint64_t)WT_GIGABYTE);
+ WT_ASSERT(session, capacity != 0);
+
+ /* Get the current time in nanoseconds since the epoch. */
+ __wt_epoch(session, &now);
+ now_ns = (uint64_t)now.tv_sec * WT_BILLION + (uint64_t)now.tv_nsec;
+
+again:
+ /* Take a reservation for the subsystem, and for the total. */
+ __capacity_reserve(reservation, bytes, capacity, now_ns, &res_value);
+ __capacity_reserve(&cap->reservation_total, bytes, total_capacity,
+ now_ns, &res_total_value);
+
+ /*
+ * If we ended up with a future reservation, and we aren't constricted
+ * by the total capacity, then we may be able to reallocate some
+ * unused reservation time from another subsystem.
+ */
+ if (res_value > now_ns && res_total_value < now_ns && steal == NULL &&
+ total_capacity != 0) {
+ best_res = now_ns - WT_BILLION / 2; /* donor must be over half a second behind */
+ if (type != WT_THROTTLE_CKPT &&
+ (this_res = cap->reservation_ckpt) < best_res) {
+ steal = &cap->reservation_ckpt;
+ steal_capacity = cap->ckpt;
+ best_res = this_res;
+ }
+ if (type != WT_THROTTLE_EVICT &&
+ (this_res = cap->reservation_evict) < best_res) {
+ steal = &cap->reservation_evict;
+ steal_capacity = cap->evict;
+ best_res = this_res;
+ }
+ if (type != WT_THROTTLE_LOG &&
+ (this_res = cap->reservation_log) < best_res) {
+ steal = &cap->reservation_log;
+ steal_capacity = cap->log;
+ best_res = this_res;
+ }
+ if (type != WT_THROTTLE_READ &&
+ (this_res = cap->reservation_read) < best_res) {
+ steal = &cap->reservation_read;
+ steal_capacity = cap->read;
+ best_res = this_res;
+ }
+
+ if (steal != NULL) {
+ /*
+ * We have a subsystem that has enough spare capacity
+ * to steal. We'll take a small slice (a fraction
+ * of a second worth) and add it to our own subsystem.
+ */
+ if (best_res < now_ns - WT_BILLION &&
+ now_ns > WT_BILLION)
+ new_res = now_ns - WT_BILLION;
+ else
+ new_res = best_res;
+ WT_ASSERT(session, steal_capacity != 0);
+ new_res += WT_STEAL_FRACTION(WT_BILLION) +
+ WT_RESERVATION_NS(bytes, steal_capacity);
+ if (!__wt_atomic_casv64(steal, best_res, new_res)) {
+ /*
+ * Give up our reservations and try again. We won't try
+ * to steal the next time because steal is now non-NULL.
+ */
+ (void)__wt_atomic_sub64(reservation,
+ WT_RESERVATION_NS(bytes, capacity));
+ (void)__wt_atomic_sub64(&cap->reservation_total,
+ WT_RESERVATION_NS(bytes, total_capacity));
+ goto again;
+ }
+
+ /*
+ * We've stolen a fraction of a second of capacity.
+ * Figure out how many bytes that is, before crediting
+ * that many bytes to the acquiring subsystem by moving
+ * its reservation marker back in time.
+ */
+ stolen_bytes = WT_STEAL_FRACTION(steal_capacity);
+ res_value = __wt_atomic_sub64(reservation,
+ WT_RESERVATION_NS(stolen_bytes, capacity));
+ }
+ }
+ if (res_value < res_total_value) /* wait for the later of the two */
+ res_value = res_total_value;
+
+ if (res_value > now_ns) {
+ sleep_us = (res_value - now_ns) / WT_THOUSAND;
+ if (res_value == res_total_value)
+ WT_STAT_CONN_INCRV(session,
+ capacity_time_total, sleep_us);
+ else
+ switch (type) {
+ case WT_THROTTLE_CKPT:
+ WT_STAT_CONN_INCRV(session,
+ capacity_time_ckpt, sleep_us);
+ break;
+ case WT_THROTTLE_EVICT:
+ WT_STAT_CONN_INCRV(session,
+ capacity_time_evict, sleep_us);
+ break;
+ case WT_THROTTLE_LOG:
+ WT_STAT_CONN_INCRV(session,
+ capacity_time_log, sleep_us);
+ break;
+ case WT_THROTTLE_READ:
+ WT_STAT_CONN_INCRV(session,
+ capacity_time_read, sleep_us);
+ break;
+ }
+ if (sleep_us > WT_CAPACITY_SLEEP_CUTOFF_US) /* shorter waits are counted above but not slept */
+ /* Sleep handles large usec values. */
+ __wt_sleep(0, sleep_us);
+ }
+}
diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c
index 28ad155ff53..27d1e6a620d 100644
--- a/src/third_party/wiredtiger/src/conn/conn_open.c
+++ b/src/third_party/wiredtiger/src/conn/conn_open.c
@@ -56,6 +56,7 @@ __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[])
/* Initialize transaction support. */
WT_RET(__wt_txn_global_init(session, cfg));
+ WT_STAT_CONN_SET(session, dh_conn_handle_size, sizeof(WT_DATA_HANDLE));
return (0);
}
@@ -101,6 +102,7 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn)
F_SET(conn, WT_CONN_CLOSING_NO_MORE_OPENS);
WT_FULL_BARRIER();
+ WT_TRET(__wt_capacity_server_destroy(session));
WT_TRET(__wt_checkpoint_server_destroy(session));
WT_TRET(__wt_statlog_destroy(session, true));
WT_TRET(__wt_sweep_destroy(session));
@@ -251,6 +253,9 @@ __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[])
/* Start the optional async threads. */
WT_RET(__wt_async_create(session, cfg));
+ /* Start the optional capacity thread. */
+ WT_RET(__wt_capacity_server_create(session, cfg));
+
/* Start the optional checkpoint thread. */
WT_RET(__wt_checkpoint_server_create(session, cfg));
diff --git a/src/third_party/wiredtiger/src/conn/conn_reconfig.c b/src/third_party/wiredtiger/src/conn/conn_reconfig.c
index e56e76c8fd6..c6d7203f08e 100644
--- a/src/third_party/wiredtiger/src/conn/conn_reconfig.c
+++ b/src/third_party/wiredtiger/src/conn/conn_reconfig.c
@@ -475,6 +475,7 @@ __wt_conn_reconfig(WT_SESSION_IMPL *session, const char **cfg)
WT_ERR(__wt_conn_statistics_config(session, cfg));
WT_ERR(__wt_async_reconfig(session, cfg));
WT_ERR(__wt_cache_config(session, true, cfg));
+ WT_ERR(__wt_capacity_server_create(session, cfg));
WT_ERR(__wt_checkpoint_server_create(session, cfg));
WT_ERR(__wt_logmgr_reconfig(session, cfg));
WT_ERR(__wt_lsm_manager_reconfig(session, cfg));
diff --git a/src/third_party/wiredtiger/src/conn/conn_sweep.c b/src/third_party/wiredtiger/src/conn/conn_sweep.c
index c8681c13427..673f7c81399 100644
--- a/src/third_party/wiredtiger/src/conn/conn_sweep.c
+++ b/src/third_party/wiredtiger/src/conn/conn_sweep.c
@@ -278,13 +278,18 @@ __sweep_server(void *arg)
WT_DECL_RET;
WT_SESSION_IMPL *session;
time_t last, now;
- uint64_t last_las_sweep_id, min_sleep, oldest_id;
+ uint64_t last_las_sweep_id, min_sleep, oldest_id, sweep_interval;
u_int dead_handles;
session = arg;
conn = S2C(session);
last_las_sweep_id = WT_TXN_NONE;
min_sleep = WT_MIN(WT_LAS_SWEEP_SEC, conn->sweep_interval);
+ if (FLD_ISSET(conn->timing_stress_flags,
+ WT_TIMING_STRESS_AGGRESSIVE_SWEEP))
+ sweep_interval = conn->sweep_interval / 10;
+ else
+ sweep_interval = conn->sweep_interval;
/*
* Sweep for dead and excess handles.
@@ -292,8 +297,14 @@ __sweep_server(void *arg)
__wt_seconds(session, &last);
for (;;) {
/* Wait until the next event. */
- __wt_cond_wait(session, conn->sweep_cond,
- min_sleep * WT_MILLION, __sweep_server_run_chk);
+ if (FLD_ISSET(conn->timing_stress_flags,
+ WT_TIMING_STRESS_AGGRESSIVE_SWEEP))
+ __wt_cond_wait(session, conn->sweep_cond,
+ min_sleep * 100 * WT_THOUSAND,
+ __sweep_server_run_chk);
+ else
+ __wt_cond_wait(session, conn->sweep_cond,
+ min_sleep * WT_MILLION, __sweep_server_run_chk);
/* Check if we're quitting or being reconfigured. */
if (!__sweep_server_run_chk(session))
@@ -312,7 +323,9 @@ __sweep_server(void *arg)
* bringing in and evicting pages from the lookaside table,
* which will stop the cache from moving into the stuck state.
*/
- if (now - last >= WT_LAS_SWEEP_SEC &&
+ if ((FLD_ISSET(conn->timing_stress_flags,
+ WT_TIMING_STRESS_AGGRESSIVE_SWEEP) ||
+ now - last >= WT_LAS_SWEEP_SEC) &&
!__wt_las_empty(session) &&
!__wt_cache_stuck(session)) {
oldest_id = __wt_txn_oldest_id(session);
@@ -327,7 +340,7 @@ __sweep_server(void *arg)
* less frequently than the lookaside table by default and the
* frequency is controlled by a user setting.
*/
- if ((uint64_t)(now - last) < conn->sweep_interval)
+ if ((uint64_t)(now - last) < sweep_interval)
continue;
WT_STAT_CONN_INCR(session, dh_sweeps);
/*
@@ -350,6 +363,9 @@ __sweep_server(void *arg)
if (dead_handles > 0)
WT_ERR(__sweep_remove_handles(session));
+
+ /* Remember the last sweep time. */
+ last = now;
}
if (0) {
diff --git a/src/third_party/wiredtiger/src/docs/programming.dox b/src/third_party/wiredtiger/src/docs/programming.dox
index 3ddb0c376c5..960babfc146 100644
--- a/src/third_party/wiredtiger/src/docs/programming.dox
+++ b/src/third_party/wiredtiger/src/docs/programming.dox
@@ -68,6 +68,7 @@ each of which is ordered by one or more columns.
- @subpage_single tune_build_options
- @subpage_single tune_bulk_load
- @subpage_single tune_cache
+- @subpage_single tune_capacity
- @subpage_single tune_checksum
- @subpage_single tune_close
- @subpage_single tune_cursor_persist
diff --git a/src/third_party/wiredtiger/src/docs/tune-capacity.dox b/src/third_party/wiredtiger/src/docs/tune-capacity.dox
new file mode 100644
index 00000000000..3aad4997576
--- /dev/null
+++ b/src/third_party/wiredtiger/src/docs/tune-capacity.dox
@@ -0,0 +1,38 @@
+/*! @page tune_capacity Capacity tuning
+
+In some cases, it can be helpful to constrain the overall I/O bandwidth
+generated by the database. This can be beneficial when resources are shared,
+for example, in cloud or virtual environments.
+
+The total bandwidth capacity is configured by setting the
+\c io_capacity configuration string when calling the ::wiredtiger_open
+function. The capacity can be adjusted with WT_CONNECTION::reconfigure.
+
+An example of setting a capacity limit to 40MB per second:
+
+@snippet ex_all.c Configure capacity
+
+When a total capacity is set, the combined volume of system reads and writes
+will not exceed the given I/O capacity.
+If a read or write is scheduled and would overflow the capacity, the issuing
+thread will sleep to guarantee the capacity ceiling. The policy used is
+fair to all threads, and gives some weight to both readers and writers to
+try to ensure that each session can make progress when bandwidth
+resources are limited.
+
+System reads and writes do not directly translate to disk I/O
+operations. These operations go through the operating system cache. To ensure
+the steady flow of data to the disk, setting a capacity also enables an
+additional thread that monitors the writes performed for each file. For each
+file that has sufficient data written to it, a call to an
+asynchronous \c fsync will be made. This call normally queues the flush
+in the operating system, though there is no guarantee about when it will
+actually occur. On Windows, there is no equivalent support for asynchronously
+scheduling writes to disk, so this extra "sync" thread is not active.
+
+When a total capacity is not set, or equivalently, when it is set to 0,
+there are no capacity constraints on the database, and pauses will never
+be inserted before I/O is done, nor are extra asynchronous \c fsync calls
+performed.
+
+ */
diff --git a/src/third_party/wiredtiger/src/include/block.h b/src/third_party/wiredtiger/src/include/block.h
index 584149d4379..8efaf10dd2b 100644
--- a/src/third_party/wiredtiger/src/include/block.h
+++ b/src/third_party/wiredtiger/src/include/block.h
@@ -234,7 +234,6 @@ struct __wt_block {
uint32_t allocsize; /* Allocation size */
size_t os_cache; /* System buffer cache flush max */
size_t os_cache_max;
- size_t os_cache_dirty; /* System buffer cache write max */
size_t os_cache_dirty_max;
u_int block_header; /* Header length */
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 2450f90a3a6..14d5a04b096 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -827,8 +827,9 @@ struct __wt_page {
*/
struct __wt_page_deleted {
volatile uint64_t txnid; /* Transaction ID */
- wt_timestamp_t timestamp;
- wt_timestamp_t durable_timestamp; /* aligned uint64_t timestamp */
+
+ wt_timestamp_t timestamp; /* Timestamps */
+ wt_timestamp_t durable_timestamp;
/*
* The state is used for transaction prepare to manage visibility
@@ -1058,8 +1059,9 @@ struct __wt_ikey {
*/
struct __wt_update {
volatile uint64_t txnid; /* transaction ID */
- wt_timestamp_t timestamp; /* aligned uint64_t timestamp */
- wt_timestamp_t durable_timestamp; /* aligned uint64_t timestamp */
+
+ wt_timestamp_t durable_ts; /* timestamps */
+ wt_timestamp_t start_ts, stop_ts;
WT_UPDATE *next; /* forward-linked list */
@@ -1082,7 +1084,7 @@ struct __wt_update {
* The update state is used for transaction prepare to manage
* visibility and transitioning update structure state safely.
*/
- volatile uint8_t prepare_state; /* Prepare state. */
+ volatile uint8_t prepare_state; /* prepare state */
/*
* Zero or more bytes of value (the payload) immediately follows the
@@ -1096,7 +1098,7 @@ struct __wt_update {
* WT_UPDATE_SIZE is the expected structure size excluding the payload data --
* we verify the build to ensure the compiler hasn't inserted padding.
*/
-#define WT_UPDATE_SIZE 38
+#define WT_UPDATE_SIZE 46
/*
* The memory size of an update: include some padding because this is such a
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index b17bfcc2595..f0c072615b8 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -1018,7 +1018,7 @@ __wt_row_leaf_key(WT_SESSION_IMPL *session,
* Return the unpacked value for a row-store leaf page key.
*/
static inline void
-__wt_row_leaf_value_cell(
+__wt_row_leaf_value_cell(WT_SESSION_IMPL *session,
WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *kpack, WT_CELL_UNPACK *vpack)
{
WT_CELL *kcell, *vcell;
@@ -1048,13 +1048,14 @@ __wt_row_leaf_value_cell(
page, copy, NULL, &kcell, &key, &size) && kcell == NULL)
vcell = (WT_CELL *)((uint8_t *)key + size);
else {
- __wt_cell_unpack(page, kcell, &unpack);
+ __wt_cell_unpack(session, page, kcell, &unpack);
vcell = (WT_CELL *)((uint8_t *)
unpack.cell + __wt_cell_total_len(&unpack));
}
}
- __wt_cell_unpack(page, __wt_cell_leaf_value_parse(page, vcell), vpack);
+ __wt_cell_unpack(session,
+ page, __wt_cell_leaf_value_parse(page, vcell), vpack);
}
/*
@@ -1087,7 +1088,8 @@ __wt_row_leaf_value(WT_PAGE *page, WT_ROW *rip, WT_ITEM *value)
* Return the addr/size and type triplet for a reference.
*/
static inline void
-__wt_ref_info(WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep)
+__wt_ref_info(WT_SESSION_IMPL *session,
+ WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep)
{
WT_ADDR *addr;
WT_CELL_UNPACK *unpack, _unpack;
@@ -1128,7 +1130,7 @@ __wt_ref_info(WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep)
break;
}
} else {
- __wt_cell_unpack(page, (WT_CELL *)addr, unpack);
+ __wt_cell_unpack(session, page, (WT_CELL *)addr, unpack);
*addrp = unpack->data;
*sizep = unpack->size;
if (typep != NULL)
@@ -1149,7 +1151,7 @@ __wt_ref_block_free(WT_SESSION_IMPL *session, WT_REF *ref)
if (ref->addr == NULL)
return (0);
- __wt_ref_info(ref, &addr, &addr_size, NULL);
+ __wt_ref_info(session, ref, &addr, &addr_size, NULL);
WT_RET(__wt_btree_block_free(session, addr, addr_size));
/* Clear the address (so we don't free it twice). */
diff --git a/src/third_party/wiredtiger/src/include/capacity.h b/src/third_party/wiredtiger/src/include/capacity.h
new file mode 100644
index 00000000000..1fb42f5b435
--- /dev/null
+++ b/src/third_party/wiredtiger/src/include/capacity.h
@@ -0,0 +1,74 @@
+/*-
+ * Copyright (c) 2014-2019 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+typedef enum {
+ WT_THROTTLE_CKPT, /* Checkpoint throttle */
+ WT_THROTTLE_EVICT, /* Eviction throttle */
+ WT_THROTTLE_LOG, /* Logging throttle */
+ WT_THROTTLE_READ /* Read throttle */
+} WT_THROTTLE_TYPE;
+
+#define WT_THROTTLE_MIN WT_MEGABYTE /* Config minimum size */
+
+/*
+ * The per-file threshold is the amount of data that must be written to a file
+ * before the background fsync is started on it. The minimum threshold is the
+ * lower bound on the threshold used by the background thread; otherwise the
+ * threshold is computed as a percentage of the given capacity.
+ */
+#define WT_CAPACITY_FILE_THRESHOLD (WT_MEGABYTE / 2)
+#define WT_CAPACITY_MIN_THRESHOLD (10 * WT_MEGABYTE)
+#define WT_CAPACITY_PCT 10
+
+/*
+ * If we're being asked to sleep a short amount of time, ignore it.
+ * A non-zero value means there may be a temporary violation of the
+ * capacity limitation, but one that would even out. That is, a larger
+ * number means possibly fewer sleeps, at the risk of choppier
+ * behavior.
+ */
+#define WT_CAPACITY_SLEEP_CUTOFF_US 100
+
+/*
+ * When given a total capacity, divide it up for each subsystem. These defines
+ * represent the percentage of the total capacity that we allow for each
+ * subsystem capacity. We allow and expect the sum of the subsystems to
+ * exceed 100, as often they are not at their maximum at the same time. In any
+ * event, we track the total capacity separately, so it is never exceeded.
+ */
+#define WT_CAPACITY_SYS(total, pct) ((total) * (pct) / 100)
+#define WT_CAP_CKPT 5
+#define WT_CAP_EVICT 50
+#define WT_CAP_LOG 30
+#define WT_CAP_READ 55
+
+struct __wt_capacity {
+ uint64_t ckpt; /* Bytes/sec checkpoint capacity */
+ uint64_t evict; /* Bytes/sec eviction capacity */
+ uint64_t log; /* Bytes/sec logging capacity */
+ uint64_t read; /* Bytes/sec read capacity */
+ uint64_t total; /* Bytes/sec total capacity */
+ uint64_t threshold; /* Capacity size period */
+
+ volatile uint64_t written; /* Written this period */
+ volatile bool signalled; /* Capacity signalled */
+
+ /*
+ * A reservation is a point in time when a read or write for a subsystem
+ * can be scheduled, so as not to overrun the given capacity. These
+ * values hold the next available reservation, in nanoseconds since
+ * the epoch. Getting a reservation with a future time implies sleeping
+ * until that time; getting a reservation with a past time implies that
+ * the operation can be done immediately.
+ */
+ uint64_t reservation_ckpt; /* Atomic: next checkpoint write */
+ uint64_t reservation_evict; /* Atomic: next eviction write */
+ uint64_t reservation_log; /* Atomic: next logging write */
+ uint64_t reservation_read; /* Atomic: next read */
+ uint64_t reservation_total; /* Atomic: next operation of any kind */
+};
diff --git a/src/third_party/wiredtiger/src/include/cell.i b/src/third_party/wiredtiger/src/include/cell.i
index 654a409e627..8ec25508a7c 100644
--- a/src/third_party/wiredtiger/src/include/cell.i
+++ b/src/third_party/wiredtiger/src/include/cell.i
@@ -176,10 +176,11 @@ struct __wt_cell_unpack {
* Pack a start, stop timestamp pair for a value.
*/
static inline void
-__cell_pack_timestamp_value(
+__cell_pack_timestamp_value(WT_SESSION_IMPL *session,
uint8_t **pp, wt_timestamp_t start_ts, wt_timestamp_t stop_ts)
{
- WT_ASSERT(NULL, start_ts <= stop_ts);
+ WT_ASSERT(session, stop_ts != WT_TS_NONE);
+ WT_ASSERT(session, start_ts <= stop_ts);
if (__wt_process.page_version_ts) {
/* Start timestamp, stop timestamp difference. */
@@ -194,11 +195,13 @@ __cell_pack_timestamp_value(
* address.
*/
static inline void
-__cell_pack_timestamp_addr(uint8_t **pp, wt_timestamp_t oldest_start_ts,
+__cell_pack_timestamp_addr(WT_SESSION_IMPL *session,
+ uint8_t **pp, wt_timestamp_t oldest_start_ts,
wt_timestamp_t newest_start_ts, wt_timestamp_t newest_stop_ts)
{
- WT_ASSERT(NULL, oldest_start_ts <= newest_start_ts);
- WT_ASSERT(NULL, newest_start_ts <= newest_stop_ts);
+ WT_ASSERT(session, newest_stop_ts != WT_TS_NONE);
+ WT_ASSERT(session, oldest_start_ts <= newest_start_ts);
+ WT_ASSERT(session, newest_start_ts <= newest_stop_ts);
if (__wt_process.page_version_ts) {
(void)__wt_vpack_uint(pp, 0, oldest_start_ts);
@@ -214,14 +217,15 @@ __cell_pack_timestamp_addr(uint8_t **pp, wt_timestamp_t oldest_start_ts,
* Pack an address cell.
*/
static inline size_t
-__wt_cell_pack_addr(WT_CELL *cell, u_int cell_type, uint64_t recno,
+__wt_cell_pack_addr(WT_SESSION_IMPL *session,
+ WT_CELL *cell, u_int cell_type, uint64_t recno,
wt_timestamp_t oldest_start_ts,
wt_timestamp_t newest_start_ts, wt_timestamp_t newest_stop_ts, size_t size)
{
uint8_t *p;
p = cell->__chunk + 1;
- __cell_pack_timestamp_addr(
+ __cell_pack_timestamp_addr(session,
&p, oldest_start_ts, newest_start_ts, newest_stop_ts);
if (recno == WT_RECNO_OOB)
@@ -239,13 +243,13 @@ __wt_cell_pack_addr(WT_CELL *cell, u_int cell_type, uint64_t recno,
* Set a data item's WT_CELL contents.
*/
static inline size_t
-__wt_cell_pack_data(WT_CELL *cell,
+__wt_cell_pack_data(WT_SESSION_IMPL *session, WT_CELL *cell,
wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle, size_t size)
{
uint8_t byte, *p;
p = cell->__chunk + 1;
- __cell_pack_timestamp_value(&p, start_ts, stop_ts);
+ __cell_pack_timestamp_value(session, &p, start_ts, stop_ts);
/*
* Short data cells without run-length encoding have 6 bits of data
@@ -343,13 +347,13 @@ __wt_cell_pack_data_match(WT_CELL *page_cell,
* Write a copy value cell.
*/
static inline size_t
-__wt_cell_pack_copy(WT_CELL *cell,
+__wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell,
wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle, uint64_t v)
{
uint8_t *p;
p = cell->__chunk + 1;
- __cell_pack_timestamp_value(&p, start_ts, stop_ts);
+ __cell_pack_timestamp_value(session, &p, start_ts, stop_ts);
if (rle < 2)
cell->__chunk[0] = WT_CELL_VALUE_COPY; /* Type */
@@ -367,13 +371,13 @@ __wt_cell_pack_copy(WT_CELL *cell,
* Write a deleted value cell.
*/
static inline size_t
-__wt_cell_pack_del(WT_CELL *cell,
+__wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell,
wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle)
{
uint8_t *p;
p = cell->__chunk + 1;
- __cell_pack_timestamp_value(&p, start_ts, stop_ts);
+ __cell_pack_timestamp_value(session, &p, start_ts, stop_ts);
if (rle < 2)
cell->__chunk[0] = WT_CELL_DEL; /* Type */
@@ -453,7 +457,7 @@ __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size)
* Pack an overflow cell.
*/
static inline size_t
-__wt_cell_pack_ovfl(WT_CELL *cell, uint8_t type,
+__wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type,
wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle, size_t size)
{
uint8_t *p;
@@ -465,7 +469,7 @@ __wt_cell_pack_ovfl(WT_CELL *cell, uint8_t type,
break;
case WT_CELL_VALUE_OVFL:
case WT_CELL_VALUE_OVFL_RM:
- __cell_pack_timestamp_value(&p, start_ts, stop_ts);
+ __cell_pack_timestamp_value(session, &p, start_ts, stop_ts);
break;
}
@@ -621,7 +625,7 @@ __wt_cell_leaf_value_parse(WT_PAGE *page, WT_CELL *cell)
* Unpack a WT_CELL into a structure, with optional boundary checks.
*/
static inline int
-__wt_cell_unpack_safe(const WT_PAGE_HEADER *dsk,
+__wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk,
WT_CELL *cell, WT_CELL_UNPACK *unpack, const void *end)
{
struct {
@@ -729,9 +733,11 @@ restart:
WT_PTRDIFF(end, p), &unpack->newest_stop_ts));
unpack->newest_stop_ts += unpack->newest_start_ts;
- WT_ASSERT(NULL,
+ WT_ASSERT(session,
+ unpack->newest_stop_ts != WT_TS_NONE);
+ WT_ASSERT(session,
unpack->oldest_start_ts <= unpack->newest_start_ts);
- WT_ASSERT(NULL,
+ WT_ASSERT(session,
unpack->newest_start_ts <= unpack->newest_stop_ts);
break;
case WT_CELL_DEL:
@@ -746,7 +752,8 @@ restart:
0 : WT_PTRDIFF(end, p), &unpack->stop_ts));
unpack->stop_ts += unpack->start_ts;
- WT_ASSERT(NULL, unpack->start_ts <= unpack->stop_ts);
+ WT_ASSERT(session, unpack->stop_ts != WT_TS_NONE);
+ WT_ASSERT(session, unpack->start_ts <= unpack->stop_ts);
break;
}
@@ -851,7 +858,7 @@ done: WT_CELL_LEN_CHK(cell, unpack->__len);
* Unpack a WT_CELL into a structure.
*/
static inline void
-__wt_cell_unpack_dsk(
+__wt_cell_unpack_dsk(WT_SESSION_IMPL *session,
const WT_PAGE_HEADER *dsk, WT_CELL *cell, WT_CELL_UNPACK *unpack)
{
/*
@@ -885,7 +892,7 @@ __wt_cell_unpack_dsk(
return;
}
- (void)__wt_cell_unpack_safe(dsk, cell, unpack, NULL);
+ (void)__wt_cell_unpack_safe(session, dsk, cell, unpack, NULL);
}
/*
@@ -893,9 +900,10 @@ __wt_cell_unpack_dsk(
* Unpack a WT_CELL into a structure.
*/
static inline void
-__wt_cell_unpack(WT_PAGE *page, WT_CELL *cell, WT_CELL_UNPACK *unpack)
+__wt_cell_unpack(WT_SESSION_IMPL *session,
+ WT_PAGE *page, WT_CELL *cell, WT_CELL_UNPACK *unpack)
{
- __wt_cell_unpack_dsk(page->dsk, cell, unpack);
+ __wt_cell_unpack_dsk(session, page->dsk, cell, unpack);
}
/*
@@ -985,13 +993,14 @@ __wt_page_cell_data_ref(WT_SESSION_IMPL *session,
* WT_CELL_FOREACH --
* Walk the cells on a page.
*/
-#define WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, skip_ts) do { \
+#define WT_CELL_FOREACH_BEGIN(session, btree, dsk, unpack, skip_ts) do {\
uint32_t __i; \
uint8_t *__cell; \
for (__cell = WT_PAGE_HEADER_BYTE(btree, dsk), \
__i = (dsk)->u.entries; \
__i > 0; __cell += (unpack).__len, --__i) { \
- __wt_cell_unpack_dsk(dsk, (WT_CELL *)__cell, &(unpack));\
+ __wt_cell_unpack_dsk( \
+ session, dsk, (WT_CELL *)__cell, &(unpack)); \
/* \
* Optionally skip unstable page entries after downgrade\
* to a release without page timestamps. Check for cells\
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index 2c639fc7b8a..280d7e32f7d 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -293,6 +293,12 @@ struct __wt_connection_impl {
uint32_t async_size; /* Async op array size */
uint32_t async_workers; /* Number of async workers */
+ WT_CAPACITY capacity; /* Capacity structure */
+ WT_SESSION_IMPL *capacity_session; /* Capacity thread session */
+ wt_thread_t capacity_tid; /* Capacity thread */
+ bool capacity_tid_set; /* Capacity thread set */
+ WT_CONDVAR *capacity_cond; /* Capacity wait mutex */
+
WT_LSM_MANAGER lsm_manager; /* LSM worker thread information */
WT_KEYED_ENCRYPTOR *kencryptor; /* Encryptor for metadata and log */
@@ -456,16 +462,17 @@ struct __wt_connection_impl {
* delays have been requested.
*/
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_TIMING_STRESS_CHECKPOINT_SLOW 0x001u
-#define WT_TIMING_STRESS_LOOKASIDE_SWEEP 0x002u
-#define WT_TIMING_STRESS_SPLIT_1 0x004u
-#define WT_TIMING_STRESS_SPLIT_2 0x008u
-#define WT_TIMING_STRESS_SPLIT_3 0x010u
-#define WT_TIMING_STRESS_SPLIT_4 0x020u
-#define WT_TIMING_STRESS_SPLIT_5 0x040u
-#define WT_TIMING_STRESS_SPLIT_6 0x080u
-#define WT_TIMING_STRESS_SPLIT_7 0x100u
-#define WT_TIMING_STRESS_SPLIT_8 0x200u
+#define WT_TIMING_STRESS_AGGRESSIVE_SWEEP 0x001u
+#define WT_TIMING_STRESS_CHECKPOINT_SLOW 0x002u
+#define WT_TIMING_STRESS_LOOKASIDE_SWEEP 0x004u
+#define WT_TIMING_STRESS_SPLIT_1 0x008u
+#define WT_TIMING_STRESS_SPLIT_2 0x010u
+#define WT_TIMING_STRESS_SPLIT_3 0x020u
+#define WT_TIMING_STRESS_SPLIT_4 0x040u
+#define WT_TIMING_STRESS_SPLIT_5 0x080u
+#define WT_TIMING_STRESS_SPLIT_6 0x100u
+#define WT_TIMING_STRESS_SPLIT_7 0x200u
+#define WT_TIMING_STRESS_SPLIT_8 0x400u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint64_t timing_stress_flags;
@@ -500,12 +507,13 @@ struct __wt_connection_impl {
#define WT_CONN_RECOVERING 0x0020000u
#define WT_CONN_SALVAGE 0x0040000u
#define WT_CONN_SERVER_ASYNC 0x0080000u
-#define WT_CONN_SERVER_CHECKPOINT 0x0100000u
-#define WT_CONN_SERVER_LOG 0x0200000u
-#define WT_CONN_SERVER_LSM 0x0400000u
-#define WT_CONN_SERVER_STATISTICS 0x0800000u
-#define WT_CONN_SERVER_SWEEP 0x1000000u
-#define WT_CONN_WAS_BACKUP 0x2000000u
+#define WT_CONN_SERVER_CAPACITY 0x0100000u
+#define WT_CONN_SERVER_CHECKPOINT 0x0200000u
+#define WT_CONN_SERVER_LOG 0x0400000u
+#define WT_CONN_SERVER_LSM 0x0800000u
+#define WT_CONN_SERVER_STATISTICS 0x1000000u
+#define WT_CONN_SERVER_SWEEP 0x2000000u
+#define WT_CONN_WAS_BACKUP 0x4000000u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint32_t flags;
};
diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i
index 4dcf31a1dc0..351a5cd7abe 100644
--- a/src/third_party/wiredtiger/src/include/cursor.i
+++ b/src/third_party/wiredtiger/src/include/cursor.i
@@ -425,7 +425,7 @@ __cursor_row_slot_return(WT_CURSOR_BTREE *cbt, WT_ROW *rip, WT_UPDATE *upd)
*/
kpack = &_kpack;
memset(kpack, 0, sizeof(*kpack));
- __wt_cell_unpack(page, cell, kpack);
+ __wt_cell_unpack(session, page, cell, kpack);
if (kpack->type == WT_CELL_KEY &&
cbt->rip_saved != NULL && cbt->rip_saved == rip - 1) {
WT_ASSERT(session, cbt->row_key->size >= kpack->prefix);
@@ -470,7 +470,7 @@ value:
return (0);
/* Else, take the value from the original page cell. */
- __wt_row_leaf_value_cell(page, rip, kpack, vpack);
+ __wt_row_leaf_value_cell(session, page, rip, kpack, vpack);
return (__wt_page_cell_data_ref(session, cbt->ref->page, vpack, vb));
}
/*
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 196028b5297..681c6c242ae 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -265,6 +265,9 @@ extern int __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg) WT
extern int __wt_conn_cache_pool_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern WT_THREAD_RET __wt_cache_pool_server(void *arg);
+extern int __wt_capacity_server_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_capacity_server_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void __wt_capacity_throttle(WT_SESSION_IMPL *session, uint64_t bytes, WT_THROTTLE_TYPE type);
extern int __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_checkpoint_signal(WT_SESSION_IMPL *session, wt_off_t logsize);
@@ -566,6 +569,8 @@ extern int __wt_ext_map_windows_error(WT_EXTENSION_API *wt_api, WT_SESSION *wt_s
extern bool __wt_handle_is_open(WT_SESSION_IMPL *session, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_FS_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_fsync_background_chk(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_fsync_background(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_close_connection_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_os_inmemory(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_fopen(WT_SESSION_IMPL *session, const char *name, uint32_t open_flags, uint32_t flags, WT_FSTREAM **fstrp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/third_party/wiredtiger/src/include/os.h b/src/third_party/wiredtiger/src/include/os.h
index ff50fff0081..37e0799ef16 100644
--- a/src/third_party/wiredtiger/src/include/os.h
+++ b/src/third_party/wiredtiger/src/include/os.h
@@ -109,9 +109,12 @@ struct __wt_fh {
const char *name; /* File name */
uint64_t name_hash; /* hash of name */
+ uint64_t last_sync; /* time of background fsync */
+ volatile uint64_t written; /* written since fsync */
TAILQ_ENTRY(__wt_fh) q; /* internal queue */
TAILQ_ENTRY(__wt_fh) hashq; /* internal hash queue */
u_int ref; /* reference count */
+ WT_FS_OPEN_FILE_TYPE file_type; /* file type */
WT_FILE_HANDLE *handle;
};
diff --git a/src/third_party/wiredtiger/src/include/os_fhandle.i b/src/third_party/wiredtiger/src/include/os_fhandle.i
index 1aab749a2ac..a0573ee3cba 100644
--- a/src/third_party/wiredtiger/src/include/os_fhandle.i
+++ b/src/third_party/wiredtiger/src/include/os_fhandle.i
@@ -114,6 +114,10 @@ __wt_read(
ret = fh->handle->fh_read(
fh->handle, (WT_SESSION *)session, offset, len, buf);
+ /* Flag any failed read: if we're in startup, it may be fatal. */
+ if (ret != 0)
+ F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
+
time_stop = __wt_clock(session);
__wt_stat_msecs_hist_incr_fsread(session,
WT_CLOCKDIFF_MS(time_stop, time_start));
@@ -196,6 +200,7 @@ __wt_write(WT_SESSION_IMPL *session,
time_stop = __wt_clock(session);
__wt_stat_msecs_hist_incr_fswrite(session,
WT_CLOCKDIFF_MS(time_stop, time_start));
+ (void)__wt_atomic_addv64(&fh->written, len);
WT_STAT_CONN_DECR_ATOMIC(session, thread_write_active);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index 75be6c5147a..40dc8cf695e 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -453,6 +453,20 @@ struct __wt_connection_stats {
int64_t cache_bytes_dirty;
int64_t cache_pages_dirty;
int64_t cache_eviction_clean;
+ int64_t fsync_all_fh_total;
+ int64_t fsync_all_fh;
+ int64_t fsync_all_time;
+ int64_t capacity_threshold;
+ int64_t capacity_bytes_read;
+ int64_t capacity_bytes_ckpt;
+ int64_t capacity_bytes_evict;
+ int64_t capacity_bytes_log;
+ int64_t capacity_bytes_written;
+ int64_t capacity_time_total;
+ int64_t capacity_time_ckpt;
+ int64_t capacity_time_evict;
+ int64_t capacity_time_log;
+ int64_t capacity_time_read;
int64_t cond_auto_wait_reset;
int64_t cond_auto_wait;
int64_t time_travel;
@@ -494,6 +508,7 @@ struct __wt_connection_stats {
int64_t cursor_update_bytes_changed;
int64_t cursor_reopen;
int64_t cursor_open_count;
+ int64_t dh_conn_handle_size;
int64_t dh_conn_handle_count;
int64_t dh_sweep_ref;
int64_t dh_sweep_close;
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 360262e68fe..7ba90887513 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -129,8 +129,8 @@ __txn_resolve_prepared_update(WT_SESSION_IMPL *session, WT_UPDATE *upd)
*/
upd->prepare_state = WT_PREPARE_LOCKED;
WT_WRITE_BARRIER();
- upd->timestamp = txn->commit_timestamp;
- upd->durable_timestamp = txn->durable_timestamp;
+ upd->start_ts = txn->commit_timestamp;
+ upd->durable_ts = txn->durable_timestamp;
WT_PUBLISH(upd->prepare_state, WT_PREPARE_RESOLVED);
}
@@ -382,7 +382,7 @@ __wt_txn_op_apply_prepare_state(
}
for (updp = ref->page_del->update_list;
updp != NULL && *updp != NULL; ++updp) {
- (*updp)->timestamp = ts;
+ (*updp)->start_ts = ts;
/*
* Holding the ref locked means we have exclusive access, so if
* we are committing we don't need to use the prepare locked
@@ -390,7 +390,7 @@ __wt_txn_op_apply_prepare_state(
*/
(*updp)->prepare_state = prepare_state;
if (commit)
- (*updp)->durable_timestamp = txn->durable_timestamp;
+ (*updp)->durable_ts = txn->durable_timestamp;
}
ref->page_del->timestamp = ts;
if (commit)
@@ -446,13 +446,13 @@ __wt_txn_op_set_timestamp(WT_SESSION_IMPL *session, WT_TXN_OP *op)
* commit and durable timestamps need to be updated.
*/
timestamp = op->type == WT_TXN_OP_REF_DELETE ?
- &op->u.ref->page_del->timestamp : &op->u.op_upd->timestamp;
+ &op->u.ref->page_del->timestamp : &op->u.op_upd->start_ts;
if (*timestamp == WT_TS_NONE) {
*timestamp = txn->commit_timestamp;
timestamp = op->type == WT_TXN_OP_REF_DELETE ?
&op->u.ref->page_del->durable_timestamp :
- &op->u.op_upd->durable_timestamp;
+ &op->u.op_upd->durable_ts;
*timestamp = txn->durable_timestamp;
}
}
@@ -684,7 +684,7 @@ __wt_txn_upd_visible_all(WT_SESSION_IMPL *session, WT_UPDATE *upd)
upd->prepare_state == WT_PREPARE_INPROGRESS)
return (false);
- return (__wt_txn_visible_all(session, upd->txnid, upd->timestamp));
+ return (__wt_txn_visible_all(session, upd->txnid, upd->start_ts));
}
/*
@@ -782,8 +782,8 @@ __wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd)
if (prepare_state == WT_PREPARE_LOCKED)
continue;
- upd_visible = __wt_txn_visible(
- session, upd->txnid, upd->timestamp);
+ upd_visible =
+ __wt_txn_visible(session, upd->txnid, upd->start_ts);
/*
* The visibility check is only valid if the update does not
@@ -817,7 +817,7 @@ __wt_txn_upd_durable(WT_SESSION_IMPL *session, WT_UPDATE *upd)
/* If update is visible then check if it is durable. */
if (__wt_txn_upd_visible_type(session, upd) != WT_VISIBLE_TRUE)
return (false);
- return (__wt_txn_visible(session, upd->txnid, upd->durable_timestamp));
+ return (__wt_txn_visible(session, upd->txnid, upd->durable_ts));
}
/*
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 2875102571f..1ac4de23044 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -2296,6 +2296,15 @@ struct __wt_connection {
* seconds at which to check for files that are inactive and close
* them., an integer between 1 and 100000; default \c 10.}
* @config{ ),,}
+ * @config{io_capacity = (, control how many bytes per second are
+ * written and read. Exceeding the capacity results in throttling., a
+ * set of related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;total, number of bytes per second
+ * available to all subsystems in total. When set\, decisions about
+ * what subsystems are throttled\, and in what proportion\, are made
+ * internally. The minimum non-zero setting is 1MB., an integer between
+ * 0 and 1TB; default \c 0.}
+ * @config{ ),,}
* @config{log = (, enable logging. Enabling logging uses three
* sessions from the configured session_max., a set of related
* configuration options defined below.}
@@ -2510,16 +2519,19 @@ struct __wt_connection {
* @configstart{WT_CONNECTION.query_timestamp, see dist/api_data.py}
* @config{get, specify which timestamp to query: \c all_committed
* returns the largest timestamp such that all timestamps up to that
- * value have committed\, \c oldest returns the most recent \c
- * oldest_timestamp set with WT_CONNECTION::set_timestamp\, \c
+ * value have committed\, \c last_checkpoint returns the timestamp of
+ * the most recent stable checkpoint\, \c oldest returns the most recent
+ * \c oldest_timestamp set with WT_CONNECTION::set_timestamp\, \c
* oldest_reader returns the minimum of the read timestamps of all
- * active readers \c pinned returns the minimum of the\c
- * oldest_timestamp and the read timestamps of all active readers\, and
- * \c stable returns the most recent \c stable_timestamp set with
- * WT_CONNECTION::set_timestamp. See @ref transaction_timestamps., a
- * string\, chosen from the following options: \c "all_committed"\, \c
- * "last_checkpoint"\, \c "oldest"\, \c "oldest_reader"\, \c "pinned"\,
- * \c "recovery"\, \c "stable"; default \c all_committed.}
+ * active readers\, \c pinned returns the minimum of the \c
+ * oldest_timestamp and the read timestamps of all active readers\, \c
+ * recovery returns the timestamp of the most recent stable checkpoint
+ * taken prior to a shutdown and \c stable returns the most recent \c
+ * stable_timestamp set with WT_CONNECTION::set_timestamp. See @ref
+ * transaction_timestamps., a string\, chosen from the following
+ * options: \c "all_committed"\, \c "last_checkpoint"\, \c "oldest"\, \c
+ * "oldest_reader"\, \c "pinned"\, \c "recovery"\, \c "stable"; default
+ * \c all_committed.}
* @configend
* @errors
* If there is no matching timestamp (e.g., if this method is called
@@ -2949,6 +2961,15 @@ struct __wt_connection {
* @config{ ),,}
* @config{in_memory, keep data in-memory only. See @ref in_memory for more
* information., a boolean flag; default \c false.}
+ * @config{io_capacity = (, control how many bytes per second are written and
+ * read. Exceeding the capacity results in throttling., a set of related
+ * configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;total,
+ * number of bytes per second available to all subsystems in total. When set\,
+ * decisions about what subsystems are throttled\, and in what proportion\, are
+ * made internally. The minimum non-zero setting is 1MB., an integer between 0
+ * and 1TB; default \c 0.}
+ * @config{ ),,}
* @config{log = (, enable logging. Enabling logging uses three sessions from
* the configured session_max., a set of related configuration options defined
* below.}
@@ -5217,532 +5238,562 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1127
/*! cache: unmodified pages evicted */
#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1128
+/*! capacity: background fsync file handles considered */
+#define WT_STAT_CONN_FSYNC_ALL_FH_TOTAL 1129
+/*! capacity: background fsync file handles synced */
+#define WT_STAT_CONN_FSYNC_ALL_FH 1130
+/*! capacity: background fsync time (msecs) */
+#define WT_STAT_CONN_FSYNC_ALL_TIME 1131
+/*! capacity: threshold to call fsync */
+#define WT_STAT_CONN_CAPACITY_THRESHOLD 1132
+/*! capacity: throttled bytes read */
+#define WT_STAT_CONN_CAPACITY_BYTES_READ 1133
+/*! capacity: throttled bytes written for checkpoint */
+#define WT_STAT_CONN_CAPACITY_BYTES_CKPT 1134
+/*! capacity: throttled bytes written for eviction */
+#define WT_STAT_CONN_CAPACITY_BYTES_EVICT 1135
+/*! capacity: throttled bytes written for log */
+#define WT_STAT_CONN_CAPACITY_BYTES_LOG 1136
+/*! capacity: throttled bytes written total */
+#define WT_STAT_CONN_CAPACITY_BYTES_WRITTEN 1137
+/*! capacity: time waiting due to total capacity (usecs) */
+#define WT_STAT_CONN_CAPACITY_TIME_TOTAL 1138
+/*! capacity: time waiting during checkpoint (usecs) */
+#define WT_STAT_CONN_CAPACITY_TIME_CKPT 1139
+/*! capacity: time waiting during eviction (usecs) */
+#define WT_STAT_CONN_CAPACITY_TIME_EVICT 1140
+/*! capacity: time waiting during logging (usecs) */
+#define WT_STAT_CONN_CAPACITY_TIME_LOG 1141
+/*! capacity: time waiting during read (usecs) */
+#define WT_STAT_CONN_CAPACITY_TIME_READ 1142
/*! connection: auto adjusting condition resets */
-#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1129
+#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1143
/*! connection: auto adjusting condition wait calls */
-#define WT_STAT_CONN_COND_AUTO_WAIT 1130
+#define WT_STAT_CONN_COND_AUTO_WAIT 1144
/*! connection: detected system time went backwards */
-#define WT_STAT_CONN_TIME_TRAVEL 1131
+#define WT_STAT_CONN_TIME_TRAVEL 1145
/*! connection: files currently open */
-#define WT_STAT_CONN_FILE_OPEN 1132
+#define WT_STAT_CONN_FILE_OPEN 1146
/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1133
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1147
/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1134
+#define WT_STAT_CONN_MEMORY_FREE 1148
/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1135
+#define WT_STAT_CONN_MEMORY_GROW 1149
/*! connection: pthread mutex condition wait calls */
-#define WT_STAT_CONN_COND_WAIT 1136
+#define WT_STAT_CONN_COND_WAIT 1150
/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1137
+#define WT_STAT_CONN_RWLOCK_READ 1151
/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1138
+#define WT_STAT_CONN_RWLOCK_WRITE 1152
/*! connection: total fsync I/Os */
-#define WT_STAT_CONN_FSYNC_IO 1139
+#define WT_STAT_CONN_FSYNC_IO 1153
/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1140
+#define WT_STAT_CONN_READ_IO 1154
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1141
+#define WT_STAT_CONN_WRITE_IO 1155
/*! cursor: cached cursor count */
-#define WT_STAT_CONN_CURSOR_CACHED_COUNT 1142
+#define WT_STAT_CONN_CURSOR_CACHED_COUNT 1156
/*! cursor: cursor bulk loaded cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT_BULK 1143
+#define WT_STAT_CONN_CURSOR_INSERT_BULK 1157
/*! cursor: cursor close calls that result in cache */
-#define WT_STAT_CONN_CURSOR_CACHE 1144
+#define WT_STAT_CONN_CURSOR_CACHE 1158
/*! cursor: cursor create calls */
-#define WT_STAT_CONN_CURSOR_CREATE 1145
+#define WT_STAT_CONN_CURSOR_CREATE 1159
/*! cursor: cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT 1146
+#define WT_STAT_CONN_CURSOR_INSERT 1160
/*! cursor: cursor insert key and value bytes */
-#define WT_STAT_CONN_CURSOR_INSERT_BYTES 1147
+#define WT_STAT_CONN_CURSOR_INSERT_BYTES 1161
/*! cursor: cursor modify calls */
-#define WT_STAT_CONN_CURSOR_MODIFY 1148
+#define WT_STAT_CONN_CURSOR_MODIFY 1162
/*! cursor: cursor modify key and value bytes affected */
-#define WT_STAT_CONN_CURSOR_MODIFY_BYTES 1149
+#define WT_STAT_CONN_CURSOR_MODIFY_BYTES 1163
/*! cursor: cursor modify value bytes modified */
-#define WT_STAT_CONN_CURSOR_MODIFY_BYTES_TOUCH 1150
+#define WT_STAT_CONN_CURSOR_MODIFY_BYTES_TOUCH 1164
/*! cursor: cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT 1151
+#define WT_STAT_CONN_CURSOR_NEXT 1165
/*! cursor: cursor operation restarted */
-#define WT_STAT_CONN_CURSOR_RESTART 1152
+#define WT_STAT_CONN_CURSOR_RESTART 1166
/*! cursor: cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 1153
+#define WT_STAT_CONN_CURSOR_PREV 1167
/*! cursor: cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 1154
+#define WT_STAT_CONN_CURSOR_REMOVE 1168
/*! cursor: cursor remove key bytes removed */
-#define WT_STAT_CONN_CURSOR_REMOVE_BYTES 1155
+#define WT_STAT_CONN_CURSOR_REMOVE_BYTES 1169
/*! cursor: cursor reserve calls */
-#define WT_STAT_CONN_CURSOR_RESERVE 1156
+#define WT_STAT_CONN_CURSOR_RESERVE 1170
/*! cursor: cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 1157
+#define WT_STAT_CONN_CURSOR_RESET 1171
/*! cursor: cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 1158
+#define WT_STAT_CONN_CURSOR_SEARCH 1172
/*! cursor: cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1159
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1173
/*! cursor: cursor sweep buckets */
-#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1160
+#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1174
/*! cursor: cursor sweep cursors closed */
-#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1161
+#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1175
/*! cursor: cursor sweep cursors examined */
-#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1162
+#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1176
/*! cursor: cursor sweeps */
-#define WT_STAT_CONN_CURSOR_SWEEP 1163
+#define WT_STAT_CONN_CURSOR_SWEEP 1177
/*! cursor: cursor truncate calls */
-#define WT_STAT_CONN_CURSOR_TRUNCATE 1164
+#define WT_STAT_CONN_CURSOR_TRUNCATE 1178
/*! cursor: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1165
+#define WT_STAT_CONN_CURSOR_UPDATE 1179
/*! cursor: cursor update key and value bytes */
-#define WT_STAT_CONN_CURSOR_UPDATE_BYTES 1166
+#define WT_STAT_CONN_CURSOR_UPDATE_BYTES 1180
/*! cursor: cursor update value size change */
-#define WT_STAT_CONN_CURSOR_UPDATE_BYTES_CHANGED 1167
+#define WT_STAT_CONN_CURSOR_UPDATE_BYTES_CHANGED 1181
/*! cursor: cursors reused from cache */
-#define WT_STAT_CONN_CURSOR_REOPEN 1168
+#define WT_STAT_CONN_CURSOR_REOPEN 1182
/*! cursor: open cursor count */
-#define WT_STAT_CONN_CURSOR_OPEN_COUNT 1169
+#define WT_STAT_CONN_CURSOR_OPEN_COUNT 1183
+/*! data-handle: connection data handle size */
+#define WT_STAT_CONN_DH_CONN_HANDLE_SIZE 1184
/*! data-handle: connection data handles currently active */
-#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1170
+#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1185
/*! data-handle: connection sweep candidate became referenced */
-#define WT_STAT_CONN_DH_SWEEP_REF 1171
+#define WT_STAT_CONN_DH_SWEEP_REF 1186
/*! data-handle: connection sweep dhandles closed */
-#define WT_STAT_CONN_DH_SWEEP_CLOSE 1172
+#define WT_STAT_CONN_DH_SWEEP_CLOSE 1187
/*! data-handle: connection sweep dhandles removed from hash list */
-#define WT_STAT_CONN_DH_SWEEP_REMOVE 1173
+#define WT_STAT_CONN_DH_SWEEP_REMOVE 1188
/*! data-handle: connection sweep time-of-death sets */
-#define WT_STAT_CONN_DH_SWEEP_TOD 1174
+#define WT_STAT_CONN_DH_SWEEP_TOD 1189
/*! data-handle: connection sweeps */
-#define WT_STAT_CONN_DH_SWEEPS 1175
+#define WT_STAT_CONN_DH_SWEEPS 1190
/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1176
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1191
/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1177
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1192
/*! lock: checkpoint lock acquisitions */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1178
+#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1193
/*! lock: checkpoint lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1179
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1194
/*! lock: checkpoint lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1180
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1195
/*!
* lock: commit timestamp queue lock application thread time waiting
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_APPLICATION 1181
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_APPLICATION 1196
/*! lock: commit timestamp queue lock internal thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_INTERNAL 1182
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_INTERNAL 1197
/*! lock: commit timestamp queue read lock acquisitions */
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_READ_COUNT 1183
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_READ_COUNT 1198
/*! lock: commit timestamp queue write lock acquisitions */
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WRITE_COUNT 1184
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WRITE_COUNT 1199
/*! lock: dhandle lock application thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1185
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1200
/*! lock: dhandle lock internal thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1186
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1201
/*! lock: dhandle read lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1187
+#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1202
/*! lock: dhandle write lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1188
+#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1203
/*! lock: metadata lock acquisitions */
-#define WT_STAT_CONN_LOCK_METADATA_COUNT 1189
+#define WT_STAT_CONN_LOCK_METADATA_COUNT 1204
/*! lock: metadata lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1190
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1205
/*! lock: metadata lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1191
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1206
/*!
* lock: read timestamp queue lock application thread time waiting
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1192
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1207
/*! lock: read timestamp queue lock internal thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1193
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1208
/*! lock: read timestamp queue read lock acquisitions */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1194
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1209
/*! lock: read timestamp queue write lock acquisitions */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1195
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1210
/*! lock: schema lock acquisitions */
-#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1196
+#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1211
/*! lock: schema lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1197
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1212
/*! lock: schema lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1198
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1213
/*!
* lock: table lock application thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1199
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1214
/*!
* lock: table lock internal thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1200
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1215
/*! lock: table read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1201
+#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1216
/*! lock: table write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1202
+#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1217
/*! lock: txn global lock application thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1203
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1218
/*! lock: txn global lock internal thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1204
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1219
/*! lock: txn global read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1205
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1220
/*! lock: txn global write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1206
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1221
/*! log: busy returns attempting to switch slots */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1207
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1222
/*! log: force archive time sleeping (usecs) */
-#define WT_STAT_CONN_LOG_FORCE_ARCHIVE_SLEEP 1208
+#define WT_STAT_CONN_LOG_FORCE_ARCHIVE_SLEEP 1223
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1209
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1224
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1210
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1225
/*! log: log files manually zero-filled */
-#define WT_STAT_CONN_LOG_ZERO_FILLS 1211
+#define WT_STAT_CONN_LOG_ZERO_FILLS 1226
/*! log: log flush operations */
-#define WT_STAT_CONN_LOG_FLUSH 1212
+#define WT_STAT_CONN_LOG_FLUSH 1227
/*! log: log force write operations */
-#define WT_STAT_CONN_LOG_FORCE_WRITE 1213
+#define WT_STAT_CONN_LOG_FORCE_WRITE 1228
/*! log: log force write operations skipped */
-#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1214
+#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1229
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1215
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1230
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1216
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1231
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1217
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1232
/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1218
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1233
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1219
+#define WT_STAT_CONN_LOG_SCANS 1234
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1220
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1235
/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1221
+#define WT_STAT_CONN_LOG_WRITE_LSN 1236
/*! log: log server thread write LSN walk skipped */
-#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1222
+#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1237
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1223
+#define WT_STAT_CONN_LOG_SYNC 1238
/*! log: log sync time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DURATION 1224
+#define WT_STAT_CONN_LOG_SYNC_DURATION 1239
/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1225
+#define WT_STAT_CONN_LOG_SYNC_DIR 1240
/*! log: log sync_dir time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1226
+#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1241
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1227
+#define WT_STAT_CONN_LOG_WRITES 1242
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1228
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1243
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1229
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1244
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1230
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1245
/*! log: pre-allocated log files not ready and missed */
-#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1231
+#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1246
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1232
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1247
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1233
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1248
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1234
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1249
/*! log: slot close lost race */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1235
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1250
/*! log: slot close unbuffered waits */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1236
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1251
/*! log: slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1237
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1252
/*! log: slot join atomic update races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1238
+#define WT_STAT_CONN_LOG_SLOT_RACES 1253
/*! log: slot join calls atomic updates raced */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1239
+#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1254
/*! log: slot join calls did not yield */
-#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1240
+#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1255
/*! log: slot join calls found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1241
+#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1256
/*! log: slot join calls slept */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1242
+#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1257
/*! log: slot join calls yielded */
-#define WT_STAT_CONN_LOG_SLOT_YIELD 1243
+#define WT_STAT_CONN_LOG_SLOT_YIELD 1258
/*! log: slot join found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1244
+#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1259
/*! log: slot joins yield time (usecs) */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1245
+#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1260
/*! log: slot transitions unable to find free slot */
-#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1246
+#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1261
/*! log: slot unbuffered writes */
-#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1247
+#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1262
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1248
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1263
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1249
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1264
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1250
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1265
/*! log: written slots coalesced */
-#define WT_STAT_CONN_LOG_SLOT_COALESCED 1251
+#define WT_STAT_CONN_LOG_SLOT_COALESCED 1266
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1252
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1267
/*! perf: file system read latency histogram (bucket 1) - 10-49ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1253
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1268
/*! perf: file system read latency histogram (bucket 2) - 50-99ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1254
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1269
/*! perf: file system read latency histogram (bucket 3) - 100-249ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1255
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1270
/*! perf: file system read latency histogram (bucket 4) - 250-499ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1256
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1271
/*! perf: file system read latency histogram (bucket 5) - 500-999ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1257
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1272
/*! perf: file system read latency histogram (bucket 6) - 1000ms+ */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1258
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1273
/*! perf: file system write latency histogram (bucket 1) - 10-49ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1259
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1274
/*! perf: file system write latency histogram (bucket 2) - 50-99ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1260
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1275
/*! perf: file system write latency histogram (bucket 3) - 100-249ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1261
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1276
/*! perf: file system write latency histogram (bucket 4) - 250-499ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1262
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1277
/*! perf: file system write latency histogram (bucket 5) - 500-999ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1263
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1278
/*! perf: file system write latency histogram (bucket 6) - 1000ms+ */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1264
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1279
/*! perf: operation read latency histogram (bucket 1) - 100-249us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1265
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1280
/*! perf: operation read latency histogram (bucket 2) - 250-499us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1266
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1281
/*! perf: operation read latency histogram (bucket 3) - 500-999us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1267
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1282
/*! perf: operation read latency histogram (bucket 4) - 1000-9999us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1268
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1283
/*! perf: operation read latency histogram (bucket 5) - 10000us+ */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1269
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1284
/*! perf: operation write latency histogram (bucket 1) - 100-249us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1270
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1285
/*! perf: operation write latency histogram (bucket 2) - 250-499us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1271
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1286
/*! perf: operation write latency histogram (bucket 3) - 500-999us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1272
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1287
/*! perf: operation write latency histogram (bucket 4) - 1000-9999us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1273
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1288
/*! perf: operation write latency histogram (bucket 5) - 10000us+ */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1274
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1289
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1275
+#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1290
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1276
+#define WT_STAT_CONN_REC_PAGES 1291
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1277
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1292
/*! reconciliation: pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE 1278
+#define WT_STAT_CONN_REC_PAGE_DELETE 1293
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1279
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1294
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1280
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1295
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1281
+#define WT_STAT_CONN_SESSION_OPEN 1296
/*! session: session query timestamp calls */
-#define WT_STAT_CONN_SESSION_QUERY_TS 1282
+#define WT_STAT_CONN_SESSION_QUERY_TS 1297
/*! session: table alter failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1283
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1298
/*! session: table alter successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1284
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1299
/*! session: table alter unchanged and skipped */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1285
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1300
/*! session: table compact failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1286
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1301
/*! session: table compact successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1287
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1302
/*! session: table create failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1288
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1303
/*! session: table create successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1289
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1304
/*! session: table drop failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1290
+#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1305
/*! session: table drop successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1291
+#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1306
/*! session: table rebalance failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1292
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1307
/*! session: table rebalance successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1293
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1308
/*! session: table rename failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1294
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1309
/*! session: table rename successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1295
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1310
/*! session: table salvage failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1296
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1311
/*! session: table salvage successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1297
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1312
/*! session: table truncate failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1298
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1313
/*! session: table truncate successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1299
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1314
/*! session: table verify failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1300
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1315
/*! session: table verify successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1301
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1316
/*! thread-state: active filesystem fsync calls */
-#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1302
+#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1317
/*! thread-state: active filesystem read calls */
-#define WT_STAT_CONN_THREAD_READ_ACTIVE 1303
+#define WT_STAT_CONN_THREAD_READ_ACTIVE 1318
/*! thread-state: active filesystem write calls */
-#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1304
+#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1319
/*! thread-yield: application thread time evicting (usecs) */
-#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1305
+#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1320
/*! thread-yield: application thread time waiting for cache (usecs) */
-#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1306
+#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1321
/*!
* thread-yield: connection close blocked waiting for transaction state
* stabilization
*/
-#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1307
+#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1322
/*! thread-yield: connection close yielded for lsm manager shutdown */
-#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1308
+#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1323
/*! thread-yield: data handle lock yielded */
-#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1309
+#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1324
/*!
* thread-yield: get reference for page index and slot time sleeping
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1310
+#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1325
/*! thread-yield: log server sync yielded for log write */
-#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1311
+#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1326
/*! thread-yield: page access yielded due to prepare state change */
-#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1312
+#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1327
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1313
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1328
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1314
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1329
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1315
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1330
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1316
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1331
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1317
+#define WT_STAT_CONN_PAGE_SLEEP 1332
/*!
* thread-yield: page delete rollback time sleeping for state change
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1318
+#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1333
/*! thread-yield: page reconciliation yielded due to child modification */
-#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1319
+#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1334
/*! transaction: Number of prepared updates */
-#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COUNT 1320
+#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COUNT 1335
/*! transaction: Number of prepared updates added to cache overflow */
-#define WT_STAT_CONN_TXN_PREPARED_UPDATES_LOOKASIDE_INSERTS 1321
+#define WT_STAT_CONN_TXN_PREPARED_UPDATES_LOOKASIDE_INSERTS 1336
/*! transaction: Number of prepared updates resolved */
-#define WT_STAT_CONN_TXN_PREPARED_UPDATES_RESOLVED 1322
+#define WT_STAT_CONN_TXN_PREPARED_UPDATES_RESOLVED 1337
/*! transaction: commit timestamp queue entries walked */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_WALKED 1323
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_WALKED 1338
/*! transaction: commit timestamp queue insert to empty */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_EMPTY 1324
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_EMPTY 1339
/*! transaction: commit timestamp queue inserts to head */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_HEAD 1325
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_HEAD 1340
/*! transaction: commit timestamp queue inserts total */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1326
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1341
/*! transaction: commit timestamp queue length */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN 1327
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN 1342
/*! transaction: number of named snapshots created */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1328
+#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1343
/*! transaction: number of named snapshots dropped */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1329
+#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1344
/*! transaction: prepared transactions */
-#define WT_STAT_CONN_TXN_PREPARE 1330
+#define WT_STAT_CONN_TXN_PREPARE 1345
/*! transaction: prepared transactions committed */
-#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1331
+#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1346
/*! transaction: prepared transactions currently active */
-#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1332
+#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1347
/*! transaction: prepared transactions rolled back */
-#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1333
+#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1348
/*! transaction: query timestamp calls */
-#define WT_STAT_CONN_TXN_QUERY_TS 1334
+#define WT_STAT_CONN_TXN_QUERY_TS 1349
/*! transaction: read timestamp queue entries walked */
-#define WT_STAT_CONN_TXN_READ_QUEUE_WALKED 1335
+#define WT_STAT_CONN_TXN_READ_QUEUE_WALKED 1350
/*! transaction: read timestamp queue insert to empty */
-#define WT_STAT_CONN_TXN_READ_QUEUE_EMPTY 1336
+#define WT_STAT_CONN_TXN_READ_QUEUE_EMPTY 1351
/*! transaction: read timestamp queue inserts to head */
-#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1337
+#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1352
/*! transaction: read timestamp queue inserts total */
-#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1338
+#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1353
/*! transaction: read timestamp queue length */
-#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1339
+#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1354
/*! transaction: rollback to stable calls */
-#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE 1340
+#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE 1355
/*! transaction: rollback to stable updates aborted */
-#define WT_STAT_CONN_TXN_ROLLBACK_UPD_ABORTED 1341
+#define WT_STAT_CONN_TXN_ROLLBACK_UPD_ABORTED 1356
/*! transaction: rollback to stable updates removed from cache overflow */
-#define WT_STAT_CONN_TXN_ROLLBACK_LAS_REMOVED 1342
+#define WT_STAT_CONN_TXN_ROLLBACK_LAS_REMOVED 1357
/*! transaction: set timestamp calls */
-#define WT_STAT_CONN_TXN_SET_TS 1343
+#define WT_STAT_CONN_TXN_SET_TS 1358
/*! transaction: set timestamp commit calls */
-#define WT_STAT_CONN_TXN_SET_TS_COMMIT 1344
+#define WT_STAT_CONN_TXN_SET_TS_COMMIT 1359
/*! transaction: set timestamp commit updates */
-#define WT_STAT_CONN_TXN_SET_TS_COMMIT_UPD 1345
+#define WT_STAT_CONN_TXN_SET_TS_COMMIT_UPD 1360
/*! transaction: set timestamp oldest calls */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1346
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1361
/*! transaction: set timestamp oldest updates */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1347
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1362
/*! transaction: set timestamp stable calls */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE 1348
+#define WT_STAT_CONN_TXN_SET_TS_STABLE 1363
/*! transaction: set timestamp stable updates */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1349
+#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1364
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1350
+#define WT_STAT_CONN_TXN_BEGIN 1365
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1351
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1366
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1352
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1367
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1353
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1368
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1354
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1369
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1355
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1370
/*! transaction: transaction checkpoint scrub dirty target */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1356
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1371
/*! transaction: transaction checkpoint scrub time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1357
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1372
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1358
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1373
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1359
+#define WT_STAT_CONN_TXN_CHECKPOINT 1374
/*!
* transaction: transaction checkpoints skipped because database was
* clean
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1360
+#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1375
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1361
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1376
/*!
* transaction: transaction fsync calls for checkpoint after allocating
* the transaction ID
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1362
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1377
/*!
* transaction: transaction fsync duration for checkpoint after
* allocating the transaction ID (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1363
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1378
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1364
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1379
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1365
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1380
/*!
* transaction: transaction range of IDs currently pinned by named
* snapshots
*/
-#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1366
+#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1381
/*! transaction: transaction range of timestamps currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1367
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1382
/*! transaction: transaction range of timestamps pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1368
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1383
/*!
* transaction: transaction range of timestamps pinned by the oldest
* timestamp
*/
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1369
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1384
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1370
+#define WT_STAT_CONN_TXN_SYNC 1385
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1371
+#define WT_STAT_CONN_TXN_COMMIT 1386
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1372
+#define WT_STAT_CONN_TXN_ROLLBACK 1387
/*! transaction: update conflicts */
-#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1373
+#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1388
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h
index 75801ceb48b..d93f6a3be7f 100644
--- a/src/third_party/wiredtiger/src/include/wt_internal.h
+++ b/src/third_party/wiredtiger/src/include/wt_internal.h
@@ -97,6 +97,8 @@ struct __wt_cache;
typedef struct __wt_cache WT_CACHE;
struct __wt_cache_pool;
typedef struct __wt_cache_pool WT_CACHE_POOL;
+struct __wt_capacity;
+ typedef struct __wt_capacity WT_CAPACITY;
struct __wt_cell;
typedef struct __wt_cell WT_CELL;
struct __wt_cell_unpack;
@@ -359,6 +361,7 @@ typedef uint64_t wt_timestamp_t;
#include "btmem.h"
#include "btree.h"
#include "cache.h"
+#include "capacity.h"
#include "compact.h"
#include "config.h"
#include "cursor.h"
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c
index aff145be512..9e27a996251 100644
--- a/src/third_party/wiredtiger/src/log/log.c
+++ b/src/third_party/wiredtiger/src/log/log.c
@@ -182,6 +182,22 @@ __log_wait_for_earlier_slot(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
}
/*
+ * __log_fs_read --
+ * Wrapper when reading from a log file.
+ *
+ * Calls __wt_capacity_throttle with the read length and WT_THROTTLE_LOG
+ * before issuing the read through __wt_read. A non-zero result from
+ * __wt_read is handed to WT_RET_MSG together with a "log read failure"
+ * message that names the file handle; otherwise ret is returned as is.
+ */
+static int
+__log_fs_read(WT_SESSION_IMPL *session,
+ WT_FH *fh, wt_off_t offset, size_t len, void *buf)
+{
+ WT_DECL_RET;
+
+ __wt_capacity_throttle(session, len, WT_THROTTLE_LOG);
+ if ((ret = __wt_read(session, fh, offset, len, buf)) != 0)
+ WT_RET_MSG(session, ret, "%s: log read failure", fh->name);
+ return (ret);
+}
+
+/*
* __log_fs_write --
* Wrapper when writing to a log file. If we're writing to a new log
* file for the first time wait for writes to the previous log file.
@@ -207,6 +223,7 @@ __log_fs_write(WT_SESSION_IMPL *session,
__log_wait_for_earlier_slot(session, slot);
WT_RET(__wt_log_force_sync(session, &slot->slot_release_lsn));
}
+ __wt_capacity_throttle(session, len, WT_THROTTLE_LOG);
if ((ret = __wt_write(session, slot->slot_fh, offset, len, buf)) != 0)
WT_PANIC_RET(session, ret,
"%s: fatal log failure", slot->slot_fh->name);
@@ -663,6 +680,7 @@ __log_zero(WT_SESSION_IMPL *session,
*/
if ((uint32_t)len - off < bufsz)
wrlen = (uint32_t)len - off;
+ __wt_capacity_throttle(session, wrlen, WT_THROTTLE_LOG);
WT_ERR(__wt_write(session,
fh, (wt_off_t)off, wrlen, zerobuf->mem));
off += wrlen;
@@ -989,7 +1007,7 @@ __log_open_verify(WT_SESSION_IMPL *session, uint32_t id, WT_FH **fhp,
* Read in the log file header and verify it.
*/
WT_ERR(__log_openfile(session, id, 0, &fh));
- WT_ERR(__wt_read(session, fh, 0, allocsize, buf->mem));
+ WT_ERR(__log_fs_read(session, fh, 0, allocsize, buf->mem));
logrec = (WT_LOG_RECORD *)buf->mem;
__wt_log_record_byteswap(logrec);
desc = (WT_LOG_DESC *)logrec->record;
@@ -1053,7 +1071,7 @@ __log_open_verify(WT_SESSION_IMPL *session, uint32_t id, WT_FH **fhp,
goto err;
memset(buf->mem, 0, allocsize);
- WT_ERR(__wt_read(session, fh, allocsize, allocsize, buf->mem));
+ WT_ERR(__log_fs_read(session, fh, allocsize, allocsize, buf->mem));
logrec = (WT_LOG_RECORD *)buf->mem;
/*
* We have a valid header but the system record is not there.
@@ -1932,7 +1950,7 @@ __log_has_hole(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t log_size,
for (off = offset; remainder > 0;
remainder -= (wt_off_t)rdlen, off += (wt_off_t)rdlen) {
rdlen = WT_MIN(bufsz, (size_t)remainder);
- WT_ERR(__wt_read(session, fh, off, rdlen, buf));
+ WT_ERR(__log_fs_read(session, fh, off, rdlen, buf));
allocsize = (log == NULL ? WT_LOG_ALIGN : log->allocsize);
if (memcmp(buf, zerobuf, rdlen) != 0) {
/*
@@ -2450,7 +2468,7 @@ advance:
*/
WT_ASSERT(session, buf->memsize >= allocsize);
need_salvage = F_ISSET(conn, WT_CONN_SALVAGE);
- WT_ERR(__wt_read(session,
+ WT_ERR(__log_fs_read(session,
log_fh, rd_lsn.l.offset, (size_t)allocsize, buf->mem));
need_salvage = false;
/*
@@ -2504,7 +2522,7 @@ advance:
* record, especially for direct I/O.
*/
WT_ERR(__wt_buf_grow(session, buf, rdup_len));
- WT_ERR(__wt_read(session, log_fh,
+ WT_ERR(__log_fs_read(session, log_fh,
rd_lsn.l.offset, (size_t)rdup_len, buf->mem));
WT_STAT_CONN_INCR(session, log_scan_rereads);
}
diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c
index acff9771f62..40f37b961e8 100644
--- a/src/third_party/wiredtiger/src/log/log_slot.c
+++ b/src/third_party/wiredtiger/src/log/log_slot.c
@@ -535,6 +535,7 @@ __wt_log_slot_destroy(WT_SESSION_IMPL *session)
(uint64_t)slot->slot_state, WT_LOG_SLOT_RESERVED)) {
rel = WT_LOG_SLOT_RELEASED_BUFFERED(slot->slot_state);
if (rel != 0)
+ /* Writes are not throttled. */
WT_RET(__wt_write(session, slot->slot_fh,
slot->slot_start_offset, (size_t)rel,
slot->slot_buf.mem));
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
index 1da4169d234..5b91aa09db2 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
@@ -697,11 +697,11 @@ __lsm_drop_file(WT_SESSION_IMPL *session, const char *uri)
}
/*
- * __wt_lsm_free_chunks --
+ * __lsm_free_chunks --
* Try to drop chunks from the tree that are no longer required.
*/
-int
-__wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+static int
+__lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
WT_DECL_RET;
WT_LSM_CHUNK *chunk;
@@ -712,15 +712,6 @@ __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
flush_metadata = false;
- if (lsm_tree->nold_chunks == 0)
- return (0);
-
- /*
- * Make sure only a single thread is freeing the old chunk array
- * at any time.
- */
- if (!__wt_atomic_cas32(&lsm_tree->freeing_old_chunks, 0, 1))
- return (0);
/*
* Take a copy of the current state of the LSM tree and look for chunks
* to drop. We do it this way to avoid holding the LSM tree lock while
@@ -743,16 +734,6 @@ __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
}
/*
- * Don't remove files if a hot backup is in progress.
- *
- * The schema lock protects the set of live files, this check
- * prevents us from removing a file that hot backup already
- * knows about.
- */
- if (S2C(session)->hot_backup)
- break;
-
- /*
* Drop any bloom filters and chunks we can. Don't try to drop
* a chunk if the bloom filter drop fails.
* An EBUSY return indicates that a cursor is still open in
@@ -822,7 +803,6 @@ err: /* Flush the metadata unless the system is in panic */
}
__lsm_unpin_chunks(session, &cookie);
__wt_free(session, cookie.chunk_array);
- lsm_tree->freeing_old_chunks = 0;
/* Returning non-zero means there is no work to do. */
if (!flush_metadata)
@@ -830,3 +810,42 @@ err: /* Flush the metadata unless the system is in panic */
return (ret);
}
+
+/*
+ * __wt_lsm_free_chunks --
+ * Try to drop chunks from the tree that are no longer required.
+ *
+ * Gatekeeper in front of __lsm_free_chunks. Returns 0 without doing any
+ * work when the tree has no old chunks or when the compare-and-swap on
+ * freeing_old_chunks fails. The worker is also skipped while hot_backup
+ * is set. The returned value is only ever assigned from the call to
+ * __lsm_free_chunks.
+ */
+int
+__wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+
+ conn = S2C(session);
+
+ if (lsm_tree->nold_chunks == 0)
+ return (0);
+
+ /*
+ * Make sure only a single thread is freeing the old chunk array
+ * at any time.
+ *
+ * The flag claimed here is cleared again just before returning.
+ */
+ if (!__wt_atomic_cas32(&lsm_tree->freeing_old_chunks, 0, 1))
+ return (0);
+
+ /*
+ * Don't remove files if a hot backup is in progress.
+ *
+ * The schema lock protects the set of live files, this check prevents
+ * us from removing a file that hot backup already knows about.
+ *
+ * hot_backup is tested once without a lock and again after taking the
+ * hot_backup_lock read lock; the worker runs only when both tests see
+ * it unset, and the read lock is held across the worker call.
+ *
+ * NOTE(review): the text above refers to the schema lock, but the lock
+ * taken here is conn->hot_backup_lock; confirm which one is meant.
+ */
+ if (!conn->hot_backup) {
+ __wt_readlock(session, &conn->hot_backup_lock);
+ if (!conn->hot_backup)
+ ret = __lsm_free_chunks(session, lsm_tree);
+ __wt_readunlock(session, &conn->hot_backup_lock);
+ }
+
+ lsm_tree->freeing_old_chunks = 0;
+ return (ret);
+}
diff --git a/src/third_party/wiredtiger/src/os_common/os_fhandle.c b/src/third_party/wiredtiger/src/os_common/os_fhandle.c
index 3100817e650..df67508c4fe 100644
--- a/src/third_party/wiredtiger/src/os_common/os_fhandle.c
+++ b/src/third_party/wiredtiger/src/os_common/os_fhandle.c
@@ -240,6 +240,8 @@ __wt_open(WT_SESSION_IMPL *session,
WT_ERR(__wt_calloc_one(session, &fh));
WT_ERR(__wt_strdup(session, name, &fh->name));
+ fh->file_type = file_type;
+
/*
* If this is a read-only connection, open all files read-only except
* the lock file.
@@ -356,6 +358,134 @@ __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp)
}
/*
+ * __wt_fsync_background_chk --
+ * Return if background fsync is supported.
+ *
+ * Walks the connection's file handle queue with conn->fh_lock held and
+ * inspects only the first handle whose file_type is
+ * WT_FS_OPEN_FILE_TYPE_DATA. Returns false when that handle has a NULL
+ * fh_sync_nowait method, and true otherwise, including the case where
+ * no data file handle is found on the queue.
+ */
+bool
+__wt_fsync_background_chk(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_FH *fh;
+ WT_FILE_HANDLE *handle;
+ bool supported;
+
+ conn = S2C(session);
+ supported = true;
+ __wt_spin_lock(session, &conn->fh_lock);
+ /*
+ * Look for the first data file handle and see if
+ * the fsync nowait function is supported.
+ */
+ TAILQ_FOREACH(fh, &conn->fhqh, q) {
+ handle = fh->handle;
+ if (fh->file_type != WT_FS_OPEN_FILE_TYPE_DATA)
+ continue;
+ /*
+ * If we don't have a function, return false, otherwise
+ * return true. In any case, we are done with the loop.
+ */
+ if (handle->fh_sync_nowait == NULL)
+ supported = false;
+ break;
+ }
+ __wt_spin_unlock(session, &conn->fh_lock);
+ return (supported);
+}
+
+/*
+ * __fsync_background --
+ * Background fsync for a single dirty file handle.
+ *
+ * The fsync_all_fh_total statistic is incremented for every handle
+ * passed in, before any filtering. Returns 0 without syncing when the
+ * handle has no fh_sync_nowait method, when fh->written is below
+ * WT_CAPACITY_FILE_THRESHOLD, or when the handle is not a data file.
+ *
+ * conn->fh_lock is released around the __wt_fsync call and taken again
+ * before returning, so the function expects to be entered with that
+ * lock held. Returns the result of __wt_fsync, or 0 if no sync was
+ * attempted.
+ */
+static int
+__fsync_background(WT_SESSION_IMPL *session, WT_FH *fh)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_FILE_HANDLE *handle;
+ uint64_t now;
+
+ conn = S2C(session);
+ WT_STAT_CONN_INCR(session, fsync_all_fh_total);
+
+ handle = fh->handle;
+ if (handle->fh_sync_nowait == NULL ||
+ fh->written < WT_CAPACITY_FILE_THRESHOLD)
+ return (0);
+
+ /* Only sync data files; every other file type is left alone. */
+ if (fh->file_type != WT_FS_OPEN_FILE_TYPE_DATA)
+ return (0);
+
+ now = __wt_clock(session);
+ if (fh->last_sync == 0 || WT_CLOCKDIFF_SEC(now, fh->last_sync) > 0) {
+ __wt_spin_unlock(session, &conn->fh_lock);
+
+ /*
+ * We set the false flag to indicate a non-blocking background
+ * fsync, but there is no guarantee that it doesn't block. If
+ * we wanted to detect if it is blocking, adding a clock call
+ * and checking the time would be done here.
+ *
+ * On success the fsync_all_fh statistic is incremented, the
+ * handle's last_sync is set to the time sampled before the
+ * call, and its written counter is reset to zero. These
+ * updates happen before fh_lock is re-acquired.
+ */
+ ret = __wt_fsync(session, fh, false);
+ if (ret == 0) {
+ WT_STAT_CONN_INCR(session, fsync_all_fh);
+ fh->last_sync = now;
+ fh->written = 0;
+ }
+
+ __wt_spin_lock(session, &conn->fh_lock);
+ }
+ return (ret);
+}
+
+/*
+ * __wt_fsync_background --
+ * Background fsync for all dirty file handles.
+ *
+ * Traverses conn->fhqh with conn->fh_lock held and calls
+ * __fsync_background on each handle. Results from the per-handle call
+ * and from any __handle_close issued here are combined through WT_TRET
+ * and returned after the lock is released.
+ */
+int
+__wt_fsync_background(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_FH *fh, *fhnext;
+
+ conn = S2C(session);
+ __wt_spin_lock(session, &conn->fh_lock);
+ TAILQ_FOREACH_SAFE(fh, &conn->fhqh, q, fhnext) {
+ /*
+ * The worker routine will unlock the list to avoid holding it
+ * locked over an fsync. Increment the count on the current and
+ * next handles to guarantee their validity.
+ */
+ if (fhnext != NULL)
+ ++fhnext->ref;
+ ++fh->ref;
+
+ WT_TRET(__fsync_background(session, fh));
+
+ /*
+ * The file handle reference may have gone to 0, in which case
+ * we're responsible for the close. Configure the close routine
+ * to drop the lock, which means we must re-acquire it.
+ */
+ if (--fh->ref == 0) {
+ WT_TRET(__handle_close(session, fh, true));
+ __wt_spin_lock(session, &conn->fh_lock);
+ }
+
+ /*
+ * Decrement the next element's reference count. It might have
+ * gone to 0 as well, in which case we'll close it in the next
+ * loop iteration.
+ */
+ if (fhnext != NULL)
+ --fhnext->ref;
+ }
+ __wt_spin_unlock(session, &conn->fh_lock);
+ return (ret);
+}
+
+/*
* __wt_close_connection_close --
* Close any open file handles at connection close.
*/
diff --git a/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c b/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c
index 94db8806305..304a745efb2 100644
--- a/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c
+++ b/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c
@@ -349,7 +349,6 @@ __im_file_read(WT_FILE_HANDLE *file_handle,
__wt_spin_unlock(session, &im_fs->lock);
if (ret == 0)
return (0);
- F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
WT_RET_MSG(session, WT_ERROR,
"%s: handle-read: failed to read %" WT_SIZET_FMT " bytes at "
"offset %" WT_SIZET_FMT,
diff --git a/src/third_party/wiredtiger/src/os_posix/os_fs.c b/src/third_party/wiredtiger/src/os_posix/os_fs.c
index f9771fb3860..438af2eb58d 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_fs.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_fs.c
@@ -439,15 +439,12 @@ __posix_file_read(WT_FILE_HANDLE *file_handle,
/* Break reads larger than 1GB into 1GB chunks. */
for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) {
chunk = WT_MIN(len, WT_GIGABYTE);
- if ((nr = pread(pfh->fd, addr, chunk, offset)) <= 0) {
- if (nr == 0)
- F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
+ if ((nr = pread(pfh->fd, addr, chunk, offset)) <= 0)
WT_RET_MSG(session,
nr == 0 ? WT_ERROR : __wt_errno(),
"%s: handle-read: pread: failed to read %"
WT_SIZET_FMT " bytes at offset %" PRIuMAX,
file_handle->name, chunk, (uintmax_t)offset);
- }
}
return (0);
}
diff --git a/src/third_party/wiredtiger/src/os_win/os_fs.c b/src/third_party/wiredtiger/src/os_win/os_fs.c
index 684a28bfd51..b2f90233a4c 100644
--- a/src/third_party/wiredtiger/src/os_win/os_fs.c
+++ b/src/third_party/wiredtiger/src/os_win/os_fs.c
@@ -299,8 +299,6 @@ __win_file_read(WT_FILE_HANDLE *file_handle,
win_fh->filehandle, addr, chunk, &nr, &overlapped)) {
windows_error = __wt_getlasterror();
ret = __wt_map_windows_error(windows_error);
- if (ret == WT_ERROR)
- F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
__wt_err(session, ret,
"%s: handle-read: ReadFile: failed to read %lu "
"bytes at offset %" PRIuMAX ": %s",
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_track.c b/src/third_party/wiredtiger/src/reconcile/rec_track.c
index 6508db6df8f..d84d5524df3 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_track.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_track.c
@@ -39,7 +39,7 @@ __ovfl_discard_verbose(
WT_RET(__wt_scr_alloc(session, 512, &tmp));
unpack = &_unpack;
- __wt_cell_unpack(page, cell, unpack);
+ __wt_cell_unpack(session, page, cell, unpack);
__wt_verbose(session, WT_VERB_OVERFLOW,
"discard: %s%s%p %s",
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 647712093a8..786f2bdec81 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -12,8 +12,6 @@ struct __rec_chunk; typedef struct __rec_chunk WT_CHUNK;
struct __rec_dictionary; typedef struct __rec_dictionary WT_DICTIONARY;
struct __rec_kv; typedef struct __rec_kv WT_KV;
-#define WT_TS_FIXME 37 /* Fake timestamp */
-
/*
* Reconciliation is the process of taking an in-memory page, walking each entry
* in the page, building a backing disk image in a temporary buffer representing
@@ -273,6 +271,16 @@ typedef struct {
WT_CURSOR_BTREE update_modify_cbt;
} WT_RECONCILE;
+typedef struct {
+ WT_UPDATE *upd; /* Update to write (or NULL if none selected) */
+
+ uint64_t txnid; /* Transaction ID, timestamps for upd */
+ wt_timestamp_t start_ts, stop_ts;
+
+ bool upd_saved; /* Update list saved by __rec_update_save */
+
+} WT_UPDATE_SELECT;
+
#define WT_CROSSING_MIN_BND(r, next_len) \
((r)->cur_ptr->min_offset == 0 && \
(next_len) > (r)->min_space_avail)
@@ -1157,8 +1165,8 @@ __rec_append_orig_value(WT_SESSION_IMPL *session,
*/
if (upd->type == WT_UPDATE_BIRTHMARK) {
append->txnid = upd->txnid;
- append->timestamp = upd->timestamp;
- append->durable_timestamp = upd->durable_timestamp;
+ append->start_ts = upd->start_ts;
+ append->durable_ts = upd->durable_ts;
append->next = upd->next;
}
@@ -1176,14 +1184,13 @@ err: __wt_scr_free(session, &tmp);
}
/*
- * __rec_txn_read --
+ * __rec_upd_select --
* Return the update in a list that should be written (or NULL if none can
* be written).
*/
static int
-__rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
- WT_INSERT *ins, void *ripcip, WT_CELL_UNPACK *vpack,
- bool *upd_savedp, WT_UPDATE **updp)
+__rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins,
+ void *ripcip, WT_CELL_UNPACK *vpack, WT_UPDATE_SELECT *upd_select)
{
WT_PAGE *page;
WT_UPDATE *first_ts_upd, *first_txn_upd, *first_upd, *upd;
@@ -1192,9 +1199,12 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
uint64_t max_txn, txnid;
bool all_visible, prepared, skipped_birthmark, uncommitted;
- if (upd_savedp != NULL)
- *upd_savedp = false;
- *updp = NULL;
+ /*
+ * The "saved updates" return value is used independently of returning
+ * an update we can write, both must be initialized.
+ */
+ upd_select->upd = NULL;
+ upd_select->upd_saved = false;
page = r->page;
first_ts_upd = first_txn_upd = NULL;
@@ -1252,7 +1262,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
continue;
/* Consider a non durable update as uncommitted. */
- if (upd->timestamp != WT_TS_NONE &&
+ if (upd->start_ts != WT_TS_NONE &&
!__wt_txn_upd_durable(session, upd)) {
uncommitted = r->update_uncommitted = true;
continue;
@@ -1261,7 +1271,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
}
/* Track the first update with non-zero timestamp. */
- if (first_ts_upd == NULL && upd->timestamp != WT_TS_NONE)
+ if (first_ts_upd == NULL && upd->start_ts != WT_TS_NONE)
first_ts_upd = upd;
/*
@@ -1282,8 +1292,8 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
* (but we save enough information that checkpoint can fix
* things up if we choose an update that is too new).
*/
- if (*updp == NULL && r->las_skew_newest)
- *updp = upd;
+ if (upd_select->upd == NULL && r->las_skew_newest)
+ upd_select->upd = upd;
/* Consider non durable updates as uncommitted. */
if ((F_ISSET(r, WT_REC_VISIBLE_ALL) ?
@@ -1302,7 +1312,8 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
* discard an uncommitted update.
*/
if (F_ISSET(r, WT_REC_UPDATE_RESTORE) &&
- *updp != NULL && (uncommitted || prepared)) {
+ upd_select->upd != NULL &&
+ (uncommitted || prepared)) {
r->leave_dirty = true;
return (__wt_set_return(session, EBUSY));
}
@@ -1318,17 +1329,26 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
* (set to the first uncommitted transaction). Lookaside with
* stable timestamp always takes the first stable update.
*/
- if (*updp == NULL)
- *updp = upd;
+ if (upd_select->upd == NULL)
+ upd_select->upd = upd;
}
/* Keep track of the selected update. */
- upd = *updp;
+ upd = upd_select->upd;
/* Reconciliation should never see an aborted or reserved update. */
WT_ASSERT(session, upd == NULL ||
(upd->txnid != WT_TXN_ABORTED && upd->type != WT_UPDATE_RESERVE));
+ /*
+ * The checkpoint transaction is special. Make sure we never write
+ * metadata updates from a checkpoint in a concurrent session.
+ */
+ WT_ASSERT(session, !WT_IS_METADATA(session->dhandle) ||
+ upd == NULL || upd->txnid == WT_TXN_NONE ||
+ upd->txnid != S2C(session)->txn_global.checkpoint_state.id ||
+ WT_SESSION_IS_CHECKPOINT(session));
+
/* If all of the updates were aborted, quit. */
if (first_txn_upd == NULL) {
WT_ASSERT(session, upd == NULL);
@@ -1340,13 +1360,29 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
r->update_used = true;
/*
- * The checkpoint transaction is special. Make sure we never write
- * metadata updates from a checkpoint in a concurrent session.
+ * The start timestamp is determined by the commit timestamp when the
+ * key is first inserted (or last updated). The end timestamp is set
+ * when a key/value pair becomes invalid, either because of a remove
+ * or a modify/update operation on the same key.
*/
- WT_ASSERT(session, !WT_IS_METADATA(session->dhandle) ||
- upd == NULL || upd->txnid == WT_TXN_NONE ||
- upd->txnid != S2C(session)->txn_global.checkpoint_state.id ||
- WT_SESSION_IS_CHECKPOINT(session));
+ if (upd != NULL) {
+ upd_select->txnid = upd->txnid;
+
+ /*
+ * TIMESTAMP-FIXME
+ * This is waiting on the WT_UPDATE structure's start/stop
+ * timestamp work. For now, if we don't have a timestamp,
+ * just pretend it's durable, otherwise pretend the start
+ * and stop timestamps are the same.
+ *
+ */
+ if (upd_select->upd->start_ts == WT_TS_NONE) {
+ upd_select->start_ts = WT_TS_NONE;
+ upd_select->stop_ts = WT_TS_MAX;
+ } else
+ upd_select->start_ts =
+ upd_select->stop_ts = upd_select->upd->start_ts;
+ }
/*
* Track the most recent transaction in the page. We store this in the
@@ -1358,8 +1394,8 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
r->max_txn = max_txn;
/* Update the maximum timestamp. */
- if (first_ts_upd != NULL && r->max_timestamp < first_ts_upd->timestamp)
- r->max_timestamp = first_ts_upd->timestamp;
+ if (first_ts_upd != NULL && r->max_timestamp < first_ts_upd->start_ts)
+ r->max_timestamp = first_ts_upd->start_ts;
/*
* If the update we chose was a birthmark, or we are doing
@@ -1369,7 +1405,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
if (upd != NULL &&
(upd->type == WT_UPDATE_BIRTHMARK ||
(F_ISSET(r, WT_REC_UPDATE_RESTORE) && skipped_birthmark)))
- *updp = NULL;
+ upd_select->upd = NULL;
/*
* Check if all updates on the page are visible. If not, it must stay
@@ -1379,7 +1415,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
* order), so we track the maximum transaction ID and the newest update
* with a timestamp (if any).
*/
- timestamp = first_ts_upd == NULL ? 0 : first_ts_upd->timestamp;
+ timestamp = first_ts_upd == NULL ? 0 : first_ts_upd->start_ts;
all_visible = upd == first_txn_upd && !(uncommitted || prepared) &&
(F_ISSET(r, WT_REC_VISIBLE_ALL) ?
__wt_txn_visible_all(session, max_txn, timestamp) :
@@ -1427,9 +1463,9 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
* The order of the updates on the list matters, we can't move only the
* unresolved updates, move the entire update list.
*/
- WT_RET(__rec_update_save(session, r, ins, ripcip, *updp, upd_memsize));
- if (upd_savedp != NULL)
- *upd_savedp = true;
+ WT_RET(__rec_update_save(
+ session, r, ins, ripcip, upd_select->upd, upd_memsize));
+ upd_select->upd_saved = true;
/*
* Track the first off-page update when saving history in the lookaside
@@ -1441,18 +1477,18 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
if (WT_TXNID_LT(r->unstable_txn, first_upd->txnid))
r->unstable_txn = first_upd->txnid;
if (first_ts_upd != NULL &&
- r->unstable_timestamp < first_ts_upd->timestamp)
- r->unstable_timestamp = first_ts_upd->timestamp;
+ r->unstable_timestamp < first_ts_upd->start_ts)
+ r->unstable_timestamp = first_ts_upd->start_ts;
} else if (F_ISSET(r, WT_REC_LOOKASIDE)) {
- for (upd = first_upd; upd != *updp; upd = upd->next) {
+ for (upd = first_upd; upd != upd_select->upd; upd = upd->next) {
if (upd->txnid == WT_TXN_ABORTED)
continue;
if (upd->txnid != WT_TXN_NONE &&
WT_TXNID_LT(upd->txnid, r->unstable_txn))
r->unstable_txn = upd->txnid;
- if (upd->timestamp < r->unstable_timestamp)
- r->unstable_timestamp = upd->timestamp;
+ if (upd->start_ts < r->unstable_timestamp)
+ r->unstable_timestamp = upd->start_ts;
}
}
@@ -1461,7 +1497,9 @@ check_original_value:
* Paranoia: check that we didn't choose an update that has since been
* rolled back.
*/
- WT_ASSERT(session, *updp == NULL || (*updp)->txnid != WT_TXN_ABORTED);
+ WT_ASSERT(session,
+ upd_select->upd == NULL ||
+ upd_select->upd->txnid != WT_TXN_ABORTED);
/*
* Returning an update means the original on-page value might be lost,
@@ -1474,7 +1512,8 @@ check_original_value:
* - or any reconciliation of a backing overflow record that will be
* physically removed once it's no longer needed.
*/
- if (*updp != NULL && (!WT_UPDATE_DATA_VALUE(*updp) ||
+ if (upd_select->upd != NULL &&
+ (!WT_UPDATE_DATA_VALUE(upd_select->upd) ||
F_ISSET(r, WT_REC_LOOKASIDE) || (vpack != NULL &&
vpack->ovfl && vpack->raw != WT_CELL_VALUE_OVFL_RM)))
WT_RET(
@@ -1937,7 +1976,7 @@ __rec_dict_replace(WT_SESSION_IMPL *session, WT_RECONCILE *r,
offset = (uint64_t)WT_PTRDIFF(r->first_free,
(uint8_t *)r->cur_ptr->image.mem + dp->offset);
val->len = val->cell_len = __wt_cell_pack_copy(
- &val->cell, start_ts, stop_ts, rle, offset);
+ session, &val->cell, start_ts, stop_ts, rle, offset);
val->buf.data = NULL;
val->buf.size = 0;
}
@@ -3600,7 +3639,7 @@ __wt_bulk_insert_var(
val = &r->v;
if (deleted) {
val->cell_len = __wt_cell_pack_del(
- &val->cell, WT_TS_NONE, WT_TS_MAX, cbulk->rle);
+ session, &val->cell, WT_TS_NONE, WT_TS_MAX, cbulk->rle);
val->buf.data = NULL;
val->buf.size = 0;
val->len = val->cell_len;
@@ -3729,7 +3768,7 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
if (addr == NULL && __wt_off_page(page, ref->addr))
addr = ref->addr;
if (addr == NULL) {
- __wt_cell_unpack(page, ref->addr, vpack);
+ __wt_cell_unpack(session, page, ref->addr, vpack);
val->buf.data = ref->addr;
val->buf.size = __wt_cell_total_len(vpack);
val->cell_len = 0;
@@ -3813,6 +3852,7 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
WT_INSERT *ins;
WT_PAGE *page;
WT_UPDATE *upd;
+ WT_UPDATE_SELECT upd_select;
uint64_t recno;
uint32_t entry, nrecs;
@@ -3828,7 +3868,9 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
/* Update any changes to the original on-page data items. */
WT_SKIP_FOREACH(ins, WT_COL_UPDATE_SINGLE(page)) {
- WT_RET(__rec_txn_read(session, r, ins, NULL, NULL, NULL, &upd));
+ WT_RET(__rec_upd_select(
+ session, r, ins, NULL, NULL, &upd_select));
+ upd = upd_select.upd;
if (upd != NULL)
__bit_setv(r->first_free,
WT_INSERT_RECNO(ins) - pageref->ref_recno,
@@ -3872,8 +3914,9 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
break;
upd = NULL;
} else {
- WT_RET(__rec_txn_read(
- session, r, ins, NULL, NULL, NULL, &upd));
+ WT_RET(__rec_upd_select(
+ session, r, ins, NULL, NULL, &upd_select));
+ upd = upd_select.upd;
recno = WT_INSERT_RECNO(ins);
}
for (;;) {
@@ -4004,15 +4047,14 @@ __rec_col_fix_slvg(WT_SESSION_IMPL *session,
*/
static int
__rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r,
- WT_SALVAGE_COOKIE *salvage,
- WT_ITEM *value, bool deleted, uint8_t overflow_type,
- wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle)
+ WT_SALVAGE_COOKIE *salvage, WT_ITEM *value,
+ wt_timestamp_t start_ts, wt_timestamp_t stop_ts,
+ uint64_t rle, bool deleted, bool overflow_type)
{
WT_BTREE *btree;
WT_KV *val;
btree = S2BT(session);
-
val = &r->v;
/*
@@ -4047,14 +4089,14 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r,
}
if (deleted) {
- val->cell_len =
- __wt_cell_pack_del(&val->cell, start_ts, stop_ts, rle);
+ val->cell_len = __wt_cell_pack_del(
+ session, &val->cell, start_ts, stop_ts, rle);
val->buf.data = NULL;
val->buf.size = 0;
val->len = val->cell_len;
} else if (overflow_type) {
- val->cell_len = __wt_cell_pack_ovfl(&val->cell,
- overflow_type, start_ts, stop_ts, rle, value->size);
+ val->cell_len = __wt_cell_pack_ovfl(session, &val->cell,
+ WT_CELL_VALUE_OVFL, start_ts, stop_ts, rle, value->size);
val->buf.data = value->data;
val->buf.size = value->size;
val->len = val->cell_len + value->size;
@@ -4088,6 +4130,11 @@ __rec_col_var(WT_SESSION_IMPL *session,
WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage)
{
enum { OVFL_IGNORE, OVFL_UNUSED, OVFL_USED } ovfl_state;
+ struct {
+ WT_ITEM *value; /* Value */
+ wt_timestamp_t start_ts, stop_ts; /* Timestamps */
+ bool deleted; /* If deleted */
+ } last;
WT_BTREE *btree;
WT_CELL *cell;
WT_CELL_UNPACK *vpack, _vpack;
@@ -4096,30 +4143,38 @@ __rec_col_var(WT_SESSION_IMPL *session,
WT_DECL_ITEM(orig);
WT_DECL_RET;
WT_INSERT *ins;
- WT_ITEM *last;
WT_PAGE *page;
WT_UPDATE *upd;
+ WT_UPDATE_SELECT upd_select;
wt_timestamp_t start_ts, stop_ts;
uint64_t n, nrepeat, repeat_count, rle, skip, src_recno;
uint32_t i, size;
- bool deleted, last_deleted, orig_deleted, update_no_copy;
+ bool deleted, orig_deleted, update_no_copy;
const void *data;
btree = S2BT(session);
- page = pageref->page;
- last = r->last;
vpack = &_vpack;
cbt = &r->update_modify_cbt;
+ page = pageref->page;
+ upd = NULL;
+ size = 0;
+ data = NULL;
+
+ /* Set the "last" values to cause failure if they're not set. */
+ last.value = r->last;
+ last.start_ts = last.stop_ts = WT_TS_NONE;
+ last.deleted = false;
+
+ /*
+ * Set the start/stop values to cause failure if they're not set.
+ * [-Werror=maybe-uninitialized]
+ */
+ start_ts = stop_ts = WT_TS_NONE;
WT_RET(__rec_split_init(session,
r, page, pageref->ref_recno, btree->maxleafpage_precomp));
WT_RET(__wt_scr_alloc(session, 0, &orig));
- data = NULL;
- size = 0;
- upd = NULL;
-
- start_ts = stop_ts = WT_TS_FIXME;
/*
* The salvage code may be calling us to reconcile a page where there
@@ -4133,11 +4188,12 @@ __rec_col_var(WT_SESSION_IMPL *session,
* helper function's assistance.)
*/
rle = 0;
- last_deleted = false;
if (salvage != NULL && salvage->missing != 0) {
if (salvage->skip == 0) {
rle = salvage->missing;
- last_deleted = true;
+ last.start_ts = WT_TS_NONE;
+ last.stop_ts = WT_TS_MAX;
+ last.deleted = true;
/*
* Correct the number of records we're going to "take",
@@ -4145,9 +4201,9 @@ __rec_col_var(WT_SESSION_IMPL *session,
*/
salvage->take += salvage->missing;
} else
- WT_ERR(__rec_col_var_helper(session,
- r, NULL, NULL, true, false,
- WT_TS_NONE, WT_TS_MAX, salvage->missing));
+ WT_ERR(__rec_col_var_helper(session, r,
+ NULL, NULL, WT_TS_NONE, WT_TS_MAX,
+ salvage->missing, true, false));
}
/*
@@ -4167,11 +4223,15 @@ __rec_col_var(WT_SESSION_IMPL *session,
WT_COL_FOREACH(page, cip, i) {
ovfl_state = OVFL_IGNORE;
if ((cell = WT_COL_PTR(page, cip)) == NULL) {
+ start_ts = WT_TS_NONE;
+ stop_ts = WT_TS_MAX;
nrepeat = 1;
ins = NULL;
orig_deleted = true;
} else {
- __wt_cell_unpack(page, cell, vpack);
+ __wt_cell_unpack(session, page, cell, vpack);
+ start_ts = vpack->start_ts;
+ stop_ts = vpack->stop_ts;
nrepeat = __wt_cell_rle(vpack);
ins = WT_SKIP_FIRST(WT_COL_UPDATE(page, cip));
@@ -4227,8 +4287,23 @@ record_loop: /*
n < nrepeat; n += repeat_count, src_recno += repeat_count) {
upd = NULL;
if (ins != NULL && WT_INSERT_RECNO(ins) == src_recno) {
- WT_ERR(__rec_txn_read(
- session, r, ins, cip, vpack, NULL, &upd));
+ WT_ERR(__rec_upd_select(
+ session, r, ins, cip, vpack, &upd_select));
+ upd = upd_select.upd;
+ if (upd == NULL) {
+ /*
+ * TIMESTAMP-FIXME
+ * I'm pretty sure this is wrong: a NULL
+ * update means an item was deleted, and
+ * I think that requires a tombstone on
+ * the page.
+ */
+ start_ts = WT_TS_NONE;
+ stop_ts = WT_TS_MAX;
+ } else {
+ start_ts = upd_select.start_ts;
+ stop_ts = upd_select.stop_ts;
+ }
ins = WT_SKIP_NEXT(ins);
}
@@ -4316,18 +4391,19 @@ record_loop: /*
*/
if (rle != 0) {
WT_ERR(__rec_col_var_helper(
- session, r, salvage, last,
- last_deleted, 0,
- start_ts, stop_ts, rle));
+ session, r, salvage,
+ last.value,
+ last.start_ts, last.stop_ts,
+ rle, last.deleted, false));
rle = 0;
}
- last->data = vpack->data;
- last->size = vpack->size;
- WT_ERR(__rec_col_var_helper(
- session, r, salvage, last, false,
- WT_CELL_VALUE_OVFL,
- start_ts, stop_ts, repeat_count));
+ last.value->data = vpack->data;
+ last.value->size = vpack->size;
+ WT_ERR(__rec_col_var_helper(session, r,
+ salvage,
+ last.value, start_ts, stop_ts,
+ repeat_count, false, true));
/* Track if page has overflow items. */
r->ovfl_items = true;
@@ -4367,16 +4443,20 @@ compare: /*
* we've been doing that all along.
*/
if (rle != 0) {
- if ((deleted && last_deleted) ||
- (!last_deleted && !deleted &&
- last->size == size &&
- memcmp(last->data, data, size) == 0)) {
+ if ((!__wt_process.page_version_ts ||
+ (last.start_ts == start_ts &&
+ last.stop_ts == stop_ts)) &&
+ ((deleted && last.deleted) ||
+ (!deleted && !last.deleted &&
+ last.value->size == size &&
+ memcmp(
+ last.value->data, data, size) == 0))) {
rle += repeat_count;
continue;
}
- WT_ERR(__rec_col_var_helper(session, r,
- salvage, last, last_deleted, 0,
- start_ts, stop_ts, rle));
+ WT_ERR(__rec_col_var_helper(session, r, salvage,
+ last.value, last.start_ts, last.stop_ts,
+ rle, last.deleted, false));
}
/*
@@ -4399,13 +4479,15 @@ compare: /*
* the pointers, they're not moving.
*/
if (data == vpack->data || update_no_copy) {
- last->data = data;
- last->size = size;
+ last.value->data = data;
+ last.value->size = size;
} else
WT_ERR(__wt_buf_set(
- session, last, data, size));
+ session, last.value, data, size));
}
- last_deleted = deleted;
+ last.start_ts = start_ts;
+ last.stop_ts = stop_ts;
+ last.deleted = deleted;
rle = repeat_count;
}
@@ -4449,10 +4531,24 @@ compare: /*
upd = NULL;
} else {
- WT_ERR(__rec_txn_read(
- session, r, ins, NULL, NULL, NULL, &upd));
+ WT_ERR(__rec_upd_select(
+ session, r, ins, NULL, NULL, &upd_select));
+ upd = upd_select.upd;
n = WT_INSERT_RECNO(ins);
}
+ if (upd == NULL) {
+ /*
+ * TIMESTAMP-FIXME
+ * I'm pretty sure this is wrong: a NULL update means
+ * an item was deleted, and I think that requires a
+ * tombstone on the page.
+ */
+ start_ts = WT_TS_NONE;
+ stop_ts = WT_TS_MAX;
+ } else {
+ start_ts = upd_select.start_ts;
+ stop_ts = upd_select.stop_ts;
+ }
while (src_recno <= n) {
deleted = false;
update_no_copy = true;
@@ -4465,7 +4561,10 @@ compare: /*
*/
if (src_recno < n) {
deleted = true;
- if (last_deleted) {
+ if (last.deleted &&
+ (!__wt_process.page_version_ts ||
+ (last.start_ts == start_ts &&
+ last.stop_ts == stop_ts))) {
/*
* The record adjustment is decremented
* by one so we can naturally fall into
@@ -4478,9 +4577,22 @@ compare: /*
rle += skip;
src_recno += skip;
}
- } else if (upd == NULL)
+ } else if (upd == NULL) {
+ /*
+ * TIMESTAMP-FIXME
+ * I'm pretty sure this is wrong: a NULL
+ * update means an item was deleted, and
+ * I think that requires a tombstone on
+ * the page.
+ */
+ start_ts = WT_TS_NONE;
+ stop_ts = WT_TS_MAX;
+
deleted = true;
- else
+ } else {
+ start_ts = upd_select.start_ts;
+ stop_ts = upd_select.stop_ts;
+
switch (upd->type) {
case WT_UPDATE_MODIFY:
/*
@@ -4504,22 +4616,27 @@ compare: /*
break;
WT_ILLEGAL_VALUE_ERR(session, upd->type);
}
+ }
/*
* Handle RLE accounting and comparisons -- see comment
* above, this code fragment does the same thing.
*/
if (rle != 0) {
- if ((deleted && last_deleted) ||
- (!last_deleted && !deleted &&
- last->size == size &&
- memcmp(last->data, data, size) == 0)) {
+ if ((!__wt_process.page_version_ts ||
+ (last.start_ts == start_ts &&
+ last.stop_ts == stop_ts)) &&
+ ((deleted && last.deleted) ||
+ (!deleted && !last.deleted &&
+ last.value->size == size &&
+ memcmp(
+ last.value->data, data, size) == 0))) {
++rle;
goto next;
}
- WT_ERR(__rec_col_var_helper(session, r,
- salvage, last, last_deleted, 0,
- start_ts, stop_ts, rle));
+ WT_ERR(__rec_col_var_helper(session, r, salvage,
+ last.value, last.start_ts, last.stop_ts,
+ rle, last.deleted, false));
}
/*
@@ -4533,15 +4650,17 @@ compare: /*
*/
if (!deleted) {
if (update_no_copy) {
- last->data = data;
- last->size = size;
+ last.value->data = data;
+ last.value->size = size;
} else
WT_ERR(__wt_buf_set(
- session, last, data, size));
+ session, last.value, data, size));
}
/* Ready for the next loop, reset the RLE counter. */
- last_deleted = deleted;
+ last.start_ts = start_ts;
+ last.stop_ts = stop_ts;
+ last.deleted = deleted;
rle = 1;
/*
@@ -4564,8 +4683,8 @@ next: if (src_recno == UINT64_MAX)
/* If we were tracking a record, write it. */
if (rle != 0)
- WT_ERR(__rec_col_var_helper(session, r, salvage,
- last, last_deleted, 0, start_ts, stop_ts, rle));
+ WT_ERR(__rec_col_var_helper(session, r, salvage, last.value,
+ last.start_ts, last.stop_ts, rle, last.deleted, false));
/* Write the remnant page. */
ret = __rec_split_finish(session, r);
@@ -4651,7 +4770,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
if (ikey != NULL && ikey->cell_offset != 0) {
cell =
WT_PAGE_REF_OFFSET(page, ikey->cell_offset);
- __wt_cell_unpack(page, cell, kpack);
+ __wt_cell_unpack(session, page, cell, kpack);
key_onpage_ovfl = kpack->ovfl &&
kpack->raw != WT_CELL_KEY_OVFL_RM;
}
@@ -4748,7 +4867,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
newest_start_ts = addr->newest_start_ts;
newest_stop_ts = addr->newest_stop_ts;
} else {
- __wt_cell_unpack(page, ref->addr, vpack);
+ __wt_cell_unpack(session, page, ref->addr, vpack);
if (state == WT_CHILD_PROXY) {
WT_ERR(__wt_buf_set(session, &val->buf,
ref->addr, __wt_cell_total_len(vpack)));
@@ -4886,9 +5005,10 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
WT_KV *key, *val;
WT_ROW *rip;
WT_UPDATE *upd;
+ WT_UPDATE_SELECT upd_select;
wt_timestamp_t start_ts, stop_ts;
size_t size;
- uint64_t slvg_skip;
+ uint64_t slvg_skip, txnid;
uint32_t i;
bool dictionary, key_onpage_ovfl, ovfl_key;
void *copy;
@@ -4947,17 +5067,23 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
kpack = NULL;
else {
kpack = &_kpack;
- __wt_cell_unpack(page, cell, kpack);
+ __wt_cell_unpack(session, page, cell, kpack);
}
/* Unpack the on-page value cell, set the default timestamps. */
- __wt_row_leaf_value_cell(page, rip, NULL, vpack);
+ __wt_row_leaf_value_cell(session, page, rip, NULL, vpack);
start_ts = vpack->start_ts;
stop_ts = vpack->stop_ts;
+ txnid = WT_TXN_NONE;
/* Look for an update. */
- WT_ERR(__rec_txn_read(
- session, r, NULL, rip, vpack, NULL, &upd));
+ WT_ERR(__rec_upd_select(
+ session, r, NULL, rip, vpack, &upd_select));
+ if ((upd = upd_select.upd) != NULL) {
+ txnid = upd_select.txnid;
+ start_ts = upd_select.start_ts;
+ stop_ts = upd_select.stop_ts;
+ }
/* Build value cell. */
dictionary = false;
@@ -5147,7 +5273,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
goto build;
kpack = &_kpack;
- __wt_cell_unpack(page, cell, kpack);
+ __wt_cell_unpack(session, page, cell, kpack);
if (btree->huffman_key == NULL &&
kpack->type == WT_CELL_KEY &&
tmpkey->size >= kpack->prefix) {
@@ -5215,13 +5341,11 @@ build:
/*
* Copy the key/value pair onto the page. Zero-length items must
* be globally visible as we're writing nothing to the page.
- *
- * WT_TS_FIXME: NONE-MAX is too pessimistic a test, and we may
- * want to adjust start_ts/stop_ts.
*/
__rec_image_copy(session, r, key);
if (val->len == 0 &&
- start_ts == WT_TS_NONE && stop_ts == WT_TS_MAX)
+ (!__wt_process.page_version_ts ||
+ __wt_txn_visible_all(session, txnid, stop_ts)))
r->any_empty_value = true;
else {
r->all_empty_value = false;
@@ -5259,7 +5383,9 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
WT_CURSOR_BTREE *cbt;
WT_KV *key, *val;
WT_UPDATE *upd;
+ WT_UPDATE_SELECT upd_select;
wt_timestamp_t start_ts, stop_ts;
+ uint64_t txnid;
bool ovfl_key, upd_saved;
btree = S2BT(session);
@@ -5269,9 +5395,13 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
val = &r->v;
for (; ins != NULL; ins = WT_SKIP_NEXT(ins)) {
- WT_RET(__rec_txn_read(
- session, r, ins, NULL, NULL, &upd_saved, &upd));
- start_ts = stop_ts = WT_TS_FIXME;
+ WT_RET(__rec_upd_select(
+ session, r, ins, NULL, NULL, &upd_select));
+ upd = upd_select.upd;
+ txnid = upd_select.txnid;
+ start_ts = upd_select.start_ts;
+ stop_ts = upd_select.stop_ts;
+ upd_saved = upd_select.upd_saved;
if (upd == NULL) {
/*
@@ -5345,13 +5475,11 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
/*
* Copy the key/value pair onto the page. Zero-length items must
* be globally visible as we're writing nothing to the page.
- *
- * WT_TS_FIXME: NONE-MAX is too pessimistic a test, and we may
- * want to adjust start_ts/stop_ts.
*/
__rec_image_copy(session, r, key);
if (val->len == 0 &&
- start_ts == WT_TS_NONE && stop_ts == WT_TS_MAX)
+ (!__wt_process.page_version_ts ||
+ __wt_txn_visible_all(session, txnid, stop_ts)))
r->any_empty_value = true;
else {
r->all_empty_value = false;
@@ -5949,7 +6077,8 @@ __rec_cell_build_addr(WT_SESSION_IMPL *session,
*/
val->buf.data = addr->addr;
val->buf.size = addr->size;
- val->cell_len = __wt_cell_pack_addr(&val->cell, cell_type, recno,
+ val->cell_len = __wt_cell_pack_addr(session,
+ &val->cell, cell_type, recno,
addr->oldest_start_ts, addr->newest_start_ts, addr->newest_stop_ts,
val->buf.size);
val->len = val->cell_len + val->buf.size;
@@ -5996,7 +6125,7 @@ __rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r,
}
}
val->cell_len = __wt_cell_pack_data(
- &val->cell, start_ts, stop_ts, rle, val->buf.size);
+ session, &val->cell, start_ts, stop_ts, rle, val->buf.size);
val->len = val->cell_len + val->buf.size;
return (0);
@@ -6069,7 +6198,7 @@ __rec_cell_build_ovfl(WT_SESSION_IMPL *session,
/* Build the cell and return. */
kv->cell_len = __wt_cell_pack_ovfl(
- &kv->cell, type, start_ts, stop_ts, rle, kv->buf.size);
+ session, &kv->cell, type, start_ts, stop_ts, rle, kv->buf.size);
kv->len = kv->cell_len + kv->buf.size;
err: __wt_scr_free(session, &tmp);
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index d47b6291fe1..3d5ca2d6a16 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -882,6 +882,20 @@ static const char * const __stats_connection_desc[] = {
"cache: tracked dirty bytes in the cache",
"cache: tracked dirty pages in the cache",
"cache: unmodified pages evicted",
+ "capacity: background fsync file handles considered",
+ "capacity: background fsync file handles synced",
+ "capacity: background fsync time (msecs)",
+ "capacity: threshold to call fsync",
+ "capacity: throttled bytes read",
+ "capacity: throttled bytes written for checkpoint",
+ "capacity: throttled bytes written for eviction",
+ "capacity: throttled bytes written for log",
+ "capacity: throttled bytes written total",
+ "capacity: time waiting due to total capacity (usecs)",
+ "capacity: time waiting during checkpoint (usecs)",
+ "capacity: time waiting during eviction (usecs)",
+ "capacity: time waiting during logging (usecs)",
+ "capacity: time waiting during read (usecs)",
"connection: auto adjusting condition resets",
"connection: auto adjusting condition wait calls",
"connection: detected system time went backwards",
@@ -923,6 +937,7 @@ static const char * const __stats_connection_desc[] = {
"cursor: cursor update value size change",
"cursor: cursors reused from cache",
"cursor: open cursor count",
+ "data-handle: connection data handle size",
"data-handle: connection data handles currently active",
"data-handle: connection sweep candidate became referenced",
"data-handle: connection sweep dhandles closed",
@@ -1298,6 +1313,20 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing cache_bytes_dirty */
/* not clearing cache_pages_dirty */
stats->cache_eviction_clean = 0;
+ stats->fsync_all_fh_total = 0;
+ stats->fsync_all_fh = 0;
+ /* not clearing fsync_all_time */
+ stats->capacity_threshold = 0;
+ stats->capacity_bytes_read = 0;
+ stats->capacity_bytes_ckpt = 0;
+ stats->capacity_bytes_evict = 0;
+ stats->capacity_bytes_log = 0;
+ stats->capacity_bytes_written = 0;
+ stats->capacity_time_total = 0;
+ stats->capacity_time_ckpt = 0;
+ stats->capacity_time_evict = 0;
+ stats->capacity_time_log = 0;
+ stats->capacity_time_read = 0;
stats->cond_auto_wait_reset = 0;
stats->cond_auto_wait = 0;
stats->time_travel = 0;
@@ -1339,6 +1368,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cursor_update_bytes_changed = 0;
stats->cursor_reopen = 0;
/* not clearing cursor_open_count */
+ /* not clearing dh_conn_handle_size */
/* not clearing dh_conn_handle_count */
stats->dh_sweep_ref = 0;
stats->dh_sweep_close = 0;
@@ -1760,6 +1790,21 @@ __wt_stat_connection_aggregate(
to->cache_bytes_dirty += WT_STAT_READ(from, cache_bytes_dirty);
to->cache_pages_dirty += WT_STAT_READ(from, cache_pages_dirty);
to->cache_eviction_clean += WT_STAT_READ(from, cache_eviction_clean);
+ to->fsync_all_fh_total += WT_STAT_READ(from, fsync_all_fh_total);
+ to->fsync_all_fh += WT_STAT_READ(from, fsync_all_fh);
+ to->fsync_all_time += WT_STAT_READ(from, fsync_all_time);
+ to->capacity_threshold += WT_STAT_READ(from, capacity_threshold);
+ to->capacity_bytes_read += WT_STAT_READ(from, capacity_bytes_read);
+ to->capacity_bytes_ckpt += WT_STAT_READ(from, capacity_bytes_ckpt);
+ to->capacity_bytes_evict += WT_STAT_READ(from, capacity_bytes_evict);
+ to->capacity_bytes_log += WT_STAT_READ(from, capacity_bytes_log);
+ to->capacity_bytes_written +=
+ WT_STAT_READ(from, capacity_bytes_written);
+ to->capacity_time_total += WT_STAT_READ(from, capacity_time_total);
+ to->capacity_time_ckpt += WT_STAT_READ(from, capacity_time_ckpt);
+ to->capacity_time_evict += WT_STAT_READ(from, capacity_time_evict);
+ to->capacity_time_log += WT_STAT_READ(from, capacity_time_log);
+ to->capacity_time_read += WT_STAT_READ(from, capacity_time_read);
to->cond_auto_wait_reset += WT_STAT_READ(from, cond_auto_wait_reset);
to->cond_auto_wait += WT_STAT_READ(from, cond_auto_wait);
to->time_travel += WT_STAT_READ(from, time_travel);
@@ -1804,6 +1849,7 @@ __wt_stat_connection_aggregate(
WT_STAT_READ(from, cursor_update_bytes_changed);
to->cursor_reopen += WT_STAT_READ(from, cursor_reopen);
to->cursor_open_count += WT_STAT_READ(from, cursor_open_count);
+ to->dh_conn_handle_size += WT_STAT_READ(from, dh_conn_handle_size);
to->dh_conn_handle_count += WT_STAT_READ(from, dh_conn_handle_count);
to->dh_sweep_ref += WT_STAT_READ(from, dh_sweep_ref);
to->dh_sweep_close += WT_STAT_READ(from, dh_sweep_close);
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 60fb6aee8c5..13fd1ee1233 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -643,7 +643,7 @@ __txn_commit_timestamp_validate(WT_SESSION_IMPL *session)
* Check timestamps are used in order.
*/
op_zero_ts = !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT);
- upd_zero_ts = upd->timestamp == WT_TS_NONE;
+ upd_zero_ts = upd->start_ts == WT_TS_NONE;
if (op_zero_ts != upd_zero_ts)
WT_RET_MSG(session, EINVAL,
"per-key timestamps used inconsistently");
@@ -655,14 +655,14 @@ __txn_commit_timestamp_validate(WT_SESSION_IMPL *session)
if (op_zero_ts)
continue;
- op_timestamp = op->u.op_upd->timestamp;
+ op_timestamp = op->u.op_upd->start_ts;
/*
* Only if the update structure doesn't have a timestamp
* then use the one in the transaction structure.
*/
if (op_timestamp == WT_TS_NONE)
op_timestamp = txn->commit_timestamp;
- if (op_timestamp < upd->timestamp)
+ if (op_timestamp < upd->start_ts)
WT_RET_MSG(session, EINVAL,
"out of order timestamps");
}
@@ -1016,7 +1016,7 @@ __wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[])
}
/* Set prepare timestamp. */
- upd->timestamp = ts;
+ upd->start_ts = ts;
WT_PUBLISH(upd->prepare_state, WT_PREPARE_INPROGRESS);
op->u.op_upd = NULL;
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index dfcd5eaa7c8..2ff42b7d220 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -106,10 +106,10 @@ __txn_abort_newer_update(WT_SESSION_IMPL *session,
* updates were also rolled back.
*/
if (upd->txnid == WT_TXN_ABORTED ||
- upd->timestamp == WT_TS_NONE) {
+ upd->start_ts == WT_TS_NONE) {
if (upd == first_upd)
first_upd = upd->next;
- } else if (rollback_timestamp < upd->durable_timestamp) {
+ } else if (rollback_timestamp < upd->durable_ts) {
/*
* If any updates are aborted, all newer updates
* better be aborted as well.
@@ -127,8 +127,8 @@ __txn_abort_newer_update(WT_SESSION_IMPL *session,
upd->txnid = WT_TXN_ABORTED;
WT_STAT_CONN_INCR(session, txn_rollback_upd_aborted);
- upd->timestamp = 0;
- upd->durable_timestamp = 0;
+ upd->durable_ts = 0;
+ upd->start_ts = 0;
}
}
}
diff --git a/src/third_party/wiredtiger/src/utilities/util_main.c b/src/third_party/wiredtiger/src/utilities/util_main.c
index a1ad6097e70..de4f31fcf23 100644
--- a/src/third_party/wiredtiger/src/utilities/util_main.c
+++ b/src/third_party/wiredtiger/src/utilities/util_main.c
@@ -256,6 +256,7 @@ main(int argc, char *argv[])
/* Build the configuration string. */
len = 10; /* some slop */
p1 = p2 = p3 = "";
+ len += strlen("error_prefix=wt");
if (config != NULL)
len += strlen(config);
if (cmd_config != NULL)
@@ -271,7 +272,7 @@ main(int argc, char *argv[])
(void)util_err(NULL, errno, NULL);
goto err;
}
- if ((ret = __wt_snprintf(p, len, "%s,%s,%s%s%s%s",
+ if ((ret = __wt_snprintf(p, len, "error_prefix=wt,%s,%s,%s%s%s%s",
config == NULL ? "" : config,
cmd_config == NULL ? "" : cmd_config,
rec_config, p1, p2, p3)) != 0) {
diff --git a/src/third_party/wiredtiger/test/csuite/random_directio/main.c b/src/third_party/wiredtiger/test/csuite/random_directio/main.c
index eafbe9754be..894d704a7cf 100644
--- a/src/third_party/wiredtiger/test/csuite/random_directio/main.c
+++ b/src/third_party/wiredtiger/test/csuite/random_directio/main.c
@@ -904,12 +904,11 @@ check_db(uint32_t nth, uint32_t datasize, bool directio, uint32_t flags)
printf("starting full scan at %" PRIu64 "\n", id);
gen_kv(keybuf, kvsize, id, 0, large_arr[0], true);
cursor->set_key(cursor, keybuf);
- testutil_check(cursor->search(cursor));
th = 0;
/* Keep bitmap of "active" threads. */
threadmap = (0x1U << nth) - 1;
- for (ret = 0; ret != WT_NOTFOUND && threadmap != 0;
+ for (ret = cursor->search(cursor); ret != WT_NOTFOUND && threadmap != 0;
ret = cursor->next(cursor)) {
testutil_check(ret);
testutil_check(cursor->get_key(cursor, &gotkey));
diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml
index 00e16ed43ac..9189a993732 100644
--- a/src/third_party/wiredtiger/test/evergreen.yml
+++ b/src/third_party/wiredtiger/test/evergreen.yml
@@ -965,6 +965,18 @@ tasks:
ulimit -c unlimited
largescale/run-million-collection-test.sh .
+ - name: compatibility-test-for-mongodb-releases
+ commands:
+ - func: "fetch source"
+ - command: shell.exec
+ params:
+ working_dir: "wiredtiger"
+ script: |
+ set -o errexit
+ set -o verbose
+ test/evergreen/compatibility_test_for_mongodb_releases.sh
+
+
buildvariants:
- name: ubuntu1404
display_name: Ubuntu 14.04
@@ -972,9 +984,10 @@ buildvariants:
- ubuntu1404-test
expansions:
# It's ugly, but we need the absolute path here, not the relative
- test_env_vars: PATH=/opt/mongodbtoolchain/v2/bin:$PATH LD_LIBRARY_PATH=$(pwd)/.libs
+ test_env_vars: PATH=/opt/mongodbtoolchain/v3/bin:$PATH LD_LIBRARY_PATH=$(pwd)/.libs
smp_command: -j $(grep -c ^processor /proc/cpuinfo)
- configure_env_vars: CC=/opt/mongodbtoolchain/bin/gcc CXX=/opt/mongodbtoolchain/bin/g++ PATH=/opt/mongodbtoolchain/v2/bin:$PATH
+ configure_env_vars: CC=/opt/mongodbtoolchain/v3/bin/gcc CXX=/opt/mongodbtoolchain/v3/bin/g++ PATH=/opt/mongodbtoolchain/v3/bin:$PATH
+ make_command: PATH=/opt/mongodbtoolchain/v3/bin:$PATH make
tasks:
- name: compile
- name: lang-python-test
@@ -1037,10 +1050,18 @@ buildvariants:
run_on:
- rhel62-large
expansions:
- configure_env_vars: CC=/opt/mongodbtoolchain/bin/gcc CXX=/opt/mongodbtoolchain/bin/g++
+ configure_env_vars: CC=/opt/mongodbtoolchain/v3/bin/gcc CXX=/opt/mongodbtoolchain/v3/bin/g++
tasks:
- name: million-collection-test
+- name: compatibility-tests
+ display_name: Compatibility tests
+ batchtime: 10080 # 7 days
+ run_on:
+ - ubuntu1404-test
+ tasks:
+ - name: compatibility-test-for-mongodb-releases
+
- name: windows-64
display_name: Windows 64-bit
run_on:
@@ -1058,9 +1079,9 @@ buildvariants:
- macos-1012
expansions:
smp_command: -j $(sysctl -n hw.logicalcpu)
- configure_env_vars: PATH=/opt/mongodbtoolchain/v2/bin:$PATH
- make_command: PATH=/opt/mongodbtoolchain/v2/bin:$PATH ARCHFLAGS=-Wno-error=unused-command-line-argument-hard-error-in-future make
- test_env_vars: PATH=/opt/mongodbtoolchain/v2/bin:$PATH DYLD_LIBRARY_PATH=$(pwd)/.libs
+ configure_env_vars: PATH=/opt/mongodbtoolchain/v3/bin:$PATH
+ make_command: PATH=/opt/mongodbtoolchain/v3/bin:$PATH ARCHFLAGS=-Wno-error=unused-command-line-argument-hard-error-in-future make
+ test_env_vars: PATH=/opt/mongodbtoolchain/v3/bin:$PATH DYLD_LIBRARY_PATH=$(pwd)/.libs
tasks:
- name: compile
- name: make-check-test
diff --git a/src/third_party/wiredtiger/test/evergreen/compatibility_test_for_mongodb_releases.sh b/src/third_party/wiredtiger/test/evergreen/compatibility_test_for_mongodb_releases.sh
new file mode 100755
index 00000000000..1207c479c59
--- /dev/null
+++ b/src/third_party/wiredtiger/test/evergreen/compatibility_test_for_mongodb_releases.sh
@@ -0,0 +1,138 @@
+#!/usr/bin/env bash
+##############################################################################################
+# Check releases to ensure forward and backward compatibility.
+##############################################################################################
+
+###########################################################################
+# Print the newest "mongodb-<arg1>.<N>..." tag (version-sorted); arg1: release series, e.g. 3.6.
+###########################################################################
+get_release()
+{
+    git tag | grep "^mongodb-$1\.[0-9]" | sort -V | tail -n 1
+}
+
+#############################################################
+# This function will
+# - clone WiredTiger into wt.<arg1>, check out the desired release and build it,
+# - run test/format to generate test objects (RUNDIR.fix, RUNDIR.row, RUNDIR.var).
+#
+# arg1: MongoDB tagged release number or develop branch identifier.
+#############################################################
+build_rel()
+{
+    echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+    echo "Building release: \"$1\""
+    echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+
+    git clone --quiet https://github.com/wiredtiger/wiredtiger.git "wt.$1" > /dev/null || return 1
+    cd "wt.$1" || return 1
+
+    config=""
+    config+="--enable-snappy "
+
+    case "$1" in
+    # Please note 'develop' here is planned as the future MongoDB release 4.2 - the only release that supports
+    # both enabling and disabling of timestamps in data format. Once 4.2 is released, we need to update this script.
+    "develop")
+        branch="develop";;
+    "develop-timestamps")
+        branch="develop"
+        config+="--enable-page-version-ts";;
+    *)
+        branch=$(get_release "$1");;
+    esac
+    # Check out the existing tag/branch itself; "-b" would only create a new branch at the cloned HEAD.
+    git checkout --quiet "$branch" || return 1
+
+    (sh build_posix/reconf && ./configure $config && make -j $(grep -c ^processor /proc/cpuinfo)) > /dev/null || return 1
+
+    cd test/format || return 1
+
+    # Run a configuration and generate some on-disk files.
+    args=""
+    args+="cache=80 " # Medium cache so there's eviction
+    args+="checkpoints=1 " # Force periodic writes
+    args+="compression=snappy " # We only built with snappy, force the choice
+    args+="data_source=table "
+    args+="in_memory=0 " # Interested in the on-disk format
+    args+="leak_memory=1 " # Faster runs
+    args+="logging_compression=snappy " # We only built with snappy, force the choice
+    args+="quiet=1 "
+    args+="rebalance=0 " # Faster runs
+    args+="rows=1000000 "
+    args+="salvage=0 " # Faster runs
+    args+="timer=4 "
+    args+="verify=0 " # Faster runs
+    for am in fix row var; do
+        ./t -h "RUNDIR.$am" -1 "file_type=$am" $args || return 1
+    done
+
+    return 0
+}
+
+#############################################################
+# Cross-verify two previously built releases: each release's "wt" utility
+# runs "verify table:wt" on the other's test/format RUNDIR.{fix,row,var},
+# with the snappy, reverse-collator and rotn extensions named in WIREDTIGER_CONFIG.
+# Returns 1 on the first failure, 0 otherwise.
+# arg1: release #1; arg2: release #2 (same identifiers as passed to build_rel)
+#############################################################
+verify()
+{
+ echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+ echo "Verifying release \"$1\" and \"$2\""
+ echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+ a="wt.$1"
+ b="wt.$2"
+
+ EXT="extensions=["
+ EXT+="ext/compressors/snappy/.libs/libwiredtiger_snappy.so,"
+ EXT+="ext/collators/reverse/.libs/libwiredtiger_reverse_collator.so, "
+ EXT+="ext/encryptors/rotn/.libs/libwiredtiger_rotn.so, "
+ EXT+="]"
+
+ cd $a || return 1
+ for am in fix row var; do
+ echo "$a/wt verifying $b/test/format/RUNDIR.$am..."
+ WIREDTIGER_CONFIG="$EXT" \
+ ./wt -h ../$b/test/format/RUNDIR.$am verify table:wt || return 1
+ done
+
+ cd ../$b || return 1
+ for am in fix row var; do
+ echo "$b/wt verifying $a/test/format/RUNDIR.$am..."
+ WIREDTIGER_CONFIG="$EXT" \
+ ./wt -h ../$a/test/format/RUNDIR.$am verify table:wt || return 1
+ done
+
+ return 0
+}
+
+run()
+{
+ # Build test files from each release; the subshells keep build_rel's "cd" from affecting this shell.
+ (build_rel 3.4) || return 1
+ (build_rel 3.6) || return 1
+ (build_rel 4.0) || return 1
+ (build_rel develop) || return 1
+ (build_rel develop-timestamps) || return 1
+
+ # Verify forward/backward compatibility of each listed pair, stopping at the first failure.
+ (verify 3.4 3.6) || return 1
+ (verify 3.6 4.0) || return 1
+ (verify 4.0 develop) || return 1
+ (verify 4.0 develop-timestamps) || return 1
+ (verify develop develop-timestamps) || return 1
+
+ return 0
+}
+
+# Create a fresh directory in which to do the work, removing any left over from a previous run.
+top="test-compatibility-run"
+rm -rf $top && mkdir $top && cd $top || {
+ echo "$0: unable to create $top working directory"
+ exit 1
+}
+
+run
+exit $?
diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h
index f4354588a6b..d638781197e 100644
--- a/src/third_party/wiredtiger/test/format/config.h
+++ b/src/third_party/wiredtiger/test/format/config.h
@@ -334,6 +334,10 @@ static CONFIG c[] = {
"maximum time to run in minutes",
C_IGNORE, 0, 0, UINT_MAX, &g.c_timer, NULL },
+ { "timing_stress_aggressive_sweep",
+ "stress aggressive sweep", /* 2% */
+ C_BOOL, 2, 0, 0, &g.c_timing_stress_aggressive_sweep, NULL },
+
{ "timing_stress_checkpoint",
"stress checkpoints", /* 2% */
C_BOOL, 2, 0, 0, &g.c_timing_stress_checkpoint, NULL },
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index aab40ef3174..8ef34ec226f 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -220,6 +220,7 @@ typedef struct {
uint32_t c_statistics_server;
uint32_t c_threads;
uint32_t c_timer;
+ uint32_t c_timing_stress_aggressive_sweep;
uint32_t c_timing_stress_checkpoint;
uint32_t c_timing_stress_lookaside_sweep;
uint32_t c_timing_stress_split_1;
diff --git a/src/third_party/wiredtiger/test/format/wts.c b/src/third_party/wiredtiger/test/format/wts.c
index e52640d9c46..fc12c381a23 100644
--- a/src/third_party/wiredtiger/test/format/wts.c
+++ b/src/third_party/wiredtiger/test/format/wts.c
@@ -235,6 +235,8 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp)
/* Optionally stress operations. */
CONFIG_APPEND(p, ",timing_stress_for_test=[");
+ if (g.c_timing_stress_aggressive_sweep)
+ CONFIG_APPEND(p, ",aggressive_sweep");
if (g.c_timing_stress_checkpoint)
CONFIG_APPEND(p, ",checkpoint_slow");
if (g.c_timing_stress_lookaside_sweep)
diff --git a/src/third_party/wiredtiger/test/suite/test_baseconfig.py b/src/third_party/wiredtiger/test/suite/test_baseconfig.py
index 4a97a3599a4..717f49dece2 100644..100755
--- a/src/third_party/wiredtiger/test/suite/test_baseconfig.py
+++ b/src/third_party/wiredtiger/test/suite/test_baseconfig.py
@@ -36,6 +36,8 @@ class test_baseconfig(wttest.WiredTigerTestCase):
# Open up another database and modify the baseconfig
os.mkdir("A")
conn = self.wiredtiger_open("A", 'create')
+ # Mark the new directory as corrupted
+ self.databaseCorrupted("A")
self.assertTrue(os.path.exists("A/WiredTiger.basecfg"))
with open("A/WiredTiger.basecfg", "a") as basecfg_file:
basecfg_file.write("foo!")
diff --git a/src/third_party/wiredtiger/test/suite/test_dictionary.py b/src/third_party/wiredtiger/test/suite/test_dictionary.py
index 2b49b46e7f2..0990cdfb525 100644
--- a/src/third_party/wiredtiger/test/suite/test_dictionary.py
+++ b/src/third_party/wiredtiger/test/suite/test_dictionary.py
@@ -38,8 +38,8 @@ import wiredtiger, wttest
class test_dictionary(wttest.WiredTigerTestCase):
conn_config = 'statistics=(all)'
scenarios = make_scenarios([
- ('row', dict(key_format='S', value_format='S')),
- ('var', dict(key_format='r', value_format='S')),
+ ('row', dict(key_format='S')),
+ ('var', dict(key_format='r')),
])
# Smoke test dictionary compression.
@@ -48,8 +48,10 @@ class test_dictionary(wttest.WiredTigerTestCase):
uri = 'file:test_dictionary' # This is a btree layer test.
# Create the object, open the cursor, insert some records with identical values. Use
- # alternating values, otherwise column-store will RLE compress them into a single item.
- self.session.create(uri, 'dictionary=100,value_format=S,key_format=' + self.key_format)
+ # a reasonably large page size so most of the items fit on a page. Use alternating
+ # values, otherwise column-store will RLE compress them into a single item.
+ config='leaf_page_max=64K,dictionary=100,value_format=S,key_format='
+ self.session.create(uri, config + self.key_format)
cursor = self.session.open_cursor(uri, None)
i = 0
while i < nentries:
diff --git a/src/third_party/wiredtiger/test/suite/test_reconfig01.py b/src/third_party/wiredtiger/test/suite/test_reconfig01.py
index 8957e1d4e43..84f96150768 100644
--- a/src/third_party/wiredtiger/test/suite/test_reconfig01.py
+++ b/src/third_party/wiredtiger/test/suite/test_reconfig01.py
@@ -106,6 +106,13 @@ class test_reconfig01(wttest.WiredTigerTestCase):
self.conn.reconfigure("statistics=(fast)")
self.conn.reconfigure("statistics=(none)")
+ def test_reconfig_capacity(self):
+ self.conn.reconfigure("io_capacity=(total=80M)")  # expected to be accepted
+ self.conn.reconfigure("io_capacity=(total=100M)")  # expected to be accepted
+ msg = '/below minimum/'  # message expected from the rejected 16K setting below
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.conn.reconfigure("io_capacity=(total=16K)"), msg)
+
def test_reconfig_checkpoints(self):
self.conn.reconfigure("checkpoint=(wait=0)")
self.conn.reconfigure("checkpoint=(wait=5)")
diff --git a/src/third_party/wiredtiger/test/suite/test_split.py b/src/third_party/wiredtiger/test/suite/test_split.py
index 11f0f115e1d..cde840fb055 100644
--- a/src/third_party/wiredtiger/test/suite/test_split.py
+++ b/src/third_party/wiredtiger/test/suite/test_split.py
@@ -45,9 +45,13 @@ class test_split(wttest.WiredTigerTestCase):
'allocation_size=4KB,leaf_page_max=4KB,split_pct=75')
cursor = self.session.open_cursor(self.uri, None)
+ # THIS TEST IS DEPENDENT ON THE PAGE SIZES CREATED BY RECONCILIATION.
+ # IF IT FAILS, IT MAY BE RECONCILIATION ISN'T CREATING THE SAME SIZE
+ # PAGES AS BEFORE.
+
# Create a 4KB page (more than 3KB): 40 records w / 10 byte keys
# and 81 byte values.
- for i in range(40):
+ for i in range(35):
cursor['%09d' % i] = 8 * ('%010d' % i)
# Stabilize
@@ -59,7 +63,7 @@ class test_split(wttest.WiredTigerTestCase):
# Now append a few records so we're definitely (a little) over 4KB
cursor = self.session.open_cursor(self.uri, None)
- for i in range(50,55):
+ for i in range(50,60):
cursor['%09d' % i] = 8 * ('%010d' % i)
# Stabilize
diff --git a/src/third_party/wiredtiger/test/suite/test_txn19.py b/src/third_party/wiredtiger/test/suite/test_txn19.py
index a6061ffbb9a..98053a5c7a8 100755
--- a/src/third_party/wiredtiger/test/suite/test_txn19.py
+++ b/src/third_party/wiredtiger/test/suite/test_txn19.py
@@ -201,6 +201,8 @@ class test_txn19(wttest.WiredTigerTestCase, suite_subprocess):
def corrupt_log(self, homedir):
if not self.corrupted():
return
+ # Mark this test as having corrupted files
+ self.databaseCorrupted()
self.f(self.log_number_to_file_name(homedir, self.corruptpos))
# Corrupt a second log file if needed
diff --git a/src/third_party/wiredtiger/test/suite/wttest.py b/src/third_party/wiredtiger/test/suite/wttest.py
index c0b755d2230..ca4a8295373 100644..100755
--- a/src/third_party/wiredtiger/test/suite/wttest.py
+++ b/src/third_party/wiredtiger/test/suite/wttest.py
@@ -585,6 +585,15 @@ class WiredTigerTestCase(unittest.TestCase):
msg = '**** ' + myname + ' HAS A KNOWN LIMITATION: ' + name + ' ****'
self.printOnce(msg)
+    def databaseCorrupted(self, directory=None):
+        """
+        Mark this test as having a corrupted database by creating a DATABASE_CORRUPTED
+        file in `directory`; when `directory` is None, self.home is used instead.
+        """
+        if directory is None:
+            directory = self.home
+        open(os.path.join(directory, "DATABASE_CORRUPTED"), "a").close()
+
@staticmethod
def printVerbose(level, message):
if level <= WiredTigerTestCase._verbose: