diff options
author | Luke Chen <luke.chen@mongodb.com> | 2019-03-14 13:36:01 +1100 |
---|---|---|
committer | Luke Chen <luke.chen@mongodb.com> | 2019-03-14 13:37:49 +1100 |
commit | 0a201aa5bff44f490b970727587b60066c3ce1a7 (patch) | |
tree | fcc45ed3afe3960779fc5ec29c4cdf3f2add174e | |
parent | e77f1b2475391d2031ee1c8416121c1a40502caa (diff) | |
download | mongo-0a201aa5bff44f490b970727587b60066c3ce1a7.tar.gz |
Import wiredtiger: db5942dc1dc27f35f334ff3fb4d103b0cceb7968 from branch mongodb-4.0 (tag: r4.0.7-rc0)
ref: 7f85272f91..db5942dc1d
for: 4.0.7
WT-4447 Add prototype implementation allowing limiting of IO per subsystem
WT-4532 Fix null pointer access to WT_DATA_HANDLE in __wt_page_in_func
WT-4547 Consolidate capacity and block manager bytes written
WT-4562 Coverity: possible divide by zero conn_capacity:434
WT-4576 Coverity warning in conn_capacity.c
WT-4615 Sync backup file before returning backup cursor
32 files changed, 1297 insertions, 312 deletions
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index 13d47d72d07..6908a52f5e0 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -519,6 +519,17 @@ connection_runtime_config = [ interval in seconds at which to check for files that are inactive and close them''', min=1, max=100000), ]), + Config('io_capacity', '', r''' + control how many bytes per second are written and read. Exceeding + the capacity results in throttling.''', + type='category', subconfig=[ + Config('total', '0', r''' + number of bytes per second available to all subsystems in total. + When set, decisions about what subsystems are throttled, and in + what proportion, are made internally. The minimum non-zero setting + is 1MB.''', + min='0', max='1TB'), + ]), Config('lsm_manager', '', r''' configure database wide options for LSM tree management. The LSM manager is started automatically the first time an LSM tree is opened. 
diff --git a/src/third_party/wiredtiger/dist/filelist b/src/third_party/wiredtiger/dist/filelist index 1bbeeb3c7a3..73fa6819e94 100644 --- a/src/third_party/wiredtiger/dist/filelist +++ b/src/third_party/wiredtiger/dist/filelist @@ -68,6 +68,7 @@ src/conn/api_version.c src/conn/conn_api.c src/conn/conn_cache.c src/conn/conn_cache_pool.c +src/conn/conn_capacity.c src/conn/conn_ckpt.c src/conn/conn_dhandle.c src/conn/conn_handle.c diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index c5546aed751..c251c99f2fe 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -1169,6 +1169,7 @@ scalability sched scr sd +second's secretkey sed sessionp @@ -1224,6 +1225,7 @@ subinit sublicense subone suboptimal +subsystem's subtest subtree sunique diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index 50e7be0039f..8b26fa2e9af 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -52,6 +52,10 @@ class CacheWalkStat(Stat): def __init__(self, name, desc, flags=''): flags += ',cache_walk' Stat.__init__(self, name, CacheWalkStat.prefix, desc, flags) +class CapacityStat(Stat): + prefix = 'capacity' + def __init__(self, name, desc, flags=''): + Stat.__init__(self, name, CapacityStat.prefix, desc, flags) class CompressStat(Stat): prefix = 'compression' def __init__(self, name, desc, flags=''): @@ -134,6 +138,7 @@ groups['memory'] = [ ConnStat.prefix, RecStat.prefix] groups['system'] = [ + CapacityStat.prefix, ConnStat.prefix, DhandleStat.prefix, PerfHistStat.prefix, @@ -294,6 +299,24 @@ connection_stats = [ CacheStat('cache_write_restore', 'pages written requiring in-memory restoration'), ########################################## + # Capacity statistics + ########################################## + CapacityStat('capacity_bytes_ckpt', 'throttled bytes written for 
checkpoint'), + CapacityStat('capacity_bytes_evict', 'throttled bytes written for eviction'), + CapacityStat('capacity_bytes_log', 'throttled bytes written for log'), + CapacityStat('capacity_bytes_read', 'throttled bytes read'), + CapacityStat('capacity_bytes_written', 'throttled bytes written total'), + CapacityStat('capacity_threshold', 'threshold to call fsync'), + CapacityStat('capacity_time_ckpt', 'time waiting during checkpoint (usecs)'), + CapacityStat('capacity_time_evict', 'time waiting during eviction (usecs)'), + CapacityStat('capacity_time_log', 'time waiting during logging (usecs)'), + CapacityStat('capacity_time_read', 'time waiting during read (usecs)'), + CapacityStat('capacity_time_total', 'time waiting due to total capacity (usecs)'), + CapacityStat('fsync_all_fh', 'background fsync file handles synced'), + CapacityStat('fsync_all_fh_total', 'background fsync file handles considered'), + CapacityStat('fsync_all_time', 'background fsync time (msecs)', 'no_clear,no_scale'), + + ########################################## # Cursor operations ########################################## CursorStat('cursor_open_count', 'open cursor count', 'no_clear,no_scale'), diff --git a/src/third_party/wiredtiger/examples/c/ex_all.c b/src/third_party/wiredtiger/examples/c/ex_all.c index 445a92ba5f8..3bf66a876fd 100644 --- a/src/third_party/wiredtiger/examples/c/ex_all.c +++ b/src/third_party/wiredtiger/examples/c/ex_all.c @@ -1284,6 +1284,12 @@ main(int argc, char *argv[]) /*! [Configure file_extend] */ error_check(conn->close(conn, NULL)); + /*! [Configure capacity] */ + error_check(wiredtiger_open( + home, NULL, "create,io_capacity=(total=40MB)", &conn)); + /*! [Configure capacity] */ + error_check(conn->close(conn, NULL)); + /*! 
[Eviction configuration] */ /* * Configure eviction to begin at 90% full, and run until the cache diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 38cfff85858..4a38fd2a265 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -1,5 +1,5 @@ { - "commit": "7f85272f91a95b4b47802808c44c75a8a794e7a8", + "commit": "db5942dc1dc27f35f334ff3fb4d103b0cceb7968", "github": "wiredtiger/wiredtiger.git", "vendor": "wiredtiger", "branch": "mongodb-4.0" diff --git a/src/third_party/wiredtiger/src/block/block_mgr.c b/src/third_party/wiredtiger/src/block/block_mgr.c index 2107fd85a7f..7211e5cfa24 100644 --- a/src/third_party/wiredtiger/src/block/block_mgr.c +++ b/src/third_party/wiredtiger/src/block/block_mgr.c @@ -505,6 +505,8 @@ static int __bm_write(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_checksum, bool checkpoint_io) { + __wt_capacity_throttle(session, buf->size, + checkpoint_io ? WT_THROTTLE_CKPT : WT_THROTTLE_EVICT); return (__wt_block_write(session, bm->block, buf, addr, addr_sizep, data_checksum, checkpoint_io)); } diff --git a/src/third_party/wiredtiger/src/block/block_read.c b/src/third_party/wiredtiger/src/block/block_read.c index 83c1ba17d6d..e190161fbd3 100644 --- a/src/third_party/wiredtiger/src/block/block_read.c +++ b/src/third_party/wiredtiger/src/block/block_read.c @@ -98,6 +98,7 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, block, "read", offset, size, bm->is_live, __func__, __LINE__)); #endif /* Read the block. 
*/ + __wt_capacity_throttle(session, size, WT_THROTTLE_READ); WT_RET( __wt_block_read_off(session, block, buf, offset, size, checksum)); diff --git a/src/third_party/wiredtiger/src/block/block_write.c b/src/third_party/wiredtiger/src/block/block_write.c index 51baa55b383..ec2de7375de 100644 --- a/src/third_party/wiredtiger/src/block/block_write.c +++ b/src/third_party/wiredtiger/src/block/block_write.c @@ -351,9 +351,9 @@ __block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, * cache, but only if the current session can wait. */ if (block->os_cache_dirty_max != 0 && - (block->os_cache_dirty += align_size) > block->os_cache_dirty_max && + fh->written > block->os_cache_dirty_max && __wt_session_can_wait(session)) { - block->os_cache_dirty = 0; + fh->written = 0; if ((ret = __wt_fsync(session, fh, false)) != 0) { /* * Ignore ENOTSUP, but don't try again. diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index 1095fe18560..521f3d4bdc8 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -93,6 +93,12 @@ static const WT_CONFIG_CHECK }; static const WT_CONFIG_CHECK + confchk_wiredtiger_open_io_capacity_subconfigs[] = { + { "total", "int", NULL, "min=0,max=1TB", NULL, 0 }, + { NULL, NULL, NULL, NULL, NULL, 0 } +}; + +static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure_log_subconfigs[] = { { "archive", "boolean", NULL, NULL, NULL, 0 }, { "os_cache_dirty_pct", "int", @@ -170,6 +176,9 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { { "file_manager", "category", NULL, NULL, confchk_wiredtiger_open_file_manager_subconfigs, 3 }, + { "io_capacity", "category", + NULL, NULL, + confchk_wiredtiger_open_io_capacity_subconfigs, 1 }, { "log", "category", NULL, NULL, confchk_WT_CONNECTION_reconfigure_log_subconfigs, 4 }, @@ -874,6 +883,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { 
confchk_wiredtiger_open_file_manager_subconfigs, 3 }, { "hazard_max", "int", NULL, "min=15", NULL, 0 }, { "in_memory", "boolean", NULL, NULL, NULL, 0 }, + { "io_capacity", "category", + NULL, NULL, + confchk_wiredtiger_open_io_capacity_subconfigs, 1 }, { "log", "category", NULL, NULL, confchk_wiredtiger_open_log_subconfigs, 9 }, @@ -980,6 +992,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { confchk_wiredtiger_open_file_manager_subconfigs, 3 }, { "hazard_max", "int", NULL, "min=15", NULL, 0 }, { "in_memory", "boolean", NULL, NULL, NULL, 0 }, + { "io_capacity", "category", + NULL, NULL, + confchk_wiredtiger_open_io_capacity_subconfigs, 1 }, { "log", "category", NULL, NULL, confchk_wiredtiger_open_log_subconfigs, 9 }, @@ -1083,6 +1098,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { NULL, NULL, confchk_wiredtiger_open_file_manager_subconfigs, 3 }, { "hazard_max", "int", NULL, "min=15", NULL, 0 }, + { "io_capacity", "category", + NULL, NULL, + confchk_wiredtiger_open_io_capacity_subconfigs, 1 }, { "log", "category", NULL, NULL, confchk_wiredtiger_open_log_subconfigs, 9 }, @@ -1184,6 +1202,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { NULL, NULL, confchk_wiredtiger_open_file_manager_subconfigs, 3 }, { "hazard_max", "int", NULL, "min=15", NULL, 0 }, + { "io_capacity", "category", + NULL, NULL, + confchk_wiredtiger_open_io_capacity_subconfigs, 1 }, { "log", "category", NULL, NULL, confchk_wiredtiger_open_log_subconfigs, 9 }, @@ -1292,15 +1313,15 @@ static const WT_CONFIG_ENTRY config_entries[] = { "eviction_checkpoint_target=1,eviction_dirty_target=5," "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95" ",file_manager=(close_handle_minimum=250,close_idle_time=30," - "close_scan_interval=10),log=(archive=true,os_cache_dirty_pct=0," - "prealloc=true,zero_fill=false),lsm_manager=(merge=true," - "worker_thread_max=4),lsm_merge=true," + 
"close_scan_interval=10),io_capacity=(total=0),log=(archive=true," + "os_cache_dirty_pct=0,prealloc=true,zero_fill=false)," + "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true," "operation_tracking=(enabled=false,path=\".\")," "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB)," "statistics=none,statistics_log=(json=false,on_close=false," "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," "timing_stress_for_test=,verbose=", - confchk_WT_CONNECTION_reconfigure, 23 + confchk_WT_CONNECTION_reconfigure, 24 }, { "WT_CONNECTION.rollback_to_stable", "", @@ -1545,19 +1566,20 @@ static const WT_CONFIG_ENTRY config_entries[] = { "eviction_target=80,eviction_trigger=95,exclusive=false," "extensions=,file_extend=,file_manager=(close_handle_minimum=250," "close_idle_time=30,close_scan_interval=10),hazard_max=1000," - "in_memory=false,log=(archive=true,compressor=,enabled=false," - "file_max=100MB,os_cache_dirty_pct=0,path=\".\",prealloc=true," - "recover=on,zero_fill=false),lsm_manager=(merge=true," - "worker_thread_max=4),lsm_merge=true,mmap=true,multiprocess=false" - ",operation_tracking=(enabled=false,path=\".\"),readonly=false," - "salvage=false,session_max=100,session_scratch_max=2MB," - "session_table_cache=true,shared_cache=(chunk=10MB,name=,quota=0," - "reserve=0,size=500MB),statistics=none,statistics_log=(json=false" - ",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\"" - ",wait=0),timing_stress_for_test=,transaction_sync=(enabled=false" - ",method=fsync),use_environment=true,use_environment_priv=false," + "in_memory=false,io_capacity=(total=0),log=(archive=true," + "compressor=,enabled=false,file_max=100MB,os_cache_dirty_pct=0," + "path=\".\",prealloc=true,recover=on,zero_fill=false)," + "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true," + "mmap=true,multiprocess=false,operation_tracking=(enabled=false," + "path=\".\"),readonly=false,salvage=false,session_max=100," + "session_scratch_max=2MB,session_table_cache=true," + 
"shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB)," + "statistics=none,statistics_log=(json=false,on_close=false," + "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," + "timing_stress_for_test=,transaction_sync=(enabled=false," + "method=fsync),use_environment=true,use_environment_priv=false," "verbose=,write_through=", - confchk_wiredtiger_open, 47 + confchk_wiredtiger_open, 48 }, { "wiredtiger_open_all", "async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1" @@ -1572,19 +1594,20 @@ static const WT_CONFIG_ENTRY config_entries[] = { "eviction_target=80,eviction_trigger=95,exclusive=false," "extensions=,file_extend=,file_manager=(close_handle_minimum=250," "close_idle_time=30,close_scan_interval=10),hazard_max=1000," - "in_memory=false,log=(archive=true,compressor=,enabled=false," - "file_max=100MB,os_cache_dirty_pct=0,path=\".\",prealloc=true," - "recover=on,zero_fill=false),lsm_manager=(merge=true," - "worker_thread_max=4),lsm_merge=true,mmap=true,multiprocess=false" - ",operation_tracking=(enabled=false,path=\".\"),readonly=false," - "salvage=false,session_max=100,session_scratch_max=2MB," - "session_table_cache=true,shared_cache=(chunk=10MB,name=,quota=0," - "reserve=0,size=500MB),statistics=none,statistics_log=(json=false" - ",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\"" - ",wait=0),timing_stress_for_test=,transaction_sync=(enabled=false" - ",method=fsync),use_environment=true,use_environment_priv=false," + "in_memory=false,io_capacity=(total=0),log=(archive=true," + "compressor=,enabled=false,file_max=100MB,os_cache_dirty_pct=0," + "path=\".\",prealloc=true,recover=on,zero_fill=false)," + "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true," + "mmap=true,multiprocess=false,operation_tracking=(enabled=false," + "path=\".\"),readonly=false,salvage=false,session_max=100," + "session_scratch_max=2MB,session_table_cache=true," + "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB)," + 
"statistics=none,statistics_log=(json=false,on_close=false," + "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," + "timing_stress_for_test=,transaction_sync=(enabled=false," + "method=fsync),use_environment=true,use_environment_priv=false," "verbose=,version=(major=0,minor=0),write_through=", - confchk_wiredtiger_open_all, 48 + confchk_wiredtiger_open_all, 49 }, { "wiredtiger_open_basecfg", "async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1" @@ -1598,18 +1621,19 @@ static const WT_CONFIG_ENTRY config_entries[] = { "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95" ",extensions=,file_extend=,file_manager=(close_handle_minimum=250" ",close_idle_time=30,close_scan_interval=10),hazard_max=1000," - "log=(archive=true,compressor=,enabled=false,file_max=100MB," - "os_cache_dirty_pct=0,path=\".\",prealloc=true,recover=on," - "zero_fill=false),lsm_manager=(merge=true,worker_thread_max=4)," - "lsm_merge=true,mmap=true,multiprocess=false," - "operation_tracking=(enabled=false,path=\".\"),readonly=false," - "salvage=false,session_max=100,session_scratch_max=2MB," - "session_table_cache=true,shared_cache=(chunk=10MB,name=,quota=0," - "reserve=0,size=500MB),statistics=none,statistics_log=(json=false" - ",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\"" - ",wait=0),timing_stress_for_test=,transaction_sync=(enabled=false" - ",method=fsync),verbose=,version=(major=0,minor=0),write_through=", - confchk_wiredtiger_open_basecfg, 42 + "io_capacity=(total=0),log=(archive=true,compressor=," + "enabled=false,file_max=100MB,os_cache_dirty_pct=0,path=\".\"," + "prealloc=true,recover=on,zero_fill=false)," + "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true," + "mmap=true,multiprocess=false,operation_tracking=(enabled=false," + "path=\".\"),readonly=false,salvage=false,session_max=100," + "session_scratch_max=2MB,session_table_cache=true," + "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB)," + 
"statistics=none,statistics_log=(json=false,on_close=false," + "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," + "timing_stress_for_test=,transaction_sync=(enabled=false," + "method=fsync),verbose=,version=(major=0,minor=0),write_through=", + confchk_wiredtiger_open_basecfg, 43 }, { "wiredtiger_open_usercfg", "async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1" @@ -1623,18 +1647,19 @@ static const WT_CONFIG_ENTRY config_entries[] = { "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95" ",extensions=,file_extend=,file_manager=(close_handle_minimum=250" ",close_idle_time=30,close_scan_interval=10),hazard_max=1000," - "log=(archive=true,compressor=,enabled=false,file_max=100MB," - "os_cache_dirty_pct=0,path=\".\",prealloc=true,recover=on," - "zero_fill=false),lsm_manager=(merge=true,worker_thread_max=4)," - "lsm_merge=true,mmap=true,multiprocess=false," - "operation_tracking=(enabled=false,path=\".\"),readonly=false," - "salvage=false,session_max=100,session_scratch_max=2MB," - "session_table_cache=true,shared_cache=(chunk=10MB,name=,quota=0," - "reserve=0,size=500MB),statistics=none,statistics_log=(json=false" - ",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\"" - ",wait=0),timing_stress_for_test=,transaction_sync=(enabled=false" - ",method=fsync),verbose=,write_through=", - confchk_wiredtiger_open_usercfg, 41 + "io_capacity=(total=0),log=(archive=true,compressor=," + "enabled=false,file_max=100MB,os_cache_dirty_pct=0,path=\".\"," + "prealloc=true,recover=on,zero_fill=false)," + "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true," + "mmap=true,multiprocess=false,operation_tracking=(enabled=false," + "path=\".\"),readonly=false,salvage=false,session_max=100," + "session_scratch_max=2MB,session_table_cache=true," + "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB)," + "statistics=none,statistics_log=(json=false,on_close=false," + "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," + 
"timing_stress_for_test=,transaction_sync=(enabled=false," + "method=fsync),verbose=,write_through=", + confchk_wiredtiger_open_usercfg, 42 }, { NULL, NULL, NULL, 0 } }; diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c index 6a336563538..ef0072c45ac 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -1051,6 +1051,9 @@ __conn_close(WT_CONNECTION *wt_conn, const char *config) CONNECTION_API_CALL(conn, session, close, config, cfg); + /* The default session is used to access data handles during close. */ + F_CLR(session, WT_SESSION_NO_DATA_HANDLES); + WT_TRET(__wt_config_gets(session, cfg, "leak_memory", &cval)); if (cval.val != 0) F_SET(conn, WT_CONN_LEAK_MEMORY); @@ -2312,6 +2315,11 @@ wiredtiger_dummy_session_init( * use the WT_CONNECTION_IMPL's default session and its strerror method. */ session->iface.strerror = __wt_session_strerror; + + /* + * The dummy session should never be used to access data handles. + */ + F_SET(session, WT_SESSION_NO_DATA_HANDLES); } /* @@ -2761,6 +2769,13 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, /* Start the worker threads and run recovery. */ WT_ERR(__wt_connection_workers(session, cfg)); + /* + * The default session should not open data handles after this point: + * since it can be shared between threads, relying on session->dhandle + * is not safe. + */ + F_SET(session, WT_SESSION_NO_DATA_HANDLES); + WT_STATIC_ASSERT(offsetof(WT_CONNECTION_IMPL, iface) == 0); *connectionp = &conn->iface; diff --git a/src/third_party/wiredtiger/src/conn/conn_capacity.c b/src/third_party/wiredtiger/src/conn/conn_capacity.c new file mode 100644 index 00000000000..0dd6a8c3c6d --- /dev/null +++ b/src/third_party/wiredtiger/src/conn/conn_capacity.c @@ -0,0 +1,474 @@ +/* + * Copyright (c) 2014-2019 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. 
+ * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * Compute the time in nanoseconds that must be reserved to represent + * a number of bytes in a subsystem with a particular capacity per second. + */ +#define WT_RESERVATION_NS(bytes, capacity) \ + (((bytes) * WT_BILLION) / (capacity)) + +/* + * The fraction of a second's worth of capacity that will be stolen at a + * time. The number of bytes this represents may be different for different + * subsystems, since each subsystem has its own capacity per second. + */ +#define WT_STEAL_FRACTION(x) ((x) / 16) + +/* + * __capacity_config -- + * Set I/O capacity configuration. + */ +static int +__capacity_config(WT_SESSION_IMPL *session, const char *cfg[]) +{ + WT_CAPACITY *cap; + WT_CONFIG_ITEM cval; + WT_CONNECTION_IMPL *conn; + uint64_t total; + + conn = S2C(session); + + WT_RET(__wt_config_gets(session, cfg, "io_capacity.total", &cval)); + if (cval.val != 0 && cval.val < WT_THROTTLE_MIN) + WT_RET_MSG(session, EINVAL, + "total I/O capacity value %" PRId64 " below minimum %d", + cval.val, WT_THROTTLE_MIN); + + cap = &conn->capacity; + cap->total = total = (uint64_t)cval.val; + if (cval.val != 0) { + /* + * We've been given a total capacity, set the + * capacity of all the subsystems. + */ + cap->ckpt = WT_CAPACITY_SYS(total, WT_CAP_CKPT); + cap->evict = WT_CAPACITY_SYS(total, WT_CAP_EVICT); + cap->log = WT_CAPACITY_SYS(total, WT_CAP_LOG); + cap->read = WT_CAPACITY_SYS(total, WT_CAP_READ); + + /* + * Set the threshold to the percent of our capacity to + * periodically asynchronously flush what we've written. 
+ */ + cap->threshold = ((cap->ckpt + cap->evict + cap->log) / + 100) * WT_CAPACITY_PCT; + if (cap->threshold < WT_CAPACITY_MIN_THRESHOLD) + cap->threshold = WT_CAPACITY_MIN_THRESHOLD; + WT_STAT_CONN_SET(session, capacity_threshold, cap->threshold); + } else + WT_STAT_CONN_SET(session, capacity_threshold, 0); + + return (0); +} + +/* + * __capacity_server_run_chk -- + * Check to decide if the capacity server should continue running. + */ +static bool +__capacity_server_run_chk(WT_SESSION_IMPL *session) +{ + return (F_ISSET(S2C(session), WT_CONN_SERVER_CAPACITY)); +} + +/* + * __capacity_server -- + * The capacity server thread. + */ +static WT_THREAD_RET +__capacity_server(void *arg) +{ + WT_CAPACITY *cap; + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_SESSION_IMPL *session; + uint64_t start, stop, time_ms; + + session = arg; + conn = S2C(session); + cap = &conn->capacity; + for (;;) { + /* + * Wait until signalled but check once per second in case + * the signal was missed. + */ + __wt_cond_wait(session, + conn->capacity_cond, WT_MILLION, __capacity_server_run_chk); + + /* Check if we're quitting or being reconfigured. */ + if (!__capacity_server_run_chk(session)) + break; + + cap->signalled = false; + if (cap->written < cap->threshold) + continue; + + start = __wt_clock(session); + WT_ERR(__wt_fsync_background(session)); + stop = __wt_clock(session); + time_ms = WT_CLOCKDIFF_MS(stop, start); + WT_STAT_CONN_SET(session, fsync_all_time, time_ms); + cap->written = 0; + } + + if (0) { +err: WT_PANIC_MSG(session, ret, "capacity server error"); + } + return (WT_THREAD_RET_VALUE); +} + +/* + * __capacity_server_start -- + * Start the capacity server thread. + */ +static int +__capacity_server_start(WT_CONNECTION_IMPL *conn) +{ + WT_SESSION_IMPL *session; + + F_SET(conn, WT_CONN_SERVER_CAPACITY); + + /* + * The capacity server gets its own session. 
+ */ + WT_RET(__wt_open_internal_session(conn, + "capacity-server", false, 0, &conn->capacity_session)); + session = conn->capacity_session; + + WT_RET(__wt_cond_alloc(session, + "capacity server", &conn->capacity_cond)); + + /* + * Start the thread. + */ + WT_RET(__wt_thread_create( + session, &conn->capacity_tid, __capacity_server, session)); + conn->capacity_tid_set = true; + + return (0); +} + +/* + * __wt_capacity_server_create -- + * Configure and start the capacity server. + */ +int +__wt_capacity_server_create(WT_SESSION_IMPL *session, const char *cfg[]) +{ + WT_CONNECTION_IMPL *conn; + + conn = S2C(session); + + /* + * Stop any server that is already running. This means that each time + * reconfigure is called we'll bounce the server even if there are no + * configuration changes. This makes our life easier as the underlying + * configuration routine doesn't have to worry about freeing objects + * in the connection structure (it's guaranteed to always start with a + * blank slate), and we don't have to worry about races where a running + * server is reading configuration information that we're updating, and + * it's not expected that reconfiguration will happen a lot. + */ + if (conn->capacity_session != NULL) + WT_RET(__wt_capacity_server_destroy(session)); + WT_RET(__capacity_config(session, cfg)); + + /* + * If it is a read only connection or if background fsync is not + * supported, then there is nothing to do. + */ + if (F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY) || + !__wt_fsync_background_chk(session)) + return (0); + + if (conn->capacity.total != 0) + WT_RET(__capacity_server_start(conn)); + + return (0); +} + +/* + * __wt_capacity_server_destroy -- + * Destroy the capacity server thread. 
+ */ +int +__wt_capacity_server_destroy(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_SESSION *wt_session; + + conn = S2C(session); + + F_CLR(conn, WT_CONN_SERVER_CAPACITY); + if (conn->capacity_tid_set) { + __wt_cond_signal(session, conn->capacity_cond); + WT_TRET(__wt_thread_join(session, &conn->capacity_tid)); + conn->capacity_tid_set = false; + } + __wt_cond_destroy(session, &conn->capacity_cond); + + /* Close the server thread's session. */ + if (conn->capacity_session != NULL) { + wt_session = &conn->capacity_session->iface; + WT_TRET(wt_session->close(wt_session, NULL)); + } + + /* + * Ensure capacity settings are cleared - so that reconfigure doesn't + * get confused. + */ + conn->capacity_session = NULL; + conn->capacity_tid_set = false; + conn->capacity_cond = NULL; + + return (ret); +} + +/* + * __capacity_signal -- + * Signal the capacity thread if sufficient data has been written. + */ +static void +__capacity_signal(WT_SESSION_IMPL *session) +{ + WT_CAPACITY *cap; + WT_CONNECTION_IMPL *conn; + + conn = S2C(session); + cap = &conn->capacity; + if (cap->written >= cap->threshold && !cap->signalled) { + __wt_cond_signal(session, conn->capacity_cond); + cap->signalled = true; + } +} + +/* + * __capacity_reserve -- + * Make a reservation for the given number of bytes against + * the capacity of the subsystem. + */ +static void +__capacity_reserve(uint64_t *reservation, uint64_t bytes, uint64_t capacity, + uint64_t now_ns, uint64_t *result) +{ + uint64_t res_len, res_value; + + if (capacity != 0) { + res_len = WT_RESERVATION_NS(bytes, capacity); + res_value = __wt_atomic_add64(reservation, res_len); + if (now_ns > res_value && now_ns - res_value > WT_BILLION) + /* + * If the reservation clock is out of date, bring it + * to within a second of a current time. 
+ */ + (void)__wt_atomic_store64(reservation, + (now_ns - WT_BILLION) + res_len); + } else + res_value = now_ns; + + *result = res_value; +} + +/* + * __wt_capacity_throttle -- + * Reserve a time to perform a write operation for the subsystem, + * and wait until that time. + * + * The concept is that each write to a subsystem reserves a time slot + * to do its write, and atomically adjusts the reservation marker to + * point past the reserved slot. The size of the adjustment (i.e. the + * length of time represented by the slot in nanoseconds) is chosen to + * be proportional to the number of bytes to be written, and the + * proportion is a simple calculation so that we can fit reservations for + * exactly the configured capacity in a second. Reservation times are + * in nanoseconds since the epoch. + */ +void +__wt_capacity_throttle(WT_SESSION_IMPL *session, uint64_t bytes, + WT_THROTTLE_TYPE type) +{ + struct timespec now; + WT_CAPACITY *cap; + WT_CONNECTION_IMPL *conn; + uint64_t best_res, capacity, new_res, now_ns, sleep_us, res_total_value; + uint64_t res_value, steal_capacity, stolen_bytes, this_res; + uint64_t *reservation, *steal; + uint64_t total_capacity; + + conn = S2C(session); + cap = &conn->capacity; + /* If not using capacity there's nothing to do. 
*/ + if (cap->total == 0) + return; + + capacity = steal_capacity = 0; + reservation = steal = NULL; + switch (type) { + case WT_THROTTLE_CKPT: + capacity = cap->ckpt; + reservation = &cap->reservation_ckpt; + WT_STAT_CONN_INCRV(session, capacity_bytes_ckpt, bytes); + break; + case WT_THROTTLE_EVICT: + capacity = cap->evict; + reservation = &cap->reservation_evict; + WT_STAT_CONN_INCRV(session, capacity_bytes_evict, bytes); + break; + case WT_THROTTLE_LOG: + capacity = cap->log; + reservation = &cap->reservation_log; + WT_STAT_CONN_INCRV(session, capacity_bytes_log, bytes); + break; + case WT_THROTTLE_READ: + capacity = cap->read; + reservation = &cap->reservation_read; + WT_STAT_CONN_INCRV(session, capacity_bytes_read, bytes); + break; + } + total_capacity = cap->total; + + /* + * Right now no subsystem can be individually turned off, but it is + * certainly a possibility to consider one subsystem may be turned off + * at some point in the future. If this subsystem is not throttled + * there's nothing to do. + */ + if (capacity == 0 || F_ISSET(conn, WT_CONN_RECOVERING)) + return; + + /* + * There may in fact be some reads done under the umbrella of log + * I/O, but they are mostly done under recovery. And if we are + * recovering, we don't reach this code. + */ + if (type != WT_THROTTLE_READ) { + (void)__wt_atomic_addv64(&cap->written, bytes); + WT_STAT_CONN_INCRV(session, capacity_bytes_written, bytes); + __capacity_signal(session); + } + + /* If we get sizes larger than this, later calculations may overflow. */ + WT_ASSERT(session, bytes < 16 * (uint64_t)WT_GIGABYTE); + WT_ASSERT(session, capacity != 0); + + /* Get the current time in nanoseconds since the epoch. 
*/ + __wt_epoch(session, &now); + now_ns = (uint64_t)now.tv_sec * WT_BILLION + (uint64_t)now.tv_nsec; + +again: + /* Take a reservation for the subsystem, and for the total */ + __capacity_reserve(reservation, bytes, capacity, now_ns, &res_value); + __capacity_reserve(&cap->reservation_total, bytes, total_capacity, + now_ns, &res_total_value); + + /* + * If we ended up with a future reservation, and we aren't constricted + * by the total capacity, then we may be able to reallocate some + * unused reservation time from another subsystem. + */ + if (res_value > now_ns && res_total_value < now_ns && steal == NULL && + total_capacity != 0) { + best_res = now_ns - WT_BILLION / 2; + if (type != WT_THROTTLE_CKPT && + (this_res = cap->reservation_ckpt) < best_res) { + steal = &cap->reservation_ckpt; + steal_capacity = cap->ckpt; + best_res = this_res; + } + if (type != WT_THROTTLE_EVICT && + (this_res = cap->reservation_evict) < best_res) { + steal = &cap->reservation_evict; + steal_capacity = cap->evict; + best_res = this_res; + } + if (type != WT_THROTTLE_LOG && + (this_res = cap->reservation_log) < best_res) { + steal = &cap->reservation_log; + steal_capacity = cap->log; + best_res = this_res; + } + if (type != WT_THROTTLE_READ && + (this_res = cap->reservation_read) < best_res) { + steal = &cap->reservation_read; + steal_capacity = cap->read; + best_res = this_res; + } + + if (steal != NULL) { + /* + * We have a subsystem that has enough spare capacity + * to steal. We'll take a small slice (a fraction + * of a second worth) and add it to our own subsystem. + */ + if (best_res < now_ns - WT_BILLION && + now_ns > WT_BILLION) + new_res = now_ns - WT_BILLION; + else + new_res = best_res; + WT_ASSERT(session, steal_capacity != 0); + new_res += WT_STEAL_FRACTION(WT_BILLION) + + WT_RESERVATION_NS(bytes, steal_capacity); + if (!__wt_atomic_casv64(steal, best_res, new_res)) { + /* + * Give up our reservations and try again. + * We won't try to steal the next time. 
+ */ + (void)__wt_atomic_sub64(reservation, + WT_RESERVATION_NS(bytes, capacity)); + (void)__wt_atomic_sub64(&cap->reservation_total, + WT_RESERVATION_NS(bytes, total_capacity)); + goto again; + } + + /* + * We've stolen a fraction of a second of capacity. + * Figure out how many bytes that is, before adding + * that many bytes to the acquiring subsystem's + * capacity. + */ + stolen_bytes = WT_STEAL_FRACTION(steal_capacity); + res_value = __wt_atomic_sub64(reservation, + WT_RESERVATION_NS(stolen_bytes, capacity)); + } + } + if (res_value < res_total_value) + res_value = res_total_value; + + if (res_value > now_ns) { + sleep_us = (res_value - now_ns) / WT_THOUSAND; + if (res_value == res_total_value) + WT_STAT_CONN_INCRV(session, + capacity_time_total, sleep_us); + else + switch (type) { + case WT_THROTTLE_CKPT: + WT_STAT_CONN_INCRV(session, + capacity_time_ckpt, sleep_us); + break; + case WT_THROTTLE_EVICT: + WT_STAT_CONN_INCRV(session, + capacity_time_evict, sleep_us); + break; + case WT_THROTTLE_LOG: + WT_STAT_CONN_INCRV(session, + capacity_time_log, sleep_us); + break; + case WT_THROTTLE_READ: + WT_STAT_CONN_INCRV(session, + capacity_time_read, sleep_us); + break; + } + if (sleep_us > WT_CAPACITY_SLEEP_CUTOFF_US) + /* Sleep handles large usec values. 
*/ + __wt_sleep(0, sleep_us); + } +} diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c index 28ad155ff53..7a2b52f40f9 100644 --- a/src/third_party/wiredtiger/src/conn/conn_open.c +++ b/src/third_party/wiredtiger/src/conn/conn_open.c @@ -101,6 +101,7 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) F_SET(conn, WT_CONN_CLOSING_NO_MORE_OPENS); WT_FULL_BARRIER(); + WT_TRET(__wt_capacity_server_destroy(session)); WT_TRET(__wt_checkpoint_server_destroy(session)); WT_TRET(__wt_statlog_destroy(session, true)); WT_TRET(__wt_sweep_destroy(session)); @@ -251,6 +252,9 @@ __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[]) /* Start the optional async threads. */ WT_RET(__wt_async_create(session, cfg)); + /* Start the optional capacity thread. */ + WT_RET(__wt_capacity_server_create(session, cfg)); + /* Start the optional checkpoint thread. */ WT_RET(__wt_checkpoint_server_create(session, cfg)); diff --git a/src/third_party/wiredtiger/src/conn/conn_reconfig.c b/src/third_party/wiredtiger/src/conn/conn_reconfig.c index e56e76c8fd6..c6d7203f08e 100644 --- a/src/third_party/wiredtiger/src/conn/conn_reconfig.c +++ b/src/third_party/wiredtiger/src/conn/conn_reconfig.c @@ -475,6 +475,7 @@ __wt_conn_reconfig(WT_SESSION_IMPL *session, const char **cfg) WT_ERR(__wt_conn_statistics_config(session, cfg)); WT_ERR(__wt_async_reconfig(session, cfg)); WT_ERR(__wt_cache_config(session, true, cfg)); + WT_ERR(__wt_capacity_server_create(session, cfg)); WT_ERR(__wt_checkpoint_server_create(session, cfg)); WT_ERR(__wt_logmgr_reconfig(session, cfg)); WT_ERR(__wt_lsm_manager_reconfig(session, cfg)); diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c index 315a822cc13..04882e527ce 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_backup.c +++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c @@ -359,18 +359,25 @@ __backup_start(WT_SESSION_IMPL 
*session, } err: /* Close the hot backup file. */ - if (cb->bfs != NULL) - WT_TRET(__wt_fclose(session, &cb->bfs)); if (srcfs != NULL) WT_TRET(__wt_fclose(session, &srcfs)); + /* + * Sync and rename the temp file into place. + */ + if (ret == 0) + ret = __wt_sync_and_rename(session, + &cb->bfs, WT_BACKUP_TMP, dest); if (ret == 0) { - WT_ASSERT(session, dest != NULL); - WT_TRET(__wt_fs_rename(session, WT_BACKUP_TMP, dest, false)); __wt_writelock(session, &conn->hot_backup_lock); conn->hot_backup_list = cb->list; __wt_writeunlock(session, &conn->hot_backup_lock); F_SET(session, WT_SESSION_BACKUP_CURSOR); } + /* + * If the file hasn't been closed, do it now. + */ + if (cb->bfs != NULL) + WT_TRET(__wt_fclose(session, &cb->bfs)); done: return (ret); diff --git a/src/third_party/wiredtiger/src/docs/programming.dox b/src/third_party/wiredtiger/src/docs/programming.dox index 3ddb0c376c5..960babfc146 100644 --- a/src/third_party/wiredtiger/src/docs/programming.dox +++ b/src/third_party/wiredtiger/src/docs/programming.dox @@ -68,6 +68,7 @@ each of which is ordered by one or more columns. - @subpage_single tune_build_options - @subpage_single tune_bulk_load - @subpage_single tune_cache +- @subpage_single tune_capacity - @subpage_single tune_checksum - @subpage_single tune_close - @subpage_single tune_cursor_persist diff --git a/src/third_party/wiredtiger/src/docs/tune-capacity.dox b/src/third_party/wiredtiger/src/docs/tune-capacity.dox new file mode 100644 index 00000000000..3aad4997576 --- /dev/null +++ b/src/third_party/wiredtiger/src/docs/tune-capacity.dox @@ -0,0 +1,38 @@ +/*! @page tune_capacity Capacity tuning + +In some cases, it can be helpful to constrain the overall I/O bandwidth +generated by the database. This can be beneficial when resources are shared, +for example, in cloud or virtual environments. + +The total bandwidth capacity is configured by setting the +\c io_capacity configuration string when calling the ::wiredtiger_open +function. 
The capacity can be adjusted with WT_CONNECTION::reconfigure. + +An example of setting a capacity limit to 40MB per second: + +@snippet ex_all.c Configure capacity + +When a total capacity is set the volume of system reads and writes totalled +will not exceed the given I/O capacity. +If a read or write is scheduled and would overflow the capacity, the issuing +thread will sleep to guarantee the capacity ceiling. The policy used is +fair to all threads, and gives some weight to both readers and writers to +try to ensure that each session can make progress when bandwidth +resources are limited. + +System reads and writes do not directly translate to disk I/O +operations. These operations go through the operating system cache. To ensure +the steady flow of data to the disk, setting a capacity also enables an +additional thread that monitors the writes performed for each file. For each +file that has sufficient data written to it, a call to an +asynchronous \c fsync will be made. This call normally queues the flush +in the operating system, though there is no guarantee about when it will +actually occur. On Windows, there is no equivalent support for asynchronously +scheduling writes to disk, so this extra "sync" thread is not active. + +When a total capacity is not set, or equivalently, when it is set to 0, +there are no capacity constraints on the database, and pauses will never +be inserted before I/O is done, nor are extra asynchronous \c fsync calls +performed. 
+ + */ diff --git a/src/third_party/wiredtiger/src/include/block.h b/src/third_party/wiredtiger/src/include/block.h index 584149d4379..8efaf10dd2b 100644 --- a/src/third_party/wiredtiger/src/include/block.h +++ b/src/third_party/wiredtiger/src/include/block.h @@ -234,7 +234,6 @@ struct __wt_block { uint32_t allocsize; /* Allocation size */ size_t os_cache; /* System buffer cache flush max */ size_t os_cache_max; - size_t os_cache_dirty; /* System buffer cache write max */ size_t os_cache_dirty_max; u_int block_header; /* Header length */ diff --git a/src/third_party/wiredtiger/src/include/capacity.h b/src/third_party/wiredtiger/src/include/capacity.h new file mode 100644 index 00000000000..1fb42f5b435 --- /dev/null +++ b/src/third_party/wiredtiger/src/include/capacity.h @@ -0,0 +1,74 @@ +/*- + * Copyright (c) 2014-2019 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +typedef enum { + WT_THROTTLE_CKPT, /* Checkpoint throttle */ + WT_THROTTLE_EVICT, /* Eviction throttle */ + WT_THROTTLE_LOG, /* Logging throttle */ + WT_THROTTLE_READ /* Read throttle */ +} WT_THROTTLE_TYPE; + +#define WT_THROTTLE_MIN WT_MEGABYTE /* Config minimum size */ + +/* + * The per-file threshold means we won't start the background fsync on a file + * until it crosses the per-file threshold of data written. The other minimum + * threshold defines a minimum threshold for the background thread. Otherwise + * we compute a percentage of the given capacity. + */ +#define WT_CAPACITY_FILE_THRESHOLD (WT_MEGABYTE / 2) +#define WT_CAPACITY_MIN_THRESHOLD (10 * WT_MEGABYTE) +#define WT_CAPACITY_PCT 10 + +/* + * If we're being asked to sleep a short amount of time, ignore it. + * A non-zero value means there may be a temporary violation of the + * capacity limitation, but one that would even out. That is, possibly + * fewer sleeps with the risk of more choppy behavior as this number + * is larger. 
+ */ +#define WT_CAPACITY_SLEEP_CUTOFF_US 100 + +/* + * When given a total capacity, divide it up for each subsystem. These defines + * represent the percentage of the total capacity that we allow for each + * subsystem capacity. We allow and expect the sum of the subsystems to + * exceed 100, as often they are not at their maximum at the same time. In any + * event, we track the total capacity separately, so it is never exceeded. + */ +#define WT_CAPACITY_SYS(total, pct) ((total) * (pct) / 100) +#define WT_CAP_CKPT 5 +#define WT_CAP_EVICT 50 +#define WT_CAP_LOG 30 +#define WT_CAP_READ 55 + +struct __wt_capacity { + uint64_t ckpt; /* Bytes/sec checkpoint capacity */ + uint64_t evict; /* Bytes/sec eviction capacity */ + uint64_t log; /* Bytes/sec logging capacity */ + uint64_t read; /* Bytes/sec read capacity */ + uint64_t total; /* Bytes/sec total capacity */ + uint64_t threshold; /* Capacity size period */ + + volatile uint64_t written; /* Written this period */ + volatile bool signalled; /* Capacity signalled */ + + /* + * A reservation is a point in time when a read or write for a subsystem + * can be scheduled, so as not to overrun the given capacity. These + * values hold the next available reservation, in nanoseconds since + * the epoch. Getting a reservation with a future time implies sleeping + * until that time; getting a reservation with a past time implies that + * the operation can be done immediately. 
+ */ + uint64_t reservation_ckpt; /* Atomic: next checkpoint write */ + uint64_t reservation_evict; /* Atomic: next eviction write */ + uint64_t reservation_log; /* Atomic: next logging write */ + uint64_t reservation_read; /* Atomic: next read */ + uint64_t reservation_total; /* Atomic: next operation of any kind */ +}; diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index 7577ce68e46..a23434ea9e2 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -289,6 +289,12 @@ struct __wt_connection_impl { uint32_t async_size; /* Async op array size */ uint32_t async_workers; /* Number of async workers */ + WT_CAPACITY capacity; /* Capacity structure */ + WT_SESSION_IMPL *capacity_session; /* Capacity thread session */ + wt_thread_t capacity_tid; /* Capacity thread */ + bool capacity_tid_set; /* Capacity thread set */ + WT_CONDVAR *capacity_cond; /* Capacity wait mutex */ + WT_LSM_MANAGER lsm_manager; /* LSM worker thread information */ WT_KEYED_ENCRYPTOR *kencryptor; /* Encryptor for metadata and log */ @@ -496,12 +502,13 @@ struct __wt_connection_impl { #define WT_CONN_RECOVERING 0x0020000u #define WT_CONN_SALVAGE 0x0040000u #define WT_CONN_SERVER_ASYNC 0x0080000u -#define WT_CONN_SERVER_CHECKPOINT 0x0100000u -#define WT_CONN_SERVER_LOG 0x0200000u -#define WT_CONN_SERVER_LSM 0x0400000u -#define WT_CONN_SERVER_STATISTICS 0x0800000u -#define WT_CONN_SERVER_SWEEP 0x1000000u -#define WT_CONN_WAS_BACKUP 0x2000000u +#define WT_CONN_SERVER_CAPACITY 0x0100000u +#define WT_CONN_SERVER_CHECKPOINT 0x0200000u +#define WT_CONN_SERVER_LOG 0x0400000u +#define WT_CONN_SERVER_LSM 0x0800000u +#define WT_CONN_SERVER_STATISTICS 0x1000000u +#define WT_CONN_SERVER_SWEEP 0x2000000u +#define WT_CONN_WAS_BACKUP 0x4000000u /* AUTOMATIC FLAG VALUE GENERATION STOP */ uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/extern.h 
b/src/third_party/wiredtiger/src/include/extern.h index 50eaa16e847..b12febce98d 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -264,6 +264,9 @@ extern int __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg) WT extern int __wt_conn_cache_pool_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern WT_THREAD_RET __wt_cache_pool_server(void *arg); +extern int __wt_capacity_server_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_capacity_server_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_capacity_throttle(WT_SESSION_IMPL *session, uint64_t bytes, WT_THROTTLE_TYPE type); extern int __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_checkpoint_signal(WT_SESSION_IMPL *session, wt_off_t logsize); @@ -565,6 +568,8 @@ extern int __wt_ext_map_windows_error(WT_EXTENSION_API *wt_api, WT_SESSION *wt_s extern bool __wt_handle_is_open(WT_SESSION_IMPL *session, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_FS_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_fsync_background_chk(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_fsync_background(WT_SESSION_IMPL *session) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_close_connection_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_os_inmemory(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_fopen(WT_SESSION_IMPL *session, const char *name, uint32_t open_flags, uint32_t flags, WT_FSTREAM **fstrp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/third_party/wiredtiger/src/include/os.h b/src/third_party/wiredtiger/src/include/os.h index ff50fff0081..37e0799ef16 100644 --- a/src/third_party/wiredtiger/src/include/os.h +++ b/src/third_party/wiredtiger/src/include/os.h @@ -109,9 +109,12 @@ struct __wt_fh { const char *name; /* File name */ uint64_t name_hash; /* hash of name */ + uint64_t last_sync; /* time of background fsync */ + volatile uint64_t written; /* written since fsync */ TAILQ_ENTRY(__wt_fh) q; /* internal queue */ TAILQ_ENTRY(__wt_fh) hashq; /* internal hash queue */ u_int ref; /* reference count */ + WT_FS_OPEN_FILE_TYPE file_type; /* file type */ WT_FILE_HANDLE *handle; }; diff --git a/src/third_party/wiredtiger/src/include/os_fhandle.i b/src/third_party/wiredtiger/src/include/os_fhandle.i index 1aab749a2ac..f0102ad37c0 100644 --- a/src/third_party/wiredtiger/src/include/os_fhandle.i +++ b/src/third_party/wiredtiger/src/include/os_fhandle.i @@ -196,6 +196,7 @@ __wt_write(WT_SESSION_IMPL *session, time_stop = __wt_clock(session); __wt_stat_msecs_hist_incr_fswrite(session, WT_CLOCKDIFF_MS(time_stop, time_start)); + (void)__wt_atomic_addv64(&fh->written, len); WT_STAT_CONN_DECR_ATOMIC(session, thread_write_active); return (ret); } diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index 801fd6d03a6..a1fc065d263 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -453,6 +453,20 @@ struct __wt_connection_stats { int64_t 
cache_bytes_dirty; int64_t cache_pages_dirty; int64_t cache_eviction_clean; + int64_t fsync_all_fh_total; + int64_t fsync_all_fh; + int64_t fsync_all_time; + int64_t capacity_threshold; + int64_t capacity_bytes_read; + int64_t capacity_bytes_ckpt; + int64_t capacity_bytes_evict; + int64_t capacity_bytes_log; + int64_t capacity_bytes_written; + int64_t capacity_time_total; + int64_t capacity_time_ckpt; + int64_t capacity_time_evict; + int64_t capacity_time_log; + int64_t capacity_time_read; int64_t cond_auto_wait_reset; int64_t cond_auto_wait; int64_t time_travel; diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index a4658c5d373..17bfb813151 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -2286,6 +2286,15 @@ struct __wt_connection { * seconds at which to check for files that are inactive and close * them., an integer between 1 and 100000; default \c 10.} * @config{ ),,} + * @config{io_capacity = (, control how many bytes per second are + * written and read. Exceeding the capacity results in throttling., a + * set of related configuration options defined below.} + * @config{ total, number of bytes per second + * available to all subsystems in total. When set\, decisions about + * what subsystems are throttled\, and in what proportion\, are made + * internally. The minimum non-zero setting is 1MB., an integer between + * 0 and 1TB; default \c 0.} + * @config{ ),,} * @config{log = (, enable logging. Enabling logging uses three * sessions from the configured session_max., a set of related * configuration options defined below.} @@ -2939,6 +2948,15 @@ struct __wt_connection { * @config{ ),,} * @config{in_memory, keep data in-memory only. See @ref in_memory for more * information., a boolean flag; default \c false.} + * @config{io_capacity = (, control how many bytes per second are written and + * read. 
Exceeding the capacity results in throttling., a set of related + * configuration options defined below.} + * @config{ total, + * number of bytes per second available to all subsystems in total. When set\, + * decisions about what subsystems are throttled\, and in what proportion\, are + * made internally. The minimum non-zero setting is 1MB., an integer between 0 + * and 1TB; default \c 0.} + * @config{ ),,} * @config{log = (, enable logging. Enabling logging uses three sessions from * the configured session_max., a set of related configuration options defined * below.} @@ -5207,518 +5225,546 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CACHE_PAGES_DIRTY 1127 /*! cache: unmodified pages evicted */ #define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1128 +/*! capacity: background fsync file handles considered */ +#define WT_STAT_CONN_FSYNC_ALL_FH_TOTAL 1129 +/*! capacity: background fsync file handles synced */ +#define WT_STAT_CONN_FSYNC_ALL_FH 1130 +/*! capacity: background fsync time (msecs) */ +#define WT_STAT_CONN_FSYNC_ALL_TIME 1131 +/*! capacity: threshold to call fsync */ +#define WT_STAT_CONN_CAPACITY_THRESHOLD 1132 +/*! capacity: throttled bytes read */ +#define WT_STAT_CONN_CAPACITY_BYTES_READ 1133 +/*! capacity: throttled bytes written for checkpoint */ +#define WT_STAT_CONN_CAPACITY_BYTES_CKPT 1134 +/*! capacity: throttled bytes written for eviction */ +#define WT_STAT_CONN_CAPACITY_BYTES_EVICT 1135 +/*! capacity: throttled bytes written for log */ +#define WT_STAT_CONN_CAPACITY_BYTES_LOG 1136 +/*! capacity: throttled bytes written total */ +#define WT_STAT_CONN_CAPACITY_BYTES_WRITTEN 1137 +/*! capacity: time waiting due to total capacity (usecs) */ +#define WT_STAT_CONN_CAPACITY_TIME_TOTAL 1138 +/*! capacity: time waiting during checkpoint (usecs) */ +#define WT_STAT_CONN_CAPACITY_TIME_CKPT 1139 +/*! capacity: time waiting during eviction (usecs) */ +#define WT_STAT_CONN_CAPACITY_TIME_EVICT 1140 +/*! 
capacity: time waiting during logging (usecs) */ +#define WT_STAT_CONN_CAPACITY_TIME_LOG 1141 +/*! capacity: time waiting during read (usecs) */ +#define WT_STAT_CONN_CAPACITY_TIME_READ 1142 /*! connection: auto adjusting condition resets */ -#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1129 +#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1143 /*! connection: auto adjusting condition wait calls */ -#define WT_STAT_CONN_COND_AUTO_WAIT 1130 +#define WT_STAT_CONN_COND_AUTO_WAIT 1144 /*! connection: detected system time went backwards */ -#define WT_STAT_CONN_TIME_TRAVEL 1131 +#define WT_STAT_CONN_TIME_TRAVEL 1145 /*! connection: files currently open */ -#define WT_STAT_CONN_FILE_OPEN 1132 +#define WT_STAT_CONN_FILE_OPEN 1146 /*! connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1133 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1147 /*! connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1134 +#define WT_STAT_CONN_MEMORY_FREE 1148 /*! connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1135 +#define WT_STAT_CONN_MEMORY_GROW 1149 /*! connection: pthread mutex condition wait calls */ -#define WT_STAT_CONN_COND_WAIT 1136 +#define WT_STAT_CONN_COND_WAIT 1150 /*! connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1137 +#define WT_STAT_CONN_RWLOCK_READ 1151 /*! connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1138 +#define WT_STAT_CONN_RWLOCK_WRITE 1152 /*! connection: total fsync I/Os */ -#define WT_STAT_CONN_FSYNC_IO 1139 +#define WT_STAT_CONN_FSYNC_IO 1153 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1140 +#define WT_STAT_CONN_READ_IO 1154 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1141 +#define WT_STAT_CONN_WRITE_IO 1155 /*! cursor: cached cursor count */ -#define WT_STAT_CONN_CURSOR_CACHED_COUNT 1142 +#define WT_STAT_CONN_CURSOR_CACHED_COUNT 1156 /*! 
cursor: cursor close calls that result in cache */ -#define WT_STAT_CONN_CURSOR_CACHE 1143 +#define WT_STAT_CONN_CURSOR_CACHE 1157 /*! cursor: cursor create calls */ -#define WT_STAT_CONN_CURSOR_CREATE 1144 +#define WT_STAT_CONN_CURSOR_CREATE 1158 /*! cursor: cursor insert calls */ -#define WT_STAT_CONN_CURSOR_INSERT 1145 +#define WT_STAT_CONN_CURSOR_INSERT 1159 /*! cursor: cursor modify calls */ -#define WT_STAT_CONN_CURSOR_MODIFY 1146 +#define WT_STAT_CONN_CURSOR_MODIFY 1160 /*! cursor: cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT 1147 +#define WT_STAT_CONN_CURSOR_NEXT 1161 /*! cursor: cursor operation restarted */ -#define WT_STAT_CONN_CURSOR_RESTART 1148 +#define WT_STAT_CONN_CURSOR_RESTART 1162 /*! cursor: cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV 1149 +#define WT_STAT_CONN_CURSOR_PREV 1163 /*! cursor: cursor remove calls */ -#define WT_STAT_CONN_CURSOR_REMOVE 1150 +#define WT_STAT_CONN_CURSOR_REMOVE 1164 /*! cursor: cursor reserve calls */ -#define WT_STAT_CONN_CURSOR_RESERVE 1151 +#define WT_STAT_CONN_CURSOR_RESERVE 1165 /*! cursor: cursor reset calls */ -#define WT_STAT_CONN_CURSOR_RESET 1152 +#define WT_STAT_CONN_CURSOR_RESET 1166 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1153 +#define WT_STAT_CONN_CURSOR_SEARCH 1167 /*! cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1154 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1168 /*! cursor: cursor sweep buckets */ -#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1155 +#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1169 /*! cursor: cursor sweep cursors closed */ -#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1156 +#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1170 /*! cursor: cursor sweep cursors examined */ -#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1157 +#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1171 /*! cursor: cursor sweeps */ -#define WT_STAT_CONN_CURSOR_SWEEP 1158 +#define WT_STAT_CONN_CURSOR_SWEEP 1172 /*! 
cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1159 +#define WT_STAT_CONN_CURSOR_UPDATE 1173 /*! cursor: cursors reused from cache */ -#define WT_STAT_CONN_CURSOR_REOPEN 1160 +#define WT_STAT_CONN_CURSOR_REOPEN 1174 /*! cursor: open cursor count */ -#define WT_STAT_CONN_CURSOR_OPEN_COUNT 1161 +#define WT_STAT_CONN_CURSOR_OPEN_COUNT 1175 /*! cursor: truncate calls */ -#define WT_STAT_CONN_CURSOR_TRUNCATE 1162 +#define WT_STAT_CONN_CURSOR_TRUNCATE 1176 /*! data-handle: connection data handles currently active */ -#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1163 +#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1177 /*! data-handle: connection sweep candidate became referenced */ -#define WT_STAT_CONN_DH_SWEEP_REF 1164 +#define WT_STAT_CONN_DH_SWEEP_REF 1178 /*! data-handle: connection sweep dhandles closed */ -#define WT_STAT_CONN_DH_SWEEP_CLOSE 1165 +#define WT_STAT_CONN_DH_SWEEP_CLOSE 1179 /*! data-handle: connection sweep dhandles removed from hash list */ -#define WT_STAT_CONN_DH_SWEEP_REMOVE 1166 +#define WT_STAT_CONN_DH_SWEEP_REMOVE 1180 /*! data-handle: connection sweep time-of-death sets */ -#define WT_STAT_CONN_DH_SWEEP_TOD 1167 +#define WT_STAT_CONN_DH_SWEEP_TOD 1181 /*! data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_SWEEPS 1168 +#define WT_STAT_CONN_DH_SWEEPS 1182 /*! data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1169 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1183 /*! data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1170 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1184 /*! lock: checkpoint lock acquisitions */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1171 +#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1185 /*! lock: checkpoint lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1172 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1186 /*! 
lock: checkpoint lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1173 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1187 /*! * lock: commit timestamp queue lock application thread time waiting * (usecs) */ -#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_APPLICATION 1174 +#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_APPLICATION 1188 /*! lock: commit timestamp queue lock internal thread time waiting (usecs) */ -#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_INTERNAL 1175 +#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_INTERNAL 1189 /*! lock: commit timestamp queue read lock acquisitions */ -#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_READ_COUNT 1176 +#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_READ_COUNT 1190 /*! lock: commit timestamp queue write lock acquisitions */ -#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WRITE_COUNT 1177 +#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WRITE_COUNT 1191 /*! lock: dhandle lock application thread time waiting (usecs) */ -#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1178 +#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1192 /*! lock: dhandle lock internal thread time waiting (usecs) */ -#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1179 +#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1193 /*! lock: dhandle read lock acquisitions */ -#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1180 +#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1194 /*! lock: dhandle write lock acquisitions */ -#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1181 +#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1195 /*! lock: metadata lock acquisitions */ -#define WT_STAT_CONN_LOCK_METADATA_COUNT 1182 +#define WT_STAT_CONN_LOCK_METADATA_COUNT 1196 /*! lock: metadata lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1183 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1197 /*! 
lock: metadata lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1184 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1198 /*! * lock: read timestamp queue lock application thread time waiting * (usecs) */ -#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1185 +#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1199 /*! lock: read timestamp queue lock internal thread time waiting (usecs) */ -#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1186 +#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1200 /*! lock: read timestamp queue read lock acquisitions */ -#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1187 +#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1201 /*! lock: read timestamp queue write lock acquisitions */ -#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1188 +#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1202 /*! lock: schema lock acquisitions */ -#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1189 +#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1203 /*! lock: schema lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1190 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1204 /*! lock: schema lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1191 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1205 /*! * lock: table lock application thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1192 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1206 /*! * lock: table lock internal thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1193 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1207 /*! lock: table read lock acquisitions */ -#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1194 +#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1208 /*! 
lock: table write lock acquisitions */ -#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1195 +#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1209 /*! lock: txn global lock application thread time waiting (usecs) */ -#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1196 +#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1210 /*! lock: txn global lock internal thread time waiting (usecs) */ -#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1197 +#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1211 /*! lock: txn global read lock acquisitions */ -#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1198 +#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1212 /*! lock: txn global write lock acquisitions */ -#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1199 +#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1213 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1200 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1214 /*! log: force archive time sleeping (usecs) */ -#define WT_STAT_CONN_LOG_FORCE_ARCHIVE_SLEEP 1201 +#define WT_STAT_CONN_LOG_FORCE_ARCHIVE_SLEEP 1215 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1202 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1216 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1203 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1217 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1204 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1218 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1205 +#define WT_STAT_CONN_LOG_FLUSH 1219 /*! log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1206 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1220 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1207 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1221 /*! 
log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1208 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1222 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1209 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1223 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1210 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1224 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1211 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1225 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1212 +#define WT_STAT_CONN_LOG_SCANS 1226 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1213 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1227 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1214 +#define WT_STAT_CONN_LOG_WRITE_LSN 1228 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1215 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1229 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1216 +#define WT_STAT_CONN_LOG_SYNC 1230 /*! log: log sync time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DURATION 1217 +#define WT_STAT_CONN_LOG_SYNC_DURATION 1231 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1218 +#define WT_STAT_CONN_LOG_SYNC_DIR 1232 /*! log: log sync_dir time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1219 +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1233 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1220 +#define WT_STAT_CONN_LOG_WRITES 1234 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1221 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1235 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1222 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1236 /*! 
log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1223 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1237 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1224 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1238 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1225 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1239 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1226 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1240 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1227 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1241 /*! log: slot close lost race */ -#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1228 +#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1242 /*! log: slot close unbuffered waits */ -#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1229 +#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1243 /*! log: slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1230 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1244 /*! log: slot join atomic update races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1231 +#define WT_STAT_CONN_LOG_SLOT_RACES 1245 /*! log: slot join calls atomic updates raced */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1232 +#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1246 /*! log: slot join calls did not yield */ -#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1233 +#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1247 /*! log: slot join calls found active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1234 +#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1248 /*! log: slot join calls slept */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1235 +#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1249 /*! log: slot join calls yielded */ -#define WT_STAT_CONN_LOG_SLOT_YIELD 1236 +#define WT_STAT_CONN_LOG_SLOT_YIELD 1250 /*! 
log: slot join found active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1237 +#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1251 /*! log: slot joins yield time (usecs) */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1238 +#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1252 /*! log: slot transitions unable to find free slot */ -#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1239 +#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1253 /*! log: slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1240 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1254 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1241 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1255 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1242 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1256 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1243 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1257 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1244 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1258 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1245 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1259 /*! perf: file system read latency histogram (bucket 1) - 10-49ms */ -#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1246 +#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1260 /*! perf: file system read latency histogram (bucket 2) - 50-99ms */ -#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1247 +#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1261 /*! perf: file system read latency histogram (bucket 3) - 100-249ms */ -#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1248 +#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1262 /*! perf: file system read latency histogram (bucket 4) - 250-499ms */ -#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1249 +#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1263 /*! 
perf: file system read latency histogram (bucket 5) - 500-999ms */ -#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1250 +#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1264 /*! perf: file system read latency histogram (bucket 6) - 1000ms+ */ -#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1251 +#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1265 /*! perf: file system write latency histogram (bucket 1) - 10-49ms */ -#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1252 +#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1266 /*! perf: file system write latency histogram (bucket 2) - 50-99ms */ -#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1253 +#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1267 /*! perf: file system write latency histogram (bucket 3) - 100-249ms */ -#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1254 +#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1268 /*! perf: file system write latency histogram (bucket 4) - 250-499ms */ -#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1255 +#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1269 /*! perf: file system write latency histogram (bucket 5) - 500-999ms */ -#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1256 +#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1270 /*! perf: file system write latency histogram (bucket 6) - 1000ms+ */ -#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1257 +#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1271 /*! perf: operation read latency histogram (bucket 1) - 100-249us */ -#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1258 +#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1272 /*! perf: operation read latency histogram (bucket 2) - 250-499us */ -#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1259 +#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1273 /*! 
perf: operation read latency histogram (bucket 3) - 500-999us */ -#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1260 +#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1274 /*! perf: operation read latency histogram (bucket 4) - 1000-9999us */ -#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1261 +#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1275 /*! perf: operation read latency histogram (bucket 5) - 10000us+ */ -#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1262 +#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1276 /*! perf: operation write latency histogram (bucket 1) - 100-249us */ -#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1263 +#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1277 /*! perf: operation write latency histogram (bucket 2) - 250-499us */ -#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1264 +#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1278 /*! perf: operation write latency histogram (bucket 3) - 500-999us */ -#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1265 +#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1279 /*! perf: operation write latency histogram (bucket 4) - 1000-9999us */ -#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1266 +#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1280 /*! perf: operation write latency histogram (bucket 5) - 10000us+ */ -#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1267 +#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1281 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1268 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1282 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1269 +#define WT_STAT_CONN_REC_PAGES 1283 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1270 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1284 /*! 
reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1271 +#define WT_STAT_CONN_REC_PAGE_DELETE 1285 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1272 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1286 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1273 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1287 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1274 +#define WT_STAT_CONN_SESSION_OPEN 1288 /*! session: session query timestamp calls */ -#define WT_STAT_CONN_SESSION_QUERY_TS 1275 +#define WT_STAT_CONN_SESSION_QUERY_TS 1289 /*! session: table alter failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1276 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1290 /*! session: table alter successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1277 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1291 /*! session: table alter unchanged and skipped */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1278 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1292 /*! session: table compact failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1279 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1293 /*! session: table compact successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1280 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1294 /*! session: table create failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1281 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1295 /*! session: table create successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1282 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1296 /*! session: table drop failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1283 +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1297 /*! 
session: table drop successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1284 +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1298 /*! session: table rebalance failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1285 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1299 /*! session: table rebalance successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1286 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1300 /*! session: table rename failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1287 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1301 /*! session: table rename successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1288 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1302 /*! session: table salvage failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1289 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1303 /*! session: table salvage successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1290 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1304 /*! session: table truncate failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1291 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1305 /*! session: table truncate successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1292 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1306 /*! session: table verify failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1293 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1307 /*! session: table verify successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1294 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1308 /*! thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1295 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1309 /*! 
thread-state: active filesystem read calls */ -#define WT_STAT_CONN_THREAD_READ_ACTIVE 1296 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1310 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1297 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1311 /*! thread-yield: application thread time evicting (usecs) */ -#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1298 +#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1312 /*! thread-yield: application thread time waiting for cache (usecs) */ -#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1299 +#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1313 /*! * thread-yield: connection close blocked waiting for transaction state * stabilization */ -#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1300 +#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1314 /*! thread-yield: connection close yielded for lsm manager shutdown */ -#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1301 +#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1315 /*! thread-yield: data handle lock yielded */ -#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1302 +#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1316 /*! * thread-yield: get reference for page index and slot time sleeping * (usecs) */ -#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1303 +#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1317 /*! thread-yield: log server sync yielded for log write */ -#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1304 +#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1318 /*! thread-yield: page access yielded due to prepare state change */ -#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1305 +#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1319 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1306 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1320 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1307 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1321 /*! 
thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1308 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1322 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1309 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1323 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1310 +#define WT_STAT_CONN_PAGE_SLEEP 1324 /*! * thread-yield: page delete rollback time sleeping for state change * (usecs) */ -#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1311 +#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1325 /*! thread-yield: page reconciliation yielded due to child modification */ -#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1312 +#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1326 /*! transaction: Number of prepared updates */ -#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COUNT 1313 +#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COUNT 1327 /*! transaction: Number of prepared updates added to cache overflow */ -#define WT_STAT_CONN_TXN_PREPARED_UPDATES_LOOKASIDE_INSERTS 1314 +#define WT_STAT_CONN_TXN_PREPARED_UPDATES_LOOKASIDE_INSERTS 1328 /*! transaction: Number of prepared updates resolved */ -#define WT_STAT_CONN_TXN_PREPARED_UPDATES_RESOLVED 1315 +#define WT_STAT_CONN_TXN_PREPARED_UPDATES_RESOLVED 1329 /*! transaction: commit timestamp queue entries walked */ -#define WT_STAT_CONN_TXN_COMMIT_QUEUE_WALKED 1316 +#define WT_STAT_CONN_TXN_COMMIT_QUEUE_WALKED 1330 /*! transaction: commit timestamp queue insert to empty */ -#define WT_STAT_CONN_TXN_COMMIT_QUEUE_EMPTY 1317 +#define WT_STAT_CONN_TXN_COMMIT_QUEUE_EMPTY 1331 /*! transaction: commit timestamp queue inserts to head */ -#define WT_STAT_CONN_TXN_COMMIT_QUEUE_HEAD 1318 +#define WT_STAT_CONN_TXN_COMMIT_QUEUE_HEAD 1332 /*! transaction: commit timestamp queue inserts total */ -#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1319 +#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1333 /*! 
transaction: commit timestamp queue length */ -#define WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN 1320 +#define WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN 1334 /*! transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1321 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1335 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1322 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1336 /*! transaction: prepared transactions */ -#define WT_STAT_CONN_TXN_PREPARE 1323 +#define WT_STAT_CONN_TXN_PREPARE 1337 /*! transaction: prepared transactions committed */ -#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1324 +#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1338 /*! transaction: prepared transactions currently active */ -#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1325 +#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1339 /*! transaction: prepared transactions rolled back */ -#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1326 +#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1340 /*! transaction: query timestamp calls */ -#define WT_STAT_CONN_TXN_QUERY_TS 1327 +#define WT_STAT_CONN_TXN_QUERY_TS 1341 /*! transaction: read timestamp queue entries walked */ -#define WT_STAT_CONN_TXN_READ_QUEUE_WALKED 1328 +#define WT_STAT_CONN_TXN_READ_QUEUE_WALKED 1342 /*! transaction: read timestamp queue insert to empty */ -#define WT_STAT_CONN_TXN_READ_QUEUE_EMPTY 1329 +#define WT_STAT_CONN_TXN_READ_QUEUE_EMPTY 1343 /*! transaction: read timestamp queue inserts to head */ -#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1330 +#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1344 /*! transaction: read timestamp queue inserts total */ -#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1331 +#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1345 /*! transaction: read timestamp queue length */ -#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1332 +#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1346 /*! 
transaction: rollback to stable calls */ -#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE 1333 +#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE 1347 /*! transaction: rollback to stable updates aborted */ -#define WT_STAT_CONN_TXN_ROLLBACK_UPD_ABORTED 1334 +#define WT_STAT_CONN_TXN_ROLLBACK_UPD_ABORTED 1348 /*! transaction: rollback to stable updates removed from cache overflow */ -#define WT_STAT_CONN_TXN_ROLLBACK_LAS_REMOVED 1335 +#define WT_STAT_CONN_TXN_ROLLBACK_LAS_REMOVED 1349 /*! transaction: set timestamp calls */ -#define WT_STAT_CONN_TXN_SET_TS 1336 +#define WT_STAT_CONN_TXN_SET_TS 1350 /*! transaction: set timestamp commit calls */ -#define WT_STAT_CONN_TXN_SET_TS_COMMIT 1337 +#define WT_STAT_CONN_TXN_SET_TS_COMMIT 1351 /*! transaction: set timestamp commit updates */ -#define WT_STAT_CONN_TXN_SET_TS_COMMIT_UPD 1338 +#define WT_STAT_CONN_TXN_SET_TS_COMMIT_UPD 1352 /*! transaction: set timestamp oldest calls */ -#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1339 +#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1353 /*! transaction: set timestamp oldest updates */ -#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1340 +#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1354 /*! transaction: set timestamp stable calls */ -#define WT_STAT_CONN_TXN_SET_TS_STABLE 1341 +#define WT_STAT_CONN_TXN_SET_TS_STABLE 1355 /*! transaction: set timestamp stable updates */ -#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1342 +#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1356 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1343 +#define WT_STAT_CONN_TXN_BEGIN 1357 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1344 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1358 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1345 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1359 /*! 
transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1346 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1360 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1347 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1361 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1348 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1362 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1349 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1363 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1350 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1364 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1351 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1365 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1352 +#define WT_STAT_CONN_TXN_CHECKPOINT 1366 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1353 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1367 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1354 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1368 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1355 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1369 /*! * transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1356 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1370 /*! 
transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1357 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1371 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1358 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1372 /*! * transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1359 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1373 /*! transaction: transaction range of timestamps currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1360 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1374 /*! transaction: transaction range of timestamps pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1361 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1375 /*! * transaction: transaction range of timestamps pinned by the oldest * timestamp */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1362 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1376 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1363 +#define WT_STAT_CONN_TXN_SYNC 1377 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1364 +#define WT_STAT_CONN_TXN_COMMIT 1378 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1365 +#define WT_STAT_CONN_TXN_ROLLBACK 1379 /*! transaction: update conflicts */ -#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1366 +#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1380 /*! 
* @} diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h index 75801ceb48b..d93f6a3be7f 100644 --- a/src/third_party/wiredtiger/src/include/wt_internal.h +++ b/src/third_party/wiredtiger/src/include/wt_internal.h @@ -97,6 +97,8 @@ struct __wt_cache; typedef struct __wt_cache WT_CACHE; struct __wt_cache_pool; typedef struct __wt_cache_pool WT_CACHE_POOL; +struct __wt_capacity; + typedef struct __wt_capacity WT_CAPACITY; struct __wt_cell; typedef struct __wt_cell WT_CELL; struct __wt_cell_unpack; @@ -359,6 +361,7 @@ typedef uint64_t wt_timestamp_t; #include "btmem.h" #include "btree.h" #include "cache.h" +#include "capacity.h" #include "compact.h" #include "config.h" #include "cursor.h" diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c index aff145be512..9e27a996251 100644 --- a/src/third_party/wiredtiger/src/log/log.c +++ b/src/third_party/wiredtiger/src/log/log.c @@ -182,6 +182,22 @@ __log_wait_for_earlier_slot(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) } /* + * __log_fs_read -- + * Wrapper when reading from a log file. + */ +static int +__log_fs_read(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, size_t len, void *buf) +{ + WT_DECL_RET; + + __wt_capacity_throttle(session, len, WT_THROTTLE_LOG); + if ((ret = __wt_read(session, fh, offset, len, buf)) != 0) + WT_RET_MSG(session, ret, "%s: log read failure", fh->name); + return (ret); +} + +/* * __log_fs_write -- * Wrapper when writing to a log file. If we're writing to a new log * file for the first time wait for writes to the previous log file. 
@@ -207,6 +223,7 @@ __log_fs_write(WT_SESSION_IMPL *session, __log_wait_for_earlier_slot(session, slot); WT_RET(__wt_log_force_sync(session, &slot->slot_release_lsn)); } + __wt_capacity_throttle(session, len, WT_THROTTLE_LOG); if ((ret = __wt_write(session, slot->slot_fh, offset, len, buf)) != 0) WT_PANIC_RET(session, ret, "%s: fatal log failure", slot->slot_fh->name); @@ -663,6 +680,7 @@ __log_zero(WT_SESSION_IMPL *session, */ if ((uint32_t)len - off < bufsz) wrlen = (uint32_t)len - off; + __wt_capacity_throttle(session, wrlen, WT_THROTTLE_LOG); WT_ERR(__wt_write(session, fh, (wt_off_t)off, wrlen, zerobuf->mem)); off += wrlen; @@ -989,7 +1007,7 @@ __log_open_verify(WT_SESSION_IMPL *session, uint32_t id, WT_FH **fhp, * Read in the log file header and verify it. */ WT_ERR(__log_openfile(session, id, 0, &fh)); - WT_ERR(__wt_read(session, fh, 0, allocsize, buf->mem)); + WT_ERR(__log_fs_read(session, fh, 0, allocsize, buf->mem)); logrec = (WT_LOG_RECORD *)buf->mem; __wt_log_record_byteswap(logrec); desc = (WT_LOG_DESC *)logrec->record; @@ -1053,7 +1071,7 @@ __log_open_verify(WT_SESSION_IMPL *session, uint32_t id, WT_FH **fhp, goto err; memset(buf->mem, 0, allocsize); - WT_ERR(__wt_read(session, fh, allocsize, allocsize, buf->mem)); + WT_ERR(__log_fs_read(session, fh, allocsize, allocsize, buf->mem)); logrec = (WT_LOG_RECORD *)buf->mem; /* * We have a valid header but the system record is not there. @@ -1932,7 +1950,7 @@ __log_has_hole(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t log_size, for (off = offset; remainder > 0; remainder -= (wt_off_t)rdlen, off += (wt_off_t)rdlen) { rdlen = WT_MIN(bufsz, (size_t)remainder); - WT_ERR(__wt_read(session, fh, off, rdlen, buf)); + WT_ERR(__log_fs_read(session, fh, off, rdlen, buf)); allocsize = (log == NULL ? 
WT_LOG_ALIGN : log->allocsize); if (memcmp(buf, zerobuf, rdlen) != 0) { /* @@ -2450,7 +2468,7 @@ advance: */ WT_ASSERT(session, buf->memsize >= allocsize); need_salvage = F_ISSET(conn, WT_CONN_SALVAGE); - WT_ERR(__wt_read(session, + WT_ERR(__log_fs_read(session, log_fh, rd_lsn.l.offset, (size_t)allocsize, buf->mem)); need_salvage = false; /* @@ -2504,7 +2522,7 @@ advance: * record, especially for direct I/O. */ WT_ERR(__wt_buf_grow(session, buf, rdup_len)); - WT_ERR(__wt_read(session, log_fh, + WT_ERR(__log_fs_read(session, log_fh, rd_lsn.l.offset, (size_t)rdup_len, buf->mem)); WT_STAT_CONN_INCR(session, log_scan_rereads); } diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c index acff9771f62..40f37b961e8 100644 --- a/src/third_party/wiredtiger/src/log/log_slot.c +++ b/src/third_party/wiredtiger/src/log/log_slot.c @@ -535,6 +535,7 @@ __wt_log_slot_destroy(WT_SESSION_IMPL *session) (uint64_t)slot->slot_state, WT_LOG_SLOT_RESERVED)) { rel = WT_LOG_SLOT_RELEASED_BUFFERED(slot->slot_state); if (rel != 0) + /* Writes are not throttled. */ WT_RET(__wt_write(session, slot->slot_fh, slot->slot_start_offset, (size_t)rel, slot->slot_buf.mem)); diff --git a/src/third_party/wiredtiger/src/os_common/os_fhandle.c b/src/third_party/wiredtiger/src/os_common/os_fhandle.c index 3100817e650..df67508c4fe 100644 --- a/src/third_party/wiredtiger/src/os_common/os_fhandle.c +++ b/src/third_party/wiredtiger/src/os_common/os_fhandle.c @@ -240,6 +240,8 @@ __wt_open(WT_SESSION_IMPL *session, WT_ERR(__wt_calloc_one(session, &fh)); WT_ERR(__wt_strdup(session, name, &fh->name)); + fh->file_type = file_type; + /* * If this is a read-only connection, open all files read-only except * the lock file. @@ -356,6 +358,134 @@ __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) } /* + * __wt_fsync_background_chk -- + * Return if background fsync is supported. 
+ */ +bool +__wt_fsync_background_chk(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + WT_FH *fh; + WT_FILE_HANDLE *handle; + bool supported; + + conn = S2C(session); + supported = true; + __wt_spin_lock(session, &conn->fh_lock); + /* + * Look for the first data file handle and see if + * the fsync nowait function is supported. + */ + TAILQ_FOREACH(fh, &conn->fhqh, q) { + handle = fh->handle; + if (fh->file_type != WT_FS_OPEN_FILE_TYPE_DATA) + continue; + /* + * If we don't have a function, return false, otherwise + * return true. In any case, we are done with the loop. + */ + if (handle->fh_sync_nowait == NULL) + supported = false; + break; + } + __wt_spin_unlock(session, &conn->fh_lock); + return (supported); +} + +/* + * __fsync_background -- + * Background fsync for a single dirty file handle. + */ +static int +__fsync_background(WT_SESSION_IMPL *session, WT_FH *fh) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_FILE_HANDLE *handle; + uint64_t now; + + conn = S2C(session); + WT_STAT_CONN_INCR(session, fsync_all_fh_total); + + handle = fh->handle; + if (handle->fh_sync_nowait == NULL || + fh->written < WT_CAPACITY_FILE_THRESHOLD) + return (0); + + /* Only sync data files. */ + if (fh->file_type != WT_FS_OPEN_FILE_TYPE_DATA) + return (0); + + now = __wt_clock(session); + if (fh->last_sync == 0 || WT_CLOCKDIFF_SEC(now, fh->last_sync) > 0) { + __wt_spin_unlock(session, &conn->fh_lock); + + /* + * We set the false flag to indicate a non-blocking background + * fsync, but there is no guarantee that it doesn't block. If + * we wanted to detect if it is blocking, adding a clock call + * and checking the time would be done here. + */ + ret = __wt_fsync(session, fh, false); + if (ret == 0) { + WT_STAT_CONN_INCR(session, fsync_all_fh); + fh->last_sync = now; + fh->written = 0; + } + + __wt_spin_lock(session, &conn->fh_lock); + } + return (ret); +} + +/* + * __wt_fsync_background -- + * Background fsync for all dirty file handles. 
+ */ +int +__wt_fsync_background(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_FH *fh, *fhnext; + + conn = S2C(session); + __wt_spin_lock(session, &conn->fh_lock); + TAILQ_FOREACH_SAFE(fh, &conn->fhqh, q, fhnext) { + /* + * The worker routine will unlock the list to avoid holding it + * locked over an fsync. Increment the count on the current and + * next handles to guarantee their validity. + */ + if (fhnext != NULL) + ++fhnext->ref; + ++fh->ref; + + WT_TRET(__fsync_background(session, fh)); + + /* + * The file handle reference may have gone to 0, in which case + * we're responsible for the close. Configure the close routine + * to drop the lock, which means we must re-acquire it. + */ + if (--fh->ref == 0) { + WT_TRET(__handle_close(session, fh, true)); + __wt_spin_lock(session, &conn->fh_lock); + } + + /* + * Decrement the next element's reference count. It might have + * gone to 0 as well, in which case we'll close it in the next + * loop iteration. + */ + if (fhnext != NULL) + --fhnext->ref; + } + __wt_spin_unlock(session, &conn->fh_lock); + return (ret); +} + +/* * __wt_close_connection_close -- * Close any open file handles at connection close. 
*/ diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index cee1f270d6d..534d598b3f3 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -863,6 +863,20 @@ static const char * const __stats_connection_desc[] = { "cache: tracked dirty bytes in the cache", "cache: tracked dirty pages in the cache", "cache: unmodified pages evicted", + "capacity: background fsync file handles considered", + "capacity: background fsync file handles synced", + "capacity: background fsync time (msecs)", + "capacity: threshold to call fsync", + "capacity: throttled bytes read", + "capacity: throttled bytes written for checkpoint", + "capacity: throttled bytes written for eviction", + "capacity: throttled bytes written for log", + "capacity: throttled bytes written total", + "capacity: time waiting due to total capacity (usecs)", + "capacity: time waiting during checkpoint (usecs)", + "capacity: time waiting during eviction (usecs)", + "capacity: time waiting during logging (usecs)", + "capacity: time waiting during read (usecs)", "connection: auto adjusting condition resets", "connection: auto adjusting condition wait calls", "connection: detected system time went backwards", @@ -1272,6 +1286,20 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) /* not clearing cache_bytes_dirty */ /* not clearing cache_pages_dirty */ stats->cache_eviction_clean = 0; + stats->fsync_all_fh_total = 0; + stats->fsync_all_fh = 0; + /* not clearing fsync_all_time */ + stats->capacity_threshold = 0; + stats->capacity_bytes_read = 0; + stats->capacity_bytes_ckpt = 0; + stats->capacity_bytes_evict = 0; + stats->capacity_bytes_log = 0; + stats->capacity_bytes_written = 0; + stats->capacity_time_total = 0; + stats->capacity_time_ckpt = 0; + stats->capacity_time_evict = 0; + stats->capacity_time_log = 0; + stats->capacity_time_read = 0; stats->cond_auto_wait_reset = 0; 
stats->cond_auto_wait = 0; stats->time_travel = 0; @@ -1727,6 +1755,21 @@ __wt_stat_connection_aggregate( to->cache_bytes_dirty += WT_STAT_READ(from, cache_bytes_dirty); to->cache_pages_dirty += WT_STAT_READ(from, cache_pages_dirty); to->cache_eviction_clean += WT_STAT_READ(from, cache_eviction_clean); + to->fsync_all_fh_total += WT_STAT_READ(from, fsync_all_fh_total); + to->fsync_all_fh += WT_STAT_READ(from, fsync_all_fh); + to->fsync_all_time += WT_STAT_READ(from, fsync_all_time); + to->capacity_threshold += WT_STAT_READ(from, capacity_threshold); + to->capacity_bytes_read += WT_STAT_READ(from, capacity_bytes_read); + to->capacity_bytes_ckpt += WT_STAT_READ(from, capacity_bytes_ckpt); + to->capacity_bytes_evict += WT_STAT_READ(from, capacity_bytes_evict); + to->capacity_bytes_log += WT_STAT_READ(from, capacity_bytes_log); + to->capacity_bytes_written += + WT_STAT_READ(from, capacity_bytes_written); + to->capacity_time_total += WT_STAT_READ(from, capacity_time_total); + to->capacity_time_ckpt += WT_STAT_READ(from, capacity_time_ckpt); + to->capacity_time_evict += WT_STAT_READ(from, capacity_time_evict); + to->capacity_time_log += WT_STAT_READ(from, capacity_time_log); + to->capacity_time_read += WT_STAT_READ(from, capacity_time_read); to->cond_auto_wait_reset += WT_STAT_READ(from, cond_auto_wait_reset); to->cond_auto_wait += WT_STAT_READ(from, cond_auto_wait); to->time_travel += WT_STAT_READ(from, time_travel); diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c index 5201d63f685..424ebf68445 100644 --- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c +++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c @@ -454,12 +454,12 @@ __txn_rollback_to_stable_check(WT_SESSION_IMPL *session) } /* - * __wt_txn_rollback_to_stable -- + * __txn_rollback_to_stable -- * Rollback all in-memory state related to timestamps more recent than * the passed in timestamp. 
*/ -int -__wt_txn_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[]) +static int +__txn_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[]) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; @@ -510,3 +510,26 @@ err: F_CLR(conn, WT_CONN_EVICTION_NO_LOOKASIDE); __wt_free(session, conn->stable_rollback_bitstring); return (ret); } + +/* + * __wt_txn_rollback_to_stable -- + * Rollback all in-memory state related to timestamps more recent than + * the passed in timestamp. + */ +int +__wt_txn_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[]) +{ + WT_DECL_RET; + + /* + * Don't use the connection's default session: we are working on data + * handles and (a) don't want to cache all of them forever, plus (b) + * can't guarantee that no other method will be called concurrently. + */ + WT_RET(__wt_open_internal_session(S2C(session), + "txn rollback_to_stable", true, 0, &session)); + ret = __txn_rollback_to_stable(session, cfg); + WT_TRET(session->iface.close(&session->iface, NULL)); + + return (ret); +} diff --git a/src/third_party/wiredtiger/test/suite/test_reconfig01.py b/src/third_party/wiredtiger/test/suite/test_reconfig01.py index 8957e1d4e43..84f96150768 100644 --- a/src/third_party/wiredtiger/test/suite/test_reconfig01.py +++ b/src/third_party/wiredtiger/test/suite/test_reconfig01.py @@ -106,6 +106,13 @@ class test_reconfig01(wttest.WiredTigerTestCase): self.conn.reconfigure("statistics=(fast)") self.conn.reconfigure("statistics=(none)") + def test_reconfig_capacity(self): + self.conn.reconfigure("io_capacity=(total=80M)") + self.conn.reconfigure("io_capacity=(total=100M)") + msg = '/below minimum/' + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.conn.reconfigure("io_capacity=(total=16K)"), msg) + def test_reconfig_checkpoints(self): self.conn.reconfigure("checkpoint=(wait=0)") self.conn.reconfigure("checkpoint=(wait=5)") |