diff options
author | Luke Chen <luke.chen@mongodb.com> | 2021-06-03 16:03:32 +1000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-06-03 06:31:18 +0000 |
commit | c67907e95130ed3e66ffdbcd6895d84dddfe139d (patch) | |
tree | 10eac0299933e915caf1bfc88e0ab426181b97b9 | |
parent | 25bf17229bdddea523ae94571cc22ae4939cfd3c (diff) | |
download | mongo-c67907e95130ed3e66ffdbcd6895d84dddfe139d.tar.gz |
Import wiredtiger: ce3ba1932911e41ff7b0055d9a7ab79b5c13c886 from branch mongodb-5.0
ref: ec0b500175..ce3ba19329
for: 5.1.0
WT-7514 Let tiered subsystem open files on behalf of block manager
28 files changed, 764 insertions, 423 deletions
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index 377fa72013a..1731f92911f 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -272,6 +272,10 @@ file_runtime_config = common_runtime_config + [ the file is read-only. All methods that may modify a file are disabled. See @ref readonly for more information''', type='boolean'), + Config('tiered_object', 'false', r''' + this file is a tiered object. When opened on its own, it is marked as + readonly and may be restricted in other ways''', + type='boolean', undoc=True), ] # Per-file configuration @@ -451,7 +455,7 @@ lsm_meta = file_config + lsm_config + [ obsolete chunks in the LSM tree'''), ] -tiered_meta = common_meta + tiered_config + [ +tiered_meta = file_config + tiered_config + [ Config('last', '0', r''' the last allocated object ID'''), Config('tiers', '', r''' diff --git a/src/third_party/wiredtiger/dist/filelist b/src/third_party/wiredtiger/dist/filelist index 5bdbe137fa2..b1fe227cdfc 100644 --- a/src/third_party/wiredtiger/dist/filelist +++ b/src/third_party/wiredtiger/dist/filelist @@ -212,6 +212,7 @@ src/support/timestamp.c src/support/update_vector.c src/tiered/tiered_config.c src/tiered/tiered_handle.c +src/tiered/tiered_opener.c src/tiered/tiered_work.c src/txn/txn.c src/txn/txn_ckpt.c diff --git a/src/third_party/wiredtiger/dist/s_void b/src/third_party/wiredtiger/dist/s_void index ab46f05c593..c679f5d3b0a 100755 --- a/src/third_party/wiredtiger/dist/s_void +++ b/src/third_party/wiredtiger/dist/s_void @@ -58,6 +58,7 @@ func_ok() -e '/int __wt_block_compact_end$/d' \ -e '/int __wt_block_compact_start$/d' \ -e '/int __wt_block_manager_size$/d' \ + -e '/int __wt_block_tiered_load$/d' \ -e '/int __wt_block_write_size$/d' \ -e '/int __wt_buf_catfmt$/d' \ -e '/int __wt_buf_fmt$/d' \ diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index e1bf0ebcc4e..5e5f99a5db0 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -502,6 +502,7 @@ connection_stats = [ # Tiered storage statistics ########################################## StorageStat('flush_tier', 'flush_tier operation calls'), + StorageStat('flush_tier_busy', 'flush_tier busy retries'), ########################################## # Thread Count statistics diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 6ca5ab8d456..433d2fa5e88 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-5.0", - "commit": "ec0b5001752dcd05060ae043a0d5c3d6b3610206" + "commit": "ce3ba1932911e41ff7b0055d9a7ab79b5c13c886" } diff --git a/src/third_party/wiredtiger/src/block/block_addr.c b/src/third_party/wiredtiger/src/block/block_addr.c index f594d30e83e..181ed8ad77f 100644 --- a/src/third_party/wiredtiger/src/block/block_addr.c +++ b/src/third_party/wiredtiger/src/block/block_addr.c @@ -40,6 +40,8 @@ __block_buffer_to_addr(WT_BLOCK *block, const uint8_t **pp, uint32_t *objectidp, *offsetp = 0; *objectidp = *sizep = *checksump = 0; } else { + if (block->has_objects && l == 0 && o != WT_BLOCK_INVALID_OFFSET) + WT_RET_MSG(NULL, EINVAL, "address cookie decoding for Btree with objects has object 0"); *objectidp = (uint32_t)l; *offsetp = (wt_off_t)(o + 1) * block->allocsize; *sizep = (uint32_t)s * block->allocsize; @@ -68,8 +70,11 @@ __wt_block_addr_to_buffer(WT_BLOCK *block, uint8_t **pp, uint32_t objectid, wt_o s = size / block->allocsize; c = checksum; } - if (block->has_objects) + if (block->has_objects) { + if (l == 0 && o != WT_BLOCK_INVALID_OFFSET) + WT_RET_MSG(NULL, EINVAL, "address cookie encoding for Btree with objects has object 0"); WT_RET(__wt_vpack_uint(pp, 0, l)); + } WT_RET(__wt_vpack_uint(pp, 0, o)); WT_RET(__wt_vpack_uint(pp, 0, s)); WT_RET(__wt_vpack_uint(pp, 0, c)); diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c index 8194b8212de..f8c06b84714 100644 --- a/src/third_party/wiredtiger/src/block/block_ckpt.c +++ b/src/third_party/wiredtiger/src/block/block_ckpt.c @@ -746,13 +746,6 @@ live_update: ci->ckpt_discard = ci->discard; WT_ERR(__wt_block_extlist_init(session, &ci->discard, "live", "discard", false)); - /* - * TODO: tiered: for now we are switching files on a checkpoint, we'll want to do it only on - * flush_tier. - */ - if (block->has_objects) - WT_ERR(__wt_block_tiered_newfile(session, block)); - #ifdef HAVE_DIAGNOSTIC /* * The first checkpoint in the system should always have an empty discard list. If we've read diff --git a/src/third_party/wiredtiger/src/block/block_mgr.c b/src/third_party/wiredtiger/src/block/block_mgr.c index 4be319fe79c..6cba094aefe 100644 --- a/src/third_party/wiredtiger/src/block/block_mgr.c +++ b/src/third_party/wiredtiger/src/block/block_mgr.c @@ -289,30 +289,6 @@ __bm_compact_start_readonly(WT_BM *bm, WT_SESSION_IMPL *session) } /* - * __bm_flush_tier -- - * Flush the underlying file to the shared tier. - */ -static int -__bm_flush_tier(WT_BM *bm, WT_SESSION_IMPL *session, uint8_t **flush_cookie, size_t *cookie_size) -{ - return (__wt_block_tiered_flush(session, bm->block, flush_cookie, cookie_size)); -} - -/* - * __bm_flush_tier_readonly -- - * Flush the underlying file to the shared tier; readonly version. - */ -static int -__bm_flush_tier_readonly( - WT_BM *bm, WT_SESSION_IMPL *session, uint8_t **flush_cookie, size_t *cookie_size) -{ - WT_UNUSED(flush_cookie); - WT_UNUSED(cookie_size); - - return (__bm_readonly(bm, session)); -} - -/* * __bm_free -- * Free a block of space to the underlying file. */ @@ -463,6 +439,29 @@ __bm_stat(WT_BM *bm, WT_SESSION_IMPL *session, WT_DSRC_STATS *stats) } /* + * __bm_switch_object -- + * Modify the tiered object. + */ +static int +__bm_switch_object(WT_BM *bm, WT_SESSION_IMPL *session, uint64_t object_id, uint32_t flags) +{ + return (__wt_block_switch_object(session, bm->block, object_id, flags)); +} + +/* + * __bm_switch_object_readonly -- + * Modify the tiered object; readonly version. + */ +static int +__bm_switch_object_readonly(WT_BM *bm, WT_SESSION_IMPL *session, uint64_t object_id, uint32_t flags) +{ + WT_UNUSED(object_id); + WT_UNUSED(flags); + + return (__bm_readonly(bm, session)); +} + +/* * __bm_sync -- * Flush a file to disk. */ @@ -589,7 +588,6 @@ __bm_method_set(WT_BM *bm, bool readonly) bm->compact_skip = __bm_compact_skip; bm->compact_start = __bm_compact_start; bm->corrupt = __wt_bm_corrupt; - bm->flush_tier = __bm_flush_tier; bm->free = __bm_free; bm->is_mapped = __bm_is_mapped; bm->map_discard = __bm_map_discard; @@ -601,6 +599,7 @@ __bm_method_set(WT_BM *bm, bool readonly) bm->salvage_valid = __bm_salvage_valid; bm->size = __wt_block_manager_size; bm->stat = __bm_stat; + bm->switch_object = __bm_switch_object; bm->sync = __bm_sync; bm->verify_addr = __bm_verify_addr; bm->verify_end = __bm_verify_end; @@ -616,12 +615,12 @@ __bm_method_set(WT_BM *bm, bool readonly) bm->compact_page_skip = __bm_compact_page_skip_readonly; bm->compact_skip = __bm_compact_skip_readonly; bm->compact_start = __bm_compact_start_readonly; - bm->flush_tier = __bm_flush_tier_readonly; bm->free = __bm_free_readonly; bm->salvage_end = __bm_salvage_end_readonly; bm->salvage_next = __bm_salvage_next_readonly; bm->salvage_start = __bm_salvage_start_readonly; bm->salvage_valid = __bm_salvage_valid_readonly; + bm->switch_object = __bm_switch_object_readonly; bm->sync = __bm_sync_readonly; bm->write = __bm_write_readonly; bm->write_size = __bm_write_size_readonly; @@ -633,8 +632,9 @@ __bm_method_set(WT_BM *bm, bool readonly) * Open a file. */ int -__wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], - bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp) +__wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, + WT_BLOCK_FILE_OPENER *opener, const char *cfg[], bool forced_salvage, bool readonly, + uint32_t allocsize, WT_BM **bmp) { WT_BM *bm; WT_DECL_RET; @@ -644,8 +644,8 @@ __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, const ch WT_RET(__wt_calloc_one(session, &bm)); __bm_method_set(bm, false); - WT_ERR( - __wt_block_open(session, filename, cfg, forced_salvage, readonly, allocsize, &bm->block)); + WT_ERR(__wt_block_open( + session, filename, opener, cfg, forced_salvage, readonly, allocsize, &bm->block)); *bmp = bm; return (0); diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c index c41de4aaaaa..4483b67ab13 100644 --- a/src/third_party/wiredtiger/src/block/block_open.c +++ b/src/third_party/wiredtiger/src/block/block_open.c @@ -138,8 +138,8 @@ __wt_block_configure_first_fit(WT_BLOCK *block, bool on) * Open a block handle. */ int -__wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], - bool forced_salvage, bool readonly, uint32_t allocsize, WT_BLOCK **blockp) +__wt_block_open(WT_SESSION_IMPL *session, const char *filename, WT_BLOCK_FILE_OPENER *opener, + const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BLOCK **blockp) { WT_BLOCK *block; WT_CONFIG_ITEM cval; @@ -176,12 +176,17 @@ __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[ block->ref = 1; block->name_hash = hash; block->allocsize = allocsize; + block->opener = opener; WT_CONN_BLOCK_INSERT(conn, block, bucket); WT_ERR(__wt_strdup(session, filename, &block->name)); WT_ERR(__wt_config_gets(session, cfg, "block_allocation", &cval)); block->allocfirst = WT_STRING_MATCH("first", cval.str, cval.len); + block->has_objects = (opener != NULL); + if (block->has_objects) + /* FIXME-WT-7588 fix 32 bit vs 64 bit mismatch. */ + block->objectid = (uint32_t)opener->current_object_id(opener); /* Configuration: optional OS buffer cache maximum size. */ WT_ERR(__wt_config_gets(session, cfg, "os_cache_max", &cval)); @@ -211,7 +216,12 @@ __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[ if (!readonly && FLD_ISSET(conn->direct_io, WT_DIRECT_IO_DATA)) LF_SET(WT_FS_OPEN_DIRECTIO); block->file_flags = flags; - WT_ERR(__wt_open(session, filename, WT_FS_OPEN_FILE_TYPE_DATA, block->file_flags, &block->fh)); + if (block->has_objects) + WT_ERR(opener->open(opener, session, WT_TIERED_CURRENT_ID, WT_FS_OPEN_FILE_TYPE_DATA, + block->file_flags, &block->fh)); + else + WT_ERR( + __wt_open(session, filename, WT_FS_OPEN_FILE_TYPE_DATA, block->file_flags, &block->fh)); /* Set the file's size. */ WT_ERR(__wt_filesize(session, block->fh, &block->size)); diff --git a/src/third_party/wiredtiger/src/block/block_read.c b/src/third_party/wiredtiger/src/block/block_read.c index 08c1ce8067f..d5c1d90718a 100644 --- a/src/third_party/wiredtiger/src/block/block_read.c +++ b/src/third_party/wiredtiger/src/block/block_read.c @@ -213,43 +213,12 @@ err: int __wt_block_fh(WT_SESSION_IMPL *session, WT_BLOCK *block, uint32_t objectid, WT_FH **fhp) { - WT_DECL_ITEM(tmp); - WT_DECL_RET; - const char *filename; - - if (!block->has_objects || objectid == block->objectid) { + if (!block->has_objects) *fhp = block->fh; - return (0); - } - - /* TODO: tiered: fh readlock; we may want a reference count on each file handle given out. */ - if (objectid * sizeof(WT_FILE_HANDLE *) < block->ofh_alloc && - (*fhp = block->ofh[objectid]) != NULL) - return (0); - - /* TODO: tiered: fh writelock */ - /* Ensure the array goes far enough. */ - WT_RET(__wt_realloc_def(session, &block->ofh_alloc, objectid + 1, &block->ofh)); - if (objectid >= block->max_objectid) - block->max_objectid = objectid + 1; - if ((*fhp = block->ofh[objectid]) != NULL) - return (0); + else + WT_RET(__wt_block_tiered_fh(session, block, objectid, fhp)); - WT_RET(__wt_scr_alloc(session, 0, &tmp)); - if (objectid == 0) - filename = block->name; - else { - WT_ERR(__wt_buf_fmt(session, tmp, "%s.%08" PRIu32, block->name, objectid)); - filename = tmp->data; - } - WT_ERR(__wt_open(session, filename, WT_FS_OPEN_FILE_TYPE_DATA, - WT_FS_OPEN_READONLY | block->file_flags, &block->ofh[objectid])); - *fhp = block->ofh[objectid]; - WT_ASSERT(session, *fhp != NULL); - -err: - __wt_scr_free(session, &tmp); - return (ret); + return (0); } /* diff --git a/src/third_party/wiredtiger/src/block/block_tiered.c b/src/third_party/wiredtiger/src/block/block_tiered.c index b275ccd95a7..4ed0d49811a 100644 --- a/src/third_party/wiredtiger/src/block/block_tiered.c +++ b/src/third_party/wiredtiger/src/block/block_tiered.c @@ -9,77 +9,100 @@ #include "wt_internal.h" /* - * __wt_block_tiered_flush -- - * Flush this file, start another file. + * __block_switch_writeable -- + * Switch a new writeable object. */ -int -__wt_block_tiered_flush( - WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t **flush_cookie, size_t *cookie_size) +static int +__block_switch_writeable(WT_SESSION_IMPL *session, WT_BLOCK *block, uint64_t object_id) { - /* TODO: tiered: fill in the cookie. */ - (void)flush_cookie; - (void)cookie_size; + WT_DECL_RET; - return (__wt_block_tiered_newfile(session, block)); -} + WT_ERR(__wt_close(session, &block->fh)); -/* - * __wt_block_tiered_load -- - * Set up log-structured processing when loading a new root page. - */ -int -__wt_block_tiered_load(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci) -{ /* - * TODO: tiered: this call currently advances the object id, that's probably not appropriate for - * readonly opens. Perhaps it's also not appropriate for opening at an older checkpoint? + * FIXME-WT-7470: write lock while opening a new write handle. */ - if (block->has_objects) { - block->objectid = ci->root_objectid; + WT_ERR(block->opener->open( + block->opener, session, object_id, WT_FS_OPEN_FILE_TYPE_DATA, block->file_flags, &block->fh)); - /* Advance to the next file for future changes. */ - WT_RET(__wt_block_tiered_newfile(session, block)); - } - return (0); +err: + return (ret); } /* - * __wt_block_tiered_newfile -- - * Switch a log-structured block object to a new file. + * __wt_block_tiered_fh -- + * Open an object from the shared tier. */ int -__wt_block_tiered_newfile(WT_SESSION_IMPL *session, WT_BLOCK *block) +__wt_block_tiered_fh(WT_SESSION_IMPL *session, WT_BLOCK *block, uint32_t object_id, WT_FH **fhp) { WT_DECL_ITEM(tmp); WT_DECL_RET; - const char *filename; - - /* Get the old file name again. */ - WT_ERR(__wt_scr_alloc(session, 0, &tmp)); /* - * TODO: tiered: We will get rid of the log id, and this name generation will be replaced by the - * name generated by __tiered_switch. + * FIXME-WT-7470: take a read lock to get a handle, and a write lock to open a handle or extend + * the array. + * + * If the object id isn't larger than the array of file handles, see if it's already opened. */ - WT_ERR(__wt_close(session, &block->fh)); - - /* Bump to a new file ID. */ - ++block->objectid; - WT_ERR(__wt_buf_fmt(session, tmp, "%s.%08" PRIu32, block->name, block->objectid)); - filename = tmp->data; + if (object_id * sizeof(WT_FILE_HANDLE *) < block->ofh_alloc && + (*fhp = block->ofh[object_id]) != NULL) + return (0); - WT_WITH_BUCKET_STORAGE(session->bucket_storage, session, { - ret = __wt_open(session, filename, WT_FS_OPEN_FILE_TYPE_DATA, - WT_FS_OPEN_CREATE | block->file_flags, &block->fh); - }); - WT_ERR(ret); - WT_ERR(__wt_desc_write(session, block->fh, block->allocsize)); + /* Ensure the array is big enough. */ + WT_RET(__wt_realloc_def(session, &block->ofh_alloc, object_id + 1, &block->ofh)); + if (object_id >= block->max_objectid) + block->max_objectid = object_id + 1; + if ((*fhp = block->ofh[object_id]) != NULL) + return (0); - block->size = block->allocsize; - __wt_block_ckpt_destroy(session, &block->live); - WT_ERR(__wt_block_ckpt_init(session, &block->live, "live")); + WT_RET(__wt_scr_alloc(session, 0, &tmp)); + WT_ERR(block->opener->open(block->opener, session, object_id, WT_FS_OPEN_FILE_TYPE_DATA, + WT_FS_OPEN_READONLY | block->file_flags, &block->ofh[object_id])); + *fhp = block->ofh[object_id]; + WT_ASSERT(session, *fhp != NULL); err: __wt_scr_free(session, &tmp); return (ret); } + +/* + * __wt_block_switch_object -- + * Modify an object. + */ +int +__wt_block_switch_object( + WT_SESSION_IMPL *session, WT_BLOCK *block, uint64_t object_id, uint32_t flags) +{ + WT_UNUSED(flags); + + /* + * FIXME-WT-7596 the flags argument will be used in the future to perform various tasks, + * to efficiently mark objects in transition (that is during a switch): + * - mark this file as the writeable file (what currently happens) + * - disallow writes to this object (reads still allowed, we're about to switch) + * - close this object (about to move it, don't allow reopens yet) + * - allow opens on this object again + */ + return (__block_switch_writeable(session, block, object_id)); +} + +/* + * __wt_block_tiered_load -- + * Set up object file processing when loading a new root page. + */ +int +__wt_block_tiered_load(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci) +{ + WT_UNUSED(session); + + if (block->has_objects) + block->objectid = ci->root_objectid; + + /* + * FIXME-WT-7589: There is probably more work here, perhaps in switching the current file, and + * setting the live checkpoint to the argument checkpoint. + */ + return (0); +} diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c index 5e6444dc202..4e5afd92148 100644 --- a/src/third_party/wiredtiger/src/btree/bt_handle.c +++ b/src/third_party/wiredtiger/src/btree/bt_handle.c @@ -57,6 +57,7 @@ __btree_clear(WT_SESSION_IMPL *session) int __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) { + WT_BLOCK_FILE_OPENER *opener; WT_BM *bm; WT_BTREE *btree; WT_CKPT ckpt; @@ -110,15 +111,17 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) /* Initialize and configure the WT_BTREE structure. */ WT_ERR(__btree_conf(session, &ckpt)); - /* Connect to the underlying block manager. */ - filename = dhandle->name; - if (!WT_PREFIX_SKIP(filename, "file:")) - WT_ERR_MSG(session, EINVAL, "expected a 'file:' URI"); + /* + * Get an opener abstraction that the block manager can use to open any of the files that + * represent a btree. In the case of a tiered Btree, that would allow opening different files + * according to an object id in a reference. For a non-tiered Btree, the opener will know to + * always open a single file (given by the filename). + */ + WT_ERR(__wt_tiered_opener(session, dhandle, &opener, &filename)); - WT_WITH_BUCKET_STORAGE(btree->bstorage, session, - ret = __wt_block_manager_open(session, filename, dhandle->cfg, forced_salvage, - F_ISSET(btree, WT_BTREE_READONLY), btree->allocsize, &btree->bm)); - WT_ERR(ret); + /* Connect to the underlying block manager. */ + WT_ERR(__wt_block_manager_open(session, filename, opener, dhandle->cfg, forced_salvage, + F_ISSET(btree, WT_BTREE_READONLY), btree->allocsize, &btree->bm)); bm = btree->bm; @@ -401,6 +404,12 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) else F_SET(btree, WT_BTREE_NO_LOGGING); + WT_RET(__wt_config_gets(session, cfg, "tiered_object", &cval)); + if (cval.val) + F_SET(btree, WT_BTREE_NO_CHECKPOINT); + else + F_CLR(btree, WT_BTREE_NO_CHECKPOINT); + /* Checksums */ WT_RET(__wt_config_gets(session, cfg, "checksum", &cval)); if (WT_STRING_MATCH("on", cval.str, cval.len)) @@ -1003,3 +1012,26 @@ __wt_btree_immediately_durable(WT_SESSION_IMPL *session) (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))) && !F_ISSET(btree, WT_BTREE_NO_LOGGING)); } + +/* + * __wt_btree_switch_object -- + * Switch to a writeable object for a tiered btree. + */ +int +__wt_btree_switch_object(WT_SESSION_IMPL *session, uint64_t object_id, uint32_t flags) +{ + WT_BM *bm; + WT_DECL_RET; + + bm = S2BT(session)->bm; + + /* + * When initially opening a tiered Btree, a tier switch is done internally without the btree + * being fully opened. That's okay, the btree will be told later about the current object + * number. + */ + if (bm != NULL) + ret = bm->switch_object(bm, session, object_id, flags); + + return (ret); +} diff --git a/src/third_party/wiredtiger/src/btree/bt_import.c b/src/third_party/wiredtiger/src/btree/bt_import.c index 6a650cf0647..cb60e9aa14a 100644 --- a/src/third_party/wiredtiger/src/btree/bt_import.c +++ b/src/third_party/wiredtiger/src/btree/bt_import.c @@ -46,7 +46,7 @@ __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **configp) * size, but 512B allows us to read the descriptor block and that's all we care about. */ F_SET(session, WT_SESSION_IMPORT_REPAIR); - WT_ERR(__wt_block_manager_open(session, filename, cfg, false, true, 512, &bm)); + WT_ERR(__wt_block_manager_open(session, filename, NULL, cfg, false, true, 512, &bm)); ret = bm->checkpoint_last(bm, session, &metadata, &checkpoint_list, checkpoint); WT_TRET(bm->close(bm, session)); F_CLR(session, WT_SESSION_IMPORT_REPAIR); @@ -118,7 +118,7 @@ __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **configp) * size. When we did this earlier, we were able to read the descriptor block properly but the * checkpoint's byte representation was wrong because it was using the wrong allocation size. */ - WT_ERR(__wt_block_manager_open(session, filename, cfg, false, true, allocsize, &bm)); + WT_ERR(__wt_block_manager_open(session, filename, NULL, cfg, false, true, allocsize, &bm)); __wt_free(session, checkpoint_list); __wt_free(session, metadata); ret = bm->checkpoint_last(bm, session, &metadata, &checkpoint_list, checkpoint); diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index 908e02e8688..7678ea6b85c 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -192,6 +192,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_alter[] = { {"log", "category", NULL, NULL, confchk_WT_SESSION_create_log_subconfigs, 1}, {"os_cache_dirty_max", "int", NULL, "min=0", NULL, 0}, {"os_cache_max", "int", NULL, "min=0", NULL, 0}, {"readonly", "boolean", NULL, NULL, NULL, 0}, + {"tiered_object", "boolean", NULL, NULL, NULL, 0}, {"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0}, {"write_timestamp_usage", "string", NULL, "choices=[\"always\",\"key_consistent\",\"mixed_mode\"," @@ -305,6 +306,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_create[] = { {"split_deepen_min_child", "int", NULL, NULL, NULL, 0}, {"split_deepen_per_child", "int", NULL, NULL, NULL, 0}, {"split_pct", "int", NULL, "min=50,max=100", NULL, 0}, + {"tiered_object", "boolean", NULL, NULL, NULL, 0}, {"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs, 6}, {"type", "string", NULL, NULL, NULL, 0}, @@ -455,6 +457,7 @@ static const WT_CONFIG_CHECK confchk_file_config[] = { {"split_deepen_min_child", "int", NULL, NULL, NULL, 0}, {"split_deepen_per_child", "int", NULL, NULL, NULL, 0}, {"split_pct", "int", NULL, "min=50,max=100", NULL, 0}, + {"tiered_object", "boolean", NULL, NULL, NULL, 0}, {"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs, 6}, {"value_format", "format", __wt_struct_confchk, NULL, NULL, 0}, @@ -503,6 +506,7 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = { {"split_deepen_min_child", "int", NULL, NULL, NULL, 0}, {"split_deepen_per_child", "int", NULL, NULL, NULL, 0}, {"split_pct", "int", NULL, "min=50,max=100", NULL, 0}, + {"tiered_object", "boolean", NULL, NULL, NULL, 0}, {"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs, 6}, {"value_format", "format", __wt_struct_confchk, NULL, NULL, 0}, @@ -567,6 +571,7 @@ static const WT_CONFIG_CHECK confchk_lsm_meta[] = { {"split_deepen_min_child", "int", NULL, NULL, NULL, 0}, {"split_deepen_per_child", "int", NULL, NULL, NULL, 0}, {"split_pct", "int", NULL, "min=50,max=100", NULL, 0}, + {"tiered_object", "boolean", NULL, NULL, NULL, 0}, {"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs, 6}, {"value_format", "format", __wt_struct_confchk, NULL, NULL, 0}, @@ -616,6 +621,7 @@ static const WT_CONFIG_CHECK confchk_object_meta[] = { {"split_deepen_min_child", "int", NULL, NULL, NULL, 0}, {"split_deepen_per_child", "int", NULL, NULL, NULL, 0}, {"split_pct", "int", NULL, "min=50,max=100", NULL, 0}, + {"tiered_object", "boolean", NULL, NULL, NULL, 0}, {"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs, 6}, {"value_format", "format", __wt_struct_confchk, NULL, NULL, 0}, @@ -680,6 +686,7 @@ static const WT_CONFIG_CHECK confchk_tier_meta[] = { {"split_deepen_min_child", "int", NULL, NULL, NULL, 0}, {"split_deepen_per_child", "int", NULL, NULL, NULL, 0}, {"split_pct", "int", NULL, "min=50,max=100", NULL, 0}, + {"tiered_object", "boolean", NULL, NULL, NULL, 0}, {"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs, 6}, {"value_format", "format", __wt_struct_confchk, NULL, NULL, 0}, @@ -692,13 +699,45 @@ static const WT_CONFIG_CHECK confchk_tier_meta[] = { {NULL, NULL, NULL, NULL, NULL, 0}}; static const WT_CONFIG_CHECK confchk_tiered_meta[] = { + {"access_pattern_hint", "string", NULL, "choices=[\"none\",\"random\",\"sequential\"]", NULL, 0}, + {"allocation_size", "int", NULL, "min=512B,max=128MB", NULL, 0}, {"app_metadata", "string", NULL, NULL, NULL, 0}, {"assert", "category", NULL, NULL, confchk_assert_subconfigs, 4}, + {"block_allocation", "string", NULL, "choices=[\"best\",\"first\"]", NULL, 0}, + {"block_compressor", "string", NULL, NULL, NULL, 0}, + {"cache_resident", "boolean", NULL, NULL, NULL, 0}, + {"checksum", "string", NULL, "choices=[\"on\",\"off\",\"uncompressed\"]", NULL, 0}, {"collator", "string", NULL, NULL, NULL, 0}, {"columns", "list", NULL, NULL, NULL, 0}, - {"last", "string", NULL, NULL, NULL, 0}, + {"dictionary", "int", NULL, "min=0", NULL, 0}, + {"encryption", "category", NULL, NULL, confchk_WT_SESSION_create_encryption_subconfigs, 2}, + {"format", "string", NULL, "choices=[\"btree\"]", NULL, 0}, + {"huffman_key", "string", NULL, NULL, NULL, 0}, {"huffman_value", "string", NULL, NULL, NULL, 0}, + {"ignore_in_memory_cache_size", "boolean", NULL, NULL, NULL, 0}, + {"internal_item_max", "int", NULL, "min=0", NULL, 0}, + {"internal_key_max", "int", NULL, "min=0", NULL, 0}, + {"internal_key_truncate", "boolean", NULL, NULL, NULL, 0}, + {"internal_page_max", "int", NULL, "min=512B,max=512MB", NULL, 0}, + {"key_format", "format", __wt_struct_confchk, NULL, NULL, 0}, + {"key_gap", "int", NULL, "min=0", NULL, 0}, {"last", "string", NULL, NULL, NULL, 0}, + {"leaf_item_max", "int", NULL, "min=0", NULL, 0}, {"leaf_key_max", "int", NULL, "min=0", NULL, 0}, + {"leaf_page_max", "int", NULL, "min=512B,max=512MB", NULL, 0}, + {"leaf_value_max", "int", NULL, "min=0", NULL, 0}, + {"log", "category", NULL, NULL, confchk_WT_SESSION_create_log_subconfigs, 1}, + {"memory_page_image_max", "int", NULL, "min=0", NULL, 0}, + {"memory_page_max", "int", NULL, "min=512B,max=10TB", NULL, 0}, + {"os_cache_dirty_max", "int", NULL, "min=0", NULL, 0}, + {"os_cache_max", "int", NULL, "min=0", NULL, 0}, + {"prefix_compression", "boolean", NULL, NULL, NULL, 0}, + {"prefix_compression_min", "int", NULL, "min=0", NULL, 0}, + {"readonly", "boolean", NULL, NULL, NULL, 0}, + {"split_deepen_min_child", "int", NULL, NULL, NULL, 0}, + {"split_deepen_per_child", "int", NULL, NULL, NULL, 0}, + {"split_pct", "int", NULL, "min=50,max=100", NULL, 0}, + {"tiered_object", "boolean", NULL, NULL, NULL, 0}, {"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs, 6}, {"tiers", "list", NULL, NULL, NULL, 0}, + {"value_format", "format", __wt_struct_confchk, NULL, NULL, 0}, {"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0}, {"write_timestamp_usage", "string", NULL, "choices=[\"always\",\"key_consistent\",\"mixed_mode\"," @@ -1101,9 +1140,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", "assert=(commit_timestamp=none,durable_timestamp=none," "read_timestamp=none,write_timestamp=off),cache_resident=false," "checkpoint=,exclusive_refreshed=true,log=(enabled=true)," - "os_cache_dirty_max=0,os_cache_max=0,readonly=false,verbose=[]," - "write_timestamp_usage=none", - confchk_WT_SESSION_alter, 12}, + "os_cache_dirty_max=0,os_cache_max=0,readonly=false," + "tiered_object=false,verbose=[],write_timestamp_usage=none", + confchk_WT_SESSION_alter, 13}, {"WT_SESSION.begin_transaction", "ignore_prepare=false,isolation=,name=,operation_timeout_ms=0," "priority=0,read_before_oldest=false,read_timestamp=," @@ -1137,10 +1176,11 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", "memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0," "prefix_compression=false,prefix_compression_min=4,readonly=false" ",source=,split_deepen_min_child=0,split_deepen_per_child=0," - "split_pct=90,tiered_storage=(auth_token=,bucket=,bucket_prefix=," - "local_retention=300,name=,object_target_size=10M),type=file," - "value_format=u,verbose=[],write_timestamp_usage=none", - confchk_WT_SESSION_create, 49}, + "split_pct=90,tiered_object=false,tiered_storage=(auth_token=," + "bucket=,bucket_prefix=,local_retention=300,name=," + "object_target_size=10M),type=file,value_format=u,verbose=[]," + "write_timestamp_usage=none", + confchk_WT_SESSION_create, 50}, {"WT_SESSION.drop", "checkpoint_wait=true,force=false,lock_wait=true," "remove_files=true", @@ -1203,11 +1243,11 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", "log=(enabled=true),memory_page_image_max=0,memory_page_max=5MB," "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," "prefix_compression_min=4,readonly=false,split_deepen_min_child=0" - ",split_deepen_per_child=0,split_pct=90," + ",split_deepen_per_child=0,split_pct=90,tiered_object=false," "tiered_storage=(auth_token=,bucket=,bucket_prefix=," "local_retention=300,name=,object_target_size=10M),value_format=u" ",verbose=[],write_timestamp_usage=none", - confchk_file_config, 41}, + confchk_file_config, 42}, {"file.meta", "access_pattern_hint=none,allocation_size=4KB,app_metadata=," "assert=(commit_timestamp=none,durable_timestamp=none," @@ -1223,11 +1263,11 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", "log=(enabled=true),memory_page_image_max=0,memory_page_max=5MB," "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," "prefix_compression_min=4,readonly=false,split_deepen_min_child=0" - ",split_deepen_per_child=0,split_pct=90," + ",split_deepen_per_child=0,split_pct=90,tiered_object=false," "tiered_storage=(auth_token=,bucket=,bucket_prefix=," "local_retention=300,name=,object_target_size=10M),value_format=u" ",verbose=[],version=(major=0,minor=0),write_timestamp_usage=none", - confchk_file_meta, 46}, + confchk_file_meta, 47}, {"index.meta", "app_metadata=,assert=(commit_timestamp=none," "durable_timestamp=none,read_timestamp=none,write_timestamp=off)," @@ -1254,11 +1294,11 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", "memory_page_max=5MB,old_chunks=,os_cache_dirty_max=0," "os_cache_max=0,prefix_compression=false,prefix_compression_min=4" ",readonly=false,split_deepen_min_child=0," - "split_deepen_per_child=0,split_pct=90," + "split_deepen_per_child=0,split_pct=90,tiered_object=false," "tiered_storage=(auth_token=,bucket=,bucket_prefix=," "local_retention=300,name=,object_target_size=10M),value_format=u" ",verbose=[],write_timestamp_usage=none", - confchk_lsm_meta, 45}, + confchk_lsm_meta, 46}, {"object.meta", "access_pattern_hint=none,allocation_size=4KB,app_metadata=," "assert=(commit_timestamp=none,durable_timestamp=none," @@ -1274,11 +1314,11 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", "log=(enabled=true),memory_page_image_max=0,memory_page_max=5MB," "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," "prefix_compression_min=4,readonly=false,split_deepen_min_child=0" - ",split_deepen_per_child=0,split_pct=90," + ",split_deepen_per_child=0,split_pct=90,tiered_object=false," "tiered_storage=(auth_token=,bucket=,bucket_prefix=," "local_retention=300,name=,object_target_size=10M),value_format=u" ",verbose=[],version=(major=0,minor=0),write_timestamp_usage=none", - confchk_object_meta, 47}, + confchk_object_meta, 48}, {"table.meta", "app_metadata=,assert=(commit_timestamp=none," "durable_timestamp=none,read_timestamp=none,write_timestamp=off)," @@ -1301,17 +1341,30 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator", "memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0," "prefix_compression=false,prefix_compression_min=4,readonly=false" ",split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90," - "tiered_storage=(auth_token=,bucket=,bucket_prefix=," - "local_retention=300,name=,object_target_size=10M),value_format=u" - ",verbose=[],version=(major=0,minor=0),write_timestamp_usage=none", - confchk_tier_meta, 48}, + "tiered_object=false,tiered_storage=(auth_token=,bucket=," + "bucket_prefix=,local_retention=300,name=,object_target_size=10M)" + ",value_format=u,verbose=[],version=(major=0,minor=0)," + "write_timestamp_usage=none", + confchk_tier_meta, 49}, {"tiered.meta", - "app_metadata=,assert=(commit_timestamp=none," - "durable_timestamp=none,read_timestamp=none,write_timestamp=off)," - "collator=,columns=,last=0,tiered_storage=(auth_token=,bucket=," + "access_pattern_hint=none,allocation_size=4KB,app_metadata=," + "assert=(commit_timestamp=none,durable_timestamp=none," + "read_timestamp=none,write_timestamp=off),block_allocation=best," + "block_compressor=,cache_resident=false,checksum=uncompressed," + "collator=,columns=,dictionary=0,encryption=(keyid=,name=)," + "format=btree,huffman_key=,huffman_value=," + "ignore_in_memory_cache_size=false,internal_item_max=0," + "internal_key_max=0,internal_key_truncate=true," + "internal_page_max=4KB,key_format=u,key_gap=10,last=0," + "leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB," + "leaf_value_max=0,log=(enabled=true),memory_page_image_max=0," + "memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0," + "prefix_compression=false,prefix_compression_min=4,readonly=false" + ",split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90," + "tiered_object=false,tiered_storage=(auth_token=,bucket=," "bucket_prefix=,local_retention=300,name=,object_target_size=10M)" - ",tiers=,verbose=[],write_timestamp_usage=none", - confchk_tiered_meta, 9}, + ",tiers=,value_format=u,verbose=[],write_timestamp_usage=none", + confchk_tiered_meta, 44}, {"wiredtiger_open", "buffer_alignment=-1,builtin_extension_config=,cache_cursors=true" ",cache_max_wait_ms=0,cache_overhead=8,cache_size=100MB," diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c index 5e56a841de4..41654780528 100644 --- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c +++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c @@ -70,6 +70,7 @@ __conn_dhandle_config_set(WT_SESSION_IMPL *session) WT_ERR(__wt_calloc_def(session, 3, &dhandle->cfg)); switch (dhandle->type) { case WT_DHANDLE_TYPE_BTREE: + case WT_DHANDLE_TYPE_TIERED: /* * We are stripping out all checkpoint related information from the config string. We save * the rest of the metadata string, that is essentially static and unchanging and then @@ -105,9 +106,6 @@ __conn_dhandle_config_set(WT_SESSION_IMPL *session) case WT_DHANDLE_TYPE_TABLE: WT_ERR(__wt_strdup(session, WT_CONFIG_BASE(session, table_meta), &dhandle->cfg[0])); break; - case WT_DHANDLE_TYPE_TIERED: - WT_ERR(__wt_strdup(session, WT_CONFIG_BASE(session, tiered_meta), &dhandle->cfg[0])); - break; case WT_DHANDLE_TYPE_TIERED_TREE: WT_ERR(__wt_strdup(session, WT_CONFIG_BASE(session, tier_meta), &dhandle->cfg[0])); break; @@ -148,6 +146,7 @@ __conn_dhandle_destroy(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle) ret = __wt_schema_close_table(session, (WT_TABLE *)dhandle); break; case WT_DHANDLE_TYPE_TIERED: + WT_WITH_DHANDLE(session, dhandle, ret = __wt_btree_discard(session)); ret = __wt_tiered_close(session, (WT_TIERED *)dhandle); break; case WT_DHANDLE_TYPE_TIERED_TREE: @@ -401,6 +400,8 @@ __wt_conn_dhandle_close(WT_SESSION_IMPL *session, bool final, bool mark_dead) WT_TRET(__wt_schema_close_table(session, (WT_TABLE *)dhandle)); break; case WT_DHANDLE_TYPE_TIERED: + WT_TRET(__wt_btree_close(session)); + F_CLR(btree, WT_BTREE_SPECIAL_FLAGS); WT_TRET(__wt_tiered_close(session, (WT_TIERED *)dhandle)); break; case WT_DHANDLE_TYPE_TIERED_TREE: @@ -562,6 +563,18 @@ __wt_conn_dhandle_open(WT_SESSION_IMPL *session, const char *cfg[], uint32_t fla WT_ERR(__wt_schema_open_table(session)); break; case WT_DHANDLE_TYPE_TIERED: + /* Set any special flags on the btree handle. */ + F_SET(btree, LF_MASK(WT_BTREE_SPECIAL_FLAGS)); + + /* + * Allocate data-source statistics memory. We don't allocate that memory when allocating the + * data handle because not all data handles need statistics (for example, handles used for + * checkpoint locking). If we are reopening the handle, then it may already have statistics + * memory, check to avoid the leak. + */ + if (dhandle->stat_array == NULL) + WT_ERR(__wt_stat_dsrc_init(session, dhandle)); + WT_ERR(__wt_tiered_open(session, cfg)); break; case WT_DHANDLE_TYPE_TIERED_TREE: diff --git a/src/third_party/wiredtiger/src/conn/conn_tiered.c b/src/third_party/wiredtiger/src/conn/conn_tiered.c index 24a4a5a16f7..295d94783b1 100644 --- a/src/third_party/wiredtiger/src/conn/conn_tiered.c +++ b/src/third_party/wiredtiger/src/conn/conn_tiered.c @@ -134,9 +134,9 @@ __tier_flush_meta( uint64_t now; char *newconfig, *obj_value; const char *cfg[3] = {NULL, NULL, NULL}; - bool tracking; + bool release, tracking; - tracking = false; + release = tracking = false; WT_RET(__wt_scr_alloc(session, 512, &buf)); dhandle = &tiered->iface; @@ -145,6 +145,7 @@ __tier_flush_meta( tracking = true; WT_ERR(__wt_session_get_dhandle(session, dhandle->name, NULL, NULL, WT_DHANDLE_EXCLUSIVE)); + release = true; /* * Once the flush call succeeds we want to first remove the file: entry from the metadata and * then update the object: metadata to indicate the flush is complete. @@ -162,7 +163,8 @@ __tier_flush_meta( err: __wt_free(session, newconfig); - WT_TRET(__wt_session_release_dhandle(session)); + if (release) + WT_TRET(__wt_session_release_dhandle(session)); __wt_scr_free(session, &buf); if (tracking) WT_TRET(__wt_meta_track_off(session, true, ret != 0)); @@ -180,6 +182,7 @@ __wt_tier_do_flush( WT_DECL_RET; WT_FILE_SYSTEM *bucket_fs; WT_STORAGE_SOURCE *storage_source; + uint32_t msec, retry; const char *local_name, *obj_name; storage_source = tiered->bstorage->storage_source; @@ -194,8 +197,21 @@ __wt_tier_do_flush( WT_RET(storage_source->ss_flush( storage_source, &session->iface, bucket_fs, local_name, obj_name, NULL)); - WT_WITH_CHECKPOINT_LOCK(session, - WT_WITH_SCHEMA_LOCK(session, ret = __tier_flush_meta(session, tiered, local_uri, obj_uri))); + /* + * Flushing the metadata grabs the data handle with exclusive access, and the data handle may be + * held by the thread that queues the flush tier work item. As a result, the handle may be busy, + * so retry as needed, up to a few seconds. + */ + for (msec = 10, retry = 0; msec < 3000; msec *= 2, retry++) { + if (retry != 0) + __wt_sleep(0, msec * WT_THOUSAND); + WT_WITH_CHECKPOINT_LOCK(session, + WT_WITH_SCHEMA_LOCK( + session, ret = __tier_flush_meta(session, tiered, local_uri, obj_uri))); + if (ret != EBUSY) + break; + WT_STAT_CONN_INCR(session, flush_tier_busy); + } WT_RET(ret); /* diff --git a/src/third_party/wiredtiger/src/include/block.h b/src/third_party/wiredtiger/src/include/block.h index 2006be2f9f4..5ce6c364ae9 100644 --- a/src/third_party/wiredtiger/src/include/block.h +++ b/src/third_party/wiredtiger/src/include/block.h @@ -185,7 +185,6 @@ struct __wt_bm { int (*compact_skip)(WT_BM *, WT_SESSION_IMPL *, bool *); int (*compact_start)(WT_BM *, WT_SESSION_IMPL *); int (*corrupt)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); - int (*flush_tier)(WT_BM *, WT_SESSION_IMPL *, uint8_t **, size_t *); int (*free)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); bool (*is_mapped)(WT_BM *, WT_SESSION_IMPL *); int (*map_discard)(WT_BM *, WT_SESSION_IMPL *, void *, size_t); @@ -197,6 +196,7 @@ struct __wt_bm { int (*salvage_valid)(WT_BM *, WT_SESSION_IMPL *, uint8_t *, size_t, bool); int (*size)(WT_BM *, WT_SESSION_IMPL *, wt_off_t *); int (*stat)(WT_BM *, WT_SESSION_IMPL *, WT_DSRC_STATS *stats); + int (*switch_object)(WT_BM *, WT_SESSION_IMPL *, uint64_t, uint32_t); int (*sync)(WT_BM *, WT_SESSION_IMPL *, bool); int (*verify_addr)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); int (*verify_end)(WT_BM *, WT_SESSION_IMPL *); @@ -221,8 +221,9 @@ struct __wt_bm { * Block manager handle, references a single file. */ struct __wt_block { - const char *name; /* Name */ - uint64_t name_hash; /* Hash of name */ + const char *name; /* Name */ + uint64_t name_hash; /* Hash of name */ + WT_BLOCK_FILE_OPENER *opener; /* how to open files/objects */ /* A list of block manager handles, sharing a file descriptor. */ uint32_t ref; /* References */ @@ -239,7 +240,6 @@ struct __wt_block { /* Configuration information, set when the file is opened. */ uint32_t allocfirst; /* Allocation is first-fit */ uint32_t allocsize; /* Allocation size */ - bool has_objects; /* Address cookies contain object id */ size_t os_cache; /* System buffer cache flush max */ size_t os_cache_max; size_t os_cache_dirty_max; @@ -247,8 +247,11 @@ struct __wt_block { u_int block_header; /* Header length */ /* Object file tracking. */ - uint32_t file_flags, objectid, max_objectid; - WT_FH **ofh; + bool has_objects; /* Address cookies contain object id */ + uint32_t file_flags; /* Flags for opening objects */ + uint32_t objectid; /* Current writeable object id */ + uint32_t max_objectid; /* Size of object handle array */ + WT_FH **ofh; /* Object file handles */ size_t ofh_alloc; /* @@ -316,6 +319,20 @@ struct __wt_block_desc { #define WT_BLOCK_DESC_SIZE 16 /* + * WT_BLOCK_FILE_OPENER -- + * An open callback for the block manager. This hides details about how to access the + * different objects that make up a tiered file. + */ +struct __wt_block_file_opener { + /* An id to be used with the open call to reference the current object. */ +#define WT_TIERED_CURRENT_ID 0xFFFFFFFFFFFFFFFFULL + int (*open)( + WT_BLOCK_FILE_OPENER *, WT_SESSION_IMPL *, uint64_t, WT_FS_OPEN_FILE_TYPE, u_int, WT_FH **); + uint64_t (*current_object_id)(WT_BLOCK_FILE_OPENER *); + void *cookie; /* Used in open call */ +}; + +/* * __wt_block_desc_byteswap -- * Handle big- and little-endian transformation of a description block. */ diff --git a/src/third_party/wiredtiger/src/include/dhandle.h b/src/third_party/wiredtiger/src/include/dhandle.h index 01cfeffce6e..c17970f4760 100644 --- a/src/third_party/wiredtiger/src/include/dhandle.h +++ b/src/third_party/wiredtiger/src/include/dhandle.h @@ -94,8 +94,8 @@ struct __wt_data_handle { WT_DHANDLE_TYPE_TIERED_TREE } type; - /* This will include the tiered type soon. */ -#define WT_DHANDLE_BTREE(dhandle) ((dhandle)->type == WT_DHANDLE_TYPE_BTREE) +#define WT_DHANDLE_BTREE(dhandle) \ + ((dhandle)->type == WT_DHANDLE_TYPE_BTREE || (dhandle)->type == WT_DHANDLE_TYPE_TIERED) bool compact_skip; /* If the handle failed to compact */ diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index e89dd2c943e..92cdcdbf733 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -169,8 +169,8 @@ extern int __wt_block_manager_drop(WT_SESSION_IMPL *session, const char *filenam extern int __wt_block_manager_named_size(WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, - const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp) - WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); + WT_BLOCK_FILE_OPENER *opener, const char *cfg[], bool forced_salvage, bool readonly, + uint32_t allocsize, WT_BM **bmp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_regionp, @@ -182,9 +182,9 @@ extern int __wt_block_off_free(WT_SESSION_IMPL *session, WT_BLOCK *block, uint32 wt_off_t offset, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_off_remove_overlap(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t off, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], - bool forced_salvage, bool readonly, uint32_t allocsize, WT_BLOCK **blockp) - WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, + WT_BLOCK_FILE_OPENER *opener, const char *cfg[], bool forced_salvage, bool readonly, + uint32_t allocsize, WT_BLOCK **blockp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint32_t objectid, wt_off_t offset, uint32_t size, uint32_t checksum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -200,12 +200,12 @@ extern int __wt_block_salvage_valid(WT_SESSION_IMPL *session, WT_BLOCK *block, u size_t addr_size, bool valid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_size_alloc(WT_SESSION_IMPL *session, WT_SIZE **szp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_tiered_flush(WT_SESSION_IMPL *session, WT_BLOCK *block, - uint8_t **flush_cookie, size_t *cookie_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_switch_object(WT_SESSION_IMPL *session, WT_BLOCK *block, uint64_t object_id, + uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_tiered_fh(WT_SESSION_IMPL *session, WT_BLOCK *block, uint32_t object_id, + WT_FH **fhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_tiered_load(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_tiered_newfile(WT_SESSION_IMPL *session, WT_BLOCK *block) - WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region, @@ -304,6 +304,8 @@ extern int __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btree_switch_object(WT_SESSION_IMPL *session, uint64_t object_id, uint32_t flags) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_btree_tree_open(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) @@ -1466,6 +1468,9 @@ extern int __wt_tiered_name(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle, u uint32_t flags, const char **retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_tiered_open(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_tiered_opener(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle, + WT_BLOCK_FILE_OPENER **openerp, const char **filenamep) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_tiered_put_drop_local(WT_SESSION_IMPL *session, WT_TIERED *tiered, uint64_t id) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_tiered_put_drop_shared(WT_SESSION_IMPL *session, WT_TIERED *tiered, uint64_t id) diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index 3f4bd5150e3..72b67b9df44 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -586,6 +586,7 @@ struct __wt_connection_stats { int64_t rec_time_window_prepared; int64_t rec_split_stashed_bytes; int64_t rec_split_stashed_objects; + int64_t flush_tier_busy; int64_t flush_tier; int64_t session_open; int64_t session_query_ts; diff --git a/src/third_party/wiredtiger/src/include/tiered.h b/src/third_party/wiredtiger/src/include/tiered.h index 8fa2fec35a7..d4b973da98c 100644 --- a/src/third_party/wiredtiger/src/include/tiered.h +++ b/src/third_party/wiredtiger/src/include/tiered.h @@ -111,6 +111,8 @@ struct __wt_tiered { WT_TIERED_TIERS tiers[WT_TIERED_MAX_TIERS]; /* Tiers array */ + WT_BLOCK_FILE_OPENER opener; + uint64_t current_id; /* Current object id number */ uint64_t next_id; /* Next object number */ diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index d0a4c0ad38e..f1ac840bfca 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -4821,11 +4821,11 @@ struct __wt_storage_source { */ int (*ss_flush)(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session, WT_FILE_SYSTEM *file_system, const char *source, const char *object, - const char *config); + const char *config); /*! * After a flush, rename the source file from the default file system to be cached in - * the shared object storage. + * the shared object storage. * * @errors * @@ -5565,541 +5565,543 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1271 /*! reconciliation: split objects currently awaiting free */ #define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1272 +/*! session: flush_tier busy retries */ +#define WT_STAT_CONN_FLUSH_TIER_BUSY 1273 /*! session: flush_tier operation calls */ -#define WT_STAT_CONN_FLUSH_TIER 1273 +#define WT_STAT_CONN_FLUSH_TIER 1274 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1274 +#define WT_STAT_CONN_SESSION_OPEN 1275 /*! session: session query timestamp calls */ -#define WT_STAT_CONN_SESSION_QUERY_TS 1275 +#define WT_STAT_CONN_SESSION_QUERY_TS 1276 /*! session: table alter failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1276 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1277 /*! session: table alter successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1277 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1278 /*! session: table alter unchanged and skipped */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1278 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1279 /*! session: table compact failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1279 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1280 /*! session: table compact successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1280 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1281 /*! session: table create failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1281 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1282 /*! session: table create successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1282 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1283 /*! session: table drop failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1283 +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1284 /*! session: table drop successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1284 +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1285 /*! session: table rename failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1285 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1286 /*! session: table rename successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1286 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1287 /*! session: table salvage failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1287 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1288 /*! session: table salvage successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1288 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1289 /*! session: table truncate failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1289 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1290 /*! session: table truncate successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1290 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1291 /*! session: table verify failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1291 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1292 /*! session: table verify successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1292 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1293 /*! thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1293 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1294 /*! thread-state: active filesystem read calls */ -#define WT_STAT_CONN_THREAD_READ_ACTIVE 1294 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1295 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1295 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1296 /*! thread-yield: application thread time evicting (usecs) */ -#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1296 +#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1297 /*! thread-yield: application thread time waiting for cache (usecs) */ -#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1297 +#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1298 /*! * thread-yield: connection close blocked waiting for transaction state * stabilization */ -#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1298 +#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1299 /*! thread-yield: connection close yielded for lsm manager shutdown */ -#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1299 +#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1300 /*! thread-yield: data handle lock yielded */ -#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1300 +#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1301 /*! * thread-yield: get reference for page index and slot time sleeping * (usecs) */ -#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1301 +#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1302 /*! thread-yield: log server sync yielded for log write */ -#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1302 +#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1303 /*! thread-yield: page access yielded due to prepare state change */ -#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1303 +#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1304 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1304 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1305 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1305 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1306 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1306 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1307 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1307 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1308 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1308 +#define WT_STAT_CONN_PAGE_SLEEP 1309 /*! * thread-yield: page delete rollback time sleeping for state change * (usecs) */ -#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1309 +#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1310 /*! thread-yield: page reconciliation yielded due to child modification */ -#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1310 +#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1311 /*! transaction: Number of prepared updates */ -#define WT_STAT_CONN_TXN_PREPARED_UPDATES 1311 +#define WT_STAT_CONN_TXN_PREPARED_UPDATES 1312 /*! transaction: Number of prepared updates committed */ -#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COMMITTED 1312 +#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COMMITTED 1313 /*! transaction: Number of prepared updates repeated on the same key */ -#define WT_STAT_CONN_TXN_PREPARED_UPDATES_KEY_REPEATED 1313 +#define WT_STAT_CONN_TXN_PREPARED_UPDATES_KEY_REPEATED 1314 /*! transaction: Number of prepared updates rolled back */ -#define WT_STAT_CONN_TXN_PREPARED_UPDATES_ROLLEDBACK 1314 +#define WT_STAT_CONN_TXN_PREPARED_UPDATES_ROLLEDBACK 1315 /*! transaction: prepared transactions */ -#define WT_STAT_CONN_TXN_PREPARE 1315 +#define WT_STAT_CONN_TXN_PREPARE 1316 /*! transaction: prepared transactions committed */ -#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1316 +#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1317 /*! transaction: prepared transactions currently active */ -#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1317 +#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1318 /*! transaction: prepared transactions rolled back */ -#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1318 +#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1319 /*! transaction: query timestamp calls */ -#define WT_STAT_CONN_TXN_QUERY_TS 1319 +#define WT_STAT_CONN_TXN_QUERY_TS 1320 /*! transaction: rollback to stable calls */ -#define WT_STAT_CONN_TXN_RTS 1320 +#define WT_STAT_CONN_TXN_RTS 1321 /*! transaction: rollback to stable pages visited */ -#define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1321 +#define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1322 /*! transaction: rollback to stable tree walk skipping pages */ -#define WT_STAT_CONN_TXN_RTS_TREE_WALK_SKIP_PAGES 1322 +#define WT_STAT_CONN_TXN_RTS_TREE_WALK_SKIP_PAGES 1323 /*! transaction: rollback to stable updates aborted */ -#define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1323 +#define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1324 /*! transaction: sessions scanned in each walk of concurrent sessions */ -#define WT_STAT_CONN_TXN_SESSIONS_WALKED 1324 +#define WT_STAT_CONN_TXN_SESSIONS_WALKED 1325 /*! transaction: set timestamp calls */ -#define WT_STAT_CONN_TXN_SET_TS 1325 +#define WT_STAT_CONN_TXN_SET_TS 1326 /*! transaction: set timestamp durable calls */ -#define WT_STAT_CONN_TXN_SET_TS_DURABLE 1326 +#define WT_STAT_CONN_TXN_SET_TS_DURABLE 1327 /*! transaction: set timestamp durable updates */ -#define WT_STAT_CONN_TXN_SET_TS_DURABLE_UPD 1327 +#define WT_STAT_CONN_TXN_SET_TS_DURABLE_UPD 1328 /*! transaction: set timestamp oldest calls */ -#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1328 +#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1329 /*! transaction: set timestamp oldest updates */ -#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1329 +#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1330 /*! transaction: set timestamp stable calls */ -#define WT_STAT_CONN_TXN_SET_TS_STABLE 1330 +#define WT_STAT_CONN_TXN_SET_TS_STABLE 1331 /*! transaction: set timestamp stable updates */ -#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1331 +#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1332 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1332 +#define WT_STAT_CONN_TXN_BEGIN 1333 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1333 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1334 /*! * transaction: transaction checkpoint currently running for history * store file */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING_HS 1334 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING_HS 1335 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1335 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1336 /*! * transaction: transaction checkpoint history store file duration * (usecs) */ -#define WT_STAT_CONN_TXN_HS_CKPT_DURATION 1336 +#define WT_STAT_CONN_TXN_HS_CKPT_DURATION 1337 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1337 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1338 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1338 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1339 /*! * transaction: transaction checkpoint most recent duration for gathering * all handles (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION 1339 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION 1340 /*! * transaction: transaction checkpoint most recent duration for gathering * applied handles (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_APPLY 1340 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_APPLY 1341 /*! * transaction: transaction checkpoint most recent duration for gathering * skipped handles (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_SKIP 1341 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_SKIP 1342 /*! transaction: transaction checkpoint most recent handles applied */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_APPLIED 1342 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_APPLIED 1343 /*! transaction: transaction checkpoint most recent handles skipped */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_SKIPPED 1343 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_SKIPPED 1344 /*! transaction: transaction checkpoint most recent handles walked */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_WALKED 1344 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_WALKED 1345 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1345 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1346 /*! transaction: transaction checkpoint prepare currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING 1346 +#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING 1347 /*! transaction: transaction checkpoint prepare max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX 1347 +#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX 1348 /*! transaction: transaction checkpoint prepare min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN 1348 +#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN 1349 /*! transaction: transaction checkpoint prepare most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT 1349 +#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT 1350 /*! transaction: transaction checkpoint prepare total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL 1350 +#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL 1351 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1351 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1352 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1352 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1353 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1353 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1354 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1354 +#define WT_STAT_CONN_TXN_CHECKPOINT 1355 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1355 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1356 /*! transaction: transaction failures due to history store */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1356 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1357 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1357 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1358 /*! * transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1358 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1359 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1359 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1360 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1360 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1361 /*! transaction: transaction range of timestamps currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1361 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1362 /*! transaction: transaction range of timestamps pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1362 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1363 /*! * transaction: transaction range of timestamps pinned by the oldest * active read timestamp */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1363 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1364 /*! * transaction: transaction range of timestamps pinned by the oldest * timestamp */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1364 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1365 /*! transaction: transaction read timestamp of the oldest active reader */ -#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1365 +#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1366 /*! transaction: transaction rollback to stable currently running */ -#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE_RUNNING 1366 +#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE_RUNNING 1367 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1367 +#define WT_STAT_CONN_TXN_SYNC 1368 /*! transaction: transaction walk of concurrent sessions */ -#define WT_STAT_CONN_TXN_WALK_SESSIONS 1368 +#define WT_STAT_CONN_TXN_WALK_SESSIONS 1369 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1369 +#define WT_STAT_CONN_TXN_COMMIT 1370 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1370 +#define WT_STAT_CONN_TXN_ROLLBACK 1371 /*! LSM: sleep for LSM checkpoint throttle */ -#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1371 +#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1372 /*! LSM: sleep for LSM merge throttle */ -#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1372 +#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1373 /*! cache: bytes currently in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_INUSE 1373 +#define WT_STAT_CONN_CACHE_BYTES_INUSE 1374 /*! cache: bytes dirty in the cache cumulative */ -#define WT_STAT_CONN_CACHE_BYTES_DIRTY_TOTAL 1374 +#define WT_STAT_CONN_CACHE_BYTES_DIRTY_TOTAL 1375 /*! cache: bytes read into cache */ -#define WT_STAT_CONN_CACHE_BYTES_READ 1375 +#define WT_STAT_CONN_CACHE_BYTES_READ 1376 /*! cache: bytes written from cache */ -#define WT_STAT_CONN_CACHE_BYTES_WRITE 1376 +#define WT_STAT_CONN_CACHE_BYTES_WRITE 1377 /*! cache: checkpoint blocked page eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1377 +#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1378 /*! * cache: checkpoint of history store file blocked non-history store page * eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_BLOCKED_CHECKPOINT_HS 1378 +#define WT_STAT_CONN_CACHE_EVICTION_BLOCKED_CHECKPOINT_HS 1379 /*! cache: eviction walk target pages histogram - 0-9 */ -#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT10 1379 +#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT10 1380 /*! cache: eviction walk target pages histogram - 10-31 */ -#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT32 1380 +#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT32 1381 /*! cache: eviction walk target pages histogram - 128 and higher */ -#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_GE128 1381 +#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_GE128 1382 /*! cache: eviction walk target pages histogram - 32-63 */ -#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT64 1382 +#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT64 1383 /*! cache: eviction walk target pages histogram - 64-128 */ -#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT128 1383 +#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT128 1384 /*! * cache: eviction walk target pages reduced due to history store cache * pressure */ -#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_REDUCED 1384 +#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_REDUCED 1385 /*! cache: eviction walks abandoned */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ABANDONED 1385 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ABANDONED 1386 /*! cache: eviction walks gave up because they restarted their walk twice */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STOPPED 1386 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STOPPED 1387 /*! * cache: eviction walks gave up because they saw too many pages and * found no candidates */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_NO_TARGETS 1387 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_NO_TARGETS 1388 /*! * cache: eviction walks gave up because they saw too many pages and * found too few candidates */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 1388 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 1389 /*! cache: eviction walks reached end of tree */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ENDED 1389 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ENDED 1390 /*! cache: eviction walks restarted */ -#define WT_STAT_CONN_CACHE_EVICTION_WALK_RESTART 1390 +#define WT_STAT_CONN_CACHE_EVICTION_WALK_RESTART 1391 /*! cache: eviction walks started from root of tree */ -#define WT_STAT_CONN_CACHE_EVICTION_WALK_FROM_ROOT 1391 +#define WT_STAT_CONN_CACHE_EVICTION_WALK_FROM_ROOT 1392 /*! cache: eviction walks started from saved location in tree */ -#define WT_STAT_CONN_CACHE_EVICTION_WALK_SAVED_POS 1392 +#define WT_STAT_CONN_CACHE_EVICTION_WALK_SAVED_POS 1393 /*! cache: hazard pointer blocked page eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1393 +#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1394 /*! cache: history store table insert calls */ -#define WT_STAT_CONN_CACHE_HS_INSERT 1394 +#define WT_STAT_CONN_CACHE_HS_INSERT 1395 /*! cache: history store table insert calls that returned restart */ -#define WT_STAT_CONN_CACHE_HS_INSERT_RESTART 1395 +#define WT_STAT_CONN_CACHE_HS_INSERT_RESTART 1396 /*! * cache: history store table out-of-order resolved updates that lose * their durable timestamp */ -#define WT_STAT_CONN_CACHE_HS_ORDER_LOSE_DURABLE_TIMESTAMP 1396 +#define WT_STAT_CONN_CACHE_HS_ORDER_LOSE_DURABLE_TIMESTAMP 1397 /*! * cache: history store table out-of-order updates that were fixed up by * reinserting with the fixed timestamp */ -#define WT_STAT_CONN_CACHE_HS_ORDER_REINSERT 1397 +#define WT_STAT_CONN_CACHE_HS_ORDER_REINSERT 1398 /*! cache: history store table reads */ -#define WT_STAT_CONN_CACHE_HS_READ 1398 +#define WT_STAT_CONN_CACHE_HS_READ 1399 /*! cache: history store table reads missed */ -#define WT_STAT_CONN_CACHE_HS_READ_MISS 1399 +#define WT_STAT_CONN_CACHE_HS_READ_MISS 1400 /*! cache: history store table reads requiring squashed modifies */ -#define WT_STAT_CONN_CACHE_HS_READ_SQUASH 1400 +#define WT_STAT_CONN_CACHE_HS_READ_SQUASH 1401 /*! * cache: history store table truncation by rollback to stable to remove * an unstable update */ -#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_RTS_UNSTABLE 1401 +#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_RTS_UNSTABLE 1402 /*! * cache: history store table truncation by rollback to stable to remove * an update */ -#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_RTS 1402 +#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_RTS 1403 /*! cache: history store table truncation to remove an update */ -#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE 1403 +#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE 1404 /*! * cache: history store table truncation to remove range of updates due * to key being removed from the data page during reconciliation */ -#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_ONPAGE_REMOVAL 1404 +#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_ONPAGE_REMOVAL 1405 /*! * cache: history store table truncation to remove range of updates due * to out-of-order timestamp update on data page */ -#define WT_STAT_CONN_CACHE_HS_ORDER_REMOVE 1405 +#define WT_STAT_CONN_CACHE_HS_ORDER_REMOVE 1406 /*! cache: history store table writes requiring squashed modifies */ -#define WT_STAT_CONN_CACHE_HS_WRITE_SQUASH 1406 +#define WT_STAT_CONN_CACHE_HS_WRITE_SQUASH 1407 /*! cache: in-memory page passed criteria to be split */ -#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1407 +#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1408 /*! cache: in-memory page splits */ -#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1408 +#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1409 /*! cache: internal pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1409 +#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1410 /*! cache: internal pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1410 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1411 /*! cache: leaf pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1411 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1412 /*! cache: modified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1412 +#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1413 /*! cache: overflow pages read into cache */ -#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1413 +#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1414 /*! cache: page split during eviction deepened the tree */ -#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1414 +#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1415 /*! cache: page written requiring history store records */ -#define WT_STAT_CONN_CACHE_WRITE_HS 1415 +#define WT_STAT_CONN_CACHE_WRITE_HS 1416 /*! cache: pages read into cache */ -#define WT_STAT_CONN_CACHE_READ 1416 +#define WT_STAT_CONN_CACHE_READ 1417 /*! cache: pages read into cache after truncate */ -#define WT_STAT_CONN_CACHE_READ_DELETED 1417 +#define WT_STAT_CONN_CACHE_READ_DELETED 1418 /*! cache: pages read into cache after truncate in prepare state */ -#define WT_STAT_CONN_CACHE_READ_DELETED_PREPARED 1418 +#define WT_STAT_CONN_CACHE_READ_DELETED_PREPARED 1419 /*! cache: pages requested from the cache */ -#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1419 +#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1420 /*! cache: pages seen by eviction walk */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1420 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1421 /*! cache: pages written from cache */ -#define WT_STAT_CONN_CACHE_WRITE 1421 +#define WT_STAT_CONN_CACHE_WRITE 1422 /*! cache: pages written requiring in-memory restoration */ -#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1422 +#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1423 /*! cache: tracked dirty bytes in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1423 +#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1424 /*! cache: unmodified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1424 +#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1425 /*! checkpoint-cleanup: pages added for eviction */ -#define WT_STAT_CONN_CC_PAGES_EVICT 1425 +#define WT_STAT_CONN_CC_PAGES_EVICT 1426 /*! checkpoint-cleanup: pages removed */ -#define WT_STAT_CONN_CC_PAGES_REMOVED 1426 +#define WT_STAT_CONN_CC_PAGES_REMOVED 1427 /*! checkpoint-cleanup: pages skipped during tree walk */ -#define WT_STAT_CONN_CC_PAGES_WALK_SKIPPED 1427 +#define WT_STAT_CONN_CC_PAGES_WALK_SKIPPED 1428 /*! checkpoint-cleanup: pages visited */ -#define WT_STAT_CONN_CC_PAGES_VISITED 1428 +#define WT_STAT_CONN_CC_PAGES_VISITED 1429 /*! cursor: Total number of entries skipped by cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT_SKIP_TOTAL 1429 +#define WT_STAT_CONN_CURSOR_NEXT_SKIP_TOTAL 1430 /*! cursor: Total number of entries skipped by cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV_SKIP_TOTAL 1430 +#define WT_STAT_CONN_CURSOR_PREV_SKIP_TOTAL 1431 /*! * cursor: Total number of entries skipped to position the history store * cursor */ -#define WT_STAT_CONN_CURSOR_SKIP_HS_CUR_POSITION 1431 +#define WT_STAT_CONN_CURSOR_SKIP_HS_CUR_POSITION 1432 /*! * cursor: Total number of times a search near has exited due to prefix * config */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR_PREFIX_FAST_PATHS 1432 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR_PREFIX_FAST_PATHS 1433 /*! * cursor: cursor next calls that skip due to a globally visible history * store tombstone */ -#define WT_STAT_CONN_CURSOR_NEXT_HS_TOMBSTONE 1433 +#define WT_STAT_CONN_CURSOR_NEXT_HS_TOMBSTONE 1434 /*! * cursor: cursor next calls that skip greater than or equal to 100 * entries */ -#define WT_STAT_CONN_CURSOR_NEXT_SKIP_GE_100 1434 +#define WT_STAT_CONN_CURSOR_NEXT_SKIP_GE_100 1435 /*! cursor: cursor next calls that skip less than 100 entries */ -#define WT_STAT_CONN_CURSOR_NEXT_SKIP_LT_100 1435 +#define WT_STAT_CONN_CURSOR_NEXT_SKIP_LT_100 1436 /*! * cursor: cursor prev calls that skip due to a globally visible history * store tombstone */ -#define WT_STAT_CONN_CURSOR_PREV_HS_TOMBSTONE 1436 +#define WT_STAT_CONN_CURSOR_PREV_HS_TOMBSTONE 1437 /*! * cursor: cursor prev calls that skip greater than or equal to 100 * entries */ -#define WT_STAT_CONN_CURSOR_PREV_SKIP_GE_100 1437 +#define WT_STAT_CONN_CURSOR_PREV_SKIP_GE_100 1438 /*! cursor: cursor prev calls that skip less than 100 entries */ -#define WT_STAT_CONN_CURSOR_PREV_SKIP_LT_100 1438 +#define WT_STAT_CONN_CURSOR_PREV_SKIP_LT_100 1439 /*! cursor: open cursor count */ -#define WT_STAT_CONN_CURSOR_OPEN_COUNT 1439 +#define WT_STAT_CONN_CURSOR_OPEN_COUNT 1440 /*! reconciliation: approximate byte size of timestamps in pages written */ -#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TS 1440 +#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TS 1441 /*! * reconciliation: approximate byte size of transaction IDs in pages * written */ -#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TXN 1441 +#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TXN 1442 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1442 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1443 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1443 +#define WT_STAT_CONN_REC_PAGES 1444 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1444 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1445 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1445 +#define WT_STAT_CONN_REC_PAGE_DELETE 1446 /*! * reconciliation: pages written including an aggregated newest start * durable timestamp */ -#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_START_DURABLE_TS 1446 +#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_START_DURABLE_TS 1447 /*! * reconciliation: pages written including an aggregated newest stop * durable timestamp */ -#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_DURABLE_TS 1447 +#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_DURABLE_TS 1448 /*! * reconciliation: pages written including an aggregated newest stop * timestamp */ -#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TS 1448 +#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TS 1449 /*! * reconciliation: pages written including an aggregated newest stop * transaction ID */ -#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TXN 1449 +#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TXN 1450 /*! * reconciliation: pages written including an aggregated newest * transaction ID */ -#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_TXN 1450 +#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_TXN 1451 /*! * reconciliation: pages written including an aggregated oldest start * timestamp */ -#define WT_STAT_CONN_REC_TIME_AGGR_OLDEST_START_TS 1451 +#define WT_STAT_CONN_REC_TIME_AGGR_OLDEST_START_TS 1452 /*! reconciliation: pages written including an aggregated prepare */ -#define WT_STAT_CONN_REC_TIME_AGGR_PREPARED 1452 +#define WT_STAT_CONN_REC_TIME_AGGR_PREPARED 1453 /*! * reconciliation: pages written including at least one start durable * timestamp */ -#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_START_TS 1453 +#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_START_TS 1454 /*! * reconciliation: pages written including at least one start transaction * ID */ -#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TXN 1454 +#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TXN 1455 /*! * reconciliation: pages written including at least one stop durable * timestamp */ -#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_STOP_TS 1455 +#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_STOP_TS 1456 /*! reconciliation: pages written including at least one stop timestamp */ -#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TS 1456 +#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TS 1457 /*! * reconciliation: pages written including at least one stop transaction * ID */ -#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TXN 1457 +#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TXN 1458 /*! reconciliation: records written including a start durable timestamp */ -#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_START_TS 1458 +#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_START_TS 1459 /*! reconciliation: records written including a start timestamp */ -#define WT_STAT_CONN_REC_TIME_WINDOW_START_TS 1459 +#define WT_STAT_CONN_REC_TIME_WINDOW_START_TS 1460 /*! reconciliation: records written including a start transaction ID */ -#define WT_STAT_CONN_REC_TIME_WINDOW_START_TXN 1460 +#define WT_STAT_CONN_REC_TIME_WINDOW_START_TXN 1461 /*! reconciliation: records written including a stop durable timestamp */ -#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_STOP_TS 1461 +#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_STOP_TS 1462 /*! reconciliation: records written including a stop timestamp */ -#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TS 1462 +#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TS 1463 /*! reconciliation: records written including a stop transaction ID */ -#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TXN 1463 +#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TXN 1464 /*! session: tiered operations dequeued and processed */ -#define WT_STAT_CONN_TIERED_WORK_UNITS_DEQUEUED 1464 +#define WT_STAT_CONN_TIERED_WORK_UNITS_DEQUEUED 1465 /*! session: tiered operations scheduled */ -#define WT_STAT_CONN_TIERED_WORK_UNITS_CREATED 1465 +#define WT_STAT_CONN_TIERED_WORK_UNITS_CREATED 1466 /*! session: tiered storage local retention time (secs) */ -#define WT_STAT_CONN_TIERED_RETENTION 1466 +#define WT_STAT_CONN_TIERED_RETENTION 1467 /*! session: tiered storage object size */ -#define WT_STAT_CONN_TIERED_OBJECT_SIZE 1467 +#define WT_STAT_CONN_TIERED_OBJECT_SIZE 1468 /*! transaction: race to read prepared update retry */ -#define WT_STAT_CONN_TXN_READ_RACE_PREPARE_UPDATE 1468 +#define WT_STAT_CONN_TXN_READ_RACE_PREPARE_UPDATE 1469 /*! * transaction: rollback to stable history store records with stop * timestamps older than newer records */ -#define WT_STAT_CONN_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 1469 +#define WT_STAT_CONN_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 1470 /*! transaction: rollback to stable inconsistent checkpoint */ -#define WT_STAT_CONN_TXN_RTS_INCONSISTENT_CKPT 1470 +#define WT_STAT_CONN_TXN_RTS_INCONSISTENT_CKPT 1471 /*! transaction: rollback to stable keys removed */ -#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1471 +#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1472 /*! transaction: rollback to stable keys restored */ -#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1472 +#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1473 /*! transaction: rollback to stable restored tombstones from history store */ -#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1473 +#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1474 /*! transaction: rollback to stable restored updates from history store */ -#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_UPDATES 1474 +#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_UPDATES 1475 /*! transaction: rollback to stable sweeping history store keys */ -#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1475 +#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1476 /*! transaction: rollback to stable updates removed from history store */ -#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1476 +#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1477 /*! transaction: transaction checkpoints due to obsolete pages */ -#define WT_STAT_CONN_TXN_CHECKPOINT_OBSOLETE_APPLIED 1477 +#define WT_STAT_CONN_TXN_CHECKPOINT_OBSOLETE_APPLIED 1478 /*! transaction: update conflicts */ -#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1478 +#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1479 /*! * @} diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h index 4f896a73525..4487554a7f3 100644 --- a/src/third_party/wiredtiger/src/include/wt_internal.h +++ b/src/third_party/wiredtiger/src/include/wt_internal.h @@ -77,6 +77,8 @@ struct __wt_block_ckpt; typedef struct __wt_block_ckpt WT_BLOCK_CKPT; struct __wt_block_desc; typedef struct __wt_block_desc WT_BLOCK_DESC; +struct __wt_block_file_opener; +typedef struct __wt_block_file_opener WT_BLOCK_FILE_OPENER; struct __wt_block_header; typedef struct __wt_block_header WT_BLOCK_HEADER; struct __wt_block_mods; diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index 647d87bf9fb..3d156e6e3df 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -1249,6 +1249,7 @@ static const char *const __stats_connection_desc[] = { "reconciliation: records written including a prepare state", "reconciliation: split bytes currently awaiting free", "reconciliation: split objects currently awaiting free", + "session: flush_tier busy retries", "session: flush_tier operation calls", "session: open session count", "session: session query timestamp calls", @@ -1773,6 +1774,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->rec_time_window_prepared = 0; /* not clearing rec_split_stashed_bytes */ /* not clearing rec_split_stashed_objects */ + stats->flush_tier_busy = 0; stats->flush_tier = 0; /* not clearing session_open */ stats->session_query_ts = 0; @@ -2289,6 +2291,7 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS * to->rec_time_window_prepared += WT_STAT_READ(from, rec_time_window_prepared); to->rec_split_stashed_bytes += WT_STAT_READ(from, rec_split_stashed_bytes); to->rec_split_stashed_objects += WT_STAT_READ(from, rec_split_stashed_objects); + to->flush_tier_busy += WT_STAT_READ(from, flush_tier_busy); to->flush_tier += WT_STAT_READ(from, flush_tier); to->session_open += WT_STAT_READ(from, session_open); to->session_query_ts += WT_STAT_READ(from, session_query_ts); diff --git a/src/third_party/wiredtiger/src/tiered/tiered_handle.c b/src/third_party/wiredtiger/src/tiered/tiered_handle.c index 11ecd0a4941..e9fd555b2ef 100644 --- a/src/third_party/wiredtiger/src/tiered/tiered_handle.c +++ b/src/third_party/wiredtiger/src/tiered/tiered_handle.c @@ -97,6 +97,7 @@ __tiered_create_local(WT_SESSION_IMPL *session, WT_TIERED *tiered) __wt_verbose(session, WT_VERB_TIERED, "TIER_CREATE_LOCAL: LOCAL: %s", name); cfg[0] = WT_CONFIG_BASE(session, object_meta); cfg[1] = tiered->obj_config; + cfg[2] = "tiered_object=true,readonly=true"; __wt_verbose(session, WT_VERB_TIERED, "TIER_CREATE_LOCAL: obj_config: %s : %s", name, cfg[1]); WT_ASSERT(session, tiered->obj_config != NULL); WT_ERR(__wt_config_merge(session, cfg, NULL, (const char **)&config)); @@ -113,11 +114,15 @@ __tiered_create_local(WT_SESSION_IMPL *session, WT_TIERED *tiered) this_tier->name = name; F_SET(this_tier, WT_TIERS_OP_READ | WT_TIERS_OP_WRITE); - if (0) { + WT_WITH_DHANDLE( + session, &tiered->iface, ret = __wt_btree_switch_object(session, tiered->current_id, 0)); + WT_ERR(ret); + err: + if (ret != 0) /* Only free name on error. */ __wt_free(session, name); - } + __wt_free(session, config); return (ret); } @@ -270,7 +275,7 @@ __tiered_update_metadata(WT_SESSION_IMPL *session, WT_TIERED *tiered, const char newconfig = NULL; WT_RET(__wt_scr_alloc(session, 0, &tmp)); - WT_RET(__wt_buf_fmt(session, tmp, "last=%" PRIu64 ",tiers=(\"", tiered->current_id)); + WT_RET(__wt_buf_fmt(session, tmp, "last=%" PRIu64 ",tiers=(", tiered->current_id)); for (i = 0; i < WT_TIERED_MAX_TIERS; ++i) { if (tiered->tiers[i].name == NULL) { __wt_verbose(session, WT_VERB_TIERED, "TIER_UPDATE_META: names[%" PRIu32 "] NULL", i); @@ -278,7 +283,7 @@ __tiered_update_metadata(WT_SESSION_IMPL *session, WT_TIERED *tiered, const char } __wt_verbose(session, WT_VERB_TIERED, "TIER_UPDATE_META: names[%" PRIu32 "]: %s", i, tiered->tiers[i].name); - WT_RET(__wt_buf_catfmt(session, tmp, "%s%s\"", i == 0 ? "" : ",", tiered->tiers[i].name)); + WT_RET(__wt_buf_catfmt(session, tmp, "%s\"%s\"", i == 0 ? "" : ",", tiered->tiers[i].name)); } WT_RET(__wt_buf_catfmt(session, tmp, ")")); @@ -450,7 +455,7 @@ static int __tiered_open(WT_SESSION_IMPL *session, const char *cfg[]) { WT_CONFIG_ITEM cval, tierconf; - WT_DATA_HANDLE *dhandle; + WT_DATA_HANDLE *dhandle, *file_dhandle; WT_DECL_ITEM(tmp); WT_DECL_RET; WT_TIERED *tiered; @@ -459,13 +464,15 @@ __tiered_open(WT_SESSION_IMPL *session, const char *cfg[]) uint32_t unused; #endif char *metaconf; + const char *newconfig; const char *obj_cfg[] = {WT_CONFIG_BASE(session, object_meta), NULL, NULL}; + const char *new_tiered_cfg[] = {NULL, NULL, NULL, NULL}; const char **tiered_cfg, *config; dhandle = session->dhandle; tiered = (WT_TIERED *)dhandle; tiered_cfg = dhandle->cfg; - config = NULL; + config = newconfig = NULL; metaconf = NULL; WT_RET(__wt_scr_alloc(session, 0, &tmp)); @@ -512,15 +519,31 @@ __tiered_open(WT_SESSION_IMPL *session, const char *cfg[]) __wt_verbose( session, WT_VERB_TIERED, "TIERED_OPEN: create %s config %s", dhandle->name, config); WT_ERR(__wt_tiered_switch(session, config)); + file_dhandle = tiered->tiers[WT_TIERED_INDEX_LOCAL].tier; + WT_ASSERT(session, file_dhandle != dhandle && file_dhandle->type == WT_DHANDLE_TYPE_BTREE); - /* XXX brute force, need to figure out functions to use to do this properly. */ - /* We need to update the dhandle config entry to reflect the new tiers metadata. */ - WT_ERR(__wt_metadata_search(session, dhandle->name, &metaconf)); + /* + * XXX brute force, need to figure out functions to use to do this properly. + * + * We are updating the tiered dhandle config entry to reflect the new tiers metadata. The + * tiered dhandle must look almost exactly like the local file dhandle. The difference is + * that the local file dhandle is marked as readonly and also tagged as a tiered object. + * We'll turn those off before putting it into tiered dhandle. + */ + WT_ERR(__wt_metadata_search(session, file_dhandle->name, &metaconf)); __wt_verbose(session, WT_VERB_TIERED, "TIERED_OPEN: after switch meta conf %s %s", dhandle->name, metaconf); + new_tiered_cfg[0] = metaconf; + new_tiered_cfg[1] = "tiered_object=false,readonly=false"; + WT_ERR(__wt_config_merge(session, new_tiered_cfg, NULL, &newconfig)); __wt_free(session, dhandle->cfg[1]); - dhandle->cfg[1] = metaconf; + dhandle->cfg[1] = newconfig; + WT_ERR(__wt_config_merge(session, dhandle->cfg, NULL, &newconfig)); + WT_ERR(__wt_metadata_update(session, dhandle->name, newconfig)); } + WT_ERR(__wt_btree_open(session, tiered_cfg)); + WT_ERR(__wt_btree_switch_object(session, tiered->current_id, 0)); + #if 1 if (0) { /* Temp code to keep s_all happy. */ diff --git a/src/third_party/wiredtiger/src/tiered/tiered_opener.c b/src/third_party/wiredtiger/src/tiered/tiered_opener.c new file mode 100644 index 00000000000..66b07713822 --- /dev/null +++ b/src/third_party/wiredtiger/src/tiered/tiered_opener.c @@ -0,0 +1,99 @@ +/*- + * Copyright (c) 2014-present MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __tiered_opener_open -- + * Open an object by number. + */ +static int +__tiered_opener_open(WT_BLOCK_FILE_OPENER *opener, WT_SESSION_IMPL *session, uint64_t object_id, + WT_FS_OPEN_FILE_TYPE type, u_int flags, WT_FH **fhp) +{ + WT_BUCKET_STORAGE *bstorage; + WT_DECL_RET; + WT_TIERED *tiered; + const char *object_name, *object_uri; + + tiered = opener->cookie; + object_uri = NULL; + + WT_ASSERT(session, + (object_id > 0 && object_id <= tiered->current_id) || object_id == WT_TIERED_CURRENT_ID); + /* + * FIXME-WT-7590 we will need some kind of locking while we're looking at the tiered structure. + * This can be called at any time, because we are opening the objects lazily. + */ + if (object_id == tiered->current_id || object_id == WT_TIERED_CURRENT_ID) { + bstorage = NULL; + object_name = tiered->tiers[WT_TIERED_INDEX_LOCAL].name; + if (!WT_PREFIX_SKIP(object_name, "file:")) + WT_RET_MSG(session, EINVAL, "expected a 'file:' URI"); + } else { + WT_ERR( + __wt_tiered_name(session, &tiered->iface, object_id, WT_TIERED_NAME_OBJECT, &object_uri)); + object_name = object_uri; + WT_PREFIX_SKIP_REQUIRED(session, object_name, "object:"); + bstorage = tiered->bstorage; + } + WT_WITH_BUCKET_STORAGE( + bstorage, session, { ret = __wt_open(session, object_name, type, flags, fhp); }); +err: + __wt_free(session, object_uri); + return (ret); +} + +/* + * __tiered_opener_current_id -- + * Get the current writeable object id. + */ +static uint64_t +__tiered_opener_current_id(WT_BLOCK_FILE_OPENER *opener) +{ + WT_TIERED *tiered; + + tiered = opener->cookie; + + /* + * FIXME-WT-7590 we will need some kind of locking while we're looking at the tiered structure. + * This can be called at any time, because we are opening the objects lazily. + */ + return (tiered->current_id); +} + +/* + * __wt_tiered_opener -- + * Set up an opener for a tiered handle. + */ +int +__wt_tiered_opener(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle, + WT_BLOCK_FILE_OPENER **openerp, const char **filenamep) +{ + WT_TIERED *tiered; + const char *filename; + + filename = dhandle->name; + *openerp = NULL; + + if (dhandle->type == WT_DHANDLE_TYPE_BTREE) { + if (!WT_PREFIX_SKIP(filename, "file:")) + WT_RET_MSG(session, EINVAL, "expected a 'file:' URI"); + *filenamep = filename; + } else if (dhandle->type == WT_DHANDLE_TYPE_TIERED) { + tiered = (WT_TIERED *)dhandle; + tiered->opener.open = __tiered_opener_open; + tiered->opener.current_object_id = __tiered_opener_current_id; + tiered->opener.cookie = tiered; + *openerp = &tiered->opener; + *filenamep = dhandle->name; + } else + WT_RET_MSG(session, EINVAL, "invalid URI: %s", dhandle->name); + + return (0); +} diff --git a/src/third_party/wiredtiger/test/suite/test_tiered02.py b/src/third_party/wiredtiger/test/suite/test_tiered02.py index 515e84388e5..3416e581acb 100755 --- a/src/third_party/wiredtiger/test/suite/test_tiered02.py +++ b/src/third_party/wiredtiger/test/suite/test_tiered02.py @@ -35,9 +35,7 @@ class test_tiered02(wttest.WiredTigerTestCase): K = 1024 M = 1024 * K G = 1024 * M - # TODO: tiered: change this to a table: URI, otherwise we are - # not using tiered files. - uri = "file:test_tiered02" + uri = "table:test_tiered02" auth_token = "test_token" bucket = "mybucket" @@ -46,25 +44,25 @@ class test_tiered02(wttest.WiredTigerTestCase): prefix = "pfx-" def conn_config(self): - os.makedirs(self.bucket, exist_ok=True) + if not os.path.exists(self.bucket): + os.mkdir(self.bucket) return \ 'statistics=(all),' + \ 'tiered_storage=(auth_token=%s,' % self.auth_token + \ 'bucket=%s,' % self.bucket + \ 'bucket_prefix=%s,' % self.prefix + \ - 'name=%s)' % self.extension_name + 'name=%s),tiered_manager=(wait=0)' % self.extension_name # Load the local store extension, but skip the test if it is missing. def conn_extensions(self, extlist): extlist.skip_if_missing = True extlist.extension('storage_sources', self.extension_name) - def confirm_flush(self, increase=True): - # TODO: tiered: flush tests disabled, as the interface - # for flushing will be changed. - return + def progress(self, s): + self.verbose(3, s) + self.pr(s) - self.flushed_objects + def confirm_flush(self, increase=True): got = sorted(list(os.listdir(self.bucket))) self.pr('Flushed objects: ' + str(got)) if increase: @@ -80,45 +78,86 @@ class test_tiered02(wttest.WiredTigerTestCase): self.flushed_objects = 0 args = 'key_format=S' + intl_page = 'internal_page_max=16K' + base_create = 'key_format=S,value_format=S,' + intl_page + self.pr("create sys") + #self.session.create(self.uri + 'xxx', base_create) + + self.progress('Create simple data set (10)') ds = SimpleDataSet(self, self.uri, 10, config=args) + self.progress('populate') ds.populate() ds.check() + self.progress('checkpoint') self.session.checkpoint() - # For some reason, every checkpoint does not cause a flush. - # As we're about to move to a new model of flushing, we're not going to chase this error. - #self.confirm_flush() + self.progress('flush_tier') + self.session.flush_tier(None) + self.confirm_flush() + # FIXME-WT-7589 reopening a connection does not yet work. + if False: + self.close_conn() + self.progress('reopen_conn') + self.reopen_conn() + # Check what was there before + ds = SimpleDataSet(self, self.uri, 10, config=args) + ds.check() + + self.progress('Create simple data set (50)') ds = SimpleDataSet(self, self.uri, 50, config=args) + self.progress('populate') ds.populate() ds.check() + self.progress('checkpoint') self.session.checkpoint() + self.progress('flush_tier') + self.session.flush_tier(None) self.confirm_flush() + # FIXME-WT-7589 This test works up to this point, then runs into trouble. + if True: + return + + self.progress('Create simple data set (100)') ds = SimpleDataSet(self, self.uri, 100, config=args) + self.progress('populate') ds.populate() ds.check() + self.progress('checkpoint') self.session.checkpoint() + self.progress('flush_tier') + self.session.flush_tier(None) self.confirm_flush() + self.progress('Create simple data set (200)') ds = SimpleDataSet(self, self.uri, 200, config=args) + self.progress('populate') ds.populate() ds.check() + self.progress('close_conn') self.close_conn() self.confirm_flush() # closing the connection does a checkpoint + self.progress('reopen_conn') self.reopen_conn() # Check what was there before ds = SimpleDataSet(self, self.uri, 200, config=args) ds.check() # Now add some more. + self.progress('Create simple data set (300)') ds = SimpleDataSet(self, self.uri, 300, config=args) + self.progress('populate') ds.populate() ds.check() - # We haven't done a checkpoint/flush so there should be + # We haven't done a flush so there should be # nothing extra on the shared tier. self.confirm_flush(increase=False) + self.progress('checkpoint') + self.session.checkpoint() + self.confirm_flush(increase=False) + self.progress('END TEST') if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_tiered04.py b/src/third_party/wiredtiger/test/suite/test_tiered04.py index 26254f4d33d..78a7e274e53 100755 --- a/src/third_party/wiredtiger/test/suite/test_tiered04.py +++ b/src/third_party/wiredtiger/test/suite/test_tiered04.py @@ -88,13 +88,19 @@ class test_tiered04(wttest.WiredTigerTestCase): stat_cursor.close() return val + def check(self, tc, n): + for i in range(0, n): + self.assertEqual(tc[str(i)], str(i)) + tc.set_key(str(n)) + self.assertEquals(tc.search(), wiredtiger.WT_NOTFOUND) + # Test calling the flush_tier API. def test_tiered(self): # Create three tables. One using the system tiered storage, one # specifying its own bucket and object size and one using no # tiered storage. Use stats to verify correct setup. intl_page = 'internal_page_max=16K' - base_create = 'key_format=S,' + intl_page + base_create = 'key_format=S,value_format=S,' + intl_page self.pr("create sys") self.session.create(self.uri, base_create) conf = \ @@ -110,13 +116,34 @@ class test_tiered04(wttest.WiredTigerTestCase): self.pr("create non tiered/local") self.session.create(self.uri_none, base_create + conf) - #self.pr("open cursor") - #c = self.session.open_cursor(self.uri) self.pr("flush tier") + c = self.session.open_cursor(self.uri) + c["0"] = "0" + self.check(c, 1) + c.close() self.session.flush_tier(None) - self.pr("flush tier again") + c = self.session.open_cursor(self.uri) + c["1"] = "1" + self.check(c, 2) + c.close() + + c = self.session.open_cursor(self.uri) + c["2"] = "2" + self.check(c, 3) + + self.pr("flush tier again, holding open cursor") + # FIXME-WT-7591 Remove the extra cursor close and open surrounding the flush_tier call. + # Having a cursor open during a flush_tier does not yet work, so the test closes it, + # and reopens after the flush_tier. + c.close() self.session.flush_tier(None) + c = self.session.open_cursor(self.uri) + + c["3"] = "3" + self.check(c, 4) + c.close() + calls = self.get_stat(stat.conn.flush_tier, None) flush = 2 self.assertEqual(calls, flush) |