diff options
author | Luke Chen <luke.chen@mongodb.com> | 2021-10-05 15:12:04 +1100 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-10-05 04:40:31 +0000 |
commit | 80469adedd382ab8a8d50d0bc9d7f8e756bb8219 (patch) | |
tree | e197090f399e10a9e62f0481caef3450b86a64ee | |
parent | e64abfccd2b8ba786083bf1e38d9b70143560b69 (diff) | |
download | mongo-80469adedd382ab8a8d50d0bc9d7f8e756bb8219.tar.gz |
Import wiredtiger: 8f54c7f772f96f53413ace85637b1ea4a2a2ed64 from branch mongodb-master
ref: 464dc4490f..8f54c7f772
for: 5.1.0
WT-8078 Implement tiered storage local retention caching
20 files changed, 344 insertions, 282 deletions
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index c660bde80c1..6c0cc16592c 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -1136,6 +1136,7 @@ numSymbols numbare objs offpage +ofh ok oldv ondisk diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index b17de356294..362712d5de7 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -505,8 +505,9 @@ conn_stats = [ ########################################## # Tiered storage statistics ########################################## - StorageStat('flush_state_races', 'flush state races'), StorageStat('flush_tier', 'flush_tier operation calls'), + StorageStat('local_objects_inuse', 'attempts to remove a local object and the object is in use'), + StorageStat('local_objects_removed', 'local objects removed'), ########################################## # Thread Count statistics diff --git a/src/third_party/wiredtiger/ext/storage_sources/local_store/local_store.c b/src/third_party/wiredtiger/ext/storage_sources/local_store/local_store.c index 8bcb6911d06..ee5720a1844 100644 --- a/src/third_party/wiredtiger/ext/storage_sources/local_store/local_store.c +++ b/src/third_party/wiredtiger/ext/storage_sources/local_store/local_store.c @@ -637,6 +637,8 @@ local_file_copy(LOCAL_STORAGE *local, WT_SESSION *session, const char *src_path, goto err; } } + if (ret == 0 && (ret = chmod(tmp_path, 0444)) < 0) + ret = local_err(local, session, errno, "%s: file_copy chmod failed", tmp_path); if ((ret = rename(tmp_path, dest_path)) != 0) { ret = local_err(local, session, errno, "%s: cannot rename from %s", dest_path, tmp_path); goto err; @@ -693,7 +695,7 @@ err: /* * local_flush_finish -- - * Move a file from the default file system to the cache in the new file system. + * Cache a file in the new file system. 
*/ static int local_flush_finish(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session, @@ -715,9 +717,13 @@ local_flush_finish(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session, goto err; local->op_count++; - if ((ret = rename(source, dest_path)) != 0) { + /* + * Link the object with the original local object. The could be replaced by a file copy if + * portability is an issue. + */ + if ((ret = link(source, dest_path)) != 0) { ret = local_err( - local, session, errno, "ss_flush_finish rename %s to %s failed", source, dest_path); + local, session, errno, "ss_flush_finish link %s to %s failed", source, dest_path); goto err; } /* Set the file to readonly in the cache. */ diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 34966c3078a..b784a5d9786 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-master", - "commit": "464dc4490f2028b6ac76006c386325e4c67afeb6" + "commit": "8f54c7f772f96f53413ace85637b1ea4a2a2ed64" } diff --git a/src/third_party/wiredtiger/src/conn/conn_tiered.c b/src/third_party/wiredtiger/src/conn/conn_tiered.c index 58b2026c5b8..6f34064ca7e 100644 --- a/src/third_party/wiredtiger/src/conn/conn_tiered.c +++ b/src/third_party/wiredtiger/src/conn/conn_tiered.c @@ -20,6 +20,16 @@ #endif /* + * __tiered_server_run_chk -- + * Check to decide if the tiered storage server should continue running. + */ +static bool +__tiered_server_run_chk(WT_SESSION_IMPL *session) +{ + return (FLD_ISSET(S2C(session)->server_flags, WT_CONN_SERVER_TIERED)); +} + +/* * __flush_tier_wait -- * Wait for all previous work units queued to be processed. */ @@ -121,55 +131,63 @@ err: /* * __tier_storage_remove_local -- - * Perform one iteration of tiered storage local tier removal. + * Perform one iteration of tiered storage local object removal. 
*/ static int -__tier_storage_remove_local(WT_SESSION_IMPL *session, const char *uri, bool force) +__tier_storage_remove_local(WT_SESSION_IMPL *session) { - WT_CONFIG_ITEM cval; WT_DECL_RET; - size_t len; + WT_TIERED_WORK_UNIT *entry; uint64_t now; - char *config, *newfile; - const char *cfg[2], *filename; - - config = newfile = NULL; - if (uri == NULL) - return (0); - __wt_verbose(session, WT_VERB_TIERED, "Removing tree %s", uri); - filename = uri; - WT_PREFIX_SKIP_REQUIRED(session, filename, "tiered:"); - len = strlen("file:") + strlen(filename) + 1; - WT_ERR(__wt_calloc_def(session, len, &newfile)); - WT_ERR(__wt_snprintf(newfile, len, "file:%s", filename)); + const char *object; - /* - * If the file:URI of the tiered object does not exist, there is nothing to do. - */ - ret = __wt_metadata_search(session, newfile, &config); - if (ret == WT_NOTFOUND) { - ret = 0; - goto err; - } - WT_ERR(ret); + entry = NULL; + for (;;) { + /* Check if we're quitting or being reconfigured. */ + if (!__tiered_server_run_chk(session)) + break; - /* - * We have a local version of this tiered data. Check its metadata for when it expires and - * remove if necessary. - */ - cfg[0] = config; - cfg[1] = NULL; - WT_ERR(__wt_config_gets(session, cfg, "local_retention", &cval)); - __wt_seconds(session, &now); - if (force || (uint64_t)cval.val + S2C(session)->bstorage->retain_secs >= now) + __wt_seconds(session, &now); + __wt_tiered_get_drop_local(session, now, &entry); + if (entry == NULL) + break; + WT_ERR(__wt_tiered_name( + session, &entry->tiered->iface, entry->id, WT_TIERED_NAME_OBJECT, &object)); + __wt_verbose(session, WT_VERB_TIERED, "REMOVE_LOCAL: %s at %" PRIu64, object, now); + WT_PREFIX_SKIP_REQUIRED(session, object, "object:"); /* - * We want to remove the entry and the file. Probably do a schema_drop on the file:uri. + * If the handle is still open, it could still be in use for reading. In that case put the + * work unit back on the work queue and keep trying. 
*/ - __wt_verbose(session, WT_VERB_TIERED, "Would remove %s. Local retention expired", newfile); - + if (__wt_handle_is_open(session, object)) { + __wt_verbose(session, WT_VERB_TIERED, "REMOVE_LOCAL: %s in USE, queue again", object); + WT_STAT_CONN_INCR(session, local_objects_inuse); + /* + * FIXME-WT-7470: If the object we want to remove is in use this is the place to call + * object sweep to clean up block->ofh file handles. Another alternative would be to try + * to sweep and then try the remove call below rather than pushing it back on the work + * queue. NOTE: Remove 'ofh' from s_string.ok when removing this comment. + * + * Update the time on the entry before pushing it back on the queue so that we don't get + * into an infinite loop trying to drop an open file that may be in use a while. + */ + WT_ASSERT(session, entry->tiered != NULL && entry->tiered->bstorage != NULL); + entry->op_val = now + entry->tiered->bstorage->retain_secs; + __wt_tiered_push_work(session, entry); + } else { + __wt_verbose(session, WT_VERB_TIERED, "REMOVE_LOCAL: actually remove %s", object); + WT_STAT_CONN_INCR(session, local_objects_removed); + WT_ERR(__wt_fs_remove(session, object, false)); + /* + * We are responsible for freeing the work unit when we're done with it. + */ + __wt_tiered_work_free(session, entry); + } + entry = NULL; + } err: - __wt_free(session, config); - __wt_free(session, newfile); + if (entry != NULL) + __wt_tiered_work_free(session, entry); return (ret); } @@ -230,8 +248,8 @@ err: * Perform one iteration of copying newly flushed objects to the shared storage. 
*/ int -__wt_tier_do_flush( - WT_SESSION_IMPL *session, WT_TIERED *tiered, const char *local_uri, const char *obj_uri) +__wt_tier_do_flush(WT_SESSION_IMPL *session, WT_TIERED *tiered, uint32_t id, const char *local_uri, + const char *obj_uri) { WT_DECL_RET; WT_FILE_SYSTEM *bucket_fs; @@ -260,6 +278,11 @@ __wt_tier_do_flush( */ WT_RET(storage_source->ss_flush_finish( storage_source, &session->iface, bucket_fs, local_name, obj_name, NULL)); + /* + * After successful flushing, push a work unit to drop the local object in the future. The + * object will be removed locally after the local retention period expires. + */ + WT_RET(__wt_tiered_put_drop_local(session, tiered, id)); return (0); } @@ -276,7 +299,7 @@ __wt_tier_flush(WT_SESSION_IMPL *session, WT_TIERED *tiered, uint32_t id) local_uri = obj_uri = NULL; WT_ERR(__wt_tiered_name(session, &tiered->iface, id, WT_TIERED_NAME_LOCAL, &local_uri)); WT_ERR(__wt_tiered_name(session, &tiered->iface, id, WT_TIERED_NAME_OBJECT, &obj_uri)); - WT_ERR(__wt_tier_do_flush(session, tiered, local_uri, obj_uri)); + WT_ERR(__wt_tier_do_flush(session, tiered, id, local_uri, obj_uri)); err: __wt_free(session, local_uri); @@ -296,6 +319,10 @@ __tier_storage_copy(WT_SESSION_IMPL *session) entry = NULL; for (;;) { + /* Check if we're quitting or being reconfigured. */ + if (!__tiered_server_run_chk(session)) + break; + /* * We probably need some kind of flush generation so that we don't process flush items for * tables that are added during an in-progress flush_tier. This thread could run due to a @@ -334,7 +361,7 @@ __tier_storage_remove(WT_SESSION_IMPL *session, bool force) * We want to walk the metadata perhaps and for each tiered URI, call remove on its file:URI * version. 
*/ - WT_RET(__tier_storage_remove_local(session, NULL, force)); + WT_RET(__tier_storage_remove_local(session)); return (0); } @@ -448,16 +475,6 @@ __tiered_manager_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp) } /* - * __tiered_server_run_chk -- - * Check to decide if the tiered storage server should continue running. - */ -static bool -__tiered_server_run_chk(WT_SESSION_IMPL *session) -{ - return (FLD_ISSET(S2C(session)->server_flags, WT_CONN_SERVER_TIERED)); -} - -/* * __tiered_server -- * The tiered storage server thread. */ diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 0d7a550094c..0f4f3d84a67 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -1474,8 +1474,8 @@ extern int __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP * extern int __wt_thread_group_resize(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, uint32_t new_min, uint32_t new_max, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_tier_do_flush(WT_SESSION_IMPL *session, WT_TIERED *tiered, const char *local_uri, - const char *obj_uri) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_tier_do_flush(WT_SESSION_IMPL *session, WT_TIERED *tiered, uint32_t id, + const char *local_uri, const char *obj_uri) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_tier_flush(WT_SESSION_IMPL *session, WT_TIERED *tiered, uint32_t id) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_tiered_bucket_config(WT_SESSION_IMPL *session, const char *cfg[], diff --git a/src/third_party/wiredtiger/src/include/os_fs_inline.h b/src/third_party/wiredtiger/src/include/os_fs_inline.h index 2276f096312..2010032a5c2 100644 --- a/src/third_party/wiredtiger/src/include/os_fs_inline.h +++ b/src/third_party/wiredtiger/src/include/os_fs_inline.h @@ -141,7 +141,7 @@ 
__wt_fs_remove(WT_SESSION_IMPL *session, const char *name, bool durable) * It is a layering violation to retrieve a WT_FH here, but it is a useful diagnostic to ensure * WiredTiger doesn't have the handle open. */ - if (__wt_handle_is_open(session, name)) + if (__wt_handle_is_open(session, name) && !F_ISSET(session, WT_SESSION_QUIET_TIERED)) WT_RET_MSG(session, EINVAL, "%s: file-remove: file has open handles", name); #endif diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h index 465f641954b..5206910f139 100644 --- a/src/third_party/wiredtiger/src/include/session.h +++ b/src/third_party/wiredtiger/src/include/session.h @@ -203,10 +203,11 @@ struct __wt_session_impl { #define WT_SESSION_NO_LOGGING 0x00800u #define WT_SESSION_NO_RECONCILE 0x01000u #define WT_SESSION_QUIET_CORRUPT_FILE 0x02000u -#define WT_SESSION_READ_WONT_NEED 0x04000u -#define WT_SESSION_RESOLVING_TXN 0x08000u -#define WT_SESSION_ROLLBACK_TO_STABLE 0x10000u -#define WT_SESSION_SCHEMA_TXN 0x20000u +#define WT_SESSION_QUIET_TIERED 0x04000u +#define WT_SESSION_READ_WONT_NEED 0x08000u +#define WT_SESSION_RESOLVING_TXN 0x10000u +#define WT_SESSION_ROLLBACK_TO_STABLE 0x20000u +#define WT_SESSION_SCHEMA_TXN 0x40000u /* AUTOMATIC FLAG VALUE GENERATION STOP 32 */ uint32_t flags; diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index e65aaf99b3f..df1cd95de67 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -686,8 +686,9 @@ struct __wt_connection_stats { int64_t rec_time_window_stop_txn; int64_t rec_split_stashed_bytes; int64_t rec_split_stashed_objects; - int64_t flush_state_races; + int64_t local_objects_inuse; int64_t flush_tier; + int64_t local_objects_removed; int64_t session_open; int64_t session_query_ts; int64_t session_table_alter_fail; diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in 
b/src/third_party/wiredtiger/src/include/wiredtiger.in index 44eb86b1a32..fc8e542c33a 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -5911,288 +5911,290 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1371 /*! reconciliation: split objects currently awaiting free */ #define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1372 -/*! session: flush state races */ -#define WT_STAT_CONN_FLUSH_STATE_RACES 1373 +/*! session: attempts to remove a local object and the object is in use */ +#define WT_STAT_CONN_LOCAL_OBJECTS_INUSE 1373 /*! session: flush_tier operation calls */ #define WT_STAT_CONN_FLUSH_TIER 1374 +/*! session: local objects removed */ +#define WT_STAT_CONN_LOCAL_OBJECTS_REMOVED 1375 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1375 +#define WT_STAT_CONN_SESSION_OPEN 1376 /*! session: session query timestamp calls */ -#define WT_STAT_CONN_SESSION_QUERY_TS 1376 +#define WT_STAT_CONN_SESSION_QUERY_TS 1377 /*! session: table alter failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1377 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1378 /*! session: table alter successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1378 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1379 /*! session: table alter triggering checkpoint calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_TRIGGER_CHECKPOINT 1379 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_TRIGGER_CHECKPOINT 1380 /*! session: table alter unchanged and skipped */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1380 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1381 /*! session: table compact failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1381 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1382 /*! 
session: table compact successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1382 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1383 /*! session: table create failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1383 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1384 /*! session: table create successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1384 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1385 /*! session: table drop failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1385 +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1386 /*! session: table drop successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1386 +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1387 /*! session: table rename failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1387 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1388 /*! session: table rename successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1388 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1389 /*! session: table salvage failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1389 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1390 /*! session: table salvage successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1390 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1391 /*! session: table truncate failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1391 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1392 /*! session: table truncate successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1392 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1393 /*! session: table verify failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1393 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1394 /*! 
session: table verify successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1394 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1395 /*! session: tiered operations dequeued and processed */ -#define WT_STAT_CONN_TIERED_WORK_UNITS_DEQUEUED 1395 +#define WT_STAT_CONN_TIERED_WORK_UNITS_DEQUEUED 1396 /*! session: tiered operations scheduled */ -#define WT_STAT_CONN_TIERED_WORK_UNITS_CREATED 1396 +#define WT_STAT_CONN_TIERED_WORK_UNITS_CREATED 1397 /*! session: tiered storage local retention time (secs) */ -#define WT_STAT_CONN_TIERED_RETENTION 1397 +#define WT_STAT_CONN_TIERED_RETENTION 1398 /*! session: tiered storage object size */ -#define WT_STAT_CONN_TIERED_OBJECT_SIZE 1398 +#define WT_STAT_CONN_TIERED_OBJECT_SIZE 1399 /*! thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1399 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1400 /*! thread-state: active filesystem read calls */ -#define WT_STAT_CONN_THREAD_READ_ACTIVE 1400 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1401 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1401 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1402 /*! thread-yield: application thread time evicting (usecs) */ -#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1402 +#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1403 /*! thread-yield: application thread time waiting for cache (usecs) */ -#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1403 +#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1404 /*! * thread-yield: connection close blocked waiting for transaction state * stabilization */ -#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1404 +#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1405 /*! thread-yield: connection close yielded for lsm manager shutdown */ -#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1405 +#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1406 /*! 
thread-yield: data handle lock yielded */ -#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1406 +#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1407 /*! * thread-yield: get reference for page index and slot time sleeping * (usecs) */ -#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1407 +#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1408 /*! thread-yield: page access yielded due to prepare state change */ -#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1408 +#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1409 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1409 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1410 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1410 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1411 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1411 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1412 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1412 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1413 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1413 +#define WT_STAT_CONN_PAGE_SLEEP 1414 /*! * thread-yield: page delete rollback time sleeping for state change * (usecs) */ -#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1414 +#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1415 /*! thread-yield: page reconciliation yielded due to child modification */ -#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1415 +#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1416 /*! transaction: Number of prepared updates */ -#define WT_STAT_CONN_TXN_PREPARED_UPDATES 1416 +#define WT_STAT_CONN_TXN_PREPARED_UPDATES 1417 /*! transaction: Number of prepared updates committed */ -#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COMMITTED 1417 +#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COMMITTED 1418 /*! 
transaction: Number of prepared updates repeated on the same key */ -#define WT_STAT_CONN_TXN_PREPARED_UPDATES_KEY_REPEATED 1418 +#define WT_STAT_CONN_TXN_PREPARED_UPDATES_KEY_REPEATED 1419 /*! transaction: Number of prepared updates rolled back */ -#define WT_STAT_CONN_TXN_PREPARED_UPDATES_ROLLEDBACK 1419 +#define WT_STAT_CONN_TXN_PREPARED_UPDATES_ROLLEDBACK 1420 /*! transaction: prepared transactions */ -#define WT_STAT_CONN_TXN_PREPARE 1420 +#define WT_STAT_CONN_TXN_PREPARE 1421 /*! transaction: prepared transactions committed */ -#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1421 +#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1422 /*! transaction: prepared transactions currently active */ -#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1422 +#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1423 /*! transaction: prepared transactions rolled back */ -#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1423 +#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1424 /*! * transaction: prepared transactions rolled back and do not remove the * history store entry */ -#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK_DO_NOT_REMOVE_HS_UPDATE 1424 +#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK_DO_NOT_REMOVE_HS_UPDATE 1425 /*! * transaction: prepared transactions rolled back and fix the history * store entry with checkpoint reserved transaction id */ -#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK_FIX_HS_UPDATE_WITH_CKPT_RESERVED_TXNID 1425 +#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK_FIX_HS_UPDATE_WITH_CKPT_RESERVED_TXNID 1426 /*! transaction: query timestamp calls */ -#define WT_STAT_CONN_TXN_QUERY_TS 1426 +#define WT_STAT_CONN_TXN_QUERY_TS 1427 /*! transaction: race to read prepared update retry */ -#define WT_STAT_CONN_TXN_READ_RACE_PREPARE_UPDATE 1427 +#define WT_STAT_CONN_TXN_READ_RACE_PREPARE_UPDATE 1428 /*! transaction: rollback to stable calls */ -#define WT_STAT_CONN_TXN_RTS 1428 +#define WT_STAT_CONN_TXN_RTS 1429 /*! 
* transaction: rollback to stable history store records with stop * timestamps older than newer records */ -#define WT_STAT_CONN_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 1429 +#define WT_STAT_CONN_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 1430 /*! transaction: rollback to stable inconsistent checkpoint */ -#define WT_STAT_CONN_TXN_RTS_INCONSISTENT_CKPT 1430 +#define WT_STAT_CONN_TXN_RTS_INCONSISTENT_CKPT 1431 /*! transaction: rollback to stable keys removed */ -#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1431 +#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1432 /*! transaction: rollback to stable keys restored */ -#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1432 +#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1433 /*! transaction: rollback to stable pages visited */ -#define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1433 +#define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1434 /*! transaction: rollback to stable restored tombstones from history store */ -#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1434 +#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1435 /*! transaction: rollback to stable restored updates from history store */ -#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_UPDATES 1435 +#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_UPDATES 1436 /*! transaction: rollback to stable skipping delete rle */ -#define WT_STAT_CONN_TXN_RTS_DELETE_RLE_SKIPPED 1436 +#define WT_STAT_CONN_TXN_RTS_DELETE_RLE_SKIPPED 1437 /*! transaction: rollback to stable skipping stable rle */ -#define WT_STAT_CONN_TXN_RTS_STABLE_RLE_SKIPPED 1437 +#define WT_STAT_CONN_TXN_RTS_STABLE_RLE_SKIPPED 1438 /*! transaction: rollback to stable sweeping history store keys */ -#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1438 +#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1439 /*! transaction: rollback to stable tree walk skipping pages */ -#define WT_STAT_CONN_TXN_RTS_TREE_WALK_SKIP_PAGES 1439 +#define WT_STAT_CONN_TXN_RTS_TREE_WALK_SKIP_PAGES 1440 /*! 
transaction: rollback to stable updates aborted */ -#define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1440 +#define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1441 /*! transaction: rollback to stable updates removed from history store */ -#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1441 +#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1442 /*! transaction: sessions scanned in each walk of concurrent sessions */ -#define WT_STAT_CONN_TXN_SESSIONS_WALKED 1442 +#define WT_STAT_CONN_TXN_SESSIONS_WALKED 1443 /*! transaction: set timestamp calls */ -#define WT_STAT_CONN_TXN_SET_TS 1443 +#define WT_STAT_CONN_TXN_SET_TS 1444 /*! transaction: set timestamp durable calls */ -#define WT_STAT_CONN_TXN_SET_TS_DURABLE 1444 +#define WT_STAT_CONN_TXN_SET_TS_DURABLE 1445 /*! transaction: set timestamp durable updates */ -#define WT_STAT_CONN_TXN_SET_TS_DURABLE_UPD 1445 +#define WT_STAT_CONN_TXN_SET_TS_DURABLE_UPD 1446 /*! transaction: set timestamp oldest calls */ -#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1446 +#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1447 /*! transaction: set timestamp oldest updates */ -#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1447 +#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1448 /*! transaction: set timestamp stable calls */ -#define WT_STAT_CONN_TXN_SET_TS_STABLE 1448 +#define WT_STAT_CONN_TXN_SET_TS_STABLE 1449 /*! transaction: set timestamp stable updates */ -#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1449 +#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1450 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1450 +#define WT_STAT_CONN_TXN_BEGIN 1451 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1451 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1452 /*! * transaction: transaction checkpoint currently running for history * store file */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING_HS 1452 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING_HS 1453 /*! 
transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1453 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1454 /*! * transaction: transaction checkpoint history store file duration * (usecs) */ -#define WT_STAT_CONN_TXN_HS_CKPT_DURATION 1454 +#define WT_STAT_CONN_TXN_HS_CKPT_DURATION 1455 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1455 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1456 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1456 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1457 /*! * transaction: transaction checkpoint most recent duration for gathering * all handles (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION 1457 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION 1458 /*! * transaction: transaction checkpoint most recent duration for gathering * applied handles (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_APPLY 1458 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_APPLY 1459 /*! * transaction: transaction checkpoint most recent duration for gathering * skipped handles (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_SKIP 1459 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_SKIP 1460 /*! transaction: transaction checkpoint most recent handles applied */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_APPLIED 1460 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_APPLIED 1461 /*! transaction: transaction checkpoint most recent handles skipped */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_SKIPPED 1461 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_SKIPPED 1462 /*! transaction: transaction checkpoint most recent handles walked */ -#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_WALKED 1462 +#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_WALKED 1463 /*! 
transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1463 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1464 /*! transaction: transaction checkpoint prepare currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING 1464 +#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING 1465 /*! transaction: transaction checkpoint prepare max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX 1465 +#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX 1466 /*! transaction: transaction checkpoint prepare min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN 1466 +#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN 1467 /*! transaction: transaction checkpoint prepare most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT 1467 +#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT 1468 /*! transaction: transaction checkpoint prepare total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL 1468 +#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL 1469 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1469 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1470 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1470 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1471 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1471 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1472 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1472 +#define WT_STAT_CONN_TXN_CHECKPOINT 1473 /*! transaction: transaction checkpoints due to obsolete pages */ -#define WT_STAT_CONN_TXN_CHECKPOINT_OBSOLETE_APPLIED 1473 +#define WT_STAT_CONN_TXN_CHECKPOINT_OBSOLETE_APPLIED 1474 /*! 
* transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1474 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1475 /*! transaction: transaction failures due to history store */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1475 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1476 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1476 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1477 /*! * transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1477 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1478 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1478 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1479 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1479 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1480 /*! transaction: transaction range of timestamps currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1480 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1481 /*! transaction: transaction range of timestamps pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1481 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1482 /*! * transaction: transaction range of timestamps pinned by the oldest * active read timestamp */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1482 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1483 /*! * transaction: transaction range of timestamps pinned by the oldest * timestamp */ -#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1483 +#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1484 /*! 
transaction: transaction read timestamp of the oldest active reader */ -#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1484 +#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1485 /*! transaction: transaction rollback to stable currently running */ -#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE_RUNNING 1485 +#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE_RUNNING 1486 /*! transaction: transaction walk of concurrent sessions */ -#define WT_STAT_CONN_TXN_WALK_SESSIONS 1486 +#define WT_STAT_CONN_TXN_WALK_SESSIONS 1487 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1487 +#define WT_STAT_CONN_TXN_COMMIT 1488 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1488 +#define WT_STAT_CONN_TXN_ROLLBACK 1489 /*! transaction: update conflicts */ -#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1489 +#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1490 /*! * @} diff --git a/src/third_party/wiredtiger/src/os_common/os_fhandle.c b/src/third_party/wiredtiger/src/os_common/os_fhandle.c index f39fbd599e7..18bdb756176 100644 --- a/src/third_party/wiredtiger/src/os_common/os_fhandle.c +++ b/src/third_party/wiredtiger/src/os_common/os_fhandle.c @@ -41,7 +41,6 @@ __fhandle_method_finalize(WT_SESSION_IMPL *session, WT_FILE_HANDLE *handle, bool return (0); } -#ifdef HAVE_DIAGNOSTIC /* * __wt_handle_is_open -- * Return if there's an open handle matching a name. @@ -72,7 +71,6 @@ __wt_handle_is_open(WT_SESSION_IMPL *session, const char *name) return (found); } -#endif /* * __handle_search -- @@ -288,9 +286,8 @@ __handle_close(WT_SESSION_IMPL *session, WT_FH *fh, bool locked) conn = S2C(session); - if (fh->ref != 0) { + if (fh->ref != 0) __wt_errx(session, "Closing a file handle with open references: %s", fh->name); - } /* Remove from the list. 
*/ bucket = fh->name_hash & (conn->hash_size - 1); diff --git a/src/third_party/wiredtiger/src/os_posix/os_fs.c b/src/third_party/wiredtiger/src/os_posix/os_fs.c index 1ae6259e5d8..3898eb74343 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_fs.c +++ b/src/third_party/wiredtiger/src/os_posix/os_fs.c @@ -797,12 +797,16 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const cha /* Create/Open the file. */ WT_SYSCALL_RETRY(((pfh->fd = open(name, f, mode)) == -1 ? -1 : 0), ret); - if (ret != 0) + if (ret != 0) { + /* If we don't want error messages, just return the error value. */ + if (F_ISSET(session, WT_SESSION_QUIET_TIERED) && ret == ENOENT) + goto err; WT_ERR_MSG(session, ret, pfh->direct_io ? "%s: handle-open: open: failed with direct I/O configured, some " "filesystem types do not support direct I/O" : "%s: handle-open: open", name); + } #ifdef __linux__ /* diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index 2318e813cce..f5ae97c8cea 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -1393,8 +1393,9 @@ static const char *const __stats_connection_desc[] = { "reconciliation: records written including a stop transaction ID", "reconciliation: split bytes currently awaiting free", "reconciliation: split objects currently awaiting free", - "session: flush state races", + "session: attempts to remove a local object and the object is in use", "session: flush_tier operation calls", + "session: local objects removed", "session: open session count", "session: session query timestamp calls", "session: table alter failed calls", @@ -1926,8 +1927,9 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->rec_time_window_stop_txn = 0; /* not clearing rec_split_stashed_bytes */ /* not clearing rec_split_stashed_objects */ - stats->flush_state_races = 0; + stats->local_objects_inuse = 0; stats->flush_tier = 0; + 
stats->local_objects_removed = 0; /* not clearing session_open */ stats->session_query_ts = 0; /* not clearing session_table_alter_fail */ @@ -2471,8 +2473,9 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS * to->rec_time_window_stop_txn += WT_STAT_READ(from, rec_time_window_stop_txn); to->rec_split_stashed_bytes += WT_STAT_READ(from, rec_split_stashed_bytes); to->rec_split_stashed_objects += WT_STAT_READ(from, rec_split_stashed_objects); - to->flush_state_races += WT_STAT_READ(from, flush_state_races); + to->local_objects_inuse += WT_STAT_READ(from, local_objects_inuse); to->flush_tier += WT_STAT_READ(from, flush_tier); + to->local_objects_removed += WT_STAT_READ(from, local_objects_removed); to->session_open += WT_STAT_READ(from, session_open); to->session_query_ts += WT_STAT_READ(from, session_query_ts); to->session_table_alter_fail += WT_STAT_READ(from, session_table_alter_fail); diff --git a/src/third_party/wiredtiger/src/tiered/tiered_handle.c b/src/third_party/wiredtiger/src/tiered/tiered_handle.c index 5cfd583b1c9..5be43f315c6 100644 --- a/src/third_party/wiredtiger/src/tiered/tiered_handle.c +++ b/src/third_party/wiredtiger/src/tiered/tiered_handle.c @@ -441,9 +441,7 @@ __wt_tiered_switch(WT_SESSION_IMPL *session, const char *config) /* * __wt_tiered_name -- - * Given a dhandle structure and object number generate the URI name of the given type. XXX - * Currently this is only used in this file but I anticipate it may be of use outside. If not, - * make this static and tiered_name instead. + * Given a dhandle structure and object number generate the URI name of the given type. */ int __wt_tiered_name( @@ -567,9 +565,7 @@ __tiered_open(WT_SESSION_IMPL *session, const char *cfg[]) /* Temp code to keep s_all happy. 
*/ FLD_SET(unused, WT_TIERED_OBJ_LOCAL | WT_TIERED_TREE_UNUSED); FLD_SET(unused, WT_TIERED_WORK_FORCE | WT_TIERED_WORK_FREE); - WT_ERR(__wt_tiered_put_drop_local(session, tiered, tiered->current_id)); WT_ERR(__wt_tiered_put_drop_shared(session, tiered, tiered->current_id)); - __wt_tiered_get_drop_local(session, 0, &entry); __wt_tiered_get_drop_shared(session, &entry); } #endif diff --git a/src/third_party/wiredtiger/src/tiered/tiered_opener.c b/src/third_party/wiredtiger/src/tiered/tiered_opener.c index 5ca027510fc..b6f3df90fa7 100644 --- a/src/third_party/wiredtiger/src/tiered/tiered_opener.c +++ b/src/third_party/wiredtiger/src/tiered/tiered_opener.c @@ -20,44 +20,44 @@ __tiered_opener_open(WT_BLOCK_FILE_OPENER *opener, WT_SESSION_IMPL *session, uin WT_DECL_RET; WT_TIERED *tiered; const char *object_name, *object_uri; + bool local_only; tiered = opener->cookie; object_uri = NULL; + local_only = false; WT_ASSERT(session, (object_id > 0 && object_id <= tiered->current_id) || object_id == WT_TIERED_CURRENT_ID); /* - * FIXME-WT-7590 we will need some kind of locking while we're looking at the tiered structure. - * This can be called at any time, because we are opening the objects lazily. + * First look for the local file. This will be the fastest access and we retain recent objects + * in the local database for a while. 
*/ if (object_id == tiered->current_id || object_id == WT_TIERED_CURRENT_ID) { bstorage = NULL; object_name = tiered->tiers[WT_TIERED_INDEX_LOCAL].name; - if (!WT_PREFIX_SKIP(object_name, "file:")) - WT_RET_MSG(session, EINVAL, "expected a 'file:' URI"); - WT_ERR(__wt_open(session, object_name, type, flags, fhp)); + WT_PREFIX_SKIP_REQUIRED(session, object_name, "file:"); + local_only = true; } else { WT_ERR( __wt_tiered_name(session, &tiered->iface, object_id, WT_TIERED_NAME_OBJECT, &object_uri)); object_name = object_uri; WT_PREFIX_SKIP_REQUIRED(session, object_name, "object:"); + LF_SET(WT_FS_OPEN_READONLY); + WT_ASSERT(session, !FLD_ISSET(flags, WT_FS_OPEN_CREATE)); + F_SET(session, WT_SESSION_QUIET_TIERED); + } + ret = __wt_open(session, object_name, type, flags, fhp); + F_CLR(session, WT_SESSION_QUIET_TIERED); + + /* + * FIXME-WT-7590 we will need some kind of locking while we're looking at the tiered structure. + * This can be called at any time, because we are opening the objects lazily. + */ + if (!local_only && ret != 0) { bstorage = tiered->bstorage; - flags |= WT_FS_OPEN_READONLY; + LF_SET(WT_FS_OPEN_READONLY); WT_WITH_BUCKET_STORAGE( bstorage, session, { ret = __wt_open(session, object_name, type, flags, fhp); }); - if (ret == ENOENT) { - /* - * There is a window where the object may not be copied yet to the bucket. If it isn't - * found try the local system. If it isn't found there then try the bucket one more - * time. 
- */ - ret = __wt_open(session, object_name, type, flags, fhp); - __wt_errx(session, "OPENER: local %s ret %d", object_name, ret); - if (ret == ENOENT) - WT_WITH_BUCKET_STORAGE( - bstorage, session, { ret = __wt_open(session, object_name, type, flags, fhp); }); - WT_ERR(ret); - } } err: __wt_free(session, object_uri); diff --git a/src/third_party/wiredtiger/src/tiered/tiered_work.c b/src/third_party/wiredtiger/src/tiered/tiered_work.c index b3fd1b7f9b5..efc80ea86bd 100644 --- a/src/third_party/wiredtiger/src/tiered/tiered_work.c +++ b/src/third_party/wiredtiger/src/tiered/tiered_work.c @@ -9,6 +9,24 @@ #include "wt_internal.h" /* + * __tiered_flush_state -- + * Account for flush work units so threads can know when shared storage flushing is complete. + */ +static void +__tiered_flush_state(WT_SESSION_IMPL *session, uint32_t type, bool incr) +{ + WT_CONNECTION_IMPL *conn; + + if (type != WT_TIERED_WORK_FLUSH) + return; + conn = S2C(session); + if (incr) + (void)__wt_atomic_addv32(&conn->flush_state, 1); + else + (void)__wt_atomic_subv32(&conn->flush_state, 1); +} + +/* * __wt_tiered_work_free -- * Free a work unit and account for it in the flush state. */ @@ -16,18 +34,9 @@ void __wt_tiered_work_free(WT_SESSION_IMPL *session, WT_TIERED_WORK_UNIT *entry) { WT_CONNECTION_IMPL *conn; - uint32_t new_state, old_state; conn = S2C(session); - for (;;) { - WT_BARRIER(); - old_state = conn->flush_state; - new_state = old_state - 1; - if (__wt_atomic_casv32(&conn->flush_state, old_state, new_state)) - break; - WT_STAT_CONN_INCR(session, flush_state_races); - __wt_yield(); - } + __tiered_flush_state(session, entry->type, false); /* If all work is done signal any waiting thread waiting for sync. 
*/ if (WT_FLUSH_STATE_DONE(conn->flush_state)) __wt_cond_signal(session, conn->flush_cond); @@ -42,23 +51,13 @@ void __wt_tiered_push_work(WT_SESSION_IMPL *session, WT_TIERED_WORK_UNIT *entry) { WT_CONNECTION_IMPL *conn; - uint32_t new_state, old_state; conn = S2C(session); - __wt_spin_lock(session, &conn->tiered_lock); TAILQ_INSERT_TAIL(&conn->tieredqh, entry, q); WT_STAT_CONN_INCR(session, tiered_work_units_created); __wt_spin_unlock(session, &conn->tiered_lock); - for (;;) { - WT_BARRIER(); - old_state = conn->flush_state; - new_state = old_state + 1; - if (__wt_atomic_casv32(&conn->flush_state, old_state, new_state)) - break; - WT_STAT_CONN_INCR(session, flush_state_races); - __wt_yield(); - } + __tiered_flush_state(session, entry->type, true); __wt_cond_signal(session, conn->tiered_cond); return; } @@ -87,10 +86,10 @@ __wt_tiered_pop_work( if (FLD_ISSET(type, entry->type) && (maxval == 0 || entry->op_val < maxval)) { TAILQ_REMOVE(&conn->tieredqh, entry, q); WT_STAT_CONN_INCR(session, tiered_work_units_dequeued); + *entryp = entry; break; } } - *entryp = entry; __wt_spin_unlock(session, &conn->tiered_lock); return; } diff --git a/src/third_party/wiredtiger/test/csuite/tiered_abort/main.c b/src/third_party/wiredtiger/test/csuite/tiered_abort/main.c index c29460b8f46..8111cd108ac 100644 --- a/src/third_party/wiredtiger/test/csuite/tiered_abort/main.c +++ b/src/third_party/wiredtiger/test/csuite/tiered_abort/main.c @@ -55,15 +55,17 @@ static char home[1024]; /* Program working dir */ * Also each worker thread creates its own textual records file that records the data it * inserted and it records the timestamp that was used for that insertion. 
*/ +#define LOCAL_RETENTION 2 /* Local retention time */ +#define MIN_TIME LOCAL_RETENTION * 8 /* Make sure checkpoint and flush_tier run enough */ +#define MAX_TIME MIN_TIME * 4 + #define BUCKET "bucket" #define INVALID_KEY UINT64_MAX -#define MAX_CKPT_INVL 5 /* Maximum interval between checkpoints */ -#define MAX_FLUSH_INVL 5 /* Maximum interval between flush_tier calls */ -#define MAX_TH 20 /* Maximum configurable threads */ -#define MAX_TIME 40 +#define MAX_CKPT_INVL LOCAL_RETENTION * 3 /* Maximum interval between checkpoints */ +#define MAX_FLUSH_INVL LOCAL_RETENTION * 2 /* Maximum interval between flush_tier calls */ +#define MAX_TH 20 /* Maximum configurable threads */ #define MAX_VAL 1024 #define MIN_TH 5 -#define MIN_TIME 10 #define NUM_INT_THREADS 3 #define RECORDS_FILE "records-%" PRIu32 /* Include worker threads and extra sessions */ @@ -96,7 +98,7 @@ static uint32_t flush_calls = 1; "eviction_updates_target=20,eviction_updates_trigger=90," \ "log=(archive=true,file_max=10M,enabled),session_max=%d," \ "statistics=(fast),statistics_log=(wait=1,json=true)," \ - "tiered_storage=(bucket=%s,bucket_prefix=pfx,name=local_store)" + "tiered_storage=(bucket=%s,bucket_prefix=pfx,local_retention=%d,name=local_store)" #define ENV_CONFIG_TXNSYNC \ ENV_CONFIG_DEF \ ",eviction_dirty_target=20,eviction_dirty_trigger=90" \ @@ -440,8 +442,8 @@ run_workload(uint32_t nth, const char *build_dir) if (chdir(home) != 0) testutil_die(errno, "Child chdir: %s", home); - testutil_check( - __wt_snprintf(envconf, sizeof(envconf), ENV_CONFIG_TXNSYNC, cache_mb, SESSION_MAX, BUCKET)); + testutil_check(__wt_snprintf(envconf, sizeof(envconf), ENV_CONFIG_TXNSYNC, cache_mb, + SESSION_MAX, BUCKET, LOCAL_RETENTION)); testutil_check(__wt_snprintf(extconf, sizeof(extconf), ",extensions=(%s/%s=(early_load=true))", build_dir, WT_STORAGE_LIB)); @@ -490,7 +492,6 @@ run_workload(uint32_t nth, const char *build_dir) testutil_check(__wt_thread_create(NULL, &thr[ts_id], thread_ts_run, &td[ts_id])); } 
printf("Create %" PRIu32 " writer threads\n", nth); - printf("Create %" PRIu32 " writer threads\n", nth); for (i = 0; i < nth; ++i) { td[i].conn = conn; td[i].start = WT_BILLION * (uint64_t)i; diff --git a/src/third_party/wiredtiger/test/suite/test_tiered02.py b/src/third_party/wiredtiger/test/suite/test_tiered02.py index bed8e57ef54..2041bcdf767 100755 --- a/src/third_party/wiredtiger/test/suite/test_tiered02.py +++ b/src/third_party/wiredtiger/test/suite/test_tiered02.py @@ -83,9 +83,7 @@ class test_tiered02(wttest.WiredTigerTestCase): self.assertEqual(len(got), self.flushed_objects) self.flushed_objects = len(got) - # Test tiered storage with the old prototype way of signaling flushing to the shared - # tier via checkpoints. When flush_tier is working, the checkpoint calls can be - # replaced with flush_tier. + # Test tiered storage with checkpoints and flush_tier calls. def test_tiered(self): self.flushed_objects = 0 args = 'key_format=S' @@ -110,7 +108,7 @@ class test_tiered02(wttest.WiredTigerTestCase): self.close_conn() self.progress('reopen_conn') self.reopen_conn() - # Check what was there before + # Check what was there before. ds = SimpleDataSet(self, self.uri, 10, config=args) ds.check() @@ -152,7 +150,7 @@ class test_tiered02(wttest.WiredTigerTestCase): self.progress('reopen_conn') self.reopen_conn() - # Check what was there before + # Check what was there before. ds = SimpleDataSet(self, self.uri, 200, config=args) ds.check() diff --git a/src/third_party/wiredtiger/test/suite/test_tiered04.py b/src/third_party/wiredtiger/test/suite/test_tiered04.py index efb5630eda6..05fbbc44a5e 100755 --- a/src/third_party/wiredtiger/test/suite/test_tiered04.py +++ b/src/third_party/wiredtiger/test/suite/test_tiered04.py @@ -26,7 +26,7 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. 
-import os, wiredtiger, wttest +import os, time, wiredtiger, wttest from wiredtiger import stat StorageSource = wiredtiger.StorageSource # easy access to constants @@ -35,8 +35,11 @@ StorageSource = wiredtiger.StorageSource # easy access to constants class test_tiered04(wttest.WiredTigerTestCase): # If the 'uri' changes all the other names must change with it. - fileuri_base = 'file:test_tiered04-000000000' - objuri = 'object:test_tiered04-0000000001.wtobj' + base = 'test_tiered04-000000000' + fileuri_base = 'file:' + base + obj1file = base + '1.wtobj' + obj2file = base + '2.wtobj' + objuri = 'object:' + base + '1.wtobj' tiereduri = "tiered:test_tiered04" uri = "table:test_tiered04" @@ -53,8 +56,8 @@ class test_tiered04(wttest.WiredTigerTestCase): object_sys_val = 9 * 1024 * 1024 object_uri = "15M" object_uri_val = 15 * 1024 * 1024 - retention = 600 - retention1 = 350 + retention = 3 + retention1 = 600 def conn_config(self): os.mkdir(self.bucket) os.mkdir(self.bucket1) @@ -120,19 +123,50 @@ class test_tiered04(wttest.WiredTigerTestCase): self.pr("flush tier") c = self.session.open_cursor(self.uri) + c1 = self.session.open_cursor(self.uri1) + cn = self.session.open_cursor(self.uri_none) c["0"] = "0" + c1["0"] = "0" + cn["0"] = "0" self.check(c, 1) + self.check(c1, 1) + self.check(cn, 1) c.close() + + # Check the local retention. After a flush_tier call the object file should exist in + # the local database. Then after sleeping long enough it should be removed. + self.session.checkpoint() + self.session.flush_tier(None) + self.pr("Check for ") + self.pr(self.obj1file) + self.assertTrue(os.path.exists(self.obj1file)) + self.assertTrue(os.path.exists(self.obj2file)) + self.pr("Sleep") + time.sleep(self.retention + 1) + # We call flush_tier here because otherwise the internal thread that + # processes the work units won't run for a while. This call will signal + # the internal thread to process the work units. 
self.session.flush_tier(None) + time.sleep(1) + self.pr("Check removal of ") + self.pr(self.obj1file) + self.assertFalse(os.path.exists(self.obj1file)) c = self.session.open_cursor(self.uri) c["1"] = "1" + c1["1"] = "1" + cn["1"] = "1" self.check(c, 2) c.close() c = self.session.open_cursor(self.uri) c["2"] = "2" + c1["2"] = "2" + cn["2"] = "2" self.check(c, 3) + c1.close() + cn.close() + self.session.checkpoint() self.pr("flush tier again, holding open cursor") self.session.flush_tier(None) @@ -142,7 +176,7 @@ class test_tiered04(wttest.WiredTigerTestCase): c.close() calls = self.get_stat(stat.conn.flush_tier, None) - flush = 2 + flush = 3 self.assertEqual(calls, flush) obj = self.get_stat(stat.conn.tiered_object_size, None) self.assertEqual(obj, self.object_sys_val) @@ -174,26 +208,27 @@ class test_tiered04(wttest.WiredTigerTestCase): self.assertEqual(retain, self.retention) self.session.flush_tier(None) self.session.flush_tier('force=true') + flush += 2 calls = self.get_stat(stat.conn.flush_tier, None) - self.assertEqual(calls, 4) + self.assertEqual(calls, flush) # Test reconfiguration. - new = self.retention * 2 - config = 'tiered_storage=(local_retention=%d)' % new + config = 'tiered_storage=(local_retention=%d)' % self.retention1 self.pr("reconfigure") self.conn.reconfigure(config) retain = self.get_stat(stat.conn.tiered_retention, None) - self.assertEqual(retain, new) - self.pr("reconfigure flush_tier") + self.assertEqual(retain, self.retention1) + # Call flush_tier with its various configuration arguments. It is difficult # to force a timeout or lock contention with a unit test. So just test the # call for now. 
self.session.flush_tier('timeout=10') self.session.flush_tier('lock_wait=false') self.session.flush_tier('sync=off') + flush += 3 self.pr("reconfigure get stat") calls = self.get_stat(stat.conn.flush_tier, None) - self.assertEqual(calls, 7) + self.assertEqual(calls, flush) if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_tiered06.py b/src/third_party/wiredtiger/test/suite/test_tiered06.py index 614619fdef2..f129bafd0c0 100755 --- a/src/third_party/wiredtiger/test/suite/test_tiered06.py +++ b/src/third_party/wiredtiger/test/suite/test_tiered06.py @@ -81,7 +81,7 @@ class test_tiered06(wttest.WiredTigerTestCase): # Nothing is in the directory list until a flush. self.assertEquals(fs.fs_directory_list(session, '', ''), []) - # Flushing moves the file into the file system + # Flushing copies the file into the file system. local.ss_flush(session, fs, 'foobar', 'foobar', None) local.ss_flush_finish(session, fs, 'foobar', 'foobar', None) @@ -91,7 +91,7 @@ class test_tiered06(wttest.WiredTigerTestCase): fh = fs.fs_open_file(session, 'foobar', FileSystem.open_file_type_data, FileSystem.open_readonly) inbytes = bytes(1000000) # An empty buffer with a million zero bytes. - fh.fh_read(session, 0, inbytes) # read into the buffer + fh.fh_read(session, 0, inbytes) # Read into the buffer. self.assertEquals(outbytes[0:1000000], inbytes) self.assertEquals(fs.fs_size(session, 'foobar'), len(outbytes)) self.assertEquals(fh.fh_size(session), len(outbytes)) @@ -136,29 +136,29 @@ class test_tiered06(wttest.WiredTigerTestCase): block_size = 4096 f = open('abc', 'wb') - # blocks filled with 'a', etc. + # Create some blocks filled with 'a', etc. a_block = ('a' * block_size).encode() b_block = ('b' * block_size).encode() c_block = ('c' * block_size).encode() file_size = nblocks * block_size - # write all blocks as 'a', but in reverse order + # Write all blocks as 'a', but in reverse order. 
for pos in range(file_size - block_size, 0, -block_size): f.seek(pos) f.write(a_block) - # write the even blocks as 'b', forwards + # Write the even blocks as 'b', forwards. for pos in range(0, file_size, block_size * 2): f.seek(pos) f.write(b_block) - # write every third block as 'c', backwards + # Write every third block as 'c', backwards. for pos in range(file_size - block_size, 0, -block_size * 3): f.seek(pos) f.write(c_block) f.close() - # Flushing moves the file into the file system + # Flushing copies the file into the file system. local.ss_flush(session, fs, 'abc', 'abc', None) local.ss_flush_finish(session, fs, 'abc', 'abc', None) @@ -172,7 +172,7 @@ class test_tiered06(wttest.WiredTigerTestCase): in_block = bytes(block_size) fh = fs.fs_open_file(session, 'abc', FileSystem.open_file_type_data, FileSystem.open_readonly) - # Do some spot checks, reading non-sequentially + # Do some spot checks, reading non-sequentially. fh.fh_read(session, 500 * block_size, in_block) # divisible by 2, not 3 self.assertEquals(in_block, b_block) fh.fh_read(session, 333 * block_size, in_block) # divisible by 3, not 2 @@ -208,7 +208,7 @@ class test_tiered06(wttest.WiredTigerTestCase): cachedir1 = "./cache1" cachedir2 = "./cache2" - # Add a suffix to each in a list + # Add a suffix to each in a list. def suffix(self, lst, sfx): return [x + '.' + sfx for x in lst] @@ -221,7 +221,7 @@ class test_tiered06(wttest.WiredTigerTestCase): # Check for data files in the WiredTiger home directory. def check_home(self, expect): - # Get list of all .wt files in home, prune out the WiredTiger produced ones + # Get list of all .wt files in home, prune out the WiredTiger produced ones. got = sorted(list(os.listdir(self.home))) got = [x for x in got if not x.startswith('WiredTiger') and x.endswith('.wt')] expect = sorted(self.suffix(expect, 'wt')) @@ -229,7 +229,7 @@ class test_tiered06(wttest.WiredTigerTestCase): # Check that objects are "in the cloud" after a flush. 
# Using the local storage module, they are actually going to be in either - # objectdir1 or objectdir2 + # objectdir1 or objectdir2. def check_objects(self, expect1, expect2): got = sorted(list(os.listdir(self.objectdir1))) expect = sorted(self.suffix(expect1, 'wtobj')) @@ -252,7 +252,7 @@ class test_tiered06(wttest.WiredTigerTestCase): f.write('hello') def test_local_file_systems(self): - # Test using various buckets, hosts + # Test using various buckets, hosts. session = self.session local = self.conn.get_storage_source('local_store') @@ -310,12 +310,12 @@ class test_tiered06(wttest.WiredTigerTestCase): self.check_caches([], []) self.check_objects(['beagle'], []) - # Bad file to flush + # Bad file to flush. errmsg = '/No such file/' self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: local.ss_flush(session, fs1, 'bad.wt', 'bad.wtobj'), errmsg) - # It's okay to flush again, nothing changes + # It's okay to flush again, nothing changes. local.ss_flush(session, fs1, 'beagle.wt', 'beagle.wtobj') self.check_home(['beagle', 'bird', 'bison', 'bat', 'cat', 'cougar', 'coyote', 'cub']) self.check_dirlist(fs1, '', ['beagle']) @@ -323,15 +323,15 @@ class test_tiered06(wttest.WiredTigerTestCase): self.check_caches([], []) self.check_objects(['beagle'], []) - # When we flush_finish, the local file will move to the cache directory + # When we flush_finish, the local file will be in both the local and cache directory. local.ss_flush_finish(session, fs1, 'beagle.wt', 'beagle.wtobj') - self.check_home(['bird', 'bison', 'bat', 'cat', 'cougar', 'coyote', 'cub']) + self.check_home(['beagle', 'bird', 'bison', 'bat', 'cat', 'cougar', 'coyote', 'cub']) self.check_dirlist(fs1, '', ['beagle']) self.check_dirlist(fs2, '', []) self.check_caches(['beagle'], []) self.check_objects(['beagle'], []) - # Do a some more in each file ssytem + # Do a some more in each file system. 
local.ss_flush(session, fs1, 'bison.wt', 'bison.wtobj') local.ss_flush(session, fs2, 'cat.wt', 'cat.wtobj') local.ss_flush(session, fs1, 'bat.wt', 'bat.wtobj') @@ -339,13 +339,13 @@ class test_tiered06(wttest.WiredTigerTestCase): local.ss_flush(session, fs2, 'cub.wt', 'cub.wtobj') local.ss_flush_finish(session, fs1, 'bat.wt', 'bat.wtobj') - self.check_home(['bird', 'bison', 'cougar', 'coyote', 'cub']) + self.check_home(['beagle', 'bird', 'bison', 'bat', 'cat', 'cougar', 'coyote', 'cub']) self.check_dirlist(fs1, '', ['beagle', 'bat', 'bison']) self.check_dirlist(fs2, '', ['cat', 'cub']) self.check_caches(['beagle', 'bat'], ['cat']) self.check_objects(['beagle', 'bat', 'bison'], ['cat', 'cub']) - # Test directory listing prefixes + # Test directory listing prefixes. self.check_dirlist(fs1, '', ['beagle', 'bat', 'bison']) self.check_dirlist(fs1, 'ba', ['bat']) self.check_dirlist(fs1, 'be', ['beagle']) |