diff options
author | Benety Goh <benety@mongodb.com> | 2022-08-29 07:28:19 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-10-12 13:38:54 +0000 |
commit | 615ffd680b4ca995b7e8185900caf0bda8114e4f (patch) | |
tree | bd4755dcdc7eeefc8e1f95bbec4eaad464b2a784 | |
parent | 6e10f7705e4eff4738291af5a5fc2aa58bca9d5c (diff) | |
download | mongo-615ffd680b4ca995b7e8185900caf0bda8114e4f.tar.gz |
SERVER-68477 TTLMonitor fixes NaN expireAfterSeconds on TTL indexes during step up
(cherry picked from commit b8eb75f99a18a8651e2315998cf16f53e48917bb)
(cherry picked from commit aab9685c531bed030dc8bba914d695d7c66fc08b)
-rw-r--r-- | jstests/noPassthrough/ttl_expire_nan.js | 24 | ||||
-rw-r--r-- | src/mongo/db/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/ttl.cpp | 78 |
3 files changed, 102 insertions, 1 deletions
diff --git a/jstests/noPassthrough/ttl_expire_nan.js b/jstests/noPassthrough/ttl_expire_nan.js index 6b8ff73d015..6ab095192c4 100644 --- a/jstests/noPassthrough/ttl_expire_nan.js +++ b/jstests/noPassthrough/ttl_expire_nan.js @@ -65,5 +65,29 @@ assert.gt(newNodeSpec.expireAfterSeconds, // during the NaN 'expireAfterSeconds' conversion. checkLog.containsJson(primary, 6835900, {namespace: coll.getFullName()}); +// Confirm that a node with an existing TTL index with NaN 'expireAfterSeconds' will convert the +// duration on the TTL index from NaN to a large positive value when it becomes the primary node. +// When stepping down the primary, we use 'force' because there's no other electable node. +// Subsequently, we wait for the stepped down node to become primary again. +// To confirm that the TTL index has been fixed, we check the oplog for a collMod operation on the +// TTL index that changes the `expireAfterSeconds` field from NaN to a large positive value. +assert.commandWorked(primary.adminCommand({replSetStepDown: 5, force: true})); +primary = rst.waitForPrimary(); +const collModOplogEntries = + rst.findOplog(primary, + { + op: 'c', + ns: coll.getDB().getCollection('$cmd').getFullName(), + 'o.collMod': coll.getName(), + 'o.index.name': 't_1', + 'o.index.expireAfterSeconds': newNodeSpec.expireAfterSeconds + }, + /*limit=*/1) + .toArray(); +assert.eq(collModOplogEntries.length, + 1, + 'TTL index with NaN expireAfterSeconds was not fixed using collMod during step-up: ' + + tojson(rst.findOplog(primary, {op: {$ne: 'n'}}, /*limit=*/10).toArray())); + rst.stopSet(); })(); diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index b56f2898980..41cc49d52c5 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -1147,6 +1147,7 @@ env.Library( '$BUILD_DIR/mongo/db/repl/replica_set_aware_service', '$BUILD_DIR/mongo/db/repl/tenant_migration_access_blocker', '$BUILD_DIR/mongo/idl/server_parameter', + 'catalog/catalog_helpers', 'catalog/database_holder', 'commands/server_status_core', 'service_context', diff --git a/src/mongo/db/ttl.cpp b/src/mongo/db/ttl.cpp index 7e02d6816d0..2aec6e2bee0 100644 --- a/src/mongo/db/ttl.cpp +++ b/src/mongo/db/ttl.cpp @@ -36,10 +36,13 @@ #include "mongo/base/counter.h" #include "mongo/db/auth/authorization_session.h" #include "mongo/db/auth/user_name.h" +#include "mongo/db/catalog/coll_mod.h" #include "mongo/db/catalog/collection.h" #include "mongo/db/catalog/database_holder.h" #include "mongo/db/catalog/index_catalog.h" +#include "mongo/db/catalog/index_key_validate.h" #include "mongo/db/client.h" +#include "mongo/db/coll_mod_gen.h" #include "mongo/db/commands/fsync_locked.h" #include "mongo/db/commands/server_status_metric.h" #include "mongo/db/concurrency/write_conflict_exception.h" @@ -544,7 +547,80 @@ void shutdownTTLMonitor(ServiceContext* serviceContext) { } } -void TTLMonitor::onStepUp(OperationContext* opCtx) {} +void TTLMonitor::onStepUp(OperationContext* opCtx) { + auto&& ttlCollectionCache = TTLCollectionCache::get(opCtx->getServiceContext()); + auto ttlInfos = ttlCollectionCache.getTTLInfos(); + for (const auto& [uuid, infos] : ttlInfos) { + auto collectionCatalog = CollectionCatalog::get(opCtx); + if (collectionCatalog->isCollectionAwaitingVisibility(uuid)) { + continue; + } + + // The collection was dropped. + auto nss = collectionCatalog->lookupNSSByUUID(opCtx, uuid); + if (!nss) { + continue; + } + + if (nss->isTemporaryReshardingCollection() || nss->isDropPendingNamespace()) { + continue; + } + + try { + uassertStatusOK(userAllowedWriteNS(opCtx, *nss)); + + for (const auto& info : infos) { + // Skip clustered indexes with TTL. This includes time-series collections. + if (info.isClustered()) { + continue; + } + if (!info.isExpireAfterSecondsNaN()) { + continue; + } + + auto indexName = info.getIndexName(); + LOGV2(6847700, + "Running collMod to fix TTL index with NaN 'expireAfterSeconds'.", + "ns"_attr = *nss, + "uuid"_attr = uuid, + "name"_attr = indexName, + "expireAfterSecondsNew"_attr = + index_key_validate::kExpireAfterSecondsForInactiveTTLIndex); + + // Compose collMod command to amend 'expireAfterSeconds' to same value that + // would be used by listIndexes() to convert the NaN value in the catalog. + CollModIndex collModIndex; + collModIndex.setName(StringData{indexName}); + collModIndex.setExpireAfterSeconds(mongo::durationCount<Seconds>( + index_key_validate::kExpireAfterSecondsForInactiveTTLIndex)); + CollMod collModCmd{*nss}; + collModCmd.setIndex(collModIndex); + + // processCollModCommand() will acquire MODE_X access to the collection. + BSONObjBuilder builder; + uassertStatusOK(collMod(opCtx, *nss, collModCmd.toBSON({}), &builder)); + auto result = builder.obj(); + LOGV2(6847701, + "Successfully fixed TTL index with NaN 'expireAfterSeconds' using collMod", + "ns"_attr = *nss, + "uuid"_attr = uuid, + "name"_attr = indexName, + "result"_attr = result); + } + } catch (const ExceptionForCat<ErrorCategory::Interruption>&) { + // The exception is relevant to the entire TTL monitoring process, not just the specific + // TTL index. Let the exception escape so it can be addressed at the higher monitoring + // layer. + throw; + } catch (const DBException& ex) { + LOGV2_ERROR(6835901, + "Error checking TTL job on collection during step up", + logAttrs(*nss), + "error"_attr = ex); + continue; + } + } +} namespace { |