summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBenety Goh <benety@mongodb.com>2022-08-29 07:28:19 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-08-31 13:01:30 +0000
commita29518c420c5167178438b47634c118f6df2c114 (patch)
tree2071c8c115f00e7befa6d3bcde3a648d48b5a855
parentc72e1a93ceb4dd69d2f11f110b2976355620220f (diff)
downloadmongo-a29518c420c5167178438b47634c118f6df2c114.tar.gz
SERVER-68477 TTLMonitor fixes NaN expireAfterSeconds on TTL indexes during step up
(cherry picked from commit b8eb75f99a18a8651e2315998cf16f53e48917bb)
-rw-r--r--jstests/noPassthrough/ttl_expire_nan.js24
-rw-r--r--src/mongo/db/SConscript1
-rw-r--r--src/mongo/db/ttl.cpp78
3 files changed, 102 insertions, 1 deletions
diff --git a/jstests/noPassthrough/ttl_expire_nan.js b/jstests/noPassthrough/ttl_expire_nan.js
index 6b8ff73d015..6ab095192c4 100644
--- a/jstests/noPassthrough/ttl_expire_nan.js
+++ b/jstests/noPassthrough/ttl_expire_nan.js
@@ -65,5 +65,29 @@ assert.gt(newNodeSpec.expireAfterSeconds,
// during the NaN 'expireAfterSeconds' conversion.
checkLog.containsJson(primary, 6835900, {namespace: coll.getFullName()});
+// Confirm that a node with an existing TTL index with NaN 'expireAfterSeconds' will convert the
+// duration on the TTL index from NaN to a large positive value when it becomes the primary node.
+// When stepping down the primary, we use 'force' because there's no other electable node.
+// Subsequently, we wait for the stepped down node to become primary again.
+// To confirm that the TTL index has been fixed, we check the oplog for a collMod operation on the
+// TTL index that changes the `expireAfterSeconds` field from NaN to a large positive value.
+assert.commandWorked(primary.adminCommand({replSetStepDown: 5, force: true}));
+primary = rst.waitForPrimary();
+const collModOplogEntries =
+ rst.findOplog(primary,
+ {
+ op: 'c',
+ ns: coll.getDB().getCollection('$cmd').getFullName(),
+ 'o.collMod': coll.getName(),
+ 'o.index.name': 't_1',
+ 'o.index.expireAfterSeconds': newNodeSpec.expireAfterSeconds
+ },
+ /*limit=*/1)
+ .toArray();
+assert.eq(collModOplogEntries.length,
+ 1,
+ 'TTL index with NaN expireAfterSeconds was not fixed using collMod during step-up: ' +
+ tojson(rst.findOplog(primary, {op: {$ne: 'n'}}, /*limit=*/10).toArray()));
+
rst.stopSet();
})();
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript
index b28543b40f9..b0f6854468c 100644
--- a/src/mongo/db/SConscript
+++ b/src/mongo/db/SConscript
@@ -1305,6 +1305,7 @@ env.Library(
'$BUILD_DIR/mongo/db/repl/tenant_migration_access_blocker',
'$BUILD_DIR/mongo/db/s/sharding_runtime_d',
'$BUILD_DIR/mongo/idl/server_parameter',
+ 'catalog/catalog_helpers',
'catalog/database_holder',
'commands/server_status_core',
'service_context',
diff --git a/src/mongo/db/ttl.cpp b/src/mongo/db/ttl.cpp
index 8da9e1ee46a..ee94a03d228 100644
--- a/src/mongo/db/ttl.cpp
+++ b/src/mongo/db/ttl.cpp
@@ -32,10 +32,12 @@
#include "mongo/db/auth/authorization_session.h"
#include "mongo/db/auth/user_name.h"
+#include "mongo/db/catalog/coll_mod.h"
#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog/collection_catalog.h"
#include "mongo/db/catalog/database_holder.h"
#include "mongo/db/catalog/index_catalog.h"
+#include "mongo/db/catalog/index_key_validate.h"
#include "mongo/db/client.h"
#include "mongo/db/commands/fsync_locked.h"
#include "mongo/db/commands/server_status_metric.h"
@@ -687,7 +689,81 @@ void shutdownTTLMonitor(ServiceContext* serviceContext) {
}
}
-void TTLMonitor::onStepUp(OperationContext* opCtx) {}
+void TTLMonitor::onStepUp(OperationContext* opCtx) {  // Fixes NaN 'expireAfterSeconds' via collMod.
+ auto&& ttlCollectionCache = TTLCollectionCache::get(opCtx->getServiceContext());
+ auto ttlInfos = ttlCollectionCache.getTTLInfos();
+ for (const auto& [uuid, infos] : ttlInfos) {
+ auto collectionCatalog = CollectionCatalog::get(opCtx);
+ if (collectionCatalog->isCollectionAwaitingVisibility(uuid)) {
+ continue;
+ }
+
+ // No namespace is registered for this UUID: the collection was dropped, so skip it.
+ auto nss = collectionCatalog->lookupNSSByUUID(opCtx, uuid);
+ if (!nss) {
+ continue;
+ }
+
+ if (nss->isTemporaryReshardingCollection() || nss->isDropPendingNamespace()) {
+ continue;
+ }
+
+ try {
+ uassertStatusOK(userAllowedWriteNS(opCtx, *nss));  // Non-OK status throws; see catches.
+
+ for (const auto& info : infos) {
+ // Skip clustered indexes with TTL. This includes time-series collections.
+ if (info.isClustered()) {
+ continue;
+ }
+ if (!info.isExpireAfterSecondsNaN()) {
+ continue;
+ }
+
+ auto indexName = info.getIndexName();
+ LOGV2(6847700,
+ "Running collMod to fix TTL index with NaN 'expireAfterSeconds'.",
+ "ns"_attr = *nss,
+ "uuid"_attr = uuid,
+ "name"_attr = indexName,
+ "expireAfterSecondsNew"_attr =
+ index_key_validate::kExpireAfterSecondsForInactiveTTLIndex);
+
+ // Compose a collMod command that amends 'expireAfterSeconds' to the same value that
+ // listIndexes() would use when converting the NaN value stored in the catalog.
+ CollModIndex collModIndex;
+ collModIndex.setName(StringData{indexName});
+ collModIndex.setExpireAfterSeconds(mongo::durationCount<Seconds>(
+ index_key_validate::kExpireAfterSecondsForInactiveTTLIndex));
+ CollMod collModCmd{*nss};
+ collModCmd.getCollModRequest().setIndex(collModIndex);
+
+ // processCollModCommand() will acquire MODE_X access to the collection.
+ BSONObjBuilder builder;
+ uassertStatusOK(
+ processCollModCommand(opCtx, {nss->db(), uuid}, collModCmd, &builder));
+ auto result = builder.obj();
+ LOGV2(6847701,
+ "Successfully fixed TTL index with NaN 'expireAfterSeconds' using collMod",
+ "ns"_attr = *nss,
+ "uuid"_attr = uuid,
+ "name"_attr = indexName,
+ "result"_attr = result);
+ }
+ } catch (const ExceptionForCat<ErrorCategory::Interruption>&) {
+ // An interruption is relevant to the entire TTL monitoring process, not just this
+ // specific TTL index. Let the exception escape so it can be addressed at the higher
+ // monitoring layer.
+ throw;
+ } catch (const DBException& ex) {
+ LOGV2_ERROR(6835901,
+ "Error checking TTL job on collection during step up",
+ logAttrs(*nss),
+ "error"_attr = ex);
+ continue;  // The failure is logged above; proceed to the next collection.
+ }
+ }
+}
long long TTLMonitor::getTTLPasses_forTest() {
return ttlPasses.get();