From 35c86a14e2ad2ef8d14fe7ea3ea02951f30e646a Mon Sep 17 00:00:00 2001
From: Cheahuychou Mao
Date: Sun, 17 May 2020 17:58:09 -0400
Subject: SERVER-40441 Make arbiters not try to setup the sessions collection
 or check if it exists in LogicalSessionCache refresh/reap thread

---
 .../replsets/sessions_collection_auto_healing.js | 36 ++++++++++-
 jstests/replsets/sessions_collection_reaping.js  | 75 ++++++++++++++++++++++
 src/mongo/db/logical_session_cache_impl.cpp      | 53 +++++++++------
 3 files changed, 142 insertions(+), 22 deletions(-)
 create mode 100644 jstests/replsets/sessions_collection_reaping.js

diff --git a/jstests/replsets/sessions_collection_auto_healing.js b/jstests/replsets/sessions_collection_auto_healing.js
index 28f3dc51bba..f8240fc58d6 100644
--- a/jstests/replsets/sessions_collection_auto_healing.js
+++ b/jstests/replsets/sessions_collection_auto_healing.js
@@ -9,7 +9,11 @@ TestData.disableImplicitSessions = true;
 
 var replTest = new ReplSetTest({
     name: 'refresh',
-    nodes: [{rsConfig: {votes: 1, priority: 1}}, {rsConfig: {votes: 0, priority: 0}}]
+    nodes: [
+        {/* primary */},
+        {/* secondary */ rsConfig: {priority: 0}},
+        {/* arbiter */ rsConfig: {arbiterOnly: true}}
+    ]
 });
 var nodes = replTest.startSet();
 
@@ -21,6 +25,11 @@ replTest.awaitSecondaryNodes();
 var secondary = replTest.getSecondary();
 var secondaryAdmin = secondary.getDB("admin");
 
+let arbiter = replTest.getArbiter();
+
+const refreshErrorMsgRegex =
+    new RegExp("Failed to refresh session cache, will try again at the next refresh interval");
+
 // Get the current value of the TTL index so that we can verify it's being properly applied.
 let res = assert.commandWorked(
     primary.adminCommand({getParameter: 1, localLogicalSessionTimeoutMinutes: 1}));
@@ -64,6 +73,22 @@ let timeoutMinutes = res.localLogicalSessionTimeoutMinutes;
     replTest.awaitReplication();
     validateSessionsCollection(secondary, false, false, timeoutMinutes);
 }
+
+// Test that a refresh on an arbiter does not create the sessions collection.
+{
+    validateSessionsCollection(primary, false, false, timeoutMinutes);
+
+    assert.commandWorked(arbiter.adminCommand({clearLog: 'global'}));
+    assert.commandWorked(arbiter.adminCommand({refreshLogicalSessionCacheNow: 1}));
+
+    validateSessionsCollection(primary, false, false, timeoutMinutes);
+
+    if (!jsTest.options().useRandomBinVersionsWithinReplicaSet) {
+        // Verify that the arbiter did not try to set up the session collection or refresh.
+        assert.eq(false, checkLog.checkContainsOnce(arbiter, refreshErrorMsgRegex));
+    }
+}
+
 // Test that a refresh on the primary creates the sessions collection.
 {
     validateSessionsCollection(primary, false, false, timeoutMinutes);
@@ -87,6 +112,15 @@
     validateSessionsCollection(primary, true, false, timeoutMinutes);
 }
 
+// Test that a refresh on an arbiter will not create the TTL index on the sessions collection.
+{
+    validateSessionsCollection(primary, true, false, timeoutMinutes);
+
+    assert.commandWorked(arbiter.adminCommand({refreshLogicalSessionCacheNow: 1}));
+
+    validateSessionsCollection(primary, true, false, timeoutMinutes);
+}
+
 // Test that a refresh on the primary will create the TTL index on the sessions collection.
 {
     validateSessionsCollection(primary, true, false, timeoutMinutes);
diff --git a/jstests/replsets/sessions_collection_reaping.js b/jstests/replsets/sessions_collection_reaping.js
new file mode 100644
index 00000000000..ec6e60bbf40
--- /dev/null
+++ b/jstests/replsets/sessions_collection_reaping.js
@@ -0,0 +1,75 @@
+(function() {
+"use strict";
+
+// This test makes assertions about the number of sessions, which are not compatible with
+// implicit sessions.
+TestData.disableImplicitSessions = true;
+
+let replTest = new ReplSetTest({
+    name: 'reaping',
+    nodes: [
+        {/* primary */},
+        {/* secondary */ rsConfig: {priority: 0}},
+        {/* arbiter */ rsConfig: {arbiterOnly: true}}
+    ],
+    nodeOptions: {setParameter: {TransactionRecordMinimumLifetimeMinutes: 0}}
+});
+let nodes = replTest.startSet();
+
+replTest.initiate();
+let primary = replTest.getPrimary();
+let sessionsCollOnPrimary = primary.getDB("config").system.sessions;
+let transactionsCollOnPrimary = primary.getDB("config").transactions;
+
+replTest.awaitSecondaryNodes();
+let secondary = replTest.getSecondary();
+let arbiter = replTest.getArbiter();
+
+const dbName = jsTestName();
+const collName = "test";
+const reapErrorMsgRegex =
+    new RegExp("Sessions collection is not set up.*waiting until next sessions reap interval");
+
+// Set up a session with a retryable write.
+let session = primary.startSession({retryWrites: 1});
+assert.commandWorked(session.getDatabase(dbName)[collName].save({x: 1}));
+assert.commandWorked(primary.adminCommand({refreshLogicalSessionCacheNow: 1}));
+assert.eq(1, sessionsCollOnPrimary.count());
+assert.eq(1, transactionsCollOnPrimary.count());
+
+// Remove the session doc so the session gets reaped when reapLogicalSessionCacheNow is run.
+assert.commandWorked(sessionsCollOnPrimary.remove({}));
+
+// Test that a reap on a secondary does not lead to the on-disk state reaping of the session
+// since the session does not exist in the secondary's session catalog.
+{
+    assert.commandWorked(secondary.adminCommand({clearLog: 'global'}));
+    assert.commandWorked(secondary.adminCommand({reapLogicalSessionCacheNow: 1}));
+
+    assert.eq(1, transactionsCollOnPrimary.count());
+    assert.eq(false, checkLog.checkContainsOnce(secondary, reapErrorMsgRegex));
+}
+
+// Test that a reap on an arbiter does not lead to reaping of the session.
+{
+    assert.commandWorked(arbiter.adminCommand({clearLog: 'global'}));
+    assert.commandWorked(arbiter.adminCommand({reapLogicalSessionCacheNow: 1}));
+
+    assert.eq(1, transactionsCollOnPrimary.count());
+
+    if (!jsTest.options().useRandomBinVersionsWithinReplicaSet) {
+        // Verify that the arbiter did not try to reap the session.
+        assert.eq(false, checkLog.checkContainsOnce(arbiter, reapErrorMsgRegex));
+    }
+}
+
+// Test that a reap on the primary works as expected.
+{
+    assert.commandWorked(primary.adminCommand({clearLog: 'global'}));
+    assert.commandWorked(primary.adminCommand({reapLogicalSessionCacheNow: 1}));
+
+    assert.eq(0, transactionsCollOnPrimary.count());
+}
+
+replTest.stopSet();
+})();
diff --git a/src/mongo/db/logical_session_cache_impl.cpp b/src/mongo/db/logical_session_cache_impl.cpp
index 524f910ec1d..1a0e2f0d13e 100644
--- a/src/mongo/db/logical_session_cache_impl.cpp
+++ b/src/mongo/db/logical_session_cache_impl.cpp
@@ -36,6 +36,7 @@
 #include "mongo/db/logical_session_id.h"
 #include "mongo/db/logical_session_id_helpers.h"
 #include "mongo/db/operation_context.h"
+#include "mongo/db/repl/replication_coordinator.h"
 #include "mongo/db/s/operation_sharding_state.h"
 #include "mongo/db/service_context.h"
 #include "mongo/logv2/log.h"
@@ -146,6 +147,21 @@ void LogicalSessionCacheImpl::_periodicReap(Client* client) {
 }
 
 Status LogicalSessionCacheImpl::_reap(Client* client) {
+    boost::optional<ServiceContext::UniqueOperationContext> uniqueCtx;
+    auto* const opCtx = [&] {
+        if (client->getOperationContext()) {
+            return client->getOperationContext();
+        }
+
+        uniqueCtx.emplace(client->makeOperationContext());
+        return uniqueCtx->get();
+    }();
+
+    const auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+    if (replCoord && replCoord->getMemberState().arbiter()) {
+        return Status::OK();
+    }
+
     // Take the lock to update some stats.
     {
         stdx::lock_guard lk(_mutex);
@@ -159,16 +175,6 @@ Status LogicalSessionCacheImpl::_reap(Client* client) {
         _stats.setTransactionReaperJobCount(_stats.getTransactionReaperJobCount() + 1);
     }
 
-    boost::optional<ServiceContext::UniqueOperationContext> uniqueCtx;
-    auto* const opCtx = [&] {
-        if (client->getOperationContext()) {
-            return client->getOperationContext();
-        }
-
-        uniqueCtx.emplace(client->makeOperationContext());
-        return uniqueCtx->get();
-    }();
-
     int numReaped = 0;
 
     try {
@@ -216,6 +222,22 @@
 }
 
 void LogicalSessionCacheImpl::_refresh(Client* client) {
+    // get or make an opCtx
+    boost::optional<ServiceContext::UniqueOperationContext> uniqueCtx;
+    auto* const opCtx = [&client, &uniqueCtx] {
+        if (client->getOperationContext()) {
+            return client->getOperationContext();
+        }
+
+        uniqueCtx.emplace(client->makeOperationContext());
+        return uniqueCtx->get();
+    }();
+
+    const auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+    if (replCoord && replCoord->getMemberState().arbiter()) {
+        return;
+    }
+
     // Stats for serverStatus:
     {
         stdx::lock_guard lk(_mutex);
@@ -238,17 +260,6 @@
         _stats.setLastSessionsCollectionJobDurationMillis(millis.count());
     });
 
-    // get or make an opCtx
-    boost::optional<ServiceContext::UniqueOperationContext> uniqueCtx;
-    auto* const opCtx = [&client, &uniqueCtx] {
-        if (client->getOperationContext()) {
-            return client->getOperationContext();
-        }
-
-        uniqueCtx.emplace(client->makeOperationContext());
-        return uniqueCtx->get();
-    }();
-
     ON_BLOCK_EXIT([&opCtx] { clearShardingOperationFailedStatus(opCtx); });
 
     try {
--
cgit v1.2.1
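
Editor's note: both hunks in logical_session_cache_impl.cpp apply the same guard, so the shape of the change is easy to miss in the diff noise. The sketch below distills it: acquire an OperationContext up front (reusing the Client's active one when present), then return early when the node is an arbiter. Arbiters hold no data, so config.system.sessions can never be set up or queried on them; the tests above confirm the job no longer logs a refresh/reap failure there. This is a minimal sketch, not a drop-in function: the wrapper name is hypothetical, and the types (Client, ServiceContext::UniqueOperationContext, repl::ReplicationCoordinator) are MongoDB internals taken verbatim from the diff.

// Hypothetical distillation of the SERVER-40441 guard; all types are MongoDB internals.
Status runSessionsCacheJob(Client* client) {
    // Reuse the client's operation context if one is already active;
    // otherwise make one that lives for the duration of this background job.
    boost::optional<ServiceContext::UniqueOperationContext> uniqueCtx;
    auto* const opCtx = [&] {
        if (client->getOperationContext()) {
            return client->getOperationContext();
        }
        uniqueCtx.emplace(client->makeOperationContext());
        return uniqueCtx->get();
    }();

    // Arbiters have no sessions collection to refresh or reap: bail out before
    // touching any stats or collections. (_refresh() simply returns void here.)
    const auto replCoord = repl::ReplicationCoordinator::get(opCtx);
    if (replCoord && replCoord->getMemberState().arbiter()) {
        return Status::OK();
    }

    // ... the pre-existing refresh/reap work runs only on data-bearing nodes ...
    return Status::OK();
}

Note that the patch also moves the opCtx acquisition ahead of the stats-updating blocks in both functions; that reordering is what lets the arbiter check run before any job stats are recorded.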