diff options
author | Wenbin Zhu <wenbin.zhu@mongodb.com> | 2021-06-22 21:36:08 +0000 |
---|---|---|
committer | Wenbin Zhu <wenbin.zhu@mongodb.com> | 2021-06-22 22:24:34 +0000 |
commit | 41720c10856d4b876b7b0c9dcf10a3c14a44d8cb (patch) | |
tree | 0d6b44cf9b6cf5fd591aa008eddbcf0f7163733a | |
parent | aa7d6c35d3c9ea32bb40d09f428306472645a800 (diff) | |
download | mongo-41720c10856d4b876b7b0c9dcf10a3c14a44d8cb.tar.gz |
SERVER-56010 Wait for primary services to finish rebuilding before completing replset initialization.
(cherry picked from commit 48e8104223a4ba42bdf22ecf03143c78d757a109)
-rw-r--r-- | jstests/replsets/tenant_migration_donor_kill_op_retry.js | 5 | ||||
-rw-r--r-- | jstests/replsets/tenant_migration_donor_state_machine.js | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/primary_only_service.cpp | 36 | ||||
-rw-r--r-- | src/mongo/db/repl/primary_only_service.h | 22 | ||||
-rw-r--r-- | src/mongo/db/repl/primary_only_service_test.cpp | 32 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 2 | ||||
-rw-r--r-- | src/mongo/shell/replsettest.js | 82 |
7 files changed, 134 insertions, 49 deletions
diff --git a/jstests/replsets/tenant_migration_donor_kill_op_retry.js b/jstests/replsets/tenant_migration_donor_kill_op_retry.js index e834c8e98e1..a9e1f9bd44f 100644 --- a/jstests/replsets/tenant_migration_donor_kill_op_retry.js +++ b/jstests/replsets/tenant_migration_donor_kill_op_retry.js @@ -152,7 +152,10 @@ if (!tenantMigrationTest.isFeatureFlagEnabled()) { tenantMigrationTest.getDonorRst().startSet(Object.assign({}, migrationX509Options.donor, { setParameter: {['failpoint.' + fpName]: tojson({mode: 'alwaysOn'})} })); - tenantMigrationTest.getDonorRst().initiate(); + // The failpoints in this test run hang the TenantMigrationDonorService during service + // rebuild, so we need to skip waiting on PrimaryOnlyServices. + tenantMigrationTest.getDonorRst().initiate( + null, null, {doNotWaitForPrimaryOnlyServices: true}); TenantMigrationUtil.createTenantMigrationRecipientRoleIfNotExist( tenantMigrationTest.getDonorRst()); diff --git a/jstests/replsets/tenant_migration_donor_state_machine.js b/jstests/replsets/tenant_migration_donor_state_machine.js index b88cc76cada..fed89e496c4 100644 --- a/jstests/replsets/tenant_migration_donor_state_machine.js +++ b/jstests/replsets/tenant_migration_donor_state_machine.js @@ -52,7 +52,7 @@ function testDonorForgetMigrationAfterMigrationCompletes( })); assert.soon(() => 0 === donorPrimary.adminCommand({serverStatus: 1}) - .repl.primaryOnlyServices.TenantMigrationDonorService); + .repl.primaryOnlyServices.TenantMigrationDonorService.numInstances); const donorRecipientMonitorPoolStats = donorPrimary.adminCommand({connPoolStats: 1}).replicaSets; @@ -70,7 +70,7 @@ function testDonorForgetMigrationAfterMigrationCompletes( })); assert.soon(() => 0 === recipientPrimary.adminCommand({serverStatus: 1}) - .repl.primaryOnlyServices.TenantMigrationRecipientService); + .repl.primaryOnlyServices.TenantMigrationRecipientService.numInstances); const recipientRecipientMonitorPoolStats = recipientPrimary.adminCommand({connPoolStats: 1}).replicaSets; diff --git a/src/mongo/db/repl/primary_only_service.cpp b/src/mongo/db/repl/primary_only_service.cpp index c5a2966e493..b95354b5f2e 100644 --- a/src/mongo/db/repl/primary_only_service.cpp +++ b/src/mongo/db/repl/primary_only_service.cpp @@ -219,8 +219,8 @@ void PrimaryOnlyServiceRegistry::onStepDown() { void PrimaryOnlyServiceRegistry::reportServiceInfoForServerStatus(BSONObjBuilder* result) noexcept { BSONObjBuilder subBuilder(result->subobjStart("primaryOnlyServices")); for (auto& service : _servicesByName) { - subBuilder.appendNumber(service.first, - static_cast<long long>(service.second->getNumberOfInstances())); + BSONObjBuilder serviceInfoBuilder(subBuilder.subobjStart(service.first)); + service.second->reportForServerStatus(&serviceInfoBuilder); } } @@ -236,9 +236,10 @@ void PrimaryOnlyServiceRegistry::reportServiceInfoForCurrentOp( PrimaryOnlyService::PrimaryOnlyService(ServiceContext* serviceContext) : _serviceContext(serviceContext) {} -size_t PrimaryOnlyService::getNumberOfInstances() { +void PrimaryOnlyService::reportForServerStatus(BSONObjBuilder* result) noexcept { stdx::lock_guard lk(_mutex); - return _activeInstances.size(); + result->append("state", _getStateString(lk)); + result->appendNumber("numInstances", static_cast<long long>(_activeInstances.size())); } void PrimaryOnlyService::reportInstanceInfoForCurrentOp( @@ -255,11 +256,6 @@ void PrimaryOnlyService::reportInstanceInfoForCurrentOp( } } -bool PrimaryOnlyService::isRunning() const { - stdx::lock_guard lk(_mutex); - return _state == State::kRunning; -} - void PrimaryOnlyService::registerOpCtx(OperationContext* opCtx, bool allowOpCtxWhileRebuilding) { stdx::lock_guard lk(_mutex); auto [_, inserted] = _opCtxs.emplace(opCtx); @@ -371,6 +367,11 @@ void PrimaryOnlyService::onStepUp(const OpTime& stepUpOpTime) { // all previous writes to state documents are also committed, and then schedule work to // rebuild Instances from their persisted state documents. lk.lock(); + LOGV2_DEBUG(5601000, + 2, + "Waiting on first write of the new term to be majority committed", + "service"_attr = getServiceName(), + "stepUpOpTime"_attr = stepUpOpTime); WaitForMajorityService::get(_serviceContext) .waitUntilMajority(stepUpOpTime, _source.token()) .thenRunOn(**newScopedExecutor) @@ -761,6 +762,23 @@ std::shared_ptr<PrimaryOnlyService::Instance> PrimaryOnlyService::_insertNewInst return it->second.getInstance(); } +StringData PrimaryOnlyService::_getStateString(WithLock) const { + switch (_state) { + case State::kRunning: + return "running"; + case State::kPaused: + return "paused"; + case State::kRebuilding: + return "rebuilding"; + case State::kRebuildFailed: + return "rebuildFailed"; + case State::kShutdown: + return "shutdown"; + default: + MONGO_UNREACHABLE; + } +} + PrimaryOnlyService::AllowOpCtxWhenServiceRebuildingBlock::AllowOpCtxWhenServiceRebuildingBlock( Client* client) : _client(client), _clientState(&primaryOnlyServiceStateForClient(_client)) { diff --git a/src/mongo/db/repl/primary_only_service.h b/src/mongo/db/repl/primary_only_service.h index 111f76d1cf7..56bdbe3c365 100644 --- a/src/mongo/db/repl/primary_only_service.h +++ b/src/mongo/db/repl/primary_only_service.h @@ -243,16 +243,10 @@ public: void releaseAllInstances(Status status); /** - * Returns whether this service is currently running. This is true only when the node is in - * state PRIMARY *and* this service has finished all asynchronous work associated with resuming - * after stepUp. + * Adds information of this service to the result 'BSONObjBuilder', containing the number of + * active instances and the state of this service. */ - bool isRunning() const; - - /** - * Returns the number of currently running Instances of this service. - */ - size_t getNumberOfInstances(); + void reportForServerStatus(BSONObjBuilder* result) noexcept; /** * Adds information about the Instances belonging to this service to 'ops', to show up in @@ -435,6 +429,11 @@ private: */ void _interruptInstances(WithLock, Status); + /** + * Returns a string representation of the current state. + */ + StringData _getStateString(WithLock) const; + ServiceContext* const _serviceContext; // All member variables are labeled with one of the following codes indicating the @@ -531,12 +530,11 @@ public: PrimaryOnlyService* lookupServiceByNamespace(const NamespaceString& ns); /** - * Adds a 'primaryOnlyServices' sub-obj to the 'result' BSONObjBuilder containing a count of the - * number of active instances for each registered service. + * Adds a 'primaryOnlyServices' sub-obj to the 'result' BSONObjBuilder containing information + * (given by PrimaryService::reportForServerStatus) of each registered service. */ void reportServiceInfoForServerStatus(BSONObjBuilder* result) noexcept; - /** * Adds information about the Instances running in all registered services to 'ops', to show up * in currentOp(). 'connMode' and 'sessionMode' are arguments provided to currentOp, and can be diff --git a/src/mongo/db/repl/primary_only_service_test.cpp b/src/mongo/db/repl/primary_only_service_test.cpp index 4ebbc29df11..4aa1798cc9d 100644 --- a/src/mongo/db/repl/primary_only_service_test.cpp +++ b/src/mongo/db/repl/primary_only_service_test.cpp @@ -639,16 +639,30 @@ TEST_F(PrimaryOnlyServiceTest, DoubleCreateInstance) { } TEST_F(PrimaryOnlyServiceTest, ReportServerStatusInfo) { + stepDown(); + // Make the instance rebuild on stepUp hang. + auto rebuildingFPTimesEntered = + PrimaryOnlyServiceHangBeforeRebuildingInstances.setMode(FailPoint::alwaysOn); + stepUp(); + { + PrimaryOnlyServiceHangBeforeRebuildingInstances.waitForTimesEntered( + ++rebuildingFPTimesEntered); + BSONObjBuilder resultBuilder; _registry->reportServiceInfoForServerStatus(&resultBuilder); - ASSERT_BSONOBJ_EQ(BSON("primaryOnlyServices" << BSON("TestService" << 0)), - resultBuilder.obj()); + ASSERT_BSONOBJ_EQ( + BSON("primaryOnlyServices" << BSON("TestService" << BSON("state" + << "rebuilding" + << "numInstances" << 0))), + resultBuilder.obj()); } // Make sure the instance doesn't complete. TestServiceHangDuringInitialization.setMode(FailPoint::alwaysOn); + PrimaryOnlyServiceHangBeforeRebuildingInstances.setMode(FailPoint::off); + auto opCtx = makeOperationContext(); auto instance = TestService::Instance::getOrCreate(opCtx.get(), _service, BSON("_id" << 0 << "state" << 0)); @@ -657,8 +671,11 @@ TEST_F(PrimaryOnlyServiceTest, ReportServerStatusInfo) { BSONObjBuilder resultBuilder; _registry->reportServiceInfoForServerStatus(&resultBuilder); - ASSERT_BSONOBJ_EQ(BSON("primaryOnlyServices" << BSON("TestService" << 1)), - resultBuilder.obj()); + ASSERT_BSONOBJ_EQ( + BSON("primaryOnlyServices" << BSON("TestService" << BSON("state" + << "running" + << "numInstances" << 1))), + resultBuilder.obj()); } auto instance2 = @@ -668,8 +685,11 @@ TEST_F(PrimaryOnlyServiceTest, ReportServerStatusInfo) { BSONObjBuilder resultBuilder; _registry->reportServiceInfoForServerStatus(&resultBuilder); - ASSERT_BSONOBJ_EQ(BSON("primaryOnlyServices" << BSON("TestService" << 2)), - resultBuilder.obj()); + ASSERT_BSONOBJ_EQ( + BSON("primaryOnlyServices" << BSON("TestService" << BSON("state" + << "running" + << "numInstances" << 2))), + resultBuilder.obj()); } TestServiceHangDuringInitialization.setMode(FailPoint::off); diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index d0bb3f21d20..712aba1a7b8 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -955,7 +955,7 @@ void ReplicationCoordinatorImpl::shutdown(OperationContext* opCtx) { return; } - LOGV2_DEBUG(5074000, 1, "Shutting down the replica set aware services."); + LOGV2(5074000, "Shutting down the replica set aware services."); ReplicaSetAwareServiceRegistry::get(_service).onShutdown(); LOGV2(21328, "Shutting down replication subsystems"); diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js index 960fd2fd196..be8c6231f96 100644 --- a/src/mongo/shell/replsettest.js +++ b/src/mongo/shell/replsettest.js @@ -207,7 +207,7 @@ var ReplSetTest = function(opts) { unauthenticatedConns.length > 0; // There are few cases where we do not auth - // 1. When transitiong to auth + // 1. When transitioning to auth // 2. When cluster is running in x509 but shell was not started with TLS (i.e. sslSpecial // suite) if (needsAuth && @@ -1193,6 +1193,15 @@ var ReplSetTest = function(opts) { } }; + this._notX509Auth = function(conn) { + const nodeId = "n" + self.getNodeId(conn); + const nodeOptions = self.nodeOptions[nodeId] || {}; + const options = + (nodeOptions === {} || !self.startOptions) ? nodeOptions : self.startOptions; + const authMode = options.clusterAuthMode; + return authMode != "sendX509" && authMode != "x509" && authMode != "sendKeyFile"; + }; + function replSetCommandWithRetry(primary, cmd) { print("Running command with retry: " + tojson(cmd)); const cmdName = Object.keys(cmd)[0]; @@ -1307,7 +1316,8 @@ var ReplSetTest = function(opts) { this.initiateWithAnyNodeAsPrimary = function(cfg, initCmd, { doNotWaitForStableRecoveryTimestamp: doNotWaitForStableRecoveryTimestamp = false, doNotWaitForReplication: doNotWaitForReplication = false, - doNotWaitForNewlyAddedRemovals: doNotWaitForNewlyAddedRemovals = false + doNotWaitForNewlyAddedRemovals: doNotWaitForNewlyAddedRemovals = false, + doNotWaitForPrimaryOnlyServices: doNotWaitForPrimaryOnlyServices = false } = {}) { let startTime = new Date(); // Measure the execution time of this function. var primary = this.nodes[0].getDB("admin"); @@ -1600,17 +1610,8 @@ var ReplSetTest = function(opts) { // asCluster() currently does not validate connections with X509 authentication. // If the test is using X509, we skip disabling the server parameter as the // 'setParameter' command will fail. - const nodeId = "n" + self.getNodeId(node); - const nodeOptions = self.nodeOptions[nodeId] || {}; - const options = - (nodeOptions === {} || !self.startOptions) ? nodeOptions : self.startOptions; - const authMode = options.clusterAuthMode; - const notX509 = - authMode != "sendX509" && authMode != "x509" && authMode != "sendKeyFile"; - - // We should only be checking the binary version if we are not using X509 auth, - // as any server command will fail if the 'authMode' is X509. - if (notX509) { + // TODO(SERVER-57924): cleanup asCluster() to avoid checking here. + if (self._notX509Auth(node) || node.isTLS()) { const serverStatus = assert.commandWorked(node.getDB("admin").runCommand({serverStatus: 1})); const currVersion = serverStatus.version; @@ -1658,6 +1659,20 @@ var ReplSetTest = function(opts) { }); } + // Waits for the primary only services to finish rebuilding to avoid background writes + // after initiation is done. PrimaryOnlyServices wait for the stepup optime to be majority + // committed before rebuilding services, so we skip waiting for PrimaryOnlyServices if + // we do not wait for replication. + if (!doNotWaitForReplication && !doNotWaitForPrimaryOnlyServices) { + primary = self.getPrimary(); + // TODO(SERVER-57924): cleanup asCluster() to avoid checking here. + if (self._notX509Auth(primary) || primary.isTLS()) { + asCluster(self.nodes, function() { + self.waitForPrimaryOnlyServices(primary); + }); + } + } + // Turn off the failpoints now that initial sync and initial setup is complete. if (failPointsSupported) { this.nodes.forEach(function(conn) { @@ -1676,15 +1691,22 @@ var ReplSetTest = function(opts) { * This version should be prefered where possible but requires all connections in the * ReplSetTest to be authorized to run replSetGetStatus. */ - this.initiateWithNodeZeroAsPrimary = function(cfg, initCmd) { + this.initiateWithNodeZeroAsPrimary = function(cfg, initCmd, { + doNotWaitForPrimaryOnlyServices: doNotWaitForPrimaryOnlyServices = false, + } = {}) { let startTime = new Date(); // Measure the execution time of this function. - this.initiateWithAnyNodeAsPrimary(cfg, initCmd); + this.initiateWithAnyNodeAsPrimary(cfg, initCmd, {doNotWaitForPrimaryOnlyServices: true}); // stepUp() calls awaitReplication() which requires all nodes to be authorized to run // replSetGetStatus. asCluster(this.nodes, function() { - self.stepUp(self.nodes[0]); + const newPrimary = self.nodes[0]; + self.stepUp(newPrimary); + if (!doNotWaitForPrimaryOnlyServices) { + self.waitForPrimaryOnlyServices(newPrimary); + } }); + print("ReplSetTest initiateWithNodeZeroAsPrimary took " + (new Date() - startTime) + "ms for " + this.nodes.length + " nodes."); }; @@ -1693,8 +1715,11 @@ var ReplSetTest = function(opts) { * Runs replSetInitiate on the replica set and requests the first node to step up as * primary. */ - this.initiate = function(cfg, initCmd) { - this.initiateWithNodeZeroAsPrimary(cfg, initCmd); + this.initiate = function(cfg, initCmd, { + doNotWaitForPrimaryOnlyServices: doNotWaitForPrimaryOnlyServices = false, + } = {}) { + this.initiateWithNodeZeroAsPrimary( + cfg, initCmd, {doNotWaitForPrimaryOnlyServices: doNotWaitForPrimaryOnlyServices}); }; /** @@ -1763,6 +1788,27 @@ var ReplSetTest = function(opts) { }; /** + * Waits for primary only services to finish the rebuilding stage after a primary is elected. + * This is useful for tests that are expecting particular write timestamps since some primary + * only services can do background writes (e.g. build indexes) during rebuilding stage that + * could advance the last write timestamp. + */ + this.waitForPrimaryOnlyServices = function(primary) { + jsTest.log("Waiting for primary only services to finish rebuilding"); + primary = primary || self.getPrimary(); + + assert.soonNoExcept(function() { + const res = assert.commandWorked(primary.adminCommand({serverStatus: 1, repl: 1})); + // 'PrimaryOnlyServices' does not exist prior to v5.0, using empty + // object to skip waiting in case of multiversion tests. + const services = res.repl.primaryOnlyServices || {}; + return Object.keys(services).every((s) => { + return services[s].state === undefined || services[s].state === "running"; + }); + }, "Timed out waiting for primary only services to finish rebuilding"); + }; + + /** * Gets the current replica set config from the specified node index. If no nodeId is specified, * uses the primary node. */ |