author    Wenbin Zhu <wenbin.zhu@mongodb.com>    2021-06-22 21:36:08 +0000
committer Wenbin Zhu <wenbin.zhu@mongodb.com>    2021-06-22 22:24:34 +0000
commit    41720c10856d4b876b7b0c9dcf10a3c14a44d8cb (patch)
tree      0d6b44cf9b6cf5fd591aa008eddbcf0f7163733a
parent    aa7d6c35d3c9ea32bb40d09f428306472645a800 (diff)
download  mongo-41720c10856d4b876b7b0c9dcf10a3c14a44d8cb.tar.gz
SERVER-56010 Wait for primary services to finish rebuilding before completing replset initialization.
(cherry picked from commit 48e8104223a4ba42bdf22ecf03143c78d757a109)
-rw-r--r--  jstests/replsets/tenant_migration_donor_kill_op_retry.js    5
-rw-r--r--  jstests/replsets/tenant_migration_donor_state_machine.js    4
-rw-r--r--  src/mongo/db/repl/primary_only_service.cpp                 36
-rw-r--r--  src/mongo/db/repl/primary_only_service.h                   22
-rw-r--r--  src/mongo/db/repl/primary_only_service_test.cpp            32
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.cpp          2
-rw-r--r--  src/mongo/shell/replsettest.js                             82
7 files changed, 134 insertions(+), 49 deletions(-)
diff --git a/jstests/replsets/tenant_migration_donor_kill_op_retry.js b/jstests/replsets/tenant_migration_donor_kill_op_retry.js
index e834c8e98e1..a9e1f9bd44f 100644
--- a/jstests/replsets/tenant_migration_donor_kill_op_retry.js
+++ b/jstests/replsets/tenant_migration_donor_kill_op_retry.js
@@ -152,7 +152,10 @@ if (!tenantMigrationTest.isFeatureFlagEnabled()) {
tenantMigrationTest.getDonorRst().startSet(Object.assign({}, migrationX509Options.donor, {
setParameter: {['failpoint.' + fpName]: tojson({mode: 'alwaysOn'})}
}));
- tenantMigrationTest.getDonorRst().initiate();
+ // The failpoints in this test run hang the TenantMigrationDonorService during service
+ // rebuild, so we need to skip waiting on PrimaryOnlyServices.
+ tenantMigrationTest.getDonorRst().initiate(
+ null, null, {doNotWaitForPrimaryOnlyServices: true});
TenantMigrationUtil.createTenantMigrationRecipientRoleIfNotExist(
tenantMigrationTest.getDonorRst());
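
Note: a minimal sketch of the opt-out this hunk introduces (the failpoint name here is a placeholder, not from this commit). A test that deliberately hangs a PrimaryOnlyService at startup must pass the new option, or initiate() would block in the new wait:

    const fpName = 'someFailpointThatHangsADonorService';  // placeholder name
    const rst = new ReplSetTest({nodes: 1});
    rst.startSet({setParameter: {['failpoint.' + fpName]: tojson({mode: 'alwaysOn'})}});
    // Skip the new wait; the hung service would never reach the "running" state.
    rst.initiate(null, null, {doNotWaitForPrimaryOnlyServices: true});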
diff --git a/jstests/replsets/tenant_migration_donor_state_machine.js b/jstests/replsets/tenant_migration_donor_state_machine.js
index b88cc76cada..fed89e496c4 100644
--- a/jstests/replsets/tenant_migration_donor_state_machine.js
+++ b/jstests/replsets/tenant_migration_donor_state_machine.js
@@ -52,7 +52,7 @@ function testDonorForgetMigrationAfterMigrationCompletes(
}));
assert.soon(() => 0 ===
donorPrimary.adminCommand({serverStatus: 1})
- .repl.primaryOnlyServices.TenantMigrationDonorService);
+ .repl.primaryOnlyServices.TenantMigrationDonorService.numInstances);
const donorRecipientMonitorPoolStats =
donorPrimary.adminCommand({connPoolStats: 1}).replicaSets;
@@ -70,7 +70,7 @@ function testDonorForgetMigrationAfterMigrationCompletes(
}));
assert.soon(() => 0 ===
recipientPrimary.adminCommand({serverStatus: 1})
- .repl.primaryOnlyServices.TenantMigrationRecipientService);
+ .repl.primaryOnlyServices.TenantMigrationRecipientService.numInstances);
const recipientRecipientMonitorPoolStats =
recipientPrimary.adminCommand({connPoolStats: 1}).replicaSets;
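
Note: these assertions now read the richer per-service sub-document; a sketch of the polling pattern (shape as introduced by this commit, values illustrative):

    assert.soon(() => {
        const svc = donorPrimary.adminCommand({serverStatus: 1})
                        .repl.primaryOnlyServices.TenantMigrationDonorService;
        // Previously 'svc' was a bare instance count; it is now a sub-document
        // such as {state: "running", numInstances: 0}.
        return svc.numInstances === 0;
    });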
diff --git a/src/mongo/db/repl/primary_only_service.cpp b/src/mongo/db/repl/primary_only_service.cpp
index c5a2966e493..b95354b5f2e 100644
--- a/src/mongo/db/repl/primary_only_service.cpp
+++ b/src/mongo/db/repl/primary_only_service.cpp
@@ -219,8 +219,8 @@ void PrimaryOnlyServiceRegistry::onStepDown() {
void PrimaryOnlyServiceRegistry::reportServiceInfoForServerStatus(BSONObjBuilder* result) noexcept {
BSONObjBuilder subBuilder(result->subobjStart("primaryOnlyServices"));
for (auto& service : _servicesByName) {
- subBuilder.appendNumber(service.first,
- static_cast<long long>(service.second->getNumberOfInstances()));
+ BSONObjBuilder serviceInfoBuilder(subBuilder.subobjStart(service.first));
+ service.second->reportForServerStatus(&serviceInfoBuilder);
}
}
@@ -236,9 +236,10 @@ void PrimaryOnlyServiceRegistry::reportServiceInfoForCurrentOp(
PrimaryOnlyService::PrimaryOnlyService(ServiceContext* serviceContext)
: _serviceContext(serviceContext) {}
-size_t PrimaryOnlyService::getNumberOfInstances() {
+void PrimaryOnlyService::reportForServerStatus(BSONObjBuilder* result) noexcept {
stdx::lock_guard lk(_mutex);
- return _activeInstances.size();
+ result->append("state", _getStateString(lk));
+ result->appendNumber("numInstances", static_cast<long long>(_activeInstances.size()));
}
void PrimaryOnlyService::reportInstanceInfoForCurrentOp(
@@ -255,11 +256,6 @@ void PrimaryOnlyService::reportInstanceInfoForCurrentOp(
}
}
-bool PrimaryOnlyService::isRunning() const {
- stdx::lock_guard lk(_mutex);
- return _state == State::kRunning;
-}
-
void PrimaryOnlyService::registerOpCtx(OperationContext* opCtx, bool allowOpCtxWhileRebuilding) {
stdx::lock_guard lk(_mutex);
auto [_, inserted] = _opCtxs.emplace(opCtx);
@@ -371,6 +367,11 @@ void PrimaryOnlyService::onStepUp(const OpTime& stepUpOpTime) {
// all previous writes to state documents are also committed, and then schedule work to
// rebuild Instances from their persisted state documents.
lk.lock();
+ LOGV2_DEBUG(5601000,
+ 2,
+ "Waiting on first write of the new term to be majority committed",
+ "service"_attr = getServiceName(),
+ "stepUpOpTime"_attr = stepUpOpTime);
WaitForMajorityService::get(_serviceContext)
.waitUntilMajority(stepUpOpTime, _source.token())
.thenRunOn(**newScopedExecutor)
@@ -761,6 +762,23 @@ std::shared_ptr<PrimaryOnlyService::Instance> PrimaryOnlyService::_insertNewInst
return it->second.getInstance();
}
+StringData PrimaryOnlyService::_getStateString(WithLock) const {
+ switch (_state) {
+ case State::kRunning:
+ return "running";
+ case State::kPaused:
+ return "paused";
+ case State::kRebuilding:
+ return "rebuilding";
+ case State::kRebuildFailed:
+ return "rebuildFailed";
+ case State::kShutdown:
+ return "shutdown";
+ default:
+ MONGO_UNREACHABLE;
+ }
+}
+
PrimaryOnlyService::AllowOpCtxWhenServiceRebuildingBlock::AllowOpCtxWhenServiceRebuildingBlock(
Client* client)
: _client(client), _clientState(&primaryOnlyServiceStateForClient(_client)) {
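
Note: sketch of the serverStatus output the rewritten reportServiceInfoForServerStatus() produces, as seen from the shell (service names and values illustrative):

    > db.adminCommand({serverStatus: 1}).repl.primaryOnlyServices
    {
        "TenantMigrationDonorService": {"state": "running", "numInstances": 0},
        "TenantMigrationRecipientService": {"state": "running", "numInstances": 0}
    }

Before this commit each service mapped to a bare number (its instance count); the state string comes from the new _getStateString() above.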
diff --git a/src/mongo/db/repl/primary_only_service.h b/src/mongo/db/repl/primary_only_service.h
index 111f76d1cf7..56bdbe3c365 100644
--- a/src/mongo/db/repl/primary_only_service.h
+++ b/src/mongo/db/repl/primary_only_service.h
@@ -243,16 +243,10 @@ public:
void releaseAllInstances(Status status);
/**
- * Returns whether this service is currently running. This is true only when the node is in
- * state PRIMARY *and* this service has finished all asynchronous work associated with resuming
- * after stepUp.
+ * Adds information about this service to the 'result' BSONObjBuilder, containing the number of
+ * active instances and the state of the service.
*/
- bool isRunning() const;
-
- /**
- * Returns the number of currently running Instances of this service.
- */
- size_t getNumberOfInstances();
+ void reportForServerStatus(BSONObjBuilder* result) noexcept;
/**
* Adds information about the Instances belonging to this service to 'ops', to show up in
@@ -435,6 +429,11 @@ private:
*/
void _interruptInstances(WithLock, Status);
+ /**
+ * Returns a string representation of the current state.
+ */
+ StringData _getStateString(WithLock) const;
+
ServiceContext* const _serviceContext;
// All member variables are labeled with one of the following codes indicating the
@@ -531,12 +530,11 @@ public:
PrimaryOnlyService* lookupServiceByNamespace(const NamespaceString& ns);
/**
- * Adds a 'primaryOnlyServices' sub-obj to the 'result' BSONObjBuilder containing a count of the
- * number of active instances for each registered service.
+ * Adds a 'primaryOnlyServices' sub-obj to the 'result' BSONObjBuilder containing information
+ * (given by PrimaryOnlyService::reportForServerStatus) for each registered service.
*/
void reportServiceInfoForServerStatus(BSONObjBuilder* result) noexcept;
-
/**
* Adds information about the Instances running in all registered services to 'ops', to show up
* in currentOp(). 'connMode' and 'sessionMode' are arguments provided to currentOp, and can be
diff --git a/src/mongo/db/repl/primary_only_service_test.cpp b/src/mongo/db/repl/primary_only_service_test.cpp
index 4ebbc29df11..4aa1798cc9d 100644
--- a/src/mongo/db/repl/primary_only_service_test.cpp
+++ b/src/mongo/db/repl/primary_only_service_test.cpp
@@ -639,16 +639,30 @@ TEST_F(PrimaryOnlyServiceTest, DoubleCreateInstance) {
}
TEST_F(PrimaryOnlyServiceTest, ReportServerStatusInfo) {
+ stepDown();
+ // Make the instance rebuild on stepUp hang.
+ auto rebuildingFPTimesEntered =
+ PrimaryOnlyServiceHangBeforeRebuildingInstances.setMode(FailPoint::alwaysOn);
+ stepUp();
+
{
+ PrimaryOnlyServiceHangBeforeRebuildingInstances.waitForTimesEntered(
+ ++rebuildingFPTimesEntered);
+
BSONObjBuilder resultBuilder;
_registry->reportServiceInfoForServerStatus(&resultBuilder);
- ASSERT_BSONOBJ_EQ(BSON("primaryOnlyServices" << BSON("TestService" << 0)),
- resultBuilder.obj());
+ ASSERT_BSONOBJ_EQ(
+ BSON("primaryOnlyServices" << BSON("TestService" << BSON("state"
+ << "rebuilding"
+ << "numInstances" << 0))),
+ resultBuilder.obj());
}
// Make sure the instance doesn't complete.
TestServiceHangDuringInitialization.setMode(FailPoint::alwaysOn);
+ PrimaryOnlyServiceHangBeforeRebuildingInstances.setMode(FailPoint::off);
+
auto opCtx = makeOperationContext();
auto instance =
TestService::Instance::getOrCreate(opCtx.get(), _service, BSON("_id" << 0 << "state" << 0));
@@ -657,8 +671,11 @@ TEST_F(PrimaryOnlyServiceTest, ReportServerStatusInfo) {
BSONObjBuilder resultBuilder;
_registry->reportServiceInfoForServerStatus(&resultBuilder);
- ASSERT_BSONOBJ_EQ(BSON("primaryOnlyServices" << BSON("TestService" << 1)),
- resultBuilder.obj());
+ ASSERT_BSONOBJ_EQ(
+ BSON("primaryOnlyServices" << BSON("TestService" << BSON("state"
+ << "running"
+ << "numInstances" << 1))),
+ resultBuilder.obj());
}
auto instance2 =
@@ -668,8 +685,11 @@ TEST_F(PrimaryOnlyServiceTest, ReportServerStatusInfo) {
BSONObjBuilder resultBuilder;
_registry->reportServiceInfoForServerStatus(&resultBuilder);
- ASSERT_BSONOBJ_EQ(BSON("primaryOnlyServices" << BSON("TestService" << 2)),
- resultBuilder.obj());
+ ASSERT_BSONOBJ_EQ(
+ BSON("primaryOnlyServices" << BSON("TestService" << BSON("state"
+ << "running"
+ << "numInstances" << 2))),
+ resultBuilder.obj());
}
TestServiceHangDuringInitialization.setMode(FailPoint::off);
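
Note: condensed view of the three snapshots the updated test asserts, with a small hypothetical helper mirroring what the comparisons check:

    // Hypothetical helper; 'registryStatus' is the object built by
    // reportServiceInfoForServerStatus().
    function serviceSummary(registryStatus, name) {
        const svc = registryStatus.primaryOnlyServices[name];
        return {state: svc.state, numInstances: svc.numInstances};
    }
    // Expected progression for "TestService" in this test:
    //   {state: "rebuilding", numInstances: 0}  // rebuild failpoint held
    //   {state: "running",    numInstances: 1}  // after the first instance
    //   {state: "running",    numInstances: 2}  // after the second instance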
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index d0bb3f21d20..712aba1a7b8 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -955,7 +955,7 @@ void ReplicationCoordinatorImpl::shutdown(OperationContext* opCtx) {
return;
}
- LOGV2_DEBUG(5074000, 1, "Shutting down the replica set aware services.");
+ LOGV2(5074000, "Shutting down the replica set aware services.");
ReplicaSetAwareServiceRegistry::get(_service).onShutdown();
LOGV2(21328, "Shutting down replication subsystems");
diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js
index 960fd2fd196..be8c6231f96 100644
--- a/src/mongo/shell/replsettest.js
+++ b/src/mongo/shell/replsettest.js
@@ -207,7 +207,7 @@ var ReplSetTest = function(opts) {
unauthenticatedConns.length > 0;
// There are a few cases where we do not auth
- // 1. When transitiong to auth
+ // 1. When transitioning to auth
// 2. When cluster is running in x509 but shell was not started with TLS (i.e. sslSpecial
// suite)
if (needsAuth &&
@@ -1193,6 +1193,15 @@ var ReplSetTest = function(opts) {
}
};
+ this._notX509Auth = function(conn) {
+ const nodeId = "n" + self.getNodeId(conn);
+ const nodeOptions = self.nodeOptions[nodeId] || {};
+ const options =
+ (nodeOptions === {} || !self.startOptions) ? nodeOptions : self.startOptions;
+ const authMode = options.clusterAuthMode;
+ return authMode != "sendX509" && authMode != "x509" && authMode != "sendKeyFile";
+ };
+
function replSetCommandWithRetry(primary, cmd) {
print("Running command with retry: " + tojson(cmd));
const cmdName = Object.keys(cmd)[0];
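
Note: quick sanity check of the extracted helper (a sketch; a plain replica set with no clusterAuthMode set yields an undefined authMode, which counts as not-X509):

    const rst = new ReplSetTest({nodes: 1});
    rst.startSet();
    rst.initiate();
    assert(rst._notX509Auth(rst.nodes[0]));  // no clusterAuthMode configured
    rst.stopSet();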
@@ -1307,7 +1316,8 @@ var ReplSetTest = function(opts) {
this.initiateWithAnyNodeAsPrimary = function(cfg, initCmd, {
doNotWaitForStableRecoveryTimestamp: doNotWaitForStableRecoveryTimestamp = false,
doNotWaitForReplication: doNotWaitForReplication = false,
- doNotWaitForNewlyAddedRemovals: doNotWaitForNewlyAddedRemovals = false
+ doNotWaitForNewlyAddedRemovals: doNotWaitForNewlyAddedRemovals = false,
+ doNotWaitForPrimaryOnlyServices: doNotWaitForPrimaryOnlyServices = false
} = {}) {
let startTime = new Date(); // Measure the execution time of this function.
var primary = this.nodes[0].getDB("admin");
@@ -1600,17 +1610,8 @@ var ReplSetTest = function(opts) {
// asCluster() currently does not validate connections with X509 authentication.
// If the test is using X509, we skip disabling the server parameter as the
// 'setParameter' command will fail.
- const nodeId = "n" + self.getNodeId(node);
- const nodeOptions = self.nodeOptions[nodeId] || {};
- const options =
- (nodeOptions === {} || !self.startOptions) ? nodeOptions : self.startOptions;
- const authMode = options.clusterAuthMode;
- const notX509 =
- authMode != "sendX509" && authMode != "x509" && authMode != "sendKeyFile";
-
- // We should only be checking the binary version if we are not using X509 auth,
- // as any server command will fail if the 'authMode' is X509.
- if (notX509) {
+ // TODO(SERVER-57924): cleanup asCluster() to avoid checking here.
+ if (self._notX509Auth(node) || node.isTLS()) {
const serverStatus =
assert.commandWorked(node.getDB("admin").runCommand({serverStatus: 1}));
const currVersion = serverStatus.version;
@@ -1658,6 +1659,20 @@ var ReplSetTest = function(opts) {
});
}
+ // Waits for the primary only services to finish rebuilding to avoid background writes
+ // after initiation is done. PrimaryOnlyServices wait for the stepup optime to be majority
+ // committed before rebuilding services, so we skip waiting for PrimaryOnlyServices if
+ // we do not wait for replication.
+ if (!doNotWaitForReplication && !doNotWaitForPrimaryOnlyServices) {
+ primary = self.getPrimary();
+ // TODO(SERVER-57924): cleanup asCluster() to avoid checking here.
+ if (self._notX509Auth(primary) || primary.isTLS()) {
+ asCluster(self.nodes, function() {
+ self.waitForPrimaryOnlyServices(primary);
+ });
+ }
+ }
+
// Turn off the failpoints now that initial sync and initial setup is complete.
if (failPointsSupported) {
this.nodes.forEach(function(conn) {
@@ -1676,15 +1691,22 @@ var ReplSetTest = function(opts) {
* This version should be preferred where possible but requires all connections in the
* ReplSetTest to be authorized to run replSetGetStatus.
*/
- this.initiateWithNodeZeroAsPrimary = function(cfg, initCmd) {
+ this.initiateWithNodeZeroAsPrimary = function(cfg, initCmd, {
+ doNotWaitForPrimaryOnlyServices: doNotWaitForPrimaryOnlyServices = false,
+ } = {}) {
let startTime = new Date(); // Measure the execution time of this function.
- this.initiateWithAnyNodeAsPrimary(cfg, initCmd);
+ this.initiateWithAnyNodeAsPrimary(cfg, initCmd, {doNotWaitForPrimaryOnlyServices: true});
// stepUp() calls awaitReplication() which requires all nodes to be authorized to run
// replSetGetStatus.
asCluster(this.nodes, function() {
- self.stepUp(self.nodes[0]);
+ const newPrimary = self.nodes[0];
+ self.stepUp(newPrimary);
+ if (!doNotWaitForPrimaryOnlyServices) {
+ self.waitForPrimaryOnlyServices(newPrimary);
+ }
});
+
print("ReplSetTest initiateWithNodeZeroAsPrimary took " + (new Date() - startTime) +
"ms for " + this.nodes.length + " nodes.");
};
@@ -1693,8 +1715,11 @@ var ReplSetTest = function(opts) {
* Runs replSetInitiate on the replica set and requests the first node to step up as
* primary.
*/
- this.initiate = function(cfg, initCmd) {
- this.initiateWithNodeZeroAsPrimary(cfg, initCmd);
+ this.initiate = function(cfg, initCmd, {
+ doNotWaitForPrimaryOnlyServices: doNotWaitForPrimaryOnlyServices = false,
+ } = {}) {
+ this.initiateWithNodeZeroAsPrimary(
+ cfg, initCmd, {doNotWaitForPrimaryOnlyServices: doNotWaitForPrimaryOnlyServices});
};
/**
@@ -1763,6 +1788,27 @@ var ReplSetTest = function(opts) {
};
/**
+ * Waits for primary only services to finish the rebuilding stage after a primary is elected.
+ * This is useful for tests that are expecting particular write timestamps since some primary
+ * only services can do background writes (e.g. build indexes) during rebuilding stage that
+ * could advance the last write timestamp.
+ */
+ this.waitForPrimaryOnlyServices = function(primary) {
+ jsTest.log("Waiting for primary only services to finish rebuilding");
+ primary = primary || self.getPrimary();
+
+ assert.soonNoExcept(function() {
+ const res = assert.commandWorked(primary.adminCommand({serverStatus: 1, repl: 1}));
+ // 'PrimaryOnlyServices' does not exist prior to v5.0, using empty
+ // object to skip waiting in case of multiversion tests.
+ const services = res.repl.primaryOnlyServices || {};
+ return Object.keys(services).every((s) => {
+ return services[s].state === undefined || services[s].state === "running";
+ });
+ }, "Timed out waiting for primary only services to finish rebuilding");
+ };
+
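
Note: tests that elect a new primary mid-run and depend on stable write timestamps can now call the new helper directly; a minimal sketch (assumes an already-initiated, multi-node ReplSetTest 'rst'):

    const newPrimary = rst.nodes[1];
    rst.stepUp(newPrimary);
    // Block until every reported service is back in the "running" state, so
    // background rebuild writes cannot advance the last write timestamp later.
    rst.waitForPrimaryOnlyServices(newPrimary);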
+ /**
* Gets the current replica set config from the specified node index. If no nodeId is specified,
* uses the primary node.
*/