summaryrefslogtreecommitdiff
path: root/src/mongo/db/repl
diff options
context:
space:
mode:
authorSiyuan Zhou <siyuan.zhou@mongodb.com>2020-04-12 23:48:40 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-04-24 02:01:46 +0000
commitcf6abf2091c36130c3b6feef6af45fee2761a922 (patch)
tree765c2ce28758667064ef0b6ebb6f8b6342bd45c8 /src/mongo/db/repl
parent70b693235173db39960fb5828b7594ce2c1f8eae (diff)
downloadmongo-cf6abf2091c36130c3b6feef6af45fee2761a922.tar.gz
SERVER-47142 Check primary before writing config and no-op.
(cherry picked from commit 544cbb209709ebee4f17f2d669b1909bf66be6bb)
Diffstat (limited to 'src/mongo/db/repl')
-rw-r--r--src/mongo/db/repl/repl_set_commands.cpp18
-rw-r--r--src/mongo/db/repl/replication_coordinator_external_state.h8
-rw-r--r--src/mongo/db/repl/replication_coordinator_external_state_impl.cpp16
-rw-r--r--src/mongo/db/repl/replication_coordinator_external_state_impl.h4
-rw-r--r--src/mongo/db/repl/replication_coordinator_external_state_mock.cpp5
-rw-r--r--src/mongo/db/repl/replication_coordinator_external_state_mock.h4
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp26
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp7
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp60
9 files changed, 110 insertions, 38 deletions
diff --git a/src/mongo/db/repl/repl_set_commands.cpp b/src/mongo/db/repl/repl_set_commands.cpp
index f26b238258e..da84f96cfe1 100644
--- a/src/mongo/db/repl/repl_set_commands.cpp
+++ b/src/mongo/db/repl/repl_set_commands.cpp
@@ -443,24 +443,6 @@ public:
}
auto status = replCoord->processReplSetReconfig(opCtx, parsedArgs, &result);
-
- if (status.isOK() && !parsedArgs.force) {
- const auto service = opCtx->getServiceContext();
- Lock::GlobalLock globalLock(opCtx, MODE_IX);
- writeConflictRetry(opCtx, "replSetReconfig", kReplSetReconfigNss, [&] {
- WriteUnitOfWork wuow(opCtx);
- // Users must not be allowed to provide their own contents for the o2 field.
- // o2 field of no-ops is supposed to be used internally.
-
- service->getOpObserver()->onOpMessage(opCtx,
- BSON("msg"
- << "Reconfig set"
- << "version"
- << parsedArgs.newConfigObj["version"]));
- wuow.commit();
- });
- }
-
uassertStatusOK(status);
// Now that the new config has been persisted and installed in memory, wait for the new
diff --git a/src/mongo/db/repl/replication_coordinator_external_state.h b/src/mongo/db/repl/replication_coordinator_external_state.h
index 5bce5d1b06d..585bf42984a 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state.h
@@ -164,10 +164,12 @@ public:
virtual StatusWith<BSONObj> loadLocalConfigDocument(OperationContext* opCtx) = 0;
/**
- * Stores the replica set config document in local storage, or returns an error.
+ * Stores the replica set config document in local storage and writes a no-op in the oplog, or
+ * returns an error.
*/
- virtual Status storeLocalConfigDocument(OperationContext* opCtx, const BSONObj& config) = 0;
-
+ virtual Status storeLocalConfigDocument(OperationContext* opCtx,
+ const BSONObj& config,
+ bool writeOplog) = 0;
/**
* Creates the collection for "lastVote" documents and initializes it, or returns an error.
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index 0760875c56c..a76ad911abc 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -577,16 +577,22 @@ StatusWith<BSONObj> ReplicationCoordinatorExternalStateImpl::loadLocalConfigDocu
}
Status ReplicationCoordinatorExternalStateImpl::storeLocalConfigDocument(OperationContext* opCtx,
- const BSONObj& config) {
+ const BSONObj& config,
+ bool writeOplog) {
try {
writeConflictRetry(opCtx, "save replica set config", configCollectionName, [&] {
+ WriteUnitOfWork wuow(opCtx);
Lock::DBLock dbWriteLock(opCtx, configDatabaseName, MODE_X);
Helpers::putSingleton(opCtx, configCollectionName, config);
- });
-
- // Wait for durability of the new config document.
- opCtx->recoveryUnit()->waitUntilDurable(opCtx);
+ if (writeOplog) {
+ auto msgObj = BSON("msg"
+ << "Reconfig set"
+ << "version" << config["version"]);
+ _service->getOpObserver()->onOpMessage(opCtx, msgObj);
+ }
+ wuow.commit();
+ });
return Status::OK();
} catch (const DBException& ex) {
return ex.toStatus();
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.h b/src/mongo/db/repl/replication_coordinator_external_state_impl.h
index 5a3b52229bf..b5e8d8c3d80 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.h
@@ -86,7 +86,9 @@ public:
virtual bool isSelf(const HostAndPort& host, ServiceContext* service);
Status createLocalLastVoteCollection(OperationContext* opCtx) final;
virtual StatusWith<BSONObj> loadLocalConfigDocument(OperationContext* opCtx);
- virtual Status storeLocalConfigDocument(OperationContext* opCtx, const BSONObj& config);
+ virtual Status storeLocalConfigDocument(OperationContext* opCtx,
+ const BSONObj& config,
+ bool writeOplog);
virtual StatusWith<LastVote> loadLocalLastVoteDocument(OperationContext* opCtx);
virtual Status storeLocalLastVoteDocument(OperationContext* opCtx, const LastVote& lastVote);
virtual void setGlobalTimestamp(ServiceContext* service, const Timestamp& newTime);
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
index 5d6305a2c79..c8b46495a98 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
@@ -76,7 +76,7 @@ void ReplicationCoordinatorExternalStateMock::stopDataReplication(OperationConte
Status ReplicationCoordinatorExternalStateMock::initializeReplSetStorage(OperationContext* opCtx,
const BSONObj& config) {
- return storeLocalConfigDocument(opCtx, config);
+ return storeLocalConfigDocument(opCtx, config, false);
}
void ReplicationCoordinatorExternalStateMock::shutdown(OperationContext*) {}
@@ -119,7 +119,8 @@ StatusWith<BSONObj> ReplicationCoordinatorExternalStateMock::loadLocalConfigDocu
}
Status ReplicationCoordinatorExternalStateMock::storeLocalConfigDocument(OperationContext* opCtx,
- const BSONObj& config) {
+ const BSONObj& config,
+ bool writeOplog) {
if (_storeLocalConfigDocumentStatus.isOK()) {
setLocalConfigDocument(StatusWith<BSONObj>(config));
return Status::OK();
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.h b/src/mongo/db/repl/replication_coordinator_external_state_mock.h
index 1444eaeb1ef..472033750f3 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.h
@@ -76,7 +76,9 @@ public:
virtual bool isSelf(const HostAndPort& host, ServiceContext* service);
virtual HostAndPort getClientHostAndPort(const OperationContext* opCtx);
virtual StatusWith<BSONObj> loadLocalConfigDocument(OperationContext* opCtx);
- virtual Status storeLocalConfigDocument(OperationContext* opCtx, const BSONObj& config);
+ virtual Status storeLocalConfigDocument(OperationContext* opCtx,
+ const BSONObj& config,
+ bool writeOplog);
virtual StatusWith<LastVote> loadLocalLastVoteDocument(OperationContext* opCtx);
virtual Status storeLocalLastVoteDocument(OperationContext* opCtx, const LastVote& lastVote);
virtual void setGlobalTimestamp(ServiceContext* service, const Timestamp& newTime);
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 827458e2be0..2745b25b9e5 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -3353,14 +3353,26 @@ Status ReplicationCoordinatorImpl::doReplSetReconfig(OperationContext* opCtx,
}
LOGV2(51814, "Persisting new config to disk");
- status = _externalState->storeLocalConfigDocument(opCtx, newConfig.toBSON());
- if (!status.isOK()) {
- LOGV2_ERROR(21422,
- "replSetReconfig failed to store config document; {error}",
- "replSetReconfig failed to store config document",
- "error"_attr = status);
- return status;
+ {
+ Lock::GlobalLock globalLock(opCtx, LockMode::MODE_IX);
+ if (!force && !_readWriteAbility->canAcceptNonLocalWrites(opCtx)) {
+ return {ErrorCodes::NotMaster, "Stepped down when persisting new config"};
+ }
+
+ // Don't write no-op for internal and external force reconfig.
+ // For non-force reconfig, we are guaranteed the node is a writable primary.
+ status = _externalState->storeLocalConfigDocument(
+ opCtx, newConfig.toBSON(), !force /* writeOplog */);
+ if (!status.isOK()) {
+ LOGV2_ERROR(21422,
+ "replSetReconfig failed to store config document; {error}",
+ "replSetReconfig failed to store config document",
+ "error"_attr = status);
+ return status;
+ }
}
+ // Wait for durability of the new config document.
+ opCtx->recoveryUnit()->waitUntilDurable(opCtx);
configStateGuard.dismiss();
_finishReplSetReconfig(opCtx, newConfig, force, myIndex.getValue());
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index ee856e988da..4eb65779b27 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -584,7 +584,12 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigStore(
"newConfigVersionAndTerm"_attr = newConfig.getConfigVersionAndTerm());
auto opCtx = cc().makeOperationContext();
- auto status = _externalState->storeLocalConfigDocument(opCtx.get(), newConfig.toBSON());
+ // Don't write the no-op for config learned via heartbeats.
+ auto status = _externalState->storeLocalConfigDocument(
+ opCtx.get(), newConfig.toBSON(), false /* writeOplog */);
+ // Wait for durability of the new config document.
+ opCtx->recoveryUnit()->waitUntilDurable(opCtx.get());
+
bool isFirstConfig;
{
stdx::lock_guard<Latch> lk(_mutex);
diff --git a/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
index 7df4b9b4d97..e13ddaf8b47 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
@@ -1227,6 +1227,66 @@ TEST_F(ReplCoordReconfigTest,
ASSERT_OK(status);
}
+TEST_F(ReplCoordReconfigTest, StepdownShouldInterruptConfigWrite) {
+ // Start out in a non-initial config version.
+ init();
+ auto configVersion = 2;
+ assertStartSuccess(
+ configWithMembers(configVersion, 0, BSON_ARRAY(member(1, "n1:1") << member(2, "n2:1"))),
+ HostAndPort("n1", 1));
+ ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+
+ // Simulate application of one oplog entry.
+ replCoordSetMyLastAppliedAndDurableOpTime(OpTime(Timestamp(1, 1), 0));
+
+ // Get elected primary.
+ simulateSuccessfulV1Election();
+ ASSERT_EQ(getReplCoord()->getMemberState(), MemberState::RS_PRIMARY);
+ ASSERT_EQ(getReplCoord()->getTerm(), 1);
+
+ // Advance your optime.
+ auto commitPoint = OpTime(Timestamp(2, 1), 1);
+ replCoordSetMyLastAppliedAndDurableOpTime(commitPoint);
+ replicateOpTo(2, commitPoint);
+
+ // Respond to heartbeats before reconfig.
+ respondToAllHeartbeats();
+
+ // Do a reconfig that should fail due to stepdown.
+ configVersion = 3;
+ ReplSetReconfigArgs args;
+ args.newConfigObj = configWithMembers(
+ configVersion, 1, BSON_ARRAY(member(1, "n1:1") << member(2, "n2:1") << member(3, "n3:1")));
+
+ BSONObjBuilder result;
+ Status status(ErrorCodes::InternalError, "Not Set");
+ const auto opCtx = makeOperationContext();
+ stdx::thread reconfigThread;
+ reconfigThread = stdx::thread(
+ [&] { status = getReplCoord()->processReplSetReconfig(opCtx.get(), args, &result); });
+
+ getNet()->enterNetwork();
+ // Wait for the next heartbeat of quorum check and blackhole it.
+ // The reconfig is hung during the quorum check.
+ auto request = getNet()->getNextReadyRequest();
+ getNet()->blackHole(request);
+ getNet()->exitNetwork();
+
+ // Step down due to a higher term.
+ TopologyCoordinator::UpdateTermResult termUpdated;
+ auto updateTermEvh = getReplCoord()->updateTerm_forTest(2, &termUpdated);
+ ASSERT(termUpdated == TopologyCoordinator::UpdateTermResult::kTriggerStepDown);
+ ASSERT(updateTermEvh.isValid());
+ getReplExec()->waitForEvent(updateTermEvh);
+
+ // Respond to quorum check to resume the reconfig.
+ respondToAllHeartbeats();
+
+ reconfigThread.join();
+ ASSERT_EQ(status.code(), ErrorCodes::NotMaster);
+ ASSERT_EQ(status.reason(), "Stepped down when persisting new config");
+}
+
} // anonymous namespace
} // namespace repl
} // namespace mongo