author    Spencer T Brody <spencer@mongodb.com>  2016-11-16 18:00:03 -0500
committer Spencer T Brody <spencer@mongodb.com>  2016-11-17 17:26:03 -0500
commit    c7ebfd0fd292e45256e9799a2a96ed6054ecc357 (patch)
tree      fabd2c7fa5aed4f24d12ae7ae45cf88065dfb326
parent    5c5fe01994749cdd9ef35d0bf13e0fd67abf7e31 (diff)
SERVER-27053 Don't acknowledge writes if the term has changed.
(cherry picked from commit a557fd981d235f84d4a0865dc0bb6b5385fc7a21)
-rw-r--r--  buildscripts/resmokeconfig/suites/replica_sets_legacy.yml    |   2
-rw-r--r--  jstests/replsets/write_concern_after_stepdown.js             | 120
-rw-r--r--  jstests/replsets/write_concern_after_stepdown_and_stepup.js  | 134
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.cpp           |  44
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.h             |  17
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp |  37
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl_test.cpp      | 313
-rw-r--r--  src/mongo/db/write_concern.cpp                               |   5
8 files changed, 500 insertions, 172 deletions
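The substance of the fix is in _awaitReplication_inlock(): the opTime being waited on carries the term in which the write happened, and both before waiting and each time the waiter wakes up, the coordinator re-checks that it is still primary, that the term has not changed, and that no stepdown is pending; any of these now fails the wait with PrimarySteppedDown rather than acknowledging a write that may be about to be rolled back. Below is a minimal, self-contained sketch of that term check, not part of the patch; the Status/OpTime types and the class are simplified stand-ins, not the actual ReplicationCoordinatorImpl interfaces.

```cpp
// Sketch of the term check added to awaitReplication(): the term that was current
// when the write happened is compared against the node's cached term before the
// write is acknowledged. Types here are simplified stand-ins, not mongo::Status
// or mongo::repl::OpTime.
#include <cstdint>
#include <iostream>
#include <string>

struct Status {
    bool ok;
    std::string reason;
};

struct OpTime {
    uint64_t timestamp;
    int64_t term;  // election term in which this op was written
};

class ReplCoordSketch {
public:
    // Returns a non-OK status if this node can no longer acknowledge 'opTime'.
    Status checkForStepDown(const OpTime& opTime) const {
        if (!_isPrimary) {
            return {false, "Primary stepped down while waiting for replication"};
        }
        if (opTime.term != _cachedTerm) {
            // A newer term means another node won an election; this write may be
            // on a branch of history that will be rolled back, so don't ack it.
            return {false, "Term changed while waiting for replication"};
        }
        if (_stepDownPending) {
            return {false, "Received stepdown request while waiting for replication"};
        }
        return {true, ""};
    }

    void onHeartbeatRequestsStepDown() { _stepDownPending = true; }
    void onNewTermObserved(int64_t term) { _cachedTerm = term; }

private:
    bool _isPrimary = true;
    bool _stepDownPending = false;
    int64_t _cachedTerm = 1;
};

int main() {
    ReplCoordSketch coord;
    OpTime write{100, /*term*/ 1};
    std::cout << coord.checkForStepDown(write).ok << "\n";   // 1: still safe to ack
    coord.onNewTermObserved(2);                               // heartbeat reveals a new term
    std::cout << coord.checkForStepDown(write).reason << "\n";
}
```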
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_legacy.yml b/buildscripts/resmokeconfig/suites/replica_sets_legacy.yml
index 8803dc88e12..1ba90512c6a 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_legacy.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_legacy.yml
@@ -16,6 +16,8 @@ selector:
- jstests/replsets/stepup.js
# The combination of new bridges and PV0 can lead to an improper spanning tree in sync2.js.
- jstests/replsets/sync2.js
+ # PV0's w:majority guarantees aren't strong enough for this test to pass.
+ - jstests/replsets/write_concern_after_stepdown_and_stepup.js
executor:
js_test:
diff --git a/jstests/replsets/write_concern_after_stepdown.js b/jstests/replsets/write_concern_after_stepdown.js
new file mode 100644
index 00000000000..c8493ea4fb6
--- /dev/null
+++ b/jstests/replsets/write_concern_after_stepdown.js
@@ -0,0 +1,120 @@
+/*
+ * Tests that heartbeats containing writes from a different branch of history can't cause a stale
+ * primary to incorrectly acknowledge a w:majority write that's about to be rolled back.
+ */
+(function() {
+ 'use strict';
+
+ var name = "writeConcernStepDownAndBackUp";
+ var dbName = "wMajorityCheck";
+ var collName = "stepdownAndBackUp";
+
+ var rst = new ReplSetTest({
+ name: name,
+ nodes: [
+ {},
+ {},
+ {rsConfig: {priority: 0}},
+ ],
+ useBridge: true
+ });
+ var nodes = rst.startSet();
+ rst.initiate();
+
+ function waitForState(node, state) {
+ assert.soonNoExcept(function() {
+ assert.commandWorked(node.adminCommand(
+ {replSetTest: 1, waitForMemberState: state, timeoutMillis: rst.kDefaultTimeoutMS}));
+ return true;
+ });
+ }
+
+ function waitForPrimary(node) {
+ assert.soon(function() {
+ return node.adminCommand('ismaster').ismaster;
+ });
+ }
+
+ function stepUp(node) {
+ var primary = rst.getPrimary();
+ if (primary != node) {
+ assert.throws(function() {
+ primary.adminCommand({replSetStepDown: 60 * 5});
+ });
+ }
+ waitForPrimary(node);
+ }
+
+ jsTestLog("Make sure node 0 is primary.");
+ stepUp(nodes[0]);
+ var primary = rst.getPrimary();
+ var secondaries = rst.getSecondaries();
+ assert.eq(nodes[0], primary);
+ // Wait for all data bearing nodes to get up to date.
+ assert.writeOK(nodes[0].getDB(dbName).getCollection(collName).insert(
+ {a: 1}, {writeConcern: {w: 3, wtimeout: rst.kDefaultTimeoutMS}}));
+
+ // Stop the secondaries from replicating.
+ secondaries.forEach(function(node) {
+ assert.commandWorked(
+ node.adminCommand({configureFailPoint: 'rsSyncApplyStop', mode: 'alwaysOn'}));
+ });
+ // Stop the primary from being able to complete stepping down.
+ assert.commandWorked(
+ nodes[0].adminCommand({configureFailPoint: 'blockHeartbeatStepdown', mode: 'alwaysOn'}));
+
+ jsTestLog("Do w:majority write that will block waiting for replication.");
+ var doMajorityWrite = function() {
+ var res = db.getSiblingDB('wMajorityCheck').stepdownAndBackUp.insert({a: 2}, {
+ writeConcern: {w: 'majority'}
+ });
+ assert.writeErrorWithCode(res, ErrorCodes.PrimarySteppedDown);
+ };
+
+ var joinMajorityWriter = startParallelShell(doMajorityWrite, nodes[0].port);
+
+ jsTest.log("Disconnect primary from all secondaries");
+ nodes[0].disconnect(nodes[1]);
+ nodes[0].disconnect(nodes[2]);
+
+ jsTest.log("Wait for a new primary to be elected");
+ // Allow the secondaries to replicate again.
+ secondaries.forEach(function(node) {
+ assert.commandWorked(
+ node.adminCommand({configureFailPoint: 'rsSyncApplyStop', mode: 'off'}));
+ });
+
+ waitForPrimary(nodes[1]);
+
+ jsTest.log("Do a write to the new primary");
+ assert.writeOK(nodes[1].getDB(dbName).getCollection(collName).insert(
+ {a: 3}, {writeConcern: {w: 2, wtimeout: rst.kDefaultTimeoutMS}}));
+
+ jsTest.log("Reconnect the old primary to the rest of the nodes");
+ // Only allow the old primary to connect to the other nodes, not the other way around.
+ // This is so that the old primary will detect that it needs to step down and step itself down,
+ // rather than one of the other nodes detecting this and sending it a replSetStepDown command,
+ // which would cause the old primary to kill all operations and close all connections, making
+ // the way that the insert in the parallel shell fails nondeterministic. Rather than
+ // handling all possible failure modes in the parallel shell, allowing heartbeat connectivity in
+ // only one direction makes it easier for the test to fail deterministically.
+ nodes[1].acceptConnectionsFrom(nodes[0]);
+ nodes[2].acceptConnectionsFrom(nodes[0]);
+
+ joinMajorityWriter();
+
+ // Allow the old primary to finish stepping down so that shutdown can finish.
+ var res = null;
+ try {
+ res = nodes[0].adminCommand({configureFailPoint: 'blockHeartbeatStepdown', mode: 'off'});
+ } catch (e) {
+ // Expected - once we disable the fail point the stepdown will proceed and it's racy whether
+ // the stepdown closes all connections before or after the configureFailPoint command
+ // returns
+ }
+ if (res) {
+ assert.commandWorked(res);
+ }
+
+ rst.stopSet();
+}());
diff --git a/jstests/replsets/write_concern_after_stepdown_and_stepup.js b/jstests/replsets/write_concern_after_stepdown_and_stepup.js
new file mode 100644
index 00000000000..ea1c6312eae
--- /dev/null
+++ b/jstests/replsets/write_concern_after_stepdown_and_stepup.js
@@ -0,0 +1,134 @@
+/*
+ * Tests that heartbeats containing writes from a different branch of history can't cause a stale
+ * primary to incorrectly acknowledge a w:majority write that's about to be rolled back, even if the
+ * stale primary is re-elected primary before waiting for the write concern acknowledgement.
+ */
+(function() {
+ 'use strict';
+
+ var name = "writeConcernStepDownAndBackUp";
+ var dbName = "wMajorityCheck";
+ var collName = "stepdownAndBackUp";
+
+ var rst = new ReplSetTest({
+ name: name,
+ nodes: [
+ {},
+ {},
+ {rsConfig: {priority: 0}},
+ ],
+ useBridge: true
+ });
+ var nodes = rst.startSet();
+ rst.initiate();
+
+ function waitForState(node, state) {
+ assert.soonNoExcept(function() {
+ assert.commandWorked(node.adminCommand(
+ {replSetTest: 1, waitForMemberState: state, timeoutMillis: rst.kDefaultTimeoutMS}));
+ return true;
+ });
+ }
+
+ function waitForPrimary(node) {
+ assert.soon(function() {
+ return node.adminCommand('ismaster').ismaster;
+ });
+ }
+
+ function stepUp(node) {
+ var primary = rst.getPrimary();
+ if (primary != node) {
+ assert.throws(function() {
+ primary.adminCommand({replSetStepDown: 60 * 5});
+ });
+ }
+ waitForPrimary(node);
+ }
+
+ jsTestLog("Make sure node 0 is primary.");
+ stepUp(nodes[0]);
+ var primary = rst.getPrimary();
+ var secondaries = rst.getSecondaries();
+ assert.eq(nodes[0], primary);
+ // Wait for all data bearing nodes to get up to date.
+ assert.writeOK(nodes[0].getDB(dbName).getCollection(collName).insert(
+ {a: 1}, {writeConcern: {w: 3, wtimeout: rst.kDefaultTimeoutMS}}));
+
+ // Stop the secondaries from replicating.
+ secondaries.forEach(function(node) {
+ assert.commandWorked(
+ node.adminCommand({configureFailPoint: 'rsSyncApplyStop', mode: 'alwaysOn'}));
+ });
+ // Stop the primary from calling into awaitReplication()
+ assert.commandWorked(nodes[0].adminCommand(
+ {configureFailPoint: 'hangBeforeWaitingForWriteConcern', mode: 'alwaysOn'}));
+
+ jsTestLog("Do w:majority write that won't enter awaitReplication() until after the primary " +
+ "has stepped down and back up");
+ var doMajorityWrite = function() {
+ // Run ismaster command with 'hangUpOnStepDown' set to false to mark this connection as
+ // one that shouldn't be closed when the node steps down. This simulates the scenario where
+ // the write was coming from a mongos.
+ assert.commandWorked(db.adminCommand({ismaster: 1, hangUpOnStepDown: false}));
+
+ var res = db.getSiblingDB('wMajorityCheck').stepdownAndBackUp.insert({a: 2}, {
+ writeConcern: {w: 'majority'}
+ });
+ assert.writeErrorWithCode(res, ErrorCodes.PrimarySteppedDown);
+ };
+
+ var joinMajorityWriter = startParallelShell(doMajorityWrite, nodes[0].port);
+
+ jsTest.log("Disconnect primary from all secondaries");
+ nodes[0].disconnect(nodes[1]);
+ nodes[0].disconnect(nodes[2]);
+
+ jsTest.log("Wait for a new primary to be elected");
+ // Allow the secondaries to replicate again.
+ secondaries.forEach(function(node) {
+ assert.commandWorked(
+ node.adminCommand({configureFailPoint: 'rsSyncApplyStop', mode: 'off'}));
+ });
+
+ waitForPrimary(nodes[1]);
+
+ jsTest.log("Do a write to the new primary");
+ assert.writeOK(nodes[1].getDB(dbName).getCollection(collName).insert(
+ {a: 3}, {writeConcern: {w: 2, wtimeout: rst.kDefaultTimeoutMS}}));
+
+ jsTest.log("Reconnect the old primary to the rest of the nodes");
+ nodes[0].reconnect(nodes[1]);
+ nodes[0].reconnect(nodes[2]);
+
+ jsTest.log("Wait for the old primary to step down, roll back its write, and apply the " +
+ "new writes from the new primary");
+ waitForState(nodes[0], ReplSetTest.State.SECONDARY);
+ rst.awaitReplication();
+
+ // At this point all 3 nodes should have the same data
+ assert.soonNoExcept(function() {
+ nodes.forEach(function(node) {
+ assert.eq(null,
+ node.getDB(dbName).getCollection(collName).findOne({a: 2}),
+ "Node " + node.host + " contained op that should have been rolled back");
+ assert.neq(null,
+ node.getDB(dbName).getCollection(collName).findOne({a: 3}),
+ "Node " + node.host +
+ " was missing op from branch of history that should have persisted");
+ });
+ return true;
+ });
+
+ jsTest.log("Make the original primary become primary once again");
+ stepUp(nodes[0]);
+
+ jsTest.log("Unblock the thread waiting for replication of the now rolled-back write, ensure " +
+ "that the write concern failed");
+ assert.commandWorked(nodes[0].adminCommand(
+ {configureFailPoint: 'hangBeforeWaitingForWriteConcern', mode: 'off'}));
+
+ joinMajorityWriter();
+
+ rst.stopSet();
+}());
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 526b03d9b38..78141e4987e 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -1617,9 +1617,35 @@ Status ReplicationCoordinatorImpl::_awaitReplication_inlock(
return Status::OK();
}
- if (replMode == modeReplSet && !_memberState.primary()) {
- return {ErrorCodes::PrimarySteppedDown,
- "Primary stepped down while waiting for replication"};
+ auto checkForStepDown = [&]() -> Status {
+ if (replMode == modeReplSet && !_memberState.primary()) {
+ return {ErrorCodes::PrimarySteppedDown,
+ "Primary stepped down while waiting for replication"};
+ }
+
+ if (opTime.getTerm() != _cachedTerm) {
+ return {
+ ErrorCodes::PrimarySteppedDown,
+ str::stream() << "Term changed from " << opTime.getTerm() << " to " << _cachedTerm
+ << " while waiting for replication, indicating that this node must "
+ "have stepped down."};
+ }
+
+ if (_stepDownPending) {
+ return {ErrorCodes::PrimarySteppedDown,
+ "Received stepdown request while waiting for replication"};
+ }
+ return Status::OK();
+ };
+
+ Status stepdownStatus = checkForStepDown();
+ if (!stepdownStatus.isOK()) {
+ return stepdownStatus;
+ }
+
+ auto interruptStatus = txn->checkForInterruptNoAssert();
+ if (!interruptStatus.isOK()) {
+ return interruptStatus;
}
if (writeConcern.wMode.empty()) {
@@ -1647,10 +1673,6 @@ Status ReplicationCoordinatorImpl::_awaitReplication_inlock(
WaiterInfoGuard waitInfo(
&_replicationWaiterList, txn->getOpID(), opTime, &writeConcern, &condVar);
while (!_doneWaitingForReplication_inlock(opTime, minSnapshot, writeConcern)) {
- if (replMode == modeReplSet && !_getMemberState_inlock().primary()) {
- return {ErrorCodes::PrimarySteppedDown,
- "Not primary anymore while waiting for replication - primary stepped down"};
- }
if (_inShutdown) {
return {ErrorCodes::ShutdownInProgress, "Replication is being shut down"};
@@ -1672,6 +1694,11 @@ Status ReplicationCoordinatorImpl::_awaitReplication_inlock(
}
return {ErrorCodes::WriteConcernFailed, "waiting for replication timed out"};
}
+
+ stepdownStatus = checkForStepDown();
+ if (!stepdownStatus.isOK()) {
+ return stepdownStatus;
+ }
}
return _checkIfWriteConcernCanBeSatisfied_inlock(writeConcern);
@@ -2520,6 +2547,7 @@ ReplicationCoordinatorImpl::_updateMemberStateFromTopologyCoordinator_inlock() {
_canAcceptNonLocalWrites = false;
_isCatchingUp = false;
_isWaitingForDrainToComplete = false;
+ _stepDownPending = false;
_drainFinishedCond_forTest.notify_all();
serverGlobalParams.featureCompatibility.validateFeaturesAsMaster.store(false);
result = kActionCloseAllConnections;
@@ -3367,7 +3395,7 @@ EventHandle ReplicationCoordinatorImpl::_updateTerm_incallback(
if (localUpdateTermResult == TopologyCoordinator::UpdateTermResult::kTriggerStepDown) {
log() << "stepping down from primary, because a new term has begun: " << term;
_topCoord->prepareForStepDown();
- return _stepDownStart();
+ return _stepDownStart(false);
}
return EventHandle();
}
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 8597f9c21ce..25e448500fd 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -910,7 +910,10 @@ private:
*/
void _requestRemotePrimaryStepdown(const HostAndPort& target);
- ReplicationExecutor::EventHandle _stepDownStart();
+ /**
+ * Schedules stepdown to run with the global exclusive lock.
+ */
+ ReplicationExecutor::EventHandle _stepDownStart(bool hasMutex);
/**
* Completes a step-down of the current node. Must be run with a global
@@ -949,9 +952,11 @@ private:
* Utility method that schedules or performs actions specified by a HeartbeatResponseAction
* returned by a TopologyCoordinator::processHeartbeatResponse(V1) call with the given
* value of "responseStatus".
+ * 'hasMutex' is true if the caller is holding _mutex. TODO(SERVER-27083): Remove this.
*/
void _handleHeartbeatResponseAction(const HeartbeatResponseAction& action,
- const StatusWith<ReplSetHeartbeatResponse>& responseStatus);
+ const StatusWith<ReplSetHeartbeatResponse>& responseStatus,
+ bool hasMutex);
/**
* Scan the SlaveInfoVector and determine the highest OplogEntry present on a majority of
@@ -1180,6 +1185,14 @@ private:
// TODO: ideally this should only change on rollbacks NOT on mongod restarts also.
int _rbid; // (M)
+ // Indicates that we've received a request to step down from PRIMARY (likely via a heartbeat)
+ // TODO(SERVER-27083): This bool is redundant with the same-named bool in TopologyCoordinatorImpl,
+ // but due to mutex ordering between _mutex and _topoMutex we can't inspect the
+ // TopologyCoordinator field in awaitReplication() where this bool is used. Once we get rid
+ // of topoMutex and start guarding access to the TopologyCoordinator via _mutex we should
+ // consolidate the two bools.
+ bool _stepDownPending = false; // (M)
+
// list of information about clients waiting on replication. Does *not* own the WaiterInfos.
WaiterList _replicationWaiterList; // (M)
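The hasMutex parameter threaded through _stepDownStart() and _handleHeartbeatResponseAction() exists because _stepDownPending is guarded by _mutex, which some callers (such as _handleLivenessTimeout) already hold while others do not, and the mutex is not recursive. The sketch below illustrates that conditional-lock pattern in isolation; the names and the use of std::optional are illustrative only, not MongoDB's actual code (which uses boost::optional and the stdx wrappers shown in the next file).

```cpp
// Generic sketch of the 'hasMutex' pattern: a helper that must run under a mutex,
// where some callers already hold it and others do not. Names are illustrative.
#include <mutex>
#include <optional>

class Coordinator {
public:
    // 'hasMutex' is true when the caller already holds _mutex, so we must not
    // lock it again (std::mutex is not recursive).
    void markStepDownPending(bool hasMutex) {
        std::optional<std::lock_guard<std::mutex>> lk;
        if (!hasMutex) {
            lk.emplace(_mutex);   // acquire only if the caller didn't
        }
        _stepDownPending = true;  // guarded by _mutex either way
    }

    void heartbeatPath() {        // does not hold _mutex
        markStepDownPending(/*hasMutex=*/false);
    }

    void livenessTimeoutPath() {  // already holds _mutex
        std::lock_guard<std::mutex> lk(_mutex);
        markStepDownPending(/*hasMutex=*/true);
    }

private:
    std::mutex _mutex;
    bool _stepDownPending = false;
};

int main() {
    Coordinator c;
    c.heartbeatPath();
    c.livenessTimeoutPath();
}
```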
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index e7c9ca3f332..dc96dc74d1f 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -66,6 +66,8 @@ using CBHandle = ReplicationExecutor::CallbackHandle;
using CBHStatus = StatusWith<CBHandle>;
using LockGuard = stdx::lock_guard<stdx::mutex>;
+MONGO_FP_DECLARE(blockHeartbeatStepdown);
+
} // namespace
using executor::RemoteCommandRequest;
@@ -213,7 +215,7 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse(
_scheduleHeartbeatToTarget(
target, targetIndex, std::max(now, action.getNextHeartbeatStartDate()));
- _handleHeartbeatResponseAction(action, hbStatusResponse);
+ _handleHeartbeatResponseAction(action, hbStatusResponse, false /*we're not holding _mutex*/);
}
void ReplicationCoordinatorImpl::_updateOpTimesFromHeartbeat_inlock(int targetIndex,
@@ -233,11 +235,13 @@ void ReplicationCoordinatorImpl::_updateOpTimesFromHeartbeat_inlock(int targetIn
void ReplicationCoordinatorImpl::_handleHeartbeatResponseAction(
const HeartbeatResponseAction& action,
- const StatusWith<ReplSetHeartbeatResponse>& responseStatus) {
+ const StatusWith<ReplSetHeartbeatResponse>& responseStatus,
+ bool hasMutex) {
switch (action.getAction()) {
case HeartbeatResponseAction::NoAction:
// Update the cached member state if different than the current topology member state
if (_memberState != _topCoord->getMemberState()) {
+ invariant(!hasMutex);
stdx::unique_lock<stdx::mutex> lk(_mutex);
const PostMemberStateUpdateAction postUpdateAction =
_updateMemberStateFromTopologyCoordinator_inlock();
@@ -257,7 +261,7 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponseAction(
log() << "Stepping down from primary in response to heartbeat";
_topCoord->prepareForStepDown();
// Don't need to wait for stepdown to finish.
- _stepDownStart();
+ _stepDownStart(hasMutex);
break;
case HeartbeatResponseAction::StepDownRemotePrimary: {
invariant(action.getPrimaryConfigIndex() != _selfIndex);
@@ -311,11 +315,19 @@ void ReplicationCoordinatorImpl::_requestRemotePrimaryStepdown(const HostAndPort
}
}
-ReplicationExecutor::EventHandle ReplicationCoordinatorImpl::_stepDownStart() {
+ReplicationExecutor::EventHandle ReplicationCoordinatorImpl::_stepDownStart(bool hasMutex) {
+ {
+ boost::optional<stdx::lock_guard<stdx::mutex>> lk;
+ if (!hasMutex) {
+ lk.emplace(_mutex);
+ }
+ _stepDownPending = true;
+ }
auto finishEvent = _makeEvent();
if (!finishEvent) {
return finishEvent;
}
+
_replExecutor.scheduleWorkWithGlobalExclusiveLock(stdx::bind(
&ReplicationCoordinatorImpl::_stepDownFinish, this, stdx::placeholders::_1, finishEvent));
return finishEvent;
@@ -328,6 +340,19 @@ void ReplicationCoordinatorImpl::_stepDownFinish(
return;
}
+ if (MONGO_FAIL_POINT(blockHeartbeatStepdown)) {
+ // Must reschedule rather than block so we don't take up threads in the replication
+ // executor.
+ sleepmillis(10);
+ _replExecutor.scheduleWorkWithGlobalExclusiveLock(
+ stdx::bind(&ReplicationCoordinatorImpl::_stepDownFinish,
+ this,
+ stdx::placeholders::_1,
+ finishedEvent));
+
+ return;
+ }
+
LockGuard topoLock(_topoMutex);
invariant(cbData.txn);
@@ -668,7 +693,9 @@ void ReplicationCoordinatorImpl::_handleLivenessTimeout(
_topCoord->setMemberAsDown(now, memberIndex, _getMyLastDurableOpTime_inlock());
// Don't mind potential asynchronous stepdown as this is the last step of
// liveness check.
- _handleHeartbeatResponseAction(action, makeStatusWith<ReplSetHeartbeatResponse>());
+ _handleHeartbeatResponseAction(action,
+ makeStatusWith<ReplSetHeartbeatResponse>(),
+ true /*we're holding _mutex*/);
}
}
}
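The blockHeartbeatStepdown fail point above keeps _stepDownFinish() from completing, but instead of blocking it sleeps briefly and reschedules itself so that no replication-executor thread is tied up while the fail point is enabled. The following is a toy, self-contained sketch of that reschedule-instead-of-block pattern; the executor here is a stand-in, not the ReplicationExecutor.

```cpp
// Sketch of "reschedule rather than block": while the flag is set, the task
// re-queues itself instead of parking an executor thread.
#include <atomic>
#include <chrono>
#include <functional>
#include <queue>
#include <thread>

std::atomic<bool> failPointEnabled{true};

struct ToyExecutor {
    std::queue<std::function<void()>> work;
    void schedule(std::function<void()> fn) { work.push(std::move(fn)); }
    void runOne() {
        if (work.empty()) return;
        auto fn = std::move(work.front());
        work.pop();
        fn();
    }
};

void stepDownFinish(ToyExecutor& exec) {
    if (failPointEnabled.load()) {
        // Don't block an executor thread; back off briefly and try again later.
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
        exec.schedule([&exec] { stepDownFinish(exec); });
        return;
    }
    // ... the actual stepdown work would run here ...
}

int main() {
    ToyExecutor exec;
    exec.schedule([&exec] { stepDownFinish(exec); });
    exec.runOne();             // requeues itself while the fail point is on
    failPointEnabled = false;  // the test disables the fail point
    exec.runOne();             // now the stepdown completes
}
```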
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index 1c43172f83c..914b64eea3d 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -43,6 +43,7 @@
#include "mongo/db/repl/old_update_position_args.h"
#include "mongo/db/repl/optime.h"
#include "mongo/db/repl/read_concern_args.h"
+#include "mongo/db/repl/repl_client_info.h"
#include "mongo/db/repl/repl_set_heartbeat_args.h"
#include "mongo/db/repl/repl_set_heartbeat_args_v1.h"
#include "mongo/db/repl/repl_settings.h"
@@ -79,15 +80,15 @@ using executor::RemoteCommandResponse;
using unittest::assertGet;
typedef ReplicationCoordinator::ReplSetReconfigArgs ReplSetReconfigArgs;
-// Helper class to wrap Timestamp as an OpTime with term 0.
-struct OpTimeWithTermZero {
- OpTimeWithTermZero(unsigned int sec, unsigned int i) : timestamp(sec, i) {}
+// Helper class to wrap Timestamp as an OpTime with term 1.
+struct OpTimeWithTermOne {
+ OpTimeWithTermOne(unsigned int sec, unsigned int i) : timestamp(sec, i) {}
operator OpTime() const {
- return OpTime(timestamp, 0);
+ return OpTime(timestamp, 1);
}
operator boost::optional<OpTime>() const {
- return OpTime(timestamp, 0);
+ return OpTime(timestamp, 1);
}
OpTime asOpTime() const {
@@ -601,7 +602,7 @@ TEST_F(ReplCoordTest, NodeReturnsImmediatelyWhenAwaitReplicationIsRanAgainstASta
init("");
auto txn = makeOperationContext();
- OpTimeWithTermZero time(100, 1);
+ OpTimeWithTermOne time(100, 1);
WriteConcernOptions writeConcern;
writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
@@ -620,7 +621,7 @@ TEST_F(ReplCoordTest, NodeReturnsImmediatelyWhenAwaitReplicationIsRanAgainstAMas
init(settings);
auto txn = makeOperationContext();
- OpTimeWithTermZero time(100, 1);
+ OpTimeWithTermOne time(100, 1);
WriteConcernOptions writeConcern;
writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
@@ -654,7 +655,7 @@ TEST_F(ReplCoordTest, NodeReturnsNotMasterWhenRunningAwaitReplicationAgainstASec
auto txn = makeOperationContext();
- OpTimeWithTermZero time(100, 1);
+ OpTimeWithTermOne time(100, 1);
WriteConcernOptions writeConcern;
writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
@@ -667,7 +668,7 @@ TEST_F(ReplCoordTest, NodeReturnsNotMasterWhenRunningAwaitReplicationAgainstASec
ASSERT_EQUALS(ErrorCodes::PrimarySteppedDown, statusAndDur.status);
}
-TEST_F(ReplCoordTest, NodeReturnsOkWhenRunningAwaitReplicationAgainstPrimaryWithWZero) {
+TEST_F(ReplCoordTest, NodeReturnsOkWhenRunningAwaitReplicationAgainstPrimaryWithWTermOne) {
assertStartSuccess(BSON("_id"
<< "mySet"
<< "version"
@@ -687,7 +688,7 @@ TEST_F(ReplCoordTest, NodeReturnsOkWhenRunningAwaitReplicationAgainstPrimaryWith
<< 2))),
HostAndPort("node1", 12345));
- OpTimeWithTermZero time(100, 1);
+ OpTimeWithTermOne time(100, 1);
WriteConcernOptions writeConcern;
writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
@@ -696,13 +697,13 @@ TEST_F(ReplCoordTest, NodeReturnsOkWhenRunningAwaitReplicationAgainstPrimaryWith
// Become primary.
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
simulateSuccessfulV1Election();
ASSERT(getReplCoord()->getMemberState().primary());
auto txn = makeOperationContext();
- ;
+
ReplicationCoordinator::StatusAndDuration statusAndDur =
getReplCoord()->awaitReplication(txn.get(), time, writeConcern);
ASSERT_OK(statusAndDur.status);
@@ -735,12 +736,12 @@ TEST_F(ReplCoordTest,
<< 3))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
simulateSuccessfulV1Election();
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
+ OpTimeWithTermOne time1(100, 1);
+ OpTimeWithTermOne time2(100, 2);
WriteConcernOptions writeConcern;
writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
@@ -815,12 +816,12 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedUntilASufficientNumberOfNodes
<< 3))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
simulateSuccessfulV1Election();
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
+ OpTimeWithTermOne time1(100, 1);
+ OpTimeWithTermOne time2(100, 2);
WriteConcernOptions writeConcern;
writeConcern.wTimeout = WriteConcernOptions::kNoWaiting;
@@ -1026,6 +1027,8 @@ TEST_F(
// another name if we didn't get a high enough one.
}
+ auto zeroOpTimeInCurrentTerm = OpTime(Timestamp(0, 0), 1);
+ ReplClientInfo::forClient(txn.get()->getClient()).setLastOp(zeroOpTimeInCurrentTerm);
statusAndDur =
getReplCoord()->awaitReplicationOfLastOpForClient(txn.get(), majorityWriteConcern);
ASSERT_EQUALS(ErrorCodes::WriteConcernFailed, statusAndDur.status);
@@ -1148,14 +1151,14 @@ TEST_F(ReplCoordTest, NodeReturnsOkWhenAWriteConcernWithNoTimeoutHasBeenSatisfie
<< 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
simulateSuccessfulV1Election();
ReplicationAwaiter awaiter(getReplCoord(), getServiceContext());
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
+ OpTimeWithTermOne time1(100, 1);
+ OpTimeWithTermOne time2(100, 2);
WriteConcernOptions writeConcern;
writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
@@ -1212,14 +1215,14 @@ TEST_F(ReplCoordTest, NodeReturnsWriteConcernFailedWhenAWriteConcernTimesOutBefo
<< 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
simulateSuccessfulV1Election();
ReplicationAwaiter awaiter(getReplCoord(), getServiceContext());
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
+ OpTimeWithTermOne time1(100, 1);
+ OpTimeWithTermOne time2(100, 2);
WriteConcernOptions writeConcern;
writeConcern.wDeadline = getNet()->now() + Milliseconds(50);
@@ -1263,14 +1266,14 @@ TEST_F(ReplCoordTest,
<< 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
simulateSuccessfulV1Election();
ReplicationAwaiter awaiter(getReplCoord(), getServiceContext());
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
+ OpTimeWithTermOne time1(100, 1);
+ OpTimeWithTermOne time2(100, 2);
WriteConcernOptions writeConcern;
writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
@@ -1313,15 +1316,15 @@ TEST_F(ReplCoordTest, NodeReturnsNotMasterWhenSteppingDownBeforeSatisfyingAWrite
<< 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
simulateSuccessfulV1Election();
const auto txn = makeOperationContext();
ReplicationAwaiter awaiter(getReplCoord(), getServiceContext());
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
+ OpTimeWithTermOne time1(100, 1);
+ OpTimeWithTermOne time2(100, 2);
WriteConcernOptions writeConcern;
writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
@@ -1355,14 +1358,14 @@ TEST_F(ReplCoordTest,
<< "node3"))),
HostAndPort("node1"));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
simulateSuccessfulV1Election();
ReplicationAwaiter awaiter(getReplCoord(), getServiceContext());
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
+ OpTimeWithTermOne time1(100, 1);
+ OpTimeWithTermOne time2(100, 2);
WriteConcernOptions writeConcern;
writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
@@ -1575,7 +1578,7 @@ TEST_F(ReplCoordTest, ConcurrentStepDownShouldNotSignalTheSameFinishEventMoreTha
TEST_F(StepDownTest, NodeReturnsNotMasterWhenAskedToStepDownAsANonPrimaryNode) {
const auto txn = makeOperationContext();
- OpTimeWithTermZero optime1(100, 1);
+ OpTimeWithTermOne optime1(100, 1);
// All nodes are caught up
getReplCoord()->setMyLastAppliedOpTime(optime1);
getReplCoord()->setMyLastDurableOpTime(optime1);
@@ -1589,7 +1592,7 @@ TEST_F(StepDownTest, NodeReturnsNotMasterWhenAskedToStepDownAsANonPrimaryNode) {
TEST_F(StepDownTest,
NodeReturnsExceededTimeLimitWhenStepDownFailsToObtainTheGlobalLockWithinTheAllottedTime) {
- OpTimeWithTermZero optime1(100, 1);
+ OpTimeWithTermOne optime1(100, 1);
// All nodes are caught up
getReplCoord()->setMyLastAppliedOpTime(optime1);
getReplCoord()->setMyLastDurableOpTime(optime1);
@@ -1819,8 +1822,8 @@ TEST_F(ReplCoordTest, NodeBecomesPrimaryAgainWhenStepDownTimeoutExpiresInASingle
TEST_F(StepDownTest,
NodeReturnsExceededTimeLimitWhenNoSecondaryIsCaughtUpWithinStepDownsSecondaryCatchUpPeriod) {
- OpTimeWithTermZero optime1(100, 1);
- OpTimeWithTermZero optime2(100, 2);
+ OpTimeWithTermOne optime1(100, 1);
+ OpTimeWithTermOne optime2(100, 2);
// No secondary is caught up
auto repl = getReplCoord();
repl->setMyLastAppliedOpTime(optime2);
@@ -1986,8 +1989,8 @@ TEST_F(StepDownTest,
}
TEST_F(StepDownTest, NodeReturnsInterruptedWhenInterruptedDuringStepDown) {
- OpTimeWithTermZero optime1(100, 1);
- OpTimeWithTermZero optime2(100, 2);
+ OpTimeWithTermOne optime1(100, 1);
+ OpTimeWithTermOne optime2(100, 2);
// No secondary is caught up
auto repl = getReplCoord();
repl->setMyLastAppliedOpTime(optime2);
@@ -2142,9 +2145,9 @@ TEST_F(ReplCoordTest, NodeIncludesOtherMembersProgressInOldUpdatePositionCommand
<< BSON("_id" << 2 << "host"
<< "test3:1234"))),
HostAndPort("test1", 1234));
- OpTimeWithTermZero optime1(100, 1);
- OpTimeWithTermZero optime2(100, 2);
- OpTimeWithTermZero optime3(2, 1);
+ OpTimeWithTermOne optime1(100, 1);
+ OpTimeWithTermOne optime2(100, 2);
+ OpTimeWithTermOne optime3(2, 1);
getReplCoord()->setMyLastAppliedOpTime(optime1);
getReplCoord()->setMyLastDurableOpTime(optime1);
ASSERT_OK(getReplCoord()->setLastAppliedOptime_forTest(1, 1, optime2));
@@ -2177,7 +2180,7 @@ TEST_F(ReplCoordTest, NodeIncludesOtherMembersProgressInOldUpdatePositionCommand
ASSERT_EQUALS(optime3.timestamp,
entry[OldUpdatePositionArgs::kOpTimeFieldName]["ts"].timestamp());
}
- ASSERT_EQUALS(0, entry[OldUpdatePositionArgs::kOpTimeFieldName]["t"].Number());
+ ASSERT_EQUALS(1, entry[OldUpdatePositionArgs::kOpTimeFieldName]["t"].Number());
}
ASSERT_EQUALS(3U, memberIds.size()); // Make sure we saw all 3 nodes
}
@@ -2200,8 +2203,8 @@ TEST_F(ReplCoordTest,
<< "test3:1234"))),
HostAndPort("test2", 1234));
getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
// Can't unset maintenance mode if it was never set to begin with.
Status status = getReplCoord()->setMaintenanceMode(false);
@@ -2227,8 +2230,8 @@ TEST_F(ReplCoordTest,
<< "test3:1234"))),
HostAndPort("test2", 1234));
getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
// valid set
ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
ASSERT_TRUE(getReplCoord()->getMemberState().recovering());
@@ -2259,8 +2262,8 @@ TEST_F(ReplCoordTest, AllowAsManyUnsetMaintenanceModesAsThereHaveBeenSetMaintena
<< "test3:1234"))),
HostAndPort("test2", 1234));
getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
// Can set multiple times
ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
ASSERT_OK(getReplCoord()->setMaintenanceMode(true));
@@ -2293,8 +2296,8 @@ TEST_F(ReplCoordTest, SettingAndUnsettingMaintenanceModeShouldNotAffectRollbackS
<< "test3:1234"))),
HostAndPort("test2", 1234));
getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
// From rollback, entering and exiting maintenance mode doesn't change perceived
// state.
@@ -2335,8 +2338,8 @@ TEST_F(ReplCoordTest, DoNotAllowMaintenanceModeWhilePrimary) {
<< "test3:1234"))),
HostAndPort("test2", 1234));
getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
// Can't modify maintenance mode when PRIMARY
simulateSuccessfulV1Election();
@@ -2374,8 +2377,8 @@ TEST_F(ReplCoordTest, DoNotAllowSettingMaintenanceModeWhileConductingAnElection)
<< "test3:1234"))),
HostAndPort("test2", 1234));
getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY);
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
// TODO this election shouldn't have to happen.
simulateSuccessfulV1Election();
@@ -2439,8 +2442,8 @@ TEST_F(ReplCoordTest,
<< BSON("_id" << 2 << "host" << client2Host.toString()))),
HostAndPort("node1", 12345));
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
+ OpTimeWithTermOne time1(100, 1);
+ OpTimeWithTermOne time2(100, 2);
getReplCoord()->setMyLastAppliedOpTime(time2);
getReplCoord()->setMyLastDurableOpTime(time2);
@@ -2484,8 +2487,8 @@ TEST_F(ReplCoordTest,
<< BSON("_id" << 2 << "host" << client2Host.toString()))),
HostAndPort("node1", 12345));
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
+ OpTimeWithTermOne time1(100, 1);
+ OpTimeWithTermOne time2(100, 2);
getReplCoord()->setMyLastAppliedOpTime(time2);
getReplCoord()->setMyLastDurableOpTime(time2);
@@ -2515,8 +2518,8 @@ TEST_F(ReplCoordTest, NodeDoesNotIncludeItselfWhenRunningGetHostsWrittenToInMast
OID client = OID::gen();
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
+ OpTimeWithTermOne time1(100, 1);
+ OpTimeWithTermOne time2(100, 2);
getExternalState()->setClientHostAndPort(clientHost);
HandshakeArgs handshake;
@@ -2717,12 +2720,12 @@ TEST_F(ReplCoordTest, DoNotProcessSelfWhenUpdatePositionContainsInfoAboutSelf) {
<< 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
simulateSuccessfulV1Election();
- OpTime time1({100, 1}, 2);
- OpTime time2({100, 2}, 2);
+ OpTime time1({100, 1}, 1);
+ OpTime time2({100, 2}, 1);
getReplCoord()->setMyLastAppliedOpTime(time1);
getReplCoord()->setMyLastDurableOpTime(time1);
@@ -2738,18 +2741,17 @@ TEST_F(ReplCoordTest, DoNotProcessSelfWhenUpdatePositionContainsInfoAboutSelf) {
// receive updatePosition containing ourself, should not process the update for self
UpdatePositionArgs args;
- ASSERT_OK(args.initialize(
- BSON(UpdatePositionArgs::kCommandFieldName
- << 1
- << UpdatePositionArgs::kUpdateArrayFieldName
- << BSON_ARRAY(BSON(UpdatePositionArgs::kConfigVersionFieldName
- << 2
- << UpdatePositionArgs::kMemberIdFieldName
- << 0
- << UpdatePositionArgs::kDurableOpTimeFieldName
- << BSON("ts" << time2.getTimestamp() << "t" << 2)
- << UpdatePositionArgs::kAppliedOpTimeFieldName
- << BSON("ts" << time2.getTimestamp() << "t" << 2))))));
+ ASSERT_OK(args.initialize(BSON(UpdatePositionArgs::kCommandFieldName
+ << 1
+ << UpdatePositionArgs::kUpdateArrayFieldName
+ << BSON_ARRAY(BSON(UpdatePositionArgs::kConfigVersionFieldName
+ << 2
+ << UpdatePositionArgs::kMemberIdFieldName
+ << 0
+ << UpdatePositionArgs::kDurableOpTimeFieldName
+ << time2.toBSON()
+ << UpdatePositionArgs::kAppliedOpTimeFieldName
+ << time2.toBSON())))));
ASSERT_OK(getReplCoord()->processReplSetUpdatePosition(args, 0));
ASSERT_EQUALS(ErrorCodes::WriteConcernFailed,
@@ -2776,13 +2778,13 @@ TEST_F(ReplCoordTest, DoNotProcessSelfWhenOldUpdatePositionContainsInfoAboutSelf
<< 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
simulateSuccessfulV1Election();
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
- OpTimeWithTermZero staleTime(10, 0);
+ OpTimeWithTermOne time1(100, 1);
+ OpTimeWithTermOne time2(100, 2);
+ OpTimeWithTermOne staleTime(10, 0);
getReplCoord()->setMyLastAppliedOpTime(time1);
getReplCoord()->setMyLastDurableOpTime(time1);
@@ -2833,12 +2835,12 @@ TEST_F(ReplCoordTest, DoNotProcessUpdatePositionWhenItsConfigVersionIsIncorrect)
<< 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
simulateSuccessfulV1Election();
- OpTime time1({100, 1}, 3);
- OpTime time2({100, 2}, 3);
+ OpTime time1({100, 1}, 1);
+ OpTime time2({100, 2}, 1);
getReplCoord()->setMyLastAppliedOpTime(time1);
getReplCoord()->setMyLastDurableOpTime(time1);
@@ -2848,18 +2850,17 @@ TEST_F(ReplCoordTest, DoNotProcessUpdatePositionWhenItsConfigVersionIsIncorrect)
// receive updatePosition with incorrect config version
UpdatePositionArgs args;
- ASSERT_OK(args.initialize(
- BSON(UpdatePositionArgs::kCommandFieldName
- << 1
- << UpdatePositionArgs::kUpdateArrayFieldName
- << BSON_ARRAY(BSON(UpdatePositionArgs::kConfigVersionFieldName
- << 3
- << UpdatePositionArgs::kMemberIdFieldName
- << 1
- << UpdatePositionArgs::kDurableOpTimeFieldName
- << BSON("ts" << time2.getTimestamp() << "t" << 3)
- << UpdatePositionArgs::kAppliedOpTimeFieldName
- << BSON("ts" << time2.getTimestamp() << "t" << 3))))));
+ ASSERT_OK(args.initialize(BSON(UpdatePositionArgs::kCommandFieldName
+ << 1
+ << UpdatePositionArgs::kUpdateArrayFieldName
+ << BSON_ARRAY(BSON(UpdatePositionArgs::kConfigVersionFieldName
+ << 3
+ << UpdatePositionArgs::kMemberIdFieldName
+ << 1
+ << UpdatePositionArgs::kDurableOpTimeFieldName
+ << time2.toBSON()
+ << UpdatePositionArgs::kAppliedOpTimeFieldName
+ << time2.toBSON())))));
auto txn = makeOperationContext();
@@ -2891,13 +2892,13 @@ TEST_F(ReplCoordTest, DoNotProcessOldUpdatePositionWhenItsConfigVersionIsIncorre
<< 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
simulateSuccessfulV1Election();
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
- OpTimeWithTermZero staleTime(10, 0);
+ OpTimeWithTermOne time1(100, 1);
+ OpTimeWithTermOne time2(100, 2);
+ OpTimeWithTermOne staleTime(10, 0);
getReplCoord()->setMyLastAppliedOpTime(time1);
getReplCoord()->setMyLastDurableOpTime(time1);
@@ -2947,12 +2948,12 @@ TEST_F(ReplCoordTest, DoNotProcessUpdatePositionOfMembersWhoseIdsAreNotInTheConf
<< 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
simulateSuccessfulV1Election();
- OpTime time1({100, 1}, 2);
- OpTime time2({100, 2}, 2);
+ OpTime time1({100, 1}, 1);
+ OpTime time2({100, 2}, 1);
getReplCoord()->setMyLastAppliedOpTime(time1);
getReplCoord()->setMyLastDurableOpTime(time1);
@@ -2962,18 +2963,17 @@ TEST_F(ReplCoordTest, DoNotProcessUpdatePositionOfMembersWhoseIdsAreNotInTheConf
// receive updatePosition with nonexistent member id
UpdatePositionArgs args;
- ASSERT_OK(args.initialize(
- BSON(UpdatePositionArgs::kCommandFieldName
- << 1
- << UpdatePositionArgs::kUpdateArrayFieldName
- << BSON_ARRAY(BSON(UpdatePositionArgs::kConfigVersionFieldName
- << 2
- << UpdatePositionArgs::kMemberIdFieldName
- << 9
- << UpdatePositionArgs::kDurableOpTimeFieldName
- << BSON("ts" << time2.getTimestamp() << "t" << 2)
- << UpdatePositionArgs::kAppliedOpTimeFieldName
- << BSON("ts" << time2.getTimestamp() << "t" << 2))))));
+ ASSERT_OK(args.initialize(BSON(UpdatePositionArgs::kCommandFieldName
+ << 1
+ << UpdatePositionArgs::kUpdateArrayFieldName
+ << BSON_ARRAY(BSON(UpdatePositionArgs::kConfigVersionFieldName
+ << 2
+ << UpdatePositionArgs::kMemberIdFieldName
+ << 9
+ << UpdatePositionArgs::kDurableOpTimeFieldName
+ << time2.toBSON()
+ << UpdatePositionArgs::kAppliedOpTimeFieldName
+ << time2.toBSON())))));
auto txn = makeOperationContext();
@@ -3003,13 +3003,13 @@ TEST_F(ReplCoordTest, DoNotProcessOldUpdatePositionOfMembersWhoseIdsAreNotInTheC
<< 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
simulateSuccessfulV1Election();
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
- OpTimeWithTermZero staleTime(10, 0);
+ OpTimeWithTermOne time1(100, 1);
+ OpTimeWithTermOne time2(100, 2);
+ OpTimeWithTermOne staleTime(10, 0);
getReplCoord()->setMyLastAppliedOpTime(time1);
getReplCoord()->setMyLastDurableOpTime(time1);
@@ -3058,13 +3058,13 @@ TEST_F(ReplCoordTest,
<< 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
simulateSuccessfulV1Election();
- OpTimeWithTermZero time1(100, 1);
- OpTimeWithTermZero time2(100, 2);
- OpTimeWithTermZero staleTime(10, 0);
+ OpTimeWithTermOne time1(100, 1);
+ OpTimeWithTermOne time2(100, 2);
+ OpTimeWithTermOne staleTime(10, 0);
getReplCoord()->setMyLastAppliedOpTime(time1);
getReplCoord()->setMyLastDurableOpTime(time1);
@@ -3151,11 +3151,11 @@ TEST_F(ReplCoordTest, AwaitReplicationShouldResolveAsNormalDuringAReconfig) {
disableSnapshots();
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 2));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 2));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 2));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 2));
simulateSuccessfulV1Election();
- OpTimeWithTermZero time(100, 2);
+ OpTimeWithTermOne time(100, 2);
// 3 nodes waiting for time
WriteConcernOptions writeConcern;
@@ -3242,11 +3242,11 @@ TEST_F(
<< 2))),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 2));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 2));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 2));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 2));
simulateSuccessfulV1Election();
- OpTimeWithTermZero time(100, 2);
+ OpTimeWithTermOne time(100, 2);
// 3 nodes waiting for time
WriteConcernOptions writeConcern;
@@ -3316,8 +3316,8 @@ TEST_F(ReplCoordTest,
disableSnapshots();
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 1));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 1));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 1));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 1));
simulateSuccessfulV1Election();
OpTime time(Timestamp(100, 2), 1);
@@ -3523,15 +3523,15 @@ TEST_F(ReplCoordTest, NodeReturnsShutdownInProgressWhenWaitingUntilAnOpTimeDurin
<< 0))),
HostAndPort("node1", 12345));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(10, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(10, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(10, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(10, 0));
auto txn = makeOperationContext();
shutdown(txn.get());
auto status = getReplCoord()->waitUntilOpTimeForRead(
- txn.get(), ReadConcernArgs(OpTimeWithTermZero(50, 0), ReadConcernLevel::kLocalReadConcern));
+ txn.get(), ReadConcernArgs(OpTimeWithTermOne(50, 0), ReadConcernLevel::kLocalReadConcern));
ASSERT_EQ(status, ErrorCodes::ShutdownInProgress);
}
@@ -3547,14 +3547,14 @@ TEST_F(ReplCoordTest, NodeReturnsInterruptedWhenWaitingUntilAnOpTimeIsInterrupte
<< 0))),
HostAndPort("node1", 12345));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(10, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(10, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(10, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(10, 0));
const auto txn = makeOperationContext();
killOperation(txn.get());
auto status = getReplCoord()->waitUntilOpTimeForRead(
- txn.get(), ReadConcernArgs(OpTimeWithTermZero(50, 0), ReadConcernLevel::kLocalReadConcern));
+ txn.get(), ReadConcernArgs(OpTimeWithTermOne(50, 0), ReadConcernLevel::kLocalReadConcern));
ASSERT_EQ(status, ErrorCodes::Interrupted);
}
@@ -3587,14 +3587,13 @@ TEST_F(ReplCoordTest, NodeReturnsOkImmediatelyWhenWaitingUntilOpTimePassesAnOpTi
<< 0))),
HostAndPort("node1", 12345));
- getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermZero(100, 0));
- getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermZero(100, 0));
+ getReplCoord()->setMyLastAppliedOpTime(OpTimeWithTermOne(100, 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTimeWithTermOne(100, 0));
auto txn = makeOperationContext();
ASSERT_OK(getReplCoord()->waitUntilOpTimeForRead(
- txn.get(),
- ReadConcernArgs(OpTimeWithTermZero(50, 0), ReadConcernLevel::kLocalReadConcern)));
+ txn.get(), ReadConcernArgs(OpTimeWithTermOne(50, 0), ReadConcernLevel::kLocalReadConcern)));
}
TEST_F(ReplCoordTest, NodeReturnsOkImmediatelyWhenWaitingUntilOpTimePassesAnOpTimeEqualToOurLast) {
@@ -3610,7 +3609,7 @@ TEST_F(ReplCoordTest, NodeReturnsOkImmediatelyWhenWaitingUntilOpTimePassesAnOpTi
HostAndPort("node1", 12345));
- OpTimeWithTermZero time(100, 0);
+ OpTimeWithTermOne time(100, 0);
getReplCoord()->setMyLastAppliedOpTime(time);
getReplCoord()->setMyLastDurableOpTime(time);
@@ -3627,7 +3626,7 @@ TEST_F(ReplCoordTest,
auto txn = makeOperationContext();
auto status = getReplCoord()->waitUntilOpTimeForRead(
- txn.get(), ReadConcernArgs(OpTimeWithTermZero(50, 0), ReadConcernLevel::kLocalReadConcern));
+ txn.get(), ReadConcernArgs(OpTimeWithTermOne(50, 0), ReadConcernLevel::kLocalReadConcern));
ASSERT_EQ(status, ErrorCodes::NotAReplicaSet);
}
diff --git a/src/mongo/db/write_concern.cpp b/src/mongo/db/write_concern.cpp
index 1548dae14a2..f4a30f917f5 100644
--- a/src/mongo/db/write_concern.cpp
+++ b/src/mongo/db/write_concern.cpp
@@ -45,6 +45,7 @@
#include "mongo/db/storage/storage_engine.h"
#include "mongo/db/write_concern_options.h"
#include "mongo/rpc/protocol.h"
+#include "mongo/util/fail_point_service.h"
#include "mongo/util/log.h"
namespace mongo {
@@ -59,6 +60,8 @@ static Counter64 gleWtimeouts;
static ServerStatusMetricField<Counter64> gleWtimeoutsDisplay("getLastError.wtimeouts",
&gleWtimeouts);
+MONGO_FP_DECLARE(hangBeforeWaitingForWriteConcern);
+
StatusWith<WriteConcernOptions> extractWriteConcern(OperationContext* txn,
const BSONObj& cmdObj,
const std::string& dbName,
@@ -182,6 +185,8 @@ Status waitForWriteConcern(OperationContext* txn,
<< ", write concern: " << writeConcern.toBSON();
auto replCoord = repl::ReplicationCoordinator::get(txn);
+ MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangBeforeWaitingForWriteConcern);
+
// Next handle blocking on disk
Timer syncTimer;
WriteConcernOptions writeConcernWithPopulatedSyncMode =