From 8651c754eedf84651dd5051aa43c70cd96b00586 Mon Sep 17 00:00:00 2001
From: Vesselina Ratcheva
Date: Wed, 26 Feb 2020 00:32:26 -0500
Subject: Revert "SERVER-17934 Do not report replication progress upstream while in initial sync"

This reverts commit c87d505f571c7dbe45048cd3cbe116278f8efa31.
---
 .../no_progress_updates_during_initial_sync.js     | 144 ---------------------
 src/mongo/db/repl/topology_coordinator.cpp         |  12 +-
 src/mongo/db/repl/topology_coordinator_v1_test.cpp |  35 -----
 3 files changed, 2 insertions(+), 189 deletions(-)
 delete mode 100644 jstests/replsets/no_progress_updates_during_initial_sync.js

diff --git a/jstests/replsets/no_progress_updates_during_initial_sync.js b/jstests/replsets/no_progress_updates_during_initial_sync.js
deleted file mode 100644
index d9ec0accdc7..00000000000
--- a/jstests/replsets/no_progress_updates_during_initial_sync.js
+++ /dev/null
@@ -1,144 +0,0 @@
-/**
- * Test that a node in initial sync does not report replication progress. There are two routes
- * these kinds of updates take:
- * - via spanning tree:
- *       initial-syncing nodes should send no replSetUpdatePosition commands upstream at all
- * - via heartbeats:
- *       these nodes should include null lastApplied and lastDurable optimes in heartbeat responses
- *
- * @tags: [requires_fcv_44]
- */
-(function() {
-"use strict";
-
-load("jstests/libs/write_concern_util.js");
-
-const testName = jsTestName();
-const rst = new ReplSetTest({name: testName, nodes: 1});
-rst.startSet();
-rst.initiate();
-
-const primary = rst.getPrimary();
-const primaryDb = primary.getDB("test");
-assert.commandWorked(primaryDb.test.insert({"starting": "doc"}));
-
-jsTestLog("Adding a new node to the replica set");
-
-const secondary = rst.add({
-    rsConfig: {priority: 0},
-    setParameter: {
-        // Used to guarantee we have something to fetch.
-        'failpoint.initialSyncHangAfterDataCloning': tojson({mode: 'alwaysOn'}),
-        'failpoint.initialSyncHangBeforeFinish': tojson({mode: 'alwaysOn'}),
-        'numInitialSyncAttempts': 1,
-    }
-});
-rst.reInitiate();
-rst.waitForState(secondary, ReplSetTest.State.STARTUP_2);
-
-// Make sure we are through with cloning before inserting more docs on the primary, so that we can
-// guarantee we have to fetch and apply them. We begin fetching inclusively of the primary's
-// lastApplied.
-assert.commandWorked(secondary.adminCommand({
-    waitForFailPoint: "initialSyncHangAfterDataCloning",
-    timesEntered: 1,
-    maxTimeMS: kDefaultWaitForFailPointTimeout
-}));
-
-jsTestLog("Inserting some docs on the primary to advance its lastApplied");
-
-assert.commandWorked(primaryDb.test.insert([{a: 1}, {b: 2}, {c: 3}, {d: 4}, {e: 5}]));
-
-jsTestLog("Resuming initial sync");
-
-assert.commandWorked(
-    secondary.adminCommand({configureFailPoint: "initialSyncHangAfterDataCloning", mode: "off"}));
-
-assert.commandWorked(secondary.adminCommand({
-    waitForFailPoint: "initialSyncHangBeforeFinish",
-    timesEntered: 1,
-    maxTimeMS: kDefaultWaitForFailPointTimeout
-}));
-
-// 1. Make sure the initial syncing node sent no replSetUpdatePosition commands while applying.
-sleep(4 * 1000);
-const numUpdatePosition = assert.commandWorked(secondary.adminCommand({serverStatus: 1}))
-                              .metrics.repl.network.replSetUpdatePosition.num;
-assert.eq(0, numUpdatePosition);
-
-const nullOpTime = {
-    "ts": Timestamp(0, 0),
-    "t": NumberLong(-1)
-};
-const nullWallTime = ISODate("1970-01-01T00:00:00Z");
-
-// 2. It also should not participate in the acknowledgement of any writes.
-const writeResW2 = primaryDb.runCommand({
-    insert: "test",
-    documents: [{"writeConcernTwo": "shouldfail"}],
-    writeConcern: {w: 2, wtimeout: 4000}
-});
-checkWriteConcernTimedOut(writeResW2);
-
-// The lastCommitted opTime should not advance on the secondary.
-const opTimesAfterW2 = assert.commandWorked(secondary.adminCommand({replSetGetStatus: 1})).optimes;
-assert.docEq(opTimesAfterW2.lastCommittedOpTime, nullOpTime, () => tojson(opTimesAfterW2));
-assert.eq(nullWallTime, opTimesAfterW2.lastCommittedWallTime, () => tojson(opTimesAfterW2));
-
-const writeResWMaj = primaryDb.runCommand({
-    insert: "test",
-    documents: [{"writeConcernMajority": "shouldfail"}],
-    writeConcern: {w: "majority", wtimeout: 4000}
-});
-checkWriteConcernTimedOut(writeResWMaj);
-
-// The lastCommitted opTime should not advance on the secondary.
-const opTimesAfterWMaj =
-    assert.commandWorked(secondary.adminCommand({replSetGetStatus: 1})).optimes;
-assert.docEq(opTimesAfterWMaj.lastCommittedOpTime, nullOpTime, () => tojson(opTimesAfterWMaj));
-assert.eq(nullWallTime, opTimesAfterWMaj.lastCommittedWallTime, () => tojson(opTimesAfterWMaj));
-
-// 3. Make sure that even though the lastApplied and lastDurable have advanced on the secondary...
-const secondaryStatusRes = assert.commandWorked(secondary.adminCommand({replSetGetStatus: 1}));
-const secondaryOpTimes = secondaryStatusRes.optimes;
-assert.gte(
-    bsonWoCompare(secondaryOpTimes.appliedOpTime, nullOpTime), 0, () => tojson(secondaryOpTimes));
-assert.gte(
-    bsonWoCompare(secondaryOpTimes.durableOpTime, nullOpTime), 0, () => tojson(secondaryOpTimes));
-assert.neq(nullWallTime, secondaryOpTimes.optimeDate, () => tojson(secondaryOpTimes));
-assert.neq(nullWallTime, secondaryOpTimes.optimeDurableDate, () => tojson(secondaryOpTimes));
-
-// ...the primary thinks they're still null as they were null in the heartbeat responses.
-const primaryStatusRes = assert.commandWorked(primary.adminCommand({replSetGetStatus: 1}));
-const secondaryOpTimesAsSeenByPrimary = primaryStatusRes.members[1];
-assert.docEq(secondaryOpTimesAsSeenByPrimary.optime,
-             nullOpTime,
-             () => tojson(secondaryOpTimesAsSeenByPrimary));
-assert.docEq(secondaryOpTimesAsSeenByPrimary.optimeDurable,
-             nullOpTime,
-             () => tojson(secondaryOpTimesAsSeenByPrimary));
-assert.eq(nullWallTime,
-          secondaryOpTimesAsSeenByPrimary.optimeDate,
-          () => tojson(secondaryOpTimesAsSeenByPrimary));
-assert.eq(nullWallTime,
-          secondaryOpTimesAsSeenByPrimary.optimeDurableDate,
-          () => tojson(secondaryOpTimesAsSeenByPrimary));
-
-// 4. Finally, confirm that we did indeed fetch and apply all documents during initial sync.
-assert(secondaryStatusRes.initialSyncStatus,
-       () => "Response should have an 'initialSyncStatus' field: " + tojson(secondaryStatusRes));
-// We should have applied at least 6 documents, not 5, as fetching and applying are inclusive of the
-// sync source's lastApplied.
-assert.gte(secondaryStatusRes.initialSyncStatus.appliedOps, 6);
-
-// Turn off the last failpoint and wait for the node to finish initial sync.
-assert.commandWorked(
-    secondary.adminCommand({configureFailPoint: "initialSyncHangBeforeFinish", mode: "off"}));
-rst.awaitSecondaryNodes();
-
-// The set should now be able to satisfy {w:2} writes.
-assert.commandWorked(
-    primaryDb.runCommand({insert: "test", documents: [{"will": "succeed"}], writeConcern: {w: 2}}));
-
-rst.stopSet();
-})();
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp
index a2b2690cee3..ca3d1d0dd03 100644
--- a/src/mongo/db/repl/topology_coordinator.cpp
+++ b/src/mongo/db/repl/topology_coordinator.cpp
@@ -681,16 +681,8 @@ Status TopologyCoordinator::prepareHeartbeatResponseV1(Date_t now,
         response->setElectionTime(_electionTime);
     }
 
-    OpTimeAndWallTime lastOpApplied;
-    OpTimeAndWallTime lastOpDurable;
-
-    // We include null times for lastApplied and lastDurable if we are in STARTUP_2, as we do not
-    // want to report replication progress and be part of write majorities while in initial sync.
-    if (!myState.startup2()) {
-        lastOpApplied = getMyLastAppliedOpTimeAndWallTime();
-        lastOpDurable = getMyLastDurableOpTimeAndWallTime();
-    }
-
+    const OpTimeAndWallTime lastOpApplied = getMyLastAppliedOpTimeAndWallTime();
+    const OpTimeAndWallTime lastOpDurable = getMyLastDurableOpTimeAndWallTime();
     response->setAppliedOpTimeAndWallTime(lastOpApplied);
     response->setDurableOpTimeAndWallTime(lastOpDurable);
 
diff --git a/src/mongo/db/repl/topology_coordinator_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_v1_test.cpp
index 149e7c4cc33..5f8bcea353e 100644
--- a/src/mongo/db/repl/topology_coordinator_v1_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_v1_test.cpp
@@ -2278,41 +2278,6 @@ TEST_F(TopoCoordTest, OmitUninitializedConfigTermFromHeartbeat) {
     ASSERT_FALSE(response.toBSON().hasField("configTerm"_sd));
 }
 
-TEST_F(TopoCoordTest, RespondToHeartbeatsWithNullLastAppliedAndLastDurableWhileInInitialSync) {
-    ASSERT_TRUE(TopologyCoordinator::Role::kFollower == getTopoCoord().getRole());
-    ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
-    updateConfig(BSON("_id"
-                      << "rs0"
-                      << "version" << 1 << "members"
-                      << BSON_ARRAY(BSON("_id" << 0 << "host"
-                                               << "h0")
-                                    << BSON("_id" << 1 << "host"
-                                                  << "h1"))),
-                 1);
-
-    ASSERT_TRUE(TopologyCoordinator::Role::kFollower == getTopoCoord().getRole());
-    ASSERT_EQUALS(MemberState::RS_STARTUP2, getTopoCoord().getMemberState().s);
-
-    heartbeatFromMember(
-        HostAndPort("h0"), "rs0", MemberState::RS_SECONDARY, OpTime(Timestamp(3, 0), 0));
-
-    // The lastApplied and lastDurable should be null for any heartbeat responses we send while in
-    // STARTUP_2, even when they are otherwise initialized.
-    OpTime lastOpTime(Timestamp(2, 0), 0);
-    topoCoordSetMyLastAppliedOpTime(lastOpTime, Date_t(), false);
-    topoCoordSetMyLastDurableOpTime(lastOpTime, Date_t(), false);
-
-    ReplSetHeartbeatArgsV1 args;
-    args.setConfigVersion(1);
-    args.setSetName("rs0");
-    args.setSenderId(0);
-    ReplSetHeartbeatResponse response;
-
-    ASSERT_OK(getTopoCoord().prepareHeartbeatResponseV1(now()++, args, "rs0", &response));
-    ASSERT_EQUALS(OpTime(), response.getAppliedOpTime());
-    ASSERT_EQUALS(OpTime(), response.getDurableOpTime());
-}
-
 TEST_F(TopoCoordTest, BecomeCandidateWhenBecomingSecondaryInSingleNodeSet) {
     ASSERT_TRUE(TopologyCoordinator::Role::kFollower == getTopoCoord().getRole());
     ASSERT_EQUALS(MemberState::RS_STARTUP, getTopoCoord().getMemberState().s);
-- 
cgit v1.2.1