diff options
author | Matthew Russotto <matthew.russotto@mongodb.com> | 2022-04-29 14:47:45 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-05-16 14:54:05 +0000 |
commit | d424a76ded97659d5c0933444a3210df82f096e6 (patch) | |
tree | 873cfa9ad2e36e05076c1276226c178cd34995cc | |
parent | cfd244ba1e0cb73af6ee8807961db74a38a097d0 (diff) | |
download | mongo-d424a76ded97659d5c0933444a3210df82f096e6.tar.gz |
SERVER-66089 Initial sync should do transaction table read with a later afterClusterTime.
(cherry picked from commit 878b6bb32413dc105133ffac31e9a0cf21e930f4)
-rw-r--r-- | etc/backports_required_for_multiversion_tests.yml | 4 | ||||
-rw-r--r-- | jstests/replsets/initial_sync_with_partial_transaction.js | 98 | ||||
-rw-r--r-- | src/mongo/db/repl/initial_syncer.cpp | 14 |
3 files changed, 111 insertions, 5 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml index 965c178afc1..42c28dd86fb 100644 --- a/etc/backports_required_for_multiversion_tests.yml +++ b/etc/backports_required_for_multiversion_tests.yml @@ -82,6 +82,8 @@ last-continuous: test_file: jstests/aggregation/lookup_let_optimization.js - ticket: SERVER-59290 test_file: jstests/replsets/sync_source_changes.js + - ticket: SERVER-66089 + test_file: jstests/replsets/initial_sync_with_partial_transaction.js # Tests that should only be excluded from particular suites should be listed under that suite. suites: @@ -324,6 +326,8 @@ last-lts: test_file: jstests/sharding/update_with_dollar_fields.js - ticket: SERVER-63531 test_file: jstests/replsets/buildindexes_false_commit_quorum.js + - ticket: SERVER-66089 + test_file: jstests/replsets/initial_sync_with_partial_transaction.js # Tests that should only be excluded from particular suites should be listed under that suite. suites: diff --git a/jstests/replsets/initial_sync_with_partial_transaction.js b/jstests/replsets/initial_sync_with_partial_transaction.js new file mode 100644 index 00000000000..a96387bbb31 --- /dev/null +++ b/jstests/replsets/initial_sync_with_partial_transaction.js @@ -0,0 +1,98 @@ +/** + * Tests that an initial sync that starts in the middle of a large transaction on a secondary + * is able to complete and apply the entire transaction. + * + * @tags: [ + * exclude_from_large_txns, + * uses_transactions, + * ] + */ +(function() { +"use strict"; +load("jstests/replsets/rslib.js"); // For reconnect() +load("jstests/libs/fail_point_util.js"); + +const replTest = new ReplSetTest({ + nodes: [{}, {rsConfig: {priority: 0}}], + nodeOptions: { + setParameter: + // We want two entries in each oplog batch, so the beginning of the transaction is not + // the end of the batch. + {maxNumberOfTransactionOperationsInSingleOplogEntry: 1, bgSyncOplogFetcherBatchSize: 2} + }, +}); + +replTest.startSet(); +replTest.initiateWithHighElectionTimeout(); + +// We have to add and pause the initial sync node here, because we cannot re-initiate the set while +// the failpoints are held. +jsTestLog("Adding initial sync node"); +const initialSyncNode = replTest.add({ + rsConfig: {priority: 0, votes: 0}, + setParameter: { + 'initialSyncSourceReadPreference': 'secondary', + 'failpoint.initialSyncHangBeforeChoosingSyncSource': tojson({mode: 'alwaysOn'}), + 'logComponentVerbosity': tojsononeline({replication: {initialSync: 1}}) + } +}); +replTest.reInitiate(); + +const dbName = jsTest.name(); +const collName = "coll"; + +const primary = replTest.getPrimary(); +const testDB = primary.getDB(dbName); +const syncSource = replTest.nodes[1]; + +assert.commandWorked(testDB.runCommand({create: collName, writeConcern: {w: "majority"}})); + +jsTest.log("Stop secondary oplog replication before the last operation in the transaction."); +// The stopReplProducerOnDocument failpoint ensures that secondary stops replicating before +// accepting the last two operations in the transaction. +const stopReplProducerOnDocumentFailPoint = configureFailPoint( + syncSource, "stopReplProducerOnDocument", {document: {"applyOps.o._id": "next-last in txn"}}); + +// This will cause us to pause batch application at a critical point, with "first in txn" and +// "next in txn" in the oplog but not applied. +const pauseBatchApplicationAfterWritingOplogEntriesFailPoint = + configureFailPoint(syncSource, "pauseBatchApplicationAfterWritingOplogEntries"); + +jsTestLog("Starting transaction"); +const session = primary.startSession({causalConsistency: false}); +const sessionDB = session.getDatabase(dbName); +session.startTransaction({writeConcern: {w: 1}}); + +assert.commandWorked(sessionDB.getCollection(collName).insert({_id: "first in txn"})); +assert.commandWorked(sessionDB.getCollection(collName).insert({_id: "next in txn"})); +assert.commandWorked(sessionDB.getCollection(collName).insert({_id: "next-last in txn"})); +assert.commandWorked(sessionDB.getCollection(collName).insert({_id: "last in txn"})); + +jsTestLog("Committing transaction"); +assert.commandWorked(session.commitTransaction_forTesting()); +stopReplProducerOnDocumentFailPoint.wait(); +pauseBatchApplicationAfterWritingOplogEntriesFailPoint.wait(); + +jsTestLog("Starting initial sync"); +assert.commandWorked(initialSyncNode.adminCommand( + {configureFailPoint: "initialSyncHangBeforeChoosingSyncSource", mode: "off"})); + +// The bug we're testing for is a race. The way we fix the race is to wait until it resolves. +// So it's very difficult to test it deterministically; if we use failpoints to force the race to +// happen, we'll just hang when the code is correct. Instead we hold the sync source in the +// condition that causes the race until we reach the point it's about to happen, then release +// it hoping it'll fail most of the time when the bug is present. +jsTestLog("Waiting to fetch the defaultBeginFetchingOplogEntry"); +checkLog.containsJson(initialSyncNode, 6608900); +stopReplProducerOnDocumentFailPoint.off(); +pauseBatchApplicationAfterWritingOplogEntriesFailPoint.off(); +replTest.awaitSecondaryNodes(); +replTest.awaitReplication(); + +// Make sure we got the transaction. +assert.eq(4, initialSyncNode.getDB(dbName)[collName].find().itcount()); + +printjson(initialSyncNode.getDB("local").oplog.rs.find().toArray()); + +replTest.stopSet(); +})(); diff --git a/src/mongo/db/repl/initial_syncer.cpp b/src/mongo/db/repl/initial_syncer.cpp index 8c18fc1f824..70cd0452644 100644 --- a/src/mongo/db/repl/initial_syncer.cpp +++ b/src/mongo/db/repl/initial_syncer.cpp @@ -892,6 +892,10 @@ void InitialSyncer::_lastOplogEntryFetcherCallbackForDefaultBeginFetchingOpTime( std::string logMsg = str::stream() << "Initial Syncer got the defaultBeginFetchingTimestamp: " << defaultBeginFetchingOpTime.toString(); pauseAtInitialSyncFuzzerSyncronizationPoints(logMsg); + LOGV2_DEBUG(6608900, + 1, + "Initial Syncer got the defaultBeginFetchingOpTime", + "defaultBeginFetchingOpTime"_attr = defaultBeginFetchingOpTime); status = _scheduleGetBeginFetchingOpTime_inlock(onCompletionGuard, defaultBeginFetchingOpTime); if (!status.isOK()) { @@ -907,10 +911,10 @@ Status InitialSyncer::_scheduleGetBeginFetchingOpTime_inlock( const auto preparedState = DurableTxnState_serializer(DurableTxnStateEnum::kPrepared); const auto inProgressState = DurableTxnState_serializer(DurableTxnStateEnum::kInProgress); - // Obtain the oldest active transaction timestamp from the remote by querying their - // transactions table. To prevent oplog holes from causing this query to return an inaccurate - // timestamp, we specify an afterClusterTime of Timestamp(0, 1) so that we wait for all previous - // writes to be visible. + // Obtain the oldest active transaction timestamp from the remote by querying their transactions + // table. To prevent oplog holes (primary) or a stale lastAppliedSnapshot (secondary) from + // causing this query to return an inaccurate timestamp, we specify an afterClusterTime of the + // defaultBeginFetchingOpTime so that we wait for all previous writes to be visible. BSONObjBuilder cmd; cmd.append("find", NamespaceString::kSessionTransactionsTableNamespace.coll().toString()); cmd.append("filter", @@ -919,7 +923,7 @@ Status InitialSyncer::_scheduleGetBeginFetchingOpTime_inlock( cmd.append("readConcern", BSON("level" << "local" - << "afterClusterTime" << Timestamp(0, 1))); + << "afterClusterTime" << defaultBeginFetchingOpTime.getTimestamp())); cmd.append("limit", 1); _beginFetchingOpTimeFetcher = std::make_unique<Fetcher>( |