summary refs log tree commit diff
path: root/src/mongo
diff options
context:
space:
mode:
author Siyuan Zhou <siyuan.zhou@mongodb.com> 2019-08-21 01:54:48 +0000
committer Siyuan Zhou <visualzhou@gmail.com> 2019-09-04 22:28:10 -0400
commit b198f2b3b2502a92f76ea491254b9b6f10dd38ff (patch)
tree 5fd667f752a0853c14710671844905fadc906349 /src/mongo
parent 67ca07dcb52af228b9f51c379f1f5c23c71b1b69 (diff)
download mongo-b198f2b3b2502a92f76ea491254b9b6f10dd38ff.tar.gz
SERVER-42910 Oplog query with higher timestamp but lower term than the sync source shouldn't time out due to afterClusterTime.
(cherry picked from commit f54709196711c63a429b71f47c584661286d675f)
Diffstat (limited to 'src/mongo')
-rw-r--r-- src/mongo/db/repl/oplog_fetcher.cpp 6
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp 4
2 files changed, 7 insertions, 3 deletions
diff --git a/src/mongo/db/repl/oplog_fetcher.cpp b/src/mongo/db/repl/oplog_fetcher.cpp
index 3162319ab2b..45c880be0ed 100644
--- a/src/mongo/db/repl/oplog_fetcher.cpp
+++ b/src/mongo/db/repl/oplog_fetcher.cpp
@@ -362,9 +362,9 @@ BSONObj OplogFetcher::_makeFindCommandObject(const NamespaceString& nss,
cmdBob.append("term", term);
}
- // This ensures that the sync source never returns an empty batch of documents for the first set
- // of results.
- cmdBob.append("readConcern", BSON("afterClusterTime" << lastOpTimeFetched.getTimestamp()));
+ // This ensures that the sync source waits for all earlier oplog writes to be visible.
+ // Since Timestamp(0, 0) isn't allowed, Timestamp(0, 1) is the minimal we can use.
+ cmdBob.append("readConcern", BSON("afterClusterTime" << Timestamp(0, 1)));
return cmdBob.obj();
}
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 56c281e251c..4f57c4ccedf 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -1477,6 +1477,10 @@ Status ReplicationCoordinatorImpl::_waitUntilOpTime(OperationContext* opCtx,
// We wait only on primaries, because on secondaries, other mechanisms assure that the
// last applied optime is always hole-free, and waiting for all earlier writes to be visible
// can deadlock against secondary command application.
+ //
+ // Note that oplog queries by secondary nodes depend on this behavior to wait for
+ // all oplog holes to be filled in, despite providing an afterClusterTime field
+ // with Timestamp(0,1).
_storage->waitForAllEarlierOplogWritesToBeVisible(opCtx, /* primaryOnly =*/true);
}