summary | refs | log | tree | commit | diff
path: root/src/mongo/db/repl/storage_interface_impl.cpp
diff options
context:
space:
mode:
author Dianna Hohensee <dianna.hohensee@mongodb.com> 2020-05-05 13:33:48 -0400
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-05-06 16:11:54 +0000
commit dcc42b3db40ecc1cb3ca278d9dcc2208a6c7734a (patch)
tree 3db0d7c6a9de47cae925cf0652358ff2600ef9f0 /src/mongo/db/repl/storage_interface_impl.cpp
parent dc716555efbfca50d6d00f860153d191d1c59e9e (diff)
download mongo-dcc42b3db40ecc1cb3ca278d9dcc2208a6c7734a.tar.gz
SERVER-47959 JournalFlusher will retry oplog reads on WriteConflictExceptions caused by a concurrent {full:true} validate command on the oplog collection
Diffstat (limited to 'src/mongo/db/repl/storage_interface_impl.cpp')
-rw-r--r-- src/mongo/db/repl/storage_interface_impl.cpp 45
1 files changed, 36 insertions, 9 deletions
diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp
index e9b5f68d3d6..0b0e2e4e065 100644
--- a/src/mongo/db/repl/storage_interface_impl.cpp
+++ b/src/mongo/db/repl/storage_interface_impl.cpp
@@ -1048,19 +1048,11 @@ boost::optional<BSONObj> StorageInterfaceImpl::findOplogEntryLessThanOrEqualToTi
invariant(oplog);
invariant(opCtx->lockState()->isLocked());
- // Using a YieldPolicy WRITE_CONFLICT_RETRY_ONLY that will allow query to retry on
- // WriteConflictExceptions without releasing locks that are important to callers.
- //
- // This read can run concurrently with the validate cmd's WT verify operation due to the special
- // locking rules for internal operations accessing the oplog collection. Validate holds a MODE_X
- // collection lock for WT verify, but an internal read only needs a MODE_IS global lock. Trying
- // to open a cursor on a collection that has a verify operation running produces an EBUSY error
- // that we then convert to a WCE.
std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> exec =
InternalPlanner::collectionScan(opCtx,
NamespaceString::kRsOplogNamespace.ns(),
oplog,
- PlanExecutor::WRITE_CONFLICT_RETRY_ONLY,
+ PlanExecutor::NO_YIELD,
InternalPlanner::BACKWARD);
// A record id in the oplog collection is equivalent to the document's timestamp field.
@@ -1082,6 +1074,41 @@ boost::optional<BSONObj> StorageInterfaceImpl::findOplogEntryLessThanOrEqualToTi
return boost::none;
}
+boost::optional<BSONObj> StorageInterfaceImpl::findOplogEntryLessThanOrEqualToTimestampRetryOnWCE(
+ OperationContext* opCtx, Collection* oplogCollection, const Timestamp& timestamp) {
+ // Wrapper around findOplogEntryLessThanOrEqualToTimestamp() that retries the read, without
+ // bound, whenever it throws a WriteConflictException; any other exception propagates.
+ //
+ // Oplog reads are specially done under only MODE_IS global locks, without database or
+ // collection level intent locks, so they can run concurrently with validate cmds that take
+ // collection MODE_X locks. Validate with {full:true} calls WT::verify on the collection, which
+ // makes concurrent readers hit WT EBUSY errors that MongoDB converts into
+ // WriteConflictException errors. The greater operation using this helper cannot simply fail
+ // because it would cause correctness errors, so retry until the validate cmd finishes.
+
+ int retries = 0;
+ while (true) {
+ try {
+ return findOplogEntryLessThanOrEqualToTimestamp(opCtx, oplogCollection, timestamp);
+ } catch (const WriteConflictException&) {
+ // Log on the first conflict and then on every 10th retry; with the 500ms sleep below
+ // that is roughly once every 5 seconds. Both values are rather arbitrary.
+ if (retries % 10 == 0) {
+ LOGV2(47959000,
+ "Reading the oplog collection conflicts with a validate cmd. Continuing to "
+ "retry.",
+ "retries"_attr = retries);
+ }
+
+ ++retries;
+
+ // Back off before retrying so we do not keep hammering the system while the validate cmd
+ // finishes. NOTE(review): presumably sleepFor() throws if opCtx is interrupted; confirm.
+ opCtx->sleepFor(Milliseconds(500));
+ }
+ }
+}
+
Timestamp StorageInterfaceImpl::getLatestOplogTimestamp(OperationContext* opCtx) {
auto statusWithTimestamp = [&]() {
AutoGetOplog oplogRead(opCtx, OplogAccessMode::kRead);