diff options
author | Dianna Hohensee <dianna.hohensee@mongodb.com> | 2020-05-05 13:33:48 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-05-06 16:11:54 +0000 |
commit | dcc42b3db40ecc1cb3ca278d9dcc2208a6c7734a (patch) | |
tree | 3db0d7c6a9de47cae925cf0652358ff2600ef9f0 /src/mongo/db/repl/storage_interface_impl.cpp | |
parent | dc716555efbfca50d6d00f860153d191d1c59e9e (diff) | |
download | mongo-dcc42b3db40ecc1cb3ca278d9dcc2208a6c7734a.tar.gz |
SERVER-47959 JournalFlusher will retry oplog reads on WriteConflictExceptions caused by a concurrent {full:true} validate command on the oplog collection
Diffstat (limited to 'src/mongo/db/repl/storage_interface_impl.cpp')
-rw-r--r-- | src/mongo/db/repl/storage_interface_impl.cpp | 45 |
1 file changed, 36 insertions, 9 deletions
```diff
diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp
index e9b5f68d3d6..0b0e2e4e065 100644
--- a/src/mongo/db/repl/storage_interface_impl.cpp
+++ b/src/mongo/db/repl/storage_interface_impl.cpp
@@ -1048,19 +1048,11 @@ boost::optional<BSONObj> StorageInterfaceImpl::findOplogEntryLessThanOrEqualToTi
     invariant(oplog);
     invariant(opCtx->lockState()->isLocked());
 
-    // Using a YieldPolicy WRITE_CONFLICT_RETRY_ONLY that will allow query to retry on
-    // WriteConflictExceptions without releasing locks that are important to callers.
-    //
-    // This read can run concurrently with the validate cmd's WT verify operation due to the special
-    // locking rules for internal operations accessing the oplog collection. Validate holds a MODE_X
-    // collection lock for WT verify, but an internal read only needs a MODE_IS global lock. Trying
-    // to open a cursor on a collection that has a verify operation running produces an EBUSY error
-    // that we then convert to a WCE.
     std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> exec =
         InternalPlanner::collectionScan(opCtx,
                                         NamespaceString::kRsOplogNamespace.ns(),
                                         oplog,
-                                        PlanExecutor::WRITE_CONFLICT_RETRY_ONLY,
+                                        PlanExecutor::NO_YIELD,
                                         InternalPlanner::BACKWARD);
 
     // A record id in the oplog collection is equivalent to the document's timestamp field.
@@ -1082,6 +1074,41 @@ boost::optional<BSONObj> StorageInterfaceImpl::findOplogEntryLessThanOrEqualToTi
     return boost::none;
 }
 
+boost::optional<BSONObj> StorageInterfaceImpl::findOplogEntryLessThanOrEqualToTimestampRetryOnWCE(
+    OperationContext* opCtx, Collection* oplogCollection, const Timestamp& timestamp) {
+    // Oplog reads are specially done under only MODE_IS global locks, without database or
+    // collection level intent locks. Therefore, reads can run concurrently with validate cmds that
+    // take collection MODE_X locks. Validate with {full:true} set calls WT::verify on the
+    // collection, which causes concurrent readers to hit WT EBUSY errors that MongoDB converts
+    // into WriteConflictException errors.
+    //
+    // Consequently, this code must be resilient to WCE errors and retry until the validate cmd
+    // finishes. The greater operation using this helper cannot simply fail because it would cause
+    // correctness errors.
+
+    int retries = 0;
+    while (true) {
+        try {
+            return findOplogEntryLessThanOrEqualToTimestamp(opCtx, oplogCollection, timestamp);
+        } catch (const WriteConflictException&) {
+            // This will log a message about the conflict initially and then every 5 seconds, with
+            // the current rather arbitrary settings.
+            if (retries % 10 == 0) {
+                LOGV2(47959000,
+                      "Reading the oplog collection conflicts with a validate cmd. Continuing to "
+                      "retry.",
+                      "retries"_attr = retries);
+            }
+
+            ++retries;
+
+            // Sleep a bit so we do not keep hammering the system with retries while the validate
+            // cmd finishes.
+            opCtx->sleepFor(Milliseconds(500));
+        }
+    }
+}
+
 Timestamp StorageInterfaceImpl::getLatestOplogTimestamp(OperationContext* opCtx) {
     auto statusWithTimestamp = [&]() {
         AutoGetOplog oplogRead(opCtx, OplogAccessMode::kRead);
```