diff options
author | William Schultz <william.schultz@mongodb.com> | 2018-12-21 16:33:35 -0500 |
---|---|---|
committer | William Schultz <william.schultz@mongodb.com> | 2018-12-21 16:43:58 -0500 |
commit | f15556ae1ba4f78d2823d54e38d7025c7e9ca4fb (patch) | |
tree | 74207795633775a6cd080534c0ea29693bc9a682 /src/mongo/db/read_concern_mongod.cpp | |
parent | 7b85fb049b087fbcb04d6f356a0f7bee9ef48190 (diff) | |
download | mongo-f15556ae1ba4f78d2823d54e38d7025c7e9ca4fb.tar.gz |
SERVER-37560 Add core functionality for speculative majority reads
This patch adds functionality for "speculative" majority reads. These are reads that can satisfy "majority" read concern guarantees without support from the storage engine for reading from a historical snapshot. Queries of this nature will, by default, wait on the most recent lastApplied optime to majority commit after they complete, but before returning to the client. They can also optionally set a custom optime T to wait on, if they know that they did not read any data that reflects the effects of operations newer than optime T.
Diffstat (limited to 'src/mongo/db/read_concern_mongod.cpp')
-rw-r--r-- | src/mongo/db/read_concern_mongod.cpp | 52 |
1 files changed, 52 insertions, 0 deletions
diff --git a/src/mongo/db/read_concern_mongod.cpp b/src/mongo/db/read_concern_mongod.cpp index 3f00c9890bf..0967968159b 100644 --- a/src/mongo/db/read_concern_mongod.cpp +++ b/src/mongo/db/read_concern_mongod.cpp @@ -38,6 +38,7 @@ #include "mongo/db/op_observer.h" #include "mongo/db/operation_context.h" #include "mongo/db/repl/repl_client_info.h" +#include "mongo/db/repl/speculative_majority_read_info.h" #include "mongo/db/s/sharding_state.h" #include "mongo/db/server_options.h" #include "mongo/db/server_parameters.h" @@ -295,6 +296,17 @@ MONGO_REGISTER_SHIM(waitForReadConcern) // It is not used for atClusterTime because waitUntilOpTimeForRead handles waiting for // the majority snapshot in that case. + // Handle speculative majority reads. + if (readConcernArgs.getMajorityReadMechanism() == + repl::ReadConcernArgs::MajorityReadMechanism::kSpeculative) { + // We read from a local snapshot, so there is no need to set an explicit read source. + // Mark down that we need to block after the command is done to satisfy majority read + // concern, though. + auto& speculativeReadInfo = repl::SpeculativeMajorityReadInfo::get(opCtx); + speculativeReadInfo.setIsSpeculativeRead(); + return Status::OK(); + } + const int debugLevel = serverGlobalParams.clusterRole == ClusterRole::ConfigServer ? 1 : 2; LOG(debugLevel) << "Waiting for 'committed' snapshot to be available for reading: " @@ -365,4 +377,44 @@ MONGO_REGISTER_SHIM(waitForLinearizableReadConcern)(OperationContext* opCtx)->St return awaitReplResult.status; } +MONGO_REGISTER_SHIM(waitForSpeculativeMajorityReadConcern) +(OperationContext* opCtx, repl::SpeculativeMajorityReadInfo speculativeReadInfo)->Status { + invariant(speculativeReadInfo.isSpeculativeRead()); + + // Select the optime to wait on. A command may have selected a specific optime to wait on. If + // not, then we just wait on the most recent optime written on this node i.e. lastApplied. + auto replCoord = repl::ReplicationCoordinator::get(opCtx); + repl::OpTime waitOpTime; + auto lastApplied = replCoord->getMyLastAppliedOpTime(); + auto speculativeReadOpTime = speculativeReadInfo.getSpeculativeReadOpTime(); + if (speculativeReadOpTime) { + // The optime provided must not be greater than the current lastApplied. + invariant(*speculativeReadOpTime <= lastApplied); + waitOpTime = *speculativeReadOpTime; + } else { + waitOpTime = lastApplied; + } + + // Block to make sure returned data is majority committed. + LOG(1) << "Servicing speculative majority read, waiting for optime " << waitOpTime + << " to become committed, current commit point: " << replCoord->getLastCommittedOpTime(); + + if (!opCtx->hasDeadline()) { + // TODO (SERVER-38727): Replace this with a user specified timeout value, to address the + // fact that getMore commands do not respect maxTimeMS properly. Currently, this hard-coded + // value represents the maximum time we are ever willing to wait for an optime to majority + // commit when doing a speculative majority read. We make this value rather conservative. + auto timeout = Seconds(15); + opCtx->setDeadlineAfterNowBy(timeout, ErrorCodes::MaxTimeMSExpired); + } + Timer t; + auto waitStatus = replCoord->awaitOpTimeCommitted(opCtx, waitOpTime); + if (waitStatus.isOK()) { + LOG(1) << "Optime " << waitOpTime << " became majority committed, waited " << t.millis() + << "ms for speculative majority read to be satisfied."; + } + return waitStatus; +} + + } // namespace mongo |