summaryrefslogtreecommitdiff
path: root/src/mongo/db/read_concern_mongod.cpp
diff options
context:
space:
mode:
author: William Schultz <william.schultz@mongodb.com> 2018-12-21 16:33:35 -0500
committer: William Schultz <william.schultz@mongodb.com> 2018-12-21 16:43:58 -0500
commit: f15556ae1ba4f78d2823d54e38d7025c7e9ca4fb (patch)
tree: 74207795633775a6cd080534c0ea29693bc9a682 /src/mongo/db/read_concern_mongod.cpp
parent: 7b85fb049b087fbcb04d6f356a0f7bee9ef48190 (diff)
download: mongo-f15556ae1ba4f78d2823d54e38d7025c7e9ca4fb.tar.gz
SERVER-37560 Add core functionality for speculative majority reads
This patch adds functionality for "speculative" majority reads. These are reads that can satisfy "majority" read concern guarantees without support from the storage engine for reading from a historical snapshot. Queries of this nature will, by default, wait on the most recent lastApplied optime to majority commit after they complete, but before returning to the client. They can also optionally set a custom optime T to wait on, if they know that they did not read any data that reflects the effects of operations newer than optime T.
Diffstat (limited to 'src/mongo/db/read_concern_mongod.cpp')
-rw-r--r-- src/mongo/db/read_concern_mongod.cpp 52
1 files changed, 52 insertions, 0 deletions
diff --git a/src/mongo/db/read_concern_mongod.cpp b/src/mongo/db/read_concern_mongod.cpp
index 3f00c9890bf..0967968159b 100644
--- a/src/mongo/db/read_concern_mongod.cpp
+++ b/src/mongo/db/read_concern_mongod.cpp
@@ -38,6 +38,7 @@
#include "mongo/db/op_observer.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/repl/repl_client_info.h"
+#include "mongo/db/repl/speculative_majority_read_info.h"
#include "mongo/db/s/sharding_state.h"
#include "mongo/db/server_options.h"
#include "mongo/db/server_parameters.h"
@@ -295,6 +296,17 @@ MONGO_REGISTER_SHIM(waitForReadConcern)
// It is not used for atClusterTime because waitUntilOpTimeForRead handles waiting for
// the majority snapshot in that case.
+ // Handle speculative majority reads.
+ if (readConcernArgs.getMajorityReadMechanism() ==
+ repl::ReadConcernArgs::MajorityReadMechanism::kSpeculative) {
+ // We read from a local snapshot, so there is no need to set an explicit read source.
+ // Mark down that we need to block after the command is done to satisfy majority read
+ // concern, though.
+ auto& speculativeReadInfo = repl::SpeculativeMajorityReadInfo::get(opCtx);
+ speculativeReadInfo.setIsSpeculativeRead();
+ return Status::OK();
+ }
+
const int debugLevel = serverGlobalParams.clusterRole == ClusterRole::ConfigServer ? 1 : 2;
LOG(debugLevel) << "Waiting for 'committed' snapshot to be available for reading: "
@@ -365,4 +377,44 @@ MONGO_REGISTER_SHIM(waitForLinearizableReadConcern)(OperationContext* opCtx)->St
return awaitReplResult.status;
}
+MONGO_REGISTER_SHIM(waitForSpeculativeMajorityReadConcern)
+(OperationContext* opCtx, repl::SpeculativeMajorityReadInfo speculativeReadInfo)->Status {
+ invariant(speculativeReadInfo.isSpeculativeRead());  // Only valid for reads marked speculative.
+ // Waits for a speculative majority read to be satisfied and returns the status of that wait.
+ // Select the optime to wait on. A command may have selected a specific optime to wait on. If
+ // not, then we just wait on the most recent optime written on this node, i.e. lastApplied.
+ auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+ repl::OpTime waitOpTime;
+ auto lastApplied = replCoord->getMyLastAppliedOpTime();
+ auto speculativeReadOpTime = speculativeReadInfo.getSpeculativeReadOpTime();
+ if (speculativeReadOpTime) {
+ // A command-selected optime must not be greater than the current lastApplied.
+ invariant(*speculativeReadOpTime <= lastApplied);
+ waitOpTime = *speculativeReadOpTime;
+ } else {
+ waitOpTime = lastApplied;
+ }
+
+ // Block to make sure returned data is majority committed.
+ LOG(1) << "Servicing speculative majority read, waiting for optime " << waitOpTime
+ << " to become committed, current commit point: " << replCoord->getLastCommittedOpTime();
+
+ if (!opCtx->hasDeadline()) {  // Impose a default only when the operation has no deadline yet.
+ // TODO (SERVER-38727): Replace this with a user specified timeout value, to address the
+ // fact that getMore commands do not respect maxTimeMS properly. Currently, this hard-coded
+ // value represents the maximum time we are ever willing to wait for an optime to majority
+ // commit when doing a speculative majority read. We make this value rather conservative.
+ auto timeout = Seconds(15);
+ opCtx->setDeadlineAfterNowBy(timeout, ErrorCodes::MaxTimeMSExpired);
+ }
+ Timer t;  // Measures how long the wait below takes, for the success log message.
+ auto waitStatus = replCoord->awaitOpTimeCommitted(opCtx, waitOpTime);
+ if (waitStatus.isOK()) {  // Log only on success; a failed wait is returned to the caller as-is.
+ LOG(1) << "Optime " << waitOpTime << " became majority committed, waited " << t.millis()
+ << "ms for speculative majority read to be satisfied.";
+ }
+ return waitStatus;
+}
+
+
} // namespace mongo