summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Misha Tyulenev <misha@mongodb.com> 2017-04-07 14:07:05 -0400
committer Misha Tyulenev <misha@mongodb.com> 2017-04-07 14:59:03 -0400
commit fed0e2cc62a65067c7f2991554206a95d2619172 (patch)
tree 3f8ce1477e1e97406a4aeaf9d2799c4520b3cac3
parent 53934264e65978b631978e8fb60b8bdb1b643cab (diff)
download mongo-fed0e2cc62a65067c7f2991554206a95d2619172.tar.gz
SERVER-27772 Force noop write on primary when readConcern:afterClusterTime > clusterTime on secondaries
-rw-r--r-- src/mongo/db/SConscript 5
-rw-r--r-- src/mongo/db/commands/dbcommands.cpp 183
-rw-r--r-- src/mongo/db/commands/oplog_note.cpp 96
-rw-r--r-- src/mongo/db/read_concern.cpp 172
-rw-r--r-- src/mongo/db/read_concern.h 56
-rw-r--r-- src/mongo/db/s/SConscript 1
6 files changed, 254 insertions, 259 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript
index 562c1d1297b..a88cc433d3b 100644
--- a/src/mongo/db/SConscript
+++ b/src/mongo/db/SConscript
@@ -773,9 +773,8 @@ env.Library(
)
env.Library(
- target="rw_concern_d",
+ target="write_concern_d",
source=[
- "read_concern.cpp",
"write_concern.cpp",
],
LIBDEPS=[
@@ -941,7 +940,7 @@ env.Library(
"repl/sync_tail",
"repl/topology_coordinator_impl",
"run_commands",
- "rw_concern_d",
+ "write_concern_d",
"s/commands",
"s/metadata",
"s/sharding",
diff --git a/src/mongo/db/commands/dbcommands.cpp b/src/mongo/db/commands/dbcommands.cpp
index ab85d4c9470..648c1fb2447 100644
--- a/src/mongo/db/commands/dbcommands.cpp
+++ b/src/mongo/db/commands/dbcommands.cpp
@@ -84,7 +84,6 @@
#include "mongo/db/query/get_executor.h"
#include "mongo/db/query/internal_plans.h"
#include "mongo/db/query/query_planner.h"
-#include "mongo/db/read_concern.h"
#include "mongo/db/repair_database.h"
#include "mongo/db/repl/optime.h"
#include "mongo/db/repl/read_concern_args.h"
@@ -93,6 +92,7 @@
#include "mongo/db/repl/replication_coordinator_global.h"
#include "mongo/db/s/operation_sharding_state.h"
#include "mongo/db/s/sharding_state.h"
+#include "mongo/db/server_parameters.h"
#include "mongo/db/stats/storage_stats.h"
#include "mongo/db/write_concern.h"
#include "mongo/rpc/metadata.h"
@@ -123,6 +123,15 @@ using std::stringstream;
using std::unique_ptr;
namespace {
+
+// Test-only switch (startup-only server parameter, default false): when set, the replication
+// related checks are skipped, isolating the storage/query side of snapshotting.
+bool testingSnapshotBehaviorInIsolation = false;
+ExportedServerParameter<bool, ServerParameterType::kStartupOnly> TestingSnapshotBehaviorInIsolation(
+ ServerParameterSet::getGlobal(),
+ "testingSnapshotBehaviorInIsolation",
+ &testingSnapshotBehaviorInIsolation);
+
void registerErrorImpl(OperationContext* opCtx, const DBException& exception) {
CurOp::get(opCtx)->debug().exceptionInfo = exception.getInfo();
}
@@ -135,7 +144,7 @@ MONGO_INITIALIZER(InitializeRegisterErrorHandler)(InitializerContext* const) {
* For replica set members it returns the last known op time from opCtx. Otherwise will return
* uninitialized logical time.
*/
-LogicalTime _getClientOperationTime(OperationContext* opCtx) {
+LogicalTime getClientOperationTime(OperationContext* opCtx) {
repl::ReplicationCoordinator* replCoord =
repl::ReplicationCoordinator::get(opCtx->getClient()->getServiceContext());
const bool isReplSet =
@@ -156,9 +165,9 @@ LogicalTime _getClientOperationTime(OperationContext* opCtx) {
*
* TODO: SERVER-28419 Do not compute operationTime if replica set does not propagate clusterTime.
*/
-LogicalTime _computeOperationTime(OperationContext* opCtx,
- LogicalTime startOperationTime,
- repl::ReadConcernLevel level) {
+LogicalTime computeOperationTime(OperationContext* opCtx,
+ LogicalTime startOperationTime,
+ repl::ReadConcernLevel level) {
repl::ReplicationCoordinator* replCoord =
repl::ReplicationCoordinator::get(opCtx->getClient()->getServiceContext());
const bool isReplSet =
@@ -168,7 +177,7 @@ LogicalTime _computeOperationTime(OperationContext* opCtx,
return LogicalTime();
}
- auto operationTime = _getClientOperationTime(opCtx);
+ auto operationTime = getClientOperationTime(opCtx);
invariant(operationTime >= startOperationTime);
// If the last operationTime has not changed, consider this command a read, and, for replica set
@@ -183,6 +192,160 @@ LogicalTime _computeOperationTime(OperationContext* opCtx,
return operationTime;
}
+
+/**
+ * Asks this shard's primary to append a no-op oplog entry when 'clusterTime' is ahead of the last
+ * optime applied on this node, so that a following afterClusterTime wait can be satisfied.
+ *
+ * Returns OK without doing anything when this node has already applied 'clusterTime' or when
+ * sharding is not enabled. Otherwise returns the status of looking up the local shard or of
+ * dispatching the command.
+ * NOTE(review): only the status of the StatusWith returned by runCommand is propagated; whether
+ * the command's own result status also needs checking should be confirmed.
+ */
+Status makeNoopWriteIfNeeded(OperationContext* opCtx, LogicalTime clusterTime) {
+ repl::ReplicationCoordinator* const replCoord = repl::ReplicationCoordinator::get(opCtx);
+ auto lastAppliedTime = LogicalTime(replCoord->getMyLastAppliedOpTime().getTimestamp());
+ if (clusterTime > lastAppliedTime) {
+ auto shardingState = ShardingState::get(opCtx);
+ // Sharding is not enabled on this node (standalone replica set), so there is no need to
+ // advance the oplog on the primary.
+ if (!shardingState->enabled()) {
+ return Status::OK();
+ }
+
+ auto myShard =
+ Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardingState->getShardName());
+ if (!myShard.isOK()) {
+ return myShard.getStatus();
+ }
+
+ // The command name must match the name AppendOplogNoteCmd registers ("appendOplogNote");
+ // any other spelling is not a known command and the no-op write would never happen.
+ auto swRes = myShard.getValue()->runCommand(
+ opCtx,
+ ReadPreferenceSetting(ReadPreference::PrimaryOnly),
+ "admin",
+ BSON("appendOplogNote" << 1 << "clusterTime" << clusterTime.asTimestamp() << "data"
+ << BSON("append noop write" << 1)),
+ Shard::RetryPolicy::kIdempotent);
+ return swRes.getStatus();
+ }
+ return Status::OK();
+}
+
+Status waitForReadConcern(OperationContext* opCtx, const repl::ReadConcernArgs& readConcernArgs) {
+ repl::ReplicationCoordinator* const replCoord = repl::ReplicationCoordinator::get(opCtx);
+
+ if (readConcernArgs.getLevel() == repl::ReadConcernLevel::kLinearizableReadConcern) {
+ if (replCoord->getReplicationMode() != repl::ReplicationCoordinator::modeReplSet) {
+ // For master/slave and standalone nodes, Linearizable Read is not supported.
+ return {ErrorCodes::NotAReplicaSet,
+ "node needs to be a replica set member to use read concern"};
+ }
+
+ // Replica sets running pv0 do not support linearizable read concern until further testing
+ // is completed (SERVER-27025).
+ if (!replCoord->isV1ElectionProtocol()) {
+ return {
+ ErrorCodes::IncompatibleElectionProtocol,
+ "Replica sets running protocol version 0 do not support readConcern: linearizable"};
+ }
+
+ if (readConcernArgs.getArgsOpTime()) {
+ return {ErrorCodes::FailedToParse,
+ "afterOpTime not compatible with linearizable read concern"};
+ }
+
+ if (!replCoord->getMemberState().primary()) {
+ return {ErrorCodes::NotMaster,
+ "cannot satisfy linearizable read concern on non-primary node"};
+ }
+ }
+
+ auto afterClusterTime = readConcernArgs.getArgsClusterTime();
+ if (afterClusterTime) {
+ auto currentTime = LogicalClock::get(opCtx)->getClusterTime().getTime();
+ if (currentTime < *afterClusterTime) {
+ return {ErrorCodes::InvalidOptions,
+ "readConcern afterClusterTime must not be greater than clusterTime value"};
+ }
+ }
+
+ // Skip the OpTime wait when testing snapshot behavior in isolation or with an empty read concern.
+ if (!testingSnapshotBehaviorInIsolation && !readConcernArgs.isEmpty()) {
+ if (afterClusterTime) {
+ auto status = makeNoopWriteIfNeeded(opCtx, *afterClusterTime);
+ if (!status.isOK()) {
+ LOG(1) << "failed noop write due to " << status.toString();  // best effort: still wait below
+ }
+ }
+
+ auto status = replCoord->waitUntilOpTimeForRead(opCtx, readConcernArgs);
+ if (!status.isOK()) {
+ return status;
+ }
+ }
+
+ if ((replCoord->getReplicationMode() == repl::ReplicationCoordinator::Mode::modeReplSet ||
+ testingSnapshotBehaviorInIsolation) &&
+ readConcernArgs.getLevel() == repl::ReadConcernLevel::kMajorityReadConcern) {
+ // ReadConcern Majority is not supported in ProtocolVersion 0.
+ if (!testingSnapshotBehaviorInIsolation && !replCoord->isV1ElectionProtocol()) {
+ return {ErrorCodes::ReadConcernMajorityNotEnabled,
+ str::stream() << "Replica sets running protocol version 0 do not support "
+ "readConcern: majority"};
+ }
+
+ const int debugLevel = serverGlobalParams.clusterRole == ClusterRole::ConfigServer ? 1 : 2;
+
+ LOG(debugLevel) << "Waiting for 'committed' snapshot to be available for reading: "
+ << readConcernArgs;
+
+ Status status = opCtx->recoveryUnit()->setReadFromMajorityCommittedSnapshot();
+
+ // Wait for a committed snapshot, then retry switching the read source, until it is available.
+ while (status == ErrorCodes::ReadConcernMajorityNotAvailableYet) {
+ LOG(debugLevel) << "Snapshot not available yet.";
+ replCoord->waitUntilSnapshotCommitted(opCtx, SnapshotName::min());
+ status = opCtx->recoveryUnit()->setReadFromMajorityCommittedSnapshot();
+ }
+
+ if (!status.isOK()) {
+ return status;
+ }
+
+ LOG(debugLevel) << "Using 'committed' snapshot: " << CurOp::get(opCtx)->query();
+ }
+
+ return Status::OK();
+}
+
+/**
+ * Confirms that this node is still primary on behalf of a linearizable read. Writes a
+ * "linearizable read" no-op message through the op observer while holding intent locks on the
+ * oplog, then waits for this client's last op to satisfy a majority write concern.
+ *
+ * Returns NotMaster if the node cannot accept writes, LinearizableReadConcernError if the wait
+ * fails with WriteConcernFailed, and otherwise the status of the replication wait.
+ */
+Status waitForLinearizableReadConcern(OperationContext* opCtx) {
+
+ repl::ReplicationCoordinator* replCoord =
+ repl::ReplicationCoordinator::get(opCtx->getClient()->getServiceContext());
+
+ {
+ Lock::DBLock lk(opCtx, "local", MODE_IX);
+ Lock::CollectionLock lock(opCtx->lockState(), "local.oplog.rs", MODE_IX);
+
+ if (!replCoord->canAcceptWritesForDatabase(opCtx, "admin")) {
+ return {ErrorCodes::NotMaster,
+ "No longer primary when waiting for linearizable read concern"};
+ }
+
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+
+ WriteUnitOfWork uow(opCtx);
+ opCtx->getClient()->getServiceContext()->getOpObserver()->onOpMessage(
+ opCtx,
+ BSON("msg"
+ << "linearizable read"));
+ uow.commit();
+ }
+ // Report the same namespace that is locked above ("local.oplog.rs").
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(
+ opCtx, "waitForLinearizableReadConcern", "local.oplog.rs");
+ }
+ WriteConcernOptions wc = WriteConcernOptions(
+ WriteConcernOptions::kMajority, WriteConcernOptions::SyncMode::UNSET, 0);
+
+ repl::OpTime lastOpApplied = repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp();
+ auto awaitReplResult = replCoord->awaitReplication(opCtx, lastOpApplied, wc);
+ // A failed majority wait means the read could not be confirmed as linearizable.
+ if (awaitReplResult.status == ErrorCodes::WriteConcernFailed) {
+ return Status(ErrorCodes::LinearizableReadConcernError,
+ "Failed to confirm that read was linearizable.");
+ }
+ return awaitReplResult.status;
+}
+
} // namespace
@@ -1435,7 +1598,7 @@ bool Command::run(OperationContext* opCtx,
std::string errmsg;
bool result;
- auto startOperationTime = _getClientOperationTime(opCtx);
+ auto startOperationTime = getClientOperationTime(opCtx);
if (!supportsWriteConcern(cmd)) {
if (commandSpecifiesWriteConcern(cmd)) {
auto result = appendCommandStatus(
@@ -1505,7 +1668,7 @@ bool Command::run(OperationContext* opCtx,
appendCommandStatus(inPlaceReplyBob, result, errmsg);
- auto operationTime = _computeOperationTime(
+ auto operationTime = computeOperationTime(
opCtx, startOperationTime, readConcernArgsStatus.getValue().getLevel());
// An uninitialized operation time means the cluster time is not propagated, so the operation
@@ -1710,13 +1873,13 @@ void mongo::execCommandDatabase(OperationContext* opCtx,
BSONObjBuilder metadataBob;
appendReplyMetadata(opCtx, request, &metadataBob);
- // Ideally this should be using _computeOperationTime, but with the code
+ // Ideally this should be using computeOperationTime, but with the code
// structured as it currently is we don't know the startOperationTime or
// readConcern at this point. Using the cluster time instead of the actual
// operation time is correct, but can result in extra waiting on subsequent
// afterClusterTime reads.
//
- // TODO: SERVER-28445 change this to use _computeOperationTime once the exception handling
+ // TODO: SERVER-28445 change this to use computeOperationTime once the exception handling
// path is moved into Command::run()
auto operationTime = LogicalClock::get(opCtx)->getClusterTime().getTime();
diff --git a/src/mongo/db/commands/oplog_note.cpp b/src/mongo/db/commands/oplog_note.cpp
index 0c59dca849a..fd6083a6a89 100644
--- a/src/mongo/db/commands/oplog_note.cpp
+++ b/src/mongo/db/commands/oplog_note.cpp
@@ -26,22 +26,59 @@
* then also delete it in the license file.
*/
-#include <string>
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kCommand
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/commands.h"
+
+#include "mongo/base/init.h"
#include "mongo/bson/util/bson_extract.h"
#include "mongo/db/auth/action_type.h"
#include "mongo/db/auth/authorization_session.h"
#include "mongo/db/auth/resource_pattern.h"
-#include "mongo/db/commands.h"
#include "mongo/db/concurrency/d_concurrency.h"
+#include "mongo/db/concurrency/write_conflict_exception.h"
+#include "mongo/db/curop.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/op_observer.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/repl/oplog.h"
+#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/replication_coordinator_global.h"
#include "mongo/db/service_context.h"
+#include "mongo/util/log.h"
namespace mongo {
+namespace {
+Status _performNoopWrite(OperationContext* opCtx, BSONObj msgObj, StringData note) {
+ repl::ReplicationCoordinator* const replCoord = repl::ReplicationCoordinator::get(opCtx);
+ // Writes 'msgObj' as a no-op message via the op observer. GlobalLock + lockMMAPV1Flush is used
+ // instead of DBLock so that we can return, rather than block, when the lock is not available.
+ // That may happen when the primary steps down and a shared global lock is acquired.
+ Lock::GlobalLock lock(opCtx, MODE_IX, 1);  // NOTE(review): '1' looks like a lock timeout -- confirm
+
+ if (!lock.isLocked()) {
+ LOG(1) << "Global lock is not available skipping noopWrite";
+ return {ErrorCodes::LockFailed, "Global lock is not available"};
+ }
+ opCtx->lockState()->lockMMAPV1Flush();
+
+ // "admin" is a proxy for being a primary; passing "local" would return true on a secondary too.
+ if (!replCoord->canAcceptWritesForDatabase(opCtx, "admin")) {
+ return {ErrorCodes::NotMaster, "Not a primary"};
+ }
+
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
+ WriteUnitOfWork uow(opCtx);
+ opCtx->getClient()->getServiceContext()->getOpObserver()->onOpMessage(opCtx, msgObj);
+ uow.commit();
+ }
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(opCtx, note, repl::rsOplogName);
+
+ return Status::OK();
+}
+} // namespace
using std::string;
using std::stringstream;
@@ -49,18 +86,23 @@ using std::stringstream;
class AppendOplogNoteCmd : public Command {
public:
AppendOplogNoteCmd() : Command("appendOplogNote") {}
+
virtual bool slaveOk() const {
return false;
}
+
virtual bool adminOnly() const {
return true;
}
+
virtual bool supportsWriteConcern(const BSONObj& cmd) const override {
return true;
}
+
virtual void help(stringstream& help) const {
help << "Adds a no-op entry to the oplog";
}
+
virtual Status checkAuthForCommand(Client* client,
const std::string& dbname,
const BSONObj& cmdObj) {
@@ -70,32 +112,52 @@ public:
}
return Status::OK();
}
+
+ // Appends a no-op entry carrying the command's "data" object to the oplog. When "clusterTime" is
+ // supplied, the write happens only if that time is ahead of this node's last applied optime.
virtual bool run(OperationContext* opCtx,
const string& dbname,
BSONObj& cmdObj,
int,
string& errmsg,
BSONObjBuilder& result) {
- if (!repl::getGlobalReplicationCoordinator()->isReplEnabled()) {
- return appendCommandStatus(
- result,
- Status(ErrorCodes::NoReplicationEnabled,
- "Must have replication set up to run \"appendOplogNote\""));
- }
BSONElement dataElement;
- Status status = bsonExtractTypedField(cmdObj, "data", Object, &dataElement);
- if (!status.isOK()) {
- return appendCommandStatus(result, status);
+ auto dataStatus = bsonExtractTypedField(cmdObj, "data", Object, &dataElement);
+ if (!dataStatus.isOK()) {
+ return appendCommandStatus(result, dataStatus);
}
- Lock::GlobalWrite globalWrite(opCtx);
+ Timestamp clusterTime;
+ auto clusterTimeStatus = bsonExtractTimestampField(cmdObj, "clusterTime", &clusterTime);
- WriteUnitOfWork wuow(opCtx);
- getGlobalServiceContext()->getOpObserver()->onOpMessage(opCtx, dataElement.Obj());
- wuow.commit();
- return true;
+ auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+ if (!replCoord->isReplEnabled()) {
+ return appendCommandStatus(result,
+ {ErrorCodes::NoReplicationEnabled,
+ "Must have replication set up to run \"appendOplogNote\""});
+ }
+
+ if (!clusterTimeStatus.isOK()) {
+ if (clusterTimeStatus == ErrorCodes::NoSuchKey) { // no need to use clusterTime
+ return appendCommandStatus(
+ result, _performNoopWrite(opCtx, dataElement.Obj(), "appendOplogNote"));
+ }
+ return appendCommandStatus(result, clusterTimeStatus);
+ }
+
+ // Only write when the requested time has not been reached by this node yet.
+ auto lastAppliedOpTime = replCoord->getMyLastAppliedOpTime().getTimestamp();
+ if (clusterTime > lastAppliedOpTime) {
+ return appendCommandStatus(
+ result, _performNoopWrite(opCtx, dataElement.Obj(), "appendOplogNote"));
+ } else {
+ LOG(1) << "Not scheduling a noop write. Requested clusterTime " << clusterTime
+ << " is less or equal to the last primary OpTime: " << lastAppliedOpTime;
+ return appendCommandStatus(result, Status::OK());
+ }
}
+};
-} appendOplogNoteCmd;
+MONGO_INITIALIZER(RegisterAppendOpLogNoteCmd)(InitializerContext* context) {
+ new AppendOplogNoteCmd();  // never deleted here; presumably the Command base registers it -- confirm
+ return Status::OK();
+}
} // namespace mongo
diff --git a/src/mongo/db/read_concern.cpp b/src/mongo/db/read_concern.cpp
deleted file mode 100644
index 80602ff2fce..00000000000
--- a/src/mongo/db/read_concern.cpp
+++ /dev/null
@@ -1,172 +0,0 @@
-/**
- * Copyright (C) 2016 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kCommand
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/read_concern.h"
-
-#include "mongo/base/status.h"
-#include "mongo/base/status_with.h"
-#include "mongo/db/commands.h"
-#include "mongo/db/concurrency/d_concurrency.h"
-#include "mongo/db/concurrency/write_conflict_exception.h"
-#include "mongo/db/curop.h"
-#include "mongo/db/op_observer.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/db/repl/read_concern_args.h"
-#include "mongo/db/repl/repl_client_info.h"
-#include "mongo/db/repl/replication_coordinator.h"
-#include "mongo/db/server_options.h"
-#include "mongo/db/server_parameters.h"
-#include "mongo/util/log.h"
-
-namespace mongo {
-namespace {
-
-// This is a special flag that allows for testing of snapshot behavior by skipping the replication
-// related checks and isolating the storage/query side of snapshotting.
-bool testingSnapshotBehaviorInIsolation = false;
-ExportedServerParameter<bool, ServerParameterType::kStartupOnly> TestingSnapshotBehaviorInIsolation(
- ServerParameterSet::getGlobal(),
- "testingSnapshotBehaviorInIsolation",
- &testingSnapshotBehaviorInIsolation);
-
-} // namespace
-
-Status waitForReadConcern(OperationContext* opCtx, const repl::ReadConcernArgs& readConcernArgs) {
- repl::ReplicationCoordinator* const replCoord = repl::ReplicationCoordinator::get(opCtx);
-
- if (readConcernArgs.getLevel() == repl::ReadConcernLevel::kLinearizableReadConcern) {
- if (replCoord->getReplicationMode() != repl::ReplicationCoordinator::modeReplSet) {
- // For master/slave and standalone nodes, Linearizable Read is not supported.
- return {ErrorCodes::NotAReplicaSet,
- "node needs to be a replica set member to use read concern"};
- }
-
- // Replica sets running pv0 do not support linearizable read concern until further testing
- // is completed (SERVER-27025).
- if (!replCoord->isV1ElectionProtocol()) {
- return {
- ErrorCodes::IncompatibleElectionProtocol,
- "Replica sets running protocol version 0 do not support readConcern: linearizable"};
- }
-
- if (readConcernArgs.getArgsOpTime()) {
- return {ErrorCodes::FailedToParse,
- "afterOpTime not compatible with linearizable read concern"};
- }
-
- if (!replCoord->getMemberState().primary()) {
- return {ErrorCodes::NotMaster,
- "cannot satisfy linearizable read concern on non-primary node"};
- }
- }
-
- // Skip waiting for the OpTime when testing snapshot behavior
- if (!testingSnapshotBehaviorInIsolation && !readConcernArgs.isEmpty()) {
- Status status = replCoord->waitUntilOpTimeForRead(opCtx, readConcernArgs);
- if (!status.isOK()) {
- return status;
- }
- }
-
- if ((replCoord->getReplicationMode() == repl::ReplicationCoordinator::Mode::modeReplSet ||
- testingSnapshotBehaviorInIsolation) &&
- readConcernArgs.getLevel() == repl::ReadConcernLevel::kMajorityReadConcern) {
- // ReadConcern Majority is not supported in ProtocolVersion 0.
- if (!testingSnapshotBehaviorInIsolation && !replCoord->isV1ElectionProtocol()) {
- return {ErrorCodes::ReadConcernMajorityNotEnabled,
- str::stream() << "Replica sets running protocol version 0 do not support "
- "readConcern: majority"};
- }
-
- const int debugLevel = serverGlobalParams.clusterRole == ClusterRole::ConfigServer ? 1 : 2;
-
- LOG(debugLevel) << "Waiting for 'committed' snapshot to be available for reading: "
- << readConcernArgs;
-
- Status status = opCtx->recoveryUnit()->setReadFromMajorityCommittedSnapshot();
-
- // Wait until a snapshot is available.
- while (status == ErrorCodes::ReadConcernMajorityNotAvailableYet) {
- LOG(debugLevel) << "Snapshot not available yet.";
- replCoord->waitUntilSnapshotCommitted(opCtx, SnapshotName::min());
- status = opCtx->recoveryUnit()->setReadFromMajorityCommittedSnapshot();
- }
-
- if (!status.isOK()) {
- return status;
- }
-
- LOG(debugLevel) << "Using 'committed' snapshot: " << CurOp::get(opCtx)->query();
- }
-
- return Status::OK();
-}
-
-Status waitForLinearizableReadConcern(OperationContext* opCtx) {
-
- repl::ReplicationCoordinator* replCoord =
- repl::ReplicationCoordinator::get(opCtx->getClient()->getServiceContext());
-
- {
- Lock::DBLock lk(opCtx, "local", MODE_IX);
- Lock::CollectionLock lock(opCtx->lockState(), "local.oplog.rs", MODE_IX);
-
- if (!replCoord->canAcceptWritesForDatabase(opCtx, "admin")) {
- return {ErrorCodes::NotMaster,
- "No longer primary when waiting for linearizable read concern"};
- }
-
- MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
-
- WriteUnitOfWork uow(opCtx);
- opCtx->getClient()->getServiceContext()->getOpObserver()->onOpMessage(
- opCtx,
- BSON("msg"
- << "linearizable read"));
- uow.commit();
- }
- MONGO_WRITE_CONFLICT_RETRY_LOOP_END(
- opCtx, "waitForLinearizableReadConcern", "local.rs.oplog");
- }
- WriteConcernOptions wc = WriteConcernOptions(
- WriteConcernOptions::kMajority, WriteConcernOptions::SyncMode::UNSET, 0);
-
- repl::OpTime lastOpApplied = repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp();
- auto awaitReplResult = replCoord->awaitReplication(opCtx, lastOpApplied, wc);
- if (awaitReplResult.status == ErrorCodes::WriteConcernFailed) {
- return Status(ErrorCodes::LinearizableReadConcernError,
- "Failed to confirm that read was linearizable.");
- }
- return awaitReplResult.status;
-}
-
-} // namespace mongo
diff --git a/src/mongo/db/read_concern.h b/src/mongo/db/read_concern.h
deleted file mode 100644
index 8c01e902f01..00000000000
--- a/src/mongo/db/read_concern.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/**
- * Copyright (C) 2016 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-namespace mongo {
-
-class BSONObj;
-class OperationContext;
-class Status;
-template <typename T>
-class StatusWith;
-namespace repl {
-class ReadConcernArgs;
-}
-
-
-/**
- * Given the specified read concern arguments, performs checks that the read concern can actually be
- * satisfied given the current state of the server and if so calls into the replication subsystem to
- * perform the wait.
- */
-Status waitForReadConcern(OperationContext* opCtx, const repl::ReadConcernArgs& readConcernArgs);
-
-/*
- * Given a linearizable read command, confirm that
- * current primary is still the true primary of the replica set.
- */
-Status waitForLinearizableReadConcern(OperationContext* opCtx);
-
-} // namespace mongo
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript
index f4a27742bee..8238441d20b 100644
--- a/src/mongo/db/s/SConscript
+++ b/src/mongo/db/s/SConscript
@@ -96,7 +96,6 @@ env.Library(
'$BUILD_DIR/mongo/bson/util/bson_extract',
'$BUILD_DIR/mongo/db/common',
'$BUILD_DIR/mongo/db/range_deleter',
- '$BUILD_DIR/mongo/db/rw_concern_d',
'$BUILD_DIR/mongo/db/concurrency/lock_manager',
'$BUILD_DIR/mongo/db/query/internal_plans',
'$BUILD_DIR/mongo/s/client/shard_local',