summaryrefslogtreecommitdiff
path: root/src/mongo/db
diff options
context:
space:
mode:
authorKaloian Manassiev <kaloian.manassiev@mongodb.com>2016-06-30 14:32:38 -0400
committerKaloian Manassiev <kaloian.manassiev@mongodb.com>2016-07-08 13:26:39 -0400
commit7c67f3a6fd52ea4933c23ce89f1cc5da62628ec4 (patch)
treed843d57ca15094997eff24e10035170244be1c1b /src/mongo/db
parent920afddd4910e1efa6736331f2b38e7e276bd3d8 (diff)
downloadmongo-7c67f3a6fd52ea4933c23ce89f1cc5da62628ec4.tar.gz
SERVER-24858 Tighten assertions around waiting for write concern
Diffstat (limited to 'src/mongo/db')
-rw-r--r--src/mongo/db/commands/get_last_error.cpp9
-rw-r--r--src/mongo/db/dbcommands.cpp4
-rw-r--r--src/mongo/db/repl/read_concern_args.cpp4
-rw-r--r--src/mongo/db/repl/read_concern_args.h6
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp17
-rw-r--r--src/mongo/db/write_concern.cpp142
-rw-r--r--src/mongo/db/write_concern.h4
-rw-r--r--src/mongo/db/write_concern_options.cpp2
-rw-r--r--src/mongo/db/write_concern_options.h4
9 files changed, 84 insertions, 108 deletions
diff --git a/src/mongo/db/commands/get_last_error.cpp b/src/mongo/db/commands/get_last_error.cpp
index 7af1a83aafa..498d32d9d0c 100644
--- a/src/mongo/db/commands/get_last_error.cpp
+++ b/src/mongo/db/commands/get_last_error.cpp
@@ -44,6 +44,7 @@
#include "mongo/util/log.h"
namespace mongo {
+namespace {
using std::string;
using std::stringstream;
@@ -219,11 +220,9 @@ public:
//
// Validate write concern no matter what, this matches 2.4 behavior
//
-
-
if (status.isOK()) {
// Ensure options are valid for this host
- status = validateWriteConcern(txn, writeConcern, dbname);
+ status = validateWriteConcern(txn, writeConcern);
}
if (!status.isOK()) {
@@ -316,4 +315,6 @@ public:
return true;
}
} cmdGetPrevError;
-}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/dbcommands.cpp b/src/mongo/db/dbcommands.cpp
index d6facd3fd3f..91f9ad970d3 100644
--- a/src/mongo/db/dbcommands.cpp
+++ b/src/mongo/db/dbcommands.cpp
@@ -99,7 +99,7 @@
#include "mongo/s/chunk_version.h"
#include "mongo/s/client/shard_registry.h"
#include "mongo/s/grid.h"
-#include "mongo/s/stale_exception.h" // for SendStaleConfigException
+#include "mongo/s/stale_exception.h"
#include "mongo/scripting/engine.h"
#include "mongo/util/fail_point_service.h"
#include "mongo/util/log.h"
@@ -1473,7 +1473,7 @@ bool Command::run(OperationContext* txn,
}
} else {
// Skip waiting for the OpTime when testing snapshot behavior.
- if (!testingSnapshotBehaviorInIsolation) {
+ if (!testingSnapshotBehaviorInIsolation && !readConcernArgs.isEmpty()) {
// Wait for readConcern to be satisfied.
auto readConcernStatus = replCoord->waitUntilOpTimeForRead(txn, readConcernArgs);
if (!readConcernStatus.isOK()) {
diff --git a/src/mongo/db/repl/read_concern_args.cpp b/src/mongo/db/repl/read_concern_args.cpp
index c0af213fe56..409560de922 100644
--- a/src/mongo/db/repl/read_concern_args.cpp
+++ b/src/mongo/db/repl/read_concern_args.cpp
@@ -70,6 +70,10 @@ BSONObj ReadConcernArgs::toBSON() const {
return bob.obj();
}
+bool ReadConcernArgs::isEmpty() const {
+ return getOpTime().isNull() && getLevel() == repl::ReadConcernLevel::kLocalReadConcern;
+}
+
ReadConcernLevel ReadConcernArgs::getLevel() const {
return _level.value_or(ReadConcernLevel::kLocalReadConcern);
}
diff --git a/src/mongo/db/repl/read_concern_args.h b/src/mongo/db/repl/read_concern_args.h
index 5fa9a5fba8c..b258ce04461 100644
--- a/src/mongo/db/repl/read_concern_args.h
+++ b/src/mongo/db/repl/read_concern_args.h
@@ -80,6 +80,12 @@ public:
*/
void appendInfo(BSONObjBuilder* builder) const;
+ /**
+ * Returns whether these arguments are 'empty' in the sense that no read concern has been
+ * requested.
+ */
+ bool isEmpty() const;
+
ReadConcernLevel getLevel() const;
OpTime getOpTime() const;
BSONObj toBSON() const;
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 875468ca3f0..9337ffb9cfd 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -96,8 +96,7 @@ using NextAction = Fetcher::NextAction;
namespace {
-Status shutDownInProgressStatus(ErrorCodes::ShutdownInProgress,
- "replication system is shutting down");
+const char kLocalDB[] = "local";
void lockAndCall(stdx::unique_lock<stdx::mutex>* lk, const stdx::function<void()>& fn) {
if (!lk->owns_lock()) {
@@ -1060,6 +1059,10 @@ OpTime ReplicationCoordinatorImpl::getMyLastDurableOpTime() const {
Status ReplicationCoordinatorImpl::waitUntilOpTimeForRead(OperationContext* txn,
const ReadConcernArgs& settings) {
+ // We should never wait for replication if we are holding any locks, because this can
+ // potentially block for a long time while doing network activity.
+ invariant(!txn->lockState()->isLocked());
+
const bool isMajorityReadConcern =
settings.getLevel() == ReadConcernLevel::kMajorityReadConcern;
@@ -1501,6 +1504,10 @@ ReplicationCoordinator::StatusAndDuration ReplicationCoordinatorImpl::_awaitRepl
const OpTime& opTime,
SnapshotName minSnapshot,
const WriteConcernOptions& writeConcern) {
+ // We should never wait for writes to replicate if we are holding any locks, because this
+ // can potentially block for a long time while doing network activity.
+ invariant(!txn->lockState()->isLocked());
+
const Mode replMode = getReplicationMode();
if (replMode == modeNone) {
// no replication check needed (validated above)
@@ -1808,7 +1815,7 @@ bool ReplicationCoordinatorImpl::canAcceptWritesForDatabase(StringData dbName) {
if (_canAcceptNonLocalWrites) {
return true;
}
- if (dbName == "local") {
+ if (dbName == kLocalDB) {
return true;
}
return !replAllDead && _settings.isMaster();
@@ -1860,7 +1867,7 @@ bool ReplicationCoordinatorImpl::shouldIgnoreUniqueIndex(const IndexDescriptor*
if (idx->isIdIndex()) {
return false;
}
- if (nsToDatabaseSubstring(idx->parentNS()) == "local") {
+ if (nsToDatabaseSubstring(idx->parentNS()) == kLocalDB) {
// always enforce on local
return false;
}
@@ -2971,7 +2978,7 @@ SyncSourceResolverResponse ReplicationCoordinatorImpl::selectSyncSource(
};
Fetcher candidateProber(&_replExecutor,
candidate,
- "local",
+ kLocalDB,
BSON("find"
<< "oplog.rs"
<< "limit"
diff --git a/src/mongo/db/write_concern.cpp b/src/mongo/db/write_concern.cpp
index 1fbb5b9d827..ac428b29183 100644
--- a/src/mongo/db/write_concern.cpp
+++ b/src/mongo/db/write_concern.cpp
@@ -58,90 +58,70 @@ static ServerStatusMetricField<TimerStats> displayGleLatency("getLastError.wtime
static Counter64 gleWtimeouts;
static ServerStatusMetricField<Counter64> gleWtimeoutsDisplay("getLastError.wtimeouts",
&gleWtimeouts);
-namespace {
-const std::string kLocalDB = "local";
-} // namespace
StatusWith<WriteConcernOptions> extractWriteConcern(OperationContext* txn,
const BSONObj& cmdObj,
const std::string& dbName,
const bool supportsWriteConcern) {
- // The default write concern if empty is w : 1
- // Specifying w : 0 is/was allowed, but is interpreted identically to w : 1
- WriteConcernOptions writeConcern =
- repl::getGlobalReplicationCoordinator()->getGetLastErrorDefault();
-
- auto wcResult = WriteConcernOptions::extractWCFromCommand(cmdObj, dbName, writeConcern);
+ // The default write concern if empty is {w:1}. Specifying {w:0} is/was allowed, but is
+ // interpreted identically to {w:1}.
+ auto wcResult = WriteConcernOptions::extractWCFromCommand(
+ cmdObj, dbName, repl::ReplicationCoordinator::get(txn)->getGetLastErrorDefault());
if (!wcResult.isOK()) {
return wcResult.getStatus();
}
- writeConcern = wcResult.getValue();
- // We didn't use the default, so the user supplied their own writeConcern.
- if (!wcResult.getValue().usedDefault) {
+ WriteConcernOptions writeConcern = wcResult.getValue();
+
+ if (writeConcern.usedDefault) {
+ if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer &&
+ !txn->getClient()->isInDirectClient()) {
+ // This is here only for backwards compatibility with 3.2 clusters which have commands
+ // that do not specify write concern when writing to the config server.
+ writeConcern = {
+ WriteConcernOptions::kMajority, WriteConcernOptions::SyncMode::UNSET, Seconds(30)};
+ }
+ } else if (supportsWriteConcern) {
// If it supports writeConcern and does not use the default, validate the writeConcern.
- if (supportsWriteConcern) {
- Status wcStatus = validateWriteConcern(txn, writeConcern, dbName);
- if (!wcStatus.isOK()) {
- return wcStatus;
- }
- } else {
- // This command doesn't do writes so it should not be passed a writeConcern.
- // If we did not use the default writeConcern, one was provided when it shouldn't have
- // been by the user.
- return Status(ErrorCodes::InvalidOptions, "Command does not support writeConcern");
+ Status wcStatus = validateWriteConcern(txn, writeConcern);
+ if (!wcStatus.isOK()) {
+ return wcStatus;
}
+ } else {
+ // This command doesn't do writes so it should not be passed a writeConcern. If we did not
+ // use the default writeConcern, one was provided when it shouldn't have been by the user.
+ return {ErrorCodes::InvalidOptions, "Command does not support writeConcern"};
}
return writeConcern;
}
-Status validateWriteConcern(OperationContext* txn,
- const WriteConcernOptions& writeConcern,
- const std::string& dbName) {
- const bool isJournalEnabled = getGlobalServiceContext()->getGlobalStorageEngine()->isDurable();
-
- if (writeConcern.syncMode == WriteConcernOptions::SyncMode::JOURNAL && !isJournalEnabled) {
+Status validateWriteConcern(OperationContext* txn, const WriteConcernOptions& writeConcern) {
+ if (writeConcern.syncMode == WriteConcernOptions::SyncMode::JOURNAL &&
+ !txn->getServiceContext()->getGlobalStorageEngine()->isDurable()) {
return Status(ErrorCodes::BadValue,
"cannot use 'j' option when a host does not have journaling enabled");
}
- const bool isConfigServer = serverGlobalParams.clusterRole == ClusterRole::ConfigServer;
- const bool isLocalDb(dbName == kLocalDB);
- const repl::ReplicationCoordinator::Mode replMode =
- repl::getGlobalReplicationCoordinator()->getReplicationMode();
-
- if (isConfigServer) {
- if (!writeConcern.validForConfigServers()) {
- return Status(
- ErrorCodes::BadValue,
- str::stream()
- << "w:1 and w:'majority' are the only valid write concerns when writing to "
- "config servers, got: "
- << writeConcern.toBSON().toString());
- }
-
- if (replMode == repl::ReplicationCoordinator::modeReplSet &&
- // Allow writes performed within the server to have a write concern of { w: 1 }.
- // This is so commands have the option to skip waiting for replication if they are
- // holding locks (ex. addShardToZone). This also allows commands that perform
- // multiple writes to batch the wait at the end.
- !txn->getClient()->isInDirectClient() &&
- !isLocalDb && writeConcern.wMode.empty()) {
- invariant(writeConcern.wNumNodes == 1);
- return Status(
- ErrorCodes::BadValue,
- str::stream()
- << "w: 'majority' is the only valid write concern when writing to config "
- "server replica sets, got: "
- << writeConcern.toBSON().toString());
+ // Remote callers of the config server (as in callers making network calls, not the internal
+ // logic) should never be making non-majority writes against the config server, because sharding
+ // is not resilient against rollbacks of metadata writes.
+ if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer &&
+ !writeConcern.validForConfigServers()) {
+ // The only cases where we allow non-majority writes are from within the config servers
+ // themselves, because these wait for write concern explicitly.
+ if (!txn->getClient()->isInDirectClient()) {
+ return {ErrorCodes::BadValue,
+ str::stream() << "w:'majority' is the only valid write concern when writing "
+ "to config servers, got: "
+ << writeConcern.toBSON()};
}
}
- if (replMode == repl::ReplicationCoordinator::modeNone) {
- if (writeConcern.wNumNodes > 1) {
- return Status(ErrorCodes::BadValue, "cannot use 'w' > 1 when a host is not replicated");
- }
+ const auto replMode = repl::ReplicationCoordinator::get(txn)->getReplicationMode();
+
+ if (replMode == repl::ReplicationCoordinator::modeNone && writeConcern.wNumNodes > 1) {
+ return Status(ErrorCodes::BadValue, "cannot use 'w' > 1 when a host is not replicated");
}
if (replMode != repl::ReplicationCoordinator::modeReplSet && !writeConcern.wMode.empty() &&
@@ -186,45 +166,26 @@ void WriteConcernResult::appendTo(const WriteConcernOptions& writeConcern,
else
result->append("err", err);
- // *** 2.4 SyncClusterConnection compatibility ***
- // 2.4 expects either fsync'd files, or a "waited" field exist after running an fsync : true
- // GLE, but with journaling we don't actually need to run the fsync (fsync command is
- // preferred in 2.6). So we add a "waited" field if one doesn't exist.
-
- if (writeConcern.syncMode == WriteConcernOptions::SyncMode::FSYNC) {
- if (fsyncFiles < 0 && (wTime < 0 || !wTimedOut)) {
- dassert(result->asTempObj()["waited"].eoo());
- result->appendNumber("waited", syncMillis);
- }
-
- // For ephemeral storage engines, 0 files may be fsynced.
- dassert(result->asTempObj()["fsyncFiles"].numberLong() >= 0 ||
- !result->asTempObj()["waited"].eoo());
- }
+ // For ephemeral storage engines, 0 files may be fsynced
+ invariant(writeConcern.syncMode != WriteConcernOptions::SyncMode::FSYNC ||
+ (result->asTempObj()["fsyncFiles"].numberLong() >= 0 ||
+ !result->asTempObj()["waited"].eoo()));
}
Status waitForWriteConcern(OperationContext* txn,
const OpTime& replOpTime,
const WriteConcernOptions& writeConcern,
WriteConcernResult* result) {
- // We assume all options have been validated earlier, if not, programming error.
- // Passing localDB name is a hack to avoid more rigorous check that performed for non local DB.
- dassert(validateWriteConcern(txn, writeConcern, kLocalDB).isOK());
+ // We assume all options have been validated earlier, if not, programming error
+ dassertOK(validateWriteConcern(txn, writeConcern));
- // We should never be waiting for write concern while holding any sort of lock, because this may
- // lead to situations where the replication heartbeats are stalled.
- //
- // This check does not hold for writes done through dbeval because it runs with a global X lock.
- dassert(!txn->lockState()->isLocked() || txn->getClient()->isInDirectClient());
+ auto replCoord = repl::ReplicationCoordinator::get(txn);
// Next handle blocking on disk
-
Timer syncTimer;
- auto replCoord = repl::getGlobalReplicationCoordinator();
WriteConcernOptions writeConcernWithPopulatedSyncMode =
replCoord->populateUnsetWriteConcernOptionsSyncMode(writeConcern);
-
switch (writeConcernWithPopulatedSyncMode.syncMode) {
case WriteConcernOptions::SyncMode::UNSET:
severe() << "Attempting to wait on a WriteConcern with an unset sync option";
@@ -270,16 +231,15 @@ Status waitForWriteConcern(OperationContext* txn,
return Status::OK();
}
- // Now we wait for replication
- // Note that replica set stepdowns and gle mode changes are thrown as errors
+ // Replica set stepdowns and gle mode changes are thrown as errors
repl::ReplicationCoordinator::StatusAndDuration replStatus =
- repl::getGlobalReplicationCoordinator()->awaitReplication(
- txn, replOpTime, writeConcernWithPopulatedSyncMode);
+ replCoord->awaitReplication(txn, replOpTime, writeConcernWithPopulatedSyncMode);
if (replStatus.status == ErrorCodes::WriteConcernFailed) {
gleWtimeouts.increment();
result->err = "timeout";
result->wTimedOut = true;
}
+
// Add stats
result->writtenTo = repl::getGlobalReplicationCoordinator()->getHostsWrittenTo(
replOpTime,
diff --git a/src/mongo/db/write_concern.h b/src/mongo/db/write_concern.h
index 2b18ff5c538..eeff284ec61 100644
--- a/src/mongo/db/write_concern.h
+++ b/src/mongo/db/write_concern.h
@@ -54,9 +54,7 @@ StatusWith<WriteConcernOptions> extractWriteConcern(OperationContext* txn,
/**
* Verifies that a WriteConcern is valid for this particular host.
*/
-Status validateWriteConcern(OperationContext* txn,
- const WriteConcernOptions& writeConcern,
- const std::string& dbName);
+Status validateWriteConcern(OperationContext* txn, const WriteConcernOptions& writeConcern);
struct WriteConcernResult {
WriteConcernResult() {
diff --git a/src/mongo/db/write_concern_options.cpp b/src/mongo/db/write_concern_options.cpp
index 2b0c9abef23..95b411d9baa 100644
--- a/src/mongo/db/write_concern_options.cpp
+++ b/src/mongo/db/write_concern_options.cpp
@@ -180,7 +180,7 @@ bool WriteConcernOptions::shouldWaitForOtherNodes() const {
}
bool WriteConcernOptions::validForConfigServers() const {
- return wNumNodes == 1 || wMode == kMajority;
+ return wMode == kMajority;
}
} // namespace mongo
diff --git a/src/mongo/db/write_concern_options.h b/src/mongo/db/write_concern_options.h
index f969fc08031..06ddfbc56a0 100644
--- a/src/mongo/db/write_concern_options.h
+++ b/src/mongo/db/write_concern_options.h
@@ -79,8 +79,8 @@ public:
bool shouldWaitForOtherNodes() const;
/**
- * Returns true if this is a valid write concern to use against a config server.
- * TODO(spencer): Once we stop supporting SCCC config servers, forbid this from allowing w:1
+ * Returns true if this is a {w:majority} write concern, which is the only valid write concern
+ * to use against a config server.
*/
bool validForConfigServers() const;