summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSiyuan Zhou <siyuan.zhou@mongodb.com>2015-04-23 16:21:37 -0400
committerSiyuan Zhou <siyuan.zhou@mongodb.com>2015-04-29 15:47:36 -0400
commit27a8cb772d9e876fc7549a2c6ce3c1a62a4a394f (patch)
treeef70ac38c686d04291fb9413f59892d10baa065b
parent917091361df13ee8f331bdd5742806bd45d909d0 (diff)
downloadmongo-27a8cb772d9e876fc7549a2c6ce3c1a62a4a394f.tar.gz
SERVER-5218 Batch oplog writes always wait for journal.
-rw-r--r--src/mongo/db/commands/cleanup_orphaned_cmd.cpp2
-rw-r--r--src/mongo/db/commands/write_commands/batch_executor.cpp34
-rw-r--r--src/mongo/db/commands/write_commands/batch_executor.h2
-rw-r--r--src/mongo/db/range_deleter.cpp2
-rw-r--r--src/mongo/db/repl/initial_sync.h2
-rw-r--r--src/mongo/db/repl/oplog.cpp6
-rw-r--r--src/mongo/db/repl/replica_set_config.cpp4
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp4
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_test.cpp10
-rw-r--r--src/mongo/db/repl/sync_tail.cpp21
-rw-r--r--src/mongo/db/repl/sync_tail.h7
-rw-r--r--src/mongo/db/write_concern.cpp19
-rw-r--r--src/mongo/db/write_concern_options.cpp2
-rw-r--r--src/mongo/db/write_concern_options.h2
-rw-r--r--src/mongo/s/d_migrate.cpp2
15 files changed, 46 insertions, 73 deletions
diff --git a/src/mongo/db/commands/cleanup_orphaned_cmd.cpp b/src/mongo/db/commands/cleanup_orphaned_cmd.cpp
index 89f57103958..a6ff2b90a6d 100644
--- a/src/mongo/db/commands/cleanup_orphaned_cmd.cpp
+++ b/src/mongo/db/commands/cleanup_orphaned_cmd.cpp
@@ -53,7 +53,7 @@ namespace {
using mongo::WriteConcernOptions;
const int kDefaultWTimeoutMs = 60 * 1000;
- const WriteConcernOptions DefaultWriteConcern("majority",
+ const WriteConcernOptions DefaultWriteConcern(WriteConcernOptions::kMajority,
WriteConcernOptions::NONE,
kDefaultWTimeoutMs);
}
diff --git a/src/mongo/db/commands/write_commands/batch_executor.cpp b/src/mongo/db/commands/write_commands/batch_executor.cpp
index ae57fdbecbb..1c12d272270 100644
--- a/src/mongo/db/commands/write_commands/batch_executor.cpp
+++ b/src/mongo/db/commands/write_commands/batch_executor.cpp
@@ -735,32 +735,13 @@ namespace mongo {
std::vector<BatchedUpsertDetail*>* upsertedIds,
std::vector<WriteErrorDetail*>* errors ) {
- WriteConcernOptions originalWC = _txn->getWriteConcern();
-
- // We adjust the write concern attached to the OperationContext to not wait for
- // journal. Later, the code will restore the write concern to wait for journal on
- // the last write of the batch.
- if (request.sizeWriteOps() > 1
- && originalWC.syncMode == WriteConcernOptions::JOURNAL)
- {
- WriteConcernOptions writeConcern = originalWC;
- writeConcern.syncMode = WriteConcernOptions::NONE;
- _txn->setWriteConcern(writeConcern);
- }
-
if ( request.getBatchType() == BatchedCommandRequest::BatchType_Insert ) {
- execInserts( request, originalWC, errors );
+ execInserts( request, errors );
}
else if ( request.getBatchType() == BatchedCommandRequest::BatchType_Update ) {
for ( size_t i = 0; i < request.sizeWriteOps(); i++ ) {
if ( i + 1 == request.sizeWriteOps() ) {
- // For the last write in the batch, restore the write concern back to the
- // original provided one; this may set WriteConcernOptions::JOURNAL back
- // to true.
- _txn->setWriteConcern(originalWC);
- // Use the original write concern to possibly await the commit of this write,
- // in order to flush the journal as requested.
setupSynchronousCommit( _txn );
}
@@ -787,12 +768,6 @@ namespace mongo {
for ( size_t i = 0; i < request.sizeWriteOps(); i++ ) {
if ( i + 1 == request.sizeWriteOps() ) {
- // For the last write in the batch, restore the write concern back to the
- // original provided one; this may set WriteConcernOptions::JOURNAL back
- // to true.
- _txn->setWriteConcern(originalWC);
- // Use the original write concern to possibly await the commit of this write,
- // in order to flush the journal as requested.
setupSynchronousCommit( _txn );
}
@@ -837,7 +812,6 @@ namespace mongo {
}
void WriteBatchExecutor::execInserts( const BatchedCommandRequest& request,
- const WriteConcernOptions& originalWC,
std::vector<WriteErrorDetail*>* errors ) {
// Theory of operation:
@@ -882,12 +856,6 @@ namespace mongo {
++state.currIndex) {
if (state.currIndex + 1 == state.request->sizeWriteOps()) {
- // For the last write in the batch, restore the write concern back to the
- // original provided one; this may set WriteConcernOptions::JOURNAL back
- // to true.
- _txn->setWriteConcern(originalWC);
- // Use the original write concern to possibly await the commit of this write,
- // in order to flush the journal as requested.
setupSynchronousCommit(_txn);
}
diff --git a/src/mongo/db/commands/write_commands/batch_executor.h b/src/mongo/db/commands/write_commands/batch_executor.h
index e5d55a5b9fb..6216ae65c89 100644
--- a/src/mongo/db/commands/write_commands/batch_executor.h
+++ b/src/mongo/db/commands/write_commands/batch_executor.h
@@ -33,7 +33,6 @@
#include "mongo/base/disallow_copying.h"
#include "mongo/db/ops/update_request.h"
-#include "mongo/db/write_concern_options.h"
#include "mongo/s/write_ops/batched_command_request.h"
#include "mongo/s/write_ops/batched_command_response.h"
#include "mongo/s/write_ops/batched_delete_document.h"
@@ -95,7 +94,6 @@ namespace mongo {
* times.
*/
void execInserts( const BatchedCommandRequest& request,
- const WriteConcernOptions& originalWC,
std::vector<WriteErrorDetail*>* errors );
/**
diff --git a/src/mongo/db/range_deleter.cpp b/src/mongo/db/range_deleter.cpp
index e754fb9fb02..bde2e3ad36c 100644
--- a/src/mongo/db/range_deleter.cpp
+++ b/src/mongo/db/range_deleter.cpp
@@ -269,7 +269,7 @@ namespace {
const int kWTimeoutMillis = 60 * 60 * 1000;
bool _waitForMajority(OperationContext* txn, std::string* errMsg) {
- const WriteConcernOptions writeConcern("majority",
+ const WriteConcernOptions writeConcern(WriteConcernOptions::kMajority,
WriteConcernOptions::NONE,
kWTimeoutMillis);
diff --git a/src/mongo/db/repl/initial_sync.h b/src/mongo/db/repl/initial_sync.h
index 2cf41660d94..bf5800d4643 100644
--- a/src/mongo/db/repl/initial_sync.h
+++ b/src/mongo/db/repl/initial_sync.h
@@ -48,7 +48,7 @@ namespace repl {
*/
void oplogApplication(OperationContext* txn, const Timestamp& endOpTime);
- // Initial sync will ignore all journal requirement flags and dones't await commit
+ // Initial sync will ignore all journal requirement flags and doesn't await commit
// before updating last OpTime.
virtual bool supportsAwaitingCommit() { return false; }
};
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index 7032cf1994d..15f51bee56d 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -302,12 +302,6 @@ namespace {
b.appendBool("fromMigrate", true);
}
- if (txn->getWriteConcern().shouldWaitForOtherNodes()
- && txn->getWriteConcern().syncMode == WriteConcernOptions::JOURNAL)
- {
- b.appendBool("j", true);
- }
-
if ( o2 ) {
b.append("o2", *o2);
}
diff --git a/src/mongo/db/repl/replica_set_config.cpp b/src/mongo/db/repl/replica_set_config.cpp
index 06c27161bc5..a4e632ccccc 100644
--- a/src/mongo/db/repl/replica_set_config.cpp
+++ b/src/mongo/db/repl/replica_set_config.cpp
@@ -359,7 +359,7 @@ namespace {
}
}
else {
- if ("majority" != _defaultWriteConcern.wMode &&
+ if (WriteConcernOptions::kMajority != _defaultWriteConcern.wMode &&
!findCustomWriteMode(_defaultWriteConcern.wMode).isOK()) {
return Status(ErrorCodes::BadValue, str::stream() <<
"Default write concern requires undefined write mode " <<
@@ -377,7 +377,7 @@ namespace {
Status ReplicaSetConfig::checkIfWriteConcernCanBeSatisfied(
const WriteConcernOptions& writeConcern) const {
- if (!writeConcern.wMode.empty() && writeConcern.wMode != "majority") {
+ if (!writeConcern.wMode.empty() && writeConcern.wMode != WriteConcernOptions::kMajority) {
StatusWith<ReplicaSetTagPattern> tagPatternStatus =
findCustomWriteMode(writeConcern.wMode);
if (!tagPatternStatus.isOK()) {
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index bc137be8ddc..778e64e2c27 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -858,7 +858,7 @@ namespace {
if (!writeConcern.wMode.empty()) {
StringData patternName;
- if (writeConcern.wMode == "majority") {
+ if (writeConcern.wMode == WriteConcernOptions::kMajority) {
patternName = ReplicaSetConfig::kMajorityWriteConcernModeName;
}
else {
@@ -959,7 +959,7 @@ namespace {
return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
}
- if (replMode == modeMasterSlave && writeConcern.wMode == "majority") {
+ if (replMode == modeMasterSlave && writeConcern.wMode == WriteConcernOptions::kMajority) {
// with master/slave, majority is equivalent to w=1
return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
}
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index d7fcffc6ed6..6411cb8ea09 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -432,7 +432,7 @@ namespace {
writeConcern.wNumNodes = 0;
- writeConcern.wMode = "majority";
+ writeConcern.wMode = WriteConcernOptions::kMajority;
// w:majority always works on master/slave
ReplicationCoordinator::StatusAndDuration statusAndDur = getReplCoord()->awaitReplication(
&txn, time, writeConcern);
@@ -574,7 +574,7 @@ namespace {
// Set up valid write concerns for the rest of the test
WriteConcernOptions majorityWriteConcern;
majorityWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- majorityWriteConcern.wMode = "majority";
+ majorityWriteConcern.wMode = WriteConcernOptions::kMajority;
WriteConcernOptions multiDCWriteConcern;
multiDCWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
@@ -1817,7 +1817,7 @@ namespace {
// majority nodes waiting for time
WriteConcernOptions writeConcern;
writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
- writeConcern.wMode = "majority";
+ writeConcern.wMode = WriteConcernOptions::kMajority;
ReplicationAwaiter awaiter(getReplCoord(), &txn);
awaiter.setOpTime(time);
@@ -1827,7 +1827,7 @@ namespace {
// demonstrate that majority cannot currently be satisfied
WriteConcernOptions writeConcern2;
writeConcern2.wTimeout = WriteConcernOptions::kNoWaiting;
- writeConcern2.wMode = "majority";
+ writeConcern2.wMode = WriteConcernOptions::kMajority;
ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
getReplCoord()->awaitReplication(&txn, time, writeConcern2).status);
@@ -1884,7 +1884,7 @@ namespace {
WriteConcernOptions majorityWriteConcern;
majorityWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- majorityWriteConcern.wMode = "majority";
+ majorityWriteConcern.wMode = WriteConcernOptions::kMajority;
ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp
index 5a997a32a09..3c6dd40838e 100644
--- a/src/mongo/db/repl/sync_tail.cpp
+++ b/src/mongo/db/repl/sync_tail.cpp
@@ -53,6 +53,7 @@
#include "mongo/db/repl/minvalid.h"
#include "mongo/db/repl/oplog.h"
#include "mongo/db/repl/repl_client_info.h"
+#include "mongo/db/repl/replica_set_config.h"
#include "mongo/db/repl/replication_coordinator_global.h"
#include "mongo/db/stats/timer_stats.h"
#include "mongo/util/exit.h"
@@ -276,9 +277,8 @@ namespace repl {
}
std::vector< std::vector<BSONObj> > writerVectors(replWriterThreadCount);
- bool mustAwaitCommit = false;
- fillWriterVectors(ops, &writerVectors, &mustAwaitCommit);
+ fillWriterVectors(ops, &writerVectors);
LOG(2) << "replication batch size is " << ops.size() << endl;
// We must grab this because we're going to grab write locks later.
// We hold this mutex the entire time we're writing; it doesn't matter
@@ -302,12 +302,14 @@ namespace repl {
return Timestamp();
}
- if (supportsAwaitingCommit() && mustAwaitCommit) {
+ const bool mustAwaitCommit = replCoord->isV1ElectionProtocol() && supportsAwaitingCommit();
+ if (mustAwaitCommit) {
txn->recoveryUnit()->goingToAwaitCommit();
}
+
Timestamp lastOpTime = writeOpsToOplog(txn, ops);
- // Wait for journal before setting last op time if any op in batch had j:true
- if (supportsAwaitingCommit() && mustAwaitCommit) {
+
+ if (mustAwaitCommit) {
txn->recoveryUnit()->awaitCommit();
}
ReplClientInfo::forClient(txn->getClient()).setLastOp(lastOpTime);
@@ -320,8 +322,7 @@ namespace repl {
}
void SyncTail::fillWriterVectors(const std::deque<BSONObj>& ops,
- std::vector< std::vector<BSONObj> >* writerVectors,
- bool* mustAwaitCommit) {
+ std::vector< std::vector<BSONObj> >* writerVectors) {
for (std::deque<BSONObj>::const_iterator it = ops.begin();
it != ops.end();
@@ -335,12 +336,6 @@ namespace repl {
const char* opType = it->getField( "op" ).valuestrsafe();
- // Check if any entry needs journaling, and if so return the need
- const bool foundJournal = it->getField("j").trueValue();
- if (foundJournal) {
- *mustAwaitCommit = true;
- }
-
if (getGlobalServiceContext()->getGlobalStorageEngine()->supportsDocLocking() &&
isCrudOpType(opType)) {
BSONElement id;
diff --git a/src/mongo/db/repl/sync_tail.h b/src/mongo/db/repl/sync_tail.h
index b2bad4c4828..8d0f79ced7c 100644
--- a/src/mongo/db/repl/sync_tail.h
+++ b/src/mongo/db/repl/sync_tail.h
@@ -128,11 +128,8 @@ namespace repl {
// Doles out all the work to the writer pool threads and waits for them to complete
void applyOps(const std::vector< std::vector<BSONObj> >& writerVectors);
- // mustAwaitCommit is an out-parameter and indicates that at least one of the ops
- // in 'ops' had j:true.
- void fillWriterVectors(const std::deque<BSONObj>& ops,
- std::vector< std::vector<BSONObj> >* writerVectors,
- bool* mustAwaitCommit);
+ void fillWriterVectors(const std::deque<BSONObj>& ops,
+ std::vector< std::vector<BSONObj> >* writerVectors);
void handleSlaveDelay(const BSONObj& op);
// persistent pool of worker threads for writing ops to the databases
diff --git a/src/mongo/db/write_concern.cpp b/src/mongo/db/write_concern.cpp
index a16352b34fc..7bb8edd0fa2 100644
--- a/src/mongo/db/write_concern.cpp
+++ b/src/mongo/db/write_concern.cpp
@@ -62,6 +62,18 @@ namespace mongo {
}
}
+ namespace {
+ // The consensus protocol requires that w: majority implies j: true on all nodes.
+ void addJournalSyncForWMajority(WriteConcernOptions* writeConcern) {
+ if (repl::getGlobalReplicationCoordinator()->isV1ElectionProtocol()
+ && writeConcern->wMode == WriteConcernOptions::kMajority
+ && writeConcern->syncMode == WriteConcernOptions::NONE)
+ {
+ writeConcern->syncMode = WriteConcernOptions::JOURNAL;
+ }
+ }
+ } // namespace
+
StatusWith<WriteConcernOptions> extractWriteConcern(const BSONObj& cmdObj) {
// The default write concern if empty is w : 1
// Specifying w : 0 is/was allowed, but is interpreted identically to w : 1
@@ -70,6 +82,8 @@ namespace mongo {
if (writeConcern.wNumNodes == 0 && writeConcern.wMode.empty()) {
writeConcern.wNumNodes = 1;
}
+ // Upgrade default write concern if necessary.
+ addJournalSyncForWMajority(&writeConcern);
BSONElement writeConcernElement;
Status wcStatus = bsonExtractTypedField(cmdObj,
@@ -100,6 +114,9 @@ namespace mongo {
return wcStatus;
}
+ // Upgrade parsed write concern if necessary.
+ addJournalSyncForWMajority(&writeConcern);
+
return writeConcern;
}
@@ -130,7 +147,7 @@ namespace mongo {
if ( replMode != repl::ReplicationCoordinator::modeReplSet &&
!writeConcern.wMode.empty() &&
- writeConcern.wMode != "majority" ) {
+ writeConcern.wMode != WriteConcernOptions::kMajority ) {
return Status( ErrorCodes::BadValue,
string( "cannot use non-majority 'w' mode " ) + writeConcern.wMode
+ " when a host is not a member of a replica set" );
diff --git a/src/mongo/db/write_concern_options.cpp b/src/mongo/db/write_concern_options.cpp
index 50bf9cad040..993e373ec9b 100644
--- a/src/mongo/db/write_concern_options.cpp
+++ b/src/mongo/db/write_concern_options.cpp
@@ -39,6 +39,8 @@ namespace mongo {
const BSONObj WriteConcernOptions::Acknowledged(BSON("w" << W_NORMAL));
const BSONObj WriteConcernOptions::Unacknowledged(BSON("w" << W_NONE));
+ const string WriteConcernOptions::kMajority("majority");
+
static const BSONField<bool> mongosSecondaryThrottleField("_secondaryThrottle", true);
static const BSONField<bool> secondaryThrottleField("secondaryThrottle", true);
static const BSONField<BSONObj> writeConcernField("writeConcern");
diff --git a/src/mongo/db/write_concern_options.h b/src/mongo/db/write_concern_options.h
index 445b6fab1db..945e88a4fca 100644
--- a/src/mongo/db/write_concern_options.h
+++ b/src/mongo/db/write_concern_options.h
@@ -46,6 +46,8 @@ namespace mongo {
static const BSONObj Acknowledged;
static const BSONObj Unacknowledged;
+ static const std::string kMajority; // = "majority"
+
WriteConcernOptions() { reset(); }
WriteConcernOptions(int numNodes,
diff --git a/src/mongo/s/d_migrate.cpp b/src/mongo/s/d_migrate.cpp
index ae319e1006a..481b03ecc79 100644
--- a/src/mongo/s/d_migrate.cpp
+++ b/src/mongo/s/d_migrate.cpp
@@ -2508,7 +2508,7 @@ namespace mongo {
const WriteConcernOptions& writeConcern) {
WriteConcernOptions majorityWriteConcern;
majorityWriteConcern.wTimeout = -1;
- majorityWriteConcern.wMode = "majority";
+ majorityWriteConcern.wMode = WriteConcernOptions::kMajority;
Status majorityStatus = repl::getGlobalReplicationCoordinator()->awaitReplication(
txn, lastOpApplied, majorityWriteConcern).status;