From 66a48710fcdfa9783d44f1b467a7bceb6df7dcd2 Mon Sep 17 00:00:00 2001 From: Siyuan Zhou Date: Wed, 29 Apr 2015 20:28:39 -0400 Subject: Revert "SERVER-5218 Batch oplog writes always wait for journal." This reverts commit 27a8cb772d9e876fc7549a2c6ce3c1a62a4a394f. --- src/mongo/db/commands/cleanup_orphaned_cmd.cpp | 2 +- .../db/commands/write_commands/batch_executor.cpp | 34 +++++++++++++++++++++- .../db/commands/write_commands/batch_executor.h | 2 ++ src/mongo/db/range_deleter.cpp | 2 +- src/mongo/db/repl/initial_sync.h | 2 +- src/mongo/db/repl/oplog.cpp | 6 ++++ src/mongo/db/repl/replica_set_config.cpp | 4 +-- src/mongo/db/repl/replication_coordinator_impl.cpp | 4 +-- .../db/repl/replication_coordinator_impl_test.cpp | 10 +++---- src/mongo/db/repl/sync_tail.cpp | 21 ++++++++----- src/mongo/db/repl/sync_tail.h | 7 +++-- src/mongo/db/write_concern.cpp | 19 +----------- src/mongo/db/write_concern_options.cpp | 2 -- src/mongo/db/write_concern_options.h | 2 -- 14 files changed, 72 insertions(+), 45 deletions(-) (limited to 'src/mongo/db') diff --git a/src/mongo/db/commands/cleanup_orphaned_cmd.cpp b/src/mongo/db/commands/cleanup_orphaned_cmd.cpp index a6ff2b90a6d..89f57103958 100644 --- a/src/mongo/db/commands/cleanup_orphaned_cmd.cpp +++ b/src/mongo/db/commands/cleanup_orphaned_cmd.cpp @@ -53,7 +53,7 @@ namespace { using mongo::WriteConcernOptions; const int kDefaultWTimeoutMs = 60 * 1000; - const WriteConcernOptions DefaultWriteConcern(WriteConcernOptions::kMajority, + const WriteConcernOptions DefaultWriteConcern("majority", WriteConcernOptions::NONE, kDefaultWTimeoutMs); } diff --git a/src/mongo/db/commands/write_commands/batch_executor.cpp b/src/mongo/db/commands/write_commands/batch_executor.cpp index 1c12d272270..ae57fdbecbb 100644 --- a/src/mongo/db/commands/write_commands/batch_executor.cpp +++ b/src/mongo/db/commands/write_commands/batch_executor.cpp @@ -735,13 +735,32 @@ namespace mongo { std::vector* upsertedIds, std::vector* errors ) { + WriteConcernOptions 
originalWC = _txn->getWriteConcern(); + + // We adjust the write concern attached to the OperationContext to not wait for + // journal. Later, the code will restore the write concern to wait for journal on + // the last write of the batch. + if (request.sizeWriteOps() > 1 + && originalWC.syncMode == WriteConcernOptions::JOURNAL) + { + WriteConcernOptions writeConcern = originalWC; + writeConcern.syncMode = WriteConcernOptions::NONE; + _txn->setWriteConcern(writeConcern); + } + if ( request.getBatchType() == BatchedCommandRequest::BatchType_Insert ) { - execInserts( request, errors ); + execInserts( request, originalWC, errors ); } else if ( request.getBatchType() == BatchedCommandRequest::BatchType_Update ) { for ( size_t i = 0; i < request.sizeWriteOps(); i++ ) { if ( i + 1 == request.sizeWriteOps() ) { + // For the last write in the batch, restore the write concern back to the + // original provided one; this may set WriteConcernOptions::JOURNAL back + // to true. + _txn->setWriteConcern(originalWC); + // Use the original write concern to possibly await the commit of this write, + // in order to flush the journal as requested. setupSynchronousCommit( _txn ); } @@ -768,6 +787,12 @@ namespace mongo { for ( size_t i = 0; i < request.sizeWriteOps(); i++ ) { if ( i + 1 == request.sizeWriteOps() ) { + // For the last write in the batch, restore the write concern back to the + // original provided one; this may set WriteConcernOptions::JOURNAL back + // to true. + _txn->setWriteConcern(originalWC); + // Use the original write concern to possibly await the commit of this write, + // in order to flush the journal as requested. 
setupSynchronousCommit( _txn ); } @@ -812,6 +837,7 @@ namespace mongo { } void WriteBatchExecutor::execInserts( const BatchedCommandRequest& request, + const WriteConcernOptions& originalWC, std::vector* errors ) { // Theory of operation: @@ -856,6 +882,12 @@ namespace mongo { ++state.currIndex) { if (state.currIndex + 1 == state.request->sizeWriteOps()) { + // For the last write in the batch, restore the write concern back to the + // original provided one; this may set WriteConcernOptions::JOURNAL back + // to true. + _txn->setWriteConcern(originalWC); + // Use the original write concern to possibly await the commit of this write, + // in order to flush the journal as requested. setupSynchronousCommit(_txn); } diff --git a/src/mongo/db/commands/write_commands/batch_executor.h b/src/mongo/db/commands/write_commands/batch_executor.h index 6216ae65c89..e5d55a5b9fb 100644 --- a/src/mongo/db/commands/write_commands/batch_executor.h +++ b/src/mongo/db/commands/write_commands/batch_executor.h @@ -33,6 +33,7 @@ #include "mongo/base/disallow_copying.h" #include "mongo/db/ops/update_request.h" +#include "mongo/db/write_concern_options.h" #include "mongo/s/write_ops/batched_command_request.h" #include "mongo/s/write_ops/batched_command_response.h" #include "mongo/s/write_ops/batched_delete_document.h" @@ -94,6 +95,7 @@ namespace mongo { * times. 
*/ void execInserts( const BatchedCommandRequest& request, + const WriteConcernOptions& originalWC, std::vector* errors ); /** diff --git a/src/mongo/db/range_deleter.cpp b/src/mongo/db/range_deleter.cpp index bde2e3ad36c..e754fb9fb02 100644 --- a/src/mongo/db/range_deleter.cpp +++ b/src/mongo/db/range_deleter.cpp @@ -269,7 +269,7 @@ namespace { const int kWTimeoutMillis = 60 * 60 * 1000; bool _waitForMajority(OperationContext* txn, std::string* errMsg) { - const WriteConcernOptions writeConcern(WriteConcernOptions::kMajority, + const WriteConcernOptions writeConcern("majority", WriteConcernOptions::NONE, kWTimeoutMillis); diff --git a/src/mongo/db/repl/initial_sync.h b/src/mongo/db/repl/initial_sync.h index bf5800d4643..2cf41660d94 100644 --- a/src/mongo/db/repl/initial_sync.h +++ b/src/mongo/db/repl/initial_sync.h @@ -48,7 +48,7 @@ namespace repl { */ void oplogApplication(OperationContext* txn, const Timestamp& endOpTime); - // Initial sync will ignore all journal requirement flags and doesn't await commit + // Initial sync will ignore all journal requirement flags and doesn't await commit // before updating last OpTime. 
virtual bool supportsAwaitingCommit() { return false; } }; diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp index 15f51bee56d..7032cf1994d 100644 --- a/src/mongo/db/repl/oplog.cpp +++ b/src/mongo/db/repl/oplog.cpp @@ -302,6 +302,12 @@ namespace { b.appendBool("fromMigrate", true); } + if (txn->getWriteConcern().shouldWaitForOtherNodes() + && txn->getWriteConcern().syncMode == WriteConcernOptions::JOURNAL) + { + b.appendBool("j", true); + } + if ( o2 ) { b.append("o2", *o2); } diff --git a/src/mongo/db/repl/replica_set_config.cpp b/src/mongo/db/repl/replica_set_config.cpp index a4e632ccccc..06c27161bc5 100644 --- a/src/mongo/db/repl/replica_set_config.cpp +++ b/src/mongo/db/repl/replica_set_config.cpp @@ -359,7 +359,7 @@ namespace { } } else { - if (WriteConcernOptions::kMajority != _defaultWriteConcern.wMode && + if ("majority" != _defaultWriteConcern.wMode && !findCustomWriteMode(_defaultWriteConcern.wMode).isOK()) { return Status(ErrorCodes::BadValue, str::stream() << "Default write concern requires undefined write mode " << @@ -377,7 +377,7 @@ namespace { Status ReplicaSetConfig::checkIfWriteConcernCanBeSatisfied( const WriteConcernOptions& writeConcern) const { - if (!writeConcern.wMode.empty() && writeConcern.wMode != WriteConcernOptions::kMajority) { + if (!writeConcern.wMode.empty() && writeConcern.wMode != "majority") { StatusWith tagPatternStatus = findCustomWriteMode(writeConcern.wMode); if (!tagPatternStatus.isOK()) { diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 778e64e2c27..bc137be8ddc 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -858,7 +858,7 @@ namespace { if (!writeConcern.wMode.empty()) { StringData patternName; - if (writeConcern.wMode == WriteConcernOptions::kMajority) { + if (writeConcern.wMode == "majority") { patternName = 
ReplicaSetConfig::kMajorityWriteConcernModeName; } else { @@ -959,7 +959,7 @@ namespace { return StatusAndDuration(Status::OK(), Milliseconds(timer->millis())); } - if (replMode == modeMasterSlave && writeConcern.wMode == WriteConcernOptions::kMajority) { + if (replMode == modeMasterSlave && writeConcern.wMode == "majority") { // with master/slave, majority is equivalent to w=1 return StatusAndDuration(Status::OK(), Milliseconds(timer->millis())); } diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp index 6411cb8ea09..d7fcffc6ed6 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp @@ -432,7 +432,7 @@ namespace { writeConcern.wNumNodes = 0; - writeConcern.wMode = WriteConcernOptions::kMajority; + writeConcern.wMode = "majority"; // w:majority always works on master/slave ReplicationCoordinator::StatusAndDuration statusAndDur = getReplCoord()->awaitReplication( &txn, time, writeConcern); @@ -574,7 +574,7 @@ namespace { // Set up valid write concerns for the rest of the test WriteConcernOptions majorityWriteConcern; majorityWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting; - majorityWriteConcern.wMode = WriteConcernOptions::kMajority; + majorityWriteConcern.wMode = "majority"; WriteConcernOptions multiDCWriteConcern; multiDCWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting; @@ -1817,7 +1817,7 @@ namespace { // majority nodes waiting for time WriteConcernOptions writeConcern; writeConcern.wTimeout = WriteConcernOptions::kNoTimeout; - writeConcern.wMode = WriteConcernOptions::kMajority; + writeConcern.wMode = "majority"; ReplicationAwaiter awaiter(getReplCoord(), &txn); awaiter.setOpTime(time); @@ -1827,7 +1827,7 @@ namespace { // demonstrate that majority cannot currently be satisfied WriteConcernOptions writeConcern2; writeConcern2.wTimeout = WriteConcernOptions::kNoWaiting; - writeConcern2.wMode = 
WriteConcernOptions::kMajority; + writeConcern2.wMode = "majority"; ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, getReplCoord()->awaitReplication(&txn, time, writeConcern2).status); @@ -1884,7 +1884,7 @@ namespace { WriteConcernOptions majorityWriteConcern; majorityWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting; - majorityWriteConcern.wMode = WriteConcernOptions::kMajority; + majorityWriteConcern.wMode = "majority"; ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status); diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp index 3c6dd40838e..5a997a32a09 100644 --- a/src/mongo/db/repl/sync_tail.cpp +++ b/src/mongo/db/repl/sync_tail.cpp @@ -53,7 +53,6 @@ #include "mongo/db/repl/minvalid.h" #include "mongo/db/repl/oplog.h" #include "mongo/db/repl/repl_client_info.h" -#include "mongo/db/repl/replica_set_config.h" #include "mongo/db/repl/replication_coordinator_global.h" #include "mongo/db/stats/timer_stats.h" #include "mongo/util/exit.h" @@ -277,8 +276,9 @@ namespace repl { } std::vector< std::vector > writerVectors(replWriterThreadCount); + bool mustAwaitCommit = false; - fillWriterVectors(ops, &writerVectors); + fillWriterVectors(ops, &writerVectors, &mustAwaitCommit); LOG(2) << "replication batch size is " << ops.size() << endl; // We must grab this because we're going to grab write locks later. 
// We hold this mutex the entire time we're writing; it doesn't matter @@ -302,14 +302,12 @@ namespace repl { return Timestamp(); } - const bool mustAwaitCommit = replCoord->isV1ElectionProtocol() && supportsAwaitingCommit(); - if (mustAwaitCommit) { + if (supportsAwaitingCommit() && mustAwaitCommit) { txn->recoveryUnit()->goingToAwaitCommit(); } - Timestamp lastOpTime = writeOpsToOplog(txn, ops); - - if (mustAwaitCommit) { + // Wait for journal before setting last op time if any op in batch had j:true + if (supportsAwaitingCommit() && mustAwaitCommit) { txn->recoveryUnit()->awaitCommit(); } ReplClientInfo::forClient(txn->getClient()).setLastOp(lastOpTime); @@ -322,7 +320,8 @@ namespace repl { } void SyncTail::fillWriterVectors(const std::deque& ops, - std::vector< std::vector >* writerVectors) { + std::vector< std::vector >* writerVectors, + bool* mustAwaitCommit) { for (std::deque::const_iterator it = ops.begin(); it != ops.end(); @@ -336,6 +335,12 @@ namespace repl { const char* opType = it->getField( "op" ).valuestrsafe(); + // Check if any entry needs journaling, and if so return the need + const bool foundJournal = it->getField("j").trueValue(); + if (foundJournal) { + *mustAwaitCommit = true; + } + if (getGlobalServiceContext()->getGlobalStorageEngine()->supportsDocLocking() && isCrudOpType(opType)) { BSONElement id; diff --git a/src/mongo/db/repl/sync_tail.h b/src/mongo/db/repl/sync_tail.h index 8d0f79ced7c..b2bad4c4828 100644 --- a/src/mongo/db/repl/sync_tail.h +++ b/src/mongo/db/repl/sync_tail.h @@ -128,8 +128,11 @@ namespace repl { // Doles out all the work to the writer pool threads and waits for them to complete void applyOps(const std::vector< std::vector >& writerVectors); - void fillWriterVectors(const std::deque& ops, - std::vector< std::vector >* writerVectors); + // mustAwaitCommit is an out-parameter and indicates that at least one of the ops + // in 'ops' had j:true. 
+ void fillWriterVectors(const std::deque& ops, + std::vector< std::vector >* writerVectors, + bool* mustAwaitCommit); void handleSlaveDelay(const BSONObj& op); // persistent pool of worker threads for writing ops to the databases diff --git a/src/mongo/db/write_concern.cpp b/src/mongo/db/write_concern.cpp index 7bb8edd0fa2..a16352b34fc 100644 --- a/src/mongo/db/write_concern.cpp +++ b/src/mongo/db/write_concern.cpp @@ -62,18 +62,6 @@ namespace mongo { } } - namespace { - // The consensus protocol requires that w: majority implies j: true on all nodes. - void addJournalSyncForWMajority(WriteConcernOptions* writeConcern) { - if (repl::getGlobalReplicationCoordinator()->isV1ElectionProtocol() - && writeConcern->wMode == WriteConcernOptions::kMajority - && writeConcern->syncMode == WriteConcernOptions::NONE) - { - writeConcern->syncMode = WriteConcernOptions::JOURNAL; - } - } - } // namespace - StatusWith extractWriteConcern(const BSONObj& cmdObj) { // The default write concern if empty is w : 1 // Specifying w : 0 is/was allowed, but is interpreted identically to w : 1 @@ -82,8 +70,6 @@ namespace mongo { if (writeConcern.wNumNodes == 0 && writeConcern.wMode.empty()) { writeConcern.wNumNodes = 1; } - // Upgrade default write concern if necessary. - addJournalSyncForWMajority(&writeConcern); BSONElement writeConcernElement; Status wcStatus = bsonExtractTypedField(cmdObj, @@ -114,9 +100,6 @@ namespace mongo { return wcStatus; } - // Upgrade parsed write concern if necessary. 
- addJournalSyncForWMajority(&writeConcern); - return writeConcern; } @@ -147,7 +130,7 @@ namespace mongo { if ( replMode != repl::ReplicationCoordinator::modeReplSet && !writeConcern.wMode.empty() && - writeConcern.wMode != WriteConcernOptions::kMajority ) { + writeConcern.wMode != "majority" ) { return Status( ErrorCodes::BadValue, string( "cannot use non-majority 'w' mode " ) + writeConcern.wMode + " when a host is not a member of a replica set" ); diff --git a/src/mongo/db/write_concern_options.cpp b/src/mongo/db/write_concern_options.cpp index 993e373ec9b..50bf9cad040 100644 --- a/src/mongo/db/write_concern_options.cpp +++ b/src/mongo/db/write_concern_options.cpp @@ -39,8 +39,6 @@ namespace mongo { const BSONObj WriteConcernOptions::Acknowledged(BSON("w" << W_NORMAL)); const BSONObj WriteConcernOptions::Unacknowledged(BSON("w" << W_NONE)); - const string WriteConcernOptions::kMajority("majority"); - static const BSONField mongosSecondaryThrottleField("_secondaryThrottle", true); static const BSONField secondaryThrottleField("secondaryThrottle", true); static const BSONField writeConcernField("writeConcern"); diff --git a/src/mongo/db/write_concern_options.h b/src/mongo/db/write_concern_options.h index 945e88a4fca..445b6fab1db 100644 --- a/src/mongo/db/write_concern_options.h +++ b/src/mongo/db/write_concern_options.h @@ -46,8 +46,6 @@ namespace mongo { static const BSONObj Acknowledged; static const BSONObj Unacknowledged; - static const std::string kMajority; // = "majority" - WriteConcernOptions() { reset(); } WriteConcernOptions(int numNodes, -- cgit v1.2.1