summary refs log tree commit diff
path: root/src/mongo/db
diff options
context:
space:
mode:
author Siyuan Zhou <siyuan.zhou@mongodb.com> 2015-04-29 20:28:39 -0400
committer Siyuan Zhou <siyuan.zhou@mongodb.com> 2015-04-29 20:28:39 -0400
commit 66a48710fcdfa9783d44f1b467a7bceb6df7dcd2 (patch)
tree 3c3e13cc68fa4fb493208a9ad1186cb057ca87e3 /src/mongo/db
parent 5c4824de27af185aa584aee61d4c4cf5e9b79ff5 (diff)
download mongo-66a48710fcdfa9783d44f1b467a7bceb6df7dcd2.tar.gz
Revert "SERVER-5218 Batch oplog writes always wait for journal."
This reverts commit 27a8cb772d9e876fc7549a2c6ce3c1a62a4a394f.
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- src/mongo/db/commands/cleanup_orphaned_cmd.cpp 2
-rw-r--r-- src/mongo/db/commands/write_commands/batch_executor.cpp 34
-rw-r--r-- src/mongo/db/commands/write_commands/batch_executor.h 2
-rw-r--r-- src/mongo/db/range_deleter.cpp 2
-rw-r--r-- src/mongo/db/repl/initial_sync.h 2
-rw-r--r-- src/mongo/db/repl/oplog.cpp 6
-rw-r--r-- src/mongo/db/repl/replica_set_config.cpp 4
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp 4
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_test.cpp 10
-rw-r--r-- src/mongo/db/repl/sync_tail.cpp 21
-rw-r--r-- src/mongo/db/repl/sync_tail.h 7
-rw-r--r-- src/mongo/db/write_concern.cpp 19
-rw-r--r-- src/mongo/db/write_concern_options.cpp 2
-rw-r--r-- src/mongo/db/write_concern_options.h 2
14 files changed, 72 insertions, 45 deletions
diff --git a/src/mongo/db/commands/cleanup_orphaned_cmd.cpp b/src/mongo/db/commands/cleanup_orphaned_cmd.cpp
index a6ff2b90a6d..89f57103958 100644
--- a/src/mongo/db/commands/cleanup_orphaned_cmd.cpp
+++ b/src/mongo/db/commands/cleanup_orphaned_cmd.cpp
@@ -53,7 +53,7 @@ namespace {
using mongo::WriteConcernOptions;
const int kDefaultWTimeoutMs = 60 * 1000;
- const WriteConcernOptions DefaultWriteConcern(WriteConcernOptions::kMajority,
+ const WriteConcernOptions DefaultWriteConcern("majority",
WriteConcernOptions::NONE,
kDefaultWTimeoutMs);
}
diff --git a/src/mongo/db/commands/write_commands/batch_executor.cpp b/src/mongo/db/commands/write_commands/batch_executor.cpp
index 1c12d272270..ae57fdbecbb 100644
--- a/src/mongo/db/commands/write_commands/batch_executor.cpp
+++ b/src/mongo/db/commands/write_commands/batch_executor.cpp
@@ -735,13 +735,32 @@ namespace mongo {
std::vector<BatchedUpsertDetail*>* upsertedIds,
std::vector<WriteErrorDetail*>* errors ) {
+ WriteConcernOptions originalWC = _txn->getWriteConcern();
+
+ // We adjust the write concern attached to the OperationContext to not wait for
+ // journal. Later, the code will restore the write concern to wait for journal on
+ // the last write of the batch.
+ if (request.sizeWriteOps() > 1
+ && originalWC.syncMode == WriteConcernOptions::JOURNAL)
+ {
+ WriteConcernOptions writeConcern = originalWC;
+ writeConcern.syncMode = WriteConcernOptions::NONE;
+ _txn->setWriteConcern(writeConcern);
+ }
+
if ( request.getBatchType() == BatchedCommandRequest::BatchType_Insert ) {
- execInserts( request, errors );
+ execInserts( request, originalWC, errors );
}
else if ( request.getBatchType() == BatchedCommandRequest::BatchType_Update ) {
for ( size_t i = 0; i < request.sizeWriteOps(); i++ ) {
if ( i + 1 == request.sizeWriteOps() ) {
+ // For the last write in the batch, restore the write concern back to the
+ // original provided one; this may set WriteConcernOptions::JOURNAL back
+ // to true.
+ _txn->setWriteConcern(originalWC);
+ // Use the original write concern to possibly await the commit of this write,
+ // in order to flush the journal as requested.
setupSynchronousCommit( _txn );
}
@@ -768,6 +787,12 @@ namespace mongo {
for ( size_t i = 0; i < request.sizeWriteOps(); i++ ) {
if ( i + 1 == request.sizeWriteOps() ) {
+ // For the last write in the batch, restore the write concern back to the
+ // original provided one; this may set WriteConcernOptions::JOURNAL back
+ // to true.
+ _txn->setWriteConcern(originalWC);
+ // Use the original write concern to possibly await the commit of this write,
+ // in order to flush the journal as requested.
setupSynchronousCommit( _txn );
}
@@ -812,6 +837,7 @@ namespace mongo {
}
void WriteBatchExecutor::execInserts( const BatchedCommandRequest& request,
+ const WriteConcernOptions& originalWC,
std::vector<WriteErrorDetail*>* errors ) {
// Theory of operation:
@@ -856,6 +882,12 @@ namespace mongo {
++state.currIndex) {
if (state.currIndex + 1 == state.request->sizeWriteOps()) {
+ // For the last write in the batch, restore the write concern back to the
+ // original provided one; this may set WriteConcernOptions::JOURNAL back
+ // to true.
+ _txn->setWriteConcern(originalWC);
+ // Use the original write concern to possibly await the commit of this write,
+ // in order to flush the journal as requested.
setupSynchronousCommit(_txn);
}
diff --git a/src/mongo/db/commands/write_commands/batch_executor.h b/src/mongo/db/commands/write_commands/batch_executor.h
index 6216ae65c89..e5d55a5b9fb 100644
--- a/src/mongo/db/commands/write_commands/batch_executor.h
+++ b/src/mongo/db/commands/write_commands/batch_executor.h
@@ -33,6 +33,7 @@
#include "mongo/base/disallow_copying.h"
#include "mongo/db/ops/update_request.h"
+#include "mongo/db/write_concern_options.h"
#include "mongo/s/write_ops/batched_command_request.h"
#include "mongo/s/write_ops/batched_command_response.h"
#include "mongo/s/write_ops/batched_delete_document.h"
@@ -94,6 +95,7 @@ namespace mongo {
* times.
*/
void execInserts( const BatchedCommandRequest& request,
+ const WriteConcernOptions& originalWC,
std::vector<WriteErrorDetail*>* errors );
/**
diff --git a/src/mongo/db/range_deleter.cpp b/src/mongo/db/range_deleter.cpp
index bde2e3ad36c..e754fb9fb02 100644
--- a/src/mongo/db/range_deleter.cpp
+++ b/src/mongo/db/range_deleter.cpp
@@ -269,7 +269,7 @@ namespace {
const int kWTimeoutMillis = 60 * 60 * 1000;
bool _waitForMajority(OperationContext* txn, std::string* errMsg) {
- const WriteConcernOptions writeConcern(WriteConcernOptions::kMajority,
+ const WriteConcernOptions writeConcern("majority",
WriteConcernOptions::NONE,
kWTimeoutMillis);
diff --git a/src/mongo/db/repl/initial_sync.h b/src/mongo/db/repl/initial_sync.h
index bf5800d4643..2cf41660d94 100644
--- a/src/mongo/db/repl/initial_sync.h
+++ b/src/mongo/db/repl/initial_sync.h
@@ -48,7 +48,7 @@ namespace repl {
*/
void oplogApplication(OperationContext* txn, const Timestamp& endOpTime);
- // Initial sync will ignore all journal requirement flags and doesn't await commit
+ // Initial sync will ignore all journal requirement flags and doesn't await commit
// before updating last OpTime.
virtual bool supportsAwaitingCommit() { return false; }
};
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index 15f51bee56d..7032cf1994d 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -302,6 +302,12 @@ namespace {
b.appendBool("fromMigrate", true);
}
+ if (txn->getWriteConcern().shouldWaitForOtherNodes()
+ && txn->getWriteConcern().syncMode == WriteConcernOptions::JOURNAL)
+ {
+ b.appendBool("j", true);
+ }
+
if ( o2 ) {
b.append("o2", *o2);
}
diff --git a/src/mongo/db/repl/replica_set_config.cpp b/src/mongo/db/repl/replica_set_config.cpp
index a4e632ccccc..06c27161bc5 100644
--- a/src/mongo/db/repl/replica_set_config.cpp
+++ b/src/mongo/db/repl/replica_set_config.cpp
@@ -359,7 +359,7 @@ namespace {
}
}
else {
- if (WriteConcernOptions::kMajority != _defaultWriteConcern.wMode &&
+ if ("majority" != _defaultWriteConcern.wMode &&
!findCustomWriteMode(_defaultWriteConcern.wMode).isOK()) {
return Status(ErrorCodes::BadValue, str::stream() <<
"Default write concern requires undefined write mode " <<
@@ -377,7 +377,7 @@ namespace {
Status ReplicaSetConfig::checkIfWriteConcernCanBeSatisfied(
const WriteConcernOptions& writeConcern) const {
- if (!writeConcern.wMode.empty() && writeConcern.wMode != WriteConcernOptions::kMajority) {
+ if (!writeConcern.wMode.empty() && writeConcern.wMode != "majority") {
StatusWith<ReplicaSetTagPattern> tagPatternStatus =
findCustomWriteMode(writeConcern.wMode);
if (!tagPatternStatus.isOK()) {
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 778e64e2c27..bc137be8ddc 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -858,7 +858,7 @@ namespace {
if (!writeConcern.wMode.empty()) {
StringData patternName;
- if (writeConcern.wMode == WriteConcernOptions::kMajority) {
+ if (writeConcern.wMode == "majority") {
patternName = ReplicaSetConfig::kMajorityWriteConcernModeName;
}
else {
@@ -959,7 +959,7 @@ namespace {
return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
}
- if (replMode == modeMasterSlave && writeConcern.wMode == WriteConcernOptions::kMajority) {
+ if (replMode == modeMasterSlave && writeConcern.wMode == "majority") {
// with master/slave, majority is equivalent to w=1
return StatusAndDuration(Status::OK(), Milliseconds(timer->millis()));
}
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index 6411cb8ea09..d7fcffc6ed6 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -432,7 +432,7 @@ namespace {
writeConcern.wNumNodes = 0;
- writeConcern.wMode = WriteConcernOptions::kMajority;
+ writeConcern.wMode = "majority";
// w:majority always works on master/slave
ReplicationCoordinator::StatusAndDuration statusAndDur = getReplCoord()->awaitReplication(
&txn, time, writeConcern);
@@ -574,7 +574,7 @@ namespace {
// Set up valid write concerns for the rest of the test
WriteConcernOptions majorityWriteConcern;
majorityWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- majorityWriteConcern.wMode = WriteConcernOptions::kMajority;
+ majorityWriteConcern.wMode = "majority";
WriteConcernOptions multiDCWriteConcern;
multiDCWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
@@ -1817,7 +1817,7 @@ namespace {
// majority nodes waiting for time
WriteConcernOptions writeConcern;
writeConcern.wTimeout = WriteConcernOptions::kNoTimeout;
- writeConcern.wMode = WriteConcernOptions::kMajority;
+ writeConcern.wMode = "majority";
ReplicationAwaiter awaiter(getReplCoord(), &txn);
awaiter.setOpTime(time);
@@ -1827,7 +1827,7 @@ namespace {
// demonstrate that majority cannot currently be satisfied
WriteConcernOptions writeConcern2;
writeConcern2.wTimeout = WriteConcernOptions::kNoWaiting;
- writeConcern2.wMode = WriteConcernOptions::kMajority;
+ writeConcern2.wMode = "majority";
ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
getReplCoord()->awaitReplication(&txn, time, writeConcern2).status);
@@ -1884,7 +1884,7 @@ namespace {
WriteConcernOptions majorityWriteConcern;
majorityWriteConcern.wTimeout = WriteConcernOptions::kNoWaiting;
- majorityWriteConcern.wMode = WriteConcernOptions::kMajority;
+ majorityWriteConcern.wMode = "majority";
ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit,
getReplCoord()->awaitReplication(&txn, time, majorityWriteConcern).status);
diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp
index 3c6dd40838e..5a997a32a09 100644
--- a/src/mongo/db/repl/sync_tail.cpp
+++ b/src/mongo/db/repl/sync_tail.cpp
@@ -53,7 +53,6 @@
#include "mongo/db/repl/minvalid.h"
#include "mongo/db/repl/oplog.h"
#include "mongo/db/repl/repl_client_info.h"
-#include "mongo/db/repl/replica_set_config.h"
#include "mongo/db/repl/replication_coordinator_global.h"
#include "mongo/db/stats/timer_stats.h"
#include "mongo/util/exit.h"
@@ -277,8 +276,9 @@ namespace repl {
}
std::vector< std::vector<BSONObj> > writerVectors(replWriterThreadCount);
+ bool mustAwaitCommit = false;
- fillWriterVectors(ops, &writerVectors);
+ fillWriterVectors(ops, &writerVectors, &mustAwaitCommit);
LOG(2) << "replication batch size is " << ops.size() << endl;
// We must grab this because we're going to grab write locks later.
// We hold this mutex the entire time we're writing; it doesn't matter
@@ -302,14 +302,12 @@ namespace repl {
return Timestamp();
}
- const bool mustAwaitCommit = replCoord->isV1ElectionProtocol() && supportsAwaitingCommit();
- if (mustAwaitCommit) {
+ if (supportsAwaitingCommit() && mustAwaitCommit) {
txn->recoveryUnit()->goingToAwaitCommit();
}
-
Timestamp lastOpTime = writeOpsToOplog(txn, ops);
-
- if (mustAwaitCommit) {
+ // Wait for journal before setting last op time if any op in batch had j:true
+ if (supportsAwaitingCommit() && mustAwaitCommit) {
txn->recoveryUnit()->awaitCommit();
}
ReplClientInfo::forClient(txn->getClient()).setLastOp(lastOpTime);
@@ -322,7 +320,8 @@ namespace repl {
}
void SyncTail::fillWriterVectors(const std::deque<BSONObj>& ops,
- std::vector< std::vector<BSONObj> >* writerVectors) {
+ std::vector< std::vector<BSONObj> >* writerVectors,
+ bool* mustAwaitCommit) {
for (std::deque<BSONObj>::const_iterator it = ops.begin();
it != ops.end();
@@ -336,6 +335,12 @@ namespace repl {
const char* opType = it->getField( "op" ).valuestrsafe();
+ // Check if any entry needs journaling, and if so return the need
+ const bool foundJournal = it->getField("j").trueValue();
+ if (foundJournal) {
+ *mustAwaitCommit = true;
+ }
+
if (getGlobalServiceContext()->getGlobalStorageEngine()->supportsDocLocking() &&
isCrudOpType(opType)) {
BSONElement id;
diff --git a/src/mongo/db/repl/sync_tail.h b/src/mongo/db/repl/sync_tail.h
index 8d0f79ced7c..b2bad4c4828 100644
--- a/src/mongo/db/repl/sync_tail.h
+++ b/src/mongo/db/repl/sync_tail.h
@@ -128,8 +128,11 @@ namespace repl {
// Doles out all the work to the writer pool threads and waits for them to complete
void applyOps(const std::vector< std::vector<BSONObj> >& writerVectors);
- void fillWriterVectors(const std::deque<BSONObj>& ops,
- std::vector< std::vector<BSONObj> >* writerVectors);
+ // mustAwaitCommit is an out-parameter and indicates that at least one of the ops
+ // in 'ops' had j:true.
+ void fillWriterVectors(const std::deque<BSONObj>& ops,
+ std::vector< std::vector<BSONObj> >* writerVectors,
+ bool* mustAwaitCommit);
void handleSlaveDelay(const BSONObj& op);
// persistent pool of worker threads for writing ops to the databases
diff --git a/src/mongo/db/write_concern.cpp b/src/mongo/db/write_concern.cpp
index 7bb8edd0fa2..a16352b34fc 100644
--- a/src/mongo/db/write_concern.cpp
+++ b/src/mongo/db/write_concern.cpp
@@ -62,18 +62,6 @@ namespace mongo {
}
}
- namespace {
- // The consensus protocol requires that w: majority implies j: true on all nodes.
- void addJournalSyncForWMajority(WriteConcernOptions* writeConcern) {
- if (repl::getGlobalReplicationCoordinator()->isV1ElectionProtocol()
- && writeConcern->wMode == WriteConcernOptions::kMajority
- && writeConcern->syncMode == WriteConcernOptions::NONE)
- {
- writeConcern->syncMode = WriteConcernOptions::JOURNAL;
- }
- }
- } // namespace
-
StatusWith<WriteConcernOptions> extractWriteConcern(const BSONObj& cmdObj) {
// The default write concern if empty is w : 1
// Specifying w : 0 is/was allowed, but is interpreted identically to w : 1
@@ -82,8 +70,6 @@ namespace mongo {
if (writeConcern.wNumNodes == 0 && writeConcern.wMode.empty()) {
writeConcern.wNumNodes = 1;
}
- // Upgrade default write concern if necessary.
- addJournalSyncForWMajority(&writeConcern);
BSONElement writeConcernElement;
Status wcStatus = bsonExtractTypedField(cmdObj,
@@ -114,9 +100,6 @@ namespace mongo {
return wcStatus;
}
- // Upgrade parsed write concern if necessary.
- addJournalSyncForWMajority(&writeConcern);
-
return writeConcern;
}
@@ -147,7 +130,7 @@ namespace mongo {
if ( replMode != repl::ReplicationCoordinator::modeReplSet &&
!writeConcern.wMode.empty() &&
- writeConcern.wMode != WriteConcernOptions::kMajority ) {
+ writeConcern.wMode != "majority" ) {
return Status( ErrorCodes::BadValue,
string( "cannot use non-majority 'w' mode " ) + writeConcern.wMode
+ " when a host is not a member of a replica set" );
diff --git a/src/mongo/db/write_concern_options.cpp b/src/mongo/db/write_concern_options.cpp
index 993e373ec9b..50bf9cad040 100644
--- a/src/mongo/db/write_concern_options.cpp
+++ b/src/mongo/db/write_concern_options.cpp
@@ -39,8 +39,6 @@ namespace mongo {
const BSONObj WriteConcernOptions::Acknowledged(BSON("w" << W_NORMAL));
const BSONObj WriteConcernOptions::Unacknowledged(BSON("w" << W_NONE));
- const string WriteConcernOptions::kMajority("majority");
-
static const BSONField<bool> mongosSecondaryThrottleField("_secondaryThrottle", true);
static const BSONField<bool> secondaryThrottleField("secondaryThrottle", true);
static const BSONField<BSONObj> writeConcernField("writeConcern");
diff --git a/src/mongo/db/write_concern_options.h b/src/mongo/db/write_concern_options.h
index 945e88a4fca..445b6fab1db 100644
--- a/src/mongo/db/write_concern_options.h
+++ b/src/mongo/db/write_concern_options.h
@@ -46,8 +46,6 @@ namespace mongo {
static const BSONObj Acknowledged;
static const BSONObj Unacknowledged;
- static const std::string kMajority; // = "majority"
-
WriteConcernOptions() { reset(); }
WriteConcernOptions(int numNodes,