diff options
author | Daniel Gottlieb <daniel.gottlieb@mongodb.com> | 2017-11-30 09:39:51 -0500 |
---|---|---|
committer | Daniel Gottlieb <daniel.gottlieb@mongodb.com> | 2017-11-30 09:39:51 -0500 |
commit | 887387753d8a6a07825fca302a8c41142b8f79af (patch) | |
tree | 60d1655d35f2024c88355dbbeb6a3755b5ab12af | |
parent | 4b9362092b1a3b65c1aa2b3cc00bcc7fd1da7040 (diff) | |
download | mongo-887387753d8a6a07825fca302a8c41142b8f79af.tar.gz |
Revert "SERVER-32022: Allow disabling majority reads."
This reverts commit 4b9362092b1a3b65c1aa2b3cc00bcc7fd1da7040. This patch is
instead planned for 3.6.1.
-rw-r--r-- | src/mongo/db/mongod_options.cpp | 9 |
-rw-r--r-- | src/mongo/db/repl/oplog.cpp | 10 |
-rw-r--r-- | src/mongo/db/repl/replication_recovery.cpp | 12 |
-rw-r--r-- | src/mongo/db/repl/sync_tail.cpp | 31 |
-rw-r--r-- | src/mongo/db/storage/kv/kv_engine.h | 5 |
-rw-r--r-- | src/mongo/db/storage/kv/kv_storage_engine.cpp | 4 |
-rw-r--r-- | src/mongo/db/storage/kv/kv_storage_engine.h | 2 |
-rw-r--r-- | src/mongo/db/storage/storage_engine.h | 8 |
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp | 27 |
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h | 16 |
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp | 2 |
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp | 21 |
12 files changed, 32 insertions, 115 deletions
diff --git a/src/mongo/db/mongod_options.cpp b/src/mongo/db/mongod_options.cpp index 1ced8d3ed0f..4360c0e155b 100644 --- a/src/mongo/db/mongod_options.cpp +++ b/src/mongo/db/mongod_options.cpp @@ -1201,15 +1201,6 @@ Status storeMongodOptions(const moe::Environment& params) { if (clusterRoleParam == "configsvr") { serverGlobalParams.clusterRole = ClusterRole::ConfigServer; - if (params.count("replication.enableMajorityReadConcern") && - !params["replication.enableMajorityReadConcern"].as<bool>()) { - warning() - << "Config servers require majority read concern, but it was explicitly " - "disabled. The override is being ignored and the process is continuing " - "with majority read concern enabled."; - } - serverGlobalParams.enableMajorityReadConcern = true; - // If we haven't explicitly specified a journal option, default journaling to true for // the config server role if (!params.count("storage.journal.enabled")) { diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp index 34d01bb71f8..a49b3b1be38 100644 --- a/src/mongo/db/repl/oplog.cpp +++ b/src/mongo/db/repl/oplog.cpp @@ -1112,8 +1112,7 @@ Status applyOperation_inlock(OperationContext* opCtx, const bool assignOperationTimestamp = !opCtx->writesAreReplicated() && !haveWrappingWriteUnitOfWork && fieldTs && getGlobalReplicationCoordinator()->getReplicationMode() != - ReplicationCoordinator::modeMasterSlave && - serverGlobalParams.enableMajorityReadConcern; + ReplicationCoordinator::modeMasterSlave; if (*opType == 'i') { if (requestNss.isSystemDotIndexes()) { @@ -1158,15 +1157,12 @@ Status applyOperation_inlock(OperationContext* opCtx, while (true) { auto oElem = fieldOIt.next(); auto tsElem = fieldTsIt.next(); - SnapshotName timestamp; - if (assignOperationTimestamp) { - timestamp = SnapshotName(tsElem.timestamp()); - } auto tElem = fieldTIt.next(); // Note: we don't care about statement ids here since the secondaries don't create // their own oplog entries. 
- insertObjs.emplace_back(oElem.Obj(), timestamp, tElem.Long()); + insertObjs.emplace_back( + oElem.Obj(), SnapshotName(tsElem.timestamp()), tElem.Long()); if (!fieldOIt.more()) { // Make sure arrays are the same length. uassert(ErrorCodes::OperationFailed, diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp index fd6496746ae..6b416de6e3e 100644 --- a/src/mongo/db/repl/replication_recovery.cpp +++ b/src/mongo/db/repl/replication_recovery.cpp @@ -140,18 +140,6 @@ void ReplicationRecoveryImpl::recoverFromOplog(OperationContext* opCtx) try { } if (auto startPoint = _getOplogApplicationStartPoint(checkpointTimestamp, appliedThrough)) { - // When `recoverFromOplog` truncates the oplog, that also happens to set the "oldest - // timestamp" to the truncation point[1]. `_applyToEndOfOplog` will then perform writes - // before the truncation point. Doing so violates the constraint that all updates must be - // timestamped newer than the "oldest timestamp". This call will move the "oldest - // timestamp" back to the `startPoint`. - // - // [1] This is arguably incorrect. On rollback for nodes that are not keeping history to - // the "majority point", the "oldest timestamp" likely needs to go back in time. The - // oplog's `cappedTruncateAfter` method was a convenient location for this logic, which, - // unfortunately, conflicts with the usage above. 
- opCtx->getServiceContext()->getGlobalStorageEngine()->setOldestTimestamp( - SnapshotName(startPoint.get())); _applyToEndOfOplog(opCtx, startPoint.get(), topOfOplog->getTimestamp()); } diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp index 941a272ef1c..f3af941fbc5 100644 --- a/src/mongo/db/repl/sync_tail.cpp +++ b/src/mongo/db/repl/sync_tail.cpp @@ -71,7 +71,6 @@ #include "mongo/db/session.h" #include "mongo/db/session_txn_record_gen.h" #include "mongo/db/stats/timer_stats.h" -#include "mongo/db/storage/recovery_unit.h" #include "mongo/stdx/memory.h" #include "mongo/util/exit.h" #include "mongo/util/fail_point_service.h" @@ -535,6 +534,7 @@ void scheduleWritesToOplog(OperationContext* opCtx, return; } + const size_t numOplogThreads = threadPool->getNumThreads(); const size_t numOpsPerThread = ops.size() / numOplogThreads; for (size_t thread = 0; thread < numOplogThreads; thread++) { @@ -1511,36 +1511,9 @@ StatusWith<OpTime> multiApply(OperationContext* opCtx, std::vector<Status> statusVector(workerPool->getNumThreads(), Status::OK()); { - const bool pinOldestTimestamp = !serverGlobalParams.enableMajorityReadConcern; - std::unique_ptr<RecoveryUnit> pinningTransaction; - if (pinOldestTimestamp) { - // If `enableMajorityReadConcern` is false, storage aggressively trims - // history. Documents may not be inserted before the cutoff point. This piece will pin - // the "oldest timestamp" until after the batch is fully applied. - // - // When `enableMajorityReadConcern` is false, storage sets the "oldest timestamp" to - // the "get all committed" timestamp. Opening a transaction and setting its timestamp - // to first oplog entry's timestamp will prevent the "get all committed" timestamp - // from advancing. - // - // This transaction will be aborted after all writes from the batch of operations are - // complete. Aborting the transaction allows the "get all committed" point to be - // move forward. 
- pinningTransaction = std::unique_ptr<RecoveryUnit>( - opCtx->getServiceContext()->getGlobalStorageEngine()->newRecoveryUnit()); - pinningTransaction->beginUnitOfWork(opCtx); - fassertStatusOK( - 50658, pinningTransaction->setTimestamp(SnapshotName(ops.front().getTimestamp()))); - } - // We must wait for the all work we've dispatched to complete before leaving this block // because the spawned threads refer to objects on the stack - ON_BLOCK_EXIT([&] { - workerPool->join(); - if (pinOldestTimestamp) { - pinningTransaction->abortUnitOfWork(); - } - }); + ON_BLOCK_EXIT([&] { workerPool->join(); }); // Write batch of ops into oplog. consistencyMarkers->setOplogTruncateAfterPoint(opCtx, ops.front().getTimestamp()); diff --git a/src/mongo/db/storage/kv/kv_engine.h b/src/mongo/db/storage/kv/kv_engine.h index 97237071654..3350971bddc 100644 --- a/src/mongo/db/storage/kv/kv_engine.h +++ b/src/mongo/db/storage/kv/kv_engine.h @@ -257,11 +257,6 @@ public: virtual void setInitialDataTimestamp(SnapshotName initialDataTimestamp) {} /** - * See `StorageEngine::setOldestTimestamp` - */ - virtual void setOldestTimestamp(SnapshotName oldestTimestamp) {} - - /** * See `StorageEngine::supportsRecoverToStableTimestamp` */ virtual bool supportsRecoverToStableTimestamp() const { diff --git a/src/mongo/db/storage/kv/kv_storage_engine.cpp b/src/mongo/db/storage/kv/kv_storage_engine.cpp index 56f5dbf8a88..55d03fd0b52 100644 --- a/src/mongo/db/storage/kv/kv_storage_engine.cpp +++ b/src/mongo/db/storage/kv/kv_storage_engine.cpp @@ -372,10 +372,6 @@ void KVStorageEngine::setInitialDataTimestamp(SnapshotName initialDataTimestamp) _engine->setInitialDataTimestamp(initialDataTimestamp); } -void KVStorageEngine::setOldestTimestamp(SnapshotName oldestTimestamp) { - _engine->setOldestTimestamp(oldestTimestamp); -} - bool KVStorageEngine::supportsRecoverToStableTimestamp() const { return _engine->supportsRecoverToStableTimestamp(); } diff --git a/src/mongo/db/storage/kv/kv_storage_engine.h 
b/src/mongo/db/storage/kv/kv_storage_engine.h index 6ec8f714d62..580e1c7d069 100644 --- a/src/mongo/db/storage/kv/kv_storage_engine.h +++ b/src/mongo/db/storage/kv/kv_storage_engine.h @@ -116,8 +116,6 @@ public: virtual void setInitialDataTimestamp(SnapshotName initialDataTimestamp) override; - virtual void setOldestTimestamp(SnapshotName oldestTimestamp) override; - virtual bool supportsRecoverToStableTimestamp() const override; virtual void replicationBatchIsComplete() const override; diff --git a/src/mongo/db/storage/storage_engine.h b/src/mongo/db/storage/storage_engine.h index d47a6085765..7af4e45b942 100644 --- a/src/mongo/db/storage/storage_engine.h +++ b/src/mongo/db/storage/storage_engine.h @@ -329,14 +329,6 @@ public: virtual void setInitialDataTimestamp(SnapshotName snapshotName) {} /** - * Sets the oldest timestamp which the storage engine must maintain history - * through. Additionally, all future writes must be newer or equal than this value. This - * method is expected to be used in cases where the oldest timestamp should be set back in - * time. - */ - virtual void setOldestTimestamp(SnapshotName snapshotName) {} - - /** * Notifies the storage engine that a replication batch has completed. * This means that all the writes associated with the oplog entries in the batch are * finished and no new writes with timestamps associated with those oplog entries will show diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp index 3e91269c6f0..e16ddefb2ec 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp @@ -1027,40 +1027,15 @@ void WiredTigerKVEngine::setStableTimestamp(SnapshotName stableTimestamp) { // Communicate to WiredTiger that it can clean up timestamp data earlier than the // timestamp provided. No future queries will need point-in-time reads at a timestamp // prior to the one provided here. 
- advanceOldestTimestamp(stableTimestamp); + setOldestTimestamp(stableTimestamp); } } void WiredTigerKVEngine::setOldestTimestamp(SnapshotName oldestTimestamp) { - invariant(oldestTimestamp != SnapshotName()); - - char commitTSConfigString["force=true,oldest_timestamp=,commit_timestamp="_sd.size() + - (2 * 8 * 2) /* 8 hexadecimal characters */ + 1 /* trailing null */]; - auto size = std::snprintf(commitTSConfigString, - sizeof(commitTSConfigString), - "force=true,oldest_timestamp=%llx,commit_timestamp=%llx", - static_cast<unsigned long long>(oldestTimestamp.asU64()), - static_cast<unsigned long long>(oldestTimestamp.asU64())); - if (size < 0) { - int e = errno; - error() << "error snprintf " << errnoWithDescription(e); - fassertFailedNoTrace(40662); - } - - invariant(static_cast<std::size_t>(size) < sizeof(commitTSConfigString)); - invariantWTOK(_conn->set_timestamp(_conn, commitTSConfigString)); - - _oplogManager->setOplogReadTimestamp( - Timestamp(static_cast<unsigned long long>(oldestTimestamp.asU64()))); - LOG(1) << "Forced a new oldest_timestamp. Value: " << oldestTimestamp; -} - -void WiredTigerKVEngine::advanceOldestTimestamp(SnapshotName oldestTimestamp) { if (oldestTimestamp == SnapshotName()) { // No oldestTimestamp to set, yet. return; } - { stdx::unique_lock<stdx::mutex> lock(_oplogManagerMutex); if (!_oplogManager) { diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h index 6015d564d04..afb2a86c42b 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h @@ -217,17 +217,11 @@ public: } /** - * Callers to this method and `setOldestTimestamp` must be serialized. A "timestamping" - * MongoDB can be one of two modes: supporting majority reads or not. A node that supports - * majority reads will have its `oldest_timestamp` updates via replication calling - * `setStableTimestamp`. 
Nodes that do not support majority reads (master-slave or explicitly - * disabled) will call this method directly from the WiredTigerOplogManager background thread. - */ - void advanceOldestTimestamp(SnapshotName oldestTimestamp); - - /** - * Callers to this method and `advanceOldestTimestamp` must be serialized. This method will - * force the oldest timestamp to the input value. + * Callers to this method must be serialized. A "timestamping" MongoDB can be one of two + * modes: supporting majority reads or not. A node that supports majority reads will have its + * `oldest_timestamp` updates via replication calling `setStableTimestamp`. Nodes that do not + * support majority reads (master-slave or explicitly disabled) will call this method directly + * from the WiredTigerOplogManager background thread. */ void setOldestTimestamp(SnapshotName oldestTimestamp); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp index 73a9abe6e9b..3ad8cd506d1 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp @@ -202,7 +202,7 @@ void WiredTigerOplogManager::_oplogJournalThreadLoop(WiredTigerSessionCache* ses // data. This is also exercised when `majorityReadConcern` is disabled. SERVER-31802, // SERVER-32022. 
if (updateOldestTimestamp) { - sessionCache->getKVEngine()->advanceOldestTimestamp(SnapshotName(newTimestamp)); + sessionCache->getKVEngine()->setOldestTimestamp(SnapshotName(newTimestamp)); } } } diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp index 1c1636167bd..336b7d9d486 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp @@ -1645,7 +1645,26 @@ void WiredTigerRecordStore::cappedTruncateAfter(OperationContext* opCtx, // Immediately rewind visibility to our truncation point, to prevent new // transactions from appearing. Timestamp truncTs(lastKeptId.repr()); - _kvEngine->setOldestTimestamp(SnapshotName(truncTs)); + + + char commitTSConfigString["commit_timestamp="_sd.size() + + (8 * 2) /* 8 hexadecimal characters */ + 1 /* trailing null */]; + auto size = std::snprintf(commitTSConfigString, + sizeof(commitTSConfigString), + "commit_timestamp=%llx", + truncTs.asULL()); + if (size < 0) { + int e = errno; + error() << "error snprintf " << errnoWithDescription(e); + fassertFailedNoTrace(40662); + } + + invariant(static_cast<std::size_t>(size) < sizeof(commitTSConfigString)); + auto conn = WiredTigerRecoveryUnit::get(opCtx)->getSessionCache()->conn(); + invariantWTOK(conn->set_timestamp(conn, commitTSConfigString)); + + _kvEngine->getOplogManager()->setOplogReadTimestamp(truncTs); + LOG(1) << "truncation new read timestamp: " << truncTs; } if (_oplogStones) { |