summary refs log tree commit diff
diff options
context:
space:
mode:
author Daniel Gottlieb <daniel.gottlieb@mongodb.com> 2017-11-30 09:39:51 -0500
committer Daniel Gottlieb <daniel.gottlieb@mongodb.com> 2017-11-30 09:39:51 -0500
commit 887387753d8a6a07825fca302a8c41142b8f79af (patch)
tree 60d1655d35f2024c88355dbbeb6a3755b5ab12af
parent 4b9362092b1a3b65c1aa2b3cc00bcc7fd1da7040 (diff)
download mongo-887387753d8a6a07825fca302a8c41142b8f79af.tar.gz
Revert "SERVER-32022: Allow disabling majority reads."
This reverts commit 4b9362092b1a3b65c1aa2b3cc00bcc7fd1da7040. This patch is instead planned for 3.6.1.
-rw-r--r-- src/mongo/db/mongod_options.cpp 9
-rw-r--r-- src/mongo/db/repl/oplog.cpp 10
-rw-r--r-- src/mongo/db/repl/replication_recovery.cpp 12
-rw-r--r-- src/mongo/db/repl/sync_tail.cpp 31
-rw-r--r-- src/mongo/db/storage/kv/kv_engine.h 5
-rw-r--r-- src/mongo/db/storage/kv/kv_storage_engine.cpp 4
-rw-r--r-- src/mongo/db/storage/kv/kv_storage_engine.h 2
-rw-r--r-- src/mongo/db/storage/storage_engine.h 8
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp 27
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h 16
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp 2
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp 21
12 files changed, 32 insertions, 115 deletions
diff --git a/src/mongo/db/mongod_options.cpp b/src/mongo/db/mongod_options.cpp
index 1ced8d3ed0f..4360c0e155b 100644
--- a/src/mongo/db/mongod_options.cpp
+++ b/src/mongo/db/mongod_options.cpp
@@ -1201,15 +1201,6 @@ Status storeMongodOptions(const moe::Environment& params) {
if (clusterRoleParam == "configsvr") {
serverGlobalParams.clusterRole = ClusterRole::ConfigServer;
- if (params.count("replication.enableMajorityReadConcern") &&
- !params["replication.enableMajorityReadConcern"].as<bool>()) {
- warning()
- << "Config servers require majority read concern, but it was explicitly "
- "disabled. The override is being ignored and the process is continuing "
- "with majority read concern enabled.";
- }
- serverGlobalParams.enableMajorityReadConcern = true;
-
// If we haven't explicitly specified a journal option, default journaling to true for
// the config server role
if (!params.count("storage.journal.enabled")) {
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index 34d01bb71f8..a49b3b1be38 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -1112,8 +1112,7 @@ Status applyOperation_inlock(OperationContext* opCtx,
const bool assignOperationTimestamp = !opCtx->writesAreReplicated() &&
!haveWrappingWriteUnitOfWork && fieldTs &&
getGlobalReplicationCoordinator()->getReplicationMode() !=
- ReplicationCoordinator::modeMasterSlave &&
- serverGlobalParams.enableMajorityReadConcern;
+ ReplicationCoordinator::modeMasterSlave;
if (*opType == 'i') {
if (requestNss.isSystemDotIndexes()) {
@@ -1158,15 +1157,12 @@ Status applyOperation_inlock(OperationContext* opCtx,
while (true) {
auto oElem = fieldOIt.next();
auto tsElem = fieldTsIt.next();
- SnapshotName timestamp;
- if (assignOperationTimestamp) {
- timestamp = SnapshotName(tsElem.timestamp());
- }
auto tElem = fieldTIt.next();
// Note: we don't care about statement ids here since the secondaries don't create
// their own oplog entries.
- insertObjs.emplace_back(oElem.Obj(), timestamp, tElem.Long());
+ insertObjs.emplace_back(
+ oElem.Obj(), SnapshotName(tsElem.timestamp()), tElem.Long());
if (!fieldOIt.more()) {
// Make sure arrays are the same length.
uassert(ErrorCodes::OperationFailed,
diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp
index fd6496746ae..6b416de6e3e 100644
--- a/src/mongo/db/repl/replication_recovery.cpp
+++ b/src/mongo/db/repl/replication_recovery.cpp
@@ -140,18 +140,6 @@ void ReplicationRecoveryImpl::recoverFromOplog(OperationContext* opCtx) try {
}
if (auto startPoint = _getOplogApplicationStartPoint(checkpointTimestamp, appliedThrough)) {
- // When `recoverFromOplog` truncates the oplog, that also happens to set the "oldest
- // timestamp" to the truncation point[1]. `_applyToEndOfOplog` will then perform writes
- // before the truncation point. Doing so violates the constraint that all updates must be
- // timestamped newer than the "oldest timestamp". This call will move the "oldest
- // timestamp" back to the `startPoint`.
- //
- // [1] This is arguably incorrect. On rollback for nodes that are not keeping history to
- // the "majority point", the "oldest timestamp" likely needs to go back in time. The
- // oplog's `cappedTruncateAfter` method was a convenient location for this logic, which,
- // unfortunately, conflicts with the usage above.
- opCtx->getServiceContext()->getGlobalStorageEngine()->setOldestTimestamp(
- SnapshotName(startPoint.get()));
_applyToEndOfOplog(opCtx, startPoint.get(), topOfOplog->getTimestamp());
}
diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp
index 941a272ef1c..f3af941fbc5 100644
--- a/src/mongo/db/repl/sync_tail.cpp
+++ b/src/mongo/db/repl/sync_tail.cpp
@@ -71,7 +71,6 @@
#include "mongo/db/session.h"
#include "mongo/db/session_txn_record_gen.h"
#include "mongo/db/stats/timer_stats.h"
-#include "mongo/db/storage/recovery_unit.h"
#include "mongo/stdx/memory.h"
#include "mongo/util/exit.h"
#include "mongo/util/fail_point_service.h"
@@ -535,6 +534,7 @@ void scheduleWritesToOplog(OperationContext* opCtx,
return;
}
+
const size_t numOplogThreads = threadPool->getNumThreads();
const size_t numOpsPerThread = ops.size() / numOplogThreads;
for (size_t thread = 0; thread < numOplogThreads; thread++) {
@@ -1511,36 +1511,9 @@ StatusWith<OpTime> multiApply(OperationContext* opCtx,
std::vector<Status> statusVector(workerPool->getNumThreads(), Status::OK());
{
- const bool pinOldestTimestamp = !serverGlobalParams.enableMajorityReadConcern;
- std::unique_ptr<RecoveryUnit> pinningTransaction;
- if (pinOldestTimestamp) {
- // If `enableMajorityReadConcern` is false, storage aggressively trims
- // history. Documents may not be inserted before the cutoff point. This piece will pin
- // the "oldest timestamp" until after the batch is fully applied.
- //
- // When `enableMajorityReadConcern` is false, storage sets the "oldest timestamp" to
- // the "get all committed" timestamp. Opening a transaction and setting its timestamp
- // to first oplog entry's timestamp will prevent the "get all committed" timestamp
- // from advancing.
- //
- // This transaction will be aborted after all writes from the batch of operations are
- // complete. Aborting the transaction allows the "get all committed" point to be
- // move forward.
- pinningTransaction = std::unique_ptr<RecoveryUnit>(
- opCtx->getServiceContext()->getGlobalStorageEngine()->newRecoveryUnit());
- pinningTransaction->beginUnitOfWork(opCtx);
- fassertStatusOK(
- 50658, pinningTransaction->setTimestamp(SnapshotName(ops.front().getTimestamp())));
- }
-
// We must wait for the all work we've dispatched to complete before leaving this block
// because the spawned threads refer to objects on the stack
- ON_BLOCK_EXIT([&] {
- workerPool->join();
- if (pinOldestTimestamp) {
- pinningTransaction->abortUnitOfWork();
- }
- });
+ ON_BLOCK_EXIT([&] { workerPool->join(); });
// Write batch of ops into oplog.
consistencyMarkers->setOplogTruncateAfterPoint(opCtx, ops.front().getTimestamp());
diff --git a/src/mongo/db/storage/kv/kv_engine.h b/src/mongo/db/storage/kv/kv_engine.h
index 97237071654..3350971bddc 100644
--- a/src/mongo/db/storage/kv/kv_engine.h
+++ b/src/mongo/db/storage/kv/kv_engine.h
@@ -257,11 +257,6 @@ public:
virtual void setInitialDataTimestamp(SnapshotName initialDataTimestamp) {}
/**
- * See `StorageEngine::setOldestTimestamp`
- */
- virtual void setOldestTimestamp(SnapshotName oldestTimestamp) {}
-
- /**
* See `StorageEngine::supportsRecoverToStableTimestamp`
*/
virtual bool supportsRecoverToStableTimestamp() const {
diff --git a/src/mongo/db/storage/kv/kv_storage_engine.cpp b/src/mongo/db/storage/kv/kv_storage_engine.cpp
index 56f5dbf8a88..55d03fd0b52 100644
--- a/src/mongo/db/storage/kv/kv_storage_engine.cpp
+++ b/src/mongo/db/storage/kv/kv_storage_engine.cpp
@@ -372,10 +372,6 @@ void KVStorageEngine::setInitialDataTimestamp(SnapshotName initialDataTimestamp)
_engine->setInitialDataTimestamp(initialDataTimestamp);
}
-void KVStorageEngine::setOldestTimestamp(SnapshotName oldestTimestamp) {
- _engine->setOldestTimestamp(oldestTimestamp);
-}
-
bool KVStorageEngine::supportsRecoverToStableTimestamp() const {
return _engine->supportsRecoverToStableTimestamp();
}
diff --git a/src/mongo/db/storage/kv/kv_storage_engine.h b/src/mongo/db/storage/kv/kv_storage_engine.h
index 6ec8f714d62..580e1c7d069 100644
--- a/src/mongo/db/storage/kv/kv_storage_engine.h
+++ b/src/mongo/db/storage/kv/kv_storage_engine.h
@@ -116,8 +116,6 @@ public:
virtual void setInitialDataTimestamp(SnapshotName initialDataTimestamp) override;
- virtual void setOldestTimestamp(SnapshotName oldestTimestamp) override;
-
virtual bool supportsRecoverToStableTimestamp() const override;
virtual void replicationBatchIsComplete() const override;
diff --git a/src/mongo/db/storage/storage_engine.h b/src/mongo/db/storage/storage_engine.h
index d47a6085765..7af4e45b942 100644
--- a/src/mongo/db/storage/storage_engine.h
+++ b/src/mongo/db/storage/storage_engine.h
@@ -329,14 +329,6 @@ public:
virtual void setInitialDataTimestamp(SnapshotName snapshotName) {}
/**
- * Sets the oldest timestamp which the storage engine must maintain history
- * through. Additionally, all future writes must be newer or equal than this value. This
- * method is expected to be used in cases where the oldest timestamp should be set back in
- * time.
- */
- virtual void setOldestTimestamp(SnapshotName snapshotName) {}
-
- /**
* Notifies the storage engine that a replication batch has completed.
* This means that all the writes associated with the oplog entries in the batch are
* finished and no new writes with timestamps associated with those oplog entries will show
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
index 3e91269c6f0..e16ddefb2ec 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
@@ -1027,40 +1027,15 @@ void WiredTigerKVEngine::setStableTimestamp(SnapshotName stableTimestamp) {
// Communicate to WiredTiger that it can clean up timestamp data earlier than the
// timestamp provided. No future queries will need point-in-time reads at a timestamp
// prior to the one provided here.
- advanceOldestTimestamp(stableTimestamp);
+ setOldestTimestamp(stableTimestamp);
}
}
void WiredTigerKVEngine::setOldestTimestamp(SnapshotName oldestTimestamp) {
- invariant(oldestTimestamp != SnapshotName());
-
- char commitTSConfigString["force=true,oldest_timestamp=,commit_timestamp="_sd.size() +
- (2 * 8 * 2) /* 8 hexadecimal characters */ + 1 /* trailing null */];
- auto size = std::snprintf(commitTSConfigString,
- sizeof(commitTSConfigString),
- "force=true,oldest_timestamp=%llx,commit_timestamp=%llx",
- static_cast<unsigned long long>(oldestTimestamp.asU64()),
- static_cast<unsigned long long>(oldestTimestamp.asU64()));
- if (size < 0) {
- int e = errno;
- error() << "error snprintf " << errnoWithDescription(e);
- fassertFailedNoTrace(40662);
- }
-
- invariant(static_cast<std::size_t>(size) < sizeof(commitTSConfigString));
- invariantWTOK(_conn->set_timestamp(_conn, commitTSConfigString));
-
- _oplogManager->setOplogReadTimestamp(
- Timestamp(static_cast<unsigned long long>(oldestTimestamp.asU64())));
- LOG(1) << "Forced a new oldest_timestamp. Value: " << oldestTimestamp;
-}
-
-void WiredTigerKVEngine::advanceOldestTimestamp(SnapshotName oldestTimestamp) {
if (oldestTimestamp == SnapshotName()) {
// No oldestTimestamp to set, yet.
return;
}
-
{
stdx::unique_lock<stdx::mutex> lock(_oplogManagerMutex);
if (!_oplogManager) {
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
index 6015d564d04..afb2a86c42b 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h
@@ -217,17 +217,11 @@ public:
}
/**
- * Callers to this method and `setOldestTimestamp` must be serialized. A "timestamping"
- * MongoDB can be one of two modes: supporting majority reads or not. A node that supports
- * majority reads will have its `oldest_timestamp` updates via replication calling
- * `setStableTimestamp`. Nodes that do not support majority reads (master-slave or explicitly
- * disabled) will call this method directly from the WiredTigerOplogManager background thread.
- */
- void advanceOldestTimestamp(SnapshotName oldestTimestamp);
-
- /**
- * Callers to this method and `advanceOldestTimestamp` must be serialized. This method will
- * force the oldest timestamp to the input value.
+ * Callers to this method must be serialized. A "timestamping" MongoDB can be one of two
+ * modes: supporting majority reads or not. A node that supports majority reads will have its
+ * `oldest_timestamp` updates via replication calling `setStableTimestamp`. Nodes that do not
+ * support majority reads (master-slave or explicitly disabled) will call this method directly
+ * from the WiredTigerOplogManager background thread.
*/
void setOldestTimestamp(SnapshotName oldestTimestamp);
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp
index 73a9abe6e9b..3ad8cd506d1 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp
@@ -202,7 +202,7 @@ void WiredTigerOplogManager::_oplogJournalThreadLoop(WiredTigerSessionCache* ses
// data. This is also exercised when `majorityReadConcern` is disabled. SERVER-31802,
// SERVER-32022.
if (updateOldestTimestamp) {
- sessionCache->getKVEngine()->advanceOldestTimestamp(SnapshotName(newTimestamp));
+ sessionCache->getKVEngine()->setOldestTimestamp(SnapshotName(newTimestamp));
}
}
}
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
index 1c1636167bd..336b7d9d486 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
@@ -1645,7 +1645,26 @@ void WiredTigerRecordStore::cappedTruncateAfter(OperationContext* opCtx,
// Immediately rewind visibility to our truncation point, to prevent new
// transactions from appearing.
Timestamp truncTs(lastKeptId.repr());
- _kvEngine->setOldestTimestamp(SnapshotName(truncTs));
+
+
+ char commitTSConfigString["commit_timestamp="_sd.size() +
+ (8 * 2) /* 8 hexadecimal characters */ + 1 /* trailing null */];
+ auto size = std::snprintf(commitTSConfigString,
+ sizeof(commitTSConfigString),
+ "commit_timestamp=%llx",
+ truncTs.asULL());
+ if (size < 0) {
+ int e = errno;
+ error() << "error snprintf " << errnoWithDescription(e);
+ fassertFailedNoTrace(40662);
+ }
+
+ invariant(static_cast<std::size_t>(size) < sizeof(commitTSConfigString));
+ auto conn = WiredTigerRecoveryUnit::get(opCtx)->getSessionCache()->conn();
+ invariantWTOK(conn->set_timestamp(conn, commitTSConfigString));
+
+ _kvEngine->getOplogManager()->setOplogReadTimestamp(truncTs);
+ LOG(1) << "truncation new read timestamp: " << truncTs;
}
if (_oplogStones) {