summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Chen <andrew.chen@10gen.com>2020-02-14 16:30:47 -0500
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-02-14 22:04:06 +0000
commit0f54376c08d368ffcc80f65a0a4271cc0e6e865a (patch)
treeaf1037063af420dd1433782a842b40b821ee173f
parent379f283cf4e16d84c8d58f702b7b590b50e5a383 (diff)
downloadmongo-0f54376c08d368ffcc80f65a0a4271cc0e6e865a.tar.gz
SERVER-45975: Added --oplogMinRetention cli option
create mode 100644 jstests/noPassthrough/oplog_retention_hours.js
-rw-r--r--jstests/noPassthrough/oplog_retention_hours.js99
-rw-r--r--src/mongo/db/mongod_options.cpp10
-rw-r--r--src/mongo/db/mongod_options_storage.idl8
-rw-r--r--src/mongo/db/storage/storage_options.cpp1
-rw-r--r--src/mongo/db/storage/storage_options.h6
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp24
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h10
7 files changed, 149 insertions, 9 deletions
diff --git a/jstests/noPassthrough/oplog_retention_hours.js b/jstests/noPassthrough/oplog_retention_hours.js
new file mode 100644
index 00000000000..5e7bdb2457d
--- /dev/null
+++ b/jstests/noPassthrough/oplog_retention_hours.js
@@ -0,0 +1,99 @@
+/**
+ * When started with the --oplogMinRetentionHours flag, the server must enforce a minimum retention
+ * time (in hours) in addition to the implicit oplogSize for the oplog.
+ *
+ * Only when the oplog's size has exceeded the server's --oplogSize parameter AND the timestamp
+ * of the newest oplog entry in the oldest stone has fallen outside of the retention window do we
+ * remove the last stone.
+ *
+ * This test floods the oplog collection until it reaches --oplogSize, and then checks that the
+ * current size of the oplog is less than --oplogSize only after the minimum retention time has
+ * passed since inserting the first set of oplog entries
+ *
+ * @tags: [requires_journaling]
+ */
+(function() {
+"use strict";
+load("jstests/core/txns/libs/prepare_helpers.js");
+
+const doTest = () => {
+ const replSet = new ReplSetTest({
+ oplogSize: PrepareHelpers.oplogSizeMB,
+ // Oplog can be truncated each "sync" cycle. Increase its frequency to once per second.
+ nodeOptions: {syncdelay: 1, setParameter: {logComponentVerbosity: tojson({storage: 1})}},
+ nodes: 1
+ });
+ const oplogMinRetentionHours = 0.002777;
+ const minRetention = {oplogMinRetentionHours}; // 10 seconds
+ replSet.startSet(Object.assign(minRetention, PrepareHelpers.replSetStartSetOptions));
+ replSet.initiate();
+ const primary = replSet.getPrimary();
+ let oplogEntries = primary.getDB("local").getCollection("oplog.rs");
+
+ // ensure that oplog is not initially at capacity
+ assert.lt(oplogEntries.dataSize(), PrepareHelpers.oplogSizeBytes);
+
+ primary.startSession();
+
+ jsTestLog("Insert documents until oplog exceeds oplogSize");
+ const startTime = new Date();
+ PrepareHelpers.growOplogPastMaxSize(replSet);
+ // keep inserting docs until hasReplSetBeenTruncated returns true
+ InsertUntilPred(replSet, didReplSetTruncate, replSet);
+ const endTime = new Date();
+
+ const kNumMSInHour = 1000 * 60 * 60;
+ const truncationElapsedTime = (endTime - startTime) / kNumMSInHour;
+ assert.lte(oplogMinRetentionHours, truncationElapsedTime);
+
+ replSet.stopSet();
+};
+
+/**
+ * InsertUntilPred inserts documents into a single-node replica set until the predicate argument
+ * returns true.
+ *
+ * This helper takes in the following arguments:
+ *
+ * - replSet: A single-node replica set
+ *
+ * - pred: A function that returns a boolean statement. When this pred returns true, we stop
+ * inserting documents
+ *
+ * - args: A list of arguments that is passed into the predicate function argument as its
+ * arguments
+ */
+const InsertUntilPred = (replSet, pred, ...args) => {
+ const primary = replSet.getPrimary();
+ const oplog = primary.getDB("local").oplog.rs;
+ const coll = primary.getDB("insertUntilPred").growOplogPastMaxSize;
+ const numNodes = replSet.nodeList().length;
+ const tenKB = new Array(10 * 1024).join("a");
+
+ print(`Oplog on ${primary} dataSize = ${oplog.dataSize()}`);
+ assert.soon(
+ () => {
+ if (pred(...args)) {
+ jsTestLog("Predicate returned true, so we're done");
+ return true;
+ }
+
+ jsTestLog("Inserting a doc...");
+ // insert a doc if predicate is not true
+ assert.commandWorked(coll.insert({tenKB: tenKB}, {writeConcern: {w: numNodes}}));
+ return false;
+ },
+ `timeout occurred while waiting for predicate function to return true`,
+ ReplSetTest.kDefaultTimeoutMS,
+ 1000);
+};
+
+// checks if the oplog has been truncated
+const didReplSetTruncate = replSet => {
+ const oplogCol = replSet.getPrimary().getDB("local").oplog.rs;
+ // The oplog milestone system allows the oplog to grow to 110% its max size.
+ return oplogCol.dataSize() < 1.1 * PrepareHelpers.oplogSizeBytes;
+};
+
+doTest();
+})();
diff --git a/src/mongo/db/mongod_options.cpp b/src/mongo/db/mongod_options.cpp
index d6b4b733e51..9d0030df49e 100644
--- a/src/mongo/db/mongod_options.cpp
+++ b/src/mongo/db/mongod_options.cpp
@@ -528,6 +528,16 @@ Status storeMongodOptions(const moe::Environment& params) {
invariant(replSettings.getOplogSizeBytes() > 0);
}
+ if (params.count("storage.oplogMinRetentionHours")) {
+ storageGlobalParams.oplogMinRetentionHours.store(
+ params["storage.oplogMinRetentionHours"].as<double>());
+ if (storageGlobalParams.oplogMinRetentionHours.load() < 0) {
+ return Status(ErrorCodes::BadValue,
+ "bad --oplogMinRetentionHours, argument must be greater or equal to 0");
+ }
+ invariant(storageGlobalParams.oplogMinRetentionHours.load() >= 0);
+ }
+
if (params.count("cacheSize")) {
long x = params["cacheSize"].as<long>();
if (x <= 0) {
diff --git a/src/mongo/db/mongod_options_storage.idl b/src/mongo/db/mongod_options_storage.idl
index 8c51c9e7295..116ad5ecacf 100644
--- a/src/mongo/db/mongod_options_storage.idl
+++ b/src/mongo/db/mongod_options_storage.idl
@@ -122,3 +122,11 @@ configs:
deprecated_short_name: nodur
arg_vartype: Switch
source: [ cli, ini ]
+
+ 'storage.oplogMinRetentionHours':
+ description: 'Minimum number of hours to preserve in the oplog. Default is 0 (turned off). Fractions are allowed (e.g. 1.5 hours)'
+ short_name: oplogMinRetentionHours
+ arg_vartype: Double
+ default: 0.0
+ validator:
+ gte: 0.0
diff --git a/src/mongo/db/storage/storage_options.cpp b/src/mongo/db/storage/storage_options.cpp
index 5cc702e1302..f9eda312535 100644
--- a/src/mongo/db/storage/storage_options.cpp
+++ b/src/mongo/db/storage/storage_options.cpp
@@ -55,6 +55,7 @@ void StorageGlobalParams::reset() {
syncdelay = 60.0;
readOnly = false;
groupCollections = false;
+ oplogMinRetentionHours.store(0.0);
}
StorageGlobalParams storageGlobalParams;
diff --git a/src/mongo/db/storage/storage_options.h b/src/mongo/db/storage/storage_options.h
index ceec92d42fb..4202cbc10ef 100644
--- a/src/mongo/db/storage/storage_options.h
+++ b/src/mongo/db/storage/storage_options.h
@@ -110,6 +110,12 @@ struct StorageGlobalParams {
// workloads that rely heavily on creating many collections within a database.
bool groupCollections;
+ // --oplogMinRetentionHours
+ // Controls what size the oplog should be in addition to oplogSize. If set, the oplog will only
+ // be truncated if it is over the capped size, and if the bucket of oldest oplog entries fall
+ // outside of the retention window which is set by this option.
+ AtomicWord<double> oplogMinRetentionHours;
+
// Controls whether we allow the OplogStones mechanism to delete oplog history on WT.
bool allowOplogTruncation = true;
};
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
index 235cb8f057e..fd2c083edca 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
@@ -49,6 +49,7 @@
#include "mongo/db/namespace_string.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/repl/repl_settings.h"
+#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/server_recovery.h"
#include "mongo/db/service_context.h"
#include "mongo/db/storage/oplog_hack.h"
@@ -85,6 +86,8 @@ static const int kMaximumRecordStoreVersion = 1;
MONGO_STATIC_ASSERT(kCurrentRecordStoreVersion >= kMinimumRecordStoreVersion);
MONGO_STATIC_ASSERT(kCurrentRecordStoreVersion <= kMaximumRecordStoreVersion);
+const double kNumSecsInHour = 3600.0;
+
void checkOplogFormatVersion(OperationContext* opCtx, const std::string& uri) {
StatusWith<BSONObj> appMetadata = WiredTigerUtil::getApplicationMetadata(opCtx, uri);
fassert(39999, appMetadata);
@@ -226,6 +229,27 @@ void WiredTigerRecordStore::OplogStones::awaitHasExcessStonesOrDead() {
}
}
+bool WiredTigerRecordStore::OplogStones::hasExcessStones_inlock() const {
+ int64_t totalBytes = 0;
+ for (auto&& stone : _stones) {
+ totalBytes += stone.bytes;
+ }
+
+ // check that oplog stones is at capacity
+ if (totalBytes <= _rs->cappedMaxSize()) {
+ return false;
+ }
+
+ double minRetentionHours = storageGlobalParams.oplogMinRetentionHours.load();
+
+ auto rc = repl::ReplicationCoordinator::get(getGlobalServiceContext());
+ double lastAppliedTs = rc->getMyLastAppliedOpTime().getTimestamp().getSecs();
+ double lastStoneTs = Timestamp(_stones.front().lastRecord.repr()).getSecs();
+
+ double currRetentionHours = (lastAppliedTs - lastStoneTs) / kNumSecsInHour;
+ return currRetentionHours >= minRetentionHours;
+}
+
boost::optional<WiredTigerRecordStore::OplogStones::Stone>
WiredTigerRecordStore::OplogStones::peekOldestStoneIfNeeded() const {
stdx::lock_guard<Latch> lk(_mutex);
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h
index 0dcb844f1d0..15950177e74 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h
@@ -57,15 +57,7 @@ public:
void kill();
- bool hasExcessStones_inlock() const {
- int64_t total_bytes = 0;
- for (std::deque<OplogStones::Stone>::const_iterator it = _stones.begin();
- it != _stones.end();
- ++it) {
- total_bytes += it->bytes;
- }
- return total_bytes > _rs->cappedMaxSize();
- }
+ bool hasExcessStones_inlock() const;
void awaitHasExcessStonesOrDead();