diff options
author | Andrew Chen <andrew.chen@10gen.com> | 2020-02-14 16:30:47 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-02-14 22:04:06 +0000 |
commit | 0f54376c08d368ffcc80f65a0a4271cc0e6e865a (patch) | |
tree | af1037063af420dd1433782a842b40b821ee173f | |
parent | 379f283cf4e16d84c8d58f702b7b590b50e5a383 (diff) | |
download | mongo-0f54376c08d368ffcc80f65a0a4271cc0e6e865a.tar.gz |
SERVER-45975: Added --oplogMinRetentionHours cli option
create mode 100644 jstests/noPassthrough/oplog_retention_hours.js
-rw-r--r-- | jstests/noPassthrough/oplog_retention_hours.js | 99 | ||||
-rw-r--r-- | src/mongo/db/mongod_options.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/mongod_options_storage.idl | 8 | ||||
-rw-r--r-- | src/mongo/db/storage/storage_options.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/storage/storage_options.h | 6 | ||||
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp | 24 | ||||
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h | 10 |
7 files changed, 149 insertions, 9 deletions
diff --git a/jstests/noPassthrough/oplog_retention_hours.js b/jstests/noPassthrough/oplog_retention_hours.js new file mode 100644 index 00000000000..5e7bdb2457d --- /dev/null +++ b/jstests/noPassthrough/oplog_retention_hours.js @@ -0,0 +1,99 @@ +/** + * When started with the --oplogMinRetentionHours flag, the server must enforce a minimum retention + * time (in hours) in addition to the implicit oplogSize for the oplog. + * + * Only when the oplog's size has exceeded the server's --oplogSize parameter AND the timestamp + * of the newest oplog entry in the oldest stone has fallen outside of the retention window do we + * remove the last stone. + * + * This test floods the oplog collection until it reaches --oplogSize, and then checks that the + * current size of the oplog is less than --oplogSize only after the minimum retention time has + * passed since inserting the first set of oplog entries + * + * @tags: [requires_journaling] + */ +(function() { +"use strict"; +load("jstests/core/txns/libs/prepare_helpers.js"); + +const doTest = () => { + const replSet = new ReplSetTest({ + oplogSize: PrepareHelpers.oplogSizeMB, + // Oplog can be truncated each "sync" cycle. Increase its frequency to once per second. 
+ nodeOptions: {syncdelay: 1, setParameter: {logComponentVerbosity: tojson({storage: 1})}}, + nodes: 1 + }); + const oplogMinRetentionHours = 0.002777; + const minRetention = {oplogMinRetentionHours}; // 10 seconds + replSet.startSet(Object.assign(minRetention, PrepareHelpers.replSetStartSetOptions)); + replSet.initiate(); + const primary = replSet.getPrimary(); + let oplogEntries = primary.getDB("local").getCollection("oplog.rs"); + + // ensure that oplog is not initially at capacity + assert.lt(oplogEntries.dataSize(), PrepareHelpers.oplogSizeBytes); + + primary.startSession(); + + jsTestLog("Insert documents until oplog exceeds oplogSize"); + const startTime = new Date(); + PrepareHelpers.growOplogPastMaxSize(replSet); + // keep inserting docs until hasReplSetBeenTruncated returns true + InsertUntilPred(replSet, didReplSetTruncate, replSet); + const endTime = new Date(); + + const kNumMSInHour = 1000 * 60 * 60; + const truncationElapsedTime = (endTime - startTime) / kNumMSInHour; + assert.lte(oplogMinRetentionHours, truncationElapsedTime); + + replSet.stopSet(); +}; + +/** + * InsertUntilPred inserts documents into a single-node replica set until the predicate argument + * returns true. + * + * This helper takes in the following arguments: + * + * - replSet: A single-node replica set + * + * - pred: A function that returns a boolean statement. 
When this pred returns true, we stop + * inserting documents + * + * - args: A list of arguments that is passed into the predicate function argument as its + * arguments + */ +const InsertUntilPred = (replSet, pred, ...args) => { + const primary = replSet.getPrimary(); + const oplog = primary.getDB("local").oplog.rs; + const coll = primary.getDB("insertUntilPred").growOplogPastMaxSize; + const numNodes = replSet.nodeList().length; + const tenKB = new Array(10 * 1024).join("a"); + + print(`Oplog on ${primary} dataSize = ${oplog.dataSize()}`); + assert.soon( + () => { + if (pred(...args)) { + jsTestLog("Predicate returned true, so we're done"); + return true; + } + + jsTestLog("Inserting a doc..."); + // insert a doc if predicate is not true + assert.commandWorked(coll.insert({tenKB: tenKB}, {writeConcern: {w: numNodes}})); + return false; + }, + `timeout occurred while waiting for predicate function to return true`, + ReplSetTest.kDefaultTimeoutMS, + 1000); +}; + +// checks if the oplog has been truncated +const didReplSetTruncate = replSet => { + const oplogCol = replSet.getPrimary().getDB("local").oplog.rs; + // The oplog milestone system allows the oplog to grow to 110% its max size. 
+ return oplogCol.dataSize() < 1.1 * PrepareHelpers.oplogSizeBytes; +}; + +doTest(); +})(); diff --git a/src/mongo/db/mongod_options.cpp b/src/mongo/db/mongod_options.cpp index d6b4b733e51..9d0030df49e 100644 --- a/src/mongo/db/mongod_options.cpp +++ b/src/mongo/db/mongod_options.cpp @@ -528,6 +528,16 @@ Status storeMongodOptions(const moe::Environment& params) { invariant(replSettings.getOplogSizeBytes() > 0); } + if (params.count("storage.oplogMinRetentionHours")) { + storageGlobalParams.oplogMinRetentionHours.store( + params["storage.oplogMinRetentionHours"].as<double>()); + if (storageGlobalParams.oplogMinRetentionHours.load() < 0) { + return Status(ErrorCodes::BadValue, + "bad --oplogMinRetentionHours, argument must be greater or equal to 0"); + } + invariant(storageGlobalParams.oplogMinRetentionHours.load() >= 0); + } + if (params.count("cacheSize")) { long x = params["cacheSize"].as<long>(); if (x <= 0) { diff --git a/src/mongo/db/mongod_options_storage.idl b/src/mongo/db/mongod_options_storage.idl index 8c51c9e7295..116ad5ecacf 100644 --- a/src/mongo/db/mongod_options_storage.idl +++ b/src/mongo/db/mongod_options_storage.idl @@ -122,3 +122,11 @@ configs: deprecated_short_name: nodur arg_vartype: Switch source: [ cli, ini ] + + 'storage.oplogMinRetentionHours': + description: 'Minimum number of hours to preserve in the oplog. Default is 0 (turned off). Fractions are allowed (e.g. 
1.5 hours)' + short_name: oplogMinRetentionHours + arg_vartype: Double + default: 0.0 + validator: + gte: 0.0 diff --git a/src/mongo/db/storage/storage_options.cpp b/src/mongo/db/storage/storage_options.cpp index 5cc702e1302..f9eda312535 100644 --- a/src/mongo/db/storage/storage_options.cpp +++ b/src/mongo/db/storage/storage_options.cpp @@ -55,6 +55,7 @@ void StorageGlobalParams::reset() { syncdelay = 60.0; readOnly = false; groupCollections = false; + oplogMinRetentionHours.store(0.0); } StorageGlobalParams storageGlobalParams; diff --git a/src/mongo/db/storage/storage_options.h b/src/mongo/db/storage/storage_options.h index ceec92d42fb..4202cbc10ef 100644 --- a/src/mongo/db/storage/storage_options.h +++ b/src/mongo/db/storage/storage_options.h @@ -110,6 +110,12 @@ struct StorageGlobalParams { // workloads that rely heavily on creating many collections within a database. bool groupCollections; + // --oplogMinRetentionHours + // Controls what size the oplog should be in addition to oplogSize. If set, the oplog will only + // be truncated if it is over the capped size, and if the bucket of oldest oplog entries fall + // outside of the retention window which is set by this option. + AtomicWord<double> oplogMinRetentionHours; + // Controls whether we allow the OplogStones mechanism to delete oplog history on WT. 
bool allowOplogTruncation = true; }; diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp index 235cb8f057e..fd2c083edca 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp @@ -49,6 +49,7 @@ #include "mongo/db/namespace_string.h" #include "mongo/db/operation_context.h" #include "mongo/db/repl/repl_settings.h" +#include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/server_recovery.h" #include "mongo/db/service_context.h" #include "mongo/db/storage/oplog_hack.h" @@ -85,6 +86,8 @@ static const int kMaximumRecordStoreVersion = 1; MONGO_STATIC_ASSERT(kCurrentRecordStoreVersion >= kMinimumRecordStoreVersion); MONGO_STATIC_ASSERT(kCurrentRecordStoreVersion <= kMaximumRecordStoreVersion); +const double kNumSecsInHour = 3600.0; + void checkOplogFormatVersion(OperationContext* opCtx, const std::string& uri) { StatusWith<BSONObj> appMetadata = WiredTigerUtil::getApplicationMetadata(opCtx, uri); fassert(39999, appMetadata); @@ -226,6 +229,27 @@ void WiredTigerRecordStore::OplogStones::awaitHasExcessStonesOrDead() { } } +bool WiredTigerRecordStore::OplogStones::hasExcessStones_inlock() const { + int64_t totalBytes = 0; + for (auto&& stone : _stones) { + totalBytes += stone.bytes; + } + + // check that oplog stones is at capacity + if (totalBytes <= _rs->cappedMaxSize()) { + return false; + } + + double minRetentionHours = storageGlobalParams.oplogMinRetentionHours.load(); + + auto rc = repl::ReplicationCoordinator::get(getGlobalServiceContext()); + double lastAppliedTs = rc->getMyLastAppliedOpTime().getTimestamp().getSecs(); + double lastStoneTs = Timestamp(_stones.front().lastRecord.repr()).getSecs(); + + double currRetentionHours = (lastAppliedTs - lastStoneTs) / kNumSecsInHour; + return currRetentionHours >= minRetentionHours; +} + boost::optional<WiredTigerRecordStore::OplogStones::Stone> 
WiredTigerRecordStore::OplogStones::peekOldestStoneIfNeeded() const { stdx::lock_guard<Latch> lk(_mutex); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h index 0dcb844f1d0..15950177e74 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h @@ -57,15 +57,7 @@ public: void kill(); - bool hasExcessStones_inlock() const { - int64_t total_bytes = 0; - for (std::deque<OplogStones::Stone>::const_iterator it = _stones.begin(); - it != _stones.end(); - ++it) { - total_bytes += it->bytes; - } - return total_bytes > _rs->cappedMaxSize(); - } + bool hasExcessStones_inlock() const; void awaitHasExcessStonesOrDead(); |