summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--jstests/replsets/oplog_rollover.js5
-rw-r--r--jstests/replsets/oplog_sampling.js41
-rw-r--r--src/mongo/db/storage/record_store.h9
-rw-r--r--src/mongo/db/storage/wiredtiger/SConscript14
-rw-r--r--src/mongo/db/storage/wiredtiger/oplog_stone_parameters.idl59
-rw-r--r--src/mongo/db/storage/wiredtiger/oplog_stones_server_status_section.cpp68
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp48
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h6
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h12
9 files changed, 253 insertions, 9 deletions
diff --git a/jstests/replsets/oplog_rollover.js b/jstests/replsets/oplog_rollover.js
index 7ec72467a71..d44600fa598 100644
--- a/jstests/replsets/oplog_rollover.js
+++ b/jstests/replsets/oplog_rollover.js
@@ -104,6 +104,11 @@ function doTest(storageEngine) {
assert.soon(() => {
return numInsertOplogEntry(secondaryOplog) === 2;
}, "Timeout waiting for oplog to roll over on secondary");
+
+ const res = primary.getDB("test").runCommand({serverStatus: 1});
+ assert.commandWorked(res);
+ assert.eq(res.oplogTruncation.truncateCount, 1, tojson(res.oplogTruncation));
+ assert.gt(res.oplogTruncation.totalTimeTruncatingMicros, 0, tojson(res.oplogTruncation));
} else {
// Only test that oplog truncation will eventually happen.
let numInserted = 2;
diff --git a/jstests/replsets/oplog_sampling.js b/jstests/replsets/oplog_sampling.js
new file mode 100644
index 00000000000..a87f17e72a3
--- /dev/null
+++ b/jstests/replsets/oplog_sampling.js
@@ -0,0 +1,41 @@
+/**
+ * Ensure serverStatus reports the total time spent sampling the oplog for all storage engines that
+ * support OplogStones.
+ * @tags: [ requires_wiredtiger ]
+ */
+(function() {
+"use strict";
+
+// Force oplog sampling to occur on start up for small numbers of oplog inserts.
+const replSet = new ReplSetTest(
+ {nodes: 1, nodeOptions: {setParameter: {"maxOplogTruncationPointsDuringStartup": 10}}});
+replSet.startSet();
+replSet.initiate();
+
+let coll = replSet.getPrimary().getDB("test").getCollection("testcoll");
+
+let res = replSet.getPrimary().getDB("test").serverStatus();
+assert.commandWorked(res);
+
+// Small (or empty) oplogs should be processed by scanning.
+assert.gt(res.oplogTruncation.totalTimeProcessingMicros, 0);
+assert.eq(res.oplogTruncation.processingMethod, "scanning");
+
+// Insert enough documents to force oplog sampling to occur on the following start up.
+const maxOplogDocsForScanning = 2000;
+for (let i = 0; i < maxOplogDocsForScanning + 1; i++) {
+ assert.commandWorked(coll.insert({m: 1 + i}));
+}
+
+// Restart replica set to load entries from the oplog for sampling.
+replSet.stopSet(null /* signal */, true /* forRestart */);
+replSet.startSet({restart: true});
+
+res = replSet.getPrimary().getDB("test").serverStatus();
+assert.commandWorked(res);
+
+assert.gt(res.oplogTruncation.totalTimeProcessingMicros, 0);
+assert.eq(res.oplogTruncation.processingMethod, "sampling");
+
+replSet.stopSet();
+})();
diff --git a/src/mongo/db/storage/record_store.h b/src/mongo/db/storage/record_store.h
index ee5793c128d..e690b35a618 100644
--- a/src/mongo/db/storage/record_store.h
+++ b/src/mongo/db/storage/record_store.h
@@ -521,6 +521,15 @@ public:
MONGO_UNREACHABLE;
}
+ /**
+ * This should only be called if StorageEngine::supportsOplogStones() is true.
+ * Storage engines supporting oplog stones must implement this function.
+ * Populates `builder` with various statistics pertaining to oplog stones and oplog truncation.
+ */
+ virtual void getOplogTruncateStats(BSONObjBuilder& builder) const {
+ MONGO_UNREACHABLE;
+ }
+
protected:
std::string _ns;
diff --git a/src/mongo/db/storage/wiredtiger/SConscript b/src/mongo/db/storage/wiredtiger/SConscript
index 2b25da63dd3..88451147071 100644
--- a/src/mongo/db/storage/wiredtiger/SConscript
+++ b/src/mongo/db/storage/wiredtiger/SConscript
@@ -34,6 +34,7 @@ if wiredtiger:
wtEnv.Library(
target='storage_wiredtiger_core',
source= [
+ 'oplog_stones_server_status_section.cpp',
'wiredtiger_begin_transaction_block.cpp',
'wiredtiger_cursor.cpp',
'wiredtiger_global_options.cpp',
@@ -84,6 +85,9 @@ if wiredtiger:
'storage_wiredtiger_customization_hooks',
],
LIBDEPS_PRIVATE= [
+ 'oplog_stone_parameters',
+ '$BUILD_DIR/mongo/db/db_raii',
+ '$BUILD_DIR/mongo/db/commands/server_status',
'$BUILD_DIR/mongo/db/snapshot_window_options',
'$BUILD_DIR/mongo/db/storage/storage_repair_observer',
'$BUILD_DIR/mongo/util/options_parser/options_parser',
@@ -173,6 +177,16 @@ if wiredtiger:
],
)
+ wtEnv.Library(
+ target='oplog_stone_parameters',
+ source=[
+ env.Idlc('oplog_stone_parameters.idl')[0],
+ ],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/idl/server_parameter',
+ ],
+ )
+
# All of these tests fail to compile under undefined behavior
# sanitizer due to unexpressed circular dependency edges. In particular
# they all need a definition from the 'catalog'.
diff --git a/src/mongo/db/storage/wiredtiger/oplog_stone_parameters.idl b/src/mongo/db/storage/wiredtiger/oplog_stone_parameters.idl
new file mode 100644
index 00000000000..4737f234bd4
--- /dev/null
+++ b/src/mongo/db/storage/wiredtiger/oplog_stone_parameters.idl
@@ -0,0 +1,59 @@
+# Copyright (C) 2019-present MongoDB, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the Server Side Public License, version 1,
+# as published by MongoDB, Inc.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# Server Side Public License for more details.
+#
+# You should have received a copy of the Server Side Public License
+# along with this program. If not, see
+# <http://www.mongodb.com/licensing/server-side-public-license>.
+#
+# As a special exception, the copyright holders give permission to link the
+# code of portions of this program with the OpenSSL library under certain
+# conditions as described in each individual source file and distribute
+# linked combinations including the program with the OpenSSL library. You
+# must comply with the Server Side Public License in all respects for
+# all of the code used other than as permitted herein. If you modify file(s)
+# with this exception, you may extend this exception to your version of the
+# file(s), but you are not obligated to do so. If you do not wish to do so,
+# delete this exception statement from your version. If you delete this
+# exception statement from all source files in the program, then also delete
+# it in the license file.
+#
+global:
+ cpp_namespace: "mongo"
+
+server_parameters:
+ maxOplogTruncationPointsAfterStartup:
+ description: 'Maximum allowable number of oplog truncation points after startup has finished'
+ set_at: [ startup ]
+ cpp_vartype: 'long long'
+ cpp_varname: gMaxOplogStonesAfterStartup
+ default: 100
+ validator: { gt: 0 }
+ maxOplogTruncationPointsDuringStartup:
+ description: 'Maximum allowable number of oplog truncation points during startup'
+ set_at: [ startup ]
+ cpp_vartype: 'long long'
+ cpp_varname: gMaxOplogStonesDuringStartup
+ default: 100
+ validator: { gt: 0 }
+ minOplogTruncationPoints:
+ description: 'Minimum allowable number of oplog truncation points'
+ set_at: [ startup ]
+ cpp_vartype: 'long long'
+ cpp_varname: gMinOplogStones
+ default: 10
+ validator: { gt: 0 }
+ oplogTruncationPointSizeMB:
+ description: 'Oplog truncation point size in MB used to determine the number of oplog truncation points for an oplog of a given size. The size will be rounded up to the maximum size of an internal BSON object.'
+ set_at: [ startup ]
+ cpp_vartype: 'int'
+ cpp_varname: gOplogStoneSizeMB
+ default: 0
+ validator: { gte: 0 }
diff --git a/src/mongo/db/storage/wiredtiger/oplog_stones_server_status_section.cpp b/src/mongo/db/storage/wiredtiger/oplog_stones_server_status_section.cpp
new file mode 100644
index 00000000000..0c87e2e8693
--- /dev/null
+++ b/src/mongo/db/storage/wiredtiger/oplog_stones_server_status_section.cpp
@@ -0,0 +1,68 @@
+/**
+ * Copyright (C) 2019-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/commands/server_status.h"
+#include "mongo/db/db_raii.h"
+#include "mongo/db/namespace_string.h"
+
+namespace mongo {
+class OplogStonesServerStatusSection : public ServerStatusSection {
+public:
+ OplogStonesServerStatusSection() : ServerStatusSection("oplogTruncation") {}
+ /**
+ * <ServerStatusSection>
+ */
+ bool includeByDefault() const override {
+ return true;
+ }
+
+ /**
+ * <ServerStatusSection>
+ */
+ BSONObj generateSection(OperationContext* opCtx,
+ const BSONElement& configElement) const override {
+ BSONObjBuilder builder;
+ if (!opCtx->getServiceContext()->getStorageEngine()->supportsOplogStones()) {
+ return builder.obj();
+ }
+ {
+ AutoGetCollectionForReadCommand ctx(opCtx, NamespaceString::kRsOplogNamespace);
+ Collection* oplogColl = ctx.getCollection();
+ if (oplogColl) {
+ oplogColl->getRecordStore()->getOplogTruncateStats(builder);
+ }
+ }
+ return builder.obj();
+ }
+
+
+} oplogStonesStats;
+} // namespace mongo
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
index 48f9a53dbff..45abc3f224d 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
@@ -52,6 +52,7 @@
#include "mongo/db/server_recovery.h"
#include "mongo/db/service_context.h"
#include "mongo/db/storage/oplog_hack.h"
+#include "mongo/db/storage/wiredtiger/oplog_stone_parameters_gen.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_customization_hooks.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_global_options.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h"
@@ -165,10 +166,16 @@ WiredTigerRecordStore::OplogStones::OplogStones(OperationContext* opCtx, WiredTi
invariant(rs->cappedMaxSize() > 0);
unsigned long long maxSize = rs->cappedMaxSize();
- const unsigned long long kMinStonesToKeep = 10ULL;
- const unsigned long long kMaxStonesToKeep = 100ULL;
+ // The minimum oplog stone size should be BSONObjMaxInternalSize.
+ const unsigned int oplogStoneSize =
+ std::max(gOplogStoneSizeMB * 1024 * 1024, BSONObjMaxInternalSize);
- unsigned long long numStones = maxSize / BSONObjMaxInternalSize;
+ // IDL does not support unsigned long long types.
+ const unsigned long long kMinStonesToKeep = static_cast<unsigned long long>(gMinOplogStones);
+ const unsigned long long kMaxStonesToKeep =
+ static_cast<unsigned long long>(gMaxOplogStonesDuringStartup);
+
+ unsigned long long numStones = maxSize / oplogStoneSize;
size_t numStonesToKeep = std::min(kMaxStonesToKeep, std::max(kMinStonesToKeep, numStones));
_minBytesPerStone = maxSize / numStonesToKeep;
invariant(_minBytesPerStone > 0);
@@ -314,6 +321,12 @@ void WiredTigerRecordStore::OplogStones::setMinBytesPerStone(int64_t size) {
void WiredTigerRecordStore::OplogStones::_calculateStones(OperationContext* opCtx,
size_t numStonesToKeep) {
+ const std::uint64_t startWaitTime = curTimeMicros64();
+ ON_BLOCK_EXIT([&] {
+ auto waitTime = curTimeMicros64() - startWaitTime;
+ log() << "WiredTiger record store oplog processing took " << waitTime / 1000 << "ms";
+ _totalTimeProcessing.fetchAndAdd(waitTime);
+ });
long long numRecords = _rs->numRecords(opCtx);
long long dataSize = _rs->dataSize(opCtx);
@@ -343,6 +356,7 @@ void WiredTigerRecordStore::OplogStones::_calculateStones(OperationContext* opCt
}
void WiredTigerRecordStore::OplogStones::_calculateStonesByScanning(OperationContext* opCtx) {
+ _processBySampling.store(false); // process by scanning
log() << "Scanning the oplog to determine where to place markers for truncation";
long long numRecords = 0;
@@ -370,6 +384,8 @@ void WiredTigerRecordStore::OplogStones::_calculateStonesByScanning(OperationCon
void WiredTigerRecordStore::OplogStones::_calculateStonesBySampling(OperationContext* opCtx,
int64_t estRecordsPerStone,
int64_t estBytesPerStone) {
+ log() << "Sampling the oplog to determine where to place markers for truncation";
+ _processBySampling.store(true); // process by sampling
Timestamp earliestOpTime;
Timestamp latestOpTime;
@@ -458,10 +474,16 @@ void WiredTigerRecordStore::OplogStones::_pokeReclaimThreadIfNeeded() {
void WiredTigerRecordStore::OplogStones::adjust(int64_t maxSize) {
stdx::lock_guard<Latch> lk(_mutex);
- const unsigned long long kMinStonesToKeep = 10ULL;
- const unsigned long long kMaxStonesToKeep = 100ULL;
- unsigned long long numStones = maxSize / BSONObjMaxInternalSize;
+ const unsigned int oplogStoneSize =
+ std::max(gOplogStoneSizeMB * 1024 * 1024, BSONObjMaxInternalSize);
+
+ // IDL does not support unsigned long long types.
+ const unsigned long long kMinStonesToKeep = static_cast<unsigned long long>(gMinOplogStones);
+ const unsigned long long kMaxStonesToKeep =
+ static_cast<unsigned long long>(gMaxOplogStonesAfterStartup);
+
+ unsigned long long numStones = maxSize / oplogStoneSize;
size_t numStonesToKeep = std::min(kMaxStonesToKeep, std::max(kMinStonesToKeep, numStones));
_minBytesPerStone = maxSize / numStonesToKeep;
invariant(_minBytesPerStone > 0);
@@ -765,6 +787,14 @@ void WiredTigerRecordStore::postConstructorInit(OperationContext* opCtx) {
}
}
+void WiredTigerRecordStore::getOplogTruncateStats(BSONObjBuilder& builder) const {
+ if (_oplogStones) {
+ _oplogStones->getOplogStonesStats(builder);
+ }
+ builder.append("totalTimeTruncatingMicros", _totalTimeTruncating.load());
+ builder.append("truncateCount", _truncateCount.load());
+}
+
const char* WiredTigerRecordStore::name() const {
return _engineName.c_str();
}
@@ -1221,7 +1251,11 @@ void WiredTigerRecordStore::reclaimOplog(OperationContext* opCtx, Timestamp mayT
LOG(1) << "Finished truncating the oplog, it now contains approximately "
<< _sizeInfo->numRecords.load() << " records totaling to " << _sizeInfo->dataSize.load()
<< " bytes";
- log() << "WiredTiger record store oplog truncation finished in: " << timer.millis() << "ms";
+ auto elapsedMicros = timer.micros();
+ auto elapsedMillis = elapsedMicros / 1000;
+ _totalTimeTruncating.fetchAndAdd(elapsedMicros);
+ _truncateCount.fetchAndAdd(1);
+ log() << "WiredTiger record store oplog truncation finished in: " << elapsedMillis << "ms";
}
Status WiredTigerRecordStore::insertRecords(OperationContext* opCtx,
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
index 6d1c4010c3d..99c0b70fb78 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
@@ -118,6 +118,8 @@ public:
WiredTigerRecordStore(WiredTigerKVEngine* kvEngine, OperationContext* opCtx, Params params);
+ virtual void getOplogTruncateStats(BSONObjBuilder& builder) const;
+
virtual ~WiredTigerRecordStore();
virtual void postConstructorInit(OperationContext* opCtx);
@@ -377,6 +379,10 @@ private:
// Non-null if this record store is underlying the active oplog.
std::shared_ptr<OplogStones> _oplogStones;
+
+ AtomicWord<int64_t>
+ _totalTimeTruncating; // Cumulative amount of time spent truncating the oplog.
+ AtomicWord<int64_t> _truncateCount; // Cumulative number of truncates of the oplog.
};
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h
index f88334ea85b..9553ee9f56a 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h
@@ -69,6 +69,11 @@ public:
void awaitHasExcessStonesOrDead();
+ void getOplogStonesStats(BSONObjBuilder& builder) const {
+ builder.append("totalTimeProcessingMicros", _totalTimeProcessing.load());
+ builder.append("processingMethod", _processBySampling.load() ? "sampling" : "scanning");
+ }
+
boost::optional<OplogStones::Stone> peekOldestStoneIfNeeded() const;
void popOldestStone();
@@ -140,8 +145,11 @@ private:
// deque of oplog stones.
int64_t _minBytesPerStone;
- AtomicWord<long long> _currentRecords; // Number of records in the stone being filled.
- AtomicWord<long long> _currentBytes; // Number of bytes in the stone being filled.
+ AtomicWord<long long> _currentRecords; // Number of records in the stone being filled.
+ AtomicWord<long long> _currentBytes; // Number of bytes in the stone being filled.
+ AtomicWord<int64_t> _totalTimeProcessing; // Amount of time spent scanning and/or sampling the
+ // oplog during start up, if any.
+ AtomicWord<bool> _processBySampling; // Whether the oplog was sampled or scanned.
// Protects against concurrent access to the deque of oplog stones.
mutable Mutex _mutex = MONGO_MAKE_LATCH("OplogStones::_mutex");