summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Maria van Keulen <maria.vankeulen@mongodb.com> 2019-10-10 22:05:51 +0000
committer: evergreen <evergreen@mongodb.com> 2019-10-10 22:05:51 +0000
commit: dd819eb95636f47f13638259208ae8a69e48ded7 (patch)
tree: ce42f99d086105373cf2df7c74740a910279fad2
parent: de724d3d8a82667ac9da97c39abf0b9d9728cba4 (diff)
download: mongo-dd819eb95636f47f13638259208ae8a69e48ded7.tar.gz
SERVER-43322 Add tunable oplog stone sizes and track truncation speeds
(cherry picked from commit 294a8f68615710b47936d5ee42439d01538ac746)
SERVER-43322 Disallow oplog_sampling.js on inMemory storage engine
(cherry picked from commit b35dd89515473c97a87b3c06897e8a7ab51c93cc)
This backport was not a straightforward cherry-pick because SERVER-40168 exists in master but not in v4.2.
-rw-r--r-- jstests/replsets/oplog_rollover.js | 8
-rw-r--r-- jstests/replsets/oplog_sampling.js | 41
-rw-r--r-- src/mongo/db/storage/wiredtiger/SConscript | 13
-rw-r--r-- src/mongo/db/storage/wiredtiger/oplog_stone_parameters.idl | 59
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_init.cpp | 1
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp | 48
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h | 6
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h | 12
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp | 25
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_server_status.h | 11
10 files changed, 215 insertions, 9 deletions
diff --git a/jstests/replsets/oplog_rollover.js b/jstests/replsets/oplog_rollover.js
index b9a08bbb80e..5199a1802fc 100644
--- a/jstests/replsets/oplog_rollover.js
+++ b/jstests/replsets/oplog_rollover.js
@@ -104,6 +104,14 @@ function doTest(storageEngine) {
assert.soon(() => {
return numInsertOplogEntry(secondaryOplog) === 2;
}, "Timeout waiting for oplog to roll over on secondary");
+
+ if (jsTest.options().storageEngine == "wiredTiger") {
+ const res = primary.getDB("test").runCommand({serverStatus: 1});
+ assert.commandWorked(res);
+ assert.eq(res.oplogTruncation.truncateCount, 1, tojson(res.oplogTruncation));
+ assert.gt(
+ res.oplogTruncation.totalTimeTruncatingMicros, 0, tojson(res.oplogTruncation));
+ }
} else {
// Only test that oplog truncation will eventually happen.
let numInserted = 2;
diff --git a/jstests/replsets/oplog_sampling.js b/jstests/replsets/oplog_sampling.js
new file mode 100644
index 00000000000..3407b1826c9
--- /dev/null
+++ b/jstests/replsets/oplog_sampling.js
@@ -0,0 +1,41 @@
+/**
+ * Ensure serverStatus reports the total time spent sampling the oplog for all storage engines that
+ * support OplogStones.
+ * @tags: [ requires_wiredtiger, requires_persistence ]
+ */
+(function() {
+"use strict";
+
+// Force oplog sampling to occur on start up for small numbers of oplog inserts.
+const replSet = new ReplSetTest(
+ {nodes: 1, nodeOptions: {setParameter: {"maxOplogTruncationPointsDuringStartup": 10}}});
+replSet.startSet();
+replSet.initiate();
+
+let coll = replSet.getPrimary().getDB("test").getCollection("testcoll");
+
+let res = replSet.getPrimary().getDB("test").serverStatus();
+assert.commandWorked(res);
+
+// Small (or empty) oplogs should be processed by scanning.
+assert.gt(res.oplogTruncation.totalTimeProcessingMicros, 0);
+assert.eq(res.oplogTruncation.processingMethod, "scanning");
+
+// Insert enough documents to force oplog sampling to occur on the following start up.
+const maxOplogDocsForScanning = 2000;
+for (let i = 0; i < maxOplogDocsForScanning + 1; i++) {
+ assert.commandWorked(coll.insert({m: 1 + i}));
+}
+
+// Restart replica set to load entries from the oplog for sampling.
+replSet.stopSet(null /* signal */, true /* forRestart */);
+replSet.startSet({restart: true});
+
+res = replSet.getPrimary().getDB("test").serverStatus();
+assert.commandWorked(res);
+
+assert.gt(res.oplogTruncation.totalTimeProcessingMicros, 0);
+assert.eq(res.oplogTruncation.processingMethod, "sampling");
+
+replSet.stopSet();
+})();
diff --git a/src/mongo/db/storage/wiredtiger/SConscript b/src/mongo/db/storage/wiredtiger/SConscript
index 4434fea01bd..33dd1b9c934 100644
--- a/src/mongo/db/storage/wiredtiger/SConscript
+++ b/src/mongo/db/storage/wiredtiger/SConscript
@@ -83,6 +83,9 @@ if wiredtiger:
'storage_wiredtiger_customization_hooks',
],
LIBDEPS_PRIVATE= [
+ 'oplog_stone_parameters',
+ '$BUILD_DIR/mongo/db/db_raii',
+ '$BUILD_DIR/mongo/db/commands/server_status',
'$BUILD_DIR/mongo/db/snapshot_window_options',
'$BUILD_DIR/mongo/db/storage/storage_repair_observer',
'$BUILD_DIR/mongo/util/options_parser/options_parser',
@@ -183,6 +186,16 @@ if wiredtiger:
],
)
+ wtEnv.Library(
+ target='oplog_stone_parameters',
+ source=[
+ env.Idlc('oplog_stone_parameters.idl')[0],
+ ],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/idl/server_parameter',
+ ],
+ )
+
# All of these tests fail to compile under undefined behavior
# sanitizer due to unexpressed circular dependency edges. In particular
# they all need a definition from the 'catalog'.
diff --git a/src/mongo/db/storage/wiredtiger/oplog_stone_parameters.idl b/src/mongo/db/storage/wiredtiger/oplog_stone_parameters.idl
new file mode 100644
index 00000000000..4737f234bd4
--- /dev/null
+++ b/src/mongo/db/storage/wiredtiger/oplog_stone_parameters.idl
@@ -0,0 +1,59 @@
+# Copyright (C) 2019-present MongoDB, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the Server Side Public License, version 1,
+# as published by MongoDB, Inc.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# Server Side Public License for more details.
+#
+# You should have received a copy of the Server Side Public License
+# along with this program. If not, see
+# <http://www.mongodb.com/licensing/server-side-public-license>.
+#
+# As a special exception, the copyright holders give permission to link the
+# code of portions of this program with the OpenSSL library under certain
+# conditions as described in each individual source file and distribute
+# linked combinations including the program with the OpenSSL library. You
+# must comply with the Server Side Public License in all respects for
+# all of the code used other than as permitted herein. If you modify file(s)
+# with this exception, you may extend this exception to your version of the
+# file(s), but you are not obligated to do so. If you do not wish to do so,
+# delete this exception statement from your version. If you delete this
+# exception statement from all source files in the program, then also delete
+# it in the license file.
+#
+global:
+ cpp_namespace: "mongo"
+
+server_parameters:
+ maxOplogTruncationPointsAfterStartup:
+ description: 'Maximum allowable number of oplog truncation points after startup has finished'
+ set_at: [ startup ]
+ cpp_vartype: 'long long'
+ cpp_varname: gMaxOplogStonesAfterStartup
+ default: 100
+ validator: { gt: 0 }
+ maxOplogTruncationPointsDuringStartup:
+ description: 'Maximum allowable number of oplog truncation points during startup'
+ set_at: [ startup ]
+ cpp_vartype: 'long long'
+ cpp_varname: gMaxOplogStonesDuringStartup
+ default: 100
+ validator: { gt: 0 }
+ minOplogTruncationPoints:
+ description: 'Minimum allowable number of oplog truncation points'
+ set_at: [ startup ]
+ cpp_vartype: 'long long'
+ cpp_varname: gMinOplogStones
+ default: 10
+ validator: { gt: 0 }
+ oplogTruncationPointSizeMB:
+ description: 'Oplog truncation point size in MB used to determine the number of oplog truncation points for an oplog of a given size. The size will be rounded up to the maximum size of an internal BSON object.'
+ set_at: [ startup ]
+ cpp_vartype: 'int'
+ cpp_varname: gOplogStoneSizeMB
+ default: 0
+ validator: { gte: 0 }
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_init.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_init.cpp
index b01e796f1de..3a06df1682b 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_init.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_init.cpp
@@ -118,6 +118,7 @@ public:
kv->setSortedDataInterfaceExtraOptions(wiredTigerGlobalOptions.indexConfig);
// Intentionally leaked.
new WiredTigerServerStatusSection(kv);
+ new OplogStonesServerStatusSection();
auto* param = new WiredTigerEngineRuntimeConfigParameter("wiredTigerEngineRuntimeConfig",
ServerParameterType::kRuntimeOnly);
param->_data.second = kv;
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
index 16358b35029..d04a8edf30a 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
@@ -50,6 +50,7 @@
#include "mongo/db/server_recovery.h"
#include "mongo/db/service_context.h"
#include "mongo/db/storage/oplog_hack.h"
+#include "mongo/db/storage/wiredtiger/oplog_stone_parameters_gen.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_customization_hooks.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_global_options.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h"
@@ -164,10 +165,16 @@ WiredTigerRecordStore::OplogStones::OplogStones(OperationContext* opCtx, WiredTi
invariant(rs->cappedMaxSize() > 0);
unsigned long long maxSize = rs->cappedMaxSize();
- const unsigned long long kMinStonesToKeep = 10ULL;
- const unsigned long long kMaxStonesToKeep = 100ULL;
+ // The minimum oplog stone size should be BSONObjMaxInternalSize.
+ const unsigned int oplogStoneSize =
+ std::max(gOplogStoneSizeMB * 1024 * 1024, BSONObjMaxInternalSize);
- unsigned long long numStones = maxSize / BSONObjMaxInternalSize;
+ // IDL does not support unsigned long long types.
+ const unsigned long long kMinStonesToKeep = static_cast<unsigned long long>(gMinOplogStones);
+ const unsigned long long kMaxStonesToKeep =
+ static_cast<unsigned long long>(gMaxOplogStonesDuringStartup);
+
+ unsigned long long numStones = maxSize / oplogStoneSize;
size_t numStonesToKeep = std::min(kMaxStonesToKeep, std::max(kMinStonesToKeep, numStones));
_minBytesPerStone = maxSize / numStonesToKeep;
invariant(_minBytesPerStone > 0);
@@ -313,6 +320,12 @@ void WiredTigerRecordStore::OplogStones::setMinBytesPerStone(int64_t size) {
void WiredTigerRecordStore::OplogStones::_calculateStones(OperationContext* opCtx,
size_t numStonesToKeep) {
+ const std::uint64_t startWaitTime = curTimeMicros64();
+ ON_BLOCK_EXIT([&] {
+ auto waitTime = curTimeMicros64() - startWaitTime;
+ log() << "WiredTiger record store oplog processing took " << waitTime / 1000 << "ms";
+ _totalTimeProcessing.fetchAndAdd(waitTime);
+ });
long long numRecords = _rs->numRecords(opCtx);
long long dataSize = _rs->dataSize(opCtx);
@@ -342,6 +355,7 @@ void WiredTigerRecordStore::OplogStones::_calculateStones(OperationContext* opCt
}
void WiredTigerRecordStore::OplogStones::_calculateStonesByScanning(OperationContext* opCtx) {
+ _processBySampling.store(false); // process by scanning
log() << "Scanning the oplog to determine where to place markers for truncation";
long long numRecords = 0;
@@ -369,6 +383,8 @@ void WiredTigerRecordStore::OplogStones::_calculateStonesByScanning(OperationCon
void WiredTigerRecordStore::OplogStones::_calculateStonesBySampling(OperationContext* opCtx,
int64_t estRecordsPerStone,
int64_t estBytesPerStone) {
+ log() << "Sampling the oplog to determine where to place markers for truncation";
+ _processBySampling.store(true); // process by sampling
Timestamp earliestOpTime;
Timestamp latestOpTime;
@@ -457,10 +473,16 @@ void WiredTigerRecordStore::OplogStones::_pokeReclaimThreadIfNeeded() {
void WiredTigerRecordStore::OplogStones::adjust(int64_t maxSize) {
stdx::lock_guard<stdx::mutex> lk(_mutex);
- const unsigned long long kMinStonesToKeep = 10ULL;
- const unsigned long long kMaxStonesToKeep = 100ULL;
- unsigned long long numStones = maxSize / BSONObjMaxInternalSize;
+ const unsigned int oplogStoneSize =
+ std::max(gOplogStoneSizeMB * 1024 * 1024, BSONObjMaxInternalSize);
+
+ // IDL does not support unsigned long long types.
+ const unsigned long long kMinStonesToKeep = static_cast<unsigned long long>(gMinOplogStones);
+ const unsigned long long kMaxStonesToKeep =
+ static_cast<unsigned long long>(gMaxOplogStonesAfterStartup);
+
+ unsigned long long numStones = maxSize / oplogStoneSize;
size_t numStonesToKeep = std::min(kMaxStonesToKeep, std::max(kMinStonesToKeep, numStones));
_minBytesPerStone = maxSize / numStonesToKeep;
invariant(_minBytesPerStone > 0);
@@ -763,6 +785,14 @@ void WiredTigerRecordStore::postConstructorInit(OperationContext* opCtx) {
}
}
+void WiredTigerRecordStore::getOplogTruncateStats(BSONObjBuilder& builder) const {
+ if (_oplogStones) {
+ _oplogStones->getOplogStonesStats(builder);
+ }
+ builder.append("totalTimeTruncatingMicros", _totalTimeTruncating.load());
+ builder.append("truncateCount", _truncateCount.load());
+}
+
const char* WiredTigerRecordStore::name() const {
return _engineName.c_str();
}
@@ -1220,7 +1250,11 @@ void WiredTigerRecordStore::reclaimOplog(OperationContext* opCtx, Timestamp mayT
LOG(1) << "Finished truncating the oplog, it now contains approximately "
<< _sizeInfo->numRecords.load() << " records totaling to " << _sizeInfo->dataSize.load()
<< " bytes";
- log() << "WiredTiger record store oplog truncation finished in: " << timer.millis() << "ms";
+ auto elapsedMicros = timer.micros();
+ auto elapsedMillis = elapsedMicros / 1000;
+ _totalTimeTruncating.fetchAndAdd(elapsedMicros);
+ _truncateCount.fetchAndAdd(1);
+ log() << "WiredTiger record store oplog truncation finished in: " << elapsedMillis << "ms";
}
Status WiredTigerRecordStore::insertRecords(OperationContext* opCtx,
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
index a3d5870a489..9737bd700a3 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
@@ -117,6 +117,8 @@ public:
WiredTigerRecordStore(WiredTigerKVEngine* kvEngine, OperationContext* opCtx, Params params);
+ void getOplogTruncateStats(BSONObjBuilder& builder) const;
+
virtual ~WiredTigerRecordStore();
virtual void postConstructorInit(OperationContext* opCtx);
@@ -383,6 +385,10 @@ private:
// Non-null if this record store is underlying the active oplog.
std::shared_ptr<OplogStones> _oplogStones;
+
+ AtomicWord<int64_t>
+ _totalTimeTruncating; // Cumulative amount of time spent truncating the oplog.
+ AtomicWord<int64_t> _truncateCount; // Cumulative number of truncates of the oplog.
};
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h
index f6e9371c894..99697caac08 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h
@@ -69,6 +69,11 @@ public:
void awaitHasExcessStonesOrDead();
+ void getOplogStonesStats(BSONObjBuilder& builder) const {
+ builder.append("totalTimeProcessingMicros", _totalTimeProcessing.load());
+ builder.append("processingMethod", _processBySampling.load() ? "sampling" : "scanning");
+ }
+
boost::optional<OplogStones::Stone> peekOldestStoneIfNeeded() const;
void popOldestStone();
@@ -140,8 +145,11 @@ private:
// deque of oplog stones.
int64_t _minBytesPerStone;
- AtomicWord<long long> _currentRecords; // Number of records in the stone being filled.
- AtomicWord<long long> _currentBytes; // Number of bytes in the stone being filled.
+ AtomicWord<long long> _currentRecords; // Number of records in the stone being filled.
+ AtomicWord<long long> _currentBytes; // Number of bytes in the stone being filled.
+ AtomicWord<int64_t> _totalTimeProcessing; // Amount of time spent scanning and/or sampling the
+ // oplog during start up, if any.
+ AtomicWord<bool> _processBySampling; // Whether the oplog was sampled or scanned.
mutable stdx::mutex _mutex; // Protects against concurrent access to the deque of oplog stones.
std::deque<OplogStones::Stone> _stones; // front = oldest, back = newest.
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp
index 335aebd34a5..100924d9e98 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.cpp
@@ -34,6 +34,8 @@
#include "mongo/base/checked_cast.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/db/concurrency/d_concurrency.h"
+#include "mongo/db/db_raii.h"
+#include "mongo/db/namespace_string.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_record_store.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h"
@@ -41,6 +43,7 @@
#include "mongo/db/storage/wiredtiger/wiredtiger_util.h"
#include "mongo/util/assert_util.h"
+
namespace mongo {
using std::string;
@@ -85,4 +88,26 @@ BSONObj WiredTigerServerStatusSection::generateSection(OperationContext* opCtx,
return bob.obj();
}
+OplogStonesServerStatusSection::OplogStonesServerStatusSection()
+ : ServerStatusSection("oplogTruncation") {}
+
+bool OplogStonesServerStatusSection::includeByDefault() const {
+ return true;
+}
+
+BSONObj OplogStonesServerStatusSection::generateSection(OperationContext* opCtx,
+ const BSONElement& configElement) const {
+ BSONObjBuilder builder;
+ {
+ AutoGetCollectionForReadCommand ctx(opCtx, NamespaceString::kRsOplogNamespace);
+ Collection* oplogColl = ctx.getCollection();
+ if (oplogColl) {
+ auto oplogRS = checked_cast<WiredTigerRecordStore*>(oplogColl->getRecordStore());
+ oplogRS->getOplogTruncateStats(builder);
+ }
+ }
+ return builder.obj();
+}
+
+
} // namespace mongo
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.h b/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.h
index 9bea39b3398..1bcf68fff20 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_server_status.h
@@ -49,4 +49,15 @@ private:
WiredTigerKVEngine* _engine;
};
+/**
+ * Adds oplog stones statistics to the results of db.serverStatus().
+ */
+class OplogStonesServerStatusSection : public ServerStatusSection {
+public:
+ OplogStonesServerStatusSection();
+ bool includeByDefault() const override;
+ BSONObj generateSection(OperationContext* opCtx,
+ const BSONElement& configElement) const override;
+};
+
} // namespace mongo