diff options
9 files changed, 253 insertions, 9 deletions
diff --git a/jstests/replsets/oplog_rollover.js b/jstests/replsets/oplog_rollover.js index 7ec72467a71..d44600fa598 100644 --- a/jstests/replsets/oplog_rollover.js +++ b/jstests/replsets/oplog_rollover.js @@ -104,6 +104,11 @@ function doTest(storageEngine) { assert.soon(() => { return numInsertOplogEntry(secondaryOplog) === 2; }, "Timeout waiting for oplog to roll over on secondary"); + + const res = primary.getDB("test").runCommand({serverStatus: 1}); + assert.commandWorked(res); + assert.eq(res.oplogTruncation.truncateCount, 1, tojson(res.oplogTruncation)); + assert.gt(res.oplogTruncation.totalTimeTruncatingMicros, 0, tojson(res.oplogTruncation)); } else { // Only test that oplog truncation will eventually happen. let numInserted = 2; diff --git a/jstests/replsets/oplog_sampling.js b/jstests/replsets/oplog_sampling.js new file mode 100644 index 00000000000..a87f17e72a3 --- /dev/null +++ b/jstests/replsets/oplog_sampling.js @@ -0,0 +1,41 @@ +/** + * Ensure serverStatus reports the total time spent sampling the oplog for all storage engines that + * support OplogStones. + * @tags: [ requires_wiredtiger ] + */ +(function() { +"use strict"; + +// Force oplog sampling to occur on start up for small numbers of oplog inserts. +const replSet = new ReplSetTest( + {nodes: 1, nodeOptions: {setParameter: {"maxOplogTruncationPointsDuringStartup": 10}}}); +replSet.startSet(); +replSet.initiate(); + +let coll = replSet.getPrimary().getDB("test").getCollection("testcoll"); + +let res = replSet.getPrimary().getDB("test").serverStatus(); +assert.commandWorked(res); + +// Small (or empty) oplogs should be processed by scanning. +assert.gt(res.oplogTruncation.totalTimeProcessingMicros, 0); +assert.eq(res.oplogTruncation.processingMethod, "scanning"); + +// Insert enough documents to force oplog sampling to occur on the following start up. +const maxOplogDocsForScanning = 2000; +for (let i = 0; i < maxOplogDocsForScanning + 1; i++) { + assert.commandWorked(coll.insert({m: 1 + i})); +} + +// Restart replica set to load entries from the oplog for sampling. +replSet.stopSet(null /* signal */, true /* forRestart */); +replSet.startSet({restart: true}); + +res = replSet.getPrimary().getDB("test").serverStatus(); +assert.commandWorked(res); + +assert.gt(res.oplogTruncation.totalTimeProcessingMicros, 0); +assert.eq(res.oplogTruncation.processingMethod, "sampling"); + +replSet.stopSet(); +})(); diff --git a/src/mongo/db/storage/record_store.h b/src/mongo/db/storage/record_store.h index ee5793c128d..e690b35a618 100644 --- a/src/mongo/db/storage/record_store.h +++ b/src/mongo/db/storage/record_store.h @@ -521,6 +521,15 @@ public: MONGO_UNREACHABLE; } + /** + * This should only be called if StorageEngine::supportsOplogStones() is true. + * Storage engines supporting oplog stones must implement this function. + * Populates `builder` with various statistics pertaining to oplog stones and oplog truncation. + */ + virtual void getOplogTruncateStats(BSONObjBuilder& builder) const { + MONGO_UNREACHABLE; + } + protected: std::string _ns; diff --git a/src/mongo/db/storage/wiredtiger/SConscript b/src/mongo/db/storage/wiredtiger/SConscript index 2b25da63dd3..88451147071 100644 --- a/src/mongo/db/storage/wiredtiger/SConscript +++ b/src/mongo/db/storage/wiredtiger/SConscript @@ -34,6 +34,7 @@ if wiredtiger: wtEnv.Library( target='storage_wiredtiger_core', source= [ + 'oplog_stones_server_status_section.cpp', 'wiredtiger_begin_transaction_block.cpp', 'wiredtiger_cursor.cpp', 'wiredtiger_global_options.cpp', @@ -84,6 +85,9 @@ if wiredtiger: 'storage_wiredtiger_customization_hooks', ], LIBDEPS_PRIVATE= [ + 'oplog_stone_parameters', + '$BUILD_DIR/mongo/db/db_raii', + '$BUILD_DIR/mongo/db/commands/server_status', '$BUILD_DIR/mongo/db/snapshot_window_options', '$BUILD_DIR/mongo/db/storage/storage_repair_observer', '$BUILD_DIR/mongo/util/options_parser/options_parser', @@ -173,6 +177,16 @@ if wiredtiger: ], ) + wtEnv.Library( + target='oplog_stone_parameters', + source=[ + env.Idlc('oplog_stone_parameters.idl')[0], + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/idl/server_parameter', + ], + ) + # All of these tests fail to compile under undefined behavior # sanitizer due to unexpressed circular dependency edges. In particular # they all need a definition from the 'catalog'. diff --git a/src/mongo/db/storage/wiredtiger/oplog_stone_parameters.idl b/src/mongo/db/storage/wiredtiger/oplog_stone_parameters.idl new file mode 100644 index 00000000000..4737f234bd4 --- /dev/null +++ b/src/mongo/db/storage/wiredtiger/oplog_stone_parameters.idl @@ -0,0 +1,59 @@ +# Copyright (C) 2019-present MongoDB, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the Server Side Public License, version 1, +# as published by MongoDB, Inc. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Server Side Public License for more details. +# +# You should have received a copy of the Server Side Public License +# along with this program. If not, see +# <http://www.mongodb.com/licensing/server-side-public-license>. +# +# As a special exception, the copyright holders give permission to link the +# code of portions of this program with the OpenSSL library under certain +# conditions as described in each individual source file and distribute +# linked combinations including the program with the OpenSSL library. You +# must comply with the Server Side Public License in all respects for +# all of the code used other than as permitted herein. If you modify file(s) +# with this exception, you may extend this exception to your version of the +# file(s), but you are not obligated to do so. If you do not wish to do so, +# delete this exception statement from your version. If you delete this +# exception statement from all source files in the program, then also delete +# it in the license file. +# +global: + cpp_namespace: "mongo" + +server_parameters: + maxOplogTruncationPointsAfterStartup: + description: 'Maximum allowable number of oplog truncation points after startup has finished' + set_at: [ startup ] + cpp_vartype: 'long long' + cpp_varname: gMaxOplogStonesAfterStartup + default: 100 + validator: { gt: 0 } + maxOplogTruncationPointsDuringStartup: + description: 'Maximum allowable number of oplog truncation points during startup' + set_at: [ startup ] + cpp_vartype: 'long long' + cpp_varname: gMaxOplogStonesDuringStartup + default: 100 + validator: { gt: 0 } + minOplogTruncationPoints: + description: 'Minimum allowable number of oplog truncation points' + set_at: [ startup ] + cpp_vartype: 'long long' + cpp_varname: gMinOplogStones + default: 10 + validator: { gt: 0 } + oplogTruncationPointSizeMB: + description: 'Oplog truncation point size in MB used to determine the number of oplog truncation points for an oplog of a given size. The size will be rounded up to the maximum size of an internal BSON object.' + set_at: [ startup ] + cpp_vartype: 'int' + cpp_varname: gOplogStoneSizeMB + default: 0 + validator: { gte: 0 } diff --git a/src/mongo/db/storage/wiredtiger/oplog_stones_server_status_section.cpp b/src/mongo/db/storage/wiredtiger/oplog_stones_server_status_section.cpp new file mode 100644 index 00000000000..0c87e2e8693 --- /dev/null +++ b/src/mongo/db/storage/wiredtiger/oplog_stones_server_status_section.cpp @@ -0,0 +1,68 @@ +/** + * Copyright (C) 2019-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/commands/server_status.h" +#include "mongo/db/db_raii.h" +#include "mongo/db/namespace_string.h" + +namespace mongo { +class OplogStonesServerStatusSection : public ServerStatusSection { +public: + OplogStonesServerStatusSection() : ServerStatusSection("oplogTruncation") {} + /** + * <ServerStatusSection> + */ + bool includeByDefault() const override { + return true; + } + + /** + * <ServerStatusSection> + */ + BSONObj generateSection(OperationContext* opCtx, + const BSONElement& configElement) const override { + BSONObjBuilder builder; + if (!opCtx->getServiceContext()->getStorageEngine()->supportsOplogStones()) { + return builder.obj(); + } + { + AutoGetCollectionForReadCommand ctx(opCtx, NamespaceString::kRsOplogNamespace); + Collection* oplogColl = ctx.getCollection(); + if (oplogColl) { + oplogColl->getRecordStore()->getOplogTruncateStats(builder); + } + } + return builder.obj(); + } + + +} oplogStonesStats; +} // namespace mongo diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp index 48f9a53dbff..45abc3f224d 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp @@ -52,6 +52,7 @@ #include "mongo/db/server_recovery.h" #include "mongo/db/service_context.h" #include "mongo/db/storage/oplog_hack.h" +#include "mongo/db/storage/wiredtiger/oplog_stone_parameters_gen.h" #include "mongo/db/storage/wiredtiger/wiredtiger_customization_hooks.h" #include "mongo/db/storage/wiredtiger/wiredtiger_global_options.h" #include "mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h" @@ -165,10 +166,16 @@ WiredTigerRecordStore::OplogStones::OplogStones(OperationContext* opCtx, WiredTi invariant(rs->cappedMaxSize() > 0); unsigned long long maxSize = rs->cappedMaxSize(); - const unsigned long long kMinStonesToKeep = 10ULL; - const unsigned long long kMaxStonesToKeep = 100ULL; + // The minimum oplog stone size should be BSONObjMaxInternalSize. + const unsigned int oplogStoneSize = + std::max(gOplogStoneSizeMB * 1024 * 1024, BSONObjMaxInternalSize); - unsigned long long numStones = maxSize / BSONObjMaxInternalSize; + // IDL does not support unsigned long long types. + const unsigned long long kMinStonesToKeep = static_cast<unsigned long long>(gMinOplogStones); + const unsigned long long kMaxStonesToKeep = + static_cast<unsigned long long>(gMaxOplogStonesDuringStartup); + + unsigned long long numStones = maxSize / oplogStoneSize; size_t numStonesToKeep = std::min(kMaxStonesToKeep, std::max(kMinStonesToKeep, numStones)); _minBytesPerStone = maxSize / numStonesToKeep; invariant(_minBytesPerStone > 0); @@ -314,6 +321,12 @@ void WiredTigerRecordStore::OplogStones::setMinBytesPerStone(int64_t size) { void WiredTigerRecordStore::OplogStones::_calculateStones(OperationContext* opCtx, size_t numStonesToKeep) { + const std::uint64_t startWaitTime = curTimeMicros64(); + ON_BLOCK_EXIT([&] { + auto waitTime = curTimeMicros64() - startWaitTime; + log() << "WiredTiger record store oplog processing took " << waitTime / 1000 << "ms"; + _totalTimeProcessing.fetchAndAdd(waitTime); + }); long long numRecords = _rs->numRecords(opCtx); long long dataSize = _rs->dataSize(opCtx); @@ -343,6 +356,7 @@ void WiredTigerRecordStore::OplogStones::_calculateStones(OperationContext* opCt } void WiredTigerRecordStore::OplogStones::_calculateStonesByScanning(OperationContext* opCtx) { + _processBySampling.store(false); // process by scanning log() << "Scanning the oplog to determine where to place markers for truncation"; long long numRecords = 0; @@ -370,6 +384,8 @@ void WiredTigerRecordStore::OplogStones::_calculateStonesByScanning(OperationCon void WiredTigerRecordStore::OplogStones::_calculateStonesBySampling(OperationContext* opCtx, int64_t estRecordsPerStone, int64_t estBytesPerStone) { + log() << "Sampling the oplog to determine where to place markers for truncation"; + _processBySampling.store(true); // process by sampling Timestamp earliestOpTime; Timestamp latestOpTime; @@ -458,10 +474,16 @@ void WiredTigerRecordStore::OplogStones::_pokeReclaimThreadIfNeeded() { void WiredTigerRecordStore::OplogStones::adjust(int64_t maxSize) { stdx::lock_guard<Latch> lk(_mutex); - const unsigned long long kMinStonesToKeep = 10ULL; - const unsigned long long kMaxStonesToKeep = 100ULL; - unsigned long long numStones = maxSize / BSONObjMaxInternalSize; + const unsigned int oplogStoneSize = + std::max(gOplogStoneSizeMB * 1024 * 1024, BSONObjMaxInternalSize); + + // IDL does not support unsigned long long types. + const unsigned long long kMinStonesToKeep = static_cast<unsigned long long>(gMinOplogStones); + const unsigned long long kMaxStonesToKeep = + static_cast<unsigned long long>(gMaxOplogStonesAfterStartup); + + unsigned long long numStones = maxSize / oplogStoneSize; size_t numStonesToKeep = std::min(kMaxStonesToKeep, std::max(kMinStonesToKeep, numStones)); _minBytesPerStone = maxSize / numStonesToKeep; invariant(_minBytesPerStone > 0); @@ -765,6 +787,14 @@ void WiredTigerRecordStore::postConstructorInit(OperationContext* opCtx) { } } +void WiredTigerRecordStore::getOplogTruncateStats(BSONObjBuilder& builder) const { + if (_oplogStones) { + _oplogStones->getOplogStonesStats(builder); + } + builder.append("totalTimeTruncatingMicros", _totalTimeTruncating.load()); + builder.append("truncateCount", _truncateCount.load()); +} + const char* WiredTigerRecordStore::name() const { return _engineName.c_str(); } @@ -1221,7 +1251,11 @@ void WiredTigerRecordStore::reclaimOplog(OperationContext* opCtx, Timestamp mayT LOG(1) << "Finished truncating the oplog, it now contains approximately " << _sizeInfo->numRecords.load() << " records totaling to " << _sizeInfo->dataSize.load() << " bytes"; - log() << "WiredTiger record store oplog truncation finished in: " << timer.millis() << "ms"; + auto elapsedMicros = timer.micros(); + auto elapsedMillis = elapsedMicros / 1000; + _totalTimeTruncating.fetchAndAdd(elapsedMicros); + _truncateCount.fetchAndAdd(1); + log() << "WiredTiger record store oplog truncation finished in: " << elapsedMillis << "ms"; } Status WiredTigerRecordStore::insertRecords(OperationContext* opCtx, diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h index 6d1c4010c3d..99c0b70fb78 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h @@ -118,6 +118,8 @@ public: WiredTigerRecordStore(WiredTigerKVEngine* kvEngine, OperationContext* opCtx, Params params); + virtual void getOplogTruncateStats(BSONObjBuilder& builder) const; + virtual ~WiredTigerRecordStore(); virtual void postConstructorInit(OperationContext* opCtx); @@ -377,6 +379,10 @@ private: // Non-null if this record store is underlying the active oplog. std::shared_ptr<OplogStones> _oplogStones; + + AtomicWord<int64_t> + _totalTimeTruncating; // Cumulative amount of time spent truncating the oplog. + AtomicWord<int64_t> _truncateCount; // Cumulative number of truncates of the oplog. }; diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h index f88334ea85b..9553ee9f56a 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_oplog_stones.h @@ -69,6 +69,11 @@ public: void awaitHasExcessStonesOrDead(); + void getOplogStonesStats(BSONObjBuilder& builder) const { + builder.append("totalTimeProcessingMicros", _totalTimeProcessing.load()); + builder.append("processingMethod", _processBySampling.load() ? "sampling" : "scanning"); + } + boost::optional<OplogStones::Stone> peekOldestStoneIfNeeded() const; void popOldestStone(); @@ -140,8 +145,11 @@ private: // deque of oplog stones. int64_t _minBytesPerStone; - AtomicWord<long long> _currentRecords; // Number of records in the stone being filled. - AtomicWord<long long> _currentBytes; // Number of bytes in the stone being filled. + AtomicWord<long long> _currentRecords; // Number of records in the stone being filled. + AtomicWord<long long> _currentBytes; // Number of bytes in the stone being filled. + AtomicWord<int64_t> _totalTimeProcessing; // Amount of time spent scanning and/or sampling the + // oplog during start up, if any. + AtomicWord<bool> _processBySampling; // Whether the oplog was sampled or scanned. // Protects against concurrent access to the deque of oplog stones. mutable Mutex _mutex = MONGO_MAKE_LATCH("OplogStones::_mutex"); |