diff options
author | Alyssa Wagenmaker <alyssa.wagenmaker@mongodb.com> | 2023-04-03 19:28:53 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-04-03 22:07:16 +0000 |
commit | 51e2b07895accc3e412f7f1d3ccc323451ea63fe (patch) | |
tree | e4022d3a75c54f52be8a65308aa41a5a10d57482 | |
parent | 140387a6925f88990dfea22429228bd89da04e02 (diff) | |
download | mongo-51e2b07895accc3e412f7f1d3ccc323451ea63fe.tar.gz |
SERVER-75062 Allow telemetry sampling rate to be adjusted at runtime
-rw-r--r-- | jstests/noPassthrough/telemetry/telemetry_sampling_rate.js | 11 | ||||
-rw-r--r-- | jstests/noPassthroughWithMongod/telemetry_configuration.js | 31 | ||||
-rw-r--r-- | src/mongo/db/query/query_knobs.idl | 3 | ||||
-rw-r--r-- | src/mongo/db/query/rate_limiting.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/query/rate_limiting.h | 11 | ||||
-rw-r--r-- | src/mongo/db/query/telemetry.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/query/telemetry_util.cpp | 12 | ||||
-rw-r--r-- | src/mongo/db/query/telemetry_util.h | 13 |
8 files changed, 66 insertions, 23 deletions
diff --git a/jstests/noPassthrough/telemetry/telemetry_sampling_rate.js b/jstests/noPassthrough/telemetry/telemetry_sampling_rate.js index 823991d652b..1bada398a03 100644 --- a/jstests/noPassthrough/telemetry/telemetry_sampling_rate.js +++ b/jstests/noPassthrough/telemetry/telemetry_sampling_rate.js @@ -26,12 +26,13 @@ coll.aggregate([{$match: {foo: 1}}], {cursor: {batchSize: 2}}); let telStore = testdb.adminCommand({aggregate: 1, pipeline: [{$telemetry: {}}], cursor: {}}); assert.eq(telStore.cursor.firstBatch.length, 0); -// TODO SERVER-71531 enable below test. // Reading telemetry store should work now with a sampling rate of greater than 0. -// assert.commandWorked(testdb.adminCommand({setParameter: 1, -// internalQueryConfigureTelemetrySamplingRate: 2147483647})); coll.aggregate([{$match: {foo: 1}}], -// {cursor: {batchSize: 2}}); assert.commandWorked(testdb.adminCommand({aggregate: 1, pipeline: -// [{$telemetry: {}}], cursor: {}})); +assert.commandWorked(testdb.adminCommand( + {setParameter: 1, internalQueryConfigureTelemetrySamplingRate: 2147483647})); +coll.aggregate([{$match: {foo: 1}}], {cursor: {batchSize: 2}}); +telStore = assert.commandWorked( + testdb.adminCommand({aggregate: 1, pipeline: [{$telemetry: {}}], cursor: {}})); +assert.eq(telStore.cursor.firstBatch.length, 1); MongoRunner.stopMongod(conn); }()); diff --git a/jstests/noPassthroughWithMongod/telemetry_configuration.js b/jstests/noPassthroughWithMongod/telemetry_configuration.js index 8d0b6ed68b5..0ae4e8408c3 100644 --- a/jstests/noPassthroughWithMongod/telemetry_configuration.js +++ b/jstests/noPassthroughWithMongod/telemetry_configuration.js @@ -8,25 +8,28 @@ load("jstests/libs/feature_flag_util.js"); if (FeatureFlagUtil.isEnabled(db, "Telemetry")) { - // The feature flag is enabled - make sure the telemetry store can be configured. - const original = assert.commandWorked( - db.adminCommand({getParameter: 1, internalQueryConfigureTelemetryCacheSize: 1})); - assert(original.hasOwnProperty("internalQueryConfigureTelemetryCacheSize"), original); - const originalValue = original.internalQueryConfigureTelemetryCacheSize; - try { - assert.doesNotThrow( - () => db.adminCommand( - {setParameter: 1, internalQueryConfigureTelemetryCacheSize: '2MB'})); - // Other tests verify that resizing actually affects the data structure size. - } finally { - assert.doesNotThrow( - () => db.adminCommand( - {setParameter: 1, internalQueryConfigureTelemetryCacheSize: originalValue})); + function testTelemetrySetting(paramName, paramValue) { + // The feature flag is enabled - make sure the telemetry store can be configured. + const original = assert.commandWorked(db.adminCommand({getParameter: 1, [paramName]: 1})); + assert(original.hasOwnProperty(paramName), original); + const originalValue = original[paramName]; + try { + assert.doesNotThrow(() => db.adminCommand({setParameter: 1, [paramName]: paramValue})); + // Other tests verify that changing the parameter actually affects the behavior. + } finally { + assert.doesNotThrow(() => + db.adminCommand({setParameter: 1, [paramName]: originalValue})); + } } + testTelemetrySetting("internalQueryConfigureTelemetryCacheSize", "2MB"); + testTelemetrySetting("internalQueryConfigureTelemetrySamplingRate", 2147483647); } else { // The feature flag is disabled - make sure the telemetry store *cannot* be configured. assert.commandFailedWithCode( db.adminCommand({setParameter: 1, internalQueryConfigureTelemetryCacheSize: '2MB'}), 7373500); + assert.commandFailedWithCode( + db.adminCommand({setParameter: 1, internalQueryConfigureTelemetrySamplingRate: 2147483647}), + 7506200); } }()); diff --git a/src/mongo/db/query/query_knobs.idl b/src/mongo/db/query/query_knobs.idl index 9188a8b7de6..b64a1184cc2 100644 --- a/src/mongo/db/query/query_knobs.idl +++ b/src/mongo/db/query/query_knobs.idl @@ -1026,6 +1026,9 @@ server_parameters: cpp_varname: "queryTelemetrySamplingRate" cpp_vartype: AtomicWord<int> default: 0 + validator: + gte: 0 + on_update: telemetry_util::onTelemetrySamplingRateUpdate internalQueryConfigureTelemetryCacheSize: description: "The maximum amount of memory that the system will allocate for the query telemetry diff --git a/src/mongo/db/query/rate_limiting.cpp b/src/mongo/db/query/rate_limiting.cpp index 44c94ead313..3aee39facae 100644 --- a/src/mongo/db/query/rate_limiting.cpp +++ b/src/mongo/db/query/rate_limiting.cpp @@ -54,7 +54,7 @@ bool RateLimiting::handleRequestFixedWindow() { stdx::unique_lock windowLock{_windowMutex}; tickWindow(); - if (_currentCount < _samplingRate) { + if (_currentCount < _samplingRate.load()) { _currentCount += 1; return true; } @@ -83,7 +83,7 @@ bool RateLimiting::handleRequestSlidingWindow() { // Add this estimate to the requests we know have taken place within the current time block. double estimatedCount = _currentCount + estimatedRemaining; - if (estimatedCount < _samplingRate) { + if (estimatedCount < _samplingRate.load()) { _currentCount += 1; return true; } diff --git a/src/mongo/db/query/rate_limiting.h b/src/mongo/db/query/rate_limiting.h index 1083562ae06..67b8a7fc106 100644 --- a/src/mongo/db/query/rate_limiting.h +++ b/src/mongo/db/query/rate_limiting.h @@ -53,7 +53,14 @@ public: * Getter for the sampling rate. */ RequestCount getSamplingRate() { - return _samplingRate; + return _samplingRate.load(); + } + + /* + * Setter for the sampling rate. + */ + void setSamplingRate(RequestCount samplingRate) { + _samplingRate.store(samplingRate); } /* @@ -86,7 +93,7 @@ private: /* * Sampling rate is the bound on the number of requests we want to admit per window. */ - const RequestCount _samplingRate; + AtomicWord<RequestCount> _samplingRate; /* * Time period is the window size in ms. diff --git a/src/mongo/db/query/telemetry.cpp b/src/mongo/db/query/telemetry.cpp index ffab6e36ba4..ad1e4d45e44 100644 --- a/src/mongo/db/query/telemetry.cpp +++ b/src/mongo/db/query/telemetry.cpp @@ -418,6 +418,10 @@ public: size_t numEvicted = telemetryStore.reset(cappedSize); telemetryEvictedMetric.increment(numEvicted); } + + void updateSamplingRate(ServiceContext* serviceCtx, int samplingRate) { + telemetryRateLimiter(serviceCtx).get()->setSamplingRate(samplingRate); + } }; ServiceContext::ConstructorActionRegisterer telemetryStoreManagerRegisterer{ diff --git a/src/mongo/db/query/telemetry_util.cpp b/src/mongo/db/query/telemetry_util.cpp index 7badacea8c1..f3918d7212a 100644 --- a/src/mongo/db/query/telemetry_util.cpp +++ b/src/mongo/db/query/telemetry_util.cpp @@ -79,6 +79,18 @@ Status validateTelemetryStoreSize(const std::string& str, const boost::optional< return memory_util::MemorySize::parse(str).getStatus(); } +Status onTelemetrySamplingRateUpdate(int samplingRate) { + // The client is nullptr if the parameter is supplied from the command line. In this case, we + // ignore the update event, the parameter will be processed when initializing the service + // context. + if (auto client = Client::getCurrent()) { + auto&& [serviceCtx, updater] = getUpdater(*client); + updater->updateSamplingRate(serviceCtx, samplingRate); + } + + return Status::OK(); +} + const Decorable<ServiceContext>::Decoration<std::unique_ptr<OnParamChangeUpdater>> telemetryStoreOnParamChangeUpdater = ServiceContext::declareDecoration<std::unique_ptr<OnParamChangeUpdater>>(); diff --git a/src/mongo/db/query/telemetry_util.h b/src/mongo/db/query/telemetry_util.h index 133d45f1338..c8fc37dc5c4 100644 --- a/src/mongo/db/query/telemetry_util.h +++ b/src/mongo/db/query/telemetry_util.h @@ -42,6 +42,8 @@ Status onTelemetryStoreSizeUpdate(const std::string& str); Status validateTelemetryStoreSize(const std::string& str, const boost::optional<TenantId>&); +Status onTelemetrySamplingRateUpdate(int samplingRate); + /** * An interface used to modify the telemetry store when query setParameters are modified. This is * done via an interface decorating the 'ServiceContext' in order to avoid a link-time dependency @@ -57,6 +59,11 @@ public: * cache fits within the new size bound. */ virtual void updateCacheSize(ServiceContext* serviceCtx, memory_util::MemorySize memSize) = 0; + + /** + * Updates the sampling rate for the telemetry rate limiter. + */ + virtual void updateSamplingRate(ServiceContext* serviceCtx, int samplingRate) = 0; }; /** @@ -70,6 +77,12 @@ public: "Cannot configure telemetry store - it is currently disabled and a restart is " "required to activate."); } + + void updateSamplingRate(ServiceContext* serviceCtx, int samplingRate) { + uasserted(7506200, + "Cannot configure telemetry store - it is currently disabled and a restart is " + "required to activate."); + } }; /** |