diff options
author | Will Buerger <will.buerger@mongodb.com> | 2023-05-03 18:58:03 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-05-03 20:09:50 +0000 |
commit | dffa373f8558c13b52299b187a261a7593189237 (patch) | |
tree | 57807b5ee3deebc29b4128b90230aa66ecc4f00f | |
parent | 01e26506a0981b4141bc65c9d016666ada60319e (diff) | |
download | mongo-dffa373f8558c13b52299b187a261a7593189237.tar.gz |
SERVER-76781: Decouple opdebug metric collection from telemetry
-rw-r--r-- | src/mongo/db/clientcursor.cpp | 17 | ||||
-rw-r--r-- | src/mongo/db/clientcursor.h | 11 | ||||
-rw-r--r-- | src/mongo/db/commands/bulk_write.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/commands/getmore_cmd.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/commands/run_aggregate.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/curop.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/curop.h | 17 | ||||
-rw-r--r-- | src/mongo/db/query/find.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/query/telemetry.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/query/telemetry.h | 7 | ||||
-rw-r--r-- | src/mongo/s/commands/cluster_bulk_write_cmd.cpp | 3 | ||||
-rw-r--r-- | src/mongo/s/query/cluster_aggregation_planner.cpp | 5 | ||||
-rw-r--r-- | src/mongo/s/query/cluster_cursor_manager.cpp | 23 | ||||
-rw-r--r-- | src/mongo/s/query/cluster_cursor_manager.h | 17 | ||||
-rw-r--r-- | src/mongo/s/query/cluster_find.cpp | 8 | ||||
-rw-r--r-- | src/mongo/s/query/store_possible_cursor.cpp | 7 |
16 files changed, 61 insertions, 89 deletions
diff --git a/src/mongo/db/clientcursor.cpp b/src/mongo/db/clientcursor.cpp index 90c3a4581cc..9037096354b 100644 --- a/src/mongo/db/clientcursor.cpp +++ b/src/mongo/db/clientcursor.cpp @@ -393,23 +393,14 @@ void startClientCursorMonitor() { getClientCursorMonitor(getGlobalServiceContext()).go(); } -void collectTelemetryMongod(OperationContext* opCtx, - ClientCursorPin& pinnedCursor, - long long nreturned) { - auto curOp = CurOp::get(opCtx); - telemetry::collectMetricsOnOpDebug(curOp, nreturned); - pinnedCursor->incrementCursorMetrics(curOp->debug().additiveMetrics); +void collectTelemetryMongod(OperationContext* opCtx, ClientCursorPin& pinnedCursor) { + pinnedCursor->incrementCursorMetrics(CurOp::get(opCtx)->debug().additiveMetrics); } -void collectTelemetryMongod(OperationContext* opCtx, - const BSONObj& originatingCommand, - long long nreturned) { - auto curOp = CurOp::get(opCtx); - telemetry::collectMetricsOnOpDebug(curOp, nreturned); - +void collectTelemetryMongod(OperationContext* opCtx, const BSONObj& originatingCommand) { // If we haven't registered a cursor to prepare for getMore requests, we record // telemetry directly. - auto& opDebug = curOp->debug(); + auto& opDebug = CurOp::get(opCtx)->debug(); telemetry::writeTelemetry( opCtx, opDebug.telemetryStoreKey, diff --git a/src/mongo/db/clientcursor.h b/src/mongo/db/clientcursor.h index 11e7b604ba0..e254f112bce 100644 --- a/src/mongo/db/clientcursor.h +++ b/src/mongo/db/clientcursor.h @@ -588,10 +588,13 @@ void startClientCursorMonitor(); * Records certain metrics for the current operation on OpDebug and aggregates those metrics for * telemetry use. If a cursor pin is provided, metrics are aggregated on the cursor; otherwise, * metrics are written directly to the telemetry store. + * NOTE: Metrics are taken from opDebug.additiveMetrics, so CurOp::setEndOfOpMetrics must be called + * *prior* to calling these. + * + * Currently, telemetry is only collected for find and aggregate requests (and their subsequent + * getMore requests), so these should only be called from those request paths. */ -void collectTelemetryMongod(OperationContext* opCtx, ClientCursorPin& cursor, long long nreturned); -void collectTelemetryMongod(OperationContext* opCtx, - const BSONObj& originatingCommand, - long long nreturned); +void collectTelemetryMongod(OperationContext* opCtx, ClientCursorPin& cursor); +void collectTelemetryMongod(OperationContext* opCtx, const BSONObj& originatingCommand); } // namespace mongo diff --git a/src/mongo/db/commands/bulk_write.cpp b/src/mongo/db/commands/bulk_write.cpp index 0b2379f8897..c45f4b8d68e 100644 --- a/src/mongo/db/commands/bulk_write.cpp +++ b/src/mongo/db/commands/bulk_write.cpp @@ -926,9 +926,9 @@ public: numRepliesInFirstBatch++; responseSizeTracker.add(nextDoc); } + CurOp::get(opCtx)->setEndOfOpMetrics(numRepliesInFirstBatch); if (exec->isEOF()) { invariant(numRepliesInFirstBatch == replies.size()); - collectTelemetryMongod(opCtx, reqObj, numRepliesInFirstBatch); auto reply = BulkWriteCommandReply( BulkWriteCommandResponseCursor( 0, std::vector<BulkWriteReplyItem>(std::move(replies))), @@ -957,7 +957,6 @@ public: pinnedCursor->incNBatches(); pinnedCursor->incNReturnedSoFar(replies.size()); - collectTelemetryMongod(opCtx, pinnedCursor, numRepliesInFirstBatch); replies.resize(numRepliesInFirstBatch); auto reply = BulkWriteCommandReply( diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp index 796f4efd621..28a3d8fb43b 100644 --- a/src/mongo/db/commands/getmore_cmd.cpp +++ b/src/mongo/db/commands/getmore_cmd.cpp @@ -702,8 +702,8 @@ public: auto& metricsCollector = ResourceConsumption::MetricsCollector::get(opCtx); metricsCollector.incrementDocUnitsReturned(curOp->getNS(), docUnitsReturned); curOp->debug().additiveMetrics.nBatches = 1; - - collectTelemetryMongod(opCtx, cursorPin, numResults); + curOp->setEndOfOpMetrics(numResults); + collectTelemetryMongod(opCtx, cursorPin); if (respondWithId) { cursorDeleter.dismiss(); diff --git a/src/mongo/db/commands/run_aggregate.cpp b/src/mongo/db/commands/run_aggregate.cpp index 7db34d9f041..4b2c535cbc7 100644 --- a/src/mongo/db/commands/run_aggregate.cpp +++ b/src/mongo/db/commands/run_aggregate.cpp @@ -1221,11 +1221,12 @@ Status runAggregate(OperationContext* opCtx, PlanSummaryStats stats; planExplainer.getSummaryStats(&stats); curOp->debug().setPlanSummaryMetrics(stats); + curOp->setEndOfOpMetrics(stats.nReturned); if (keepCursor) { - collectTelemetryMongod(opCtx, pins[0], stats.nReturned); + collectTelemetryMongod(opCtx, pins[0]); } else { - collectTelemetryMongod(opCtx, cmdObj, stats.nReturned); + collectTelemetryMongod(opCtx, cmdObj); } // For an optimized away pipeline, signal the cache that a query operation has completed. diff --git a/src/mongo/db/curop.cpp b/src/mongo/db/curop.cpp index 731e58f6a12..7601b6c90c0 100644 --- a/src/mongo/db/curop.cpp +++ b/src/mongo/db/curop.cpp @@ -337,6 +337,15 @@ void CurOp::setGenericOpRequestDetails(NamespaceString nss, _nss = std::move(nss); } +void CurOp::setEndOfOpMetrics(long long nreturned) { + _debug.additiveMetrics.nreturned = nreturned; + // executionTime is set with the final executionTime in completeAndLogOperation, but for + // telemetry collection we want it set before incrementing cursor metrics using OpDebug's + // AdditiveMetrics. The value set here will be overwritten later in + // completeAndLogOperation. + _debug.additiveMetrics.executionTime = elapsedTimeExcludingPauses(); +} + void CurOp::setMessage_inlock(StringData message) { if (_progressMeter.isActive()) { LOGV2_ERROR(20527, diff --git a/src/mongo/db/curop.h b/src/mongo/db/curop.h index f0012d376a2..e934afe9f68 100644 --- a/src/mongo/db/curop.h +++ b/src/mongo/db/curop.h @@ -470,6 +470,13 @@ public: NetworkOp op); /** + * Sets metrics collected at the end of an operation onto curOp's OpDebug instance. Note that + * this is used in tandem with OpDebug::setPlanSummaryMetrics so should not repeat any metrics + * collected there. + */ + void setEndOfOpMetrics(long long nreturned); + + /** * Marks the operation end time, records the length of the client response if a valid response * exists, and then - subject to the current values of slowMs and sampleRate - logs this CurOp * to file under the given LogComponent. Returns 'true' if, in addition to being logged, this @@ -945,16 +952,6 @@ public: _tickSource = tickSource; } - /** - * Merge match counters from the current operation into the global map and stop counting. - */ - void stopMatchExprCounter(); - - /** - * Increment the counter for the match expression with given name in the current operation. - */ - void incrementMatchExprCounter(StringData name); - private: class CurOpStack; diff --git a/src/mongo/db/query/find.cpp b/src/mongo/db/query/find.cpp index 62d89dd904e..aab6684e772 100644 --- a/src/mongo/db/query/find.cpp +++ b/src/mongo/db/query/find.cpp @@ -122,11 +122,12 @@ void endQueryOp(OperationContext* opCtx, auto&& explainer = exec.getPlanExplainer(); explainer.getSummaryStats(&summaryStats); curOp->debug().setPlanSummaryMetrics(summaryStats); + curOp->setEndOfOpMetrics(numResults); if (cursor) { - collectTelemetryMongod(opCtx, *cursor, numResults); + collectTelemetryMongod(opCtx, *cursor); } else { - collectTelemetryMongod(opCtx, cmdObj, numResults); + collectTelemetryMongod(opCtx, cmdObj); } if (collection) { diff --git a/src/mongo/db/query/telemetry.cpp b/src/mongo/db/query/telemetry.cpp index be688807e5d..c000478a7de 100644 --- a/src/mongo/db/query/telemetry.cpp +++ b/src/mongo/db/query/telemetry.cpp @@ -594,14 +594,5 @@ void writeTelemetry(OperationContext* opCtx, metrics->queryExecMicros.aggregate(queryExecMicros); metrics->docsReturned.aggregate(docsReturned); } - -void collectMetricsOnOpDebug(CurOp* curOp, long long nreturned) { - auto&& opDebug = curOp->debug(); - opDebug.additiveMetrics.nreturned = nreturned; - // executionTime is set with the final executionTime in CurOp::completeAndLogOperation, but for - // telemetry collection we want it set before incrementing cursor metrics using AdditiveMetrics. - // The value set here will be overwritten later in CurOp::completeAndLogOperation. - opDebug.additiveMetrics.executionTime = curOp->elapsedTimeExcludingPauses(); -} } // namespace telemetry } // namespace mongo diff --git a/src/mongo/db/query/telemetry.h b/src/mongo/db/query/telemetry.h index 539f8420bb7..b49e70762c7 100644 --- a/src/mongo/db/query/telemetry.h +++ b/src/mongo/db/query/telemetry.h @@ -236,12 +236,5 @@ BSONObj makeTelemetryKey(const FindCommandRequest& findCommand, const SerializationOptions& opts, const boost::intrusive_ptr<ExpressionContext>& expCtx, boost::optional<const TelemetryMetrics&> existingMetrics = boost::none); - -/** - * Collects metrics for telemetry from the current operation onto OpDebug. This must be called prior - * to incrementing metrics on cursors (either ClientCursor or ClusterClientCursor) since cursor - * metric aggregation happens via OpDebug::AdditiveMetrics. - */ -void collectMetricsOnOpDebug(CurOp* curOp, long long nreturned); } // namespace telemetry } // namespace mongo diff --git a/src/mongo/s/commands/cluster_bulk_write_cmd.cpp b/src/mongo/s/commands/cluster_bulk_write_cmd.cpp index 9f3ce595590..57dc73fe050 100644 --- a/src/mongo/s/commands/cluster_bulk_write_cmd.cpp +++ b/src/mongo/s/commands/cluster_bulk_write_cmd.cpp @@ -171,8 +171,8 @@ public: numRepliesInFirstBatch++; responseSizeTracker.add(nextObj); } + CurOp::get(opCtx)->setEndOfOpMetrics(numRepliesInFirstBatch); if (numRepliesInFirstBatch == replyItems.size()) { - collectTelemetryMongos(opCtx, reqObj, numRepliesInFirstBatch); return BulkWriteCommandReply( BulkWriteCommandResponseCursor( 0, std::vector<BulkWriteReplyItem>(std::move(replyItems))), @@ -181,7 +181,6 @@ public: ccc->detachFromOperationContext(); ccc->incNBatches(); - collectTelemetryMongos(opCtx, ccc, numRepliesInFirstBatch); auto authUser = AuthorizationSession::get(opCtx->getClient())->getAuthenticatedUserName(); diff --git a/src/mongo/s/query/cluster_aggregation_planner.cpp b/src/mongo/s/query/cluster_aggregation_planner.cpp index e10721b2915..aab9d729ae7 100644 --- a/src/mongo/s/query/cluster_aggregation_planner.cpp +++ b/src/mongo/s/query/cluster_aggregation_planner.cpp @@ -365,10 +365,11 @@ BSONObj establishMergingMongosCursor(OperationContext* opCtx, opDebug.nShards = std::max(opDebug.nShards, nShards); opDebug.cursorExhausted = exhausted; opDebug.additiveMetrics.nBatches = 1; + CurOp::get(opCtx)->setEndOfOpMetrics(responseBuilder.numDocs()); if (exhausted) { - collectTelemetryMongos(opCtx, ccc->getOriginatingCommand(), responseBuilder.numDocs()); + collectTelemetryMongos(opCtx, ccc->getOriginatingCommand()); } else { - collectTelemetryMongos(opCtx, ccc, responseBuilder.numDocs()); + collectTelemetryMongos(opCtx, ccc); } ccc->detachFromOperationContext(); diff --git a/src/mongo/s/query/cluster_cursor_manager.cpp b/src/mongo/s/query/cluster_cursor_manager.cpp index 152c990f163..313f283a5b5 100644 --- a/src/mongo/s/query/cluster_cursor_manager.cpp +++ b/src/mongo/s/query/cluster_cursor_manager.cpp @@ -591,12 +591,7 @@ StatusWith<ClusterClientCursorGuard> ClusterCursorManager::_detachCursor(WithLoc return std::move(cursor); } -void collectTelemetryMongos(OperationContext* opCtx, - const BSONObj& originatingCommand, - long long nreturned) { - auto curOp = CurOp::get(opCtx); - telemetry::collectMetricsOnOpDebug(curOp, nreturned); - +void collectTelemetryMongos(OperationContext* opCtx, const BSONObj& originatingCommand) { // If we haven't registered a cursor to prepare for getMore requests, we record // telemetry directly. auto&& opDebug = CurOp::get(opCtx)->debug(); @@ -608,20 +603,12 @@ void collectTelemetryMongos(OperationContext* opCtx, opDebug.additiveMetrics.nreturned.value_or(0)); } -void collectTelemetryMongos(OperationContext* opCtx, - ClusterClientCursorGuard& cursor, - long long nreturned) { - auto curOp = CurOp::get(opCtx); - telemetry::collectMetricsOnOpDebug(curOp, nreturned); - cursor->incrementCursorMetrics(curOp->debug().additiveMetrics); +void collectTelemetryMongos(OperationContext* opCtx, ClusterClientCursorGuard& cursor) { + cursor->incrementCursorMetrics(CurOp::get(opCtx)->debug().additiveMetrics); } -void collectTelemetryMongos(OperationContext* opCtx, - ClusterCursorManager::PinnedCursor& cursor, - long long nreturned) { - auto curOp = CurOp::get(opCtx); - telemetry::collectMetricsOnOpDebug(curOp, nreturned); - cursor->incrementCursorMetrics(curOp->debug().additiveMetrics); +void collectTelemetryMongos(OperationContext* opCtx, ClusterCursorManager::PinnedCursor& cursor) { + cursor->incrementCursorMetrics(CurOp::get(opCtx)->debug().additiveMetrics); } } // namespace mongo diff --git a/src/mongo/s/query/cluster_cursor_manager.h b/src/mongo/s/query/cluster_cursor_manager.h index ec8f9576e40..d194e296b78 100644 --- a/src/mongo/s/query/cluster_cursor_manager.h +++ b/src/mongo/s/query/cluster_cursor_manager.h @@ -604,15 +604,14 @@ private: * use. If a cursor is provided (via ClusterClientCursorGuard or * ClusterCursorManager::PinnedCursor), metrics are aggregated on the cursor; otherwise, metrics are * written directly to the telemetry store. + * NOTE: Metrics are taken from opDebug.additiveMetrics, so CurOp::setEndOfOpMetrics must be called + * *prior* to calling these. + * + * Currently, telemetry is only collected for find and aggregate requests (and their subsequent + * getMore requests), so these should only be called from those request paths. */ -void collectTelemetryMongos(OperationContext* opCtx, - const BSONObj& originatingCommand, - long long nreturned); -void collectTelemetryMongos(OperationContext* opCtx, - ClusterClientCursorGuard& cursor, - long long nreturned); -void collectTelemetryMongos(OperationContext* opCtx, - ClusterCursorManager::PinnedCursor& cursor, - long long nreturned); +void collectTelemetryMongos(OperationContext* opCtx, const BSONObj& originatingCommand); +void collectTelemetryMongos(OperationContext* opCtx, ClusterClientCursorGuard& cursor); +void collectTelemetryMongos(OperationContext* opCtx, ClusterCursorManager::PinnedCursor& cursor); } // namespace mongo diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp index 7b472f79ff9..332a3641b0e 100644 --- a/src/mongo/s/query/cluster_find.cpp +++ b/src/mongo/s/query/cluster_find.cpp @@ -435,6 +435,7 @@ CursorId runQueryWithoutRetrying(OperationContext* opCtx, *partialResultsReturned = ccc->partialResultsReturned(); } + CurOp::get(opCtx)->setEndOfOpMetrics(results->size()); // If the cursor is exhausted, then there are no more results to return and we don't need to // allocate a cursor id. if (cursorState == ClusterCursorManager::CursorState::Exhausted) { @@ -443,7 +444,7 @@ CursorId runQueryWithoutRetrying(OperationContext* opCtx, if (shardIds.size() > 0) { updateNumHostsTargetedMetrics(opCtx, cm, shardIds.size()); } - collectTelemetryMongos(opCtx, ccc->getOriginatingCommand(), results->size()); + collectTelemetryMongos(opCtx, ccc->getOriginatingCommand()); return CursorId(0); } @@ -454,7 +455,7 @@ CursorId runQueryWithoutRetrying(OperationContext* opCtx, ? ClusterCursorManager::CursorLifetime::Immortal : ClusterCursorManager::CursorLifetime::Mortal; auto authUser = AuthorizationSession::get(opCtx->getClient())->getAuthenticatedUserName(); - collectTelemetryMongos(opCtx, ccc, results->size()); + collectTelemetryMongos(opCtx, ccc); auto cursorId = uassertStatusOK(cursorManager->registerCursor( opCtx, ccc.releaseCursor(), query.nss(), cursorType, cursorLifetime, authUser)); @@ -918,10 +919,11 @@ StatusWith<CursorResponse> ClusterFind::runGetMore(OperationContext* opCtx, // Set nReturned and whether the cursor has been exhausted. opDebug.cursorExhausted = (idToReturn == 0); opDebug.additiveMetrics.nBatches = 1; + CurOp::get(opCtx)->setEndOfOpMetrics(batch.size()); const bool partialResultsReturned = pinnedCursor.getValue()->partialResultsReturned(); pinnedCursor.getValue()->setLeftoverMaxTimeMicros(opCtx->getRemainingMaxTimeMicros()); - collectTelemetryMongos(opCtx, pinnedCursor.getValue(), batch.size()); + collectTelemetryMongos(opCtx, pinnedCursor.getValue()); // Upon successful completion, transfer ownership of the cursor back to the cursor manager. If // the cursor has been exhausted, the cursor manager will clean it up for us. diff --git a/src/mongo/s/query/store_possible_cursor.cpp b/src/mongo/s/query/store_possible_cursor.cpp index eb24db3e7cd..4336fe6d96c 100644 --- a/src/mongo/s/query/store_possible_cursor.cpp +++ b/src/mongo/s/query/store_possible_cursor.cpp @@ -94,12 +94,11 @@ StatusWith<BSONObj> storePossibleCursor(OperationContext* opCtx, // a split aggregation pipeline, and the shards half of that pipeline may have targeted multiple // shards. In that case, leave the current value as-is. opDebug.nShards = std::max(opDebug.nShards, 1); + CurOp::get(opCtx)->setEndOfOpMetrics(incomingCursorResponse.getValue().getBatch().size()); if (incomingCursorResponse.getValue().getCursorId() == CursorId(0)) { opDebug.cursorExhausted = true; - collectTelemetryMongos(opCtx, - CurOp::get(opCtx)->opDescription(), - incomingCursorResponse.getValue().getBatch().size()); + collectTelemetryMongos(opCtx, CurOp::get(opCtx)->opDescription()); return cmdResult; } @@ -131,7 +130,7 @@ StatusWith<BSONObj> storePossibleCursor(OperationContext* opCtx, } auto ccc = ClusterClientCursorImpl::make(opCtx, std::move(executor), std::move(params)); - collectTelemetryMongos(opCtx, ccc, incomingCursorResponse.getValue().getBatch().size()); + collectTelemetryMongos(opCtx, ccc); // We don't expect to use this cursor until a subsequent getMore, so detach from the current // OperationContext until then. ccc->detachFromOperationContext(); |