summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWill Buerger <will.buerger@mongodb.com>2023-05-03 18:58:03 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-05-03 20:09:50 +0000
commitdffa373f8558c13b52299b187a261a7593189237 (patch)
tree57807b5ee3deebc29b4128b90230aa66ecc4f00f
parent01e26506a0981b4141bc65c9d016666ada60319e (diff)
downloadmongo-dffa373f8558c13b52299b187a261a7593189237.tar.gz
SERVER-76781: Decouple opdebug metric collection from telemetry
-rw-r--r--src/mongo/db/clientcursor.cpp17
-rw-r--r--src/mongo/db/clientcursor.h11
-rw-r--r--src/mongo/db/commands/bulk_write.cpp3
-rw-r--r--src/mongo/db/commands/getmore_cmd.cpp4
-rw-r--r--src/mongo/db/commands/run_aggregate.cpp5
-rw-r--r--src/mongo/db/curop.cpp9
-rw-r--r--src/mongo/db/curop.h17
-rw-r--r--src/mongo/db/query/find.cpp5
-rw-r--r--src/mongo/db/query/telemetry.cpp9
-rw-r--r--src/mongo/db/query/telemetry.h7
-rw-r--r--src/mongo/s/commands/cluster_bulk_write_cmd.cpp3
-rw-r--r--src/mongo/s/query/cluster_aggregation_planner.cpp5
-rw-r--r--src/mongo/s/query/cluster_cursor_manager.cpp23
-rw-r--r--src/mongo/s/query/cluster_cursor_manager.h17
-rw-r--r--src/mongo/s/query/cluster_find.cpp8
-rw-r--r--src/mongo/s/query/store_possible_cursor.cpp7
16 files changed, 61 insertions, 89 deletions
diff --git a/src/mongo/db/clientcursor.cpp b/src/mongo/db/clientcursor.cpp
index 90c3a4581cc..9037096354b 100644
--- a/src/mongo/db/clientcursor.cpp
+++ b/src/mongo/db/clientcursor.cpp
@@ -393,23 +393,14 @@ void startClientCursorMonitor() {
getClientCursorMonitor(getGlobalServiceContext()).go();
}
-void collectTelemetryMongod(OperationContext* opCtx,
- ClientCursorPin& pinnedCursor,
- long long nreturned) {
- auto curOp = CurOp::get(opCtx);
- telemetry::collectMetricsOnOpDebug(curOp, nreturned);
- pinnedCursor->incrementCursorMetrics(curOp->debug().additiveMetrics);
+void collectTelemetryMongod(OperationContext* opCtx, ClientCursorPin& pinnedCursor) {
+ pinnedCursor->incrementCursorMetrics(CurOp::get(opCtx)->debug().additiveMetrics);
}
-void collectTelemetryMongod(OperationContext* opCtx,
- const BSONObj& originatingCommand,
- long long nreturned) {
- auto curOp = CurOp::get(opCtx);
- telemetry::collectMetricsOnOpDebug(curOp, nreturned);
-
+void collectTelemetryMongod(OperationContext* opCtx, const BSONObj& originatingCommand) {
// If we haven't registered a cursor to prepare for getMore requests, we record
// telemetry directly.
- auto& opDebug = curOp->debug();
+ auto& opDebug = CurOp::get(opCtx)->debug();
telemetry::writeTelemetry(
opCtx,
opDebug.telemetryStoreKey,
diff --git a/src/mongo/db/clientcursor.h b/src/mongo/db/clientcursor.h
index 11e7b604ba0..e254f112bce 100644
--- a/src/mongo/db/clientcursor.h
+++ b/src/mongo/db/clientcursor.h
@@ -588,10 +588,13 @@ void startClientCursorMonitor();
* Records certain metrics for the current operation on OpDebug and aggregates those metrics for
* telemetry use. If a cursor pin is provided, metrics are aggregated on the cursor; otherwise,
* metrics are written directly to the telemetry store.
+ * NOTE: Metrics are taken from opDebug.additiveMetrics, so CurOp::setEndOfOpMetrics must be called
+ * *prior* to calling these.
+ *
+ * Currently, telemetry is only collected for find and aggregate requests (and their subsequent
+ * getMore requests), so these should only be called from those request paths.
*/
-void collectTelemetryMongod(OperationContext* opCtx, ClientCursorPin& cursor, long long nreturned);
-void collectTelemetryMongod(OperationContext* opCtx,
- const BSONObj& originatingCommand,
- long long nreturned);
+void collectTelemetryMongod(OperationContext* opCtx, ClientCursorPin& cursor);
+void collectTelemetryMongod(OperationContext* opCtx, const BSONObj& originatingCommand);
} // namespace mongo
diff --git a/src/mongo/db/commands/bulk_write.cpp b/src/mongo/db/commands/bulk_write.cpp
index 0b2379f8897..c45f4b8d68e 100644
--- a/src/mongo/db/commands/bulk_write.cpp
+++ b/src/mongo/db/commands/bulk_write.cpp
@@ -926,9 +926,9 @@ public:
numRepliesInFirstBatch++;
responseSizeTracker.add(nextDoc);
}
+ CurOp::get(opCtx)->setEndOfOpMetrics(numRepliesInFirstBatch);
if (exec->isEOF()) {
invariant(numRepliesInFirstBatch == replies.size());
- collectTelemetryMongod(opCtx, reqObj, numRepliesInFirstBatch);
auto reply = BulkWriteCommandReply(
BulkWriteCommandResponseCursor(
0, std::vector<BulkWriteReplyItem>(std::move(replies))),
@@ -957,7 +957,6 @@ public:
pinnedCursor->incNBatches();
pinnedCursor->incNReturnedSoFar(replies.size());
- collectTelemetryMongod(opCtx, pinnedCursor, numRepliesInFirstBatch);
replies.resize(numRepliesInFirstBatch);
auto reply = BulkWriteCommandReply(
diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp
index 796f4efd621..28a3d8fb43b 100644
--- a/src/mongo/db/commands/getmore_cmd.cpp
+++ b/src/mongo/db/commands/getmore_cmd.cpp
@@ -702,8 +702,8 @@ public:
auto& metricsCollector = ResourceConsumption::MetricsCollector::get(opCtx);
metricsCollector.incrementDocUnitsReturned(curOp->getNS(), docUnitsReturned);
curOp->debug().additiveMetrics.nBatches = 1;
-
- collectTelemetryMongod(opCtx, cursorPin, numResults);
+ curOp->setEndOfOpMetrics(numResults);
+ collectTelemetryMongod(opCtx, cursorPin);
if (respondWithId) {
cursorDeleter.dismiss();
diff --git a/src/mongo/db/commands/run_aggregate.cpp b/src/mongo/db/commands/run_aggregate.cpp
index 7db34d9f041..4b2c535cbc7 100644
--- a/src/mongo/db/commands/run_aggregate.cpp
+++ b/src/mongo/db/commands/run_aggregate.cpp
@@ -1221,11 +1221,12 @@ Status runAggregate(OperationContext* opCtx,
PlanSummaryStats stats;
planExplainer.getSummaryStats(&stats);
curOp->debug().setPlanSummaryMetrics(stats);
+ curOp->setEndOfOpMetrics(stats.nReturned);
if (keepCursor) {
- collectTelemetryMongod(opCtx, pins[0], stats.nReturned);
+ collectTelemetryMongod(opCtx, pins[0]);
} else {
- collectTelemetryMongod(opCtx, cmdObj, stats.nReturned);
+ collectTelemetryMongod(opCtx, cmdObj);
}
// For an optimized away pipeline, signal the cache that a query operation has completed.
diff --git a/src/mongo/db/curop.cpp b/src/mongo/db/curop.cpp
index 731e58f6a12..7601b6c90c0 100644
--- a/src/mongo/db/curop.cpp
+++ b/src/mongo/db/curop.cpp
@@ -337,6 +337,15 @@ void CurOp::setGenericOpRequestDetails(NamespaceString nss,
_nss = std::move(nss);
}
+void CurOp::setEndOfOpMetrics(long long nreturned) {
+ _debug.additiveMetrics.nreturned = nreturned;
+ // executionTime is set with the final executionTime in completeAndLogOperation, but for
+ // telemetry collection we want it set before incrementing cursor metrics using OpDebug's
+ // AdditiveMetrics. The value set here will be overwritten later in
+ // completeAndLogOperation.
+ _debug.additiveMetrics.executionTime = elapsedTimeExcludingPauses();
+}
+
void CurOp::setMessage_inlock(StringData message) {
if (_progressMeter.isActive()) {
LOGV2_ERROR(20527,
diff --git a/src/mongo/db/curop.h b/src/mongo/db/curop.h
index f0012d376a2..e934afe9f68 100644
--- a/src/mongo/db/curop.h
+++ b/src/mongo/db/curop.h
@@ -470,6 +470,13 @@ public:
NetworkOp op);
/**
+ * Sets metrics collected at the end of an operation onto curOp's OpDebug instance. Note that
+ * this is used in tandem with OpDebug::setPlanSummaryMetrics so should not repeat any metrics
+ * collected there.
+ */
+ void setEndOfOpMetrics(long long nreturned);
+
+ /**
* Marks the operation end time, records the length of the client response if a valid response
* exists, and then - subject to the current values of slowMs and sampleRate - logs this CurOp
* to file under the given LogComponent. Returns 'true' if, in addition to being logged, this
@@ -945,16 +952,6 @@ public:
_tickSource = tickSource;
}
- /**
- * Merge match counters from the current operation into the global map and stop counting.
- */
- void stopMatchExprCounter();
-
- /**
- * Increment the counter for the match expression with given name in the current operation.
- */
- void incrementMatchExprCounter(StringData name);
-
private:
class CurOpStack;
diff --git a/src/mongo/db/query/find.cpp b/src/mongo/db/query/find.cpp
index 62d89dd904e..aab6684e772 100644
--- a/src/mongo/db/query/find.cpp
+++ b/src/mongo/db/query/find.cpp
@@ -122,11 +122,12 @@ void endQueryOp(OperationContext* opCtx,
auto&& explainer = exec.getPlanExplainer();
explainer.getSummaryStats(&summaryStats);
curOp->debug().setPlanSummaryMetrics(summaryStats);
+ curOp->setEndOfOpMetrics(numResults);
if (cursor) {
- collectTelemetryMongod(opCtx, *cursor, numResults);
+ collectTelemetryMongod(opCtx, *cursor);
} else {
- collectTelemetryMongod(opCtx, cmdObj, numResults);
+ collectTelemetryMongod(opCtx, cmdObj);
}
if (collection) {
diff --git a/src/mongo/db/query/telemetry.cpp b/src/mongo/db/query/telemetry.cpp
index be688807e5d..c000478a7de 100644
--- a/src/mongo/db/query/telemetry.cpp
+++ b/src/mongo/db/query/telemetry.cpp
@@ -594,14 +594,5 @@ void writeTelemetry(OperationContext* opCtx,
metrics->queryExecMicros.aggregate(queryExecMicros);
metrics->docsReturned.aggregate(docsReturned);
}
-
-void collectMetricsOnOpDebug(CurOp* curOp, long long nreturned) {
- auto&& opDebug = curOp->debug();
- opDebug.additiveMetrics.nreturned = nreturned;
- // executionTime is set with the final executionTime in CurOp::completeAndLogOperation, but for
- // telemetry collection we want it set before incrementing cursor metrics using AdditiveMetrics.
- // The value set here will be overwritten later in CurOp::completeAndLogOperation.
- opDebug.additiveMetrics.executionTime = curOp->elapsedTimeExcludingPauses();
-}
} // namespace telemetry
} // namespace mongo
diff --git a/src/mongo/db/query/telemetry.h b/src/mongo/db/query/telemetry.h
index 539f8420bb7..b49e70762c7 100644
--- a/src/mongo/db/query/telemetry.h
+++ b/src/mongo/db/query/telemetry.h
@@ -236,12 +236,5 @@ BSONObj makeTelemetryKey(const FindCommandRequest& findCommand,
const SerializationOptions& opts,
const boost::intrusive_ptr<ExpressionContext>& expCtx,
boost::optional<const TelemetryMetrics&> existingMetrics = boost::none);
-
-/**
- * Collects metrics for telemetry from the current operation onto OpDebug. This must be called prior
- * to incrementing metrics on cursors (either ClientCursor or ClusterClientCursor) since cursor
- * metric aggregation happens via OpDebug::AdditiveMetrics.
- */
-void collectMetricsOnOpDebug(CurOp* curOp, long long nreturned);
} // namespace telemetry
} // namespace mongo
diff --git a/src/mongo/s/commands/cluster_bulk_write_cmd.cpp b/src/mongo/s/commands/cluster_bulk_write_cmd.cpp
index 9f3ce595590..57dc73fe050 100644
--- a/src/mongo/s/commands/cluster_bulk_write_cmd.cpp
+++ b/src/mongo/s/commands/cluster_bulk_write_cmd.cpp
@@ -171,8 +171,8 @@ public:
numRepliesInFirstBatch++;
responseSizeTracker.add(nextObj);
}
+ CurOp::get(opCtx)->setEndOfOpMetrics(numRepliesInFirstBatch);
if (numRepliesInFirstBatch == replyItems.size()) {
- collectTelemetryMongos(opCtx, reqObj, numRepliesInFirstBatch);
return BulkWriteCommandReply(
BulkWriteCommandResponseCursor(
0, std::vector<BulkWriteReplyItem>(std::move(replyItems))),
@@ -181,7 +181,6 @@ public:
ccc->detachFromOperationContext();
ccc->incNBatches();
- collectTelemetryMongos(opCtx, ccc, numRepliesInFirstBatch);
auto authUser =
AuthorizationSession::get(opCtx->getClient())->getAuthenticatedUserName();
diff --git a/src/mongo/s/query/cluster_aggregation_planner.cpp b/src/mongo/s/query/cluster_aggregation_planner.cpp
index e10721b2915..aab9d729ae7 100644
--- a/src/mongo/s/query/cluster_aggregation_planner.cpp
+++ b/src/mongo/s/query/cluster_aggregation_planner.cpp
@@ -365,10 +365,11 @@ BSONObj establishMergingMongosCursor(OperationContext* opCtx,
opDebug.nShards = std::max(opDebug.nShards, nShards);
opDebug.cursorExhausted = exhausted;
opDebug.additiveMetrics.nBatches = 1;
+ CurOp::get(opCtx)->setEndOfOpMetrics(responseBuilder.numDocs());
if (exhausted) {
- collectTelemetryMongos(opCtx, ccc->getOriginatingCommand(), responseBuilder.numDocs());
+ collectTelemetryMongos(opCtx, ccc->getOriginatingCommand());
} else {
- collectTelemetryMongos(opCtx, ccc, responseBuilder.numDocs());
+ collectTelemetryMongos(opCtx, ccc);
}
ccc->detachFromOperationContext();
diff --git a/src/mongo/s/query/cluster_cursor_manager.cpp b/src/mongo/s/query/cluster_cursor_manager.cpp
index 152c990f163..313f283a5b5 100644
--- a/src/mongo/s/query/cluster_cursor_manager.cpp
+++ b/src/mongo/s/query/cluster_cursor_manager.cpp
@@ -591,12 +591,7 @@ StatusWith<ClusterClientCursorGuard> ClusterCursorManager::_detachCursor(WithLoc
return std::move(cursor);
}
-void collectTelemetryMongos(OperationContext* opCtx,
- const BSONObj& originatingCommand,
- long long nreturned) {
- auto curOp = CurOp::get(opCtx);
- telemetry::collectMetricsOnOpDebug(curOp, nreturned);
-
+void collectTelemetryMongos(OperationContext* opCtx, const BSONObj& originatingCommand) {
// If we haven't registered a cursor to prepare for getMore requests, we record
// telemetry directly.
auto&& opDebug = CurOp::get(opCtx)->debug();
@@ -608,20 +603,12 @@ void collectTelemetryMongos(OperationContext* opCtx,
opDebug.additiveMetrics.nreturned.value_or(0));
}
-void collectTelemetryMongos(OperationContext* opCtx,
- ClusterClientCursorGuard& cursor,
- long long nreturned) {
- auto curOp = CurOp::get(opCtx);
- telemetry::collectMetricsOnOpDebug(curOp, nreturned);
- cursor->incrementCursorMetrics(curOp->debug().additiveMetrics);
+void collectTelemetryMongos(OperationContext* opCtx, ClusterClientCursorGuard& cursor) {
+ cursor->incrementCursorMetrics(CurOp::get(opCtx)->debug().additiveMetrics);
}
-void collectTelemetryMongos(OperationContext* opCtx,
- ClusterCursorManager::PinnedCursor& cursor,
- long long nreturned) {
- auto curOp = CurOp::get(opCtx);
- telemetry::collectMetricsOnOpDebug(curOp, nreturned);
- cursor->incrementCursorMetrics(curOp->debug().additiveMetrics);
+void collectTelemetryMongos(OperationContext* opCtx, ClusterCursorManager::PinnedCursor& cursor) {
+ cursor->incrementCursorMetrics(CurOp::get(opCtx)->debug().additiveMetrics);
}
} // namespace mongo
diff --git a/src/mongo/s/query/cluster_cursor_manager.h b/src/mongo/s/query/cluster_cursor_manager.h
index ec8f9576e40..d194e296b78 100644
--- a/src/mongo/s/query/cluster_cursor_manager.h
+++ b/src/mongo/s/query/cluster_cursor_manager.h
@@ -604,15 +604,14 @@ private:
* use. If a cursor is provided (via ClusterClientCursorGuard or
* ClusterCursorManager::PinnedCursor), metrics are aggregated on the cursor; otherwise, metrics are
* written directly to the telemetry store.
+ * NOTE: Metrics are taken from opDebug.additiveMetrics, so CurOp::setEndOfOpMetrics must be called
+ * *prior* to calling these.
+ *
+ * Currently, telemetry is only collected for find and aggregate requests (and their subsequent
+ * getMore requests), so these should only be called from those request paths.
*/
-void collectTelemetryMongos(OperationContext* opCtx,
- const BSONObj& originatingCommand,
- long long nreturned);
-void collectTelemetryMongos(OperationContext* opCtx,
- ClusterClientCursorGuard& cursor,
- long long nreturned);
-void collectTelemetryMongos(OperationContext* opCtx,
- ClusterCursorManager::PinnedCursor& cursor,
- long long nreturned);
+void collectTelemetryMongos(OperationContext* opCtx, const BSONObj& originatingCommand);
+void collectTelemetryMongos(OperationContext* opCtx, ClusterClientCursorGuard& cursor);
+void collectTelemetryMongos(OperationContext* opCtx, ClusterCursorManager::PinnedCursor& cursor);
} // namespace mongo
diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp
index 7b472f79ff9..332a3641b0e 100644
--- a/src/mongo/s/query/cluster_find.cpp
+++ b/src/mongo/s/query/cluster_find.cpp
@@ -435,6 +435,7 @@ CursorId runQueryWithoutRetrying(OperationContext* opCtx,
*partialResultsReturned = ccc->partialResultsReturned();
}
+ CurOp::get(opCtx)->setEndOfOpMetrics(results->size());
// If the cursor is exhausted, then there are no more results to return and we don't need to
// allocate a cursor id.
if (cursorState == ClusterCursorManager::CursorState::Exhausted) {
@@ -443,7 +444,7 @@ CursorId runQueryWithoutRetrying(OperationContext* opCtx,
if (shardIds.size() > 0) {
updateNumHostsTargetedMetrics(opCtx, cm, shardIds.size());
}
- collectTelemetryMongos(opCtx, ccc->getOriginatingCommand(), results->size());
+ collectTelemetryMongos(opCtx, ccc->getOriginatingCommand());
return CursorId(0);
}
@@ -454,7 +455,7 @@ CursorId runQueryWithoutRetrying(OperationContext* opCtx,
? ClusterCursorManager::CursorLifetime::Immortal
: ClusterCursorManager::CursorLifetime::Mortal;
auto authUser = AuthorizationSession::get(opCtx->getClient())->getAuthenticatedUserName();
- collectTelemetryMongos(opCtx, ccc, results->size());
+ collectTelemetryMongos(opCtx, ccc);
auto cursorId = uassertStatusOK(cursorManager->registerCursor(
opCtx, ccc.releaseCursor(), query.nss(), cursorType, cursorLifetime, authUser));
@@ -918,10 +919,11 @@ StatusWith<CursorResponse> ClusterFind::runGetMore(OperationContext* opCtx,
// Set nReturned and whether the cursor has been exhausted.
opDebug.cursorExhausted = (idToReturn == 0);
opDebug.additiveMetrics.nBatches = 1;
+ CurOp::get(opCtx)->setEndOfOpMetrics(batch.size());
const bool partialResultsReturned = pinnedCursor.getValue()->partialResultsReturned();
pinnedCursor.getValue()->setLeftoverMaxTimeMicros(opCtx->getRemainingMaxTimeMicros());
- collectTelemetryMongos(opCtx, pinnedCursor.getValue(), batch.size());
+ collectTelemetryMongos(opCtx, pinnedCursor.getValue());
// Upon successful completion, transfer ownership of the cursor back to the cursor manager. If
// the cursor has been exhausted, the cursor manager will clean it up for us.
diff --git a/src/mongo/s/query/store_possible_cursor.cpp b/src/mongo/s/query/store_possible_cursor.cpp
index eb24db3e7cd..4336fe6d96c 100644
--- a/src/mongo/s/query/store_possible_cursor.cpp
+++ b/src/mongo/s/query/store_possible_cursor.cpp
@@ -94,12 +94,11 @@ StatusWith<BSONObj> storePossibleCursor(OperationContext* opCtx,
// a split aggregation pipeline, and the shards half of that pipeline may have targeted multiple
// shards. In that case, leave the current value as-is.
opDebug.nShards = std::max(opDebug.nShards, 1);
+ CurOp::get(opCtx)->setEndOfOpMetrics(incomingCursorResponse.getValue().getBatch().size());
if (incomingCursorResponse.getValue().getCursorId() == CursorId(0)) {
opDebug.cursorExhausted = true;
- collectTelemetryMongos(opCtx,
- CurOp::get(opCtx)->opDescription(),
- incomingCursorResponse.getValue().getBatch().size());
+ collectTelemetryMongos(opCtx, CurOp::get(opCtx)->opDescription());
return cmdResult;
}
@@ -131,7 +130,7 @@ StatusWith<BSONObj> storePossibleCursor(OperationContext* opCtx,
}
auto ccc = ClusterClientCursorImpl::make(opCtx, std::move(executor), std::move(params));
- collectTelemetryMongos(opCtx, ccc, incomingCursorResponse.getValue().getBatch().size());
+ collectTelemetryMongos(opCtx, ccc);
// We don't expect to use this cursor until a subsequent getMore, so detach from the current
// OperationContext until then.
ccc->detachFromOperationContext();