SERVER-47848 Don't assume pipeline needs textScore, unless needsMerge

author: David Percy <david.percy@mongodb.com> 2020-05-01 21:23:45 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-05-12 23:12:41 +0000
commit: 2a599c879c0775773ac4f67ef148a0838a5e9898 (patch)
tree: 4b85c129c1832665087e4c1351f35f00961afe78
parent: 29e8bc79aa7d333bec3dfaa0f4a6d003c8a50e14 (diff)
download: mongo-2a599c879c0775773ac4f67ef148a0838a5e9898.tar.gz
3 files changed, 61 insertions, 14 deletions
diff --git a/jstests/aggregation/bugs/server47848.js b/jstests/aggregation/bugs/server47848.js
new file mode 100644
index 00000000000..cf36b6958a0
--- /dev/null
+++ b/jstests/aggregation/bugs/server47848.js
@@ -0,0 +1,35 @@
+// Ensures TEXT_OR stage is replaced with OR when possible.
+// @tags: [
+//   # We don't try to replace TEXT_OR with OR when the results are consumed by a merging node,
+//   # because the shard doesn't know whether the merger needs the textScore metadata.
+//   assumes_unsharded_collection,
+// ]
+(function() {
+'use strict';
+
+load("jstests/libs/analyze_plan.js");
+
+const coll = db.server47848;
+assert.commandWorked(coll.createIndex({"$**": "text"}));
+
+const findExplain = coll.find({$text: {$search: 'banana'}}).explain();
+const aggExplain = coll.explain().aggregate({$match: {$text: {$search: 'banana'}}});
+
+// The .find() plan doesn't have TEXT_OR, it has OR instead.
+// Both kinds of stages deduplicate record IDs, but OR is better because OR is streaming
+// while TEXT_OR is blocking. TEXT_OR is blocking because it has to accumulate the textScore
+// of each record ID, so when textScore is unused we can use the more efficient OR.
+assert(planHasStage(db, findExplain, 'OR'), findExplain);
+assert(!planHasStage(db, findExplain, 'TEXT_OR'), findExplain);
+
+// The same optimization should apply to the equivalent aggregation query. Before SERVER-47848 we
+// failed to apply this optimization because we were too conservative about dependency tracking.
+// Specifically, before SERVER-47848 we assumed that the consumer of a pipeline might depend on
+// textScore if all the stages preserve it. We made that assumption because in a sharded cluster,
+// when a merger sends the shardsPart of a pipeline to the shards, it doesn't specify whether it
+// depends on textScore. After SERVER-47848, we refined that assumption: the shard can tell whether
+// the consumer is a merger by looking at needsMerge. If the consumer is not a merger, there won't
+// be an implicit dependency on textScore.
+assert(planHasStage(db, aggExplain, 'OR'), aggExplain);
+assert(!planHasStage(db, aggExplain, 'TEXT_OR'), aggExplain);
+})();
diff --git a/src/mongo/db/pipeline/pipeline.cpp b/src/mongo/db/pipeline/pipeline.cpp
index 156450b961f..f3fc921a6ee 100644
--- a/src/mongo/db/pipeline/pipeline.cpp
+++ b/src/mongo/db/pipeline/pipeline.cpp
@@ -537,13 +537,14 @@ DepsTracker Pipeline::getDependencies(QueryMetadataBitSet unavailableMetadata) c
         deps.needWholeDocument = true;  // don't know all fields we need
 
     if (!unavailableMetadata[DocumentMetadataFields::kTextScore]) {
-        // If there is a text score, assume we need to keep it if we can't prove we don't. If we are
-        // the first half of a pipeline which has been split, future stages might need it.
-        if (!knowAllMeta) {
+        // There is a text score available. If we are the first half of a split pipeline, then we
+        // have to assume future stages might depend on the textScore (unless we've encountered a
+        // stage that doesn't preserve metadata).
+        if (getContext()->needsMerge && !knowAllMeta) {
             deps.setNeedsMetadata(DocumentMetadataFields::kTextScore, true);
         }
     } else {
-        // If there is no text score available, then we don't need to ask for it.
+        // There is no text score available, so we don't need to ask for it.
         deps.setNeedsMetadata(DocumentMetadataFields::kTextScore, false);
     }
 
diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp
index b0dbbfb99ce..d99123a1404 100644
--- a/src/mongo/db/pipeline/pipeline_test.cpp
+++ b/src/mongo/db/pipeline/pipeline_test.cpp
@@ -3032,7 +3032,6 @@ TEST_F(PipelineDependenciesTest, EmptyPipelineShouldRequireWholeDocument) {
     depsTracker =
         pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore);
     ASSERT_TRUE(depsTracker.needWholeDocument);
-    ASSERT_TRUE(depsTracker.getNeedsMetadata(DocumentMetadataFields::kTextScore));
 }
 
 //
@@ -3203,19 +3202,31 @@ TEST_F(PipelineDependenciesTest, ShouldThrowIfTextScoreIsNeededButNotPresent) {
     ASSERT_THROWS(pipeline->getDependencies(DepsTracker::kAllMetadata), AssertionException);
 }
 
-TEST_F(PipelineDependenciesTest, ShouldRequireTextScoreIfAvailableAndNoStageReturnsExhaustiveMeta) {
+TEST_F(PipelineDependenciesTest,
+       ShouldRequireTextScoreIfAvailableAndNoStageReturnsExhaustiveMetaAndNeedsMerge) {
     auto ctx = getExpCtx();
+
+    // When needsMerge is true, the consumer might implicitly use textScore, if it's available.
+    ctx->needsMerge = true;
+
     auto pipeline = Pipeline::create({}, ctx);
+    auto deps = pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore);
+    ASSERT_TRUE(deps.getNeedsMetadata(DocumentMetadataFields::kTextScore));
 
-    auto depsTracker =
-        pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore);
-    ASSERT_TRUE(depsTracker.getNeedsMetadata(DocumentMetadataFields::kTextScore));
+    pipeline = Pipeline::create({DocumentSourceNeedsASeeNext::create(ctx)}, ctx);
+    deps = pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore);
+    ASSERT_TRUE(deps.getNeedsMetadata(DocumentMetadataFields::kTextScore));
 
-    auto needsASeeNext = DocumentSourceNeedsASeeNext::create(ctx);
-    pipeline = Pipeline::create({needsASeeNext}, ctx);
-    depsTracker =
-        pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore);
-    ASSERT_TRUE(depsTracker.getNeedsMetadata(DocumentMetadataFields::kTextScore));
+    // When needsMerge is false, if no stage explicitly uses textScore then we know it isn't needed.
+    ctx->needsMerge = false;
+
+    pipeline = Pipeline::create({}, ctx);
+    deps = pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore);
+    ASSERT_FALSE(deps.getNeedsMetadata(DocumentMetadataFields::kTextScore));
+
+    pipeline = Pipeline::create({DocumentSourceNeedsASeeNext::create(ctx)}, ctx);
+    deps = pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore);
+    ASSERT_FALSE(deps.getNeedsMetadata(DocumentMetadataFields::kTextScore));
 }
 
 TEST_F(PipelineDependenciesTest, ShouldNotRequireTextScoreIfAvailableButDefinitelyNotNeeded) {
author	David Percy <david.percy@mongodb.com>	2020-05-01 21:23:45 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2020-05-12 23:12:41 +0000
commit	2a599c879c0775773ac4f67ef148a0838a5e9898 (patch)
tree	4b85c129c1832665087e4c1351f35f00961afe78
parent	29e8bc79aa7d333bec3dfaa0f4a6d003c8a50e14 (diff)
download	mongo-2a599c879c0775773ac4f67ef148a0838a5e9898.tar.gz