summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Percy <david.percy@mongodb.com>2020-05-01 21:23:45 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-05-12 23:12:41 +0000
commit2a599c879c0775773ac4f67ef148a0838a5e9898 (patch)
tree4b85c129c1832665087e4c1351f35f00961afe78
parent29e8bc79aa7d333bec3dfaa0f4a6d003c8a50e14 (diff)
downloadmongo-2a599c879c0775773ac4f67ef148a0838a5e9898.tar.gz
SERVER-47848 Don't assume pipeline needs textScore, unless needsMerge
-rw-r--r--jstests/aggregation/bugs/server47848.js35
-rw-r--r--src/mongo/db/pipeline/pipeline.cpp9
-rw-r--r--src/mongo/db/pipeline/pipeline_test.cpp31
3 files changed, 61 insertions, 14 deletions
diff --git a/jstests/aggregation/bugs/server47848.js b/jstests/aggregation/bugs/server47848.js
new file mode 100644
index 00000000000..cf36b6958a0
--- /dev/null
+++ b/jstests/aggregation/bugs/server47848.js
@@ -0,0 +1,35 @@
+// Ensures TEXT_OR stage is replaced with OR when possible.
+// @tags: [
+// # We don't try to replace TEXT_OR with OR when the results are consumed by a merging node,
+// # because the shard doesn't know whether the merger needs the textScore metadata.
+// assumes_unsharded_collection,
+// ]
+(function() {
+'use strict';
+
+load("jstests/libs/analyze_plan.js");
+
+const coll = db.server47848;
+assert.commandWorked(coll.createIndex({"$**": "text"}));
+
+const findExplain = coll.find({$text: {$search: 'banana'}}).explain();
+const aggExplain = coll.explain().aggregate({$match: {$text: {$search: 'banana'}}});
+
+// The .find() plan doesn't have TEXT_OR, it has OR instead.
+// Both kinds of stages deduplicate record IDs, but OR is better because OR is streaming
+// while TEXT_OR is blocking. TEXT_OR is blocking because it has to accumulate the textScore
+// of each record ID, so when textScore is unused we can use the more efficient OR.
+assert(planHasStage(db, findExplain, 'OR'), findExplain);
+assert(!planHasStage(db, findExplain, 'TEXT_OR'), findExplain);
+
+// The same optimization should apply to the equivalent aggregation query. Before SERVER-47848 we
+// failed to apply this optimization because we were too conservative about dependency tracking.
+// Specifically, before SERVER-47848 we assumed that the consumer of a pipeline might depend on
+// textScore if all the stages preserve it. We made that assumption because in a sharded cluster,
+// when a merger sends the shardsPart of a pipeline to the shards, it doesn't specify whether it
+// depends on textScore. After SERVER-47848, we refined that assumption: the shard can tell whether
+// the consumer is a merger by looking at needsMerge. If the consumer is not a merger, there won't
+// be an implicit dependency on textScore.
+assert(planHasStage(db, aggExplain, 'OR'), aggExplain);
+assert(!planHasStage(db, aggExplain, 'TEXT_OR'), aggExplain);
+})();
diff --git a/src/mongo/db/pipeline/pipeline.cpp b/src/mongo/db/pipeline/pipeline.cpp
index 156450b961f..f3fc921a6ee 100644
--- a/src/mongo/db/pipeline/pipeline.cpp
+++ b/src/mongo/db/pipeline/pipeline.cpp
@@ -537,13 +537,14 @@ DepsTracker Pipeline::getDependencies(QueryMetadataBitSet unavailableMetadata) c
deps.needWholeDocument = true; // don't know all fields we need
if (!unavailableMetadata[DocumentMetadataFields::kTextScore]) {
- // If there is a text score, assume we need to keep it if we can't prove we don't. If we are
- // the first half of a pipeline which has been split, future stages might need it.
- if (!knowAllMeta) {
+ // There is a text score available. If we are the first half of a split pipeline, then we
+ // have to assume future stages might depend on the textScore (unless we've encountered a
+ // stage that doesn't preserve metadata).
+ if (getContext()->needsMerge && !knowAllMeta) {
deps.setNeedsMetadata(DocumentMetadataFields::kTextScore, true);
}
} else {
- // If there is no text score available, then we don't need to ask for it.
+ // There is no text score available, so we don't need to ask for it.
deps.setNeedsMetadata(DocumentMetadataFields::kTextScore, false);
}
diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp
index b0dbbfb99ce..d99123a1404 100644
--- a/src/mongo/db/pipeline/pipeline_test.cpp
+++ b/src/mongo/db/pipeline/pipeline_test.cpp
@@ -3032,7 +3032,6 @@ TEST_F(PipelineDependenciesTest, EmptyPipelineShouldRequireWholeDocument) {
depsTracker =
pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore);
ASSERT_TRUE(depsTracker.needWholeDocument);
- ASSERT_TRUE(depsTracker.getNeedsMetadata(DocumentMetadataFields::kTextScore));
}
//
@@ -3203,19 +3202,31 @@ TEST_F(PipelineDependenciesTest, ShouldThrowIfTextScoreIsNeededButNotPresent) {
ASSERT_THROWS(pipeline->getDependencies(DepsTracker::kAllMetadata), AssertionException);
}
-TEST_F(PipelineDependenciesTest, ShouldRequireTextScoreIfAvailableAndNoStageReturnsExhaustiveMeta) {
+TEST_F(PipelineDependenciesTest,
+ ShouldRequireTextScoreIfAvailableAndNoStageReturnsExhaustiveMetaAndNeedsMerge) {
auto ctx = getExpCtx();
+
+ // When needsMerge is true, the consumer might implicitly use textScore, if it's available.
+ ctx->needsMerge = true;
+
auto pipeline = Pipeline::create({}, ctx);
+ auto deps = pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore);
+ ASSERT_TRUE(deps.getNeedsMetadata(DocumentMetadataFields::kTextScore));
- auto depsTracker =
- pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore);
- ASSERT_TRUE(depsTracker.getNeedsMetadata(DocumentMetadataFields::kTextScore));
+ pipeline = Pipeline::create({DocumentSourceNeedsASeeNext::create(ctx)}, ctx);
+ deps = pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore);
+ ASSERT_TRUE(deps.getNeedsMetadata(DocumentMetadataFields::kTextScore));
- auto needsASeeNext = DocumentSourceNeedsASeeNext::create(ctx);
- pipeline = Pipeline::create({needsASeeNext}, ctx);
- depsTracker =
- pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore);
- ASSERT_TRUE(depsTracker.getNeedsMetadata(DocumentMetadataFields::kTextScore));
+ // When needsMerge is false, if no stage explicitly uses textScore then we know it isn't needed.
+ ctx->needsMerge = false;
+
+ pipeline = Pipeline::create({}, ctx);
+ deps = pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore);
+ ASSERT_FALSE(deps.getNeedsMetadata(DocumentMetadataFields::kTextScore));
+
+ pipeline = Pipeline::create({DocumentSourceNeedsASeeNext::create(ctx)}, ctx);
+ deps = pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore);
+ ASSERT_FALSE(deps.getNeedsMetadata(DocumentMetadataFields::kTextScore));
}
TEST_F(PipelineDependenciesTest, ShouldNotRequireTextScoreIfAvailableButDefinitelyNotNeeded) {