diff options
author | David Percy <david.percy@mongodb.com> | 2020-05-01 21:23:45 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-05-12 23:12:41 +0000 |
commit | 2a599c879c0775773ac4f67ef148a0838a5e9898 (patch) | |
tree | 4b85c129c1832665087e4c1351f35f00961afe78 | |
parent | 29e8bc79aa7d333bec3dfaa0f4a6d003c8a50e14 (diff) | |
download | mongo-2a599c879c0775773ac4f67ef148a0838a5e9898.tar.gz |
SERVER-47848 Don't assume pipeline needs textScore, unless needsMerge
-rw-r--r-- | jstests/aggregation/bugs/server47848.js | 35 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline_test.cpp | 31 |
3 files changed, 61 insertions, 14 deletions
diff --git a/jstests/aggregation/bugs/server47848.js b/jstests/aggregation/bugs/server47848.js new file mode 100644 index 00000000000..cf36b6958a0 --- /dev/null +++ b/jstests/aggregation/bugs/server47848.js @@ -0,0 +1,35 @@ +// Ensures TEXT_OR stage is replaced with OR when possible. +// @tags: [ +// # We don't try to replace TEXT_OR with OR when the results are consumed by a merging node, +// # because the shard doesn't know whether the merger needs the textScore metadata. +// assumes_unsharded_collection, +// ] +(function() { +'use strict'; + +load("jstests/libs/analyze_plan.js"); + +const coll = db.server47848; +assert.commandWorked(coll.createIndex({"$**": "text"})); + +const findExplain = coll.find({$text: {$search: 'banana'}}).explain(); +const aggExplain = coll.explain().aggregate({$match: {$text: {$search: 'banana'}}}); + +// The .find() plan doesn't have TEXT_OR, it has OR instead. +// Both kinds of stages deduplicate record IDs, but OR is better because OR is streaming +// while TEXT_OR is blocking. TEXT_OR is blocking because it has to accumulate the textScore +// of each record ID, so when textScore is unused we can use the more efficient OR. +assert(planHasStage(db, findExplain, 'OR'), findExplain); +assert(!planHasStage(db, findExplain, 'TEXT_OR'), findExplain); + +// The same optimization should apply to the equivalent aggregation query. Before SERVER-47848 we +// failed to apply this optimization because we were too conservative about dependency tracking. +// Specifically, before SERVER-47848 we assumed that the consumer of a pipeline might depend on +// textScore if all the stages preserve it. We made that assumption because in a sharded cluster, +// when a merger sends the shardsPart of a pipeline to the shards, it doesn't specify whether it +// depends on textScore. After SERVER-47848, we refined that assumption: the shard can tell whether +// the consumer is a merger by looking at needsMerge. If the consumer is not a merger, there won't +// be an implicit dependency on textScore. +assert(planHasStage(db, aggExplain, 'OR'), aggExplain); +assert(!planHasStage(db, aggExplain, 'TEXT_OR'), aggExplain); +})(); diff --git a/src/mongo/db/pipeline/pipeline.cpp b/src/mongo/db/pipeline/pipeline.cpp index 156450b961f..f3fc921a6ee 100644 --- a/src/mongo/db/pipeline/pipeline.cpp +++ b/src/mongo/db/pipeline/pipeline.cpp @@ -537,13 +537,14 @@ DepsTracker Pipeline::getDependencies(QueryMetadataBitSet unavailableMetadata) c deps.needWholeDocument = true; // don't know all fields we need if (!unavailableMetadata[DocumentMetadataFields::kTextScore]) { - // If there is a text score, assume we need to keep it if we can't prove we don't. If we are - // the first half of a pipeline which has been split, future stages might need it. - if (!knowAllMeta) { + // There is a text score available. If we are the first half of a split pipeline, then we + // have to assume future stages might depend on the textScore (unless we've encountered a + // stage that doesn't preserve metadata). + if (getContext()->needsMerge && !knowAllMeta) { deps.setNeedsMetadata(DocumentMetadataFields::kTextScore, true); } } else { - // If there is no text score available, then we don't need to ask for it. + // There is no text score available, so we don't need to ask for it. deps.setNeedsMetadata(DocumentMetadataFields::kTextScore, false); } diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp index b0dbbfb99ce..d99123a1404 100644 --- a/src/mongo/db/pipeline/pipeline_test.cpp +++ b/src/mongo/db/pipeline/pipeline_test.cpp @@ -3032,7 +3032,6 @@ TEST_F(PipelineDependenciesTest, EmptyPipelineShouldRequireWholeDocument) { depsTracker = pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore); ASSERT_TRUE(depsTracker.needWholeDocument); - ASSERT_TRUE(depsTracker.getNeedsMetadata(DocumentMetadataFields::kTextScore)); } // @@ -3203,19 +3202,31 @@ TEST_F(PipelineDependenciesTest, ShouldThrowIfTextScoreIsNeededButNotPresent) { ASSERT_THROWS(pipeline->getDependencies(DepsTracker::kAllMetadata), AssertionException); } -TEST_F(PipelineDependenciesTest, ShouldRequireTextScoreIfAvailableAndNoStageReturnsExhaustiveMeta) { +TEST_F(PipelineDependenciesTest, + ShouldRequireTextScoreIfAvailableAndNoStageReturnsExhaustiveMetaAndNeedsMerge) { auto ctx = getExpCtx(); + + // When needsMerge is true, the consumer might implicitly use textScore, if it's available. + ctx->needsMerge = true; + auto pipeline = Pipeline::create({}, ctx); + auto deps = pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore); + ASSERT_TRUE(deps.getNeedsMetadata(DocumentMetadataFields::kTextScore)); - auto depsTracker = - pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore); - ASSERT_TRUE(depsTracker.getNeedsMetadata(DocumentMetadataFields::kTextScore)); + pipeline = Pipeline::create({DocumentSourceNeedsASeeNext::create(ctx)}, ctx); + deps = pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore); + ASSERT_TRUE(deps.getNeedsMetadata(DocumentMetadataFields::kTextScore)); - auto needsASeeNext = DocumentSourceNeedsASeeNext::create(ctx); - pipeline = Pipeline::create({needsASeeNext}, ctx); - depsTracker = - pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore); - ASSERT_TRUE(depsTracker.getNeedsMetadata(DocumentMetadataFields::kTextScore)); + // When needsMerge is false, if no stage explicitly uses textScore then we know it isn't needed. + ctx->needsMerge = false; + + pipeline = Pipeline::create({}, ctx); + deps = pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore); + ASSERT_FALSE(deps.getNeedsMetadata(DocumentMetadataFields::kTextScore)); + + pipeline = Pipeline::create({DocumentSourceNeedsASeeNext::create(ctx)}, ctx); + deps = pipeline->getDependencies(DepsTracker::kAllMetadata & ~DepsTracker::kOnlyTextScore); + ASSERT_FALSE(deps.getNeedsMetadata(DocumentMetadataFields::kTextScore)); } TEST_F(PipelineDependenciesTest, ShouldNotRequireTextScoreIfAvailableButDefinitelyNotNeeded) { |