diff options
author | James Cohan <james.cohan@10gen.com> | 2015-06-03 11:05:44 -0400 |
---|---|---|
committer | James Cohan <james.cohan@10gen.com> | 2015-06-18 13:14:51 -0400 |
commit | d424a92eb1050023c7f3627a2ff958ff7d37488e (patch) | |
tree | 78f04196e3326883ca41eddb7e0d94cd37710f26 /src | |
parent | 149d8c8f83017db23b104da1f020d0a994fb79f9 (diff) | |
download | mongo-d424a92eb1050023c7f3627a2ff958ff7d37488e.tar.gz |
SERVER-12376 Move $skip and $limit before $project in aggregation pipeline
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/pipeline/pipeline.cpp | 33 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline_optimizations.h | 8 | ||||
-rw-r--r-- | src/mongo/dbtests/pipelinetests.cpp | 89 |
3 files changed, 123 insertions, 7 deletions
diff --git a/src/mongo/db/pipeline/pipeline.cpp b/src/mongo/db/pipeline/pipeline.cpp index 6c096ddfe5c..bba6bf9615f 100644 --- a/src/mongo/db/pipeline/pipeline.cpp +++ b/src/mongo/db/pipeline/pipeline.cpp @@ -223,6 +223,7 @@ namespace mongo { // The order in which optimizations are applied can have significant impact on the // efficiency of the final pipeline. Be Careful! Optimizations::Local::moveMatchBeforeSort(pPipeline.get()); + Optimizations::Local::moveSkipAndLimitBeforeProject(pPipeline.get()); Optimizations::Local::moveLimitBeforeSkip(pPipeline.get()); Optimizations::Local::coalesceAdjacent(pPipeline.get()); Optimizations::Local::optimizeEachDocumentSource(pPipeline.get()); @@ -251,6 +252,38 @@ namespace mongo { } } + void Pipeline::Optimizations::Local::moveSkipAndLimitBeforeProject(Pipeline* pipeline) { + SourceContainer& sources = pipeline->sources; + if (sources.empty()) return; + + for (int i = sources.size() - 1; i >= 1 /* not looking at 0 */; i--) { + // This optimization only applies when a $project comes before a $skip or $limit. + auto project = dynamic_cast<DocumentSourceProject*>(sources[i-1].get()); + if (!project) continue; + + auto skip = dynamic_cast<DocumentSourceSkip*>(sources[i].get()); + auto limit = dynamic_cast<DocumentSourceLimit*>(sources[i].get()); + if (!(skip || limit)) continue; + + swap(sources[i], sources[i-1]); + + // Start at back again. This is needed to handle cases with more than 1 $skip or + // $limit (S means skip, L means limit, P means project) + // + // These would work without second pass (assuming back to front ordering) + // PS -> SP + // PL -> LP + // PPL -> LPP + // PPS -> SPP + // + // The following cases need a second pass to handle the second skip or limit + // PLL -> LLP + // PPLL -> LLPP + // PLPL -> LLPP + i = sources.size(); // decremented before next pass + } + } + void Pipeline::Optimizations::Local::moveLimitBeforeSkip(Pipeline* pipeline) { SourceContainer& sources = pipeline->sources; if (sources.empty()) diff --git a/src/mongo/db/pipeline/pipeline_optimizations.h b/src/mongo/db/pipeline/pipeline_optimizations.h index d8b89350c28..ac4b9e8b697 100644 --- a/src/mongo/db/pipeline/pipeline_optimizations.h +++ b/src/mongo/db/pipeline/pipeline_optimizations.h @@ -53,6 +53,14 @@ namespace mongo { static void moveMatchBeforeSort(Pipeline* pipeline); /** + * Moves skip and limit before any adjacent project phases. + * + * While this is performance-neutral on its own, it enables other optimizations + * such as combining sort and limit. + */ + static void moveSkipAndLimitBeforeProject(Pipeline* pipeline); + + /** * Moves limits before any adjacent skip phases. * * This is more optimal for sharding since currently, we can only split diff --git a/src/mongo/dbtests/pipelinetests.cpp b/src/mongo/dbtests/pipelinetests.cpp index 11f9230e5b7..f97bf9217a9 100644 --- a/src/mongo/dbtests/pipelinetests.cpp +++ b/src/mongo/dbtests/pipelinetests.cpp @@ -67,7 +67,7 @@ namespace PipelineTests { ASSERT_EQUALS(errmsg, ""); ASSERT(outputPipe != NULL); - ASSERT_EQUALS(outputPipe->serialize()["pipeline"], + ASSERT_EQUALS(Value(outputPipe->writeExplainOps()), Value(outputPipeExpected["pipeline"])); } @@ -77,6 +77,55 @@ namespace PipelineTests { OperationContextImpl _opCtx; }; + class MoveSkipBeforeProject: public Base { + string inputPipeJson() override { + return "[{$project: {a : 1}}, {$skip : 5}]"; + } + + string outputPipeJson() override { + return "[{$skip : 5}, {$project: {a : true}}]"; + } + }; + + class MoveLimitBeforeProject: public Base { + string inputPipeJson() override { + return "[{$project: {a : 1}}, {$limit : 5}]"; + } + + string outputPipeJson() override { + return "[{$limit : 5}, {$project: {a : true}}]"; + } + }; + + class MoveMulitipleSkipsAndLimitsBeforeProject: public Base { + string inputPipeJson() override { + return "[{$project: {a : 1}}, {$limit : 5}, {$skip : 3}]"; + } + + string outputPipeJson() override { + return "[{$limit : 5}, {$skip : 3}, {$project: {a : true}}]"; + } + }; + + class SortMatchProjSkipLimBecomesMatchTopKSortSkipProj: public Base { + string inputPipeJson() override { + return "[{$sort: {a: 1}}" + ",{$match: {a: 1}}" + ",{$project : {a: 1}}" + ",{$skip : 3}" + ",{$limit: 5}" + "]"; + } + + string outputPipeJson() override { + return "[{$match: {a: 1}}" + ",{$sort: {sortKey: {a: 1}, limit: 8}}" + ",{$skip: 3}" + ",{$project: {a: true}}" + "]"; + } + }; + class RemoveSkipZero : public Base { string inputPipeJson() override { return "[{$skip: 0}]"; @@ -157,9 +206,9 @@ namespace PipelineTests { intrusive_ptr<Pipeline> shardPipe = mergePipe->splitForSharded(); ASSERT(shardPipe != NULL); - ASSERT_EQUALS(shardPipe->serialize()["pipeline"], + ASSERT_EQUALS(Value(shardPipe->writeExplainOps()), Value(shardPipeExpected["pipeline"])); - ASSERT_EQUALS(mergePipe->serialize()["pipeline"], + ASSERT_EQUALS(Value(mergePipe->writeExplainOps()), Value(mergePipeExpected["pipeline"])); } @@ -272,18 +321,39 @@ namespace PipelineTests { // change. string inputPipeJson() { return "[{$project: {_id:true, a:true}}" - ",{$limit:1}" ",{$group: {_id: '$_id'}}" "]"; } string shardPipeJson() { return "[{$project: {_id:true, a:true}}" - ",{$limit:1}" + ",{$group: {_id: '$_id'}}" "]"; } string mergePipeJson() { - return "[{$limit:1}" - ",{$group: {_id: '$_id'}}" + return "[{$group: {_id: '$$ROOT._id', $doingMerge: true}}" + "]"; + } + }; + + class ShardedSortMatchProjSkipLimBecomesMatchTopKSortSkipProj: public Base { + string inputPipeJson() { + return "[{$sort: {a : 1}}" + ",{$match: {a: 1}}" + ",{$project : {a: 1}}" + ",{$skip : 3}" + ",{$limit: 5}" + "]"; + } + string shardPipeJson() { + return "[{$match: {a: 1}}" + ",{$sort: {sortKey: {a: 1}, limit: 8}}" + ",{$project: {a: true, _id: true}}" + "]"; + } + string mergePipeJson() { + return "[{$sort: {sortKey: {a: 1}, mergePresorted: true, limit: 8}}" + ",{$skip: 3}" + ",{$project: {a: true}}" "]"; } }; @@ -298,6 +368,10 @@ namespace PipelineTests { } void setupTests() { add<Optimizations::Local::RemoveSkipZero>(); + add<Optimizations::Local::MoveLimitBeforeProject>(); + add<Optimizations::Local::MoveSkipBeforeProject>(); + add<Optimizations::Local::MoveMulitipleSkipsAndLimitsBeforeProject>(); + add<Optimizations::Local::SortMatchProjSkipLimBecomesMatchTopKSortSkipProj>(); add<Optimizations::Local::DoNotRemoveSkipOne>(); add<Optimizations::Local::RemoveEmptyMatch>(); add<Optimizations::Local::RemoveMultipleEmptyMatches>(); @@ -313,6 +387,7 @@ namespace PipelineTests { add<Optimizations::Sharded::limitFieldsSentFromShardsToMerger::NothingNeeded>(); add<Optimizations::Sharded::limitFieldsSentFromShardsToMerger::JustNeedsMetadata>(); add<Optimizations::Sharded::limitFieldsSentFromShardsToMerger::ShardAlreadyExhaustive>(); + add<Optimizations::Sharded::limitFieldsSentFromShardsToMerger::ShardedSortMatchProjSkipLimBecomesMatchTopKSortSkipProj>(); } }; |