From 56dba2953b622efcae75d7bd9b6aa4154dd25d34 Mon Sep 17 00:00:00 2001 From: David Storch Date: Mon, 14 Dec 2015 14:43:08 -0500 Subject: SERVER-17011 add EnsureSorted stage Preserves the sort order for 'ntoreturn hack' plans. --- src/mongo/db/exec/SConscript | 1 + src/mongo/db/exec/ensure_sorted.cpp | 114 ++++++++++++++++++++++++++ src/mongo/db/exec/ensure_sorted.h | 77 +++++++++++++++++ src/mongo/db/exec/plan_stats.h | 12 +++ src/mongo/db/query/explain.cpp | 6 ++ src/mongo/db/query/planner_analysis.cpp | 10 ++- src/mongo/db/query/query_planner_test.cpp | 9 +- src/mongo/db/query/query_planner_test_lib.cpp | 19 +++++ src/mongo/db/query/query_solution.cpp | 24 ++++++ src/mongo/db/query/query_solution.h | 32 ++++++++ src/mongo/db/query/stage_builder.cpp | 8 ++ src/mongo/db/query/stage_types.h | 2 + 12 files changed, 310 insertions(+), 4 deletions(-) create mode 100644 src/mongo/db/exec/ensure_sorted.cpp create mode 100644 src/mongo/db/exec/ensure_sorted.h (limited to 'src/mongo/db') diff --git a/src/mongo/db/exec/SConscript b/src/mongo/db/exec/SConscript index dc8c83ae743..e7d5da0dbd5 100644 --- a/src/mongo/db/exec/SConscript +++ b/src/mongo/db/exec/SConscript @@ -45,6 +45,7 @@ env.Library( "count_scan.cpp", "delete.cpp", "distinct_scan.cpp", + "ensure_sorted.cpp", "eof.cpp", "fetch.cpp", "geo_near.cpp", diff --git a/src/mongo/db/exec/ensure_sorted.cpp b/src/mongo/db/exec/ensure_sorted.cpp new file mode 100644 index 00000000000..9b6cfc87aa4 --- /dev/null +++ b/src/mongo/db/exec/ensure_sorted.cpp @@ -0,0 +1,114 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/exec/ensure_sorted.h" + +#include "mongo/db/exec/scoped_timer.h" +#include "mongo/db/exec/working_set_computed_data.h" +#include "mongo/db/query/find_common.h" +#include "mongo/stdx/memory.h" + +namespace mongo { + +using std::unique_ptr; +using stdx::make_unique; + +const char* EnsureSortedStage::kStageType = "ENSURE_SORTED"; + +EnsureSortedStage::EnsureSortedStage(OperationContext* opCtx, + BSONObj pattern, + WorkingSet* ws, + PlanStage* child) + : PlanStage(kStageType, opCtx), _ws(ws) { + _children.emplace_back(child); + _pattern = FindCommon::transformSortSpec(pattern); +} + +bool EnsureSortedStage::isEOF() { + return child()->isEOF(); +} + +PlanStage::StageState EnsureSortedStage::work(WorkingSetID* out) { + ++_commonStats.works; + + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + + StageState stageState = child()->work(out); + + if (PlanStage::ADVANCED == stageState) { + // We extract the sort key from the WSM's computed data. This must have been generated + // by a SortKeyGeneratorStage descendent in the execution tree. + WorkingSetMember* member = _ws->get(*out); + auto sortKeyComputedData = + static_cast(member->getComputed(WSM_SORT_KEY)); + BSONObj curSortKey = sortKeyComputedData->getSortKey(); + invariant(!curSortKey.isEmpty()); + + if (!_prevSortKey.isEmpty() && !isInOrder(_prevSortKey, curSortKey)) { + // 'member' is out of order. Drop it from the result set. + _ws->free(*out); + ++_specificStats.nDropped; + ++_commonStats.needTime; + return PlanStage::NEED_TIME; + } + + invariant(curSortKey.isOwned()); + _prevSortKey = curSortKey; + ++_commonStats.advanced; + return PlanStage::ADVANCED; + } + + if (PlanStage::NEED_TIME == stageState) { + ++_commonStats.needTime; + } else if (PlanStage::NEED_YIELD == stageState) { + ++_commonStats.needYield; + } + + return stageState; +} + +unique_ptr EnsureSortedStage::getStats() { + _commonStats.isEOF = isEOF(); + unique_ptr ret = make_unique(_commonStats, STAGE_ENSURE_SORTED); + ret->specific = make_unique(_specificStats); + ret->children.emplace_back(child()->getStats()); + return ret; +} + +const SpecificStats* EnsureSortedStage::getSpecificStats() const { + return &_specificStats; +} + +bool EnsureSortedStage::isInOrder(const BSONObj& lhsSortKey, const BSONObj& rhsSortKey) const { + return lhsSortKey.woCompare(rhsSortKey, _pattern, /*considerFieldName*/ false) <= 0; +} + +} // namespace mongo diff --git a/src/mongo/db/exec/ensure_sorted.h b/src/mongo/db/exec/ensure_sorted.h new file mode 100644 index 00000000000..fe47e79e306 --- /dev/null +++ b/src/mongo/db/exec/ensure_sorted.h @@ -0,0 +1,77 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/exec/plan_stage.h" + +namespace mongo { + +/** + * Takes the output of its child and drops any results that are not in the sort order specified by + * 'pattern'. Thus, if the pattern is {a: -1} and the following documents are inputted: + * {a: 9}, {a: 6}, {a: 8}, {a, 1} + * The third document will be dropped so that the output is: + * {a: 9}, {a: 6}, {a, 1} + */ +class EnsureSortedStage final : public PlanStage { +public: + EnsureSortedStage(OperationContext* opCtx, BSONObj pattern, WorkingSet* ws, PlanStage* child); + + bool isEOF() final; + StageState work(WorkingSetID* out) final; + + StageType stageType() const final { + return STAGE_ENSURE_SORTED; + } + + std::unique_ptr getStats() final; + + const SpecificStats* getSpecificStats() const final; + + static const char* kStageType; + +private: + /** + * Returns whether the result with the lhsSortKey should come before the result with the + * rhsSortKey in sort order. + */ + bool isInOrder(const BSONObj& lhsSortKey, const BSONObj& rhsSortKey) const; + + WorkingSet* _ws; + + // The pattern that we're sorting by. + BSONObj _pattern; + + // The sort key of the previous result. + BSONObj _prevSortKey; + + EnsureSortedStats _specificStats; +}; + +} // namespace mongo diff --git a/src/mongo/db/exec/plan_stats.h b/src/mongo/db/exec/plan_stats.h index 14e07391920..7587aa47da8 100644 --- a/src/mongo/db/exec/plan_stats.h +++ b/src/mongo/db/exec/plan_stats.h @@ -298,6 +298,18 @@ struct DistinctScanStats : public SpecificStats { BSONObj indexBounds; }; +struct EnsureSortedStats : public SpecificStats { + EnsureSortedStats() : nDropped(0) {} + + SpecificStats* clone() const final { + EnsureSortedStats* specific = new EnsureSortedStats(*this); + return specific; + } + + // The number of out-of-order results that were dropped. + long long nDropped; +}; + struct FetchStats : public SpecificStats { FetchStats() : alreadyHasObj(0), forcedFetches(0), docsExamined(0) {} diff --git a/src/mongo/db/query/explain.cpp b/src/mongo/db/query/explain.cpp index 5650edd4af0..ead46646ced 100644 --- a/src/mongo/db/query/explain.cpp +++ b/src/mongo/db/query/explain.cpp @@ -314,6 +314,12 @@ void Explain::statsToBSON(const PlanStageStats& stats, if (verbosity >= ExplainCommon::EXEC_STATS) { bob->appendNumber("keysExamined", spec->keysExamined); } + } else if (STAGE_ENSURE_SORTED == stats.stageType) { + EnsureSortedStats* spec = static_cast(stats.specific.get()); + + if (verbosity >= ExplainCommon::EXEC_STATS) { + bob->appendNumber("nDropped", spec->nDropped); + } } else if (STAGE_FETCH == stats.stageType) { FetchStats* spec = static_cast(stats.specific.get()); if (verbosity >= ExplainCommon::EXEC_STATS) { diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp index d4166b2a018..e1a69d04530 100644 --- a/src/mongo/db/query/planner_analysis.cpp +++ b/src/mongo/db/query/planner_analysis.cpp @@ -560,6 +560,9 @@ QuerySolutionNode* QueryPlannerAnalysis::analyzeSort(const CanonicalQuery& query // with the topK first. If the client wants a limit, they'll get the efficiency // of topK. If they want a batchSize, the other OR branch will deliver the missing // results. The OR stage handles deduping. + // + // We must also add an ENSURE_SORTED node above the OR to ensure that the final results are + // in correct sorted order, which may not be true if the data is concurrently modified. if (lpq.wantMore() && params.options & QueryPlannerParams::SPLIT_LIMITED_SORT && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT) && !QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO) && @@ -574,7 +577,12 @@ QuerySolutionNode* QueryPlannerAnalysis::analyzeSort(const CanonicalQuery& query SortNode* sortClone = static_cast(sort->clone()); sortClone->limit = 0; orn->children.push_back(sortClone); - solnRoot = orn; + + // Add ENSURE_SORTED above the OR. + EnsureSortedNode* esn = new EnsureSortedNode(); + esn->pattern = sort->pattern; + esn->children.push_back(orn); + solnRoot = esn; } } else { sort->limit = 0; diff --git a/src/mongo/db/query/query_planner_test.cpp b/src/mongo/db/query/query_planner_test.cpp index 38f66d5548b..0504fc410b8 100644 --- a/src/mongo/db/query/query_planner_test.cpp +++ b/src/mongo/db/query/query_planner_test.cpp @@ -3439,11 +3439,12 @@ TEST_F(QueryPlannerTest, NoKeepWithNToReturn) { runQuerySortProjSkipLimit(fromjson("{a: 1}"), fromjson("{b: 1}"), BSONObj(), 0, 3); assertSolutionExists( + "{ensureSorted: {pattern: {b: 1}, node: " "{or: {nodes: [" "{sort: {pattern: {b: 1}, limit: 3, node: {sortKeyGen: {node: " "{fetch: {node: {ixscan: {pattern: {a: 1}}}}}}}}}, " "{sort: {pattern: {b: 1}, limit: 0, node: {sortKeyGen: {node: " - "{fetch: {node: {ixscan: {pattern: {a: 1}}}}}}}}}]}}"); + "{fetch: {node: {ixscan: {pattern: {a: 1}}}}}}}}}]}}}}"); } // Make sure a top-level $or hits the limiting number @@ -3564,11 +3565,12 @@ TEST_F(QueryPlannerTest, SplitLimitedSort) { // Second solution has a blocking sort with a limit: it gets split and // joined with an OR stage. assertSolutionExists( + "{ensureSorted: {pattern: {b: 1}, node: " "{or: {nodes: [" "{sort: {pattern: {b: 1}, limit: 3, node: {sortKeyGen: {node: " "{fetch: {node: {ixscan: {pattern: {a: 1}}}}}}}}}, " "{sort: {pattern: {b: 1}, limit: 0, node: {sortKeyGen: {node: " - "{fetch: {node: {ixscan: {pattern: {a: 1}}}}}}}}}]}}"); + "{fetch: {node: {ixscan: {pattern: {a: 1}}}}}}}}}]}}}}"); } // The same query run as a find command with a limit should not require the "split limited sort" @@ -4117,10 +4119,11 @@ TEST_F(QueryPlannerTest, NToReturnHackWithFindCommand) { assertNumSolutions(1U); assertSolutionExists( + "{ensureSorted: {pattern: {a: 1}, node: " "{or: {nodes: [" "{sort: {limit:3, pattern: {a:1}, node: {sortKeyGen: {node: {cscan: {dir:1}}}}}}, " "{sort: {limit:0, pattern: {a:1}, node: {sortKeyGen: {node: {cscan: {dir:1}}}}}}" - "]}}"); + "]}}}}"); } TEST_F(QueryPlannerTest, NToReturnHackWithSingleBatch) { diff --git a/src/mongo/db/query/query_planner_test_lib.cpp b/src/mongo/db/query/query_planner_test_lib.cpp index 557bde5f252..e4485738ea7 100644 --- a/src/mongo/db/query/query_planner_test_lib.cpp +++ b/src/mongo/db/query/query_planner_test_lib.cpp @@ -575,6 +575,25 @@ bool QueryPlannerTestLib::solutionMatches(const BSONObj& testSoln, } return solutionMatches(child.Obj(), fn->children[0]); + } else if (STAGE_ENSURE_SORTED == trueSoln->getType()) { + const EnsureSortedNode* esn = static_cast(trueSoln); + + BSONElement el = testSoln["ensureSorted"]; + if (el.eoo() || !el.isABSONObj()) { + return false; + } + BSONObj esObj = el.Obj(); + + BSONElement patternEl = esObj["pattern"]; + if (patternEl.eoo() || !patternEl.isABSONObj()) { + return false; + } + BSONElement child = esObj["node"]; + if (child.eoo() || !child.isABSONObj()) { + return false; + } + + return (patternEl.Obj() == esn->pattern) && solutionMatches(child.Obj(), esn->children[0]); } return false; diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp index a80f9199b33..da1fa79c034 100644 --- a/src/mongo/db/query/query_solution.cpp +++ b/src/mongo/db/query/query_solution.cpp @@ -916,4 +916,28 @@ QuerySolutionNode* CountNode::clone() const { return copy; } +// +// EnsureSortedNode +// + +void EnsureSortedNode::appendToString(mongoutils::str::stream* ss, int indent) const { + addIndent(ss, indent); + *ss << "ENSURE_SORTED\n"; + addIndent(ss, indent + 1); + *ss << "pattern = " << pattern.toString() << '\n'; + addCommon(ss, indent); + addIndent(ss, indent + 1); + *ss << "Child:" << '\n'; + children[0]->appendToString(ss, indent + 2); +} + +QuerySolutionNode* EnsureSortedNode::clone() const { + EnsureSortedNode* copy = new EnsureSortedNode(); + cloneBaseData(copy); + + copy->pattern = this->pattern; + + return copy; +} + } // namespace mongo diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h index 4df3e3e8665..c2501daa354 100644 --- a/src/mongo/db/query/query_solution.h +++ b/src/mongo/db/query/query_solution.h @@ -910,4 +910,36 @@ struct CountNode : public QuerySolutionNode { bool endKeyInclusive; }; +/** + * This stage drops results that are out of sorted order. + */ +struct EnsureSortedNode : public QuerySolutionNode { + EnsureSortedNode() {} + virtual ~EnsureSortedNode() {} + + virtual StageType getType() const { + return STAGE_ENSURE_SORTED; + } + + virtual void appendToString(mongoutils::str::stream* ss, int indent) const; + + bool fetched() const { + return children[0]->fetched(); + } + bool hasField(const std::string& field) const { + return children[0]->hasField(field); + } + bool sortedByDiskLoc() const { + return children[0]->sortedByDiskLoc(); + } + const BSONObjSet& getSort() const { + return children[0]->getSort(); + } + + QuerySolutionNode* clone() const; + + // The pattern that the results should be sorted by. + BSONObj pattern; +}; + } // namespace mongo diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp index f3344921e6b..429651668a2 100644 --- a/src/mongo/db/query/stage_builder.cpp +++ b/src/mongo/db/query/stage_builder.cpp @@ -38,6 +38,7 @@ #include "mongo/db/exec/collection_scan.h" #include "mongo/db/exec/count_scan.h" #include "mongo/db/exec/distinct_scan.h" +#include "mongo/db/exec/ensure_sorted.h" #include "mongo/db/exec/fetch.h" #include "mongo/db/exec/geo_near.h" #include "mongo/db/exec/index_scan.h" @@ -325,6 +326,13 @@ PlanStage* buildStages(OperationContext* txn, params.endKeyInclusive = cn->endKeyInclusive; return new CountScan(txn, params, ws); + } else if (STAGE_ENSURE_SORTED == root->getType()) { + const EnsureSortedNode* esn = static_cast(root); + PlanStage* childStage = buildStages(txn, collection, qsol, esn->children[0], ws); + if (NULL == childStage) { + return NULL; + } + return new EnsureSortedStage(txn, esn->pattern, ws, childStage); } else { mongoutils::str::stream ss; root->appendToString(&ss, 0); diff --git a/src/mongo/db/query/stage_types.h b/src/mongo/db/query/stage_types.h index 29d16653f72..356555c32ab 100644 --- a/src/mongo/db/query/stage_types.h +++ b/src/mongo/db/query/stage_types.h @@ -57,6 +57,8 @@ enum StageType { // Dummy stage used for receiving notifications of deletions during chunk migration. STAGE_NOTIFY_DELETE, + STAGE_ENSURE_SORTED, + STAGE_EOF, // This is more of an "internal-only" stage where we try to keep docs that were mutated -- cgit v1.2.1