summaryrefslogtreecommitdiff
path: root/src/mongo/db
diff options
context:
space:
mode:
authorDavid Storch <david.storch@10gen.com>2015-12-14 14:43:08 -0500
committerDavid Storch <david.storch@10gen.com>2015-12-21 16:28:22 -0500
commit56dba2953b622efcae75d7bd9b6aa4154dd25d34 (patch)
treea5509777ef6324edd6f710a44bd0973065c44422 /src/mongo/db
parent6bfa81cae0b0fee124065c1f38de339fd4f3dc7c (diff)
downloadmongo-56dba2953b622efcae75d7bd9b6aa4154dd25d34.tar.gz
SERVER-17011 add EnsureSorted stage
Preserves the sort order for 'ntoreturn hack' plans.
Diffstat (limited to 'src/mongo/db')
-rw-r--r--src/mongo/db/exec/SConscript1
-rw-r--r--src/mongo/db/exec/ensure_sorted.cpp114
-rw-r--r--src/mongo/db/exec/ensure_sorted.h77
-rw-r--r--src/mongo/db/exec/plan_stats.h12
-rw-r--r--src/mongo/db/query/explain.cpp6
-rw-r--r--src/mongo/db/query/planner_analysis.cpp10
-rw-r--r--src/mongo/db/query/query_planner_test.cpp9
-rw-r--r--src/mongo/db/query/query_planner_test_lib.cpp19
-rw-r--r--src/mongo/db/query/query_solution.cpp24
-rw-r--r--src/mongo/db/query/query_solution.h32
-rw-r--r--src/mongo/db/query/stage_builder.cpp8
-rw-r--r--src/mongo/db/query/stage_types.h2
12 files changed, 310 insertions, 4 deletions
diff --git a/src/mongo/db/exec/SConscript b/src/mongo/db/exec/SConscript
index dc8c83ae743..e7d5da0dbd5 100644
--- a/src/mongo/db/exec/SConscript
+++ b/src/mongo/db/exec/SConscript
@@ -45,6 +45,7 @@ env.Library(
"count_scan.cpp",
"delete.cpp",
"distinct_scan.cpp",
+ "ensure_sorted.cpp",
"eof.cpp",
"fetch.cpp",
"geo_near.cpp",
diff --git a/src/mongo/db/exec/ensure_sorted.cpp b/src/mongo/db/exec/ensure_sorted.cpp
new file mode 100644
index 00000000000..9b6cfc87aa4
--- /dev/null
+++ b/src/mongo/db/exec/ensure_sorted.cpp
@@ -0,0 +1,114 @@
+/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/exec/ensure_sorted.h"
+
+#include "mongo/db/exec/scoped_timer.h"
+#include "mongo/db/exec/working_set_computed_data.h"
+#include "mongo/db/query/find_common.h"
+#include "mongo/stdx/memory.h"
+
+namespace mongo {
+
+using std::unique_ptr;
+using stdx::make_unique;
+
+const char* EnsureSortedStage::kStageType = "ENSURE_SORTED";
+
+EnsureSortedStage::EnsureSortedStage(OperationContext* opCtx,
+ BSONObj pattern,
+ WorkingSet* ws,
+ PlanStage* child)
+ : PlanStage(kStageType, opCtx), _ws(ws) {
+ _children.emplace_back(child);
+ _pattern = FindCommon::transformSortSpec(pattern);
+}
+
+bool EnsureSortedStage::isEOF() {
+ return child()->isEOF();
+}
+
+PlanStage::StageState EnsureSortedStage::work(WorkingSetID* out) {
+ ++_commonStats.works;
+
+ // Adds the amount of time taken by work() to executionTimeMillis.
+ ScopedTimer timer(&_commonStats.executionTimeMillis);
+
+ StageState stageState = child()->work(out);
+
+ if (PlanStage::ADVANCED == stageState) {
+ // We extract the sort key from the WSM's computed data. This must have been generated
+ // by a SortKeyGeneratorStage descendent in the execution tree.
+ WorkingSetMember* member = _ws->get(*out);
+ auto sortKeyComputedData =
+ static_cast<const SortKeyComputedData*>(member->getComputed(WSM_SORT_KEY));
+ BSONObj curSortKey = sortKeyComputedData->getSortKey();
+ invariant(!curSortKey.isEmpty());
+
+ if (!_prevSortKey.isEmpty() && !isInOrder(_prevSortKey, curSortKey)) {
+ // 'member' is out of order. Drop it from the result set.
+ _ws->free(*out);
+ ++_specificStats.nDropped;
+ ++_commonStats.needTime;
+ return PlanStage::NEED_TIME;
+ }
+
+ invariant(curSortKey.isOwned());
+ _prevSortKey = curSortKey;
+ ++_commonStats.advanced;
+ return PlanStage::ADVANCED;
+ }
+
+ if (PlanStage::NEED_TIME == stageState) {
+ ++_commonStats.needTime;
+ } else if (PlanStage::NEED_YIELD == stageState) {
+ ++_commonStats.needYield;
+ }
+
+ return stageState;
+}
+
+unique_ptr<PlanStageStats> EnsureSortedStage::getStats() {
+ _commonStats.isEOF = isEOF();
+ unique_ptr<PlanStageStats> ret = make_unique<PlanStageStats>(_commonStats, STAGE_ENSURE_SORTED);
+ ret->specific = make_unique<EnsureSortedStats>(_specificStats);
+ ret->children.emplace_back(child()->getStats());
+ return ret;
+}
+
+const SpecificStats* EnsureSortedStage::getSpecificStats() const {
+ return &_specificStats;
+}
+
+bool EnsureSortedStage::isInOrder(const BSONObj& lhsSortKey, const BSONObj& rhsSortKey) const {
+ return lhsSortKey.woCompare(rhsSortKey, _pattern, /*considerFieldName*/ false) <= 0;
+}
+
+} // namespace mongo
diff --git a/src/mongo/db/exec/ensure_sorted.h b/src/mongo/db/exec/ensure_sorted.h
new file mode 100644
index 00000000000..fe47e79e306
--- /dev/null
+++ b/src/mongo/db/exec/ensure_sorted.h
@@ -0,0 +1,77 @@
+/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/exec/plan_stage.h"
+
+namespace mongo {
+
+/**
+ * Takes the output of its child and drops any results that are not in the sort order specified by
+ * 'pattern'. Thus, if the pattern is {a: -1} and the following documents are inputted:
+ * {a: 9}, {a: 6}, {a: 8}, {a, 1}
+ * The third document will be dropped so that the output is:
+ * {a: 9}, {a: 6}, {a, 1}
+ */
+class EnsureSortedStage final : public PlanStage {
+public:
+ EnsureSortedStage(OperationContext* opCtx, BSONObj pattern, WorkingSet* ws, PlanStage* child);
+
+ bool isEOF() final;
+ StageState work(WorkingSetID* out) final;
+
+ StageType stageType() const final {
+ return STAGE_ENSURE_SORTED;
+ }
+
+ std::unique_ptr<PlanStageStats> getStats() final;
+
+ const SpecificStats* getSpecificStats() const final;
+
+ static const char* kStageType;
+
+private:
+ /**
+ * Returns whether the result with the lhsSortKey should come before the result with the
+ * rhsSortKey in sort order.
+ */
+ bool isInOrder(const BSONObj& lhsSortKey, const BSONObj& rhsSortKey) const;
+
+ WorkingSet* _ws;
+
+ // The pattern that we're sorting by.
+ BSONObj _pattern;
+
+ // The sort key of the previous result.
+ BSONObj _prevSortKey;
+
+ EnsureSortedStats _specificStats;
+};
+
+} // namespace mongo
diff --git a/src/mongo/db/exec/plan_stats.h b/src/mongo/db/exec/plan_stats.h
index 14e07391920..7587aa47da8 100644
--- a/src/mongo/db/exec/plan_stats.h
+++ b/src/mongo/db/exec/plan_stats.h
@@ -298,6 +298,18 @@ struct DistinctScanStats : public SpecificStats {
BSONObj indexBounds;
};
+struct EnsureSortedStats : public SpecificStats {
+ EnsureSortedStats() : nDropped(0) {}
+
+ SpecificStats* clone() const final {
+ EnsureSortedStats* specific = new EnsureSortedStats(*this);
+ return specific;
+ }
+
+ // The number of out-of-order results that were dropped.
+ long long nDropped;
+};
+
struct FetchStats : public SpecificStats {
FetchStats() : alreadyHasObj(0), forcedFetches(0), docsExamined(0) {}
diff --git a/src/mongo/db/query/explain.cpp b/src/mongo/db/query/explain.cpp
index 5650edd4af0..ead46646ced 100644
--- a/src/mongo/db/query/explain.cpp
+++ b/src/mongo/db/query/explain.cpp
@@ -314,6 +314,12 @@ void Explain::statsToBSON(const PlanStageStats& stats,
if (verbosity >= ExplainCommon::EXEC_STATS) {
bob->appendNumber("keysExamined", spec->keysExamined);
}
+ } else if (STAGE_ENSURE_SORTED == stats.stageType) {
+ EnsureSortedStats* spec = static_cast<EnsureSortedStats*>(stats.specific.get());
+
+ if (verbosity >= ExplainCommon::EXEC_STATS) {
+ bob->appendNumber("nDropped", spec->nDropped);
+ }
} else if (STAGE_FETCH == stats.stageType) {
FetchStats* spec = static_cast<FetchStats*>(stats.specific.get());
if (verbosity >= ExplainCommon::EXEC_STATS) {
diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp
index d4166b2a018..e1a69d04530 100644
--- a/src/mongo/db/query/planner_analysis.cpp
+++ b/src/mongo/db/query/planner_analysis.cpp
@@ -560,6 +560,9 @@ QuerySolutionNode* QueryPlannerAnalysis::analyzeSort(const CanonicalQuery& query
// with the topK first. If the client wants a limit, they'll get the efficiency
// of topK. If they want a batchSize, the other OR branch will deliver the missing
// results. The OR stage handles deduping.
+ //
+ // We must also add an ENSURE_SORTED node above the OR to ensure that the final results are
+ // in correct sorted order, which may not be true if the data is concurrently modified.
if (lpq.wantMore() && params.options & QueryPlannerParams::SPLIT_LIMITED_SORT &&
!QueryPlannerCommon::hasNode(query.root(), MatchExpression::TEXT) &&
!QueryPlannerCommon::hasNode(query.root(), MatchExpression::GEO) &&
@@ -574,7 +577,12 @@ QuerySolutionNode* QueryPlannerAnalysis::analyzeSort(const CanonicalQuery& query
SortNode* sortClone = static_cast<SortNode*>(sort->clone());
sortClone->limit = 0;
orn->children.push_back(sortClone);
- solnRoot = orn;
+
+ // Add ENSURE_SORTED above the OR.
+ EnsureSortedNode* esn = new EnsureSortedNode();
+ esn->pattern = sort->pattern;
+ esn->children.push_back(orn);
+ solnRoot = esn;
}
} else {
sort->limit = 0;
diff --git a/src/mongo/db/query/query_planner_test.cpp b/src/mongo/db/query/query_planner_test.cpp
index 38f66d5548b..0504fc410b8 100644
--- a/src/mongo/db/query/query_planner_test.cpp
+++ b/src/mongo/db/query/query_planner_test.cpp
@@ -3439,11 +3439,12 @@ TEST_F(QueryPlannerTest, NoKeepWithNToReturn) {
runQuerySortProjSkipLimit(fromjson("{a: 1}"), fromjson("{b: 1}"), BSONObj(), 0, 3);
assertSolutionExists(
+ "{ensureSorted: {pattern: {b: 1}, node: "
"{or: {nodes: ["
"{sort: {pattern: {b: 1}, limit: 3, node: {sortKeyGen: {node: "
"{fetch: {node: {ixscan: {pattern: {a: 1}}}}}}}}}, "
"{sort: {pattern: {b: 1}, limit: 0, node: {sortKeyGen: {node: "
- "{fetch: {node: {ixscan: {pattern: {a: 1}}}}}}}}}]}}");
+ "{fetch: {node: {ixscan: {pattern: {a: 1}}}}}}}}}]}}}}");
}
// Make sure a top-level $or hits the limiting number
@@ -3564,11 +3565,12 @@ TEST_F(QueryPlannerTest, SplitLimitedSort) {
// Second solution has a blocking sort with a limit: it gets split and
// joined with an OR stage.
assertSolutionExists(
+ "{ensureSorted: {pattern: {b: 1}, node: "
"{or: {nodes: ["
"{sort: {pattern: {b: 1}, limit: 3, node: {sortKeyGen: {node: "
"{fetch: {node: {ixscan: {pattern: {a: 1}}}}}}}}}, "
"{sort: {pattern: {b: 1}, limit: 0, node: {sortKeyGen: {node: "
- "{fetch: {node: {ixscan: {pattern: {a: 1}}}}}}}}}]}}");
+ "{fetch: {node: {ixscan: {pattern: {a: 1}}}}}}}}}]}}}}");
}
// The same query run as a find command with a limit should not require the "split limited sort"
@@ -4117,10 +4119,11 @@ TEST_F(QueryPlannerTest, NToReturnHackWithFindCommand) {
assertNumSolutions(1U);
assertSolutionExists(
+ "{ensureSorted: {pattern: {a: 1}, node: "
"{or: {nodes: ["
"{sort: {limit:3, pattern: {a:1}, node: {sortKeyGen: {node: {cscan: {dir:1}}}}}}, "
"{sort: {limit:0, pattern: {a:1}, node: {sortKeyGen: {node: {cscan: {dir:1}}}}}}"
- "]}}");
+ "]}}}}");
}
TEST_F(QueryPlannerTest, NToReturnHackWithSingleBatch) {
diff --git a/src/mongo/db/query/query_planner_test_lib.cpp b/src/mongo/db/query/query_planner_test_lib.cpp
index 557bde5f252..e4485738ea7 100644
--- a/src/mongo/db/query/query_planner_test_lib.cpp
+++ b/src/mongo/db/query/query_planner_test_lib.cpp
@@ -575,6 +575,25 @@ bool QueryPlannerTestLib::solutionMatches(const BSONObj& testSoln,
}
return solutionMatches(child.Obj(), fn->children[0]);
+ } else if (STAGE_ENSURE_SORTED == trueSoln->getType()) {
+ const EnsureSortedNode* esn = static_cast<const EnsureSortedNode*>(trueSoln);
+
+ BSONElement el = testSoln["ensureSorted"];
+ if (el.eoo() || !el.isABSONObj()) {
+ return false;
+ }
+ BSONObj esObj = el.Obj();
+
+ BSONElement patternEl = esObj["pattern"];
+ if (patternEl.eoo() || !patternEl.isABSONObj()) {
+ return false;
+ }
+ BSONElement child = esObj["node"];
+ if (child.eoo() || !child.isABSONObj()) {
+ return false;
+ }
+
+ return (patternEl.Obj() == esn->pattern) && solutionMatches(child.Obj(), esn->children[0]);
}
return false;
diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp
index a80f9199b33..da1fa79c034 100644
--- a/src/mongo/db/query/query_solution.cpp
+++ b/src/mongo/db/query/query_solution.cpp
@@ -916,4 +916,28 @@ QuerySolutionNode* CountNode::clone() const {
return copy;
}
+//
+// EnsureSortedNode
+//
+
+void EnsureSortedNode::appendToString(mongoutils::str::stream* ss, int indent) const {
+ addIndent(ss, indent);
+ *ss << "ENSURE_SORTED\n";
+ addIndent(ss, indent + 1);
+ *ss << "pattern = " << pattern.toString() << '\n';
+ addCommon(ss, indent);
+ addIndent(ss, indent + 1);
+ *ss << "Child:" << '\n';
+ children[0]->appendToString(ss, indent + 2);
+}
+
+QuerySolutionNode* EnsureSortedNode::clone() const {
+ EnsureSortedNode* copy = new EnsureSortedNode();
+ cloneBaseData(copy);
+
+ copy->pattern = this->pattern;
+
+ return copy;
+}
+
} // namespace mongo
diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h
index 4df3e3e8665..c2501daa354 100644
--- a/src/mongo/db/query/query_solution.h
+++ b/src/mongo/db/query/query_solution.h
@@ -910,4 +910,36 @@ struct CountNode : public QuerySolutionNode {
bool endKeyInclusive;
};
+/**
+ * This stage drops results that are out of sorted order.
+ */
+struct EnsureSortedNode : public QuerySolutionNode {
+ EnsureSortedNode() {}
+ virtual ~EnsureSortedNode() {}
+
+ virtual StageType getType() const {
+ return STAGE_ENSURE_SORTED;
+ }
+
+ virtual void appendToString(mongoutils::str::stream* ss, int indent) const;
+
+ bool fetched() const {
+ return children[0]->fetched();
+ }
+ bool hasField(const std::string& field) const {
+ return children[0]->hasField(field);
+ }
+ bool sortedByDiskLoc() const {
+ return children[0]->sortedByDiskLoc();
+ }
+ const BSONObjSet& getSort() const {
+ return children[0]->getSort();
+ }
+
+ QuerySolutionNode* clone() const;
+
+ // The pattern that the results should be sorted by.
+ BSONObj pattern;
+};
+
} // namespace mongo
diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp
index f3344921e6b..429651668a2 100644
--- a/src/mongo/db/query/stage_builder.cpp
+++ b/src/mongo/db/query/stage_builder.cpp
@@ -38,6 +38,7 @@
#include "mongo/db/exec/collection_scan.h"
#include "mongo/db/exec/count_scan.h"
#include "mongo/db/exec/distinct_scan.h"
+#include "mongo/db/exec/ensure_sorted.h"
#include "mongo/db/exec/fetch.h"
#include "mongo/db/exec/geo_near.h"
#include "mongo/db/exec/index_scan.h"
@@ -325,6 +326,13 @@ PlanStage* buildStages(OperationContext* txn,
params.endKeyInclusive = cn->endKeyInclusive;
return new CountScan(txn, params, ws);
+ } else if (STAGE_ENSURE_SORTED == root->getType()) {
+ const EnsureSortedNode* esn = static_cast<const EnsureSortedNode*>(root);
+ PlanStage* childStage = buildStages(txn, collection, qsol, esn->children[0], ws);
+ if (NULL == childStage) {
+ return NULL;
+ }
+ return new EnsureSortedStage(txn, esn->pattern, ws, childStage);
} else {
mongoutils::str::stream ss;
root->appendToString(&ss, 0);
diff --git a/src/mongo/db/query/stage_types.h b/src/mongo/db/query/stage_types.h
index 29d16653f72..356555c32ab 100644
--- a/src/mongo/db/query/stage_types.h
+++ b/src/mongo/db/query/stage_types.h
@@ -57,6 +57,8 @@ enum StageType {
// Dummy stage used for receiving notifications of deletions during chunk migration.
STAGE_NOTIFY_DELETE,
+ STAGE_ENSURE_SORTED,
+
STAGE_EOF,
// This is more of an "internal-only" stage where we try to keep docs that were mutated