summaryrefslogtreecommitdiff
path: root/src/mongo/db/exec
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/exec')
-rw-r--r--src/mongo/db/exec/index_scan.cpp166
-rw-r--r--src/mongo/db/exec/index_scan.h104
-rw-r--r--src/mongo/db/exec/plan_stage.h159
-rw-r--r--src/mongo/db/exec/simple_plan_runner.h74
-rw-r--r--src/mongo/db/exec/stagedebug_cmd.cpp156
-rw-r--r--src/mongo/db/exec/working_set.cpp98
-rw-r--r--src/mongo/db/exec/working_set.h135
-rw-r--r--src/mongo/db/exec/working_set_common.cpp56
-rw-r--r--src/mongo/db/exec/working_set_common.h44
-rw-r--r--src/mongo/db/exec/working_set_test.cpp144
10 files changed, 1136 insertions, 0 deletions
diff --git a/src/mongo/db/exec/index_scan.cpp b/src/mongo/db/exec/index_scan.cpp
new file mode 100644
index 00000000000..c4601996b18
--- /dev/null
+++ b/src/mongo/db/exec/index_scan.cpp
@@ -0,0 +1,166 @@
+/**
+ * Copyright (C) 2013 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "mongo/db/exec/index_scan.h"
+
+#include "mongo/db/index/catalog_hack.h"
+#include "mongo/db/index/index_access_method.h"
+#include "mongo/db/index/index_cursor.h"
+#include "mongo/db/index/index_descriptor.h"
+
+namespace {
+
+ // Return a value in the set {-1, 0, 1} to represent the sign of parameter i.
+ int sgn(int i) {
+ if (i == 0)
+ return 0;
+ return i > 0 ? 1 : -1;
+ }
+
+} // namespace
+
+namespace mongo {
+
+ IndexScan::IndexScan(const IndexScanParams& params, WorkingSet* workingSet, Matcher* matcher)
+ : _workingSet(workingSet), _descriptor(params.descriptor), _startKey(params.startKey),
+ _endKey(params.endKey), _endKeyInclusive(params.endKeyInclusive),
+ _direction(params.direction), _hitEnd(false),
+ _matcher(matcher), _shouldDedup(params.descriptor->isMultikey()),
+ _yieldMovedCursor(false), _numWanted(params.limit) {
+
+ string amName;
+
+ if (params.forceBtreeAccessMethod) {
+ _iam.reset(CatalogHack::getBtreeIndex(_descriptor.get()));
+ amName = "";
+ }
+ else {
+ amName = CatalogHack::getAccessMethodName(_descriptor->keyPattern());
+ _iam.reset(CatalogHack::getIndex(_descriptor.get()));
+ }
+
+ if (IndexNames::GEO_2D == amName || IndexNames::GEO_2DSPHERE == amName) {
+ // _endKey is meaningless for 2d and 2dsphere.
+ verify(_endKey.isEmpty());
+ }
+ }
+
+ PlanStage::StageState IndexScan::work(WorkingSetID* out) {
+ if (NULL == _indexCursor.get()) {
+ // First call to work(). Perform cursor init.
+ CursorOptions cursorOptions;
+
+ // The limit is *required* for 2d $near, which is the only index that pays attention to
+ // it anyway.
+ cursorOptions.numWanted = _numWanted;
+ if (1 == _direction) {
+ cursorOptions.direction = CursorOptions::INCREASING;
+ }
+ else {
+ cursorOptions.direction = CursorOptions::DECREASING;
+ }
+
+ IndexCursor *cursor;
+ _iam->newCursor(&cursor);
+ _indexCursor.reset(cursor);
+ _indexCursor->setOptions(cursorOptions);
+ _indexCursor->seek(_startKey);
+ checkEnd();
+ }
+ else if (_yieldMovedCursor) {
+ _yieldMovedCursor = false;
+ // Note that we're not calling next() here.
+ }
+ else {
+ _indexCursor->next();
+ checkEnd();
+ }
+
+ if (isEOF()) { return PlanStage::IS_EOF; }
+
+ DiskLoc loc = _indexCursor->getValue();
+
+ if (_shouldDedup) {
+ if (_returned.end() != _returned.find(loc)) {
+ return PlanStage::NEED_TIME;
+ }
+ else {
+ _returned.insert(loc);
+ }
+ }
+
+ WorkingSetID id = _workingSet->allocate();
+ WorkingSetMember* member = _workingSet->get(id);
+ member->loc = loc;
+ member->keyData.push_back(IndexKeyDatum(_descriptor->keyPattern(),
+ _indexCursor->getKey().getOwned()));
+ member->state = WorkingSetMember::LOC_AND_IDX;
+
+ if (NULL == _matcher || _matcher->matches(member)) {
+ *out = id;
+ return PlanStage::ADVANCED;
+ }
+
+ _workingSet->free(id);
+
+ return PlanStage::NEED_TIME;
+ }
+
+ bool IndexScan::isEOF() { return _indexCursor->isEOF() || _hitEnd; }
+
+ void IndexScan::prepareToYield() {
+ if (isEOF()) { return; }
+ _savedKey = _indexCursor->getKey().getOwned();
+ _savedLoc = _indexCursor->getValue();
+ _indexCursor->savePosition();
+ }
+
+ void IndexScan::recoverFromYield() {
+ if (isEOF()) { return; }
+
+ _indexCursor->restorePosition();
+
+ if (!_savedKey.binaryEqual(_indexCursor->getKey())
+ || _savedLoc != _indexCursor->getValue()) {
+ // Our restored position isn't the same as the saved position. When we call work()
+ // again we want to return where we currently point, not past it.
+ _yieldMovedCursor = true;
+
+ // Our restored position might be past endKey, see if we've hit the end.
+ checkEnd();
+ }
+ }
+
+ void IndexScan::invalidate(const DiskLoc& dl) {
+ // If we see this DiskLoc again, it may not be the same doc. it was before, so we want to
+ // return it.
+ _returned.erase(dl);
+ }
+
+ void IndexScan::checkEnd() {
+ if (isEOF()) { return; }
+
+ // If there is an empty endKey we will scan until we run out of index to scan over.
+ if (_endKey.isEmpty()) { return; }
+
+ int cmp = sgn(_endKey.woCompare(_indexCursor->getKey(), _descriptor->keyPattern()));
+
+ if ((cmp != 0 && cmp != _direction) || (cmp == 0 && !_endKeyInclusive)) {
+ _hitEnd = true;
+ }
+ }
+
+} // namespace mongo
diff --git a/src/mongo/db/exec/index_scan.h b/src/mongo/db/exec/index_scan.h
new file mode 100644
index 00000000000..2f53a1da53a
--- /dev/null
+++ b/src/mongo/db/exec/index_scan.h
@@ -0,0 +1,104 @@
+/**
+ * Copyright (C) 2013 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "mongo/db/diskloc.h"
+#include "mongo/db/jsobj.h"
+#include "mongo/db/matcher.h"
+#include "mongo/db/exec/plan_stage.h"
+#include "mongo/platform/unordered_set.h"
+
+namespace mongo {
+
+ class IndexAccessMethod;
+ class IndexCursor;
+ class IndexDescriptor;
+ class IndexScanParams;
+ class WorkingSet;
+
+ /**
+ * Stage scans over an index from startKey to endKey, returning results that pass the provided
+ * filter. Internally dedups on DiskLoc.
+ *
+ * Sub-stage preconditions: None. Is a leaf and consumes no stage data.
+ */
+ class IndexScan : public PlanStage {
+ public:
+ IndexScan(const IndexScanParams& params, WorkingSet* workingSet, Matcher* matcher);
+ virtual ~IndexScan() { }
+
+ virtual StageState work(WorkingSetID* out);
+ virtual bool isEOF();
+ virtual void prepareToYield();
+ virtual void recoverFromYield();
+ virtual void invalidate(const DiskLoc& dl);
+
+ private:
+ /** See if the cursor is pointing at or past _endKey, if _endKey is non-empty. */
+ void checkEnd();
+
+ // The WorkingSet we annotate with results. Not owned by us.
+ WorkingSet* _workingSet;
+
+ // Index access.
+ scoped_ptr<IndexAccessMethod> _iam;
+ scoped_ptr<IndexCursor> _indexCursor;
+ scoped_ptr<IndexDescriptor> _descriptor;
+
+ // Bounds for the cursor. TODO: take a set of bounds.
+ BSONObj _startKey;
+ BSONObj _endKey;
+ bool _endKeyInclusive;
+ int _direction;
+ bool _hitEnd;
+
+ // Contains expressions only over fields in the index key. We assume this is built
+ // correctly by whomever creates this class.
+ scoped_ptr<Matcher> _matcher;
+
+ // Could our index have duplicates? If so, we use _returned to dedup.
+ bool _shouldDedup;
+ unordered_set<DiskLoc, DiskLoc::Hasher> _returned;
+
+ // For yielding.
+ BSONObj _savedKey;
+ DiskLoc _savedLoc;
+
+ // True if there was a yield and the yield changed the cursor position.
+ bool _yieldMovedCursor;
+
+ // This is IndexScanParams::limit. See comment there.
+ int _numWanted;
+ };
+
+ struct IndexScanParams {
+ IndexScanParams() : descriptor(NULL), endKeyInclusive(true), direction(1), limit(0),
+ forceBtreeAccessMethod(false) { }
+ IndexDescriptor* descriptor;
+ BSONObj startKey;
+ BSONObj endKey;
+ bool endKeyInclusive;
+ int direction;
+
+ // This only matters for 2d indices and will be ignored by every other index.
+ int limit;
+
+ // Special indices internally open an IndexCursor over themselves but as a straight Btree.
+ bool forceBtreeAccessMethod;
+ };
+
+} // namespace mongo
diff --git a/src/mongo/db/exec/plan_stage.h b/src/mongo/db/exec/plan_stage.h
new file mode 100644
index 00000000000..8429e4fd79e
--- /dev/null
+++ b/src/mongo/db/exec/plan_stage.h
@@ -0,0 +1,159 @@
+/**
+ * Copyright (C) 2013 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "mongo/db/exec/working_set.h"
+
+namespace mongo {
+
+ class DiskLoc;
+
+ /**
+ * A PlanStage ("stage") is the basic building block of a "Query Execution Plan." A stage is
+ * the smallest piece of machinery used in executing a compiled query. Stages either access
+ * data (from a collection or an index) to create a stream of results, or transform a stream of
+ * results (e.g. AND, OR, SORT) to create a stream of results.
+ *
+ * Stages have zero or more input streams but only one output stream. Data-accessing stages are
+ * leaves and data-transforming stages have children. Stages can be connected together to form
+ * a tree which is then executed (see plan_runner.h) to solve a query.
+ *
+ * A stage's input and output are each typed. Only stages with compatible types can be
+ * connected.
+ *
+ * All of the stages of a QEP share a WorkingSet (see working_set.h). Data source stages
+ * allocate a slot in the WorkingSet, fill the slot with data, and return the ID of that slot.
+ * Subsequent stages fetch a WorkingSetElement by its ID and operate on the enclosed data.
+ *
+ * Stages do nothing unless work() is called. work() is a request to the stage to consume one
+ * unit of input. Some stages (e.g. AND, SORT) require many calls to work() before generating
+ * output as they must consume many units of input. These stages will inform the caller that
+ * they need more time, and work() must be called again in order to produce an output.
+ *
+ * Every stage of a query implements the PlanStage interface. Queries perform a unit of work
+ * and report on their subsequent status; see StatusCode for possible states. Query results are
+ * passed through the WorkingSet interface; see working_set.h for details.
+ *
+ * All synchronization is the responsibility of the caller. Queries must be told to yield with
+ * prepareToYield() if any underlying database state changes. If prepareToYield() is called,
+ * recoverFromYield() must be called again before any work() is done.
+ *
+ * Here is a very simple usage example:
+ *
+ * WorkingSet workingSet;
+ * PlanStage* rootStage = makeQueryPlan(&workingSet, ...);
+ * while (!rootStage->isEOF()) {
+ * WorkingSetID result;
+ * switch(rootStage->work(&result)) {
+ * case PlanStage::ADVANCED:
+ * // do something with result
+ * WorkingSetMember* member = workingSet.get(result);
+ * cout << "Result: " << member->obj << endl;
+ * break;
+ * case PlanStage::IS_EOF:
+ * // All done. Will fall out of while loop.
+ * break;
+ * case PlanStage::NEED_TIME:
+ * // Need more time.
+ * break;
+ * case PlanStage::ERROR:
+ * // Throw exception or return error
+ * break;
+ * case PlanStage::NEED_FETCH:
+ * // Go to disk and fetch stuff.
+ * break;
+ * }
+ *
+ * if (shouldYield) {
+ * // Occasionally yield.
+ * stage->prepareToYield();
+ * // Do work that requires a yield here (execute other plans, insert, delete, etc.).
+ * stage->recoverFromYield();
+ * }
+ * }
+ */
+ class PlanStage {
+ public:
+ virtual ~PlanStage() { }
+
+ /**
+ * All possible return values of work(...)
+ */
+ enum StageState {
+ // work(...) has returned a new result in its out parameter.
+ ADVANCED,
+ // work(...) won't do anything more. isEOF() will also be true.
+ IS_EOF,
+ // work(...) needs more time to product a result. Call work(...) again.
+ NEED_TIME,
+ // Something has gone unrecoverably wrong. Stop running this query.
+ ERROR,
+ // Something isn't in memory. Fetch it. TODO: actually support this (forthcoming).
+ NEED_YIELD,
+ };
+
+ /**
+ * Perform a unit of work on the query. Ask the stage to produce the next unit of output.
+ * Stage returns StageState::ADVANCED if *out is set to the next unit of output. Otherwise,
+ * returns another value of StageState to indicate the stage's status.
+ */
+ virtual StageState work(WorkingSetID* out) = 0;
+
+ /**
+ * Returns true if no more work can be done on the query / out of results.
+ */
+ virtual bool isEOF() = 0;
+
+ //
+ // Yielding and isolation semantics:
+ //
+ // Any data that is not inserted, deleted, or modified during a yield will be faithfully
+ // returned by a query that should return that data.
+ //
+ // Any data inserted, deleted, or modified during a yield that should be returned by a query
+ // may or may not be returned by that query. The query could return: nothing; the data
+ // before; the data after; or both the data before and the data after.
+ //
+ // In short, there is no isolation between a query and an insert/delete/update. AKA,
+ // READ_UNCOMMITTED.
+ //
+
+ /**
+ * Notifies the stage that all locks are about to be released. The stage must save any
+ * state required to resume where it was before prepareToYield was called.
+ */
+ virtual void prepareToYield() = 0;
+
+ /**
+ * Notifies the stage that any required locks have been reacquired. The stage must restore
+ * any saved state and be ready to handle calls to work().
+ *
+ * Can only be called after prepareToYield.
+ */
+ virtual void recoverFromYield() = 0;
+
+ /**
+ * Notifies a stage that a DiskLoc is going to be deleted (or in-place updated) so that the
+ * stage can invalidate or modify any state required to continue processing without this
+ * DiskLoc.
+ *
+ * Can only be called after a prepareToYield but before a recoverFromYield.
+ */
+ virtual void invalidate(const DiskLoc& dl) = 0;
+ };
+
+} // namespace mongo
diff --git a/src/mongo/db/exec/simple_plan_runner.h b/src/mongo/db/exec/simple_plan_runner.h
new file mode 100644
index 00000000000..490d9ba4996
--- /dev/null
+++ b/src/mongo/db/exec/simple_plan_runner.h
@@ -0,0 +1,74 @@
+/**
+ * Copyright (C) 2013 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "mongo/db/exec/plan_stage.h"
+#include "mongo/db/exec/working_set.h"
+#include "mongo/db/exec/working_set_common.h"
+
+namespace mongo {
+
+ /**
+ * A placeholder for a full-featured plan runner. Calls work() on a plan until a result is
+ * produced. Stops when the plan is EOF or if the plan errors.
+ *
+ * TODO: Yielding policy
+ * TODO: Graceful error handling
+ * TODO: Fetch
+ * TODO: Stats, diagnostics, instrumentation, etc.
+ */
+ class SimplePlanRunner {
+ public:
+ SimplePlanRunner() { }
+
+ WorkingSet* getWorkingSet() { return &_workingSet; }
+
+ /**
+ * Takes ownership of root.
+ */
+ void setRoot(PlanStage* root) {
+ verify(root);
+ _root.reset(root);
+ }
+
+ bool getNext(BSONObj* objOut) {
+ for (;;) {
+ WorkingSetID id;
+ PlanStage::StageState code = _root->work(&id);
+
+ if (PlanStage::ADVANCED == code) {
+ WorkingSetMember* member = _workingSet.get(id);
+ uassert(16912, "Couldn't fetch obj from query plan",
+ WorkingSetCommon::fetch(member));
+ *objOut = member->obj;
+ _workingSet.free(id);
+ return true;
+ }
+ else if (code == PlanStage::NEED_TIME) {
+ // TODO: Occasionally yield. For now, we run until we get another result.
+ }
+ else {
+ // IS_EOF, ERROR, NEED_YIELD. We just stop here.
+ return false;
+ }
+ }
+ }
+
+ private:
+ WorkingSet _workingSet;
+ scoped_ptr<PlanStage> _root;
+ };
+
+} // namespace mongo
diff --git a/src/mongo/db/exec/stagedebug_cmd.cpp b/src/mongo/db/exec/stagedebug_cmd.cpp
new file mode 100644
index 00000000000..b8a876cbba0
--- /dev/null
+++ b/src/mongo/db/exec/stagedebug_cmd.cpp
@@ -0,0 +1,156 @@
+/**
+ * Copyright (C) 2013 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "mongo/db/auth/action_set.h"
+#include "mongo/db/auth/action_type.h"
+#include "mongo/db/auth/privilege.h"
+#include "mongo/db/commands.h"
+#include "mongo/db/exec/index_scan.h"
+#include "mongo/db/exec/simple_plan_runner.h"
+#include "mongo/db/index/catalog_hack.h"
+#include "mongo/db/jsobj.h"
+#include "mongo/db/matcher/matcher.h"
+#include "mongo/db/namespace_details.h"
+#include "mongo/db/namespace-inl.h"
+#include "mongo/db/pdfile.h"
+
+namespace mongo {
+
+ /**
+ * A command for manually constructing a query tree and running it.
+ *
+ * db.runCommand({stageDebug: rootNode})
+ *
+ * The value of the filter field is a BSONObj that specifies values that fields must have. What
+ * you'd pass to a matcher.
+ *
+ * Leaf Nodes:
+ *
+ * node -> {ixscan: {filter: {FILTER},
+ * args: {name: "collectionname", indexKeyPattern: kpObj, start: startObj,
+ * stop: stopObj, endInclusive: true/false, direction: -1/1,
+ * limit: int}}}
+ *
+ * Forthcoming Nodes:
+ *
+ * node -> {cscan: {filter: {filter}, args: {name: "collectionname" }}}
+ * node -> {and: {filter: {filter}, args: { nodes: [node, node]}}}
+ * node -> {or: {filter: {filter}, args: { dedup:bool, nodes:[node, node]}}}
+ * node -> {fetch: {filter: {filter}, args: {node: node}}}
+ * node -> {sort: {filter: {filter}, args: {node: node, pattern: objWithSortCriterion}}}
+ * node -> {dedup: {filter: {filter}, args: {node: node, field: field}}}
+ * node -> {limit: {filter: filter}, args: {node: node, num: posint}}
+ * node -> {skip: {filter: filter}, args: {node: node, num: posint}}
+ * node -> {unwind: {filter: filter}, args: {node: node, field: field}}
+ */
+ class StageDebugCmd : public Command {
+ public:
+ StageDebugCmd() : Command("stageDebug") { }
+
+ // Boilerplate for commands
+ virtual LockType locktype() const { return READ; }
+ bool slaveOk() const { return true; }
+ bool slaveOverrideOk() const { return true; }
+ void help(stringstream& h) const { }
+
+ virtual void addRequiredPrivileges(const std::string& dbname,
+ const BSONObj& cmdObj,
+ std::vector<Privilege>* out) {
+ ActionSet actions;
+ actions.addAction(ActionType::find);
+ out->push_back(Privilege(parseNs(dbname, cmdObj), actions));
+ }
+
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result,
+ bool fromRepl) {
+
+ BSONElement argElt = cmdObj["stageDebug"];
+ if (argElt.eoo() || !argElt.isABSONObj()) { return false; }
+ BSONObj argObj = argElt.Obj();
+
+ SimplePlanRunner runner;
+ auto_ptr<PlanStage> root(parseQuery(dbname, argObj, runner.getWorkingSet()));
+ uassert(16911, "Couldn't parse plan from " + argObj.toString(), root.get());
+ runner.setRoot(root.release());
+
+ BSONArrayBuilder resultBuilder(result.subarrayStart("results"));
+
+ for (BSONObj obj; runner.getNext(&obj); ) {
+ resultBuilder.append(obj);
+ }
+
+ resultBuilder.done();
+ return true;
+ }
+
+ PlanStage* parseQuery(const string& dbname, BSONObj obj, WorkingSet* workingSet) {
+ BSONElement firstElt = obj.firstElement();
+ if (!firstElt.isABSONObj()) { return NULL; }
+ BSONObj paramObj = firstElt.Obj();
+
+ auto_ptr<Matcher> matcher;
+ BSONObj nodeArgs;
+
+ // Every node has these two fields.
+ const string filterTag = "filter";
+ const string argsTag = "args";
+
+ BSONObjIterator it(paramObj);
+ while (it.more()) {
+ BSONElement e = it.next();
+ if (!e.isABSONObj()) { return NULL; }
+ BSONObj argObj = e.Obj();
+ if (filterTag == e.fieldName()) {
+ matcher.reset(new Matcher2(argObj));
+ }
+ else if (argsTag == e.fieldName()) {
+ nodeArgs = argObj;
+ }
+ else {
+ uasserted(16910, "Unknown fieldname " + string(e.fieldName())
+ + " in query node " + obj.toString());
+ return NULL;
+ }
+ }
+
+ string nodeName = firstElt.fieldName();
+
+ if ("ixscan" == nodeName) {
+ NamespaceDetails* nsd = nsdetails(dbname + "." + nodeArgs["name"].String());
+ uassert(16913, "Can't find collection " + nodeArgs["name"].String(), nsd);
+
+ int idxNo = nsd->findIndexByKeyPattern(nodeArgs["keyPattern"].Obj());
+ uassert(16890, "Can't find index: " + nodeArgs["keyPattern"].Obj().toString(),
+ idxNo != -1);
+
+ IndexScanParams params;
+ params.descriptor = CatalogHack::getDescriptor(nsd, idxNo);
+ params.startKey = nodeArgs["startKey"].Obj();
+ params.endKey = nodeArgs["endKey"].Obj();
+ params.endKeyInclusive = nodeArgs["endKeyInclusive"].Bool();
+ params.direction = nodeArgs["direction"].numberInt();
+ params.limit = nodeArgs["limit"].numberInt();
+ params.forceBtreeAccessMethod = false;
+
+ return new IndexScan(params, workingSet, matcher.release());
+ }
+ else {
+ return NULL;
+ }
+ }
+ } stageDebugCmd;
+
+} // namespace mongo
diff --git a/src/mongo/db/exec/working_set.cpp b/src/mongo/db/exec/working_set.cpp
new file mode 100644
index 00000000000..7a4b96e54d4
--- /dev/null
+++ b/src/mongo/db/exec/working_set.cpp
@@ -0,0 +1,98 @@
+/**
+ * Copyright (C) 2013 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "mongo/db/exec/working_set.h"
+
+#include "mongo/db/index/index_descriptor.h"
+
+namespace mongo {
+
+ const WorkingSetID WorkingSet::INVALID_ID = -1;
+
+ WorkingSet::WorkingSet() : _nextId(0) { }
+
+ WorkingSet::~WorkingSet() {
+ for (DataMap::const_iterator i = _data.begin(); i != _data.end(); ++i) {
+ delete i->second;
+ }
+ }
+
+ WorkingSetID WorkingSet::allocate() {
+ verify(_data.end() == _data.find(_nextId));
+ _data[_nextId] = new WorkingSetMember();
+ return _nextId++;
+ }
+
+ WorkingSetMember* WorkingSet::get(const WorkingSetID& i) {
+ DataMap::iterator it = _data.find(i);
+ verify(_data.end() != it);
+ return it->second;
+ }
+
+ void WorkingSet::free(const WorkingSetID& i) {
+ DataMap::iterator it = _data.find(i);
+ verify(_data.end() != it);
+ delete it->second;
+ _data.erase(it);
+ }
+
+ WorkingSetMember::WorkingSetMember() : state(WorkingSetMember::INVALID) { }
+
+ bool WorkingSetMember::hasLoc() const {
+ return state == LOC_AND_IDX || state == LOC_AND_UNOWNED_OBJ
+ || state == LOC_AND_OWNED_OBJ;
+ }
+
+ bool WorkingSetMember::hasObj() const {
+ return hasOwnedObj() || hasUnownedObj();
+ }
+
+ bool WorkingSetMember::hasOwnedObj() const {
+ return state == OWNED_OBJ || state == LOC_AND_OWNED_OBJ;
+ }
+
+ bool WorkingSetMember::hasUnownedObj() const {
+ return state == LOC_AND_UNOWNED_OBJ;
+ }
+
+ bool WorkingSetMember::getFieldDotted(const string& field, BSONElement* out) {
+ // If our state is such that we have an object, use it.
+ if (hasObj()) {
+ *out = obj.getFieldDotted(field);
+ return true;
+ }
+
+ // Our state should be such that we have index data/are covered.
+ for (size_t i = 0; i < keyData.size(); ++i) {
+ BSONObjIterator keyPatternIt(keyData[i].indexKeyPattern);
+ BSONObjIterator keyDataIt(keyData[i].keyData);
+
+ while (keyPatternIt.more()) {
+ BSONElement keyPatternElt = keyPatternIt.next();
+ verify(keyDataIt.more());
+ BSONElement keyDataElt = keyDataIt.next();
+
+ if (field == keyPatternElt.fieldName()) {
+ *out = keyDataElt;
+ return true;
+ }
+ }
+ }
+
+ return false;
+ }
+
+} // namespace mongo
diff --git a/src/mongo/db/exec/working_set.h b/src/mongo/db/exec/working_set.h
new file mode 100644
index 00000000000..6a324fb7e6e
--- /dev/null
+++ b/src/mongo/db/exec/working_set.h
@@ -0,0 +1,135 @@
+/**
+ * Copyright (C) 2013 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <vector>
+#include "mongo/db/diskloc.h"
+#include "mongo/db/jsobj.h"
+#include "mongo/platform/unordered_map.h"
+
+namespace mongo {
+
+ class WorkingSetMember;
+
+ typedef long WorkingSetID;
+
+ /**
+ * All data in use by a query. Data is passed through the stage tree by referencing the ID of
+ * an element of the working set. Stages can add elements to the working set, delete elements
+ * from the working set, or mutate elements in the working set.
+ */
+ class WorkingSet {
+ public:
+ static const WorkingSetID INVALID_ID;
+
+ WorkingSet();
+ ~WorkingSet();
+
+ /**
+ * Allocate a new query result and return the ID used to get and free it.
+ */
+ WorkingSetID allocate();
+
+ /**
+ * Get the i-th mutable query result.
+ */
+ WorkingSetMember* get(const WorkingSetID& i);
+
+ /**
+ * Unallocate the i-th query result and release its resouces.
+ */
+ void free(const WorkingSetID& i);
+
+ private:
+ typedef unordered_map<WorkingSetID, WorkingSetMember*> DataMap;
+
+ DataMap _data;
+
+ // The WorkingSetID returned by the next call to allocate(). Should refer to the next valid
+ // ID. IDs allocated contiguously. Should never point at an in-use ID.
+ WorkingSetID _nextId;
+ };
+
+ /**
+ * The key data extracted from an index. Keeps track of both the key (currently a BSONObj) and
+ * the index that provided the key. The index key pattern is required to correctly interpret
+ * the key.
+ */
+ struct IndexKeyDatum {
+ IndexKeyDatum(const BSONObj& keyPattern, const BSONObj& key) : indexKeyPattern(keyPattern),
+ keyData(key) { }
+
+ // This is not owned and points into the IndexDescriptor's data.
+ BSONObj indexKeyPattern;
+
+ // This is the BSONObj for the key that we put into the index. Owned by us.
+ BSONObj keyData;
+ };
+
+ /**
+ * The type of the data passed between query stages. In particular:
+ *
+ * Index scan stages return a WorkingSetMember in the LOC_AND_IDX state.
+ *
+ * Collection scan stages the LOC_AND_UNOWNED_OBJ state.
+ *
+ * A WorkingSetMember may have any of the data above.
+ */
+ struct WorkingSetMember {
+ WorkingSetMember();
+
+ enum MemberState {
+ // Initial state.
+ INVALID,
+
+ // Data is from 1 or more indices.
+ LOC_AND_IDX,
+
+ // Data is from a collection scan.
+ LOC_AND_UNOWNED_OBJ,
+
+ // Data is from a fetch.
+ LOC_AND_OWNED_OBJ,
+
+ // DiskLoc has been invalidated, or the obj doesn't correspond to an on-disk document
+ // anymore (e.g. is a computed expression).
+ OWNED_OBJ,
+ };
+
+ DiskLoc loc;
+ BSONObj obj;
+ vector<IndexKeyDatum> keyData;
+ MemberState state;
+
+ bool hasLoc() const;
+ bool hasObj() const;
+ bool hasOwnedObj() const;
+ bool hasUnownedObj() const;
+
+ /**
+ * getFieldDotted uses its state (obj or index data) to produce the field with the provided
+ * name.
+ *
+ * Returns true if there is the element is in an index key or in an (owned or unowned)
+ * object. *out is set to the element if so.
+ *
+ * Returns false otherwise. Returning false indicates a query planning error.
+ */
+ bool getFieldDotted(const string& field, BSONElement* out);
+ };
+
+} // namespace mongo
diff --git a/src/mongo/db/exec/working_set_common.cpp b/src/mongo/db/exec/working_set_common.cpp
new file mode 100644
index 00000000000..4be4b12d5b8
--- /dev/null
+++ b/src/mongo/db/exec/working_set_common.cpp
@@ -0,0 +1,56 @@
+/**
+ * Copyright (C) 2013 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "mongo/db/exec/working_set.h"
+#include "mongo/db/exec/working_set_common.h"
+#include "mongo/db/pdfile.h"
+
+namespace mongo {
+
+ // static
+ bool WorkingSetCommon::fetch(WorkingSetMember* member) {
+ // Already fetched.
+ if (member->hasObj()) {
+ // If we did a fetch we shouldn't have this around...
+ verify(member->keyData.empty());
+ return true;
+ }
+
+ if (!member->hasLoc()) { return false; }
+
+ member->obj = member->loc.obj();
+ member->state = WorkingSetMember::LOC_AND_OWNED_OBJ;
+
+ // We have an obj. so get rid of the key data.
+ member->keyData.clear();
+ return true;
+ }
+
+ // static
+ bool WorkingSetCommon::fetchAndInvalidateLoc(WorkingSetMember* member) {
+ // Already in our desired state.
+ if (member->state == WorkingSetMember::OWNED_OBJ) { return true; }
+
+ if (fetch(member)) {
+ member->obj = member->obj.getOwned();
+ member->state = WorkingSetMember::OWNED_OBJ;
+ member->loc = DiskLoc();
+ return true;
+ }
+ else { return false; }
+ }
+
+} // namespace mongo
diff --git a/src/mongo/db/exec/working_set_common.h b/src/mongo/db/exec/working_set_common.h
new file mode 100644
index 00000000000..cbafc0c27df
--- /dev/null
+++ b/src/mongo/db/exec/working_set_common.h
@@ -0,0 +1,44 @@
+/**
+ * Copyright (C) 2013 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+namespace mongo {
+
+ class WorkingSetMember;
+
+ class WorkingSetCommon {
+ public:
+ /**
+ * Fetch an unowned copy of the BSONObj that the provided WSM refers to. Updates
+ * member->obj to refer to that unowned BSONObj. Requires a valid DiskLoc. Does nothing if
+ * WSM has a valid obj already.
+ *
+ * Returns true if the fetch succeeded, false otherwise.
+ *
+ * Used in SORT and FETCH.
+ */
+ static bool fetch(WorkingSetMember* member);
+
+ /**
+ * Get an owned copy of the BSONObj the WSM refers to.
+ * Requires either a valid BSONObj or valid DiskLoc.
+ * Returns true if the fetch and invalidate succeeded, false otherwise.
+ */
+ static bool fetchAndInvalidateLoc(WorkingSetMember* member);
+ };
+
+} // namespace mongo
diff --git a/src/mongo/db/exec/working_set_test.cpp b/src/mongo/db/exec/working_set_test.cpp
new file mode 100644
index 00000000000..b186c99c377
--- /dev/null
+++ b/src/mongo/db/exec/working_set_test.cpp
@@ -0,0 +1,144 @@
+/**
+ * Copyright (C) 2013 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * This file contains tests for mongo/db/exec/working_set.cpp
+ */
+
+#include "mongo/db/exec/working_set.h"
+#include "mongo/db/json.h"
+#include "mongo/db/jsobj.h"
+#include "mongo/unittest/unittest.h"
+#include "mongo/util/assert_util.h"
+
+using namespace mongo;
+
+namespace {
+
+ class WorkingSetFixture : public mongo::unittest::Test {
+ protected:
+ void setUp() {
+ WorkingSetID id = ws.allocate();
+ ASSERT(id != WorkingSet::INVALID_ID);
+ member = ws.get(id);
+ ASSERT(NULL != member);
+ }
+
+ void tearDown() {
+ ws = WorkingSet();
+ member = NULL;
+ }
+
+ WorkingSet ws;
+ WorkingSetMember* member;
+ };
+
+ TEST_F(WorkingSetFixture, noFieldToGet) {
+ BSONElement elt;
+
+ // Make sure we're not getting anything out of an invalid WSM.
+ ASSERT_EQUALS(WorkingSetMember::INVALID, member->state);
+ ASSERT_FALSE(member->getFieldDotted("foo", &elt));
+
+ member->state = WorkingSetMember::LOC_AND_IDX;
+ ASSERT_FALSE(member->getFieldDotted("foo", &elt));
+
+ // Our state is that of a valid object. The getFieldDotted shouldn't throw; there's
+ // something to call getFieldDotted on, but there's no field there.
+ member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
+ ASSERT_TRUE(member->getFieldDotted("foo", &elt));
+
+ member->state = WorkingSetMember::LOC_AND_OWNED_OBJ;
+ ASSERT_TRUE(member->getFieldDotted("foo", &elt));
+
+ member->state = WorkingSetMember::OWNED_OBJ;
+ ASSERT_TRUE(member->getFieldDotted("foo", &elt));
+ }
+
+ TEST_F(WorkingSetFixture, getFieldUnowned) {
+ string fieldName = "x";
+
+ BSONObj obj = BSON(fieldName << 5);
+ // Not truthful since the loc is bogus, but the loc isn't accessed anyway...
+ member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ;
+ member->obj = BSONObj(obj.objdata());
+ ASSERT_TRUE(obj.isOwned());
+ ASSERT_FALSE(member->obj.isOwned());
+
+ // Get out the field we put in.
+ BSONElement elt;
+ ASSERT_TRUE(member->getFieldDotted(fieldName, &elt));
+ ASSERT_EQUALS(elt.numberInt(), 5);
+ }
+
+ TEST_F(WorkingSetFixture, getFieldOwned) {
+ string fieldName = "x";
+
+ BSONObj obj = BSON(fieldName << 5);
+ member->state = WorkingSetMember::LOC_AND_OWNED_OBJ;
+ member->obj = obj.getOwned();
+ ASSERT_TRUE(member->obj.isOwned());
+
+ BSONElement elt;
+ ASSERT_TRUE(member->getFieldDotted(fieldName, &elt));
+ ASSERT_EQUALS(elt.numberInt(), 5);
+
+ // Try OWNED_OBJ state as well.
+ member->state = WorkingSetMember::OWNED_OBJ;
+ ASSERT_TRUE(member->getFieldDotted(fieldName, &elt));
+ ASSERT_EQUALS(elt.numberInt(), 5);
+ }
+
+ TEST_F(WorkingSetFixture, getFieldFromIndex) {
+ string firstName = "x";
+ int firstValue = 5;
+
+ string secondName = "y";
+ int secondValue = 10;
+
+ member->keyData.push_back(IndexKeyDatum(BSON(firstName << 1), BSON("" << firstValue)));
+ // Also a minor lie as loc is bogus.
+ member->state = WorkingSetMember::LOC_AND_IDX;
+ BSONElement elt;
+ ASSERT_TRUE(member->getFieldDotted(firstName, &elt));
+ ASSERT_EQUALS(elt.numberInt(), firstValue);
+ // No foo field.
+ ASSERT_FALSE(member->getFieldDotted("foo", &elt));
+
+ // Add another index datum.
+ member->keyData.push_back(IndexKeyDatum(BSON(secondName << 1), BSON("" << secondValue)));
+ ASSERT_TRUE(member->getFieldDotted(secondName, &elt));
+ ASSERT_EQUALS(elt.numberInt(), secondValue);
+ ASSERT_TRUE(member->getFieldDotted(firstName, &elt));
+ ASSERT_EQUALS(elt.numberInt(), firstValue);
+ // Still no foo.
+ ASSERT_FALSE(member->getFieldDotted("foo", &elt));
+ }
+
+ TEST_F(WorkingSetFixture, getDottedFieldFromIndex) {
+ string firstName = "x.y";
+ int firstValue = 5;
+
+ member->keyData.push_back(IndexKeyDatum(BSON(firstName << 1), BSON("" << firstValue)));
+ member->state = WorkingSetMember::LOC_AND_IDX;
+ BSONElement elt;
+ ASSERT_TRUE(member->getFieldDotted(firstName, &elt));
+ ASSERT_EQUALS(elt.numberInt(), firstValue);
+ ASSERT_FALSE(member->getFieldDotted("x", &elt));
+ ASSERT_FALSE(member->getFieldDotted("y", &elt));
+ }
+
+} // namespace