diff options
Diffstat (limited to 'src/mongo/db/exec')
-rw-r--r-- | src/mongo/db/exec/index_scan.cpp | 166 | ||||
-rw-r--r-- | src/mongo/db/exec/index_scan.h | 104 | ||||
-rw-r--r-- | src/mongo/db/exec/plan_stage.h | 159 | ||||
-rw-r--r-- | src/mongo/db/exec/simple_plan_runner.h | 74 | ||||
-rw-r--r-- | src/mongo/db/exec/stagedebug_cmd.cpp | 156 | ||||
-rw-r--r-- | src/mongo/db/exec/working_set.cpp | 98 | ||||
-rw-r--r-- | src/mongo/db/exec/working_set.h | 135 | ||||
-rw-r--r-- | src/mongo/db/exec/working_set_common.cpp | 56 | ||||
-rw-r--r-- | src/mongo/db/exec/working_set_common.h | 44 | ||||
-rw-r--r-- | src/mongo/db/exec/working_set_test.cpp | 144 |
10 files changed, 1136 insertions, 0 deletions
diff --git a/src/mongo/db/exec/index_scan.cpp b/src/mongo/db/exec/index_scan.cpp new file mode 100644 index 00000000000..c4601996b18 --- /dev/null +++ b/src/mongo/db/exec/index_scan.cpp @@ -0,0 +1,166 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "mongo/db/exec/index_scan.h" + +#include "mongo/db/index/catalog_hack.h" +#include "mongo/db/index/index_access_method.h" +#include "mongo/db/index/index_cursor.h" +#include "mongo/db/index/index_descriptor.h" + +namespace { + + // Return a value in the set {-1, 0, 1} to represent the sign of parameter i. + int sgn(int i) { + if (i == 0) + return 0; + return i > 0 ? 1 : -1; + } + +} // namespace + +namespace mongo { + + IndexScan::IndexScan(const IndexScanParams& params, WorkingSet* workingSet, Matcher* matcher) + : _workingSet(workingSet), _descriptor(params.descriptor), _startKey(params.startKey), + _endKey(params.endKey), _endKeyInclusive(params.endKeyInclusive), + _direction(params.direction), _hitEnd(false), + _matcher(matcher), _shouldDedup(params.descriptor->isMultikey()), + _yieldMovedCursor(false), _numWanted(params.limit) { + + string amName; + + if (params.forceBtreeAccessMethod) { + _iam.reset(CatalogHack::getBtreeIndex(_descriptor.get())); + amName = ""; + } + else { + amName = CatalogHack::getAccessMethodName(_descriptor->keyPattern()); + _iam.reset(CatalogHack::getIndex(_descriptor.get())); + } + + if (IndexNames::GEO_2D == amName || IndexNames::GEO_2DSPHERE == amName) { + // _endKey is meaningless for 2d and 2dsphere. + verify(_endKey.isEmpty()); + } + } + + PlanStage::StageState IndexScan::work(WorkingSetID* out) { + if (NULL == _indexCursor.get()) { + // First call to work(). Perform cursor init. + CursorOptions cursorOptions; + + // The limit is *required* for 2d $near, which is the only index that pays attention to + // it anyway. + cursorOptions.numWanted = _numWanted; + if (1 == _direction) { + cursorOptions.direction = CursorOptions::INCREASING; + } + else { + cursorOptions.direction = CursorOptions::DECREASING; + } + + IndexCursor *cursor; + _iam->newCursor(&cursor); + _indexCursor.reset(cursor); + _indexCursor->setOptions(cursorOptions); + _indexCursor->seek(_startKey); + checkEnd(); + } + else if (_yieldMovedCursor) { + _yieldMovedCursor = false; + // Note that we're not calling next() here. + } + else { + _indexCursor->next(); + checkEnd(); + } + + if (isEOF()) { return PlanStage::IS_EOF; } + + DiskLoc loc = _indexCursor->getValue(); + + if (_shouldDedup) { + if (_returned.end() != _returned.find(loc)) { + return PlanStage::NEED_TIME; + } + else { + _returned.insert(loc); + } + } + + WorkingSetID id = _workingSet->allocate(); + WorkingSetMember* member = _workingSet->get(id); + member->loc = loc; + member->keyData.push_back(IndexKeyDatum(_descriptor->keyPattern(), + _indexCursor->getKey().getOwned())); + member->state = WorkingSetMember::LOC_AND_IDX; + + if (NULL == _matcher || _matcher->matches(member)) { + *out = id; + return PlanStage::ADVANCED; + } + + _workingSet->free(id); + + return PlanStage::NEED_TIME; + } + + bool IndexScan::isEOF() { return _indexCursor->isEOF() || _hitEnd; } + + void IndexScan::prepareToYield() { + if (isEOF()) { return; } + _savedKey = _indexCursor->getKey().getOwned(); + _savedLoc = _indexCursor->getValue(); + _indexCursor->savePosition(); + } + + void IndexScan::recoverFromYield() { + if (isEOF()) { return; } + + _indexCursor->restorePosition(); + + if (!_savedKey.binaryEqual(_indexCursor->getKey()) + || _savedLoc != _indexCursor->getValue()) { + // Our restored position isn't the same as the saved position. When we call work() + // again we want to return where we currently point, not past it. + _yieldMovedCursor = true; + + // Our restored position might be past endKey, see if we've hit the end. + checkEnd(); + } + } + + void IndexScan::invalidate(const DiskLoc& dl) { + // If we see this DiskLoc again, it may not be the same doc. it was before, so we want to + // return it. + _returned.erase(dl); + } + + void IndexScan::checkEnd() { + if (isEOF()) { return; } + + // If there is an empty endKey we will scan until we run out of index to scan over. + if (_endKey.isEmpty()) { return; } + + int cmp = sgn(_endKey.woCompare(_indexCursor->getKey(), _descriptor->keyPattern())); + + if ((cmp != 0 && cmp != _direction) || (cmp == 0 && !_endKeyInclusive)) { + _hitEnd = true; + } + } + +} // namespace mongo diff --git a/src/mongo/db/exec/index_scan.h b/src/mongo/db/exec/index_scan.h new file mode 100644 index 00000000000..2f53a1da53a --- /dev/null +++ b/src/mongo/db/exec/index_scan.h @@ -0,0 +1,104 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include "mongo/db/diskloc.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/matcher.h" +#include "mongo/db/exec/plan_stage.h" +#include "mongo/platform/unordered_set.h" + +namespace mongo { + + class IndexAccessMethod; + class IndexCursor; + class IndexDescriptor; + class IndexScanParams; + class WorkingSet; + + /** + * Stage scans over an index from startKey to endKey, returning results that pass the provided + * filter. Internally dedups on DiskLoc. + * + * Sub-stage preconditions: None. Is a leaf and consumes no stage data. + */ + class IndexScan : public PlanStage { + public: + IndexScan(const IndexScanParams& params, WorkingSet* workingSet, Matcher* matcher); + virtual ~IndexScan() { } + + virtual StageState work(WorkingSetID* out); + virtual bool isEOF(); + virtual void prepareToYield(); + virtual void recoverFromYield(); + virtual void invalidate(const DiskLoc& dl); + + private: + /** See if the cursor is pointing at or past _endKey, if _endKey is non-empty. */ + void checkEnd(); + + // The WorkingSet we annotate with results. Not owned by us. + WorkingSet* _workingSet; + + // Index access. + scoped_ptr<IndexAccessMethod> _iam; + scoped_ptr<IndexCursor> _indexCursor; + scoped_ptr<IndexDescriptor> _descriptor; + + // Bounds for the cursor. TODO: take a set of bounds. + BSONObj _startKey; + BSONObj _endKey; + bool _endKeyInclusive; + int _direction; + bool _hitEnd; + + // Contains expressions only over fields in the index key. We assume this is built + // correctly by whomever creates this class. + scoped_ptr<Matcher> _matcher; + + // Could our index have duplicates? If so, we use _returned to dedup. + bool _shouldDedup; + unordered_set<DiskLoc, DiskLoc::Hasher> _returned; + + // For yielding. + BSONObj _savedKey; + DiskLoc _savedLoc; + + // True if there was a yield and the yield changed the cursor position. + bool _yieldMovedCursor; + + // This is IndexScanParams::limit. See comment there. + int _numWanted; + }; + + struct IndexScanParams { + IndexScanParams() : descriptor(NULL), endKeyInclusive(true), direction(1), limit(0), + forceBtreeAccessMethod(false) { } + IndexDescriptor* descriptor; + BSONObj startKey; + BSONObj endKey; + bool endKeyInclusive; + int direction; + + // This only matters for 2d indices and will be ignored by every other index. + int limit; + + // Special indices internally open an IndexCursor over themselves but as a straight Btree. + bool forceBtreeAccessMethod; + }; + +} // namespace mongo diff --git a/src/mongo/db/exec/plan_stage.h b/src/mongo/db/exec/plan_stage.h new file mode 100644 index 00000000000..8429e4fd79e --- /dev/null +++ b/src/mongo/db/exec/plan_stage.h @@ -0,0 +1,159 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include "mongo/db/exec/working_set.h" + +namespace mongo { + + class DiskLoc; + + /** + * A PlanStage ("stage") is the basic building block of a "Query Execution Plan." A stage is + * the smallest piece of machinery used in executing a compiled query. Stages either access + * data (from a collection or an index) to create a stream of results, or transform a stream of + * results (e.g. AND, OR, SORT) to create a stream of results. + * + * Stages have zero or more input streams but only one output stream. Data-accessing stages are + * leaves and data-transforming stages have children. Stages can be connected together to form + * a tree which is then executed (see plan_runner.h) to solve a query. + * + * A stage's input and output are each typed. Only stages with compatible types can be + * connected. + * + * All of the stages of a QEP share a WorkingSet (see working_set.h). Data source stages + * allocate a slot in the WorkingSet, fill the slot with data, and return the ID of that slot. + * Subsequent stages fetch a WorkingSetElement by its ID and operate on the enclosed data. + * + * Stages do nothing unless work() is called. work() is a request to the stage to consume one + * unit of input. Some stages (e.g. AND, SORT) require many calls to work() before generating + * output as they must consume many units of input. These stages will inform the caller that + * they need more time, and work() must be called again in order to produce an output. + * + * Every stage of a query implements the PlanStage interface. Queries perform a unit of work + * and report on their subsequent status; see StatusCode for possible states. Query results are + * passed through the WorkingSet interface; see working_set.h for details. + * + * All synchronization is the responsibility of the caller. Queries must be told to yield with + * prepareToYield() if any underlying database state changes. If prepareToYield() is called, + * recoverFromYield() must be called again before any work() is done. + * + * Here is a very simple usage example: + * + * WorkingSet workingSet; + * PlanStage* rootStage = makeQueryPlan(&workingSet, ...); + * while (!rootStage->isEOF()) { + * WorkingSetID result; + * switch(rootStage->work(&result)) { + * case PlanStage::ADVANCED: + * // do something with result + * WorkingSetMember* member = workingSet.get(result); + * cout << "Result: " << member->obj << endl; + * break; + * case PlanStage::IS_EOF: + * // All done. Will fall out of while loop. + * break; + * case PlanStage::NEED_TIME: + * // Need more time. + * break; + * case PlanStage::ERROR: + * // Throw exception or return error + * break; + * case PlanStage::NEED_FETCH: + * // Go to disk and fetch stuff. + * break; + * } + * + * if (shouldYield) { + * // Occasionally yield. + * stage->prepareToYield(); + * // Do work that requires a yield here (execute other plans, insert, delete, etc.). + * stage->recoverFromYield(); + * } + * } + */ + class PlanStage { + public: + virtual ~PlanStage() { } + + /** + * All possible return values of work(...) + */ + enum StageState { + // work(...) has returned a new result in its out parameter. + ADVANCED, + // work(...) won't do anything more. isEOF() will also be true. + IS_EOF, + // work(...) needs more time to product a result. Call work(...) again. + NEED_TIME, + // Something has gone unrecoverably wrong. Stop running this query. + ERROR, + // Something isn't in memory. Fetch it. TODO: actually support this (forthcoming). + NEED_YIELD, + }; + + /** + * Perform a unit of work on the query. Ask the stage to produce the next unit of output. + * Stage returns StageState::ADVANCED if *out is set to the next unit of output. Otherwise, + * returns another value of StageState to indicate the stage's status. + */ + virtual StageState work(WorkingSetID* out) = 0; + + /** + * Returns true if no more work can be done on the query / out of results. + */ + virtual bool isEOF() = 0; + + // + // Yielding and isolation semantics: + // + // Any data that is not inserted, deleted, or modified during a yield will be faithfully + // returned by a query that should return that data. + // + // Any data inserted, deleted, or modified during a yield that should be returned by a query + // may or may not be returned by that query. The query could return: nothing; the data + // before; the data after; or both the data before and the data after. + // + // In short, there is no isolation between a query and an insert/delete/update. AKA, + // READ_UNCOMMITTED. + // + + /** + * Notifies the stage that all locks are about to be released. The stage must save any + * state required to resume where it was before prepareToYield was called. + */ + virtual void prepareToYield() = 0; + + /** + * Notifies the stage that any required locks have been reacquired. The stage must restore + * any saved state and be ready to handle calls to work(). + * + * Can only be called after prepareToYield. + */ + virtual void recoverFromYield() = 0; + + /** + * Notifies a stage that a DiskLoc is going to be deleted (or in-place updated) so that the + * stage can invalidate or modify any state required to continue processing without this + * DiskLoc. + * + * Can only be called after a prepareToYield but before a recoverFromYield. + */ + virtual void invalidate(const DiskLoc& dl) = 0; + }; + +} // namespace mongo diff --git a/src/mongo/db/exec/simple_plan_runner.h b/src/mongo/db/exec/simple_plan_runner.h new file mode 100644 index 00000000000..490d9ba4996 --- /dev/null +++ b/src/mongo/db/exec/simple_plan_runner.h @@ -0,0 +1,74 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "mongo/db/exec/plan_stage.h" +#include "mongo/db/exec/working_set.h" +#include "mongo/db/exec/working_set_common.h" + +namespace mongo { + + /** + * A placeholder for a full-featured plan runner. Calls work() on a plan until a result is + * produced. Stops when the plan is EOF or if the plan errors. + * + * TODO: Yielding policy + * TODO: Graceful error handling + * TODO: Fetch + * TODO: Stats, diagnostics, instrumentation, etc. + */ + class SimplePlanRunner { + public: + SimplePlanRunner() { } + + WorkingSet* getWorkingSet() { return &_workingSet; } + + /** + * Takes ownership of root. + */ + void setRoot(PlanStage* root) { + verify(root); + _root.reset(root); + } + + bool getNext(BSONObj* objOut) { + for (;;) { + WorkingSetID id; + PlanStage::StageState code = _root->work(&id); + + if (PlanStage::ADVANCED == code) { + WorkingSetMember* member = _workingSet.get(id); + uassert(16912, "Couldn't fetch obj from query plan", + WorkingSetCommon::fetch(member)); + *objOut = member->obj; + _workingSet.free(id); + return true; + } + else if (code == PlanStage::NEED_TIME) { + // TODO: Occasionally yield. For now, we run until we get another result. + } + else { + // IS_EOF, ERROR, NEED_YIELD. We just stop here. + return false; + } + } + } + + private: + WorkingSet _workingSet; + scoped_ptr<PlanStage> _root; + }; + +} // namespace mongo diff --git a/src/mongo/db/exec/stagedebug_cmd.cpp b/src/mongo/db/exec/stagedebug_cmd.cpp new file mode 100644 index 00000000000..b8a876cbba0 --- /dev/null +++ b/src/mongo/db/exec/stagedebug_cmd.cpp @@ -0,0 +1,156 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "mongo/db/auth/action_set.h" +#include "mongo/db/auth/action_type.h" +#include "mongo/db/auth/privilege.h" +#include "mongo/db/commands.h" +#include "mongo/db/exec/index_scan.h" +#include "mongo/db/exec/simple_plan_runner.h" +#include "mongo/db/index/catalog_hack.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/matcher/matcher.h" +#include "mongo/db/namespace_details.h" +#include "mongo/db/namespace-inl.h" +#include "mongo/db/pdfile.h" + +namespace mongo { + + /** + * A command for manually constructing a query tree and running it. + * + * db.runCommand({stageDebug: rootNode}) + * + * The value of the filter field is a BSONObj that specifies values that fields must have. What + * you'd pass to a matcher. + * + * Leaf Nodes: + * + * node -> {ixscan: {filter: {FILTER}, + * args: {name: "collectionname", indexKeyPattern: kpObj, start: startObj, + * stop: stopObj, endInclusive: true/false, direction: -1/1, + * limit: int}}} + * + * Forthcoming Nodes: + * + * node -> {cscan: {filter: {filter}, args: {name: "collectionname" }}} + * node -> {and: {filter: {filter}, args: { nodes: [node, node]}}} + * node -> {or: {filter: {filter}, args: { dedup:bool, nodes:[node, node]}}} + * node -> {fetch: {filter: {filter}, args: {node: node}}} + * node -> {sort: {filter: {filter}, args: {node: node, pattern: objWithSortCriterion}}} + * node -> {dedup: {filter: {filter}, args: {node: node, field: field}}} + * node -> {limit: {filter: filter}, args: {node: node, num: posint}} + * node -> {skip: {filter: filter}, args: {node: node, num: posint}} + * node -> {unwind: {filter: filter}, args: {node: node, field: field}} + */ + class StageDebugCmd : public Command { + public: + StageDebugCmd() : Command("stageDebug") { } + + // Boilerplate for commands + virtual LockType locktype() const { return READ; } + bool slaveOk() const { return true; } + bool slaveOverrideOk() const { return true; } + void help(stringstream& h) const { } + + virtual void addRequiredPrivileges(const std::string& dbname, + const BSONObj& cmdObj, + std::vector<Privilege>* out) { + ActionSet actions; + actions.addAction(ActionType::find); + out->push_back(Privilege(parseNs(dbname, cmdObj), actions)); + } + + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, + bool fromRepl) { + + BSONElement argElt = cmdObj["stageDebug"]; + if (argElt.eoo() || !argElt.isABSONObj()) { return false; } + BSONObj argObj = argElt.Obj(); + + SimplePlanRunner runner; + auto_ptr<PlanStage> root(parseQuery(dbname, argObj, runner.getWorkingSet())); + uassert(16911, "Couldn't parse plan from " + argObj.toString(), root.get()); + runner.setRoot(root.release()); + + BSONArrayBuilder resultBuilder(result.subarrayStart("results")); + + for (BSONObj obj; runner.getNext(&obj); ) { + resultBuilder.append(obj); + } + + resultBuilder.done(); + return true; + } + + PlanStage* parseQuery(const string& dbname, BSONObj obj, WorkingSet* workingSet) { + BSONElement firstElt = obj.firstElement(); + if (!firstElt.isABSONObj()) { return NULL; } + BSONObj paramObj = firstElt.Obj(); + + auto_ptr<Matcher> matcher; + BSONObj nodeArgs; + + // Every node has these two fields. + const string filterTag = "filter"; + const string argsTag = "args"; + + BSONObjIterator it(paramObj); + while (it.more()) { + BSONElement e = it.next(); + if (!e.isABSONObj()) { return NULL; } + BSONObj argObj = e.Obj(); + if (filterTag == e.fieldName()) { + matcher.reset(new Matcher2(argObj)); + } + else if (argsTag == e.fieldName()) { + nodeArgs = argObj; + } + else { + uasserted(16910, "Unknown fieldname " + string(e.fieldName()) + + " in query node " + obj.toString()); + return NULL; + } + } + + string nodeName = firstElt.fieldName(); + + if ("ixscan" == nodeName) { + NamespaceDetails* nsd = nsdetails(dbname + "." + nodeArgs["name"].String()); + uassert(16913, "Can't find collection " + nodeArgs["name"].String(), nsd); + + int idxNo = nsd->findIndexByKeyPattern(nodeArgs["keyPattern"].Obj()); + uassert(16890, "Can't find index: " + nodeArgs["keyPattern"].Obj().toString(), + idxNo != -1); + + IndexScanParams params; + params.descriptor = CatalogHack::getDescriptor(nsd, idxNo); + params.startKey = nodeArgs["startKey"].Obj(); + params.endKey = nodeArgs["endKey"].Obj(); + params.endKeyInclusive = nodeArgs["endKeyInclusive"].Bool(); + params.direction = nodeArgs["direction"].numberInt(); + params.limit = nodeArgs["limit"].numberInt(); + params.forceBtreeAccessMethod = false; + + return new IndexScan(params, workingSet, matcher.release()); + } + else { + return NULL; + } + } + } stageDebugCmd; + +} // namespace mongo diff --git a/src/mongo/db/exec/working_set.cpp b/src/mongo/db/exec/working_set.cpp new file mode 100644 index 00000000000..7a4b96e54d4 --- /dev/null +++ b/src/mongo/db/exec/working_set.cpp @@ -0,0 +1,98 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "mongo/db/exec/working_set.h" + +#include "mongo/db/index/index_descriptor.h" + +namespace mongo { + + const WorkingSetID WorkingSet::INVALID_ID = -1; + + WorkingSet::WorkingSet() : _nextId(0) { } + + WorkingSet::~WorkingSet() { + for (DataMap::const_iterator i = _data.begin(); i != _data.end(); ++i) { + delete i->second; + } + } + + WorkingSetID WorkingSet::allocate() { + verify(_data.end() == _data.find(_nextId)); + _data[_nextId] = new WorkingSetMember(); + return _nextId++; + } + + WorkingSetMember* WorkingSet::get(const WorkingSetID& i) { + DataMap::iterator it = _data.find(i); + verify(_data.end() != it); + return it->second; + } + + void WorkingSet::free(const WorkingSetID& i) { + DataMap::iterator it = _data.find(i); + verify(_data.end() != it); + delete it->second; + _data.erase(it); + } + + WorkingSetMember::WorkingSetMember() : state(WorkingSetMember::INVALID) { } + + bool WorkingSetMember::hasLoc() const { + return state == LOC_AND_IDX || state == LOC_AND_UNOWNED_OBJ + || state == LOC_AND_OWNED_OBJ; + } + + bool WorkingSetMember::hasObj() const { + return hasOwnedObj() || hasUnownedObj(); + } + + bool WorkingSetMember::hasOwnedObj() const { + return state == OWNED_OBJ || state == LOC_AND_OWNED_OBJ; + } + + bool WorkingSetMember::hasUnownedObj() const { + return state == LOC_AND_UNOWNED_OBJ; + } + + bool WorkingSetMember::getFieldDotted(const string& field, BSONElement* out) { + // If our state is such that we have an object, use it. + if (hasObj()) { + *out = obj.getFieldDotted(field); + return true; + } + + // Our state should be such that we have index data/are covered. + for (size_t i = 0; i < keyData.size(); ++i) { + BSONObjIterator keyPatternIt(keyData[i].indexKeyPattern); + BSONObjIterator keyDataIt(keyData[i].keyData); + + while (keyPatternIt.more()) { + BSONElement keyPatternElt = keyPatternIt.next(); + verify(keyDataIt.more()); + BSONElement keyDataElt = keyDataIt.next(); + + if (field == keyPatternElt.fieldName()) { + *out = keyDataElt; + return true; + } + } + } + + return false; + } + +} // namespace mongo diff --git a/src/mongo/db/exec/working_set.h b/src/mongo/db/exec/working_set.h new file mode 100644 index 00000000000..6a324fb7e6e --- /dev/null +++ b/src/mongo/db/exec/working_set.h @@ -0,0 +1,135 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <vector> +#include "mongo/db/diskloc.h" +#include "mongo/db/jsobj.h" +#include "mongo/platform/unordered_map.h" + +namespace mongo { + + class WorkingSetMember; + + typedef long WorkingSetID; + + /** + * All data in use by a query. Data is passed through the stage tree by referencing the ID of + * an element of the working set. Stages can add elements to the working set, delete elements + * from the working set, or mutate elements in the working set. + */ + class WorkingSet { + public: + static const WorkingSetID INVALID_ID; + + WorkingSet(); + ~WorkingSet(); + + /** + * Allocate a new query result and return the ID used to get and free it. + */ + WorkingSetID allocate(); + + /** + * Get the i-th mutable query result. + */ + WorkingSetMember* get(const WorkingSetID& i); + + /** + * Unallocate the i-th query result and release its resouces. + */ + void free(const WorkingSetID& i); + + private: + typedef unordered_map<WorkingSetID, WorkingSetMember*> DataMap; + + DataMap _data; + + // The WorkingSetID returned by the next call to allocate(). Should refer to the next valid + // ID. IDs allocated contiguously. Should never point at an in-use ID. + WorkingSetID _nextId; + }; + + /** + * The key data extracted from an index. Keeps track of both the key (currently a BSONObj) and + * the index that provided the key. The index key pattern is required to correctly interpret + * the key. + */ + struct IndexKeyDatum { + IndexKeyDatum(const BSONObj& keyPattern, const BSONObj& key) : indexKeyPattern(keyPattern), + keyData(key) { } + + // This is not owned and points into the IndexDescriptor's data. + BSONObj indexKeyPattern; + + // This is the BSONObj for the key that we put into the index. Owned by us. + BSONObj keyData; + }; + + /** + * The type of the data passed between query stages. In particular: + * + * Index scan stages return a WorkingSetMember in the LOC_AND_IDX state. + * + * Collection scan stages the LOC_AND_UNOWNED_OBJ state. + * + * A WorkingSetMember may have any of the data above. + */ + struct WorkingSetMember { + WorkingSetMember(); + + enum MemberState { + // Initial state. + INVALID, + + // Data is from 1 or more indices. + LOC_AND_IDX, + + // Data is from a collection scan. + LOC_AND_UNOWNED_OBJ, + + // Data is from a fetch. + LOC_AND_OWNED_OBJ, + + // DiskLoc has been invalidated, or the obj doesn't correspond to an on-disk document + // anymore (e.g. is a computed expression). + OWNED_OBJ, + }; + + DiskLoc loc; + BSONObj obj; + vector<IndexKeyDatum> keyData; + MemberState state; + + bool hasLoc() const; + bool hasObj() const; + bool hasOwnedObj() const; + bool hasUnownedObj() const; + + /** + * getFieldDotted uses its state (obj or index data) to produce the field with the provided + * name. + * + * Returns true if there is the element is in an index key or in an (owned or unowned) + * object. *out is set to the element if so. + * + * Returns false otherwise. Returning false indicates a query planning error. + */ + bool getFieldDotted(const string& field, BSONElement* out); + }; + +} // namespace mongo diff --git a/src/mongo/db/exec/working_set_common.cpp b/src/mongo/db/exec/working_set_common.cpp new file mode 100644 index 00000000000..4be4b12d5b8 --- /dev/null +++ b/src/mongo/db/exec/working_set_common.cpp @@ -0,0 +1,56 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "mongo/db/exec/working_set.h" +#include "mongo/db/exec/working_set_common.h" +#include "mongo/db/pdfile.h" + +namespace mongo { + + // static + bool WorkingSetCommon::fetch(WorkingSetMember* member) { + // Already fetched. + if (member->hasObj()) { + // If we did a fetch we shouldn't have this around... + verify(member->keyData.empty()); + return true; + } + + if (!member->hasLoc()) { return false; } + + member->obj = member->loc.obj(); + member->state = WorkingSetMember::LOC_AND_OWNED_OBJ; + + // We have an obj. so get rid of the key data. + member->keyData.clear(); + return true; + } + + // static + bool WorkingSetCommon::fetchAndInvalidateLoc(WorkingSetMember* member) { + // Already in our desired state. + if (member->state == WorkingSetMember::OWNED_OBJ) { return true; } + + if (fetch(member)) { + member->obj = member->obj.getOwned(); + member->state = WorkingSetMember::OWNED_OBJ; + member->loc = DiskLoc(); + return true; + } + else { return false; } + } + +} // namespace mongo diff --git a/src/mongo/db/exec/working_set_common.h b/src/mongo/db/exec/working_set_common.h new file mode 100644 index 00000000000..cbafc0c27df --- /dev/null +++ b/src/mongo/db/exec/working_set_common.h @@ -0,0 +1,44 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +namespace mongo { + + class WorkingSetMember; + + class WorkingSetCommon { + public: + /** + * Fetch an unowned copy of the BSONObj that the provided WSM refers to. Updates + * member->obj to refer to that unowned BSONObj. Requires a valid DiskLoc. Does nothing if + * WSM has a valid obj already. + * + * Returns true if the fetch succeeded, false otherwise. + * + * Used in SORT and FETCH. + */ + static bool fetch(WorkingSetMember* member); + + /** + * Get an owned copy of the BSONObj the WSM refers to. + * Requires either a valid BSONObj or valid DiskLoc. + * Returns true if the fetch and invalidate succeeded, false otherwise. + */ + static bool fetchAndInvalidateLoc(WorkingSetMember* member); + }; + +} // namespace mongo diff --git a/src/mongo/db/exec/working_set_test.cpp b/src/mongo/db/exec/working_set_test.cpp new file mode 100644 index 00000000000..b186c99c377 --- /dev/null +++ b/src/mongo/db/exec/working_set_test.cpp @@ -0,0 +1,144 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * This file contains tests for mongo/db/exec/working_set.cpp + */ + +#include "mongo/db/exec/working_set.h" +#include "mongo/db/json.h" +#include "mongo/db/jsobj.h" +#include "mongo/unittest/unittest.h" +#include "mongo/util/assert_util.h" + +using namespace mongo; + +namespace { + + class WorkingSetFixture : public mongo::unittest::Test { + protected: + void setUp() { + WorkingSetID id = ws.allocate(); + ASSERT(id != WorkingSet::INVALID_ID); + member = ws.get(id); + ASSERT(NULL != member); + } + + void tearDown() { + ws = WorkingSet(); + member = NULL; + } + + WorkingSet ws; + WorkingSetMember* member; + }; + + TEST_F(WorkingSetFixture, noFieldToGet) { + BSONElement elt; + + // Make sure we're not getting anything out of an invalid WSM. + ASSERT_EQUALS(WorkingSetMember::INVALID, member->state); + ASSERT_FALSE(member->getFieldDotted("foo", &elt)); + + member->state = WorkingSetMember::LOC_AND_IDX; + ASSERT_FALSE(member->getFieldDotted("foo", &elt)); + + // Our state is that of a valid object. The getFieldDotted shouldn't throw; there's + // something to call getFieldDotted on, but there's no field there. + member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ; + ASSERT_TRUE(member->getFieldDotted("foo", &elt)); + + member->state = WorkingSetMember::LOC_AND_OWNED_OBJ; + ASSERT_TRUE(member->getFieldDotted("foo", &elt)); + + member->state = WorkingSetMember::OWNED_OBJ; + ASSERT_TRUE(member->getFieldDotted("foo", &elt)); + } + + TEST_F(WorkingSetFixture, getFieldUnowned) { + string fieldName = "x"; + + BSONObj obj = BSON(fieldName << 5); + // Not truthful since the loc is bogus, but the loc isn't accessed anyway... + member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ; + member->obj = BSONObj(obj.objdata()); + ASSERT_TRUE(obj.isOwned()); + ASSERT_FALSE(member->obj.isOwned()); + + // Get out the field we put in. + BSONElement elt; + ASSERT_TRUE(member->getFieldDotted(fieldName, &elt)); + ASSERT_EQUALS(elt.numberInt(), 5); + } + + TEST_F(WorkingSetFixture, getFieldOwned) { + string fieldName = "x"; + + BSONObj obj = BSON(fieldName << 5); + member->state = WorkingSetMember::LOC_AND_OWNED_OBJ; + member->obj = obj.getOwned(); + ASSERT_TRUE(member->obj.isOwned()); + + BSONElement elt; + ASSERT_TRUE(member->getFieldDotted(fieldName, &elt)); + ASSERT_EQUALS(elt.numberInt(), 5); + + // Try OWNED_OBJ state as well. + member->state = WorkingSetMember::OWNED_OBJ; + ASSERT_TRUE(member->getFieldDotted(fieldName, &elt)); + ASSERT_EQUALS(elt.numberInt(), 5); + } + + TEST_F(WorkingSetFixture, getFieldFromIndex) { + string firstName = "x"; + int firstValue = 5; + + string secondName = "y"; + int secondValue = 10; + + member->keyData.push_back(IndexKeyDatum(BSON(firstName << 1), BSON("" << firstValue))); + // Also a minor lie as loc is bogus. + member->state = WorkingSetMember::LOC_AND_IDX; + BSONElement elt; + ASSERT_TRUE(member->getFieldDotted(firstName, &elt)); + ASSERT_EQUALS(elt.numberInt(), firstValue); + // No foo field. + ASSERT_FALSE(member->getFieldDotted("foo", &elt)); + + // Add another index datum. + member->keyData.push_back(IndexKeyDatum(BSON(secondName << 1), BSON("" << secondValue))); + ASSERT_TRUE(member->getFieldDotted(secondName, &elt)); + ASSERT_EQUALS(elt.numberInt(), secondValue); + ASSERT_TRUE(member->getFieldDotted(firstName, &elt)); + ASSERT_EQUALS(elt.numberInt(), firstValue); + // Still no foo. + ASSERT_FALSE(member->getFieldDotted("foo", &elt)); + } + + TEST_F(WorkingSetFixture, getDottedFieldFromIndex) { + string firstName = "x.y"; + int firstValue = 5; + + member->keyData.push_back(IndexKeyDatum(BSON(firstName << 1), BSON("" << firstValue))); + member->state = WorkingSetMember::LOC_AND_IDX; + BSONElement elt; + ASSERT_TRUE(member->getFieldDotted(firstName, &elt)); + ASSERT_EQUALS(elt.numberInt(), firstValue); + ASSERT_FALSE(member->getFieldDotted("x", &elt)); + ASSERT_FALSE(member->getFieldDotted("y", &elt)); + } + +} // namespace |