summary | refs | log | tree | commit | diff
path: root/src/mongo/db/exec/plan_stage.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/exec/plan_stage.h')
-rw-r--r-- src/mongo/db/exec/plan_stage.h 446
1 files changed, 219 insertions, 227 deletions
diff --git a/src/mongo/db/exec/plan_stage.h b/src/mongo/db/exec/plan_stage.h
index 07536817ca6..a096664b01d 100644
--- a/src/mongo/db/exec/plan_stage.h
+++ b/src/mongo/db/exec/plan_stage.h
@@ -34,255 +34,247 @@
namespace mongo {
- class Collection;
- class RecordId;
- class OperationContext;
+class Collection;
+class RecordId;
+class OperationContext;
+
+/**
+ * A PlanStage ("stage") is the basic building block of a "Query Execution Plan." A stage is
+ * the smallest piece of machinery used in executing a compiled query. Stages either access
+ * data (from a collection or an index) to create a stream of results, or transform a stream of
+ * results (e.g. AND, OR, SORT) to create a stream of results.
+ *
+ * Stages have zero or more input streams but only one output stream. Data-accessing stages are
+ * leaves and data-transforming stages have children. Stages can be connected together to form
+ * a tree which is then executed (see plan_executor.h) to solve a query.
+ *
+ * A stage's input and output are each typed. Only stages with compatible types can be
+ * connected.
+ *
+ * All of the stages of a QEP share a WorkingSet (see working_set.h). Data source stages
+ * allocate a slot in the WorkingSet, fill the slot with data, and return the ID of that slot.
+ * Subsequent stages fetch a WorkingSetMember by its ID and operate on the enclosed data.
+ *
+ * Stages do nothing unless work() is called. work() is a request to the stage to consume one
+ * unit of input. Some stages (e.g. AND, SORT) require many calls to work() before generating
+ * output as they must consume many units of input. These stages will inform the caller that
+ * they need more time, and work() must be called again in order to produce an output.
+ *
+ * Every stage of a query implements the PlanStage interface. Queries perform a unit of work
+ * and report on their subsequent status; see StageState for possible states. Query results are
+ * passed through the WorkingSet interface; see working_set.h for details.
+ *
+ * All synchronization is the responsibility of the caller. Queries must be told to yield with
+ * saveState() if any underlying database state changes. If saveState() is called,
+ * restoreState() must be called again before any work() is done.
+ *
+ * Here is a very simple usage example:
+ *
+ * WorkingSet workingSet;
+ * PlanStage* rootStage = makeQueryPlan(&workingSet, ...);
+ * while (!rootStage->isEOF()) {
+ * WorkingSetID result;
+ * switch(rootStage->work(&result)) {
+ * case PlanStage::ADVANCED:
+ * // do something with result
+ * WorkingSetMember* member = workingSet.get(result);
+ * cout << "Result: " << member->obj << std::endl;
+ * break;
+ * case PlanStage::IS_EOF:
+ * // All done. Will fall out of while loop.
+ * break;
+ * case PlanStage::NEED_TIME:
+ * // Need more time.
+ * break;
+ * case PlanStage::FAILURE:
+ * // Throw exception or return error
+ * break;
+ * }
+ *
+ * if (shouldYield) {
+ * // Occasionally yield.
+ * rootStage->saveState();
+ * // Do work that requires a yield here (execute other plans, insert, delete, etc.).
+ * rootStage->restoreState(opCtx);
+ * }
+ * }
+ */
+class PlanStage {
+public:
+ virtual ~PlanStage() {}
/**
- * A PlanStage ("stage") is the basic building block of a "Query Execution Plan." A stage is
- * the smallest piece of machinery used in executing a compiled query. Stages either access
- * data (from a collection or an index) to create a stream of results, or transform a stream of
- * results (e.g. AND, OR, SORT) to create a stream of results.
- *
- * Stages have zero or more input streams but only one output stream. Data-accessing stages are
- * leaves and data-transforming stages have children. Stages can be connected together to form
- * a tree which is then executed (see plan_executor.h) to solve a query.
- *
- * A stage's input and output are each typed. Only stages with compatible types can be
- * connected.
- *
- * All of the stages of a QEP share a WorkingSet (see working_set.h). Data source stages
- * allocate a slot in the WorkingSet, fill the slot with data, and return the ID of that slot.
- * Subsequent stages fetch a WorkingSetElement by its ID and operate on the enclosed data.
- *
- * Stages do nothing unless work() is called. work() is a request to the stage to consume one
- * unit of input. Some stages (e.g. AND, SORT) require many calls to work() before generating
- * output as they must consume many units of input. These stages will inform the caller that
- * they need more time, and work() must be called again in order to produce an output.
- *
- * Every stage of a query implements the PlanStage interface. Queries perform a unit of work
- * and report on their subsequent status; see StatusCode for possible states. Query results are
- * passed through the WorkingSet interface; see working_set.h for details.
- *
- * All synchronization is the responsibility of the caller. Queries must be told to yield with
- * saveState() if any underlying database state changes. If saveState() is called,
- * restoreState() must be called again before any work() is done.
- *
- * Here is a very simple usage example:
- *
- * WorkingSet workingSet;
- * PlanStage* rootStage = makeQueryPlan(&workingSet, ...);
- * while (!rootStage->isEOF()) {
- * WorkingSetID result;
- * switch(rootStage->work(&result)) {
- * case PlanStage::ADVANCED:
- * // do something with result
- * WorkingSetMember* member = workingSet.get(result);
- * cout << "Result: " << member->obj << std::endl;
- * break;
- * case PlanStage::IS_EOF:
- * // All done. Will fall out of while loop.
- * break;
- * case PlanStage::NEED_TIME:
- * // Need more time.
- * break;
- * case PlanStage::FAILURE:
- * // Throw exception or return error
- * break;
- * }
- *
- * if (shouldYield) {
- * // Occasionally yield.
- * stage->saveState();
- * // Do work that requires a yield here (execute other plans, insert, delete, etc.).
- * stage->restoreState();
- * }
- * }
+ * All possible return values of work(...)
*/
- class PlanStage {
- public:
- virtual ~PlanStage() { }
+ enum StageState {
+ // work(...) has returned a new result in its out parameter. The caller must free it
+ // from the working set when done with it.
+ ADVANCED,
- /**
- * All possible return values of work(...)
- */
- enum StageState {
- // work(...) has returned a new result in its out parameter. The caller must free it
- // from the working set when done with it.
- ADVANCED,
+ // work(...) won't do anything more. isEOF() will also be true. There is nothing
+ // output in the out parameter.
+ IS_EOF,
- // work(...) won't do anything more. isEOF() will also be true. There is nothing
- // output in the out parameter.
- IS_EOF,
+ // work(...) needs more time to produce a result. Call work(...) again. There is
+ // nothing output in the out parameter.
+ NEED_TIME,
- // work(...) needs more time to product a result. Call work(...) again. There is
- // nothing output in the out parameter.
- NEED_TIME,
-
- // The storage engine says we need to yield, possibly to fetch a record from disk, or
- // due to an aborted transaction in the storage layer.
- //
- // Full yield request semantics:
- //
- // Each stage that receives a NEED_YIELD from a child must propagate the NEED_YIELD up
- // and perform no work.
- //
- // If a yield is requested due to a WriteConflict, the out parameter of work(...) should
- // be populated with WorkingSet::INVALID_ID. If it is illegal to yield, a
- // WriteConflictException will be thrown.
- //
- // A yield-requesting stage populates the out parameter of work(...) with a WSID that
- // refers to a WSM with a Fetcher*. If it is illegal to yield, this is ignored. This
- // difference in behavior can be removed once SERVER-16051 is resolved.
- //
- // The plan executor is responsible for yielding and, if requested, paging in the data
- // upon receipt of a NEED_YIELD. The plan executor does NOT free the WSID of the
- // requested fetch. The stage that requested the fetch holds the WSID of the loc it
- // wants fetched. On the next call to work() that stage can assume a fetch was performed
- // on the WSM that the held WSID refers to.
- NEED_YIELD,
+ // The storage engine says we need to yield, possibly to fetch a record from disk, or
+ // due to an aborted transaction in the storage layer.
+ //
+ // Full yield request semantics:
+ //
+ // Each stage that receives a NEED_YIELD from a child must propagate the NEED_YIELD up
+ // and perform no work.
+ //
+ // If a yield is requested due to a WriteConflict, the out parameter of work(...) should
+ // be populated with WorkingSet::INVALID_ID. If it is illegal to yield, a
+ // WriteConflictException will be thrown.
+ //
+ // A yield-requesting stage populates the out parameter of work(...) with a WSID that
+ // refers to a WSM with a Fetcher*. If it is illegal to yield, this is ignored. This
+ // difference in behavior can be removed once SERVER-16051 is resolved.
+ //
+ // The plan executor is responsible for yielding and, if requested, paging in the data
+ // upon receipt of a NEED_YIELD. The plan executor does NOT free the WSID of the
+ // requested fetch. The stage that requested the fetch holds the WSID of the loc it
+ // wants fetched. On the next call to work() that stage can assume a fetch was performed
+ // on the WSM that the held WSID refers to.
+ NEED_YIELD,
- // Something went wrong but it's not an internal error. Perhaps our collection was
- // dropped or state deleted.
- DEAD,
+ // Something went wrong but it's not an internal error. Perhaps our collection was
+ // dropped or state deleted.
+ DEAD,
- // Something has gone unrecoverably wrong. Stop running this query.
- // If the out parameter does not refer to an invalid working set member,
- // call WorkingSetCommon::getStatusMemberObject() to get details on the failure.
- // Any class implementing this interface must set the WSID out parameter to
- // INVALID_ID or a valid WSM ID if FAILURE is returned.
- FAILURE,
- };
+ // Something has gone unrecoverably wrong. Stop running this query.
+ // If the out parameter does not refer to an invalid working set member,
+ // call WorkingSetCommon::getStatusMemberObject() to get details on the failure.
+ // Any class implementing this interface must set the WSID out parameter to
+ // INVALID_ID or a valid WSM ID if FAILURE is returned.
+ FAILURE,
+ };
- static std::string stateStr(const StageState& state) {
- if (ADVANCED == state) {
- return "ADVANCED";
- }
- else if (IS_EOF == state) {
- return "IS_EOF";
- }
- else if (NEED_TIME == state) {
- return "NEED_TIME";
- }
- else if (NEED_YIELD == state) {
- return "NEED_YIELD";
- }
- else if (DEAD == state) {
- return "DEAD";
- }
- else {
- verify(FAILURE == state);
- return "FAILURE";
- }
+ static std::string stateStr(const StageState& state) {
+ if (ADVANCED == state) {
+ return "ADVANCED";
+ } else if (IS_EOF == state) {
+ return "IS_EOF";
+ } else if (NEED_TIME == state) {
+ return "NEED_TIME";
+ } else if (NEED_YIELD == state) {
+ return "NEED_YIELD";
+ } else if (DEAD == state) {
+ return "DEAD";
+ } else {
+ verify(FAILURE == state);
+ return "FAILURE";
}
+ }
- /**
- * Perform a unit of work on the query. Ask the stage to produce the next unit of output.
- * Stage returns StageState::ADVANCED if *out is set to the next unit of output. Otherwise,
- * returns another value of StageState to indicate the stage's status.
- */
- virtual StageState work(WorkingSetID* out) = 0;
-
- /**
- * Returns true if no more work can be done on the query / out of results.
- */
- virtual bool isEOF() = 0;
+ /**
+ * Perform a unit of work on the query. Ask the stage to produce the next unit of output.
+ * Stage returns StageState::ADVANCED if *out is set to the next unit of output. Otherwise,
+ * returns another value of StageState to indicate the stage's status.
+ */
+ virtual StageState work(WorkingSetID* out) = 0;
- //
- // Yielding and isolation semantics:
- //
- // Any data that is not inserted, deleted, or modified during a yield will be faithfully
- // returned by a query that should return that data.
- //
- // Any data inserted, deleted, or modified during a yield that should be returned by a query
- // may or may not be returned by that query. The query could return: nothing; the data
- // before; the data after; or both the data before and the data after.
- //
- // In short, there is no isolation between a query and an insert/delete/update. AKA,
- // READ_UNCOMMITTED.
- //
+ /**
+ * Returns true if no more work can be done on the query / out of results.
+ */
+ virtual bool isEOF() = 0;
- /**
- * Notifies the stage that all locks are about to be released. The stage must save any
- * state required to resume where it was before saveState was called.
- *
- * Stages must be able to handle multiple calls to saveState() in a row without a call to
- * restoreState() in between.
- */
- virtual void saveState() = 0;
+ //
+ // Yielding and isolation semantics:
+ //
+ // Any data that is not inserted, deleted, or modified during a yield will be faithfully
+ // returned by a query that should return that data.
+ //
+ // Any data inserted, deleted, or modified during a yield that should be returned by a query
+ // may or may not be returned by that query. The query could return: nothing; the data
+ // before; the data after; or both the data before and the data after.
+ //
+ // In short, there is no isolation between a query and an insert/delete/update. AKA,
+ // READ_UNCOMMITTED.
+ //
- /**
- * Notifies the stage that any required locks have been reacquired. The stage must restore
- * any saved state and be ready to handle calls to work().
- *
- * Can only be called after saveState.
- *
- * If the stage needs an OperationContext during its execution, it may keep a handle to the
- * provided OperationContext (which is valid until the next call to saveState()).
- */
- virtual void restoreState(OperationContext* opCtx) = 0;
+ /**
+ * Notifies the stage that all locks are about to be released. The stage must save any
+ * state required to resume where it was before saveState was called.
+ *
+ * Stages must be able to handle multiple calls to saveState() in a row without a call to
+ * restoreState() in between.
+ */
+ virtual void saveState() = 0;
- /**
- * Notifies a stage that a RecordId is going to be deleted (or in-place updated) so that the
- * stage can invalidate or modify any state required to continue processing without this
- * RecordId.
- *
- * Can only be called after a saveState but before a restoreState.
- *
- * The provided OperationContext should be used if any work needs to be performed during the
- * invalidate (as the state of the stage must be saved before any calls to invalidate, the
- * stage's own OperationContext is inactive during the invalidate and should not be used).
- */
- virtual void invalidate(OperationContext* txn,
- const RecordId& dl,
- InvalidationType type) = 0;
+ /**
+ * Notifies the stage that any required locks have been reacquired. The stage must restore
+ * any saved state and be ready to handle calls to work().
+ *
+ * Can only be called after saveState.
+ *
+ * If the stage needs an OperationContext during its execution, it may keep a handle to the
+ * provided OperationContext (which is valid until the next call to saveState()).
+ */
+ virtual void restoreState(OperationContext* opCtx) = 0;
- /**
- * Retrieve a list of this stage's children. This stage keeps ownership of
- * its children.
- */
- virtual std::vector<PlanStage*> getChildren() const = 0;
+ /**
+ * Notifies a stage that a RecordId is going to be deleted (or in-place updated) so that the
+ * stage can invalidate or modify any state required to continue processing without this
+ * RecordId.
+ *
+ * Can only be called after a saveState but before a restoreState.
+ *
+ * The provided OperationContext should be used if any work needs to be performed during the
+ * invalidate (as the state of the stage must be saved before any calls to invalidate, the
+ * stage's own OperationContext is inactive during the invalidate and should not be used).
+ */
+ virtual void invalidate(OperationContext* txn, const RecordId& dl, InvalidationType type) = 0;
- /**
- * What type of stage is this?
- */
- virtual StageType stageType() const = 0;
+ /**
+ * Retrieve a list of this stage's children. This stage keeps ownership of
+ * its children.
+ */
+ virtual std::vector<PlanStage*> getChildren() const = 0;
- //
- // Execution stats.
- //
+ /**
+ * What type of stage is this?
+ */
+ virtual StageType stageType() const = 0;
- /**
- * Returns a tree of stats. See plan_stats.h for the details of this structure. If the
- * stage has any children it must propagate the request for stats to them.
- *
- * Creates plan stats tree which has the same topology as the original execution tree,
- * but has a separate lifetime.
- *
- * Caller owns returned pointer.
- */
- virtual PlanStageStats* getStats() = 0;
+ //
+ // Execution stats.
+ //
- /**
- * Get the CommonStats for this stage. The pointer is *not* owned by the caller.
- *
- * The returned pointer is only valid when the corresponding stage is also valid.
- * It must not exist past the stage. If you need the stats to outlive the stage,
- * use the getStats(...) method above.
- */
- virtual const CommonStats* getCommonStats() const = 0;
+ /**
+ * Returns a tree of stats. See plan_stats.h for the details of this structure. If the
+ * stage has any children it must propagate the request for stats to them.
+ *
+ * Creates plan stats tree which has the same topology as the original execution tree,
+ * but has a separate lifetime.
+ *
+ * Caller owns returned pointer.
+ */
+ virtual PlanStageStats* getStats() = 0;
- /**
- * Get stats specific to this stage. Some stages may not have specific stats, in which
- * case they return NULL. The pointer is *not* owned by the caller.
- *
- * The returned pointer is only valid when the corresponding stage is also valid.
- * It must not exist past the stage. If you need the stats to outlive the stage,
- * use the getStats(...) method above.
- */
- virtual const SpecificStats* getSpecificStats() const = 0;
+ /**
+ * Get the CommonStats for this stage. The pointer is *not* owned by the caller.
+ *
+ * The returned pointer is only valid when the corresponding stage is also valid.
+ * It must not exist past the stage. If you need the stats to outlive the stage,
+ * use the getStats(...) method above.
+ */
+ virtual const CommonStats* getCommonStats() const = 0;
- };
+ /**
+ * Get stats specific to this stage. Some stages may not have specific stats, in which
+ * case they return NULL. The pointer is *not* owned by the caller.
+ *
+ * The returned pointer is only valid when the corresponding stage is also valid.
+ * It must not exist past the stage. If you need the stats to outlive the stage,
+ * use the getStats(...) method above.
+ */
+ virtual const SpecificStats* getSpecificStats() const = 0;
+};
} // namespace mongo