diff options
author | Mathias Stearn <mathias@10gen.com> | 2013-12-03 16:35:36 -0500 |
---|---|---|
committer | Mathias Stearn <mathias@10gen.com> | 2013-12-05 11:41:10 -0500 |
commit | ac5baf851f16c8d14800864d2fad04c3e67d7cf6 (patch) | |
tree | bb091d7597382a5b14bf07749fcba090c43e88b8 | |
parent | 1f0b640fb7b0f6de22fe098f12d3b05b23c1caf4 (diff) | |
download | mongo-ac5baf851f16c8d14800864d2fad04c3e67d7cf6.tar.gz |
SERVER-11938 Get collection metadata at ShardFilterStage build time
-rw-r--r-- | src/mongo/db/exec/shard_filter.cpp | 10 |
-rw-r--r-- | src/mongo/db/exec/shard_filter.h | 39 |
-rw-r--r-- | src/mongo/db/query/query_planner_params.h | 3 |
-rw-r--r-- | src/mongo/db/query/stage_builder.cpp | 2 |
4 files changed, 41 insertions, 13 deletions
```diff
diff --git a/src/mongo/db/exec/shard_filter.cpp b/src/mongo/db/exec/shard_filter.cpp
index e0899065f3c..a67155cf87f 100644
--- a/src/mongo/db/exec/shard_filter.cpp
+++ b/src/mongo/db/exec/shard_filter.cpp
@@ -32,8 +32,10 @@ namespace mongo {
-    ShardFilterStage::ShardFilterStage(const string& ns, WorkingSet* ws, PlanStage* child)
-        : _ws(ws), _child(child), _ns(ns), _initted(false) { }
+    ShardFilterStage::ShardFilterStage(const CollectionMetadataPtr& metadata,
+                                       WorkingSet* ws,
+                                       PlanStage* child)
+        : _ws(ws), _child(child), _metadata(metadata) { }
     ShardFilterStage::~ShardFilterStage() { }
@@ -41,10 +43,6 @@ namespace mongo {
     PlanStage::StageState ShardFilterStage::work(WorkingSetID* out) {
         ++_commonStats.works;
-        if (!_initted) {
-            _metadata = shardingState.getCollectionMetadata(_ns);
-            _initted = true;
-        }
         // If we've returned as many results as we're limited to, isEOF will be true.
         if (isEOF()) { return PlanStage::IS_EOF; }
diff --git a/src/mongo/db/exec/shard_filter.h b/src/mongo/db/exec/shard_filter.h
index a9d13a35a22..6dc238e0539 100644
--- a/src/mongo/db/exec/shard_filter.h
+++ b/src/mongo/db/exec/shard_filter.h
@@ -38,14 +38,43 @@ namespace mongo {
     /**
-     * This stage drops documents that don't belong to the shard we're executing on.
+     * This stage drops documents that didn't belong to the shard we're executing on at the time of
+     * construction. This matches the contract for sharded cursorids which guarantees that a
+     * StaleConfigException will be thrown early or the cursorid for its entire lifetime will return
+     * documents matching the shard version set on the connection at the time of cursorid creation.
+     *
+     * A related system will ensure that the data migrated away from a shard will not be deleted as
+     * long as there are active queries from before the migration. Currently, "active queries" is
+     * defined by cursorids so it is important that the metadata used in this stage uses the same
+     * version as the cursorid. Therefore, you must wrap any Runner using this Stage in a
+     * ClientCursor during the same lock grab as constructing the Runner.
+     *
+     * BEGIN NOTE FROM GREG
+     *
+     * There are three sharded query contracts:
+     *
+     * 0) Migration commit takes the db lock - i.e. is serialized with writes and reads.
+     * 1) No data should be returned from a query in ranges of migrations that committed after the
+     * query started, or from ranges not owned when the query began.
+     * 2) No migrated data should be removed from a shard while there are queries that were active
+     * before the migration.
+     *
+     * As implementation details, collection metadata is used to determine the ranges of all data
+     * not actively migrated (or orphaned). CursorIds are currently used to establish "active"
+     * queries before migration commit.
+     *
+     * Combining all this: if a query is started in a db lock and acquires in that (same) lock the
+     * collection metadata and a cursorId, the query will return results for exactly the ranges in
+     * the metadata (though of arbitrary staleness). This is the sharded collection query contract.
+     *
+     * END NOTE FROM GREG
      *
      * Preconditions: Child must be fetched. TODO XXX: when covering analysis is in just build doc
      * and check that against shard key.
      */
     class ShardFilterStage : public PlanStage {
     public:
-        ShardFilterStage(const string& ns, WorkingSet* ws, PlanStage* child);
+        ShardFilterStage(const CollectionMetadataPtr& metadata, WorkingSet* ws, PlanStage* child);
         virtual ~ShardFilterStage();
         virtual bool isEOF();
@@ -60,14 +89,14 @@ namespace mongo {
     private:
         WorkingSet* _ws;
         scoped_ptr<PlanStage> _child;
-        string _ns;
         // Stats
         CommonStats _commonStats;
         ShardingFilterStats _specificStats;
-        bool _initted;
-        CollectionMetadataPtr _metadata;
+        // Note: it is important that this is the metadata from the time this stage is constructed.
+        // See class comment for details.
+        const CollectionMetadataPtr _metadata;
     };
 } // namespace mongo
diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h
index 1fa1c4aeaa8..7ccc48bcd75 100644
--- a/src/mongo/db/query/query_planner_params.h
+++ b/src/mongo/db/query/query_planner_params.h
@@ -52,7 +52,8 @@ namespace mongo {
             //
             // In order to set this, you must check
             // shardingState.needCollectionMetadata(current_namespace) in the same lock that you use
-            // to build the query runner.
+            // to build the query runner. You must also wrap the Runner in a ClientCursor within the
+            // same lock. See the comment on ShardFilterStage for details.
             INCLUDE_SHARD_FILTER = 4,
             // Set this if you don't want any plans with a blocking sort stage. All sorts must be
diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp
index 2332ac7de04..68da614945b 100644
--- a/src/mongo/db/query/stage_builder.cpp
+++ b/src/mongo/db/query/stage_builder.cpp
@@ -228,7 +228,7 @@ namespace mongo {
             const ShardingFilterNode* fn = static_cast<const ShardingFilterNode*>(root);
             PlanStage* childStage = buildStages(ns, fn->children[0], ws);
             if (NULL == childStage) { return NULL; }
-            return new ShardFilterStage(ns, ws, childStage);
+            return new ShardFilterStage(shardingState.getCollectionMetadata(ns), ws, childStage);
         }
         else {
             stringstream ss;
```