summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Mathias Stearn <mathias@10gen.com> 2013-12-03 16:35:36 -0500
committer Mathias Stearn <mathias@10gen.com> 2013-12-05 11:41:10 -0500
commit ac5baf851f16c8d14800864d2fad04c3e67d7cf6 (patch)
tree bb091d7597382a5b14bf07749fcba090c43e88b8
parent 1f0b640fb7b0f6de22fe098f12d3b05b23c1caf4 (diff)
download mongo-ac5baf851f16c8d14800864d2fad04c3e67d7cf6.tar.gz
SERVER-11938 Get collection metadata at ShardFilterStage build time
-rw-r--r-- src/mongo/db/exec/shard_filter.cpp 10
-rw-r--r-- src/mongo/db/exec/shard_filter.h 39
-rw-r--r-- src/mongo/db/query/query_planner_params.h 3
-rw-r--r-- src/mongo/db/query/stage_builder.cpp 2
4 files changed, 41 insertions, 13 deletions
diff --git a/src/mongo/db/exec/shard_filter.cpp b/src/mongo/db/exec/shard_filter.cpp
index e0899065f3c..a67155cf87f 100644
--- a/src/mongo/db/exec/shard_filter.cpp
+++ b/src/mongo/db/exec/shard_filter.cpp
@@ -32,8 +32,10 @@
namespace mongo {
- ShardFilterStage::ShardFilterStage(const string& ns, WorkingSet* ws, PlanStage* child)
- : _ws(ws), _child(child), _ns(ns), _initted(false) { }
+ ShardFilterStage::ShardFilterStage(const CollectionMetadataPtr& metadata,
+ WorkingSet* ws,
+ PlanStage* child)
+ : _ws(ws), _child(child), _metadata(metadata) { }
ShardFilterStage::~ShardFilterStage() { }
@@ -41,10 +43,6 @@ namespace mongo {
PlanStage::StageState ShardFilterStage::work(WorkingSetID* out) {
++_commonStats.works;
- if (!_initted) {
- _metadata = shardingState.getCollectionMetadata(_ns);
- _initted = true;
- }
// If we've returned as many results as we're limited to, isEOF will be true.
if (isEOF()) { return PlanStage::IS_EOF; }
diff --git a/src/mongo/db/exec/shard_filter.h b/src/mongo/db/exec/shard_filter.h
index a9d13a35a22..6dc238e0539 100644
--- a/src/mongo/db/exec/shard_filter.h
+++ b/src/mongo/db/exec/shard_filter.h
@@ -38,14 +38,43 @@
namespace mongo {
/**
- * This stage drops documents that don't belong to the shard we're executing on.
+ * This stage drops documents that didn't belong to the shard we're executing on at the time of
+ * construction. This matches the contract for sharded cursorids which guarantees that a
+ * StaleConfigException will be thrown early or the cursorid for its entire lifetime will return
+ * documents matching the shard version set on the connection at the time of cursorid creation.
+ *
+ * A related system will ensure that the data migrated away from a shard will not be deleted as
+ * long as there are active queries from before the migration. Currently, "active queries" is
+ * defined by cursorids so it is important that the metadata used in this stage uses the same
+ * version as the cursorid. Therefore, you must wrap any Runner using this Stage in a
+ * ClientCursor during the same lock grab as constructing the Runner.
+ *
+ * BEGIN NOTE FROM GREG
+ *
+ * There are three sharded query contracts:
+ *
+ * 0) Migration commit takes the db lock - i.e. is serialized with writes and reads.
+ * 1) No data should be returned from a query in ranges of migrations that committed after the
+ * query started, or from ranges not owned when the query began.
+ * 2) No migrated data should be removed from a shard while there are queries that were active
+ * before the migration.
+ *
+ * As implementation details, collection metadata is used to determine the ranges of all data
+ * not actively migrated (or orphaned). CursorIds are currently used to establish "active"
+ * queries before migration commit.
+ *
+ * Combining all this: if a query is started in a db lock and acquires in that (same) lock the
+ * collection metadata and a cursorId, the query will return results for exactly the ranges in
+ * the metadata (though of arbitrary staleness). This is the sharded collection query contract.
+ *
+ * END NOTE FROM GREG
*
* Preconditions: Child must be fetched. TODO XXX: when covering analysis is in just build doc
* and check that against shard key.
*/
class ShardFilterStage : public PlanStage {
public:
- ShardFilterStage(const string& ns, WorkingSet* ws, PlanStage* child);
+ ShardFilterStage(const CollectionMetadataPtr& metadata, WorkingSet* ws, PlanStage* child);
virtual ~ShardFilterStage();
virtual bool isEOF();
@@ -60,14 +89,14 @@ namespace mongo {
private:
WorkingSet* _ws;
scoped_ptr<PlanStage> _child;
- string _ns;
// Stats
CommonStats _commonStats;
ShardingFilterStats _specificStats;
- bool _initted;
- CollectionMetadataPtr _metadata;
+ // Note: it is important that this is the metadata from the time this stage is constructed.
+ // See class comment for details.
+ const CollectionMetadataPtr _metadata;
};
} // namespace mongo
diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h
index 1fa1c4aeaa8..7ccc48bcd75 100644
--- a/src/mongo/db/query/query_planner_params.h
+++ b/src/mongo/db/query/query_planner_params.h
@@ -52,7 +52,8 @@ namespace mongo {
//
// In order to set this, you must check
// shardingState.needCollectionMetadata(current_namespace) in the same lock that you use
- // to build the query runner.
+ // to build the query runner. You must also wrap the Runner in a ClientCursor within the
+ // same lock. See the comment on ShardFilterStage for details.
INCLUDE_SHARD_FILTER = 4,
// Set this if you don't want any plans with a blocking sort stage. All sorts must be
diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp
index 2332ac7de04..68da614945b 100644
--- a/src/mongo/db/query/stage_builder.cpp
+++ b/src/mongo/db/query/stage_builder.cpp
@@ -228,7 +228,7 @@ namespace mongo {
const ShardingFilterNode* fn = static_cast<const ShardingFilterNode*>(root);
PlanStage* childStage = buildStages(ns, fn->children[0], ws);
if (NULL == childStage) { return NULL; }
- return new ShardFilterStage(ns, ws, childStage);
+ return new ShardFilterStage(shardingState.getCollectionMetadata(ns), ws, childStage);
}
else {
stringstream ss;