From 3d19c459fe80fb7aa3244174b61994f13f71c21f Mon Sep 17 00:00:00 2001 From: Jason Rassi Date: Thu, 19 Jun 2014 06:42:41 -0400 Subject: SERVER-14302 SERVER-14304 Fix IDHackRunner's use of sharding filter (backport of 6fe6dee814326ac41f8d626c26bf32763ce73d71) --- jstests/sharding/idhack_sharded.js | 39 +++++++++++ src/mongo/db/query/idhack_runner.cpp | 121 +++++++++++++++++++---------------- src/mongo/db/query/idhack_runner.h | 8 +-- src/mongo/db/query/new_find.cpp | 9 ++- 4 files changed, 115 insertions(+), 62 deletions(-) create mode 100644 jstests/sharding/idhack_sharded.js diff --git a/jstests/sharding/idhack_sharded.js b/jstests/sharding/idhack_sharded.js new file mode 100644 index 00000000000..3414477fa31 --- /dev/null +++ b/jstests/sharding/idhack_sharded.js @@ -0,0 +1,39 @@ +// Test that idhack queries with projections obey the sharding filter. SERVER-14032, SERVER-14034. + +var st = new ShardingTest({shards: 2}); +var coll = st.s0.getCollection("test.foo"); + +// Force write commands mode; this is a backport of a new test. +st.s0.forceWriteMode("commands"); +st.shard0.forceWriteMode("commands"); +st.shard1.forceWriteMode("commands"); + +// +// Pre-split collection: shard 0 takes {x: {$lt: 0}}, shard 1 takes {x: {$gte: 0}}. +// +assert.commandWorked(coll.getDB().adminCommand({enableSharding: coll.getDB().getName()})); +coll.getDB().adminCommand({movePrimary: coll.getDB().getName(), to: "shard0000"}); +assert.commandWorked(coll.getDB().adminCommand({shardCollection: coll.getFullName(), key: {x: 1}})); +assert.commandWorked(coll.getDB().adminCommand({split: coll.getFullName(), middle: {x: 0}})); +assert.commandWorked(coll.getDB().adminCommand({moveChunk: coll.getFullName(), find: {x: 0}, + to: "shard0001"})); + +// +// Test that idhack queries with projections that remove the shard key return correct results. +// SERVER-14032. +// +assert.writeOK(coll.insert({_id: 1, x: 1, y: 1})); +assert.eq(1, coll.find().itcount()); +assert.eq(1, coll.find({_id: 1}, {x: 0}).itcount()); +assert.eq(1, coll.find({_id: 1}, {y: 1}).itcount()); +coll.remove({}); + +// +// Test that idhack queries with covered projections do not return orphan documents. SERVER-14034. +// +assert.writeOK(st.shard0.getCollection(coll.getFullName()).insert({_id: 1, x: 1})); +assert.writeOK(st.shard1.getCollection(coll.getFullName()).insert({_id: 1, x: 1})); +assert.eq(2, coll.count()); +assert.eq(1, coll.find().itcount()); +assert.eq(1, coll.find({_id: 1}, {_id: 1}).itcount()); +coll.remove({}); diff --git a/src/mongo/db/query/idhack_runner.cpp b/src/mongo/db/query/idhack_runner.cpp index 7fcea5c9267..210601ec5a1 100644 --- a/src/mongo/db/query/idhack_runner.cpp +++ b/src/mongo/db/query/idhack_runner.cpp @@ -96,58 +96,74 @@ namespace mongo { if (NULL == objOut) { // No object requested - nothing to do. } - else if (hasCoveredProjection()) { - // Covered query on _id field only. - // Set object to search key. - // Search key is retrieved from the canonical query at - // construction and always contains the _id field name. - // It is possible to construct the ID hack runner with just the collection - // and the key object (which could be {"": my_obj_id}) but _query would be null - // in that case and the query would never be seen as covered. - *objOut = _key.getOwned(); - } else { - // Fetch object from storage. - Record* record = loc.rec(); - - _nscannedObjects++; - - // If the record isn't in memory... - if (!Record::likelyInPhysicalMemory(record->dataNoThrowing())) { - // And we're allowed to yield ourselves... - if (Runner::YIELD_AUTO == _policy) { - // Note what we're yielding to fetch so that we don't crash if the loc is - // deleted during a yield. - _locFetching = loc; - // Yield. TODO: Do we want to bother yielding if micros < 0? - int micros = ClientCursor::suggestYieldMicros(); - ClientCursor::staticYield(micros, "", record); - // This can happen when we're yielded for various reasons (e.g. db/idx dropped). - if (_killed) { - _done = true; - return Runner::RUNNER_DEAD; - } - } + // If we're sharded, get the config metadata for this collection. This will be used + // later to see if we own the document to be returned. + // + // Query execution machinery should generally delegate to ShardFilterStage in order to + // accomplish this task. It is only safe to rely on the state of the config metadata + // here because it is not possible for the config metadata to change during the lifetime + // of the IDHackRunner (since the IDHackRunner returns only a single document, the + // config metadata must be the same as it was when the query started). The one + // exception to this is if the query yields when fetching the document, but that case is + // currently handled by newRunQuery() (which contains logic to detect this and to throw + // SendStaleConfigException if it occurs). + CollectionMetadataPtr collectionMetadata; + if (shardingState.needCollectionMetadata(_collection->ns().ns())) { + collectionMetadata = shardingState.getCollectionMetadata(_collection->ns().ns()); } - // If we're here, either the data was in memory or we paged it in. + // If we're not sharded, consider a covered projection (we can't if we're sharded, since + // we require a fetch in order to apply the sharding filter). + if (!collectionMetadata && hasCoveredProjection()) { + // Covered query on _id field only. Set object to search key. Search key is + // retrieved from the canonical query at construction and always contains the _id + // field name. It is possible to construct the ID hack runner with just the + // collection and the key object (which could be {"": my_obj_id}) but _query would + // be null in that case and the query would never be seen as covered. + *objOut = _key.getOwned(); + } + // Otherwise, fetch the document. + else { + Record* record = loc.rec(); + + _nscannedObjects++; + + // If the record isn't in memory... + if (!Record::likelyInPhysicalMemory(record->dataNoThrowing())) { + // And we're allowed to yield ourselves... + if (Runner::YIELD_AUTO == _policy) { + // Note what we're yielding to fetch so that we don't crash if the loc is + // deleted during a yield. + _locFetching = loc; + // Yield. TODO: Do we want to bother yielding if micros < 0? + int micros = ClientCursor::suggestYieldMicros(); + ClientCursor::staticYield(micros, "", record); + // This can happen when we're yielded for various reasons (e.g. db/idx dropped). + if (_killed) { + _done = true; + return Runner::RUNNER_DEAD; + } + } + } - if (!applyProjection(loc, objOut)) { - // No projection. Just return the object inside the diskloc. + // If we're here, either the data was in memory or we paged it in. *objOut = loc.obj(); - } - // If we're sharded make sure the key belongs to us. We need the object to do this. - if (shardingState.needCollectionMetadata(_collection->ns().ns())) { - CollectionMetadataPtr m = shardingState.getCollectionMetadata(_collection->ns().ns()); - if (m) { - KeyPattern kp(m->getKeyPattern()); - if (!m->keyBelongsToMe( kp.extractSingleKey(*objOut))) { + // If we're sharded, make sure the key belongs to us. + if (collectionMetadata) { + KeyPattern kp(collectionMetadata->getKeyPattern()); + if (!collectionMetadata->keyBelongsToMe(kp.extractSingleKey(*objOut))) { // We have something with a matching _id but it doesn't belong to me. _done = true; return Runner::RUNNER_EOF; } } + + // Apply the projection if one was requested. + if (_query && _query->getProj()) { + *objOut = applyProjection(*objOut); + } } } @@ -160,13 +176,8 @@ namespace mongo { return Runner::RUNNER_ADVANCED; } - bool IDHackRunner::applyProjection(const DiskLoc& loc, BSONObj* objOut) const { - if (NULL == _query.get() || NULL == _query->getProj()) { - // This idhack query does not have a projection. - return false; - } - - const BSONObj& docAtLoc = loc.obj(); + BSONObj IDHackRunner::applyProjection(const BSONObj& docObj) const { + invariant(_query && _query->getProj()); // We have a non-covered projection (covered projections should be handled earlier, // in getNext(..). For simple inclusion projections we use a fast path similar to that @@ -179,12 +190,14 @@ namespace mongo { BSONObjBuilder bob; const BSONObj& queryObj = _query->getParsed().getFilter(); bob.append(queryObj["_id"]); - *objOut = bob.obj(); + return bob.obj(); } else if (_query->getProj()->requiresDocument() || _query->getProj()->wantIndexKey()) { // Not a simple projection, so fallback on the regular projection path. + BSONObj projectedObj; ProjectionExec projExec(projObj, _query->root()); - projExec.transform(docAtLoc, objOut); + projExec.transform(docObj, &projectedObj); + return projectedObj; } else { // This is a simple inclusion projection. Start by getting the set @@ -194,13 +207,11 @@ namespace mongo { // Apply the simple inclusion projection. BSONObjBuilder bob; - ProjectionStage::transformSimpleInclusion(docAtLoc, includedFields, bob); + ProjectionStage::transformSimpleInclusion(docObj, includedFields, bob); - *objOut = bob.obj(); + return bob.obj(); } - - return true; - } + } bool IDHackRunner::isEOF() { return _killed || _done; diff --git a/src/mongo/db/query/idhack_runner.h b/src/mongo/db/query/idhack_runner.h index fce573ee6e5..5f1d0d754ef 100644 --- a/src/mongo/db/query/idhack_runner.h +++ b/src/mongo/db/query/idhack_runner.h @@ -90,12 +90,10 @@ namespace mongo { bool hasCoveredProjection() const; /** - * If '_query' has a projection, then apply it, returning the result in 'objOut'. - * The diskloc 'loc' contains the BSONObj to transform. - * - * Otherwise do nothing and return false. + * Apply the projection from '_query' to the given object and return the result. + * '_query->getProj()' must be non-NULL. */ - bool applyProjection(const DiskLoc& loc, BSONObj* objOut) const; + BSONObj applyProjection(const BSONObj& docObj) const; // Not owned here. const Collection* _collection; diff --git a/src/mongo/db/query/new_find.cpp b/src/mongo/db/query/new_find.cpp index 00520827c32..2848e8300b8 100644 --- a/src/mongo/db/query/new_find.cpp +++ b/src/mongo/db/query/new_find.cpp @@ -668,8 +668,13 @@ namespace mongo { // TODO(greg): This will go away soon. if (!shardingState.getVersion(pq.ns()).isWriteCompatibleWith(shardingVersionAtStart)) { - // if the version changed during the query we might be missing some data and its safe to - // send this as mongos can resend at this point + // If the version changed during the query we might be missing some data, and it's safe + // to send this as mongos can resend at this point. + // + // This check is particularly important for the IDHackRunner, which can yield between + // getting the config metadata and checking ownership of the document to be returned. + // If the version changes during that yield, this branch will be taken to force a retry + // of the query. throw SendStaleConfigException(pq.ns(), "version changed during initial query", shardingVersionAtStart, shardingState.getVersion(pq.ns())); -- cgit v1.2.1