From 3d19c459fe80fb7aa3244174b61994f13f71c21f Mon Sep 17 00:00:00 2001
From: Jason Rassi <rassi@10gen.com>
Date: Thu, 19 Jun 2014 06:42:41 -0400
Subject: SERVER-14302 SERVER-14304 Fix IDHackRunner's use of sharding filter

(backport of 6fe6dee814326ac41f8d626c26bf32763ce73d71)
---
 jstests/sharding/idhack_sharded.js   |  39 +++++++++++
 src/mongo/db/query/idhack_runner.cpp | 121 +++++++++++++++++++----------------
 src/mongo/db/query/idhack_runner.h   |   8 +--
 src/mongo/db/query/new_find.cpp      |   9 ++-
 4 files changed, 115 insertions(+), 62 deletions(-)
 create mode 100644 jstests/sharding/idhack_sharded.js

diff --git a/jstests/sharding/idhack_sharded.js b/jstests/sharding/idhack_sharded.js
new file mode 100644
index 00000000000..3414477fa31
--- /dev/null
+++ b/jstests/sharding/idhack_sharded.js
@@ -0,0 +1,39 @@
+// Test that idhack queries with projections obey the sharding filter.  SERVER-14032, SERVER-14034.
+
+var st = new ShardingTest({shards: 2});
+var coll = st.s0.getCollection("test.foo");
+
+// Force write commands mode; this is a backport of a new test.
+st.s0.forceWriteMode("commands");
+st.shard0.forceWriteMode("commands");
+st.shard1.forceWriteMode("commands");
+
+//
+// Pre-split collection: shard 0 takes {x: {$lt: 0}}, shard 1 takes {x: {$gte: 0}}.
+//
+assert.commandWorked(coll.getDB().adminCommand({enableSharding: coll.getDB().getName()}));
+coll.getDB().adminCommand({movePrimary: coll.getDB().getName(), to: "shard0000"});
+assert.commandWorked(coll.getDB().adminCommand({shardCollection: coll.getFullName(), key: {x: 1}}));
+assert.commandWorked(coll.getDB().adminCommand({split: coll.getFullName(), middle: {x: 0}}));
+assert.commandWorked(coll.getDB().adminCommand({moveChunk: coll.getFullName(), find: {x: 0},
+                                                to: "shard0001"}));
+
+//
+// Test that idhack queries with projections that remove the shard key return correct results.
+// SERVER-14032.
+//
+assert.writeOK(coll.insert({_id: 1, x: 1, y: 1}));
+assert.eq(1, coll.find().itcount());
+assert.eq(1, coll.find({_id: 1}, {x: 0}).itcount());
+assert.eq(1, coll.find({_id: 1}, {y: 1}).itcount());
+coll.remove({});
+
+//
+// Test that idhack queries with covered projections do not return orphan documents.  SERVER-14034.
+//
+assert.writeOK(st.shard0.getCollection(coll.getFullName()).insert({_id: 1, x: 1}));
+assert.writeOK(st.shard1.getCollection(coll.getFullName()).insert({_id: 1, x: 1}));
+assert.eq(2, coll.count());
+assert.eq(1, coll.find().itcount());
+assert.eq(1, coll.find({_id: 1}, {_id: 1}).itcount());
+coll.remove({});
diff --git a/src/mongo/db/query/idhack_runner.cpp b/src/mongo/db/query/idhack_runner.cpp
index 7fcea5c9267..210601ec5a1 100644
--- a/src/mongo/db/query/idhack_runner.cpp
+++ b/src/mongo/db/query/idhack_runner.cpp
@@ -96,58 +96,74 @@ namespace mongo {
         if (NULL == objOut) {
             // No object requested - nothing to do.
         }
-        else if (hasCoveredProjection()) {
-            // Covered query on _id field only.
-            // Set object to search key.
-            // Search key is retrieved from the canonical query at
-            // construction and always contains the _id field name.
-            // It is possible to construct the ID hack runner with just the collection
-            // and the key object (which could be {"": my_obj_id}) but _query would be null
-            // in that case and the query would never be seen as covered.
-            *objOut = _key.getOwned();
-        }
         else {
-            // Fetch object from storage.
-            Record* record = loc.rec();
-
-            _nscannedObjects++;
-
-            // If the record isn't in memory...
-            if (!Record::likelyInPhysicalMemory(record->dataNoThrowing())) {
-                // And we're allowed to yield ourselves...
-                if (Runner::YIELD_AUTO == _policy) {
-                    // Note what we're yielding to fetch so that we don't crash if the loc is
-                    // deleted during a yield.
-                    _locFetching = loc;
-                    // Yield.  TODO: Do we want to bother yielding if micros < 0?
-                    int micros = ClientCursor::suggestYieldMicros();
-                    ClientCursor::staticYield(micros, "", record);
-                    // This can happen when we're yielded for various reasons (e.g. db/idx dropped).
-                    if (_killed) {
-                        _done = true;
-                        return Runner::RUNNER_DEAD;
-                    }
-                }
+            // If we're sharded, get the config metadata for this collection.  This will be used
+            // later to see if we own the document to be returned.
+            //
+            // Query execution machinery should generally delegate to ShardFilterStage in order to
+            // accomplish this task.  It is only safe to rely on the state of the config metadata
+            // here because it is not possible for the config metadata to change during the lifetime
+            // of the IDHackRunner (since the IDHackRunner returns only a single document, the
+            // config metadata must be the same as it was when the query started).  The one
+            // exception to this is if the query yields when fetching the document, but that case is
+            // currently handled by newRunQuery() (which contains logic to detect this and to throw
+            // SendStaleConfigException if it occurs).
+            CollectionMetadataPtr collectionMetadata;
+            if (shardingState.needCollectionMetadata(_collection->ns().ns())) {
+                collectionMetadata = shardingState.getCollectionMetadata(_collection->ns().ns());
             }
 
-            // If we're here, either the data was in memory or we paged it in.
+            // If we're not sharded, consider a covered projection (we can't if we're sharded, since
+            // we require a fetch in order to apply the sharding filter).
+            if (!collectionMetadata && hasCoveredProjection()) {
+                // Covered query on _id field only.  Set object to search key.  Search key is
+                // retrieved from the canonical query at construction and always contains the _id
+                // field name.  It is possible to construct the ID hack runner with just the
+                // collection and the key object (which could be {"": my_obj_id}) but _query would
+                // be null in that case and the query would never be seen as covered.
+                *objOut = _key.getOwned();
+            }
+            // Otherwise, fetch the document.
+            else {
+                Record* record = loc.rec();
+
+                _nscannedObjects++;
+
+                // If the record isn't in memory...
+                if (!Record::likelyInPhysicalMemory(record->dataNoThrowing())) {
+                    // And we're allowed to yield ourselves...
+                    if (Runner::YIELD_AUTO == _policy) {
+                        // Note what we're yielding to fetch so that we don't crash if the loc is
+                        // deleted during a yield.
+                        _locFetching = loc;
+                        // Yield.  TODO: Do we want to bother yielding if micros < 0?
+                        int micros = ClientCursor::suggestYieldMicros();
+                        ClientCursor::staticYield(micros, "", record);
+                        // This can happen when we're yielded for various reasons (e.g. db/idx dropped).
+                        if (_killed) {
+                            _done = true;
+                            return Runner::RUNNER_DEAD;
+                        }
+                    }
+                }
 
-            if (!applyProjection(loc, objOut)) {
-                // No projection. Just return the object inside the diskloc.
+                // If we're here, either the data was in memory or we paged it in.
                 *objOut = loc.obj();
-            }
 
-            // If we're sharded make sure the key belongs to us.  We need the object to do this.
-            if (shardingState.needCollectionMetadata(_collection->ns().ns())) {
-                CollectionMetadataPtr m = shardingState.getCollectionMetadata(_collection->ns().ns());
-                if (m) {
-                    KeyPattern kp(m->getKeyPattern());
-                    if (!m->keyBelongsToMe( kp.extractSingleKey(*objOut))) {
+                // If we're sharded, make sure the key belongs to us.
+                if (collectionMetadata) {
+                    KeyPattern kp(collectionMetadata->getKeyPattern());
+                    if (!collectionMetadata->keyBelongsToMe(kp.extractSingleKey(*objOut))) {
                         // We have something with a matching _id but it doesn't belong to me.
                         _done = true;
                         return Runner::RUNNER_EOF;
                     }
                 }
+
+                // Apply the projection if one was requested.
+                if (_query && _query->getProj()) {
+                    *objOut = applyProjection(*objOut);
+                }
             }
         }
 
@@ -160,13 +176,8 @@ namespace mongo {
         return Runner::RUNNER_ADVANCED;
     }
 
-     bool IDHackRunner::applyProjection(const DiskLoc& loc, BSONObj* objOut) const {
-        if (NULL == _query.get() || NULL == _query->getProj()) {
-            // This idhack query does not have a projection.
-            return false;
-        }
-
-        const BSONObj& docAtLoc = loc.obj();
+    BSONObj IDHackRunner::applyProjection(const BSONObj& docObj) const {
+        invariant(_query && _query->getProj());
 
         // We have a non-covered projection (covered projections should be handled earlier,
         // in getNext(..). For simple inclusion projections we use a fast path similar to that
@@ -179,12 +190,14 @@ namespace mongo {
             BSONObjBuilder bob;
             const BSONObj& queryObj = _query->getParsed().getFilter();
             bob.append(queryObj["_id"]);
-            *objOut = bob.obj();
+            return bob.obj();
         }
         else if (_query->getProj()->requiresDocument() || _query->getProj()->wantIndexKey()) {
             // Not a simple projection, so fallback on the regular projection path.
+            BSONObj projectedObj;
             ProjectionExec projExec(projObj, _query->root());
-            projExec.transform(docAtLoc, objOut);
+            projExec.transform(docObj, &projectedObj);
+            return projectedObj;
         }
         else {
             // This is a simple inclusion projection. Start by getting the set
@@ -194,13 +207,11 @@ namespace mongo {
 
             // Apply the simple inclusion projection.
             BSONObjBuilder bob;
-            ProjectionStage::transformSimpleInclusion(docAtLoc, includedFields, bob);
+            ProjectionStage::transformSimpleInclusion(docObj, includedFields, bob);
 
-            *objOut = bob.obj();
+            return bob.obj();
         }
-
-        return true;
-     }
+    }
 
     bool IDHackRunner::isEOF() {
         return _killed || _done;
diff --git a/src/mongo/db/query/idhack_runner.h b/src/mongo/db/query/idhack_runner.h
index fce573ee6e5..5f1d0d754ef 100644
--- a/src/mongo/db/query/idhack_runner.h
+++ b/src/mongo/db/query/idhack_runner.h
@@ -90,12 +90,10 @@ namespace mongo {
         bool hasCoveredProjection() const;
 
         /**
-         * If '_query' has a projection, then apply it, returning the result in 'objOut'.
-         * The diskloc 'loc' contains the BSONObj to transform.
-         *
-         * Otherwise do nothing and return false.
+         * Apply the projection from '_query' to the given object and return the result.
+         * '_query->getProj()' must be non-NULL.
          */
-         bool applyProjection(const DiskLoc& loc, BSONObj* objOut) const;
+        BSONObj applyProjection(const BSONObj& docObj) const;
 
         // Not owned here.
         const Collection* _collection;
diff --git a/src/mongo/db/query/new_find.cpp b/src/mongo/db/query/new_find.cpp
index 00520827c32..2848e8300b8 100644
--- a/src/mongo/db/query/new_find.cpp
+++ b/src/mongo/db/query/new_find.cpp
@@ -668,8 +668,13 @@ namespace mongo {
 
         // TODO(greg): This will go away soon.
         if (!shardingState.getVersion(pq.ns()).isWriteCompatibleWith(shardingVersionAtStart)) {
-            // if the version changed during the query we might be missing some data and its safe to
-            // send this as mongos can resend at this point
+            // If the version changed during the query we might be missing some data, and it's safe
+            // to send this as mongos can resend at this point.
+            //
+            // This check is particularly important for the IDHackRunner, which can yield between
+            // getting the config metadata and checking ownership of the document to be returned.
+            // If the version changes during that yield, this branch will be taken to force a retry
+            // of the query.
             throw SendStaleConfigException(pq.ns(), "version changed during initial query",
                                            shardingVersionAtStart,
                                            shardingState.getVersion(pq.ns()));
-- 
cgit v1.2.1