From 04237a169a4a8824fd6476617e237c5d69c156c7 Mon Sep 17 00:00:00 2001
From: Jason Rassi <rassi@10gen.com>
Date: Mon, 12 Jan 2015 17:24:21 -0500
Subject: SERVER-16607 Agg cursor's pin needs to be cleaned up under coll lock

---
 src/mongo/db/clientcursor.cpp              |  5 ++-
 src/mongo/db/clientcursor.h                | 15 +++++++--
 src/mongo/db/commands/pipeline_command.cpp | 49 +++++++++++++++++++++++++-----
 src/mongo/db/query/find.cpp                | 37 +++++++++++++++++++++-
 4 files changed, 94 insertions(+), 12 deletions(-)

(limited to 'src/mongo/db')
diff --git a/src/mongo/db/clientcursor.cpp b/src/mongo/db/clientcursor.cpp
index ffa40cab530..bc09cc4bc01 100644
--- a/src/mongo/db/clientcursor.cpp
+++ b/src/mongo/db/clientcursor.cpp
@@ -226,7 +226,7 @@ namespace mongo {
 
     ClientCursorPin::~ClientCursorPin() {
         cursorStatsOpenPinned.decrement();
-        DESTRUCTOR_GUARD( release(); );
+        release();
     }
 
     void ClientCursorPin::release() {
@@ -244,9 +244,12 @@ namespace mongo {
             // Unpin the cursor under the collection cursor manager lock.
             _cursor->cursorManager()->unpin( _cursor );
         }
+
+        _cursor = NULL;
     }
 
     void ClientCursorPin::deleteUnderlying() {
+        invariant( _cursor );
         invariant( _cursor->isPinned() );
         // Note the following subtleties of this method's implementation:
         // - We must unpin the cursor before destruction, since it is an error to destroy a pinned
diff --git a/src/mongo/db/clientcursor.h b/src/mongo/db/clientcursor.h
index 3f80c19d824..f89eaecd681 100644
--- a/src/mongo/db/clientcursor.h
+++ b/src/mongo/db/clientcursor.h
@@ -329,12 +329,21 @@ namespace mongo {
          */
         ~ClientCursorPin();
 
-        // This just releases the pin, does not delete the underlying
-        // unless ownership has passed to us after kill
+        /**
+         * Releases the pin.  It does not delete the underlying cursor unless ownership has passed
+         * to us after kill.  Turns into a no-op if release() or deleteUnderlying() have already
+         * been called on this pin.
+         */
         void release();
-        // Call this to delete the underlying ClientCursor.
+
+        /**
+         * Deletes the underlying cursor.  Cannot be called if release() or deleteUnderlying() have
+         * already been called on this pin.
+         */
         void deleteUnderlying();
+
         ClientCursor *c() const;
+
     private:
         ClientCursor* _cursor;
     };
diff --git a/src/mongo/db/commands/pipeline_command.cpp b/src/mongo/db/commands/pipeline_command.cpp
index cfabbc18cc7..ad8cbb0bcb0 100644
--- a/src/mongo/db/commands/pipeline_command.cpp
+++ b/src/mongo/db/commands/pipeline_command.cpp
@@ -58,7 +58,11 @@ namespace mongo {
     using boost::scoped_ptr;
     using boost::shared_ptr;
 
-    static void handleCursorCommand(OperationContext* txn,
+    /**
+     * Returns true if we need to keep a ClientCursor saved for this pipeline (for future getMore
+     * requests).  Otherwise, returns false.
+     */
+    static bool handleCursorCommand(OperationContext* txn,
                                     const string& ns,
                                     ClientCursorPin* pin,
                                     PlanExecutor* exec,
@@ -84,7 +88,6 @@ namespace mongo {
             // The initial getNext() on a PipelineProxyStage may be very expensive so we don't
             // do it when batchSize is 0 since that indicates a desire for a fast return.
             if (exec->getNext(&next, NULL) != PlanExecutor::ADVANCED) {
-                if (pin) pin->deleteUnderlying();
                 // make it an obvious error to use cursor or executor after this point
                 cursor = NULL;
                 exec = NULL;
@@ -135,6 +138,8 @@ namespace mongo {
 
         const long long cursorId = cursor ? cursor->cursorid() : 0LL;
         Command::appendCursorResponseObject(cursorId, ns, resultsArray.arr(), &result);
+
+        return static_cast<bool>(cursor);
     }
 
 
@@ -263,6 +268,13 @@ namespace mongo {
                                                   cursor->cursorid()));
                     // Don't add any code between here and the start of the try block.
                 }
+
+                // At this point, it is safe to release the collection lock.
+                // - In the case where we have a collection: we will need to reacquire the
+                //   collection lock later when cleaning up our ClientCursorPin.
+                // - In the case where we don't have a collection: our PlanExecutor won't be
+                //   registered, so it will be safe to clean it up outside the lock.
+                invariant(NULL == execHolder.get() || NULL == execHolder->collection());
             }
 
             try {
@@ -276,18 +288,41 @@ namespace mongo {
                     result << "stages" << Value(pPipeline->writeExplainOps());
                 }
                 else if (isCursorCommand) {
-                    handleCursorCommand(txn, nss.ns(), pin.get(), exec, cmdObj, result);
-                    keepCursor = true;
+                    keepCursor = handleCursorCommand(txn,
+                                                     nss.ns(),
+                                                     pin.get(),
+                                                     exec,
+                                                     cmdObj,
+                                                     result);
                 }
                 else {
                     pPipeline->run(result);
                 }
 
-                if (!keepCursor && pin) pin->deleteUnderlying();
+                // Clean up our ClientCursorPin, if needed.  We must reacquire the collection lock
+                // in order to do so.
+                if (pin) {
+                    // We acquire locks here with DBLock and CollectionLock instead of using
+                    // AutoGetCollectionForRead.  AutoGetCollectionForRead will throw if the
+                    // sharding version is out of date, and we don't care if the sharding version
+                    // has changed.
+                    Lock::DBLock dbLock(txn->lockState(), nss.db(), MODE_IS);
+                    Lock::CollectionLock collLock(txn->lockState(), nss.ns(), MODE_IS);
+                    if (keepCursor) {
+                        pin->release();
+                    }
+                    else {
+                        pin->deleteUnderlying();
+                    }
+                }
             }
             catch (...) {
-                // Clean up cursor on way out of scope.
-                if (pin) pin->deleteUnderlying();
+                // On our way out of scope, we clean up our ClientCursorPin if needed.
+                if (pin) {
+                    Lock::DBLock dbLock(txn->lockState(), nss.db(), MODE_IS);
+                    Lock::CollectionLock collLock(txn->lockState(), nss.ns(), MODE_IS);
+                    pin->deleteUnderlying();
+                }
                 throw;
             }
             // Any code that needs the cursor pinned must be inside the try block, above.
diff --git a/src/mongo/db/query/find.cpp b/src/mongo/db/query/find.cpp
index 2f14820e8f1..74e58268b68 100644
--- a/src/mongo/db/query/find.cpp
+++ b/src/mongo/db/query/find.cpp
@@ -194,9 +194,27 @@ namespace mongo {
 
         exhaust = false;
 
-        // This is a read lock.
         const NamespaceString nss(ns);
+
+        // Depending on the type of cursor being operated on, we hold locks for the whole getMore,
+        // or none of the getMore, or part of the getMore.  The three cases in detail:
+        //
+        // 1) Normal cursor: we lock with "ctx" and hold it for the whole getMore.
+        // 2) Cursor owned by global cursor manager: we don't lock anything.  These cursors don't
+        //    own any collection state.
+        // 3) Agg cursor: we lock with "ctx", then release, then relock with "unpinDBLock" and
+        //    "unpinCollLock".  This is because agg cursors handle locking internally (hence the
+        //    release), but the pin and unpin of the cursor must occur under the collection lock.
+        //    We don't use our AutoGetCollectionForRead "ctx" to relock, because
+        //    AutoGetCollectionForRead checks the sharding version (and we want the relock for the
+        //    unpin to succeed even if the sharding version has changed).
+        //
+        // Note that we declare our locks before our ClientCursorPin, in order to ensure that the
+        // pin's destructor is called before the lock destructors (so that the unpin occurs under
+        // the lock).
         boost::scoped_ptr<AutoGetCollectionForRead> ctx;
+        boost::scoped_ptr<Lock::DBLock> unpinDBLock;
+        boost::scoped_ptr<Lock::CollectionLock> unpinCollLock;
 
         CursorManager* cursorManager;
         CursorManager* globalCursorManager = CursorManager::getGlobalCursorManager();
@@ -378,6 +396,23 @@ namespace mongo {
                 saveClientCursor = true;
             }
 
+            // If we are operating on an aggregation cursor, then we dropped our collection lock
+            // earlier and need to reacquire it in order to clean up our ClientCursorPin.
+            //
+            // TODO: We need to ensure that this relock happens if we release the pin above in
+            // response to PlanExecutor::getNext() throwing an exception.
+            if (cc->isAggCursor()) {
+                invariant(NULL == ctx.get());
+                unpinDBLock.reset(new Lock::DBLock(txn->lockState(), nss.db(), MODE_IS));
+                unpinCollLock.reset(new Lock::CollectionLock(txn->lockState(), nss.ns(), MODE_IS));
+            }
+
+            // Our two possible ClientCursorPin cleanup paths are:
+            // 1) If the cursor is not going to be saved, we call deleteUnderlying() on the pin.
+            // 2) If the cursor is going to be saved, we simply let the pin go out of scope.  In
+            //    this case, the pin's destructor will be invoked, which will call release() on the
+            //    pin.  Because our ClientCursorPin is declared after our lock is declared, this
+            //    will happen under the lock.
             if (!saveClientCursor) {
                 ruSwapper.reset();
                 ccPin.deleteUnderlying();
-- 
cgit v1.2.1