diff options
author | James Wahlin <james.wahlin@10gen.com> | 2015-10-23 11:05:39 -0400 |
---|---|---|
committer | James Wahlin <james.wahlin@10gen.com> | 2015-10-23 14:13:34 -0400 |
commit | 817c963bea7f12220deee14ecaa512161d39555a (patch) | |
tree | 254df03f52fc844358c9053fd1f612dc630dae52 | |
parent | 9967e79ea6c20eded9818539c59964afc20058af (diff) | |
download | mongo-817c963bea7f12220deee14ecaa512161d39555a.tar.gz |
SERVER-2454 Return error on find/getMore PlanExecutor::DEAD
This is a partial backport to the 3.0 branch. It is intended to fix the
issue reported in SERVER-20973.
-rw-r--r-- | jstests/concurrency/fsm_workload_modifiers/make_capped.js | 2 | ||||
-rwxr-xr-x | jstests/replsets/resync.js | 13 | ||||
-rw-r--r-- | src/mongo/base/error_codes.err | 1 | ||||
-rw-r--r-- | src/mongo/db/exec/collection_scan.cpp | 18 | ||||
-rw-r--r-- | src/mongo/db/query/find.cpp | 35 | ||||
-rw-r--r-- | src/mongo/dbtests/querytests.cpp | 25 |
6 files changed, 45 insertions, 49 deletions
diff --git a/jstests/concurrency/fsm_workload_modifiers/make_capped.js b/jstests/concurrency/fsm_workload_modifiers/make_capped.js index 40ba3d09c24..d17f291faa6 100644 --- a/jstests/concurrency/fsm_workload_modifiers/make_capped.js +++ b/jstests/concurrency/fsm_workload_modifiers/make_capped.js @@ -20,7 +20,7 @@ function makeCapped($config, $super) { db[collName].drop(); assertAlways.commandWorked(db.createCollection(collName, { capped: true, - size: 4096 // bytes + size: 16384 // bytes })); }); diff --git a/jstests/replsets/resync.js b/jstests/replsets/resync.js index af75d8bb813..49eb570a318 100755 --- a/jstests/replsets/resync.js +++ b/jstests/replsets/resync.js @@ -39,7 +39,18 @@ function hasCycled() { var oplog = a_conn.getDB("local").oplog.rs; - return oplog.find( { "o.x" : 1 } ).sort( { $natural : 1 } ).limit(10).itcount() == 0; + try { + // Collection scan to determine if the oplog entry from the first insert has been + // deleted yet. + return oplog.find( { "o.x" : 1 } ).sort( { $natural : 1 } ).limit(10).itcount() == 0; + } + catch (except) { + // An error is expected in the case that capped deletions blow away the position of the + // collection scan during a yield. In this case, we just try again. + var errorRegex = /CappedPositionLost/; + assert(errorRegex.test(except.message)); + return hasCycled(); + } } // Make sure the oplog has rolled over on the primary and secondary that is up, diff --git a/src/mongo/base/error_codes.err b/src/mongo/base/error_codes.err index 680404aeeea..41d9ce6c581 100644 --- a/src/mongo/base/error_codes.err +++ b/src/mongo/base/error_codes.err @@ -120,6 +120,7 @@ error_code("ConflictingOperationInProgress", 117) # backported error_code("OplogStartMissing", 120) +error_code("CappedPositionLost", 121) # Non-sequential error codes (for compatibility only) error_code("NotMaster", 10107) #this comes from assert_util.h diff --git a/src/mongo/db/exec/collection_scan.cpp b/src/mongo/db/exec/collection_scan.cpp index 3a7700299a6..c5028da674a 100644 --- a/src/mongo/db/exec/collection_scan.cpp +++ b/src/mongo/db/exec/collection_scan.cpp @@ -35,6 +35,7 @@ #include "mongo/db/exec/filter.h" #include "mongo/db/exec/scoped_timer.h" #include "mongo/db/exec/working_set.h" +#include "mongo/db/exec/working_set_common.h" #include "mongo/db/catalog/collection.h" #include "mongo/db/storage/record_fetcher.h" #include "mongo/util/fail_point_service.h" @@ -78,6 +79,12 @@ PlanStage::StageState CollectionScan::work(WorkingSetID* out) { ScopedTimer timer(&_commonStats.executionTimeMillis); if (_isDead) { + Status status( + ErrorCodes::CappedPositionLost, + str::stream() + << "CollectionScan died due to position in capped collection being deleted. " + << "Last seen record id: " << _lastSeenLoc); + *out = WorkingSetCommon::allocateStatusMember(_workingSet, status); return PlanStage::DEAD; } @@ -85,6 +92,9 @@ PlanStage::StageState CollectionScan::work(WorkingSetID* out) { if (NULL == _iter) { if (_params.collection == NULL) { _isDead = true; + Status status(ErrorCodes::CappedPositionLost, + str::stream() << "CollectionScan died due to NULL collection param"); + *out = WorkingSetCommon::allocateStatusMember(_workingSet, status); return PlanStage::DEAD; } @@ -100,6 +110,11 @@ PlanStage::StageState CollectionScan::work(WorkingSetID* out) { // the stream. This is related to the _lastSeenLock handling in invalidate. if (_iter->getNext() != _lastSeenLoc) { _isDead = true; + Status status(ErrorCodes::CappedPositionLost, + str::stream() << "CollectionScan died due to failure to restore " + << "tailable cursor position. " + << "Last seen record id: " << _lastSeenLoc); + *out = WorkingSetCommon::allocateStatusMember(_workingSet, status); return PlanStage::DEAD; } } @@ -221,8 +236,7 @@ void CollectionScan::restoreState(OperationContext* opCtx) { ++_commonStats.unyields; if (NULL != _iter) { if (!_iter->restoreState(opCtx)) { - warning() << "Collection dropped or state deleted during yield of CollectionScan: " - << opCtx->getNS(); + warning() << "Could not restore RecordCursor for CollectionScan: " << opCtx->getNS(); _isDead = true; } } diff --git a/src/mongo/db/query/find.cpp b/src/mongo/db/query/find.cpp index 08567c1c986..a35f69bf1f7 100644 --- a/src/mongo/db/query/find.cpp +++ b/src/mongo/db/query/find.cpp @@ -366,23 +366,9 @@ QueryResult::View getMore(OperationContext* txn, // Propagate this error to caller. if (PlanExecutor::FAILURE == state) { scoped_ptr<PlanStageStats> stats(exec->getStats()); - error() << "Plan executor error, stats: " << Explain::statsToBSON(*stats); - uasserted(17406, - "getMore executor error: " + WorkingSetCommon::toStatusString(obj)); - } - - // If we're dead there's no way to get more results. - saveClientCursor = false; - - // In the old system tailable capped cursors would be killed off at the - // cursorid level. If a tailable capped cursor is nuked the cursorid - // would vanish. - // - // In the new system they die and are cleaned up later (or time out). - // So this is where we get to remove the cursorid. - if (0 == numResults) { - resultFlags = ResultFlag_CursorNotFound; + error() << "getMore executor error, stats: " << Explain::statsToBSON(*stats); } + uasserted(17406, "getMore executor error: " + WorkingSetCommon::toStatusString(obj)); } else if (PlanExecutor::IS_EOF == state) { // EOF is also end of the line unless it's tailable. saveClientCursor = queryOptions & QueryOption_CursorTailable; @@ -786,16 +772,17 @@ std::string runQuery(OperationContext* txn, exec->deregisterExec(); // Caller expects exceptions thrown in certain cases. - if (PlanExecutor::FAILURE == state) { - scoped_ptr<PlanStageStats> stats(exec->getStats()); - error() << "Plan executor error, stats: " << Explain::statsToBSON(*stats); - uasserted(17144, "Executor error: " + WorkingSetCommon::toStatusString(obj)); + if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) { + if (PlanExecutor::FAILURE == state) { + const std::unique_ptr<PlanStageStats> stats(exec->getStats()); + error() << "Plan executor error during find: " << PlanExecutor::statestr(state) + << ", stats: " << Explain::statsToBSON(*stats); + } + uasserted(17144, + "Plan executor error during find: " + WorkingSetCommon::toStatusString(obj)); } - // Why save a dead executor? - if (PlanExecutor::DEAD == state) { - saveClientCursor = false; - } else if (pq.getOptions().tailable) { + if (pq.getOptions().tailable) { // If we're tailing a capped collection, we don't bother saving the cursor if the // collection is empty. Otherwise, the semantics of the tailable cursor is that the // client will keep trying to read from it. So we'll keep it around. diff --git a/src/mongo/dbtests/querytests.cpp b/src/mongo/dbtests/querytests.cpp index b53f7166218..666e5e2761b 100644 --- a/src/mongo/dbtests/querytests.cpp +++ b/src/mongo/dbtests/querytests.cpp @@ -514,25 +514,10 @@ public: insert(ns, BSON("a" << 2)); insert(ns, BSON("a" << 3)); - // This can either have been killed, or jumped to the right thing. - // Key is that it can't skip. + // We have overwritten the previous cursor position and should encounter a dead cursor. if (c->more()) { - BSONObj x = c->next(); - ASSERT_EQUALS(2, x["a"].numberInt()); + ASSERT_THROWS(c->nextSafe(), AssertionException); } - - // Inserting a document into a capped collection can force another document out. - // In this case, the capped collection has 2 documents, so inserting two more clobbers - // whatever RecordId that the underlying cursor had as its state. - // - // In the Cursor world, the ClientCursor was responsible for manipulating cursors. It - // would detect that the cursor's "refloc" (translation: diskloc required to maintain - // iteration state) was being clobbered and it would kill the cursor. - // - // In the Runner world there is no notion of a "refloc" and as such the invalidation - // broadcast code doesn't know enough to know that the underlying collection iteration - // can't proceed. - // ASSERT_EQUALS( 0, c->getCursorId() ); } }; @@ -555,11 +540,9 @@ public: insert(ns, BSON("a" << 3)); insert(ns, BSON("a" << 4)); - // This can either have been killed, or jumped to the right thing. - // Key is that it can't skip. + // We have overwritten the previous cursor position and should encounter a dead cursor. if (c->more()) { - BSONObj x = c->next(); - ASSERT_EQUALS(2, x["a"].numberInt()); + ASSERT_THROWS(c->nextSafe(), AssertionException); } } }; |