author     James Wahlin <james.wahlin@10gen.com>  2015-10-23 11:05:39 -0400
committer  James Wahlin <james.wahlin@10gen.com>  2015-10-23 14:13:34 -0400
commit     817c963bea7f12220deee14ecaa512161d39555a
tree       254df03f52fc844358c9053fd1f612dc630dae52
parent     9967e79ea6c20eded9818539c59964afc20058af
SERVER-2454 Return error on find/getMore PlanExecutor::DEAD
This is a partial backport to the 3.0 branch. It is intended to fix the issue reported in SERVER-20973.
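With this backport, a find or getMore whose PlanExecutor ends in the DEAD state (for example, a tailable cursor on a capped collection whose last-seen position has been overwritten) returns an error such as CappedPositionLost to the client instead of silently dropping the cursor. The shell-style sketch below is illustrative only and not part of the patch; the collection name "capped_demo" and the capped-collection sizes are assumptions:

    // Sketch of the behavior this patch targets (not part of the change).
    var testDB = db.getSiblingDB("test");
    testDB.capped_demo.drop();
    assert.commandWorked(testDB.createCollection("capped_demo",
                                                 {capped: true, size: 4096, max: 2}));
    assert.writeOK(testDB.capped_demo.insert({x: 1}));

    // Open a tailable cursor and consume the only document in the first batch.
    var cursor = testDB.capped_demo.find().addOption(DBQuery.Option.tailable);
    assert.eq(1, cursor.next().x);

    // Wrap the capped collection so the cursor's last-seen position is deleted.
    assert.writeOK(testDB.capped_demo.insert({x: 2}));
    assert.writeOK(testDB.capped_demo.insert({x: 3}));

    // The next getMore finds the collection scan dead; with this patch the server
    // reports the failure (e.g. CappedPositionLost) and the shell throws.
    assert.throws(function() {
        cursor.hasNext();
    });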
-rw-r--r--  jstests/concurrency/fsm_workload_modifiers/make_capped.js   2
-rwxr-xr-x  jstests/replsets/resync.js                                  13
-rw-r--r--  src/mongo/base/error_codes.err                               1
-rw-r--r--  src/mongo/db/exec/collection_scan.cpp                       18
-rw-r--r--  src/mongo/db/query/find.cpp                                 35
-rw-r--r--  src/mongo/dbtests/querytests.cpp                            25
6 files changed, 45 insertions, 49 deletions
diff --git a/jstests/concurrency/fsm_workload_modifiers/make_capped.js b/jstests/concurrency/fsm_workload_modifiers/make_capped.js
index 40ba3d09c24..d17f291faa6 100644
--- a/jstests/concurrency/fsm_workload_modifiers/make_capped.js
+++ b/jstests/concurrency/fsm_workload_modifiers/make_capped.js
@@ -20,7 +20,7 @@ function makeCapped($config, $super) {
db[collName].drop();
assertAlways.commandWorked(db.createCollection(collName, {
capped: true,
- size: 4096 // bytes
+ size: 16384 // bytes
}));
});
diff --git a/jstests/replsets/resync.js b/jstests/replsets/resync.js
index af75d8bb813..49eb570a318 100755
--- a/jstests/replsets/resync.js
+++ b/jstests/replsets/resync.js
@@ -39,7 +39,18 @@
function hasCycled() {
var oplog = a_conn.getDB("local").oplog.rs;
- return oplog.find( { "o.x" : 1 } ).sort( { $natural : 1 } ).limit(10).itcount() == 0;
+ try {
+ // Collection scan to determine if the oplog entry from the first insert has been
+ // deleted yet.
+ return oplog.find( { "o.x" : 1 } ).sort( { $natural : 1 } ).limit(10).itcount() == 0;
+ }
+ catch (except) {
+ // An error is expected in the case that capped deletions blow away the position of the
+ // collection scan during a yield. In this case, we just try again.
+ var errorRegex = /CappedPositionLost/;
+ assert(errorRegex.test(except.message));
+ return hasCycled();
+ }
}
// Make sure the oplog has rolled over on the primary and secondary that is up,
diff --git a/src/mongo/base/error_codes.err b/src/mongo/base/error_codes.err
index 680404aeeea..41d9ce6c581 100644
--- a/src/mongo/base/error_codes.err
+++ b/src/mongo/base/error_codes.err
@@ -120,6 +120,7 @@ error_code("ConflictingOperationInProgress", 117)
# backported
error_code("OplogStartMissing", 120)
+error_code("CappedPositionLost", 121)
# Non-sequential error codes (for compatibility only)
error_code("NotMaster", 10107) #this comes from assert_util.h
diff --git a/src/mongo/db/exec/collection_scan.cpp b/src/mongo/db/exec/collection_scan.cpp
index 3a7700299a6..c5028da674a 100644
--- a/src/mongo/db/exec/collection_scan.cpp
+++ b/src/mongo/db/exec/collection_scan.cpp
@@ -35,6 +35,7 @@
#include "mongo/db/exec/filter.h"
#include "mongo/db/exec/scoped_timer.h"
#include "mongo/db/exec/working_set.h"
+#include "mongo/db/exec/working_set_common.h"
#include "mongo/db/catalog/collection.h"
#include "mongo/db/storage/record_fetcher.h"
#include "mongo/util/fail_point_service.h"
@@ -78,6 +79,12 @@ PlanStage::StageState CollectionScan::work(WorkingSetID* out) {
ScopedTimer timer(&_commonStats.executionTimeMillis);
if (_isDead) {
+ Status status(
+ ErrorCodes::CappedPositionLost,
+ str::stream()
+ << "CollectionScan died due to position in capped collection being deleted. "
+ << "Last seen record id: " << _lastSeenLoc);
+ *out = WorkingSetCommon::allocateStatusMember(_workingSet, status);
return PlanStage::DEAD;
}
@@ -85,6 +92,9 @@ PlanStage::StageState CollectionScan::work(WorkingSetID* out) {
if (NULL == _iter) {
if (_params.collection == NULL) {
_isDead = true;
+ Status status(ErrorCodes::CappedPositionLost,
+ str::stream() << "CollectionScan died due to NULL collection param");
+ *out = WorkingSetCommon::allocateStatusMember(_workingSet, status);
return PlanStage::DEAD;
}
@@ -100,6 +110,11 @@ PlanStage::StageState CollectionScan::work(WorkingSetID* out) {
// the stream. This is related to the _lastSeenLock handling in invalidate.
if (_iter->getNext() != _lastSeenLoc) {
_isDead = true;
+ Status status(ErrorCodes::CappedPositionLost,
+ str::stream() << "CollectionScan died due to failure to restore "
+ << "tailable cursor position. "
+ << "Last seen record id: " << _lastSeenLoc);
+ *out = WorkingSetCommon::allocateStatusMember(_workingSet, status);
return PlanStage::DEAD;
}
}
@@ -221,8 +236,7 @@ void CollectionScan::restoreState(OperationContext* opCtx) {
++_commonStats.unyields;
if (NULL != _iter) {
if (!_iter->restoreState(opCtx)) {
- warning() << "Collection dropped or state deleted during yield of CollectionScan: "
- << opCtx->getNS();
+ warning() << "Could not restore RecordCursor for CollectionScan: " << opCtx->getNS();
_isDead = true;
}
}
diff --git a/src/mongo/db/query/find.cpp b/src/mongo/db/query/find.cpp
index 08567c1c986..a35f69bf1f7 100644
--- a/src/mongo/db/query/find.cpp
+++ b/src/mongo/db/query/find.cpp
@@ -366,23 +366,9 @@ QueryResult::View getMore(OperationContext* txn,
// Propagate this error to caller.
if (PlanExecutor::FAILURE == state) {
scoped_ptr<PlanStageStats> stats(exec->getStats());
- error() << "Plan executor error, stats: " << Explain::statsToBSON(*stats);
- uasserted(17406,
- "getMore executor error: " + WorkingSetCommon::toStatusString(obj));
- }
-
- // If we're dead there's no way to get more results.
- saveClientCursor = false;
-
- // In the old system tailable capped cursors would be killed off at the
- // cursorid level. If a tailable capped cursor is nuked the cursorid
- // would vanish.
- //
- // In the new system they die and are cleaned up later (or time out).
- // So this is where we get to remove the cursorid.
- if (0 == numResults) {
- resultFlags = ResultFlag_CursorNotFound;
+ error() << "getMore executor error, stats: " << Explain::statsToBSON(*stats);
}
+ uasserted(17406, "getMore executor error: " + WorkingSetCommon::toStatusString(obj));
} else if (PlanExecutor::IS_EOF == state) {
// EOF is also end of the line unless it's tailable.
saveClientCursor = queryOptions & QueryOption_CursorTailable;
@@ -786,16 +772,17 @@ std::string runQuery(OperationContext* txn,
exec->deregisterExec();
// Caller expects exceptions thrown in certain cases.
- if (PlanExecutor::FAILURE == state) {
- scoped_ptr<PlanStageStats> stats(exec->getStats());
- error() << "Plan executor error, stats: " << Explain::statsToBSON(*stats);
- uasserted(17144, "Executor error: " + WorkingSetCommon::toStatusString(obj));
+ if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) {
+ if (PlanExecutor::FAILURE == state) {
+ const std::unique_ptr<PlanStageStats> stats(exec->getStats());
+ error() << "Plan executor error during find: " << PlanExecutor::statestr(state)
+ << ", stats: " << Explain::statsToBSON(*stats);
+ }
+ uasserted(17144,
+ "Plan executor error during find: " + WorkingSetCommon::toStatusString(obj));
}
- // Why save a dead executor?
- if (PlanExecutor::DEAD == state) {
- saveClientCursor = false;
- } else if (pq.getOptions().tailable) {
+ if (pq.getOptions().tailable) {
// If we're tailing a capped collection, we don't bother saving the cursor if the
// collection is empty. Otherwise, the semantics of the tailable cursor is that the
// client will keep trying to read from it. So we'll keep it around.
diff --git a/src/mongo/dbtests/querytests.cpp b/src/mongo/dbtests/querytests.cpp
index b53f7166218..666e5e2761b 100644
--- a/src/mongo/dbtests/querytests.cpp
+++ b/src/mongo/dbtests/querytests.cpp
@@ -514,25 +514,10 @@ public:
insert(ns, BSON("a" << 2));
insert(ns, BSON("a" << 3));
- // This can either have been killed, or jumped to the right thing.
- // Key is that it can't skip.
+ // We have overwritten the previous cursor position and should encounter a dead cursor.
if (c->more()) {
- BSONObj x = c->next();
- ASSERT_EQUALS(2, x["a"].numberInt());
+ ASSERT_THROWS(c->nextSafe(), AssertionException);
}
-
- // Inserting a document into a capped collection can force another document out.
- // In this case, the capped collection has 2 documents, so inserting two more clobbers
- // whatever RecordId that the underlying cursor had as its state.
- //
- // In the Cursor world, the ClientCursor was responsible for manipulating cursors. It
- // would detect that the cursor's "refloc" (translation: diskloc required to maintain
- // iteration state) was being clobbered and it would kill the cursor.
- //
- // In the Runner world there is no notion of a "refloc" and as such the invalidation
- // broadcast code doesn't know enough to know that the underlying collection iteration
- // can't proceed.
- // ASSERT_EQUALS( 0, c->getCursorId() );
}
};
@@ -555,11 +540,9 @@ public:
insert(ns, BSON("a" << 3));
insert(ns, BSON("a" << 4));
- // This can either have been killed, or jumped to the right thing.
- // Key is that it can't skip.
+ // We have overwritten the previous cursor position and should encounter a dead cursor.
if (c->more()) {
- BSONObj x = c->next();
- ASSERT_EQUALS(2, x["a"].numberInt());
+ ASSERT_THROWS(c->nextSafe(), AssertionException);
}
}
};