From c980c02f219e595279e2b81f558ec1c81e841573 Mon Sep 17 00:00:00 2001 From: James Wahlin Date: Thu, 29 Oct 2015 11:05:14 -0400 Subject: SERVER-2454 Backport of PlanExecutor::DEAD handling fix to 2.6.x --- src/mongo/base/error_codes.err | 1 + src/mongo/db/exec/collection_scan.cpp | 20 +++++++++++----- src/mongo/db/exec/collection_scan.h | 3 +-- src/mongo/db/query/new_find.cpp | 43 ++++++++++++----------------------- src/mongo/dbtests/querytests.cpp | 18 ++++----------- 5 files changed, 35 insertions(+), 50 deletions(-) diff --git a/src/mongo/base/error_codes.err b/src/mongo/base/error_codes.err index ac900d23d27..d45ff64dd3d 100644 --- a/src/mongo/base/error_codes.err +++ b/src/mongo/base/error_codes.err @@ -87,6 +87,7 @@ error_code("IndexOptionsConflict", 85 ) error_code("IndexKeySpecsConflict", 86 ) error_code("OutdatedClient", 101) error_code("IncompatibleAuditMetadata", 102) +error_code("CappedPositionLost", 103) # Non-sequential error codes (for compatibility only) error_code("NetworkTimeout", 89) diff --git a/src/mongo/db/exec/collection_scan.cpp b/src/mongo/db/exec/collection_scan.cpp index 1a29d8b76fb..e9f6992ba94 100644 --- a/src/mongo/db/exec/collection_scan.cpp +++ b/src/mongo/db/exec/collection_scan.cpp @@ -32,6 +32,7 @@ #include "mongo/db/exec/collection_scan_common.h" #include "mongo/db/exec/filter.h" #include "mongo/db/exec/working_set.h" +#include "mongo/db/exec/working_set_common.h" #include "mongo/db/catalog/collection.h" #include "mongo/db/structure/collection_iterator.h" #include "mongo/util/fail_point_service.h" @@ -64,7 +65,7 @@ namespace mongo { : _workingSet(workingSet), _filter(filter), _params(params), - _nsDropped(false) { + _isDead(false) { // We pre-allocate a WSID and use it to pass up fetch requests. It is only // used to pass up fetch requests and we should never use it for anything else. @@ -77,13 +78,21 @@ namespace mongo { PlanStage::StageState CollectionScan::work(WorkingSetID* out) { ++_commonStats.works; - if (_nsDropped) { return PlanStage::DEAD; } + if (_isDead) { + Status status(ErrorCodes::CappedPositionLost, str::stream() + << "CollectionScan died due to position in capped collection being deleted."); + *out = WorkingSetCommon::allocateStatusMember(_workingSet, status); + return PlanStage::DEAD; + } // Do some init if we haven't already. if (NULL == _iter) { Collection* collection = cc().database()->getCollection( _params.ns ); if ( collection == NULL ) { - _nsDropped = true; + _isDead = true; + Status status(ErrorCodes::InternalError, + str::stream() << "CollectionScan died due to NULL collection."); + *out = WorkingSetCommon::allocateStatusMember(_workingSet, status); return PlanStage::DEAD; } @@ -152,7 +161,7 @@ namespace mongo { if ((0 != _params.maxScan) && (_specificStats.docsTested >= _params.maxScan)) { return true; } - if (_nsDropped) { return true; } + if (_isDead) { return true; } if (NULL == _iter) { return false; } return _iter->isEOF(); } @@ -190,8 +199,7 @@ namespace mongo { ++_commonStats.unyields; if (NULL != _iter) { if (!_iter->recoverFromYield()) { - warning() << "Collection dropped or state deleted during yield of CollectionScan"; - _nsDropped = true; + _isDead = true; } } } diff --git a/src/mongo/db/exec/collection_scan.h b/src/mongo/db/exec/collection_scan.h index e0d4eb945f8..94edd43c7b2 100644 --- a/src/mongo/db/exec/collection_scan.h +++ b/src/mongo/db/exec/collection_scan.h @@ -75,8 +75,7 @@ namespace mongo { CollectionScanParams _params; - // True if nsdetails(_ns) == NULL on our first call to work. - bool _nsDropped; + bool _isDead; // If we want to return a DiskLoc and it points at something that's not in memory, we return // a a "please page this in" result. We allocate one WSM for this purpose at construction diff --git a/src/mongo/db/query/new_find.cpp b/src/mongo/db/query/new_find.cpp index b2214008a7a..6698bd5d7e0 100644 --- a/src/mongo/db/query/new_find.cpp +++ b/src/mongo/db/query/new_find.cpp @@ -277,26 +277,13 @@ namespace mongo { Status res = runner->getInfo(&bareExplain, NULL); if (res.isOK()) { boost::scoped_ptr errorExplain(bareExplain); - error() << "Runner error, stats:\n" + error() << "getMore runner error, stats:\n" << errorExplain->stats.jsonString(Strict, true); } - - uasserted(17406, "getMore runner error: " + - WorkingSetCommon::toStatusString(obj)); } - // If we're dead there's no way to get more results. - saveClientCursor = false; - - // In the old system tailable capped cursors would be killed off at the - // cursorid level. If a tailable capped cursor is nuked the cursorid - // would vanish. - // - // In the new system they die and are cleaned up later (or time out). - // So this is where we get to remove the cursorid. - if (0 == numResults) { - resultFlags = ResultFlag_CursorNotFound; - } + uasserted(17406, "getMore runner error: " + + WorkingSetCommon::toStatusString(obj)); } else if (Runner::RUNNER_EOF == state) { // EOF is also end of the line unless it's tailable. @@ -665,22 +652,20 @@ namespace mongo { safety.reset(); // Caller expects exceptions thrown in certain cases. - if (Runner::RUNNER_ERROR == state) { - TypeExplain* bareExplain; - Status res = runner->getInfo(&bareExplain, NULL); - if (res.isOK()) { - boost::scoped_ptr errorExplain(bareExplain); - error() << "Runner error, stats:\n" - << errorExplain->stats.jsonString(Strict, true); + if (Runner::RUNNER_ERROR == state || Runner::RUNNER_DEAD == state) { + if (Runner::RUNNER_ERROR == state) { + TypeExplain* bareExplain; + Status res = runner->getInfo(&bareExplain, NULL); + if (res.isOK()) { + boost::scoped_ptr errorExplain(bareExplain); + error() << "Runner error during find, stats:\n" + << errorExplain->stats.jsonString(Strict, true); + } } - uasserted(17144, "Runner error: " + WorkingSetCommon::toStatusString(obj)); + uasserted(17144, "Runner error during find: " + WorkingSetCommon::toStatusString(obj)); } - // Why save a dead runner? - if (Runner::RUNNER_DEAD == state) { - saveClientCursor = false; - } - else if (pq.hasOption(QueryOption_CursorTailable)) { + if (pq.hasOption(QueryOption_CursorTailable)) { // If we're tailing a capped collection, we don't bother saving the cursor if the // collection is empty. Otherwise, the semantics of the tailable cursor is that the // client will keep trying to read from it. So we'll keep it around. diff --git a/src/mongo/dbtests/querytests.cpp b/src/mongo/dbtests/querytests.cpp index a804908887e..20ac1d3cfca 100644 --- a/src/mongo/dbtests/querytests.cpp +++ b/src/mongo/dbtests/querytests.cpp @@ -456,19 +456,11 @@ namespace QueryTests { ASSERT( !c->more() ); insert( ns, BSON( "a" << 2 ) ); insert( ns, BSON( "a" << 3 ) ); - ASSERT( !c->more() ); - // Inserting a document into a capped collection can force another document out. - // In this case, the capped collection has 2 documents, so inserting two more clobbers - // whatever DiskLoc that the underlying cursor had as its state. - // - // In the Cursor world, the ClientCursor was responsible for manipulating cursors. It - // would detect that the cursor's "refloc" (translation: diskloc required to maintain - // iteration state) was being clobbered and it would kill the cursor. - // - // In the Runner world there is no notion of a "refloc" and as such the invalidation - // broadcast code doesn't know enough to know that the underlying collection iteration - // can't proceed. - // ASSERT_EQUALS( 0, c->getCursorId() ); + + // We have overwritten the previous cursor position and should encounter a dead cursor. + if (c->more()) { + ASSERT_THROWS(c->nextSafe(), AssertionException); + } } }; -- cgit v1.2.1