diff options
author | Benety Goh <benety@mongodb.com> | 2019-11-09 23:29:19 +0000 |
---|---|---|
committer | evergreen <evergreen@mongodb.com> | 2019-11-09 23:29:19 +0000 |
commit | a5b55ef3f97e3fef647d9b17ef3109144bdf8477 (patch) | |
tree | f2432895beec36bd5e3310d74bea9f7f515ac602 | |
parent | 6096c0e308c8ccc98c61636f924ffed4689efd12 (diff) | |
download | mongo-a5b55ef3f97e3fef647d9b17ef3109144bdf8477.tar.gz |
SERVER-44393 two phase index build crashes on commitIndexBuild while waiting for abort due to prior indexing errors
-rw-r--r-- | jstests/noPassthrough/index_secondary_commit_after_scan_error.js | 14 | ||||
-rw-r--r-- | src/mongo/db/index_builds_coordinator.cpp | 12 | ||||
-rw-r--r-- | src/mongo/db/index_builds_coordinator.h | 6 |
3 files changed, 20 insertions, 12 deletions
diff --git a/jstests/noPassthrough/index_secondary_commit_after_scan_error.js b/jstests/noPassthrough/index_secondary_commit_after_scan_error.js index f7ba33edcc1..55eb378db4e 100644 --- a/jstests/noPassthrough/index_secondary_commit_after_scan_error.js +++ b/jstests/noPassthrough/index_secondary_commit_after_scan_error.js @@ -75,21 +75,19 @@ IndexBuildTest.waitForIndexBuildToStop(testDB); const exitCode = createIdx(); assert.eq(0, exitCode, 'expected shell to exit successfully'); -// Confirm that the index build on the secondary failed because of the invalid document. -checkLog.contains(secondary, 'background index build aborted due to failpoint'); +// Secondary should crash on receiving the unexpected commitIndexBuild oplog entry. +const fassertProcessExitCode = _isWindows() ? MongoRunner.EXIT_ABRUPT : MongoRunner.EXIT_ABORT; +rst.stop(secondary, undefined, {allowedExitCode: fassertProcessExitCode}); +assert(rawMongoProgramOutput().match('Fatal assertion 51101 OperationFailed: Index build:'), + 'Index build should have aborted secondary due to unexpected commitIndexBuild oplog entry.'); // Check indexes on primary. -rst.awaitReplication(); IndexBuildTest.assertIndexes(coll, 2, ['_id_', 'a_1']); -// Check that indexes were created on the secondary in spite of the scanning error. 
-const secondaryColl = secondaryDB.getCollection(coll.getName()); -IndexBuildTest.assertIndexes(secondaryColl, 2, ['_id_', 'a_1']); - const cmdNs = testDB.getCollection('$cmd').getFullName(); const ops = rst.dumpOplog(primary, {op: 'c', ns: cmdNs, 'o.commitIndexBuild': coll.getName()}); assert.eq(1, ops.length, 'primary did not write commitIndexBuild oplog entry: ' + tojson(ops)); TestData.skipCheckDBHashes = true; -rst.stopSet(undefined, undefined, {skipValidation: true}); +rst.stopSet(); })(); diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index 9e9977f772b..a3e8b228134 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -1301,7 +1301,7 @@ void IndexBuildsCoordinator::_buildIndexTwoPhase( preAbortStatus = ex.toStatus(); } - auto commitIndexBuildTimestamp = _waitForCommitOrAbort(opCtx, nss, replState); + auto commitIndexBuildTimestamp = _waitForCommitOrAbort(opCtx, nss, replState, preAbortStatus); _insertKeysFromSideTablesAndCommit(opCtx, dbAndUUID, replState, @@ -1415,7 +1415,8 @@ NamespaceString IndexBuildsCoordinator::_insertKeysFromSideTablesWithoutBlocking Timestamp IndexBuildsCoordinator::_waitForCommitOrAbort( OperationContext* opCtx, const NamespaceString& nss, - std::shared_ptr<ReplIndexBuildState> replState) { + std::shared_ptr<ReplIndexBuildState> replState, + const Status& preAbortStatus) { Timestamp commitIndexBuildTimestamp; if (shouldWaitForCommitOrAbort(opCtx, nss, *replState)) { log() << "Index build waiting for commit or abort before completing final phase: " @@ -1439,11 +1440,16 @@ Timestamp IndexBuildsCoordinator::_waitForCommitOrAbort( << ", collection UUID: " << replState->collectionUUID; commitIndexBuildTimestamp = replState->commitTimestamp; invariant(!replState->aborted, replState->buildUUID.toString()); + uassertStatusOK(preAbortStatus.withContext( + str::stream() << "index build failed on this node but we received a " + 
"commitIndexBuild oplog entry from the primary with timestamp: " + << replState->commitTimestamp.toString())); } else if (replState->aborted) { log() << "Aborting index build: " << replState->buildUUID << ", timestamp: " << replState->abortTimestamp << ", reason: " << replState->abortReason - << ", collection UUID: " << replState->collectionUUID; + << ", collection UUID: " << replState->collectionUUID + << ", local index error (if any): " << preAbortStatus; invariant(!replState->isCommitReady, replState->buildUUID.toString()); } } diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h index 2183b1f1e5f..76ed8f74945 100644 --- a/src/mongo/db/index_builds_coordinator.h +++ b/src/mongo/db/index_builds_coordinator.h @@ -464,6 +464,9 @@ protected: /** * Waits for commit or abort signal from primary. + * 'preAbortStatus' holds any indexing errors from the prior phases during oplog application. + * If 'preAbortStatus' is not OK, we need to ensure that we get an abortIndexBuild oplog entry + * from the primary, not commitIndexBuild. * * On completion, this function returns a timestamp, which may be null, that may be used to * update the mdb catalog as we commit the index build. The commit index build timestamp is @@ -474,7 +477,8 @@ protected: */ Timestamp _waitForCommitOrAbort(OperationContext* opCtx, const NamespaceString& nss, - std::shared_ptr<ReplIndexBuildState> replState); + std::shared_ptr<ReplIndexBuildState> replState, + const Status& preAbortStatus); /** * Third phase is catching up on all the writes that occurred during the first two phases. |