summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Benety Goh <benety@mongodb.com> 2019-11-09 23:29:19 +0000
committer evergreen <evergreen@mongodb.com> 2019-11-09 23:29:19 +0000
commit a5b55ef3f97e3fef647d9b17ef3109144bdf8477 (patch)
tree f2432895beec36bd5e3310d74bea9f7f515ac602
parent 6096c0e308c8ccc98c61636f924ffed4689efd12 (diff)
download mongo-a5b55ef3f97e3fef647d9b17ef3109144bdf8477.tar.gz
SERVER-44393 two phase index build crashes on commitIndexBuild while waiting for abort due to prior indexing errors
-rw-r--r-- jstests/noPassthrough/index_secondary_commit_after_scan_error.js 14
-rw-r--r-- src/mongo/db/index_builds_coordinator.cpp 12
-rw-r--r-- src/mongo/db/index_builds_coordinator.h 6
3 files changed, 20 insertions, 12 deletions
diff --git a/jstests/noPassthrough/index_secondary_commit_after_scan_error.js b/jstests/noPassthrough/index_secondary_commit_after_scan_error.js
index f7ba33edcc1..55eb378db4e 100644
--- a/jstests/noPassthrough/index_secondary_commit_after_scan_error.js
+++ b/jstests/noPassthrough/index_secondary_commit_after_scan_error.js
@@ -75,21 +75,19 @@ IndexBuildTest.waitForIndexBuildToStop(testDB);
const exitCode = createIdx();
assert.eq(0, exitCode, 'expected shell to exit successfully');
-// Confirm that the index build on the secondary failed because of the invalid document.
-checkLog.contains(secondary, 'background index build aborted due to failpoint');
+// Secondary should crash on receiving the unexpected commitIndexBuild oplog entry.
+const fassertProcessExitCode = _isWindows() ? MongoRunner.EXIT_ABRUPT : MongoRunner.EXIT_ABORT;
+rst.stop(secondary, undefined, {allowedExitCode: fassertProcessExitCode});
+assert(rawMongoProgramOutput().match('Fatal assertion 51101 OperationFailed: Index build:'),
+ 'Index build should have aborted secondary due to unexpected commitIndexBuild oplog entry.');
// Check indexes on primary.
-rst.awaitReplication();
IndexBuildTest.assertIndexes(coll, 2, ['_id_', 'a_1']);
-// Check that indexes were created on the secondary in spite of the scanning error.
-const secondaryColl = secondaryDB.getCollection(coll.getName());
-IndexBuildTest.assertIndexes(secondaryColl, 2, ['_id_', 'a_1']);
-
const cmdNs = testDB.getCollection('$cmd').getFullName();
const ops = rst.dumpOplog(primary, {op: 'c', ns: cmdNs, 'o.commitIndexBuild': coll.getName()});
assert.eq(1, ops.length, 'primary did not write commitIndexBuild oplog entry: ' + tojson(ops));
TestData.skipCheckDBHashes = true;
-rst.stopSet(undefined, undefined, {skipValidation: true});
+rst.stopSet();
})();
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index 9e9977f772b..a3e8b228134 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -1301,7 +1301,7 @@ void IndexBuildsCoordinator::_buildIndexTwoPhase(
preAbortStatus = ex.toStatus();
}
- auto commitIndexBuildTimestamp = _waitForCommitOrAbort(opCtx, nss, replState);
+ auto commitIndexBuildTimestamp = _waitForCommitOrAbort(opCtx, nss, replState, preAbortStatus);
_insertKeysFromSideTablesAndCommit(opCtx,
dbAndUUID,
replState,
@@ -1415,7 +1415,8 @@ NamespaceString IndexBuildsCoordinator::_insertKeysFromSideTablesWithoutBlocking
Timestamp IndexBuildsCoordinator::_waitForCommitOrAbort(
OperationContext* opCtx,
const NamespaceString& nss,
- std::shared_ptr<ReplIndexBuildState> replState) {
+ std::shared_ptr<ReplIndexBuildState> replState,
+ const Status& preAbortStatus) {
Timestamp commitIndexBuildTimestamp;
if (shouldWaitForCommitOrAbort(opCtx, nss, *replState)) {
log() << "Index build waiting for commit or abort before completing final phase: "
@@ -1439,11 +1440,16 @@ Timestamp IndexBuildsCoordinator::_waitForCommitOrAbort(
<< ", collection UUID: " << replState->collectionUUID;
commitIndexBuildTimestamp = replState->commitTimestamp;
invariant(!replState->aborted, replState->buildUUID.toString());
+ uassertStatusOK(preAbortStatus.withContext(
+ str::stream() << "index build failed on this node but we received a "
+ "commitIndexBuild oplog entry from the primary with timestamp: "
+ << replState->commitTimestamp.toString()));
} else if (replState->aborted) {
log() << "Aborting index build: " << replState->buildUUID
<< ", timestamp: " << replState->abortTimestamp
<< ", reason: " << replState->abortReason
- << ", collection UUID: " << replState->collectionUUID;
+ << ", collection UUID: " << replState->collectionUUID
+ << ", local index error (if any): " << preAbortStatus;
invariant(!replState->isCommitReady, replState->buildUUID.toString());
}
}
diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h
index 2183b1f1e5f..76ed8f74945 100644
--- a/src/mongo/db/index_builds_coordinator.h
+++ b/src/mongo/db/index_builds_coordinator.h
@@ -464,6 +464,9 @@ protected:
/**
* Waits for commit or abort signal from primary.
+ * 'preAbortStatus' holds any indexing errors from the prior phases during oplog application.
+ * If 'preAbortStatus' is not OK, we need to ensure that we get an abortIndexBuild oplog entry
+ * from the primary, not commitIndexBuild.
*
* On completion, this function returns a timestamp, which may be null, that may be used to
* update the mdb catalog as we commit the index build. The commit index build timestamp is
@@ -474,7 +477,8 @@ protected:
*/
Timestamp _waitForCommitOrAbort(OperationContext* opCtx,
const NamespaceString& nss,
- std::shared_ptr<ReplIndexBuildState> replState);
+ std::shared_ptr<ReplIndexBuildState> replState,
+ const Status& preAbortStatus);
/**
* Third phase is catching up on all the writes that occurred during the first two phases.