From df29113ac3419a3acb4736940a0ac05b779f82fb Mon Sep 17 00:00:00 2001
From: Gregory Wlodarek
Date: Tue, 10 Jan 2023 03:24:34 +0000
Subject: SERVER-71769 Add a new log message when a secondary node is skipping a two-phase index build

(cherry picked from commit fe2d44ca11588c665d17a164ac2c9aed2c24adfb)
---
 jstests/noPassthrough/libs/index_build.js          | 34 ++++++++
 jstests/noPassthrough/rolling_index_builds.js      | 43 +++--------
 .../rolling_index_builds_interrupted.js            | 87 ++++++++++++++++++++++
 src/mongo/db/index_builds_coordinator.cpp          | 21 +++++-
 4 files changed, 151 insertions(+), 34 deletions(-)
 create mode 100644 jstests/noPassthrough/rolling_index_builds_interrupted.js

diff --git a/jstests/noPassthrough/libs/index_build.js b/jstests/noPassthrough/libs/index_build.js
index 113a8478cc0..fb76ca24e44 100644
--- a/jstests/noPassthrough/libs/index_build.js
+++ b/jstests/noPassthrough/libs/index_build.js
@@ -199,6 +199,40 @@ var IndexBuildTest = class {
             conn.adminCommand({configureFailPoint: 'hangAfterStartingIndexBuild', mode: 'off'}));
     }
 
+    /**
+     * Restarts 'node' as a standalone on 'port', builds the index described by 'indexSpec' and
+     * 'indexName' on 'dbName.collName', then restarts the node as a member of the replica set
+     * 'rst' and waits for replication. Used to build an index in a rolling fashion.
+     */
+    static buildIndexOnNodeAsStandalone(rst, node, port, dbName, collName, indexSpec, indexName) {
+        jsTestLog('Restarting as standalone: ' + node.host);
+        rst.stop(node, /*signal=*/null, /*opts=*/null, {forRestart: true, waitpid: true});
+        const standalone = MongoRunner.runMongod({
+            restart: true,
+            dbpath: node.dbpath,
+            port: port,
+            setParameter: {
+                disableLogicalSessionCacheRefresh: true,
+                ttlMonitorEnabled: false,
+            },
+        });
+        // Authenticate against the standalone when the test run is configured with a key file.
+        if (jsTestOptions().keyFile) {
+            assert(jsTest.authenticate(standalone),
+                   'Failed authentication during restart: ' + standalone.host);
+        }
+
+        jsTestLog('Building index on standalone: ' + standalone.host);
+        const standaloneDB = standalone.getDB(dbName);
+        const standaloneColl = standaloneDB.getCollection(collName);
+        assert.commandWorked(standaloneColl.createIndex(indexSpec, {name: indexName}));
+
+        jsTestLog('Restarting as replica set node: ' + node.host);
+        MongoRunner.stopMongod(standalone);
+        rst.restart(node);
+        rst.awaitReplication();
+    }
+
     /**
      * Returns true if two phase index builds are supported.
      */
diff --git a/jstests/noPassthrough/rolling_index_builds.js b/jstests/noPassthrough/rolling_index_builds.js
index 9f2fa5847cf..cd136ebb116 100644
--- a/jstests/noPassthrough/rolling_index_builds.js
+++ b/jstests/noPassthrough/rolling_index_builds.js
@@ -12,6 +12,8 @@
 (function() {
 'use strict';
 
+load('jstests/noPassthrough/libs/index_build.js');
+
 // Set up replica set
 const replTest = new ReplSetTest({nodes: 3});
 
@@ -53,48 +55,23 @@ assert.eq(nodes.length - 1,
 const standalonePort = allocatePort();
 jsTestLog('Standalone server will listen on port: ' + standalonePort);
 
-function buildIndexOnNodeAsStandalone(node) {
-    jsTestLog('A. Restarting as standalone: ' + node.host);
-    replTest.stop(node, /*signal=*/null, /*opts=*/null, {forRestart: true, waitpid: true});
-    const standalone = MongoRunner.runMongod({
-        restart: true,
-        dbpath: node.dbpath,
-        port: standalonePort,
-        setParameter: {
-            disableLogicalSessionCacheRefresh: true,
-            ttlMonitorEnabled: false,
-        },
-    });
-    if (jsTestOptions().keyFile) {
-        assert(jsTest.authenticate(standalone),
-               'Failed authentication during restart: ' + standalone.host);
-    }
-
-    jsTestLog('B. Building index on standalone: ' + standalone.host);
-    const standaloneDB = standalone.getDB(dbName);
-    const standaloneColl = standaloneDB.getCollection(collName);
-    assert.commandWorked(standaloneColl.createIndex({b: 1}, {name: 'rolling_index_b_1'}));
-
-    jsTestLog('C. Restarting as replica set node: ' + node.host);
-    MongoRunner.stopMongod(standalone);
-    replTest.restart(node);
-    replTest.awaitReplication();
-}
-
-buildIndexOnNodeAsStandalone(secondaries[0]);
+IndexBuildTest.buildIndexOnNodeAsStandalone(
+    replTest, secondaries[0], standalonePort, dbName, collName, {b: 1}, 'rolling_index_b_1');
 
-jsTestLog('D. Repeat the procedure for the remaining secondary: ' + secondaries[1].host);
-buildIndexOnNodeAsStandalone(secondaries[1]);
+jsTestLog('Repeat the procedure for the remaining secondary: ' + secondaries[1].host);
+IndexBuildTest.buildIndexOnNodeAsStandalone(
+    replTest, secondaries[1], standalonePort, dbName, collName, {b: 1}, 'rolling_index_b_1');
 
 replTest.awaitNodesAgreeOnPrimary(
     replTest.kDefaultTimeoutMS, replTest.nodes, replTest.getNodeId(primary));
 
-jsTestLog('E. Build index on the primary: ' + primary.host);
+jsTestLog('Build index on the primary: ' + primary.host);
 assert.commandWorked(primaryDB.adminCommand({replSetStepDown: 60}));
 const newPrimary = replTest.getPrimary();
 jsTestLog('Stepped down primary for index build: ' + primary.host +
           '. New primary elected: ' + newPrimary.host);
-buildIndexOnNodeAsStandalone(primary);
+IndexBuildTest.buildIndexOnNodeAsStandalone(
+    replTest, primary, standalonePort, dbName, collName, {b: 1}, 'rolling_index_b_1');
 
 // Ensure we can create an index after doing a rolling index build.
 let newPrimaryDB = newPrimary.getDB(dbName);
diff --git a/jstests/noPassthrough/rolling_index_builds_interrupted.js b/jstests/noPassthrough/rolling_index_builds_interrupted.js
new file mode 100644
index 00000000000..d98bc5cdeb9
--- /dev/null
+++ b/jstests/noPassthrough/rolling_index_builds_interrupted.js
@@ -0,0 +1,87 @@
+/**
+ * Builds an index on a subset of nodes in a rolling fashion. Tests that building the same index
+ * with a primary that doesn't have the index logs a message on the secondaries that the index build
+ * commit quorum may not be achieved.
+ *
+ * @tags: [
+ *   requires_persistence,
+ *   requires_replication,
+ * ]
+ */
+(function() {
+'use strict';
+
+load('jstests/noPassthrough/libs/index_build.js');
+
+const replTest = new ReplSetTest({nodes: 3});
+const nodes = replTest.startSet();
+replTest.initiate();
+
+const dbName = 'test';
+const collName = 't';
+
+let primary = replTest.getPrimary();
+let primaryDB = primary.getDB(dbName);
+let primaryColl = primaryDB.getCollection(collName);
+
+// Populate collection to avoid empty collection optimization.
+const numDocs = 100;
+for (let i = 0; i < numDocs; i++) {
+    assert.commandWorked(primaryColl.insert({x: i}));
+}
+
+// Make sure the documents make it to the secondaries.
+replTest.awaitReplication();
+
+const secondaries = replTest.getSecondaries();
+assert.eq(nodes.length - 1,
+          secondaries.length,
+          'unexpected number of secondaries: ' + tojson(secondaries));
+
+const standalonePort = allocatePort();
+jsTestLog('Standalone server will listen on port: ' + standalonePort);
+
+// Build the index on the secondaries only.
+IndexBuildTest.buildIndexOnNodeAsStandalone(
+    replTest, secondaries[0], standalonePort, dbName, collName, {x: 1}, 'x_1');
+IndexBuildTest.buildIndexOnNodeAsStandalone(
+    replTest, secondaries[1], standalonePort, dbName, collName, {x: 1}, 'x_1');
+
+replTest.awaitNodesAgreeOnPrimary(
+    replTest.kDefaultTimeoutMS, replTest.nodes, replTest.getNodeId(primary));
+
+// TODO(SERVER-71768): fix the index build stall.
+jsTestLog('Build index on the primary as part of the replica set: ' + primary.host);
+let createIdx = IndexBuildTest.startIndexBuild(
+    primary, primaryColl.getFullName(), {x: 1}, {name: 'x_1'}, [ErrorCodes.Interrupted]);
+
+// When the index build starts, find its op id. This will be the op id of the client connection, not
+// the thread pool task managed by IndexBuildsCoordinatorMongod.
+const filter = {
+    "desc": {$regex: /conn.*/}
+};
+let opId = IndexBuildTest.waitForIndexBuildToStart(primaryDB, primaryColl.getName(), 'x_1', filter);
+
+checkLog.containsJson(secondaries[0], 7176900);
+checkLog.containsJson(secondaries[1], 7176900);
+// checkLog reads the server-side log via getLog, so clear it there before the second build.
+secondaries.forEach(node => assert.commandWorked(node.adminCommand({clearLog: 'global'})));
+assert.commandWorked(primaryDB.killOp(opId));
+createIdx();
+
+// Test building multiple indexes, some of which exist on the secondary.
+createIdx = IndexBuildTest.startIndexBuild(
+    primary, primaryColl.getFullName(), [{x: 1}, {y: 1}], {}, [ErrorCodes.Interrupted]);
+
+checkLog.containsJson(secondaries[0], 7176900);
+checkLog.containsJson(secondaries[1], 7176900);
+
+opId = IndexBuildTest.waitForIndexBuildToStart(primaryDB, primaryColl.getName(), 'x_1', filter);
+assert.commandWorked(primaryDB.killOp(opId));
+
+createIdx();
+
+// TODO(SERVER-71768): Check dbHash.
+TestData.skipCheckDBHashes = true;
+replTest.stopSet();
+}());
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index bb6822dac18..23e7fcd26ef 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -2719,15 +2719,34 @@ std::vector<BSONObj> IndexBuildsCoordinator::prepareSpecListForCreate(
 
     // During secondary oplog application, the index specs have already been normalized in the
     // oplog entries read from the primary. We should not be modifying the specs any further.
+    auto indexCatalog = collection->getIndexCatalog();
     auto replCoord = repl::ReplicationCoordinator::get(opCtx);
     if (replCoord->getSettings().usingReplSets() && !replCoord->canAcceptWritesFor(opCtx, nss)) {
+        // A secondary node with a subset of the indexes already built will not vote for the commit
+        // quorum, which can stall the index build indefinitely on a replica set.
+        try {
+            auto specsToBuild = indexCatalog->removeExistingIndexes(
+                opCtx, indexSpecs, /*removeIndexBuildsToo=*/true);
+            if (indexSpecs.size() != specsToBuild.size()) {
+                LOGV2_WARNING(
+                    7176900,
+                    "Secondary node already has a subset of indexes built and will not "
+                    "participate in voting towards the commit quorum. Use the "
+                    "'setIndexCommitQuorum' command to adjust the commit quorum accordingly",
+                    logAttrs(nss),
+                    logAttrs(collection->uuid()),
+                    "requestedSpecs"_attr = indexSpecs,
+                    "specsToBuild"_attr = specsToBuild);
+            }
+        } catch (const AssertionException&) {
+            // Best-effort diagnostic only; a failed check must not fail oplog application.
+        }
         return indexSpecs;
     }
 
     auto specsWithCollationDefaults =
         uassertStatusOK(collection->addCollationDefaultsToIndexSpecsForCreate(opCtx, indexSpecs));
 
-    auto indexCatalog = collection->getIndexCatalog();
     std::vector<BSONObj> resultSpecs;
 
     resultSpecs = indexCatalog->removeExistingIndexes(
-- 
cgit v1.2.1