summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Vesselina Ratcheva <vesselina.ratcheva@10gen.com> 2020-05-11 19:58:09 -0400
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-05-13 02:28:34 +0000
commit d7d6a14d30d8cd5b9ecc9d0bb74a959ab9349b61 (patch)
tree 2f478a68dc530755a7ebde618474a7fc9aa6b948
parent 6ed3c7611c6fdf992360c12765773f94fd903297 (diff)
download mongo-d7d6a14d30d8cd5b9ecc9d0bb74a959ab9349b61.tar.gz
SERVER-46357 Make it clear in currentOp when an automatic reconfig is happening
-rw-r--r-- jstests/replsets/currentOp_during_automatic_reconfig.js 96
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp 27
2 files changed, 123 insertions, 0 deletions
diff --git a/jstests/replsets/currentOp_during_automatic_reconfig.js b/jstests/replsets/currentOp_during_automatic_reconfig.js
new file mode 100644
index 00000000000..846ab3d0f50
--- /dev/null
+++ b/jstests/replsets/currentOp_during_automatic_reconfig.js
@@ -0,0 +1,96 @@
+/**
+ * Tests that currentOp displays information about in-progress automatic reconfigs.
+ *
+ * @tags: [
+ * requires_fcv_46,
+ * ]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+load('jstests/replsets/rslib.js');
+
+const testName = jsTestName();
+const dbName = "testdb";
+const collName = "testcoll";
+
+const rst = new ReplSetTest({
+ name: testName,
+ nodes: [{}],
+ nodeOptions: {setParameter: {enableAutomaticReconfig: true}},
+ settings: {chainingAllowed: false},
+ useBridge: true
+});
+rst.startSet();
+rst.initiateWithHighElectionTimeout();
+
+const primary = rst.getPrimary();
+const primaryDb = primary.getDB(dbName);
+const primaryColl = primaryDb.getCollection(collName);
+
+// TODO (SERVER-46808): Move this into ReplSetTest.initiate
+waitForNewlyAddedRemovalForNodeToBeCommitted(primary, 0);
+waitForConfigReplication(primary, rst.nodes);
+
+assert.commandWorked(primaryColl.insert({"starting": "doc"}));
+
+jsTestLog("Adding a new node to the replica set");
+const secondary = rst.add({
+ rsConfig: {priority: 0},
+ setParameter: {
+ 'failpoint.initialSyncHangBeforeFinish': tojson({mode: 'alwaysOn'}),
+ 'numInitialSyncAttempts': 1,
+ 'enableAutomaticReconfig': true,
+ }
+});
+rst.reInitiate();
+assert.commandWorked(secondary.adminCommand({
+ waitForFailPoint: "initialSyncHangBeforeFinish",
+ timesEntered: 1,
+ maxTimeMS: kDefaultWaitForFailPointTimeout
+}));
+
+jsTestLog("Checking that the 'newlyAdded' field is set on the new node");
+assert(isMemberNewlyAdded(primary, 1));
+
+jsTestLog("Allowing primary to initiate the 'newlyAdded' field removal");
+let hangDuringAutomaticReconfigFP = configureFailPoint(primaryDb, "hangDuringAutomaticReconfig");
+assert.commandWorked(
+ secondary.adminCommand({configureFailPoint: "initialSyncHangBeforeFinish", mode: "off"}));
+rst.waitForState(secondary, ReplSetTest.State.SECONDARY);
+
+hangDuringAutomaticReconfigFP.wait();
+
+jsTestLog("Looking for the automatic reconfig in the currentOp output");
+const curOpRes = assert.commandWorked(primaryDb.adminCommand({currentOp: 1}));
+
+const ops = curOpRes.inprog;
+let found = false;
+for (let i = 0; i < ops.length; i++) {
+ let op = ops[i];
+ assert(op.hasOwnProperty("command"), op);
+ const commandField = op["command"];
+ if (commandField.hasOwnProperty("replSetReconfig")) {
+ if (commandField["replSetReconfig"] === "automatic") {
+ assert(commandField.hasOwnProperty("configVersionAndTerm"));
+ assert(commandField.hasOwnProperty("memberId"), op);
+ assert.eq(1, commandField["memberId"], op);
+
+ assert(op.hasOwnProperty("desc"), op);
+ assert(op["desc"].startsWith("ReplCoord")); // client name
+
+ jsTestLog("Found automatic reconfig: " + tojson(op));
+ found = true;
+ break;
+ }
+ }
+}
+
+assert(found, ops);
+
+hangDuringAutomaticReconfigFP.off();
+waitForNewlyAddedRemovalForNodeToBeCommitted(primary, 1);
+rst.stopSet();
+})();
\ No newline at end of file
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index bcae984ce91..c887ef0c442 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -55,6 +55,7 @@
#include "mongo/db/commands/test_commands_enabled.h"
#include "mongo/db/concurrency/d_concurrency.h"
#include "mongo/db/concurrency/replication_state_transition_lock_guard.h"
+#include "mongo/db/curop.h"
#include "mongo/db/curop_failpoint_helpers.h"
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/index/index_descriptor.h"
@@ -127,6 +128,8 @@ MONGO_FAIL_POINT_DEFINE(omitConfigQuorumCheck);
// Will cause signal drain complete to hang after reconfig
MONGO_FAIL_POINT_DEFINE(hangAfterReconfigOnDrainComplete);
MONGO_FAIL_POINT_DEFINE(doNotRemoveNewlyAddedOnHeartbeats);
+// Will hang right after setting the currentOp info associated with an automatic reconfig.
+MONGO_FAIL_POINT_DEFINE(hangDuringAutomaticReconfig);
// Number of times we tried to go live as a secondary.
Counter64 attemptsToBecomeSecondary;
@@ -3716,6 +3719,30 @@ void ReplicationCoordinatorImpl::_reconfigToRemoveNewlyAddedField(
};
auto opCtx = cc().makeOperationContext();
+
+ // Set info for currentOp to display if called while this is still running.
+ {
+ stdx::unique_lock<Client> lk(*opCtx->getClient());
+ auto curOp = CurOp::get(opCtx.get());
+ curOp->setLogicalOp_inlock(LogicalOp::opCommand);
+ BSONObjBuilder bob;
+ bob.append("replSetReconfig", "automatic");
+ bob.append("memberId", memberId.getData());
+ bob.append("configVersionAndTerm", versionAndTerm.toString());
+ bob.append("info",
+ "An automatic reconfig. Used to remove a 'newlyAdded' config field for a "
+ "replica set member.");
+ curOp->setOpDescription_inlock(bob.obj());
+ curOp->setNS_inlock("local.system.replset");
+ curOp->ensureStarted();
+ }
+
+ if (MONGO_unlikely(hangDuringAutomaticReconfig.shouldFail())) {
+ LOGV2(4635700,
+ "Failpoint 'hangDuringAutomaticReconfig' enabled. Blocking until it is disabled.");
+ hangDuringAutomaticReconfig.pauseWhileSet();
+ }
+
auto status = doReplSetReconfig(opCtx.get(), getNewConfig, false /* force */);
if (!status.isOK()) {