summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCheahuychou Mao <cheahuychou.mao@mongodb.com>2019-12-03 15:51:38 +0000
committerevergreen <evergreen@mongodb.com>2019-12-03 15:51:38 +0000
commit966d9b880ad80b6d126f5bb4ad777312562cd93e (patch)
tree1b233ffe756892108717fe6543895c571d9b598f
parenta0831c75a97b935f30c2987a67a3ee6c92fab49c (diff)
downloadmongo-966d9b880ad80b6d126f5bb4ad777312562cd93e.tar.gz
SERVER-44775 Make pauseWhileSet increment _timesEntered once
-rw-r--r--jstests/fail_point/fail_point.js7
-rw-r--r--jstests/libs/fail_point_util.js4
-rw-r--r--jstests/noPassthrough/currentop_two_phase_coordinator_metrics.js10
-rw-r--r--jstests/noPassthrough/transaction_coordinator_curop_info.js8
-rw-r--r--jstests/sharding/track_unsharded_collections_rename_collection.js10
-rw-r--r--jstests/sharding/txn_two_phase_commit_failover.js15
-rw-r--r--src/mongo/db/commands/fail_point_cmd.cpp6
-rw-r--r--src/mongo/s/request_types/wait_for_fail_point.idl2
-rw-r--r--src/mongo/util/fail_point.h9
-rw-r--r--src/mongo/util/fail_point_test.cpp37
10 files changed, 73 insertions, 35 deletions
diff --git a/jstests/fail_point/fail_point.js b/jstests/fail_point/fail_point.js
index a4d1f0ce52c..ffb568eb90d 100644
--- a/jstests/fail_point/fail_point.js
+++ b/jstests/fail_point/fail_point.js
@@ -129,13 +129,10 @@ assert.commandWorked(testDB.adminCommand({
maxTimeMS: kDefaultWaitForFailPointTimeout
}));
-// Wait for a while before turning it off.
-sleep(200);
+// Turn off the fail point
configureFailPointRes =
assert.commandWorked(testDB.adminCommand({configureFailPoint: failPointName, mode: "off"}));
-
-// Check that waiting does not cause the count to be incremented.
-assert.eq(1, configureFailPointRes.count);
+assert.lte(1, configureFailPointRes.count);
joinHungWrite();
diff --git a/jstests/libs/fail_point_util.js b/jstests/libs/fail_point_util.js
index 3841562c96a..4f6443cc524 100644
--- a/jstests/libs/fail_point_util.js
+++ b/jstests/libs/fail_point_util.js
@@ -23,13 +23,13 @@ configureFailPoint = function(conn, failPointName, data = {}, failPointMode = "a
{configureFailPoint: failPointName, mode: failPointMode, data: data}))
.count,
wait:
- function(additionalTimes = 1, maxTimeMS = kDefaultWaitForFailPointTimeout) {
+ function(maxTimeMS = kDefaultWaitForFailPointTimeout) {
// Can only be called once because this function does not keep track of the
// number of times the fail point is entered between the time it returns
// and the next time it gets called.
assert.commandWorked(conn.adminCommand({
waitForFailPoint: failPointName,
- timesEntered: this.timesEntered + additionalTimes,
+ timesEntered: this.timesEntered + 1,
maxTimeMS: maxTimeMS
}));
},
diff --git a/jstests/noPassthrough/currentop_two_phase_coordinator_metrics.js b/jstests/noPassthrough/currentop_two_phase_coordinator_metrics.js
index b7927719312..f5cdc047a48 100644
--- a/jstests/noPassthrough/currentop_two_phase_coordinator_metrics.js
+++ b/jstests/noPassthrough/currentop_two_phase_coordinator_metrics.js
@@ -7,11 +7,17 @@
(function() {
'use strict';
load('jstests/libs/fail_point_util.js');
+load('jstests/sharding/libs/sharded_transactions_helpers.js'); // for waitForFailpoint
function curOpAfterFailpoint(failPoint, filter, timesEntered, curOpParams) {
- jsTest.log(`waiting for failpoint '${failPoint.failPointName}' to appear in the log ${
+ jsTest.log(`waiting for failpoint '${failPoint.failPointName}' to be entered ${
timesEntered} time(s).`);
- failPoint.wait(timesEntered);
+ if (timesEntered > 1) {
+ const expectedLog = "Hit " + failPoint.failPointName + " failpoint";
+ waitForFailpoint(expectedLog, timesEntered);
+ } else {
+ failPoint.wait();
+ }
jsTest.log(`Running curOp operation after '${failPoint.failPointName}' failpoint.`);
let result = adminDB.aggregate([{$currentOp: {}}, {$match: filter}]).toArray();
diff --git a/jstests/noPassthrough/transaction_coordinator_curop_info.js b/jstests/noPassthrough/transaction_coordinator_curop_info.js
index 748ea166889..2369285bc2a 100644
--- a/jstests/noPassthrough/transaction_coordinator_curop_info.js
+++ b/jstests/noPassthrough/transaction_coordinator_curop_info.js
@@ -7,6 +7,7 @@
(function() {
'use strict';
load('jstests/libs/fail_point_util.js');
+load('jstests/sharding/libs/sharded_transactions_helpers.js'); // for waitForFailpoint
function commitTxn(st, lsid, txnNumber, expectedError = null) {
let cmd = "db.adminCommand({" +
@@ -27,7 +28,12 @@ function commitTxn(st, lsid, txnNumber, expectedError = null) {
function curOpAfterFailpoint(failPoint, filter, timesEntered = 1) {
jsTest.log(`waiting for failpoint '${failPoint.failPointName}' to be entered ${
timesEntered} time(s).`);
- failPoint.wait(timesEntered);
+ if (timesEntered > 1) {
+ const expectedLog = "Hit " + failPoint.failPointName + " failpoint";
+ waitForFailpoint(expectedLog, timesEntered);
+ } else {
+ failPoint.wait();
+ }
jsTest.log(`Running curOp operation after '${failPoint.failPointName}' failpoint.`);
let result = adminDB.aggregate([{$currentOp: {}}, {$match: filter}]).toArray();
diff --git a/jstests/sharding/track_unsharded_collections_rename_collection.js b/jstests/sharding/track_unsharded_collections_rename_collection.js
index 0712f569a6e..6fd1fcf5864 100644
--- a/jstests/sharding/track_unsharded_collections_rename_collection.js
+++ b/jstests/sharding/track_unsharded_collections_rename_collection.js
@@ -10,6 +10,7 @@ TestData.skipCheckingUUIDsConsistentAcrossCluster = true;
'use strict';
load("jstests/libs/fail_point_util.js");
+load('jstests/sharding/libs/sharded_transactions_helpers.js'); // for waitForFailpoint
load("jstests/sharding/libs/track_unsharded_collections_helpers.js");
function runCollectionRenameWithConfigStepdownAtFailpointInShard(
@@ -18,15 +19,15 @@ function runCollectionRenameWithConfigStepdownAtFailpointInShard(
const configPrimary = st.configRS.getPrimary();
clearRawMongoProgramOutput();
- let failPoint = configureFailPoint(node, failpointName);
+ setFailpoint(failpointName, node);
const renameCode = `assert.commandWorked(db.getSiblingDB("${
dbName}").adminCommand({renameCollection: "${fromNs}", to: "${toNs}"}));`;
const awaitResult = startParallelShell(renameCode, st.s.port);
- failPoint.wait();
+ waitForFailpoint("Hit " + failpointName, 1);
assert.commandWorked(configPrimary.adminCommand({replSetStepDown: 1, force: true}));
- failPoint.wait(2);
- failPoint.off();
+ waitForFailpoint("Hit " + failpointName, 2);
+ unsetFailpoint(failpointName, node);
awaitResult();
}
@@ -37,7 +38,6 @@ function runCollectionRenameWithConfigStepdownAtFailpointInPrimaryConfigsvr(
const configPrimary = st.configRS.getPrimary();
clearRawMongoProgramOutput();
- setFailpoint(failpointName, configPrimary);
let failPoint = configureFailPoint(configPrimary, failpointName);
const renameCode = `assert.commandWorked(db.getSiblingDB("${
diff --git a/jstests/sharding/txn_two_phase_commit_failover.js b/jstests/sharding/txn_two_phase_commit_failover.js
index de2ac501a52..1e86c8a3cf3 100644
--- a/jstests/sharding/txn_two_phase_commit_failover.js
+++ b/jstests/sharding/txn_two_phase_commit_failover.js
@@ -13,7 +13,6 @@ TestData.skipCheckingUUIDsConsistentAcrossCluster = true;
(function() {
'use strict';
-load("jstests/libs/fail_point_util.js");
load('jstests/sharding/libs/sharded_transactions_helpers.js');
const dbName = "test";
@@ -132,9 +131,10 @@ const runTest = function(sameNodeStepsUpAfterFailover) {
}));
}
- let failPoint = configureFailPoint(coordPrimary, failpointData.failpoint, {}, {
- skip: (failpointData.skip ? failpointData.skip : 0)
- });
+ assert.commandWorked(coordPrimary.adminCommand({
+ configureFailPoint: failpointData.failpoint,
+ mode: {skip: (failpointData.skip ? failpointData.skip : 0)},
+ }));
// Run commitTransaction through a parallel shell.
let awaitResult;
@@ -152,12 +152,15 @@ const runTest = function(sameNodeStepsUpAfterFailover) {
numTimesShouldBeHit++;
}
- failPoint.wait(numTimesShouldBeHit);
+ waitForFailpoint("Hit " + failpointData.failpoint + " failpoint", numTimesShouldBeHit);
// Induce the coordinator primary to step down.
assert.commandWorked(
coordPrimary.adminCommand({replSetStepDown: stepDownSecs, force: true}));
- failPoint.off();
+ assert.commandWorked(coordPrimary.adminCommand({
+ configureFailPoint: failpointData.failpoint,
+ mode: "off",
+ }));
// The router should retry commitTransaction against the new primary.
awaitResult();
diff --git a/src/mongo/db/commands/fail_point_cmd.cpp b/src/mongo/db/commands/fail_point_cmd.cpp
index ffc7783b52d..f166169a805 100644
--- a/src/mongo/db/commands/fail_point_cmd.cpp
+++ b/src/mongo/db/commands/fail_point_cmd.cpp
@@ -111,6 +111,12 @@ public:
/**
* Command for waiting for installed fail points.
+ *
+ * For number of additional times entered > 1, this command is only guaranteed to work
+ * correctly if the code that enters the fail point uses the FailPoint API correctly.
+ * That is, the code can only use one of shouldFail, pauseWhileSet, scopedIf, scoped,
+ * executeIf, and execute to enter the fail point (as all of these functions have side
+ * effects on the counter for times entered).
*/
class WaitForFailPointCommand : public TypedCommand<WaitForFailPointCommand> {
public:
diff --git a/src/mongo/s/request_types/wait_for_fail_point.idl b/src/mongo/s/request_types/wait_for_fail_point.idl
index b3ef4891bd0..b0df9d27e78 100644
--- a/src/mongo/s/request_types/wait_for_fail_point.idl
+++ b/src/mongo/s/request_types/wait_for_fail_point.idl
@@ -43,7 +43,7 @@ commands:
fields:
timesEntered:
type: safeInt64
- description: "The number of times the fail point has been entered."
+ description: "The target number of times the fail point has been entered."
optional: false
maxTimeMS:
type: safeInt64
diff --git a/src/mongo/util/fail_point.h b/src/mongo/util/fail_point.h
index dd8bd8c9916..d0a0c4ea5d8 100644
--- a/src/mongo/util/fail_point.h
+++ b/src/mongo/util/fail_point.h
@@ -318,10 +318,12 @@ public:
/**
* Take 100msec pauses for as long as the FailPoint is active.
- * This uses `shouldFail()` and therefore affects FailPoint counters.
+ * This calls `shouldFail()` with kFirstTimeEntered once and with kEnteredAlready thereafter, so
+ * affects FailPoint counters once.
*/
void pauseWhileSet() {
- while (MONGO_unlikely(shouldFail(kEnteredAlready))) {
+ for (auto entryMode = kFirstTimeEntered; MONGO_unlikely(shouldFail(entryMode));
+ entryMode = kEnteredAlready) {
sleepmillis(100);
}
}
@@ -332,7 +334,8 @@ public:
* OperationContext).
*/
void pauseWhileSet(OperationContext* opCtx) {
- while (MONGO_unlikely(shouldFail(kEnteredAlready))) {
+ for (auto entryMode = kFirstTimeEntered; MONGO_unlikely(shouldFail(entryMode));
+ entryMode = kEnteredAlready) {
opCtx->sleepFor(Milliseconds(100));
}
}
diff --git a/src/mongo/util/fail_point_test.cpp b/src/mongo/util/fail_point_test.cpp
index efc7d56346e..7b9973a9825 100644
--- a/src/mongo/util/fail_point_test.cpp
+++ b/src/mongo/util/fail_point_test.cpp
@@ -432,28 +432,45 @@ TEST(FailPoint, FailPointBlockIfBasicTest) {
namespace mongo {
-TEST(FailPoint, WaitForFailPointTimeout) {
- FailPoint failPoint;
- failPoint.setMode(FailPoint::alwaysOn);
-
+/**
+ * Runs the given function with an operation context that has a deadline and asserts that
+ * the function is interruptable.
+ */
+void assertFunctionInterruptable(std::function<void(OperationContext* opCtx)> f) {
const auto service = ServiceContext::make();
const std::shared_ptr<ClockSourceMock> mockClock = std::make_shared<ClockSourceMock>();
service->setFastClockSource(std::make_unique<SharedClockSourceAdapter>(mockClock));
service->setPreciseClockSource(std::make_unique<SharedClockSourceAdapter>(mockClock));
service->setTickSource(std::make_unique<TickSourceMock<>>());
- const auto client = service->makeClient("WaitForFailPointTest");
+ const auto client = service->makeClient("FailPointTest");
auto opCtx = client->makeOperationContext();
opCtx->setDeadlineAfterNowBy(Milliseconds{999}, ErrorCodes::ExceededTimeLimit);
- stdx::thread waitForFailPoint([&] {
- ASSERT_THROWS_CODE(failPoint.waitForTimesEntered(opCtx.get(), 1),
- AssertionException,
- ErrorCodes::ExceededTimeLimit);
+ stdx::thread th([&] {
+ ASSERT_THROWS_CODE(f(opCtx.get()), AssertionException, ErrorCodes::ExceededTimeLimit);
});
mockClock->advance(Milliseconds{1000});
- waitForFailPoint.join();
+ th.join();
+}
+
+TEST(FailPoint, PauseWhileSetInterruptibility) {
+ FailPoint failPoint;
+ failPoint.setMode(FailPoint::alwaysOn);
+
+ assertFunctionInterruptable(
+ [&failPoint](OperationContext* opCtx) { failPoint.pauseWhileSet(opCtx); });
+
+ failPoint.setMode(FailPoint::off);
+}
+
+TEST(FailPoint, WaitForFailPointTimeout) {
+ FailPoint failPoint;
+ failPoint.setMode(FailPoint::alwaysOn);
+
+ assertFunctionInterruptable(
+ [&failPoint](OperationContext* opCtx) { failPoint.waitForTimesEntered(opCtx, 1); });
failPoint.setMode(FailPoint::off);
}