diff options
author | Cheahuychou Mao <cheahuychou.mao@mongodb.com> | 2019-12-03 15:51:38 +0000 |
---|---|---|
committer | evergreen <evergreen@mongodb.com> | 2019-12-03 15:51:38 +0000 |
commit | 966d9b880ad80b6d126f5bb4ad777312562cd93e (patch) | |
tree | 1b233ffe756892108717fe6543895c571d9b598f | |
parent | a0831c75a97b935f30c2987a67a3ee6c92fab49c (diff) | |
download | mongo-966d9b880ad80b6d126f5bb4ad777312562cd93e.tar.gz |
SERVER-44775 Make pauseWhileSet increment _timesEntered once
-rw-r--r-- | jstests/fail_point/fail_point.js | 7 | ||||
-rw-r--r-- | jstests/libs/fail_point_util.js | 4 | ||||
-rw-r--r-- | jstests/noPassthrough/currentop_two_phase_coordinator_metrics.js | 10 | ||||
-rw-r--r-- | jstests/noPassthrough/transaction_coordinator_curop_info.js | 8 | ||||
-rw-r--r-- | jstests/sharding/track_unsharded_collections_rename_collection.js | 10 | ||||
-rw-r--r-- | jstests/sharding/txn_two_phase_commit_failover.js | 15 | ||||
-rw-r--r-- | src/mongo/db/commands/fail_point_cmd.cpp | 6 | ||||
-rw-r--r-- | src/mongo/s/request_types/wait_for_fail_point.idl | 2 | ||||
-rw-r--r-- | src/mongo/util/fail_point.h | 9 | ||||
-rw-r--r-- | src/mongo/util/fail_point_test.cpp | 37 |
10 files changed, 73 insertions, 35 deletions
diff --git a/jstests/fail_point/fail_point.js b/jstests/fail_point/fail_point.js index a4d1f0ce52c..ffb568eb90d 100644 --- a/jstests/fail_point/fail_point.js +++ b/jstests/fail_point/fail_point.js @@ -129,13 +129,10 @@ assert.commandWorked(testDB.adminCommand({ maxTimeMS: kDefaultWaitForFailPointTimeout })); -// Wait for a while before turning it off. -sleep(200); +// Turn off the fail point configureFailPointRes = assert.commandWorked(testDB.adminCommand({configureFailPoint: failPointName, mode: "off"})); - -// Check that waiting does not cause the count to be incremented. -assert.eq(1, configureFailPointRes.count); +assert.lte(1, configureFailPointRes.count); joinHungWrite(); diff --git a/jstests/libs/fail_point_util.js b/jstests/libs/fail_point_util.js index 3841562c96a..4f6443cc524 100644 --- a/jstests/libs/fail_point_util.js +++ b/jstests/libs/fail_point_util.js @@ -23,13 +23,13 @@ configureFailPoint = function(conn, failPointName, data = {}, failPointMode = "a {configureFailPoint: failPointName, mode: failPointMode, data: data})) .count, wait: - function(additionalTimes = 1, maxTimeMS = kDefaultWaitForFailPointTimeout) { + function(maxTimeMS = kDefaultWaitForFailPointTimeout) { // Can only be called once because this function does not keep track of the // number of times the fail point is entered between the time it returns // and the next time it gets called. assert.commandWorked(conn.adminCommand({ waitForFailPoint: failPointName, - timesEntered: this.timesEntered + additionalTimes, + timesEntered: this.timesEntered + 1, maxTimeMS: maxTimeMS })); }, diff --git a/jstests/noPassthrough/currentop_two_phase_coordinator_metrics.js b/jstests/noPassthrough/currentop_two_phase_coordinator_metrics.js index b7927719312..f5cdc047a48 100644 --- a/jstests/noPassthrough/currentop_two_phase_coordinator_metrics.js +++ b/jstests/noPassthrough/currentop_two_phase_coordinator_metrics.js @@ -7,11 +7,17 @@ (function() { 'use strict'; load('jstests/libs/fail_point_util.js'); +load('jstests/sharding/libs/sharded_transactions_helpers.js'); // for waitForFailpoint function curOpAfterFailpoint(failPoint, filter, timesEntered, curOpParams) { - jsTest.log(`waiting for failpoint '${failPoint.failPointName}' to appear in the log ${ + jsTest.log(`waiting for failpoint '${failPoint.failPointName}' to be entered ${ timesEntered} time(s).`); - failPoint.wait(timesEntered); + if (timesEntered > 1) { + const expectedLog = "Hit " + failPoint.failPointName + " failpoint"; + waitForFailpoint(expectedLog, timesEntered); + } else { + failPoint.wait(); + } jsTest.log(`Running curOp operation after '${failPoint.failPointName}' failpoint.`); let result = adminDB.aggregate([{$currentOp: {}}, {$match: filter}]).toArray(); diff --git a/jstests/noPassthrough/transaction_coordinator_curop_info.js b/jstests/noPassthrough/transaction_coordinator_curop_info.js index 748ea166889..2369285bc2a 100644 --- a/jstests/noPassthrough/transaction_coordinator_curop_info.js +++ b/jstests/noPassthrough/transaction_coordinator_curop_info.js @@ -7,6 +7,7 @@ (function() { 'use strict'; load('jstests/libs/fail_point_util.js'); +load('jstests/sharding/libs/sharded_transactions_helpers.js'); // for waitForFailpoint function commitTxn(st, lsid, txnNumber, expectedError = null) { let cmd = "db.adminCommand({" + @@ -27,7 +28,12 @@ function commitTxn(st, lsid, txnNumber, expectedError = null) { function curOpAfterFailpoint(failPoint, filter, timesEntered = 1) { jsTest.log(`waiting for failpoint '${failPoint.failPointName}' to be entered ${ timesEntered} time(s).`); - failPoint.wait(timesEntered); + if (timesEntered > 1) { + const expectedLog = "Hit " + failPoint.failPointName + " failpoint"; + waitForFailpoint(expectedLog, timesEntered); + } else { + failPoint.wait(); + } jsTest.log(`Running curOp operation after '${failPoint.failPointName}' failpoint.`); let result = adminDB.aggregate([{$currentOp: {}}, {$match: filter}]).toArray(); diff --git a/jstests/sharding/track_unsharded_collections_rename_collection.js b/jstests/sharding/track_unsharded_collections_rename_collection.js index 0712f569a6e..6fd1fcf5864 100644 --- a/jstests/sharding/track_unsharded_collections_rename_collection.js +++ b/jstests/sharding/track_unsharded_collections_rename_collection.js @@ -10,6 +10,7 @@ TestData.skipCheckingUUIDsConsistentAcrossCluster = true; 'use strict'; load("jstests/libs/fail_point_util.js"); +load('jstests/sharding/libs/sharded_transactions_helpers.js'); // for waitForFailpoint load("jstests/sharding/libs/track_unsharded_collections_helpers.js"); function runCollectionRenameWithConfigStepdownAtFailpointInShard( @@ -18,15 +19,15 @@ function runCollectionRenameWithConfigStepdownAtFailpointInShard( const configPrimary = st.configRS.getPrimary(); clearRawMongoProgramOutput(); - let failPoint = configureFailPoint(node, failpointName); + setFailpoint(failpointName, node); const renameCode = `assert.commandWorked(db.getSiblingDB("${ dbName}").adminCommand({renameCollection: "${fromNs}", to: "${toNs}"}));`; const awaitResult = startParallelShell(renameCode, st.s.port); - failPoint.wait(); + waitForFailpoint("Hit " + failpointName, 1); assert.commandWorked(configPrimary.adminCommand({replSetStepDown: 1, force: true})); - failPoint.wait(2); - failPoint.off(); + waitForFailpoint("Hit " + failpointName, 2); + unsetFailpoint(failpointName, node); awaitResult(); } @@ -37,7 +38,6 @@ function runCollectionRenameWithConfigStepdownAtFailpointInPrimaryConfigsvr( const configPrimary = st.configRS.getPrimary(); clearRawMongoProgramOutput(); - setFailpoint(failpointName, configPrimary); let failPoint = configureFailPoint(configPrimary, failpointName); const renameCode = `assert.commandWorked(db.getSiblingDB("${ diff --git a/jstests/sharding/txn_two_phase_commit_failover.js b/jstests/sharding/txn_two_phase_commit_failover.js index de2ac501a52..1e86c8a3cf3 100644 --- a/jstests/sharding/txn_two_phase_commit_failover.js +++ b/jstests/sharding/txn_two_phase_commit_failover.js @@ -13,7 +13,6 @@ TestData.skipCheckingUUIDsConsistentAcrossCluster = true; (function() { 'use strict'; -load("jstests/libs/fail_point_util.js"); load('jstests/sharding/libs/sharded_transactions_helpers.js'); const dbName = "test"; @@ -132,9 +131,10 @@ const runTest = function(sameNodeStepsUpAfterFailover) { })); } - let failPoint = configureFailPoint(coordPrimary, failpointData.failpoint, {}, { - skip: (failpointData.skip ? failpointData.skip : 0) - }); + assert.commandWorked(coordPrimary.adminCommand({ + configureFailPoint: failpointData.failpoint, + mode: {skip: (failpointData.skip ? failpointData.skip : 0)}, + })); // Run commitTransaction through a parallel shell. let awaitResult; @@ -152,12 +152,15 @@ const runTest = function(sameNodeStepsUpAfterFailover) { numTimesShouldBeHit++; } - failPoint.wait(numTimesShouldBeHit); + waitForFailpoint("Hit " + failpointData.failpoint + " failpoint", numTimesShouldBeHit); // Induce the coordinator primary to step down. assert.commandWorked( coordPrimary.adminCommand({replSetStepDown: stepDownSecs, force: true})); - failPoint.off(); + assert.commandWorked(coordPrimary.adminCommand({ + configureFailPoint: failpointData.failpoint, + mode: "off", + })); // The router should retry commitTransaction against the new primary. awaitResult(); diff --git a/src/mongo/db/commands/fail_point_cmd.cpp b/src/mongo/db/commands/fail_point_cmd.cpp index ffc7783b52d..f166169a805 100644 --- a/src/mongo/db/commands/fail_point_cmd.cpp +++ b/src/mongo/db/commands/fail_point_cmd.cpp @@ -111,6 +111,12 @@ public: /** * Command for waiting for installed fail points. + * + * For number of additional times entered > 1, this command is only guaranteed to work + * correctly if the code that enters the fail point uses the FailPoint API correctly. + * That is, the code can only use one of shouldFail, pauseWhileSet, scopedIf, scoped, + * executeIf, and execute to enter the fail point (as all of these functions have side + * effects on the counter for times entered). */ class WaitForFailPointCommand : public TypedCommand<WaitForFailPointCommand> { public: diff --git a/src/mongo/s/request_types/wait_for_fail_point.idl b/src/mongo/s/request_types/wait_for_fail_point.idl index b3ef4891bd0..b0df9d27e78 100644 --- a/src/mongo/s/request_types/wait_for_fail_point.idl +++ b/src/mongo/s/request_types/wait_for_fail_point.idl @@ -43,7 +43,7 @@ commands: fields: timesEntered: type: safeInt64 - description: "The number of times the fail point has been entered." + description: "The target number of times the fail point has been entered." optional: false maxTimeMS: type: safeInt64 diff --git a/src/mongo/util/fail_point.h b/src/mongo/util/fail_point.h index dd8bd8c9916..d0a0c4ea5d8 100644 --- a/src/mongo/util/fail_point.h +++ b/src/mongo/util/fail_point.h @@ -318,10 +318,12 @@ public: /** * Take 100msec pauses for as long as the FailPoint is active. - * This uses `shouldFail()` and therefore affects FailPoint counters. + * This calls `shouldFail()` with kFirstTimeEntered once and with kEnteredAlready thereafter, so + * affects FailPoint counters once. */ void pauseWhileSet() { - while (MONGO_unlikely(shouldFail(kEnteredAlready))) { + for (auto entryMode = kFirstTimeEntered; MONGO_unlikely(shouldFail(entryMode)); + entryMode = kEnteredAlready) { sleepmillis(100); } } @@ -332,7 +334,8 @@ public: * OperationContext). */ void pauseWhileSet(OperationContext* opCtx) { - while (MONGO_unlikely(shouldFail(kEnteredAlready))) { + for (auto entryMode = kFirstTimeEntered; MONGO_unlikely(shouldFail(entryMode)); + entryMode = kEnteredAlready) { opCtx->sleepFor(Milliseconds(100)); } } diff --git a/src/mongo/util/fail_point_test.cpp b/src/mongo/util/fail_point_test.cpp index efc7d56346e..7b9973a9825 100644 --- a/src/mongo/util/fail_point_test.cpp +++ b/src/mongo/util/fail_point_test.cpp @@ -432,28 +432,45 @@ TEST(FailPoint, FailPointBlockIfBasicTest) { namespace mongo { -TEST(FailPoint, WaitForFailPointTimeout) { - FailPoint failPoint; - failPoint.setMode(FailPoint::alwaysOn); - +/** + * Runs the given function with an operation context that has a deadline and asserts that + * the function is interruptable. + */ +void assertFunctionInterruptable(std::function<void(OperationContext* opCtx)> f) { const auto service = ServiceContext::make(); const std::shared_ptr<ClockSourceMock> mockClock = std::make_shared<ClockSourceMock>(); service->setFastClockSource(std::make_unique<SharedClockSourceAdapter>(mockClock)); service->setPreciseClockSource(std::make_unique<SharedClockSourceAdapter>(mockClock)); service->setTickSource(std::make_unique<TickSourceMock<>>()); - const auto client = service->makeClient("WaitForFailPointTest"); + const auto client = service->makeClient("FailPointTest"); auto opCtx = client->makeOperationContext(); opCtx->setDeadlineAfterNowBy(Milliseconds{999}, ErrorCodes::ExceededTimeLimit); - stdx::thread waitForFailPoint([&] { - ASSERT_THROWS_CODE(failPoint.waitForTimesEntered(opCtx.get(), 1), - AssertionException, - ErrorCodes::ExceededTimeLimit); + stdx::thread th([&] { + ASSERT_THROWS_CODE(f(opCtx.get()), AssertionException, ErrorCodes::ExceededTimeLimit); }); mockClock->advance(Milliseconds{1000}); - waitForFailPoint.join(); + th.join(); +} + +TEST(FailPoint, PauseWhileSetInterruptibility) { + FailPoint failPoint; + failPoint.setMode(FailPoint::alwaysOn); + + assertFunctionInterruptable( + [&failPoint](OperationContext* opCtx) { failPoint.pauseWhileSet(opCtx); }); + + failPoint.setMode(FailPoint::off); +} + +TEST(FailPoint, WaitForFailPointTimeout) { + FailPoint failPoint; + failPoint.setMode(FailPoint::alwaysOn); + + assertFunctionInterruptable( + [&failPoint](OperationContext* opCtx) { failPoint.waitForTimesEntered(opCtx, 1); }); failPoint.setMode(FailPoint::off); } |