diff options
-rw-r--r-- | jstests/replsets/tenant_migration_donor_shutdown_while_blocking_reads.js | 67 | ||||
-rw-r--r-- | src/mongo/db/repl/tenant_migration_access_blocker_util.cpp | 26 |
2 files changed, 87 insertions, 6 deletions
diff --git a/jstests/replsets/tenant_migration_donor_shutdown_while_blocking_reads.js b/jstests/replsets/tenant_migration_donor_shutdown_while_blocking_reads.js new file mode 100644 index 00000000000..1c968cb9015 --- /dev/null +++ b/jstests/replsets/tenant_migration_donor_shutdown_while_blocking_reads.js @@ -0,0 +1,67 @@ +/** + * Tests that tenant migration donor can peacefully shut down when there are reads being blocked due + * to an in-progress migration. + * + * @tags: [requires_fcv_47, requires_majority_read_concern, incompatible_with_eft, + * incompatible_with_windows_tls, incompatible_with_macos, requires_persistence] + */ + +(function() { +"use strict"; + +load("jstests/libs/parallelTester.js"); +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/uuid_util.js"); +load("jstests/replsets/libs/tenant_migration_test.js"); +load("jstests/replsets/libs/tenant_migration_util.js"); + +const tenantMigrationTest = new TenantMigrationTest({name: jsTestName()}); +if (!tenantMigrationTest.isFeatureFlagEnabled()) { + jsTestLog("Skipping test because the tenant migrations feature flag is disabled"); + return; +} + +const kTenantId = "testTenantId"; +const kDbName = kTenantId + "_testDb"; +const kCollName = "testColl"; + +const donorRst = tenantMigrationTest.getDonorRst(); +const donorPrimary = tenantMigrationTest.getDonorPrimary(); +const testDb = donorPrimary.getDB(kDbName); + +assert.commandWorked(testDb.runCommand({insert: kCollName, documents: [{_id: 0}]})); + +const migrationId = UUID(); +const migrationOpts = { + migrationIdString: extractUUIDFromObject(migrationId), + tenantId: kTenantId, +}; + +let fp = configureFailPoint(donorPrimary, "pauseTenantMigrationBeforeLeavingBlockingState"); +assert.commandWorked(tenantMigrationTest.startMigration( + migrationOpts, false /* retryOnRetryableErrors */, false /* automaticForgetMigration */)); + +fp.wait(); +const donorDoc = + donorPrimary.getCollection(TenantMigrationTest.kConfigDonorsNS).findOne({tenantId: kTenantId}); +assert.neq(null, donorDoc); + +let readThread = new Thread((host, dbName, collName, afterClusterTime) => { + const node = new Mongo(host); + const db = node.getDB(dbName); + const res = db.runCommand({ + find: collName, + readConcern: {afterClusterTime: Timestamp(afterClusterTime.t, afterClusterTime.i)} + }); + assert.commandFailedWithCode(res, ErrorCodes.InterruptedAtShutdown); +}, donorPrimary.host, kDbName, kCollName, donorDoc.blockTimestamp); +readThread.start(); + +// Shut down the donor after the read starts blocking. +assert.soon(() => TenantMigrationUtil.getNumBlockedReads(donorPrimary, kTenantId) == 1); +donorRst.stop(donorPrimary); +readThread.join(); + +donorRst.stopSet(); +tenantMigrationTest.stop(); +})(); diff --git a/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp b/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp index d96f4461848..e68e56a0404 100644 --- a/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp +++ b/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp @@ -145,7 +145,7 @@ SemiFuture<void> checkIfCanReadOrBlock(OperationContext* opCtx, const OpMsgReque // Source to cancel the timeout if the operation completed in time. CancelationSource cancelTimeoutSource; // Source to cancel waiting on the 'canReadFutures'. - CancelationSource cancelCanReadSource; + CancelationSource cancelCanReadSource(opCtx->getCancelationToken()); const auto donorMtab = mtabPair->getAccessBlocker(MtabType::kDonor); const auto recipientMtab = mtabPair->getAccessBlocker(MtabType::kRecipient); // A vector of futures where the donor access blocker's 'getCanReadFuture' will always precede @@ -207,20 +207,34 @@ SemiFuture<void> checkIfCanReadOrBlock(OperationContext* opCtx, const OpMsgReque } return donorMtabStatus; }) - .onError<ErrorCodes::CallbackCanceled>([cancelCanReadSource, + .onError<ErrorCodes::CallbackCanceled>([cancelTimeoutSource, + cancelCanReadSource, donorMtab, recipientMtab, timeoutError = opCtx->getTimeoutError()]( Status status) mutable { - cancelCanReadSource.cancel(); - // At least one of 'donorMtab' or 'recipientMtab' must exist if we timed out here. + auto isCanceledDueToTimeout = cancelTimeoutSource.token().isCanceled(); + + if (!isCanceledDueToTimeout) { + cancelTimeoutSource.cancel(); + } + + // At least one of 'donorMtab' or 'recipientMtab' must exist if we were canceled here. BSONObj info = donorMtab ? donorMtab->getDebugInfo() : recipientMtab->getDebugInfo(); if (recipientMtab) { info = info.addField(recipientMtab->getDebugInfo().getField("donorConnectionString")); } - return Status( - timeoutError, "Read timed out waiting for tenant migration blocker", info); + + if (isCanceledDueToTimeout) { + return Status( + timeoutError, + "Blocked read timed out waiting for tenant migration to commit or abort", + info); + } + + return status.withContext(str::stream() << "Canceled read blocked by tenant migration " + << info.toString()); }) .semi(); // To require continuation in the user executor. } |