summary | refs | log | tree | commit | diff
path: root/src/mongo/db/serverless
diff options
context:
space:
mode:
author: Didier Nadeau <didier.nadeau@mongodb.com> 2022-07-27 18:53:27 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-07-27 21:42:58 +0000
commit: 0760d88c733011a7835b765368283e8dc2d5c144 (patch)
tree: 4e25db0cc6347d555e175976b5f55788bb9d50e9 /src/mongo/db/serverless
parent: 7a4fd7f264f0088c3e941d23ee66c4a6f5485b43 (diff)
download: mongo-0760d88c733011a7835b765368283e8dc2d5c144.tar.gz
SERVER-68263 Do not remove blockers when deleting an aborted shard split state document
Diffstat (limited to 'src/mongo/db/serverless')
-rw-r--r-- src/mongo/db/serverless/shard_split_donor_op_observer.cpp | 6
-rw-r--r-- src/mongo/db/serverless/shard_split_donor_op_observer_test.cpp | 124
2 files changed, 125 insertions, 5 deletions
diff --git a/src/mongo/db/serverless/shard_split_donor_op_observer.cpp b/src/mongo/db/serverless/shard_split_donor_op_observer.cpp
index e016c9c4a86..fa8f8d69fa7 100644
--- a/src/mongo/db/serverless/shard_split_donor_op_observer.cpp
+++ b/src/mongo/db/serverless/shard_split_donor_op_observer.cpp
@@ -392,7 +392,11 @@ void ShardSplitDonorOpObserver::aboutToDelete(OperationContext* opCtx,
donorStateDoc.getExpireAt() ||
serverless::shouldRemoveStateDocumentOnRecipient(opCtx, donorStateDoc));
- if (donorStateDoc.getTenantIds()) {
+ // To support back-to-back split retries, when a split is aborted, we remove its
+ // TenantMigrationDonorAccessBlockers as soon as its donor state doc is marked as garbage
+ // collectable. So onDelete should skip removing the TenantMigrationDonorAccessBlockers for
+ // aborted splits.
+ if (donorStateDoc.getState() != ShardSplitDonorStateEnum::kAborted) {
auto tenantIds = *donorStateDoc.getTenantIds();
std::vector<std::string> result;
result.reserve(tenantIds.size());
diff --git a/src/mongo/db/serverless/shard_split_donor_op_observer_test.cpp b/src/mongo/db/serverless/shard_split_donor_op_observer_test.cpp
index 6f2e376de47..cea3f892e46 100644
--- a/src/mongo/db/serverless/shard_split_donor_op_observer_test.cpp
+++ b/src/mongo/db/serverless/shard_split_donor_op_observer_test.cpp
@@ -384,9 +384,9 @@ TEST_F(ShardSplitDonorOpObserverTest, TransitionToCommit) {
}
TEST_F(ShardSplitDonorOpObserverTest, TransitionToAbort) {
- // Transition to commit needs a commitOpTime in the OpLog
- auto commitOpTime = mongo::repl::OpTime(Timestamp(1, 3), 2);
- _replicationCoordinatorMock->setCurrentCommittedSnapshotOpTime(commitOpTime);
+ // Transition to abort needs an abortOpTime in the OpLog
+ auto abortOpTime = mongo::repl::OpTime(Timestamp(1, 3), 2);
+ _replicationCoordinatorMock->setCurrentCommittedSnapshotOpTime(abortOpTime);
Status status(ErrorCodes::CallbackCanceled, "Split has been aborted");
BSONObjBuilder bob;
@@ -395,7 +395,7 @@ TEST_F(ShardSplitDonorOpObserverTest, TransitionToAbort) {
auto stateDocument = defaultStateDocument();
stateDocument.setState(ShardSplitDonorStateEnum::kAborted);
stateDocument.setBlockTimestamp(Timestamp(1, 2));
- stateDocument.setCommitOrAbortOpTime(commitOpTime);
+ stateDocument.setCommitOrAbortOpTime(abortOpTime);
stateDocument.setAbortReason(bob.obj());
auto blockers = createBlockersAndStartBlockingWrites(_tenantIds, _opCtx.get(), _connectionStr);
@@ -414,5 +414,121 @@ TEST_F(ShardSplitDonorOpObserverTest, TransitionToAbort) {
runUpdateTestCase(stateDocument, _tenantIds, mtabVerifier);
}
+TEST_F(ShardSplitDonorOpObserverTest, SetExpireAtForAbortedRemoveBlockers) {
+ // Transition to abort needs an abortOpTime in the OpLog
+ auto abortOpTime = mongo::repl::OpTime(Timestamp(1, 3), 2);
+ _replicationCoordinatorMock->setCurrentCommittedSnapshotOpTime(abortOpTime);
+
+ Status status(ErrorCodes::CallbackCanceled, "Split has been aborted");
+ BSONObjBuilder bob;
+ status.serializeErrorToBSON(&bob);
+
+ auto stateDocument = defaultStateDocument();
+ stateDocument.setState(ShardSplitDonorStateEnum::kAborted);
+ stateDocument.setBlockTimestamp(Timestamp(1, 2));
+ stateDocument.setCommitOrAbortOpTime(abortOpTime);
+ stateDocument.setAbortReason(bob.obj());
+ stateDocument.setExpireAt(mongo::Date_t::fromMillisSinceEpoch(1000));
+
+ auto blockers = createBlockersAndStartBlockingWrites(_tenantIds, _opCtx.get(), _connectionStr);
+ startBlockingReadsAfter(blockers, Timestamp(1));
+ for (auto& blocker : blockers) {
+ blocker->setAbortOpTime(_opCtx.get(), *stateDocument.getCommitOrAbortOpTime());
+ }
+
+ auto mtabVerifier = [opCtx = _opCtx.get()](std::shared_ptr<TenantMigrationAccessBlocker> mtab) {
+ ASSERT_FALSE(mtab);
+ };
+
+ runUpdateTestCase(stateDocument, _tenantIds, mtabVerifier);
+}
+
+TEST_F(ShardSplitDonorOpObserverTest, DeleteAbortedDocumentDoesNotRemoveBlockers) {
+ // Transition to abort needs an abortOpTime in the OpLog
+ auto abortOpTime = mongo::repl::OpTime(Timestamp(1, 3), 2);
+ _replicationCoordinatorMock->setCurrentCommittedSnapshotOpTime(abortOpTime);
+
+ Status status(ErrorCodes::CallbackCanceled, "Split has been aborted");
+ BSONObjBuilder bob;
+ status.serializeErrorToBSON(&bob);
+
+ auto stateDocument = defaultStateDocument();
+ stateDocument.setState(ShardSplitDonorStateEnum::kAborted);
+ stateDocument.setBlockTimestamp(Timestamp(1, 2));
+ stateDocument.setCommitOrAbortOpTime(abortOpTime);
+ stateDocument.setAbortReason(bob.obj());
+ stateDocument.setExpireAt(mongo::Date_t::fromMillisSinceEpoch(1000));
+
+ auto blockers = createBlockersAndStartBlockingWrites(_tenantIds, _opCtx.get(), _connectionStr);
+ startBlockingReadsAfter(blockers, Timestamp(1));
+ for (auto& blocker : blockers) {
+ blocker->setAbortOpTime(_opCtx.get(), *stateDocument.getCommitOrAbortOpTime());
+ }
+
+ auto bsonDoc = stateDocument.toBSON();
+
+ WriteUnitOfWork wuow(_opCtx.get());
+ _observer->aboutToDelete(
+ _opCtx.get(), NamespaceString::kShardSplitDonorsNamespace, UUID::gen(), bsonDoc);
+
+ OplogDeleteEntryArgs deleteArgs;
+ deleteArgs.deletedDoc = &bsonDoc;
+
+ _observer->onDelete(_opCtx.get(),
+ NamespaceString::kShardSplitDonorsNamespace,
+ UUID::gen(),
+ 0 /* stmtId */,
+ deleteArgs);
+ wuow.commit();
+
+ // Verify blockers have not been removed
+ for (const auto& tenantId : _tenantIds) {
+ ASSERT_TRUE(TenantMigrationAccessBlockerRegistry::get(_opCtx->getServiceContext())
+ .getTenantMigrationAccessBlockerForTenantId(
+ tenantId, TenantMigrationAccessBlocker::BlockerType::kDonor));
+ }
+}
+
+TEST_F(ShardSplitDonorOpObserverTest, DeleteCommittedDocumentRemovesBlockers) {
+ // Transition to committed needs a commitOpTime in the OpLog
+ auto commitOpTime = mongo::repl::OpTime(Timestamp(1, 3), 2);
+ _replicationCoordinatorMock->setCurrentCommittedSnapshotOpTime(commitOpTime);
+
+ auto stateDocument = defaultStateDocument();
+ stateDocument.setState(ShardSplitDonorStateEnum::kCommitted);
+ stateDocument.setBlockTimestamp(Timestamp(1, 2));
+ stateDocument.setCommitOrAbortOpTime(commitOpTime);
+ stateDocument.setExpireAt(mongo::Date_t::fromMillisSinceEpoch(1000));
+
+ auto blockers = createBlockersAndStartBlockingWrites(_tenantIds, _opCtx.get(), _connectionStr);
+ startBlockingReadsAfter(blockers, Timestamp(1));
+ for (auto& blocker : blockers) {
+ blocker->setCommitOpTime(_opCtx.get(), *stateDocument.getCommitOrAbortOpTime());
+ }
+
+ auto bsonDoc = stateDocument.toBSON();
+
+ WriteUnitOfWork wuow(_opCtx.get());
+ _observer->aboutToDelete(
+ _opCtx.get(), NamespaceString::kShardSplitDonorsNamespace, UUID::gen(), bsonDoc);
+
+ OplogDeleteEntryArgs deleteArgs;
+ deleteArgs.deletedDoc = &bsonDoc;
+
+ _observer->onDelete(_opCtx.get(),
+ NamespaceString::kShardSplitDonorsNamespace,
+ UUID::gen(),
+ 0 /* stmtId */,
+ deleteArgs);
+ wuow.commit();
+
+ // Verify blockers have been removed
+ for (const auto& tenantId : _tenantIds) {
+ ASSERT_FALSE(TenantMigrationAccessBlockerRegistry::get(_opCtx->getServiceContext())
+ .getTenantMigrationAccessBlockerForTenantId(
+ tenantId, TenantMigrationAccessBlocker::BlockerType::kDonor));
+ }
+}
+
} // namespace
} // namespace mongo