summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorCheahuychou Mao <mao.cheahuychou@gmail.com>2021-02-09 21:14:45 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-02-10 23:05:01 +0000
commit913c4be3ed219d83f19e0311c312fda5b3ca186e (patch)
treeb4a92f5bfa8c1ded407c69d45886703817bb74a4 /src
parent36c670a00152ed2af43f1cf1e7ac9bebb6d0e2f0 (diff)
downloadmongo-913c4be3ed219d83f19e0311c312fda5b3ca186e.tar.gz
SERVER-54425 Make TenantMigrationDonorAccessBlocker keep track of number of blocked reads and writes and tenant migration errors thrown
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/commands/write_commands/write_commands.cpp1
-rw-r--r--src/mongo/db/op_observer_impl.cpp3
-rw-r--r--src/mongo/db/repl/oplog.cpp2
-rw-r--r--src/mongo/db/repl/tenant_migration_access_blocker.h14
-rw-r--r--src/mongo/db/repl/tenant_migration_access_blocker_util.cpp22
-rw-r--r--src/mongo/db/repl/tenant_migration_access_blocker_util.h2
-rw-r--r--src/mongo/db/repl/tenant_migration_donor_access_blocker.cpp64
-rw-r--r--src/mongo/db/repl/tenant_migration_donor_access_blocker.h30
-rw-r--r--src/mongo/db/repl/tenant_migration_recipient_access_blocker.cpp8
-rw-r--r--src/mongo/db/repl/tenant_migration_recipient_access_blocker.h7
-rw-r--r--src/mongo/db/repl/tenant_migration_recipient_access_blocker_test.cpp4
11 files changed, 99 insertions, 58 deletions
diff --git a/src/mongo/db/commands/write_commands/write_commands.cpp b/src/mongo/db/commands/write_commands/write_commands.cpp
index ff84f0897e1..33f4f719367 100644
--- a/src/mongo/db/commands/write_commands/write_commands.cpp
+++ b/src/mongo/db/commands/write_commands/write_commands.cpp
@@ -307,6 +307,7 @@ boost::optional<BSONObj> generateError(OperationContext* opCtx,
auto migrationStatus =
mtab->waitUntilCommittedOrAborted(opCtx, migrationConflictInfo->getOperationType());
+ mtab->recordTenantMigrationError(migrationStatus);
error.append("code", static_cast<int>(migrationStatus.code()));
// We want to append an empty errmsg for the errors after the first one, so let the
diff --git a/src/mongo/db/op_observer_impl.cpp b/src/mongo/db/op_observer_impl.cpp
index a419e3150d9..3b31c7f1b17 100644
--- a/src/mongo/db/op_observer_impl.cpp
+++ b/src/mongo/db/op_observer_impl.cpp
@@ -1232,7 +1232,8 @@ void OpObserverImpl::onUnpreparedTransactionCommit(OperationContext* opCtx,
// Throw TenantMigrationConflict error if the database for the transaction statements is being
// migrated. We only need check the namespace of the first statement since a transaction's
// statements must all be for the same tenant.
- tenant_migration_access_blocker::onWriteToDatabase(opCtx, statements->begin()->getNss().db());
+ tenant_migration_access_blocker::checkIfCanWriteOrThrow(opCtx,
+ statements->begin()->getNss().db());
if (MONGO_unlikely(hangAndFailUnpreparedCommitAfterReservingOplogSlot.shouldFail())) {
hangAndFailUnpreparedCommitAfterReservingOplogSlot.pauseWhileSet(opCtx);
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index 251153830c5..dba72ba9275 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -296,7 +296,7 @@ void _logOpsInner(OperationContext* opCtx,
});
if (!isAbortIndexBuild) {
- tenant_migration_access_blocker::onWriteToDatabase(opCtx, nss.db());
+ tenant_migration_access_blocker::checkIfCanWriteOrThrow(opCtx, nss.db());
} else if (records->size() > 1) {
str::stream ss;
ss << "abortIndexBuild cannot be logged with other oplog entries ";
diff --git a/src/mongo/db/repl/tenant_migration_access_blocker.h b/src/mongo/db/repl/tenant_migration_access_blocker.h
index e88cb155d6d..e14c0d2f456 100644
--- a/src/mongo/db/repl/tenant_migration_access_blocker.h
+++ b/src/mongo/db/repl/tenant_migration_access_blocker.h
@@ -55,11 +55,11 @@ public:
// Called by all writes and reads against the database.
//
- virtual void checkIfCanWriteOrThrow() = 0;
+ virtual Status checkIfCanWrite() = 0;
virtual Status waitUntilCommittedOrAborted(OperationContext* opCtx,
OperationType operationType) = 0;
- virtual void checkIfLinearizableReadWasAllowedOrThrow(OperationContext* opCtx) = 0;
+ virtual Status checkIfLinearizableReadWasAllowed(OperationContext* opCtx) = 0;
virtual SharedSemiFuture<void> getCanReadFuture(OperationContext* opCtx) = 0;
//
@@ -79,8 +79,16 @@ public:
virtual void appendInfoForServerStatus(BSONObjBuilder* builder) const = 0;
- // Returns structured info with current tenant ID and connection string.
+ /**
+ * Returns structured info with tenant id and connection string.
+ */
virtual BSONObj getDebugInfo() const = 0;
+
+ /**
+ * Updates the runtime statistics for the number of tenant migration errors that have been
+ * thrown based on the given status.
+ */
+ virtual void recordTenantMigrationError(Status status) = 0;
};
} // namespace mongo
diff --git a/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp b/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp
index fe1e4e2d73f..36573d870d4 100644
--- a/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp
+++ b/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp
@@ -137,7 +137,9 @@ void checkIfCanReadOrBlock(OperationContext* opCtx, StringData dbName) {
// Optimisation: if the future is already ready, we are done.
if (futures[0].isReady()) {
- futures[0].get(); // Throw if error.
+ auto status = futures[0].getNoThrow();
+ mtab->recordTenantMigrationError(status);
+ uassertStatusOK(status);
return;
}
@@ -153,6 +155,7 @@ void checkIfCanReadOrBlock(OperationContext* opCtx, StringData dbName) {
if (idx == 0) {
// Read unblock condition finished first.
cancelTimeoutSource.cancel();
+ mtab->recordTenantMigrationError(status);
uassertStatusOK(status);
} else if (idx == 1) {
// Deadline finished first, throw error.
@@ -167,17 +170,21 @@ void checkIfLinearizableReadWasAllowedOrThrow(OperationContext* opCtx, StringDat
repl::ReadConcernLevel::kLinearizableReadConcern) {
if (auto mtab = TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
.getTenantMigrationAccessBlockerForDbName(dbName)) {
- mtab->checkIfLinearizableReadWasAllowedOrThrow(opCtx);
+ auto status = mtab->checkIfLinearizableReadWasAllowed(opCtx);
+ mtab->recordTenantMigrationError(status);
+ uassertStatusOK(status);
}
}
}
-void onWriteToDatabase(OperationContext* opCtx, StringData dbName) {
+void checkIfCanWriteOrThrow(OperationContext* opCtx, StringData dbName) {
auto mtab = TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
.getTenantMigrationAccessBlockerForDbName(dbName);
if (mtab) {
- mtab->checkIfCanWriteOrThrow();
+ auto status = mtab->checkIfCanWrite();
+ mtab->recordTenantMigrationError(status);
+ uassertStatusOK(status);
}
}
@@ -188,6 +195,7 @@ Status checkIfCanBuildIndex(OperationContext* opCtx, StringData dbName) {
if (mtab) {
// This log is included for synchronization of the tenant migration buildindex jstests.
auto status = mtab->checkIfCanBuildIndex();
+ mtab->recordTenantMigrationError(status);
LOGV2_DEBUG(4886202,
1,
"Checked if tenant migration on database prevents index builds",
@@ -256,8 +264,10 @@ void handleTenantMigrationConflict(OperationContext* opCtx, Status status) {
invariant(migrationConflictInfo);
auto mtab = migrationConflictInfo->getTenantMigrationAccessBlocker();
invariant(mtab);
- uassertStatusOK(
- mtab->waitUntilCommittedOrAborted(opCtx, migrationConflictInfo->getOperationType()));
+ auto migrationStatus =
+ mtab->waitUntilCommittedOrAborted(opCtx, migrationConflictInfo->getOperationType());
+ mtab->recordTenantMigrationError(migrationStatus);
+ uassertStatusOK(migrationStatus);
}
void performNoopWrite(OperationContext* opCtx, StringData msg) {
diff --git a/src/mongo/db/repl/tenant_migration_access_blocker_util.h b/src/mongo/db/repl/tenant_migration_access_blocker_util.h
index cf338366e5a..a0cf4a57c8d 100644
--- a/src/mongo/db/repl/tenant_migration_access_blocker_util.h
+++ b/src/mongo/db/repl/tenant_migration_access_blocker_util.h
@@ -74,7 +74,7 @@ void checkIfLinearizableReadWasAllowedOrThrow(OperationContext* opCtx, StringDat
* Throws TenantMigrationConflict if the database is being migrated and the migration is in the
* blocking state. Throws TenantMigrationCommitted if it is in committed.
*/
-void onWriteToDatabase(OperationContext* opCtx, StringData dbName);
+void checkIfCanWriteOrThrow(OperationContext* opCtx, StringData dbName);
/**
* Returns TenantMigrationConflict if the database is being migrated (even if migration is not yet
diff --git a/src/mongo/db/repl/tenant_migration_donor_access_blocker.cpp b/src/mongo/db/repl/tenant_migration_donor_access_blocker.cpp
index 3b1ccf096b4..9789ca823ea 100644
--- a/src/mongo/db/repl/tenant_migration_donor_access_blocker.cpp
+++ b/src/mongo/db/repl/tenant_migration_donor_access_blocker.cpp
@@ -48,9 +48,6 @@ namespace mongo {
namespace {
-MONGO_FAIL_POINT_DEFINE(tenantMigrationBlockRead);
-MONGO_FAIL_POINT_DEFINE(tenantMigrationBlockWrite);
-
const Backoff kExponentialBackoff(Seconds(1), Milliseconds::max());
} // namespace
@@ -64,21 +61,20 @@ TenantMigrationDonorAccessBlocker::TenantMigrationDonorAccessBlocker(
.getOrCreateBlockedOperationsExecutor();
}
-void TenantMigrationDonorAccessBlocker::checkIfCanWriteOrThrow() {
+Status TenantMigrationDonorAccessBlocker::checkIfCanWrite() {
stdx::lock_guard<Latch> lg(_mutex);
switch (_state) {
case State::kAllow:
- return;
case State::kAborted:
- return;
+ return Status::OK();
case State::kBlockWrites:
case State::kBlockWritesAndReads:
- uasserted(TenantMigrationConflictInfo(_tenantId, shared_from_this()),
- "Write must block until this tenant migration commits or aborts");
+ return {TenantMigrationConflictInfo(_tenantId, shared_from_this()),
+ "Write must block until this tenant migration commits or aborts"};
case State::kReject:
- uasserted(TenantMigrationCommittedInfo(_tenantId, _recipientConnString),
- "Write must be re-routed to the new owner of this tenant");
+ return {TenantMigrationCommittedInfo(_tenantId, _recipientConnString),
+ "Write must be re-routed to the new owner of this tenant"};
default:
MONGO_UNREACHABLE;
}
@@ -86,19 +82,6 @@ void TenantMigrationDonorAccessBlocker::checkIfCanWriteOrThrow() {
Status TenantMigrationDonorAccessBlocker::waitUntilCommittedOrAborted(OperationContext* opCtx,
OperationType operationType) {
- {
- stdx::unique_lock<Latch> ul(_mutex);
-
- auto canWrite = [&]() {
- return (operationType == kWrite && _state == State::kAllow) ||
- _state == State::kAborted;
- };
-
- if (!canWrite()) {
- tenantMigrationBlockWrite.shouldFail(); // Return value intentionally ignored.
- }
- }
-
// Source to cancel the timeout if the operation completed in time.
CancelationSource cancelTimeoutSource;
auto executor = getAsyncBlockingOperationsExecutor();
@@ -160,28 +143,29 @@ SharedSemiFuture<void> TenantMigrationDonorAccessBlocker::_getCanDoClusterTimeRe
auto canRead = _state == State::kAllow || _state == State::kAborted ||
_state == State::kBlockWrites || readTimestamp < *_blockTimestamp;
- if (!canRead) {
- tenantMigrationBlockRead.shouldFail(); // Return value intentionally ignored.
- }
if (canRead) {
return SharedSemiFuture<void>();
}
+
if (_state == State::kReject) {
return SharedSemiFuture<void>(
Status(ErrorCodes::TenantMigrationCommitted,
- "Write or read must be re-routed to the new owner of this tenant",
+ "Read must be re-routed to the new owner of this tenant",
TenantMigrationCommittedInfo(_tenantId, _recipientConnString).toBSON()));
}
+ _stats.numBlockedReads.addAndFetch(1);
return _transitionOutOfBlockingPromise.getFuture();
}
-void TenantMigrationDonorAccessBlocker::checkIfLinearizableReadWasAllowedOrThrow(
+Status TenantMigrationDonorAccessBlocker::checkIfLinearizableReadWasAllowed(
OperationContext* opCtx) {
stdx::lock_guard<Latch> lg(_mutex);
- uassert(TenantMigrationCommittedInfo(_tenantId, _recipientConnString),
- "Read must be re-routed to the new owner of this tenant",
- _state != State::kReject);
+ if (_state == State::kReject) {
+ return {TenantMigrationCommittedInfo(_tenantId, _recipientConnString),
+ "Read must be re-routed to the new owner of this tenant"};
+ }
+ return Status::OK();
}
Status TenantMigrationDonorAccessBlocker::checkIfCanBuildIndex() {
@@ -369,6 +353,7 @@ void TenantMigrationDonorAccessBlocker::appendInfoForServerStatus(BSONObjBuilder
if (_abortOpTime) {
tenantBuilder.append("abortOpTime", _abortOpTime->toBSON());
}
+ _stats.report(&tenantBuilder);
builder->append(_tenantId, tenantBuilder.obj());
}
@@ -393,4 +378,21 @@ BSONObj TenantMigrationDonorAccessBlocker::getDebugInfo() const {
return BSON("tenantId" << _tenantId << "recipientConnectionString" << _recipientConnString);
}
+void TenantMigrationDonorAccessBlocker::recordTenantMigrationError(Status status) {
+ if (status == ErrorCodes::TenantMigrationConflict) {
+ _stats.numBlockedWrites.addAndFetch(1);
+ } else if (status == ErrorCodes::TenantMigrationCommitted) {
+ _stats.numTenantMigrationCommittedErrors.addAndFetch(1);
+ } else if (status == ErrorCodes::TenantMigrationAborted) {
+ _stats.numTenantMigrationAbortedErrors.addAndFetch(1);
+ }
+}
+
+void TenantMigrationDonorAccessBlocker::Stats::report(BSONObjBuilder* builder) const {
+ builder->append("numBlockedReads", numBlockedReads.load());
+ builder->append("numBlockedWrites", numBlockedWrites.load());
+ builder->append("numTenantMigrationCommittedErrors", numTenantMigrationCommittedErrors.load());
+ builder->append("numTenantMigrationAbortedErrors", numTenantMigrationAbortedErrors.load());
+}
+
} // namespace mongo
diff --git a/src/mongo/db/repl/tenant_migration_donor_access_blocker.h b/src/mongo/db/repl/tenant_migration_donor_access_blocker.h
index a9ec4adf9d2..88969a16f68 100644
--- a/src/mongo/db/repl/tenant_migration_donor_access_blocker.h
+++ b/src/mongo/db/repl/tenant_migration_donor_access_blocker.h
@@ -113,7 +113,7 @@ inline RepeatableSharedPromise<void>::~RepeatableSharedPromise() {
* }
* }
*
- * Writes call checkIfCanWriteOrThrow after being assigned an OpTime but before committing. The
+ * Writes call checkIfCanWrite after being assigned an OpTime but before committing. The
* method throws TenantMigrationConflict if writes are being blocked, which is caught in the loop.
* The write then blocks until the migration either commits (in which case checkIfCanWriteOrBlock
* throws an error that causes the write to be rejected) or aborts (in which case
@@ -140,10 +140,10 @@ inline RepeatableSharedPromise<void>::~RepeatableSharedPromise() {
* Timestamp will be the "blockTimestamp".
*
* At this point:
- * - Writes that have already passed checkIfCanWriteOrThrow must have been assigned an OpTime before
+ * - Writes that have already passed checkIfCanWrite must have been assigned an OpTime before
* the blockTimestamp, since the blockTimestamp hasn't been assigned yet, and OpTimes are handed
* out in monotonically increasing order.
- * - Writes that have not yet passed checkIfCanWriteOrThrow will end up blocking. Some of these
+ * - Writes that have not yet passed checkIfCanWrite will end up blocking. Some of these
* writes may have already been assigned an OpTime, or may end up being assigned an OpTime that is
* before the blockTimestamp, and so will end up blocking unnecessarily, but not incorrectly.
*
@@ -185,10 +185,10 @@ public:
// Called by all writes and reads against the database.
//
- void checkIfCanWriteOrThrow() final;
+ Status checkIfCanWrite() final;
Status waitUntilCommittedOrAborted(OperationContext* opCtx, OperationType operationType) final;
- void checkIfLinearizableReadWasAllowedOrThrow(OperationContext* opCtx) final;
+ Status checkIfLinearizableReadWasAllowed(OperationContext* opCtx) final;
SharedSemiFuture<void> getCanReadFuture(OperationContext* opCtx) final;
//
@@ -210,9 +210,10 @@ public:
void appendInfoForServerStatus(BSONObjBuilder* builder) const final;
- // Returns structured info with current tenant ID and connection string.
BSONObj getDebugInfo() const final;
+ void recordTenantMigrationError(Status status) final;
+
//
// Called while donating this database.
//
@@ -240,6 +241,23 @@ private:
enum class State { kAllow, kBlockWrites, kBlockWritesAndReads, kReject, kAborted };
std::string _stateToString(State state) const;
+ /**
+ * Encapsulates runtime statistics on blocked reads and writes, and tenant migration errors
+ * thrown.
+ */
+ struct Stats {
+ AtomicWord<long long> numBlockedReads;
+ AtomicWord<long long> numBlockedWrites;
+ AtomicWord<long long> numTenantMigrationCommittedErrors;
+ AtomicWord<long long> numTenantMigrationAbortedErrors;
+
+ /**
+ * Reports the accumulated statistics for serverStatus.
+ */
+ void report(BSONObjBuilder* builder) const;
+
+ } _stats;
+
SharedSemiFuture<void> _onCompletion() {
return _completionPromise.getFuture();
}
diff --git a/src/mongo/db/repl/tenant_migration_recipient_access_blocker.cpp b/src/mongo/db/repl/tenant_migration_recipient_access_blocker.cpp
index 6f551162c4b..6fdec9a3208 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_access_blocker.cpp
+++ b/src/mongo/db/repl/tenant_migration_recipient_access_blocker.cpp
@@ -54,10 +54,10 @@ TenantMigrationRecipientAccessBlocker::TenantMigrationRecipientAccessBlocker(
.getOrCreateBlockedOperationsExecutor();
}
-void TenantMigrationRecipientAccessBlocker::checkIfCanWriteOrThrow() {
+Status TenantMigrationRecipientAccessBlocker::checkIfCanWrite() {
// This is guaranteed by the migration protocol. The recipient will not get any writes until the
// migration is committed on the donor.
- return;
+ return Status::OK();
}
Status TenantMigrationRecipientAccessBlocker::waitUntilCommittedOrAborted(
@@ -109,14 +109,14 @@ SharedSemiFuture<void> TenantMigrationRecipientAccessBlocker::getCanReadFuture(
return SharedSemiFuture<void>();
}
-void TenantMigrationRecipientAccessBlocker::checkIfLinearizableReadWasAllowedOrThrow(
+Status TenantMigrationRecipientAccessBlocker::checkIfLinearizableReadWasAllowed(
OperationContext* opCtx) {
// The donor will block all writes at the blockOpTime, and will not signal the proxy to allow
// reading from the recipient until that blockOpTime is majority committed on the recipient.
// This means any writes made on the donor set are available in the majority snapshot of the
// recipient, so linearizable guarantees will hold using the existing linearizable read
// mechanism of doing a no-op write and waiting for it to be majority committed.
- return;
+ return Status::OK();
}
Status TenantMigrationRecipientAccessBlocker::checkIfCanBuildIndex() {
diff --git a/src/mongo/db/repl/tenant_migration_recipient_access_blocker.h b/src/mongo/db/repl/tenant_migration_recipient_access_blocker.h
index 896cc957925..099b2b78a25 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_access_blocker.h
+++ b/src/mongo/db/repl/tenant_migration_recipient_access_blocker.h
@@ -77,10 +77,10 @@ public:
// Called by all writes and reads against the database.
//
- void checkIfCanWriteOrThrow() final;
+ Status checkIfCanWrite() final;
Status waitUntilCommittedOrAborted(OperationContext* opCtx, OperationType operationType) final;
- void checkIfLinearizableReadWasAllowedOrThrow(OperationContext* opCtx) final;
+ Status checkIfLinearizableReadWasAllowed(OperationContext* opCtx) final;
SharedSemiFuture<void> getCanReadFuture(OperationContext* opCtx) final;
//
@@ -100,9 +100,10 @@ public:
void appendInfoForServerStatus(BSONObjBuilder* builder) const final;
- // Returns structured info with current tenant ID and connection string.
BSONObj getDebugInfo() const final;
+ void recordTenantMigrationError(Status status) final{};
+
//
// Called as a recipient to reject reads before the `timestamp`.
//
diff --git a/src/mongo/db/repl/tenant_migration_recipient_access_blocker_test.cpp b/src/mongo/db/repl/tenant_migration_recipient_access_blocker_test.cpp
index 5cb27284e70..aeb5e23b60a 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_access_blocker_test.cpp
+++ b/src/mongo/db/repl/tenant_migration_recipient_access_blocker_test.cpp
@@ -120,8 +120,8 @@ TEST_F(TenantMigrationRecipientAccessBlockerTest, NoopFunctions) {
getServiceContext(), getTenantId(), getDonorConnectionString());
// These functions are noop functions and should not throw even in reject state.
- mtab.checkIfCanWriteOrThrow();
- mtab.checkIfLinearizableReadWasAllowedOrThrow(opCtx());
+ ASSERT_OK(mtab.checkIfCanWrite());
+ ASSERT_OK(mtab.checkIfLinearizableReadWasAllowed(opCtx()));
ASSERT_OK(mtab.checkIfCanBuildIndex());
}