summaryrefslogtreecommitdiff
path: root/src/mongo/db/repl
diff options
context:
space:
mode:
authorVishnu Kaushik <vishnu.kaushik@mongodb.com>2021-04-20 16:45:17 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-04-21 16:09:26 +0000
commit87488185ced2c3035a8df51eaf22b03fc980b555 (patch)
tree6e8eb5edbb10393437a099ff9c083a68b25be5d9 /src/mongo/db/repl
parentded506addbb8e3821eb84604bd5c4a7458cabb45 (diff)
downloadmongo-87488185ced2c3035a8df51eaf22b03fc980b555.tar.gz
SERVER-55229 Implement and test recipient cloner stats after failover
Diffstat (limited to 'src/mongo/db/repl')
-rw-r--r--src/mongo/db/repl/tenant_all_database_cloner.cpp98
-rw-r--r--src/mongo/db/repl/tenant_all_database_cloner.h6
-rw-r--r--src/mongo/db/repl/tenant_all_database_cloner_test.cpp145
-rw-r--r--src/mongo/db/repl/tenant_database_cloner.cpp25
-rw-r--r--src/mongo/db/repl/tenant_database_cloner_test.cpp132
-rw-r--r--src/mongo/db/repl/tenant_migration_recipient_service.cpp10
6 files changed, 351 insertions, 65 deletions
diff --git a/src/mongo/db/repl/tenant_all_database_cloner.cpp b/src/mongo/db/repl/tenant_all_database_cloner.cpp
index 2c32d3155de..1b633cf7ecf 100644
--- a/src/mongo/db/repl/tenant_all_database_cloner.cpp
+++ b/src/mongo/db/repl/tenant_all_database_cloner.cpp
@@ -37,9 +37,11 @@
#include "mongo/db/repl/cloner_utils.h"
#include "mongo/db/repl/tenant_all_database_cloner.h"
#include "mongo/db/repl/tenant_database_cloner.h"
+#include "mongo/db/repl/tenant_migration_decoration.h"
#include "mongo/logv2/log.h"
#include "mongo/rpc/get_status_from_command_result.h"
#include "mongo/util/assert_util.h"
+#include "mongo/util/string_map.h"
namespace mongo {
namespace repl {
@@ -59,10 +61,12 @@ TenantAllDatabaseCloner::TenantAllDatabaseCloner(TenantMigrationSharedData* shar
_tenantId(tenantId),
_listDatabasesStage("listDatabases", this, &TenantAllDatabaseCloner::listDatabasesStage),
_listExistingDatabasesStage(
- "listExistingDatabases", this, &TenantAllDatabaseCloner::listExistingDatabasesStage) {}
+ "listExistingDatabases", this, &TenantAllDatabaseCloner::listExistingDatabasesStage),
+ _initializeStatsStage(
+ "initializeStatsStage", this, &TenantAllDatabaseCloner::initializeStatsStage) {}
BaseCloner::ClonerStages TenantAllDatabaseCloner::getStages() {
- return {&_listDatabasesStage, &_listExistingDatabasesStage};
+ return {&_listDatabasesStage, &_listExistingDatabasesStage, &_initializeStatsStage};
}
void TenantAllDatabaseCloner::preStage() {
@@ -130,10 +134,16 @@ BaseCloner::AfterStageBehavior TenantAllDatabaseCloner::listDatabasesStage() {
BaseCloner::AfterStageBehavior TenantAllDatabaseCloner::listExistingDatabasesStage() {
auto opCtx = cc().makeOperationContext();
DBDirectClient client(opCtx.get());
+ tenantMigrationRecipientInfo(opCtx.get()) =
+ boost::make_optional<TenantMigrationRecipientInfo>(getSharedData()->getMigrationId());
const BSONObj filter = ClonerUtils::makeTenantDatabaseFilter(_tenantId);
auto databasesArray = client.getDatabaseInfos(filter, true /* nameOnly */);
+ long long approxTotalSizeOnDisk = 0;
+ // Use a map to figure out the size of the partially cloned database.
+ StringMap<long long> dbNameToSize;
+
std::vector<std::string> clonedDatabases;
for (const auto& dbBSON : databasesArray) {
LOGV2_DEBUG(5271500,
@@ -148,6 +158,21 @@ BaseCloner::AfterStageBehavior TenantAllDatabaseCloner::listExistingDatabasesSta
const auto& dbName = dbBSON["name"].str();
clonedDatabases.emplace_back(dbName);
+
+ BSONObj res;
+ client.runCommand(dbName, BSON("dbStats" << 1), res);
+ if (auto status = getStatusFromCommandResult(res); !status.isOK()) {
+ LOGV2_WARNING(5522900,
+ "Skipping recording of data size metrics for database due to failure "
+ "in the 'dbStats' command, tenant migration stats may be inaccurate.",
+ "db"_attr = dbName,
+ "migrationId"_attr = getSharedData()->getMigrationId(),
+ "tenantId"_attr = _tenantId,
+ "status"_attr = status);
+ } else {
+ dbNameToSize[dbName] = res.getField("dataSize").safeNumberLong();
+ approxTotalSizeOnDisk += dbNameToSize[dbName];
+ }
}
if (!getSharedData()->isResuming()) {
@@ -169,8 +194,15 @@ BaseCloner::AfterStageBehavior TenantAllDatabaseCloner::listExistingDatabasesSta
stdx::lock_guard<Latch> lk(_mutex);
if (startingDb != _databases.end() && *startingDb == lastClonedDb) {
_stats.databasesClonedBeforeFailover = clonedDatabases.size() - 1;
+
+ // When the 'startingDb' matches the 'lastClonedDb', the 'startingDb' is currently
+ // partially cloned. Therefore, exclude the 'startingDb' when calculating the size,
+ // as it is counted on demand by the database cloner.
+ _stats.approxTotalBytesCopied =
+ approxTotalSizeOnDisk - dbNameToSize.at(*startingDb);
} else {
_stats.databasesClonedBeforeFailover = clonedDatabases.size();
+ _stats.approxTotalBytesCopied = approxTotalSizeOnDisk;
}
}
_databases.erase(_databases.begin(), startingDb);
@@ -191,39 +223,43 @@ BaseCloner::AfterStageBehavior TenantAllDatabaseCloner::listExistingDatabasesSta
return kContinueNormally;
}
-void TenantAllDatabaseCloner::postStage() {
- {
- // Finish calculating the size of the databases that were either partially cloned or
- // completely un-cloned from a previous migration. Perform this before grabbing the _mutex,
- // as commands are being sent over the network.
- long long approxTotalDataSize = 0;
- for (const auto& dbName : _databases) {
- BSONObj res;
- getClient()->runCommand(dbName, BSON("dbStats" << 1), res);
- if (auto status = getStatusFromCommandResult(res); !status.isOK()) {
- LOGV2_WARNING(5426600,
- "Skipping recording of data size metrics for database due to failure "
- "in the 'dbStats' command, tenant migration stats may be inaccurate.",
- "db"_attr = dbName,
- "migrationId"_attr = getSharedData()->getMigrationId(),
- "tenantId"_attr = _tenantId,
- "status"_attr = status);
- } else {
- approxTotalDataSize += res.getField("dataSize").safeNumberLong();
- }
+BaseCloner::AfterStageBehavior TenantAllDatabaseCloner::initializeStatsStage() {
+ // Finish calculating the size of the databases that were either partially cloned or
+ // completely un-cloned from a previous migration. Perform this before grabbing the _mutex,
+ // as commands are being sent over the network.
+ long long approxTotalDataSizeLeftOnRemote = 0;
+ for (const auto& dbName : _databases) {
+ BSONObj res;
+ getClient()->runCommand(dbName, BSON("dbStats" << 1), res);
+ if (auto status = getStatusFromCommandResult(res); !status.isOK()) {
+ LOGV2_WARNING(5426600,
+ "Skipping recording of data size metrics for database due to failure "
+ "in the 'dbStats' command, tenant migration stats may be inaccurate.",
+ "db"_attr = dbName,
+ "migrationId"_attr = getSharedData()->getMigrationId(),
+ "tenantId"_attr = _tenantId,
+ "status"_attr = status);
+ } else {
+ approxTotalDataSizeLeftOnRemote += res.getField("dataSize").safeNumberLong();
}
+ }
- stdx::lock_guard<Latch> lk(_mutex);
- _stats.databasesCloned = 0;
- _stats.databasesToClone = _databases.size();
- _stats.databaseStats.reserve(_databases.size());
- for (const auto& dbName : _databases) {
- _stats.databaseStats.emplace_back();
- _stats.databaseStats.back().dbname = dbName;
- }
- _stats.approxTotalDataSize = approxTotalDataSize;
+ stdx::lock_guard<Latch> lk(_mutex);
+ // The 'approxTotalDataSize' is the sum of the size copied so far and the size left to be
+ // copied.
+ _stats.approxTotalDataSize = _stats.approxTotalBytesCopied + approxTotalDataSizeLeftOnRemote;
+ _stats.databasesCloned = 0;
+ _stats.databasesToClone = _databases.size();
+ _stats.databaseStats.reserve(_databases.size());
+ for (const auto& dbName : _databases) {
+ _stats.databaseStats.emplace_back();
+ _stats.databaseStats.back().dbname = dbName;
}
+ return kContinueNormally;
+}
+
+void TenantAllDatabaseCloner::postStage() {
for (const auto& dbName : _databases) {
{
stdx::lock_guard<Latch> lk(_mutex);
diff --git a/src/mongo/db/repl/tenant_all_database_cloner.h b/src/mongo/db/repl/tenant_all_database_cloner.h
index 627f1507f00..cc993cefdfd 100644
--- a/src/mongo/db/repl/tenant_all_database_cloner.h
+++ b/src/mongo/db/repl/tenant_all_database_cloner.h
@@ -102,6 +102,11 @@ private:
AfterStageBehavior listExistingDatabasesStage();
/**
+ * Stage function that initializes several stats before carrying on to the 'postStage'.
+ */
+ AfterStageBehavior initializeStatsStage();
+
+ /**
* The preStage sets the start time in _stats.
*/
void preStage() final;
@@ -130,6 +135,7 @@ private:
TenantAllDatabaseClonerStage _listDatabasesStage; // (R)
TenantAllDatabaseClonerStage _listExistingDatabasesStage; // (R)
+ TenantAllDatabaseClonerStage _initializeStatsStage; // (R)
// The operationTime returned with the listDatabases result.
Timestamp _operationTime; // (X)
diff --git a/src/mongo/db/repl/tenant_all_database_cloner_test.cpp b/src/mongo/db/repl/tenant_all_database_cloner_test.cpp
index c736493c0d6..580b101b433 100644
--- a/src/mongo/db/repl/tenant_all_database_cloner_test.cpp
+++ b/src/mongo/db/repl/tenant_all_database_cloner_test.cpp
@@ -374,18 +374,37 @@ TEST_F(TenantAllDatabaseClonerTest, TenantDatabasesAlreadyExist) {
}
TEST_F(TenantAllDatabaseClonerTest, ResumingFromLastClonedDb) {
- // Test that all databases cloner correctly resume from the last cloned database.
- ASSERT_OK(createCollection(NamespaceString(_tenantDbA, "coll"), CollectionOptions()));
- ASSERT_OK(createCollection(NamespaceString(_tenantDbAAB, "coll"), CollectionOptions()));
+ // Test that all databases cloner correctly resumes from the last cloned database.
+ auto nssDbA = NamespaceString(_tenantDbA, "coll");
+ auto nssDbAAb = NamespaceString(_tenantDbAAB, "coll");
+ ASSERT_OK(createCollection(nssDbA, CollectionOptions()));
+ ASSERT_OK(createCollection(nssDbAAb, CollectionOptions()));
+
+ long long sizeOfDbA = 0;
+ {
+ // Insert some documents into both collections.
+ auto storage = StorageInterface::get(serviceContext);
+ auto opCtx = cc().makeOperationContext();
+
+ ASSERT_OK(storage->insertDocument(
+ opCtx.get(), nssDbA, {BSON("_id" << 0 << "a" << 1001), Timestamp(0)}, 0));
+ ASSERT_OK(storage->insertDocument(
+ opCtx.get(), nssDbAAb, {BSON("_id" << 0 << "a" << 1001), Timestamp(0)}, 0));
+
+ auto swSizeofDbA = storage->getCollectionSize(opCtx.get(), nssDbA);
+ ASSERT_OK(swSizeofDbA.getStatus());
+ sizeOfDbA = swSizeofDbA.getValue();
+ }
auto listDatabasesReply =
"{ok:1, databases:[{name:'" + _tenantDbA + "'}, {name:'" + _tenantDbAAB + "'}]}";
_mockServer->setCommandReply("listDatabases", fromjson(listDatabasesReply));
_mockServer->setCommandReply("find", createFindResponse());
+ _mockServer->setCommandReply("dbStats", fromjson("{ok:1, dataSize: 30}"));
TenantMigrationSharedData resumingSharedData(&_clock, _migrationId, /*resuming=*/true);
auto cloner = makeAllDatabaseCloner(&resumingSharedData);
- cloner->setStopAfterStage_forTest("listExistingDatabases");
+ cloner->setStopAfterStage_forTest("initializeStatsStage");
ASSERT_OK(cloner->run());
@@ -393,23 +412,42 @@ TEST_F(TenantAllDatabaseClonerTest, ResumingFromLastClonedDb) {
ASSERT_EQUALS(1u, databases.size());
ASSERT_EQUALS(_tenantDbAAB, databases[0]);
- ASSERT_EQUALS(1, cloner->getStats().databasesClonedBeforeFailover);
+ auto stats = cloner->getStats();
+ ASSERT_EQUALS(1, stats.databasesClonedBeforeFailover);
+
+ ASSERT_EQUALS(sizeOfDbA, stats.approxTotalBytesCopied);
+ ASSERT_LESS_THAN(stats.approxTotalBytesCopied, stats.approxTotalDataSize);
}
TEST_F(TenantAllDatabaseClonerTest, LastClonedDbDeleted_AllGreater) {
// Test that we correctly resume from next database compared greater than the last cloned
// database if the last cloned database is dropped. This tests the case when all databases in
// the latest listDatabases result are compared greater than the last cloned database.
- ASSERT_OK(createCollection(NamespaceString(_tenantDbA, "coll"), CollectionOptions()));
+ auto nssDbA = NamespaceString(_tenantDbA, "coll");
+ ASSERT_OK(createCollection(nssDbA, CollectionOptions()));
+
+ long long size = 0;
+ {
+ // Insert document into the collection.
+ auto storage = StorageInterface::get(serviceContext);
+ auto opCtx = cc().makeOperationContext();
+
+ ASSERT_OK(storage->insertDocument(
+ opCtx.get(), nssDbA, {BSON("_id" << 0 << "a_field" << 1001), Timestamp(0)}, 0));
+ auto swSize = storage->getCollectionSize(opCtx.get(), nssDbA);
+ ASSERT_OK(swSize.getStatus());
+ size = swSize.getValue();
+ }
auto listDatabasesReply =
"{ok:1, databases:[{name:'" + _tenantDbAAB + "'}, {name:'" + _tenantDbABC + "'}]}";
_mockServer->setCommandReply("listDatabases", fromjson(listDatabasesReply));
_mockServer->setCommandReply("find", createFindResponse());
+ _mockServer->setCommandReply("dbStats", fromjson("{ok:1, dataSize: 30}"));
TenantMigrationSharedData resumingSharedData(&_clock, _migrationId, /*resuming=*/true);
auto cloner = makeAllDatabaseCloner(&resumingSharedData);
- cloner->setStopAfterStage_forTest("listExistingDatabases");
+ cloner->setStopAfterStage_forTest("initializeStatsStage");
ASSERT_OK(cloner->run());
@@ -418,7 +456,10 @@ TEST_F(TenantAllDatabaseClonerTest, LastClonedDbDeleted_AllGreater) {
ASSERT_EQUALS(_tenantDbAAB, databases[0]);
ASSERT_EQUALS(_tenantDbABC, databases[1]);
- ASSERT_EQUALS(1, cloner->getStats().databasesClonedBeforeFailover);
+ auto stats = cloner->getStats();
+ ASSERT_EQUALS(1, stats.databasesClonedBeforeFailover);
+ ASSERT_EQUALS(size, stats.approxTotalBytesCopied);
+ ASSERT_LESS_THAN(stats.approxTotalBytesCopied, stats.approxTotalDataSize);
}
TEST_F(TenantAllDatabaseClonerTest, LastClonedDbDeleted_SomeGreater) {
@@ -426,17 +467,44 @@ TEST_F(TenantAllDatabaseClonerTest, LastClonedDbDeleted_SomeGreater) {
// database if the last cloned database is dropped. This tests the case when some but not all
// databases in the latest listDatabases result are compared greater than the last cloned
// database.
- ASSERT_OK(createCollection(NamespaceString(_tenantDbA, "coll"), CollectionOptions()));
- ASSERT_OK(createCollection(NamespaceString(_tenantDbAAB, "coll"), CollectionOptions()));
+ auto nssDbA = NamespaceString(_tenantDbA, "coll");
+ auto nssDbAAb = NamespaceString(_tenantDbAAB, "coll");
+ ASSERT_OK(createCollection(nssDbA, CollectionOptions()));
+ ASSERT_OK(createCollection(nssDbAAb, CollectionOptions()));
+
+ long long size = 0;
+ {
+ // Insert some documents into both collections.
+ auto storage = StorageInterface::get(serviceContext);
+ auto opCtx = cc().makeOperationContext();
+
+ ASSERT_OK(storage->insertDocument(
+ opCtx.get(), nssDbA, {BSON("_id" << 0 << "a" << 1001), Timestamp(0)}, 0));
+ ASSERT_OK(storage->insertDocument(opCtx.get(),
+ nssDbAAb,
+ {BSON("_id" << 0 << "a"
+ << "hello"),
+ Timestamp(0)},
+ 0));
+
+ auto swSizeDbA = storage->getCollectionSize(opCtx.get(), nssDbA);
+ ASSERT_OK(swSizeDbA.getStatus());
+ size = swSizeDbA.getValue();
+
+ auto swSizeDbAAb = storage->getCollectionSize(opCtx.get(), nssDbAAb);
+ ASSERT_OK(swSizeDbAAb.getStatus());
+ size += swSizeDbAAb.getValue();
+ }
auto listDatabasesReply =
"{ok:1, databases:[{name:'" + _tenantDbA + "'}, {name:'" + _tenantDbABC + "'}]}";
_mockServer->setCommandReply("listDatabases", fromjson(listDatabasesReply));
_mockServer->setCommandReply("find", createFindResponse());
+ _mockServer->setCommandReply("dbStats", fromjson("{ok:1, dataSize: 30}"));
TenantMigrationSharedData resumingSharedData(&_clock, _migrationId, /*resuming=*/true);
auto cloner = makeAllDatabaseCloner(&resumingSharedData);
- cloner->setStopAfterStage_forTest("listExistingDatabases");
+ cloner->setStopAfterStage_forTest("initializeStatsStage");
ASSERT_OK(cloner->run());
@@ -444,25 +512,67 @@ TEST_F(TenantAllDatabaseClonerTest, LastClonedDbDeleted_SomeGreater) {
ASSERT_EQUALS(1u, databases.size());
ASSERT_EQUALS(_tenantDbABC, databases[0]);
+ auto stats = cloner->getStats();
ASSERT_EQUALS(2, cloner->getStats().databasesClonedBeforeFailover);
+ ASSERT_EQUALS(size, stats.approxTotalBytesCopied);
+ ASSERT_LESS_THAN(stats.approxTotalBytesCopied, stats.approxTotalDataSize);
}
TEST_F(TenantAllDatabaseClonerTest, LastClonedDbDeleted_AllLess) {
// Test that we correctly resume from next database compared greater than the last cloned
// database if the last cloned database is dropped. This tests the case when all databases in
// the latest listDatabases result are compared less than the last cloned database.
- ASSERT_OK(createCollection(NamespaceString(_tenantDbA, "coll"), CollectionOptions()));
- ASSERT_OK(createCollection(NamespaceString(_tenantDbAAB, "coll"), CollectionOptions()));
- ASSERT_OK(createCollection(NamespaceString(_tenantDbABC, "coll"), CollectionOptions()));
+ auto nssDbA = NamespaceString(_tenantDbA, "coll");
+ auto nssDbAAb = NamespaceString(_tenantDbAAB, "coll");
+ auto nssDbABC = NamespaceString(_tenantDbABC, "coll");
+
+ ASSERT_OK(createCollection(nssDbA, CollectionOptions()));
+ ASSERT_OK(createCollection(nssDbAAb, CollectionOptions()));
+ ASSERT_OK(createCollection(nssDbABC, CollectionOptions()));
+
+ long long size = 0;
+ {
+ // Insert some documents into all three collections.
+ auto storage = StorageInterface::get(serviceContext);
+ auto opCtx = cc().makeOperationContext();
+
+ ASSERT_OK(storage->insertDocument(
+ opCtx.get(), nssDbA, {BSON("_id" << 0 << "a" << 1001), Timestamp(0)}, 0));
+ ASSERT_OK(storage->insertDocument(opCtx.get(),
+ nssDbAAb,
+ {BSON("_id" << 0 << "a"
+ << "hello"),
+ Timestamp(0)},
+ 0));
+ ASSERT_OK(storage->insertDocument(opCtx.get(),
+ nssDbABC,
+ {BSON("_id" << 0 << "a"
+ << "goodbye"),
+ Timestamp(0)},
+ 0));
+
+ auto swSizeDbA = storage->getCollectionSize(opCtx.get(), nssDbA);
+ ASSERT_OK(swSizeDbA.getStatus());
+ size = swSizeDbA.getValue();
+
+ auto swSizeDbAAb = storage->getCollectionSize(opCtx.get(), nssDbAAb);
+ ASSERT_OK(swSizeDbAAb.getStatus());
+ size += swSizeDbAAb.getValue();
+
+ auto swSizeDbABC = storage->getCollectionSize(opCtx.get(), nssDbABC);
+ ASSERT_OK(swSizeDbABC.getStatus());
+ size += swSizeDbABC.getValue();
+ }
auto listDatabasesReply =
"{ok:1, databases:[{name:'" + _tenantDbA + "'}, {name:'" + _tenantDbAAB + "'}]}";
_mockServer->setCommandReply("listDatabases", fromjson(listDatabasesReply));
_mockServer->setCommandReply("find", createFindResponse());
+ _mockServer->setCommandReply("dbStats", fromjson("{ok:1, dataSize: 30}"));
TenantMigrationSharedData resumingSharedData(&_clock, _migrationId, /*resuming=*/true);
auto cloner = makeAllDatabaseCloner(&resumingSharedData);
- cloner->setStopAfterStage_forTest("listExistingDatabases");
+ cloner->setStopAfterStage_forTest("initializeStatsStage");
ASSERT_OK(cloner->run());
@@ -470,7 +580,10 @@ TEST_F(TenantAllDatabaseClonerTest, LastClonedDbDeleted_AllLess) {
auto databases = getDatabasesFromCloner(cloner.get());
ASSERT_EQUALS(0u, databases.size());
- ASSERT_EQUALS(3, cloner->getStats().databasesClonedBeforeFailover);
+ auto stats = cloner->getStats();
+ ASSERT_EQUALS(3, stats.databasesClonedBeforeFailover);
+ ASSERT_EQUALS(size, stats.approxTotalBytesCopied);
+ ASSERT_EQUALS(stats.approxTotalBytesCopied, stats.approxTotalDataSize);
}
} // namespace repl
diff --git a/src/mongo/db/repl/tenant_database_cloner.cpp b/src/mongo/db/repl/tenant_database_cloner.cpp
index a955b15e973..5258e335151 100644
--- a/src/mongo/db/repl/tenant_database_cloner.cpp
+++ b/src/mongo/db/repl/tenant_database_cloner.cpp
@@ -178,6 +178,9 @@ BaseCloner::AfterStageBehavior TenantDatabaseCloner::listExistingCollectionsStag
tenantMigrationRecipientInfo(opCtx.get()) =
boost::make_optional<TenantMigrationRecipientInfo>(getSharedData()->getMigrationId());
+ long long sizeOfCurrCollOnDisk = 0;
+ long long approxTotalDBSizeOnDisk = 0;
+
std::vector<UUID> clonedCollectionUUIDs;
auto collectionInfos =
client.getCollectionInfos(_dbName, ListCollectionsFilter::makeTypeCollectionFilter());
@@ -204,6 +207,22 @@ BaseCloner::AfterStageBehavior TenantDatabaseCloner::listExistingCollectionsStag
continue;
}
clonedCollectionUUIDs.emplace_back(result.getInfo().getUuid());
+
+ BSONObj res;
+ client.runCommand(_dbName, BSON("collStats" << result.getName()), res);
+ if (auto status = getStatusFromCommandResult(res); !status.isOK()) {
+ LOGV2_WARNING(5522901,
+ "Skipping recording of data size metrics for database due to failure "
+ "in the 'collStats' command, tenant migration stats may be inaccurate.",
+ "db"_attr = _dbName,
+ "coll"_attr = result.getName(),
+ "migrationId"_attr = getSharedData()->getMigrationId(),
+ "tenantId"_attr = _tenantId,
+ "status"_attr = status);
+ } else {
+ sizeOfCurrCollOnDisk = res.getField("size").safeNumberLong();
+ approxTotalDBSizeOnDisk += sizeOfCurrCollOnDisk;
+ }
}
if (!getSharedData()->isResuming()) {
@@ -228,8 +247,14 @@ BaseCloner::AfterStageBehavior TenantDatabaseCloner::listExistingCollectionsStag
if (startingCollection != _collections.end() &&
startingCollection->second.uuid == lastClonedCollectionUUID) {
_stats.clonedCollectionsBeforeFailover = clonedCollectionUUIDs.size() - 1;
+
+ // When the last collection is partially cloned, we exclude it from the total size
+ // on disk, as the partially cloned collections stats will be added by the cloner
+ // on demand.
+ _stats.approxTotalBytesCopied = approxTotalDBSizeOnDisk - sizeOfCurrCollOnDisk;
} else {
_stats.clonedCollectionsBeforeFailover = clonedCollectionUUIDs.size();
+ _stats.approxTotalBytesCopied = approxTotalDBSizeOnDisk;
}
}
_collections.erase(_collections.begin(), startingCollection);
diff --git a/src/mongo/db/repl/tenant_database_cloner_test.cpp b/src/mongo/db/repl/tenant_database_cloner_test.cpp
index e72e393d6a7..01540fe3d57 100644
--- a/src/mongo/db/repl/tenant_database_cloner_test.cpp
+++ b/src/mongo/db/repl/tenant_database_cloner_test.cpp
@@ -666,11 +666,33 @@ TEST_F(TenantDatabaseClonerTest, ResumingFromLastClonedCollection) {
uuid.push_back(UUID::gen());
std::sort(uuid.begin(), uuid.end());
+ auto aNss = NamespaceString(_dbName, "a");
+ auto bNss = NamespaceString(_dbName, "b");
CollectionOptions options;
options.uuid = uuid[0];
- ASSERT_OK(createCollection(NamespaceString(_dbName, "a"), options));
+ ASSERT_OK(createCollection(aNss, options));
options.uuid = uuid[1];
- ASSERT_OK(createCollection(NamespaceString(_dbName, "b"), options));
+ ASSERT_OK(createCollection(bNss, options));
+
+ long long sizeOfOneCollection = 0;
+ {
+ // Insert documents into collections.
+ auto storage = StorageInterface::get(serviceContext);
+ auto opCtx = cc().makeOperationContext();
+
+ ASSERT_OK(storage->insertDocument(
+ opCtx.get(), aNss, {BSON("_id" << 0 << "a" << 1001), Timestamp(0)}, 0));
+ ASSERT_OK(storage->insertDocument(opCtx.get(),
+ bNss,
+ {BSON("_id" << 0 << "a"
+ << "hello"),
+ Timestamp(0)},
+ 0));
+
+ auto swSize = storage->getCollectionSize(opCtx.get(), aNss);
+ ASSERT_OK(swSize.getStatus());
+ sizeOfOneCollection = swSize.getValue();
+ }
TenantMigrationSharedData resumingSharedData(&_clock, _migrationId, /*resuming=*/true);
auto cloner = makeDatabaseCloner(&resumingSharedData);
@@ -701,7 +723,9 @@ TEST_F(TenantDatabaseClonerTest, ResumingFromLastClonedCollection) {
ASSERT_EQ(NamespaceString(_dbName, "b"), collections[0].first);
ASSERT_BSONOBJ_EQ(BSON("uuid" << uuid[1]), collections[0].second.toBSON());
- ASSERT_EQUALS(1, cloner->getStats().clonedCollectionsBeforeFailover);
+ auto stats = cloner->getStats();
+ ASSERT_EQUALS(1, stats.clonedCollectionsBeforeFailover);
+ ASSERT_EQUALS(sizeOfOneCollection, stats.approxTotalBytesCopied);
}
TEST_F(TenantDatabaseClonerTest, LastClonedCollectionDeleted_AllGreater) {
@@ -715,9 +739,24 @@ TEST_F(TenantDatabaseClonerTest, LastClonedCollectionDeleted_AllGreater) {
uuid.push_back(UUID::gen());
std::sort(uuid.begin(), uuid.end());
+ auto aNss = NamespaceString(_dbName, "a");
CollectionOptions options;
options.uuid = uuid[0];
- ASSERT_OK(createCollection(NamespaceString(_dbName, "a"), options));
+ ASSERT_OK(createCollection(aNss, options));
+
+ long long sizeANss = 0;
+ {
+ // Insert documents into collections.
+ auto storage = StorageInterface::get(serviceContext);
+ auto opCtx = cc().makeOperationContext();
+
+ ASSERT_OK(storage->insertDocument(
+ opCtx.get(), aNss, {BSON("_id" << 0 << "a" << 1001), Timestamp(0)}, 0));
+
+ auto swSize = storage->getCollectionSize(opCtx.get(), aNss);
+ ASSERT_OK(swSize.getStatus());
+ sizeANss = swSize.getValue();
+ }
TenantMigrationSharedData resumingSharedData(&_clock, _migrationId, /*resuming=*/true);
auto cloner = makeDatabaseCloner(&resumingSharedData);
@@ -750,7 +789,9 @@ TEST_F(TenantDatabaseClonerTest, LastClonedCollectionDeleted_AllGreater) {
ASSERT_EQ(NamespaceString(_dbName, "c"), collections[1].first);
ASSERT_BSONOBJ_EQ(BSON("uuid" << uuid[2]), collections[1].second.toBSON());
- ASSERT_EQUALS(1, cloner->getStats().clonedCollectionsBeforeFailover);
+ auto stats = cloner->getStats();
+ ASSERT_EQUALS(1, stats.clonedCollectionsBeforeFailover);
+ ASSERT_EQUALS(sizeANss, stats.approxTotalBytesCopied);
}
TEST_F(TenantDatabaseClonerTest, LastClonedCollectionDeleted_SomeGreater) {
@@ -764,11 +805,37 @@ TEST_F(TenantDatabaseClonerTest, LastClonedCollectionDeleted_SomeGreater) {
uuid.push_back(UUID::gen());
std::sort(uuid.begin(), uuid.end());
+ auto aNss = NamespaceString(_dbName, "a");
+ auto bNss = NamespaceString(_dbName, "b");
CollectionOptions options;
options.uuid = uuid[0];
- ASSERT_OK(createCollection(NamespaceString(_dbName, "a"), options));
+ ASSERT_OK(createCollection(aNss, options));
options.uuid = uuid[1];
- ASSERT_OK(createCollection(NamespaceString(_dbName, "b"), options));
+ ASSERT_OK(createCollection(bNss, options));
+
+ long long ANssBNssSize = 0;
+ {
+ // Insert some documents into both collections.
+ auto storage = StorageInterface::get(serviceContext);
+ auto opCtx = cc().makeOperationContext();
+
+ ASSERT_OK(storage->insertDocument(
+ opCtx.get(), aNss, {BSON("_id" << 0 << "a" << 1001), Timestamp(0)}, 0));
+ ASSERT_OK(storage->insertDocument(opCtx.get(),
+ bNss,
+ {BSON("_id" << 0 << "a"
+ << "hello"),
+ Timestamp(0)},
+ 0));
+
+ auto swSizeANss = storage->getCollectionSize(opCtx.get(), aNss);
+ ASSERT_OK(swSizeANss.getStatus());
+ ANssBNssSize = swSizeANss.getValue();
+
+ auto swSizeBNss = storage->getCollectionSize(opCtx.get(), bNss);
+ ASSERT_OK(swSizeBNss.getStatus());
+ ANssBNssSize += swSizeBNss.getValue();
+ }
TenantMigrationSharedData resumingSharedData(&_clock, _migrationId, /*resuming=*/true);
auto cloner = makeDatabaseCloner(&resumingSharedData);
@@ -799,7 +866,9 @@ TEST_F(TenantDatabaseClonerTest, LastClonedCollectionDeleted_SomeGreater) {
ASSERT_EQ(NamespaceString(_dbName, "c"), collections[0].first);
ASSERT_BSONOBJ_EQ(BSON("uuid" << uuid[2]), collections[0].second.toBSON());
- ASSERT_EQUALS(2, cloner->getStats().clonedCollectionsBeforeFailover);
+ auto stats = cloner->getStats();
+ ASSERT_EQUALS(2, stats.clonedCollectionsBeforeFailover);
+ ASSERT_EQUALS(ANssBNssSize, stats.approxTotalBytesCopied);
}
TEST_F(TenantDatabaseClonerTest, LastClonedCollectionDeleted_AllLess) {
@@ -813,13 +882,50 @@ TEST_F(TenantDatabaseClonerTest, LastClonedCollectionDeleted_AllLess) {
uuid.push_back(UUID::gen());
std::sort(uuid.begin(), uuid.end());
+ auto aNss = NamespaceString(_dbName, "a");
+ auto bNss = NamespaceString(_dbName, "b");
+ auto cNss = NamespaceString(_dbName, "c");
CollectionOptions options;
options.uuid = uuid[0];
- ASSERT_OK(createCollection(NamespaceString(_dbName, "a"), options));
+ ASSERT_OK(createCollection(aNss, options));
options.uuid = uuid[1];
- ASSERT_OK(createCollection(NamespaceString(_dbName, "b"), options));
+ ASSERT_OK(createCollection(bNss, options));
options.uuid = uuid[2];
- ASSERT_OK(createCollection(NamespaceString(_dbName, "c"), options));
+ ASSERT_OK(createCollection(cNss, options));
+
+ long long sizeOfAllColls = 0;
+ {
+ // Insert some documents into all three collections.
+ auto storage = StorageInterface::get(serviceContext);
+ auto opCtx = cc().makeOperationContext();
+
+ ASSERT_OK(storage->insertDocument(
+ opCtx.get(), aNss, {BSON("_id" << 0 << "a" << 1001), Timestamp(0)}, 0));
+ ASSERT_OK(storage->insertDocument(opCtx.get(),
+ bNss,
+ {BSON("_id" << 0 << "a"
+ << "hello"),
+ Timestamp(0)},
+ 0));
+ ASSERT_OK(storage->insertDocument(opCtx.get(),
+ cNss,
+ {BSON("_id" << 0 << "a"
+ << "goodbye"),
+ Timestamp(0)},
+ 0));
+
+ auto swSizeANss = storage->getCollectionSize(opCtx.get(), aNss);
+ ASSERT_OK(swSizeANss.getStatus());
+ sizeOfAllColls = swSizeANss.getValue();
+
+ auto swSizeBNss = storage->getCollectionSize(opCtx.get(), bNss);
+ ASSERT_OK(swSizeBNss.getStatus());
+ sizeOfAllColls += swSizeBNss.getValue();
+
+ auto swSizeCNss = storage->getCollectionSize(opCtx.get(), cNss);
+ ASSERT_OK(swSizeCNss.getStatus());
+ sizeOfAllColls += swSizeCNss.getValue();
+ }
TenantMigrationSharedData resumingSharedData(&_clock, _migrationId, /*resuming=*/true);
auto cloner = makeDatabaseCloner(&resumingSharedData);
@@ -849,7 +955,9 @@ TEST_F(TenantDatabaseClonerTest, LastClonedCollectionDeleted_AllLess) {
// Nothing to clone.
ASSERT_EQUALS(0U, collections.size());
- ASSERT_EQUALS(3, cloner->getStats().clonedCollectionsBeforeFailover);
+ auto stats = cloner->getStats();
+ ASSERT_EQUALS(3, stats.clonedCollectionsBeforeFailover);
+ ASSERT_EQUALS(sizeOfAllColls, stats.approxTotalBytesCopied);
}
} // namespace repl
diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.cpp b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
index a8fec293dcb..88faf60b191 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_service.cpp
+++ b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
@@ -343,6 +343,10 @@ boost::optional<BSONObj> TenantMigrationRecipientService::Instance::reportForCur
(stats.approxTotalBytesCopied + 1);
bob.append("remainingReceiveEstimatedMillis", timeRemainingMillis);
+
+ BSONObjBuilder dbsBuilder(bob.subobjStart("databases"));
+ _tenantAllDatabaseCloner->getStats().append(&dbsBuilder);
+ dbsBuilder.doneFast();
}
if (_stateDoc.getStartFetchingDonorOpTime())
@@ -370,12 +374,6 @@ boost::optional<BSONObj> TenantMigrationRecipientService::Instance::reportForCur
static_cast<long long>(_tenantOplogApplier->getNumOpsApplied()));
}
- if (_tenantAllDatabaseCloner) {
- BSONObjBuilder dbsBuilder(bob.subobjStart("databases"));
- _tenantAllDatabaseCloner->getStats().append(&dbsBuilder);
- dbsBuilder.doneFast();
- }
-
return bob.obj();
}