author | Suganthi Mani <38441312+smani87@users.noreply.github.com> | 2022-02-15 18:48:08 +0000
---|---|---
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-02-25 15:18:03 +0000
commit | 71d70bf5ebba88a8f51a20d660cb4d9c6532f35c (patch) | |
tree | 48eec8abe846ab67135ac9a11e92a4b06b235e13 | |
parent | 455957ee90136e70ec93df6bdf26b118cc42b5b2 (diff) | |
download | mongo-r5.3.0-rc2.tar.gz | |
SERVER-63129 Tenant collection cloner resume should ignore "view already exists" errors while creating collections. (tag: r5.3.0-rc2)
(cherry picked from commit e840bb65779035e3f5e7d1fb9b6951c291957a74)
9 files changed, 231 insertions, 55 deletions
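For context before the diff: the conflict this change handles is the ordinary NamespaceExists error you get when a namespace is already occupied by a view (including the view that backs a time-series collection). The following is a minimal, hedged shell/jstest-style sketch — it is not part of the commit, and the collection names are illustrative — reproducing the donor-side sequence (drop the view, recreate a regular collection on the same namespace) that the cloner must now tolerate on resume.

```javascript
// Minimal sketch (not from the commit): a view occupying a namespace makes a later
// createCollection on that namespace fail with NamespaceExists.
assert.commandWorked(db.createCollection("sourceCollection"));
assert.commandWorked(db.createView("testColl", "sourceCollection", []));

// Attempting to create a regular collection on the view's namespace conflicts.
const res = db.runCommand({create: "testColl"});
assert.commandFailedWithCode(res, ErrorCodes.NamespaceExists);

// Only after the view is dropped can the regular collection be created. This is the
// donor-side sequence the new jstest below exercises across a recipient failover.
assert(db.testColl.drop());
assert.commandWorked(db.createCollection("testColl"));
```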
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index 5513d4b7af8..bbeb9f6b8d7 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -150,6 +150,8 @@ last-continuous:
    test_file: jstests/core/exhaust.js
  - ticket: SERVER-63141
    test_file: jstests/aggregation/lookup_let_optimization.js
+  - ticket: SERVER-63129
+    test_file: jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover_with_dropped_views.js

  # Tests that should only be excluded from particular suites should be listed under that suite.
  suites:
@@ -439,6 +441,8 @@ last-lts:
    test_file: jstests/core/exhaust.js
  - ticket: SERVER-63141
    test_file: jstests/aggregation/lookup_let_optimization.js
+  - ticket: SERVER-63129
+    test_file: jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover_with_dropped_views.js

  # Tests that should only be excluded from particular suites should be listed under that suite.
  suites:
diff --git a/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover_with_dropped_views.js b/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover_with_dropped_views.js
new file mode 100644
index 00000000000..70c422dacf8
--- /dev/null
+++ b/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover_with_dropped_views.js
@@ -0,0 +1,136 @@
+/**
+ * Tests that in tenant migration, the collection recreated on a dropped view namespace is handled
+ * correctly on resuming the logical tenant collection cloning phase due to recipient failover.
+ * @tags: [
+ *   incompatible_with_eft,
+ *   incompatible_with_macos,
+ *   incompatible_with_shard_merge,
+ *   incompatible_with_windows_tls,
+ *   requires_majority_read_concern,
+ *   requires_persistence,
+ *   serverless,
+ * ]
+ */
+
+(function() {
+"use strict";
+
+const tenantMigrationFailoverTest = function(isTimeSeries, createCollFn) {
+    load("jstests/libs/fail_point_util.js");
+    load("jstests/libs/uuid_util.js");  // for 'extractUUIDFromObject'
+    load("jstests/replsets/libs/tenant_migration_test.js");
+    load("jstests/replsets/libs/tenant_migration_util.js");
+
+    const recipientRst = new ReplSetTest({
+        nodes: 2,
+        name: jsTestName() + "_recipient",
+        nodeOptions: Object.assign(TenantMigrationUtil.makeX509OptionsForTest().recipient, {
+            setParameter: {
+                // Allow reads on recipient before migration completes for testing.
+                'failpoint.tenantMigrationRecipientNotRejectReads': tojson({mode: 'alwaysOn'}),
+            }
+        })
+    });
+
+    recipientRst.startSet();
+    recipientRst.initiate();
+
+    const tenantMigrationTest =
+        new TenantMigrationTest({name: jsTestName(), recipientRst: recipientRst});
+
+    const donorRst = tenantMigrationTest.getDonorRst();
+    const donorPrimary = donorRst.getPrimary();
+
+    const tenantId = "testTenantId";
+    const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB");
+    const donorDB = donorPrimary.getDB(dbName);
+    const collName = "testColl";
+    const donorColl = donorDB[collName];
+
+    let getCollectionInfo = function(conn) {
+        return conn.getDB(dbName).getCollectionInfos().filter(coll => {
+            return coll.name === collName;
+        });
+    };
+
+    // Create a timeseries collection or a regular view.
+    assert.commandWorked(createCollFn(donorDB, collName));
+    donorRst.awaitReplication();
+
+    const migrationId = UUID();
+    const migrationIdString = extractUUIDFromObject(migrationId);
+    const migrationOpts = {
+        migrationIdString: migrationIdString,
+        recipientConnString: tenantMigrationTest.getRecipientConnString(),
+        tenantId: tenantId,
+    };
+
+    const recipientPrimary = recipientRst.getPrimary();
+    const recipientDb = recipientPrimary.getDB(dbName);
+    const recipientSystemViewsColl = recipientDb.getCollection("system.views");
+
+    // Configure a fail point to have the recipient primary hang after cloning the
+    // "testTenantId_testDB.system.views" collection.
+    const hangDuringCollectionClone =
+        configureFailPoint(recipientPrimary,
+                           "tenantMigrationHangCollectionClonerAfterHandlingBatchResponse",
+                           {nss: recipientSystemViewsColl.getFullName()});
+
+    // Start the migration and wait for the migration to hang after cloning the
+    // "testTenantId_testDB.system.views" collection.
+    assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
+    hangDuringCollectionClone.wait();
+
+    assert.soon(() => recipientSystemViewsColl.find().itcount() >= 1);
+    recipientRst.awaitLastOpCommitted();
+    const newRecipientPrimary = recipientRst.getSecondaries()[0];
+
+    // Verify that a view has been registered for "testTenantId_testDB.testColl" on the new
+    // recipient primary.
+    let collectionInfo = getCollectionInfo(newRecipientPrimary);
+    assert.eq(1, collectionInfo.length);
+    assert(collectionInfo[0].type === (isTimeSeries ? "timeseries" : "view"),
+           "data store type mismatch: " + tojson(collectionInfo[0]));
+
+    // Drop the view and create a regular collection with the same namespace as the
+    // dropped view on the donor.
+    assert(donorColl.drop());
+    assert.commandWorked(donorDB.createCollection(collName));
+
+    // We need to skip TenantDatabaseCloner::listExistingCollectionsStage() to make sure
+    // the recipient always clones the newly created regular collection above after the failover.
+    // Currently, after a failover we restart cloning only from the collection whose UUID is
+    // greater than or equal to the last collection we have on disk.
+    const skiplistExistingCollectionsStage =
+        configureFailPoint(newRecipientPrimary, "skiplistExistingCollectionsStage");
+
+    // Step up a new node in the recipient set and trigger a failover.
+    recipientRst.stepUp(newRecipientPrimary);
+    hangDuringCollectionClone.off();
+
+    // The migration should go through after recipient failover.
+    TenantMigrationTest.assertCommitted(
+        tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
+
+    // Check that the recipient has dropped the view and re-created the regular collection as part
+    // of the migration oplog catchup phase.
+    collectionInfo = getCollectionInfo(newRecipientPrimary);
+    assert.eq(1, collectionInfo.length);
+    assert(collectionInfo[0].type === "collection",
+           "data store type mismatch: " + tojson(collectionInfo[0]));
+
+    tenantMigrationTest.stop();
+    recipientRst.stopSet();
+};
+
+jsTestLog("Running tenant migration test for time-series collection");
+// Creating a timeseries collection implicitly creates a view on the 'collName' collection
+// namespace.
+tenantMigrationFailoverTest(true,
+                            (db, collName) => db.createCollection(
+                                collName, {timeseries: {timeField: "time", metaField: "bucket"}}));
+
+jsTestLog("Running tenant migration test for regular view");
+tenantMigrationFailoverTest(false,
+                            (db, collName) => db.createView(collName, "sourceCollection", []));
+})();
diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript
index 17a173e2a51..53ebd0ee51e 100644
--- a/src/mongo/db/catalog/SConscript
+++ b/src/mongo/db/catalog/SConscript
@@ -309,6 +309,7 @@ env.Library(
    ],
    LIBDEPS_PRIVATE=[
        '$BUILD_DIR/mongo/db/concurrency/lock_manager',
+        '$BUILD_DIR/mongo/db/views/views',
        'collection',
        'collection_catalog',
    ],
@@ -499,6 +500,9 @@ env.Library(
        'rename_collection.cpp',
        'list_indexes.cpp',
    ],
+    LIBDEPS=[
+        'collection_catalog_helper',
+    ],
    LIBDEPS_PRIVATE=[
        '$BUILD_DIR/mongo/base',
        '$BUILD_DIR/mongo/db/db_raii',
diff --git a/src/mongo/db/catalog/collection_catalog_helper.cpp b/src/mongo/db/catalog/collection_catalog_helper.cpp
index 50c78212e58..6c7dd9e19bc 100644
--- a/src/mongo/db/catalog/collection_catalog_helper.cpp
+++ b/src/mongo/db/catalog/collection_catalog_helper.cpp
@@ -31,6 +31,7 @@
#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog/collection_catalog.h"
#include "mongo/db/concurrency/d_concurrency.h"
+#include "mongo/db/views/view_catalog.h"

namespace mongo {

@@ -38,6 +39,26 @@ MONGO_FAIL_POINT_DEFINE(hangBeforeGettingNextCollection);

namespace catalog {

+Status checkIfNamespaceExists(OperationContext* opCtx, const NamespaceString& nss) {
+    if (CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, nss)) {
+        return Status(ErrorCodes::NamespaceExists,
+                      str::stream() << "Collection " << nss.ns() << " already exists.");
+    }
+
+    auto view = ViewCatalog::get(opCtx)->lookup(opCtx, nss);
+    if (!view)
+        return Status::OK();
+
+    if (view->timeseries()) {
+        return Status(ErrorCodes::NamespaceExists,
+                      str::stream() << "A timeseries collection already exists. NS: " << nss);
+    }
+
+    return Status(ErrorCodes::NamespaceExists,
+                  str::stream() << "A view already exists. NS: " << nss);
+}
+
+
void forEachCollectionFromDb(OperationContext* opCtx,
                             const TenantDatabaseName& tenantDbName,
                             LockMode collLockMode,
diff --git a/src/mongo/db/catalog/collection_catalog_helper.h b/src/mongo/db/catalog/collection_catalog_helper.h
index f45b0f54a45..60751e6b537 100644
--- a/src/mongo/db/catalog/collection_catalog_helper.h
+++ b/src/mongo/db/catalog/collection_catalog_helper.h
@@ -42,6 +42,15 @@ class CollectionCatalogEntry;
namespace catalog {

/**
+ * Returns ErrorCodes::NamespaceExists if a collection or any type of view exists on the given
+ * namespace 'nss'. Otherwise returns Status::OK().
+ *
+ * Note: If the caller calls this method without locking the collection, then the returned result
+ * could be stale right after this call.
+ */
+Status checkIfNamespaceExists(OperationContext* opCtx, const NamespaceString& nss);
+
+/**
 * Iterates through all the collections in the given database and runs the callback function on each
 * collection. If a predicate is provided, then the callback will only be executed against the
 * collections that satisfy the predicate.
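The three NamespaceExists variants returned by the new checkIfNamespaceExists() helper are what a client observes when creating over an occupied namespace. The sketch below is only an illustration of that observable behaviour, not code from this commit; the collection names are made up.

```javascript
// Sketch (illustrative names): each branch of the helper surfaces as NamespaceExists.
assert.commandWorked(db.createCollection("plainColl"));
assert.commandFailedWithCode(db.runCommand({create: "plainColl"}),
                             ErrorCodes.NamespaceExists);  // collection already exists

assert.commandWorked(db.createCollection("tsColl", {timeseries: {timeField: "time"}}));
assert.commandFailedWithCode(db.runCommand({create: "tsColl"}),
                             ErrorCodes.NamespaceExists);  // timeseries collection already exists

assert.commandWorked(db.createView("aView", "plainColl", []));
assert.commandFailedWithCode(db.runCommand({create: "aView"}),
                             ErrorCodes.NamespaceExists);  // view already exists
```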
diff --git a/src/mongo/db/catalog/create_collection.cpp b/src/mongo/db/catalog/create_collection.cpp
index 89357d9280b..ae9d3d4fa04 100644
--- a/src/mongo/db/catalog/create_collection.cpp
+++ b/src/mongo/db/catalog/create_collection.cpp
@@ -39,6 +39,7 @@
#include "mongo/bson/json.h"
#include "mongo/db/catalog/clustered_collection_util.h"
#include "mongo/db/catalog/collection_catalog.h"
+#include "mongo/db/catalog/collection_catalog_helper.h"
#include "mongo/db/catalog/database_holder.h"
#include "mongo/db/catalog/index_key_validate.h"
#include "mongo/db/commands.h"
@@ -278,26 +279,16 @@ Status _createTimeseries(OperationContext* opCtx,
    writeConflictRetry(opCtx, "createBucketCollection", bucketsNs.ns(), [&]() -> Status {
        AutoGetDb autoDb(opCtx, bucketsNs.db(), MODE_IX);
        Lock::CollectionLock bucketsCollLock(opCtx, bucketsNs, MODE_IX);
+        auto db = autoDb.ensureDbExists(opCtx);

        // Check if there already exist a Collection on the namespace we will later create a
        // view on. We're not holding a Collection lock for this Collection so we may only check
        // if the pointer is null or not. The answer may also change at any point after this
        // call which is fine as we properly handle an orphaned bucket collection. This check is
        // just here to prevent it from being created in the common case.
-        if (CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, ns)) {
-            return Status(ErrorCodes::NamespaceExists,
-                          str::stream() << "Collection already exists. NS: " << ns);
-        }
-
-        auto db = autoDb.ensureDbExists(opCtx);
-        if (auto view = ViewCatalog::get(opCtx)->lookup(opCtx, ns); view) {
-            if (view->timeseries()) {
-                return Status(ErrorCodes::NamespaceExists,
-                              str::stream()
-                                  << "A timeseries collection already exists. NS: " << ns);
-            }
-            return Status(ErrorCodes::NamespaceExists,
-                          str::stream() << "A view already exists. NS: " << ns);
+        Status status = catalog::checkIfNamespaceExists(opCtx, ns);
+        if (!status.isOK()) {
+            return status;
        }

        if (opCtx->writesAreReplicated() &&
@@ -369,23 +360,14 @@ Status _createTimeseries(OperationContext* opCtx,
        opCtx,
        NamespaceString(ns.db(), NamespaceString::kSystemDotViewsCollectionName),
        MODE_X);
+    auto db = autoColl.ensureDbExists(opCtx);

    // This is a top-level handler for time-series creation name conflicts. New commands coming
    // in, or commands that generated a WriteConflict must return a NamespaceExists error here
    // on conflict.
-    if (CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, ns)) {
-        return Status(ErrorCodes::NamespaceExists,
-                      str::stream() << "Collection already exists. NS: " << ns);
-    }
-
-    auto db = autoColl.ensureDbExists(opCtx);
-    if (auto view = ViewCatalog::get(opCtx)->lookup(opCtx, ns)) {
-        if (view->timeseries()) {
-            return {ErrorCodes::NamespaceExists,
-                    str::stream() << "A timeseries collection already exists. NS: " << ns};
-        }
-        return {ErrorCodes::NamespaceExists,
-                str::stream() << "A view already exists. NS: " << ns};
+    Status status = catalog::checkIfNamespaceExists(opCtx, ns);
+    if (!status.isOK()) {
+        return status;
    }

    if (opCtx->writesAreReplicated() &&
@@ -430,7 +412,7 @@ Status _createTimeseries(OperationContext* opCtx,
    viewOptions.pipeline = timeseries::generateViewPipeline(*options.timeseries, asArray);

    // Create the time-series view.
-    auto status = db->userCreateNS(opCtx, ns, viewOptions);
+    status = db->userCreateNS(opCtx, ns, viewOptions);
    if (!status.isOK()) {
        return status.withContext(str::stream() << "Failed to create view on " << bucketsNs
                                                << " for time-series collection " << ns
@@ -451,23 +433,14 @@ Status _createCollection(OperationContext* opCtx,
    return writeConflictRetry(opCtx, "create", nss.ns(), [&] {
        AutoGetDb autoDb(opCtx, nss.db(), MODE_IX);
        Lock::CollectionLock collLock(opCtx, nss, MODE_IX);
+        auto db = autoDb.ensureDbExists(opCtx);
+
        // This is a top-level handler for collection creation name conflicts. New commands coming
        // in, or commands that generated a WriteConflict must return a NamespaceExists error here
        // on conflict.
-        if (CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, nss)) {
-            return Status(ErrorCodes::NamespaceExists,
-                          str::stream() << "Collection already exists. NS: " << nss);
-        }
-
-        auto db = autoDb.ensureDbExists(opCtx);
-        if (auto view = ViewCatalog::get(opCtx)->lookup(opCtx, nss); view) {
-            if (view->timeseries()) {
-                return Status(ErrorCodes::NamespaceExists,
-                              str::stream()
-                                  << "A timeseries collection already exists. NS: " << nss);
-            }
-            return Status(ErrorCodes::NamespaceExists,
-                          str::stream() << "A view already exists. NS: " << nss);
+        Status status = catalog::checkIfNamespaceExists(opCtx, nss);
+        if (!status.isOK()) {
+            return status;
        }

        // If the FCV has changed while executing the command to the version, where the feature flag
@@ -544,7 +517,6 @@ Status _createCollection(OperationContext* opCtx,

        // Even though 'collectionOptions' is passed by rvalue reference, it is not safe to move
        // because 'userCreateNS' may throw a WriteConflictException.
-        Status status = Status::OK();
        if (idIndex == boost::none || collectionOptions.clusteredIndex) {
            status = db->userCreateNS(opCtx, nss, collectionOptions, /*createIdIndex=*/false);
        } else {
diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp
index cc2b0c251c1..2125711a336 100644
--- a/src/mongo/db/repl/storage_interface_impl.cpp
+++ b/src/mongo/db/repl/storage_interface_impl.cpp
@@ -47,6 +47,7 @@
#include "mongo/db/catalog/coll_mod.h"
#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog/collection_catalog.h"
+#include "mongo/db/catalog/collection_catalog_helper.h"
#include "mongo/db/catalog/database_holder.h"
#include "mongo/db/catalog/document_validation.h"
#include "mongo/db/catalog/index_catalog.h"
@@ -481,19 +482,28 @@ Status StorageInterfaceImpl::createCollection(OperationContext* opCtx,
        AutoGetDb databaseWriteGuard(opCtx, nss.db(), MODE_IX);
        auto db = databaseWriteGuard.ensureDbExists(opCtx);
        invariant(db);
-        if (CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, nss)) {
-            return Status(ErrorCodes::NamespaceExists,
-                          str::stream() << "Collection " << nss.ns() << " already exists.");
+
+        // Check if a collection or view already exists on the given namespace 'nss'. The answer
+        // may change at any point after this call, since we make this call without holding the
+        // collection lock. That is fine, because the conflict is properly handled while
+        // registering the uncommitted collection with the CollectionCatalog. This check is just
+        // here to prevent the collection from being created in the common case.
+        Status status = mongo::catalog::checkIfNamespaceExists(opCtx, nss);
+        if (!status.isOK()) {
+            return status;
        }
+
        Lock::CollectionLock lk(opCtx, nss, MODE_IX);
        WriteUnitOfWork wuow(opCtx);
        try {
            auto coll = db->createCollection(opCtx, nss, options, createIdIndex, idIndexSpec);
            invariant(coll);
+
+            // This commit call can throw if a view already exists while registering the collection.
+            wuow.commit();
        } catch (const AssertionException& ex) {
            return ex.toStatus();
        }
-        wuow.commit();

        return Status::OK();
    });
diff --git a/src/mongo/db/repl/tenant_collection_cloner.cpp b/src/mongo/db/repl/tenant_collection_cloner.cpp
index 4e391644b5a..d74b0760cd4 100644
--- a/src/mongo/db/repl/tenant_collection_cloner.cpp
+++ b/src/mongo/db/repl/tenant_collection_cloner.cpp
@@ -394,17 +394,24 @@ BaseCloner::AfterStageBehavior TenantCollectionCloner::createCollectionStage() {
                                                 !_idIndexSpec.isEmpty() /* createIdIndex */,
                                                 _idIndexSpec);
    if (status == ErrorCodes::NamespaceExists && getSharedData()->isResuming()) {
-        // If we are resuming from a recipient failover and we have a collection on disk with
-        // the same namespace but a different uuid, it means this collection must have been
-        // dropped and re-created under a different uuid on the donor during the recipient
-        // failover. And the drop and the re-create will be covered by the oplog application
-        // phase.
+        // If we are resuming from a recipient failover, we can get ErrorCodes::NamespaceExists
+        // due to the following conditions:
+        //
+        // 1) We have a collection on disk with the same namespace but a different uuid. It
+        // means this collection must have been dropped and re-created under a different uuid on
+        // the donor during the recipient failover. The drop and the re-create will be
+        // covered by the oplog application phase.
+        //
+        // 2) We have a [time series] view on disk with the same namespace. It means the view
+        // must have been dropped and a regular collection created with the same namespace as the
+        // dropped view during the recipient failover. The drop view and create collection
+        // will be covered by the oplog application phase.
        LOGV2(5767200,
-              "TenantCollectionCloner found same namespace with different uuid locally on "
-              "resume, skipping cloning this collection.",
+              "Tenant collection cloner: Skipping cloning this collection.",
              "namespace"_attr = getSourceNss(),
              "migrationId"_attr = getSharedData()->getMigrationId(),
-              "tenantId"_attr = getTenantId());
+              "tenantId"_attr = getTenantId(),
+              "error"_attr = status);
        return kSkipRemainingStages;
    }
    uassertStatusOKWithContext(status, "Tenant collection cloner: create collection");
diff --git a/src/mongo/db/repl/tenant_database_cloner.cpp b/src/mongo/db/repl/tenant_database_cloner.cpp
index 7c7e93062a5..0cde8bfa265 100644
--- a/src/mongo/db/repl/tenant_database_cloner.cpp
+++ b/src/mongo/db/repl/tenant_database_cloner.cpp
@@ -49,6 +49,10 @@ namespace repl {
// Failpoint which causes the tenant database cloner to hang after it has successfully run
// listCollections and recorded the results and the operationTime.
MONGO_FAIL_POINT_DEFINE(tenantDatabaseClonerHangAfterGettingOperationTime);
+// Failpoint to skip comparing the list of collections that are already cloned; instead, cloning
+// will resume from the beginning of the list provided by
+// TenantDatabaseCloner::listCollectionsStage.
+MONGO_FAIL_POINT_DEFINE(skiplistExistingCollectionsStage);

TenantDatabaseCloner::TenantDatabaseCloner(const std::string& dbName,
                                           TenantMigrationSharedData* sharedData,
@@ -173,6 +177,15 @@ BaseCloner::AfterStageBehavior TenantDatabaseCloner::listCollectionsStage() {
}

BaseCloner::AfterStageBehavior TenantDatabaseCloner::listExistingCollectionsStage() {
+    if (MONGO_unlikely(skiplistExistingCollectionsStage.shouldFail())) {
+        LOGV2(6312900,
+              "skiplistExistingCollectionsStage failpoint is enabled. "
+              "Tenant DatabaseCloner resumes cloning",
+              "migrationId"_attr = getSharedData()->getMigrationId(),
+              "tenantId"_attr = _tenantId,
+              "resumeFrom"_attr = _collections.front().first);
+        return kContinueNormally;
+    }
    auto opCtx = cc().makeOperationContext();
    DBDirectClient client(opCtx.get());
    tenantMigrationRecipientInfo(opCtx.get()) =
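The new skiplistExistingCollectionsStage failpoint is enabled from the jstest in the usual way via configureFailPoint(). A minimal hedged sketch is shown below; the connection variable newRecipientPrimary is assumed to be a node in the recipient replica set, as in the test above.

```javascript
// Sketch: enabling the new failpoint on a recipient node so that, after a failover,
// cloning restarts from the first collection returned by listCollectionsStage instead
// of comparing against the collections already cloned on disk.
load("jstests/libs/fail_point_util.js");

const fp = configureFailPoint(newRecipientPrimary,  // assumed: a recipient node connection
                              "skiplistExistingCollectionsStage");
// ... trigger the recipient failover and let the migration run to completion ...
fp.off();
```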