author    Suganthi Mani <38441312+smani87@users.noreply.github.com>    2022-02-15 18:48:08 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com>             2022-02-25 15:18:03 +0000
commit    71d70bf5ebba88a8f51a20d660cb4d9c6532f35c (patch)
tree      48eec8abe846ab67135ac9a11e92a4b06b235e13
parent    455957ee90136e70ec93df6bdf26b118cc42b5b2 (diff)
download  mongo-r5.3.0-rc2.tar.gz
SERVER-63129 Tenant collection cloner resume should ignore "view already exists" errors while creating collections. (tag: r5.3.0-rc2)
(cherry picked from commit e840bb65779035e3f5e7d1fb9b6951c291957a74)
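For context, the error this change teaches the cloner to tolerate can be reproduced directly in the mongo shell; the sketch below is illustrative only and assumes a test mongod, with "ts" as a placeholder collection name.

// Creating a time-series collection implicitly registers a view on the "ts" namespace.
assert.commandWorked(db.createCollection("ts", {timeseries: {timeField: "time"}}));
// A later createCollection() on the same namespace fails with NamespaceExists
// ("a view already exists"). After a recipient failover, the resumed tenant
// collection cloner now skips such a collection and leaves the donor's
// drop-view/create-collection sequence to the oplog application phase.
assert.commandFailedWithCode(db.createCollection("ts"), ErrorCodes.NamespaceExists);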
-rw-r--r--  etc/backports_required_for_multiversion_tests.yml                                                          |   4
-rw-r--r--  jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover_with_dropped_views.js  | 136
-rw-r--r--  src/mongo/db/catalog/SConscript                                                                             |   4
-rw-r--r--  src/mongo/db/catalog/collection_catalog_helper.cpp                                                          |  21
-rw-r--r--  src/mongo/db/catalog/collection_catalog_helper.h                                                            |   9
-rw-r--r--  src/mongo/db/catalog/create_collection.cpp                                                                  |  58
-rw-r--r--  src/mongo/db/repl/storage_interface_impl.cpp                                                                |  18
-rw-r--r--  src/mongo/db/repl/tenant_collection_cloner.cpp                                                              |  23
-rw-r--r--  src/mongo/db/repl/tenant_database_cloner.cpp                                                                |  13
9 files changed, 231 insertions(+), 55 deletions(-)
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index 5513d4b7af8..bbeb9f6b8d7 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -150,6 +150,8 @@ last-continuous:
test_file: jstests/core/exhaust.js
- ticket: SERVER-63141
test_file: jstests/aggregation/lookup_let_optimization.js
+ - ticket: SERVER-63129
+ test_file: jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover_with_dropped_views.js
# Tests that should only be excluded from particular suites should be listed under that suite.
suites:
@@ -439,6 +441,8 @@ last-lts:
test_file: jstests/core/exhaust.js
- ticket: SERVER-63141
test_file: jstests/aggregation/lookup_let_optimization.js
+ - ticket: SERVER-63129
+ test_file: jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover_with_dropped_views.js
# Tests that should only be excluded from particular suites should be listed under that suite.
suites:
diff --git a/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover_with_dropped_views.js b/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover_with_dropped_views.js
new file mode 100644
index 00000000000..70c422dacf8
--- /dev/null
+++ b/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover_with_dropped_views.js
@@ -0,0 +1,136 @@
+/**
+ * Tests that, during tenant migration, a collection re-created on a dropped view namespace is
+ * handled correctly when the logical tenant collection cloning phase resumes after a recipient
+ * failover.
+ * @tags: [
+ * incompatible_with_eft,
+ * incompatible_with_macos,
+ * incompatible_with_shard_merge,
+ * incompatible_with_windows_tls,
+ * requires_majority_read_concern,
+ * requires_persistence,
+ * serverless,
+ * ]
+ */
+
+(function() {
+"use strict";
+
+const tenantMigrationFailoverTest = function(isTimeSeries, createCollFn) {
+ load("jstests/libs/fail_point_util.js");
+ load("jstests/libs/uuid_util.js"); // for 'extractUUIDFromObject'
+ load("jstests/replsets/libs/tenant_migration_test.js");
+ load("jstests/replsets/libs/tenant_migration_util.js");
+
+ const recipientRst = new ReplSetTest({
+ nodes: 2,
+ name: jsTestName() + "_recipient",
+ nodeOptions: Object.assign(TenantMigrationUtil.makeX509OptionsForTest().recipient, {
+ setParameter: {
+ // Allow reads on recipient before migration completes for testing.
+ 'failpoint.tenantMigrationRecipientNotRejectReads': tojson({mode: 'alwaysOn'}),
+ }
+ })
+ });
+
+ recipientRst.startSet();
+ recipientRst.initiate();
+
+ const tenantMigrationTest =
+ new TenantMigrationTest({name: jsTestName(), recipientRst: recipientRst});
+
+ const donorRst = tenantMigrationTest.getDonorRst();
+ const donorPrimary = donorRst.getPrimary();
+
+ const tenantId = "testTenantId";
+ const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB");
+ const donorDB = donorPrimary.getDB(dbName);
+ const collName = "testColl";
+ const donorColl = donorDB[collName];
+
+ let getCollectionInfo = function(conn) {
+ return conn.getDB(dbName).getCollectionInfos().filter(coll => {
+ return coll.name === collName;
+ });
+ };
+
+ // Create a timeseries collection or a regular view.
+ assert.commandWorked(createCollFn(donorDB, collName));
+ donorRst.awaitReplication();
+
+ const migrationId = UUID();
+ const migrationIdString = extractUUIDFromObject(migrationId);
+ const migrationOpts = {
+ migrationIdString: migrationIdString,
+ recipientConnString: tenantMigrationTest.getRecipientConnString(),
+ tenantId: tenantId,
+ };
+
+ const recipientPrimary = recipientRst.getPrimary();
+ const recipientDb = recipientPrimary.getDB(dbName);
+ const recipientSystemViewsColl = recipientDb.getCollection("system.views");
+
+ // Configure a fail point to have the recipient primary hang after cloning
+ // "testTenantId_testDB.system.views" collection.
+ const hangDuringCollectionClone =
+ configureFailPoint(recipientPrimary,
+ "tenantMigrationHangCollectionClonerAfterHandlingBatchResponse",
+ {nss: recipientSystemViewsColl.getFullName()});
+
+ // Start the migration and wait for the migration to hang after cloning
+ // "testTenantId_testDB.system.views" collection.
+ assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
+ hangDuringCollectionClone.wait();
+
+ assert.soon(() => recipientSystemViewsColl.find().itcount() >= 1);
+ recipientRst.awaitLastOpCommitted();
+ const newRecipientPrimary = recipientRst.getSecondaries()[0];
+
+ // Verify that a view has been registered for "testTenantId_testDB.testColl" on the new
+ // recipient primary.
+ let collectionInfo = getCollectionInfo(newRecipientPrimary);
+ assert.eq(1, collectionInfo.length);
+ assert(collectionInfo[0].type === (isTimeSeries ? "timeseries" : "view"),
+ "data store type mismatch: " + tojson(collectionInfo[0]));
+
+ // On the donor, drop the view and create a regular collection on the same namespace as the
+ // dropped view.
+ assert(donorColl.drop());
+ assert.commandWorked(donorDB.createCollection(collName));
+
+ // We need to skip TenantDatabaseCloner::listExistingCollectionsStage() to make sure
+ // the recipient always clones the newly created regular collection after the failover.
+ // Currently, after a failover we resume cloning only from the collection whose UUID is
+ // greater than or equal to that of the last collection we have on disk.
+ const skiplistExistingCollectionsStage =
+ configureFailPoint(newRecipientPrimary, "skiplistExistingCollectionsStage");
+
+ // Step up a new node in the recipient set and trigger a failover.
+ recipientRst.stepUp(newRecipientPrimary);
+ hangDuringCollectionClone.off();
+
+ // The migration should go through after recipient failover.
+ TenantMigrationTest.assertCommitted(
+ tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
+
+ // Check that the recipient has dropped the view and re-created the regular collection as
+ // part of the migration oplog catch-up phase.
+ collectionInfo = getCollectionInfo(newRecipientPrimary);
+ assert.eq(1, collectionInfo.length);
+ assert(collectionInfo[0].type === "collection",
+ "data store type mismatch: " + tojson(collectionInfo[0]));
+
+ tenantMigrationTest.stop();
+ recipientRst.stopSet();
+};
+
+jsTestLog("Running tenant migration test for time-series collection");
+// Creating a timeseries collection implicitly creates a view on the 'collName' collection
+// namespace.
+tenantMigrationFailoverTest(true,
+ (db, collName) => db.createCollection(
+ collName, {timeseries: {timeField: "time", metaField: "bucket"}}));
+
+jsTestLog("Running tenant migration test for regular view");
+tenantMigrationFailoverTest(false,
+ (db, collName) => db.createView(collName, "sourceCollection", []));
+})();
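The test above drives the recipient failpoints through the fail_point_util.js helper; the same failpoints can also be toggled with the configureFailPoint admin command (a minimal sketch, assuming 'conn' is a connection to a node started with test commands enabled):

// Equivalent to configureFailPoint(conn, "skiplistExistingCollectionsStage").
assert.commandWorked(conn.adminCommand(
    {configureFailPoint: "skiplistExistingCollectionsStage", mode: "alwaysOn"}));
// ... exercise the failover and the resumed cloning path ...
assert.commandWorked(conn.adminCommand(
    {configureFailPoint: "skiplistExistingCollectionsStage", mode: "off"}));

The test itself is normally run through buildscripts/resmoke.py; the exact suite depends on the local resmoke configuration.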
diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript
index 17a173e2a51..53ebd0ee51e 100644
--- a/src/mongo/db/catalog/SConscript
+++ b/src/mongo/db/catalog/SConscript
@@ -309,6 +309,7 @@ env.Library(
],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/db/concurrency/lock_manager',
+ '$BUILD_DIR/mongo/db/views/views',
'collection',
'collection_catalog',
],
@@ -499,6 +500,9 @@ env.Library(
'rename_collection.cpp',
'list_indexes.cpp',
],
+ LIBDEPS=[
+ 'collection_catalog_helper',
+ ],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/base',
'$BUILD_DIR/mongo/db/db_raii',
diff --git a/src/mongo/db/catalog/collection_catalog_helper.cpp b/src/mongo/db/catalog/collection_catalog_helper.cpp
index 50c78212e58..6c7dd9e19bc 100644
--- a/src/mongo/db/catalog/collection_catalog_helper.cpp
+++ b/src/mongo/db/catalog/collection_catalog_helper.cpp
@@ -31,6 +31,7 @@
#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog/collection_catalog.h"
#include "mongo/db/concurrency/d_concurrency.h"
+#include "mongo/db/views/view_catalog.h"
namespace mongo {
@@ -38,6 +39,26 @@ MONGO_FAIL_POINT_DEFINE(hangBeforeGettingNextCollection);
namespace catalog {
+Status checkIfNamespaceExists(OperationContext* opCtx, const NamespaceString& nss) {
+ if (CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, nss)) {
+ return Status(ErrorCodes::NamespaceExists,
+ str::stream() << "Collection " << nss.ns() << " already exists.");
+ }
+
+ auto view = ViewCatalog::get(opCtx)->lookup(opCtx, nss);
+ if (!view)
+ return Status::OK();
+
+ if (view->timeseries()) {
+ return Status(ErrorCodes::NamespaceExists,
+ str::stream() << "A timeseries collection already exists. NS: " << nss);
+ }
+
+ return Status(ErrorCodes::NamespaceExists,
+ str::stream() << "A view already exists. NS: " << nss);
+}
+
+
void forEachCollectionFromDb(OperationContext* opCtx,
const TenantDatabaseName& tenantDbName,
LockMode collLockMode,
diff --git a/src/mongo/db/catalog/collection_catalog_helper.h b/src/mongo/db/catalog/collection_catalog_helper.h
index f45b0f54a45..60751e6b537 100644
--- a/src/mongo/db/catalog/collection_catalog_helper.h
+++ b/src/mongo/db/catalog/collection_catalog_helper.h
@@ -42,6 +42,15 @@ class CollectionCatalogEntry;
namespace catalog {
/**
+ * Returns ErrorCodes::NamespaceExists if a collection or any type of view exists on the given
+ * namespace 'nss'. Otherwise returns Status::OK().
+ *
+ * Note: If the caller invokes this method without holding the collection lock, the returned
+ * result may become stale immediately after the call.
+ */
+Status checkIfNamespaceExists(OperationContext* opCtx, const NamespaceString& nss);
+
+/**
* Iterates through all the collections in the given database and runs the callback function on each
* collection. If a predicate is provided, then the callback will only be executed against the
* collections that satisfy the predicate.
diff --git a/src/mongo/db/catalog/create_collection.cpp b/src/mongo/db/catalog/create_collection.cpp
index 89357d9280b..ae9d3d4fa04 100644
--- a/src/mongo/db/catalog/create_collection.cpp
+++ b/src/mongo/db/catalog/create_collection.cpp
@@ -39,6 +39,7 @@
#include "mongo/bson/json.h"
#include "mongo/db/catalog/clustered_collection_util.h"
#include "mongo/db/catalog/collection_catalog.h"
+#include "mongo/db/catalog/collection_catalog_helper.h"
#include "mongo/db/catalog/database_holder.h"
#include "mongo/db/catalog/index_key_validate.h"
#include "mongo/db/commands.h"
@@ -278,26 +279,16 @@ Status _createTimeseries(OperationContext* opCtx,
writeConflictRetry(opCtx, "createBucketCollection", bucketsNs.ns(), [&]() -> Status {
AutoGetDb autoDb(opCtx, bucketsNs.db(), MODE_IX);
Lock::CollectionLock bucketsCollLock(opCtx, bucketsNs, MODE_IX);
+ auto db = autoDb.ensureDbExists(opCtx);
// Check if there already exist a Collection on the namespace we will later create a
// view on. We're not holding a Collection lock for this Collection so we may only check
// if the pointer is null or not. The answer may also change at any point after this
// call which is fine as we properly handle an orphaned bucket collection. This check is
// just here to prevent it from being created in the common case.
- if (CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, ns)) {
- return Status(ErrorCodes::NamespaceExists,
- str::stream() << "Collection already exists. NS: " << ns);
- }
-
- auto db = autoDb.ensureDbExists(opCtx);
- if (auto view = ViewCatalog::get(opCtx)->lookup(opCtx, ns); view) {
- if (view->timeseries()) {
- return Status(ErrorCodes::NamespaceExists,
- str::stream()
- << "A timeseries collection already exists. NS: " << ns);
- }
- return Status(ErrorCodes::NamespaceExists,
- str::stream() << "A view already exists. NS: " << ns);
+ Status status = catalog::checkIfNamespaceExists(opCtx, ns);
+ if (!status.isOK()) {
+ return status;
}
if (opCtx->writesAreReplicated() &&
@@ -369,23 +360,14 @@ Status _createTimeseries(OperationContext* opCtx,
opCtx,
NamespaceString(ns.db(), NamespaceString::kSystemDotViewsCollectionName),
MODE_X);
+ auto db = autoColl.ensureDbExists(opCtx);
// This is a top-level handler for time-series creation name conflicts. New commands coming
// in, or commands that generated a WriteConflict must return a NamespaceExists error here
// on conflict.
- if (CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, ns)) {
- return Status(ErrorCodes::NamespaceExists,
- str::stream() << "Collection already exists. NS: " << ns);
- }
-
- auto db = autoColl.ensureDbExists(opCtx);
- if (auto view = ViewCatalog::get(opCtx)->lookup(opCtx, ns)) {
- if (view->timeseries()) {
- return {ErrorCodes::NamespaceExists,
- str::stream() << "A timeseries collection already exists. NS: " << ns};
- }
- return {ErrorCodes::NamespaceExists,
- str::stream() << "A view already exists. NS: " << ns};
+ Status status = catalog::checkIfNamespaceExists(opCtx, ns);
+ if (!status.isOK()) {
+ return status;
}
if (opCtx->writesAreReplicated() &&
@@ -430,7 +412,7 @@ Status _createTimeseries(OperationContext* opCtx,
viewOptions.pipeline = timeseries::generateViewPipeline(*options.timeseries, asArray);
// Create the time-series view.
- auto status = db->userCreateNS(opCtx, ns, viewOptions);
+ status = db->userCreateNS(opCtx, ns, viewOptions);
if (!status.isOK()) {
return status.withContext(str::stream() << "Failed to create view on " << bucketsNs
<< " for time-series collection " << ns
@@ -451,23 +433,14 @@ Status _createCollection(OperationContext* opCtx,
return writeConflictRetry(opCtx, "create", nss.ns(), [&] {
AutoGetDb autoDb(opCtx, nss.db(), MODE_IX);
Lock::CollectionLock collLock(opCtx, nss, MODE_IX);
+ auto db = autoDb.ensureDbExists(opCtx);
+
// This is a top-level handler for collection creation name conflicts. New commands coming
// in, or commands that generated a WriteConflict must return a NamespaceExists error here
// on conflict.
- if (CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, nss)) {
- return Status(ErrorCodes::NamespaceExists,
- str::stream() << "Collection already exists. NS: " << nss);
- }
-
- auto db = autoDb.ensureDbExists(opCtx);
- if (auto view = ViewCatalog::get(opCtx)->lookup(opCtx, nss); view) {
- if (view->timeseries()) {
- return Status(ErrorCodes::NamespaceExists,
- str::stream()
- << "A timeseries collection already exists. NS: " << nss);
- }
- return Status(ErrorCodes::NamespaceExists,
- str::stream() << "A view already exists. NS: " << nss);
+ Status status = catalog::checkIfNamespaceExists(opCtx, nss);
+ if (!status.isOK()) {
+ return status;
}
// If the FCV has changed while executing the command to the version, where the feature flag
@@ -544,7 +517,6 @@ Status _createCollection(OperationContext* opCtx,
// Even though 'collectionOptions' is passed by rvalue reference, it is not safe to move
// because 'userCreateNS' may throw a WriteConflictException.
- Status status = Status::OK();
if (idIndex == boost::none || collectionOptions.clusteredIndex) {
status = db->userCreateNS(opCtx, nss, collectionOptions, /*createIdIndex=*/false);
} else {
diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp
index cc2b0c251c1..2125711a336 100644
--- a/src/mongo/db/repl/storage_interface_impl.cpp
+++ b/src/mongo/db/repl/storage_interface_impl.cpp
@@ -47,6 +47,7 @@
#include "mongo/db/catalog/coll_mod.h"
#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog/collection_catalog.h"
+#include "mongo/db/catalog/collection_catalog_helper.h"
#include "mongo/db/catalog/database_holder.h"
#include "mongo/db/catalog/document_validation.h"
#include "mongo/db/catalog/index_catalog.h"
@@ -481,19 +482,28 @@ Status StorageInterfaceImpl::createCollection(OperationContext* opCtx,
AutoGetDb databaseWriteGuard(opCtx, nss.db(), MODE_IX);
auto db = databaseWriteGuard.ensureDbExists(opCtx);
invariant(db);
- if (CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, nss)) {
- return Status(ErrorCodes::NamespaceExists,
- str::stream() << "Collection " << nss.ns() << " already exists.");
+
+ // Check whether a collection or view already exists on the given namespace 'nss'. The answer
+ // may change at any point after this call because we do not hold the collection lock here.
+ // That is fine, as any conflict is handled properly when registering the uncommitted
+ // collection with the CollectionCatalog. This check is just here to prevent the collection
+ // from being created in the common case.
+ Status status = mongo::catalog::checkIfNamespaceExists(opCtx, nss);
+ if (!status.isOK()) {
+ return status;
}
+
Lock::CollectionLock lk(opCtx, nss, MODE_IX);
WriteUnitOfWork wuow(opCtx);
try {
auto coll = db->createCollection(opCtx, nss, options, createIdIndex, idIndexSpec);
invariant(coll);
+
+ // This commit call can throw if a view already exists while registering the collection.
+ wuow.commit();
} catch (const AssertionException& ex) {
return ex.toStatus();
}
- wuow.commit();
return Status::OK();
});
diff --git a/src/mongo/db/repl/tenant_collection_cloner.cpp b/src/mongo/db/repl/tenant_collection_cloner.cpp
index 4e391644b5a..d74b0760cd4 100644
--- a/src/mongo/db/repl/tenant_collection_cloner.cpp
+++ b/src/mongo/db/repl/tenant_collection_cloner.cpp
@@ -394,17 +394,24 @@ BaseCloner::AfterStageBehavior TenantCollectionCloner::createCollectionStage() {
!_idIndexSpec.isEmpty() /* createIdIndex */,
_idIndexSpec);
if (status == ErrorCodes::NamespaceExists && getSharedData()->isResuming()) {
- // If we are resuming from a recipient failover and we have a collection on disk with
- // the same namespace but a different uuid, it means this collection must have been
- // dropped and re-created under a different uuid on the donor during the recipient
- // failover. And the drop and the re-create will be covered by the oplog application
- // phase.
+ // If we are resuming from a recipient failover, we can get ErrorCodes::NamespaceExists
+ // due to one of the following conditions:
+ //
+ // 1) We have a collection on disk with the same namespace but a different uuid. This means
+ // the collection must have been dropped and re-created under a different uuid on the donor
+ // during the recipient failover. The drop and the re-create will be covered by the oplog
+ // application phase.
+ //
+ // 2) We have a (possibly time-series) view on disk with the same namespace. This means the
+ // view must have been dropped on the donor and a regular collection created on the same
+ // namespace during the recipient failover. The view drop and the collection create will be
+ // covered by the oplog application phase.
LOGV2(5767200,
- "TenantCollectionCloner found same namespace with different uuid locally on "
- "resume, skipping cloning this collection.",
+ "Tenant collection cloner: Skipping cloning this collection.",
"namespace"_attr = getSourceNss(),
"migrationId"_attr = getSharedData()->getMigrationId(),
- "tenantId"_attr = getTenantId());
+ "tenantId"_attr = getTenantId(),
+ "error"_attr = status);
return kSkipRemainingStages;
}
uassertStatusOKWithContext(status, "Tenant collection cloner: create collection");
diff --git a/src/mongo/db/repl/tenant_database_cloner.cpp b/src/mongo/db/repl/tenant_database_cloner.cpp
index 7c7e93062a5..0cde8bfa265 100644
--- a/src/mongo/db/repl/tenant_database_cloner.cpp
+++ b/src/mongo/db/repl/tenant_database_cloner.cpp
@@ -49,6 +49,10 @@ namespace repl {
// Failpoint which causes the tenant database cloner to hang after it has successfully run listCollections
// and recorded the results and the operationTime.
MONGO_FAIL_POINT_DEFINE(tenantDatabaseClonerHangAfterGettingOperationTime);
+// Failpoint to skip comparing the list of collections that have already been cloned; instead,
+// cloning resumes from the beginning of the collection list provided by
+// TenantDatabaseCloner::listCollectionsStage.
+MONGO_FAIL_POINT_DEFINE(skiplistExistingCollectionsStage);
TenantDatabaseCloner::TenantDatabaseCloner(const std::string& dbName,
TenantMigrationSharedData* sharedData,
@@ -173,6 +177,15 @@ BaseCloner::AfterStageBehavior TenantDatabaseCloner::listCollectionsStage() {
}
BaseCloner::AfterStageBehavior TenantDatabaseCloner::listExistingCollectionsStage() {
+ if (MONGO_unlikely(skiplistExistingCollectionsStage.shouldFail())) {
+ LOGV2(6312900,
+ "skiplistExistingCollectionsStage failpoint is enabled. "
+ "Tenant DatabaseCloner resumes cloning",
+ "migrationId"_attr = getSharedData()->getMigrationId(),
+ "tenantId"_attr = _tenantId,
+ "resumeFrom"_attr = _collections.front().first);
+ return kContinueNormally;
+ }
auto opCtx = cc().makeOperationContext();
DBDirectClient client(opCtx.get());
tenantMigrationRecipientInfo(opCtx.get()) =