diff options
author | Yuhong Zhang <yuhong.zhang@mongodb.com> | 2022-03-04 20:40:48 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-03-11 06:04:10 +0000 |
commit | a8ce9aab78095ba80a813200b31f179735693332 (patch) | |
tree | d3111c6709e5dea7eba709020ab6ccb2cffbe067 | |
parent | fe15f13c05c4bfa65f09f6b60931770277785117 (diff) | |
download | mongo-a8ce9aab78095ba80a813200b31f179735693332.tar.gz |
SERVER-61159 Check for duplicates before converting a non-unique index to unique
-rw-r--r-- | jstests/core/collmod_convert_to_unique.js | 5 | ||||
-rw-r--r-- | src/mongo/db/catalog/coll_mod.cpp | 69 | ||||
-rw-r--r-- | src/mongo/db/catalog/coll_mod.h | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/oplog.cpp | 2 |
4 files changed, 72 insertions, 8 deletions
diff --git a/jstests/core/collmod_convert_to_unique.js b/jstests/core/collmod_convert_to_unique.js index 6647c0e4345..680252e11e7 100644 --- a/jstests/core/collmod_convert_to_unique.js +++ b/jstests/core/collmod_convert_to_unique.js @@ -49,8 +49,13 @@ assert.commandFailedWithCode(db.runCommand({ }), ErrorCodes.BadValue); +// Conversion should fail when there are existing duplicates. assert.commandWorked(coll.insert({_id: 1, a: 100})); assert.commandWorked(coll.insert({_id: 2, a: 100})); +const duplicateKeyError = assert.commandFailedWithCode( + db.runCommand({collMod: collName, index: {keyPattern: {a: 1}, unique: true}}), + ErrorCodes.DuplicateKey); +jsTestLog('Duplicate key error from failed conversion: ' + tojson(duplicateKeyError)); assert.commandWorked(coll.remove({_id: 2})); // Successfully converts to a unique index. diff --git a/src/mongo/db/catalog/coll_mod.cpp b/src/mongo/db/catalog/coll_mod.cpp index d6ddb8b9ee0..c8988ae85b7 100644 --- a/src/mongo/db/catalog/coll_mod.cpp +++ b/src/mongo/db/catalog/coll_mod.cpp @@ -43,6 +43,7 @@ #include "mongo/db/client.h" #include "mongo/db/command_generic_argument.h" #include "mongo/db/db_raii.h" +#include "mongo/db/index/index_access_method.h" #include "mongo/db/index/index_descriptor.h" #include "mongo/db/index_builds_coordinator.h" #include "mongo/db/op_observer.h" @@ -52,6 +53,8 @@ #include "mongo/db/server_options.h" #include "mongo/db/service_context.h" #include "mongo/db/storage/durable_catalog.h" +#include "mongo/db/storage/index_entry_comparison.h" +#include "mongo/db/storage/key_string.h" #include "mongo/db/storage/recovery_unit.h" #include "mongo/db/storage/storage_parameters_gen.h" #include "mongo/db/views/view_catalog.h" @@ -255,6 +258,52 @@ StatusWith<CollModRequest> parseCollModRequest(OperationContext* opCtx, return {std::move(cmr)}; } +void scanIndexForDuplicates(OperationContext* opCtx, Collection* coll, const IndexDescriptor* idx) { + // Starting point of index traversal. + const Ordering ord = Ordering::make(idx->keyPattern()); + auto keyStringVersion = KeyString::kLatestVersion; + bool isFirstEntry = true; + std::unique_ptr<KeyString> indexKeyString = std::make_unique<KeyString>(keyStringVersion); + std::unique_ptr<KeyString> prevIndexKeyString = std::make_unique<KeyString>(keyStringVersion); + + // Scan index for duplicates, comparing consecutive index entries. + // KeyStrings will be in strictly increasing order because all keys are sorted and they are + // in the format (Key, RID), and all RecordIDs are unique. + auto entry = coll->getIndexCatalog()->getEntry(idx); + auto accessMethod = entry->accessMethod(); + auto indexCursor = accessMethod->newCursor(opCtx, true /*forward=*/); + for (auto indexEntry = indexCursor->seek(BSONObj(), true); indexEntry; + indexEntry = indexCursor->next()) { + indexKeyString->resetToKey(indexEntry->key, ord, indexEntry->loc); + if (!isFirstEntry) { + int cmp = KeyString::compare( + indexKeyString->getBuffer(), + prevIndexKeyString->getBuffer(), + KeyString::sizeWithoutRecordIdAtEnd(indexKeyString->getBuffer(), + indexKeyString->getSize()), + KeyString::sizeWithoutRecordIdAtEnd(prevIndexKeyString->getBuffer(), + prevIndexKeyString->getSize())); + if (cmp == 0) { + auto dupKeyErrorStatus = buildDupKeyErrorStatus(indexEntry->key, + coll->ns(), + idx->indexName(), + idx->keyPattern(), + idx->collation()); + auto firstRecordId = KeyString::decodeRecordIdAtEnd(prevIndexKeyString->getBuffer(), + prevIndexKeyString->getSize()); + auto secondRecordId = KeyString::decodeRecordIdAtEnd(indexKeyString->getBuffer(), + indexKeyString->getSize()); + uassertStatusOK(dupKeyErrorStatus.withContext( + str::stream() << "Unique index '" << idx->indexName() << ", first record: " + << firstRecordId << ", second record: " << secondRecordId)); + } + } + + isFirstEntry = false; + prevIndexKeyString.swap(indexKeyString); + } +} + /** * If uuid is specified, add it to the collection specified by nss. This will error if the * collection already has a UUID. @@ -263,7 +312,8 @@ Status _collModInternal(OperationContext* opCtx, const NamespaceString& nss, const BSONObj& cmdObj, BSONObjBuilder* result, - bool upgradeUniqueIndexes) { + bool upgradeUniqueIndexes, + boost::optional<repl::OplogApplication::Mode> mode) { StringData dbName = nss.db(); AutoGetCollection autoColl( opCtx, nss, MODE_IX, MODE_X, AutoGetCollection::ViewMode::kViewsPermitted); @@ -389,6 +439,10 @@ Status _collModInternal(OperationContext* opCtx, // UniqueIndex if (cmr.indexUnique) { if (!cmr.idx->infoObj().getField("unique").trueValue()) { + // Checks for duplicates on the primary or for the 'applyOps' command. + if (!mode || *mode == repl::OplogApplication::Mode::kApplyOpsCmd) { + scanIndexForDuplicates(opCtx, coll, cmr.idx); + } newUnique = cmr.indexUnique; // Change the value of "unique" on disk. DurableCatalog::get(opCtx)->updateUniqueSetting( @@ -466,12 +520,14 @@ Status collMod(OperationContext* opCtx, nss, cmdObj, result, - /*upgradeUniqueIndexes*/ false); + /*upgradeUniqueIndexes*/ false, + boost::none); } Status collModWithUpgrade(OperationContext* opCtx, const NamespaceString& nss, - const BSONObj& cmdObj) { + const BSONObj& cmdObj, + boost::optional<repl::OplogApplication::Mode> mode) { // An empty collMod is used to upgrade unique index during FCV upgrade. If an application // executes the empty collMod when the secondary is upgrading FCV it is fine to upgrade the // unique index becuase the secondary will eventually get the real empty collMod. If the @@ -490,7 +546,7 @@ Status collModWithUpgrade(OperationContext* opCtx, } BSONObjBuilder resultWeDontCareAbout; - return _collModInternal(opCtx, nss, cmdObj, &resultWeDontCareAbout, upgradeUniqueIndex); + return _collModInternal(opCtx, nss, cmdObj, &resultWeDontCareAbout, upgradeUniqueIndex, mode); } Status _updateNonReplicatedIndexPerCollection(OperationContext* opCtx, Collection* coll) { @@ -503,7 +559,8 @@ Status _updateNonReplicatedIndexPerCollection(OperationContext* opCtx, Collectio coll->ns(), collModObj, &resultWeDontCareAbout, - /*upgradeUniqueIndexes*/ true); + /*upgradeUniqueIndexes*/ true, + boost::none); return collModStatus; } @@ -566,7 +623,7 @@ void _updateUniqueIndexesForDatabase(OperationContext* opCtx, const std::string& collModObjBuilder.append("collMod", collNSS.coll()); BSONObj collModObj = collModObjBuilder.done(); - uassertStatusOK(collModWithUpgrade(opCtx, collNSS, collModObj)); + uassertStatusOK(collModWithUpgrade(opCtx, collNSS, collModObj, boost::none)); } } } diff --git a/src/mongo/db/catalog/coll_mod.h b/src/mongo/db/catalog/coll_mod.h index 5629926b4b7..995cde6eb34 100644 --- a/src/mongo/db/catalog/coll_mod.h +++ b/src/mongo/db/catalog/coll_mod.h @@ -30,6 +30,7 @@ #include "mongo/base/status.h" #include "mongo/base/status_with.h" #include "mongo/db/catalog/collection_options.h" +#include "mongo/db/repl/oplog.h" namespace mongo { class BSONObj; @@ -58,7 +59,8 @@ Status collMod(OperationContext* opCtx, */ Status collModWithUpgrade(OperationContext* opCtx, const NamespaceString& nss, - const BSONObj& cmdObj); + const BSONObj& cmdObj, + boost::optional<repl::OplogApplication::Mode> mode); /* * Updates the unique indexes to timestamp safe unique index format on setFCV=4.2. It also updates diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp index f982d3e940a..7d264a0f9ed 100644 --- a/src/mongo/db/repl/oplog.cpp +++ b/src/mongo/db/repl/oplog.cpp @@ -1135,7 +1135,7 @@ const StringMap<ApplyOpMetadata> kOpsMap = { std::tie(std::ignore, nss) = parseCollModUUIDAndNss(opCtx, ui, ns, cmd); // The collMod for apply ops could be either a user driven collMod or a collMod triggered // by an upgrade. - return collModWithUpgrade(opCtx, nss, cmd); + return collModWithUpgrade(opCtx, nss, cmd, mode); }, {ErrorCodes::IndexNotFound, ErrorCodes::NamespaceNotFound}}}, {"dbCheck", {dbCheckOplogCommand, {}}}, |