SERVER-54360 Support secondary unique indexes on clustered collections

author: Louis Williams <louis.williams@mongodb.com> 2021-11-05 15:08:40 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-11-05 16:09:45 +0000
commit: 7bcb659c013aaf44e1edab7b8a102a738c0bfb8c (patch)
tree: 89d69d8c8bc4eb13bc6523ef89cba8f64b5b6e94
parent: 43cba1cc995ffc6436493246b10146365d593b8e (diff)
download: mongo-7bcb659c013aaf44e1edab7b8a102a738c0bfb8c.tar.gz
12 files changed, 420 insertions, 19 deletions
diff --git a/jstests/libs/clustered_indexes_utils.js b/jstests/libs/clustered_indexes_utils.js
new file mode 100644
index 00000000000..7d99c0df71b
--- /dev/null
+++ b/jstests/libs/clustered_indexes_utils.js
@@ -0,0 +1,191 @@
+/**
+ * Utilies for clustered collections.
+ */
+
+function areClusteredIndexesEnabled(conn) {
+    const clusteredIndexesEnabled =
+        assert.commandWorked(conn.adminCommand({getParameter: 1, featureFlagClusteredIndexes: 1}))
+            .featureFlagClusteredIndexes.value;
+
+    if (!clusteredIndexesEnabled) {
+        return false;
+    }
+    return true;
+}
+
+function validateClusteredCollection(db, collName, clusterKey) {
+    const lengths = [100, 1024, 1024 * 1024, 3 * 1024 * 1024];
+    const coll = db[collName];
+    const clusterKeyString = new String(clusterKey);
+
+    assert.commandWorked(
+        db.createCollection(collName, {clusteredIndex: {key: {[clusterKey]: 1}, unique: true}}));
+
+    // Expect that duplicates are rejected.
+    for (let len of lengths) {
+        let id = 'x'.repeat(len);
+        assert.commandWorked(coll.insert({[clusterKey]: id}));
+        assert.commandFailedWithCode(coll.insert({[clusterKey]: id}), ErrorCodes.DuplicateKey);
+        assert.eq(1, coll.find({[clusterKey]: id}).itcount());
+    }
+
+    // Updates should work.
+    for (let len of lengths) {
+        let id = 'x'.repeat(len);
+
+        // Validate the below for _id-clustered collection only until the following tickets are
+        // addressed:
+        // * TODO SERVER-60734 replacement updates should preserve the cluster key
+        // * TODO SERVER-60702 enable bounded collscans for arbitary cluster keys
+        if (clusterKey == "_id") {
+            assert.commandWorked(coll.update({[clusterKey]: id}, {a: len}));
+
+            assert.eq(1, coll.find({[clusterKey]: id}).itcount());
+            assert.eq(len, coll.findOne({[clusterKey]: id})['a']);
+        }
+    }
+
+    // This section is based on jstests/core/timeseries/clustered_index_crud.js with
+    // specific additions for general-purpose (non-timeseries) clustered collections
+    assert.commandWorked(coll.insert({[clusterKey]: 0, a: 1}));
+    assert.commandWorked(coll.insert({[clusterKey]: 1, a: 1}));
+    assert.eq(1, coll.find({[clusterKey]: 0}).itcount());
+    assert.commandWorked(coll.insert({[clusterKey]: "", a: 2}));
+    assert.eq(1, coll.find({[clusterKey]: ""}).itcount());
+    assert.commandWorked(coll.insert({[clusterKey]: NumberLong("9223372036854775807"), a: 3}));
+    assert.eq(1, coll.find({[clusterKey]: NumberLong("9223372036854775807")}).itcount());
+    assert.commandWorked(coll.insert({[clusterKey]: {a: 1, b: 1}, a: 4}));
+    assert.eq(1, coll.find({[clusterKey]: {a: 1, b: 1}}).itcount());
+    assert.commandWorked(coll.insert({[clusterKey]: {a: {b: 1}, c: 1}, a: 5}));
+    assert.commandWorked(coll.insert({[clusterKey]: -1, a: 6}));
+    assert.eq(1, coll.find({[clusterKey]: -1}).itcount());
+    assert.commandWorked(coll.insert({[clusterKey]: "123456789012", a: 7}));
+    assert.eq(1, coll.find({[clusterKey]: "123456789012"}).itcount());
+    if (clusterKey == "_id") {
+        assert.commandWorked(coll.insert({a: 8}));
+    } else {
+        // Missing required cluster key field.
+        assert.commandFailedWithCode(coll.insert({a: 8}), 2);
+        assert.commandWorked(coll.insert({[clusterKey]: "withFieldA", a: 8}));
+    }
+    assert.eq(1, coll.find({a: 8}).itcount());
+    assert.commandWorked(coll.insert({[clusterKey]: null, a: 9}));
+    assert.eq(1, coll.find({[clusterKey]: null}).itcount());
+    assert.commandWorked(coll.insert({[clusterKey]: 'x'.repeat(99), a: 10}));
+
+    if (clusterKey == "_id") {
+        assert.commandWorked(coll.insert({}));
+    } else {
+        // Missing required ts field.
+        assert.commandFailedWithCode(coll.insert({}), 2);
+        assert.commandWorked(coll.insert({[clusterKey]: 'missingFieldA'}));
+    }
+    // Can build a secondary index with a 3MB RecordId doc.
+    assert.commandWorked(coll.createIndex({a: 1}));
+    // Can drop the secondary index
+    assert.commandWorked(coll.dropIndex({a: 1}));
+
+    // This key is too large.
+    assert.commandFailedWithCode(coll.insert({[clusterKey]: 'x'.repeat(8 * 1024 * 1024), a: 11}),
+                                 5894900);
+
+    // Look up using the secondary index on {a: 1}
+    assert.commandWorked(coll.createIndex({a: 1}));
+
+    // TODO remove the branch once SERVER-60734 "replacement updates should preserve the cluster
+    // key" is resolved.
+    if (clusterKey == "_id") {
+        assert.eq(1, coll.find({a: null}).itcount());
+    } else {
+        assert.eq(5, coll.find({a: null}).itcount());
+    }
+    assert.eq(0, coll.find({a: 0}).itcount());
+    assert.eq(2, coll.find({a: 1}).itcount());
+    assert.eq(1, coll.find({a: 2}).itcount());
+    assert.eq(1, coll.find({a: 8}).itcount());
+    assert.eq(1, coll.find({a: 9}).itcount());
+    assert.eq(null, coll.findOne({a: 9})[clusterKeyString]);
+    assert.eq(1, coll.find({a: 10}).itcount());
+    assert.eq(99, coll.findOne({a: 10})[clusterKeyString].length);
+
+    // todo make it unconditional once server-60734 "replacement updates should preserve the cluster
+    // key" is resolved.
+    if (clusterKey == "_id") {
+        for (let len of lengths) {
+            // Secondary index lookups for documents with large RecordId's.
+            assert.eq(1, coll.find({a: len}).itcount());
+            assert.eq(len, coll.findOne({a: len})[clusterKeyString].length);
+        }
+    }
+
+    let res = coll.validate();
+    assert(res.valid, res);
+    assert.commandWorked(coll.dropIndex({a: 1}));
+
+    // Test secondary unique indexes constraints.
+    assert.commandFailedWithCode(coll.createIndex({a: 1}, {unique: true}), ErrorCodes.DuplicateKey);
+    assert.commandWorked(coll.deleteOne({a: 1}));
+
+    // TODO remove the branch once SERVER-60734 "replacement updates should preserve the cluster
+    // key" is resolved.
+    if (clusterKey == "_id") {
+        assert.eq(1, coll.find({a: null}).itcount());
+        assert.commandWorked(coll.removeOne({a: null}));
+    } else {
+        assert.eq(5, coll.find({a: null}).itcount());
+        assert.commandWorked(coll.remove({a: null}, {multi: true}));
+    }
+
+    assert.commandWorked(coll.createIndex({a: 1}, {unique: true}));
+
+    // Check that document retrieval using the unique index key behaves correctly for cluster keys
+    // of different types.
+    assert.eq(0, coll.find({a: 0}).itcount());
+    assert.commandWorked(coll.insert({[clusterKey]: 100, a: 0}));
+
+    assert.eq(1, coll.find({a: 1}).itcount());
+    assert.commandFailedWithCode(coll.insert({[clusterKey]: -1, a: 1}), ErrorCodes.DuplicateKey);
+
+    assert.eq(1, coll.find({a: 2}).itcount());
+    assert.commandFailedWithCode(coll.insert({[clusterKey]: -1, a: 2}), ErrorCodes.DuplicateKey);
+
+    assert.eq(1, coll.find({a: 8}).itcount());
+    assert.commandFailedWithCode(coll.insert({[clusterKey]: -1, a: 8}), ErrorCodes.DuplicateKey);
+
+    assert.eq(1, coll.find({a: 9}).itcount());
+    assert.eq(null, coll.findOne({a: 9})['_id']);
+    assert.commandFailedWithCode(coll.insert({[clusterKey]: -1, a: 9}), ErrorCodes.DuplicateKey);
+
+    assert.eq(1, coll.find({a: 10}).itcount());
+    assert.eq(99, coll.findOne({a: 10})['_id'].length);
+    assert.commandFailedWithCode(coll.insert({[clusterKey]: -1, a: 10}), ErrorCodes.DuplicateKey);
+
+    // Cycle values of 'a' around to different documents. This intended to ensure correctness of
+    // batch application when this test is run in replication passthrough suites.
+    assert.commandWorked(coll.insert({[clusterKey]: 10, a: 110}));
+    assert.commandWorked(coll.insert({[clusterKey]: 11, a: 111}));
+    assert.commandWorked(coll.insert({[clusterKey]: 12, a: 112}));
+
+    assert.commandFailedWithCode(coll.update({[clusterKey]: 12}, {a: 110}),
+                                 ErrorCodes.DuplicateKey);
+    assert.commandWorked(coll.update({[clusterKey]: 12}, {a: 113}));
+    assert.commandWorked(coll.update({[clusterKey]: 11}, {a: 112}));
+    assert.commandWorked(coll.update({[clusterKey]: 10}, {a: 111}));
+    assert.commandWorked(coll.update({[clusterKey]: 12}, {a: 110}));
+
+    // Unique index lookups for documents with large RecordIds.
+    for (let len of lengths) {
+        assert.eq(1, coll.find({a: len}).itcount());
+        assert.eq(len, coll.findOne({a: len})['_id'].length);
+        assert.commandFailedWithCode(coll.insert({a: len}), ErrorCodes.DuplicateKey);
+    }
+
+    // No support for numeric type differentiation.
+    assert.commandWorked(coll.insert({[clusterKey]: 42.0}));
+    assert.commandFailedWithCode(coll.insert({[clusterKey]: 42}), ErrorCodes.DuplicateKey);
+    assert.commandFailedWithCode(coll.insert({[clusterKey]: NumberLong("42")}),
+                                 ErrorCodes.DuplicateKey);
+    assert.eq(1, coll.find({[clusterKey]: 42.0}).itcount());
+    assert.eq(1, coll.find({[clusterKey]: 42}).itcount());
+    assert.eq(1, coll.find({[clusterKey]: NumberLong("42")}).itcount());
+}
diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp
index 261f0f8fb71..fd63fb5d40e 100644
--- a/src/mongo/db/catalog/index_catalog_impl.cpp
+++ b/src/mongo/db/catalog/index_catalog_impl.cpp
@@ -796,10 +796,6 @@ Status IndexCatalogImpl::_isSpecOk(OperationContext* opCtx,
         }
     }
 
-    uassert(ErrorCodes::InvalidOptions,
-            "Unique indexes are not supported on clustered collections",
-            !collection->isClustered() || !spec[IndexDescriptor::kUniqueFieldName].trueValue());
-
     if (IndexDescriptor::isIdIndexPattern(key)) {
         if (collection->isClustered()) {
             return Status(ErrorCodes::CannotCreateIndex,
diff --git a/src/mongo/db/index/duplicate_key_tracker.cpp b/src/mongo/db/index/duplicate_key_tracker.cpp
index d5c75435764..67ccb738c20 100644
--- a/src/mongo/db/index/duplicate_key_tracker.cpp
+++ b/src/mongo/db/index/duplicate_key_tracker.cpp
@@ -85,7 +85,12 @@ Status DuplicateKeyTracker::recordKey(OperationContext* opCtx, const KeyString::
     // store the TypeBits for error reporting later on. The RecordId does not need to be stored, so
     // we exclude it from the serialization.
     BufBuilder builder;
-    key.serializeWithoutRecordIdLong(builder);
+    if (KeyFormat::Long ==
+        _indexCatalogEntry->accessMethod()->getSortedDataInterface()->rsKeyFormat()) {
+        key.serializeWithoutRecordIdLong(builder);
+    } else {
+        key.serializeWithoutRecordIdStr(builder);
+    }
 
     auto status =
         _keyConstraintsTable->rs()->insertRecord(opCtx, builder.buf(), builder.len(), Timestamp());
diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp
index 1a2fe8fe133..b7fa09bd0c4 100644
--- a/src/mongo/db/index/index_access_method.cpp
+++ b/src/mongo/db/index/index_access_method.cpp
@@ -810,7 +810,9 @@ Status AbstractIndexAccessMethod::commitBulk(OperationContext* opCtx,
         // builds since this check can be expensive.
         int cmpData;
         if (_descriptor->unique()) {
-            cmpData = data.first.compareWithoutRecordIdLong(previousKey);
+            cmpData = (_newInterface->rsKeyFormat() == KeyFormat::Long)
+                ? data.first.compareWithoutRecordIdLong(previousKey)
+                : data.first.compareWithoutRecordIdStr(previousKey);
         }
 
         if (kDebugBuild && data.first.compare(previousKey) < 0) {
@@ -988,7 +990,9 @@ std::string nextFileName() {
 Status AbstractIndexAccessMethod::_handleDuplicateKey(OperationContext* opCtx,
                                                       const KeyString::Value& dataKey,
                                                       const RecordIdHandlerFn& onDuplicateRecord) {
-    RecordId recordId = KeyString::decodeRecordIdLongAtEnd(dataKey.getBuffer(), dataKey.getSize());
+    RecordId recordId = (KeyFormat::Long == _newInterface->rsKeyFormat())
+        ? KeyString::decodeRecordIdLongAtEnd(dataKey.getBuffer(), dataKey.getSize())
+        : KeyString::decodeRecordIdStrAtEnd(dataKey.getBuffer(), dataKey.getSize());
     if (onDuplicateRecord) {
         return onDuplicateRecord(recordId);
     }
diff --git a/src/mongo/db/storage/key_string.cpp b/src/mongo/db/storage/key_string.cpp
index 0a93caa0c9a..9d1d88121b2 100644
--- a/src/mongo/db/storage/key_string.cpp
+++ b/src/mongo/db/storage/key_string.cpp
@@ -2705,6 +2705,15 @@ void Value::serializeWithoutRecordIdLong(BufBuilder& buf) const {
     buf.appendBuf(_buffer.get() + _ksSize, _buffer.size() - _ksSize);  // Serialize TypeBits
 }
 
+void Value::serializeWithoutRecordIdStr(BufBuilder& buf) const {
+    dassert(decodeRecordIdStrAtEnd(_buffer.get(), _ksSize).isValid());
+
+    const int32_t sizeWithoutRecordId = sizeWithoutRecordIdStrAtEnd(_buffer.get(), _ksSize);
+    buf.appendNum(sizeWithoutRecordId);                 // Serialize size of KeyString
+    buf.appendBuf(_buffer.get(), sizeWithoutRecordId);  // Serialize KeyString
+    buf.appendBuf(_buffer.get() + _ksSize, _buffer.size() - _ksSize);  // Serialize TypeBits
+}
+
 size_t Value::getApproximateSize() const {
     auto size = sizeof(Value);
     size += !_buffer.isShared() ? SharedBuffer::kHolderSize + _buffer.size() : 0;
diff --git a/src/mongo/db/storage/key_string.h b/src/mongo/db/storage/key_string.h
index d8db757213e..f0e1441004c 100644
--- a/src/mongo/db/storage/key_string.h
+++ b/src/mongo/db/storage/key_string.h
@@ -335,13 +335,21 @@ public:
         return *this;
     }
 
+    /**
+     * Compare with another KeyString::Value or Builder.
+     */
     template <class T>
     int compare(const T& other) const;
 
     int compareWithTypeBits(const Value& other) const;
 
+    /**
+     * Compare with another KeyString::Value or Builder, ignoring the RecordId part of both.
+     */
     template <class T>
     int compareWithoutRecordIdLong(const T& other) const;
+    template <class T>
+    int compareWithoutRecordIdStr(const T& other) const;
 
     // Returns the size of the stored KeyString.
     size_t getSize() const {
@@ -383,11 +391,12 @@ public:
     }
 
     /**
-     * Serializes this Value, excluing the RecordId, into a storable format with TypeBits
+     * Serializes this Value, excluding the RecordId, into a storable format with TypeBits
      * information. The serialized format takes the following form:
      *   [keystring size][keystring encoding][typebits encoding]
      */
     void serializeWithoutRecordIdLong(BufBuilder& buf) const;
+    void serializeWithoutRecordIdStr(BufBuilder& buf) const;
 
     // Deserialize the Value from a serialized format.
     static Value deserialize(BufReader& buf, KeyString::Version version) {
@@ -616,11 +625,19 @@ public:
         return _typeBits;
     }
 
+    /**
+     * Compare with another KeyString::Value or Builder.
+     */
     template <class T>
     int compare(const T& other) const;
 
+    /**
+     * Compare with another KeyString::Value or Builder, ignoring the RecordId part of both.
+     */
     template <class T>
     int compareWithoutRecordIdLong(const T& other) const;
+    template <class T>
+    int compareWithoutRecordIdStr(const T& other) const;
 
     /**
      * @return a hex encoding of this key
@@ -1030,6 +1047,16 @@ int BuilderBase<BufferT>::compareWithoutRecordIdLong(const T& other) const {
         !other.isEmpty() ? sizeWithoutRecordIdLongAtEnd(other.getBuffer(), other.getSize()) : 0);
 }
 
+template <class BufferT>
+template <class T>
+int BuilderBase<BufferT>::compareWithoutRecordIdStr(const T& other) const {
+    return KeyString::compare(
+        getBuffer(),
+        other.getBuffer(),
+        !isEmpty() ? sizeWithoutRecordIdStrAtEnd(getBuffer(), getSize()) : 0,
+        !other.isEmpty() ? sizeWithoutRecordIdStrAtEnd(other.getBuffer(), other.getSize()) : 0);
+}
+
 template <class T>
 int Value::compare(const T& other) const {
     return KeyString::compare(getBuffer(), other.getBuffer(), getSize(), other.getSize());
@@ -1044,6 +1071,15 @@ int Value::compareWithoutRecordIdLong(const T& other) const {
         !other.isEmpty() ? sizeWithoutRecordIdLongAtEnd(other.getBuffer(), other.getSize()) : 0);
 }
 
+template <class T>
+int Value::compareWithoutRecordIdStr(const T& other) const {
+    return KeyString::compare(
+        getBuffer(),
+        other.getBuffer(),
+        !isEmpty() ? sizeWithoutRecordIdStrAtEnd(getBuffer(), getSize()) : 0,
+        !other.isEmpty() ? sizeWithoutRecordIdStrAtEnd(other.getBuffer(), other.getSize()) : 0);
+}
+
 /**
  * Takes key string and key pattern information and uses it to present human-readable information
  * about an index or collection entry.
diff --git a/src/mongo/db/storage/sorted_data_interface_test_keyformat_string.cpp b/src/mongo/db/storage/sorted_data_interface_test_keyformat_string.cpp
index 604c4ae0168..b3a75c6f593 100644
--- a/src/mongo/db/storage/sorted_data_interface_test_keyformat_string.cpp
+++ b/src/mongo/db/storage/sorted_data_interface_test_keyformat_string.cpp
@@ -110,6 +110,87 @@ TEST(SortedDataInterface, KeyFormatStringInsertDuplicates) {
     }
 }
 
+TEST(SortedDataInterface, KeyFormatStringUniqueInsertRemoveDuplicates) {
+    const auto harnessHelper(newSortedDataInterfaceHarnessHelper());
+    const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(
+        /*unique=*/true, /*partial=*/false, KeyFormat::String));
+    const ServiceContext::UniqueOperationContext opCtx(harnessHelper->newOperationContext());
+    ASSERT(sorted->isEmpty(opCtx.get()));
+
+    std::string buf1(12, 0);
+    std::string buf2(12, 1);
+    std::string buf3(12, 0xff);
+
+    RecordId rid1(buf1.c_str(), 12);
+    RecordId rid2(buf2.c_str(), 12);
+    RecordId rid3(buf3.c_str(), 12);
+
+    {
+        WriteUnitOfWork uow(opCtx.get());
+        ASSERT_OK(sorted->insert(opCtx.get(),
+                                 makeKeyString(sorted.get(), key1, rid1),
+                                 /*dupsAllowed*/ true));
+        Status status = sorted->insert(opCtx.get(),
+                                       makeKeyString(sorted.get(), key1, rid2),
+                                       /*dupsAllowed*/ false);
+        ASSERT_EQ(ErrorCodes::DuplicateKey, status.code());
+
+        ASSERT_OK(sorted->insert(opCtx.get(),
+                                 makeKeyString(sorted.get(), key1, rid3),
+                                 /*dupsAllowed*/ true));
+        uow.commit();
+    }
+
+    ASSERT_EQUALS(2, sorted->numEntries(opCtx.get()));
+
+    {
+        WriteUnitOfWork uow(opCtx.get());
+        sorted->unindex(opCtx.get(),
+                        makeKeyString(sorted.get(), key1, rid1),
+                        /*dupsAllowed*/ true);
+
+        ASSERT_OK(sorted->insert(opCtx.get(),
+                                 makeKeyString(sorted.get(), key2, rid1),
+                                 /*dupsAllowed*/ true));
+        uow.commit();
+    }
+
+    ASSERT_EQUALS(2, sorted->numEntries(opCtx.get()));
+
+    auto ksSeek = makeKeyStringForSeek(sorted.get(), key1, true, true);
+    {
+        auto cursor = sorted->newCursor(opCtx.get());
+        auto entry = cursor->seek(ksSeek);
+        ASSERT(entry);
+        ASSERT_EQ(*entry, IndexKeyEntry(key1, rid3));
+
+        entry = cursor->next();
+        ASSERT(entry);
+        ASSERT_EQ(*entry, IndexKeyEntry(key2, rid1));
+
+        entry = cursor->next();
+        ASSERT_FALSE(entry);
+    }
+
+    {
+        auto cursor = sorted->newCursor(opCtx.get());
+        auto entry = cursor->seekForKeyString(ksSeek);
+        ASSERT(entry);
+        ASSERT_EQ(entry->loc, rid3);
+        auto ks1 = makeKeyString(sorted.get(), key1, rid3);
+        ASSERT_EQ(entry->keyString, ks1);
+
+        entry = cursor->nextKeyString();
+        ASSERT(entry);
+        ASSERT_EQ(entry->loc, rid1);
+        auto ks2 = makeKeyString(sorted.get(), key2, rid1);
+        ASSERT_EQ(entry->keyString, ks2);
+
+        entry = cursor->nextKeyString();
+        ASSERT_FALSE(entry);
+    }
+}
+
 TEST(SortedDataInterface, KeyFormatStringSetEndPosition) {
     const auto harnessHelper(newSortedDataInterfaceHarnessHelper());
     const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(
@@ -228,6 +309,58 @@ TEST(SortedDataInterface, KeyFormatStringUnindex) {
     ASSERT_EQUALS(0, sorted->numEntries(opCtx.get()));
 }
 
+TEST(SortedDataInterface, KeyFormatStringUniqueUnindex) {
+    const auto harnessHelper(newSortedDataInterfaceHarnessHelper());
+    const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(
+        /*unique=*/true, /*partial=*/false, KeyFormat::String));
+    const ServiceContext::UniqueOperationContext opCtx(harnessHelper->newOperationContext());
+    ASSERT(sorted->isEmpty(opCtx.get()));
+
+    std::string buf1(12, 0);
+    std::string buf2(12, 1);
+    std::string buf3(12, 0xff);
+
+    RecordId rid1(buf1.c_str(), 12);
+    RecordId rid2(buf2.c_str(), 12);
+    RecordId rid3(buf3.c_str(), 12);
+
+    {
+        WriteUnitOfWork uow(opCtx.get());
+        ASSERT_OK(sorted->insert(opCtx.get(),
+                                 makeKeyString(sorted.get(), key1, rid1),
+                                 /*dupsAllowed*/ false));
+        ASSERT_OK(sorted->insert(opCtx.get(),
+                                 makeKeyString(sorted.get(), key2, rid2),
+                                 /*dupsAllowed*/ false));
+        ASSERT_OK(sorted->insert(opCtx.get(),
+                                 makeKeyString(sorted.get(), key3, rid3),
+                                 /*dupsAllowed*/ false));
+        uow.commit();
+    }
+    ASSERT_EQUALS(3, sorted->numEntries(opCtx.get()));
+
+    {
+        WriteUnitOfWork uow(opCtx.get());
+        // Does not exist, does nothing.
+        sorted->unindex(opCtx.get(),
+                        makeKeyString(sorted.get(), key1, rid3),
+                        /*dupsAllowed*/ false);
+
+        sorted->unindex(opCtx.get(),
+                        makeKeyString(sorted.get(), key1, rid1),
+                        /*dupsAllowed*/ false);
+        sorted->unindex(opCtx.get(),
+                        makeKeyString(sorted.get(), key2, rid2),
+                        /*dupsAllowed*/ false);
+        sorted->unindex(opCtx.get(),
+                        makeKeyString(sorted.get(), key3, rid3),
+                        /*dupsAllowed*/ false);
+
+        uow.commit();
+    }
+    ASSERT_EQUALS(0, sorted->numEntries(opCtx.get()));
+}
+
 TEST(SortedDataInterface, InsertReservedRecordIdStr) {
     const auto harnessHelper(newSortedDataInterfaceHarnessHelper());
     const std::unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
index 8ad8a557afd..b3fe456cbb2 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
@@ -654,11 +654,13 @@ public:
     }
 
     Status addKey(const KeyString::Value& newKeyString) override {
-        dassertRecordIdAtEnd(newKeyString, KeyFormat::Long);
+        dassertRecordIdAtEnd(newKeyString, _idx->rsKeyFormat());
 
         // Do a duplicate check, but only if dups aren't allowed.
         if (!_dupsAllowed) {
-            const int cmp = newKeyString.compareWithoutRecordIdLong(_previousKeyString);
+            const int cmp = (_idx->_rsKeyFormat == KeyFormat::Long)
+                ? newKeyString.compareWithoutRecordIdLong(_previousKeyString)
+                : newKeyString.compareWithoutRecordIdStr(_previousKeyString);
             if (cmp == 0) {
                 // Duplicate found!
                 auto newKey = KeyString::toBson(newKeyString, _idx->_ordering);
@@ -842,6 +844,8 @@ public:
         dassert(KeyString::decodeDiscriminator(
                     key.getBuffer(), key.getSize(), _idx.getOrdering(), key.getTypeBits()) ==
                 KeyString::Discriminator::kInclusive);
+        // seekExact is only used on the _id index, which only uses the Long format.
+        invariant(KeyFormat::Long == _idx.rsKeyFormat());
 
         auto ksEntry = [&]() {
             if (_forward) {
@@ -1303,6 +1307,9 @@ private:
     // Must not throw WriteConflictException, throwing a WriteConflictException will retry the
     // operation effectively skipping over this key.
     void _updateIdAndTypeBitsFromValue() {
+        // Old-format unique index keys always use the Long format.
+        invariant(_idx.rsKeyFormat() == KeyFormat::Long);
+
         // We assume that cursors can only ever see unique indexes in their "pristine" state,
         // where no duplicates are possible. The cases where dups are allowed should hold
         // sufficient locks to ensure that no cursor ever sees them.
@@ -1340,6 +1347,9 @@ public:
     // Must not throw WriteConflictException, throwing a WriteConflictException will retry the
     // operation effectively skipping over this key.
     void updateIdAndTypeBits() override {
+        // _id index keys always use the Long format.
+        invariant(_idx.rsKeyFormat() == KeyFormat::Long);
+
         WT_CURSOR* c = _cursor->get();
         WT_ITEM item;
         auto ret = c->get_value(c, &item);
@@ -1365,10 +1375,10 @@ public:
 WiredTigerIndexUnique::WiredTigerIndexUnique(OperationContext* ctx,
                                              const std::string& uri,
                                              StringData ident,
+                                             KeyFormat rsKeyFormat,
                                              const IndexDescriptor* desc,
                                              bool isReadOnly)
-    : WiredTigerIndex(ctx, uri, ident, KeyFormat::Long, desc, isReadOnly),
-      _partial(desc->isPartial()) {
+    : WiredTigerIndex(ctx, uri, ident, rsKeyFormat, desc, isReadOnly), _partial(desc->isPartial()) {
     // _id indexes must use WiredTigerIdIndex
     invariant(!isIdIndex());
     // All unique indexes should be in the timestamp-safe format version as of version 4.2.
@@ -1488,6 +1498,7 @@ Status WiredTigerIdIndex::_insert(OperationContext* opCtx,
                                   WT_CURSOR* c,
                                   const KeyString::Value& keyString,
                                   bool dupsAllowed) {
+    invariant(KeyFormat::Long == _rsKeyFormat);
     invariant(!dupsAllowed);
     const RecordId id =
         KeyString::decodeRecordIdLongAtEnd(keyString.getBuffer(), keyString.getSize());
@@ -1534,8 +1545,9 @@ Status WiredTigerIndexUnique::_insert(OperationContext* opCtx,
     if (!dupsAllowed) {
         // A prefix key is KeyString of index key. It is the component of the index entry that
         // should be unique.
-        auto sizeWithoutRecordId =
-            KeyString::sizeWithoutRecordIdLongAtEnd(keyString.getBuffer(), keyString.getSize());
+        auto sizeWithoutRecordId = (_rsKeyFormat == KeyFormat::Long)
+            ? KeyString::sizeWithoutRecordIdLongAtEnd(keyString.getBuffer(), keyString.getSize())
+            : KeyString::sizeWithoutRecordIdStrAtEnd(keyString.getBuffer(), keyString.getSize());
         WiredTigerItem prefixKeyItem(keyString.getBuffer(), sizeWithoutRecordId);
 
         // First phase inserts the prefix key to prohibit concurrent insertions of same key
@@ -1611,6 +1623,7 @@ void WiredTigerIdIndex::_unindex(OperationContext* opCtx,
                                  WT_CURSOR* c,
                                  const KeyString::Value& keyString,
                                  bool dupsAllowed) {
+    invariant(KeyFormat::Long == _rsKeyFormat);
     const RecordId id =
         KeyString::decodeRecordIdLongAtEnd(keyString.getBuffer(), keyString.getSize());
     invariant(id.isValid());
@@ -1701,6 +1714,12 @@ void WiredTigerIndexUnique::_unindex(OperationContext* opCtx,
         return;
     }
 
+    if (KeyFormat::String == _rsKeyFormat) {
+        // This is a unique index on a clustered collection. These indexes will only have keys
+        // in the timestamp safe format where the RecordId is appended at the end of the key.
+        return;
+    }
+
     // After a rolling upgrade an index can have keys from both timestamp unsafe (old) and
     // timestamp safe (new) unique indexes. Old format keys just had the index key while new
     // format key has index key + Record id. WT_NOTFOUND is possible if index key is in old format.
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_index.h b/src/mongo/db/storage/wiredtiger/wiredtiger_index.h
index f37605e784f..a109b3ae5f4 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_index.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_index.h
@@ -201,6 +201,7 @@ public:
     WiredTigerIndexUnique(OperationContext* ctx,
                           const std::string& uri,
                           StringData ident,
+                          KeyFormat rsKeyFormat,
                           const IndexDescriptor* desc,
                           bool readOnly = false);
 
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
index bcc74d25591..b46954184ea 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
@@ -1579,12 +1579,12 @@ std::unique_ptr<SortedDataInterface> WiredTigerKVEngine::getSortedDataInterface(
         invariant(!collOptions.clusteredIndex);
         return std::make_unique<WiredTigerIdIndex>(opCtx, _uri(ident), ident, desc, _readOnly);
     }
+    auto keyFormat = (collOptions.clusteredIndex) ? KeyFormat::String : KeyFormat::Long;
     if (desc->unique()) {
-        invariant(!collOptions.clusteredIndex);
-        return std::make_unique<WiredTigerIndexUnique>(opCtx, _uri(ident), ident, desc, _readOnly);
+        return std::make_unique<WiredTigerIndexUnique>(
+            opCtx, _uri(ident), ident, keyFormat, desc, _readOnly);
     }
 
-    auto keyFormat = (collOptions.clusteredIndex) ? KeyFormat::String : KeyFormat::Long;
     return std::make_unique<WiredTigerIndexStandard>(
         opCtx, _uri(ident), ident, keyFormat, desc, _readOnly);
 }
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_standard_index_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_standard_index_test.cpp
index 09c654ca822..0ecb7d90806 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_standard_index_test.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_standard_index_test.cpp
@@ -130,8 +130,8 @@ public:
         invariantWTOK(WiredTigerIndex::Create(&opCtx, uri, result.getValue()));
 
         if (unique) {
-            invariant(keyFormat == KeyFormat::Long);
-            return std::make_unique<WiredTigerIndexUnique>(&opCtx, uri, "" /* ident */, &desc);
+            return std::make_unique<WiredTigerIndexUnique>(
+                &opCtx, uri, "" /* ident */, keyFormat, &desc);
         }
         return std::make_unique<WiredTigerIndexStandard>(
             &opCtx, uri, "" /* ident */, keyFormat, &desc);
diff --git a/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp b/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp
index 79d8483ab6f..db4e0295c61 100644
--- a/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp
+++ b/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp
@@ -112,6 +112,13 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx,
                           "TTL indexes are not supported on time-series collections");
             }
 
+
+            if (elem.fieldNameStringData() == IndexDescriptor::kUniqueFieldName) {
+                uassert(ErrorCodes::InvalidOptions,
+                        "Unique indexes are not supported on time-series collections",
+                        !elem.trueValue());
+            }
+
             if (elem.fieldNameStringData() == NewIndexSpec::kKeyFieldName) {
                 auto pluginName = IndexNames::findPluginName(elem.Obj());
                 uassert(ErrorCodes::InvalidOptions,
author	Louis Williams <louis.williams@mongodb.com>	2021-11-05 15:08:40 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2021-11-05 16:09:45 +0000
commit	7bcb659c013aaf44e1edab7b8a102a738c0bfb8c (patch)
tree	89d69d8c8bc4eb13bc6523ef89cba8f64b5b6e94
parent	43cba1cc995ffc6436493246b10146365d593b8e (diff)
download	mongo-7bcb659c013aaf44e1edab7b8a102a738c0bfb8c.tar.gz