summary refs log tree commit diff
diff options
context:
space:
mode:
author David Storch <david.storch@mongodb.com> 2020-04-09 17:24:32 -0400
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-05-07 14:15:41 +0000
commit 0e8998afb31300f39bc63af4a745a437b07ff977 (patch)
tree 861ac556d7878104b236f503d9997f764927ece6
parent ad1881107da994e533954a52633aa2a404ebb9b1 (diff)
download mongo-0e8998afb31300f39bc63af4a745a437b07ff977.tar.gz
SERVER-46810 Hex encode collation keys and invalid UTF-8 in dup key error messages
Also changes the error message to include the index's collation, in order to help users interpret the collation keys. (cherry picked from commit 9dbaf78c605a576db8b15895c32aedc3e07d7ec8)
-rw-r--r-- src/mongo/db/index/index_access_method.cpp 3
-rw-r--r-- src/mongo/db/storage/biggie/biggie_sorted_impl.cpp 16
-rw-r--r-- src/mongo/db/storage/biggie/biggie_sorted_impl.h 5
-rw-r--r-- src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl.cpp 36
-rw-r--r-- src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl.h 1
-rw-r--r-- src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl_test.cpp 3
-rw-r--r-- src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_engine.cpp 1
-rw-r--r-- src/mongo/db/storage/index_entry_comparison.cpp 50
-rw-r--r-- src/mongo/db/storage/index_entry_comparison.h 3
-rw-r--r-- src/mongo/db/storage/index_entry_comparison_test.cpp 58
-rw-r--r-- src/mongo/db/storage/mobile/mobile_index.cpp 35
-rw-r--r-- src/mongo/db/storage/mobile/mobile_index.h 1
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp 27
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_index.h 1
14 files changed, 194 insertions, 46 deletions
diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp
index 10d61556e12..5f4a6d7dd92 100644
--- a/src/mongo/db/index/index_access_method.cpp
+++ b/src/mongo/db/index/index_access_method.cpp
@@ -698,7 +698,8 @@ Status AbstractIndexAccessMethod::commitBulk(OperationContext* opCtx,
return buildDupKeyErrorStatus(data.first,
_descriptor->parentNS(),
_descriptor->indexName(),
- _descriptor->keyPattern());
+ _descriptor->keyPattern(),
+ _descriptor->collation());
}
}
diff --git a/src/mongo/db/storage/biggie/biggie_sorted_impl.cpp b/src/mongo/db/storage/biggie/biggie_sorted_impl.cpp
index c000336928d..32f75c0ee6f 100644
--- a/src/mongo/db/storage/biggie/biggie_sorted_impl.cpp
+++ b/src/mongo/db/storage/biggie/biggie_sorted_impl.cpp
@@ -166,7 +166,8 @@ SortedDataBuilderInterface::SortedDataBuilderInterface(OperationContext* opCtx,
const std::string& identEnd,
const NamespaceString& collectionNamespace,
const std::string& indexName,
- const BSONObj& keyPattern)
+ const BSONObj& keyPattern,
+ const BSONObj& collation)
: _opCtx(opCtx),
_unique(unique),
_dupsAllowed(dupsAllowed),
@@ -176,6 +177,7 @@ SortedDataBuilderInterface::SortedDataBuilderInterface(OperationContext* opCtx,
_collectionNamespace(collectionNamespace),
_indexName(indexName),
_keyPattern(keyPattern),
+ _collation(collation),
_hasLast(false),
_lastKeyToString(""),
_lastRID(-1) {}
@@ -214,7 +216,8 @@ StatusWith<SpecialFormatInserted> SortedDataBuilderInterface::addKey(const BSONO
if (twoKeyCmp == 0 && twoRIDCmp != 0) {
if (!_dupsAllowed) {
- return buildDupKeyErrorStatus(key, _collectionNamespace, _indexName, _keyPattern);
+ return buildDupKeyErrorStatus(
+ key, _collectionNamespace, _indexName, _keyPattern, _collation);
}
// Duplicate index entries are allowed on this unique index, so we put the RecordId in the
// KeyString until the unique constraint is resolved.
@@ -249,7 +252,8 @@ SortedDataBuilderInterface* SortedDataInterface::getBulkBuilder(OperationContext
_identEnd,
_collectionNamespace,
_indexName,
- _keyPattern);
+ _keyPattern,
+ _collation);
}
// We append \1 to all idents we get, and therefore the KeyString with ident + \0 will only be
@@ -266,6 +270,7 @@ SortedDataInterface::SortedDataInterface(OperationContext* opCtx,
_collectionNamespace(desc->parentNS()),
_indexName(desc->indexName()),
_keyPattern(desc->keyPattern()),
+ _collation(desc->collation()),
_isUnique(desc->unique()),
_isPartial(desc->isPartial()) {
// This is the string representation of the KeyString before elements in this ident, which is
@@ -321,7 +326,7 @@ StatusWith<SpecialFormatInserted> SortedDataInterface::insert(OperationContext*
// There was an attempt to create an index entry with a different RecordId while
// dups were not allowed.
return buildDupKeyErrorStatus(
- key, _collectionNamespace, _indexName, _keyPattern);
+ key, _collectionNamespace, _indexName, _keyPattern, _collation);
}
} else {
return StatusWith<SpecialFormatInserted>(
@@ -445,7 +450,8 @@ Status SortedDataInterface::dupKeyCheck(OperationContext* opCtx, const BSONObj&
auto next =
keyStringToIndexKeyEntry(lowerBoundIterator->first, lowerBoundIterator->second, _order);
if (key.woCompare(next.key, _order, false) == 0) {
- return buildDupKeyErrorStatus(key, _collectionNamespace, _indexName, _keyPattern);
+ return buildDupKeyErrorStatus(
+ key, _collectionNamespace, _indexName, _keyPattern, _collation);
}
return Status::OK();
diff --git a/src/mongo/db/storage/biggie/biggie_sorted_impl.h b/src/mongo/db/storage/biggie/biggie_sorted_impl.h
index 5ad19e9cded..0c5f8a78d38 100644
--- a/src/mongo/db/storage/biggie/biggie_sorted_impl.h
+++ b/src/mongo/db/storage/biggie/biggie_sorted_impl.h
@@ -46,7 +46,8 @@ public:
const std::string& identEnd,
const NamespaceString& collectionNamespace,
const std::string& indexName,
- const BSONObj& keyPattern);
+ const BSONObj& keyPattern,
+ const BSONObj& collation);
SpecialFormatInserted commit(bool mayInterrupt) override;
virtual StatusWith<SpecialFormatInserted> addKey(const BSONObj& key, const RecordId& loc);
@@ -63,6 +64,7 @@ private:
const NamespaceString _collectionNamespace;
const std::string _indexName;
const BSONObj _keyPattern;
+ const BSONObj _collation;
// Whether or not we've already added something before.
bool _hasLast;
// This is the KeyString of the last key added.
@@ -192,6 +194,7 @@ private:
const NamespaceString _collectionNamespace;
const std::string _indexName;
const BSONObj _keyPattern;
+ const BSONObj _collation;
// These are the keystring representations of the _prefix and the _identEnd.
std::string _KSForIdentStart;
std::string _KSForIdentEnd;
diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl.cpp b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl.cpp
index fef05e67b96..3b5f6f2cd02 100644
--- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl.cpp
+++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl.cpp
@@ -93,14 +93,16 @@ public:
bool dupsAllowed,
const NamespaceString& collectionNamespace,
const std::string& indexName,
- const BSONObj& keyPattern)
+ const BSONObj& keyPattern,
+ const BSONObj& collation)
: _data(data),
_currentKeySize(currentKeySize),
_dupsAllowed(dupsAllowed),
_comparator(_data->key_comp()),
_collectionNamespace(collectionNamespace),
_indexName(indexName),
- _keyPattern(keyPattern) {
+ _keyPattern(keyPattern),
+ _collation(collation) {
invariant(_data->empty());
}
@@ -117,7 +119,8 @@ public:
return Status(ErrorCodes::InternalError,
"expected ascending (key, RecordId) order in bulk builder");
} else if (!_dupsAllowed && cmp == 0 && loc != _last->loc) {
- return buildDupKeyErrorStatus(key, _collectionNamespace, _indexName, _keyPattern);
+ return buildDupKeyErrorStatus(
+ key, _collectionNamespace, _indexName, _keyPattern, _collation);
}
}
@@ -139,6 +142,7 @@ private:
const NamespaceString _collectionNamespace;
const std::string _indexName;
const BSONObj _keyPattern;
+ const BSONObj _collation;
};
class EphemeralForTestBtreeImpl : public SortedDataInterface {
@@ -147,18 +151,25 @@ public:
bool isUnique,
const NamespaceString& collectionNamespace,
const std::string& indexName,
- const BSONObj& keyPattern)
+ const BSONObj& keyPattern,
+ const BSONObj& collation)
: _data(data),
_isUnique(isUnique),
_collectionNamespace(collectionNamespace),
_indexName(indexName),
- _keyPattern(keyPattern) {
+ _keyPattern(keyPattern),
+ _collation(collation) {
_currentKeySize = 0;
}
virtual SortedDataBuilderInterface* getBulkBuilder(OperationContext* opCtx, bool dupsAllowed) {
- return new EphemeralForTestBtreeBuilderImpl(
- _data, &_currentKeySize, dupsAllowed, _collectionNamespace, _indexName, _keyPattern);
+ return new EphemeralForTestBtreeBuilderImpl(_data,
+ &_currentKeySize,
+ dupsAllowed,
+ _collectionNamespace,
+ _indexName,
+ _keyPattern,
+ _collation);
}
virtual StatusWith<SpecialFormatInserted> insert(OperationContext* opCtx,
@@ -171,7 +182,8 @@ public:
// TODO optimization: save the iterator from the dup-check to speed up insert
if (!dupsAllowed && keyExists(*_data, key))
- return buildDupKeyErrorStatus(key, _collectionNamespace, _indexName, _keyPattern);
+ return buildDupKeyErrorStatus(
+ key, _collectionNamespace, _indexName, _keyPattern, _collation);
IndexKeyEntry entry(key.getOwned(), loc);
if (_data->insert(entry).second) {
@@ -217,7 +229,8 @@ public:
virtual Status dupKeyCheck(OperationContext* opCtx, const BSONObj& key) {
invariant(!hasFieldNames(key));
if (isDup(*_data, key))
- return buildDupKeyErrorStatus(key, _collectionNamespace, _indexName, _keyPattern);
+ return buildDupKeyErrorStatus(
+ key, _collectionNamespace, _indexName, _keyPattern, _collation);
return Status::OK();
}
@@ -516,6 +529,7 @@ private:
const NamespaceString _collectionNamespace;
const std::string _indexName;
const BSONObj _keyPattern;
+ const BSONObj _collation;
};
} // namespace
@@ -526,6 +540,7 @@ SortedDataInterface* getEphemeralForTestBtreeImpl(const Ordering& ordering,
const NamespaceString& collectionNamespace,
const std::string& indexName,
const BSONObj& keyPattern,
+ const BSONObj& collation,
std::shared_ptr<void>* dataInOut) {
invariant(dataInOut);
if (!*dataInOut) {
@@ -535,7 +550,8 @@ SortedDataInterface* getEphemeralForTestBtreeImpl(const Ordering& ordering,
isUnique,
collectionNamespace,
indexName,
- keyPattern);
+ keyPattern,
+ collation);
}
} // namespace mongo
diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl.h b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl.h
index 2b8b566e711..03b797caa9e 100644
--- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl.h
+++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl.h
@@ -45,6 +45,7 @@ SortedDataInterface* getEphemeralForTestBtreeImpl(const Ordering& ordering,
const NamespaceString& collectionNamespace,
const std::string& indexName,
const BSONObj& keyPattern,
+ const BSONObj& collation,
std::shared_ptr<void>* dataInOut);
} // namespace mongo
diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl_test.cpp b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl_test.cpp
index ad0b5b12d71..545c284aef8 100644
--- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl_test.cpp
+++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl_test.cpp
@@ -50,7 +50,8 @@ public:
unique,
NamespaceString("test.EphemeralForTest"),
"indexName",
- BSONObj(),
+ BSONObj{},
+ BSONObj{},
&_data));
}
diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_engine.cpp b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_engine.cpp
index 6eed77b808e..e3741a3fc3e 100644
--- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_engine.cpp
+++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_engine.cpp
@@ -103,6 +103,7 @@ SortedDataInterface* EphemeralForTestEngine::getSortedDataInterface(OperationCon
desc->parentNS(),
desc->indexName(),
desc->keyPattern(),
+ desc->collation(),
&_dataMap[ident]);
}
diff --git a/src/mongo/db/storage/index_entry_comparison.cpp b/src/mongo/db/storage/index_entry_comparison.cpp
index ac769623941..45c6356ce01 100644
--- a/src/mongo/db/storage/index_entry_comparison.cpp
+++ b/src/mongo/db/storage/index_entry_comparison.cpp
@@ -35,6 +35,8 @@
#include "mongo/db/jsobj.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/storage/duplicate_key_error_info.h"
+#include "mongo/util/hex.h"
+#include "mongo/util/text.h"
namespace mongo {
@@ -171,14 +173,29 @@ BSONObj IndexEntryComparison::makeQueryObject(const BSONObj& keyPrefix,
Status buildDupKeyErrorStatus(const BSONObj& key,
const NamespaceString& collectionNamespace,
const std::string& indexName,
- const BSONObj& keyPattern) {
+ const BSONObj& keyPattern,
+ const BSONObj& indexCollation) {
+ const bool hasCollation = !indexCollation.isEmpty();
+
StringBuilder sb;
sb << "E11000 duplicate key error";
sb << " collection: " << collectionNamespace;
sb << " index: " << indexName;
+ if (hasCollation) {
+ sb << " collation: " << indexCollation;
+ }
sb << " dup key: ";
- BSONObjBuilder builder;
+ // For the purpose of producing a useful error message, generate a representation of the key
+ // with field names hydrated and with invalid UTF-8 hex-encoded.
+ BSONObjBuilder builderForErrmsg;
+
+ // Used to build a version of the key after hydrating with field names but without hex encoding
+ // invalid UTF-8. This key is attached to the extra error info and consumed by callers who may
+ // wish to retry on duplicate key errors. The field names are rehydrated so that we don't return
+ // BSON with duplicate key names to clients.
+ BSONObjBuilder builderForErrorExtraInfo;
+
// key is a document with forms like: '{ : 123}', '{ : {num: 123} }', '{ : 123, : "str" }'
BSONObjIterator keyValueIt(key);
// keyPattern is a document with only one level. e.g. '{a : 1, b : -1}', '{a.b : 1}'
@@ -191,12 +208,33 @@ Status buildDupKeyErrorStatus(const BSONObj& key,
if (keyNameElem.eoo())
break;
- builder.appendAs(keyValueElem, keyNameElem.fieldName());
+ builderForErrorExtraInfo.appendAs(keyValueElem, keyNameElem.fieldName());
+
+ // If the duplicate key value contains a string, then it's possible that the string contains
+ // binary data which is not valid UTF-8. This is true for all indexes with a collation,
+ // since the index stores collation keys rather than raw user strings. But it's also
+ // possible that the application has stored binary data inside a string, which the system
+ // has never rejected.
+ //
+ // If the string in the key is invalid UTF-8, then we hex encode it before adding it to the
+ // error message so that the driver can assume valid UTF-8 when reading the reply.
+ const bool shouldHexEncode = keyValueElem.type() == BSONType::String &&
+ (hasCollation || !isValidUTF8(keyValueElem.valueStringData()));
+
+ if (shouldHexEncode) {
+ auto stringToEncode = keyValueElem.valueStringData();
+ builderForErrmsg.append(
+ keyNameElem.fieldName(),
+ str::stream() << "0x"
+ << toHexLower(stringToEncode.rawData(), stringToEncode.size()));
+ } else {
+ builderForErrmsg.appendAs(keyValueElem, keyNameElem.fieldName());
+ }
}
- auto keyValueWithName = builder.obj();
- sb << keyValueWithName;
- return Status(DuplicateKeyErrorInfo(keyPattern, keyValueWithName), sb.str());
+ sb << builderForErrmsg.obj();
+
+ return Status(DuplicateKeyErrorInfo(keyPattern, builderForErrorExtraInfo.obj()), sb.str());
}
} // namespace mongo
diff --git a/src/mongo/db/storage/index_entry_comparison.h b/src/mongo/db/storage/index_entry_comparison.h
index 649333a36ab..2dfeb9dbb19 100644
--- a/src/mongo/db/storage/index_entry_comparison.h
+++ b/src/mongo/db/storage/index_entry_comparison.h
@@ -212,6 +212,7 @@ private:
Status buildDupKeyErrorStatus(const BSONObj& key,
const NamespaceString& collectionNamespace,
const std::string& indexName,
- const BSONObj& keyPattern);
+ const BSONObj& keyPattern,
+ const BSONObj& indexCollation);
} // namespace mongo
diff --git a/src/mongo/db/storage/index_entry_comparison_test.cpp b/src/mongo/db/storage/index_entry_comparison_test.cpp
index 0cfd6b830fc..5ef646b36e0 100644
--- a/src/mongo/db/storage/index_entry_comparison_test.cpp
+++ b/src/mongo/db/storage/index_entry_comparison_test.cpp
@@ -32,6 +32,7 @@
#include "mongo/db/storage/duplicate_key_error_info.h"
#include "mongo/db/storage/index_entry_comparison.h"
#include "mongo/unittest/unittest.h"
+#include "mongo/util/hex.h"
namespace mongo {
@@ -42,7 +43,7 @@ TEST(IndexEntryComparison, BuildDupKeyErrorStatusProducesExpectedErrorObject) {
auto keyValue = BSON("" << 10 << ""
<< "abc");
- auto dupKeyStatus = buildDupKeyErrorStatus(keyValue, collNss, indexName, keyPattern);
+ auto dupKeyStatus = buildDupKeyErrorStatus(keyValue, collNss, indexName, keyPattern, BSONObj{});
ASSERT_NOT_OK(dupKeyStatus);
ASSERT_EQUALS(dupKeyStatus.code(), ErrorCodes::DuplicateKey);
@@ -61,4 +62,59 @@ TEST(IndexEntryComparison, BuildDupKeyErrorStatusProducesExpectedErrorObject) {
BSON("keyPattern" << keyPattern << "keyValue" << keyValueWithFieldName));
}
+TEST(IndexEntryComparison, BuildDupKeyErrorMessageIncludesCollationAndHexEncodedCollationKey) {
+ StringData mockCollationKey("bar");
+
+ NamespaceString collNss("test.foo");
+ std::string indexName("a_1");
+ auto keyPattern = BSON("a" << 1);
+ auto keyValue = BSON("" << mockCollationKey);
+ auto collation = BSON("locale"
+ << "en_US");
+
+ auto dupKeyStatus = buildDupKeyErrorStatus(keyValue, collNss, indexName, keyPattern, collation);
+ ASSERT_NOT_OK(dupKeyStatus);
+ ASSERT_EQUALS(dupKeyStatus.code(), ErrorCodes::DuplicateKey);
+
+ ASSERT(dupKeyStatus.reason().find("collation:") != std::string::npos);
+
+ // Verify that the collation key is hex encoded in the error message.
+ std::string expectedHexEncoding =
+ "0x" + toHexLower(mockCollationKey.rawData(), mockCollationKey.size());
+ ASSERT(dupKeyStatus.reason().find(expectedHexEncoding) != std::string::npos);
+
+ // But no hex encoding should have taken place inside the key attached to the extra error info.
+ auto extraInfo = dupKeyStatus.extraInfo<DuplicateKeyErrorInfo>();
+ ASSERT(extraInfo);
+ ASSERT_BSONOBJ_EQ(extraInfo->getKeyPattern(), keyPattern);
+ ASSERT_BSONOBJ_EQ(extraInfo->getDuplicatedKeyValue(), BSON("a" << mockCollationKey));
+}
+
+TEST(IndexEntryComparison, BuildDupKeyErrorMessageHexEncodesInvalidUTF8ForIndexWithoutCollation) {
+ NamespaceString collNss("test.foo");
+ std::string indexName("a_1");
+ auto keyPattern = BSON("a" << 1);
+
+ // The byte sequence c0 16 is invalid UTF-8: 0x16 is not a valid continuation byte, so it
+ // cannot follow the lead byte 0xc0. Moreover, the byte 0xc0 is always illegal in UTF-8
+ // since it would only ever be used for an overlong two-byte encoding of an ASCII character.
+ auto keyValue = BSON(""
+ << "\xc0\x16");
+ auto dupKeyStatus = buildDupKeyErrorStatus(keyValue, collNss, indexName, keyPattern, BSONObj{});
+ ASSERT_NOT_OK(dupKeyStatus);
+ ASSERT_EQUALS(dupKeyStatus.code(), ErrorCodes::DuplicateKey);
+
+ // We expect to find a hex-encoded version of the illegal UTF-8 byte sequence inside the error
+ // string.
+ ASSERT(dupKeyStatus.reason().find("0xc016") != std::string::npos);
+
+ // In the extra error info, we expect that no hex encoding has taken place.
+ auto extraInfo = dupKeyStatus.extraInfo<DuplicateKeyErrorInfo>();
+ ASSERT(extraInfo);
+ ASSERT_BSONOBJ_EQ(extraInfo->getKeyPattern(), keyPattern);
+ ASSERT_BSONOBJ_EQ(extraInfo->getDuplicatedKeyValue(),
+ BSON("a"
+ << "\xc0\x16"));
+}
+
} // namespace mongo
diff --git a/src/mongo/db/storage/mobile/mobile_index.cpp b/src/mongo/db/storage/mobile/mobile_index.cpp
index a1c54273ce0..a95e5e53d64 100644
--- a/src/mongo/db/storage/mobile/mobile_index.cpp
+++ b/src/mongo/db/storage/mobile/mobile_index.cpp
@@ -78,7 +78,8 @@ MobileIndex::MobileIndex(OperationContext* opCtx,
_ident(ident),
_collectionNamespace(desc->parentNS()),
_indexName(desc->indexName()),
- _keyPattern(desc->keyPattern()) {}
+ _keyPattern(desc->keyPattern()),
+ _collation(desc->collation()) {}
StatusWith<SpecialFormatInserted> MobileIndex::insert(OperationContext* opCtx,
const BSONObj& key,
@@ -115,7 +116,8 @@ StatusWith<SpecialFormatInserted> MobileIndex::doInsert(OperationContext* opCtx,
// Return error if duplicate key inserted in a unique index.
BSONObj bson =
KeyString::toBson(key.getBuffer(), key.getSize(), _ordering, key.getTypeBits());
- return buildDupKeyErrorStatus(bson, _collectionNamespace, _indexName, _keyPattern);
+ return buildDupKeyErrorStatus(
+ bson, _collectionNamespace, _indexName, _keyPattern, _collation);
} else {
// A record with same key could already be present in a standard index, that is OK. This
// can happen when building a background index while documents are being written in
@@ -239,7 +241,8 @@ Status MobileIndex::dupKeyCheck(OperationContext* opCtx, const BSONObj& key) {
invariant(_isUnique);
if (_isDup(opCtx, key))
- return buildDupKeyErrorStatus(key, _collectionNamespace, _indexName, _keyPattern);
+ return buildDupKeyErrorStatus(
+ key, _collectionNamespace, _indexName, _keyPattern, _collation);
return Status::OK();
}
@@ -263,13 +266,15 @@ public:
bool dupsAllowed,
const NamespaceString& collectionNamespace,
const std::string& indexName,
- const BSONObj& keyPattern)
+ const BSONObj& keyPattern,
+ const BSONObj& collation)
: _index(index),
_opCtx(opCtx),
_dupsAllowed(dupsAllowed),
_collectionNamespace(collectionNamespace),
_indexName(indexName),
- _keyPattern(keyPattern) {}
+ _keyPattern(keyPattern),
+ _collation(collation) {}
virtual ~BulkBuilderBase() {}
@@ -299,7 +304,8 @@ protected:
Status _checkNextKey(const BSONObj& key) {
const int cmp = key.woCompare(_lastKey, _index->getOrdering());
if (!_dupsAllowed && cmp == 0) {
- return buildDupKeyErrorStatus(key, _collectionNamespace, _indexName, _keyPattern);
+ return buildDupKeyErrorStatus(
+ key, _collectionNamespace, _indexName, _keyPattern, _collation);
} else if (cmp < 0) {
return Status(ErrorCodes::InternalError, "expected higher RecordId in bulk builder");
}
@@ -316,6 +322,7 @@ protected:
const NamespaceString _collectionNamespace;
const std::string _indexName;
const BSONObj _keyPattern;
+ const BSONObj _collation;
};
/**
@@ -328,8 +335,10 @@ public:
bool dupsAllowed,
const NamespaceString& collectionNamespace,
const std::string& indexName,
- const BSONObj& keyPattern)
- : BulkBuilderBase(index, opCtx, dupsAllowed, collectionNamespace, indexName, keyPattern) {}
+ const BSONObj& keyPattern,
+ const BSONObj& collation)
+ : BulkBuilderBase(
+ index, opCtx, dupsAllowed, collectionNamespace, indexName, keyPattern, collation) {}
protected:
StatusWith<SpecialFormatInserted> _addKey(const BSONObj& key, const RecordId& recId) override {
@@ -349,8 +358,10 @@ public:
bool dupsAllowed,
const NamespaceString& collectionNamespace,
const std::string& indexName,
- const BSONObj& keyPattern)
- : BulkBuilderBase(index, opCtx, dupsAllowed, collectionNamespace, indexName, keyPattern) {
+ const BSONObj& keyPattern,
+ const BSONObj& collation)
+ : BulkBuilderBase(
+ index, opCtx, dupsAllowed, collectionNamespace, indexName, keyPattern, collation) {
// Replication is not supported so dups are not allowed.
invariant(!dupsAllowed);
}
@@ -655,7 +666,7 @@ SortedDataBuilderInterface* MobileIndexStandard::getBulkBuilder(OperationContext
bool dupsAllowed) {
invariant(dupsAllowed);
return new BulkBuilderStandard(
- this, opCtx, dupsAllowed, _collectionNamespace, _indexName, _keyPattern);
+ this, opCtx, dupsAllowed, _collectionNamespace, _indexName, _keyPattern, _collation);
}
std::unique_ptr<SortedDataInterface::Cursor> MobileIndexStandard::newCursor(OperationContext* opCtx,
@@ -694,7 +705,7 @@ SortedDataBuilderInterface* MobileIndexUnique::getBulkBuilder(OperationContext*
// Replication is not supported so dups are not allowed.
invariant(!dupsAllowed);
return new BulkBuilderUnique(
- this, opCtx, dupsAllowed, _collectionNamespace, _indexName, _keyPattern);
+ this, opCtx, dupsAllowed, _collectionNamespace, _indexName, _keyPattern, _collation);
}
std::unique_ptr<SortedDataInterface::Cursor> MobileIndexUnique::newCursor(OperationContext* opCtx,
diff --git a/src/mongo/db/storage/mobile/mobile_index.h b/src/mongo/db/storage/mobile/mobile_index.h
index 36e0736465b..283f8dd3486 100644
--- a/src/mongo/db/storage/mobile/mobile_index.h
+++ b/src/mongo/db/storage/mobile/mobile_index.h
@@ -141,6 +141,7 @@ protected:
const NamespaceString _collectionNamespace;
const std::string _indexName;
const BSONObj _keyPattern;
+ const BSONObj _collation;
};
class MobileIndexStandard final : public MobileIndex {
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
index df3a370d1d0..b1021a7dd43 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
@@ -268,6 +268,7 @@ WiredTigerIndex::WiredTigerIndex(OperationContext* ctx,
_collectionNamespace(desc->parentNS()),
_indexName(desc->indexName()),
_keyPattern(desc->keyPattern()),
+ _collation(desc->collation()),
_prefix(prefix),
_isIdIndex(desc->isIdIndex()) {
auto version = WiredTigerUtil::checkApplicationMetadataFormatVersion(
@@ -420,7 +421,8 @@ Status WiredTigerIndex::dupKeyCheck(OperationContext* opCtx, const BSONObj& key)
WT_CURSOR* c = curwrap.get();
if (isDup(opCtx, c, key))
- return buildDupKeyErrorStatus(key, _collectionNamespace, _indexName, _keyPattern);
+ return buildDupKeyErrorStatus(
+ key, _collectionNamespace, _indexName, _keyPattern, _collation);
return Status::OK();
}
@@ -686,8 +688,11 @@ private:
const int cmp = newKey.woCompare(_previousKey, _ordering);
if (cmp == 0) {
// Duplicate found!
- return buildDupKeyErrorStatus(
- newKey, _idx->collectionNamespace(), _idx->indexName(), _idx->keyPattern());
+ return buildDupKeyErrorStatus(newKey,
+ _idx->collectionNamespace(),
+ _idx->indexName(),
+ _idx->keyPattern(),
+ _idx->_collation);
} else {
// _previousKey.isEmpty() is only true on the first call to addKey().
// newKey must be > the last key
@@ -736,8 +741,11 @@ private:
} else {
// Dup found!
if (!_dupsAllowed) {
- return buildDupKeyErrorStatus(
- newKey, _idx->collectionNamespace(), _idx->indexName(), _idx->keyPattern());
+ return buildDupKeyErrorStatus(newKey,
+ _idx->collectionNamespace(),
+ _idx->indexName(),
+ _idx->keyPattern(),
+ _idx->_collation);
}
// If we get here, we are in the weird mode where dups are allowed on a unique
@@ -1425,7 +1433,8 @@ StatusWith<SpecialFormatInserted> WiredTigerIndexUnique::_insertTimestampUnsafe(
}
if (!dupsAllowed)
- return buildDupKeyErrorStatus(key, _collectionNamespace, _indexName, _keyPattern);
+ return buildDupKeyErrorStatus(
+ key, _collectionNamespace, _indexName, _keyPattern, _collation);
if (!insertedId) {
// This id is higher than all currently in the index for this key
@@ -1471,7 +1480,8 @@ StatusWith<SpecialFormatInserted> WiredTigerIndexUnique::_insertTimestampSafe(
// An entry with prefix key already exists. This can happen only during rolling upgrade when
// both timestamp unsafe and timestamp safe index format keys could be present.
if (ret == WT_DUPLICATE_KEY) {
- return buildDupKeyErrorStatus(key, _collectionNamespace, _indexName, _keyPattern);
+ return buildDupKeyErrorStatus(
+ key, _collectionNamespace, _indexName, _keyPattern, _collation);
}
invariantWTOK(ret);
@@ -1484,7 +1494,8 @@ StatusWith<SpecialFormatInserted> WiredTigerIndexUnique::_insertTimestampSafe(
// Second phase looks up for existence of key to avoid insertion of duplicate key
if (_keyExists(opCtx, c, prefixKey))
- return buildDupKeyErrorStatus(key, _collectionNamespace, _indexName, _keyPattern);
+ return buildDupKeyErrorStatus(
+ key, _collectionNamespace, _indexName, _keyPattern, _collation);
}
// Now create the table key/value, the actual data record.
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_index.h b/src/mongo/db/storage/wiredtiger/wiredtiger_index.h
index 854c8799b67..fdf30877940 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_index.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_index.h
@@ -187,6 +187,7 @@ protected:
const NamespaceString _collectionNamespace;
const std::string _indexName;
const BSONObj _keyPattern;
+ const BSONObj _collation;
KVPrefix _prefix;
bool _isIdIndex;
};